Archived
1
0
Fork 0
This repository has been archived on 2024-04-26. You can view files and clone it, but cannot push or open issues or pull requests.
akari-bot/modules/wiki/wikilib.py

102 lines
4.8 KiB
Python
Raw Normal View History

2020-08-12 16:01:34 +00:00
import aiohttp
2020-06-13 12:43:43 +00:00
import re
2020-08-01 03:25:34 +00:00
import traceback
2020-08-12 16:01:34 +00:00
import urllib
2020-08-18 13:12:38 +00:00
from modules.interwikilist import iwlist, iwlink
2020-08-12 16:01:34 +00:00
2020-08-12 08:01:00 +00:00
async def get_data(url: str, fmt: str):
    """Fetch *url* and decode the response body.

    *fmt* names the aiohttp response method used to parse the body
    (e.g. ``"json"`` or ``"text"``).  Raises ValueError when the response
    object has no such method.  The whole request is capped at 20 seconds.
    """
    timeout = aiohttp.ClientTimeout(total=20)
    async with aiohttp.ClientSession() as session:
        async with session.get(url, timeout=timeout) as req:
            parser = getattr(req, fmt, None)
            if parser is None:
                raise ValueError(f"NoSuchMethod: {fmt}")
            return await parser()
2020-08-13 07:09:51 +00:00
async def wiki1(wikilink, pagename):
    """Look up *pagename* on the MediaWiki site rooted at *wikilink*.

    Returns a user-facing string: the page URL plus a one-sentence extract,
    a search suggestion when the page is missing, or an error message.
    When the direct query blows up and the title looks interwiki-prefixed
    ("prefix:Title"), the lookup is retried through wiki2().
    """
    print(pagename)  # debug trace kept from the original
    getlinkurl = wikilink + 'api.php?action=query&format=json&prop=info&inprop=url&redirects&titles=' + pagename
    print(getlinkurl)
    file = await get_data(getlinkurl, "json")
    try:
        pages = file['query']['pages']
        pageid = sorted(pages.keys())[0]
        if int(pageid) == -1:
            # Negative page id: title is invalid, or the page does not exist.
            if 'invalid' in pages[pageid]:
                rs = re.sub('The requested page title contains invalid characters:',
                            '请求的页面标题包含非法字符:',
                            pages[pageid]['invalidreason'])
                return ('发生错误:“' + rs + '”。')
            elif 'missing' in pages[pageid]:
                # Page missing: offer a search suggestion — first via
                # generator=search, then list=search as a fallback.
                try:
                    try:
                        searchurl = wikilink + 'api.php?action=query&generator=search&gsrsearch=' + pagename + '&gsrsort=just_match&gsrenablerewrites&prop=info&gsrlimit=1&format=json'
                        getsecjson = await get_data(searchurl, "json")
                        secpages = getsecjson['query']['pages']
                        secpageid = sorted(secpages.keys())[0]
                        sectitle = secpages[secpageid]['title']
                        return ('找不到条目,您是否要找的是:' + sectitle + '')
                    except Exception:
                        searchurl = wikilink + 'api.php?action=query&list=search&srsearch=' + pagename + '&srwhat=text&srlimit=1&srenablerewrites=&format=json'
                        getsecjson = await get_data(searchurl, "json")
                        sectitle = getsecjson['query']['search'][0]['title']
                        return ('找不到条目,您是否要找的是:' + sectitle + '')
                except Exception:
                    return ('找不到条目。')
            else:
                # Neither 'invalid' nor 'missing': just echo a best-guess link.
                return ('您要的' + pagename + '' + wikilink + urllib.parse.quote(pagename.encode('UTF-8')))
        else:
            getfullurl = pages[pageid]['fullurl']
            geturlpagename = re.match(r'https?://.*?/(?:index.php/|wiki/|)(.*)', getfullurl, re.M | re.I)
            try:
                # BUGFIX: the original built this from getlinkurl (already a
                # complete api.php query URL) + '/api.php?...', yielding a
                # malformed request; root it at wikilink like every other call.
                descurl = wikilink + 'api.php?action=query&prop=extracts&exsentences=1&&explaintext&exsectionformat=wiki&format=json&titles=' + geturlpagename.group(1)
                loadtext = await get_data(descurl, "json")
                desc = loadtext['query']['pages'][pageid]['extract']
            except Exception:
                desc = ''  # the extract is best-effort; a missing one is not fatal
            try:
                # Re-attach a '#section' anchor from the requested title, if any.
                section = re.match(r'.*(\#.*)', pagename)
                getfullurl = pages[pageid]['fullurl'] + urllib.parse.quote(section.group(1).encode('UTF-8'))
            except Exception:
                getfullurl = pages[pageid]['fullurl']
            getfinalpagename = re.match(r'https?://.*?/(?:index.php/|wiki/|)(.*)', getfullurl)
            finalpagename = urllib.parse.unquote(getfinalpagename.group(1), encoding='UTF-8')
            finalpagename = finalpagename.replace('_', ' ')
            if finalpagename == pagename:
                rmlstlb = re.sub(r'\n$', '', getfullurl + '\n' + desc)
            else:
                # Resolved title differs (redirect/normalization): say so.
                rmlstlb = re.sub(r'\n$', '', '\n(重定向[' + pagename + ']至[' + finalpagename + ']\n' + getfullurl + '\n' + desc)
            # Collapse runs of blank lines.  The original applied the same
            # re.sub four times in a row; a loop handles runs of any length.
            while '\n\n' in rmlstlb:
                rmlstlb = rmlstlb.replace('\n\n', '\n')
            # Keep at most five lines; re.findall returns [] (not an exception)
            # for short text, so test emptiness instead of a bare except.
            rm5lline = re.findall(r'.*\n.*\n.*\n.*\n.*\n', rmlstlb)
            if rm5lline:
                result = rm5lline[0] + '\n...行数过多已截断。'
            else:
                result = rmlstlb
            return ('您要的' + pagename + "" + result)
    except Exception:
        # Direct lookup failed — perhaps the title carries an interwiki prefix.
        try:
            matchinterwiki = re.match(r'(.*?):(.*)', pagename)
            interwiki = matchinterwiki.group(1)
            if interwiki in iwlist():
                return (await wiki2(interwiki, matchinterwiki.group(2)))
            else:
                return ('发生错误:内容非法。')
        except Exception as e:
            traceback.print_exc()
            return ('发生错误:' + str(e))
2020-08-03 15:19:27 +00:00
2020-08-13 07:09:51 +00:00
async def wiki2(interwiki, str1):
    """Resolve the *interwiki* prefix to its base URL and look up *str1* there.

    Any failure (unknown prefix, network error, …) is logged and returned
    to the caller as a plain string.
    """
    try:
        return await wiki1(iwlink(interwiki), str1)
    except Exception as e:
        traceback.print_exc()
        return (str(e))