
Partial refactor to reduce overhead, plus bug fixes

This commit is contained in:
yzhh 2021-04-18 20:25:25 +08:00
parent 12c8e58e1c
commit 90f2858f13
4 changed files with 138 additions and 92 deletions

View file

@@ -161,7 +161,7 @@ async def set_start_wiki(kwargs: dict):
if Group in kwargs:
if check_permission(kwargs):
check = await check_wiki_available(command)
if check:
if check[0]:
result = WikiDB.add_start_wiki('start_wiki_link_group', kwargs[Group].id, check[0])
await sendMessage(kwargs, MessageChain.create([Plain(result + check[1])]))
else:
@@ -172,7 +172,7 @@ async def set_start_wiki(kwargs: dict):
await sendMessage(kwargs, MessageChain.create([Plain(result)]))
if Friend in kwargs:
check = await check_wiki_available(command)
if check:
if check[0]:
result = WikiDB.add_start_wiki('start_wiki_link_self', kwargs[Friend].id, check[0])
await sendMessage(kwargs, MessageChain.create([Plain(result + check[1])]))
else:
@@ -205,18 +205,18 @@ async def interwiki(kwargs: dict):
except:
await sendMessage(kwargs, 'Error: invalid command: ~wiki iw add <interwiki> <url>')
return
if check:
if check[0]:
result = WikiDB.config_custom_interwiki('add', table, target, iw[0],
check[0])
await sendMessage(kwargs, MessageChain.create([Plain(result + f'{iw[0]} > {check[1]}')]))
else:
result = 'Error: this wiki is not a valid MediaWiki, or the connection attempt timed out.'
if check[1] == 'Timeout':
result = 'Error: the connection attempt timed out.'
else:
result = 'Error: this site may not be a valid MediaWiki.'
link = re.match(r'^(https?://).*', iw[1])
if not link:
result = 'Error: the given link does not specify a protocol (links should start with http:// or https://).'
article = re.match(r'.*/wiki/', iw[1])
if article:
result += '\nTip: the given link looks like an article path (/wiki/); remove it or point directly to the api.php address and try again.'
await sendMessage(kwargs, MessageChain.create([Plain(result)]))
elif command[0] == 'del':
result = WikiDB.config_custom_interwiki('del', table, target, command[1])
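
check_wiki_available now always returns a two-tuple: (apilink, sitename) on success and (False, reason) on failure, which is why the callers above branch on check[0] and append check[1] to their replies. A minimal sketch of the new calling convention; handle_add and its messages are illustrative, not part of the commit:

from .helper import check_wiki_available

# Hypothetical caller showing the (value, detail) contract.
async def handle_add(command: str):
    check = await check_wiki_available(command)
    if check[0]:
        # Success: check[0] is the resolved api.php link, check[1] the sitename.
        print(f'added {check[0]} ({check[1]})')
    elif check[1] == 'Timeout':
        print('Error: the connection attempt timed out.')
    else:
        # Failure: check[1] carries the error detail.
        print(f'Error: {check[1]}')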

View file

@@ -168,5 +168,20 @@ class WD:
traceback.print_exc()
return 'An error occurred: ' + str(e)
def update_wikiinfo(self, apilink, siteinfo):
a = self.c.execute(f"SELECT * FROM wiki_info WHERE LINK='{apilink}'").fetchone()
if a:
self.c.execute(f"UPDATE wiki_info SET SITEINFO='{siteinfo}' WHERE LINK='{apilink}'")
else:
self.c.execute(f"INSERT INTO wiki_info (LINK, SITEINFO) VALUES (?, ?)", (apilink, siteinfo))
self.conn.commit()
def get_wikiinfo(self, apilink):
a = self.c.execute(f"SELECT * FROM wiki_info WHERE LINK='{apilink}'").fetchone()
if a:
return a[1:]
else:
return False
WikiDB = WD()
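
update_wikiinfo and get_wikiinfo back a 12-hour siteinfo cache: each wiki_info row is assumed to carry a TIMESTAMP column in '%Y-%m-%d %H:%M:%S' form, which callers shift by the bot's +8 h timezone offset and treat as fresh while younger than 43200 s. A standalone sketch of that freshness test, under those assumptions:

import datetime

def cache_is_fresh(stamp: str, tz_offset_hours: int = 8, ttl_seconds: int = 43200) -> bool:
    # Same arithmetic as the inline checks elsewhere in this commit:
    # (strptime(stamp) + offset).timestamp() - now().timestamp() > -ttl
    stored = datetime.datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')
    shifted = stored + datetime.timedelta(hours=tz_offset_hours)
    return shifted.timestamp() - datetime.datetime.now().timestamp() > -ttl_seconds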

View file

@@ -1,37 +1,56 @@
import asyncio
import datetime
import json
import re
import traceback
import aiohttp
from .database import WikiDB
async def get_url(url):
async def get_data(url: str, fmt: str, headers=None):
async with aiohttp.ClientSession() as session:
async with session.get(url, timeout=aiohttp.ClientTimeout(total=20)) as req:
return json.loads(await req.read())
try:
async with session.get(url, timeout=aiohttp.ClientTimeout(total=20), headers=headers) as req:
if hasattr(req, fmt):
return await getattr(req, fmt)()
else:
raise ValueError(f"NoSuchMethod: {fmt}")
except Exception:
traceback.print_exc()
return False
async def check_wiki_available(link):
query = '?action=query&meta=siteinfo&siprop=general|extensions&format=json'
query = '?action=query&meta=siteinfo&siprop=general|namespaces|namespacealiases|interwikimap|extensions&format=json'
getcacheinfo = WikiDB.get_wikiinfo(link)
if getcacheinfo and ((datetime.datetime.strptime(getcacheinfo[1], "%Y-%m-%d %H:%M:%S") + datetime.timedelta(
hours=8)).timestamp() - datetime.datetime.now().timestamp()) > - 43200:
return link, json.loads(getcacheinfo[0])['query']['general']['sitename']
try:
api = re.match(r'(https?://.*?/api.php$)', link)
wlink = api.group(1)
json1 = await get_url(api.group(1) + query)
json1 = await get_data(api.group(1) + query, 'json')  # get_data already returns parsed JSON
if not json1:
raise ValueError('unable to fetch siteinfo')
except:
if link[-1] not in ['/', '\\']:
link = link + '/'
test1 = link + 'api.php' + query
try:
json1 = await get_url(test1)
wlink = link + 'api.php'
except:
try:
test2 = link + 'w/api.php' + query
json1 = await get_url(test2)
wlink = link + 'w/api.php'
except:
traceback.print_exc()
return False
getpage = await get_data(link, 'text')
m = re.findall(r'(?im)<\s*link\s*rel="EditURI"\s*type="application/rsd\+xml"\s*href="([^>]+?)\?action=rsd"\s*/\s*>', getpage)
if m:
api = m[0]
if api.startswith('//'):
api = link.split('//')[0] + api
getcacheinfo = WikiDB.get_wikiinfo(api)
if getcacheinfo and (
(datetime.datetime.strptime(getcacheinfo[1], "%Y-%m-%d %H:%M:%S") + datetime.timedelta(
hours=8)).timestamp() - datetime.datetime.now().timestamp()) > - 43200:
return api, json.loads(getcacheinfo[0])['query']['general']['sitename']
json1 = await get_data(api + query, 'json')
wlink = api
except asyncio.TimeoutError:
return False, 'Timeout'
except Exception as e:
return False, str(e)
WikiDB.update_wikiinfo(wlink, json.dumps(json1))
wikiname = json1['query']['general']['sitename']
extensions = json1['query']['extensions']
extlist = []
@@ -41,4 +60,4 @@ async def check_wiki_available(link):
if 'TextExtracts' not in extlist:
wikiname = wikiname + '\nWarning: this wiki does not have the TextExtracts extension enabled; page previews will be returned as unprocessed raw wikitext.'
return wlink, wikiname
return wlink, wikiname
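
Instead of probing api.php and w/api.php blindly, the fallback now fetches the page HTML and reads the RSD advertisement that MediaWiki emits (<link rel="EditURI" type="application/rsd+xml" href=".../api.php?action=rsd">), resolving scheme-relative hrefs against the original link. A reduced sketch of just that discovery step, reusing the commit's regex:

import re
import aiohttp

async def discover_api(link: str):
    # Fetch the page and look for the RSD <link> tag.
    async with aiohttp.ClientSession() as session:
        async with session.get(link, timeout=aiohttp.ClientTimeout(total=20)) as req:
            html = await req.text()
    m = re.findall(r'(?im)<\s*link\s*rel="EditURI"\s*type="application/rsd\+xml"\s*href="([^>]+?)\?action=rsd"\s*/\s*>', html)
    if not m:
        return None
    api = m[0]
    if api.startswith('//'):
        # Scheme-relative href: inherit the scheme of the fetched page.
        api = link.split('//')[0] + api
    return api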

View file

@@ -1,3 +1,5 @@
import datetime
import json
import re
import traceback
import urllib.parse
@@ -6,6 +8,7 @@ import aiohttp
from core import dirty_check
from .helper import check_wiki_available
from .database import WikiDB
class wikilib:
@@ -53,41 +56,59 @@ class wikilib:
randompage = json['query']['random'][0]['title']
return await self.main(url, randompage, interwiki=iw, headers=headers)
async def get_interwiki(self, url):
interwiki_list = url + '?action=query&meta=siteinfo&siprop=interwikimap&format=json'
json = await self.get_data(interwiki_list, 'json')
async def get_wiki_info(self, url=None):
url = url if url is not None else self.wikilink
getcacheinfo = WikiDB.get_wikiinfo(url)
if getcacheinfo and ((datetime.datetime.strptime(getcacheinfo[1], "%Y-%m-%d %H:%M:%S") + datetime.timedelta(
hours=8)).timestamp() - datetime.datetime.now().timestamp()) > - 43200:
return json.loads(getcacheinfo[0])
wiki_info_url = url + '?action=query&meta=siteinfo&siprop=general|namespaces|namespacealiases|interwikimap|extensions&format=json'
j = await self.get_data(wiki_info_url, 'json')
WikiDB.update_wikiinfo(url, json.dumps(j))
return j
async def get_interwiki(self, url=None):
if url is None:
j = self.wiki_info
else:
j = await self.get_wiki_info(url)
interwikimap = j['query']['interwikimap']
interwiki_dict = {}
for interwiki in interwikimap:
interwiki_dict[interwiki['prefix']] = re.sub(r'(?:wiki/|)\$1', '', interwiki['url'])
interwiki_dict[interwiki['prefix']] = interwiki['url']
return interwiki_dict
async def get_siteinfo(self, url=None):
url = url if url is not None else self.wikilink
siteinfo_url = url + '?action=query&meta=siteinfo&siprop=general&format=json'
j = await self.get_data(siteinfo_url, 'json')
return j
async def get_namespace(self, url=None):
url = url if url is not None else self.wikilink
namespace_url = url + '?action=query&meta=siteinfo&siprop=namespaces|namespacealiases&format=json'
j = await self.get_data(namespace_url, 'json')
if url is None:
j = self.wiki_info
else:
j = await self.get_wiki_info(url)
d = {}
for x in j['query']['namespaces']:
try:
d[j['query']['namespaces'][x]['*']] = j['query']['namespaces'][x]['canonical']
except KeyError:
pass
except:
traceback.print_exc()
for x in j['query']['namespacealiases']:
try:
d[x['*']] = 'aliases'
except KeyError:
pass
except:
traceback.print_exc()
return d
async def get_article_path(self, url):
siteinfo = await self.get_siteinfo(url)
article_path = siteinfo['query']['general']['articlepath']
async def get_article_path(self, url=None):
if url is None:
wiki_info = self.wiki_info
url = self.wikilink
else:
wiki_info = await self.get_wiki_info(url)
if not wiki_info:
return 'An error occurred: this site may not be a valid MediaWiki. ' + url
article_path = wiki_info['query']['general']['articlepath']
article_path = re.sub(r'\$1', '', article_path)
baseurl = re.match(r'(https?://.*?)/.*', url)
return baseurl.group(1) + article_path
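
get_article_path combines siteinfo's articlepath (typically '/wiki/$1') with the scheme and host of the api link; step2 below then strips that prefix from a page's fullurl to recover the URL-encoded title. A worked sketch of the same arithmetic, with assumed example values:

import re

def resolve_article_path(apilink: str, articlepath: str) -> str:
    # Drop the $1 placeholder and prepend scheme://host of the api link.
    base = re.match(r'(https?://.*?)/.*', apilink).group(1)
    return base + re.sub(r'\$1', '', articlepath)

# resolve_article_path('https://example.org/w/api.php', '/wiki/$1')
# -> 'https://example.org/wiki/'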
@@ -167,10 +188,11 @@ class wikilib:
async def getdesc(self):
try:
descurl = self.wikilink + '?action=query&prop=info|pageprops|extracts&ppprop=description|displaytitle|disambiguation|infoboxes&explaintext=true&exsectionformat=plain&exsentences=1&format=json&titles=' + self.querytextname
descurl = self.wikilink + '?action=query&prop=info|pageprops|extracts&ppprop=description|displaytitle|disambiguation|infoboxes&explaintext=true&exsectionformat=plain&exchars=200&format=json&titles=' + self.querytextname
loadtext = await self.get_data(descurl, "json", self.headers)
pageid = self.parsepageid(loadtext)
desc = loadtext['query']['pages'][pageid]['extract']
desc = re.findall(r'(.*?(?:\!|\?|\.|\;|||。|)).*', desc, re.S | re.M)[0]
except Exception:
traceback.print_exc()
desc = ''
@@ -182,7 +204,7 @@ class wikilib:
loaddesc = await self.get_data(descurl, 'json', self.headers)
descraw = loaddesc['parse']['wikitext']['*']
try:
cutdesc = re.findall(r'(.*(?:!|\?|\.|;|||。|))', descraw, re.S | re.M)
cutdesc = re.findall(r'(.*?(?:!|\?|\.|;|||。|)).*', descraw, re.S | re.M)
desc = cutdesc[0]
except IndexError:
desc = descraw
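
Both extract cutters now use a lazy .*? instead of a greedy .*, so the first sentence terminator wins rather than the last, and the trailing .* swallows the remainder so findall returns only the cut. A quick comparison on sample text:

import re

text = 'First sentence. Second one! The rest.'
greedy = re.findall(r'(.*(?:!|\?|\.|;|||。|))', text, re.S | re.M)
lazy = re.findall(r'(.*?(?:!|\?|\.|;|||。|)).*', text, re.S | re.M)
# greedy[0] == 'First sentence. Second one! The rest.'  (runs to the last '.')
# lazy[0]   == 'First sentence.'                        (stops at the first '.')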
@@ -202,13 +224,6 @@ class wikilib:
return desc
async def step1(self):
if self.template:
self.pagename = 'Template:' + self.pagename
self.pageraw = await self.getpage()
if not self.pageraw:
return {'status': 'done', 'text': 'An error occurred: unable to fetch the page.'}
if 'redirects' in self.pageraw['query']:
self.pagename = self.pageraw['query']['redirects'][0]['to']
try:
self.pageid = self.parsepageid(self.pageraw)
except:
@@ -216,23 +231,19 @@ class wikilib:
self.psepgraw = self.pageraw['query']['pages'][self.pageid]
if self.pageid == '-1':
if self.igmessage == False:
if self.template == True:
self.pagename = self.orginpagename = re.sub(r'^Template:', '', self.pagename)
self.template = False
self.templateprompt = f'Tip: [Template:{self.pagename}] does not exist; automatically fell back to searching the page itself.\n'
return await self.step1()
return await self.nullpage()
if self.template == True:
self.pagename = self.orginpagename = re.sub(r'^Template:', '', self.pagename)
self.template = False
self.templateprompt = f'Tip: [Template:{self.pagename}] does not exist; automatically fell back to searching the page itself.\n'
return await self.step1()
return await self.nullpage()
else:
return await self.step2()
async def step2(self):
try:
fullurl = self.psepgraw['fullurl']
print(fullurl)
artpath = await self.get_article_path(self.wikilink)
artpath = re.sub(r'https?://', '', artpath)
geturlpagename = re.sub(r'.*' + artpath, '', fullurl)
geturlpagename = fullurl.split(self.wiki_articlepath)[1]
self.querytextname = urllib.parse.unquote(geturlpagename)
querytextnamesplit = self.querytextname.split(':')
if len(querytextnamesplit) > 1:
@@ -248,7 +259,7 @@ class wikilib:
getdocraw = await self.getpage(getdoc)
getdocid = self.parsepageid(getdocraw)
getdoclink = getdocraw['query']['pages'][getdocid]['fullurl']
getdocpagename = re.sub(r'.*' + artpath, '', getdoclink)
getdocpagename = getdoclink.split(self.wiki_articlepath)[1]
self.querytextname = getdocpagename
else:
self.querytextname = geturlpagename + '/doc'
@@ -287,12 +298,11 @@ class wikilib:
f'(redirected [{target}{self.orginpagename}] -> [{target}{finpgname}]' + (
'\n' if desc != '' else '') + f'{desc}')
rmlstlb = re.sub('\n\n', '\n', rmlstlb)
rmlstlb = re.sub('\n\n', '\n', rmlstlb)
if len(rmlstlb) > 250:
rmlstlb = rmlstlb[0:250] + '\n...too many characters; truncated.'
rmlstlb = rmlstlb[0:250] + '...'
try:
rm5lline = re.findall(r'.*\n.*\n.*\n.*\n.*\n', rmlstlb)
result = rm5lline[0] + '...too many lines; truncated.'
result = rm5lline[0] + '...'
except Exception:
result = rmlstlb
msgs = {'status': 'done', 'url': fullurl, 'text': result, 'apilink': self.wikilink}
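
Replies are now capped at 250 characters and then at five lines, each with a bare '...' marker. The same policy as a standalone helper; truncate_reply is illustrative, not part of the commit:

import re

def truncate_reply(text: str, max_chars: int = 250, max_lines: int = 5) -> str:
    # Character cap first, then line cap, mirroring the order above.
    if len(text) > max_chars:
        text = text[:max_chars] + '...'
    head = re.findall(r'.*\n' * max_lines, text)
    return head[0] + '...' if head else text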
@@ -314,7 +324,7 @@ class wikilib:
traceback.print_exc()
return {'status': 'done', 'text': '发生错误:' + str(e)}
async def main(self, wikilink, pagename, interwiki=None, igmessage=False, template=False, headers=None, tryiw=0):
async def main(self, wikilink, pagename, interwiki=None, template=False, headers=None, tryiw=0):
print(wikilink)
print(pagename)
print(interwiki)
@@ -322,8 +332,7 @@ class wikilib:
return {'status': 'done', 'text': await self.get_article_path(wikilink)}
pagename = re.sub('_', ' ', pagename)
pagename = pagename.split('|')[0]
self.orginwikilink = wikilink
self.wikilink = re.sub('index.php/', '', self.orginwikilink) # fxxk
self.wikilink = wikilink
danger_check = self.danger_wiki_check()
if danger_check:
if await self.danger_text_check(pagename):
@@ -334,33 +343,36 @@ class wikilib:
self.interwiki = ''
else:
self.interwiki = interwiki
self.igmessage = igmessage
self.wiki_info = await self.get_wiki_info()
self.wiki_namespace = await self.get_namespace()
self.wiki_articlepath = await self.get_article_path()
self.template = template
self.templateprompt = None
self.headers = headers
try:
matchinterwiki = re.match(r'(.*?):(.*)', self.pagename)
if matchinterwiki:
if self.template:
self.pagename = 'Template:' + self.pagename
self.pageraw = await self.getpage()
if not self.pageraw:
return {'status': 'done', 'text': '发生错误:无法获取到页面。'}
if 'interwiki' in self.pageraw['query']:
iwp = self.pageraw['query']['interwiki'][0]
matchinterwiki = re.match(iwp['iw'] + r':(.*)', iwp['title'])
if tryiw <= 5:
iwlist = await self.get_interwiki(self.wikilink)
if matchinterwiki.group(1) in iwlist:
if tryiw <= 5:
interwiki_link = iwlist[matchinterwiki.group(1)]
check = await check_wiki_available(interwiki_link)
print(check)
if check:
return await self.main(check[0], matchinterwiki.group(2), ((interwiki + ':') if interwiki is not None else '') + matchinterwiki.group(1), self.igmessage, self.template, headers, tryiw + 1)
else:
return {'status': 'done', 'text': f'An error occurred: the target interwiki is not a valid MediaWiki: {interwiki_link}{matchinterwiki.group(2)}'}
else:
return {'status': 'warn', 'text': 'Warning: redirects have been attempted more than 5 times; further attempts may get you blacklisted by the bot.'}
interwiki_link = iwlist[iwp['iw']]
check = await check_wiki_available(interwiki_link)
if check:
return await self.main(check[0], matchinterwiki.group(1),
((interwiki + ':') if interwiki is not None else '') + iwp['iw'], self.template, headers, tryiw + 1)
else:
return {'status': 'done', 'text': f'An error occurred: the target interwiki is not a valid MediaWiki: {interwiki_link}{matchinterwiki.group(1)}'}
else:
return {'status': 'warn', 'text': 'Warning: redirects have been attempted more than 5 times; further attempts may get you blacklisted by the bot.'}
if 'redirects' in self.pageraw['query']:
self.pagename = self.pageraw['query']['redirects'][0]['to']
try:
return await self.step1()
except Exception as e:
traceback.print_exc()
if igmessage == False:
return f'An error occurred: {str(e)}' + '\n'
return f'An error occurred: {str(e)}' + '\n'
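
main now reads the interwiki target from the query response itself (pageraw['query']['interwiki']), resolves the prefix through get_interwiki, and recurses with tryiw + 1, refusing after five hops. A hedged usage sketch; the api link and page title are placeholders and the driver is not part of the module:

import asyncio

# wikilib comes from the module above; the new signature is
# main(wikilink, pagename, interwiki=None, template=False, headers=None, tryiw=0).
async def demo():
    result = await wikilib().main('https://example.org/w/api.php', 'en:Some page')
    print(result)  # e.g. {'status': 'done', 'url': ..., 'text': ..., 'apilink': ...}

asyncio.run(demo())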