# akari-bot/modules/wiki/wikilib.py


import asyncio
import datetime
import json
import re
import traceback
import urllib.parse

import aiohttp

from core import dirty_check
from .database import WikiDB

class wikilib:
    async def get_data(self, url: str, fmt: str, headers=None):
        async with aiohttp.ClientSession(headers=headers) as session:
            try:
                async with session.get(url, timeout=aiohttp.ClientTimeout(total=20)) as req:
                    if hasattr(req, fmt):
                        return await getattr(req, fmt)()
                    else:
                        raise ValueError(f"NoSuchMethod: {fmt}")
            except Exception:
                traceback.print_exc()
                return False

    def encode_query_string(self, kwargs: dict):
        return '?' + urllib.parse.urlencode(kwargs)
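
    # check_wiki_available resolves a user-supplied link to a usable api.php endpoint:
    # it first treats the link itself as an api.php URL and, failing that, fetches the
    # page and reads the RSD "EditURI" <link> tag that MediaWiki pages expose. Siteinfo
    # responses are cached via WikiDB and reused while the cached copy is recent enough
    # (the stored timestamp is shifted by 8 hours and compared against a 12-hour window).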
    async def check_wiki_available(self, link):
        query_string = {'action': 'query', 'meta': 'siteinfo',
                        'siprop': 'general|namespaces|namespacealiases|interwikimap|extensions', 'format': 'json'}
        query = self.encode_query_string(query_string)
        getcacheinfo = WikiDB.get_wikiinfo(link)
        if getcacheinfo and ((datetime.datetime.strptime(getcacheinfo[1], "%Y-%m-%d %H:%M:%S") + datetime.timedelta(
                hours=8)).timestamp() - datetime.datetime.now().timestamp()) > -43200:
            return link, json.loads(getcacheinfo[0])['query']['general']['sitename']
        try:
            api = re.match(r'(https?://.*?/api\.php$)', link)
            wlink = api.group(1)
            json1 = await self.get_data(api.group(1) + query, 'json')
        except Exception:
            try:
                getpage = await self.get_data(link, 'text')
                m = re.findall(
                    r'(?im)<\s*link\s*rel="EditURI"\s*type="application/rsd\+xml"\s*href="([^>]+?)\?action=rsd"\s*/\s*>',
                    getpage)
                api = m[0]
                if api.startswith('//'):
                    api = link.split('//')[0] + api
                getcacheinfo = WikiDB.get_wikiinfo(api)
                if getcacheinfo and (
                        (datetime.datetime.strptime(getcacheinfo[1], "%Y-%m-%d %H:%M:%S") + datetime.timedelta(
                            hours=8)).timestamp() - datetime.datetime.now().timestamp()) > -43200:
                    return api, json.loads(getcacheinfo[0])['query']['general']['sitename']
                json1 = await self.get_data(api + query, 'json')
                wlink = api
            except asyncio.TimeoutError:
                return False, 'Timeout'
            except Exception as e:
                return False, str(e)
        WikiDB.update_wikiinfo(wlink, json.dumps(json1))
        wikiname = json1['query']['general']['sitename']
        extensions = json1['query']['extensions']
        extlist = []
        for ext in extensions:
            extlist.append(ext['name'])
        if 'TextExtracts' not in extlist:
            wikiname = wikiname + '\n警告：此wiki没有启用TextExtracts扩展，返回的页面预览内容将为未处理的原始Wikitext文本。'
        return wlink, wikiname

    def danger_wiki_check(self):
        endpoint = self.wiki_api_endpoint.upper()
        for keyword in ('WIKIPEDIA', 'UNCYCLOPEDIA', 'HMOEGIRL', 'EVCHK',
                        'HONGKONG.FANDOM', 'WIKILEAKS', 'NANFANGGONGYUAN'):
            if keyword in endpoint:
                return True
        return False
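
    # dirty_check.check() (from core, not shown here) appears to return the text with
    # filtered spans replaced by <吃掉了>/<全部吃掉了> markers; if either marker shows up,
    # the caller replaces the whole reply with a notice link instead of sending it.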
    async def danger_text_check(self, text):
        if not self.danger_wiki_check():
            return False
        check = await dirty_check.check(text)
        print(check)
        if check.find('<吃掉了>') != -1 or check.find('<全部吃掉了>') != -1:
            return True
        return False

    async def random_page(self, url, iw=None, headers=None):
        query_string = {'action': 'query',
                        'list': 'random',
                        'format': 'json'}
        random_url = url + self.encode_query_string(query_string)
        random_json = await self.get_data(random_url, 'json')
        randompage = random_json['query']['random'][0]['title']
        return await self.main(url, randompage, interwiki=iw, headers=headers)
    async def get_wiki_info(self, url=None):
        url = url if url is not None else self.wiki_api_endpoint
        getcacheinfo = WikiDB.get_wikiinfo(url)
        if getcacheinfo and ((datetime.datetime.strptime(getcacheinfo[1], "%Y-%m-%d %H:%M:%S") + datetime.timedelta(
                hours=8)).timestamp() - datetime.datetime.now().timestamp()) > -43200:
            return json.loads(getcacheinfo[0])
        query_string = {'action': 'query', 'meta': 'siteinfo',
                        'siprop': 'general|namespaces|namespacealiases|interwikimap|extensions', 'format': 'json'}
        wiki_info_url = url + self.encode_query_string(query_string)
        j = await self.get_data(wiki_info_url, 'json')
        WikiDB.update_wikiinfo(url, json.dumps(j))
        return j
    async def get_interwiki(self, url=None):
        if url is None:
            wiki_info = self.wiki_info
        else:
            wiki_info = await self.get_wiki_info(url)
        interwikimap = wiki_info['query']['interwikimap']
        interwiki_dict = {}
        for interwiki in interwikimap:
            interwiki_dict[interwiki['prefix']] = interwiki['url']
        return interwiki_dict

    async def get_namespace(self, url=None):
        if url is None:
            j = self.wiki_info
        else:
            j = await self.get_wiki_info(url)
        d = {}
        for x in j['query']['namespaces']:
            try:
                d[j['query']['namespaces'][x]['*']] = j['query']['namespaces'][x]['canonical']
            except KeyError:
                pass
            except Exception:
                traceback.print_exc()
        for x in j['query']['namespacealiases']:
            try:
                d[x['*']] = 'aliases'
            except KeyError:
                pass
            except Exception:
                traceback.print_exc()
        return d
    async def get_article_path(self, url=None):
        if url is None:
            wiki_info = self.wiki_info
            url = self.wiki_api_endpoint
        else:
            wiki_info = await self.get_wiki_info(url)
        if not wiki_info:
            return False
        article_path = wiki_info['query']['general']['articlepath']
        article_path = re.sub(r'\$1', '', article_path)
        base_url = re.match(r'(https?://.*?)/.*', url)
        return base_url.group(1) + article_path
    async def get_enabled_extensions(self, url=None):
        if url is None:
            wiki_info = self.wiki_info
        else:
            wiki_info = await self.get_wiki_info(url)
        extensions = wiki_info['query']['extensions']
        ext_list = []
        for ext in extensions:
            ext_list.append(ext['name'])
        return ext_list
    async def get_real_address(self, url=None):
        if url is None:
            wiki_info = self.wiki_info
        else:
            wiki_info = await self.get_wiki_info(url)
        real_url = wiki_info['query']['general']['server']
        return real_url
    async def get_image(self, page_name, wiki_api_endpoint=None):
        try:
            query_string = {'action': 'query', 'titles': page_name, 'prop': 'imageinfo', 'iiprop': 'url',
                            'format': 'json'}
            url = (
                wiki_api_endpoint if wiki_api_endpoint is not None else self.wiki_api_endpoint) + self.encode_query_string(
                query_string)
            json_ = await self.get_data(url, 'json')
            parse_page_id = self.parse_page_id(json_)
            image_link = json_['query']['pages'][parse_page_id]['imageinfo'][0]['url']
            return image_link
        except Exception:
            traceback.print_exc()
            return False
    async def get_page_link(self, page_name=None):
        page_name = page_name if page_name is not None else self.page_name
        page_name = re.sub(r'(.*)\?.*$', r'\1', page_name)
        query_string = {'action': 'query', 'format': 'json', 'prop': 'info', 'inprop': 'url', 'redirects': 'True',
                        'titles': page_name}
        get_link_url = self.wiki_api_endpoint + self.encode_query_string(query_string)
        get_page = await self.get_data(get_link_url, "json")
        return get_page
    def parse_page_id(self, page_raw):
        page_raw = page_raw['query']['pages']
        page_list = iter(page_raw)
        page_id = next(page_list)
        return page_id
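
    # When an exact title lookup fails, research_page falls back to a full-text search
    # (list=search, srlimit=1) and suggests the closest match, additionally warning when
    # the requested namespace prefix is unknown and may be an unconfigured interwiki.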
    async def research_page(self):
        try:
            query_string = {'action': 'query', 'list': 'search', 'srsearch': self.page_name, 'srwhat': 'text',
                            'srlimit': '1', 'srenablerewrites': '', 'format': 'json'}
            search_url = self.wiki_api_endpoint + self.encode_query_string(query_string)
            get_sec_json = await self.get_data(search_url, "json", self.headers)
            sec_title = get_sec_json['query']['search'][0]['title']
            if self.interwiki == '':
                target = ''
            else:
                target = f'{self.interwiki}:'
            prompt = f'找不到{target}{self.page_name},您是否要找的是:[[{target}{sec_title}]]'
            title_split = self.page_name.split(':')
            if len(title_split) > 1:
                try:
                    get_namespace = await self.get_namespace()
                    if title_split[0] not in get_namespace:
                        prompt += f'\n提示：此Wiki上找不到“{title_split[0]}”名字空间，请检查是否设置了对应的Interwiki（使用~wiki iw list命令可以查询当前已设置的Interwiki）。'
                except Exception:
                    traceback.print_exc()
            if self.template_prompt:
                prompt = self.template_prompt + prompt
            if await self.danger_text_check(prompt):
                return {'status': 'done', 'text': 'https://wdf.ink/6OUp'}
            return {'status': 'wait', 'title': f'{target}{sec_title}', 'text': prompt}
        except Exception:
            traceback.print_exc()
            return {'status': 'done', 'text': '找不到条目。'}

    async def page_not_found(self):
        if 'invalid' in self.psepgraw:
            rs1 = re.sub('The requested page title contains invalid characters:', '请求的页面标题包含非法字符:',
                         self.psepgraw['invalidreason'])
            rs = '发生错误:“' + rs1 + '”。'
            rs = re.sub('".”', '"', rs)
            return {'status': 'done', 'text': rs}
        if 'missing' in self.psepgraw:
            self.rspt = await self.research_page()
            return self.rspt
        msg = await self.get_article_path(self.wiki_api_endpoint) + urllib.parse.quote(self.page_name.encode('UTF-8'))
        return {'status': 'done', 'text': msg}

    async def get_desc(self):
        try:
            query_string = {'action': 'query', 'prop': 'info|pageprops|extracts',
                            'ppprop': 'description|displaytitle|disambiguation|infoboxes', 'explaintext': 'true',
                            'exsectionformat': 'plain', 'exchars': '200', 'format': 'json',
                            'titles': self.query_text_name}
            desc_url = self.wiki_api_endpoint + self.encode_query_string(query_string)
            load_text = await self.get_data(desc_url, "json", self.headers)
            page_id = self.parse_page_id(load_text)
            desc = load_text['query']['pages'][page_id]['extract'].split('\n')
            desc_list = []
            for x in desc:
                if x != '':
                    desc_list.append(x)
            desc = '\n'.join(desc_list)
            desc_end = re.findall(r'(.*?(?:!|\?|\.|;|！|？|。|；)).*', desc, re.S | re.M)
            if desc_end:
                desc = desc_end[0]
        except Exception:
            traceback.print_exc()
            desc = ''
        if desc == '...':
            desc = ''
        return desc

    async def get_first_line(self):
        try:
            query_string = {'action': 'parse', 'page': self.query_text_name, 'prop': 'wikitext', 'section': '0',
                            'format': 'json'}
            desc_url = self.wiki_api_endpoint + self.encode_query_string(query_string)
            load_desc = await self.get_data(desc_url, 'json', self.headers)
            desc_raw = load_desc['parse']['wikitext']['*'].split('\n')
            desc_list = []
            for x in desc_raw:
                if x != '':
                    desc_list.append(x)
            desc_raw = '\n'.join(desc_list)
            cut_desc = re.findall(r'(.*?(?:!|\?|\.|;|！|？|。|；)).*', desc_raw, re.S | re.M)
            if cut_desc:
                desc = cut_desc[0]
            else:
                desc = desc_raw
        except Exception:
            traceback.print_exc()
            desc = ''
        return desc

    async def get_all_wikitext(self):
        try:
            query_string = {'action': 'parse', 'page': self.query_text_name, 'prop': 'wikitext', 'format': 'json'}
            desc_url = self.wiki_api_endpoint + self.encode_query_string(query_string)
            load_desc = await self.get_data(desc_url, 'json', self.headers)
            desc = load_desc['parse']['wikitext']['*']
        except Exception:
            traceback.print_exc()
            desc = ''
        return desc
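
    # step1 and step2 turn the raw page query into a chat reply: step1 resolves the page
    # id, rolls a missing Template: page back to the bare title, and hands missing or
    # invalid pages to page_not_found(); step2 assembles the final message (full URL, a
    # TextExtracts summary or the first wikitext line, section/query-string anchors,
    # optional image/audio links) and truncates it to 250 characters or five lines.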
    async def step1(self):
        try:
            self.page_id = self.parse_page_id(self.page_raw)
        except Exception:
            return {'status': 'done', 'text': '发生错误：无法获取到页面，请检查是否设置了对应Interwiki。'}
        self.psepgraw = self.page_raw['query']['pages'][self.page_id]
        if self.page_id == '-1':
            if self.template:
                self.page_name = self.orginpagename = re.sub(r'^Template:', '', self.page_name)
                self.template = False
                self.template_prompt = f'提示:[Template:{self.page_name}]不存在,已自动回滚搜索页面。\n'
                return await self.step1()
            return await self.page_not_found()
        else:
            return await self.step2()
    async def step2(self):
        try:
            full_url = self.psepgraw['fullurl']
            try:
                geturl_pagename = full_url.split(self.wiki_articlepath)
                geturl_pagename = geturl_pagename[1]
            except Exception:
                geturl_pagename = full_url
            self.query_text_name = urllib.parse.unquote(geturl_pagename)
            query_text_name_split = self.query_text_name.split(':')
            if len(query_text_name_split) > 1:
                namespaces = await self.get_namespace()
                if query_text_name_split[0] in namespaces:
                    if namespaces[query_text_name_split[0]] == 'Template':
                        get_all_text = await self.get_all_wikitext()
                        try:
                            match_doc = re.match(r'.*{{documentation\|?(.*?)}}.*', get_all_text, re.I | re.S)
                            match_link = re.match(r'link=(.*)', match_doc.group(1), re.I | re.S)
                            if match_link:
                                get_doc = match_link.group(1)
                                get_doc_raw = await self.get_page_link(get_doc)
                                get_doc_id = self.parse_page_id(get_doc_raw)
                                get_doc_link = get_doc_raw['query']['pages'][get_doc_id]['fullurl']
                                get_doc_pagename = get_doc_link.split(self.wiki_articlepath)[1]
                                self.query_text_name = get_doc_pagename
                            else:
                                self.query_text_name = geturl_pagename + '/doc'
                        except AttributeError:
                            self.query_text_name = geturl_pagename + '/doc'
            if 'TextExtracts' in await self.get_enabled_extensions():
                desc = await self.get_desc()
            else:
                desc = ''
            if desc == '':
                desc = await self.get_first_line()
            print(desc)
            fin_page_name = geturl_pagename
            try:
                section = re.match(r'.*(\#.*)', self.page_name)
                if section:
                    fin_page_name = geturl_pagename + urllib.parse.quote(section.group(1).encode('UTF-8'))
                    full_url = self.psepgraw['fullurl'] + urllib.parse.quote(section.group(1).encode('UTF-8'))
            except Exception:
                traceback.print_exc()
            try:
                pgstr = re.match(r'.*(\?.*)', self.page_name)
                if pgstr:
                    fin_page_name = geturl_pagename + pgstr.group(1)
                    full_url = full_url + pgstr.group(1)
            except Exception:
                traceback.print_exc()
            fin_page_name = urllib.parse.unquote(fin_page_name)
            fin_page_name = re.sub('_', ' ', fin_page_name)
            if fin_page_name == self.orginpagename:
                rmlstlb = re.sub('\n$', '', desc)
            else:
                if self.interwiki == '':
                    target = ''
                else:
                    target = f'{self.interwiki}:'
                rmlstlb = re.sub('\n$', '',
                                 f'(重定向[{target}{self.orginpagename}] -> [{target}{fin_page_name}])' + (
                                     '\n' if desc != '' else '') + f'{desc}')
            rmlstlb = re.sub('\n\n', '\n', rmlstlb)
            if len(rmlstlb) > 250:
                rmlstlb = rmlstlb[0:250] + '...'
            try:
                rm5lline = re.findall(r'.*\n.*\n.*\n.*\n.*\n', rmlstlb)
                result = rm5lline[0] + '...'
            except Exception:
                result = rmlstlb
            msgs = {'status': 'done', 'url': full_url, 'text': result, 'apilink': self.wiki_api_endpoint}
            match_img = re.match(r'File:.*?\.(?:png|gif|jpg|jpeg|webp|bmp|ico)', self.page_name, re.I)
            if match_img:
                getimg = await self.get_image(self.page_name)
                if getimg:
                    msgs['net_image'] = getimg
            match_aud = re.match(r'File:.*?\.(?:oga|ogg|flac|mp3|wav)', self.page_name, re.I)
            if match_aud:
                getaud = await self.get_image(self.page_name)
                if getaud:
                    msgs['net_audio'] = getaud
            if result != '' and await self.danger_text_check(result):
                return {'status': 'done', 'text': 'https://wdf.ink/6OUp'}
            return msgs
        except Exception as e:
            traceback.print_exc()
            return {'status': 'done', 'text': '发生错误:' + str(e)}

    async def main(self, api_endpoint_link, page_name, interwiki=None, template=False, headers=None, tryiw=0):
        print(api_endpoint_link)
        print(page_name)
        print(interwiki)
        if page_name == '':
            article_path = await self.get_article_path(api_endpoint_link)
            if not article_path:
                article_path = '发生错误：此站点或许不是有效的Mediawiki网站。' + api_endpoint_link
            return {'status': 'done', 'text': article_path}
        page_name = re.sub('_', ' ', page_name)
        page_name = page_name.split('|')[0]
        self.wiki_api_endpoint = api_endpoint_link
        danger_check = self.danger_wiki_check()
        if danger_check:
            if await self.danger_text_check(page_name):
                return {'status': 'done', 'text': 'https://wdf.ink/6OUp'}
        self.orginpagename = page_name
        self.page_name = page_name
        if interwiki is None:
            self.interwiki = ''
        else:
            self.interwiki = interwiki
        self.wiki_info = await self.get_wiki_info()
        self.wiki_namespace = await self.get_namespace()
        real_wiki_url = await self.get_real_address()
        api_endpoint = re.match(r'^https?://.*?/(.*)', api_endpoint_link)
        self.wiki_api_endpoint = real_wiki_url + '/' + api_endpoint.group(1)
        self.wiki_articlepath = await self.get_article_path()
        self.template = template
        self.template_prompt = None
        self.headers = headers
        if self.template:
            if not re.match('^Template:', self.page_name, re.I):
                self.page_name = 'Template:' + self.page_name
        self.page_raw = await self.get_page_link()
        if not self.page_raw:
            return {'status': 'done', 'text': '发生错误:无法获取到页面。'}
        if 'interwiki' in self.page_raw['query']:
            iwp = self.page_raw['query']['interwiki'][0]
            match_interwiki = re.match(r'^' + iwp['iw'] + r':(.*)', iwp['title'])
            if tryiw <= 5:
                iw_list = await self.get_interwiki(self.wiki_api_endpoint)
                interwiki_link = iw_list[iwp['iw']]
                check = await self.check_wiki_available(interwiki_link)
                if check[0]:
                    return await self.main(check[0], match_interwiki.group(1),
                                           ((interwiki + ':') if interwiki is not None else '') + iwp['iw'],
                                           self.template, headers, tryiw + 1)
                else:
                    return {'status': 'done',
                            'text': f'发生错误：指向的interwiki或许不是一个有效的MediaWiki。{interwiki_link}{match_interwiki.group(1)}'}
            else:
                return {'status': 'warn', 'text': '警告：尝试重定向已超过5次，继续尝试将有可能导致你被机器人加入黑名单。'}
        if 'redirects' in self.page_raw['query']:
            self.page_name = self.page_raw['query']['redirects'][0]['to']
        try:
            return await self.step1()
        except Exception as e:
            traceback.print_exc()
            return f'发生错误:{str(e)}' + '\n'
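

# A minimal usage sketch (not part of the original module): wikilib.main() is a coroutine,
# so a caller drives it with asyncio. The api.php URL and page title below are placeholder
# examples only, and this assumes the bot's WikiDB database is initialised; because of the
# relative import above, run it as a module (e.g. python -m modules.wiki.wikilib) rather
# than as a standalone script.
if __name__ == '__main__':
    async def _demo():
        # main() normally returns a dict such as {'status': 'done', 'url': ..., 'text': ...}
        result = await wikilib().main('https://minecraft.fandom.com/zh/api.php', '红石')
        print(result)

    asyncio.run(_demo())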