# akari-bot/wiki/wikilib.py

import json
import re
import traceback
import urllib.parse

import requests

from interwikilist import iwlist, iwlink

async def wiki1(path1, pagename):
    print(pagename)
    # Resolve the title via the MediaWiki API (follows redirects, returns full URLs).
    metaurl = path1 + 'api.php?action=query&format=json&prop=info&inprop=url&redirects&titles=' + pagename
    print(metaurl)
    metatext = requests.get(metaurl, timeout=10)
    file = json.loads(metatext.text)
    try:
        x = file['query']['pages']
        y = sorted(x.keys())[0]
        if int(y) == -1:
            # A page id of -1 means the title is invalid or the page is missing.
            if 'invalid' in x['-1']:
                # MediaWiki reports e.g. 'The requested page title contains invalid
                # characters: ...'; the original re.sub localized this message into Chinese.
                rs = x['-1']['invalidreason']
                return 'Error: "' + rs + '".'
            elif 'missing' in x['-1']:
                try:
                    try:
                        # First try a generator=search lookup for the closest match.
                        searchurl = path1 + 'api.php?action=query&generator=search&gsrsearch=' + pagename + '&gsrsort=just_match&gsrenablerewrites&prop=info&gsrlimit=1&format=json'
                        f = requests.get(searchurl, timeout=10)
                        g = json.loads(f.text)
                        j = g['query']['pages']
                        b = sorted(j.keys())[0]
                        m = j[b]['title']
                        return 'Page not found. Did you mean: ' + m + '?'
                    except Exception:
                        # Fall back to a plain list=search text search.
                        searchurl = path1 + 'api.php?action=query&list=search&srsearch=' + pagename + '&srwhat=text&srlimit=1&srenablerewrites=&format=json'
                        f = requests.get(searchurl, timeout=10)
                        g = json.loads(f.text)
                        m = g['query']['search'][0]['title']
                        return 'Page not found. Did you mean: ' + m + '?'
                except Exception:
                    return 'Page not found.'
            else:
                return 'Requested page ' + pagename + ': ' + path1 + urllib.parse.quote(pagename.encode('UTF-8'))
        else:
            # Page exists: strip the wiki path from the full URL to recover the title part.
            z = x[y]['fullurl']
            if 'index.php' in z or 'Index.php' in z:
                h = re.match(r'https?://.*/.*/(.*)', z, re.M | re.I)
            else:
                h = re.match(r'https?://.*/(.*)', z, re.M | re.I)
            try:
                # Fetch a one-sentence plain-text extract of the page.
                texturl = path1 + 'api.php?action=query&prop=extracts&exsentences=1&explaintext&exsectionformat=wiki&format=json&titles=' + h.group(1)
                gettext = requests.get(texturl, timeout=10)
                loadtext = json.loads(gettext.text)
                v = loadtext['query']['pages'][y]['extract']
            except Exception:
                v = ''
            try:
                # Re-append a '#section' anchor from the requested title, if present.
                s = re.match(r'.*(\#.*)', pagename)
                z = x[y]['fullurl'] + urllib.parse.quote(s.group(1).encode('UTF-8'))
            except Exception:
                z = x[y]['fullurl']
            if 'index.php' in z or 'Index.php' in z:
                n = re.match(r'https?://.*?/.*/(.*)', z)
            else:
                n = re.match(r'https?://.*?/(.*)', z)
            k = urllib.parse.unquote(n.group(1), encoding='UTF-8')
            k = re.sub('_', ' ', k)
            if k == pagename:
                xx = re.sub('\n$', '', z + '\n' + v)
            else:
                # The resolved title differs from the request, so it was redirected.
                xx = re.sub('\n$', '', '\n(redirected [' + pagename + '] to [' + k + '])\n' + z + '\n' + v)
            return 'Requested page ' + pagename + ': ' + xx
    except Exception:
        # The title may be an interwiki link such as 'prefix:Page'.
        try:
            w = re.match(r'(.*?):(.*)', pagename)
            i = w.group(1)
            if i in iwlist():
                return await wiki2(i, w.group(2))
            else:
                return 'Error: invalid content.'
        except Exception as e:
            traceback.print_exc()
            return 'Error: ' + str(e)

async def wiki2(lang, str1):
    # Look up the interwiki base URL for this prefix, then query it like any other wiki.
    try:
        metaurl = iwlink(lang)
        return await wiki1(metaurl, str1)
    except Exception as e:
        traceback.print_exc()
        return str(e)
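
# --- Usage sketch (illustrative only, not part of the original module) ---
# Both helpers are coroutines, so they must be awaited. wiki1 expects a base
# path under which 'api.php' is reachable; the URL and the 'en' interwiki
# prefix below are hypothetical placeholders, not values from this repo.
if __name__ == '__main__':
    import asyncio

    async def demo():
        # Direct lookup against one wiki's API.
        print(await wiki1('https://example.org/w/', 'Sandbox'))
        # Lookup through an interwiki prefix registered in interwikilist.
        print(await wiki2('en', 'Sandbox'))

    asyncio.run(demo())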