Archived
1
0
Fork 0
This repository has been archived on 2024-04-26. You can view files and clone it, but cannot push or open issues or pull requests.
akari-bot/modules/wiki/wikilib.py

102 lines
4.8 KiB
Python
Raw Normal View History

2020-08-12 16:01:34 +00:00
import aiohttp
2020-06-13 12:43:43 +00:00
import re
2020-08-01 03:25:34 +00:00
import traceback
2020-08-12 16:01:34 +00:00
import urllib
2020-08-18 13:12:38 +00:00
from modules.interwikilist import iwlist, iwlink
2020-08-12 16:01:34 +00:00
2020-08-12 08:01:00 +00:00
async def get_data(url: str, fmt: str):
    """Fetch *url* and decode the response body.

    *fmt* names the aiohttp response method used to parse the body
    (e.g. ``"json"`` or ``"text"``).  Raises ValueError when the response
    object has no such method.  The whole request is capped at 20 seconds.
    """
    timeout = aiohttp.ClientTimeout(total=20)
    async with aiohttp.ClientSession() as session:
        async with session.get(url, timeout=timeout) as req:
            parser = getattr(req, fmt, None)
            if parser is None:
                raise ValueError(f"NoSuchMethod: {fmt}")
            return await parser()
2020-08-13 07:09:51 +00:00
async def wiki1(wikilink, pagename):
    """Look up *pagename* on the MediaWiki site rooted at *wikilink*.

    Returns a user-facing string: the page URL plus a one-sentence extract,
    a search suggestion when the page is missing, or an error message.
    When the direct query blows up and the title looks interwiki-prefixed
    ("prefix:Title"), the lookup is retried through wiki2().
    """
    print(pagename)  # debug trace kept from the original
    getlinkurl = wikilink + 'api.php?action=query&format=json&prop=info&inprop=url&redirects&titles=' + pagename
    print(getlinkurl)
    file = await get_data(getlinkurl, "json")
    try:
        pages = file['query']['pages']
        pageid = sorted(pages.keys())[0]
        if int(pageid) == -1:
            # Negative page id: title is invalid, or the page does not exist.
            if 'invalid' in pages[pageid]:
                rs = re.sub('The requested page title contains invalid characters:',
                            '请求的页面标题包含非法字符:',
                            pages[pageid]['invalidreason'])
                return ('发生错误:“' + rs + '”。')
            elif 'missing' in pages[pageid]:
                # Page missing: offer a search suggestion — first via
                # generator=search, then list=search as a fallback.
                try:
                    try:
                        searchurl = wikilink + 'api.php?action=query&generator=search&gsrsearch=' + pagename + '&gsrsort=just_match&gsrenablerewrites&prop=info&gsrlimit=1&format=json'
                        getsecjson = await get_data(searchurl, "json")
                        secpages = getsecjson['query']['pages']
                        secpageid = sorted(secpages.keys())[0]
                        sectitle = secpages[secpageid]['title']
                        return ('找不到条目,您是否要找的是:' + sectitle + '')
                    except Exception:
                        searchurl = wikilink + 'api.php?action=query&list=search&srsearch=' + pagename + '&srwhat=text&srlimit=1&srenablerewrites=&format=json'
                        getsecjson = await get_data(searchurl, "json")
                        sectitle = getsecjson['query']['search'][0]['title']
                        return ('找不到条目,您是否要找的是:' + sectitle + '')
                except Exception:
                    return ('找不到条目。')
            else:
                # Neither 'invalid' nor 'missing': just echo a best-guess link.
                return ('您要的' + pagename + '' + wikilink + urllib.parse.quote(pagename.encode('UTF-8')))
        else:
            getfullurl = pages[pageid]['fullurl']
            geturlpagename = re.match(r'https?://.*?/(?:index.php/|wiki/|)(.*)', getfullurl, re.M | re.I)
            try:
                # BUGFIX: the original built this from getlinkurl (already a
                # complete api.php query URL) + '/api.php?...', yielding a
                # malformed request; root it at wikilink like every other call.
                descurl = wikilink + 'api.php?action=query&prop=extracts&exsentences=1&&explaintext&exsectionformat=wiki&format=json&titles=' + geturlpagename.group(1)
                loadtext = await get_data(descurl, "json")
                desc = loadtext['query']['pages'][pageid]['extract']
            except Exception:
                desc = ''  # the extract is best-effort; a missing one is not fatal
            try:
                # Re-attach a '#section' anchor from the requested title, if any.
                section = re.match(r'.*(\#.*)', pagename)
                getfullurl = pages[pageid]['fullurl'] + urllib.parse.quote(section.group(1).encode('UTF-8'))
            except Exception:
                getfullurl = pages[pageid]['fullurl']
            getfinalpagename = re.match(r'https?://.*?/(?:index.php/|wiki/|)(.*)', getfullurl)
            finalpagename = urllib.parse.unquote(getfinalpagename.group(1), encoding='UTF-8')
            finalpagename = finalpagename.replace('_', ' ')
            if finalpagename == pagename:
                rmlstlb = re.sub(r'\n$', '', getfullurl + '\n' + desc)
            else:
                # Resolved title differs (redirect/normalization): say so.
                rmlstlb = re.sub(r'\n$', '', '\n(重定向[' + pagename + ']至[' + finalpagename + ']\n' + getfullurl + '\n' + desc)
            # Collapse runs of blank lines.  The original applied the same
            # re.sub four times in a row; a loop handles runs of any length.
            while '\n\n' in rmlstlb:
                rmlstlb = rmlstlb.replace('\n\n', '\n')
            # Keep at most five lines; re.findall returns [] (not an exception)
            # for short text, so test emptiness instead of a bare except.
            rm5lline = re.findall(r'.*\n.*\n.*\n.*\n.*\n', rmlstlb)
            if rm5lline:
                result = rm5lline[0] + '\n...行数过多已截断。'
            else:
                result = rmlstlb
            return ('您要的' + pagename + "" + result)
    except Exception:
        # Direct lookup failed — perhaps the title carries an interwiki prefix.
        try:
            matchinterwiki = re.match(r'(.*?):(.*)', pagename)
            interwiki = matchinterwiki.group(1)
            if interwiki in iwlist():
                return (await wiki2(interwiki, matchinterwiki.group(2)))
            else:
                return ('发生错误:内容非法。')
        except Exception as e:
            traceback.print_exc()
            return ('发生错误:' + str(e))
2020-08-03 15:19:27 +00:00
2020-08-13 07:09:51 +00:00
async def wiki2(interwiki, str1):
    """Resolve the *interwiki* prefix to its base URL and look up *str1* there.

    Any failure (unknown prefix, network error, …) is logged and returned
    to the caller as a plain string.
    """
    try:
        return await wiki1(iwlink(interwiki), str1)
    except Exception as e:
        traceback.print_exc()
        return (str(e))