Archived
1
0
Fork 0

diff support

This commit is contained in:
yzhh 2022-04-13 23:36:55 +08:00
parent a4221c434c
commit 0e2f7a9301
2 changed files with 70 additions and 36 deletions

View file

@ -14,7 +14,7 @@ from core.utils import download_to_cache
from core.utils.image_table import image_table_render, ImageTable
from database import BotDBUtil
from .dbutils import WikiTargetInfo, Audit
from .getinfobox import get_infobox_pic
from .getinfobox import get_pic
from .utils.ab import ab
from .utils.ab_qq import ab_qq
from .utils.newbie import newbie
@ -494,7 +494,7 @@ async def query_pages(session: Union[MessageSession, QueryInfo], title: Union[st
else:
if r.link is not None and r.section is None:
render_infobox_list.append(
{r.link: r.info.realurl})
{r.link: {'url': r.info.realurl, 'in_allowlist': r.info.in_allowlist}})
elif r.link is not None and r.section is not None and r.info.in_allowlist:
render_section_list.append(
{r.link: {'url': r.info.realurl, 'section': r.section}})
@ -534,7 +534,7 @@ async def query_pages(session: Union[MessageSession, QueryInfo], title: Union[st
infobox_msg_list = []
for i in render_infobox_list:
for ii in i:
get_infobox = await get_infobox_pic(i[ii], ii, headers)
get_infobox = await get_pic(i[ii]['url'], ii, headers, allow_special_page=i[ii]['in_allowlist'])
if get_infobox:
infobox_msg_list.append(Image(get_infobox))
if infobox_msg_list:
@ -543,7 +543,7 @@ async def query_pages(session: Union[MessageSession, QueryInfo], title: Union[st
section_msg_list = []
for i in render_section_list:
for ii in i:
get_section = await get_infobox_pic(i[ii]['url'], ii, headers, section=i[ii]['section'])
get_section = await get_pic(i[ii]['url'], ii, headers, section=i[ii]['section'])
if get_section:
section_msg_list.append(Image(get_section))
if section_msg_list:

View file

@ -15,7 +15,7 @@ from core.logger import Logger
web_render = Config('web_render')
async def get_infobox_pic(link, page_link, headers, section=None) -> Union[str, bool]:
async def get_pic(link, page_link, headers, section=None, allow_special_page=False) -> Union[str, bool]:
if not web_render:
return False
try:
@ -74,40 +74,74 @@ async def get_infobox_pic(link, page_link, headers, section=None) -> Union[str,
open_file.write(str(x))
if section is None:
infoboxes = ['notaninfobox', 'portable-infobox', 'infobox', 'tpl-infobox', 'infoboxtable',
'infotemplatebox', 'skin-infobox', 'arcaeabox']
find_infobox = None
for i in infoboxes:
find_infobox = soup.find(class_=i)
if find_infobox is not None:
break
if find_infobox is None:
Logger.info('Found nothing...')
return False # 找你妈,不找了<-咱还是回家吧
Logger.info('Found infobox...')
find_diff = None
if allow_special_page:
diff = 'diff diff-contentalign-left'
find_diff = soup.find(class_=diff)
if find_diff is not None:
Logger.info('Found diff...')
for x in soup.find_all('body'):
if x.has_attr('class'):
open_file.write(f'<body class="{" ".join(x.get("class"))}">')
for x in find_infobox.find_all(['a', 'img', 'span']):
if x.has_attr('href'):
x.attrs['href'] = join_url(link, x.get('href'))
if x.has_attr('src'):
x.attrs['src'] = join_url(link, x.get('src'))
if x.has_attr('srcset'):
x.attrs['srcset'] = join_url(link, x.get('srcset'))
if x.has_attr('style'):
x.attrs['style'] = re.sub(r'url\(/(.*)\)', 'url(' + link + '\\1)', x.get('style'))
for x in soup.find_all('div'):
if x.has_attr('id'):
if x.get('id') in ['content', 'mw-content-text']:
fl = []
for f in x.attrs:
if isinstance(x.attrs[f], str):
fl.append(f'{f}="{x.attrs[f]}"')
elif isinstance(x.attrs[f], list):
fl.append(f'{f}="{" ".join(x.attrs[f])}"')
open_file.write(f'<div {" ".join(fl)}>')
open_file.write('<div class="mw-parser-output">')
for x in find_infobox.find_all(class_='lazyload'):
if x.has_attr('class') and x.has_attr('data-src'):
x.attrs['class'] = 'image'
x.attrs['src'] = x.attrs['data-src']
for x in soup.find_all('main'):
fl = []
for f in x.attrs:
if isinstance(x.attrs[f], str):
fl.append(f'{f}="{x.attrs[f]}"')
elif isinstance(x.attrs[f], list):
fl.append(f'{f}="{" ".join(x.attrs[f])}"')
open_file.write(f'<main {" ".join(fl)}>')
open_file.write(str(find_diff))
w = 2000
if find_diff is None:
infoboxes = ['notaninfobox', 'portable-infobox', 'infobox', 'tpl-infobox', 'infoboxtable',
'infotemplatebox', 'skin-infobox', 'arcaeabox']
find_infobox = None
for i in infoboxes:
find_infobox = soup.find(class_=i)
if find_infobox is not None:
break
if find_infobox is None:
Logger.info('Found nothing...')
return False
else:
Logger.info('Found infobox...')
for x in find_infobox.find_all(class_='lazyload'):
if x.has_attr('class') and x.has_attr('data-src'):
x.attrs['class'] = 'image'
x.attrs['src'] = x.attrs['data-src']
open_file.write(str(find_infobox))
w = 500
open_file.write('</div>')
for x in find_infobox.find_all(['a', 'img', 'span']):
if x.has_attr('href'):
x.attrs['href'] = join_url(link, x.get('href'))
if x.has_attr('src'):
x.attrs['src'] = join_url(link, x.get('src'))
if x.has_attr('srcset'):
x.attrs['srcset'] = join_url(link, x.get('srcset'))
if x.has_attr('style'):
x.attrs['style'] = re.sub(r'url\(/(.*)\)', 'url(' + link + '\\1)', x.get('style'))
for x in find_infobox.find_all(class_='lazyload'):
if x.has_attr('class') and x.has_attr('data-src'):
x.attrs['class'] = 'image'
x.attrs['src'] = x.attrs['data-src']
for x in find_infobox.find_all(class_='lazyload'):
if x.has_attr('class') and x.has_attr('data-src'):
x.attrs['class'] = 'image'
x.attrs['src'] = x.attrs['data-src']
open_file.write(str(find_infobox))
w = 500
open_file.write('</div>')
else:
for x in soup.find_all('body'):
if x.has_attr('class'):