From 96dc7b22742df3f8fe4550e4238f44d3e5183cc3 Mon Sep 17 00:00:00 2001 From: yzhh <2596322644@qq.com> Date: Sun, 30 Jan 2022 15:09:01 +0800 Subject: [PATCH] Update getinfobox.py --- modules/wiki/getinfobox.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/modules/wiki/getinfobox.py b/modules/wiki/getinfobox.py index f6194d75..9bde70c9 100644 --- a/modules/wiki/getinfobox.py +++ b/modules/wiki/getinfobox.py @@ -49,7 +49,17 @@ async def get_infobox_pic(link, page_link, headers, section=None) -> Union[str, target = ' '.join(targetlist) return target - open_file.write('\n\n\n') + open_file.write('\n') + for x in soup.find_all('html'): + fl = [] + for f in x.attrs: + if isinstance(x.attrs[f], str): + fl.append(f'{f}="{x.attrs[f]}"') + elif isinstance(x.attrs[f], list): + fl.append(f'{f}="{" ".join(x.attrs[f])}"') + open_file.write(f'') + + open_file.write('\n') for x in soup.find_all(rel='stylesheet'): if x.has_attr('href'): x.attrs['href'] = re.sub(';', '&', urljoin(wlink, x.get('href'))) @@ -62,10 +72,6 @@ async def get_infobox_pic(link, page_link, headers, section=None) -> Union[str, open_file.write(str(x)) open_file.write('') - for x in soup.find_all('body'): - if x.has_attr('class'): - open_file.write(f'') - if section is None: infoboxes = ['notaninfobox', 'portable-infobox', 'infobox', 'tpl-infobox', 'infoboxtable', 'infotemplatebox', 'skin-infobox', 'arcaeabox'] @@ -101,8 +107,13 @@ async def get_infobox_pic(link, page_link, headers, section=None) -> Union[str, open_file.write(str(find_infobox)) if find_infobox.parent.has_attr('style'): open_file.write(join_url(link, find_infobox.parent.get('style'))) - w = 1000 + w = 500 + open_file.write('') else: + for x in soup.find_all('body'): + if x.has_attr('class'): + open_file.write(f'') + for x in soup.find_all('div'): if x.has_attr('id'): if x.get('id') in ['content', 'mw-content-text']: @@ -184,6 +195,7 @@ async def get_infobox_pic(link, page_link, headers, section=None) -> Union[str, open_file = open(url, 'w', encoding='utf-8') open_file.write(str(soup)) w = 1000 + open_file.write('') open_file.write('') - open_file.write('') + open_file.write('') open_file.close() read_file = open(url, 'r', encoding='utf-8') html = {'content': read_file.read(), 'width': w}