From 16604d8cf2ec3af7279850a42c8b1f87952bfe71 Mon Sep 17 00:00:00 2001 From: yzhh <2596322644@qq.com> Date: Sat, 15 May 2021 23:50:17 +0800 Subject: [PATCH] Update getinfobox.py --- modules/wiki/getinfobox.py | 42 ++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/modules/wiki/getinfobox.py b/modules/wiki/getinfobox.py index 01bf1b0c..3132ac17 100644 --- a/modules/wiki/getinfobox.py +++ b/modules/wiki/getinfobox.py @@ -32,6 +32,8 @@ async def get_infobox_pic(link, pagelink, headers): if os.path.exists(url): os.remove(url) logger_info('Downloaded raw.') + open_file = open(url, 'a', encoding='utf-8') + html_list = [] find_infobox = soup.find(class_='notaninfobox') # 我 if find_infobox is None: # 找 find_infobox = soup.find(class_='portable-infobox') # 找 @@ -46,26 +48,28 @@ async def get_infobox_pic(link, pagelink, headers): if find_infobox is None: # 找 find_infobox = soup.find(class_='skin-infobox') # 找 if find_infobox is None: # 找 - find_infobox = soup.find(class_='songbox') # 找 (arcw) + elementlist = [] + for x in soup.find_all('style'): + if x.has_attr('href'): + x.attrs['href'] = re.sub(';', '&', urljoin(wlink, x.get('href'))) + if x.has_attr('style'): + x.attrs['style'] = re.sub(r'url\(/(.*)\)', 'url(' + link + '\\1)', x.get('style')) + print(x) + elementlist.append(str(x.parent)) + + find_infobox = BeautifulSoup('\n'.join(elementlist), 'html.parser') if find_infobox is None: # 找 find_infobox = soup.find(class_='songtable') # 找 (arcw) if find_infobox is None: # 找 return False # 找你妈,不找了<-咱还是回家吧 logger_info('Find infobox, start modding...') - if infobox_render is None: - open_file = open(url, 'a', encoding='utf-8') - else: - html_list = [] - for x in soup.find_all(rel='stylesheet'): - y = str(x.get('href')) - z = urljoin(wlink, y) - z = re.sub(';', '&', z) - if infobox_render is None: - open_file.write(f'\n') - else: - html_list.append(f'\n') + if x.has_attr('href'): + x.attrs['href'] = re.sub(';', '&', urljoin(wlink, x.get('href'))) + print(x) + open_file.write(str(x)) + html_list.append(str(x)) def join_url(base, target): target = target.split(' ') @@ -90,13 +94,11 @@ async def get_infobox_pic(link, pagelink, headers): x.attrs['style'] = re.sub(r'url\(/(.*)\)', 'url(' + link + '\\1)', x.get('style')) replace_link = find_infobox - if infobox_render is None: - open_file.write(str(replace_link)) - open_file.close() - else: - html_list.append(str(replace_link)) - html = '\n'.join(html_list) - html = {'content': html} + open_file.write(str(replace_link)) + open_file.close() + html_list.append(str(replace_link)) + html = '\n'.join(html_list) + html = {'content': html} logger_info('Start rendering...') picname = os.path.abspath(f'./cache/{pagename}.jpg') if os.path.exists(picname):