import re from html import unescape from bs4 import BeautifulSoup import ujson as json from core.builtins import Bot from core.builtins import Plain, Image, Url from core.component import module from core.logger import Logger from core.utils.http import get_url from core.utils.i18n import Locale from core.utils.image import msgchain2image from modules.wiki.utils.screenshot_image import generate_screenshot_v2 from modules.wiki.utils.wikilib import WikiLib from .teahouse import get_rss as get_teahouse_rss async def get_weekly(with_img=False, zh_tw=False): locale = Locale('zh_cn' if not zh_tw else 'zh_tw') result = json.loads(await get_url( 'https://zh.minecraft.wiki/api.php?action=parse&page=Minecraft_Wiki&prop=text|revid|images&format=json' + ('&variant=zh-tw' if zh_tw else ''), 200)) b_result = BeautifulSoup(result['parse']['text']['*'], 'html.parser') html = b_result.find('div', id='fp-section-weekly') content = html.find('div', class_='weekly-content') text = re.sub(r'

', '\n', str(content)) # 分段 text = re.sub(r'<(.*?)>', '', text, flags=re.DOTALL) # 移除所有 HTML 标签 text = re.sub(r'\n\n\n', '\n\n', text) # 移除不必要的空行 text = re.sub(r'\n*$', '', text) text = unescape(text) img = html.find('div', class_='weekly-image').find(class_='image') img_filename = re.match(r'/w/(.*)', img.attrs['href']) page = re.findall(r'(?<=