#!./bin/python3
"""Render every URL listed in the ``*.txt`` files of the current directory to PDF.

For each ``NAME.txt`` found in the working directory, a folder ``NAME`` is
created next to it and every non-blank line of the file is treated as a URL
that is rendered into a PDF inside that folder. The rendering backend
(pdfkit or weasyprint) is selected with ``--backend`` / ``-b``.
"""
import argparse
import os
import re
import sys

# Only files with this suffix are treated as link listings.
LIST_SUFFIX = ".txt"

# Every character that is not a word character, '-' or '.' is removed when a
# link is turned into a file name.
_UNSAFE_CHARS = re.compile(r'(?u)[^-\w.]')


def parse_args(argv=None):
    """Parse the command line.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with a ``backend`` attribute that is
        either ``'pdfkit'`` (default) or ``'weasyprint'``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--backend', '-b',
        dest='backend',
        help='change the download backend; default: pdfkit',
        default='pdfkit',
        choices=['pdfkit', 'weasyprint'],
    )
    return parser.parse_args(argv)


def read_links(path):
    """Return the links stored in *path*, one per line.

    Surrounding whitespace (including the line terminator) is stripped so the
    backend receives a clean URL, and blank lines are skipped. The file is
    closed before returning.
    """
    with open(path, "r") as handle:
        stripped = (line.strip() for line in handle)
        return [link for link in stripped if link]


def pdf_path(folder, link):
    """Return the output path of the PDF for *link* inside *folder*.

    The first five characters of the link are dropped ("http:" or "https";
    whatever remains of the scheme separator is punctuation and is removed by
    the sanitising step), then every character other than word characters,
    '-' and '.' is removed and ".pdf" is appended.
    """
    stem = _UNSAFE_CHARS.sub('', link[5:])
    return os.path.join(folder, stem + ".pdf")


def load_renderer(backend):
    """Return a callable ``render(url, target)`` for the chosen backend.

    The backend module is imported here rather than at module level so that
    only the selected backend has to be installed.

    Raises:
        ImportError: If the module for *backend* cannot be imported.
    """
    if backend == 'pdfkit':
        import pdfkit

        return pdfkit.from_url

    # NOTE: weasyprint seems faster than pdfkit (unverified observation
    # carried over from the original script).
    import weasyprint

    def render_with_weasyprint(url, target):
        pdf = weasyprint.HTML(url).write_pdf()
        with open(target, 'wb') as out:
            out.write(pdf)

    return render_with_weasyprint


def download_listing(filename, render):
    """Render every link listed in *filename* into the folder named after it.

    The folder name is *filename* without its ``.txt`` suffix. A link that
    fails to render is reported on stderr and the remaining links are still
    processed; ``KeyboardInterrupt`` is not intercepted.

    Args:
        filename: Path of the ``.txt`` listing.
        render: Callable taking ``(url, target_path)`` that writes the PDF.

    Raises:
        OSError: If the folder cannot be created or the listing cannot be read.
    """
    print("Examining: " + filename)
    folder = filename[:-len(LIST_SUFFIX)]
    if folder:
        os.makedirs(folder, exist_ok=True)

    for link in read_links(filename):
        print("Downloading: " + link)
        target = pdf_path(folder, link)
        try:
            render(link, target)
        except Exception as exc:
            # Best effort: one broken link must not abort the whole batch,
            # but the failure is reported instead of being silently dropped.
            print("Failed: " + link + " (" + str(exc) + ")", file=sys.stderr)


def main(argv=None):
    """Process every ``.txt`` listing in the current working directory."""
    args = parse_args(argv)
    try:
        render = load_renderer(args.backend)
    except ImportError as exc:
        sys.exit("backend '" + args.backend + "' is not available: " + str(exc))

    for filename in os.listdir("."):
        if not filename.endswith(LIST_SUFFIX):
            continue
        try:
            download_listing(filename, render)
        except OSError as exc:
            # An unusable listing or folder only skips that listing.
            print("Skipping " + filename + ": " + str(exc), file=sys.stderr)


if __name__ == "__main__":
    main()