Astronomy/mkbinder.py

75 lines
2.1 KiB
Python
Raw Normal View History

2020-12-08 03:57:46 +00:00
#!venv/bin/python3
2020-11-28 00:29:13 +00:00
import pdfkit
2020-11-28 01:09:07 +00:00
import weasyprint
2020-11-28 00:08:09 +00:00
import re
2020-11-28 00:29:13 +00:00
import os
2020-11-28 01:09:07 +00:00
import argparse
2020-12-14 03:23:26 +00:00
import pickle
from htmldate import find_date
from datetime import date
2020-11-28 01:09:07 +00:00
# CLI arguments
parser = argparse.ArgumentParser()
parser.add_argument('--backend', '-b', dest = 'backend', help = 'change the download backend; default: pdfkit', default = 'pdfkit', choices = ['pdfkit', 'weasyprint'])
args = parser.parse_args()
2020-11-28 00:08:09 +00:00
2020-11-28 00:29:13 +00:00
for filename in os.listdir("."):
if not filename.endswith(".txt"): continue
2020-12-08 03:57:46 +00:00
if filename == "requirements.txt": continue
2020-11-28 00:29:13 +00:00
print("Examining: " + filename)
2020-11-28 00:08:09 +00:00
try:
2020-11-28 00:29:13 +00:00
os.mkdir(filename[:-4])
2020-11-28 00:08:09 +00:00
except:
pass
2020-12-14 03:23:26 +00:00
try:
dates = pickle.load(open(filename[:-4] + ".pickle", 'rb'))
except:
dates = {}
2020-11-28 00:29:13 +00:00
file = open(filename, "r")
links = file.readlines()
for link in links:
2020-12-14 03:23:26 +00:00
new_date_str = find_date(link[:-1])
if new_date_str == None:
new_date = date.fromisoformat("9999-01-01")
else:
new_date = date.fromisoformat(new_date_str)
2020-11-28 00:29:13 +00:00
try:
2020-12-14 03:23:26 +00:00
old_date = dates[link[:-1]]
2020-11-28 00:29:13 +00:00
except:
2020-12-14 03:23:26 +00:00
old_date = date.fromisoformat("1970-01-01")
if new_date > old_date:
print("Downloading: " + link[:-1])
print("Edit date: " + str(new_date))
name = os.path.join(filename[:-4], re.sub(r'(?u)[^-\w.]', '', link[5:]) + ".pdf")
# name = re.sub(r'(?u)[^-\w.]', '', link[5:]) + ".pdf"
# print(name)
try:
# weasyprint seems faster?
if args.backend == 'pdfkit':
pdfkit.from_url(link, name)
else:
pdf = weasyprint.HTML(link).write_pdf()
open(name, 'wb').write(pdf)
except:
2020-12-14 03:29:11 +00:00
print("Error when printing")
2020-12-14 03:23:26 +00:00
pass
if new_date != date.fromisoformat("9999-01-01"):
dates[link[:-1]] = new_date
pickle.dump(dates, open(filename[:-4] + ".pickle", 'wb'))