85 lines
2.7 KiB
Python
Executable file
85 lines
2.7 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
import pdfkit
|
|
import weasyprint
|
|
import re
|
|
import os
|
|
import argparse
|
|
import pickle
|
|
from htmldate import find_date
|
|
from datetime import date
|
|
|
|
|
|
# CLI arguments
#   --backend/-b  which library renders a page to PDF (pdfkit or weasyprint)
#   --force/-f    re-download every link, even when its edit date is unchanged
parser = argparse.ArgumentParser()
parser.add_argument(
    '--backend', '-b',
    dest='backend',
    help='change the download backend; default: pdfkit',
    default='pdfkit',
    choices=['pdfkit', 'weasyprint'],
)
# --force is a plain on/off switch.  It used to be declared with
# choices=[False, True]; argparse compares the raw command-line string with
# the choices, and no string equals a bool, so every use of the option was
# rejected as an invalid choice.
parser.add_argument(
    '--force', '-f',
    dest='force',
    help='force download all links instead of only the ones that need to be updated; default: False',
    action='store_true',
    default=False,
)
args = parser.parse_args()
|
|
|
|
|
|
# Walk every link list in Links/*.txt and save each listed page as a PDF.
#
# List format, as read below: one URL per line; a line starting with '# '
# sets the current section (used as a prefix of the PDF file name); any other
# line starting with '#' and blank lines are skipped.  The edit date found
# for each URL is cached in Links/<list>.pickle so that a page is only
# downloaded again when its date is newer than the cached one, or when
# --force is given.
UNKNOWN_DATE = date(9999, 1, 1)  # no edit date found: always newer than the cache
NEVER_SEEN = date(1970, 1, 1)    # URL missing from the cache: always older

for filename in os.listdir('Links'):
    if not filename.endswith('.txt'):
        continue

    print('Examining: ' + filename)

    list_name = filename[:-4]  # file name without the '.txt' suffix
    cache_path = os.path.join('Links', list_name + '.pickle')

    # PDFs are written to a directory named after the list; it may already
    # exist from a previous run, which is not an error.
    try:
        os.makedirs(list_name, exist_ok=True)
    except OSError as exc:
        print('Cannot create output directory ' + list_name + ': ' + str(exc))
        continue

    # NOTE: unpickling can execute arbitrary code.  This is acceptable only
    # as long as the cache file is one this script wrote itself.
    try:
        with open(cache_path, 'rb') as cache_file:
            dates = pickle.load(cache_file)
    except FileNotFoundError:
        dates = {}  # no cache yet for this list
    except Exception as exc:
        # A damaged cache must not stop the run; fall back to an empty one.
        print('Ignoring unreadable date cache ' + cache_path + ': ' + str(exc))
        dates = {}

    with open(os.path.join('Links', filename), 'r') as links_file:
        lines = links_file.readlines()

    section = ''
    for line in lines:
        if line.startswith('# '):
            # Strip the line ending so it does not end up in the file name.
            section = line[2:].strip()

        # strip() instead of [:-1]: the last line of a file may have no
        # trailing newline, and slicing would then cut off a real character.
        url = line.strip()
        if not url or url.startswith('#'):
            continue

        # Reset on failure so a value from a previous link is never reused.
        try:
            new_date_str = find_date(url)
        except Exception:
            new_date_str = None

        if new_date_str is None:
            new_date = UNKNOWN_DATE
        else:
            new_date = date.fromisoformat(new_date_str)

        old_date = dates.get(url, NEVER_SEEN)

        if not (new_date > old_date or args.force):
            continue

        # Time to print!
        print('Downloading: ' + url)
        print('Edit date: ' + str(new_date))

        # File name: "<section> - <url without its first 5 characters and
        # without anything but word characters, '-' and '.'>.pdf"
        pdf_name = section + ' - ' + re.sub(r'(?u)[^-\w.]', '', url[5:]) + '.pdf'
        pdf_path = os.path.join(list_name, pdf_name)

        try:
            if args.backend == 'pdfkit':
                pdfkit.from_url(url, pdf_path)
            else:
                pdf = weasyprint.HTML(url).write_pdf()
                with open(pdf_path, 'wb') as pdf_file:
                    pdf_file.write(pdf)
        except Exception as exc:
            # Best effort: report the failure and carry on with the next link.
            print('Error when printing: ' + str(exc))

        # Update date.  NOTE: as before, the date is recorded even when the
        # download reported an error, so such a page is only retried once its
        # edit date changes or --force is used.
        if new_date != UNKNOWN_DATE:
            dates[url] = new_date

    # Dump dates to a pickle
    with open(cache_path, 'wb') as cache_file:
        pickle.dump(dates, cache_file)
|
|
|