2021-11-16 02:33:58 +00:00
|
|
|
#!/usr/bin/env python3
|
2022-01-23 03:42:27 +00:00
|
|
|
|
2020-11-28 00:29:13 +00:00
|
|
|
import os
|
2022-01-23 03:59:32 +00:00
|
|
|
import re
|
2022-01-23 03:42:27 +00:00
|
|
|
from argparse import ArgumentParser
|
|
|
|
|
|
|
|
# PDF conversion backends
|
|
|
|
import pdfkit
|
|
|
|
import weasyprint
|
|
|
|
|
2020-11-28 01:09:07 +00:00
|
|
|
|
|
|
|
# Command-line options:
#   --backend / -b  selects the PDF conversion backend (defaults to pdfkit)
#   --force   / -f  re-downloads links even when the target PDF already exists
parser = ArgumentParser()
parser.add_argument(
    '--backend', '-b',
    choices=['pdfkit', 'weasyprint'],
    default='pdfkit',
    help='change the download backend',
)
parser.add_argument(
    '--force', '-f',
    action='store_true',
    help='force download all links',
)
args = parser.parse_args()
|
|
|
|
|
2020-11-28 00:08:09 +00:00
|
|
|
|
2022-01-23 04:50:45 +00:00
|
|
|
# Destination paths of every link whose PDF conversion raised an error.
failed = []

# Anything that is not a word character, '-' or '.' is removed from the link
# text when building a PDF file name. Compiled once, outside the loops.
unsafe_chars = re.compile(r'(?u)[^-\w.]')

# Every entry of the 'Links' directory is read as a text file of links; its
# PDFs are written to a directory of the same name under the working directory.
for category in os.listdir('Links'):
    print('🔍 Examining: ' + category)
    os.makedirs(category, exist_ok=True)

    # Text of the most recent '# ' heading line; prefixes each PDF file name.
    section = ''

    # 'with' guarantees the link file is closed once the category is done.
    with open(os.path.join('Links', category), 'r') as links:
        for line in links:
            # Drop only the line terminator, so that a final line without a
            # trailing newline keeps its last character and the backends
            # receive the URL without a stray newline.
            link = line.rstrip('\n')

            if link.startswith('# '):
                section = link[2:]
            # Lines starting with '#' and empty lines are not links.
            if not link or link.startswith('#'):
                continue

            # The first five characters of the link are left out of the file
            # name (presumably the 'http:' part of the scheme -- confirm).
            name = os.path.join(
                category,
                section + ' - ' + unsafe_chars.sub('', link[5:]) + '.pdf',
            )

            # Existing PDFs are kept unless --force was given.
            if os.path.exists(name) and not args.force:
                continue

            print('📲 Downloading: ' + link)
            print('💾 Destination: ' + name)

            # Time to print!
            try:
                if args.backend == 'pdfkit':
                    pdfkit.from_url(link, name)
                else:
                    pdf = weasyprint.HTML(link).write_pdf()
                    # Close the output file deterministically so the PDF is
                    # fully flushed to disk before moving on.
                    with open(name, 'wb') as out:
                        out.write(pdf)
            except KeyboardInterrupt:
                print('😭 Exiting')
                # Same effect as exit(), without depending on the helper
                # that the 'site' module injects into builtins.
                raise SystemExit
            except Exception as e:
                # Best effort: report the problem, remember the destination
                # and carry on with the next link.
                print('😱 Error when printing')
                print(e)
                failed.append(name)
|
|
|
|
|
|
|
|
# Output failed downloads
if failed:
    print(failed)
    # Also save the list to a file named 'failed' in the working directory.
    # The 'with' block closes (and therefore flushes) the file explicitly
    # instead of leaving that to garbage collection.
    with open('failed', 'w') as report:
        print(failed, file=report)
|