minor tweaks

This commit is contained in:
Nick Sweeting 2017-06-15 00:45:40 -05:00
parent 5c4f17f5ad
commit 15f051f8f7

View file

@ -124,7 +124,7 @@ def parse_bookmarks_export(html):
'time': dt, 'time': dt,
'timestamp': secs, 'timestamp': secs,
'tags': "", 'tags': "",
'title': match.group(3) 'title': match.group(3),
} }
info['type'] = get_link_type(info) info['type'] = get_link_type(info)
@ -135,7 +135,7 @@ def parse_bookmarks_export(html):
def fetch_wget(out_dir, link, overwrite=False): def fetch_wget(out_dir, link, overwrite=False):
# download full site # download full site
if not os.path.exists('{}/{}'.format(out_dir, link, overwrite=link['domain'])) or overwrite: if not os.path.exists('{}/{}'.format(out_dir, link)) or overwrite:
print(' - Downloading Full Site') print(' - Downloading Full Site')
CMD = [ CMD = [
*'wget --no-clobber --page-requisites --adjust-extension --convert-links --no-parent'.split(' '), *'wget --no-clobber --page-requisites --adjust-extension --convert-links --no-parent'.split(' '),
@ -150,7 +150,7 @@ def fetch_wget(out_dir, link, overwrite=False):
def fetch_pdf(out_dir, link, overwrite=False): def fetch_pdf(out_dir, link, overwrite=False):
# download PDF # download PDF
if (not os.path.exists('{}/output.pdf'.format(out_dir)) or overwrite) and not link['base_url'].endswith('.pdf'): if (not os.path.exists('{}/output.pdf'.format(out_dir)) or overwrite) and link['type'] not in ('PDF', 'image'):
print(' - Printing PDF') print(' - Printing PDF')
chrome_args = '--headless --disable-gpu --print-to-pdf'.split(' ') chrome_args = '--headless --disable-gpu --print-to-pdf'.split(' ')
try: try:
@ -162,7 +162,7 @@ def fetch_pdf(out_dir, link, overwrite=False):
def fetch_screenshot(out_dir, link, overwrite=False): def fetch_screenshot(out_dir, link, overwrite=False):
# take screenshot # take screenshot
if (not os.path.exists('{}/screenshot.png'.format(out_dir)) or overwrite) and not link['base_url'].endswith('.pdf'): if (not os.path.exists('{}/screenshot.png'.format(out_dir)) or overwrite) and link['type'] not in ('PDF', 'image'):
print(' - Snapping Screenshot') print(' - Snapping Screenshot')
chrome_args = '--headless --disable-gpu --screenshot'.split(' ') chrome_args = '--headless --disable-gpu --screenshot'.split(' ')
try: try:
@ -307,10 +307,7 @@ def create_archive(export_file, service, resume=None):
links = [link for link in links if link['timestamp'] >= resume] links = [link for link in links if link['timestamp'] >= resume]
if not links: if not links:
if service == 'pocket': print('[X] No links found in {}, is it a {} export file?'.format(export_file, service))
print('[X] No links found in {}, is it a getpocket.com/export export?'.format(export_file))
elif service == 'pinboard':
print('[X] No links found in {}, is it a pinboard.in/export/format:json/ export?'.format(export_file))
raise SystemExit(1) raise SystemExit(1)
dump_index(links, service) dump_index(links, service)