From 77ab8ebda6fd8609a26aeb3c075663611d638049 Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Mon, 23 Oct 2017 04:56:21 -0500
Subject: [PATCH] cleanup archive.py

---
 archive.py | 68 ++++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 38 insertions(+), 30 deletions(-)

diff --git a/archive.py b/archive.py
index 0c71e7dc..19a787b3 100755
--- a/archive.py
+++ b/archive.py
@@ -7,8 +7,8 @@ import sys
 
 from datetime import datetime
 
+from parse import parse_links
 from links import validate_links
-from parse import parse_export
 from archive_methods import archive_links, _RESULTS_TOTALS
 from index import (
     write_links_index,
@@ -27,39 +27,18 @@ from util import (
     download_url,
     check_dependencies,
     progress,
+    cleanup_archive,
 )
 
-DESCRIPTION = 'Bookmark Archiver: Create a browsable html archive of a list of links.'
+__DESCRIPTION__ = 'Bookmark Archiver: Create a browsable html archive of a list of links.'
 __DOCUMENTATION__ = 'https://github.com/pirate/bookmark-archiver'
 
-
-def update_archive(export_path, resume=None, append=True):
-    """update or create index.html and download archive of all links"""
+def update_archive(export_path, links, resume=None, append=True):
+    """download the archive of each link, given a path to the export file and the list of links"""
 
     start_ts = datetime.now().timestamp()
 
-    # parse an validate the export file
-    new_links = validate_links(parse_export(export_path))
-
-    # load existing links if archive folder is present
-    if append:
-        existing_links = parse_json_links_index(HTML_FOLDER)
-        links = validate_links(existing_links + new_links)
-    else:
-        existing_links = []
-
-    # merge existing links and new links
-    num_new_links = len(links) - len(existing_links)
-    print('[*] [{}] Adding {} new links from {} to index'.format(
-        datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
-        num_new_links,
-        export_path,
-    ))
-
-    # write link index html & json
-    write_links_index(HTML_FOLDER, links)
-
     # loop over links and archive them
     archive_links(ARCHIVE_FOLDER, links, export_path, resume=resume)
 
@@ -73,16 +52,41 @@ def update_archive(export_path, resume=None, append=True):
         duration,
         ANSI['reset'],
     ))
-    print('    - {} skipped'.format(_RESULTS_TOTALS['skipped']))
-    print('    - {} updates'.format(_RESULTS_TOTALS['succeded']))
+    print('    - {} entries skipped'.format(_RESULTS_TOTALS['skipped']))
+    print('    - {} entries updated'.format(_RESULTS_TOTALS['succeded']))
     print('    - {} errors'.format(_RESULTS_TOTALS['failed']))
 
+def update_index(export_path, resume=None, append=True):
+    """parse new links into the json index, returning the full list of clean links"""
+
+    # parse and validate the export file
+    new_links = validate_links(parse_links(export_path))
+
+    # load existing links if archive folder is present
+    existing_links = []
+    if append:
+        existing_links = parse_json_links_index(HTML_FOLDER)
+    links = validate_links(existing_links + new_links)
+
+    # report how many new links were merged into the index
+    num_new_links = len(links) - len(existing_links)
+    print('[*] [{}] Adding {} new links from {} to index'.format(
+        datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+        num_new_links,
+        export_path,
+    ))
+
+    # write link index html & json
+    write_links_index(HTML_FOLDER, links)
+
+    return links
+
 
 if __name__ == '__main__':
     argc = len(sys.argv)
 
     if argc < 2 or sys.argv[1] in ('-h', '--help', 'help'):
-        print(DESCRIPTION)
+        print(__DESCRIPTION__)
         print("Documentation: {}".format(__DOCUMENTATION__))
         print("")
         print("Usage:")
@@ -96,4 +102,8 @@ if __name__ == '__main__':
     if any(export_path.startswith(s) for s in ('http://', 'https://', 'ftp://')):
        export_path = download_url(export_path)
 
-    update_archive(export_path, resume=resume_from)
+    links = update_index(export_path, resume=resume_from, append=True)
+
+    # make sure folder structure is sane
+    cleanup_archive(ARCHIVE_FOLDER, links)
+    update_archive(export_path, links, resume=resume_from, append=True)
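
For reference, the split entry point can also be driven programmatically. A
minimal sketch, not part of the patch: it assumes archive.py and util.py are
importable from the repo root, and the ARCHIVE_FOLDER import path and the
export filename are hypothetical placeholders:

    from archive import update_index, update_archive
    from util import cleanup_archive
    from index import ARCHIVE_FOLDER  # hypothetical: wherever the config constant lives

    # parse the export file, merge it with the existing index, and write index.html+json
    links = update_index('bookmarks_export.html', append=True)

    # normalize the archive folder layout before downloading anything
    cleanup_archive(ARCHIVE_FOLDER, links)

    # download an archived copy of each link
    update_archive('bookmarks_export.html', links)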