From 88721512d40395a854b53cdddfed41f88c0cbea4 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Wed, 27 Mar 2019 15:16:53 -0400 Subject: [PATCH] more detailed parsing and indexing cli output --- archivebox/index.py | 5 +++-- archivebox/logs.py | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/archivebox/index.py b/archivebox/index.py index c1ea5dc5..50cd000f 100644 --- a/archivebox/index.py +++ b/archivebox/index.py @@ -76,10 +76,11 @@ def load_links_index(out_dir: str=OUTPUT_DIR, import_path: Optional[str]=None) - # merge existing links in out_dir and new links all_links = list(validate_links(existing_links + new_links)) - num_new_links = len(all_links) - len(existing_links) if import_path and parser_name: - log_parsing_finished(num_new_links, parser_name) + num_parsed = len(raw_links) + num_new_links = len(all_links) - len(existing_links) + log_parsing_finished(num_parsed, num_new_links, parser_name) return all_links, new_links diff --git a/archivebox/logs.py b/archivebox/logs.py index b2913c18..660e27cc 100644 --- a/archivebox/logs.py +++ b/archivebox/logs.py @@ -37,12 +37,13 @@ def log_parsing_started(source_file: str): **ANSI, )) -def log_parsing_finished(num_new_links: int, parser_name: str): +def log_parsing_finished(num_parsed: int, num_new_links: int, parser_name: str): end_ts = datetime.now() _LAST_RUN_STATS.parse_end_ts = end_ts - print(' > Adding {} new links to index (parsed import as {})'.format( + print(' > Parsed {} links as {}'.format(num_parsed, parser_name)) + print(' > Adding {} new links to collection: {}'.format( num_new_links, - parser_name, + OUTPUT_DIR, )) @@ -95,12 +96,10 @@ def log_archiving_paused(num_links: int, idx: int, timestamp: str): timestamp=timestamp, total=num_links, )) - print(' To view your archive, open: {}/index.html'.format(OUTPUT_DIR.replace(REPO_DIR + '/', ''))) - print(' Continue where you left off by running:') - print(' {} {}'.format( - pretty_path(sys.argv[0]), - timestamp, - )) + print(' To view your archive, open:') + print(' {}/index.html'.format(OUTPUT_DIR)) + print(' Continue archiving where you left off by running:') + print(' archivebox {}'.format(timestamp)) def log_archiving_finished(num_links: int): end_ts = datetime.now() @@ -121,7 +120,8 @@ def log_archiving_finished(num_links: int): print(' - {} links skipped'.format(_LAST_RUN_STATS.skipped)) print(' - {} links updated'.format(_LAST_RUN_STATS.succeeded)) print(' - {} links had errors'.format(_LAST_RUN_STATS.failed)) - print(' To view your archive, open: {}/index.html'.format(OUTPUT_DIR.replace(REPO_DIR + '/', ''))) + print(' To view your archive, open:') + print(' {}/index.html'.format(OUTPUT_DIR)) def log_link_archiving_started(link_dir: str, link: Link, is_new: bool):