From 1c5732d5c60cf7ac9afe1604cacf8ba1e62b2937 Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Fri, 22 Mar 2019 14:01:27 -0400
Subject: [PATCH] improve logging of skipped links

---
Review notes (this area is not part of the commit message):
 * `made_changes` is assigned in archive_link() but is not read by any hunk
   in this patch; it looks like a leftover -- confirm before merging.
 * logs.py now calls sys.stdout.write(); no hunk adds `import sys`, so the
   module presumably imports it already -- verify.
 * Line breaks, indentation and blank lines were restored from a
   whitespace-collapsed copy using the hunk line counts. Whitespace inside
   string literals and on the removed blank lines could not be recovered,
   so check against the target tree before applying.

 archivebox/archive_methods.py | 13 ++++++++++++-
 archivebox/index.py           |  1 -
 archivebox/logs.py            | 23 ++++++++++++++++++-----
 3 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/archivebox/archive_methods.py b/archivebox/archive_methods.py
index b403f637..c2b93e92 100644
--- a/archivebox/archive_methods.py
+++ b/archivebox/archive_methods.py
@@ -52,6 +52,7 @@ from util import (
 from logs import (
     _LAST_RUN_STATS,
     log_link_archiving_started,
+    log_link_archiving_finished,
     log_archive_method_starting,
     log_archive_method_finished,
 )
@@ -86,6 +87,7 @@ def archive_link(link_dir, link):
 
         link = load_json_link_index(link_dir, link)
         log_link_archiving_started(link_dir, link, is_new)
+        skipped_entirely = True
 
         for method_name, should_run, method_function in ARCHIVE_METHODS:
             if method_name not in link['history']:
@@ -95,6 +97,10 @@ def archive_link(link_dir, link):
             if not should_run(link_dir, link):
                 continue
 
+            if skipped_entirely:
+                skipped_entirely = False
+                print()
+
             log_archive_method_starting(method_name)
             result = method_function(link_dir, link)
             log_archive_method_finished(result)
@@ -102,12 +108,17 @@ def archive_link(link_dir, link):
             link['history'][method_name].append(result)
             if result['status'] == 'succeeded':
                 link['latest'][method_name] = result['output']
-
+
+            if result['status'] != 'skipped':
+                made_changes = True
+
             _LAST_RUN_STATS[result['status']] += 1
 
         write_link_index(link_dir, link)
         patch_links_index(link)
 
+        log_link_archiving_finished(link_dir, link, is_new, skipped_entirely)
+
     except Exception as err:
         print(' ! Failed to archive link: {}: {}'.format(err.__class__.__name__, err))
         raise
diff --git a/archivebox/index.py b/archivebox/index.py
index 83659644..2b27a067 100644
--- a/archivebox/index.py
+++ b/archivebox/index.py
@@ -201,7 +201,6 @@ def write_link_index(out_dir, link):
     link['updated'] = str(datetime.now().timestamp())
     write_json_link_index(out_dir, link)
     write_html_link_index(out_dir, link)
-    # print(' √ index.html, index.json')
 
 def write_json_link_index(out_dir, link):
     """write a json file with some info about the link"""
diff --git a/archivebox/logs.py b/archivebox/logs.py
index c82deb48..8d87d032 100644
--- a/archivebox/logs.py
+++ b/archivebox/logs.py
@@ -27,16 +27,29 @@ def pretty_path(path):
 
 
 def log_link_archiving_started(link_dir, link, is_new):
-    print('[{symbol_color}{symbol}{reset}] [{now}] "{title}"\n {blue}{url}{reset}'.format(
-        symbol='+' if is_new else '*',
+    # [*] [2019-03-22 13:46:45] "Log Structured Merge Trees - ben stopford"
+    # http://www.benstopford.com/2015/02/14/log-structured-merge-trees/
+    # > output/archive/1478739709
+
+    print('\n[{symbol_color}{symbol}{reset}] [{symbol_color}{now}{reset}] "{title}"'.format(
         symbol_color=ANSI['green' if is_new else 'black'],
+        symbol='+' if is_new else '*',
         now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
-        **{**link, 'title': link['title'] or link['url']},
+        title=link['title'] or link['url'],
         **ANSI,
     ))
+    print(' {blue}{url}{reset}'.format(url=link['url'], **ANSI))
+    sys.stdout.write(' > {}{}'.format(
+        pretty_path(link_dir),
+        ' (new)' if is_new else '',
+    ))
 
-    print(' > {}{}'.format(pretty_path(link_dir), ' (new)' if is_new else ''))
-
+def log_link_archiving_finished(link_dir, link, is_new, skipped_entirely):
+    if skipped_entirely:
+        print('\r √ {}{}'.format(
+            pretty_path(link_dir),
+            ' (new)' if is_new else '',
+        ))
 
 def log_archive_method_starting(method):
     print(' > {}'.format(method))