From 901666bae682b21a3692e27f7820afd750e4d695 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 4 Feb 2019 19:20:36 -0800 Subject: [PATCH] don't show wget errors unless all files failed to download --- archivebox/archive_methods.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/archivebox/archive_methods.py b/archivebox/archive_methods.py index 74c54a2e..0769ba31 100644 --- a/archivebox/archive_methods.py +++ b/archivebox/archive_methods.py @@ -231,10 +231,19 @@ def fetch_wget(link_dir, link, requisites=FETCH_WGET_REQUISITES, warc=FETCH_WARC end() output = wget_output_path(link, look_in=domain_dir) + output_tail = [' ' + line for line in (result.stdout + result.stderr).decode().rsplit('\n', 3)[-3:] if line.strip()] + + # parse out number of files downloaded from "Downloaded: 76 files, 4.0M in 1.6s (2.52 MB/s)" + files_downloaded = ( + int(output_tail[-1].strip().split(' ', 2)[1] or 0) + if 'Downloaded:' in output_tail[-1] + else 0 + ) + # Check for common failure cases - if result.returncode > 0: + if result.returncode > 0 and files_downloaded < 1: print(' Got wget response code {}:'.format(result.returncode)) - print('\n'.join(' ' + line for line in (result.stdout + result.stderr).decode().rsplit('\n', 3)[-3:] if line.strip())) + print('\n'.join(output_tail)) if b'403: Forbidden' in result.stderr: raise Exception('403 Forbidden (try changing WGET_USER_AGENT)') if b'404: Not Found' in result.stderr: