From e09c704d5021aec70d7f464d90c29683da323503 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 4 Jul 2017 03:21:47 -0500 Subject: [PATCH] disable wget --mirror in favor of timestamping --- archive.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/archive.py b/archive.py index c308bf8e..436059a3 100755 --- a/archive.py +++ b/archive.py @@ -168,7 +168,7 @@ def fetch_wget(out_dir, link, overwrite=False): if not os.path.exists('{}/{}'.format(out_dir, domain)) or overwrite: print(' - Downloading Full Site') CMD = [ - *'wget --mirror --adjust-extension --convert-links --no-parent'.split(' '), + *'wget --timestamping --adjust-extension --convert-links --no-parent'.split(' '), *(('--page-requisites',) if FETCH_WGET_IMAGES else ()), link['url'], ] @@ -225,8 +225,9 @@ def archive_dot_org(out_dir, link, overwrite=False): submit_url = 'https://web.archive.org/save/{}'.format(link['url'].split('?', 1)[0]) success = False + CMD = ['curl', '-I', submit_url] try: - result = run(['curl', '-I', submit_url], stdout=PIPE, stderr=DEVNULL, cwd=out_dir, timeout=TIMEOUT) # archive.org + result = run(CMD, stdout=PIPE, stderr=DEVNULL, cwd=out_dir, timeout=TIMEOUT) # archive.org headers = result.stdout.splitlines() content_location = [h for h in headers if b'Content-Location: ' in h] if content_location: @@ -234,6 +235,7 @@ def archive_dot_org(out_dir, link, overwrite=False): saved_url = 'https://web.archive.org{}'.format(archive_path) success = True else: + print(' Visit url to see output:', ' '.join(CMD)) raise Exception('Failed to find Content-Location URL in Archive.org response headers.') except Exception as e: print(' Exception: {} {}'.format(e.__class__.__name__, e))