From e6d5cd44327a93394e8ef892452ca526acac22cd Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Wed, 6 Feb 2019 22:06:28 -0800 Subject: [PATCH] ignore robots.txt when using wget --- archivebox/archive_methods.py | 1 + 1 file changed, 1 insertion(+) diff --git a/archivebox/archive_methods.py b/archivebox/archive_methods.py index 9e00070f..0148849d 100644 --- a/archivebox/archive_methods.py +++ b/archivebox/archive_methods.py @@ -217,6 +217,7 @@ def fetch_wget(link_dir, link, requisites=FETCH_WGET_REQUISITES, warc=FETCH_WARC '--backup-converted', '--span-hosts', '--no-parent', + '-e', 'robots=off', '--restrict-file-names=unix', '--timeout={}'.format(timeout), *(() if warc else ('--timestamping',)),