mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-14 07:04:27 -04:00
ignore robots.txt when using wget
This commit is contained in:
parent
56d382235f
commit
e6d5cd4432
1 changed file with 1 addition and 0 deletions
|
@@ -217,6 +217,7 @@ def fetch_wget(link_dir, link, requisites=FETCH_WGET_REQUISITES, warc=FETCH_WARC
|
||||||
'--backup-converted',
|
'--backup-converted',
|
||||||
'--span-hosts',
|
'--span-hosts',
|
||||||
'--no-parent',
|
'--no-parent',
|
||||||
|
'-e', 'robots=off',
|
||||||
'--restrict-file-names=unix',
|
'--restrict-file-names=unix',
|
||||||
'--timeout={}'.format(timeout),
|
'--timeout={}'.format(timeout),
|
||||||
*(() if warc else ('--timestamping',)),
|
*(() if warc else ('--timestamping',)),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue