mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-16 16:14:28 -04:00
Add EXTRA_*_ARGS for wget, curl, and singlefile
This commit is contained in:
parent
31d05d8526
commit
4e69d2c9e1
8 changed files with 88 additions and 35 deletions
|
@@ -15,9 +15,11 @@ from ..util import (
|
|||
path,
|
||||
domain,
|
||||
urldecode,
|
||||
dedupe,
|
||||
)
|
||||
from ..config import (
|
||||
WGET_ARGS,
|
||||
WGET_EXTRA_ARGS,
|
||||
TIMEOUT,
|
||||
SAVE_WGET,
|
||||
SAVE_WARC,
|
||||
|
@@ -55,10 +57,8 @@ def save_wget(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
|
|||
|
||||
# WGET CLI Docs: https://www.gnu.org/software/wget/manual/wget.html
|
||||
output: ArchiveOutput = None
|
||||
cmd = [
|
||||
WGET_BINARY,
|
||||
# '--server-response', # print headers for better error parsing
|
||||
*WGET_ARGS,
|
||||
# earlier options take precedence
|
||||
options = [
|
||||
'--timeout={}'.format(timeout),
|
||||
*(['--restrict-file-names={}'.format(RESTRICT_FILE_NAMES)] if RESTRICT_FILE_NAMES else []),
|
||||
*(['--warc-file={}'.format(str(warc_path))] if SAVE_WARC else []),
|
||||
|
@@ -68,6 +68,13 @@ def save_wget(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
|
|||
*(['--compression=auto'] if WGET_AUTO_COMPRESSION else []),
|
||||
*([] if SAVE_WARC else ['--timestamping']),
|
||||
*([] if CHECK_SSL_VALIDITY else ['--no-check-certificate', '--no-hsts']),
|
||||
# '--server-response', # print headers for better error parsing
|
||||
*WGET_EXTRA_ARGS,
|
||||
*WGET_ARGS,
|
||||
]
|
||||
cmd = [
|
||||
WGET_BINARY,
|
||||
*dedupe(*options),
|
||||
link.url,
|
||||
]
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue