mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 06:34:25 -04:00
feat: Add WGET_ARGS to control wget arguments
This commit is contained in:
parent
65530e1e5b
commit
24e7a74855
3 changed files with 15 additions and 9 deletions
|
@@ -120,7 +120,17 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
|
||||||
'--audio-format', 'mp3',
|
'--audio-format', 'mp3',
|
||||||
'--audio-quality', '320K',
|
'--audio-quality', '320K',
|
||||||
'--embed-thumbnail',
|
'--embed-thumbnail',
|
||||||
'--add-metadata']}
|
'--add-metadata']},
|
||||||
|
|
||||||
|
'WGET_ARGS': {'type': list, 'default': ['--no-verbose',
|
||||||
|
'--adjust-extension',
|
||||||
|
'--convert-links',
|
||||||
|
'--force-directories',
|
||||||
|
'--backup-converted',
|
||||||
|
'--span-hosts',
|
||||||
|
'--no-parent',
|
||||||
|
'-e', 'robots=off',
|
||||||
|
]}
|
||||||
},
|
},
|
||||||
|
|
||||||
'DEPENDENCY_CONFIG': {
|
'DEPENDENCY_CONFIG': {
|
||||||
|
@@ -276,6 +286,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
||||||
'WGET_USER_AGENT': {'default': lambda c: c['WGET_USER_AGENT'].format(**c)},
|
'WGET_USER_AGENT': {'default': lambda c: c['WGET_USER_AGENT'].format(**c)},
|
||||||
'SAVE_WGET': {'default': lambda c: c['USE_WGET'] and c['SAVE_WGET']},
|
'SAVE_WGET': {'default': lambda c: c['USE_WGET'] and c['SAVE_WGET']},
|
||||||
'SAVE_WARC': {'default': lambda c: c['USE_WGET'] and c['SAVE_WARC']},
|
'SAVE_WARC': {'default': lambda c: c['USE_WGET'] and c['SAVE_WARC']},
|
||||||
|
'WGET_ARGS': {'default': lambda c: c['WGET_ARGS'] or []},
|
||||||
|
|
||||||
'USE_SINGLEFILE': {'default': lambda c: c['USE_SINGLEFILE'] and c['SAVE_SINGLEFILE']},
|
'USE_SINGLEFILE': {'default': lambda c: c['USE_SINGLEFILE'] and c['SAVE_SINGLEFILE']},
|
||||||
'SINGLEFILE_VERSION': {'default': lambda c: bin_version(c['SINGLEFILE_BINARY']) if c['USE_SINGLEFILE'] else None},
|
'SINGLEFILE_VERSION': {'default': lambda c: bin_version(c['SINGLEFILE_BINARY']) if c['USE_SINGLEFILE'] else None},
|
||||||
|
|
|
@@ -95,6 +95,7 @@ class ConfigDict(BaseConfig, total=False):
|
||||||
CHROME_BINARY: Optional[str]
|
CHROME_BINARY: Optional[str]
|
||||||
|
|
||||||
YOUTUBEDL_ARGS: Optional[str]
|
YOUTUBEDL_ARGS: Optional[str]
|
||||||
|
WGET_ARGS: Optional[str]
|
||||||
|
|
||||||
|
|
||||||
ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]
|
ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]
|
||||||
|
|
|
@@ -19,6 +19,7 @@ from ..util import (
|
||||||
urldecode,
|
urldecode,
|
||||||
)
|
)
|
||||||
from ..config import (
|
from ..config import (
|
||||||
|
WGET_ARGS,
|
||||||
TIMEOUT,
|
TIMEOUT,
|
||||||
SAVE_WGET,
|
SAVE_WGET,
|
||||||
SAVE_WARC,
|
SAVE_WARC,
|
||||||
|
@@ -59,14 +60,7 @@ def save_wget(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
|
||||||
cmd = [
|
cmd = [
|
||||||
WGET_BINARY,
|
WGET_BINARY,
|
||||||
# '--server-response', # print headers for better error parsing
|
# '--server-response', # print headers for better error parsing
|
||||||
'--no-verbose',
|
*WGET_ARGS,
|
||||||
'--adjust-extension',
|
|
||||||
'--convert-links',
|
|
||||||
'--force-directories',
|
|
||||||
'--backup-converted',
|
|
||||||
'--span-hosts',
|
|
||||||
'--no-parent',
|
|
||||||
'-e', 'robots=off',
|
|
||||||
'--timeout={}'.format(timeout),
|
'--timeout={}'.format(timeout),
|
||||||
*(['--restrict-file-names={}'.format(RESTRICT_FILE_NAMES)] if RESTRICT_FILE_NAMES else []),
|
*(['--restrict-file-names={}'.format(RESTRICT_FILE_NAMES)] if RESTRICT_FILE_NAMES else []),
|
||||||
*(['--warc-file={}'.format(str(warc_path))] if SAVE_WARC else []),
|
*(['--warc-file={}'.format(str(warc_path))] if SAVE_WARC else []),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue