From 563d0f94ecd3073949662a0378ad27afbd5db9cc Mon Sep 17 00:00:00 2001 From: Cristian Date: Thu, 15 Oct 2020 08:49:54 -0500 Subject: [PATCH] feat: Use CURL_ARGS in favicon extractor --- archivebox/config/__init__.py | 1 - archivebox/extractors/archive_org.py | 1 + archivebox/extractors/favicon.py | 5 ++--- archivebox/extractors/headers.py | 1 + 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py index 80107e0f..390c5539 100644 --- a/archivebox/config/__init__.py +++ b/archivebox/config/__init__.py @@ -133,7 +133,6 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = { ]}, 'CURL_ARGS': {'type': list, 'default': ['--silent', '--location', - '--head', '--compressed' ]} }, diff --git a/archivebox/extractors/archive_org.py b/archivebox/extractors/archive_org.py index 6ddd2133..db9e2517 100644 --- a/archivebox/extractors/archive_org.py +++ b/archivebox/extractors/archive_org.py @@ -47,6 +47,7 @@ def save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, timeout: int= cmd = [ CURL_BINARY, *CURL_ARGS, + '--head', '--max-time', str(timeout), *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []), *([] if CHECK_SSL_VALIDITY else ['--insecure']), diff --git a/archivebox/extractors/favicon.py b/archivebox/extractors/favicon.py index 86d2c506..5e7c1fb0 100644 --- a/archivebox/extractors/favicon.py +++ b/archivebox/extractors/favicon.py @@ -11,6 +11,7 @@ from ..config import ( TIMEOUT, SAVE_FAVICON, CURL_BINARY, + CURL_ARGS, CURL_VERSION, CHECK_SSL_VALIDITY, CURL_USER_AGENT, @@ -34,10 +35,8 @@ def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) output: ArchiveOutput = 'favicon.ico' cmd = [ CURL_BINARY, - '--silent', + *CURL_ARGS, '--max-time', str(timeout), - '--location', - '--compressed', '--output', str(output), *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []), *([] if CHECK_SSL_VALIDITY else ['--insecure']), diff --git a/archivebox/extractors/headers.py b/archivebox/extractors/headers.py index 0cc366e3..2ddae8d0 100644 --- a/archivebox/extractors/headers.py +++ b/archivebox/extractors/headers.py @@ -43,6 +43,7 @@ def save_headers(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT) cmd = [ CURL_BINARY, *CURL_ARGS, + '--head', '--max-time', str(timeout), *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []), *([] if CHECK_SSL_VALIDITY else ['--insecure']),