config and attr access improvements

This commit is contained in:
Nick Sweeting 2024-08-20 18:31:21 -07:00
parent 4ae186dfca
commit 0285aa52a0
No known key found for this signature in database
15 changed files with 203 additions and 187 deletions

View file

@ -11,27 +11,18 @@ from ..util import (
domain,
dedupe,
)
from ..config import (
TIMEOUT,
SAVE_FAVICON,
FAVICON_PROVIDER,
CURL_BINARY,
CURL_ARGS,
CURL_EXTRA_ARGS,
CURL_VERSION,
CHECK_SSL_VALIDITY,
CURL_USER_AGENT,
)
from ..config import CONFIG
from ..logging_util import TimedProgress
@enforce_types
def should_save_favicon(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
out_dir = out_dir or Path(link.link_dir)
def should_save_favicon(link: Link, out_dir: str | Path | None=None, overwrite: bool=False) -> bool:
assert link.link_dir
out_dir = Path(out_dir or link.link_dir)
if not overwrite and (out_dir / 'favicon.ico').exists():
return False
return SAVE_FAVICON
return CONFIG.SAVE_FAVICON
@enforce_types
def get_output_path():
@ -39,24 +30,26 @@ def get_output_path():
@enforce_types
def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
def save_favicon(link: Link, out_dir: str | Path | None=None, timeout: int=CONFIG.TIMEOUT) -> ArchiveResult:
"""download site favicon from google's favicon api"""
out_dir = out_dir or link.link_dir
out_dir = Path(out_dir or link.link_dir)
assert out_dir.exists()
output: ArchiveOutput = 'favicon.ico'
# later options take precedence
options = [
*CURL_ARGS,
*CURL_EXTRA_ARGS,
*CONFIG.CURL_ARGS,
*CONFIG.CURL_EXTRA_ARGS,
'--max-time', str(timeout),
'--output', str(output),
*(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
*([] if CHECK_SSL_VALIDITY else ['--insecure']),
*(['--user-agent', '{}'.format(CONFIG.CURL_USER_AGENT)] if CONFIG.CURL_USER_AGENT else []),
*([] if CONFIG.CHECK_SSL_VALIDITY else ['--insecure']),
]
cmd = [
CURL_BINARY,
CONFIG.CURL_BINARY,
*dedupe(options),
FAVICON_PROVIDER.format(domain(link.url)),
CONFIG.FAVICON_PROVIDER.format(domain(link.url)),
]
status = 'failed'
timer = TimedProgress(timeout, prefix=' ')
@ -72,7 +65,7 @@ def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
return ArchiveResult(
cmd=cmd,
pwd=str(out_dir),
cmd_version=CURL_VERSION,
cmd_version=CONFIG.CURL_VERSION,
output=output,
status=status,
**timer.stats,

View file

@ -14,15 +14,7 @@ from ..util import (
without_query,
without_fragment,
)
from ..config import (
TIMEOUT,
SAVE_GIT,
GIT_BINARY,
GIT_ARGS,
GIT_VERSION,
GIT_DOMAINS,
CHECK_SSL_VALIDITY
)
from ..config import CONFIG
from ..logging_util import TimedProgress
@ -50,17 +42,17 @@ def should_save_git(link: Link, out_dir: Optional[Path]=None, overwrite: Optiona
return False
is_clonable_url = (
(domain(link.url) in GIT_DOMAINS)
(domain(link.url) in CONFIG.GIT_DOMAINS)
or (extension(link.url) == 'git')
)
if not is_clonable_url:
return False
return SAVE_GIT
return CONFIG.SAVE_GIT
@enforce_types
def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=CONFIG.TIMEOUT) -> ArchiveResult:
"""download full site using git"""
out_dir = out_dir or Path(link.link_dir)
@ -68,10 +60,10 @@ def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
output_path = out_dir / output
output_path.mkdir(exist_ok=True)
cmd = [
GIT_BINARY,
CONFIG.GIT_BINARY,
'clone',
*GIT_ARGS,
*([] if CHECK_SSL_VALIDITY else ['-c', 'http.sslVerify=false']),
*CONFIG.GIT_ARGS,
*([] if CONFIG.CHECK_SSL_VALIDITY else ['-c', 'http.sslVerify=false']),
without_query(without_fragment(link.url)),
]
status = 'succeeded'
@ -96,7 +88,7 @@ def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
return ArchiveResult(
cmd=cmd,
pwd=str(out_dir),
cmd_version=GIT_VERSION,
cmd_version=CONFIG.GIT_VERSION,
output=output,
status=status,
**timer.stats,