swap more direct settings.CONFIG access to abx getters

This commit is contained in:
Nick Sweeting 2024-10-24 15:42:19 -07:00
parent b61f6ff8d8
commit 4b6f08b0fe
No known key found for this signature in database
3 changed files with 26 additions and 18 deletions

View file

@@ -8,6 +8,8 @@ from pathlib import Path
from datetime import datetime, timezone
from typing import List, Optional, Iterator, Any, Union
import abx.archivebox.reads
from archivebox.config import VERSION, DATA_DIR, CONSTANTS
from archivebox.config.common import SERVER_CONFIG, SHELL_CONFIG
@@ -19,8 +21,6 @@ from archivebox.misc.util import enforce_types
@enforce_types
def generate_json_index_from_links(links: List[Link], with_headers: bool):
from django.conf import settings
MAIN_INDEX_HEADER = {
'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.',
'schema': 'archivebox.index.json',
@@ -33,11 +33,10 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool):
'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
'source': 'https://github.com/ArchiveBox/ArchiveBox',
'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues',
'dependencies': settings.BINARIES.to_dict(),
'dependencies': dict(abx.archivebox.reads.get_BINARIES()),
},
}
if with_headers:
output = {
**MAIN_INDEX_HEADER,

View file

@@ -1052,7 +1052,8 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
from rich import print
from django.conf import settings
from archivebox import CONSTANTS
import abx.archivebox.reads
from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
from archivebox.config.paths import get_or_create_working_lib_dir
@@ -1075,11 +1076,11 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
package_manager_names = ', '.join(
f'[yellow]{binprovider.name}[/yellow]'
for binprovider in reversed(list(settings.BINPROVIDERS.values()))
for binprovider in reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values()))
if not binproviders or (binproviders and binprovider.name in binproviders)
)
print(f'[+] Setting up package managers {package_manager_names}...')
for binprovider in reversed(list(settings.BINPROVIDERS.values())):
for binprovider in reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values())):
if binproviders and binprovider.name not in binproviders:
continue
try:
@@ -1092,7 +1093,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
print()
for binary in reversed(list(settings.BINARIES.values())):
for binary in reversed(list(abx.archivebox.reads.get_BINARIES().values())):
if binary.name in ('archivebox', 'django', 'sqlite', 'python'):
# obviously must already be installed if we are running
continue

View file

@@ -5,7 +5,7 @@ import requests
import json as pyjson
import http.cookiejar
from typing import List, Optional, Any
from typing import List, Optional, Any, Callable
from pathlib import Path
from inspect import signature
from functools import wraps
@@ -19,14 +19,13 @@ from requests.exceptions import RequestException, ReadTimeout
from base32_crockford import encode as base32_encode # type: ignore
from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
try:
import chardet
import chardet # type:ignore
detect_encoding = lambda rawdata: chardet.detect(rawdata)["encoding"]
except ImportError:
detect_encoding = lambda rawdata: "utf-8"
from archivebox.config import CONSTANTS
from archivebox.config.common import ARCHIVING_CONFIG
from archivebox.config.constants import CONSTANTS
from .logging import COLOR_DICT
@@ -187,11 +186,11 @@ def str_between(string: str, start: str, end: str=None) -> str:
@enforce_types
def parse_date(date: Any) -> Optional[datetime]:
def parse_date(date: Any) -> datetime:
"""Parse unix timestamps, iso format, and human-readable strings"""
if date is None:
return None
return None # type: ignore
if isinstance(date, datetime):
if date.tzinfo is None:
@@ -213,6 +212,8 @@ def parse_date(date: Any) -> Optional[datetime]:
def download_url(url: str, timeout: int=None) -> str:
"""Download the contents of a remote url and return the text"""
from archivebox.config.common import ARCHIVING_CONFIG
timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
session = requests.Session()
@@ -242,8 +243,12 @@ def download_url(url: str, timeout: int=None) -> str:
return url.rsplit('/', 1)[-1]
@enforce_types
def get_headers(url: str, timeout: int=None) -> str:
def get_headers(url: str, timeout: int | None=None) -> str:
"""Download the contents of a remote url and return the headers"""
# TODO: get rid of this and use an abx pluggy hook instead
from archivebox.config.common import ARCHIVING_CONFIG
timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
try:
@@ -308,13 +313,13 @@ def ansi_to_html(text: str) -> str:
@enforce_types
def dedupe(options: List[str]) -> List[str]:
    """
    Deduplicates the given CLI args by key=value. Options that come later override earlier.
    """
    # NOTE(review): the rendered diff showed both the pre- and post-change lines of
    # this loop body without +/- markers; only the post-commit version is kept here.
    # A dict preserves insertion order, so "last value wins" per key while the
    # original ordering of first appearance is retained.
    deduped = {}
    for option in options:
        key = option.split('=')[0]
        deduped[key] = option
    return list(deduped.values())
@@ -347,6 +352,9 @@ class ExtendedEncoder(pyjson.JSONEncoder):
elif cls_name in ('dict_items', 'dict_keys', 'dict_values'):
return tuple(obj)
elif isinstance(obj, Callable):
return str(obj)
return pyjson.JSONEncoder.default(self, obj)