diff --git a/archivebox/index/json.py b/archivebox/index/json.py index 017dbc94..eaa93c2e 100644 --- a/archivebox/index/json.py +++ b/archivebox/index/json.py @@ -8,6 +8,8 @@ from pathlib import Path from datetime import datetime, timezone from typing import List, Optional, Iterator, Any, Union +import abx.archivebox.reads + from archivebox.config import VERSION, DATA_DIR, CONSTANTS from archivebox.config.common import SERVER_CONFIG, SHELL_CONFIG @@ -19,8 +21,6 @@ from archivebox.misc.util import enforce_types @enforce_types def generate_json_index_from_links(links: List[Link], with_headers: bool): - from django.conf import settings - MAIN_INDEX_HEADER = { 'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.', 'schema': 'archivebox.index.json', @@ -33,11 +33,10 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool): 'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki', 'source': 'https://github.com/ArchiveBox/ArchiveBox', 'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues', - 'dependencies': settings.BINARIES.to_dict(), + 'dependencies': dict(abx.archivebox.reads.get_BINARIES()), }, } - if with_headers: output = { **MAIN_INDEX_HEADER, diff --git a/archivebox/main.py b/archivebox/main.py index e05c696d..fab99dc9 100755 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -1052,7 +1052,8 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina from rich import print from django.conf import settings - from archivebox import CONSTANTS + + import abx.archivebox.reads from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP from archivebox.config.paths import get_or_create_working_lib_dir @@ -1075,11 +1076,11 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina package_manager_names = ', '.join( f'[yellow]{binprovider.name}[/yellow]' - for binprovider in reversed(list(settings.BINPROVIDERS.values())) + for binprovider in reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values())) if not binproviders or (binproviders and binprovider.name in binproviders) ) print(f'[+] Setting up package managers {package_manager_names}...') - for binprovider in reversed(list(settings.BINPROVIDERS.values())): + for binprovider in reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values())): if binproviders and binprovider.name not in binproviders: continue try: @@ -1092,7 +1093,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina print() - for binary in reversed(list(settings.BINARIES.values())): + for binary in reversed(list(abx.archivebox.reads.get_BINARIES().values())): if binary.name in ('archivebox', 'django', 'sqlite', 'python'): # obviously must already be installed if we are running continue diff --git a/archivebox/misc/util.py b/archivebox/misc/util.py index 6f54ada6..6195252e 100644 --- a/archivebox/misc/util.py +++ b/archivebox/misc/util.py @@ -5,7 +5,7 @@ import requests import json as pyjson import http.cookiejar -from typing import List, Optional, Any +from typing import List, Optional, Any, Callable from pathlib import Path from inspect import signature from functools import wraps @@ -19,14 +19,13 @@ from requests.exceptions import RequestException, ReadTimeout from base32_crockford import encode as base32_encode # type: ignore from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding try: - import chardet + import chardet # type:ignore detect_encoding = lambda rawdata: chardet.detect(rawdata)["encoding"] except ImportError: detect_encoding = lambda rawdata: "utf-8" -from archivebox.config import CONSTANTS -from archivebox.config.common import ARCHIVING_CONFIG +from archivebox.config.constants import CONSTANTS from .logging import COLOR_DICT @@ -187,11 +186,11 @@ def str_between(string: str, start: str, end: str=None) -> str: @enforce_types -def parse_date(date: Any) -> Optional[datetime]: +def parse_date(date: Any) -> datetime: """Parse unix timestamps, iso format, and human-readable strings""" if date is None: - return None + return None # type: ignore if isinstance(date, datetime): if date.tzinfo is None: @@ -213,6 +212,8 @@ def parse_date(date: Any) -> Optional[datetime]: def download_url(url: str, timeout: int=None) -> str: """Download the contents of a remote url and return the text""" + from archivebox.config.common import ARCHIVING_CONFIG + timeout = timeout or ARCHIVING_CONFIG.TIMEOUT session = requests.Session() @@ -242,8 +243,12 @@ def download_url(url: str, timeout: int=None) -> str: return url.rsplit('/', 1)[-1] @enforce_types -def get_headers(url: str, timeout: int=None) -> str: +def get_headers(url: str, timeout: int | None=None) -> str: """Download the contents of a remote url and return the headers""" + # TODO: get rid of this and use an abx pluggy hook instead + + from archivebox.config.common import ARCHIVING_CONFIG + timeout = timeout or ARCHIVING_CONFIG.TIMEOUT try: @@ -308,13 +313,13 @@ def ansi_to_html(text: str) -> str: @enforce_types def dedupe(options: List[str]) -> List[str]: """ - Deduplicates the given options. Options that come later clobber earlier - conflicting options. + Deduplicates the given CLI args by key=value. Options that come later override earlier. """ deduped = {} for option in options: - deduped[option.split('=')[0]] = option + key = option.split('=')[0] + deduped[key] = option return list(deduped.values()) @@ -346,6 +351,9 @@ class ExtendedEncoder(pyjson.JSONEncoder): elif cls_name in ('dict_items', 'dict_keys', 'dict_values'): return tuple(obj) + + elif isinstance(obj, Callable): + return str(obj) return pyjson.JSONEncoder.default(self, obj)