diff --git a/archivebox/abx/archivebox/base_configset.py b/archivebox/abx/archivebox/base_configset.py index 6462d6be..8a8298a3 100644 --- a/archivebox/abx/archivebox/base_configset.py +++ b/archivebox/abx/archivebox/base_configset.py @@ -1,12 +1,9 @@ __package__ = 'abx.archivebox' import os -import re -import json from pathlib import Path -from typing import Type, Tuple, Callable, ClassVar, Any +from typing import Type, Tuple, Callable, ClassVar -import toml from benedict import benedict from pydantic import model_validator, TypeAdapter from pydantic_settings import BaseSettings, SettingsConfigDict, PydanticBaseSettingsSource @@ -17,33 +14,13 @@ from pydantic_pkgr.base_types import func_takes_args_or_kwargs import abx from .base_hook import BaseHook, HookType -from archivebox.misc import ini_to_toml +from archivebox.misc import toml_util PACKAGE_DIR = Path(__file__).resolve().parent.parent DATA_DIR = Path(os.curdir).resolve() -def better_toml_dump_str(val: Any) -> str: - try: - return toml.encoder._dump_str(val) # type: ignore - except Exception: - # if we hit any of toml's numerous encoding bugs, - # fall back to using json representation of string - return json.dumps(str(val)) - -class CustomTOMLEncoder(toml.encoder.TomlEncoder): - """ - Custom TomlEncoder to work around https://github.com/uiri/toml's many encoding bugs. 
- More info: https://github.com/fabiocaccamo/python-benedict/issues/439 - >>> toml.dumps(value, encoder=CustomTOMLEncoder()) - """ - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.dump_funcs[str] = better_toml_dump_str - self.dump_funcs[re.RegexFlag] = better_toml_dump_str - - class FlatTomlConfigSettingsSource(TomlConfigSettingsSource): """ @@ -155,7 +132,7 @@ class ArchiveBoxBaseConfig(BaseSettings): # Convert ArchiveBox.conf in INI format to TOML and save original to .ArchiveBox.bak original_ini = ARCHIVEBOX_CONFIG_FILE.read_text() ARCHIVEBOX_CONFIG_FILE_BAK.write_text(original_ini) - new_toml = ini_to_toml.convert(original_ini) + new_toml = toml_util.convert(original_ini) ARCHIVEBOX_CONFIG_FILE.write_text(new_toml) precedence_order = { diff --git a/archivebox/config/constants.py b/archivebox/config/constants.py index 669fd22e..577cbb08 100644 --- a/archivebox/config/constants.py +++ b/archivebox/config/constants.py @@ -6,6 +6,7 @@ import re from typing import Dict from pathlib import Path import importlib.metadata +from collections.abc import Mapping from benedict import benedict @@ -37,7 +38,7 @@ def _detect_installed_version(PACKAGE_DIR: Path): VERSION: str = _detect_installed_version(PACKAGE_DIR) -class CONSTANTS: +class ConstantsDict(Mapping): PACKAGE_DIR: Path = PACKAGE_DIR # archivebox source code dir DATA_DIR: Path = DATA_DIR # archivebox user data dir ARCHIVE_DIR: Path = ARCHIVE_DIR # archivebox snapshot data dir @@ -262,11 +263,24 @@ class CONSTANTS: }, }) - def __getitem__(self, key: str): - return getattr(self, key) + @classmethod + def __getitem__(cls, key: str): + return getattr(cls, key) + + @classmethod + def __benedict__(cls): + return benedict({key: value for key, value in cls.__dict__.items() if key.isupper() and not key.startswith('_')}) + + @classmethod + def __len__(cls): + return len(cls.__benedict__()) + @classmethod + def __iter__(cls): + return iter(cls.__benedict__()) + +CONSTANTS = ConstantsDict() +CONSTANTS_CONFIG 
= CONSTANTS.__benedict__() # add all key: values to globals() for easier importing -globals().update(CONSTANTS.__dict__) - -CONSTANTS_CONFIG = CONSTANTS +globals().update(CONSTANTS) diff --git a/archivebox/config/legacy.py b/archivebox/config/legacy.py index 55424646..48ed1a56 100644 --- a/archivebox/config/legacy.py +++ b/archivebox/config/legacy.py @@ -353,7 +353,7 @@ def load_config_file(out_dir: str | None=CONSTANTS.DATA_DIR) -> Optional[benedic def write_config_file(config: Dict[str, str], out_dir: str | None=CONSTANTS.DATA_DIR) -> benedict: """load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf""" - from ..system import atomic_write + from archivebox.misc.system import atomic_write CONFIG_HEADER = ( """# This is the config file for your ArchiveBox collection. diff --git a/archivebox/core/models.py b/archivebox/core/models.py index 0630f625..69abb4ab 100644 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -22,7 +22,7 @@ from archivebox.config import CONSTANTS from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField from queues.tasks import bg_archive_snapshot -from ..system import get_dir_size +from archivebox.misc.system import get_dir_size from ..util import parse_date, base_url from ..index.schema import Link from ..index.html import snapshot_icons diff --git a/archivebox/extractors/archive_org.py b/archivebox/extractors/archive_org.py index ac73f721..77586190 100644 --- a/archivebox/extractors/archive_org.py +++ b/archivebox/extractors/archive_org.py @@ -6,7 +6,7 @@ from typing import Optional, List, Dict, Tuple from collections import defaultdict from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError -from ..system import run, chmod_file +from archivebox.misc.system import run, chmod_file from ..util import ( enforce_types, is_static_file, diff --git a/archivebox/extractors/dom.py b/archivebox/extractors/dom.py index 675aa62e..8c5a8a68 100644 --- a/archivebox/extractors/dom.py +++ 
b/archivebox/extractors/dom.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Optional from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError -from ..system import run, chmod_file, atomic_write +from archivebox.misc.system import run, chmod_file, atomic_write from ..util import ( enforce_types, is_static_file, diff --git a/archivebox/extractors/favicon.py b/archivebox/extractors/favicon.py index 791184f9..82482183 100644 --- a/archivebox/extractors/favicon.py +++ b/archivebox/extractors/favicon.py @@ -5,7 +5,7 @@ from pathlib import Path from typing import Optional from ..index.schema import Link, ArchiveResult, ArchiveOutput -from ..system import chmod_file, run +from archivebox.misc.system import chmod_file, run from ..util import ( enforce_types, domain, diff --git a/archivebox/extractors/git.py b/archivebox/extractors/git.py index 1e9decbb..bf05fe52 100644 --- a/archivebox/extractors/git.py +++ b/archivebox/extractors/git.py @@ -5,7 +5,7 @@ from pathlib import Path from typing import Optional from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError -from ..system import run, chmod_file +from archivebox.misc.system import run, chmod_file from ..util import ( enforce_types, is_static_file, diff --git a/archivebox/extractors/headers.py b/archivebox/extractors/headers.py index 4bd2780b..b4d792a2 100644 --- a/archivebox/extractors/headers.py +++ b/archivebox/extractors/headers.py @@ -5,7 +5,7 @@ from pathlib import Path from typing import Optional from ..index.schema import Link, ArchiveResult, ArchiveOutput -from ..system import atomic_write +from archivebox.misc.system import atomic_write from ..util import ( enforce_types, get_headers, diff --git a/archivebox/extractors/htmltotext.py b/archivebox/extractors/htmltotext.py index 276ed5b8..3e0083df 100644 --- a/archivebox/extractors/htmltotext.py +++ b/archivebox/extractors/htmltotext.py @@ -12,7 +12,7 @@ from ..config.legacy import ( ) from ..index.schema import 
Link, ArchiveResult, ArchiveError from ..logging_util import TimedProgress -from ..system import atomic_write +from archivebox.misc.system import atomic_write from ..util import ( enforce_types, is_static_file, diff --git a/archivebox/extractors/media.py b/archivebox/extractors/media.py index 9952fc1d..4b38242d 100644 --- a/archivebox/extractors/media.py +++ b/archivebox/extractors/media.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Optional from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError -from ..system import run, chmod_file +from archivebox.misc.system import run, chmod_file from ..util import enforce_types, is_static_file, dedupe from ..logging_util import TimedProgress diff --git a/archivebox/extractors/mercury.py b/archivebox/extractors/mercury.py index 0aad67e6..f4067485 100644 --- a/archivebox/extractors/mercury.py +++ b/archivebox/extractors/mercury.py @@ -7,7 +7,7 @@ from typing import Optional, List import json from ..index.schema import Link, ArchiveResult, ArchiveError -from ..system import run, atomic_write +from archivebox.misc.system import run, atomic_write from ..util import ( enforce_types, is_static_file, diff --git a/archivebox/extractors/pdf.py b/archivebox/extractors/pdf.py index e3c2330e..22762765 100644 --- a/archivebox/extractors/pdf.py +++ b/archivebox/extractors/pdf.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Optional from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError -from ..system import run, chmod_file +from archivebox.misc.system import run, chmod_file from ..util import ( enforce_types, is_static_file, diff --git a/archivebox/extractors/readability.py b/archivebox/extractors/readability.py index e45d9600..fd1b59f1 100644 --- a/archivebox/extractors/readability.py +++ b/archivebox/extractors/readability.py @@ -7,7 +7,7 @@ from typing import Optional import json from ..index.schema import Link, ArchiveResult, ArchiveError -from ..system import run, 
atomic_write +from archivebox.misc.system import run, atomic_write from ..util import enforce_types, is_static_file from ..logging_util import TimedProgress from .title import get_html diff --git a/archivebox/extractors/screenshot.py b/archivebox/extractors/screenshot.py index d10554b7..30c6e7f4 100644 --- a/archivebox/extractors/screenshot.py +++ b/archivebox/extractors/screenshot.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Optional from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError -from ..system import run, chmod_file +from archivebox.misc.system import run, chmod_file from ..util import enforce_types, is_static_file from ..logging_util import TimedProgress diff --git a/archivebox/extractors/singlefile.py b/archivebox/extractors/singlefile.py index c7184a94..d7aa70e0 100644 --- a/archivebox/extractors/singlefile.py +++ b/archivebox/extractors/singlefile.py @@ -6,7 +6,7 @@ from typing import Optional import json from ..index.schema import Link, ArchiveResult, ArchiveError -from ..system import run, chmod_file +from archivebox.misc.system import run, chmod_file from ..util import enforce_types, is_static_file, dedupe from ..logging_util import TimedProgress diff --git a/archivebox/extractors/wget.py b/archivebox/extractors/wget.py index 9cc30c6f..259dc06e 100644 --- a/archivebox/extractors/wget.py +++ b/archivebox/extractors/wget.py @@ -7,7 +7,7 @@ from typing import Optional from datetime import datetime, timezone from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError -from ..system import run, chmod_file +from archivebox.misc.system import run, chmod_file from ..util import ( enforce_types, without_fragment, diff --git a/archivebox/index/html.py b/archivebox/index/html.py index 4b2c6485..c09da778 100644 --- a/archivebox/index/html.py +++ b/archivebox/index/html.py @@ -9,7 +9,7 @@ from django.utils.html import format_html, mark_safe # type: ignore from django.core.cache import cache from .schema 
import Link -from ..system import atomic_write +from archivebox.misc.system import atomic_write from ..logging_util import printable_filesize from ..util import ( enforce_types, diff --git a/archivebox/index/json.py b/archivebox/index/json.py index acaa2a18..8d299eb5 100644 --- a/archivebox/index/json.py +++ b/archivebox/index/json.py @@ -11,7 +11,7 @@ from typing import List, Optional, Iterator, Any, Union from archivebox.config import VERSION, DATA_DIR, CONSTANTS, SERVER_CONFIG, SHELL_CONFIG from .schema import Link -from ..system import atomic_write +from archivebox.misc.system import atomic_write from ..util import enforce_types diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py index a6697c9f..46d8eab3 100644 --- a/archivebox/index/schema.py +++ b/archivebox/index/schema.py @@ -21,7 +21,7 @@ from archivebox.config.constants import ARCHIVE_DIR, ARCHIVE_DIR_NAME from plugins_extractor.favicon.apps import FAVICON_CONFIG -from ..system import get_dir_size +from archivebox.misc.system import get_dir_size from ..util import ts_to_date_str, parse_date diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py index baf7030f..603de2fc 100644 --- a/archivebox/logging_util.py +++ b/archivebox/logging_util.py @@ -22,7 +22,7 @@ from rich import print from rich.panel import Panel from archivebox.config import CONSTANTS, DATA_DIR, VERSION, SHELL_CONFIG -from .system import get_dir_size +from archivebox.misc.system import get_dir_size from .util import enforce_types from .misc.logging import ANSI, stderr diff --git a/archivebox/main.py b/archivebox/main.py index 4ec2a93e..a2d9ce8e 100755 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -30,8 +30,8 @@ from .parsers import ( ) from .index.schema import Link from .util import enforce_types # type: ignore -from .system import get_dir_size, dedupe_cron_jobs, CRON_COMMENT -from .system import run as run_shell +from archivebox.misc.system import get_dir_size, dedupe_cron_jobs, CRON_COMMENT +from 
archivebox.misc.system import run as run_shell from .index import ( load_main_index, parse_links_from_source, diff --git a/archivebox/system.py b/archivebox/misc/system.py similarity index 100% rename from archivebox/system.py rename to archivebox/misc/system.py diff --git a/archivebox/misc/tests.py b/archivebox/misc/tests.py index 3e136cb4..fca938ce 100644 --- a/archivebox/misc/tests.py +++ b/archivebox/misc/tests.py @@ -2,7 +2,7 @@ __package__ = 'abx.archivebox' from django.test import TestCase -from .ini_to_toml import convert, TOML_HEADER +from .toml_util import convert, TOML_HEADER TEST_INPUT = """ [SERVER_CONFIG] diff --git a/archivebox/misc/ini_to_toml.py b/archivebox/misc/toml_util.py similarity index 70% rename from archivebox/misc/ini_to_toml.py rename to archivebox/misc/toml_util.py index 48bd90c6..d4784335 100644 --- a/archivebox/misc/ini_to_toml.py +++ b/archivebox/misc/toml_util.py @@ -3,8 +3,12 @@ from typing import Any, List, Callable import json import ast import inspect +import toml +import re import configparser +from pathlib import Path, PosixPath + from pydantic.json_schema import GenerateJsonSchema from pydantic_core import to_jsonable_python @@ -68,8 +72,12 @@ def convert(ini_str: str) -> str: class JSONSchemaWithLambdas(GenerateJsonSchema): + """ + Encode lambda functions in default values properly. 
+ Usage: + >>> Model.model_json_schema(schema_generator=JSONSchemaWithLambdas) + """ def encode_default(self, default: Any) -> Any: - """Encode lambda functions in default values properly""" config = self._config if isinstance(default, Callable): return '{{lambda ' + inspect.getsource(default).split('=lambda ')[-1].strip()[:-1] + '}}' @@ -83,3 +91,24 @@ class JSONSchemaWithLambdas(GenerateJsonSchema): # for computed_field properties render them like this instead: # inspect.getsource(field.wrapped_property.fget).split('def ', 1)[-1].split('\n', 1)[-1].strip().strip('return '), + +def better_toml_dump_str(val: Any) -> str: + try: + return toml.encoder._dump_str(val) # type: ignore + except Exception: + # if we hit any of toml's numerous encoding bugs, + # fall back to using json representation of string + return json.dumps(str(val)) + +class CustomTOMLEncoder(toml.encoder.TomlEncoder): + """ + Custom TomlEncoder to work around https://github.com/uiri/toml's many encoding bugs. + More info: https://github.com/fabiocaccamo/python-benedict/issues/439 + >>> toml.dumps(value, encoder=CustomTOMLEncoder()) + """ + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.dump_funcs[Path] = lambda x: json.dumps(str(x)) + self.dump_funcs[PosixPath] = lambda x: json.dumps(str(x)) + self.dump_funcs[str] = better_toml_dump_str + self.dump_funcs[re.RegexFlag] = better_toml_dump_str diff --git a/archivebox/parsers/__init__.py b/archivebox/parsers/__init__.py index 9464c53b..5b4967c8 100644 --- a/archivebox/parsers/__init__.py +++ b/archivebox/parsers/__init__.py @@ -13,7 +13,7 @@ from typing import IO, Tuple, List, Optional from datetime import datetime, timezone from pathlib import Path -from ..system import atomic_write +from archivebox.misc.system import atomic_write from ..config.legacy import ( ANSI, OUTPUT_DIR, diff --git a/archivebox/parsers/pocket_api.py b/archivebox/parsers/pocket_api.py index 10daf54e..8f198e50 100644 --- a/archivebox/parsers/pocket_api.py +++ 
b/archivebox/parsers/pocket_api.py @@ -12,7 +12,7 @@ from archivebox.config import CONSTANTS from ..index.schema import Link from ..util import enforce_types -from ..system import atomic_write +from archivebox.misc.system import atomic_write from ..config.legacy import ( POCKET_CONSUMER_KEY, POCKET_ACCESS_TOKENS, diff --git a/archivebox/parsers/readwise_reader_api.py b/archivebox/parsers/readwise_reader_api.py index c12bdc24..9151fd32 100644 --- a/archivebox/parsers/readwise_reader_api.py +++ b/archivebox/parsers/readwise_reader_api.py @@ -12,7 +12,7 @@ from archivebox.config import CONSTANTS from ..index.schema import Link from ..util import enforce_types -from ..system import atomic_write +from archivebox.misc.system import atomic_write from ..config.legacy import READWISE_READER_TOKENS