diff --git a/archivebox/abx/archivebox/base_configset.py b/archivebox/abx/archivebox/base_configset.py index b27b302b..6462d6be 100644 --- a/archivebox/abx/archivebox/base_configset.py +++ b/archivebox/abx/archivebox/base_configset.py @@ -4,7 +4,7 @@ import os import re import json from pathlib import Path -from typing import Literal, Type, Tuple, Callable, ClassVar, Any, get_args +from typing import Type, Tuple, Callable, ClassVar, Any import toml from benedict import benedict @@ -24,21 +24,6 @@ PACKAGE_DIR = Path(__file__).resolve().parent.parent DATA_DIR = Path(os.curdir).resolve() -ConfigSectionName = Literal[ - 'SHELL_CONFIG', - 'GENERAL_CONFIG', - 'STORAGE_CONFIG', - 'SERVER_CONFIG', - 'ARCHIVING_CONFIG', - 'LDAP_CONFIG', - 'ARCHIVE_METHOD_TOGGLES', - 'ARCHIVE_METHOD_OPTIONS', - 'SEARCH_BACKEND_CONFIG', - 'DEPENDENCY_CONFIG', -] -ConfigSectionNames: Tuple[ConfigSectionName, ...] = get_args(ConfigSectionName) # just gets the list of values from the Literal type - - def better_toml_dump_str(val: Any) -> str: try: return toml.encoder._dump_str(val) # type: ignore @@ -74,14 +59,14 @@ class FlatTomlConfigSettingsSource(TomlConfigSettingsSource): self.nested_toml_data = self._read_files(self.toml_file_path) self.toml_data = {} - for section_name, section in self.nested_toml_data.items(): - if section_name in ConfigSectionNames and isinstance(section, dict): + for top_level_key, top_level_value in self.nested_toml_data.items(): + if isinstance(top_level_value, dict): # value is nested, flatten it - for key, value in section.items(): + for key, value in top_level_value.items(): self.toml_data[key] = value else: # value is already flat, just set it as-is - self.toml_data[section_name] = section + self.toml_data[top_level_key] = top_level_value # filter toml_data to only include keys that are defined on this settings_cls self.toml_data = { @@ -242,8 +227,6 @@ class ArchiveBoxBaseConfig(BaseSettings): class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook): # type: ignore[type-arg] hook_type: ClassVar[HookType] = 'CONFIG' - section: ClassVar[ConfigSectionName] = 'GENERAL_CONFIG' - # def register(self, settings, parent_plugin=None): # # self._plugin = parent_plugin # for debugging only, never rely on this! diff --git a/archivebox/config/constants.py b/archivebox/config/constants.py index d49a3573..669fd22e 100644 --- a/archivebox/config/constants.py +++ b/archivebox/config/constants.py @@ -13,12 +13,12 @@ from ..misc.logging import DEFAULT_CLI_COLORS ###################### Config ########################## -PACKAGE_DIR = Path(__file__).resolve().parent.parent # archivebox source code dir -DATA_DIR = Path(os.curdir).resolve() # archivebox user data dir -ARCHIVE_DIR = DATA_DIR / 'archive' # archivebox snapshot data dir +PACKAGE_DIR: Path = Path(__file__).resolve().parent.parent # archivebox source code dir +DATA_DIR: Path = Path(os.curdir).resolve() # archivebox user data dir +ARCHIVE_DIR: Path = DATA_DIR / 'archive' # archivebox snapshot data dir -def _detect_installed_version(): +def _detect_installed_version(PACKAGE_DIR: Path): """Autodetect the installed archivebox version by using pip package metadata or pyproject.toml file""" try: return importlib.metadata.version(__package__ or 'archivebox') @@ -34,234 +34,239 @@ def _detect_installed_version(): raise Exception('Failed to detect installed archivebox version!') -VERSION = _detect_installed_version() -__version__ = VERSION +VERSION: str = _detect_installed_version(PACKAGE_DIR) -PACKAGE_DIR_NAME: str = PACKAGE_DIR.name -TEMPLATES_DIR_NAME: str = 'templates' -TEMPLATES_DIR: Path = PACKAGE_DIR / TEMPLATES_DIR_NAME -STATIC_DIR: Path = TEMPLATES_DIR / 'static' -USER_PLUGINS_DIR_NAME: str = 'user_plugins' -CUSTOM_TEMPLATES_DIR_NAME: str = 'user_templates' - -ARCHIVE_DIR_NAME: str = 'archive' -SOURCES_DIR_NAME: str = 'sources' -PERSONAS_DIR_NAME: str = 'personas' -CRONTABS_DIR_NAME: str = 'crontabs' -CACHE_DIR_NAME: str = 'cache' -LOGS_DIR_NAME: str = 'logs' -LIB_DIR_NAME: str = 'lib' -TMP_DIR_NAME: str = 'tmp' - -OUTPUT_DIR: Path = DATA_DIR -ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME -SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME -PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME -CACHE_DIR: Path = DATA_DIR / CACHE_DIR_NAME -LOGS_DIR: Path = DATA_DIR / LOGS_DIR_NAME -LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME -TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME -CUSTOM_TEMPLATES_DIR: Path = DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME -USER_PLUGINS_DIR: Path = DATA_DIR / USER_PLUGINS_DIR_NAME - -LIB_PIP_DIR: Path = LIB_DIR / 'pip' -LIB_NPM_DIR: Path = LIB_DIR / 'npm' -LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers' -LIB_BIN_DIR: Path = LIB_DIR / 'bin' -BIN_DIR: Path = LIB_BIN_DIR - -CONFIG_FILENAME: str = 'ArchiveBox.conf' -SQL_INDEX_FILENAME: str = 'index.sqlite3' - -CONFIG_FILE: Path = DATA_DIR / CONFIG_FILENAME -DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME -QUEUE_DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME.replace('index.', 'queue.') - -JSON_INDEX_FILENAME: str = 'index.json' -HTML_INDEX_FILENAME: str = 'index.html' -ROBOTS_TXT_FILENAME: str = 'robots.txt' -FAVICON_FILENAME: str = 'favicon.ico' - -TIMEZONE: str = 'UTC' -DEFAULT_CLI_COLORS: Dict[str, str] = DEFAULT_CLI_COLORS -DISABLED_CLI_COLORS: Dict[str, str] = benedict({k: '' for k in DEFAULT_CLI_COLORS}) - -ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE - -STATICFILE_EXTENSIONS: frozenset[str] = frozenset(( - # 99.999% of the time, URLs ending in these extensions are static files - # that can be downloaded as-is, not html pages that need to be rendered - 'gif', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'wbmp', 'ico', 'jng', 'bmp', - 'svg', 'svgz', 'webp', 'ps', 'eps', 'ai', - 'mp3', 'mp4', 'm4a', 'mpeg', 'mpg', 'mkv', 'mov', 'webm', 'm4v', - 'flv', 'wmv', 'avi', 'ogg', 'ts', 'm3u8', - 'pdf', 'txt', 'rtf', 'rtfd', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx', - 'atom', 'rss', 'css', 'js', 'json', - 'dmg', 'iso', 'img', - 'rar', 'war', 'hqx', 'zip', 'gz', 'bz2', '7z', - - # Less common extensions to consider adding later - # jar, swf, bin, com, exe, dll, deb - # ear, hqx, eot, wmlc, kml, kmz, cco, jardiff, jnlp, run, msi, msp, msm, - # pl pm, prc pdb, rar, rpm, sea, sit, tcl tk, der, pem, crt, xpi, xspf, - # ra, mng, asx, asf, 3gpp, 3gp, mid, midi, kar, jad, wml, htc, mml - - # These are always treated as pages, not as static files, never add them: - # html, htm, shtml, xhtml, xml, aspx, php, cgi -)) - -INGORED_PATHS: frozenset[str] = frozenset(( - ".git", - ".svn", - ".DS_Store", - ".gitignore", - "lost+found", - ".DS_Store", - ".env", - "Dockerfile", -)) -PIP_RELATED_NAMES: frozenset[str] = frozenset(( - ".venv", - "venv", - "virtualenv", - ".virtualenv", -)) -NPM_RELATED_NAMES: frozenset[str] = frozenset(( - "node_modules", - "package.json", - "package-lock.json", - "yarn.lock", -)) - -DATA_DIR_NAMES: frozenset[str] = frozenset(( - ARCHIVE_DIR_NAME, - SOURCES_DIR_NAME, - LOGS_DIR_NAME, - CACHE_DIR_NAME, - LIB_DIR_NAME, - PERSONAS_DIR_NAME, - CUSTOM_TEMPLATES_DIR_NAME, - USER_PLUGINS_DIR_NAME, -)) -DATA_DIRS: frozenset[Path] = frozenset(DATA_DIR / dirname for dirname in DATA_DIR_NAMES) -DATA_FILE_NAMES: frozenset[str] = frozenset(( - CONFIG_FILENAME, - SQL_INDEX_FILENAME, - f"{SQL_INDEX_FILENAME}-wal", - f"{SQL_INDEX_FILENAME}-shm", - "queue.sqlite3", - "queue.sqlite3-wal", - "queue.sqlite3-shm", - "search.sqlite3", - JSON_INDEX_FILENAME, - HTML_INDEX_FILENAME, - ROBOTS_TXT_FILENAME, - FAVICON_FILENAME, - CONFIG_FILENAME, - f"{CONFIG_FILENAME}.bak", - "static_index.json", -)) - -# When initializing archivebox in a new directory, we check to make sure the dir is -# actually empty so that we dont clobber someone's home directory or desktop by accident. -# These files are exceptions to the is_empty check when we're trying to init a new dir, -# as they could be from a previous archivebox version, system artifacts, dependencies, etc. -ALLOWED_IN_OUTPUT_DIR: frozenset[str] = frozenset(( - *INGORED_PATHS, - *PIP_RELATED_NAMES, - *NPM_RELATED_NAMES, - *DATA_DIR_NAMES, - *DATA_FILE_NAMES, - "static", # created by old static exports <v0.6.0 - "sonic", # created by docker bind mount -)) - -CODE_LOCATIONS = benedict({ - 'PACKAGE_DIR': { - 'path': (PACKAGE_DIR).resolve(), - 'enabled': True, - 'is_valid': (PACKAGE_DIR / '__main__.py').exists(), - }, - 'LIB_DIR': { - 'path': LIB_DIR.resolve(), - 'enabled': True, - 'is_valid': LIB_DIR.is_dir(), - }, - 'RUNTIME_CONFIG': { - 'path': TMP_DIR.resolve(), - 'enabled': True, - 'is_valid': TMP_DIR.is_dir(), - }, - 'TEMPLATES_DIR': { - 'path': TEMPLATES_DIR.resolve(), - 'enabled': True, - 'is_valid': STATIC_DIR.exists(), - }, - 'CUSTOM_TEMPLATES_DIR': { - 'path': CUSTOM_TEMPLATES_DIR.resolve(), - 'enabled': True, - 'is_valid': CUSTOM_TEMPLATES_DIR.is_dir(), - }, -}) +class CONSTANTS: + PACKAGE_DIR: Path = PACKAGE_DIR # archivebox source code dir + DATA_DIR: Path = DATA_DIR # archivebox user data dir + ARCHIVE_DIR: Path = ARCHIVE_DIR # archivebox snapshot data dir + VERSION: str = VERSION -DATA_LOCATIONS = benedict({ - "OUTPUT_DIR": { - "path": DATA_DIR.resolve(), - "enabled": True, - "is_valid": DATABASE_FILE.exists(), - "is_mount": os.path.ismount(DATA_DIR.resolve()), - }, - "CONFIG_FILE": { - "path": CONFIG_FILE.resolve(), - "enabled": True, - "is_valid": CONFIG_FILE.exists(), - }, - "SQL_INDEX": { - "path": DATABASE_FILE.resolve(), - "enabled": True, - "is_valid": DATABASE_FILE.exists(), - "is_mount": os.path.ismount(DATABASE_FILE.resolve()), - }, - "QUEUE_DATABASE": { - "path": QUEUE_DATABASE_FILE.resolve(), - "enabled": True, - "is_valid": QUEUE_DATABASE_FILE.exists(), - "is_mount": os.path.ismount(QUEUE_DATABASE_FILE.resolve()), - }, - "ARCHIVE_DIR": { - "path": ARCHIVE_DIR.resolve(), - "enabled": True, - "is_valid": ARCHIVE_DIR.exists(), - "is_mount": os.path.ismount(ARCHIVE_DIR.resolve()), - }, - "SOURCES_DIR": { - "path": SOURCES_DIR.resolve(), - "enabled": True, - "is_valid": SOURCES_DIR.exists(), - }, - "PERSONAS_DIR": { - "path": PERSONAS_DIR.resolve(), - "enabled": PERSONAS_DIR.exists(), - "is_valid": PERSONAS_DIR.exists(), - }, - "LOGS_DIR": { - "path": LOGS_DIR.resolve(), - "enabled": True, - "is_valid": LOGS_DIR.is_dir(), - }, - "CACHE_DIR": { - "path": CACHE_DIR.resolve(), - "enabled": True, - "is_valid": CACHE_DIR.is_dir(), - }, -}) + PACKAGE_DIR_NAME: str = PACKAGE_DIR.name + TEMPLATES_DIR_NAME: str = 'templates' + TEMPLATES_DIR: Path = PACKAGE_DIR / TEMPLATES_DIR_NAME + STATIC_DIR: Path = TEMPLATES_DIR / 'static' + USER_PLUGINS_DIR_NAME: str = 'user_plugins' + CUSTOM_TEMPLATES_DIR_NAME: str = 'user_templates' + + ARCHIVE_DIR_NAME: str = 'archive' + SOURCES_DIR_NAME: str = 'sources' + PERSONAS_DIR_NAME: str = 'personas' + CRONTABS_DIR_NAME: str = 'crontabs' + CACHE_DIR_NAME: str = 'cache' + LOGS_DIR_NAME: str = 'logs' + LIB_DIR_NAME: str = 'lib' + TMP_DIR_NAME: str = 'tmp' + + OUTPUT_DIR: Path = DATA_DIR + ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME + SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME + PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME + CACHE_DIR: Path = DATA_DIR / CACHE_DIR_NAME + LOGS_DIR: Path = DATA_DIR / LOGS_DIR_NAME + LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME + TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME + CUSTOM_TEMPLATES_DIR: Path = DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME + USER_PLUGINS_DIR: Path = DATA_DIR / USER_PLUGINS_DIR_NAME + + LIB_PIP_DIR: Path = LIB_DIR / 'pip' + LIB_NPM_DIR: Path = LIB_DIR / 'npm' + LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers' + LIB_BIN_DIR: Path = LIB_DIR / 'bin' + BIN_DIR: Path = LIB_BIN_DIR + + CONFIG_FILENAME: str = 'ArchiveBox.conf' + SQL_INDEX_FILENAME: str = 'index.sqlite3' + + CONFIG_FILE: Path = DATA_DIR / CONFIG_FILENAME + DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME + QUEUE_DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME.replace('index.', 'queue.') + + JSON_INDEX_FILENAME: str = 'index.json' + HTML_INDEX_FILENAME: str = 'index.html' + ROBOTS_TXT_FILENAME: str = 'robots.txt' + FAVICON_FILENAME: str = 'favicon.ico' + + TIMEZONE: str = 'UTC' + DEFAULT_CLI_COLORS: Dict[str, str] = DEFAULT_CLI_COLORS + DISABLED_CLI_COLORS: Dict[str, str] = benedict({k: '' for k in DEFAULT_CLI_COLORS}) + + ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE + + STATICFILE_EXTENSIONS: frozenset[str] = frozenset(( + # 99.999% of the time, URLs ending in these extensions are static files + # that can be downloaded as-is, not html pages that need to be rendered + 'gif', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'wbmp', 'ico', 'jng', 'bmp', + 'svg', 'svgz', 'webp', 'ps', 'eps', 'ai', + 'mp3', 'mp4', 'm4a', 'mpeg', 'mpg', 'mkv', 'mov', 'webm', 'm4v', + 'flv', 'wmv', 'avi', 'ogg', 'ts', 'm3u8', + 'pdf', 'txt', 'rtf', 'rtfd', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx', + 'atom', 'rss', 'css', 'js', 'json', + 'dmg', 'iso', 'img', + 'rar', 'war', 'hqx', 'zip', 'gz', 'bz2', '7z', + + # Less common extensions to consider adding later + # jar, swf, bin, com, exe, dll, deb + # ear, hqx, eot, wmlc, kml, kmz, cco, jardiff, jnlp, run, msi, msp, msm, + # pl pm, prc pdb, rar, rpm, sea, sit, tcl tk, der, pem, crt, xpi, xspf, + # ra, mng, asx, asf, 3gpp, 3gp, mid, midi, kar, jad, wml, htc, mml + + # These are always treated as pages, not as static files, never add them: + # html, htm, shtml, xhtml, xml, aspx, php, cgi + )) + + INGORED_PATHS: frozenset[str] = frozenset(( + ".git", + ".svn", + ".DS_Store", + ".gitignore", + "lost+found", + ".DS_Store", + ".env", + "Dockerfile", + )) + PIP_RELATED_NAMES: frozenset[str] = frozenset(( + ".venv", + "venv", + "virtualenv", + ".virtualenv", + )) + NPM_RELATED_NAMES: frozenset[str] = frozenset(( + "node_modules", + "package.json", + "package-lock.json", + "yarn.lock", + )) + + DATA_DIR_NAMES: frozenset[str] = frozenset(( + ARCHIVE_DIR_NAME, + SOURCES_DIR_NAME, + LOGS_DIR_NAME, + CACHE_DIR_NAME, + LIB_DIR_NAME, + PERSONAS_DIR_NAME, + CUSTOM_TEMPLATES_DIR_NAME, + USER_PLUGINS_DIR_NAME, + )) + DATA_DIRS: frozenset[Path] = frozenset(DATA_DIR / dirname for dirname in DATA_DIR_NAMES) + DATA_FILE_NAMES: frozenset[str] = frozenset(( + CONFIG_FILENAME, + SQL_INDEX_FILENAME, + f"{SQL_INDEX_FILENAME}-wal", + f"{SQL_INDEX_FILENAME}-shm", + "queue.sqlite3", + "queue.sqlite3-wal", + "queue.sqlite3-shm", + "search.sqlite3", + JSON_INDEX_FILENAME, + HTML_INDEX_FILENAME, + ROBOTS_TXT_FILENAME, + FAVICON_FILENAME, + CONFIG_FILENAME, + f"{CONFIG_FILENAME}.bak", + "static_index.json", + )) + + # When initializing archivebox in a new directory, we check to make sure the dir is + # actually empty so that we dont clobber someone's home directory or desktop by accident. + # These files are exceptions to the is_empty check when we're trying to init a new dir, + # as they could be from a previous archivebox version, system artifacts, dependencies, etc. + ALLOWED_IN_OUTPUT_DIR: frozenset[str] = frozenset(( + *INGORED_PATHS, + *PIP_RELATED_NAMES, + *NPM_RELATED_NAMES, + *DATA_DIR_NAMES, + *DATA_FILE_NAMES, + "static", # created by old static exports <v0.6.0 + "sonic", # created by docker bind mount + )) + + CODE_LOCATIONS = benedict({ + 'PACKAGE_DIR': { + 'path': (PACKAGE_DIR).resolve(), + 'enabled': True, + 'is_valid': (PACKAGE_DIR / '__main__.py').exists(), + }, + 'LIB_DIR': { + 'path': LIB_DIR.resolve(), + 'enabled': True, + 'is_valid': LIB_DIR.is_dir(), + }, + 'RUNTIME_CONFIG': { + 'path': TMP_DIR.resolve(), + 'enabled': True, + 'is_valid': TMP_DIR.is_dir(), + }, + 'TEMPLATES_DIR': { + 'path': TEMPLATES_DIR.resolve(), + 'enabled': True, + 'is_valid': STATIC_DIR.exists(), + }, + 'CUSTOM_TEMPLATES_DIR': { + 'path': CUSTOM_TEMPLATES_DIR.resolve(), + 'enabled': True, + 'is_valid': CUSTOM_TEMPLATES_DIR.is_dir(), + }, + }) + + DATA_LOCATIONS = benedict({ + "OUTPUT_DIR": { + "path": DATA_DIR.resolve(), + "enabled": True, + "is_valid": DATABASE_FILE.exists(), + "is_mount": os.path.ismount(DATA_DIR.resolve()), + }, + "CONFIG_FILE": { + "path": CONFIG_FILE.resolve(), + "enabled": True, + "is_valid": CONFIG_FILE.exists(), + }, + "SQL_INDEX": { + "path": DATABASE_FILE.resolve(), + "enabled": True, + "is_valid": DATABASE_FILE.exists(), + "is_mount": os.path.ismount(DATABASE_FILE.resolve()), + }, + "QUEUE_DATABASE": { + "path": QUEUE_DATABASE_FILE.resolve(), + "enabled": True, + "is_valid": QUEUE_DATABASE_FILE.exists(), + "is_mount": os.path.ismount(QUEUE_DATABASE_FILE.resolve()), + }, + "ARCHIVE_DIR": { + "path": ARCHIVE_DIR.resolve(), + "enabled": True, + "is_valid": ARCHIVE_DIR.exists(), + "is_mount": os.path.ismount(ARCHIVE_DIR.resolve()), + }, + "SOURCES_DIR": { + "path": SOURCES_DIR.resolve(), + "enabled": True, + "is_valid": SOURCES_DIR.exists(), + }, + "PERSONAS_DIR": { + "path": PERSONAS_DIR.resolve(), + "enabled": PERSONAS_DIR.exists(), + "is_valid": PERSONAS_DIR.exists(), + }, + "LOGS_DIR": { + "path": LOGS_DIR.resolve(), + "enabled": True, + "is_valid": LOGS_DIR.is_dir(), + }, + "CACHE_DIR": { + "path": CACHE_DIR.resolve(), + "enabled": True, + "is_valid": CACHE_DIR.is_dir(), + }, + }) + + def __getitem__(self, key: str): + return getattr(self, key) +# add all key: values to globals() for easier importing +globals().update(CONSTANTS.__dict__) -CONSTANTS = benedict({ - key: value - for key, value in globals().items() - if key.isupper() and not key.startswith('_') -}) CONSTANTS_CONFIG = CONSTANTS diff --git a/archivebox/config/defaults.py b/archivebox/config/defaults.py index 1b7bc15a..f495523a 100644 --- a/archivebox/config/defaults.py +++ b/archivebox/config/defaults.py @@ -4,7 +4,7 @@ import os import sys import shutil -from typing import ClassVar, Dict, Optional +from typing import Dict, Optional from datetime import datetime from pathlib import Path @@ -12,7 +12,7 @@ from rich import print from pydantic import Field, field_validator, model_validator, computed_field from django.utils.crypto import get_random_string -from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_configset import BaseConfigSet from .constants import CONSTANTS, PACKAGE_DIR @@ -21,8 +21,6 @@ from .constants import CONSTANTS, PACKAGE_DIR class ShellConfig(BaseConfigSet): - section: ClassVar[ConfigSectionName] = 'SHELL_CONFIG' - DEBUG: bool = Field(default=lambda: '--debug' in sys.argv) IS_TTY: bool = Field(default=sys.stdout.isatty()) @@ -114,8 +112,6 @@ SHELL_CONFIG = ShellConfig() class StorageConfig(BaseConfigSet): - section: ClassVar[ConfigSectionName] = 'STORAGE_CONFIG' - OUTPUT_PERMISSIONS: str = Field(default='644') RESTRICT_FILE_NAMES: str = Field(default='windows') ENFORCE_ATOMIC_WRITES: bool = Field(default=True) @@ -128,8 +124,6 @@ STORAGE_CONFIG = StorageConfig() class GeneralConfig(BaseConfigSet): - section: ClassVar[ConfigSectionName] = 'GENERAL_CONFIG' - TAG_SEPARATOR_PATTERN: str = Field(default=r'[,]') @@ -137,8 +131,6 @@ GENERAL_CONFIG = GeneralConfig() class ServerConfig(BaseConfigSet): - section: ClassVar[ConfigSectionName] = 'SERVER_CONFIG' - SECRET_KEY: str = Field(default=lambda: get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789_')) BIND_ADDR: str = Field(default=lambda: ['127.0.0.1:8000', '0.0.0.0:8000'][SHELL_CONFIG.IN_DOCKER]) ALLOWED_HOSTS: str = Field(default='*') @@ -163,8 +155,6 @@ SERVER_CONFIG = ServerConfig() class ArchivingConfig(BaseConfigSet): - section: ClassVar[ConfigSectionName] = 'ARCHIVING_CONFIG' - ONLY_NEW: bool = Field(default=True) TIMEOUT: int = Field(default=60) @@ -213,8 +203,6 @@ ARCHIVING_CONFIG = ArchivingConfig() class SearchBackendConfig(BaseConfigSet): - section: ClassVar[ConfigSectionName] = 'SEARCH_BACKEND_CONFIG' - USE_INDEXING_BACKEND: bool = Field(default=True) USE_SEARCHING_BACKEND: bool = Field(default=True) diff --git a/archivebox/plugins_auth/ldap/settings.py b/archivebox/plugins_auth/ldap/settings.py index 36480168..440e592c 100644 --- a/archivebox/plugins_auth/ldap/settings.py +++ b/archivebox/plugins_auth/ldap/settings.py @@ -2,10 +2,10 @@ __package__ = 'archivebox.plugins_auth.ldap' import sys -from typing import Dict, List, ClassVar, Optional +from typing import Dict, List, Optional from pydantic import Field, model_validator, computed_field -from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_configset import BaseConfigSet LDAP_LIB = None try: @@ -24,7 +24,6 @@ class LdapConfig(BaseConfigSet): It needs to be in a separate file from apps.py so that it can be imported during settings.py initialization before the apps are loaded. """ - section: ClassVar[ConfigSectionName] = 'LDAP_CONFIG' LDAP_ENABLED: bool = Field(default=False, alias='LDAP') diff --git a/archivebox/plugins_extractor/chrome/apps.py b/archivebox/plugins_extractor/chrome/apps.py index 35a0f77b..2f96580e 100644 --- a/archivebox/plugins_extractor/chrome/apps.py +++ b/archivebox/plugins_extractor/chrome/apps.py @@ -18,7 +18,7 @@ from pydantic_pkgr import ( # Depends on other Django apps: from abx.archivebox.base_plugin import BasePlugin -from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_configset import BaseConfigSet from abx.archivebox.base_binary import BaseBinary, env # from abx.archivebox.base_extractor import BaseExtractor # from abx.archivebox.base_queue import BaseQueue @@ -83,8 +83,6 @@ def create_macos_app_symlink(target: Path, shortcut: Path): class ChromeConfig(BaseConfigSet): - section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG" - USE_CHROME: bool = Field(default=True) # Chrome Binary diff --git a/archivebox/plugins_extractor/readability/apps.py b/archivebox/plugins_extractor/readability/apps.py index 5af8de7a..3e27587a 100644 --- a/archivebox/plugins_extractor/readability/apps.py +++ b/archivebox/plugins_extractor/readability/apps.py @@ -4,15 +4,13 @@ from pathlib import Path from typing import List, Dict, Optional, ClassVar # from typing_extensions import Self -from django.conf import settings - # Depends on other PyPI/vendor packages: from pydantic import InstanceOf, Field, validate_call from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, ShallowBinary # Depends on other Django apps: from abx.archivebox.base_plugin import BasePlugin -from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_configset import BaseConfigSet from abx.archivebox.base_binary import BaseBinary, env from abx.archivebox.base_extractor import BaseExtractor from abx.archivebox.base_hook import BaseHook @@ -24,8 +22,6 @@ from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER ###################### Config ########################## class ReadabilityConfig(BaseConfigSet): - section: ClassVar[ConfigSectionName] = 'ARCHIVING_CONFIG' - SAVE_READABILITY: bool = Field(default=True, alias='USE_READABILITY') READABILITY_TIMEOUT: int = Field(default=lambda: ARCHIVING_CONFIG.TIMEOUT) diff --git a/archivebox/plugins_extractor/singlefile/apps.py b/archivebox/plugins_extractor/singlefile/apps.py index cabfe67f..66ae69cc 100644 --- a/archivebox/plugins_extractor/singlefile/apps.py +++ b/archivebox/plugins_extractor/singlefile/apps.py @@ -4,15 +4,13 @@ from pathlib import Path from typing import List, Dict, Optional, ClassVar # from typing_extensions import Self -from django.conf import settings - # Depends on other PyPI/vendor packages: from pydantic import InstanceOf, Field, validate_call from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, bin_abspath, ShallowBinary # Depends on other Django apps: from abx.archivebox.base_plugin import BasePlugin -from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_configset import BaseConfigSet from abx.archivebox.base_binary import BaseBinary, env from abx.archivebox.base_extractor import BaseExtractor from abx.archivebox.base_queue import BaseQueue @@ -25,8 +23,6 @@ from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER ###################### Config ########################## class SinglefileConfig(BaseConfigSet): - section: ClassVar[ConfigSectionName] = 'ARCHIVING_CONFIG' - SAVE_SINGLEFILE: bool = True SINGLEFILE_USER_AGENT: str = Field(default=lambda: ARCHIVING_CONFIG.USER_AGENT) diff --git a/archivebox/plugins_extractor/wget/apps.py b/archivebox/plugins_extractor/wget/apps.py index e794271b..85239173 100644 --- a/archivebox/plugins_extractor/wget/apps.py +++ b/archivebox/plugins_extractor/wget/apps.py @@ -3,13 +3,11 @@ from abx.archivebox.base_plugin import BasePlugin, InstanceOf, BaseHook # class WgetToggleConfig(ConfigSet): -# section: ConfigSectionName = 'ARCHIVE_METHOD_TOGGLES' # SAVE_WGET: bool = True # SAVE_WARC: bool = True # class WgetDependencyConfig(ConfigSet): -# section: ConfigSectionName = 'DEPENDENCY_CONFIG' # WGET_BINARY: str = Field(default='wget') # WGET_ARGS: Optional[List[str]] = Field(default=None) @@ -17,7 +15,6 @@ from abx.archivebox.base_plugin import BasePlugin, InstanceOf, BaseHook # WGET_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}'] # class WgetOptionsConfig(ConfigSet): -# section: ConfigSectionName = 'ARCHIVE_METHOD_OPTIONS' # # loaded from shared config # WGET_AUTO_COMPRESSION: bool = Field(default=True) diff --git a/archivebox/plugins_extractor/ytdlp/apps.py b/archivebox/plugins_extractor/ytdlp/apps.py index fdab408f..8d13af35 100644 --- a/archivebox/plugins_extractor/ytdlp/apps.py +++ b/archivebox/plugins_extractor/ytdlp/apps.py @@ -4,11 +4,9 @@ from subprocess import run, PIPE from pydantic import InstanceOf, Field, model_validator, AliasChoices -from django.conf import settings - from pydantic_pkgr import BinProvider, BinName, BinProviderName, ProviderLookupDict from abx.archivebox.base_plugin import BasePlugin -from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_configset import BaseConfigSet from abx.archivebox.base_binary import BaseBinary, env, apt, brew from abx.archivebox.base_hook import BaseHook @@ -19,8 +17,6 @@ from plugins_pkg.pip.apps import pip class YtdlpConfig(BaseConfigSet): - section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG" - USE_YTDLP: bool = Field(default=True, validation_alias=AliasChoices('USE_YOUTUBEDL', 'SAVE_MEDIA')) YTDLP_BINARY: str = Field(default='yt-dlp', alias='YOUTUBEDL_BINARY') diff --git a/archivebox/plugins_pkg/npm/apps.py b/archivebox/plugins_pkg/npm/apps.py index 5923b9e6..a1d32c47 100644 --- a/archivebox/plugins_pkg/npm/apps.py +++ b/archivebox/plugins_pkg/npm/apps.py @@ -19,8 +19,6 @@ from abx.archivebox.base_hook import BaseHook class NpmDependencyConfigs(BaseConfigSet): - # section: ConfigSectionName = 'DEPENDENCY_CONFIG' - # USE_NPM: bool = True # NPM_BINARY: str = Field(default='npm') # NPM_ARGS: Optional[List[str]] = Field(default=None) diff --git a/archivebox/plugins_pkg/pip/apps.py b/archivebox/plugins_pkg/pip/apps.py index d7596aec..994c09d4 100644 --- a/archivebox/plugins_pkg/pip/apps.py +++ b/archivebox/plugins_pkg/pip/apps.py @@ -17,7 +17,7 @@ from archivebox.config import CONSTANTS, VERSION import abx from abx.archivebox.base_plugin import BasePlugin -from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_configset import BaseConfigSet from abx.archivebox.base_check import BaseCheck from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env, apt, brew from abx.archivebox.base_hook import BaseHook @@ -29,8 +29,6 @@ from ...misc.logging import hint class PipDependencyConfigs(BaseConfigSet): - section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG" - USE_PIP: bool = True PIP_BINARY: str = Field(default='pip') PIP_ARGS: Optional[List[str]] = Field(default=None) diff --git a/archivebox/plugins_pkg/playwright/apps.py b/archivebox/plugins_pkg/playwright/apps.py index 1cb5d765..d1a0aa98 100644 --- a/archivebox/plugins_pkg/playwright/apps.py +++ b/archivebox/plugins_pkg/playwright/apps.py @@ -34,8 +34,6 @@ from plugins_pkg.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_ class PlaywrightConfigs(BaseConfigSet): - # section: ConfigSectionName = 'DEPENDENCY_CONFIG' - # PLAYWRIGHT_BINARY: str = Field(default='wget') # PLAYWRIGHT_ARGS: Optional[List[str]] = Field(default=None) # PLAYWRIGHT_EXTRA_ARGS: List[str] = [] diff --git a/archivebox/plugins_pkg/puppeteer/apps.py b/archivebox/plugins_pkg/puppeteer/apps.py index 8314fb5a..c32c31da 100644 --- a/archivebox/plugins_pkg/puppeteer/apps.py +++ b/archivebox/plugins_pkg/puppeteer/apps.py @@ -32,8 +32,6 @@ from plugins_pkg.npm.apps import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER class PuppeteerConfigs(BaseConfigSet): - # section: ConfigSectionName = 'DEPENDENCY_CONFIG' - # PUPPETEER_BINARY: str = Field(default='wget') # PUPPETEER_ARGS: Optional[List[str]] = Field(default=None) # PUPPETEER_EXTRA_ARGS: List[str] = [] diff --git a/archivebox/plugins_search/ripgrep/apps.py b/archivebox/plugins_search/ripgrep/apps.py index 1d44d84b..f7a1b986 100644 --- a/archivebox/plugins_search/ripgrep/apps.py +++ b/archivebox/plugins_search/ripgrep/apps.py @@ -3,7 +3,7 @@ __package__ = 'archivebox.plugins_search.ripgrep' import re from pathlib import Path from subprocess import run -from typing import List, Dict, ClassVar, Iterable +from typing import List, Dict, Iterable # from typing_extensions import Self # Depends on other PyPI/vendor packages: @@ -12,7 +12,7 @@ from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinN # Depends on other Django apps: from abx.archivebox.base_plugin import BasePlugin -from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_configset import BaseConfigSet from abx.archivebox.base_binary import BaseBinary, env, apt, brew from abx.archivebox.base_hook import BaseHook from abx.archivebox.base_searchbackend import BaseSearchBackend @@ -23,8 +23,6 @@ from archivebox.config import CONSTANTS, SEARCH_BACKEND_CONFIG ###################### Config ########################## class RipgrepConfig(BaseConfigSet): - section: ClassVar[ConfigSectionName] = 'DEPENDENCY_CONFIG' - RIPGREP_BINARY: str = Field(default='rg') RIPGREP_IGNORE_EXTENSIONS: str = Field(default='css,js,orig,svg') diff --git a/archivebox/plugins_search/sonic/apps.py b/archivebox/plugins_search/sonic/apps.py index 97f7b816..efc47ceb 100644 --- a/archivebox/plugins_search/sonic/apps.py +++ b/archivebox/plugins_search/sonic/apps.py @@ -1,7 +1,7 @@ __package__ = 'archivebox.plugins_search.sonic' import sys -from typing import List, Dict, ClassVar, Generator, cast +from typing import List, Dict, Generator, cast # Depends on other PyPI/vendor packages: from pydantic import InstanceOf, Field, model_validator @@ -9,7 +9,7 @@ from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinN # Depends on other Django apps: from abx.archivebox.base_plugin import BasePlugin -from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_configset import BaseConfigSet from abx.archivebox.base_binary import BaseBinary, env, brew from abx.archivebox.base_hook import BaseHook from abx.archivebox.base_searchbackend import BaseSearchBackend @@ -27,8 +27,6 @@ except ImportError: ###################### Config ########################## class SonicConfig(BaseConfigSet): - section: ClassVar[ConfigSectionName] = 'DEPENDENCY_CONFIG' - SONIC_BINARY: str = Field(default='sonic') SONIC_HOST: str = Field(default='localhost', alias='SEARCH_BACKEND_HOST_NAME') diff --git a/archivebox/plugins_search/sqlite/apps.py b/archivebox/plugins_search/sqlite/apps.py index 28209b0f..98db5363 100644 --- a/archivebox/plugins_search/sqlite/apps.py +++ b/archivebox/plugins_search/sqlite/apps.py @@ -3,7 +3,7 @@ __package__ = 'archivebox.plugins_search.sqlite' import sys import codecs import sqlite3 -from typing import List, ClassVar, Iterable, Callable +from typing import List, Iterable, Callable from django.core.exceptions import ImproperlyConfigured @@ -12,7 +12,7 @@ from pydantic import InstanceOf, Field, model_validator # Depends on other Django apps: from abx.archivebox.base_plugin import BasePlugin -from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_configset import BaseConfigSet from abx.archivebox.base_hook import BaseHook from abx.archivebox.base_searchbackend import BaseSearchBackend @@ -24,8 +24,6 @@ from archivebox.config import SEARCH_BACKEND_CONFIG ###################### Config ########################## class SqliteftsConfig(BaseConfigSet): - section: ClassVar[ConfigSectionName] = 'DEPENDENCY_CONFIG' - SQLITEFTS_SEPARATE_DATABASE: bool = Field(default=True, alias='FTS_SEPARATE_DATABASE') SQLITEFTS_TOKENIZERS: str = Field(default='porter unicode61 remove_diacritics 2', alias='FTS_TOKENIZERS') SQLITEFTS_MAX_LENGTH: int = Field(default=int(1e9), alias='FTS_SQLITE_MAX_LENGTH')