From 7a41b6ae46102901ca11fedecd3816c5e9a86105 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 30 Sep 2024 16:50:36 -0700 Subject: [PATCH] remove ConfigSectionName and add type hints to CONSTANTS --- archivebox/abx/archivebox/base_configset.py | 27 +- archivebox/config/constants.py | 461 +++++++++--------- archivebox/config/defaults.py | 16 +- archivebox/plugins_auth/ldap/settings.py | 5 +- archivebox/plugins_extractor/chrome/apps.py | 4 +- .../plugins_extractor/readability/apps.py | 6 +- .../plugins_extractor/singlefile/apps.py | 6 +- archivebox/plugins_extractor/wget/apps.py | 3 - archivebox/plugins_extractor/ytdlp/apps.py | 6 +- archivebox/plugins_pkg/npm/apps.py | 2 - archivebox/plugins_pkg/pip/apps.py | 4 +- archivebox/plugins_pkg/playwright/apps.py | 2 - archivebox/plugins_pkg/puppeteer/apps.py | 2 - archivebox/plugins_search/ripgrep/apps.py | 6 +- archivebox/plugins_search/sonic/apps.py | 6 +- archivebox/plugins_search/sqlite/apps.py | 6 +- 16 files changed, 253 insertions(+), 309 deletions(-) diff --git a/archivebox/abx/archivebox/base_configset.py b/archivebox/abx/archivebox/base_configset.py index b27b302b..6462d6be 100644 --- a/archivebox/abx/archivebox/base_configset.py +++ b/archivebox/abx/archivebox/base_configset.py @@ -4,7 +4,7 @@ import os import re import json from pathlib import Path -from typing import Literal, Type, Tuple, Callable, ClassVar, Any, get_args +from typing import Type, Tuple, Callable, ClassVar, Any import toml from benedict import benedict @@ -24,21 +24,6 @@ PACKAGE_DIR = Path(__file__).resolve().parent.parent DATA_DIR = Path(os.curdir).resolve() -ConfigSectionName = Literal[ - 'SHELL_CONFIG', - 'GENERAL_CONFIG', - 'STORAGE_CONFIG', - 'SERVER_CONFIG', - 'ARCHIVING_CONFIG', - 'LDAP_CONFIG', - 'ARCHIVE_METHOD_TOGGLES', - 'ARCHIVE_METHOD_OPTIONS', - 'SEARCH_BACKEND_CONFIG', - 'DEPENDENCY_CONFIG', -] -ConfigSectionNames: Tuple[ConfigSectionName, ...] = get_args(ConfigSectionName) # just gets the list of values from the Literal type - - def better_toml_dump_str(val: Any) -> str: try: return toml.encoder._dump_str(val) # type: ignore @@ -74,14 +59,14 @@ class FlatTomlConfigSettingsSource(TomlConfigSettingsSource): self.nested_toml_data = self._read_files(self.toml_file_path) self.toml_data = {} - for section_name, section in self.nested_toml_data.items(): - if section_name in ConfigSectionNames and isinstance(section, dict): + for top_level_key, top_level_value in self.nested_toml_data.items(): + if isinstance(top_level_value, dict): # value is nested, flatten it - for key, value in section.items(): + for key, value in top_level_value.items(): self.toml_data[key] = value else: # value is already flat, just set it as-is - self.toml_data[section_name] = section + self.toml_data[top_level_key] = top_level_value # filter toml_data to only include keys that are defined on this settings_cls self.toml_data = { @@ -242,8 +227,6 @@ class ArchiveBoxBaseConfig(BaseSettings): class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook): # type: ignore[type-arg] hook_type: ClassVar[HookType] = 'CONFIG' - section: ClassVar[ConfigSectionName] = 'GENERAL_CONFIG' - # def register(self, settings, parent_plugin=None): # # self._plugin = parent_plugin # for debugging only, never rely on this! diff --git a/archivebox/config/constants.py b/archivebox/config/constants.py index d49a3573..669fd22e 100644 --- a/archivebox/config/constants.py +++ b/archivebox/config/constants.py @@ -13,12 +13,12 @@ from ..misc.logging import DEFAULT_CLI_COLORS ###################### Config ########################## -PACKAGE_DIR = Path(__file__).resolve().parent.parent # archivebox source code dir -DATA_DIR = Path(os.curdir).resolve() # archivebox user data dir -ARCHIVE_DIR = DATA_DIR / 'archive' # archivebox snapshot data dir +PACKAGE_DIR: Path = Path(__file__).resolve().parent.parent # archivebox source code dir +DATA_DIR: Path = Path(os.curdir).resolve() # archivebox user data dir +ARCHIVE_DIR: Path = DATA_DIR / 'archive' # archivebox snapshot data dir -def _detect_installed_version(): +def _detect_installed_version(PACKAGE_DIR: Path): """Autodetect the installed archivebox version by using pip package metadata or pyproject.toml file""" try: return importlib.metadata.version(__package__ or 'archivebox') @@ -34,234 +34,239 @@ def _detect_installed_version(): raise Exception('Failed to detect installed archivebox version!') -VERSION = _detect_installed_version() -__version__ = VERSION +VERSION: str = _detect_installed_version(PACKAGE_DIR) -PACKAGE_DIR_NAME: str = PACKAGE_DIR.name -TEMPLATES_DIR_NAME: str = 'templates' -TEMPLATES_DIR: Path = PACKAGE_DIR / TEMPLATES_DIR_NAME -STATIC_DIR: Path = TEMPLATES_DIR / 'static' -USER_PLUGINS_DIR_NAME: str = 'user_plugins' -CUSTOM_TEMPLATES_DIR_NAME: str = 'user_templates' - -ARCHIVE_DIR_NAME: str = 'archive' -SOURCES_DIR_NAME: str = 'sources' -PERSONAS_DIR_NAME: str = 'personas' -CRONTABS_DIR_NAME: str = 'crontabs' -CACHE_DIR_NAME: str = 'cache' -LOGS_DIR_NAME: str = 'logs' -LIB_DIR_NAME: str = 'lib' -TMP_DIR_NAME: str = 'tmp' - -OUTPUT_DIR: Path = DATA_DIR -ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME -SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME -PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME -CACHE_DIR: Path = DATA_DIR / CACHE_DIR_NAME -LOGS_DIR: Path = DATA_DIR / LOGS_DIR_NAME -LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME -TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME -CUSTOM_TEMPLATES_DIR: Path = DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME -USER_PLUGINS_DIR: Path = DATA_DIR / USER_PLUGINS_DIR_NAME - -LIB_PIP_DIR: Path = LIB_DIR / 'pip' -LIB_NPM_DIR: Path = LIB_DIR / 'npm' -LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers' -LIB_BIN_DIR: Path = LIB_DIR / 'bin' -BIN_DIR: Path = LIB_BIN_DIR - -CONFIG_FILENAME: str = 'ArchiveBox.conf' -SQL_INDEX_FILENAME: str = 'index.sqlite3' - -CONFIG_FILE: Path = DATA_DIR / CONFIG_FILENAME -DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME -QUEUE_DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME.replace('index.', 'queue.') - -JSON_INDEX_FILENAME: str = 'index.json' -HTML_INDEX_FILENAME: str = 'index.html' -ROBOTS_TXT_FILENAME: str = 'robots.txt' -FAVICON_FILENAME: str = 'favicon.ico' - -TIMEZONE: str = 'UTC' -DEFAULT_CLI_COLORS: Dict[str, str] = DEFAULT_CLI_COLORS -DISABLED_CLI_COLORS: Dict[str, str] = benedict({k: '' for k in DEFAULT_CLI_COLORS}) - -ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE - -STATICFILE_EXTENSIONS: frozenset[str] = frozenset(( - # 99.999% of the time, URLs ending in these extensions are static files - # that can be downloaded as-is, not html pages that need to be rendered - 'gif', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'wbmp', 'ico', 'jng', 'bmp', - 'svg', 'svgz', 'webp', 'ps', 'eps', 'ai', - 'mp3', 'mp4', 'm4a', 'mpeg', 'mpg', 'mkv', 'mov', 'webm', 'm4v', - 'flv', 'wmv', 'avi', 'ogg', 'ts', 'm3u8', - 'pdf', 'txt', 'rtf', 'rtfd', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx', - 'atom', 'rss', 'css', 'js', 'json', - 'dmg', 'iso', 'img', - 'rar', 'war', 'hqx', 'zip', 'gz', 'bz2', '7z', - - # Less common extensions to consider adding later - # jar, swf, bin, com, exe, dll, deb - # ear, hqx, eot, wmlc, kml, kmz, cco, jardiff, jnlp, run, msi, msp, msm, - # pl pm, prc pdb, rar, rpm, sea, sit, tcl tk, der, pem, crt, xpi, xspf, - # ra, mng, asx, asf, 3gpp, 3gp, mid, midi, kar, jad, wml, htc, mml - - # These are always treated as pages, not as static files, never add them: - # html, htm, shtml, xhtml, xml, aspx, php, cgi -)) - -INGORED_PATHS: frozenset[str] = frozenset(( - ".git", - ".svn", - ".DS_Store", - ".gitignore", - "lost+found", - ".DS_Store", - ".env", - "Dockerfile", -)) -PIP_RELATED_NAMES: frozenset[str] = frozenset(( - ".venv", - "venv", - "virtualenv", - ".virtualenv", -)) -NPM_RELATED_NAMES: frozenset[str] = frozenset(( - "node_modules", - "package.json", - "package-lock.json", - "yarn.lock", -)) - -DATA_DIR_NAMES: frozenset[str] = frozenset(( - ARCHIVE_DIR_NAME, - SOURCES_DIR_NAME, - LOGS_DIR_NAME, - CACHE_DIR_NAME, - LIB_DIR_NAME, - PERSONAS_DIR_NAME, - CUSTOM_TEMPLATES_DIR_NAME, - USER_PLUGINS_DIR_NAME, -)) -DATA_DIRS: frozenset[Path] = frozenset(DATA_DIR / dirname for dirname in DATA_DIR_NAMES) -DATA_FILE_NAMES: frozenset[str] = frozenset(( - CONFIG_FILENAME, - SQL_INDEX_FILENAME, - f"{SQL_INDEX_FILENAME}-wal", - f"{SQL_INDEX_FILENAME}-shm", - "queue.sqlite3", - "queue.sqlite3-wal", - "queue.sqlite3-shm", - "search.sqlite3", - JSON_INDEX_FILENAME, - HTML_INDEX_FILENAME, - ROBOTS_TXT_FILENAME, - FAVICON_FILENAME, - CONFIG_FILENAME, - f"{CONFIG_FILENAME}.bak", - "static_index.json", -)) - -# When initializing archivebox in a new directory, we check to make sure the dir is -# actually empty so that we dont clobber someone's home directory or desktop by accident. -# These files are exceptions to the is_empty check when we're trying to init a new dir, -# as they could be from a previous archivebox version, system artifacts, dependencies, etc. -ALLOWED_IN_OUTPUT_DIR: frozenset[str] = frozenset(( - *INGORED_PATHS, - *PIP_RELATED_NAMES, - *NPM_RELATED_NAMES, - *DATA_DIR_NAMES, - *DATA_FILE_NAMES, - "static", # created by old static exports