remove ConfigSectionName and add type hints to CONSTANTS

This commit is contained in:
Nick Sweeting 2024-09-30 16:50:36 -07:00
parent 3e5b6ddeae
commit 7a41b6ae46
No known key found for this signature in database
16 changed files with 253 additions and 309 deletions

View file

@ -4,7 +4,7 @@ import os
import re
import json
from pathlib import Path
from typing import Literal, Type, Tuple, Callable, ClassVar, Any, get_args
from typing import Type, Tuple, Callable, ClassVar, Any
import toml
from benedict import benedict
@ -24,21 +24,6 @@ PACKAGE_DIR = Path(__file__).resolve().parent.parent
DATA_DIR = Path(os.curdir).resolve()
ConfigSectionName = Literal[
'SHELL_CONFIG',
'GENERAL_CONFIG',
'STORAGE_CONFIG',
'SERVER_CONFIG',
'ARCHIVING_CONFIG',
'LDAP_CONFIG',
'ARCHIVE_METHOD_TOGGLES',
'ARCHIVE_METHOD_OPTIONS',
'SEARCH_BACKEND_CONFIG',
'DEPENDENCY_CONFIG',
]
ConfigSectionNames: Tuple[ConfigSectionName, ...] = get_args(ConfigSectionName) # just gets the list of values from the Literal type
def better_toml_dump_str(val: Any) -> str:
try:
return toml.encoder._dump_str(val) # type: ignore
@ -74,14 +59,14 @@ class FlatTomlConfigSettingsSource(TomlConfigSettingsSource):
self.nested_toml_data = self._read_files(self.toml_file_path)
self.toml_data = {}
for section_name, section in self.nested_toml_data.items():
if section_name in ConfigSectionNames and isinstance(section, dict):
for top_level_key, top_level_value in self.nested_toml_data.items():
if isinstance(top_level_value, dict):
# value is nested, flatten it
for key, value in section.items():
for key, value in top_level_value.items():
self.toml_data[key] = value
else:
# value is already flat, just set it as-is
self.toml_data[section_name] = section
self.toml_data[top_level_key] = top_level_value
# filter toml_data to only include keys that are defined on this settings_cls
self.toml_data = {
@ -242,8 +227,6 @@ class ArchiveBoxBaseConfig(BaseSettings):
class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook): # type: ignore[type-arg]
hook_type: ClassVar[HookType] = 'CONFIG'
section: ClassVar[ConfigSectionName] = 'GENERAL_CONFIG'
# def register(self, settings, parent_plugin=None):
# # self._plugin = parent_plugin # for debugging only, never rely on this!

View file

@ -13,12 +13,12 @@ from ..misc.logging import DEFAULT_CLI_COLORS
###################### Config ##########################
PACKAGE_DIR = Path(__file__).resolve().parent.parent # archivebox source code dir
DATA_DIR = Path(os.curdir).resolve() # archivebox user data dir
ARCHIVE_DIR = DATA_DIR / 'archive' # archivebox snapshot data dir
PACKAGE_DIR: Path = Path(__file__).resolve().parent.parent # archivebox source code dir
DATA_DIR: Path = Path(os.curdir).resolve() # archivebox user data dir
ARCHIVE_DIR: Path = DATA_DIR / 'archive' # archivebox snapshot data dir
def _detect_installed_version():
def _detect_installed_version(PACKAGE_DIR: Path):
"""Autodetect the installed archivebox version by using pip package metadata or pyproject.toml file"""
try:
return importlib.metadata.version(__package__ or 'archivebox')
@ -34,234 +34,239 @@ def _detect_installed_version():
raise Exception('Failed to detect installed archivebox version!')
VERSION = _detect_installed_version()
__version__ = VERSION
VERSION: str = _detect_installed_version(PACKAGE_DIR)
PACKAGE_DIR_NAME: str = PACKAGE_DIR.name
TEMPLATES_DIR_NAME: str = 'templates'
TEMPLATES_DIR: Path = PACKAGE_DIR / TEMPLATES_DIR_NAME
STATIC_DIR: Path = TEMPLATES_DIR / 'static'
USER_PLUGINS_DIR_NAME: str = 'user_plugins'
CUSTOM_TEMPLATES_DIR_NAME: str = 'user_templates'
ARCHIVE_DIR_NAME: str = 'archive'
SOURCES_DIR_NAME: str = 'sources'
PERSONAS_DIR_NAME: str = 'personas'
CRONTABS_DIR_NAME: str = 'crontabs'
CACHE_DIR_NAME: str = 'cache'
LOGS_DIR_NAME: str = 'logs'
LIB_DIR_NAME: str = 'lib'
TMP_DIR_NAME: str = 'tmp'
OUTPUT_DIR: Path = DATA_DIR
ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME
SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME
PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME
CACHE_DIR: Path = DATA_DIR / CACHE_DIR_NAME
LOGS_DIR: Path = DATA_DIR / LOGS_DIR_NAME
LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME
TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME
CUSTOM_TEMPLATES_DIR: Path = DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME
USER_PLUGINS_DIR: Path = DATA_DIR / USER_PLUGINS_DIR_NAME
LIB_PIP_DIR: Path = LIB_DIR / 'pip'
LIB_NPM_DIR: Path = LIB_DIR / 'npm'
LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers'
LIB_BIN_DIR: Path = LIB_DIR / 'bin'
BIN_DIR: Path = LIB_BIN_DIR
CONFIG_FILENAME: str = 'ArchiveBox.conf'
SQL_INDEX_FILENAME: str = 'index.sqlite3'
CONFIG_FILE: Path = DATA_DIR / CONFIG_FILENAME
DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME
QUEUE_DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME.replace('index.', 'queue.')
JSON_INDEX_FILENAME: str = 'index.json'
HTML_INDEX_FILENAME: str = 'index.html'
ROBOTS_TXT_FILENAME: str = 'robots.txt'
FAVICON_FILENAME: str = 'favicon.ico'
TIMEZONE: str = 'UTC'
DEFAULT_CLI_COLORS: Dict[str, str] = DEFAULT_CLI_COLORS
DISABLED_CLI_COLORS: Dict[str, str] = benedict({k: '' for k in DEFAULT_CLI_COLORS})
ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE
STATICFILE_EXTENSIONS: frozenset[str] = frozenset((
# 99.999% of the time, URLs ending in these extensions are static files
# that can be downloaded as-is, not html pages that need to be rendered
'gif', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'wbmp', 'ico', 'jng', 'bmp',
'svg', 'svgz', 'webp', 'ps', 'eps', 'ai',
'mp3', 'mp4', 'm4a', 'mpeg', 'mpg', 'mkv', 'mov', 'webm', 'm4v',
'flv', 'wmv', 'avi', 'ogg', 'ts', 'm3u8',
'pdf', 'txt', 'rtf', 'rtfd', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx',
'atom', 'rss', 'css', 'js', 'json',
'dmg', 'iso', 'img',
'rar', 'war', 'hqx', 'zip', 'gz', 'bz2', '7z',
# Less common extensions to consider adding later
# jar, swf, bin, com, exe, dll, deb
# ear, hqx, eot, wmlc, kml, kmz, cco, jardiff, jnlp, run, msi, msp, msm,
# pl pm, prc pdb, rar, rpm, sea, sit, tcl tk, der, pem, crt, xpi, xspf,
# ra, mng, asx, asf, 3gpp, 3gp, mid, midi, kar, jad, wml, htc, mml
# These are always treated as pages, not as static files, never add them:
# html, htm, shtml, xhtml, xml, aspx, php, cgi
))
# Version-control, OS and tooling artifacts; this set is folded into
# ALLOWED_IN_OUTPUT_DIR further down.
# NOTE(review): the name is a misspelling of "IGNORED_PATHS". It is kept as-is
# because other modules may import it under this spelling -- confirm before renaming.
INGORED_PATHS: frozenset[str] = frozenset((
    ".git",
    ".svn",
    ".DS_Store",  # was listed twice; a frozenset dedupes, so the value is unchanged
    ".gitignore",
    "lost+found",
    ".env",
    "Dockerfile",
))
PIP_RELATED_NAMES: frozenset[str] = frozenset((
".venv",
"venv",
"virtualenv",
".virtualenv",
))
NPM_RELATED_NAMES: frozenset[str] = frozenset((
"node_modules",
"package.json",
"package-lock.json",
"yarn.lock",
))
DATA_DIR_NAMES: frozenset[str] = frozenset((
ARCHIVE_DIR_NAME,
SOURCES_DIR_NAME,
LOGS_DIR_NAME,
CACHE_DIR_NAME,
LIB_DIR_NAME,
PERSONAS_DIR_NAME,
CUSTOM_TEMPLATES_DIR_NAME,
USER_PLUGINS_DIR_NAME,
))
DATA_DIRS: frozenset[Path] = frozenset(DATA_DIR / dirname for dirname in DATA_DIR_NAMES)
# File names that may sit directly inside the data dir; this set is folded into
# ALLOWED_IN_OUTPUT_DIR further down.
# CONFIG_FILENAME used to appear twice in this literal; the duplicate entry was
# dropped (a frozenset dedupes, so the resulting value is unchanged).
DATA_FILE_NAMES: frozenset[str] = frozenset((
    # config file and its backup
    CONFIG_FILENAME,
    f"{CONFIG_FILENAME}.bak",
    # main sqlite index plus its -wal / -shm companion files
    SQL_INDEX_FILENAME,
    f"{SQL_INDEX_FILENAME}-wal",
    f"{SQL_INDEX_FILENAME}-shm",
    # queue and search databases
    "queue.sqlite3",
    "queue.sqlite3-wal",
    "queue.sqlite3-shm",
    "search.sqlite3",
    # index and static files
    JSON_INDEX_FILENAME,
    HTML_INDEX_FILENAME,
    ROBOTS_TXT_FILENAME,
    FAVICON_FILENAME,
    "static_index.json",
))
# When initializing archivebox in a new directory, we check to make sure the dir is
# actually empty so that we don't clobber someone's home directory or desktop by accident.
# These files are exceptions to the is_empty check when we're trying to init a new dir,
# as they could be from a previous archivebox version, system artifacts, dependencies, etc.
ALLOWED_IN_OUTPUT_DIR: frozenset[str] = frozenset((
*INGORED_PATHS,
*PIP_RELATED_NAMES,
*NPM_RELATED_NAMES,
*DATA_DIR_NAMES,
*DATA_FILE_NAMES,
"static", # created by old static exports <v0.6.0
"sonic", # created by docker bind mount
))
# Table of code-side locations. Each entry holds the resolved path, an
# 'enabled' flag and an 'is_valid' flag.
# The .exists() / .is_dir() probes run when this assignment executes; the
# stored values are plain booleans captured at that moment.
CODE_LOCATIONS = benedict({
'PACKAGE_DIR': {
'path': (PACKAGE_DIR).resolve(),
'enabled': True,
# valid when the package dir contains a __main__.py
'is_valid': (PACKAGE_DIR / '__main__.py').exists(),
},
'LIB_DIR': {
'path': LIB_DIR.resolve(),
'enabled': True,
'is_valid': LIB_DIR.is_dir(),
},
# the 'RUNTIME_CONFIG' entry points at TMP_DIR
'RUNTIME_CONFIG': {
'path': TMP_DIR.resolve(),
'enabled': True,
'is_valid': TMP_DIR.is_dir(),
},
'TEMPLATES_DIR': {
'path': TEMPLATES_DIR.resolve(),
'enabled': True,
# validity is judged by the presence of STATIC_DIR, not TEMPLATES_DIR itself
'is_valid': STATIC_DIR.exists(),
},
'CUSTOM_TEMPLATES_DIR': {
'path': CUSTOM_TEMPLATES_DIR.resolve(),
'enabled': True,
'is_valid': CUSTOM_TEMPLATES_DIR.is_dir(),
},
})
class CONSTANTS:
PACKAGE_DIR: Path = PACKAGE_DIR # archivebox source code dir
DATA_DIR: Path = DATA_DIR # archivebox user data dir
ARCHIVE_DIR: Path = ARCHIVE_DIR # archivebox snapshot data dir
VERSION: str = VERSION
# Table of data-side locations. Each entry holds the resolved path plus
# 'enabled' / 'is_valid' flags, and for some entries an 'is_mount' flag
# computed with os.path.ismount().
# All filesystem probes run when this assignment executes; the stored values
# are plain booleans captured at that moment.
DATA_LOCATIONS = benedict({
"OUTPUT_DIR": {
"path": DATA_DIR.resolve(),
"enabled": True,
# the data dir counts as valid only when the sqlite index file exists in it
"is_valid": DATABASE_FILE.exists(),
"is_mount": os.path.ismount(DATA_DIR.resolve()),
},
"CONFIG_FILE": {
"path": CONFIG_FILE.resolve(),
"enabled": True,
"is_valid": CONFIG_FILE.exists(),
},
"SQL_INDEX": {
"path": DATABASE_FILE.resolve(),
"enabled": True,
"is_valid": DATABASE_FILE.exists(),
"is_mount": os.path.ismount(DATABASE_FILE.resolve()),
},
"QUEUE_DATABASE": {
"path": QUEUE_DATABASE_FILE.resolve(),
"enabled": True,
"is_valid": QUEUE_DATABASE_FILE.exists(),
"is_mount": os.path.ismount(QUEUE_DATABASE_FILE.resolve()),
},
"ARCHIVE_DIR": {
"path": ARCHIVE_DIR.resolve(),
"enabled": True,
"is_valid": ARCHIVE_DIR.exists(),
"is_mount": os.path.ismount(ARCHIVE_DIR.resolve()),
},
"SOURCES_DIR": {
"path": SOURCES_DIR.resolve(),
"enabled": True,
"is_valid": SOURCES_DIR.exists(),
},
"PERSONAS_DIR": {
"path": PERSONAS_DIR.resolve(),
# unlike the other entries, 'enabled' here depends on the directory existing
"enabled": PERSONAS_DIR.exists(),
"is_valid": PERSONAS_DIR.exists(),
},
"LOGS_DIR": {
"path": LOGS_DIR.resolve(),
"enabled": True,
"is_valid": LOGS_DIR.is_dir(),
},
"CACHE_DIR": {
"path": CACHE_DIR.resolve(),
"enabled": True,
"is_valid": CACHE_DIR.is_dir(),
},
})
PACKAGE_DIR_NAME: str = PACKAGE_DIR.name
TEMPLATES_DIR_NAME: str = 'templates'
TEMPLATES_DIR: Path = PACKAGE_DIR / TEMPLATES_DIR_NAME
STATIC_DIR: Path = TEMPLATES_DIR / 'static'
USER_PLUGINS_DIR_NAME: str = 'user_plugins'
CUSTOM_TEMPLATES_DIR_NAME: str = 'user_templates'
ARCHIVE_DIR_NAME: str = 'archive'
SOURCES_DIR_NAME: str = 'sources'
PERSONAS_DIR_NAME: str = 'personas'
CRONTABS_DIR_NAME: str = 'crontabs'
CACHE_DIR_NAME: str = 'cache'
LOGS_DIR_NAME: str = 'logs'
LIB_DIR_NAME: str = 'lib'
TMP_DIR_NAME: str = 'tmp'
OUTPUT_DIR: Path = DATA_DIR
ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME
SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME
PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME
CACHE_DIR: Path = DATA_DIR / CACHE_DIR_NAME
LOGS_DIR: Path = DATA_DIR / LOGS_DIR_NAME
LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME
TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME
CUSTOM_TEMPLATES_DIR: Path = DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME
USER_PLUGINS_DIR: Path = DATA_DIR / USER_PLUGINS_DIR_NAME
LIB_PIP_DIR: Path = LIB_DIR / 'pip'
LIB_NPM_DIR: Path = LIB_DIR / 'npm'
LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers'
LIB_BIN_DIR: Path = LIB_DIR / 'bin'
BIN_DIR: Path = LIB_BIN_DIR
CONFIG_FILENAME: str = 'ArchiveBox.conf'
SQL_INDEX_FILENAME: str = 'index.sqlite3'
CONFIG_FILE: Path = DATA_DIR / CONFIG_FILENAME
DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME
QUEUE_DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME.replace('index.', 'queue.')
JSON_INDEX_FILENAME: str = 'index.json'
HTML_INDEX_FILENAME: str = 'index.html'
ROBOTS_TXT_FILENAME: str = 'robots.txt'
FAVICON_FILENAME: str = 'favicon.ico'
TIMEZONE: str = 'UTC'
DEFAULT_CLI_COLORS: Dict[str, str] = DEFAULT_CLI_COLORS
DISABLED_CLI_COLORS: Dict[str, str] = benedict({k: '' for k in DEFAULT_CLI_COLORS})
ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE
STATICFILE_EXTENSIONS: frozenset[str] = frozenset((
# 99.999% of the time, URLs ending in these extensions are static files
# that can be downloaded as-is, not html pages that need to be rendered
'gif', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'wbmp', 'ico', 'jng', 'bmp',
'svg', 'svgz', 'webp', 'ps', 'eps', 'ai',
'mp3', 'mp4', 'm4a', 'mpeg', 'mpg', 'mkv', 'mov', 'webm', 'm4v',
'flv', 'wmv', 'avi', 'ogg', 'ts', 'm3u8',
'pdf', 'txt', 'rtf', 'rtfd', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx',
'atom', 'rss', 'css', 'js', 'json',
'dmg', 'iso', 'img',
'rar', 'war', 'hqx', 'zip', 'gz', 'bz2', '7z',
# Less common extensions to consider adding later
# jar, swf, bin, com, exe, dll, deb
# ear, hqx, eot, wmlc, kml, kmz, cco, jardiff, jnlp, run, msi, msp, msm,
# pl pm, prc pdb, rar, rpm, sea, sit, tcl tk, der, pem, crt, xpi, xspf,
# ra, mng, asx, asf, 3gpp, 3gp, mid, midi, kar, jad, wml, htc, mml
# These are always treated as pages, not as static files, never add them:
# html, htm, shtml, xhtml, xml, aspx, php, cgi
))
INGORED_PATHS: frozenset[str] = frozenset((
".git",
".svn",
".DS_Store",
".gitignore",
"lost+found",
".DS_Store",
".env",
"Dockerfile",
))
PIP_RELATED_NAMES: frozenset[str] = frozenset((
".venv",
"venv",
"virtualenv",
".virtualenv",
))
NPM_RELATED_NAMES: frozenset[str] = frozenset((
"node_modules",
"package.json",
"package-lock.json",
"yarn.lock",
))
DATA_DIR_NAMES: frozenset[str] = frozenset((
ARCHIVE_DIR_NAME,
SOURCES_DIR_NAME,
LOGS_DIR_NAME,
CACHE_DIR_NAME,
LIB_DIR_NAME,
PERSONAS_DIR_NAME,
CUSTOM_TEMPLATES_DIR_NAME,
USER_PLUGINS_DIR_NAME,
))
DATA_DIRS: frozenset[Path] = frozenset(DATA_DIR / dirname for dirname in DATA_DIR_NAMES)
DATA_FILE_NAMES: frozenset[str] = frozenset((
CONFIG_FILENAME,
SQL_INDEX_FILENAME,
f"{SQL_INDEX_FILENAME}-wal",
f"{SQL_INDEX_FILENAME}-shm",
"queue.sqlite3",
"queue.sqlite3-wal",
"queue.sqlite3-shm",
"search.sqlite3",
JSON_INDEX_FILENAME,
HTML_INDEX_FILENAME,
ROBOTS_TXT_FILENAME,
FAVICON_FILENAME,
CONFIG_FILENAME,
f"{CONFIG_FILENAME}.bak",
"static_index.json",
))
# When initializing archivebox in a new directory, we check to make sure the dir is
# actually empty so that we dont clobber someone's home directory or desktop by accident.
# These files are exceptions to the is_empty check when we're trying to init a new dir,
# as they could be from a previous archivebox version, system artifacts, dependencies, etc.
ALLOWED_IN_OUTPUT_DIR: frozenset[str] = frozenset((
*INGORED_PATHS,
*PIP_RELATED_NAMES,
*NPM_RELATED_NAMES,
*DATA_DIR_NAMES,
*DATA_FILE_NAMES,
"static", # created by old static exports <v0.6.0
"sonic", # created by docker bind mount
))
CODE_LOCATIONS = benedict({
'PACKAGE_DIR': {
'path': (PACKAGE_DIR).resolve(),
'enabled': True,
'is_valid': (PACKAGE_DIR / '__main__.py').exists(),
},
'LIB_DIR': {
'path': LIB_DIR.resolve(),
'enabled': True,
'is_valid': LIB_DIR.is_dir(),
},
'RUNTIME_CONFIG': {
'path': TMP_DIR.resolve(),
'enabled': True,
'is_valid': TMP_DIR.is_dir(),
},
'TEMPLATES_DIR': {
'path': TEMPLATES_DIR.resolve(),
'enabled': True,
'is_valid': STATIC_DIR.exists(),
},
'CUSTOM_TEMPLATES_DIR': {
'path': CUSTOM_TEMPLATES_DIR.resolve(),
'enabled': True,
'is_valid': CUSTOM_TEMPLATES_DIR.is_dir(),
},
})
DATA_LOCATIONS = benedict({
"OUTPUT_DIR": {
"path": DATA_DIR.resolve(),
"enabled": True,
"is_valid": DATABASE_FILE.exists(),
"is_mount": os.path.ismount(DATA_DIR.resolve()),
},
"CONFIG_FILE": {
"path": CONFIG_FILE.resolve(),
"enabled": True,
"is_valid": CONFIG_FILE.exists(),
},
"SQL_INDEX": {
"path": DATABASE_FILE.resolve(),
"enabled": True,
"is_valid": DATABASE_FILE.exists(),
"is_mount": os.path.ismount(DATABASE_FILE.resolve()),
},
"QUEUE_DATABASE": {
"path": QUEUE_DATABASE_FILE.resolve(),
"enabled": True,
"is_valid": QUEUE_DATABASE_FILE.exists(),
"is_mount": os.path.ismount(QUEUE_DATABASE_FILE.resolve()),
},
"ARCHIVE_DIR": {
"path": ARCHIVE_DIR.resolve(),
"enabled": True,
"is_valid": ARCHIVE_DIR.exists(),
"is_mount": os.path.ismount(ARCHIVE_DIR.resolve()),
},
"SOURCES_DIR": {
"path": SOURCES_DIR.resolve(),
"enabled": True,
"is_valid": SOURCES_DIR.exists(),
},
"PERSONAS_DIR": {
"path": PERSONAS_DIR.resolve(),
"enabled": PERSONAS_DIR.exists(),
"is_valid": PERSONAS_DIR.exists(),
},
"LOGS_DIR": {
"path": LOGS_DIR.resolve(),
"enabled": True,
"is_valid": LOGS_DIR.is_dir(),
},
"CACHE_DIR": {
"path": CACHE_DIR.resolve(),
"enabled": True,
"is_valid": CACHE_DIR.is_dir(),
},
})
def __getitem__(self, key: str):
return getattr(self, key)
# add all key: values to globals() for easier importing
globals().update(CONSTANTS.__dict__)
# Snapshot every public ALL-CAPS name currently present in this module's
# globals into a single benedict mapping. Names starting with an underscore
# are left out.
CONSTANTS = benedict({
    name: obj
    for (name, obj) in globals().items()
    if not name.startswith('_') and name.isupper()
})
# Second name bound to the very same mapping object.
CONSTANTS_CONFIG = CONSTANTS

View file

@ -4,7 +4,7 @@ import os
import sys
import shutil
from typing import ClassVar, Dict, Optional
from typing import Dict, Optional
from datetime import datetime
from pathlib import Path
@ -12,7 +12,7 @@ from rich import print
from pydantic import Field, field_validator, model_validator, computed_field
from django.utils.crypto import get_random_string
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_configset import BaseConfigSet
from .constants import CONSTANTS, PACKAGE_DIR
@ -21,8 +21,6 @@ from .constants import CONSTANTS, PACKAGE_DIR
class ShellConfig(BaseConfigSet):
section: ClassVar[ConfigSectionName] = 'SHELL_CONFIG'
DEBUG: bool = Field(default=lambda: '--debug' in sys.argv)
IS_TTY: bool = Field(default=sys.stdout.isatty())
@ -114,8 +112,6 @@ SHELL_CONFIG = ShellConfig()
class StorageConfig(BaseConfigSet):
section: ClassVar[ConfigSectionName] = 'STORAGE_CONFIG'
OUTPUT_PERMISSIONS: str = Field(default='644')
RESTRICT_FILE_NAMES: str = Field(default='windows')
ENFORCE_ATOMIC_WRITES: bool = Field(default=True)
@ -128,8 +124,6 @@ STORAGE_CONFIG = StorageConfig()
class GeneralConfig(BaseConfigSet):
section: ClassVar[ConfigSectionName] = 'GENERAL_CONFIG'
TAG_SEPARATOR_PATTERN: str = Field(default=r'[,]')
@ -137,8 +131,6 @@ GENERAL_CONFIG = GeneralConfig()
class ServerConfig(BaseConfigSet):
section: ClassVar[ConfigSectionName] = 'SERVER_CONFIG'
SECRET_KEY: str = Field(default=lambda: get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789_'))
BIND_ADDR: str = Field(default=lambda: ['127.0.0.1:8000', '0.0.0.0:8000'][SHELL_CONFIG.IN_DOCKER])
ALLOWED_HOSTS: str = Field(default='*')
@ -163,8 +155,6 @@ SERVER_CONFIG = ServerConfig()
class ArchivingConfig(BaseConfigSet):
section: ClassVar[ConfigSectionName] = 'ARCHIVING_CONFIG'
ONLY_NEW: bool = Field(default=True)
TIMEOUT: int = Field(default=60)
@ -213,8 +203,6 @@ ARCHIVING_CONFIG = ArchivingConfig()
class SearchBackendConfig(BaseConfigSet):
section: ClassVar[ConfigSectionName] = 'SEARCH_BACKEND_CONFIG'
USE_INDEXING_BACKEND: bool = Field(default=True)
USE_SEARCHING_BACKEND: bool = Field(default=True)

View file

@ -2,10 +2,10 @@ __package__ = 'archivebox.plugins_auth.ldap'
import sys
from typing import Dict, List, ClassVar, Optional
from typing import Dict, List, Optional
from pydantic import Field, model_validator, computed_field
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_configset import BaseConfigSet
LDAP_LIB = None
try:
@ -24,7 +24,6 @@ class LdapConfig(BaseConfigSet):
It needs to be in a separate file from apps.py so that it can be imported
during settings.py initialization before the apps are loaded.
"""
section: ClassVar[ConfigSectionName] = 'LDAP_CONFIG'
LDAP_ENABLED: bool = Field(default=False, alias='LDAP')

View file

@ -18,7 +18,7 @@ from pydantic_pkgr import (
# Depends on other Django apps:
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_configset import BaseConfigSet
from abx.archivebox.base_binary import BaseBinary, env
# from abx.archivebox.base_extractor import BaseExtractor
# from abx.archivebox.base_queue import BaseQueue
@ -83,8 +83,6 @@ def create_macos_app_symlink(target: Path, shortcut: Path):
class ChromeConfig(BaseConfigSet):
section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
USE_CHROME: bool = Field(default=True)
# Chrome Binary

View file

@ -4,15 +4,13 @@ from pathlib import Path
from typing import List, Dict, Optional, ClassVar
# from typing_extensions import Self
from django.conf import settings
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field, validate_call
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, ShallowBinary
# Depends on other Django apps:
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_configset import BaseConfigSet
from abx.archivebox.base_binary import BaseBinary, env
from abx.archivebox.base_extractor import BaseExtractor
from abx.archivebox.base_hook import BaseHook
@ -24,8 +22,6 @@ from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
###################### Config ##########################
class ReadabilityConfig(BaseConfigSet):
section: ClassVar[ConfigSectionName] = 'ARCHIVING_CONFIG'
SAVE_READABILITY: bool = Field(default=True, alias='USE_READABILITY')
READABILITY_TIMEOUT: int = Field(default=lambda: ARCHIVING_CONFIG.TIMEOUT)

View file

@ -4,15 +4,13 @@ from pathlib import Path
from typing import List, Dict, Optional, ClassVar
# from typing_extensions import Self
from django.conf import settings
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field, validate_call
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, bin_abspath, ShallowBinary
# Depends on other Django apps:
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_configset import BaseConfigSet
from abx.archivebox.base_binary import BaseBinary, env
from abx.archivebox.base_extractor import BaseExtractor
from abx.archivebox.base_queue import BaseQueue
@ -25,8 +23,6 @@ from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
###################### Config ##########################
class SinglefileConfig(BaseConfigSet):
section: ClassVar[ConfigSectionName] = 'ARCHIVING_CONFIG'
SAVE_SINGLEFILE: bool = True
SINGLEFILE_USER_AGENT: str = Field(default=lambda: ARCHIVING_CONFIG.USER_AGENT)

View file

@ -3,13 +3,11 @@ from abx.archivebox.base_plugin import BasePlugin, InstanceOf, BaseHook
# class WgetToggleConfig(ConfigSet):
# section: ConfigSectionName = 'ARCHIVE_METHOD_TOGGLES'
# SAVE_WGET: bool = True
# SAVE_WARC: bool = True
# class WgetDependencyConfig(ConfigSet):
# section: ConfigSectionName = 'DEPENDENCY_CONFIG'
# WGET_BINARY: str = Field(default='wget')
# WGET_ARGS: Optional[List[str]] = Field(default=None)
@ -17,7 +15,6 @@ from abx.archivebox.base_plugin import BasePlugin, InstanceOf, BaseHook
# WGET_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
# class WgetOptionsConfig(ConfigSet):
# section: ConfigSectionName = 'ARCHIVE_METHOD_OPTIONS'
# # loaded from shared config
# WGET_AUTO_COMPRESSION: bool = Field(default=True)

View file

@ -4,11 +4,9 @@ from subprocess import run, PIPE
from pydantic import InstanceOf, Field, model_validator, AliasChoices
from django.conf import settings
from pydantic_pkgr import BinProvider, BinName, BinProviderName, ProviderLookupDict
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_configset import BaseConfigSet
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
from abx.archivebox.base_hook import BaseHook
@ -19,8 +17,6 @@ from plugins_pkg.pip.apps import pip
class YtdlpConfig(BaseConfigSet):
section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
USE_YTDLP: bool = Field(default=True, validation_alias=AliasChoices('USE_YOUTUBEDL', 'SAVE_MEDIA'))
YTDLP_BINARY: str = Field(default='yt-dlp', alias='YOUTUBEDL_BINARY')

View file

@ -19,8 +19,6 @@ from abx.archivebox.base_hook import BaseHook
class NpmDependencyConfigs(BaseConfigSet):
# section: ConfigSectionName = 'DEPENDENCY_CONFIG'
# USE_NPM: bool = True
# NPM_BINARY: str = Field(default='npm')
# NPM_ARGS: Optional[List[str]] = Field(default=None)

View file

@ -17,7 +17,7 @@ from archivebox.config import CONSTANTS, VERSION
import abx
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_configset import BaseConfigSet
from abx.archivebox.base_check import BaseCheck
from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
from abx.archivebox.base_hook import BaseHook
@ -29,8 +29,6 @@ from ...misc.logging import hint
class PipDependencyConfigs(BaseConfigSet):
section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
USE_PIP: bool = True
PIP_BINARY: str = Field(default='pip')
PIP_ARGS: Optional[List[str]] = Field(default=None)

View file

@ -34,8 +34,6 @@ from plugins_pkg.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_
class PlaywrightConfigs(BaseConfigSet):
# section: ConfigSectionName = 'DEPENDENCY_CONFIG'
# PLAYWRIGHT_BINARY: str = Field(default='wget')
# PLAYWRIGHT_ARGS: Optional[List[str]] = Field(default=None)
# PLAYWRIGHT_EXTRA_ARGS: List[str] = []

View file

@ -32,8 +32,6 @@ from plugins_pkg.npm.apps import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER
class PuppeteerConfigs(BaseConfigSet):
# section: ConfigSectionName = 'DEPENDENCY_CONFIG'
# PUPPETEER_BINARY: str = Field(default='wget')
# PUPPETEER_ARGS: Optional[List[str]] = Field(default=None)
# PUPPETEER_EXTRA_ARGS: List[str] = []

View file

@ -3,7 +3,7 @@ __package__ = 'archivebox.plugins_search.ripgrep'
import re
from pathlib import Path
from subprocess import run
from typing import List, Dict, ClassVar, Iterable
from typing import List, Dict, Iterable
# from typing_extensions import Self
# Depends on other PyPI/vendor packages:
@ -12,7 +12,7 @@ from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinN
# Depends on other Django apps:
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_configset import BaseConfigSet
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
from abx.archivebox.base_hook import BaseHook
from abx.archivebox.base_searchbackend import BaseSearchBackend
@ -23,8 +23,6 @@ from archivebox.config import CONSTANTS, SEARCH_BACKEND_CONFIG
###################### Config ##########################
class RipgrepConfig(BaseConfigSet):
section: ClassVar[ConfigSectionName] = 'DEPENDENCY_CONFIG'
RIPGREP_BINARY: str = Field(default='rg')
RIPGREP_IGNORE_EXTENSIONS: str = Field(default='css,js,orig,svg')

View file

@ -1,7 +1,7 @@
__package__ = 'archivebox.plugins_search.sonic'
import sys
from typing import List, Dict, ClassVar, Generator, cast
from typing import List, Dict, Generator, cast
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field, model_validator
@ -9,7 +9,7 @@ from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinN
# Depends on other Django apps:
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_configset import BaseConfigSet
from abx.archivebox.base_binary import BaseBinary, env, brew
from abx.archivebox.base_hook import BaseHook
from abx.archivebox.base_searchbackend import BaseSearchBackend
@ -27,8 +27,6 @@ except ImportError:
###################### Config ##########################
class SonicConfig(BaseConfigSet):
section: ClassVar[ConfigSectionName] = 'DEPENDENCY_CONFIG'
SONIC_BINARY: str = Field(default='sonic')
SONIC_HOST: str = Field(default='localhost', alias='SEARCH_BACKEND_HOST_NAME')

View file

@ -3,7 +3,7 @@ __package__ = 'archivebox.plugins_search.sqlite'
import sys
import codecs
import sqlite3
from typing import List, ClassVar, Iterable, Callable
from typing import List, Iterable, Callable
from django.core.exceptions import ImproperlyConfigured
@ -12,7 +12,7 @@ from pydantic import InstanceOf, Field, model_validator
# Depends on other Django apps:
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_configset import BaseConfigSet
from abx.archivebox.base_hook import BaseHook
from abx.archivebox.base_searchbackend import BaseSearchBackend
@ -24,8 +24,6 @@ from archivebox.config import SEARCH_BACKEND_CONFIG
###################### Config ##########################
class SqliteftsConfig(BaseConfigSet):
section: ClassVar[ConfigSectionName] = 'DEPENDENCY_CONFIG'
SQLITEFTS_SEPARATE_DATABASE: bool = Field(default=True, alias='FTS_SEPARATE_DATABASE')
SQLITEFTS_TOKENIZERS: str = Field(default='porter unicode61 remove_diacritics 2', alias='FTS_TOKENIZERS')
SQLITEFTS_MAX_LENGTH: int = Field(default=int(1e9), alias='FTS_SQLITE_MAX_LENGTH')