mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 06:34:25 -04:00
add new pydantic_settings based loader for ConfigSets
Some checks failed
Build Debian package / build (push) Has been cancelled
Build Homebrew package / build (push) Has been cancelled
Build GitHub Pages website / build (push) Has been cancelled
Run linters / lint (push) Has been cancelled
CodeQL / Analyze (python) (push) Has been cancelled
Build Docker image / buildx (push) Has been cancelled
Build Pip package / build (push) Has been cancelled
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Has been cancelled
Run tests / docker_tests (push) Has been cancelled
Build GitHub Pages website / deploy (push) Has been cancelled
Some checks failed
Build Debian package / build (push) Has been cancelled
Build Homebrew package / build (push) Has been cancelled
Build GitHub Pages website / build (push) Has been cancelled
Run linters / lint (push) Has been cancelled
CodeQL / Analyze (python) (push) Has been cancelled
Build Docker image / buildx (push) Has been cancelled
Build Pip package / build (push) Has been cancelled
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Has been cancelled
Run tests / docker_tests (push) Has been cancelled
Build GitHub Pages website / deploy (push) Has been cancelled
This commit is contained in:
parent
c8ff8f2b86
commit
b6cfeb8d40
8 changed files with 201 additions and 21 deletions
|
@ -1,7 +1,6 @@
|
|||
import platform
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Dict, Any
|
||||
from typing_extensions import Self
|
||||
from typing import List, Optional, Dict, ClassVar
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
|
@ -79,7 +78,7 @@ def create_macos_app_symlink(target: Path, shortcut: Path):
|
|||
|
||||
|
||||
class ChromeDependencyConfigs(BaseConfigSet):
|
||||
section: ConfigSectionName = 'DEPENDENCY_CONFIG'
|
||||
section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
|
||||
|
||||
CHROME_BINARY: str = Field(default='chrome')
|
||||
CHROME_ARGS: Optional[List[str]] = Field(default=None)
|
||||
|
|
|
@ -2,7 +2,7 @@ import os
|
|||
import sys
|
||||
import inspect
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional
|
||||
from typing import List, Dict, Optional, ClassVar
|
||||
from pydantic import InstanceOf, Field
|
||||
|
||||
import django
|
||||
|
@ -23,7 +23,7 @@ from plugantic.base_hook import BaseHook
|
|||
|
||||
|
||||
class PipDependencyConfigs(BaseConfigSet):
|
||||
section: ConfigSectionName = 'DEPENDENCY_CONFIG'
|
||||
section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
|
||||
|
||||
USE_PIP: bool = True
|
||||
PIP_BINARY: str = Field(default='pip')
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
__package__ = 'archivebox.builtin_plugins.singlefile'
|
||||
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional
|
||||
from typing import List, Dict, Optional, ClassVar
|
||||
from typing_extensions import Self
|
||||
|
||||
from django.conf import settings
|
||||
|
@ -25,13 +25,13 @@ from builtin_plugins.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
|
|||
###################### Config ##########################
|
||||
|
||||
class SinglefileToggleConfigs(BaseConfigSet):
|
||||
section: ConfigSectionName = 'ARCHIVE_METHOD_TOGGLES'
|
||||
section: ClassVar[ConfigSectionName] = 'ARCHIVE_METHOD_TOGGLES'
|
||||
|
||||
SAVE_SINGLEFILE: bool = True
|
||||
|
||||
|
||||
class SinglefileOptionsConfigs(BaseConfigSet):
|
||||
section: ConfigSectionName = 'ARCHIVE_METHOD_OPTIONS'
|
||||
section: ClassVar[ConfigSectionName] = 'ARCHIVE_METHOD_OPTIONS'
|
||||
|
||||
# loaded from shared config
|
||||
SINGLEFILE_USER_AGENT: str = Field(default='', alias='USER_AGENT')
|
||||
|
@ -42,7 +42,7 @@ class SinglefileOptionsConfigs(BaseConfigSet):
|
|||
|
||||
|
||||
class SinglefileDependencyConfigs(BaseConfigSet):
|
||||
section: ConfigSectionName = 'DEPENDENCY_CONFIG'
|
||||
section: ClassVar[ConfigSectionName] = 'DEPENDENCY_CONFIG'
|
||||
|
||||
SINGLEFILE_BINARY: str = Field(default='wget')
|
||||
SINGLEFILE_ARGS: Optional[List[str]] = Field(default=None)
|
||||
|
@ -50,7 +50,7 @@ class SinglefileDependencyConfigs(BaseConfigSet):
|
|||
SINGLEFILE_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
|
||||
|
||||
class SinglefileConfigs(SinglefileToggleConfigs, SinglefileOptionsConfigs, SinglefileDependencyConfigs):
|
||||
# section: ConfigSectionName = 'ALL_CONFIGS'
|
||||
# section: ClassVar[ConfigSectionName] = 'ALL_CONFIGS'
|
||||
pass
|
||||
|
||||
DEFAULT_GLOBAL_CONFIG = {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from typing import List, Dict
|
||||
from typing import List, Dict, ClassVar
|
||||
from subprocess import run, PIPE
|
||||
from pydantic import InstanceOf, Field
|
||||
|
||||
|
@ -16,7 +16,7 @@ from builtin_plugins.pip.apps import pip
|
|||
|
||||
|
||||
class YtdlpDependencyConfigs(BaseConfigSet):
|
||||
section: ConfigSectionName = 'DEPENDENCY_CONFIG'
|
||||
section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
|
||||
|
||||
USE_YTDLP: bool = True
|
||||
|
||||
|
|
|
@ -1,36 +1,186 @@
|
|||
__package__ = 'archivebox.plugantic'
|
||||
|
||||
|
||||
from typing import List, Literal
|
||||
from pathlib import Path
|
||||
from typing import List, Literal, Type, Tuple, Callable, ClassVar
|
||||
|
||||
from benedict import benedict
|
||||
from pydantic import model_validator, TypeAdapter
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict, PydanticBaseSettingsSource
|
||||
from pydantic_settings.sources import TomlConfigSettingsSource
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from .base_hook import BaseHook, HookType
|
||||
from ..config_stubs import AttrDict
|
||||
|
||||
from . import ini_to_toml
|
||||
|
||||
ConfigSectionName = Literal[
|
||||
'SHELL_CONFIG',
|
||||
'GENERAL_CONFIG',
|
||||
'SERVER_CONFIG',
|
||||
'ARCHIVE_METHOD_TOGGLES',
|
||||
'ARCHIVE_METHOD_OPTIONS',
|
||||
'SEARCH_BACKEND_CONFIG',
|
||||
'DEPENDENCY_CONFIG',
|
||||
]
|
||||
ConfigSectionNames: List[ConfigSectionName] = [
|
||||
'SHELL_CONFIG',
|
||||
'GENERAL_CONFIG',
|
||||
'SERVER_CONFIG',
|
||||
'ARCHIVE_METHOD_TOGGLES',
|
||||
'ARCHIVE_METHOD_OPTIONS',
|
||||
'SEARCH_BACKEND_CONFIG',
|
||||
'DEPENDENCY_CONFIG',
|
||||
]
|
||||
|
||||
class FlatTomlConfigSettingsSource(TomlConfigSettingsSource):
|
||||
"""
|
||||
A source class that loads variables from a TOML file
|
||||
"""
|
||||
|
||||
class BaseConfigSet(BaseHook):
|
||||
hook_type: HookType = 'CONFIG'
|
||||
def __init__(
|
||||
self,
|
||||
settings_cls: type[BaseSettings],
|
||||
toml_file: Path | None=None,
|
||||
):
|
||||
self.toml_file_path = toml_file or settings_cls.model_config.get("toml_file")
|
||||
|
||||
self.nested_toml_data = self._read_files(self.toml_file_path)
|
||||
self.toml_data = {}
|
||||
for section_name, section in self.nested_toml_data.items():
|
||||
if section_name in ConfigSectionNames and isinstance(section, dict):
|
||||
# value is nested, flatten it
|
||||
for key, value in section.items():
|
||||
self.toml_data[key] = value
|
||||
else:
|
||||
# value is already flat, just set it as-is
|
||||
self.toml_data[section_name] = section
|
||||
|
||||
# filter toml_data to only include keys that are defined on the settings_cls
|
||||
self.toml_data = {
|
||||
key: value
|
||||
for key, value in self.toml_data.items()
|
||||
if key in settings_cls.model_fields
|
||||
}
|
||||
|
||||
super(TomlConfigSettingsSource, self).__init__(settings_cls, self.toml_data)
|
||||
|
||||
section: ConfigSectionName = 'GENERAL_CONFIG'
|
||||
|
||||
class ArchiveBoxBaseConfig(BaseSettings):
    """
    This is the base class for an ArchiveBox ConfigSet.
    It handles loading values from schema defaults, ArchiveBox.conf TOML config, and environment variables.

    class WgetConfig(ArchiveBoxBaseConfig):
        WGET_BINARY: str = Field(default='wget', alias='WGET_BINARY_PATH')

    c = WgetConfig()
    print(c.WGET_BINARY)                    # outputs: wget

    # you can mutate process environment variable and reload config using .__init__()
    os.environ['WGET_BINARY_PATH'] = 'wget2'
    c.__init__()

    print(c.WGET_BINARY)                    # outputs: wget2

    """

    # these pydantic config options are all VERY carefully chosen, make sure to test thoroughly before changing!!!
    model_config = SettingsConfigDict(
        validate_default=False,
        case_sensitive=True,
        extra="ignore",
        arbitrary_types_allowed=False,
        populate_by_name=True,
        from_attributes=True,
        loc_by_alias=False,
        validate_assignment=True,
        validate_return=True,
        revalidate_instances="always",
    )

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: Type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> Tuple[PydanticBaseSettingsSource, ...]:
        """
        Defines the config precedence order: Schema defaults -> ArchiveBox.conf (TOML) -> Environment variables

        The arrows read lowest -> highest priority. pydantic-settings gives the
        FIRST source in the returned tuple the highest priority, so the tuple is
        ordered init kwargs, environment variables, then the TOML file; schema
        defaults apply to any field that no source provides.

        dotenv_settings and file_secret_settings are accepted to satisfy the
        hook signature but are never returned, so those sources are not used.

        Side effect: if ArchiveBox.conf exists but fails to parse as TOML, it is
        treated as INI: the original text is copied to .ArchiveBox.conf.bak next
        to it and ArchiveBox.conf is rewritten with the converted TOML.

        Raises:
            Any exception raised while loading ArchiveBox.conf whose class is not
            named "TOMLDecodeError" is re-raised unchanged.
        """

        ARCHIVEBOX_CONFIG_FILE = settings.DATA_DIR / "ArchiveBox.conf"
        ARCHIVEBOX_CONFIG_FILE_BAK = ARCHIVEBOX_CONFIG_FILE.parent / ".ArchiveBox.conf.bak"

        # if ArchiveBox.conf does not exist yet, there is no TOML layer at all
        if not ARCHIVEBOX_CONFIG_FILE.is_file():
            return (
                init_settings,
                env_settings,
            )

        # if ArchiveBox.conf exists and is in TOML format, load it as-is
        try:
            toml_settings = FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE)
        except Exception as err:
            # matched by class name rather than by type
            # NOTE(review): presumably to avoid depending on a specific TOML parser's exception class - confirm
            if err.__class__.__name__ != "TOMLDecodeError":
                raise
            toml_settings = None

        if toml_settings is None:
            # ArchiveBox.conf exists but is in INI format:
            # save the original to .ArchiveBox.conf.bak first, then convert the file to TOML in place
            original_ini = ARCHIVEBOX_CONFIG_FILE.read_text()
            ARCHIVEBOX_CONFIG_FILE_BAK.write_text(original_ini)
            new_toml = ini_to_toml.convert(original_ini)
            ARCHIVEBOX_CONFIG_FILE.write_text(new_toml)

            toml_settings = FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE)

        # earlier entries win: env vars must come BEFORE the TOML file so that they override it
        return (
            init_settings,
            env_settings,
            toml_settings,
        )

    @model_validator(mode="after")
    def fill_defaults(self):
        """Populate any unset values using function provided as their default"""

        for key, field in self.model_fields.items():
            value = getattr(self, key)
            if not callable(value):
                continue

            # value is a function: execute the field's default to get the actual value,
            # passing the existing config as a dict arg (dumped only when actually needed,
            # and re-dumped per field so later defaults can see earlier computed ones)
            config_so_far = self.model_dump()
            fallback_value = field.default(config_so_far)

            # check to make sure default factory return value matches type annotation
            TypeAdapter(field.annotation).validate_python(fallback_value)

            # set generated default value as final validated value
            setattr(self, key, fallback_value)
        return self
|
||||
|
||||
class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook): # type: ignore[type-arg]
|
||||
hook_type: ClassVar[HookType] = 'CONFIG'
|
||||
|
||||
section: ClassVar[ConfigSectionName] = 'GENERAL_CONFIG'
|
||||
|
||||
def register(self, settings, parent_plugin=None):
|
||||
# self._plugin = parent_plugin # for debugging only, never rely on this!
|
||||
|
||||
settings.CONFIGS = getattr(settings, "CONFIGS", None) or AttrDict({})
|
||||
settings.CONFIGS[self.id] = self
|
||||
settings.FLAT_CONFIG = getattr(settings, "FLAT_CONFIG", None) or benedict({})
|
||||
settings.CONFIGS = getattr(settings, "CONFIGS", None) or benedict({})
|
||||
|
||||
# pass FLAT_CONFIG so far into our config model to load it
|
||||
loaded_config = self.__class__(**settings.FLAT_CONFIG)
|
||||
# then dump our parsed config back into FLAT_CONFIG for the next plugin to use
|
||||
settings.FLAT_CONFIG.merge(loaded_config.model_dump())
|
||||
|
||||
settings.CONFIGS[self.id] = loaded_config
|
||||
|
||||
super().register(settings, parent_plugin=parent_plugin)
|
||||
|
||||
|
|
30
pdm.lock
generated
30
pdm.lock
generated
|
@ -5,7 +5,7 @@
|
|||
groups = ["default", "all", "ldap", "sonic"]
|
||||
strategy = ["inherit_metadata"]
|
||||
lock_version = "4.5.0"
|
||||
content_hash = "sha256:d7c9e7a40b0a794986eb3f6a3774d5003c9b39985411f63c1aa387dda9986ada"
|
||||
content_hash = "sha256:6b062624538c5dfe6b1bd5be32546fef02b70ee73c4a1710a8eea9764bdd21d8"
|
||||
|
||||
[[metadata.targets]]
|
||||
requires_python = "==3.11.*"
|
||||
|
@ -1147,6 +1147,22 @@ files = [
|
|||
{file = "pydantic_pkgr-0.3.5.tar.gz", hash = "sha256:36444778d53d5cbdc261086fda0d65fb519a072105f5d1c7d88e224bd197dd1d"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pydantic-settings"
|
||||
version = "2.5.2"
|
||||
requires_python = ">=3.8"
|
||||
summary = "Settings management using Pydantic"
|
||||
groups = ["default"]
|
||||
marker = "python_version == \"3.11\""
|
||||
dependencies = [
|
||||
"pydantic>=2.7.0",
|
||||
"python-dotenv>=0.21.0",
|
||||
]
|
||||
files = [
|
||||
{file = "pydantic_settings-2.5.2-py3-none-any.whl", hash = "sha256:2c912e55fd5794a59bf8c832b9de832dcfdf4778d79ff79b708744eed499a907"},
|
||||
{file = "pydantic_settings-2.5.2.tar.gz", hash = "sha256:f90b139682bee4d2065273d5185d71d37ea46cfe57e1b5ae184fc6a0b2484ca0"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pygments"
|
||||
version = "2.18.0"
|
||||
|
@ -1277,6 +1293,18 @@ files = [
|
|||
{file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-dotenv"
|
||||
version = "1.0.1"
|
||||
requires_python = ">=3.8"
|
||||
summary = "Read key-value pairs from a .env file and set them as environment variables"
|
||||
groups = ["default"]
|
||||
marker = "python_version == \"3.11\""
|
||||
files = [
|
||||
{file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"},
|
||||
{file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-fsutil"
|
||||
version = "0.14.1"
|
||||
|
|
|
@ -84,6 +84,7 @@ dependencies = [
|
|||
"base32-crockford==0.3.0",
|
||||
############# Extractor Dependencies #############
|
||||
"yt-dlp>=2024.8.6", # for: media
|
||||
"pydantic-settings>=2.5.2",
|
||||
]
|
||||
|
||||
# pdm lock --group=':all'
|
||||
|
|
|
@ -76,6 +76,7 @@ pycryptodomex==3.20.0; python_version == "3.11"
|
|||
pydantic==2.9.2; python_version == "3.11"
|
||||
pydantic-core==2.23.4; python_version == "3.11"
|
||||
pydantic-pkgr==0.3.5; python_version == "3.11"
|
||||
pydantic-settings==2.5.2; python_version == "3.11"
|
||||
pygments==2.18.0; python_version == "3.11"
|
||||
pyopenssl==24.2.1; python_version == "3.11"
|
||||
python-benedict[html,toml,xls,xml,yaml]==0.33.2; python_version == "3.11"
|
||||
|
@ -83,6 +84,7 @@ python-benedict[io,parse]==0.33.2; python_version == "3.11"
|
|||
python-benedict[xml]==0.33.2; python_version == "3.11"
|
||||
python-crontab==3.2.0; python_version == "3.11"
|
||||
python-dateutil==2.9.0.post0; python_version == "3.11"
|
||||
python-dotenv==1.0.1; python_version == "3.11"
|
||||
python-fsutil==0.14.1; python_version == "3.11"
|
||||
python-ldap==3.4.4; python_version == "3.11"
|
||||
python-slugify==8.0.4; python_version == "3.11"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue