add new pydantic_settings based loader for ConfigSets
Some checks failed
Build Debian package / build (push) Has been cancelled
Build Homebrew package / build (push) Has been cancelled
Build GitHub Pages website / build (push) Has been cancelled
Run linters / lint (push) Has been cancelled
CodeQL / Analyze (python) (push) Has been cancelled
Build Docker image / buildx (push) Has been cancelled
Build Pip package / build (push) Has been cancelled
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Has been cancelled
Run tests / docker_tests (push) Has been cancelled
Build GitHub Pages website / deploy (push) Has been cancelled

This commit is contained in:
Nick Sweeting 2024-09-22 19:30:24 -07:00
parent c8ff8f2b86
commit b6cfeb8d40
No known key found for this signature in database
8 changed files with 201 additions and 21 deletions

View file

@ -1,7 +1,6 @@
import platform import platform
from pathlib import Path from pathlib import Path
from typing import List, Optional, Dict, Any from typing import List, Optional, Dict, ClassVar
from typing_extensions import Self
from django.conf import settings from django.conf import settings
@ -79,7 +78,7 @@ def create_macos_app_symlink(target: Path, shortcut: Path):
class ChromeDependencyConfigs(BaseConfigSet): class ChromeDependencyConfigs(BaseConfigSet):
section: ConfigSectionName = 'DEPENDENCY_CONFIG' section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
CHROME_BINARY: str = Field(default='chrome') CHROME_BINARY: str = Field(default='chrome')
CHROME_ARGS: Optional[List[str]] = Field(default=None) CHROME_ARGS: Optional[List[str]] = Field(default=None)

View file

@ -2,7 +2,7 @@ import os
import sys import sys
import inspect import inspect
from pathlib import Path from pathlib import Path
from typing import List, Dict, Optional from typing import List, Dict, Optional, ClassVar
from pydantic import InstanceOf, Field from pydantic import InstanceOf, Field
import django import django
@ -23,7 +23,7 @@ from plugantic.base_hook import BaseHook
class PipDependencyConfigs(BaseConfigSet): class PipDependencyConfigs(BaseConfigSet):
section: ConfigSectionName = 'DEPENDENCY_CONFIG' section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
USE_PIP: bool = True USE_PIP: bool = True
PIP_BINARY: str = Field(default='pip') PIP_BINARY: str = Field(default='pip')

View file

@ -1,7 +1,7 @@
__package__ = 'archivebox.builtin_plugins.singlefile' __package__ = 'archivebox.builtin_plugins.singlefile'
from pathlib import Path from pathlib import Path
from typing import List, Dict, Optional from typing import List, Dict, Optional, ClassVar
from typing_extensions import Self from typing_extensions import Self
from django.conf import settings from django.conf import settings
@ -25,13 +25,13 @@ from builtin_plugins.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
###################### Config ########################## ###################### Config ##########################
class SinglefileToggleConfigs(BaseConfigSet): class SinglefileToggleConfigs(BaseConfigSet):
section: ConfigSectionName = 'ARCHIVE_METHOD_TOGGLES' section: ClassVar[ConfigSectionName] = 'ARCHIVE_METHOD_TOGGLES'
SAVE_SINGLEFILE: bool = True SAVE_SINGLEFILE: bool = True
class SinglefileOptionsConfigs(BaseConfigSet): class SinglefileOptionsConfigs(BaseConfigSet):
section: ConfigSectionName = 'ARCHIVE_METHOD_OPTIONS' section: ClassVar[ConfigSectionName] = 'ARCHIVE_METHOD_OPTIONS'
# loaded from shared config # loaded from shared config
SINGLEFILE_USER_AGENT: str = Field(default='', alias='USER_AGENT') SINGLEFILE_USER_AGENT: str = Field(default='', alias='USER_AGENT')
@ -42,7 +42,7 @@ class SinglefileOptionsConfigs(BaseConfigSet):
class SinglefileDependencyConfigs(BaseConfigSet): class SinglefileDependencyConfigs(BaseConfigSet):
section: ConfigSectionName = 'DEPENDENCY_CONFIG' section: ClassVar[ConfigSectionName] = 'DEPENDENCY_CONFIG'
SINGLEFILE_BINARY: str = Field(default='wget') SINGLEFILE_BINARY: str = Field(default='wget')
SINGLEFILE_ARGS: Optional[List[str]] = Field(default=None) SINGLEFILE_ARGS: Optional[List[str]] = Field(default=None)
@ -50,7 +50,7 @@ class SinglefileDependencyConfigs(BaseConfigSet):
SINGLEFILE_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}'] SINGLEFILE_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
class SinglefileConfigs(SinglefileToggleConfigs, SinglefileOptionsConfigs, SinglefileDependencyConfigs): class SinglefileConfigs(SinglefileToggleConfigs, SinglefileOptionsConfigs, SinglefileDependencyConfigs):
# section: ConfigSectionName = 'ALL_CONFIGS' # section: ClassVar[ConfigSectionName] = 'ALL_CONFIGS'
pass pass
DEFAULT_GLOBAL_CONFIG = { DEFAULT_GLOBAL_CONFIG = {

View file

@ -1,4 +1,4 @@
from typing import List, Dict from typing import List, Dict, ClassVar
from subprocess import run, PIPE from subprocess import run, PIPE
from pydantic import InstanceOf, Field from pydantic import InstanceOf, Field
@ -16,7 +16,7 @@ from builtin_plugins.pip.apps import pip
class YtdlpDependencyConfigs(BaseConfigSet): class YtdlpDependencyConfigs(BaseConfigSet):
section: ConfigSectionName = 'DEPENDENCY_CONFIG' section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
USE_YTDLP: bool = True USE_YTDLP: bool = True

View file

@ -1,36 +1,186 @@
__package__ = 'archivebox.plugantic' __package__ = 'archivebox.plugantic'
from typing import List, Literal from pathlib import Path
from typing import List, Literal, Type, Tuple, Callable, ClassVar
from benedict import benedict
from pydantic import model_validator, TypeAdapter
from pydantic_settings import BaseSettings, SettingsConfigDict, PydanticBaseSettingsSource
from pydantic_settings.sources import TomlConfigSettingsSource
from django.conf import settings
from .base_hook import BaseHook, HookType from .base_hook import BaseHook, HookType
from ..config_stubs import AttrDict from . import ini_to_toml
# Imported next to its only use: derives the runtime list below from the Literal type.
from typing import get_args  # noqa: E402

# Names of the top-level sections that may appear in the ArchiveBox.conf config file.
# This Literal is the single source of truth for section names.
ConfigSectionName = Literal[
    'SHELL_CONFIG',
    'GENERAL_CONFIG',
    'SERVER_CONFIG',
    'ARCHIVE_METHOD_TOGGLES',
    'ARCHIVE_METHOD_OPTIONS',
    'SEARCH_BACKEND_CONFIG',
    'DEPENDENCY_CONFIG',
]

# Runtime list of the same names, in declaration order.
# Derived from the Literal so the two can never drift out of sync.
ConfigSectionNames: List[ConfigSectionName] = list(get_args(ConfigSectionName))
class FlatTomlConfigSettingsSource(TomlConfigSettingsSource):
    """
    A settings source that loads variables from a TOML file and flattens its sections.

    Top-level tables whose name is one of ConfigSectionNames are unpacked so their
    keys sit next to any keys that were already at the top level. Later keys
    overwrite earlier ones with the same name. The flattened data is then reduced
    to the keys the settings class can actually consume (field names and string
    aliases) and handed to the parent init-kwargs style source.

    Attributes set on the instance:
        toml_file_path:   the path that was read (argument, else model_config["toml_file"])
        nested_toml_data: the data exactly as returned by self._read_files()
        toml_data:        the flattened + filtered mapping passed to the parent source
    """

    def __init__(
        self,
        settings_cls: type[BaseSettings],
        toml_file: Path | None=None,
    ):
        self.toml_file_path = toml_file or settings_cls.model_config.get("toml_file")
        self.nested_toml_data = self._read_files(self.toml_file_path)

        flat_toml_data = {}
        for section_name, section in self.nested_toml_data.items():
            if section_name in ConfigSectionNames and isinstance(section, dict):
                # value is a known [SECTION] table, hoist its keys to the top level
                flat_toml_data.update(section)
            else:
                # value is already flat, just set it as-is
                flat_toml_data[section_name] = section

        # Keys this settings class can consume: its field names plus any plain
        # string aliases. Matching on field names alone would silently drop a
        # file key such as USER_AGENT for a field declared with alias='USER_AGENT'.
        accepted_keys = set(settings_cls.model_fields)
        for field in settings_cls.model_fields.values():
            for alias in (field.alias, field.validation_alias):
                if isinstance(alias, str):
                    accepted_keys.add(alias)

        self.toml_data = {
            key: value
            for key, value in flat_toml_data.items()
            if key in accepted_keys
        }

        # Deliberately skip TomlConfigSettingsSource.__init__ (the file has already
        # been read above) and call the next initializer in the MRO with our data.
        super(TomlConfigSettingsSource, self).__init__(settings_cls, self.toml_data)
section: ConfigSectionName = 'GENERAL_CONFIG'
class ArchiveBoxBaseConfig(BaseSettings):
    """
    This is the base class for an ArchiveBox ConfigSet.
    It handles loading values from schema defaults, ArchiveBox.conf TOML config, and environment variables.

    Precedence, lowest to highest:
        schema defaults -> ArchiveBox.conf (TOML) -> environment variables -> init kwargs

    Example:

        class WgetConfig(ArchiveBoxBaseConfig):
            WGET_BINARY: str = Field(default='wget', alias='WGET_BINARY_PATH')

        c = WgetConfig()
        print(c.WGET_BINARY)                    # outputs: wget

        # you can mutate process environment variable and reload config using .__init__()
        os.environ['WGET_BINARY_PATH'] = 'wget2'
        c.__init__()

        print(c.WGET_BINARY)                    # outputs: wget2
    """

    # these pydantic config options are all VERY carefully chosen, make sure to test thoroughly before changing!!!
    model_config = SettingsConfigDict(
        validate_default=False,
        case_sensitive=True,
        extra="ignore",
        arbitrary_types_allowed=False,
        populate_by_name=True,
        from_attributes=True,
        loc_by_alias=False,
        validate_assignment=True,
        validate_return=True,
        revalidate_instances="always",
    )

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: Type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> Tuple[PydanticBaseSettingsSource, ...]:
        """
        Defines the config precedence order: Schema defaults -> ArchiveBox.conf (TOML) -> Environment variables

        pydantic-settings gives the FIRST source in the returned tuple the HIGHEST
        priority, so the tuple is written in the reverse of the order above:
        init kwargs, then environment variables, then the config file. Schema
        defaults apply to whatever no source provides.

        If ArchiveBox.conf exists but cannot be parsed as TOML, it is assumed to be
        in the legacy INI format: the original text is saved to .ArchiveBox.conf.bak,
        the file is rewritten as TOML, and loading is retried.

        Raises:
            Any exception raised while reading the config file that is not a
            TOMLDecodeError, and any error raised by the INI conversion or retry.
        """
        config_file = settings.DATA_DIR / "ArchiveBox.conf"
        config_file_backup = config_file.parent / ".ArchiveBox.conf.bak"

        # if ArchiveBox.conf does not exist yet, only init kwargs and env can override defaults
        if not config_file.is_file():
            return (
                init_settings,
                env_settings,
            )

        try:
            # if ArchiveBox.conf exists and is in TOML format, load it directly
            toml_settings = FlatTomlConfigSettingsSource(settings_cls, toml_file=config_file)
        except Exception as err:
            # matched by name because the decode error class comes from whichever
            # TOML parser the settings library uses (not imported in this module)
            if err.__class__.__name__ != "TOMLDecodeError":
                raise

            # ArchiveBox.conf is in INI format: keep the original as
            # .ArchiveBox.conf.bak, then convert the file to TOML in place
            original_ini = config_file.read_text()
            config_file_backup.write_text(original_ini)
            new_toml = ini_to_toml.convert(original_ini)
            config_file.write_text(new_toml)

            toml_settings = FlatTomlConfigSettingsSource(settings_cls, toml_file=config_file)

        # highest priority first: environment variables must win over the config file
        return (
            init_settings,
            env_settings,
            toml_settings,
        )

    @model_validator(mode="after")
    def fill_defaults(self):
        """
        Populate any unset values using function provided as their default.

        A field whose current value is callable is treated as "not set yet": the
        callable is invoked with a dict dump of the config loaded so far, the
        result is checked against the field's type annotation, and then stored
        as the field's value. Fields are processed in declaration order, so a
        later callable sees the values produced by earlier ones.

        Returns:
            self, as required of an "after" model validator.
        """
        for key, field in type(self).model_fields.items():
            value = getattr(self, key)
            if not callable(value):
                continue

            # dump lazily (only when a default actually has to be computed) and
            # freshly each time so it includes previously filled-in defaults
            config_so_far = self.model_dump()

            # call the same object that was just checked for callability
            fallback_value = value(config_so_far)

            # check to make sure default factory return value matches type annotation
            TypeAdapter(field.annotation).validate_python(fallback_value)

            # set generated default value as final validated value
            setattr(self, key, fallback_value)
        return self
class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook):      # type: ignore[type-arg]
    """
    A config schema that is also a plugin hook of type 'CONFIG'.

    Subclasses declare their config fields plus the `section` they belong to.
    """

    hook_type: ClassVar[HookType] = 'CONFIG'

    section: ClassVar[ConfigSectionName] = 'GENERAL_CONFIG'

    def register(self, settings, parent_plugin=None):
        """
        Load this config set and publish it on the given settings object.

        settings.FLAT_CONFIG accumulates the dumped values of every config set
        registered so far; settings.CONFIGS maps each hook id to its loaded
        config instance. Both are created as empty benedicts when missing or empty.
        """
        # NOTE: parent_plugin is intentionally not stored on self, never rely on it!

        flat_config = getattr(settings, "FLAT_CONFIG", None)
        settings.FLAT_CONFIG = flat_config or benedict({})

        configs = getattr(settings, "CONFIGS", None)
        settings.CONFIGS = configs or benedict({})

        # feed everything loaded by earlier plugins into a fresh instance of our own schema
        config_cls = self.__class__
        loaded_config = config_cls(**settings.FLAT_CONFIG)

        # publish our parsed values so the next plugin can build on them
        dumped_values = loaded_config.model_dump()
        settings.FLAT_CONFIG.merge(dumped_values)
        settings.CONFIGS[self.id] = loaded_config

        super().register(settings, parent_plugin=parent_plugin)

30
pdm.lock generated
View file

@ -5,7 +5,7 @@
groups = ["default", "all", "ldap", "sonic"] groups = ["default", "all", "ldap", "sonic"]
strategy = ["inherit_metadata"] strategy = ["inherit_metadata"]
lock_version = "4.5.0" lock_version = "4.5.0"
content_hash = "sha256:d7c9e7a40b0a794986eb3f6a3774d5003c9b39985411f63c1aa387dda9986ada" content_hash = "sha256:6b062624538c5dfe6b1bd5be32546fef02b70ee73c4a1710a8eea9764bdd21d8"
[[metadata.targets]] [[metadata.targets]]
requires_python = "==3.11.*" requires_python = "==3.11.*"
@ -1147,6 +1147,22 @@ files = [
{file = "pydantic_pkgr-0.3.5.tar.gz", hash = "sha256:36444778d53d5cbdc261086fda0d65fb519a072105f5d1c7d88e224bd197dd1d"}, {file = "pydantic_pkgr-0.3.5.tar.gz", hash = "sha256:36444778d53d5cbdc261086fda0d65fb519a072105f5d1c7d88e224bd197dd1d"},
] ]
[[package]]
name = "pydantic-settings"
version = "2.5.2"
requires_python = ">=3.8"
summary = "Settings management using Pydantic"
groups = ["default"]
marker = "python_version == \"3.11\""
dependencies = [
"pydantic>=2.7.0",
"python-dotenv>=0.21.0",
]
files = [
{file = "pydantic_settings-2.5.2-py3-none-any.whl", hash = "sha256:2c912e55fd5794a59bf8c832b9de832dcfdf4778d79ff79b708744eed499a907"},
{file = "pydantic_settings-2.5.2.tar.gz", hash = "sha256:f90b139682bee4d2065273d5185d71d37ea46cfe57e1b5ae184fc6a0b2484ca0"},
]
[[package]] [[package]]
name = "pygments" name = "pygments"
version = "2.18.0" version = "2.18.0"
@ -1277,6 +1293,18 @@ files = [
{file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
] ]
[[package]]
name = "python-dotenv"
version = "1.0.1"
requires_python = ">=3.8"
summary = "Read key-value pairs from a .env file and set them as environment variables"
groups = ["default"]
marker = "python_version == \"3.11\""
files = [
{file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"},
{file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"},
]
[[package]] [[package]]
name = "python-fsutil" name = "python-fsutil"
version = "0.14.1" version = "0.14.1"

View file

@ -84,6 +84,7 @@ dependencies = [
"base32-crockford==0.3.0", "base32-crockford==0.3.0",
############# Extractor Dependencies ############# ############# Extractor Dependencies #############
"yt-dlp>=2024.8.6", # for: media "yt-dlp>=2024.8.6", # for: media
"pydantic-settings>=2.5.2",
] ]
# pdm lock --group=':all' # pdm lock --group=':all'

View file

@ -76,6 +76,7 @@ pycryptodomex==3.20.0; python_version == "3.11"
pydantic==2.9.2; python_version == "3.11" pydantic==2.9.2; python_version == "3.11"
pydantic-core==2.23.4; python_version == "3.11" pydantic-core==2.23.4; python_version == "3.11"
pydantic-pkgr==0.3.5; python_version == "3.11" pydantic-pkgr==0.3.5; python_version == "3.11"
pydantic-settings==2.5.2; python_version == "3.11"
pygments==2.18.0; python_version == "3.11" pygments==2.18.0; python_version == "3.11"
pyopenssl==24.2.1; python_version == "3.11" pyopenssl==24.2.1; python_version == "3.11"
python-benedict[html,toml,xls,xml,yaml]==0.33.2; python_version == "3.11" python-benedict[html,toml,xls,xml,yaml]==0.33.2; python_version == "3.11"
@ -83,6 +84,7 @@ python-benedict[io,parse]==0.33.2; python_version == "3.11"
python-benedict[xml]==0.33.2; python_version == "3.11" python-benedict[xml]==0.33.2; python_version == "3.11"
python-crontab==3.2.0; python_version == "3.11" python-crontab==3.2.0; python_version == "3.11"
python-dateutil==2.9.0.post0; python_version == "3.11" python-dateutil==2.9.0.post0; python_version == "3.11"
python-dotenv==1.0.1; python_version == "3.11"
python-fsutil==0.14.1; python_version == "3.11" python-fsutil==0.14.1; python_version == "3.11"
python-ldap==3.4.4; python_version == "3.11" python-ldap==3.4.4; python_version == "3.11"
python-slugify==8.0.4; python_version == "3.11" python-slugify==8.0.4; python_version == "3.11"