add new pydantic_settings based loader for ConfigSets
Some checks failed
Build Debian package / build (push) Has been cancelled
Build Homebrew package / build (push) Has been cancelled
Build GitHub Pages website / build (push) Has been cancelled
Run linters / lint (push) Has been cancelled
CodeQL / Analyze (python) (push) Has been cancelled
Build Docker image / buildx (push) Has been cancelled
Build Pip package / build (push) Has been cancelled
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Has been cancelled
Run tests / docker_tests (push) Has been cancelled
Build GitHub Pages website / deploy (push) Has been cancelled

This commit is contained in:
Nick Sweeting 2024-09-22 19:30:24 -07:00
parent c8ff8f2b86
commit b6cfeb8d40
No known key found for this signature in database
8 changed files with 201 additions and 21 deletions

View file

@ -1,7 +1,6 @@
import platform
from pathlib import Path
from typing import List, Optional, Dict, Any
from typing_extensions import Self
from typing import List, Optional, Dict, ClassVar
from django.conf import settings
@ -79,7 +78,7 @@ def create_macos_app_symlink(target: Path, shortcut: Path):
class ChromeDependencyConfigs(BaseConfigSet):
section: ConfigSectionName = 'DEPENDENCY_CONFIG'
section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
CHROME_BINARY: str = Field(default='chrome')
CHROME_ARGS: Optional[List[str]] = Field(default=None)

View file

@ -2,7 +2,7 @@ import os
import sys
import inspect
from pathlib import Path
from typing import List, Dict, Optional
from typing import List, Dict, Optional, ClassVar
from pydantic import InstanceOf, Field
import django
@ -23,7 +23,7 @@ from plugantic.base_hook import BaseHook
class PipDependencyConfigs(BaseConfigSet):
section: ConfigSectionName = 'DEPENDENCY_CONFIG'
section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
USE_PIP: bool = True
PIP_BINARY: str = Field(default='pip')

View file

@ -1,7 +1,7 @@
__package__ = 'archivebox.builtin_plugins.singlefile'
from pathlib import Path
from typing import List, Dict, Optional
from typing import List, Dict, Optional, ClassVar
from typing_extensions import Self
from django.conf import settings
@ -25,13 +25,13 @@ from builtin_plugins.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
###################### Config ##########################
class SinglefileToggleConfigs(BaseConfigSet):
section: ConfigSectionName = 'ARCHIVE_METHOD_TOGGLES'
section: ClassVar[ConfigSectionName] = 'ARCHIVE_METHOD_TOGGLES'
SAVE_SINGLEFILE: bool = True
class SinglefileOptionsConfigs(BaseConfigSet):
section: ConfigSectionName = 'ARCHIVE_METHOD_OPTIONS'
section: ClassVar[ConfigSectionName] = 'ARCHIVE_METHOD_OPTIONS'
# loaded from shared config
SINGLEFILE_USER_AGENT: str = Field(default='', alias='USER_AGENT')
@ -42,7 +42,7 @@ class SinglefileOptionsConfigs(BaseConfigSet):
class SinglefileDependencyConfigs(BaseConfigSet):
section: ConfigSectionName = 'DEPENDENCY_CONFIG'
section: ClassVar[ConfigSectionName] = 'DEPENDENCY_CONFIG'
SINGLEFILE_BINARY: str = Field(default='wget')
SINGLEFILE_ARGS: Optional[List[str]] = Field(default=None)
@ -50,7 +50,7 @@ class SinglefileDependencyConfigs(BaseConfigSet):
SINGLEFILE_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
class SinglefileConfigs(SinglefileToggleConfigs, SinglefileOptionsConfigs, SinglefileDependencyConfigs):
# section: ConfigSectionName = 'ALL_CONFIGS'
# section: ClassVar[ConfigSectionName] = 'ALL_CONFIGS'
pass
DEFAULT_GLOBAL_CONFIG = {

View file

@ -1,4 +1,4 @@
from typing import List, Dict
from typing import List, Dict, ClassVar
from subprocess import run, PIPE
from pydantic import InstanceOf, Field
@ -16,7 +16,7 @@ from builtin_plugins.pip.apps import pip
class YtdlpDependencyConfigs(BaseConfigSet):
section: ConfigSectionName = 'DEPENDENCY_CONFIG'
section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
USE_YTDLP: bool = True

View file

@ -1,36 +1,186 @@
__package__ = 'archivebox.plugantic'
from typing import List, Literal
from pathlib import Path
from typing import List, Literal, Type, Tuple, Callable, ClassVar
from benedict import benedict
from pydantic import model_validator, TypeAdapter
from pydantic_settings import BaseSettings, SettingsConfigDict, PydanticBaseSettingsSource
from pydantic_settings.sources import TomlConfigSettingsSource
from django.conf import settings
from .base_hook import BaseHook, HookType
from ..config_stubs import AttrDict
from . import ini_to_toml
# Static type accepted for a ConfigSet's `section` attribute: one of the
# top-level section headers used in ArchiveBox.conf.
ConfigSectionName = Literal[
    "SHELL_CONFIG",
    "GENERAL_CONFIG",
    "SERVER_CONFIG",
    "ARCHIVE_METHOD_TOGGLES",
    "ARCHIVE_METHOD_OPTIONS",
    "SEARCH_BACKEND_CONFIG",
    "DEPENDENCY_CONFIG",
]

# Runtime list of the same section names, for membership checks.
# Keep this in sync with the Literal above.
ConfigSectionNames: List[ConfigSectionName] = [
    "SHELL_CONFIG",
    "GENERAL_CONFIG",
    "SERVER_CONFIG",
    "ARCHIVE_METHOD_TOGGLES",
    "ARCHIVE_METHOD_OPTIONS",
    "SEARCH_BACKEND_CONFIG",
    "DEPENDENCY_CONFIG",
]
class FlatTomlConfigSettingsSource(TomlConfigSettingsSource):
"""
A source class that loads variables from a TOML file
"""
class BaseConfigSet(BaseHook):
hook_type: HookType = 'CONFIG'
def __init__(
    self,
    settings_cls: type[BaseSettings],
    toml_file: Path | None=None,
):
    """
    Read the TOML file and expose its values as one flat mapping.

    Tables whose name is one of ConfigSectionNames are flattened so their
    keys sit at the top level; any other top-level entry is kept as-is.
    Only keys that match a field name on settings_cls are retained.

    settings_cls: the settings model these values are loaded for
    toml_file: path to read; falls back to model_config["toml_file"]
    """
    self.toml_file_path = toml_file or settings_cls.model_config.get("toml_file")
    self.nested_toml_data = self._read_files(self.toml_file_path)

    flattened = {}
    for top_key, top_value in self.nested_toml_data.items():
        is_known_section = top_key in ConfigSectionNames and isinstance(top_value, dict)
        if is_known_section:
            # hoist every key of a known [SECTION] table to the top level
            flattened.update(top_value)
        else:
            # already a flat value (or an unrecognised table), keep untouched
            flattened[top_key] = top_value

    # drop everything that is not a declared field on this settings class
    # NOTE(review): this matches on field *names* only, so a key written under
    # a field's alias would be discarded here -- confirm that is intended
    declared_fields = settings_cls.model_fields
    self.toml_data = {
        name: val
        for name, val in flattened.items()
        if name in declared_fields
    }

    # deliberately skip TomlConfigSettingsSource.__init__ and call the next
    # initializer in the MRO with the already-flattened data
    super(TomlConfigSettingsSource, self).__init__(settings_cls, self.toml_data)
section: ConfigSectionName = 'GENERAL_CONFIG'
class ArchiveBoxBaseConfig(BaseSettings):
    """
    This is the base class for an ArchiveBox ConfigSet.
    It handles loading values from schema defaults, ArchiveBox.conf TOML config, and environment variables.

    class WgetConfig(ArchiveBoxBaseConfig):
        WGET_BINARY: str = Field(default='wget', alias='WGET_BINARY_PATH')

    c = WgetConfig()
    print(c.WGET_BINARY)                    # outputs: wget

    # you can mutate process environment variable and reload config using .__init__()
    os.environ['WGET_BINARY_PATH'] = 'wget2'
    c.__init__()

    print(c.WGET_BINARY)                    # outputs: wget2
    """

    # these pydantic config options are all VERY carefully chosen, make sure to test thoroughly before changing!!!
    model_config = SettingsConfigDict(
        validate_default=False,
        case_sensitive=True,
        extra="ignore",
        arbitrary_types_allowed=False,
        populate_by_name=True,
        from_attributes=True,
        loc_by_alias=False,
        validate_assignment=True,
        validate_return=True,
        revalidate_instances="always",
    )

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: Type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> Tuple[PydanticBaseSettingsSource, ...]:
        """
        Defines the config precedence order: Schema defaults -> ArchiveBox.conf (TOML) -> Environment variables

        (listed from lowest to highest priority; explicit init kwargs win over all of them)

        pydantic-settings gives the FIRST source in the returned tuple the
        highest priority, so the tuple is ordered init kwargs, environment,
        ArchiveBox.conf. Schema defaults apply to whatever no source provides.

        If ArchiveBox.conf exists but cannot be parsed as TOML it is assumed to
        be in the legacy INI format: the original is saved to .ArchiveBox.conf.bak
        and the file is rewritten in place as TOML before being loaded.
        """
        config_file = settings.DATA_DIR / "ArchiveBox.conf"
        backup_file = config_file.parent / ".ArchiveBox.conf.bak"

        # if ArchiveBox.conf does not exist yet, only init kwargs and env apply
        if not config_file.is_file():
            return (
                init_settings,
                env_settings,
            )

        # if ArchiveBox.conf exists and is in TOML format, load it directly
        toml_settings = None
        try:
            toml_settings = FlatTomlConfigSettingsSource(settings_cls, toml_file=config_file)
        except Exception as err:
            # matched by class name rather than by importing the exception type;
            # presumably so it works whichever TOML parser raised it -- TODO confirm
            if err.__class__.__name__ != "TOMLDecodeError":
                raise

        if toml_settings is None:
            # ArchiveBox.conf exists but is in INI format: back up the original,
            # convert it to TOML in place, then load the converted file
            original_ini = config_file.read_text()
            backup_file.write_text(original_ini)
            new_toml = ini_to_toml.convert(original_ini)
            config_file.write_text(new_toml)
            toml_settings = FlatTomlConfigSettingsSource(settings_cls, toml_file=config_file)

        # env_settings must come BEFORE the TOML source so that environment
        # variables override values from ArchiveBox.conf
        return (
            init_settings,
            env_settings,
            toml_settings,
        )

    @model_validator(mode="after")
    def fill_defaults(self):
        """
        Populate any unset values using function provided as their default

        A field whose current value is callable is treated as "not set yet":
        the callable is invoked with a dict dump of the config loaded so far,
        its return value is checked against the field's type annotation, and
        the result replaces the callable on the instance.
        """
        for key, field in type(self).model_fields.items():
            value = getattr(self, key)
            if not callable(value):
                continue

            # dump lazily (only when a value actually has to be computed) so it
            # also reflects any fields already filled in by earlier iterations
            config_so_far = self.model_dump()

            # execute the function to get the actual value, passing existing config as a dict arg
            fallback_value = value(config_so_far)

            # check to make sure default factory return value matches type annotation
            TypeAdapter(field.annotation).validate_python(fallback_value)

            # set generated default value as final validated value
            setattr(self, key, fallback_value)
        return self
class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook):      # type: ignore[type-arg]
    """A config hook: a settings model that registers its loaded values on the Django settings object."""

    hook_type: ClassVar[HookType] = 'CONFIG'

    # which ArchiveBox.conf section this set of options belongs to
    section: ClassVar[ConfigSectionName] = 'GENERAL_CONFIG'

    def register(self, settings, parent_plugin=None):
        """
        Load this ConfigSet and publish it on `settings`.

        settings.FLAT_CONFIG accumulates the dumped values of every ConfigSet
        registered so far; settings.CONFIGS maps each hook id to its loaded
        instance. Both are created as empty benedicts if missing.
        """
        settings.FLAT_CONFIG = getattr(settings, "FLAT_CONFIG", None) or benedict({})
        settings.CONFIGS = getattr(settings, "CONFIGS", None) or benedict({})

        # pass FLAT_CONFIG so far into our config model to load it
        loaded_config = self.__class__(**settings.FLAT_CONFIG)

        # then dump our parsed config back into FLAT_CONFIG for the next plugin to use
        settings.FLAT_CONFIG.merge(loaded_config.model_dump())

        # register the freshly loaded instance (not `self`) under this hook's id
        settings.CONFIGS[self.id] = loaded_config

        super().register(settings, parent_plugin=parent_plugin)