rename configfile to collection

This commit is contained in:
Nick Sweeting 2024-10-24 15:40:24 -07:00
parent 63bf902f35
commit 60f0458c77
No known key found for this signature in database
9 changed files with 41 additions and 37 deletions

View file

@ -14,7 +14,6 @@ from pydantic_pkgr import (
EnvProvider,
)
from archivebox.config import CONSTANTS
from archivebox.config.permissions import ARCHIVEBOX_USER
import abx
@ -34,6 +33,7 @@ class BaseBinProvider(BinProvider):
return [self]
class BaseBinary(Binary):
# TODO: formalize state diagram, final states, transitions, side effects, etc.
@staticmethod
def symlink_to_lib(binary, bin_dir=None) -> None:

View file

@ -99,7 +99,7 @@ class BaseConfigSet(BaseSettings):
)
load_from_defaults: ClassVar[bool] = True
load_from_configfile: ClassVar[bool] = True
load_from_collection: ClassVar[bool] = True
load_from_environment: ClassVar[bool] = True
@classmethod
@ -128,7 +128,8 @@ class BaseConfigSet(BaseSettings):
try:
precedence_order = precedence_order or {
'defaults': init_settings,
'configfile': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
# 'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
'environment': env_settings,
}
except Exception as err:
@ -144,14 +145,15 @@ class BaseConfigSet(BaseSettings):
precedence_order = {
'defaults': init_settings,
'configfile': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
# 'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
'environment': env_settings,
}
if not cls.load_from_environment:
precedence_order.pop('environment')
if not cls.load_from_configfile:
precedence_order.pop('configfile')
if not cls.load_from_collection:
precedence_order.pop('collection')
if not cls.load_from_defaults:
precedence_order.pop('defaults')
@ -278,15 +280,15 @@ class BaseConfigSet(BaseSettings):
"""Get the dictionary of {key: value} config loaded from the default values"""
class OnlyDefaultsConfig(self.__class__):
load_from_defaults = True
load_from_configfile = False
load_from_collection = False
load_from_environment = False
return benedict(OnlyDefaultsConfig().model_dump(exclude_unset=False, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))
def from_configfile(self) -> Dict[str, Any]:
"""Get the dictionary of {key: value} config loaded from the configfile ArchiveBox.conf"""
def from_collection(self) -> Dict[str, Any]:
    """Return only the {key: value} config that comes from the collection ArchiveBox.conf.

    A throwaway subclass of this instance's class is built with the
    defaults and environment sources switched off, so the only source left
    enabled is the collection config file.  Computed fields are excluded
    from the dump, and ``exclude_unset=True`` drops every field that was
    not explicitly set on the freshly built instance.
    """
    class OnlyConfigFileConfig(self.__class__):
        # Only the collection source stays enabled for this one-off subclass.
        load_from_environment = False
        load_from_collection = True
        load_from_defaults = False

    collection_only = OnlyConfigFileConfig()
    computed_field_names = set(self.model_computed_fields.keys())
    dumped = collection_only.model_dump(
        exclude_unset=True,
        exclude_defaults=False,
        exclude=computed_field_names,
    )
    return benedict(dumped)
@ -294,7 +296,7 @@ class BaseConfigSet(BaseSettings):
"""Get the dictionary of {key: value} config loaded from the environment variables"""
class OnlyEnvironmentConfig(self.__class__):
load_from_defaults = False
load_from_configfile = False
load_from_collection = False
load_from_environment = True
return benedict(OnlyEnvironmentConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))

View file

@ -4,10 +4,9 @@ import json
import os
from typing import Optional, List, Literal, Annotated, Dict, Any, Tuple
from typing_extensions import Self
from pathlib import Path
from pydantic import model_validator, AfterValidator
from pydantic import AfterValidator
from pydantic_pkgr import BinName
from django.utils.functional import cached_property
from django.utils import timezone
@ -17,36 +16,22 @@ import abx
from .base_binary import BaseBinary
def no_empty_args(args: List[str]) -> List[str]:
def assert_no_empty_args(args: List[str]) -> List[str]:
    """Validate that no argument in ``args`` is empty, then return ``args`` unchanged.

    Intended for use as a validator callable: it returns its input so the
    validated value is passed through as-is.

    Raises:
        AssertionError: if any element of ``args`` has length 0.
    """
    # Raise explicitly instead of using a bare ``assert`` statement: assert
    # statements are removed when Python runs with -O, which would silently
    # disable this validation.  The exception type is kept as AssertionError
    # so existing callers see the same failure mode.
    if any(len(arg) == 0 for arg in args):
        raise AssertionError(f'command args must not contain empty values: {args!r}')
    return args
ExtractorName = Literal['wget', 'warc', 'media', 'singlefile'] | str
def _validate_extractor_name(value: str) -> str:
    """Return ``value`` unchanged if it is a valid Python identifier, else raise ValueError."""
    if not value.isidentifier():
        raise ValueError(f'extractor name must be a valid identifier, got {value!r}')
    return value


def _validate_handler_func_str(value: str) -> str:
    """Return ``value`` unchanged if it starts with 'self.', else raise ValueError."""
    if not value.startswith('self.'):
        raise ValueError(f"handler func must start with 'self.', got {value!r}")
    return value


# An AfterValidator's return value becomes the validated value, so the
# validator must return the original string (and raise to reject it).
# Returning the boolean from isidentifier()/startswith() would replace the
# string with True/False and never reject bad input.
ExtractorName = Annotated[str, AfterValidator(_validate_extractor_name)]
HandlerFuncStr = Annotated[str, AfterValidator(_validate_handler_func_str)]
CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(no_empty_args)]
CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(assert_no_empty_args)]
class BaseExtractor:
name: ExtractorName
binary: BinName
output_path_func: HandlerFuncStr = 'self.get_output_path'
should_extract_func: HandlerFuncStr = 'self.should_extract'
extract_func: HandlerFuncStr = 'self.extract'
exec_func: HandlerFuncStr = 'self.exec'
default_args: CmdArgsList = []
extra_args: CmdArgsList = []
args: Optional[CmdArgsList] = None
@model_validator(mode='after')
def validate_model(self) -> Self:
if self.args is None:
self.args = [*self.default_args, *self.extra_args]
return self
def get_output_path(self, snapshot) -> Path:
return Path(self.__class__.__name__.lower())
@ -71,7 +56,7 @@ class BaseExtractor:
snapshot = Snapshot.objects.get(id=snapshot_id)
if not self.should_extract(snapshot):
if not self.should_extract(snapshot.url):
return {}
status = 'failed'

View file

@ -57,7 +57,7 @@ def get_HOOKS() -> Set[str]:
for hook_name in get_PLUGIN(plugin_id).hooks
}
def get_CONFIGS() -> Dict[str, 'BaseConfigSet']:
def get_CONFIGS() -> benedict: # Dict[str, 'BaseConfigSet']
return benedict({
config_id: configset
for plugin_configs in pm.hook.get_CONFIG()

View file

@ -88,7 +88,7 @@ def create_root_snapshot_from_seed(crawl):
def create_archiveresults_pending_from_snapshot(snapshot, config):
config = get_scope_config(
# defaults=settings.CONFIG_FROM_DEFAULTS,
# configfile=settings.CONFIG_FROM_FILE,
# collection=settings.CONFIG_FROM_FILE,
# environment=settings.CONFIG_FROM_ENVIRONMENT,
persona=archiveresult.snapshot.crawl.persona,
seed=archiveresult.snapshot.crawl.seed,