mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-23 11:17:02 -04:00
rename configfile to collection
This commit is contained in:
parent
63bf902f35
commit
60f0458c77
9 changed files with 41 additions and 37 deletions
|
@ -14,7 +14,6 @@ from pydantic_pkgr import (
|
|||
EnvProvider,
|
||||
)
|
||||
|
||||
from archivebox.config import CONSTANTS
|
||||
from archivebox.config.permissions import ARCHIVEBOX_USER
|
||||
|
||||
import abx
|
||||
|
@ -34,6 +33,7 @@ class BaseBinProvider(BinProvider):
|
|||
return [self]
|
||||
|
||||
class BaseBinary(Binary):
|
||||
# TODO: formalize state diagram, final states, transitions, side effects, etc.
|
||||
|
||||
@staticmethod
|
||||
def symlink_to_lib(binary, bin_dir=None) -> None:
|
||||
|
|
|
@ -99,7 +99,7 @@ class BaseConfigSet(BaseSettings):
|
|||
)
|
||||
|
||||
load_from_defaults: ClassVar[bool] = True
|
||||
load_from_configfile: ClassVar[bool] = True
|
||||
load_from_collection: ClassVar[bool] = True
|
||||
load_from_environment: ClassVar[bool] = True
|
||||
|
||||
@classmethod
|
||||
|
@ -128,7 +128,8 @@ class BaseConfigSet(BaseSettings):
|
|||
try:
|
||||
precedence_order = precedence_order or {
|
||||
'defaults': init_settings,
|
||||
'configfile': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
|
||||
# 'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
|
||||
'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
|
||||
'environment': env_settings,
|
||||
}
|
||||
except Exception as err:
|
||||
|
@ -144,14 +145,15 @@ class BaseConfigSet(BaseSettings):
|
|||
|
||||
precedence_order = {
|
||||
'defaults': init_settings,
|
||||
'configfile': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
|
||||
# 'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
|
||||
'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
|
||||
'environment': env_settings,
|
||||
}
|
||||
|
||||
if not cls.load_from_environment:
|
||||
precedence_order.pop('environment')
|
||||
if not cls.load_from_configfile:
|
||||
precedence_order.pop('configfile')
|
||||
if not cls.load_from_collection:
|
||||
precedence_order.pop('collection')
|
||||
if not cls.load_from_defaults:
|
||||
precedence_order.pop('defaults')
|
||||
|
||||
|
@ -278,15 +280,15 @@ class BaseConfigSet(BaseSettings):
|
|||
"""Get the dictionary of {key: value} config loaded from the default values"""
|
||||
class OnlyDefaultsConfig(self.__class__):
|
||||
load_from_defaults = True
|
||||
load_from_configfile = False
|
||||
load_from_collection = False
|
||||
load_from_environment = False
|
||||
return benedict(OnlyDefaultsConfig().model_dump(exclude_unset=False, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))
|
||||
|
||||
def from_configfile(self) -> Dict[str, Any]:
|
||||
"""Get the dictionary of {key: value} config loaded from the configfile ArchiveBox.conf"""
|
||||
def from_collection(self) -> Dict[str, Any]:
|
||||
"""Get the dictionary of {key: value} config loaded from the collection ArchiveBox.conf"""
|
||||
class OnlyConfigFileConfig(self.__class__):
|
||||
load_from_defaults = False
|
||||
load_from_configfile = True
|
||||
load_from_collection = True
|
||||
load_from_environment = False
|
||||
return benedict(OnlyConfigFileConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))
|
||||
|
||||
|
@ -294,7 +296,7 @@ class BaseConfigSet(BaseSettings):
|
|||
"""Get the dictionary of {key: value} config loaded from the environment variables"""
|
||||
class OnlyEnvironmentConfig(self.__class__):
|
||||
load_from_defaults = False
|
||||
load_from_configfile = False
|
||||
load_from_collection = False
|
||||
load_from_environment = True
|
||||
return benedict(OnlyEnvironmentConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))
|
||||
|
||||
|
|
|
@ -4,10 +4,9 @@ import json
|
|||
import os
|
||||
|
||||
from typing import Optional, List, Literal, Annotated, Dict, Any, Tuple
|
||||
from typing_extensions import Self
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import model_validator, AfterValidator
|
||||
from pydantic import AfterValidator
|
||||
from pydantic_pkgr import BinName
|
||||
from django.utils.functional import cached_property
|
||||
from django.utils import timezone
|
||||
|
@ -17,36 +16,22 @@ import abx
|
|||
from .base_binary import BaseBinary
|
||||
|
||||
|
||||
def no_empty_args(args: List[str]) -> List[str]:
|
||||
def assert_no_empty_args(args: List[str]) -> List[str]:
|
||||
assert all(len(arg) for arg in args)
|
||||
return args
|
||||
|
||||
ExtractorName = Literal['wget', 'warc', 'media', 'singlefile'] | str
|
||||
ExtractorName = Annotated[str, AfterValidator(lambda s: s.isidentifier())]
|
||||
|
||||
HandlerFuncStr = Annotated[str, AfterValidator(lambda s: s.startswith('self.'))]
|
||||
CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(no_empty_args)]
|
||||
CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(assert_no_empty_args)]
|
||||
|
||||
|
||||
class BaseExtractor:
|
||||
|
||||
name: ExtractorName
|
||||
binary: BinName
|
||||
|
||||
output_path_func: HandlerFuncStr = 'self.get_output_path'
|
||||
should_extract_func: HandlerFuncStr = 'self.should_extract'
|
||||
extract_func: HandlerFuncStr = 'self.extract'
|
||||
exec_func: HandlerFuncStr = 'self.exec'
|
||||
|
||||
default_args: CmdArgsList = []
|
||||
extra_args: CmdArgsList = []
|
||||
args: Optional[CmdArgsList] = None
|
||||
|
||||
@model_validator(mode='after')
|
||||
def validate_model(self) -> Self:
|
||||
if self.args is None:
|
||||
self.args = [*self.default_args, *self.extra_args]
|
||||
return self
|
||||
|
||||
|
||||
def get_output_path(self, snapshot) -> Path:
|
||||
return Path(self.__class__.__name__.lower())
|
||||
|
@ -71,7 +56,7 @@ class BaseExtractor:
|
|||
|
||||
snapshot = Snapshot.objects.get(id=snapshot_id)
|
||||
|
||||
if not self.should_extract(snapshot):
|
||||
if not self.should_extract(snapshot.url):
|
||||
return {}
|
||||
|
||||
status = 'failed'
|
||||
|
|
|
@ -57,7 +57,7 @@ def get_HOOKS() -> Set[str]:
|
|||
for hook_name in get_PLUGIN(plugin_id).hooks
|
||||
}
|
||||
|
||||
def get_CONFIGS() -> Dict[str, 'BaseConfigSet']:
|
||||
def get_CONFIGS() -> benedict: # Dict[str, 'BaseConfigSet']
|
||||
return benedict({
|
||||
config_id: configset
|
||||
for plugin_configs in pm.hook.get_CONFIG()
|
||||
|
|
|
@ -88,7 +88,7 @@ def create_root_snapshot_from_seed(crawl):
|
|||
def create_archiveresults_pending_from_snapshot(snapshot, config):
|
||||
config = get_scope_config(
|
||||
# defaults=settings.CONFIG_FROM_DEFAULTS,
|
||||
# configfile=settings.CONFIG_FROM_FILE,
|
||||
# collection=settings.CONFIG_FROM_FILE,
|
||||
# environment=settings.CONFIG_FROM_ENVIRONMENT,
|
||||
persona=archiveresult.snapshot.crawl.persona,
|
||||
seed=archiveresult.snapshot.crawl.seed,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue