From 0abbc11a6bb5e52977fd8a81f12a86bcdf44d9f4 Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Tue, 15 Oct 2024 22:32:36 -0700
Subject: [PATCH] add get_scope_config method to merge config down to scope

---
Review note: get_scope_config now resolves its `defaults` fallback at call
time instead of in the signature, so the reads.py hunk and the diffstat below
differ from commit 0abbc11a. The `index` post-image hash for reads.py was not
regenerated. Blank context lines and indentation were restored from the hunk
line counts.

 archivebox/abx/archivebox/hookspec.py | 13 +++++++++++++
 archivebox/abx/archivebox/reads.py    | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/archivebox/abx/archivebox/hookspec.py b/archivebox/abx/archivebox/hookspec.py
index b92dae24..54bf1113 100644
--- a/archivebox/abx/archivebox/hookspec.py
+++ b/archivebox/abx/archivebox/hookspec.py
@@ -4,15 +4,28 @@ from typing import Dict, Any
 
 from .. import hookspec
 
+from .base_binary import BaseBinary, BaseBinProvider
 from .base_configset import BaseConfigSet
 from .base_extractor import BaseExtractor
 from .base_searchbackend import BaseSearchBackend
 
 
+@hookspec
+def get_PLUGIN() -> Dict[str, Dict[str, Any]]:
+    return {}
+
 @hookspec
 def get_CONFIG() -> Dict[str, BaseConfigSet]:
     return {}
 
+@hookspec
+def get_BINARIES() -> Dict[str, BaseBinary]:
+    return {}
+
+@hookspec
+def get_BINPROVIDERS() -> Dict[str, BaseBinProvider]:
+    return {}
+
 @hookspec
 def get_EXTRACTORS() -> Dict[str, BaseExtractor]:
     return {}
diff --git a/archivebox/abx/archivebox/reads.py b/archivebox/abx/archivebox/reads.py
index f2479b5b..5653a7fd 100644
--- a/archivebox/abx/archivebox/reads.py
+++ b/archivebox/abx/archivebox/reads.py
@@ -128,3 +128,42 @@ def get_SEARCHBACKENDS() -> Dict[str, 'BaseSearchBackend']:
         for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS()
         for searchbackend_id,searchbackend in plugin_searchbackends.items()
     })
+
+
+
+def get_scope_config(defaults=None, persona=None, seed=None, crawl=None, snapshot=None, archiveresult=None, extra_config=None):
+    """Get all the relevant config for the given scope, in correct precedence order.
+
+    Any scope object that is not passed explicitly is derived from the most
+    specific one given: archiveresult -> snapshot -> crawl -> seed / persona.
+    The result is a new dict in which later layers override earlier ones.
+
+    defaults falls back to settings.CONFIG, looked up at call time rather than
+    in the signature, so that importing this module does not read settings.
+    """
+
+    if defaults is None:
+        # NOTE(review): relies on a module-level `settings` in reads.py, as the original signature did -- confirm
+        defaults = settings.CONFIG
+
+    snapshot = snapshot or (archiveresult and archiveresult.snapshot)
+    crawl = crawl or (snapshot and snapshot.crawl)
+    seed = seed or (crawl and crawl.seed)
+    persona = persona or (crawl and crawl.persona)
+
+    persona_config = persona.config if persona else {}
+    seed_config = seed.config if seed else {}
+    crawl_config = crawl.config if crawl else {}
+    snapshot_config = snapshot.config if snapshot else {}
+    archiveresult_config = archiveresult.config if archiveresult else {}
+    extra_config = extra_config or {}
+
+    return {
+        **defaults,  # defaults / config file / environment variables
+        **persona_config,  # lowest precedence
+        **seed_config,
+        **crawl_config,
+        **snapshot_config,
+        **archiveresult_config,
+        **extra_config,  # highest precedence
+    }