From 8d3f45b720664cd0fe38806e5642dc3764958c3e Mon Sep 17 00:00:00 2001 From: Nick Sweeting <github@sweeting.me> Date: Fri, 27 Sep 2024 00:41:21 -0700 Subject: [PATCH] merge plugantic and abx, all praise be to praise our glorious pluggy gods --- archivebox/abx/__init__.py | 204 +---------- archivebox/abx/archivebox/__init__.py | 39 ++ .../abx/archivebox/base_admindataview.py | 38 ++ .../archivebox}/base_binary.py | 33 +- .../archivebox}/base_check.py | 36 +- .../archivebox}/base_configset.py | 61 ++-- .../archivebox}/base_extractor.py | 47 +-- .../archivebox}/base_hook.py | 49 +-- .../archivebox}/base_plugin.py | 92 +---- .../archivebox}/base_queue.py | 86 ++--- .../archivebox}/base_replayer.py | 20 +- .../abx/archivebox/base_searchbackend.py | 33 ++ .../hookspec.py} | 4 +- archivebox/abx/archivebox/use.py | 98 +++++ archivebox/abx/django/__init__.py | 1 + archivebox/abx/{ => django}/apps.py | 3 +- archivebox/abx/django/hookspec.py | 120 +++++++ archivebox/abx/django/use.py | 98 +++++ archivebox/abx/hookspec.py | 6 +- archivebox/abx/hookspec_django_apps.py | 6 - archivebox/abx/hookspec_django_settings.py | 90 ----- archivebox/abx/hookspec_django_urls.py | 12 - archivebox/abx/manager.py | 30 ++ archivebox/abx/pydantic_pkgr/__init__.py | 1 + .../hookspec.py} | 2 +- archivebox/cli/__init__.py | 1 - archivebox/config.py | 27 +- archivebox/core/admin.py | 2 +- archivebox/core/settings.py | 87 ++--- archivebox/core/views.py | 7 +- archivebox/main.py | 20 +- archivebox/misc/checks.py | 30 +- archivebox/plugantic/__init__.py | 1 - archivebox/plugantic/apps.py | 12 - archivebox/plugantic/base_admindataview.py | 39 -- archivebox/plugantic/base_searchbackend.py | 39 -- archivebox/plugantic/management/__init__.py | 0 .../plugantic/management/commands/__init__.py | 0 .../plugantic/management/commands/pkg.py | 72 ---- archivebox/plugantic/tests.py | 337 ------------------ archivebox/plugins_auth/ldap/apps.py | 11 +- archivebox/plugins_auth/ldap/settings.py | 38 +- 
.../plugins_extractor/archivedotorg/apps.py | 6 +- archivebox/plugins_extractor/chrome/apps.py | 12 +- archivebox/plugins_extractor/favicon/apps.py | 6 +- .../plugins_extractor/readability/apps.py | 10 +- .../plugins_extractor/singlefile/apps.py | 25 +- archivebox/plugins_extractor/wget/apps.py | 74 ++++ archivebox/plugins_extractor/ytdlp/apps.py | 8 +- archivebox/plugins_pkg/npm/apps.py | 8 +- archivebox/plugins_pkg/pip/apps.py | 10 +- archivebox/plugins_pkg/playwright/apps.py | 12 +- archivebox/plugins_pkg/puppeteer/apps.py | 12 +- archivebox/plugins_search/ripgrep/apps.py | 10 +- archivebox/plugins_search/sonic/apps.py | 10 +- archivebox/plugins_search/sqlite/apps.py | 26 +- archivebox/plugins_sys/config/apps.py | 36 +- .../config}/views.py | 14 +- archivebox/util.py | 2 +- 59 files changed, 870 insertions(+), 1343 deletions(-) create mode 100644 archivebox/abx/archivebox/__init__.py create mode 100644 archivebox/abx/archivebox/base_admindataview.py rename archivebox/{plugantic => abx/archivebox}/base_binary.py (77%) rename archivebox/{plugantic => abx/archivebox}/base_check.py (57%) rename archivebox/{plugantic => abx/archivebox}/base_configset.py (88%) rename archivebox/{plugantic => abx/archivebox}/base_extractor.py (70%) rename archivebox/{plugantic => abx/archivebox}/base_hook.py (72%) rename archivebox/{plugantic => abx/archivebox}/base_plugin.py (58%) rename archivebox/{plugantic => abx/archivebox}/base_queue.py (54%) rename archivebox/{plugantic => abx/archivebox}/base_replayer.py (60%) create mode 100644 archivebox/abx/archivebox/base_searchbackend.py rename archivebox/abx/{hookspec_archivebox.py => archivebox/hookspec.py} (85%) create mode 100644 archivebox/abx/archivebox/use.py create mode 100644 archivebox/abx/django/__init__.py rename archivebox/abx/{ => django}/apps.py (79%) create mode 100644 archivebox/abx/django/hookspec.py create mode 100644 archivebox/abx/django/use.py delete mode 100644 archivebox/abx/hookspec_django_apps.py delete mode 
100644 archivebox/abx/hookspec_django_settings.py delete mode 100644 archivebox/abx/hookspec_django_urls.py create mode 100644 archivebox/abx/manager.py create mode 100644 archivebox/abx/pydantic_pkgr/__init__.py rename archivebox/abx/{hookspec_pydantic_pkgr.py => pydantic_pkgr/hookspec.py} (85%) delete mode 100644 archivebox/plugantic/__init__.py delete mode 100644 archivebox/plugantic/apps.py delete mode 100644 archivebox/plugantic/base_admindataview.py delete mode 100644 archivebox/plugantic/base_searchbackend.py delete mode 100644 archivebox/plugantic/management/__init__.py delete mode 100644 archivebox/plugantic/management/commands/__init__.py delete mode 100644 archivebox/plugantic/management/commands/pkg.py delete mode 100644 archivebox/plugantic/tests.py create mode 100644 archivebox/plugins_extractor/wget/apps.py rename archivebox/{plugantic => plugins_sys/config}/views.py (98%) diff --git a/archivebox/abx/__init__.py b/archivebox/abx/__init__.py index 04c7d81d..22ce993a 100644 --- a/archivebox/abx/__init__.py +++ b/archivebox/abx/__init__.py @@ -1,19 +1,19 @@ -import itertools +__package__ = 'abx' + import importlib from pathlib import Path from typing import Dict -from benedict import benedict - -import pluggy -import archivebox from . 
import hookspec as base_spec from .hookspec import hookimpl, hookspec # noqa +from .manager import pm, PluginManager # noqa -pm = pluggy.PluginManager("abx") pm.add_hookspecs(base_spec) + +###### PLUGIN DISCOVERY AND LOADING ######################################################## + def register_hookspecs(hookspecs): for hookspec_import_path in hookspecs: hookspec_module = importlib.import_module(hookspec_import_path) @@ -48,27 +48,6 @@ def get_plugins_in_dirs(plugin_dirs: Dict[str, Path]): DETECTED_PLUGINS.update(find_plugins_in_dir(plugin_dir, prefix=plugin_prefix)) return DETECTED_PLUGINS -def get_builtin_plugins(): - PLUGIN_DIRS = { - 'plugins_sys': archivebox.PACKAGE_DIR / 'plugins_sys', - 'plugins_pkg': archivebox.PACKAGE_DIR / 'plugins_pkg', - 'plugins_auth': archivebox.PACKAGE_DIR / 'plugins_auth', - 'plugins_search': archivebox.PACKAGE_DIR / 'plugins_search', - 'plugins_extractor': archivebox.PACKAGE_DIR / 'plugins_extractor', - } - DETECTED_PLUGINS = {} - for plugin_prefix, plugin_dir in PLUGIN_DIRS.items(): - DETECTED_PLUGINS.update(find_plugins_in_dir(plugin_dir, prefix=plugin_prefix)) - return DETECTED_PLUGINS - -def get_user_plugins(): - return find_plugins_in_dir(archivebox.DATA_DIR / 'user_plugins', prefix='user_plugins') - - -# BUILTIN_PLUGINS = get_builtin_plugins() -# PIP_PLUGINS = get_pip_installed_plugins() -# USER_PLUGINS = get_user_plugins() -# ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS} # Load all plugins from pip packages, archivebox built-ins, and user plugins @@ -76,7 +55,7 @@ def load_plugins(plugins_dict: Dict[str, Path]): LOADED_PLUGINS = {} for plugin_module, plugin_dir in plugins_dict.items(): # print(f'Loading plugin: {plugin_module} from {plugin_dir}') - plugin_module_loaded = importlib.import_module(plugin_module + '.apps') + plugin_module_loaded = importlib.import_module(plugin_module) pm.register(plugin_module_loaded) LOADED_PLUGINS[plugin_module] = plugin_module_loaded.PLUGIN # print(f' √ Loaded plugin: 
{plugin_module}') @@ -100,172 +79,3 @@ def get_registered_plugins(): return plugins -def get_plugins_INSTALLLED_APPS(): - return itertools.chain(*pm.hook.get_INSTALLED_APPS()) - -def register_plugins_INSTALLLED_APPS(INSTALLED_APPS): - pm.hook.register_INSTALLED_APPS(INSTALLED_APPS=INSTALLED_APPS) - - -def get_plugins_MIDDLEWARE(): - return itertools.chain(*pm.hook.get_MIDDLEWARE()) - -def register_plugins_MIDDLEWARE(MIDDLEWARE): - pm.hook.register_MIDDLEWARE(MIDDLEWARE=MIDDLEWARE) - - -def get_plugins_AUTHENTICATION_BACKENDS(): - return itertools.chain(*pm.hook.get_AUTHENTICATION_BACKENDS()) - -def register_plugins_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS): - pm.hook.register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS=AUTHENTICATION_BACKENDS) - - -def get_plugins_STATICFILES_DIRS(): - return itertools.chain(*pm.hook.get_STATICFILES_DIRS()) - -def register_plugins_STATICFILES_DIRS(STATICFILES_DIRS): - pm.hook.register_STATICFILES_DIRS(STATICFILES_DIRS=STATICFILES_DIRS) - - -def get_plugins_TEMPLATE_DIRS(): - return itertools.chain(*pm.hook.get_TEMPLATE_DIRS()) - -def register_plugins_TEMPLATE_DIRS(TEMPLATE_DIRS): - pm.hook.register_TEMPLATE_DIRS(TEMPLATE_DIRS=TEMPLATE_DIRS) - -def get_plugins_DJANGO_HUEY_QUEUES(): - HUEY_QUEUES = {} - for plugin_result in pm.hook.get_DJANGO_HUEY_QUEUES(): - HUEY_QUEUES.update(plugin_result) - return HUEY_QUEUES - -def register_plugins_DJANGO_HUEY(DJANGO_HUEY): - pm.hook.register_DJANGO_HUEY(DJANGO_HUEY=DJANGO_HUEY) - -def get_plugins_ADMIN_DATA_VIEWS_URLS(): - return itertools.chain(*pm.hook.get_ADMIN_DATA_VIEWS_URLS()) - -def register_plugins_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS): - pm.hook.register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS=ADMIN_DATA_VIEWS) - - -def register_plugins_settings(settings): - # convert settings dict to an benedict so we can set values using settings.attr = xyz notation - settings_as_obj = benedict(settings, keypath_separator=None) - - # set default values for settings that are used by plugins - 
settings_as_obj.INSTALLED_APPS = settings_as_obj.get('INSTALLED_APPS', []) - settings_as_obj.MIDDLEWARE = settings_as_obj.get('MIDDLEWARE', []) - settings_as_obj.AUTHENTICATION_BACKENDS = settings_as_obj.get('AUTHENTICATION_BACKENDS', []) - settings_as_obj.STATICFILES_DIRS = settings_as_obj.get('STATICFILES_DIRS', []) - settings_as_obj.TEMPLATE_DIRS = settings_as_obj.get('TEMPLATE_DIRS', []) - settings_as_obj.DJANGO_HUEY = settings_as_obj.get('DJANGO_HUEY', {'queues': {}}) - settings_as_obj.ADMIN_DATA_VIEWS = settings_as_obj.get('ADMIN_DATA_VIEWS', {'URLS': []}) - - # call all the hook functions to mutate the settings values in-place - register_plugins_INSTALLLED_APPS(settings_as_obj.INSTALLED_APPS) - register_plugins_MIDDLEWARE(settings_as_obj.MIDDLEWARE) - register_plugins_AUTHENTICATION_BACKENDS(settings_as_obj.AUTHENTICATION_BACKENDS) - register_plugins_STATICFILES_DIRS(settings_as_obj.STATICFILES_DIRS) - register_plugins_TEMPLATE_DIRS(settings_as_obj.TEMPLATE_DIRS) - register_plugins_DJANGO_HUEY(settings_as_obj.DJANGO_HUEY) - register_plugins_ADMIN_DATA_VIEWS(settings_as_obj.ADMIN_DATA_VIEWS) - - # calls Plugin.settings(settings) on each registered plugin - pm.hook.register_settings(settings=settings_as_obj) - - # then finally update the settings globals() object will all the new settings - settings.update(settings_as_obj) - - -def get_plugins_urlpatterns(): - return list(itertools.chain(*pm.hook.urlpatterns())) - -def register_plugins_urlpatterns(urlpatterns): - pm.hook.register_urlpatterns(urlpatterns=urlpatterns) - - -# PLUGANTIC HOOKS - -def get_plugins_PLUGINS(): - return benedict({ - plugin.PLUGIN.id: plugin.PLUGIN - for plugin in pm.get_plugins() - }) - -def get_plugins_HOOKS(PLUGINS): - return benedict({ - hook.id: hook - for plugin in PLUGINS.values() - for hook in plugin.hooks - }) - -def get_plugins_CONFIGS(): - return benedict({ - config.id: config - for plugin_configs in pm.hook.get_CONFIGS() - for config in plugin_configs - }) - -def 
get_plugins_FLAT_CONFIG(CONFIGS): - FLAT_CONFIG = {} - for config in CONFIGS.values(): - FLAT_CONFIG.update(config.model_dump()) - return benedict(FLAT_CONFIG) - -def get_plugins_BINPROVIDERS(): - return benedict({ - binprovider.id: binprovider - for plugin_binproviders in pm.hook.get_BINPROVIDERS() - for binprovider in plugin_binproviders - }) - -def get_plugins_BINARIES(): - return benedict({ - binary.id: binary - for plugin_binaries in pm.hook.get_BINARIES() - for binary in plugin_binaries - }) - -def get_plugins_EXTRACTORS(): - return benedict({ - extractor.id: extractor - for plugin_extractors in pm.hook.get_EXTRACTORS() - for extractor in plugin_extractors - }) - -def get_plugins_REPLAYERS(): - return benedict({ - replayer.id: replayer - for plugin_replayers in pm.hook.get_REPLAYERS() - for replayer in plugin_replayers - }) - -def get_plugins_CHECKS(): - return benedict({ - check.id: check - for plugin_checks in pm.hook.get_CHECKS() - for check in plugin_checks - }) - -def get_plugins_ADMINDATAVIEWS(): - return benedict({ - admin_dataview.id: admin_dataview - for plugin_admin_dataviews in pm.hook.get_ADMINDATAVIEWS() - for admin_dataview in plugin_admin_dataviews - }) - -def get_plugins_QUEUES(): - return benedict({ - queue.id: queue - for plugin_queues in pm.hook.get_QUEUES() - for queue in plugin_queues - }) - -def get_plugins_SEARCHBACKENDS(): - return benedict({ - searchbackend.id: searchbackend - for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS() - for searchbackend in plugin_searchbackends - }) diff --git a/archivebox/abx/archivebox/__init__.py b/archivebox/abx/archivebox/__init__.py new file mode 100644 index 00000000..c8ed2146 --- /dev/null +++ b/archivebox/abx/archivebox/__init__.py @@ -0,0 +1,39 @@ +__package__ = 'abx.archivebox' + +import importlib + +from typing import Dict +from pathlib import Path + + +def load_archivebox_plugins(pm, plugins_dict: Dict[str, Path]): + """Load archivebox plugins, very similar to abx.load_plugins but it 
looks for a pydantic PLUGIN model + hooks in apps.py""" + LOADED_PLUGINS = {} + for plugin_module, plugin_dir in plugins_dict.items(): + # print(f'Loading plugin: {plugin_module} from {plugin_dir}') + + archivebox_plugins_found = [] + + # 1. register the plugin module directly in case it contains any look hookimpls (e.g. in __init__.py) + plugin_module_loaded = importlib.import_module(plugin_module) + pm.register(plugin_module_loaded) + if hasattr(plugin_module_loaded, 'PLUGIN'): + archivebox_plugins_found.append(plugin_module_loaded.PLUGIN) + + # 2. then try to import plugin_module.apps as well + if (plugin_dir / 'apps.py').exists(): + plugin_apps = importlib.import_module(plugin_module + '.apps') + pm.register(plugin_apps) # register the whole .apps in case it contains loose hookimpls (not in a class) + if hasattr(plugin_apps, 'PLUGIN'): + archivebox_plugins_found.append(plugin_apps.PLUGIN) + + # 3. then try to look for plugin_module.PLUGIN and register it + all its hooks + for ab_plugin in archivebox_plugins_found: + pm.register(ab_plugin) + for hook in ab_plugin.hooks: + hook.__signature__ = hook.__class__.__signature__ # fix to make pydantic model usable as Pluggy plugin + pm.register(hook) + LOADED_PLUGINS[plugin_module] = ab_plugin + + # print(f' √ Loaded plugin: {LOADED_PLUGINS}') + return LOADED_PLUGINS diff --git a/archivebox/abx/archivebox/base_admindataview.py b/archivebox/abx/archivebox/base_admindataview.py new file mode 100644 index 00000000..32cf49fc --- /dev/null +++ b/archivebox/abx/archivebox/base_admindataview.py @@ -0,0 +1,38 @@ +__package__ = 'abx.archivebox' + +from typing import Dict + +import abx + +from .base_hook import BaseHook, HookType + + +class BaseAdminDataView(BaseHook): + hook_type: HookType = "ADMINDATAVIEW" + + name: str = 'example_admin_data_view_list' + verbose_name: str = 'Data View' + route: str = '/__OVERRIDE_THIS__/' + view: str = 'plugins_example.example.views.example_view_list' + + items: Dict[str, str] = { + 'route': 
'<str:key>/', + "name": 'example_admin_data_view_item', + 'view': 'plugins_example.example.views.example_view_item', + } + + @abx.hookimpl + def get_ADMINDATAVIEWS(self): + return [self] + + @abx.hookimpl + def get_ADMIN_DATA_VIEWS_URLS(self): + """routes to be added to django.conf.settings.ADMIN_DATA_VIEWS['urls']""" + route = { + "route": self.route, + "view": self.view, + "name": self.verbose_name, + "items": self.items, + } + return [route] + diff --git a/archivebox/plugantic/base_binary.py b/archivebox/abx/archivebox/base_binary.py similarity index 77% rename from archivebox/plugantic/base_binary.py rename to archivebox/abx/archivebox/base_binary.py index e3a995ef..786f41e5 100644 --- a/archivebox/plugantic/base_binary.py +++ b/archivebox/abx/archivebox/base_binary.py @@ -1,9 +1,8 @@ -__package__ = "archivebox.plugantic" +__package__ = "abx.archivebox" from typing import Dict, List from typing_extensions import Self -from benedict import benedict from pydantic import Field, InstanceOf, validate_call from pydantic_pkgr import ( Binary, @@ -15,10 +14,8 @@ from pydantic_pkgr import ( EnvProvider, ) -from django.conf import settings - +import abx import archivebox - from .base_hook import BaseHook, HookType @@ -37,19 +34,17 @@ class BaseBinProvider(BaseHook, BinProvider): # # return cache.get_or_set(f'bin:version:{bin_name}:{abspath}', get_version_func) # return get_version_func() - def register(self, settings, parent_plugin=None): - # self._plugin = parent_plugin # for debugging only, never rely on this! - - settings.BINPROVIDERS = getattr(settings, "BINPROVIDERS", None) or benedict({}) - settings.BINPROVIDERS[self.id] = self - - super().register(settings, parent_plugin=parent_plugin) + + # TODO: add install/load/load_or_install methods as abx.hookimpl methods @property def admin_url(self) -> str: # e.g. 
/admin/environment/binproviders/NpmBinProvider/ TODO return "/admin/environment/binaries/" + @abx.hookimpl + def get_BINPROVIDERS(self): + return [self] class BaseBinary(BaseHook, Binary): hook_type: HookType = "BINARY" @@ -57,14 +52,6 @@ class BaseBinary(BaseHook, Binary): binproviders_supported: List[InstanceOf[BinProvider]] = Field(default_factory=list, alias="binproviders") provider_overrides: Dict[BinProviderName, ProviderLookupDict] = Field(default_factory=dict, alias="overrides") - def register(self, settings, parent_plugin=None): - # self._plugin = parent_plugin # for debugging only, never rely on this! - - settings.BINARIES = getattr(settings, "BINARIES", None) or benedict({}) - settings.BINARIES[self.id] = self - - super().register(settings, parent_plugin=parent_plugin) - @staticmethod def symlink_to_lib(binary, bin_dir=None) -> None: bin_dir = bin_dir or archivebox.CONSTANTS.LIB_BIN_DIR @@ -101,6 +88,12 @@ class BaseBinary(BaseHook, Binary): # e.g. /admin/environment/config/LdapConfig/ return f"/admin/environment/binaries/{self.name}/" + @abx.hookimpl + def get_BINARIES(self): + return [self] + + + apt = AptProvider() brew = BrewProvider() env = EnvProvider() diff --git a/archivebox/plugantic/base_check.py b/archivebox/abx/archivebox/base_check.py similarity index 57% rename from archivebox/plugantic/base_check.py rename to archivebox/abx/archivebox/base_check.py index 029113b6..c0d46f1c 100644 --- a/archivebox/plugantic/base_check.py +++ b/archivebox/abx/archivebox/base_check.py @@ -1,10 +1,11 @@ -__package__ = "archivebox.plugantic" +__package__ = "abx.archivebox" -import abx from typing import List from django.core.checks import Warning, Tags, register +import abx + from .base_hook import BaseHook, HookType @@ -26,21 +27,18 @@ class BaseCheck(BaseHook): # logger.debug('[√] Loaded settings.PLUGINS succesfully.') return errors - def register(self, settings, parent_plugin=None): - # self._plugin = parent_plugin # backref to parent is for debugging only, 
never rely on this! - - abx.pm.hook.register_django_check(check=self, settings=settings) - - - -@abx.hookspec -@abx.hookimpl -def register_django_check(check: BaseCheck, settings): - def run_check(app_configs, **kwargs) -> List[Warning]: - import logging - return check.check(settings, logging.getLogger("checks")) - - run_check.__name__ = check.id - run_check.tags = [check.tag] - register(check.tag)(run_check) + @abx.hookimpl + def get_CHECKS(self): + return [self] + @abx.hookimpl + def register_checks(self): + """Tell django that this check exists so it can be run automatically by django.""" + def run_check(**kwargs): + from django.conf import settings + import logging + return self.check(settings, logging.getLogger("checks")) + + run_check.__name__ = self.id + run_check.tags = [self.tag] + register(self.tag)(run_check) diff --git a/archivebox/plugantic/base_configset.py b/archivebox/abx/archivebox/base_configset.py similarity index 88% rename from archivebox/plugantic/base_configset.py rename to archivebox/abx/archivebox/base_configset.py index 972173dc..b27b302b 100644 --- a/archivebox/plugantic/base_configset.py +++ b/archivebox/abx/archivebox/base_configset.py @@ -1,4 +1,4 @@ -__package__ = 'archivebox.plugantic' +__package__ = 'abx.archivebox' import os import re @@ -14,8 +14,10 @@ from pydantic_settings.sources import TomlConfigSettingsSource from pydantic_pkgr.base_types import func_takes_args_or_kwargs +import abx + from .base_hook import BaseHook, HookType -from . 
import ini_to_toml +from archivebox.misc import ini_to_toml PACKAGE_DIR = Path(__file__).resolve().parent.parent @@ -236,6 +238,7 @@ class ArchiveBoxBaseConfig(BaseSettings): for key, field in self.model_fields.items() }) + class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook): # type: ignore[type-arg] hook_type: ClassVar[HookType] = 'CONFIG' @@ -261,42 +264,20 @@ class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook): # type: ignore[type-ar # self.__init__() -# class WgetToggleConfig(ConfigSet): -# section: ConfigSectionName = 'ARCHIVE_METHOD_TOGGLES' + @abx.hookimpl + def get_CONFIGS(self): + try: + return {self.id: self} + except Exception as e: + # raise Exception(f'Error computing CONFIGS for {type(self)}: {e.__class__.__name__}: {e}') + print(f'Error computing CONFIGS for {type(self)}: {e.__class__.__name__}: {e}') + return {} -# SAVE_WGET: bool = True -# SAVE_WARC: bool = True - -# class WgetDependencyConfig(ConfigSet): -# section: ConfigSectionName = 'DEPENDENCY_CONFIG' - -# WGET_BINARY: str = Field(default='wget') -# WGET_ARGS: Optional[List[str]] = Field(default=None) -# WGET_EXTRA_ARGS: List[str] = [] -# WGET_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}'] - -# class WgetOptionsConfig(ConfigSet): -# section: ConfigSectionName = 'ARCHIVE_METHOD_OPTIONS' - -# # loaded from shared config -# WGET_AUTO_COMPRESSION: bool = Field(default=True) -# SAVE_WGET_REQUISITES: bool = Field(default=True) -# WGET_USER_AGENT: str = Field(default='', alias='USER_AGENT') -# WGET_TIMEOUT: int = Field(default=60, alias='TIMEOUT') -# WGET_CHECK_SSL_VALIDITY: bool = Field(default=True, alias='CHECK_SSL_VALIDITY') -# WGET_RESTRICT_FILE_NAMES: str = Field(default='windows', alias='RESTRICT_FILE_NAMES') -# WGET_COOKIES_FILE: Optional[Path] = Field(default=None, alias='COOKIES_FILE') - - -# CONFIG = { -# 'CHECK_SSL_VALIDITY': False, -# 'SAVE_WARC': False, -# 'TIMEOUT': 999, -# } - - -# WGET_CONFIG = [ -# WgetToggleConfig(**CONFIG), -# WgetDependencyConfig(**CONFIG), -# 
WgetOptionsConfig(**CONFIG), -# ] + @abx.hookimpl + def get_FLAT_CONFIG(self): + try: + return self.model_dump() + except Exception as e: + # raise Exception(f'Error computing FLAT_CONFIG for {type(self)}: {e.__class__.__name__}: {e}') + print(f'Error computing FLAT_CONFIG for {type(self)}: {e.__class__.__name__}: {e}') + return {} diff --git a/archivebox/plugantic/base_extractor.py b/archivebox/abx/archivebox/base_extractor.py similarity index 70% rename from archivebox/plugantic/base_extractor.py rename to archivebox/abx/archivebox/base_extractor.py index 5d7b6a27..4ba47cdd 100644 --- a/archivebox/plugantic/base_extractor.py +++ b/archivebox/abx/archivebox/base_extractor.py @@ -1,4 +1,4 @@ -__package__ = 'archivebox.plugantic' +__package__ = 'abx.archivebox' from typing import Optional, List, Literal, Annotated, Dict, Any from typing_extensions import Self @@ -8,9 +8,9 @@ from pathlib import Path from pydantic import model_validator, AfterValidator from pydantic_pkgr import BinName -from .base_hook import BaseHook, HookType -from ..config_stubs import AttrDict +import abx +from .base_hook import BaseHook, HookType def no_empty_args(args: List[str]) -> List[str]: @@ -45,16 +45,6 @@ class BaseExtractor(BaseHook): return self - def register(self, settings, parent_plugin=None): - # self._plugin = parent_plugin # for debugging only, never rely on this! 
- - settings.EXTRACTORS = getattr(settings, "EXTRACTORS", None) or AttrDict({}) - settings.EXTRACTORS[self.id] = self - - super().register(settings, parent_plugin=parent_plugin) - - - def get_output_path(self, snapshot) -> Path: return Path(self.id.lower()) @@ -64,7 +54,7 @@ class BaseExtractor(BaseHook): return False return True - + # TODO: move this to a hookimpl def extract(self, url: str, **kwargs) -> Dict[str, Any]: output_dir = self.get_output_path(url, **kwargs) @@ -81,6 +71,7 @@ class BaseExtractor(BaseHook): 'returncode': proc.returncode, } + # TODO: move this to a hookimpl def exec(self, args: CmdArgsList, pwd: Optional[Path]=None, settings=None): pwd = pwd or Path('.') if settings is None: @@ -90,28 +81,6 @@ class BaseExtractor(BaseHook): binary = settings.BINARIES[self.binary] return binary.exec(args, pwd=pwd) - -# class YtdlpExtractor(Extractor): -# name: ExtractorName = 'media' -# binary: Binary = YtdlpBinary() - -# def get_output_path(self, snapshot) -> Path: -# return 'media/' - - -# class WgetExtractor(Extractor): -# name: ExtractorName = 'wget' -# binary: Binary = WgetBinary() - -# def get_output_path(self, snapshot) -> Path: -# return get_wget_output_path(snapshot) - - -# class WarcExtractor(Extractor): -# name: ExtractorName = 'warc' -# binary: Binary = WgetBinary() - -# def get_output_path(self, snapshot) -> Path: -# return get_wget_output_path(snapshot) - - + @abx.hookimpl + def get_EXTRACTORS(self): + return [self] diff --git a/archivebox/plugantic/base_hook.py b/archivebox/abx/archivebox/base_hook.py similarity index 72% rename from archivebox/plugantic/base_hook.py rename to archivebox/abx/archivebox/base_hook.py index adc5cad7..1642652d 100644 --- a/archivebox/plugantic/base_hook.py +++ b/archivebox/abx/archivebox/base_hook.py @@ -1,4 +1,4 @@ -__package__ = 'archivebox.plugantic' +__package__ = 'abx.archivebox' import inspect from huey.api import TaskWrapper @@ -7,6 +7,7 @@ from pathlib import Path from typing import Tuple, Literal, 
ClassVar, get_args from pydantic import BaseModel, ConfigDict +import abx HookType = Literal['CONFIG', 'BINPROVIDER', 'BINARY', 'EXTRACTOR', 'REPLAYER', 'CHECK', 'ADMINDATAVIEW', 'QUEUE', 'SEARCHBACKEND'] hook_type_names: Tuple[HookType] = get_args(HookType) @@ -29,8 +30,8 @@ class BaseHook(BaseModel): plugins_pkg.npm.NpmPlugin().AppConfig.ready() # called by django plugins_pkg.npm.NpmPlugin().register(settings) -> plugins_pkg.npm.NpmConfigSet().register(settings) - plugantic.base_configset.BaseConfigSet().register(settings) - plugantic.base_hook.BaseHook().register(settings, parent_plugin=plugins_pkg.npm.NpmPlugin()) + abx.archivebox.base_configset.BaseConfigSet().register(settings) + abx.archivebox.base_hook.BaseHook().register(settings, parent_plugin=plugins_pkg.npm.NpmPlugin()) ... ... @@ -96,32 +97,20 @@ class BaseHook(BaseModel): # e.g. /admin/environment/config/LdapConfig/ return f"/admin/environment/{self.hook_type.lower()}/{self.id}/" - # def register(self, settings, parent_plugin=None): - # """Load a record of an installed hook into global Django settings.HOOKS at runtime.""" - # self._plugin = parent_plugin # for debugging only, never rely on this! - # # assert json.dumps(self.model_json_schema(), indent=4), f"Hook {self.hook_module} has invalid JSON schema." 
+ @abx.hookimpl + def register(self, settings): + """Called when django.apps.AppConfig.ready() is called""" + + print("REGISTERED HOOK:", self.hook_module) + self._is_registered = True + - # # print(' -', self.hook_module, '.register()') - - # # record installed hook in settings.HOOKS - # settings.REGISTERED_HOOKS[self.id] = self - - # if settings.REGISTERED_HOOKS[self.id]._is_registered: - # raise Exception(f"Tried to run {self.hook_module}.register() but its already been called!") - - # settings.REGISTERED_HOOKS[self.id]._is_registered = True - - # # print("REGISTERED HOOK:", self.hook_module) - - # def ready(self, settings): - # """Runs any runtime code needed when AppConfig.ready() is called (after all models are imported).""" - - # # print(' -', self.hook_module, '.ready()') - - # assert self.id in settings.REGISTERED_HOOKS, f"Tried to ready hook {self.hook_module} but it is not registered in settings.REGISTERED_HOOKS." - - # if settings.REGISTERED_HOOKS[self.id]._is_ready: - # raise Exception(f"Tried to run {self.hook_module}.ready() but its already been called!") - - # settings.REGISTERED_HOOKS[self.id]._is_ready = True + @abx.hookimpl + def ready(self): + """Called when django.apps.AppConfig.ready() is called""" + + assert self._is_registered, f"Tried to run {self.hook_module}.ready() but it was never registered!" 
+ + # print("READY HOOK:", self.hook_module) + self._is_ready = True diff --git a/archivebox/plugantic/base_plugin.py b/archivebox/abx/archivebox/base_plugin.py similarity index 58% rename from archivebox/plugantic/base_plugin.py rename to archivebox/abx/archivebox/base_plugin.py index 2071a648..ac17e9c9 100644 --- a/archivebox/plugantic/base_plugin.py +++ b/archivebox/abx/archivebox/base_plugin.py @@ -1,4 +1,4 @@ -__package__ = 'archivebox.plugantic' +__package__ = 'abx.archivebox' import abx import inspect @@ -16,7 +16,6 @@ from pydantic import ( model_validator, InstanceOf, computed_field, - validate_call, ) from benedict import benedict @@ -124,91 +123,32 @@ class BasePlugin(BaseModel): hooks[hook.hook_type][hook.id] = hook return hooks + + + @abx.hookimpl def register(self, settings): - """Loads this plugin's configs, binaries, extractors, and replayers into global Django settings at import time (before models are imported or any AppConfig.ready() are called).""" + from archivebox.config import bump_startup_progress_bar - from ..config import bump_startup_progress_bar - - # assert settings.PLUGINS[self.id] == self - # # assert self.id not in settings.PLUGINS, f'Tried to register plugin {self.plugin_module} but it conflicts with existing plugin of the same name ({self.app_label}).' - - # ### Mutate django.conf.settings... 
values in-place to include plugin-provided overrides - - # if settings.PLUGINS[self.id]._is_registered: - # raise Exception(f"Tried to run {self.plugin_module}.register() but its already been called!") - - # for hook in self.hooks: - # hook.register(settings, parent_plugin=self) - - # settings.PLUGINS[self.id]._is_registered = True - # # print('√ REGISTERED PLUGIN:', self.plugin_module) + self._is_registered = True bump_startup_progress_bar() + print('◣----------------- REGISTERED PLUGIN:', self.plugin_module, '-----------------◢') + print() + + @abx.hookimpl def ready(self, settings=None): """Runs any runtime code needed when AppConfig.ready() is called (after all models are imported).""" - from ..config import bump_startup_progress_bar + from archivebox.config import bump_startup_progress_bar + assert self._is_registered, f"Tried to run {self.plugin_module}.ready() but it was never registered!" + self._is_ready = True - # if settings is None: - # from django.conf import settings as django_settings - # settings = django_settings - - # # print() - # # print(self.plugin_module_full, '.ready()') - - # assert ( - # self.id in settings.PLUGINS and settings.PLUGINS[self.id]._is_registered - # ), f"Tried to run plugin.ready() for {self.plugin_module} but plugin is not yet registered in settings.PLUGINS." 
- - # if settings.PLUGINS[self.id]._is_ready: - # raise Exception(f"Tried to run {self.plugin_module}.ready() but its already been called!") - - # for hook in self.hooks: - # hook.ready(settings) - # settings.PLUGINS[self.id]._is_ready = True bump_startup_progress_bar() - @validate_call - def install_binaries(self) -> Self: - new_binaries = [] - for idx, binary in enumerate(self.binaries): - new_binaries.append(binary.install() or binary) - return self.model_copy(update={ - 'binaries': new_binaries, - }) - @validate_call - def load_binaries(self, cache=True) -> Self: - new_binaries = [] - for idx, binary in enumerate(self.HOOKS_BY_TYPE['BINARY'].values()): - new_binaries.append(binary.load(cache=cache) or binary) - return self.model_copy(update={ - 'binaries': new_binaries, - }) + @abx.hookimpl + def get_INSTALLED_APPS(self): + return [self.plugin_module] - # @validate_call - # def load_or_install_binaries(self, cache=True) -> Self: - # new_binaries = [] - # for idx, binary in enumerate(self.binaries): - # new_binaries.append(binary.load_or_install(cache=cache) or binary) - # return self.model_copy(update={ - # 'binaries': new_binaries, - # }) - - - - -# class YtdlpPlugin(BasePlugin): -# name: str = 'ytdlp' -# configs: List[SerializeAsAny[BaseConfigSet]] = [] -# binaries: List[SerializeAsAny[BaseBinary]] = [YtdlpBinary()] -# extractors: List[SerializeAsAny[BaseExtractor]] = [YtdlpExtractor()] -# replayers: List[SerializeAsAny[BaseReplayer]] = [MEDIA_REPLAYER] - -# class WgetPlugin(BasePlugin): -# name: str = 'wget' -# configs: List[SerializeAsAny[BaseConfigSet]] = [*WGET_CONFIG] -# binaries: List[SerializeAsAny[BaseBinary]] = [WgetBinary()] -# extractors: List[SerializeAsAny[BaseExtractor]] = [WgetExtractor(), WarcExtractor()] diff --git a/archivebox/plugantic/base_queue.py b/archivebox/abx/archivebox/base_queue.py similarity index 54% rename from archivebox/plugantic/base_queue.py rename to archivebox/abx/archivebox/base_queue.py index 7e2b06c6..de0edaf2 100644 
--- a/archivebox/plugantic/base_queue.py +++ b/archivebox/abx/archivebox/base_queue.py @@ -1,16 +1,18 @@ -__package__ = 'archivebox.plugantic' +__package__ = 'abx.archivebox' import importlib from typing import Dict, List, TYPE_CHECKING from pydantic import Field, InstanceOf +from benedict import benedict if TYPE_CHECKING: from huey.api import TaskWrapper +import abx + from .base_hook import BaseHook, HookType from .base_binary import BaseBinary -from ..config_stubs import AttrDict @@ -33,13 +35,13 @@ class BaseQueue(BaseHook): if hasattr(task, "task_class") and task.huey.name == self.name: all_tasks[task_name] = task - return AttrDict(all_tasks) + return benedict(all_tasks) - def get_huey_config(self, settings) -> dict: + def get_django_huey_config(self, QUEUE_DATABASE_NAME) -> dict: """Get the config dict to insert into django.conf.settings.DJANGO_HUEY['queues'].""" return { "huey_class": "huey.SqliteHuey", - "filename": settings.QUEUE_DATABASE_NAME, + "filename": QUEUE_DATABASE_NAME, "name": self.name, "results": True, "store_none": True, @@ -58,7 +60,7 @@ class BaseQueue(BaseHook): }, } - def get_supervisor_config(self, settings) -> dict: + def get_supervisord_config(self, settings) -> dict: """Ge the config dict used to tell sueprvisord to start a huey consumer for this queue.""" return { "name": f"worker_{self.name}", @@ -78,7 +80,7 @@ class BaseQueue(BaseHook): print(f"Error starting worker for queue {self.name}: {e}") return None print() - worker = start_worker(supervisor, self.get_supervisor_config(settings), lazy=lazy) + worker = start_worker(supervisor, self.get_supervisord_config(settings), lazy=lazy) # Update settings.WORKERS to include this worker settings.WORKERS = getattr(settings, "WORKERS", None) or AttrDict({}) @@ -86,65 +88,19 @@ class BaseQueue(BaseHook): return worker - def register(self, settings, parent_plugin=None): - # self._plugin = parent_plugin # for debugging only, never rely on this! 
- - # Side effect: register queue with django-huey multiqueue dict - settings.DJANGO_HUEY = getattr(settings, "DJANGO_HUEY", None) or AttrDict({"queues": {}}) - settings.DJANGO_HUEY["queues"][self.name] = self.get_huey_config(settings) - - # Side effect: register some extra tasks with huey - # on_startup(queue=self.name)(self.on_startup_task) - # db_periodic_task(crontab(minute='*/5'))(self.on_periodic_task) - - # Install queue into settings.QUEUES - settings.QUEUES = getattr(settings, "QUEUES", None) or AttrDict({}) - settings.QUEUES[self.id] = self - - # Record installed hook into settings.HOOKS - super().register(settings, parent_plugin=parent_plugin) + @abx.hookimpl + def get_QUEUES(self): + return [self] + @abx.hookimpl + def get_DJANGO_HUEY_QUEUES(self, QUEUE_DATABASE_NAME): + """queue configs to be added to django.conf.settings.DJANGO_HUEY['queues']""" + return { + self.name: self.get_django_huey_config(QUEUE_DATABASE_NAME) + } + + + # @abx.hookimpl # def ready(self, settings): # self.start_supervisord_worker(settings, lazy=True) # super().ready(settings) - - -# class WgetToggleConfig(ConfigSet): -# section: ConfigSectionName = 'ARCHIVE_METHOD_TOGGLES' - -# SAVE_WGET: bool = True -# SAVE_WARC: bool = True - -# class WgetDependencyConfig(ConfigSet): -# section: ConfigSectionName = 'DEPENDENCY_CONFIG' - -# WGET_BINARY: str = Field(default='wget') -# WGET_ARGS: Optional[List[str]] = Field(default=None) -# WGET_EXTRA_ARGS: List[str] = [] -# WGET_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}'] - -# class WgetOptionsConfig(ConfigSet): -# section: ConfigSectionName = 'ARCHIVE_METHOD_OPTIONS' - -# # loaded from shared config -# WGET_AUTO_COMPRESSION: bool = Field(default=True) -# SAVE_WGET_REQUISITES: bool = Field(default=True) -# WGET_USER_AGENT: str = Field(default='', alias='USER_AGENT') -# WGET_TIMEOUT: int = Field(default=60, alias='TIMEOUT') -# WGET_CHECK_SSL_VALIDITY: bool = Field(default=True, alias='CHECK_SSL_VALIDITY') -# WGET_RESTRICT_FILE_NAMES: str 
= Field(default='windows', alias='RESTRICT_FILE_NAMES') -# WGET_COOKIES_FILE: Optional[Path] = Field(default=None, alias='COOKIES_FILE') - - -# CONFIG = { -# 'CHECK_SSL_VALIDITY': False, -# 'SAVE_WARC': False, -# 'TIMEOUT': 999, -# } - - -# WGET_CONFIG = [ -# WgetToggleConfig(**CONFIG), -# WgetDependencyConfig(**CONFIG), -# WgetOptionsConfig(**CONFIG), -# ] diff --git a/archivebox/plugantic/base_replayer.py b/archivebox/abx/archivebox/base_replayer.py similarity index 60% rename from archivebox/plugantic/base_replayer.py rename to archivebox/abx/archivebox/base_replayer.py index fbb7388c..7b51ae47 100644 --- a/archivebox/plugantic/base_replayer.py +++ b/archivebox/abx/archivebox/base_replayer.py @@ -1,8 +1,8 @@ -__package__ = 'archivebox.plugantic' +__package__ = 'abx.archivebox' +import abx from .base_hook import BaseHook, HookType -from ..config_stubs import AttrDict class BaseReplayer(BaseHook): @@ -22,16 +22,8 @@ class BaseReplayer(BaseHook): # icon_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon' # thumbnail_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon' - def register(self, settings, parent_plugin=None): - # self._plugin = parent_plugin # for debugging only, never rely on this! + @abx.hookimpl + def get_REPLAYERS(self): + return [self] - settings.REPLAYERS = getattr(settings, 'REPLAYERS', None) or AttrDict({}) - settings.REPLAYERS[self.id] = self - - super().register(settings, parent_plugin=parent_plugin) - -# class MediaReplayer(BaseReplayer): -# name: str = 'MediaReplayer' - - -# MEDIA_REPLAYER = MediaReplayer() + # TODO: add hookimpl methods for get_row_template, get_embed_template, get_fullpage_template, etc... 
diff --git a/archivebox/abx/archivebox/base_searchbackend.py b/archivebox/abx/archivebox/base_searchbackend.py new file mode 100644 index 00000000..6465dafd --- /dev/null +++ b/archivebox/abx/archivebox/base_searchbackend.py @@ -0,0 +1,33 @@ +__package__ = 'abx.archivebox' + +from typing import Iterable, List +from pydantic import Field + +import abx +from .base_hook import BaseHook, HookType + + + +class BaseSearchBackend(BaseHook): + hook_type: HookType = 'SEARCHBACKEND' + + name: str = Field() # e.g. 'singlefile' + + + # TODO: move these to a hookimpl + + @staticmethod + def index(snapshot_id: str, texts: List[str]): + return + + @staticmethod + def flush(snapshot_ids: Iterable[str]): + return + + @staticmethod + def search(text: str) -> List[str]: + raise NotImplementedError("search method must be implemented by subclass") + + @abx.hookimpl + def get_SEARCHBACKENDS(self): + return [self] diff --git a/archivebox/abx/hookspec_archivebox.py b/archivebox/abx/archivebox/hookspec.py similarity index 85% rename from archivebox/abx/hookspec_archivebox.py rename to archivebox/abx/archivebox/hookspec.py index b241a856..f851679b 100644 --- a/archivebox/abx/hookspec_archivebox.py +++ b/archivebox/abx/archivebox/hookspec.py @@ -1,4 +1,6 @@ -from .hookspec import hookspec +__package__ = 'abx.archivebox' + +from .. import hookspec @hookspec diff --git a/archivebox/abx/archivebox/use.py b/archivebox/abx/archivebox/use.py new file mode 100644 index 00000000..3b26c586 --- /dev/null +++ b/archivebox/abx/archivebox/use.py @@ -0,0 +1,98 @@ +__package__ = 'abx.archivebox' + +from benedict import benedict + +from .. 
import pm + + +# API exposed to ArchiveBox code + +def get_PLUGINS(): + return benedict({ + plugin.PLUGIN.id: plugin.PLUGIN + for plugin in pm.get_plugins() + }) + +def get_HOOKS(PLUGINS): + return benedict({ + hook.id: hook + for plugin in PLUGINS.values() + for hook in plugin.hooks + }) + +def get_CONFIGS(): + return benedict({ + config_id: config + for plugin_configs in pm.hook.get_CONFIGS() + for config_id, config in plugin_configs.items() + }) + +def get_FLAT_CONFIG(): + return benedict({ + key: value + for plugin_config_dict in pm.hook.get_FLAT_CONFIG() + for key, value in plugin_config_dict.items() + }) + +def get_BINPROVIDERS(): + return benedict({ + binprovider.id: binprovider + for plugin_binproviders in pm.hook.get_BINPROVIDERS() + for binprovider in plugin_binproviders + }) + +def get_BINARIES(): + return benedict({ + binary.id: binary + for plugin_binaries in pm.hook.get_BINARIES() + for binary in plugin_binaries + }) + +def get_EXTRACTORS(): + return benedict({ + extractor.id: extractor + for plugin_extractors in pm.hook.get_EXTRACTORS() + for extractor in plugin_extractors + }) + +def get_REPLAYERS(): + return benedict({ + replayer.id: replayer + for plugin_replayers in pm.hook.get_REPLAYERS() + for replayer in plugin_replayers + }) + +def get_CHECKS(): + return benedict({ + check.id: check + for plugin_checks in pm.hook.get_CHECKS() + for check in plugin_checks + }) + +def get_ADMINDATAVIEWS(): + return benedict({ + admin_dataview.id: admin_dataview + for plugin_admin_dataviews in pm.hook.get_ADMINDATAVIEWS() + for admin_dataview in plugin_admin_dataviews + }) + +def get_QUEUES(): + return benedict({ + queue.id: queue + for plugin_queues in pm.hook.get_QUEUES() + for queue in plugin_queues + }) + +def get_SEARCHBACKENDS(): + return benedict({ + searchbackend.id: searchbackend + for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS() + for searchbackend in plugin_searchbackends + }) + + +########################### + + +def 
register_all_hooks(settings): + pm.hook.register(settings=settings) diff --git a/archivebox/abx/django/__init__.py b/archivebox/abx/django/__init__.py new file mode 100644 index 00000000..56fe8ddd --- /dev/null +++ b/archivebox/abx/django/__init__.py @@ -0,0 +1 @@ +__package__ = 'abx.django' diff --git a/archivebox/abx/apps.py b/archivebox/abx/django/apps.py similarity index 79% rename from archivebox/abx/apps.py rename to archivebox/abx/django/apps.py index f3880c07..085647c1 100644 --- a/archivebox/abx/apps.py +++ b/archivebox/abx/django/apps.py @@ -1,8 +1,9 @@ +__package__ = 'abx.django' + from django.apps import AppConfig class ABXConfig(AppConfig): - default_auto_field = 'django.db.models.BigAutoField' name = 'abx' def ready(self): diff --git a/archivebox/abx/django/hookspec.py b/archivebox/abx/django/hookspec.py new file mode 100644 index 00000000..04bb359b --- /dev/null +++ b/archivebox/abx/django/hookspec.py @@ -0,0 +1,120 @@ +__package__ = 'abx.django' + +from ..hookspec import hookspec + + +########################################################################################### + +@hookspec +def get_INSTALLED_APPS(): + """Return a list of apps to add to INSTALLED_APPS""" + # e.g. ['your_plugin_type.plugin_name'] + return [] + +# @hookspec +# def register_INSTALLED_APPS(INSTALLED_APPS): +# """Mutate INSTALLED_APPS in place to add your app in a specific position""" +# # idx_of_contrib = INSTALLED_APPS.index('django.contrib.auth') +# # INSTALLED_APPS.insert(idx_of_contrib + 1, 'your_plugin_type.plugin_name') +# pass + + +@hookspec +def get_TEMPLATE_DIRS(): + return [] # e.g. ['your_plugin_type/plugin_name/templates'] + +# @hookspec +# def register_TEMPLATE_DIRS(TEMPLATE_DIRS): +# """Install django settings""" +# # e.g. TEMPLATE_DIRS.insert(0, 'your_plugin_type/plugin_name/templates') +# pass + + +@hookspec +def get_STATICFILES_DIRS(): + return [] # e.g. 
['your_plugin_type/plugin_name/static'] + +# @hookspec +# def register_STATICFILES_DIRS(STATICFILES_DIRS): +# """Mutate STATICFILES_DIRS in place to add your static dirs in a specific position""" +# # e.g. STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static') +# pass + + +@hookspec +def get_MIDDLEWARE(): + return [] # e.g. ['your_plugin_type.plugin_name.middleware.YourMiddleware'] + +# @hookspec +# def register_MIDDLEWARE(MIDDLEWARE): +# """Mutate MIDDLEWARE in place to add your middleware in a specific position""" +# # e.g. MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware') +# pass + + +@hookspec +def get_AUTHENTICATION_BACKENDS(): + return [] # e.g. ['django_auth_ldap.backend.LDAPBackend'] + +# @hookspec +# def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS): +# """Mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position""" +# # e.g. AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend') +# pass + +@hookspec +def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME): + return {} # e.g. {'your_queue_name': {'huey_class': 'huey.SqliteHuey', 'name': 'your_queue_name', ...}} + +# @hookspec +# def register_DJANGO_HUEY(DJANGO_HUEY): +# """Mutate DJANGO_HUEY in place to add your huey queues in a specific position""" +# # e.g. DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value' +# pass + + +@hookspec +def get_ADMIN_DATA_VIEWS_URLS(): + return [] + +# @hookspec +# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS): +# """Mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position""" +# # e.g. 
ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py') +# pass + + +# @hookspec +# def register_settings(settings): +# """Mutate settings in place to add your settings / modify existing settings""" +# # settings.SOME_KEY = 'some_value' +# pass + + +########################################################################################### + +@hookspec +def get_urlpatterns(): + return [] # e.g. [path('your_plugin_type/plugin_name/url.py', your_view)] + +# @hookspec +# def register_urlpatterns(urlpatterns): +# """Mutate urlpatterns in place to add your urlpatterns in a specific position""" +# # e.g. urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view)) +# pass + +########################################################################################### + +@hookspec +def register_checks(): + """Register django checks with django system checks system""" + pass + + +########################################################################################### + + +@hookspec +def ready(): + """Called when Django apps app.ready() are triggered""" + pass diff --git a/archivebox/abx/django/use.py b/archivebox/abx/django/use.py new file mode 100644 index 00000000..c4960898 --- /dev/null +++ b/archivebox/abx/django/use.py @@ -0,0 +1,98 @@ +__package__ = 'abx.django' + +import itertools +from benedict import benedict + +from .. 
import pm + + +def get_INSTALLED_APPS(): + return itertools.chain(*reversed(pm.hook.get_INSTALLED_APPS())) + +# def register_INSTALLLED_APPS(INSTALLED_APPS): +# pm.hook.register_INSTALLED_APPS(INSTALLED_APPS=INSTALLED_APPS) + + +def get_MIDDLEWARES(): + return itertools.chain(*reversed(pm.hook.get_MIDDLEWARE())) + +# def register_MIDDLEWARES(MIDDLEWARE): +# pm.hook.register_MIDDLEWARE(MIDDLEWARE=MIDDLEWARE) + + +def get_AUTHENTICATION_BACKENDS(): + return itertools.chain(*reversed(pm.hook.get_AUTHENTICATION_BACKENDS())) + +# def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS): +# pm.hook.register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS=AUTHENTICATION_BACKENDS) + + +def get_STATICFILES_DIRS(): + return itertools.chain(*reversed(pm.hook.get_STATICFILES_DIRS())) + +# def register_STATICFILES_DIRS(STATICFILES_DIRS): +# pm.hook.register_STATICFILES_DIRS(STATICFILES_DIRS=STATICFILES_DIRS) + + +def get_TEMPLATE_DIRS(): + return itertools.chain(*reversed(pm.hook.get_TEMPLATE_DIRS())) + +# def register_TEMPLATE_DIRS(TEMPLATE_DIRS): +# pm.hook.register_TEMPLATE_DIRS(TEMPLATE_DIRS=TEMPLATE_DIRS) + +def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME='queue.sqlite3'): + HUEY_QUEUES = {} + for plugin_result in pm.hook.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=QUEUE_DATABASE_NAME): + HUEY_QUEUES.update(plugin_result) + return HUEY_QUEUES + +# def register_DJANGO_HUEY(DJANGO_HUEY): +# pm.hook.register_DJANGO_HUEY(DJANGO_HUEY=DJANGO_HUEY) + +def get_ADMIN_DATA_VIEWS_URLS(): + return itertools.chain(*reversed(pm.hook.get_ADMIN_DATA_VIEWS_URLS())) + +# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS): +# pm.hook.register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS=ADMIN_DATA_VIEWS) + + +# def register_settings(settings): +# # convert settings dict to an benedict so we can set values using settings.attr = xyz notation +# settings_as_obj = benedict(settings, keypath_separator=None) + +# # set default values for settings that are used by plugins +# # settings_as_obj.INSTALLED_APPS = 
settings_as_obj.get('INSTALLED_APPS', []) +# # settings_as_obj.MIDDLEWARE = settings_as_obj.get('MIDDLEWARE', []) +# # settings_as_obj.AUTHENTICATION_BACKENDS = settings_as_obj.get('AUTHENTICATION_BACKENDS', []) +# # settings_as_obj.STATICFILES_DIRS = settings_as_obj.get('STATICFILES_DIRS', []) +# # settings_as_obj.TEMPLATE_DIRS = settings_as_obj.get('TEMPLATE_DIRS', []) +# # settings_as_obj.DJANGO_HUEY = settings_as_obj.get('DJANGO_HUEY', {'queues': {}}) +# # settings_as_obj.ADMIN_DATA_VIEWS = settings_as_obj.get('ADMIN_DATA_VIEWS', {'URLS': []}) + +# # # call all the hook functions to mutate the settings values in-place +# # register_INSTALLLED_APPS(settings_as_obj.INSTALLED_APPS) +# # register_MIDDLEWARES(settings_as_obj.MIDDLEWARE) +# # register_AUTHENTICATION_BACKENDS(settings_as_obj.AUTHENTICATION_BACKENDS) +# # register_STATICFILES_DIRS(settings_as_obj.STATICFILES_DIRS) +# # register_TEMPLATE_DIRS(settings_as_obj.TEMPLATE_DIRS) +# # register_DJANGO_HUEY(settings_as_obj.DJANGO_HUEY) +# # register_ADMIN_DATA_VIEWS(settings_as_obj.ADMIN_DATA_VIEWS) + +# # calls Plugin.settings(settings) on each registered plugin +# pm.hook.register_settings(settings=settings_as_obj) + +# # then finally update the settings globals() object with all the new settings +# # settings.update(settings_as_obj) + + +def get_urlpatterns(): + return list(itertools.chain(*pm.hook.get_urlpatterns())) + +def register_urlpatterns(urlpatterns): + pm.hook.register_urlpatterns(urlpatterns=urlpatterns) + + +def register_checks(): + """register any django system checks""" + pm.hook.register_checks() + diff --git a/archivebox/abx/hookspec.py b/archivebox/abx/hookspec.py index 53cee225..88685b8d 100644 --- a/archivebox/abx/hookspec.py +++ b/archivebox/abx/hookspec.py @@ -3,10 +3,12 @@ from pathlib import Path from pluggy import HookimplMarker from pluggy import HookspecMarker -hookspec = HookspecMarker("abx") -hookimpl = HookimplMarker("abx") +spec = hookspec = HookspecMarker("abx") +impl = hookimpl = 
HookimplMarker("abx") @hookspec +@hookimpl def get_system_user() -> str: return Path('~').expanduser().name + diff --git a/archivebox/abx/hookspec_django_apps.py b/archivebox/abx/hookspec_django_apps.py deleted file mode 100644 index 30bc2951..00000000 --- a/archivebox/abx/hookspec_django_apps.py +++ /dev/null @@ -1,6 +0,0 @@ -from .hookspec import hookspec - -@hookspec -def ready(settings): - """Called when the Django app.ready() is triggered""" - pass diff --git a/archivebox/abx/hookspec_django_settings.py b/archivebox/abx/hookspec_django_settings.py deleted file mode 100644 index c340d084..00000000 --- a/archivebox/abx/hookspec_django_settings.py +++ /dev/null @@ -1,90 +0,0 @@ -from .hookspec import hookspec - - -########################################################################################### - -@hookspec -def get_INSTALLED_APPS(): - """Return a list of apps to add to INSTALLED_APPS""" - # e.g. ['your_plugin_type.plugin_name'] - return [] - -@hookspec -def register_INSTALLED_APPS(INSTALLED_APPS): - """Mutate INSTALLED_APPS in place to add your app in a specific position""" - # idx_of_contrib = INSTALLED_APPS.index('django.contrib.auth') - # INSTALLED_APPS.insert(idx_of_contrib + 1, 'your_plugin_type.plugin_name') - pass - - -@hookspec -def get_TEMPLATE_DIRS(): - return [] # e.g. ['your_plugin_type/plugin_name/templates'] - -@hookspec -def register_TEMPLATE_DIRS(TEMPLATE_DIRS): - """Install django settings""" - # e.g. TEMPLATE_DIRS.insert(0, 'your_plugin_type/plugin_name/templates') - pass - - -@hookspec -def get_STATICFILES_DIRS(): - return [] # e.g. ['your_plugin_type/plugin_name/static'] - -@hookspec -def register_STATICFILES_DIRS(STATICFILES_DIRS): - """Mutate STATICFILES_DIRS in place to add your static dirs in a specific position""" - # e.g. STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static') - pass - - -@hookspec -def get_MIDDLEWARE(): - return [] # e.g. 
['your_plugin_type.plugin_name.middleware.YourMiddleware'] - -@hookspec -def register_MIDDLEWARE(MIDDLEWARE): - """Mutate MIDDLEWARE in place to add your middleware in a specific position""" - # e.g. MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware') - pass - - -@hookspec -def get_AUTHENTICATION_BACKENDS(): - return [] # e.g. ['django_auth_ldap.backend.LDAPBackend'] - -@hookspec -def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS): - """Mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position""" - # e.g. AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend') - pass - -@hookspec -def get_DJANGO_HUEY_QUEUES(): - return [] # e.g. [{'name': 'your_plugin_type.plugin_name', 'HUEY': {...}}] - -@hookspec -def register_DJANGO_HUEY(DJANGO_HUEY): - """Mutate DJANGO_HUEY in place to add your huey queues in a specific position""" - # e.g. DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value' - pass - - -@hookspec -def get_ADMIN_DATA_VIEWS_URLS(): - return [] - -@hookspec -def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS): - """Mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position""" - # e.g. ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py') - pass - - -@hookspec -def register_settings(settings): - """Mutate settings in place to add your settings / modify existing settings""" - # settings.SOME_KEY = 'some_value' - pass - diff --git a/archivebox/abx/hookspec_django_urls.py b/archivebox/abx/hookspec_django_urls.py deleted file mode 100644 index 258a1e89..00000000 --- a/archivebox/abx/hookspec_django_urls.py +++ /dev/null @@ -1,12 +0,0 @@ -from .hookspec import hookspec - - -@hookspec -def get_urlpatterns(): - return [] # e.g. 
[path('your_plugin_type/plugin_name/url.py', your_view)] - -@hookspec -def register_urlpatterns(urlpatterns): - """Mutate urlpatterns in place to add your urlpatterns in a specific position""" - # e.g. urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view)) - pass diff --git a/archivebox/abx/manager.py b/archivebox/abx/manager.py new file mode 100644 index 00000000..8d44a087 --- /dev/null +++ b/archivebox/abx/manager.py @@ -0,0 +1,30 @@ +import inspect + +import pluggy + + +class PluginManager(pluggy.PluginManager): + """ + Patch to fix pluggy's PluginManager to work with pydantic models. + See: https://github.com/pytest-dev/pluggy/pull/536 + """ + def parse_hookimpl_opts(self, plugin, name: str) -> pluggy.HookimplOpts | None: + # IMPORTANT: @property methods can have side effects, and are never hookimpl + # if attr is a property, skip it in advance + plugin_class = plugin if inspect.isclass(plugin) else type(plugin) + if isinstance(getattr(plugin_class, name, None), property): + return None + + # pydantic model fields are like attrs and also can never be hookimpls + plugin_is_pydantic_obj = hasattr(plugin, "__pydantic_core_schema__") + if plugin_is_pydantic_obj and name in getattr(plugin, "model_fields", {}): + # pydantic models mess with the class and attr __signature__ + # so inspect.isroutine(...) 
throws exceptions and cant be used + return None + + try: + return super().parse_hookimpl_opts(plugin, name) + except AttributeError: + return super().parse_hookimpl_opts(type(plugin), name) + +pm = PluginManager("abx") diff --git a/archivebox/abx/pydantic_pkgr/__init__.py b/archivebox/abx/pydantic_pkgr/__init__.py new file mode 100644 index 00000000..28cd0f81 --- /dev/null +++ b/archivebox/abx/pydantic_pkgr/__init__.py @@ -0,0 +1 @@ +__package__ = 'abx.pydantic_pkgr' diff --git a/archivebox/abx/hookspec_pydantic_pkgr.py b/archivebox/abx/pydantic_pkgr/hookspec.py similarity index 85% rename from archivebox/abx/hookspec_pydantic_pkgr.py rename to archivebox/abx/pydantic_pkgr/hookspec.py index 63a289a6..6b293abb 100644 --- a/archivebox/abx/hookspec_pydantic_pkgr.py +++ b/archivebox/abx/pydantic_pkgr/hookspec.py @@ -1,5 +1,5 @@ -from .hookspec import hookspec +from ..hookspec import hookspec ########################################################################################### diff --git a/archivebox/cli/__init__.py b/archivebox/cli/__init__.py index 50be46de..6cf0c63d 100644 --- a/archivebox/cli/__init__.py +++ b/archivebox/cli/__init__.py @@ -12,7 +12,6 @@ from collections.abc import Mapping from typing import Optional, List, IO, Union, Iterable from pathlib import Path - from ..misc.checks import check_data_folder, check_migrations from ..misc.logging import stderr diff --git a/archivebox/config.py b/archivebox/config.py index 419f60fe..f2c4ca1b 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -788,16 +788,23 @@ def bump_startup_progress_bar(): def setup_django_minimal(): - sys.path.append(str(archivebox.PACKAGE_DIR)) - os.environ.setdefault('OUTPUT_DIR', str(archivebox.DATA_DIR)) - os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings') - django.setup() + # sys.path.append(str(archivebox.PACKAGE_DIR)) + # os.environ.setdefault('OUTPUT_DIR', str(archivebox.DATA_DIR)) + # os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings') + 
# django.setup() + raise Exception('dont use this anymore') + +DJANGO_SET_UP = False def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None: global INITIAL_STARTUP_PROGRESS global INITIAL_STARTUP_PROGRESS_TASK - + global DJANGO_SET_UP + + if DJANGO_SET_UP: + raise Exception('django is already set up!') + with Progress(transient=True, expand=True, console=CONSOLE) as INITIAL_STARTUP_PROGRESS: INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25) @@ -808,14 +815,12 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C bump_startup_progress_bar() try: from django.core.management import call_command - - sys.path.append(str(archivebox.PACKAGE_DIR)) - os.environ.setdefault('OUTPUT_DIR', str(archivebox.DATA_DIR)) - os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings') bump_startup_progress_bar() if in_memory_db: + raise Exception('dont use this anymore') + # some commands (e.g. oneshot) dont store a long-lived sqlite3 db file on disk. 
# in those cases we create a temporary in-memory db and run the migrations # immediately to get a usable in-memory-database at startup @@ -833,8 +838,6 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C from django.conf import settings - from plugins_sys.config.apps import SHELL_CONFIG - # log startup message to the error log with open(settings.ERROR_LOG, "a", encoding='utf-8') as f: command = ' '.join(sys.argv) @@ -877,6 +880,8 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C except KeyboardInterrupt: raise SystemExit(2) + + DJANGO_SET_UP = True INITIAL_STARTUP_PROGRESS = None INITIAL_STARTUP_PROGRESS_TASK = None diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py index 5ec77bab..e81c569d 100644 --- a/archivebox/core/admin.py +++ b/archivebox/core/admin.py @@ -22,7 +22,7 @@ import archivebox from signal_webhooks.admin import WebhookAdmin from signal_webhooks.utils import get_webhook_model -# from plugantic.admin import CustomPlugin +# from abx.archivebox.admin import CustomPlugin from ..util import htmldecode, urldecode diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index d74f80ea..cdab906c 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -9,6 +9,10 @@ from pathlib import Path from django.utils.crypto import get_random_string import abx +import abx.archivebox +import abx.archivebox.use +import abx.django.use + import archivebox from archivebox.constants import CONSTANTS @@ -19,22 +23,19 @@ IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3] -VERSION = archivebox.__version__ +VERSION = archivebox.VERSION PACKAGE_DIR = archivebox.PACKAGE_DIR DATA_DIR = archivebox.DATA_DIR -ARCHIVE_DIR = archivebox.DATA_DIR / 'archive' +ARCHIVE_DIR = archivebox.ARCHIVE_DIR ################################################################################ ### 
ArchiveBox Plugin Settings ################################################################################ PLUGIN_HOOKSPECS = [ - 'abx.hookspec_django_settings', - 'abx.hookspec_django_apps', - 'abx.hookspec_django_urls', - 'abx.hookspec_pydantic_pkgr', - 'abx.hookspec_archivebox', - 'plugantic.base_check', + 'abx.django.hookspec', + 'abx.pydantic_pkgr.hookspec', + 'abx.archivebox.hookspec', ] abx.register_hookspecs(PLUGIN_HOOKSPECS) @@ -55,20 +56,20 @@ USER_PLUGINS = abx.get_plugins_in_dirs(USER_PLUGIN_DIRS) ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS} PLUGIN_MANAGER = abx.pm -PLUGINS = abx.load_plugins(ALL_PLUGINS) -HOOKS = abx.get_plugins_HOOKS(PLUGINS) +PLUGINS = abx.archivebox.load_archivebox_plugins(PLUGIN_MANAGER, ALL_PLUGINS) +HOOKS = abx.archivebox.use.get_HOOKS(PLUGINS) + +CONFIGS = abx.archivebox.use.get_CONFIGS() +FLAT_CONFIG = abx.archivebox.use.get_FLAT_CONFIG() +BINPROVIDERS = abx.archivebox.use.get_BINPROVIDERS() +BINARIES = abx.archivebox.use.get_BINARIES() +EXTRACTORS = abx.archivebox.use.get_EXTRACTORS() +REPLAYERS = abx.archivebox.use.get_REPLAYERS() +CHECKS = abx.archivebox.use.get_CHECKS() +ADMINDATAVIEWS = abx.archivebox.use.get_ADMINDATAVIEWS() +QUEUES = abx.archivebox.use.get_QUEUES() +SEARCHBACKENDS = abx.archivebox.use.get_SEARCHBACKENDS() -CONFIGS = abx.get_plugins_CONFIGS() -# FLAT_CONFIG = abx.get_plugins_FLAT_CONFIG(CONFIGS) -FLAT_CONFIG = CONFIG -BINPROVIDERS = abx.get_plugins_BINPROVIDERS() -BINARIES = abx.get_plugins_BINARIES() -EXTRACTORS = abx.get_plugins_EXTRACTORS() -REPLAYERS = abx.get_plugins_REPLAYERS() -CHECKS = abx.get_plugins_CHECKS() -ADMINDATAVIEWS = abx.get_plugins_ADMINDATAVIEWS() -QUEUES = abx.get_plugins_QUEUES() -SEARCHBACKENDS = abx.get_plugins_SEARCHBACKENDS() ################################################################################ ### Django Core Settings @@ -104,14 +105,13 @@ INSTALLED_APPS = [ 'django_object_actions', # provides easy Django Admin action buttons on change views 
https://github.com/crccheck/django-object-actions # Our ArchiveBox-provided apps - # 'plugantic', # ArchiveBox plugin API definition + finding/registering/calling interface 'queues', # handles starting and managing background workers and processes 'abid_utils', # handles ABID ID creation, handling, and models 'core', # core django model with Snapshot, ArchiveResult, etc. 'api', # Django-Ninja-based Rest API interfaces, config, APIToken model, etc. # ArchiveBox plugins - *abx.get_plugins_INSTALLLED_APPS(), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins, + *abx.django.use.get_INSTALLED_APPS(), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins, # 3rd-party apps from PyPI that need to be loaded last 'admin_data_views', # handles rendering some convenient automatic read-only views of data in Django admin @@ -136,7 +136,7 @@ MIDDLEWARE = [ 'core.middleware.ReverseProxyAuthMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'core.middleware.CacheControlMiddleware', - *abx.get_plugins_MIDDLEWARE(), + *abx.django.use.get_MIDDLEWARES(), ] @@ -149,7 +149,7 @@ MIDDLEWARE = [ AUTHENTICATION_BACKENDS = [ 'django.contrib.auth.backends.RemoteUserBackend', 'django.contrib.auth.backends.ModelBackend', - *abx.get_plugins_AUTHENTICATION_BACKENDS(), + *abx.django.use.get_AUTHENTICATION_BACKENDS(), ] @@ -177,7 +177,7 @@ STATICFILES_DIRS = [ # for plugin_dir in PLUGIN_DIRS.values() # if (plugin_dir / 'static').is_dir() # ], - *abx.get_plugins_STATICFILES_DIRS(), + *abx.django.use.get_STATICFILES_DIRS(), str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'static'), ] @@ -188,7 +188,7 @@ TEMPLATE_DIRS = [ # for plugin_dir in PLUGIN_DIRS.values() # if (plugin_dir / 'templates').is_dir() # ], - *abx.get_plugins_TEMPLATE_DIRS(), + *abx.django.use.get_TEMPLATE_DIRS(), str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'core'), str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'admin'), str(PACKAGE_DIR / TEMPLATES_DIR_NAME), @@ -225,10 +225,12 @@ 
DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", str(CONSTANTS.DATABAS QUEUE_DATABASE_NAME = DATABASE_NAME.replace('index.sqlite3', 'queue.sqlite3') SQLITE_CONNECTION_OPTIONS = { + "ENGINE": "django.db.backends.sqlite3", "TIME_ZONE": CONSTANTS.TIMEZONE, "OPTIONS": { # https://gcollazo.com/optimal-sqlite-settings-for-django/ - # # https://litestream.io/tips/#busy-timeout + # https://litestream.io/tips/#busy-timeout + # https://docs.djangoproject.com/en/5.1/ref/databases/#setting-pragma-options "timeout": 5, "check_same_thread": False, "transaction_mode": "IMMEDIATE", @@ -246,17 +248,14 @@ SQLITE_CONNECTION_OPTIONS = { DATABASES = { "default": { - "ENGINE": "django.db.backends.sqlite3", "NAME": DATABASE_NAME, - # DB setup is sometimes modified at runtime by setup_django() in config.py + **SQLITE_CONNECTION_OPTIONS, }, "queue": { - "ENGINE": "django.db.backends.sqlite3", "NAME": QUEUE_DATABASE_NAME, **SQLITE_CONNECTION_OPTIONS, }, # 'cache': { - # 'ENGINE': 'django.db.backends.sqlite3', # 'NAME': CACHE_DB_PATH, # **SQLITE_CONNECTION_OPTIONS, # }, @@ -295,7 +294,7 @@ DJANGO_HUEY = { "queues": { HUEY["name"]: HUEY.copy(), # more registered here at plugin import-time by BaseQueue.register() - **abx.get_plugins_DJANGO_HUEY_QUEUES(), + **abx.django.use.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=QUEUE_DATABASE_NAME), }, } @@ -482,45 +481,45 @@ ADMIN_DATA_VIEWS = { }, { "route": "binaries/", - "view": "plugantic.views.binaries_list_view", + "view": "plugins_sys.config.views.binaries_list_view", "name": "Binaries", "items": { "route": "<str:key>/", - "view": "plugantic.views.binary_detail_view", + "view": "plugins_sys.config.views.binary_detail_view", "name": "binary", }, }, { "route": "plugins/", - "view": "plugantic.views.plugins_list_view", + "view": "plugins_sys.config.views.plugins_list_view", "name": "Plugins", "items": { "route": "<str:key>/", - "view": "plugantic.views.plugin_detail_view", + "view": "plugins_sys.config.views.plugin_detail_view", "name": 
"plugin", }, }, { "route": "workers/", - "view": "plugantic.views.worker_list_view", + "view": "plugins_sys.config.views.worker_list_view", "name": "Workers", "items": { "route": "<str:key>/", - "view": "plugantic.views.worker_detail_view", + "view": "plugins_sys.config.views.worker_detail_view", "name": "worker", }, }, { "route": "logs/", - "view": "plugantic.views.log_list_view", + "view": "plugins_sys.config.views.log_list_view", "name": "Logs", "items": { "route": "<str:key>/", - "view": "plugantic.views.log_detail_view", + "view": "plugins_sys.config.views.log_detail_view", "name": "log", }, }, - *abx.get_plugins_ADMIN_DATA_VIEWS_URLS(), + *abx.django.use.get_ADMIN_DATA_VIEWS_URLS(), ], } @@ -614,5 +613,7 @@ DEBUG_LOGFIRE = DEBUG_LOGFIRE and (DATA_DIR / '.logfire').is_dir() # JET_TOKEN = 'some-api-token-here' -abx.register_plugins_settings(globals()) +abx.django.use.register_checks() +abx.archivebox.use.register_all_hooks(globals()) +# import ipdb; ipdb.set_trace() diff --git a/archivebox/core/views.py b/archivebox/core/views.py index 2de18c8d..5d0c614a 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -1,7 +1,7 @@ __package__ = 'archivebox.core' from typing import Callable - +from benedict import benedict from pathlib import Path from django.shortcuts import render, redirect @@ -36,12 +36,15 @@ from ..config import ( CONFIG_SCHEMA, DYNAMIC_CONFIG_SCHEMA, USER_CONFIG, + CONFIG, ) from ..logging_util import printable_filesize from ..util import base_url, htmlencode, ts_to_date_str from ..search import query_search_index from .serve_static import serve_static_with_byterange_support +CONFIG = benedict({**CONSTANTS, **CONFIG, **settings.FLAT_CONFIG}) + class HomepageView(View): def get(self, request): @@ -533,8 +536,6 @@ def key_is_safe(key: str) -> bool: @render_with_table_view def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext: - CONFIG = settings.FLAT_CONFIG - assert request.user.is_superuser, 'Must be a superuser to 
view configuration settings.' rows = { diff --git a/archivebox/main.py b/archivebox/main.py index 243fedd3..2c4ce277 100755 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -6,8 +6,6 @@ import shutil import platform import archivebox -CONSTANTS = archivebox.CONSTANTS - from typing import Dict, List, Optional, Iterable, IO, Union from pathlib import Path from datetime import date, datetime @@ -69,9 +67,8 @@ from .index.html import ( from .index.csv import links_to_csv from .extractors import archive_links, archive_link, ignore_methods from .misc.logging import stderr, hint, ANSI -from .misc.checks import check_data_folder, check_dependencies +from .misc.checks import check_data_folder from .config import ( - setup_django_minimal, ConfigDict, IS_TTY, DEBUG, @@ -91,7 +88,6 @@ from .config import ( CONFIG, USER_CONFIG, get_real_name, - setup_django, ) from .logging_util import ( TimedProgress, @@ -108,6 +104,7 @@ from .logging_util import ( printable_dependency_version, ) +CONSTANTS = archivebox.CONSTANTS VERSION = archivebox.VERSION PACKAGE_DIR = archivebox.PACKAGE_DIR OUTPUT_DIR = archivebox.DATA_DIR @@ -190,7 +187,6 @@ def version(quiet: bool=False, out_dir: Path=OUTPUT_DIR) -> None: """Print the ArchiveBox version and dependency information""" - setup_django_minimal() from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SHELL_CONFIG from plugins_auth.ldap.apps import LDAP_CONFIG from django.conf import settings @@ -270,7 +266,6 @@ def version(quiet: bool=False, print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**ANSI)) print() - check_dependencies(CONFIG) @enforce_types @@ -461,7 +456,7 @@ def status(out_dir: Path=OUTPUT_DIR) -> None: check_data_folder(CONFIG) from core.models import Snapshot - from django.contrib.auth import get_user_mod, SHELL_CONFIG + from django.contrib.auth import get_user_model User = get_user_model() print('{green}[*] Scanning archive main index...{reset}'.format(**ANSI)) @@ -602,7 +597,7 @@ 
def add(urls: Union[str, List[str]], # Load list of links from the existing index check_data_folder(CONFIG) - check_dependencies(CONFIG) + # worker = start_cli_workers() new_links: List[Link] = [] @@ -791,7 +786,6 @@ def update(resume: Optional[float]=None, check_data_folder(CONFIG) - check_dependencies(CONFIG) # start_cli_workers() new_links: List[Link] = [] # TODO: Remove input argument: only_new @@ -963,8 +957,6 @@ def setup(out_dir: Path=OUTPUT_DIR) -> None: if not ARCHIVE_DIR.exists(): run_subcommand('init', stdin=None, pwd=out_dir) - setup_django(out_dir=out_dir, check_db=True) - stderr('\n[+] Installing ArchiveBox dependencies automatically...', color='green') from plugins_extractor.ytdlp.apps import YTDLP_BINARY @@ -1109,7 +1101,6 @@ def schedule(add: bool=False, """Set ArchiveBox to regularly import URLs at specific times using cron""" check_data_folder(CONFIG) - setup_django_minimal() from plugins_pkg.pip.apps import ARCHIVEBOX_BINARY from plugins_sys.config.apps import SHELL_CONFIG, CONSTANTS @@ -1256,6 +1247,8 @@ def server(runserver_args: Optional[List[str]]=None, from django.core.management import call_command from django.contrib.auth.models import User + + print('{green}[+] Starting ArchiveBox webserver... 
{reset}'.format(**ANSI)) print(' > Logging errors to ./logs/errors.log') @@ -1306,7 +1299,6 @@ def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None: """Run an ArchiveBox Django management command""" check_data_folder(CONFIG) - setup_django_minimal() from django.core.management import execute_from_command_line if (args and "createsuperuser" in args) and (IN_DOCKER and not IS_TTY): diff --git a/archivebox/misc/checks.py b/archivebox/misc/checks.py index e2964bcf..c4d3db79 100644 --- a/archivebox/misc/checks.py +++ b/archivebox/misc/checks.py @@ -1,38 +1,10 @@ __package__ = 'archivebox.misc' -# TODO: migrate all of these to new plugantic/base_check.py Check system - from benedict import benedict -from pathlib import Path import archivebox -from .logging import stderr, hint, ANSI - - -def check_dependencies(config: benedict, show_help: bool=True) -> None: - # dont do this on startup anymore, it's too slow - pass - # invalid_dependencies = [ - # (name, binary) for name, info in settings.BINARIES.items() - # if not binary. - # ] - # if invalid_dependencies and show_help: - # stderr(f'[!] Warning: Missing {len(invalid_dependencies)} recommended dependencies', color='lightyellow') - # for dependency, info in invalid_dependencies: - # stderr( - # ' ! 
{}: {} ({})'.format( - # dependency, - # info['path'] or 'unable to find binary', - # info['version'] or 'unable to detect version', - # ) - # ) - # if dependency in ('YOUTUBEDL_BINARY', 'CHROME_BINARY', 'SINGLEFILE_BINARY', 'READABILITY_BINARY', 'MERCURY_BINARY'): - # hint(('To install all packages automatically run: archivebox setup', - # f'or to disable it and silence this warning: archivebox config --set SAVE_{dependency.rsplit("_", 1)[0]}=False', - # ''), prefix=' ') - # stderr('') - +from .logging import stderr, ANSI def check_data_folder(config: benedict) -> None: diff --git a/archivebox/plugantic/__init__.py b/archivebox/plugantic/__init__.py deleted file mode 100644 index 98372eb4..00000000 --- a/archivebox/plugantic/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__package__ = 'archivebox.plugantic' diff --git a/archivebox/plugantic/apps.py b/archivebox/plugantic/apps.py deleted file mode 100644 index 14703424..00000000 --- a/archivebox/plugantic/apps.py +++ /dev/null @@ -1,12 +0,0 @@ -__package__ = 'archivebox.plugantic' - -from django.apps import AppConfig - -class PluganticConfig(AppConfig): - default_auto_field = 'django.db.models.BigAutoField' - name = 'plugantic' - - def ready(self) -> None: - pass - # from django.conf import settings - # print(f'[🧩] Detected {len(settings.INSTALLED_PLUGINS)} settings.INSTALLED_PLUGINS to load...') diff --git a/archivebox/plugantic/base_admindataview.py b/archivebox/plugantic/base_admindataview.py deleted file mode 100644 index 72bb4df3..00000000 --- a/archivebox/plugantic/base_admindataview.py +++ /dev/null @@ -1,39 +0,0 @@ -__package__ = 'archivebox.plugantic' - -# from typing import Dict - -from .base_hook import BaseHook, HookType -from ..config_stubs import AttrDict - - -class BaseAdminDataView(BaseHook): - hook_type: HookType = "ADMINDATAVIEW" - - # verbose_name: str = 'Data View' - # route: str = '/npm/installed/' - # view: str = 'plugins_pkg.npm.admin.installed_list_view' - # items: Dict[str, str] = { - # "name": 
"installed_npm_pkg", - # 'route': '<str:key>/', - # 'view': 'plugins_pkg.npm.admin.installed_detail_view', - # } - - def register(self, settings, parent_plugin=None): - # self._plugin = parent_plugin # circular ref to parent only here for easier debugging! never depend on circular backref to parent in real code! - - self.register_route_in_admin_data_view_urls(settings) - - settings.ADMINDATAVIEWS = getattr(settings, "ADMINDATAVIEWS", None) or AttrDict({}) - settings.ADMINDATAVIEWS[self.id] = self - - super().register(settings, parent_plugin) - - def register_route_in_admin_data_view_urls(self, settings): - route = { - "route": self.route, - "view": self.view, - "name": self.verbose_name, - "items": self.items, - } - if route not in settings.ADMIN_DATA_VIEWS.URLS: - settings.ADMIN_DATA_VIEWS.URLS += [route] # append our route (update in place) diff --git a/archivebox/plugantic/base_searchbackend.py b/archivebox/plugantic/base_searchbackend.py deleted file mode 100644 index 860e4145..00000000 --- a/archivebox/plugantic/base_searchbackend.py +++ /dev/null @@ -1,39 +0,0 @@ -__package__ = 'archivebox.plugantic' - -from typing import Iterable, List -from benedict import benedict -from pydantic import Field - - -from .base_hook import BaseHook, HookType - - - -class BaseSearchBackend(BaseHook): - hook_type: HookType = 'SEARCHBACKEND' - - name: str = Field() # e.g. 'singlefile' - - @staticmethod - def index(snapshot_id: str, texts: List[str]): - return - - @staticmethod - def flush(snapshot_ids: Iterable[str]): - return - - @staticmethod - def search(text: str) -> List[str]: - raise NotImplementedError("search method must be implemented by subclass") - - - def register(self, settings, parent_plugin=None): - # self._plugin = parent_plugin # for debugging only, never rely on this! 
- - # Install queue into settings.SEARCH_BACKENDS - settings.SEARCH_BACKENDS = getattr(settings, "SEARCH_BACKENDS", None) or benedict({}) - settings.SEARCH_BACKENDS[self.id] = self - - # Record installed hook into settings.HOOKS - super().register(settings, parent_plugin=parent_plugin) - diff --git a/archivebox/plugantic/management/__init__.py b/archivebox/plugantic/management/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/archivebox/plugantic/management/commands/__init__.py b/archivebox/plugantic/management/commands/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/archivebox/plugantic/management/commands/pkg.py b/archivebox/plugantic/management/commands/pkg.py deleted file mode 100644 index 2621021b..00000000 --- a/archivebox/plugantic/management/commands/pkg.py +++ /dev/null @@ -1,72 +0,0 @@ -# __package__ = 'archivebox.plugantic.management.commands' - -# from django.core.management.base import BaseCommand -# from django.conf import settings - -# from pydantic_pkgr import Binary, BinProvider, BrewProvider, EnvProvider, SemVer -# from pydantic_pkgr.binprovider import bin_abspath - -# from ....config import bin_path -# from ...base_binary import env - - -# class Command(BaseCommand): -# def handle(self, *args, method, **options): -# method(*args, **options) - -# def add_arguments(self, parser): -# subparsers = parser.add_subparsers(title="sub-commands", required=True) - -# list_parser = subparsers.add_parser("list", help="List archivebox runtime dependencies.") -# list_parser.set_defaults(method=self.list) - -# install_parser = subparsers.add_parser("install", help="Install archivebox runtime dependencies.") -# install_parser.add_argument("--update", action="store_true", help="Update dependencies to latest versions.") -# install_parser.add_argument("package_names", nargs="+", type=str) -# install_parser.set_defaults(method=self.install) - -# def list(self, *args, **options): -# 
self.stdout.write('################# PLUGINS ####################') -# for plugin in settings.PLUGINS.values(): -# self.stdout.write(f'{plugin.name}:') -# for binary in plugin.binaries: -# try: -# binary = binary.load() -# except Exception as e: -# # import ipdb; ipdb.set_trace() -# raise -# self.stdout.write(f' {binary.name.ljust(14)} {str(binary.version).ljust(11)} {binary.binprovider.INSTALLER_BIN.ljust(5)} {binary.abspath}') - -# self.stdout.write('\n################# LEGACY ####################') -# for bin_key, dependency in settings.CONFIG.DEPENDENCIES.items(): -# bin_name = settings.CONFIG[bin_key] - -# self.stdout.write(f'{bin_key}: {bin_name}') - -# # binary = Binary(name=package_name, providers=[env]) -# # print(binary) - -# # try: -# # loaded_bin = binary.load() -# # self.stdout.write( -# # self.style.SUCCESS(f'Successfully loaded {package_name}:') + str(loaded_bin) -# # ) -# # except Exception as e: -# # self.stderr.write( -# # self.style.ERROR(f"Error loading {package_name}: {e}") -# # ) - -# def install(self, *args, bright, **options): -# for package_name in options["package_names"]: -# binary = Binary(name=package_name, providers=[env]) -# print(binary) - -# try: -# loaded_bin = binary.load() -# self.stdout.write( -# self.style.SUCCESS(f'Successfully loaded {package_name}:') + str(loaded_bin) -# ) -# except Exception as e: -# self.stderr.write( -# self.style.ERROR(f"Error loading {package_name}: {e}") -# ) diff --git a/archivebox/plugantic/tests.py b/archivebox/plugantic/tests.py deleted file mode 100644 index e1b0623c..00000000 --- a/archivebox/plugantic/tests.py +++ /dev/null @@ -1,337 +0,0 @@ -__package__ = 'archivebox.plugantic' - -from django.test import TestCase - -from .ini_to_toml import convert, TOML_HEADER - -TEST_INPUT = """ -[SERVER_CONFIG] -IS_TTY=False -USE_COLOR=False -SHOW_PROGRESS=False -IN_DOCKER=False -IN_QEMU=False -PUID=501 -PGID=20 -OUTPUT_DIR=/opt/archivebox/data -CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf -ONLY_NEW=True 
-TIMEOUT=60 -MEDIA_TIMEOUT=3600 -OUTPUT_PERMISSIONS=644 -RESTRICT_FILE_NAMES=windows -URL_DENYLIST=\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$ -URL_ALLOWLIST=None -ADMIN_USERNAME=None -ADMIN_PASSWORD=None -ENFORCE_ATOMIC_WRITES=True -TAG_SEPARATOR_PATTERN=[,] -SECRET_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx -BIND_ADDR=127.0.0.1:8000 -ALLOWED_HOSTS=* -DEBUG=False -PUBLIC_INDEX=True -PUBLIC_SNAPSHOTS=True -PUBLIC_ADD_VIEW=False -FOOTER_INFO=Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests. -SNAPSHOTS_PER_PAGE=40 -CUSTOM_TEMPLATES_DIR=None -TIME_ZONE=UTC -TIMEZONE=UTC -REVERSE_PROXY_USER_HEADER=Remote-User -REVERSE_PROXY_WHITELIST= -LOGOUT_REDIRECT_URL=/ -PREVIEW_ORIGINALS=True -LDAP=False -LDAP_SERVER_URI=None -LDAP_BIND_DN=None -LDAP_BIND_PASSWORD=None -LDAP_USER_BASE=None -LDAP_USER_FILTER=None -LDAP_USERNAME_ATTR=None -LDAP_FIRSTNAME_ATTR=None -LDAP_LASTNAME_ATTR=None -LDAP_EMAIL_ATTR=None -LDAP_CREATE_SUPERUSER=False -SAVE_TITLE=True -SAVE_FAVICON=True -SAVE_WGET=True -SAVE_WGET_REQUISITES=True -SAVE_SINGLEFILE=True -SAVE_READABILITY=True -SAVE_MERCURY=True -SAVE_HTMLTOTEXT=True -SAVE_PDF=True -SAVE_SCREENSHOT=True -SAVE_DOM=True -SAVE_HEADERS=True -SAVE_WARC=True -SAVE_GIT=True -SAVE_MEDIA=True -SAVE_ARCHIVE_DOT_ORG=True -RESOLUTION=1440,2000 -GIT_DOMAINS=github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht -CHECK_SSL_VALIDITY=True -MEDIA_MAX_SIZE=750m -USER_AGENT=None -CURL_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0) -WGET_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5 
-CHROME_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) -COOKIES_FILE=None -CHROME_USER_DATA_DIR=None -CHROME_TIMEOUT=0 -CHROME_HEADLESS=True -CHROME_SANDBOX=True -CHROME_EXTRA_ARGS=[] -YOUTUBEDL_ARGS=['--restrict-filenames', '--trim-filenames', '128', '--write-description', '--write-info-json', '--write-annotations', '--write-thumbnail', '--no-call-home', '--write-sub', '--write-auto-subs', '--convert-subs=srt', '--yes-playlist', '--continue', '--no-abort-on-error', '--ignore-errors', '--geo-bypass', '--add-metadata', '--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)'] -YOUTUBEDL_EXTRA_ARGS=[] -WGET_ARGS=['--no-verbose', '--adjust-extension', '--convert-links', '--force-directories', '--backup-converted', '--span-hosts', '--no-parent', '-e', 'robots=off'] -WGET_EXTRA_ARGS=[] -CURL_ARGS=['--silent', '--location', '--compressed'] -CURL_EXTRA_ARGS=[] -GIT_ARGS=['--recursive'] -SINGLEFILE_ARGS=[] -SINGLEFILE_EXTRA_ARGS=[] -MERCURY_ARGS=['--format=text'] -MERCURY_EXTRA_ARGS=[] -FAVICON_PROVIDER=https://www.google.com/s2/favicons?domain={} -USE_INDEXING_BACKEND=True -USE_SEARCHING_BACKEND=True -SEARCH_BACKEND_ENGINE=ripgrep -SEARCH_BACKEND_HOST_NAME=localhost -SEARCH_BACKEND_PORT=1491 -SEARCH_BACKEND_PASSWORD=SecretPassword -SEARCH_PROCESS_HTML=True -SONIC_COLLECTION=archivebox -SONIC_BUCKET=snapshots -SEARCH_BACKEND_TIMEOUT=90 -FTS_SEPARATE_DATABASE=True -FTS_TOKENIZERS=porter unicode61 remove_diacritics 2 -FTS_SQLITE_MAX_LENGTH=1000000000 -USE_CURL=True -USE_WGET=True -USE_SINGLEFILE=True -USE_READABILITY=True -USE_MERCURY=True -USE_GIT=True -USE_CHROME=True -USE_NODE=True -USE_YOUTUBEDL=True -USE_RIPGREP=True -CURL_BINARY=curl -GIT_BINARY=git -WGET_BINARY=wget -SINGLEFILE_BINARY=single-file -READABILITY_BINARY=readability-extractor -MERCURY_BINARY=postlight-parser -YOUTUBEDL_BINARY=yt-dlp 
-NODE_BINARY=node -RIPGREP_BINARY=rg -CHROME_BINARY=chrome -POCKET_CONSUMER_KEY=None -USER=squash -PACKAGE_DIR=/opt/archivebox/archivebox -TEMPLATES_DIR=/opt/archivebox/archivebox/templates -ARCHIVE_DIR=/opt/archivebox/data/archive -SOURCES_DIR=/opt/archivebox/data/sources -LOGS_DIR=/opt/archivebox/data/logs -PERSONAS_DIR=/opt/archivebox/data/personas -URL_DENYLIST_PTN=re.compile('\\.(css|js|otf|ttf|woff|woff2|gstatic\\.com|googleapis\\.com/css)(\\?.*)?$', re.IGNORECASE|re.MULTILINE) -URL_ALLOWLIST_PTN=None -DIR_OUTPUT_PERMISSIONS=755 -ARCHIVEBOX_BINARY=/opt/archivebox/.venv/bin/archivebox -VERSION=0.8.0 -COMMIT_HASH=102e87578c6036bb0132dd1ebd17f8f05ffc880f -BUILD_TIME=2024-05-15 03:28:05 1715768885 -VERSIONS_AVAILABLE=None -CAN_UPGRADE=False -PYTHON_BINARY=/opt/archivebox/.venv/bin/python3.10 -PYTHON_VERSION=3.10.14 -DJANGO_BINARY=/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py -DJANGO_VERSION=5.0.6 final (0) -SQLITE_BINARY=/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py -SQLITE_VERSION=2.6.0 -CURL_VERSION=curl 8.4.0 (x86_64-apple-darwin23.0) -WGET_VERSION=GNU Wget 1.24.5 -WGET_AUTO_COMPRESSION=True -RIPGREP_VERSION=ripgrep 14.1.0 -SINGLEFILE_VERSION=None -READABILITY_VERSION=None -MERCURY_VERSION=None -GIT_VERSION=git version 2.44.0 -YOUTUBEDL_VERSION=2024.04.09 -CHROME_VERSION=Google Chrome 124.0.6367.207 -NODE_VERSION=v21.7.3 -""" - - -EXPECTED_OUTPUT = TOML_HEADER + '''[SERVER_CONFIG] -IS_TTY = false -USE_COLOR = false -SHOW_PROGRESS = false -IN_DOCKER = false -IN_QEMU = false -PUID = 501 -PGID = 20 -OUTPUT_DIR = "/opt/archivebox/data" -CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf" -ONLY_NEW = true -TIMEOUT = 60 -MEDIA_TIMEOUT = 3600 -OUTPUT_PERMISSIONS = 644 -RESTRICT_FILE_NAMES = "windows" -URL_DENYLIST = "\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$" -URL_ALLOWLIST = null -ADMIN_USERNAME = null -ADMIN_PASSWORD = null 
-ENFORCE_ATOMIC_WRITES = true -TAG_SEPARATOR_PATTERN = "[,]" -SECRET_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" -BIND_ADDR = "127.0.0.1:8000" -ALLOWED_HOSTS = "*" -DEBUG = false -PUBLIC_INDEX = true -PUBLIC_SNAPSHOTS = true -PUBLIC_ADD_VIEW = false -FOOTER_INFO = "Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests." -SNAPSHOTS_PER_PAGE = 40 -CUSTOM_TEMPLATES_DIR = null -TIME_ZONE = "UTC" -TIMEZONE = "UTC" -REVERSE_PROXY_USER_HEADER = "Remote-User" -REVERSE_PROXY_WHITELIST = "" -LOGOUT_REDIRECT_URL = "/" -PREVIEW_ORIGINALS = true -LDAP = false -LDAP_SERVER_URI = null -LDAP_BIND_DN = null -LDAP_BIND_PASSWORD = null -LDAP_USER_BASE = null -LDAP_USER_FILTER = null -LDAP_USERNAME_ATTR = null -LDAP_FIRSTNAME_ATTR = null -LDAP_LASTNAME_ATTR = null -LDAP_EMAIL_ATTR = null -LDAP_CREATE_SUPERUSER = false -SAVE_TITLE = true -SAVE_FAVICON = true -SAVE_WGET = true -SAVE_WGET_REQUISITES = true -SAVE_SINGLEFILE = true -SAVE_READABILITY = true -SAVE_MERCURY = true -SAVE_HTMLTOTEXT = true -SAVE_PDF = true -SAVE_SCREENSHOT = true -SAVE_DOM = true -SAVE_HEADERS = true -SAVE_WARC = true -SAVE_GIT = true -SAVE_MEDIA = true -SAVE_ARCHIVE_DOT_ORG = true -RESOLUTION = [1440, 2000] -GIT_DOMAINS = "github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht" -CHECK_SSL_VALIDITY = true -MEDIA_MAX_SIZE = "750m" -USER_AGENT = null -CURL_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)" -WGET_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5" -CHROME_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) 
Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)" -COOKIES_FILE = null -CHROME_USER_DATA_DIR = null -CHROME_TIMEOUT = false -CHROME_HEADLESS = true -CHROME_SANDBOX = true -CHROME_EXTRA_ARGS = [] -YOUTUBEDL_ARGS = ["--restrict-filenames", "--trim-filenames", "128", "--write-description", "--write-info-json", "--write-annotations", "--write-thumbnail", "--no-call-home", "--write-sub", "--write-auto-subs", "--convert-subs=srt", "--yes-playlist", "--continue", "--no-abort-on-error", "--ignore-errors", "--geo-bypass", "--add-metadata", "--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)"] -YOUTUBEDL_EXTRA_ARGS = [] -WGET_ARGS = ["--no-verbose", "--adjust-extension", "--convert-links", "--force-directories", "--backup-converted", "--span-hosts", "--no-parent", "-e", "robots=off"] -WGET_EXTRA_ARGS = [] -CURL_ARGS = ["--silent", "--location", "--compressed"] -CURL_EXTRA_ARGS = [] -GIT_ARGS = ["--recursive"] -SINGLEFILE_ARGS = [] -SINGLEFILE_EXTRA_ARGS = [] -MERCURY_ARGS = ["--format=text"] -MERCURY_EXTRA_ARGS = [] -FAVICON_PROVIDER = "https://www.google.com/s2/favicons?domain={}" -USE_INDEXING_BACKEND = true -USE_SEARCHING_BACKEND = true -SEARCH_BACKEND_ENGINE = "ripgrep" -SEARCH_BACKEND_HOST_NAME = "localhost" -SEARCH_BACKEND_PORT = 1491 -SEARCH_BACKEND_PASSWORD = "SecretPassword" -SEARCH_PROCESS_HTML = true -SONIC_COLLECTION = "archivebox" -SONIC_BUCKET = "snapshots" -SEARCH_BACKEND_TIMEOUT = 90 -FTS_SEPARATE_DATABASE = true -FTS_TOKENIZERS = "porter unicode61 remove_diacritics 2" -FTS_SQLITE_MAX_LENGTH = 1000000000 -USE_CURL = true -USE_WGET = true -USE_SINGLEFILE = true -USE_READABILITY = true -USE_MERCURY = true -USE_GIT = true -USE_CHROME = true -USE_NODE = true -USE_YOUTUBEDL = true -USE_RIPGREP = true -CURL_BINARY = "curl" -GIT_BINARY = "git" -WGET_BINARY = "wget" -SINGLEFILE_BINARY = "single-file" -READABILITY_BINARY = "readability-extractor" -MERCURY_BINARY = "postlight-parser" -YOUTUBEDL_BINARY = 
"yt-dlp" -NODE_BINARY = "node" -RIPGREP_BINARY = "rg" -CHROME_BINARY = "chrome" -POCKET_CONSUMER_KEY = null -USER = "squash" -PACKAGE_DIR = "/opt/archivebox/archivebox" -TEMPLATES_DIR = "/opt/archivebox/archivebox/templates" -ARCHIVE_DIR = "/opt/archivebox/data/archive" -SOURCES_DIR = "/opt/archivebox/data/sources" -LOGS_DIR = "/opt/archivebox/data/logs" -PERSONAS_DIR = "/opt/archivebox/data/personas" -URL_DENYLIST_PTN = "re.compile(\'\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$\', re.IGNORECASE|re.MULTILINE)" -URL_ALLOWLIST_PTN = null -DIR_OUTPUT_PERMISSIONS = 755 -ARCHIVEBOX_BINARY = "/opt/archivebox/.venv/bin/archivebox" -VERSION = "0.8.0" -COMMIT_HASH = "102e87578c6036bb0132dd1ebd17f8f05ffc880f" -BUILD_TIME = "2024-05-15 03:28:05 1715768885" -VERSIONS_AVAILABLE = null -CAN_UPGRADE = false -PYTHON_BINARY = "/opt/archivebox/.venv/bin/python3.10" -PYTHON_VERSION = "3.10.14" -DJANGO_BINARY = "/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py" -DJANGO_VERSION = "5.0.6 final (0)" -SQLITE_BINARY = "/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py" -SQLITE_VERSION = "2.6.0" -CURL_VERSION = "curl 8.4.0 (x86_64-apple-darwin23.0)" -WGET_VERSION = "GNU Wget 1.24.5" -WGET_AUTO_COMPRESSION = true -RIPGREP_VERSION = "ripgrep 14.1.0" -SINGLEFILE_VERSION = null -READABILITY_VERSION = null -MERCURY_VERSION = null -GIT_VERSION = "git version 2.44.0" -YOUTUBEDL_VERSION = "2024.04.09" -CHROME_VERSION = "Google Chrome 124.0.6367.207" -NODE_VERSION = "v21.7.3"''' - - -class IniToTomlTests(TestCase): - def test_convert(self): - first_output = convert(TEST_INPUT) # make sure ini -> toml parses correctly - second_output = convert(first_output) # make sure toml -> toml parses/dumps consistently - assert first_output == second_output == EXPECTED_OUTPUT # make sure parsing is indempotent - -# # DEBUGGING -# import sys -# import difflib -# 
sys.stdout.writelines(difflib.context_diff(first_output, second_output, fromfile='first', tofile='second')) -# print(repr(second_output)) diff --git a/archivebox/plugins_auth/ldap/apps.py b/archivebox/plugins_auth/ldap/apps.py index fab177d3..3d42d41e 100644 --- a/archivebox/plugins_auth/ldap/apps.py +++ b/archivebox/plugins_auth/ldap/apps.py @@ -1,4 +1,4 @@ -__package__ = 'archivebox.plugins_auth.ldap' +__package__ = 'plugins_auth.ldap' import inspect @@ -6,13 +6,11 @@ from typing import List, Dict from pathlib import Path from pydantic import InstanceOf -from django.conf import settings - from pydantic_pkgr import BinProviderName, ProviderLookupDict, SemVer -from plugantic.base_plugin import BasePlugin -from plugantic.base_hook import BaseHook -from plugantic.base_binary import BaseBinary, BaseBinProvider +from abx.archivebox.base_plugin import BasePlugin +from abx.archivebox.base_hook import BaseHook +from abx.archivebox.base_binary import BaseBinary, BaseBinProvider from plugins_pkg.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER from .settings import LDAP_CONFIG, LDAP_LIB @@ -51,5 +49,4 @@ class LdapAuthPlugin(BasePlugin): PLUGIN = LdapAuthPlugin() -# PLUGIN.register(settings) DJANGO_APP = PLUGIN.AppConfig diff --git a/archivebox/plugins_auth/ldap/settings.py b/archivebox/plugins_auth/ldap/settings.py index a4aa0b40..36480168 100644 --- a/archivebox/plugins_auth/ldap/settings.py +++ b/archivebox/plugins_auth/ldap/settings.py @@ -3,9 +3,9 @@ __package__ = 'archivebox.plugins_auth.ldap' import sys from typing import Dict, List, ClassVar, Optional -from pydantic import Field, model_validator +from pydantic import Field, model_validator, computed_field -from ...plugantic.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName LDAP_LIB = None try: @@ -35,10 +35,10 @@ class LdapConfig(BaseConfigSet): LDAP_USER_FILTER: str = Field(default=None) LDAP_CREATE_SUPERUSER: bool = 
Field(default=False) - LDAP_USERNAME_ATTR: str = Field(default=None) - LDAP_FIRSTNAME_ATTR: str = Field(default=None) - LDAP_LASTNAME_ATTR: str = Field(default=None) - LDAP_EMAIL_ATTR: str = Field(default=None) + LDAP_USERNAME_ATTR: str = Field(default='username') + LDAP_FIRSTNAME_ATTR: str = Field(default='first_name') + LDAP_LASTNAME_ATTR: str = Field(default='last_name') + LDAP_EMAIL_ATTR: str = Field(default='email') @model_validator(mode='after') def validate_ldap_config(self): @@ -50,14 +50,7 @@ class LdapConfig(BaseConfigSet): self.update(LDAP_ENABLED=False) # Check that all required LDAP config options are set - all_config_is_set = ( - self.LDAP_SERVER_URI - and self.LDAP_BIND_DN - and self.LDAP_BIND_PASSWORD - and self.LDAP_USER_BASE - and self.LDAP_USER_FILTER - ) - if self.LDAP_ENABLED and not all_config_is_set: + if self.LDAP_ENABLED and not self.LDAP_CONFIG_IS_SET: missing_config_options = [ key for key, value in self.model_dump().items() if value is None and key != 'LDAP_ENABLED' @@ -66,7 +59,20 @@ class LdapConfig(BaseConfigSet): sys.stderr.write(f' Missing: {", ".join(missing_config_options)}\n') self.update(LDAP_ENABLED=False) return self + + @computed_field + @property + def LDAP_CONFIG_IS_SET(self) -> bool: + """Check that all required LDAP config options are set""" + return bool(LDAP_LIB) and self.LDAP_ENABLED and bool( + self.LDAP_SERVER_URI + and self.LDAP_BIND_DN + and self.LDAP_BIND_PASSWORD + and self.LDAP_USER_BASE + and self.LDAP_USER_FILTER + ) + @computed_field @property def LDAP_USER_ATTR_MAP(self) -> Dict[str, str]: return { @@ -76,6 +82,7 @@ class LdapConfig(BaseConfigSet): 'email': self.LDAP_EMAIL_ATTR, } + @computed_field @property def AUTHENTICATION_BACKENDS(self) -> List[str]: return [ @@ -83,9 +90,10 @@ class LdapConfig(BaseConfigSet): 'django_auth_ldap.backend.LDAPBackend', ] + @computed_field @property def AUTH_LDAP_USER_SEARCH(self) -> Optional[object]: - return LDAP_LIB and LDAPSearch( + return self.LDAP_USER_FILTER and 
LDAPSearch( self.LDAP_USER_BASE, LDAP_LIB.SCOPE_SUBTREE, # type: ignore '(&(' + self.LDAP_USERNAME_ATTR + '=%(user)s)' + self.LDAP_USER_FILTER + ')', diff --git a/archivebox/plugins_extractor/archivedotorg/apps.py b/archivebox/plugins_extractor/archivedotorg/apps.py index 8e20ecf5..a06b5108 100644 --- a/archivebox/plugins_extractor/archivedotorg/apps.py +++ b/archivebox/plugins_extractor/archivedotorg/apps.py @@ -2,9 +2,9 @@ __package__ = 'archivebox.plugins_extractor.archivedotorg' from typing import List -from plugantic.base_plugin import BasePlugin -from plugantic.base_configset import BaseConfigSet -from plugantic.base_hook import BaseHook +from abx.archivebox.base_plugin import BasePlugin +from abx.archivebox.base_configset import BaseConfigSet +from abx.archivebox.base_hook import BaseHook ###################### Config ########################## diff --git a/archivebox/plugins_extractor/chrome/apps.py b/archivebox/plugins_extractor/chrome/apps.py index f1cf1b92..8b08ae30 100644 --- a/archivebox/plugins_extractor/chrome/apps.py +++ b/archivebox/plugins_extractor/chrome/apps.py @@ -21,12 +21,12 @@ from pydantic_pkgr import ( import archivebox # Depends on other Django apps: -from plugantic.base_plugin import BasePlugin -from plugantic.base_configset import BaseConfigSet, ConfigSectionName -from plugantic.base_binary import BaseBinary, env -# from plugantic.base_extractor import BaseExtractor -# from plugantic.base_queue import BaseQueue -from plugantic.base_hook import BaseHook +from abx.archivebox.base_plugin import BasePlugin +from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_binary import BaseBinary, env +# from abx.archivebox.base_extractor import BaseExtractor +# from abx.archivebox.base_queue import BaseQueue +from abx.archivebox.base_hook import BaseHook # Depends on Other Plugins: from plugins_sys.config.apps import ARCHIVING_CONFIG, SHELL_CONFIG diff --git a/archivebox/plugins_extractor/favicon/apps.py 
b/archivebox/plugins_extractor/favicon/apps.py index e05bb823..bfaae21e 100644 --- a/archivebox/plugins_extractor/favicon/apps.py +++ b/archivebox/plugins_extractor/favicon/apps.py @@ -2,9 +2,9 @@ __package__ = 'archivebox.plugins_extractor.favicon' from typing import List -from plugantic.base_plugin import BasePlugin -from plugantic.base_configset import BaseConfigSet -from plugantic.base_hook import BaseHook +from abx.archivebox.base_plugin import BasePlugin +from abx.archivebox.base_configset import BaseConfigSet +from abx.archivebox.base_hook import BaseHook ###################### Config ########################## diff --git a/archivebox/plugins_extractor/readability/apps.py b/archivebox/plugins_extractor/readability/apps.py index b695ee52..14b0a3a6 100644 --- a/archivebox/plugins_extractor/readability/apps.py +++ b/archivebox/plugins_extractor/readability/apps.py @@ -11,11 +11,11 @@ from pydantic import InstanceOf, Field, validate_call from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, ShallowBinary # Depends on other Django apps: -from plugantic.base_plugin import BasePlugin -from plugantic.base_configset import BaseConfigSet, ConfigSectionName -from plugantic.base_binary import BaseBinary, env -from plugantic.base_extractor import BaseExtractor -from plugantic.base_hook import BaseHook +from abx.archivebox.base_plugin import BasePlugin +from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_binary import BaseBinary, env +from abx.archivebox.base_extractor import BaseExtractor +from abx.archivebox.base_hook import BaseHook # Depends on Other Plugins: from plugins_sys.config.apps import ARCHIVING_CONFIG diff --git a/archivebox/plugins_extractor/singlefile/apps.py b/archivebox/plugins_extractor/singlefile/apps.py index 403a5220..e737e87a 100644 --- a/archivebox/plugins_extractor/singlefile/apps.py +++ b/archivebox/plugins_extractor/singlefile/apps.py @@ -11,12 +11,12 @@ from 
pydantic import InstanceOf, Field, validate_call from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, bin_abspath, ShallowBinary # Depends on other Django apps: -from plugantic.base_plugin import BasePlugin -from plugantic.base_configset import BaseConfigSet, ConfigSectionName -from plugantic.base_binary import BaseBinary, env -from plugantic.base_extractor import BaseExtractor -from plugantic.base_queue import BaseQueue -from plugantic.base_hook import BaseHook +from abx.archivebox.base_plugin import BasePlugin +from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_binary import BaseBinary, env +from abx.archivebox.base_extractor import BaseExtractor +from abx.archivebox.base_queue import BaseQueue +from abx.archivebox.base_hook import BaseHook # Depends on Other Plugins: from plugins_sys.config.apps import ARCHIVING_CONFIG @@ -84,19 +84,6 @@ class SinglefileBinary(BaseBinary): return BaseBinary.install(self, binprovider_name=binprovider_name or LIB_NPM_BINPROVIDER.name) -# ALTERNATIVE INSTALL METHOD using Ansible: -# install_playbook = PLUGANTIC_DIR / 'ansible' / 'install_singlefile.yml' -# singlefile_bin = run_playbook(install_playbook, data_dir=settings.CONFIG.OUTPUT_DIR, quiet=quiet).BINARIES.singlefile -# return self.__class__.model_validate( -# { -# **self.model_dump(), -# "loaded_abspath": singlefile_bin.abspath, -# "loaded_version": singlefile_bin.version, -# "loaded_binprovider": env, -# "binproviders_supported": self.binproviders_supported, -# } -# ) - SINGLEFILE_BINARY = SinglefileBinary() diff --git a/archivebox/plugins_extractor/wget/apps.py b/archivebox/plugins_extractor/wget/apps.py new file mode 100644 index 00000000..e794271b --- /dev/null +++ b/archivebox/plugins_extractor/wget/apps.py @@ -0,0 +1,74 @@ +from typing import List +from abx.archivebox.base_plugin import BasePlugin, InstanceOf, BaseHook + + +# class WgetToggleConfig(ConfigSet): +# section: ConfigSectionName 
= 'ARCHIVE_METHOD_TOGGLES' + +# SAVE_WGET: bool = True +# SAVE_WARC: bool = True + +# class WgetDependencyConfig(ConfigSet): +# section: ConfigSectionName = 'DEPENDENCY_CONFIG' + +# WGET_BINARY: str = Field(default='wget') +# WGET_ARGS: Optional[List[str]] = Field(default=None) +# WGET_EXTRA_ARGS: List[str] = [] +# WGET_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}'] + +# class WgetOptionsConfig(ConfigSet): +# section: ConfigSectionName = 'ARCHIVE_METHOD_OPTIONS' + +# # loaded from shared config +# WGET_AUTO_COMPRESSION: bool = Field(default=True) +# SAVE_WGET_REQUISITES: bool = Field(default=True) +# WGET_USER_AGENT: str = Field(default='', alias='USER_AGENT') +# WGET_TIMEOUT: int = Field(default=60, alias='TIMEOUT') +# WGET_CHECK_SSL_VALIDITY: bool = Field(default=True, alias='CHECK_SSL_VALIDITY') +# WGET_RESTRICT_FILE_NAMES: str = Field(default='windows', alias='RESTRICT_FILE_NAMES') +# WGET_COOKIES_FILE: Optional[Path] = Field(default=None, alias='COOKIES_FILE') + + +# CONFIG = { +# 'CHECK_SSL_VALIDITY': False, +# 'SAVE_WARC': False, +# 'TIMEOUT': 999, +# } + + +# WGET_CONFIG = [ +# WgetToggleConfig(**CONFIG), +# WgetDependencyConfig(**CONFIG), +# WgetOptionsConfig(**CONFIG), +# ] + + + +# class WgetExtractor(Extractor): +# name: ExtractorName = 'wget' +# binary: Binary = WgetBinary() + +# def get_output_path(self, snapshot) -> Path: +# return get_wget_output_path(snapshot) + + +# class WarcExtractor(Extractor): +# name: ExtractorName = 'warc' +# binary: Binary = WgetBinary() + +# def get_output_path(self, snapshot) -> Path: +# return get_wget_output_path(snapshot) + + + + + +class WgetPlugin(BasePlugin): + app_label: str = 'wget' + verbose_name: str = 'WGET' + + hooks: List[InstanceOf[BaseHook]] = [] + + +PLUGIN = WgetPlugin() +DJANGO_APP = PLUGIN.AppConfig diff --git a/archivebox/plugins_extractor/ytdlp/apps.py b/archivebox/plugins_extractor/ytdlp/apps.py index 335b4e1a..e6355103 100644 --- a/archivebox/plugins_extractor/ytdlp/apps.py +++ 
b/archivebox/plugins_extractor/ytdlp/apps.py @@ -7,10 +7,10 @@ from pydantic import InstanceOf, Field, model_validator, AliasChoices from django.conf import settings from pydantic_pkgr import BinProvider, BinName, BinProviderName, ProviderLookupDict -from plugantic.base_plugin import BasePlugin -from plugantic.base_configset import BaseConfigSet, ConfigSectionName -from plugantic.base_binary import BaseBinary, env, apt, brew -from plugantic.base_hook import BaseHook +from abx.archivebox.base_plugin import BasePlugin +from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_binary import BaseBinary, env, apt, brew +from abx.archivebox.base_hook import BaseHook from plugins_sys.config.apps import ARCHIVING_CONFIG from plugins_pkg.pip.apps import pip diff --git a/archivebox/plugins_pkg/npm/apps.py b/archivebox/plugins_pkg/npm/apps.py index fce0dad1..31e92c4f 100644 --- a/archivebox/plugins_pkg/npm/apps.py +++ b/archivebox/plugins_pkg/npm/apps.py @@ -11,10 +11,10 @@ from pydantic import InstanceOf, model_validator from pydantic_pkgr import BinProvider, NpmProvider, BinName, PATHStr, BinProviderName -from plugantic.base_plugin import BasePlugin -from plugantic.base_configset import BaseConfigSet -from plugantic.base_binary import BaseBinary, BaseBinProvider, env, apt, brew -from plugantic.base_hook import BaseHook +from abx.archivebox.base_plugin import BasePlugin +from abx.archivebox.base_configset import BaseConfigSet +from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env, apt, brew +from abx.archivebox.base_hook import BaseHook ###################### Config ########################## diff --git a/archivebox/plugins_pkg/pip/apps.py b/archivebox/plugins_pkg/pip/apps.py index c283d2f4..78021c48 100644 --- a/archivebox/plugins_pkg/pip/apps.py +++ b/archivebox/plugins_pkg/pip/apps.py @@ -15,11 +15,11 @@ from django.db.backends.sqlite3.base import Database as django_sqlite3 # typ from django.core.checks import 
Error, Tags from pydantic_pkgr import BinProvider, PipProvider, BinName, BinProviderName, ProviderLookupDict, SemVer -from plugantic.base_plugin import BasePlugin -from plugantic.base_configset import BaseConfigSet, ConfigSectionName -from plugantic.base_check import BaseCheck -from plugantic.base_binary import BaseBinary, BaseBinProvider, env, apt, brew -from plugantic.base_hook import BaseHook +from abx.archivebox.base_plugin import BasePlugin +from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_check import BaseCheck +from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env, apt, brew +from abx.archivebox.base_hook import BaseHook from ...misc.logging import hint diff --git a/archivebox/plugins_pkg/playwright/apps.py b/archivebox/plugins_pkg/playwright/apps.py index 9198eca5..8c01c997 100644 --- a/archivebox/plugins_pkg/playwright/apps.py +++ b/archivebox/plugins_pkg/playwright/apps.py @@ -22,12 +22,12 @@ from pydantic_pkgr import ( import archivebox # Depends on other Django apps: -from plugantic.base_plugin import BasePlugin -from plugantic.base_configset import BaseConfigSet -from plugantic.base_binary import BaseBinary, BaseBinProvider, env -# from plugantic.base_extractor import BaseExtractor -# from plugantic.base_queue import BaseQueue -from plugantic.base_hook import BaseHook +from abx.archivebox.base_plugin import BasePlugin +from abx.archivebox.base_configset import BaseConfigSet +from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env +# from abx.archivebox.base_extractor import BaseExtractor +# from abx.archivebox.base_queue import BaseQueue +from abx.archivebox.base_hook import BaseHook from plugins_pkg.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER diff --git a/archivebox/plugins_pkg/puppeteer/apps.py b/archivebox/plugins_pkg/puppeteer/apps.py index 0efd1e8c..f2d4adf0 100644 --- a/archivebox/plugins_pkg/puppeteer/apps.py +++ 
b/archivebox/plugins_pkg/puppeteer/apps.py @@ -19,12 +19,12 @@ from pydantic_pkgr import ( import archivebox # Depends on other Django apps: -from plugantic.base_plugin import BasePlugin -from plugantic.base_configset import BaseConfigSet -from plugantic.base_binary import BaseBinary, BaseBinProvider, env -# from plugantic.base_extractor import BaseExtractor -# from plugantic.base_queue import BaseQueue -from plugantic.base_hook import BaseHook +from abx.archivebox.base_plugin import BasePlugin +from abx.archivebox.base_configset import BaseConfigSet +from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env +# from abx.archivebox.base_extractor import BaseExtractor +# from abx.archivebox.base_queue import BaseQueue +from abx.archivebox.base_hook import BaseHook # Depends on Other Plugins: from plugins_pkg.npm.apps import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER diff --git a/archivebox/plugins_search/ripgrep/apps.py b/archivebox/plugins_search/ripgrep/apps.py index 780c96a1..0e597f8e 100644 --- a/archivebox/plugins_search/ripgrep/apps.py +++ b/archivebox/plugins_search/ripgrep/apps.py @@ -13,11 +13,11 @@ from pydantic import InstanceOf, Field from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName # Depends on other Django apps: -from plugantic.base_plugin import BasePlugin -from plugantic.base_configset import BaseConfigSet, ConfigSectionName -from plugantic.base_binary import BaseBinary, env, apt, brew -from plugantic.base_hook import BaseHook -from plugantic.base_searchbackend import BaseSearchBackend +from abx.archivebox.base_plugin import BasePlugin +from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_binary import BaseBinary, env, apt, brew +from abx.archivebox.base_hook import BaseHook +from abx.archivebox.base_searchbackend import BaseSearchBackend # Depends on Other Plugins: from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG diff --git 
a/archivebox/plugins_search/sonic/apps.py b/archivebox/plugins_search/sonic/apps.py index fd630fdb..5bf37044 100644 --- a/archivebox/plugins_search/sonic/apps.py +++ b/archivebox/plugins_search/sonic/apps.py @@ -11,11 +11,11 @@ from pydantic import InstanceOf, Field, model_validator from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName # Depends on other Django apps: -from plugantic.base_plugin import BasePlugin -from plugantic.base_configset import BaseConfigSet, ConfigSectionName -from plugantic.base_binary import BaseBinary, env, brew -from plugantic.base_hook import BaseHook -from plugantic.base_searchbackend import BaseSearchBackend +from abx.archivebox.base_plugin import BasePlugin +from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_binary import BaseBinary, env, brew +from abx.archivebox.base_hook import BaseHook +from abx.archivebox.base_searchbackend import BaseSearchBackend # Depends on Other Plugins: from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG diff --git a/archivebox/plugins_search/sqlite/apps.py b/archivebox/plugins_search/sqlite/apps.py index 75e9309e..fe5949f6 100644 --- a/archivebox/plugins_search/sqlite/apps.py +++ b/archivebox/plugins_search/sqlite/apps.py @@ -1,21 +1,20 @@ __package__ = 'archivebox.plugins_search.sqlite' import sys -import sqlite3 import codecs +import sqlite3 from typing import List, ClassVar, Iterable, Callable -from django.conf import settings -from django.db import connection as database +from django.core.exceptions import ImproperlyConfigured # Depends on other PyPI/vendor packages: from pydantic import InstanceOf, Field, model_validator # Depends on other Django apps: -from plugantic.base_plugin import BasePlugin -from plugantic.base_configset import BaseConfigSet, ConfigSectionName -from plugantic.base_hook import BaseHook -from plugantic.base_searchbackend import BaseSearchBackend +from abx.archivebox.base_plugin import BasePlugin 
+from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_hook import BaseHook +from abx.archivebox.base_searchbackend import BaseSearchBackend # Depends on Other Plugins: from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG @@ -52,6 +51,7 @@ class SqliteftsConfig(BaseConfigSet): if self.SQLITEFTS_SEPARATE_DATABASE: return lambda: sqlite3.connect(self.SQLITEFTS_DB) else: + from django.db import connection as database return database.cursor @property @@ -63,16 +63,20 @@ class SqliteftsConfig(BaseConfigSet): @property def SQLITE_LIMIT_LENGTH(self) -> int: + from django.db import connection as database + # Only Python >= 3.11 supports sqlite3.Connection.getlimit(), # so fall back to the default if the API to get the real value isn't present try: limit_id = sqlite3.SQLITE_LIMIT_LENGTH - try: + + if self.SQLITEFTS_SEPARATE_DATABASE: + cursor = self.get_connection() + return cursor.connection.getlimit(limit_id) + else: with database.temporary_connection() as cursor: # type: ignore[attr-defined] return cursor.connection.getlimit(limit_id) - except AttributeError: - return database.getlimit(limit_id) - except AttributeError: + except (AttributeError, ImproperlyConfigured): return self.SQLITEFTS_MAX_LENGTH SQLITEFTS_CONFIG = SqliteftsConfig() diff --git a/archivebox/plugins_sys/config/apps.py b/archivebox/plugins_sys/config/apps.py index 67607809..4a4ab297 100644 --- a/archivebox/plugins_sys/config/apps.py +++ b/archivebox/plugins_sys/config/apps.py @@ -1,21 +1,24 @@ -__package__ = 'archivebox.plugins_sys.config' +__package__ = 'plugins_sys.config' + import os import sys import shutil -import archivebox from typing import List, ClassVar, Dict, Optional from datetime import datetime from pathlib import Path -from pydantic import InstanceOf, Field, field_validator, model_validator, computed_field + from rich import print - +from pydantic import InstanceOf, Field, field_validator, model_validator, computed_field from 
django.utils.crypto import get_random_string -from plugantic.base_plugin import BasePlugin -from plugantic.base_configset import BaseConfigSet, ConfigSectionName -from plugantic.base_hook import BaseHook, HookType -from .constants import CONSTANTS, CONSTANTS_CONFIG +from abx.archivebox.base_plugin import BasePlugin +from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName +from abx.archivebox.base_hook import BaseHook + + +import archivebox +from archivebox.constants import CONSTANTS, CONSTANTS_CONFIG # noqa ###################### Config ########################## @@ -123,6 +126,7 @@ class StorageConfig(BaseConfigSet): # not supposed to be user settable: DIR_OUTPUT_PERMISSIONS: str = Field(default=lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')) + STORAGE_CONFIG = StorageConfig() @@ -249,13 +253,13 @@ DJANGO_APP = PLUGIN.AppConfig -# register django apps -@archivebox.plugin.hookimpl -def get_INSTALLED_APPS(): - return [DJANGO_APP.name] +# # register django apps +# @abx.hookimpl +# def get_INSTALLED_APPS(): +# return [DJANGO_APP.name] -# register configs -@archivebox.plugin.hookimpl -def register_CONFIG(): - return PLUGIN.HOOKS_BY_TYPE['CONFIG'].values() +# # register configs +# @abx.hookimpl +# def register_CONFIG(): +# return PLUGIN.HOOKS_BY_TYPE['CONFIG'].values() diff --git a/archivebox/plugantic/views.py b/archivebox/plugins_sys/config/views.py similarity index 98% rename from archivebox/plugantic/views.py rename to archivebox/plugins_sys/config/views.py index 7689deec..c38a957e 100644 --- a/archivebox/plugantic/views.py +++ b/archivebox/plugins_sys/config/views.py @@ -1,8 +1,9 @@ -__package__ = 'archivebox.plugantic' +__package__ = 'abx.archivebox' import os import inspect from typing import Any, List, Dict, cast +from benedict import benedict from django.http import HttpRequest from django.conf import settings @@ -14,8 +15,7 @@ from admin_data_views.utils import render_with_table_view, render_with_item_view import 
archivebox -from ..config_stubs import AttrDict -from ..util import parse_date +from archivebox.util import parse_date def obj_to_yaml(obj: Any, indent: int=0) -> str: @@ -255,7 +255,7 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext: ) all_config_entries = cast(List[Dict[str, Any]], supervisor.getAllConfigInfo() or []) - all_config = {config["name"]: AttrDict(config) for config in all_config_entries} + all_config = {config["name"]: benedict(config) for config in all_config_entries} # Add top row for supervisord process manager rows["Name"].append(ItemLink('supervisord', key='supervisord')) @@ -274,7 +274,7 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext: # Add a row for each worker process managed by supervisord for proc in cast(List[Dict[str, Any]], supervisor.getAllProcessInfo()): - proc = AttrDict(proc) + proc = benedict(proc) # { # "name": "daphne", # "group": "daphne", @@ -334,7 +334,7 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext: start_ts = [line for line in relevant_logs.split("\n") if "RPC interface 'supervisor' initialized" in line][-1].split(",", 1)[0] uptime = str(timezone.now() - parse_date(start_ts)).split(".")[0] - proc = AttrDict( + proc = benedict( { "name": "supervisord", "pid": supervisor.getPID(), @@ -347,7 +347,7 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext: } ) else: - proc = AttrDict(get_worker(supervisor, key) or {}) + proc = benedict(get_worker(supervisor, key) or {}) relevant_config = [config for config in all_config if config['name'] == key][0] relevant_logs = supervisor.tailProcessStdoutLog(key, 0, 10_000_000)[0] diff --git a/archivebox/util.py b/archivebox/util.py index 4db47a85..b26333e0 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -26,7 +26,7 @@ except ImportError: from archivebox.constants import STATICFILE_EXTENSIONS -from archivebox.plugins_sys.config.apps import ARCHIVING_CONFIG +from 
plugins_sys.config.apps import ARCHIVING_CONFIG from .misc.logging import COLOR_DICT