diff --git a/archivebox/abx/__init__.py b/archivebox/abx/__init__.py
new file mode 100644
index 00000000..04c7d81d
--- /dev/null
+++ b/archivebox/abx/__init__.py
@@ -0,0 +1,309 @@
+import itertools
+import importlib
+from pathlib import Path
+from typing import Dict
+from benedict import benedict
+
+import pluggy
+import archivebox
+
+from . import hookspec as base_spec
+from .hookspec import hookimpl, hookspec  # noqa
+
+
+pm = pluggy.PluginManager("abx")
+pm.add_hookspecs(base_spec)
+
+def register_hookspecs(hookspecs):
+    """Import each dotted module path in `hookspecs` and add its hook specifications to `pm`."""
+    for hookspec_import_path in hookspecs:
+        hookspec_module = importlib.import_module(hookspec_import_path)
+        pm.add_hookspecs(hookspec_module)
+
+
+def find_plugins_in_dir(plugins_dir: Path, prefix: str) -> Dict[str, Path]:
+    """Return {"<prefix>.<dir name>": dir} for each direct subdirectory of `plugins_dir` that contains an apps.py."""
+    return {
+        f"{prefix}.{plugin_entrypoint.parent.name}": plugin_entrypoint.parent
+        for plugin_entrypoint in sorted(plugins_dir.glob("*/apps.py"))  # key=get_plugin_order  # Someday enforcing plugin import order may be required, but right now it's not needed
+    }   # "plugins_pkg.pip": "/app/archivebox/plugins_pkg/pip"
+
+
+def get_pip_installed_plugins(group='abx'):
+    """Return {entrypoint name: module dir} for installed entry points in `group` (replaces pm.load_setuptools_entrypoints)."""
+    import importlib.metadata
+
+    DETECTED_PLUGINS = {}   # module_name: module_dir_path
+    for dist in importlib.metadata.distributions():
+        for entrypoint in dist.entry_points:
+            if entrypoint.group != group or pm.is_blocked(entrypoint.name):
+                continue
+            DETECTED_PLUGINS[entrypoint.name] = Path(entrypoint.load().__file__).parent
+            # pm.register(plugin, name=ep.name)
+            # pm._plugin_distinfo.append((plugin, DistFacade(dist)))
+    return DETECTED_PLUGINS
+
+
+def get_plugins_in_dirs(plugin_dirs: Dict[str, Path]):
+    """Merge the find_plugins_in_dir() results for every {prefix: dir} pair in `plugin_dirs`."""
+    DETECTED_PLUGINS = {}
+    for plugin_prefix, plugin_dir in plugin_dirs.items():
+        DETECTED_PLUGINS.update(find_plugins_in_dir(plugin_dir, prefix=plugin_prefix))
+    return DETECTED_PLUGINS
+
+def get_builtin_plugins():
+    """Find the plugins located in the plugins_* directories under archivebox.PACKAGE_DIR."""
+    PLUGIN_DIRS = {
+        'plugins_sys': archivebox.PACKAGE_DIR / 'plugins_sys',
+        'plugins_pkg': archivebox.PACKAGE_DIR / 'plugins_pkg',
+        'plugins_auth': archivebox.PACKAGE_DIR / 'plugins_auth',
+        'plugins_search': archivebox.PACKAGE_DIR / 'plugins_search',
+        'plugins_extractor': archivebox.PACKAGE_DIR / 'plugins_extractor',
+    }
+    # get_plugins_in_dirs performs exactly the merge loop this function used to duplicate
+    return get_plugins_in_dirs(PLUGIN_DIRS)
+
+def get_user_plugins():
+    """Find the plugins located in archivebox.DATA_DIR / 'user_plugins'."""
+    return find_plugins_in_dir(archivebox.DATA_DIR / 'user_plugins', prefix='user_plugins')
+
+
+# BUILTIN_PLUGINS = get_builtin_plugins()
+# PIP_PLUGINS = get_pip_installed_plugins()
+# USER_PLUGINS = get_user_plugins()
+# ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS}
+
+# Load all plugins from pip packages, archivebox built-ins, and user plugins
+
+def load_plugins(plugins_dict: Dict[str, Path]):
+    """Import "<module>.apps" for each key of `plugins_dict`, register it with `pm`, and return {module: its PLUGIN attribute}."""
+    LOADED_PLUGINS = {}
+    for plugin_module in plugins_dict:
+        # print(f'Loading plugin: {plugin_module} from {plugin_dir}')
+        plugin_module_loaded = importlib.import_module(plugin_module + '.apps')
+        pm.register(plugin_module_loaded)
+        LOADED_PLUGINS[plugin_module] = plugin_module_loaded.PLUGIN
+        # print(f'    √ Loaded plugin: {plugin_module}')
+    return LOADED_PLUGINS
+
+def get_registered_plugins():
+    """Return {name: {"name", "hooks", plus "version" when dist info is available}} for every plugin registered with `pm`."""
+    plugins = {}
+    plugin_to_distinfo = dict(pm.list_plugin_distinfo())
+    for plugin in pm.get_plugins():
+        plugin_info = {
+            "name": plugin.__name__,
+            "hooks": [h.name for h in pm.get_hookcallers(plugin) or ()],
+        }
+        distinfo = plugin_to_distinfo.get(plugin)
+        if distinfo:
+            plugin_info["version"] = distinfo.version
+            plugin_info["name"] = (
+                getattr(distinfo, "name", None) or distinfo.project_name
+            )
+        plugins[plugin_info["name"]] = plugin_info
+    return plugins
+
+
+def get_plugins_INSTALLLED_APPS():
+    """Chain together the results of every plugin's get_INSTALLED_APPS hook (returns a single-use iterator)."""
+    return itertools.chain(*pm.hook.get_INSTALLED_APPS())
+
+def register_plugins_INSTALLLED_APPS(INSTALLED_APPS):
+    """Call every plugin's register_INSTALLED_APPS hook so it can mutate INSTALLED_APPS in place."""
+    pm.hook.register_INSTALLED_APPS(INSTALLED_APPS=INSTALLED_APPS)
+
+# correctly-spelled aliases; the INSTALLLED spellings above are kept because existing callers use them
+get_plugins_INSTALLED_APPS = get_plugins_INSTALLLED_APPS
+register_plugins_INSTALLED_APPS = register_plugins_INSTALLLED_APPS
+
+
+def get_plugins_MIDDLEWARE():
+    """Chain together the results of every plugin's get_MIDDLEWARE hook (returns a single-use iterator)."""
+    return itertools.chain(*pm.hook.get_MIDDLEWARE())
+
+def register_plugins_MIDDLEWARE(MIDDLEWARE):
+    """Call every plugin's register_MIDDLEWARE hook so it can mutate MIDDLEWARE in place."""
+    pm.hook.register_MIDDLEWARE(MIDDLEWARE=MIDDLEWARE)
+
+
+def get_plugins_AUTHENTICATION_BACKENDS():
+    """Chain together the results of every plugin's get_AUTHENTICATION_BACKENDS hook (returns a single-use iterator)."""
+    return itertools.chain(*pm.hook.get_AUTHENTICATION_BACKENDS())
+
+def register_plugins_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
+    """Call every plugin's register_AUTHENTICATION_BACKENDS hook so it can mutate AUTHENTICATION_BACKENDS in place."""
+    pm.hook.register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS=AUTHENTICATION_BACKENDS)
+
+
+def get_plugins_STATICFILES_DIRS():
+    """Chain together the results of every plugin's get_STATICFILES_DIRS hook (returns a single-use iterator)."""
+    return itertools.chain(*pm.hook.get_STATICFILES_DIRS())
+
+def register_plugins_STATICFILES_DIRS(STATICFILES_DIRS):
+    """Call every plugin's register_STATICFILES_DIRS hook so it can mutate STATICFILES_DIRS in place."""
+    pm.hook.register_STATICFILES_DIRS(STATICFILES_DIRS=STATICFILES_DIRS)
+
+
+def get_plugins_TEMPLATE_DIRS():
+    """Chain together the results of every plugin's get_TEMPLATE_DIRS hook (returns a single-use iterator)."""
+    return itertools.chain(*pm.hook.get_TEMPLATE_DIRS())
+
+def register_plugins_TEMPLATE_DIRS(TEMPLATE_DIRS):
+    """Call every plugin's register_TEMPLATE_DIRS hook so it can mutate TEMPLATE_DIRS in place."""
+    pm.hook.register_TEMPLATE_DIRS(TEMPLATE_DIRS=TEMPLATE_DIRS)
+
+def get_plugins_DJANGO_HUEY_QUEUES():
+    """Merge the mappings returned by every plugin's get_DJANGO_HUEY_QUEUES hook into one dict."""
+    HUEY_QUEUES = {}
+    for plugin_result in pm.hook.get_DJANGO_HUEY_QUEUES():
+        HUEY_QUEUES.update(plugin_result)
+    return HUEY_QUEUES
+
+def register_plugins_DJANGO_HUEY(DJANGO_HUEY):
+    """Call every plugin's register_DJANGO_HUEY hook so it can mutate DJANGO_HUEY in place."""
+    pm.hook.register_DJANGO_HUEY(DJANGO_HUEY=DJANGO_HUEY)
+
+def get_plugins_ADMIN_DATA_VIEWS_URLS():
+    """Chain together the results of every plugin's get_ADMIN_DATA_VIEWS_URLS hook (returns a single-use iterator)."""
+    return itertools.chain(*pm.hook.get_ADMIN_DATA_VIEWS_URLS())
+
+def register_plugins_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
+    """Call every plugin's register_ADMIN_DATA_VIEWS hook so it can mutate ADMIN_DATA_VIEWS in place."""
+    pm.hook.register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS=ADMIN_DATA_VIEWS)
+
+
+def register_plugins_settings(settings):
+    """Set defaults for the plugin-extendable settings, run every register_* hook on them, then copy the results back into `settings`."""
+    # convert settings dict to a benedict so we can set values using settings.attr = xyz notation
+    settings_as_obj = benedict(settings, keypath_separator=None)
+
+    # set default values for settings that are used by plugins
+    settings_as_obj.INSTALLED_APPS = settings_as_obj.get('INSTALLED_APPS', [])
+    settings_as_obj.MIDDLEWARE = settings_as_obj.get('MIDDLEWARE', [])
+    settings_as_obj.AUTHENTICATION_BACKENDS = settings_as_obj.get('AUTHENTICATION_BACKENDS', [])
+    settings_as_obj.STATICFILES_DIRS = settings_as_obj.get('STATICFILES_DIRS', [])
+    settings_as_obj.TEMPLATE_DIRS = settings_as_obj.get('TEMPLATE_DIRS', [])
+    settings_as_obj.DJANGO_HUEY = settings_as_obj.get('DJANGO_HUEY', {'queues': {}})
+    settings_as_obj.ADMIN_DATA_VIEWS = settings_as_obj.get('ADMIN_DATA_VIEWS', {'URLS': []})
+
+    # call all the hook functions to mutate the settings values in-place
+    register_plugins_INSTALLLED_APPS(settings_as_obj.INSTALLED_APPS)
+    register_plugins_MIDDLEWARE(settings_as_obj.MIDDLEWARE)
+    register_plugins_AUTHENTICATION_BACKENDS(settings_as_obj.AUTHENTICATION_BACKENDS)
+    register_plugins_STATICFILES_DIRS(settings_as_obj.STATICFILES_DIRS)
+    register_plugins_TEMPLATE_DIRS(settings_as_obj.TEMPLATE_DIRS)
+    register_plugins_DJANGO_HUEY(settings_as_obj.DJANGO_HUEY)
+    register_plugins_ADMIN_DATA_VIEWS(settings_as_obj.ADMIN_DATA_VIEWS)
+
+    # calls Plugin.settings(settings) on each registered plugin
+    pm.hook.register_settings(settings=settings_as_obj)
+
+    # then finally update the settings globals() object with all the new settings
+    settings.update(settings_as_obj)
+
+
+def get_plugins_urlpatterns():
+    """Return one flat list of the results of every plugin's get_urlpatterns hook (the declared spec name, like every other getter here)."""
+    return list(itertools.chain(*pm.hook.get_urlpatterns()))
+
+def register_plugins_urlpatterns(urlpatterns):
+    """Call every plugin's register_urlpatterns hook so it can mutate urlpatterns in place."""
+    pm.hook.register_urlpatterns(urlpatterns=urlpatterns)
+
+
+# PLUGANTIC HOOKS
+
+def get_plugins_PLUGINS():
+    """Return a benedict of {PLUGIN.id: PLUGIN} built from every plugin registered with `pm`."""
+    return benedict({
+        plugin.PLUGIN.id: plugin.PLUGIN
+        for plugin in pm.get_plugins()
+    })
+
+def get_plugins_HOOKS(PLUGINS):
+    """Return a benedict of {hook.id: hook} for every entry in `.hooks` of each value in `PLUGINS`."""
+    return benedict({
+        hook.id: hook
+        for plugin in PLUGINS.values()
+        for hook in plugin.hooks
+    })
+
+def get_plugins_CONFIGS():
+    """Return a benedict of {config.id: config} gathered from every plugin's get_CONFIGS hook."""
+    return benedict({
+        config.id: config
+        for plugin_configs in pm.hook.get_CONFIGS()
+        for config in plugin_configs
+    })
+
+def get_plugins_FLAT_CONFIG(CONFIGS):
+    """Merge the model_dump() of every value in `CONFIGS` into one flat benedict (later values overwrite earlier keys)."""
+    FLAT_CONFIG = {}
+    for config in CONFIGS.values():
+        FLAT_CONFIG.update(config.model_dump())
+    return benedict(FLAT_CONFIG)
+
+def get_plugins_BINPROVIDERS():
+    """Return a benedict of {binprovider.id: binprovider} gathered from every plugin's get_BINPROVIDERS hook."""
+    return benedict({
+        binprovider.id: binprovider
+        for plugin_binproviders in pm.hook.get_BINPROVIDERS()
+        for binprovider in plugin_binproviders
+    })
+
+def get_plugins_BINARIES():
+    """Return a benedict of {binary.id: binary} gathered from every plugin's get_BINARIES hook."""
+    return benedict({
+        binary.id: binary
+        for plugin_binaries in pm.hook.get_BINARIES()
+        for binary in plugin_binaries
+    })
+
+def get_plugins_EXTRACTORS():
+    """Return a benedict of {extractor.id: extractor} gathered from every plugin's get_EXTRACTORS hook."""
+    return benedict({
+        extractor.id: extractor
+        for plugin_extractors in pm.hook.get_EXTRACTORS()
+        for extractor in plugin_extractors
+    })
+
+def get_plugins_REPLAYERS():
+    """Return a benedict of {replayer.id: replayer} gathered from every plugin's get_REPLAYERS hook."""
+    return benedict({
+        replayer.id: replayer
+        for plugin_replayers in pm.hook.get_REPLAYERS()
+        for replayer in plugin_replayers
+    })
+
+def get_plugins_CHECKS():
+    """Return a benedict of {check.id: check} gathered from every plugin's get_CHECKS hook."""
+    return benedict({
+        check.id: check
+        for plugin_checks in pm.hook.get_CHECKS()
+        for check in plugin_checks
+    })
+
+def get_plugins_ADMINDATAVIEWS():
+    """Return a benedict of {admin_dataview.id: admin_dataview} gathered from every plugin's get_ADMINDATAVIEWS hook."""
+    return benedict({
+        admin_dataview.id: admin_dataview
+        for plugin_admin_dataviews in pm.hook.get_ADMINDATAVIEWS()
+        for admin_dataview in plugin_admin_dataviews
+    })
+
+def get_plugins_QUEUES():
+    """Return a benedict of {queue.id: queue} gathered from every plugin's get_QUEUES hook."""
+    return benedict({
+        queue.id: queue
+        for plugin_queues in pm.hook.get_QUEUES()
+        for queue in plugin_queues
+    })
+
+def get_plugins_SEARCHBACKENDS():
+    """Return a benedict of {searchbackend.id: searchbackend} gathered from every plugin's get_SEARCHBACKENDS hook."""
+    return benedict({
+        searchbackend.id: searchbackend
+        for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS()
+        for searchbackend in plugin_searchbackends
+    })
diff --git a/archivebox/abx/apps.py b/archivebox/abx/apps.py
new file mode 100644
index 00000000..f3880c07
--- /dev/null
+++ b/archivebox/abx/apps.py
@@ -0,0 +1,14 @@
+from django.apps import AppConfig
+
+
+class ABXConfig(AppConfig):
+    """Django AppConfig for the 'abx' app; its ready() forwards to the plugins' `ready` hook."""
+    default_auto_field = 'django.db.models.BigAutoField'
+    name = 'abx'
+
+    def ready(self):
+        """Call the `ready` hook on abx.pm, passing it the Django settings object."""
+        import abx
+        from django.conf import settings
+
+        abx.pm.hook.ready(settings=settings)
diff --git a/archivebox/abx/hookspec.py b/archivebox/abx/hookspec.py
new file mode 100644
index 00000000..53cee225
--- /dev/null
+++ b/archivebox/abx/hookspec.py
@@ -0,0 +1,12 @@
+from pathlib import Path
+
+from pluggy import HookimplMarker
+from pluggy import HookspecMarker
+
+hookspec = HookspecMarker("abx")
+hookimpl = HookimplMarker("abx")
+
+
+@hookspec
+def get_system_user() -> str:
+    return Path('~').expanduser().name  # final path component of the expanded home directory
diff --git a/archivebox/abx/hookspec_archivebox.py b/archivebox/abx/hookspec_archivebox.py
new file mode 100644
index 00000000..b241a856
--- /dev/null
+++ b/archivebox/abx/hookspec_archivebox.py
@@ -0,0 +1,37 @@
+from .hookspec import hookspec
+
+
+@hookspec
+def get_CONFIGS():
+    """Return a list of config objects; the abx aggregator keys each one by its `.id`."""
+    return []
+
+@hookspec
+def get_EXTRACTORS():
+    """Return a list of extractor objects; the abx aggregator keys each one by its `.id`."""
+    return []
+
+@hookspec
+def get_REPLAYERS():
+    """Return a list of replayer objects; the abx aggregator keys each one by its `.id`."""
+    return []
+
+@hookspec
+def get_CHECKS():
+    """Return a list of check objects; the abx aggregator keys each one by its `.id`."""
+    return []
+
+@hookspec
+def get_ADMINDATAVIEWS():
+    """Return a list of admin dataview objects; the abx aggregator keys each one by its `.id`."""
+    return []
+
+@hookspec
+def get_QUEUES():
+    """Return a list of queue objects; the abx aggregator keys each one by its `.id`."""
+    return []
+
+@hookspec
+def get_SEARCHBACKENDS():
+    """Return a list of search backend objects; the abx aggregator keys each one by its `.id`."""
+    return []
diff --git a/archivebox/abx/hookspec_django_apps.py b/archivebox/abx/hookspec_django_apps.py
new file mode 100644
index 00000000..30bc2951
--- /dev/null
+++ b/archivebox/abx/hookspec_django_apps.py
@@ -0,0 +1,6 @@
+from .hookspec import hookspec
+
+@hookspec
+def ready(settings):
+    """Called when the Django app.ready() is triggered"""
+    pass
diff --git a/archivebox/abx/hookspec_django_settings.py b/archivebox/abx/hookspec_django_settings.py
new file mode 100644
index 00000000..c340d084
--- /dev/null
+++ b/archivebox/abx/hookspec_django_settings.py
@@ -0,0 +1,96 @@
+from .hookspec import hookspec
+
+
+###########################################################################################
+
+@hookspec
+def get_INSTALLED_APPS():
+    """Return a list of apps to add to INSTALLED_APPS"""
+    # e.g. ['your_plugin_type.plugin_name']
+    return []
+
+@hookspec
+def register_INSTALLED_APPS(INSTALLED_APPS):
+    """Mutate INSTALLED_APPS in place to add your app in a specific position"""
+    # idx_of_contrib = INSTALLED_APPS.index('django.contrib.auth')
+    # INSTALLED_APPS.insert(idx_of_contrib + 1, 'your_plugin_type.plugin_name')
+    pass
+
+
+@hookspec
+def get_TEMPLATE_DIRS():
+    """Return a list of template dirs to add to TEMPLATE_DIRS"""
+    return []     # e.g. ['your_plugin_type/plugin_name/templates']
+
+@hookspec
+def register_TEMPLATE_DIRS(TEMPLATE_DIRS):
+    """Mutate TEMPLATE_DIRS in place to add your template dirs in a specific position"""
+    # e.g. TEMPLATE_DIRS.insert(0, 'your_plugin_type/plugin_name/templates')
+    pass
+
+
+@hookspec
+def get_STATICFILES_DIRS():
+    """Return a list of static dirs to add to STATICFILES_DIRS"""
+    return []     # e.g. ['your_plugin_type/plugin_name/static']
+
+@hookspec
+def register_STATICFILES_DIRS(STATICFILES_DIRS):
+    """Mutate STATICFILES_DIRS in place to add your static dirs in a specific position"""
+    # e.g. STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static')
+    pass
+
+
+@hookspec
+def get_MIDDLEWARE():
+    """Return a list of middleware import paths to add to MIDDLEWARE"""
+    return []     # e.g. ['your_plugin_type.plugin_name.middleware.YourMiddleware']
+
+@hookspec
+def register_MIDDLEWARE(MIDDLEWARE):
+    """Mutate MIDDLEWARE in place to add your middleware in a specific position"""
+    # e.g. MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware')
+    pass
+
+
+@hookspec
+def get_AUTHENTICATION_BACKENDS():
+    """Return a list of auth backend import paths to add to AUTHENTICATION_BACKENDS"""
+    return []     # e.g. ['django_auth_ldap.backend.LDAPBackend']
+
+@hookspec
+def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
+    """Mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position"""
+    # e.g. AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend')
+    pass
+
+@hookspec
+def get_DJANGO_HUEY_QUEUES():
+    """Return a mapping of huey queues; the abx aggregator merges every plugin's result with dict.update()"""
+    return {}     # e.g. {'some_queue_name': {...}}
+
+@hookspec
+def register_DJANGO_HUEY(DJANGO_HUEY):
+    """Mutate DJANGO_HUEY in place to add your huey queues in a specific position"""
+    # e.g. DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value'
+    pass
+
+
+@hookspec
+def get_ADMIN_DATA_VIEWS_URLS():
+    """Return a list of entries to add to ADMIN_DATA_VIEWS['URLS']"""
+    return []
+
+@hookspec
+def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
+    """Mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position"""
+    # e.g. ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py')
+    pass
+
+
+@hookspec
+def register_settings(settings):
+    """Mutate settings in place to add your settings / modify existing settings"""
+    # settings.SOME_KEY = 'some_value'
+    pass
+
diff --git a/archivebox/abx/hookspec_django_urls.py b/archivebox/abx/hookspec_django_urls.py
new file mode 100644
index 00000000..258a1e89
--- /dev/null
+++ b/archivebox/abx/hookspec_django_urls.py
@@ -0,0 +1,13 @@
+from .hookspec import hookspec
+
+
+@hookspec
+def get_urlpatterns():
+    """Return a list of urlpatterns to add to the project's urlpatterns"""
+    return []     # e.g. [path('your_plugin_type/plugin_name/url.py', your_view)]
+
+@hookspec
+def register_urlpatterns(urlpatterns):
+    """Mutate urlpatterns in place to add your urlpatterns in a specific position"""
+    # e.g. urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view))
+    pass
diff --git a/archivebox/abx/hookspec_pydantic_pkgr.py b/archivebox/abx/hookspec_pydantic_pkgr.py
new file mode 100644
index 00000000..63a289a6
--- /dev/null
+++ b/archivebox/abx/hookspec_pydantic_pkgr.py
@@ -0,0 +1,15 @@
+
+from .hookspec import hookspec
+
+###########################################################################################
+
+@hookspec
+def get_BINPROVIDERS():
+    """Return a list of binprovider objects; the abx aggregator keys each one by its `.id`."""
+    return []
+
+@hookspec
+def get_BINARIES():
+    """Return a list of binary objects; the abx aggregator keys each one by its `.id`."""
+    return []
+
diff --git a/archivebox/config.py b/archivebox/config.py index 0852da83..419f60fe 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -27,7 +27,6 @@ import re import sys import json import shutil -import archivebox from hashlib import md5 from pathlib import Path @@ -36,15 +35,20 @@ from typing import Optional, Type, Tuple, Dict from subprocess import run, PIPE, DEVNULL, STDOUT, TimeoutExpired from configparser import ConfigParser -from pydantic_pkgr import SemVer from rich.progress import Progress from rich.console import Console +from benedict import benedict import django from django.db.backends.sqlite3.base import Database as sqlite3 +import archivebox +from
archivebox.constants import CONSTANTS +from archivebox.constants import * + +from pydantic_pkgr import SemVer + from .config_stubs import ( - AttrDict, ConfigValue, ConfigDict, ConfigDefaultValue, @@ -52,85 +56,35 @@ from .config_stubs import ( ) from .misc.logging import ( - DEFAULT_CLI_COLORS, - ANSI, - COLOR_DICT, stderr, hint, # noqa ) -# print('STARTING CONFIG LOADING') - -# load fallback libraries from vendor dir -from .vendor import load_vendored_libs -load_vendored_libs() - -# print("LOADED VENDOR LIBS") +from .plugins_sys.config.apps import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG +from .plugins_auth.ldap.apps import LDAP_CONFIG +from .plugins_extractor.favicon.apps import FAVICON_CONFIG +ANSI = SHELL_CONFIG.ANSI +LDAP = LDAP_CONFIG.LDAP_ENABLED ############################### Config Schema ################################## CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = { - 'SHELL_CONFIG': { - 'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()}, - 'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']}, - 'SHOW_PROGRESS': {'type': bool, 'default': lambda c: c['IS_TTY']}, # progress bars are buggy on mac, disable for now - 'IN_DOCKER': {'type': bool, 'default': False}, - 'IN_QEMU': {'type': bool, 'default': False}, - 'PUID': {'type': int, 'default': os.getuid()}, - 'PGID': {'type': int, 'default': os.getgid()}, - }, + 'SHELL_CONFIG': SHELL_CONFIG.as_legacy_config_schema(), - 'GENERAL_CONFIG': { - 'OUTPUT_DIR': {'type': str, 'default': None}, - 'CONFIG_FILE': {'type': str, 'default': None}, - 'ONLY_NEW': {'type': bool, 'default': True}, - 'TIMEOUT': {'type': int, 'default': 60}, - 'MEDIA_TIMEOUT': {'type': int, 'default': 3600}, - 'OUTPUT_PERMISSIONS': {'type': str, 'default': '644'}, - 'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'}, # TODO: move this to be a default WGET_ARGS + 'SERVER_CONFIG': SERVER_CONFIG.as_legacy_config_schema(), + + 'GENERAL_CONFIG': 
GENERAL_CONFIG.as_legacy_config_schema(), - 'URL_DENYLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$', 'aliases': ('URL_BLACKLIST',)}, # to avoid downloading code assets as their own pages - 'URL_ALLOWLIST': {'type': str, 'default': None, 'aliases': ('URL_WHITELIST',)}, + 'ARCHIVING_CONFIG': ARCHIVING_CONFIG.as_legacy_config_schema(), + 'SEARCH_BACKEND_CONFIG': SEARCH_BACKEND_CONFIG.as_legacy_config_schema(), - 'ENFORCE_ATOMIC_WRITES': {'type': bool, 'default': True}, - 'TAG_SEPARATOR_PATTERN': {'type': str, 'default': r'[,]'}, - }, + 'STORAGE_CONFIG': STORAGE_CONFIG.as_legacy_config_schema(), + + 'LDAP_CONFIG': LDAP_CONFIG.as_legacy_config_schema(), + + 'FAVICON_CONFIG': FAVICON_CONFIG.as_legacy_config_schema(), - 'SERVER_CONFIG': { - 'ADMIN_USERNAME': {'type': str, 'default': None}, - 'ADMIN_PASSWORD': {'type': str, 'default': None}, - - 'SECRET_KEY': {'type': str, 'default': None}, - 'BIND_ADDR': {'type': str, 'default': lambda c: ['127.0.0.1:8000', '0.0.0.0:8000'][c['IN_DOCKER']]}, - 'ALLOWED_HOSTS': {'type': str, 'default': '*'}, # e.g. archivebox.example.com,archivebox2.example.com - 'CSRF_TRUSTED_ORIGINS': {'type': str, 'default': lambda c: 'http://localhost:8000,http://127.0.0.1:8000,http://0.0.0.0:8000,http://{}'.format(c['BIND_ADDR'])}, # e.g. https://archivebox.example.com,https://archivebox2.example.com:8080 - 'DEBUG': {'type': bool, 'default': False}, - 'PUBLIC_INDEX': {'type': bool, 'default': True}, - 'PUBLIC_SNAPSHOTS': {'type': bool, 'default': True}, - 'PUBLIC_ADD_VIEW': {'type': bool, 'default': False}, - 'FOOTER_INFO': {'type': str, 'default': 'Content is hosted for personal archiving purposes only. 
Contact server owner for any takedown requests.'}, - 'SNAPSHOTS_PER_PAGE': {'type': int, 'default': 40}, - 'CUSTOM_TEMPLATES_DIR': {'type': str, 'default': None}, - 'TIME_ZONE': {'type': str, 'default': 'UTC'}, - 'TIMEZONE': {'type': str, 'default': 'UTC'}, - 'REVERSE_PROXY_USER_HEADER': {'type': str, 'default': 'Remote-User'}, - 'REVERSE_PROXY_WHITELIST': {'type': str, 'default': ''}, - 'LOGOUT_REDIRECT_URL': {'type': str, 'default': '/'}, - 'PREVIEW_ORIGINALS': {'type': bool, 'default': True}, - - 'LDAP': {'type': bool, 'default': False}, - 'LDAP_SERVER_URI': {'type': str, 'default': None}, - 'LDAP_BIND_DN': {'type': str, 'default': None}, - 'LDAP_BIND_PASSWORD': {'type': str, 'default': None}, - 'LDAP_USER_BASE': {'type': str, 'default': None}, - 'LDAP_USER_FILTER': {'type': str, 'default': None}, - 'LDAP_USERNAME_ATTR': {'type': str, 'default': None}, - 'LDAP_FIRSTNAME_ATTR': {'type': str, 'default': None}, - 'LDAP_LASTNAME_ATTR': {'type': str, 'default': None}, - 'LDAP_EMAIL_ATTR': {'type': str, 'default': None}, - 'LDAP_CREATE_SUPERUSER': {'type': bool, 'default': False}, - }, 'ARCHIVE_METHOD_TOGGLES': { 'SAVE_TITLE': {'type': bool, 'default': True, 'aliases': ('FETCH_TITLE',)}, @@ -212,26 +166,6 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = { 'SINGLEFILE_EXTRA_ARGS': {'type': list, 'default': None}, 'MERCURY_ARGS': {'type': list, 'default': ['--format=text']}, 'MERCURY_EXTRA_ARGS': {'type': list, 'default': None}, - 'FAVICON_PROVIDER': {'type': str, 'default': 'https://www.google.com/s2/favicons?domain={}'}, - }, - - 'SEARCH_BACKEND_CONFIG' : { - 'USE_INDEXING_BACKEND': {'type': bool, 'default': True}, - 'USE_SEARCHING_BACKEND': {'type': bool, 'default': True}, - 'SEARCH_BACKEND_ENGINE': {'type': str, 'default': 'ripgrep'}, - 'SEARCH_BACKEND_HOST_NAME': {'type': str, 'default': 'localhost'}, - 'SEARCH_BACKEND_PORT': {'type': int, 'default': 1491}, - 'SEARCH_BACKEND_PASSWORD': {'type': str, 'default': 'SecretPassword'}, - 'SEARCH_PROCESS_HTML': {'type': 
bool, 'default': True}, - # SONIC - 'SONIC_COLLECTION': {'type': str, 'default': 'archivebox'}, - 'SONIC_BUCKET': {'type': str, 'default': 'snapshots'}, - 'SEARCH_BACKEND_TIMEOUT': {'type': int, 'default': 90}, - # SQLite3 FTS5 - 'FTS_SEPARATE_DATABASE': {'type': bool, 'default': True}, - 'FTS_TOKENIZERS': {'type': str, 'default': 'porter unicode61 remove_diacritics 2'}, - # Default from https://www.sqlite.org/limits.html#max_length - 'FTS_SQLITE_MAX_LENGTH': {'type': int, 'default': int(1e9)}, }, 'DEPENDENCY_CONFIG': { @@ -242,7 +176,6 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = { 'USE_MERCURY': {'type': bool, 'default': True}, 'USE_GIT': {'type': bool, 'default': True}, 'USE_CHROME': {'type': bool, 'default': True}, - 'USE_NODE': {'type': bool, 'default': True}, 'USE_YOUTUBEDL': {'type': bool, 'default': True}, 'USE_RIPGREP': {'type': bool, 'default': True}, @@ -282,60 +215,16 @@ def get_real_name(key: str) -> str: -################################ Constants ##################################### - -PACKAGE_DIR_NAME = 'archivebox' -TEMPLATES_DIR_NAME = 'templates' - -ARCHIVE_DIR_NAME = 'archive' -SOURCES_DIR_NAME = 'sources' -LOGS_DIR_NAME = 'logs' -CACHE_DIR_NAME = 'cache' -LIB_DIR_NAME = 'lib' -PERSONAS_DIR_NAME = 'personas' -CRONTABS_DIR_NAME = 'crontabs' -SQL_INDEX_FILENAME = 'index.sqlite3' -JSON_INDEX_FILENAME = 'index.json' -HTML_INDEX_FILENAME = 'index.html' -ROBOTS_TXT_FILENAME = 'robots.txt' -FAVICON_FILENAME = 'favicon.ico' -CONFIG_FILENAME = 'ArchiveBox.conf' - - - - -ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE - - -CONSTANTS = archivebox.CONSTANTS._asdict() - -############################## Version Config ################################## - - - - - -############################## Derived Config ################################## - - - # These are derived/computed values calculated *after* all user-provided config values are ingested # they appear in `archivebox config` output and are intended to be read-only for 
the user DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = { - **{ - key: {'default': lambda c: val} - for key, val in archivebox.CONSTANTS.items() - }, - - 'PACKAGE_DIR': {'default': lambda c: archivebox.PACKAGE_DIR.resolve()}, - 'TEMPLATES_DIR': {'default': lambda c: c['PACKAGE_DIR'] / TEMPLATES_DIR_NAME}, + 'TEMPLATES_DIR': {'default': lambda c: c['PACKAGE_DIR'] / CONSTANTS.TEMPLATES_DIR_NAME}, 'CUSTOM_TEMPLATES_DIR': {'default': lambda c: c['CUSTOM_TEMPLATES_DIR'] and Path(c['CUSTOM_TEMPLATES_DIR'])}, - 'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)}, - 'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)}, - 'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')}, # exec is always needed to list directories + 'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)}, + 'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)}, 'USE_CURL': {'default': lambda c: c['USE_CURL'] and (c['SAVE_FAVICON'] or c['SAVE_TITLE'] or c['SAVE_ARCHIVE_DOT_ORG'])}, @@ -356,7 +245,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = { 'WGET_EXTRA_ARGS': {'default': lambda c: c['WGET_EXTRA_ARGS'] or []}, 'USE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['SAVE_MERCURY']}, - 'SAVE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['USE_NODE']}, + 'SAVE_MERCURY': {'default': lambda c: c['USE_MERCURY']}, 'MERCURY_VERSION': {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None}, # mercury doesnt expose version info until this is merged https://github.com/postlight/parser/pull/750 'MERCURY_ARGS': {'default': lambda c: c['MERCURY_ARGS'] or []}, 'MERCURY_EXTRA_ARGS': {'default': 
lambda c: c['MERCURY_EXTRA_ARGS'] or []}, @@ -365,8 +254,6 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = { 'GIT_VERSION': {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None}, 'SAVE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']}, - 'USE_NODE': {'default': lambda c: True}, - 'NODE_VERSION': {'default': lambda c: bin_version(c['NODE_BINARY']) if c['USE_NODE'] else None}, 'DEPENDENCIES': {'default': lambda c: get_dependency_info(c)}, # 'CODE_LOCATIONS': {'default': lambda c: get_code_locations(c)}, @@ -550,7 +437,7 @@ def load_config(defaults: ConfigDefaultDict, config: Optional[ConfigDict]=None, out_dir: Optional[str]=None, env_vars: Optional[os._Environ]=None, - config_file_vars: Optional[Dict[str, str]]=None) -> ConfigDict: + config_file_vars: Optional[Dict[str, str]]=None) -> benedict: env_vars = env_vars or os.environ config_file_vars = config_file_vars or load_config_file(out_dir=out_dir) @@ -583,13 +470,7 @@ def load_config(defaults: ConfigDefaultDict, # raise # raise SystemExit(2) - return AttrDict(extended_config) - - -def parse_version_string(version: str) -> Tuple[int, int, int]: - """parses a version tag string formatted like 'vx.x.x' into (major, minor, patch) ints""" - base = version.split('+')[0].split('v')[-1] # remove 'v' prefix and '+editable' suffix - return tuple(int(part) for part in base.split('.'))[:3] + return benedict(extended_config) @@ -778,13 +659,13 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue: 'enabled': config['USE_WGET'], 'is_valid': bool(config['WGET_VERSION']), }, - 'NODE_BINARY': { - 'path': bin_path(config['NODE_BINARY']), - 'version': config['NODE_VERSION'], - 'hash': bin_hash(config['NODE_BINARY']), - 'enabled': config['USE_NODE'], - 'is_valid': bool(config['NODE_VERSION']), - }, + # 'NODE_BINARY': { + # 'path': bin_path(config['NODE_BINARY']), + # 'version': config['NODE_VERSION'], + # 'hash': bin_hash(config['NODE_BINARY']), + # 'enabled': config['USE_NODE'], + # 'is_valid': 
bool(config['NODE_VERSION']), + # }, 'MERCURY_BINARY': { 'path': bin_path(config['MERCURY_BINARY']), 'version': config['MERCURY_VERSION'], @@ -879,15 +760,15 @@ globals().update(CONFIG) # Set timezone to UTC and umask to OUTPUT_PERMISSIONS -assert TIMEZONE == 'UTC', 'The server timezone should always be set to UTC' # noqa: F821 +assert TIMEZONE == 'UTC', f'The server timezone should always be set to UTC (got {TIMEZONE})' # noqa: F821 os.environ["TZ"] = TIMEZONE # noqa: F821 -os.umask(0o777 - int(DIR_OUTPUT_PERMISSIONS, base=8)) # noqa: F821 +os.umask(0o777 - int(STORAGE_CONFIG.DIR_OUTPUT_PERMISSIONS, base=8)) # noqa: F821 ########################### Config Validity Checkers ########################### -if not CONFIG.USE_COLOR: +if not SHELL_CONFIG.USE_COLOR: os.environ['NO_COLOR'] = '1' -if not CONFIG.SHOW_PROGRESS: +if not SHELL_CONFIG.SHOW_PROGRESS: os.environ['TERM'] = 'dumb' # recreate rich console obj based on new config values @@ -913,7 +794,7 @@ def setup_django_minimal(): django.setup() -def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None: +def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None: global INITIAL_STARTUP_PROGRESS global INITIAL_STARTUP_PROGRESS_TASK @@ -930,7 +811,6 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, sys.path.append(str(archivebox.PACKAGE_DIR)) os.environ.setdefault('OUTPUT_DIR', str(archivebox.DATA_DIR)) - os.environ.setdefault("ARCHIVEBOX_DATABASE_NAME", ":memory:") os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings') bump_startup_progress_bar() diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index 51e85607..d74f80ea 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -4,13 +4,13 @@ import os import sys import inspect -from typing import Dict from pathlib import Path -from benedict import benedict from 
django.utils.crypto import get_random_string +import abx import archivebox +from archivebox.constants import CONSTANTS from ..config import CONFIG @@ -28,39 +28,47 @@ ARCHIVE_DIR = archivebox.DATA_DIR / 'archive' ### ArchiveBox Plugin Settings ################################################################################ +PLUGIN_HOOKSPECS = [ + 'abx.hookspec_django_settings', + 'abx.hookspec_django_apps', + 'abx.hookspec_django_urls', + 'abx.hookspec_pydantic_pkgr', + 'abx.hookspec_archivebox', + 'plugantic.base_check', +] +abx.register_hookspecs(PLUGIN_HOOKSPECS) -def find_plugins_in_dir(plugins_dir: Path, prefix: str) -> Dict[str, Path]: - return { - f"{prefix}.{plugin_entrypoint.parent.name}": plugin_entrypoint.parent - for plugin_entrypoint in sorted(plugins_dir.glob("*/apps.py")) # key=get_plugin_order # Someday enforcing plugin import order may be required, but right now it's not needed - } # "plugins_pkg.pip": "/app/archivebox/plugins_pkg/pip" - -PLUGIN_DIRS = { - 'plugins_sys': PACKAGE_DIR / 'plugins_sys', - 'plugins_pkg': PACKAGE_DIR / 'plugins_pkg', - 'plugins_auth': PACKAGE_DIR / 'plugins_auth', - 'plugins_search': PACKAGE_DIR / 'plugins_search', - 'plugins_extractor': PACKAGE_DIR / 'plugins_extractor', - 'user_plugins': DATA_DIR / 'user_plugins', +BUILTIN_PLUGIN_DIRS = { + 'plugins_sys': archivebox.PACKAGE_DIR / 'plugins_sys', + 'plugins_pkg': archivebox.PACKAGE_DIR / 'plugins_pkg', + 'plugins_auth': archivebox.PACKAGE_DIR / 'plugins_auth', + 'plugins_search': archivebox.PACKAGE_DIR / 'plugins_search', + 'plugins_extractor': archivebox.PACKAGE_DIR / 'plugins_extractor', +} +USER_PLUGIN_DIRS = { + 'user_plugins': archivebox.DATA_DIR / 'user_plugins', } -INSTALLED_PLUGINS = {} -for plugin_prefix, plugin_dir in PLUGIN_DIRS.items(): - INSTALLED_PLUGINS.update(find_plugins_in_dir(plugin_dir, prefix=plugin_prefix)) +BUILTIN_PLUGINS = abx.get_plugins_in_dirs(BUILTIN_PLUGIN_DIRS) +PIP_PLUGINS = abx.get_pip_installed_plugins(group='archivebox') +USER_PLUGINS = 
abx.get_plugins_in_dirs(USER_PLUGIN_DIRS) +ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS} -### Plugins Globals (filled by plugin_type.pluginname.apps.PluginName.register() after Django startup) -PLUGINS = benedict({}) -HOOKS = benedict({}) - -# Created later by Plugin.register(settings) -> Hook.register(settings): -# CONFIGS = benedict({}) -# BINPROVIDERS = benedict({}) -# BINARIES = benedict({}) -# EXTRACTORS = benedict({}) -# REPLAYERS = benedict({}) -# CHECKS = benedict({}) -# ADMINDATAVIEWS = benedict({}) +PLUGIN_MANAGER = abx.pm +PLUGINS = abx.load_plugins(ALL_PLUGINS) +HOOKS = abx.get_plugins_HOOKS(PLUGINS) +CONFIGS = abx.get_plugins_CONFIGS() +# FLAT_CONFIG = abx.get_plugins_FLAT_CONFIG(CONFIGS) +FLAT_CONFIG = CONFIG +BINPROVIDERS = abx.get_plugins_BINPROVIDERS() +BINARIES = abx.get_plugins_BINARIES() +EXTRACTORS = abx.get_plugins_EXTRACTORS() +REPLAYERS = abx.get_plugins_REPLAYERS() +CHECKS = abx.get_plugins_CHECKS() +ADMINDATAVIEWS = abx.get_plugins_ADMINDATAVIEWS() +QUEUES = abx.get_plugins_QUEUES() +SEARCHBACKENDS = abx.get_plugins_SEARCHBACKENDS() ################################################################################ ### Django Core Settings @@ -96,15 +104,14 @@ INSTALLED_APPS = [ 'django_object_actions', # provides easy Django Admin action buttons on change views https://github.com/crccheck/django-object-actions # Our ArchiveBox-provided apps + # 'plugantic', # ArchiveBox plugin API definition + finding/registering/calling interface 'queues', # handles starting and managing background workers and processes 'abid_utils', # handles ABID ID creation, handling, and models - 'plugantic', # ArchiveBox plugin API definition + finding/registering/calling interface 'core', # core django model with Snapshot, ArchiveResult, etc. 'api', # Django-Ninja-based Rest API interfaces, config, APIToken model, etc. 
# ArchiveBox plugins - *INSTALLED_PLUGINS.keys(), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins, - # plugin.register(settings) is called at import of each plugin (in the order they are listed here), then plugin.ready() is called at AppConfig.ready() time + *abx.get_plugins_INSTALLLED_APPS(), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins, # 3rd-party apps from PyPI that need to be loaded last 'admin_data_views', # handles rendering some convenient automatic read-only views of data in Django admin @@ -112,9 +119,13 @@ INSTALLED_APPS = [ 'django_huey', # provides multi-queue support for django huey https://github.com/gaiacoop/django-huey 'bx_django_utils', # needed for huey_monitor https://github.com/boxine/bx_django_utils 'huey_monitor', # adds an admin UI for monitoring background huey tasks https://github.com/boxine/django-huey-monitor + + # load plugins last so all other apps are already .ready() when we call plugins.ready() + 'abx', ] + MIDDLEWARE = [ 'core.middleware.TimezoneMiddleware', 'django.middleware.security.SecurityMiddleware', @@ -125,8 +136,10 @@ MIDDLEWARE = [ 'core.middleware.ReverseProxyAuthMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'core.middleware.CacheControlMiddleware', + *abx.get_plugins_MIDDLEWARE(), ] + ################################################################################ ### Authentication Settings ################################################################################ @@ -136,18 +149,20 @@ MIDDLEWARE = [ AUTHENTICATION_BACKENDS = [ 'django.contrib.auth.backends.RemoteUserBackend', 'django.contrib.auth.backends.ModelBackend', + *abx.get_plugins_AUTHENTICATION_BACKENDS(), ] -from ..plugins_auth.ldap.settings import LDAP_CONFIG -if LDAP_CONFIG.LDAP_ENABLED: - AUTH_LDAP_BIND_DN = LDAP_CONFIG.LDAP_BIND_DN - AUTH_LDAP_SERVER_URI = LDAP_CONFIG.LDAP_SERVER_URI - AUTH_LDAP_BIND_PASSWORD = LDAP_CONFIG.LDAP_BIND_PASSWORD - 
AUTH_LDAP_USER_ATTR_MAP = LDAP_CONFIG.LDAP_USER_ATTR_MAP - AUTH_LDAP_USER_SEARCH = LDAP_CONFIG.AUTH_LDAP_USER_SEARCH +# from ..plugins_auth.ldap.settings import LDAP_CONFIG + +# if LDAP_CONFIG.LDAP_ENABLED: +# AUTH_LDAP_BIND_DN = LDAP_CONFIG.LDAP_BIND_DN +# AUTH_LDAP_SERVER_URI = LDAP_CONFIG.LDAP_SERVER_URI +# AUTH_LDAP_BIND_PASSWORD = LDAP_CONFIG.LDAP_BIND_PASSWORD +# AUTH_LDAP_USER_ATTR_MAP = LDAP_CONFIG.LDAP_USER_ATTR_MAP +# AUTH_LDAP_USER_SEARCH = LDAP_CONFIG.AUTH_LDAP_USER_SEARCH - AUTHENTICATION_BACKENDS = LDAP_CONFIG.AUTHENTICATION_BACKENDS +# AUTHENTICATION_BACKENDS = LDAP_CONFIG.AUTHENTICATION_BACKENDS ################################################################################ ### Staticfile and Template Settings @@ -156,22 +171,24 @@ if LDAP_CONFIG.LDAP_ENABLED: STATIC_URL = '/static/' TEMPLATES_DIR_NAME = 'templates' STATICFILES_DIRS = [ - *([str(CONFIG.CUSTOM_TEMPLATES_DIR / 'static')] if CONFIG.CUSTOM_TEMPLATES_DIR else []), - *[ - str(plugin_dir / 'static') - for plugin_dir in PLUGIN_DIRS.values() - if (plugin_dir / 'static').is_dir() - ], + *([str(CONSTANTS.CUSTOM_TEMPLATES_DIR / 'static')] if CONSTANTS.CUSTOM_TEMPLATES_DIR.is_dir() else []), + # *[ + # str(plugin_dir / 'static') + # for plugin_dir in PLUGIN_DIRS.values() + # if (plugin_dir / 'static').is_dir() + # ], + *abx.get_plugins_STATICFILES_DIRS(), str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'static'), ] TEMPLATE_DIRS = [ - *([str(CONFIG.CUSTOM_TEMPLATES_DIR)] if CONFIG.CUSTOM_TEMPLATES_DIR else []), - *[ - str(plugin_dir / 'templates') - for plugin_dir in PLUGIN_DIRS.values() - if (plugin_dir / 'templates').is_dir() - ], + *([str(CONSTANTS.CUSTOM_TEMPLATES_DIR)] if CONSTANTS.CUSTOM_TEMPLATES_DIR.is_dir() else []), + # *[ + # str(plugin_dir / 'templates') + # for plugin_dir in PLUGIN_DIRS.values() + # if (plugin_dir / 'templates').is_dir() + # ], + *abx.get_plugins_TEMPLATE_DIRS(), str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'core'), str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'admin'), str(PACKAGE_DIR 
/ TEMPLATES_DIR_NAME), @@ -198,8 +215,6 @@ TEMPLATES = [ ### External Service Settings ################################################################################ -from ..plugins_sys.config.constants import CONSTANTS - # CACHE_DB_FILENAME = 'cache.sqlite3' # CACHE_DB_PATH = CONSTANTS.CACHE_DIR / CACHE_DB_FILENAME # CACHE_DB_TABLE = 'django_cache' @@ -210,7 +225,7 @@ DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", str(CONSTANTS.DATABAS QUEUE_DATABASE_NAME = DATABASE_NAME.replace('index.sqlite3', 'queue.sqlite3') SQLITE_CONNECTION_OPTIONS = { - "TIME_ZONE": CONFIG.TIMEZONE, + "TIME_ZONE": CONSTANTS.TIMEZONE, "OPTIONS": { # https://gcollazo.com/optimal-sqlite-settings-for-django/ # # https://litestream.io/tips/#busy-timeout @@ -280,6 +295,7 @@ DJANGO_HUEY = { "queues": { HUEY["name"]: HUEY.copy(), # more registered here at plugin import-time by BaseQueue.register() + **abx.get_plugins_DJANGO_HUEY_QUEUES(), }, } @@ -411,7 +427,7 @@ USE_I18N = True USE_TZ = True DATETIME_FORMAT = 'Y-m-d h:i:s A' SHORT_DATETIME_FORMAT = 'Y-m-d h:i:s A' -TIME_ZONE = CONFIG.TIMEZONE # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent +TIME_ZONE = CONSTANTS.TIMEZONE # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent from django.conf.locale.en import formats as en_formats # type: ignore @@ -504,6 +520,7 @@ ADMIN_DATA_VIEWS = { "name": "log", }, }, + *abx.get_plugins_ADMIN_DATA_VIEWS_URLS(), ], } @@ -595,3 +612,7 @@ DEBUG_LOGFIRE = DEBUG_LOGFIRE and (DATA_DIR / '.logfire').is_dir() # INSTALLED_APPS += ['jet_django'] # JET_PROJECT = 'archivebox' # JET_TOKEN = 'some-api-token-here' + + +abx.register_plugins_settings(globals()) + diff --git a/archivebox/core/views.py b/archivebox/core/views.py index 45bf1882..2de18c8d 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -533,6 +533,8 @@ def key_is_safe(key: str) -> bool: @render_with_table_view def live_config_list_view(request: HttpRequest, 
**kwargs) -> TableContext: + CONFIG = settings.FLAT_CONFIG + assert request.user.is_superuser, 'Must be a superuser to view configuration settings.' rows = { diff --git a/archivebox/plugantic/base_check.py b/archivebox/plugantic/base_check.py index 3f3deda4..029113b6 100644 --- a/archivebox/plugantic/base_check.py +++ b/archivebox/plugantic/base_check.py @@ -1,11 +1,12 @@ __package__ = "archivebox.plugantic" +import abx from typing import List from django.core.checks import Warning, Tags, register from .base_hook import BaseHook, HookType -from ..config_stubs import AttrDict + class BaseCheck(BaseHook): hook_type: HookType = "CHECK" @@ -28,21 +29,18 @@ class BaseCheck(BaseHook): def register(self, settings, parent_plugin=None): # self._plugin = parent_plugin # backref to parent is for debugging only, never rely on this! - self.register_with_django_check_system(settings) # (SIDE EFFECT) + abx.pm.hook.register_django_check(check=self, settings=settings) - # install hook into settings.CHECKS - settings.CHECKS = getattr(settings, "CHECKS", None) or AttrDict({}) - settings.CHECKS[self.id] = self - # record installed hook in settings.HOOKS - super().register(settings, parent_plugin=parent_plugin) - def register_with_django_check_system(self, settings): - def run_check(app_configs, **kwargs) -> List[Warning]: - import logging - return self.check(settings, logging.getLogger("checks")) +@abx.hookspec +@abx.hookimpl +def register_django_check(check: BaseCheck, settings): + def run_check(app_configs, **kwargs) -> List[Warning]: + import logging + return check.check(settings, logging.getLogger("checks")) - run_check.__name__ = self.id - run_check.tags = [self.tag] - register(self.tag)(run_check) + run_check.__name__ = check.id + run_check.tags = [check.tag] + register(check.tag)(run_check) diff --git a/archivebox/plugantic/base_configset.py b/archivebox/plugantic/base_configset.py index 11ca16ef..972173dc 100644 --- a/archivebox/plugantic/base_configset.py +++ 
b/archivebox/plugantic/base_configset.py @@ -227,26 +227,34 @@ class ArchiveBoxBaseConfig(BaseSettings): print(f' {key}={original_value} -> {value}') self.__init__() return self + + def as_legacy_config_schema(self): + # shim for backwards compatibility with old config schema style + model_values = self.model_dump() + return benedict({ + key: {'type': field.annotation, 'default': model_values[key]} + for key, field in self.model_fields.items() + }) class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook): # type: ignore[type-arg] hook_type: ClassVar[HookType] = 'CONFIG' section: ClassVar[ConfigSectionName] = 'GENERAL_CONFIG' - def register(self, settings, parent_plugin=None): - # self._plugin = parent_plugin # for debugging only, never rely on this! + # def register(self, settings, parent_plugin=None): + # # self._plugin = parent_plugin # for debugging only, never rely on this! - # settings.FLAT_CONFIG = benedict(getattr(settings, "FLAT_CONFIG", settings.CONFIG)) - # # pass FLAT_CONFIG so far into our config model to load it - # loaded_config = self.__class__(**settings.FLAT_CONFIG) - # # then dump our parsed config back into FLAT_CONFIG for the next plugin to use - # settings.FLAT_CONFIG.merge(loaded_config.model_dump(include=set(self.model_fields.keys()))) + # settings.FLAT_CONFIG = benedict(getattr(settings, "FLAT_CONFIG", {})) + # # pass FLAT_CONFIG so far into our config model to load it + # loaded_config = self + # # then dump our parsed config back into FLAT_CONFIG for the next plugin to use + # settings.FLAT_CONFIG.merge(loaded_config.model_dump(include=set(self.model_fields.keys()))) - settings.CONFIGS = getattr(settings, "CONFIGS", None) or benedict({}) - settings.CONFIGS[self.id] = self - self._original_id = id(self) + # settings.REGISTERED_CONFIGS = getattr(settings, "REGISTERED_CONFIGS", None) or benedict({}) + # settings.REGISTERED_CONFIGS[self.id] = self + # self._original_id = id(self) - super().register(settings, parent_plugin=parent_plugin) + # 
super().register(settings, parent_plugin=parent_plugin) # def ready(self, settings): # # reload config from environment, in case it's been changed by any other plugins diff --git a/archivebox/plugantic/base_hook.py b/archivebox/plugantic/base_hook.py index a847ca1c..adc5cad7 100644 --- a/archivebox/plugantic/base_hook.py +++ b/archivebox/plugantic/base_hook.py @@ -96,32 +96,32 @@ class BaseHook(BaseModel): # e.g. /admin/environment/config/LdapConfig/ return f"/admin/environment/{self.hook_type.lower()}/{self.id}/" - def register(self, settings, parent_plugin=None): - """Load a record of an installed hook into global Django settings.HOOKS at runtime.""" - self._plugin = parent_plugin # for debugging only, never rely on this! + # def register(self, settings, parent_plugin=None): + # """Load a record of an installed hook into global Django settings.HOOKS at runtime.""" + # self._plugin = parent_plugin # for debugging only, never rely on this! - # assert json.dumps(self.model_json_schema(), indent=4), f"Hook {self.hook_module} has invalid JSON schema." + # # assert json.dumps(self.model_json_schema(), indent=4), f"Hook {self.hook_module} has invalid JSON schema." 
- # print(' -', self.hook_module, '.register()') + # # print(' -', self.hook_module, '.register()') - # record installed hook in settings.HOOKS - settings.HOOKS[self.id] = self + # # record installed hook in settings.HOOKS + # settings.REGISTERED_HOOKS[self.id] = self - if settings.HOOKS[self.id]._is_registered: - raise Exception(f"Tried to run {self.hook_module}.register() but its already been called!") + # if settings.REGISTERED_HOOKS[self.id]._is_registered: + # raise Exception(f"Tried to run {self.hook_module}.register() but its already been called!") - settings.HOOKS[self.id]._is_registered = True + # settings.REGISTERED_HOOKS[self.id]._is_registered = True - # print("REGISTERED HOOK:", self.hook_module) + # # print("REGISTERED HOOK:", self.hook_module) - def ready(self, settings): - """Runs any runtime code needed when AppConfig.ready() is called (after all models are imported).""" + # def ready(self, settings): + # """Runs any runtime code needed when AppConfig.ready() is called (after all models are imported).""" - # print(' -', self.hook_module, '.ready()') + # # print(' -', self.hook_module, '.ready()') - assert self.id in settings.HOOKS, f"Tried to ready hook {self.hook_module} but it is not registered in settings.HOOKS." + # assert self.id in settings.REGISTERED_HOOKS, f"Tried to ready hook {self.hook_module} but it is not registered in settings.REGISTERED_HOOKS." 
- if settings.HOOKS[self.id]._is_ready: - raise Exception(f"Tried to run {self.hook_module}.ready() but its already been called!") + # if settings.REGISTERED_HOOKS[self.id]._is_ready: + # raise Exception(f"Tried to run {self.hook_module}.ready() but its already been called!") - settings.HOOKS[self.id]._is_ready = True + # settings.REGISTERED_HOOKS[self.id]._is_ready = True diff --git a/archivebox/plugantic/base_plugin.py b/archivebox/plugantic/base_plugin.py index 24683fab..2071a648 100644 --- a/archivebox/plugantic/base_plugin.py +++ b/archivebox/plugantic/base_plugin.py @@ -1,5 +1,6 @@ __package__ = 'archivebox.plugantic' +import abx import inspect from pathlib import Path @@ -21,9 +22,6 @@ from benedict import benedict from .base_hook import BaseHook, HookType -from ..config import bump_startup_progress_bar - - class BasePlugin(BaseModel): model_config = ConfigDict( extra='forbid', @@ -107,9 +105,10 @@ class BasePlugin(BaseModel): default_auto_field = 'django.db.models.AutoField' - def ready(self): - from django.conf import settings - plugin_self.ready(settings) + # handled by abx.hookimpl ready() + # def ready(self): + # from django.conf import settings + # plugin_self.ready(settings) return PluginAppConfig @@ -125,64 +124,60 @@ class BasePlugin(BaseModel): hooks[hook.hook_type][hook.id] = hook return hooks - def register(self, settings=None): + def register(self, settings): """Loads this plugin's configs, binaries, extractors, and replayers into global Django settings at import time (before models are imported or any AppConfig.ready() are called).""" - if settings is None: - from django.conf import settings as django_settings - settings = django_settings - - # print() - # print(self.plugin_module_full, '.register()') + from ..config import bump_startup_progress_bar - # assert json.dumps(self.model_json_schema(), indent=4), f'Plugin {self.plugin_module} has invalid JSON schema.' 
+ # assert settings.PLUGINS[self.id] == self + # # assert self.id not in settings.PLUGINS, f'Tried to register plugin {self.plugin_module} but it conflicts with existing plugin of the same name ({self.app_label}).' - assert self.id not in settings.PLUGINS, f'Tried to register plugin {self.plugin_module} but it conflicts with existing plugin of the same name ({self.app_label}).' + # ### Mutate django.conf.settings... values in-place to include plugin-provided overrides - ### Mutate django.conf.settings... values in-place to include plugin-provided overrides - settings.PLUGINS[self.id] = self + # if settings.PLUGINS[self.id]._is_registered: + # raise Exception(f"Tried to run {self.plugin_module}.register() but its already been called!") - if settings.PLUGINS[self.id]._is_registered: - raise Exception(f"Tried to run {self.plugin_module}.register() but its already been called!") + # for hook in self.hooks: + # hook.register(settings, parent_plugin=self) - for hook in self.hooks: - hook.register(settings, parent_plugin=self) - - settings.PLUGINS[self.id]._is_registered = True - # print('√ REGISTERED PLUGIN:', self.plugin_module) + # settings.PLUGINS[self.id]._is_registered = True + # # print('√ REGISTERED PLUGIN:', self.plugin_module) bump_startup_progress_bar() def ready(self, settings=None): """Runs any runtime code needed when AppConfig.ready() is called (after all models are imported).""" - if settings is None: - from django.conf import settings as django_settings - settings = django_settings + from ..config import bump_startup_progress_bar - # print() - # print(self.plugin_module_full, '.ready()') - assert ( - self.id in settings.PLUGINS and settings.PLUGINS[self.id]._is_registered - ), f"Tried to run plugin.ready() for {self.plugin_module} but plugin is not yet registered in settings.PLUGINS." 
+ # if settings is None: + # from django.conf import settings as django_settings + # settings = django_settings - if settings.PLUGINS[self.id]._is_ready: - raise Exception(f"Tried to run {self.plugin_module}.ready() but its already been called!") + # # print() + # # print(self.plugin_module_full, '.ready()') - for hook in self.hooks: - hook.ready(settings) + # assert ( + # self.id in settings.PLUGINS and settings.PLUGINS[self.id]._is_registered + # ), f"Tried to run plugin.ready() for {self.plugin_module} but plugin is not yet registered in settings.PLUGINS." + + # if settings.PLUGINS[self.id]._is_ready: + # raise Exception(f"Tried to run {self.plugin_module}.ready() but its already been called!") + + # for hook in self.hooks: + # hook.ready(settings) - settings.PLUGINS[self.id]._is_ready = True + # settings.PLUGINS[self.id]._is_ready = True bump_startup_progress_bar() - # @validate_call - # def install_binaries(self) -> Self: - # new_binaries = [] - # for idx, binary in enumerate(self.binaries): - # new_binaries.append(binary.install() or binary) - # return self.model_copy(update={ - # 'binaries': new_binaries, - # }) + @validate_call + def install_binaries(self) -> Self: + new_binaries = [] + for idx, binary in enumerate(self.binaries): + new_binaries.append(binary.install() or binary) + return self.model_copy(update={ + 'binaries': new_binaries, + }) @validate_call def load_binaries(self, cache=True) -> Self: diff --git a/archivebox/plugins_auth/ldap/apps.py b/archivebox/plugins_auth/ldap/apps.py index f7fd2ef7..fab177d3 100644 --- a/archivebox/plugins_auth/ldap/apps.py +++ b/archivebox/plugins_auth/ldap/apps.py @@ -51,5 +51,5 @@ class LdapAuthPlugin(BasePlugin): PLUGIN = LdapAuthPlugin() -PLUGIN.register(settings) +# PLUGIN.register(settings) DJANGO_APP = PLUGIN.AppConfig diff --git a/archivebox/plugins_extractor/chrome/apps.py b/archivebox/plugins_extractor/chrome/apps.py index fa295c37..f1cf1b92 100644 --- a/archivebox/plugins_extractor/chrome/apps.py +++ 
b/archivebox/plugins_extractor/chrome/apps.py @@ -259,5 +259,5 @@ class ChromePlugin(BasePlugin): PLUGIN = ChromePlugin() -PLUGIN.register(settings) +# PLUGIN.register(settings) DJANGO_APP = PLUGIN.AppConfig diff --git a/archivebox/plugins_extractor/readability/apps.py b/archivebox/plugins_extractor/readability/apps.py index 20c2939f..b695ee52 100644 --- a/archivebox/plugins_extractor/readability/apps.py +++ b/archivebox/plugins_extractor/readability/apps.py @@ -99,5 +99,5 @@ class ReadabilityPlugin(BasePlugin): PLUGIN = ReadabilityPlugin() -PLUGIN.register(settings) +# PLUGIN.register(settings) DJANGO_APP = PLUGIN.AppConfig diff --git a/archivebox/plugins_extractor/singlefile/apps.py b/archivebox/plugins_extractor/singlefile/apps.py index b7741213..403a5220 100644 --- a/archivebox/plugins_extractor/singlefile/apps.py +++ b/archivebox/plugins_extractor/singlefile/apps.py @@ -134,5 +134,5 @@ class SinglefilePlugin(BasePlugin): PLUGIN = SinglefilePlugin() -PLUGIN.register(settings) +# PLUGIN.register(settings) DJANGO_APP = PLUGIN.AppConfig diff --git a/archivebox/plugins_extractor/ytdlp/apps.py b/archivebox/plugins_extractor/ytdlp/apps.py index 4385f41f..335b4e1a 100644 --- a/archivebox/plugins_extractor/ytdlp/apps.py +++ b/archivebox/plugins_extractor/ytdlp/apps.py @@ -97,5 +97,5 @@ class YtdlpPlugin(BasePlugin): PLUGIN = YtdlpPlugin() -PLUGIN.register(settings) +# PLUGIN.register(settings) DJANGO_APP = PLUGIN.AppConfig diff --git a/archivebox/plugins_pkg/npm/apps.py b/archivebox/plugins_pkg/npm/apps.py index ea2db87e..fce0dad1 100644 --- a/archivebox/plugins_pkg/npm/apps.py +++ b/archivebox/plugins_pkg/npm/apps.py @@ -90,5 +90,5 @@ class NpmPlugin(BasePlugin): PLUGIN = NpmPlugin() -PLUGIN.register(settings) +# PLUGIN.register(settings) DJANGO_APP = PLUGIN.AppConfig diff --git a/archivebox/plugins_pkg/pip/apps.py b/archivebox/plugins_pkg/pip/apps.py index 45be3374..c283d2f4 100644 --- a/archivebox/plugins_pkg/pip/apps.py +++ b/archivebox/plugins_pkg/pip/apps.py @@ 
-8,10 +8,11 @@ from pathlib import Path from typing import List, Dict, Optional, ClassVar from pydantic import InstanceOf, Field, model_validator +import abx + import django from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type] from django.core.checks import Error, Tags -from django.conf import settings from pydantic_pkgr import BinProvider, PipProvider, BinName, BinProviderName, ProviderLookupDict, SemVer from plugantic.base_plugin import BasePlugin @@ -240,5 +241,11 @@ class PipPlugin(BasePlugin): ] PLUGIN = PipPlugin() -PLUGIN.register(settings) +# PLUGIN.register(settings) DJANGO_APP = PLUGIN.AppConfig + + +@abx.hookimpl +def register_django_checks(settings): + USER_IS_NOT_ROOT_CHECK.register_with_django_check_system(settings) + PIP_ENVIRONMENT_CHECK.register_with_django_check_system(settings) diff --git a/archivebox/plugins_pkg/playwright/apps.py b/archivebox/plugins_pkg/playwright/apps.py index dabb8ec8..9198eca5 100644 --- a/archivebox/plugins_pkg/playwright/apps.py +++ b/archivebox/plugins_pkg/playwright/apps.py @@ -72,7 +72,7 @@ class PlaywrightBinProvider(BaseBinProvider): if OPERATING_SYSTEM == "darwin" else Path("~/.cache/ms-playwright").expanduser() # linux playwright cache dir ) - puppeteer_install_args: List[str] = ["install"] # --with-deps + puppeteer_install_args: List[str] = ["install"] # --with-deps packages_handler: ProviderLookupDict = Field(default={ "chrome": lambda: ["chromium"], @@ -177,5 +177,5 @@ class PlaywrightPlugin(BasePlugin): PLUGIN = PlaywrightPlugin() -PLUGIN.register(settings) +# PLUGIN.register(settings) DJANGO_APP = PLUGIN.AppConfig diff --git a/archivebox/plugins_pkg/puppeteer/apps.py b/archivebox/plugins_pkg/puppeteer/apps.py index 2677ac06..0efd1e8c 100644 --- a/archivebox/plugins_pkg/puppeteer/apps.py +++ b/archivebox/plugins_pkg/puppeteer/apps.py @@ -165,5 +165,5 @@ class PuppeteerPlugin(BasePlugin): PLUGIN = PuppeteerPlugin() -PLUGIN.register(settings) +# 
PLUGIN.register(settings) DJANGO_APP = PLUGIN.AppConfig diff --git a/archivebox/plugins_search/ripgrep/apps.py b/archivebox/plugins_search/ripgrep/apps.py index e2671141..780c96a1 100644 --- a/archivebox/plugins_search/ripgrep/apps.py +++ b/archivebox/plugins_search/ripgrep/apps.py @@ -113,5 +113,5 @@ class RipgrepSearchPlugin(BasePlugin): PLUGIN = RipgrepSearchPlugin() -PLUGIN.register(settings) +# PLUGIN.register(settings) DJANGO_APP = PLUGIN.AppConfig diff --git a/archivebox/plugins_search/sonic/apps.py b/archivebox/plugins_search/sonic/apps.py index 1c8077ab..fd630fdb 100644 --- a/archivebox/plugins_search/sonic/apps.py +++ b/archivebox/plugins_search/sonic/apps.py @@ -133,5 +133,5 @@ class SonicSearchPlugin(BasePlugin): PLUGIN = SonicSearchPlugin() -PLUGIN.register(settings) +# PLUGIN.register(settings) DJANGO_APP = PLUGIN.AppConfig diff --git a/archivebox/plugins_search/sqlite/apps.py b/archivebox/plugins_search/sqlite/apps.py index c773843d..75e9309e 100644 --- a/archivebox/plugins_search/sqlite/apps.py +++ b/archivebox/plugins_search/sqlite/apps.py @@ -255,5 +255,5 @@ class SqliteftsSearchPlugin(BasePlugin): PLUGIN = SqliteftsSearchPlugin() -PLUGIN.register(settings) +# PLUGIN.register(settings) DJANGO_APP = PLUGIN.AppConfig diff --git a/archivebox/plugins_sys/config/apps.py b/archivebox/plugins_sys/config/apps.py index 4a1f1228..67607809 100644 --- a/archivebox/plugins_sys/config/apps.py +++ b/archivebox/plugins_sys/config/apps.py @@ -245,5 +245,17 @@ class ConfigPlugin(BasePlugin): PLUGIN = ConfigPlugin() -PLUGIN.register(settings) DJANGO_APP = PLUGIN.AppConfig + + + +# register django apps +@archivebox.plugin.hookimpl +def get_INSTALLED_APPS(): + return [DJANGO_APP.name] + +# register configs +@archivebox.plugin.hookimpl +def register_CONFIG(): + return PLUGIN.HOOKS_BY_TYPE['CONFIG'].values() +