migrate plugin loading process to new pluggy-powered system based on djp

This commit is contained in:
Nick Sweeting 2024-09-26 02:43:12 -07:00
parent efd341d8ad
commit 8ed3155ec5
No known key found for this signature in database
28 changed files with 690 additions and 321 deletions

271
archivebox/abx/__init__.py Normal file
View file

@ -0,0 +1,271 @@
import itertools
import importlib
from pathlib import Path
from typing import Dict
from benedict import benedict
import pluggy
import archivebox
from . import hookspec as base_spec
from .hookspec import hookimpl, hookspec # noqa
# Module-level pluggy plugin manager for the "abx" project name.
# The base hookspec module is registered on it as soon as this package is imported.
pm = pluggy.PluginManager("abx")
pm.add_hookspecs(base_spec)
def register_hookspecs(hookspecs):
    """Import each dotted module path in *hookspecs* and add it to ``pm`` as a hookspec module."""
    # map() is lazy, so every module is still imported right before it is registered
    for spec_module in map(importlib.import_module, hookspecs):
        pm.add_hookspecs(spec_module)
def find_plugins_in_dir(plugins_dir: Path, prefix: str) -> Dict[str, Path]:
    """Map "<prefix>.<dirname>" to the directory of every immediate subdirectory of *plugins_dir* that contains an apps.py."""
    found: Dict[str, Path] = {}
    # sorted() keeps the result deterministic; a custom plugin-order key is not needed yet
    for apps_file in sorted(plugins_dir.glob("*/apps.py")):
        package_dir = apps_file.parent
        found[f"{prefix}.{package_dir.name}"] = package_dir
    return found  # e.g. {"plugins_pkg.pip": "/app/archivebox/plugins_pkg/pip"}
def get_pip_installed_plugins(group='abx'):
    """Detect pip-installed plugins via entry points (replaces pm.load_setuptools_entrypoints("abx")).

    Returns a dict of entry point name -> directory containing the loaded entry point's file.
    Entry points from other groups, or whose name is blocked on ``pm``, are skipped.
    Plugins are only detected here; nothing is registered on ``pm``.
    """
    import importlib.metadata

    detected = {}   # module_name: module_dir_path
    # snapshot the installed distributions before any entry point gets loaded
    installed_dists = list(importlib.metadata.distributions())
    for dist in installed_dists:
        wanted = (
            ep for ep in dist.entry_points
            if ep.group == group and not pm.is_blocked(ep.name)
        )
        for ep in wanted:
            detected[ep.name] = Path(ep.load().__file__).parent
    return detected
def get_plugins_in_dirs(plugin_dirs: Dict[str, Path]):
    """Run find_plugins_in_dir() over every prefix -> directory pair and merge the results into one dict."""
    return {
        module_name: module_dir
        for dir_prefix, search_dir in plugin_dirs.items()
        for module_name, module_dir in find_plugins_in_dir(search_dir, prefix=dir_prefix).items()
    }
def get_builtin_plugins():
    """Find the plugins in each plugins_* directory under archivebox.PACKAGE_DIR, keyed by "<dirname>.<plugin>"."""
    builtin_dir_names = (
        'plugins_sys',
        'plugins_pkg',
        'plugins_auth',
        'plugins_search',
        'plugins_extractor',
    )
    detected = {}
    # each directory name doubles as the module prefix of the plugins found inside it
    for dir_name in builtin_dir_names:
        detected.update(find_plugins_in_dir(archivebox.PACKAGE_DIR / dir_name, prefix=dir_name))
    return detected
def get_user_plugins():
    """Find the plugins inside the user_plugins directory under archivebox.DATA_DIR."""
    user_plugins_dir = archivebox.DATA_DIR / 'user_plugins'
    return find_plugins_in_dir(user_plugins_dir, prefix='user_plugins')
# BUILTIN_PLUGINS = get_builtin_plugins()
# PIP_PLUGINS = get_pip_installed_plugins()
# USER_PLUGINS = get_user_plugins()
# ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS}
# Load all plugins from pip packages, archivebox built-ins, and user plugins
def load_plugins(plugins_dict: Dict[str, Path]):
    """Import "<module>.apps" for every key of *plugins_dict*, register it on ``pm``, and return module name -> its PLUGIN attribute.

    Only the keys (module names) are used; the directory values are not read here.
    """
    loaded = {}
    for module_name in plugins_dict:
        apps_module = importlib.import_module(module_name + '.apps')
        pm.register(apps_module)
        loaded[module_name] = apps_module.PLUGIN
    return loaded
def get_registered_plugins():
    """Describe every plugin registered on ``pm``.

    Returns a dict keyed by plugin name whose values hold "name" and "hooks",
    plus "version" when pluggy has distribution info for the plugin (in which
    case the distribution's name replaces the module name).
    """
    distinfo_by_plugin = dict(pm.list_plugin_distinfo())
    registered = {}
    for plugin in pm.get_plugins():
        info = {
            "name": plugin.__name__,
            "hooks": [caller.name for caller in pm.get_hookcallers(plugin) or ()],
        }
        dist = distinfo_by_plugin.get(plugin)
        if dist:
            info["version"] = dist.version
            info["name"] = getattr(dist, "name", None) or dist.project_name
        registered[info["name"]] = info
    return registered
def get_plugins_INSTALLLED_APPS():
    """Chain the lists returned by every plugin's ``get_INSTALLED_APPS`` hook into one iterator.

    NOTE: the function name is spelled with three L's; that spelling is its public name.
    """
    per_plugin_apps = pm.hook.get_INSTALLED_APPS()
    return itertools.chain.from_iterable(per_plugin_apps)
def register_plugins_INSTALLLED_APPS(INSTALLED_APPS):
    """Pass *INSTALLED_APPS* to every plugin's ``register_INSTALLED_APPS`` hook so it can be mutated in place.

    NOTE: the function name is spelled with three L's; register_plugins_settings() calls it by that name.
    """
    register_hook = pm.hook.register_INSTALLED_APPS
    register_hook(INSTALLED_APPS=INSTALLED_APPS)
def get_plugins_MIDDLEWARE():
    """Chain the lists returned by every plugin's ``get_MIDDLEWARE`` hook into one iterator."""
    per_plugin_middleware = pm.hook.get_MIDDLEWARE()
    return itertools.chain.from_iterable(per_plugin_middleware)
def register_plugins_MIDDLEWARE(MIDDLEWARE):
    """Pass *MIDDLEWARE* to every plugin's ``register_MIDDLEWARE`` hook so it can be mutated in place."""
    register_hook = pm.hook.register_MIDDLEWARE
    register_hook(MIDDLEWARE=MIDDLEWARE)
def get_plugins_AUTHENTICATION_BACKENDS():
    """Chain the lists returned by every plugin's ``get_AUTHENTICATION_BACKENDS`` hook into one iterator."""
    per_plugin_backends = pm.hook.get_AUTHENTICATION_BACKENDS()
    return itertools.chain.from_iterable(per_plugin_backends)
def register_plugins_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
    """Pass *AUTHENTICATION_BACKENDS* to every plugin's ``register_AUTHENTICATION_BACKENDS`` hook so it can be mutated in place."""
    register_hook = pm.hook.register_AUTHENTICATION_BACKENDS
    register_hook(AUTHENTICATION_BACKENDS=AUTHENTICATION_BACKENDS)
def get_plugins_STATICFILES_DIRS():
    """Chain the lists returned by every plugin's ``get_STATICFILES_DIRS`` hook into one iterator."""
    per_plugin_dirs = pm.hook.get_STATICFILES_DIRS()
    return itertools.chain.from_iterable(per_plugin_dirs)
def register_plugins_STATICFILES_DIRS(STATICFILES_DIRS):
    """Pass *STATICFILES_DIRS* to every plugin's ``register_STATICFILES_DIRS`` hook so it can be mutated in place."""
    register_hook = pm.hook.register_STATICFILES_DIRS
    register_hook(STATICFILES_DIRS=STATICFILES_DIRS)
def get_plugins_TEMPLATE_DIRS():
    """Chain the lists returned by every plugin's ``get_TEMPLATE_DIRS`` hook into one iterator."""
    per_plugin_dirs = pm.hook.get_TEMPLATE_DIRS()
    return itertools.chain.from_iterable(per_plugin_dirs)
def register_plugins_TEMPLATE_DIRS(TEMPLATE_DIRS):
    """Pass *TEMPLATE_DIRS* to every plugin's ``register_TEMPLATE_DIRS`` hook so it can be mutated in place."""
    register_hook = pm.hook.register_TEMPLATE_DIRS
    register_hook(TEMPLATE_DIRS=TEMPLATE_DIRS)
def get_plugins_DJANGO_HUEY_QUEUES():
    """Merge the results of every plugin's ``get_DJANGO_HUEY_QUEUES`` hook into a single dict.

    Each result is fed to dict.update(), so later results override earlier keys
    and every plugin result has to be something dict.update() accepts.
    """
    plugin_results = pm.hook.get_DJANGO_HUEY_QUEUES()
    merged_queues = {}
    for queues in plugin_results:
        merged_queues.update(queues)
    return merged_queues
def register_plugins_DJANGO_HUEY(DJANGO_HUEY):
    """Pass *DJANGO_HUEY* to every plugin's ``register_DJANGO_HUEY`` hook so it can be mutated in place."""
    register_hook = pm.hook.register_DJANGO_HUEY
    register_hook(DJANGO_HUEY=DJANGO_HUEY)
def get_plugins_ADMIN_DATA_VIEWS_URLS():
    """Chain the lists returned by every plugin's ``get_ADMIN_DATA_VIEWS_URLS`` hook into one iterator."""
    per_plugin_urls = pm.hook.get_ADMIN_DATA_VIEWS_URLS()
    return itertools.chain.from_iterable(per_plugin_urls)
def register_plugins_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
    """Pass *ADMIN_DATA_VIEWS* to every plugin's ``register_ADMIN_DATA_VIEWS`` hook so it can be mutated in place."""
    register_hook = pm.hook.register_ADMIN_DATA_VIEWS
    register_hook(ADMIN_DATA_VIEWS=ADMIN_DATA_VIEWS)
def register_plugins_settings(settings):
    """Let plugins extend a settings dict in place.

    Wraps *settings* in a benedict, makes sure the plugin-extensible keys exist,
    calls the per-setting ``register_*`` hooks followed by the generic
    ``register_settings`` hook, then copies the result back into *settings*.
    Returns None; *settings* is updated in place.
    """
    # convert the settings dict to a benedict so we can set values using settings.attr = xyz notation
    settings_as_obj = benedict(settings, keypath_separator=None)

    # set default values for settings that are used by plugins
    # NOTE(review): assigning a not-yet-present key via attribute access depends on the installed benedict version's keyattr behavior -- confirm
    settings_as_obj.INSTALLED_APPS = settings_as_obj.get('INSTALLED_APPS', [])
    settings_as_obj.MIDDLEWARE = settings_as_obj.get('MIDDLEWARE', [])
    settings_as_obj.AUTHENTICATION_BACKENDS = settings_as_obj.get('AUTHENTICATION_BACKENDS', [])
    settings_as_obj.STATICFILES_DIRS = settings_as_obj.get('STATICFILES_DIRS', [])
    settings_as_obj.TEMPLATE_DIRS = settings_as_obj.get('TEMPLATE_DIRS', [])
    settings_as_obj.DJANGO_HUEY = settings_as_obj.get('DJANGO_HUEY', {'queues': {}})
    settings_as_obj.ADMIN_DATA_VIEWS = settings_as_obj.get('ADMIN_DATA_VIEWS', {'URLS': []})

    # call all the hook functions to mutate the settings values in-place
    register_plugins_INSTALLLED_APPS(settings_as_obj.INSTALLED_APPS)
    register_plugins_MIDDLEWARE(settings_as_obj.MIDDLEWARE)
    register_plugins_AUTHENTICATION_BACKENDS(settings_as_obj.AUTHENTICATION_BACKENDS)
    register_plugins_STATICFILES_DIRS(settings_as_obj.STATICFILES_DIRS)
    register_plugins_TEMPLATE_DIRS(settings_as_obj.TEMPLATE_DIRS)
    register_plugins_DJANGO_HUEY(settings_as_obj.DJANGO_HUEY)
    register_plugins_ADMIN_DATA_VIEWS(settings_as_obj.ADMIN_DATA_VIEWS)

    # calls the register_settings hook of each registered plugin with the benedict wrapper
    pm.hook.register_settings(settings=settings_as_obj)

    # then finally update the passed-in settings mapping with all the new settings
    settings.update(settings_as_obj)
def get_plugins_urlpatterns():
    """Return one flat list with the URL patterns returned by every plugin's ``get_urlpatterns`` hook."""
    # The declared hookspec is named ``get_urlpatterns`` (like every other get_* hook);
    # ``pm.hook.urlpatterns`` does not match any declared hookspec.
    per_plugin_patterns = pm.hook.get_urlpatterns()
    return list(itertools.chain.from_iterable(per_plugin_patterns))
def register_plugins_urlpatterns(urlpatterns):
    """Pass *urlpatterns* to every plugin's ``register_urlpatterns`` hook so it can be mutated in place."""
    register_hook = pm.hook.register_urlpatterns
    register_hook(urlpatterns=urlpatterns)
# PLUGANTIC HOOKS
def get_plugins_PLUGINS():
    """Return a benedict mapping ``PLUGIN.id`` -> ``PLUGIN`` for every module registered on ``pm``."""
    plugins_by_id = {}
    for registered_module in pm.get_plugins():
        plugins_by_id[registered_module.PLUGIN.id] = registered_module.PLUGIN
    return benedict(plugins_by_id)
def get_plugins_HOOKS(PLUGINS):
    """Return a benedict mapping ``hook.id`` -> hook for every entry in the ``hooks`` of each value in *PLUGINS*."""
    hooks_by_id = {}
    for plugin in PLUGINS.values():
        for hook in plugin.hooks:
            hooks_by_id[hook.id] = hook
    return benedict(hooks_by_id)
def get_plugins_CONFIGS():
    """Return a benedict mapping ``id`` -> item for everything yielded by the plugins' ``get_CONFIGS`` hooks."""
    configs_by_id = {}
    for plugin_configs in pm.hook.get_CONFIGS():
        for config in plugin_configs:
            configs_by_id[config.id] = config
    return benedict(configs_by_id)
def get_plugins_FLAT_CONFIG(CONFIGS):
    """Flatten the ``model_dump()`` of every value in *CONFIGS* into one benedict (later configs override earlier keys)."""
    flat = {
        key: value
        for config in CONFIGS.values()
        for key, value in config.model_dump().items()
    }
    return benedict(flat)
def get_plugins_BINPROVIDERS():
    """Return a benedict mapping ``id`` -> item for everything yielded by the plugins' ``get_BINPROVIDERS`` hooks."""
    binproviders_by_id = {}
    for plugin_binproviders in pm.hook.get_BINPROVIDERS():
        for binprovider in plugin_binproviders:
            binproviders_by_id[binprovider.id] = binprovider
    return benedict(binproviders_by_id)
def get_plugins_BINARIES():
    """Return a benedict mapping ``id`` -> item for everything yielded by the plugins' ``get_BINARIES`` hooks."""
    binaries_by_id = {}
    for plugin_binaries in pm.hook.get_BINARIES():
        for binary in plugin_binaries:
            binaries_by_id[binary.id] = binary
    return benedict(binaries_by_id)
def get_plugins_EXTRACTORS():
    """Return a benedict mapping ``id`` -> item for everything yielded by the plugins' ``get_EXTRACTORS`` hooks."""
    extractors_by_id = {}
    for plugin_extractors in pm.hook.get_EXTRACTORS():
        for extractor in plugin_extractors:
            extractors_by_id[extractor.id] = extractor
    return benedict(extractors_by_id)
def get_plugins_REPLAYERS():
    """Return a benedict mapping ``id`` -> item for everything yielded by the plugins' ``get_REPLAYERS`` hooks."""
    replayers_by_id = {}
    for plugin_replayers in pm.hook.get_REPLAYERS():
        for replayer in plugin_replayers:
            replayers_by_id[replayer.id] = replayer
    return benedict(replayers_by_id)
def get_plugins_CHECKS():
    """Return a benedict mapping ``id`` -> item for everything yielded by the plugins' ``get_CHECKS`` hooks."""
    checks_by_id = {}
    for plugin_checks in pm.hook.get_CHECKS():
        for check in plugin_checks:
            checks_by_id[check.id] = check
    return benedict(checks_by_id)
def get_plugins_ADMINDATAVIEWS():
    """Return a benedict mapping ``id`` -> item for everything yielded by the plugins' ``get_ADMINDATAVIEWS`` hooks."""
    dataviews_by_id = {}
    for plugin_admin_dataviews in pm.hook.get_ADMINDATAVIEWS():
        for admin_dataview in plugin_admin_dataviews:
            dataviews_by_id[admin_dataview.id] = admin_dataview
    return benedict(dataviews_by_id)
def get_plugins_QUEUES():
    """Return a benedict mapping ``id`` -> item for everything yielded by the plugins' ``get_QUEUES`` hooks."""
    queues_by_id = {}
    for plugin_queues in pm.hook.get_QUEUES():
        for queue in plugin_queues:
            queues_by_id[queue.id] = queue
    return benedict(queues_by_id)
def get_plugins_SEARCHBACKENDS():
    """Return a benedict mapping ``id`` -> item for everything yielded by the plugins' ``get_SEARCHBACKENDS`` hooks."""
    searchbackends_by_id = {}
    for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS():
        for searchbackend in plugin_searchbackends:
            searchbackends_by_id[searchbackend.id] = searchbackend
    return benedict(searchbackends_by_id)

12
archivebox/abx/apps.py Normal file
View file

@ -0,0 +1,12 @@
from django.apps import AppConfig
class ABXConfig(AppConfig):
    """Django AppConfig for the ``abx`` app; forwards Django's ready() signal to the plugin hooks."""

    name = 'abx'
    default_auto_field = 'django.db.models.BigAutoField'

    def ready(self):
        """Call the ``ready`` hook of every registered plugin with the Django settings object."""
        # both imports are deferred until ready() runs instead of happening at module import
        import abx
        from django.conf import settings

        plugin_manager = abx.pm
        plugin_manager.hook.ready(settings=settings)

View file

@ -0,0 +1,12 @@
from pathlib import Path
from pluggy import HookimplMarker
from pluggy import HookspecMarker
# Decorator markers for the "abx" project name: @hookspec declares a hook specification,
# @hookimpl marks a plugin's implementation of one.
hookspec = HookspecMarker("abx")
hookimpl = HookimplMarker("abx")
@hookspec
def get_system_user() -> str:
    """Hookspec: return the name of the system user.

    The body shown here evaluates to the last path component of the expanded home directory ("~").
    """
    return Path('~').expanduser().name

View file

@ -0,0 +1,30 @@
from .hookspec import hookspec
@hookspec
def get_CONFIGS():
    """Hookspec: return this plugin's configs; abx.get_plugins_CONFIGS() iterates each result and keys every item by its ``id``."""
    # NOTE(review): the aggregator reads ``.id`` from each iterated item, so implementations presumably return an iterable of objects rather than a plain dict -- confirm
    return {}
@hookspec
def get_EXTRACTORS():
    """Hookspec: return this plugin's extractors; abx.get_plugins_EXTRACTORS() iterates each result and keys every item by its ``id``."""
    # NOTE(review): the aggregator reads ``.id`` from each iterated item, so implementations presumably return an iterable of objects rather than a plain dict -- confirm
    return {}
@hookspec
def get_REPLAYERS():
    """Hookspec: return this plugin's replayers; abx.get_plugins_REPLAYERS() iterates each result and keys every item by its ``id``."""
    # NOTE(review): the aggregator reads ``.id`` from each iterated item, so implementations presumably return an iterable of objects rather than a plain dict -- confirm
    return {}
@hookspec
def get_CHECKS():
    """Hookspec: return this plugin's checks; abx.get_plugins_CHECKS() iterates each result and keys every item by its ``id``."""
    # NOTE(review): the aggregator reads ``.id`` from each iterated item, so implementations presumably return an iterable of objects rather than a plain dict -- confirm
    return {}
@hookspec
def get_ADMINDATAVIEWS():
    """Hookspec: return this plugin's admin data views; abx.get_plugins_ADMINDATAVIEWS() iterates each result and keys every item by its ``id``."""
    # NOTE(review): the aggregator reads ``.id`` from each iterated item, so implementations presumably return an iterable of objects rather than a plain dict -- confirm
    return {}
@hookspec
def get_QUEUES():
    """Hookspec: return this plugin's queues; abx.get_plugins_QUEUES() iterates each result and keys every item by its ``id``."""
    # NOTE(review): the aggregator reads ``.id`` from each iterated item, so implementations presumably return an iterable of objects rather than a plain dict -- confirm
    return {}
@hookspec
def get_SEARCHBACKENDS():
    """Hookspec: return this plugin's search backends; abx.get_plugins_SEARCHBACKENDS() iterates each result and keys every item by its ``id``."""
    # NOTE(review): the aggregator reads ``.id`` from each iterated item, so implementations presumably return an iterable of objects rather than a plain dict -- confirm
    return {}

View file

@ -0,0 +1,6 @@
from .hookspec import hookspec
@hookspec
def ready(settings):
    """Called when the Django app.ready() is triggered.

    ABXConfig.ready() invokes this hook with the ``django.conf.settings`` object as *settings*.
    """
    pass

View file

@ -0,0 +1,90 @@
from .hookspec import hookspec
###########################################################################################
@hookspec
def get_INSTALLED_APPS():
    """Return a list of apps to add to INSTALLED_APPS.

    Results from all plugins are chained together by abx.get_plugins_INSTALLLED_APPS().
    """
    # e.g. ['your_plugin_type.plugin_name']
    return []
@hookspec
def register_INSTALLED_APPS(INSTALLED_APPS):
    """Mutate INSTALLED_APPS in place to add your app in a specific position.

    Invoked by abx.register_plugins_INSTALLLED_APPS(), which passes the list as the INSTALLED_APPS keyword argument.
    """
    # idx_of_contrib = INSTALLED_APPS.index('django.contrib.auth')
    # INSTALLED_APPS.insert(idx_of_contrib + 1, 'your_plugin_type.plugin_name')
    pass
@hookspec
def get_TEMPLATE_DIRS():
    """Return a list of template dirs contributed by this plugin (chained across plugins by abx.get_plugins_TEMPLATE_DIRS())."""
    return []     # e.g. ['your_plugin_type/plugin_name/templates']
@hookspec
def register_TEMPLATE_DIRS(TEMPLATE_DIRS):
    """Mutate TEMPLATE_DIRS in place to add your template dirs in a specific position"""
    # e.g. TEMPLATE_DIRS.insert(0, 'your_plugin_type/plugin_name/templates')
    pass
@hookspec
def get_STATICFILES_DIRS():
    """Return a list of static file dirs contributed by this plugin (chained across plugins by abx.get_plugins_STATICFILES_DIRS())."""
    return []     # e.g. ['your_plugin_type/plugin_name/static']
@hookspec
def register_STATICFILES_DIRS(STATICFILES_DIRS):
    """Mutate STATICFILES_DIRS in place to add your static dirs in a specific position.

    Invoked by abx.register_plugins_STATICFILES_DIRS(), which passes the list as the STATICFILES_DIRS keyword argument.
    """
    # e.g. STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static')
    pass
@hookspec
def get_MIDDLEWARE():
    """Return a list of middleware import paths contributed by this plugin (chained across plugins by abx.get_plugins_MIDDLEWARE())."""
    return []     # e.g. ['your_plugin_type.plugin_name.middleware.YourMiddleware']
@hookspec
def register_MIDDLEWARE(MIDDLEWARE):
    """Mutate MIDDLEWARE in place to add your middleware in a specific position.

    Invoked by abx.register_plugins_MIDDLEWARE(), which passes the list as the MIDDLEWARE keyword argument.
    """
    # e.g. MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware')
    pass
@hookspec
def get_AUTHENTICATION_BACKENDS():
    """Return a list of auth backend import paths contributed by this plugin (chained across plugins by abx.get_plugins_AUTHENTICATION_BACKENDS())."""
    return []     # e.g. ['django_auth_ldap.backend.LDAPBackend']
@hookspec
def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
    """Mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position.

    Invoked by abx.register_plugins_AUTHENTICATION_BACKENDS(), which passes the list as the AUTHENTICATION_BACKENDS keyword argument.
    """
    # e.g. AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend')
    pass
@hookspec
def get_DJANGO_HUEY_QUEUES():
    """Return a dict of huey queue definitions contributed by this plugin, keyed by queue name.

    abx.get_plugins_DJANGO_HUEY_QUEUES() merges every plugin's result into one
    dict with dict.update(), so the result has to be a mapping: a list of
    ``{'name': ..., 'HUEY': ...}`` dicts would not be merged correctly.
    """
    return {}     # e.g. {'your_queue_name': {...queue settings...}}
@hookspec
def register_DJANGO_HUEY(DJANGO_HUEY):
    """Mutate DJANGO_HUEY in place to add your huey queues in a specific position.

    Invoked by abx.register_plugins_DJANGO_HUEY(), which passes the dict as the DJANGO_HUEY keyword argument.
    """
    # e.g. DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value'
    pass
@hookspec
def get_ADMIN_DATA_VIEWS_URLS():
    """Return a list of admin data view URL entries contributed by this plugin (chained across plugins by abx.get_plugins_ADMIN_DATA_VIEWS_URLS())."""
    return []
@hookspec
def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
    """Mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position.

    Invoked by abx.register_plugins_ADMIN_DATA_VIEWS(), which passes the dict as the ADMIN_DATA_VIEWS keyword argument.
    """
    # e.g. ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py')
    pass
@hookspec
def register_settings(settings):
    """Mutate settings in place to add your settings / modify existing settings.

    Invoked by abx.register_plugins_settings() with a benedict wrapping the settings dict,
    after the per-setting register_* hooks have been called.
    """
    # settings.SOME_KEY = 'some_value'
    pass

View file

@ -0,0 +1,12 @@
from .hookspec import hookspec
@hookspec
def get_urlpatterns():
    """Return a list of URL patterns contributed by this plugin."""
    return []     # e.g. [path('your_plugin_type/plugin_name/url.py', your_view)]
@hookspec
def register_urlpatterns(urlpatterns):
    """Mutate urlpatterns in place to add your urlpatterns in a specific position.

    Invoked by abx.register_plugins_urlpatterns(), which passes the list as the urlpatterns keyword argument.
    """
    # e.g. urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view))
    pass

View file

@ -0,0 +1,13 @@
from .hookspec import hookspec
###########################################################################################
@hookspec
def get_BINPROVIDERS():
    """Hookspec: return this plugin's binproviders; abx.get_plugins_BINPROVIDERS() iterates each result and keys every item by its ``id``."""
    # NOTE(review): the aggregator reads ``.id`` from each iterated item, so implementations presumably return an iterable of objects rather than a plain dict -- confirm
    return {}
@hookspec
def get_BINARIES():
    """Hookspec: return this plugin's binaries; abx.get_plugins_BINARIES() iterates each result and keys every item by its ``id``."""
    # NOTE(review): the aggregator reads ``.id`` from each iterated item, so implementations presumably return an iterable of objects rather than a plain dict -- confirm
    return {}

View file

@ -27,7 +27,6 @@ import re
import sys import sys
import json import json
import shutil import shutil
import archivebox
from hashlib import md5 from hashlib import md5
from pathlib import Path from pathlib import Path
@ -36,15 +35,20 @@ from typing import Optional, Type, Tuple, Dict
from subprocess import run, PIPE, DEVNULL, STDOUT, TimeoutExpired from subprocess import run, PIPE, DEVNULL, STDOUT, TimeoutExpired
from configparser import ConfigParser from configparser import ConfigParser
from pydantic_pkgr import SemVer
from rich.progress import Progress from rich.progress import Progress
from rich.console import Console from rich.console import Console
from benedict import benedict
import django import django
from django.db.backends.sqlite3.base import Database as sqlite3 from django.db.backends.sqlite3.base import Database as sqlite3
import archivebox
from archivebox.constants import CONSTANTS
from archivebox.constants import *
from pydantic_pkgr import SemVer
from .config_stubs import ( from .config_stubs import (
AttrDict,
ConfigValue, ConfigValue,
ConfigDict, ConfigDict,
ConfigDefaultValue, ConfigDefaultValue,
@ -52,85 +56,35 @@ from .config_stubs import (
) )
from .misc.logging import ( from .misc.logging import (
DEFAULT_CLI_COLORS,
ANSI,
COLOR_DICT,
stderr, stderr,
hint, # noqa hint, # noqa
) )
# print('STARTING CONFIG LOADING') from .plugins_sys.config.apps import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG
from .plugins_auth.ldap.apps import LDAP_CONFIG
# load fallback libraries from vendor dir from .plugins_extractor.favicon.apps import FAVICON_CONFIG
from .vendor import load_vendored_libs ANSI = SHELL_CONFIG.ANSI
load_vendored_libs() LDAP = LDAP_CONFIG.LDAP_ENABLED
# print("LOADED VENDOR LIBS")
############################### Config Schema ################################## ############################### Config Schema ##################################
CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = { CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'SHELL_CONFIG': { 'SHELL_CONFIG': SHELL_CONFIG.as_legacy_config_schema(),
'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()},
'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']},
'SHOW_PROGRESS': {'type': bool, 'default': lambda c: c['IS_TTY']}, # progress bars are buggy on mac, disable for now
'IN_DOCKER': {'type': bool, 'default': False},
'IN_QEMU': {'type': bool, 'default': False},
'PUID': {'type': int, 'default': os.getuid()},
'PGID': {'type': int, 'default': os.getgid()},
},
'GENERAL_CONFIG': { 'SERVER_CONFIG': SERVER_CONFIG.as_legacy_config_schema(),
'OUTPUT_DIR': {'type': str, 'default': None},
'CONFIG_FILE': {'type': str, 'default': None}, 'GENERAL_CONFIG': GENERAL_CONFIG.as_legacy_config_schema(),
'ONLY_NEW': {'type': bool, 'default': True},
'TIMEOUT': {'type': int, 'default': 60},
'MEDIA_TIMEOUT': {'type': int, 'default': 3600},
'OUTPUT_PERMISSIONS': {'type': str, 'default': '644'},
'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'}, # TODO: move this to be a default WGET_ARGS
'URL_DENYLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$', 'aliases': ('URL_BLACKLIST',)}, # to avoid downloading code assets as their own pages 'ARCHIVING_CONFIG': ARCHIVING_CONFIG.as_legacy_config_schema(),
'URL_ALLOWLIST': {'type': str, 'default': None, 'aliases': ('URL_WHITELIST',)},
'SEARCH_BACKEND_CONFIG': SEARCH_BACKEND_CONFIG.as_legacy_config_schema(),
'ENFORCE_ATOMIC_WRITES': {'type': bool, 'default': True}, 'STORAGE_CONFIG': STORAGE_CONFIG.as_legacy_config_schema(),
'TAG_SEPARATOR_PATTERN': {'type': str, 'default': r'[,]'},
}, 'LDAP_CONFIG': LDAP_CONFIG.as_legacy_config_schema(),
'FAVICON_CONFIG': FAVICON_CONFIG.as_legacy_config_schema(),
'SERVER_CONFIG': {
'ADMIN_USERNAME': {'type': str, 'default': None},
'ADMIN_PASSWORD': {'type': str, 'default': None},
'SECRET_KEY': {'type': str, 'default': None},
'BIND_ADDR': {'type': str, 'default': lambda c: ['127.0.0.1:8000', '0.0.0.0:8000'][c['IN_DOCKER']]},
'ALLOWED_HOSTS': {'type': str, 'default': '*'}, # e.g. archivebox.example.com,archivebox2.example.com
'CSRF_TRUSTED_ORIGINS': {'type': str, 'default': lambda c: 'http://localhost:8000,http://127.0.0.1:8000,http://0.0.0.0:8000,http://{}'.format(c['BIND_ADDR'])}, # e.g. https://archivebox.example.com,https://archivebox2.example.com:8080
'DEBUG': {'type': bool, 'default': False},
'PUBLIC_INDEX': {'type': bool, 'default': True},
'PUBLIC_SNAPSHOTS': {'type': bool, 'default': True},
'PUBLIC_ADD_VIEW': {'type': bool, 'default': False},
'FOOTER_INFO': {'type': str, 'default': 'Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.'},
'SNAPSHOTS_PER_PAGE': {'type': int, 'default': 40},
'CUSTOM_TEMPLATES_DIR': {'type': str, 'default': None},
'TIME_ZONE': {'type': str, 'default': 'UTC'},
'TIMEZONE': {'type': str, 'default': 'UTC'},
'REVERSE_PROXY_USER_HEADER': {'type': str, 'default': 'Remote-User'},
'REVERSE_PROXY_WHITELIST': {'type': str, 'default': ''},
'LOGOUT_REDIRECT_URL': {'type': str, 'default': '/'},
'PREVIEW_ORIGINALS': {'type': bool, 'default': True},
'LDAP': {'type': bool, 'default': False},
'LDAP_SERVER_URI': {'type': str, 'default': None},
'LDAP_BIND_DN': {'type': str, 'default': None},
'LDAP_BIND_PASSWORD': {'type': str, 'default': None},
'LDAP_USER_BASE': {'type': str, 'default': None},
'LDAP_USER_FILTER': {'type': str, 'default': None},
'LDAP_USERNAME_ATTR': {'type': str, 'default': None},
'LDAP_FIRSTNAME_ATTR': {'type': str, 'default': None},
'LDAP_LASTNAME_ATTR': {'type': str, 'default': None},
'LDAP_EMAIL_ATTR': {'type': str, 'default': None},
'LDAP_CREATE_SUPERUSER': {'type': bool, 'default': False},
},
'ARCHIVE_METHOD_TOGGLES': { 'ARCHIVE_METHOD_TOGGLES': {
'SAVE_TITLE': {'type': bool, 'default': True, 'aliases': ('FETCH_TITLE',)}, 'SAVE_TITLE': {'type': bool, 'default': True, 'aliases': ('FETCH_TITLE',)},
@ -212,26 +166,6 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'SINGLEFILE_EXTRA_ARGS': {'type': list, 'default': None}, 'SINGLEFILE_EXTRA_ARGS': {'type': list, 'default': None},
'MERCURY_ARGS': {'type': list, 'default': ['--format=text']}, 'MERCURY_ARGS': {'type': list, 'default': ['--format=text']},
'MERCURY_EXTRA_ARGS': {'type': list, 'default': None}, 'MERCURY_EXTRA_ARGS': {'type': list, 'default': None},
'FAVICON_PROVIDER': {'type': str, 'default': 'https://www.google.com/s2/favicons?domain={}'},
},
'SEARCH_BACKEND_CONFIG' : {
'USE_INDEXING_BACKEND': {'type': bool, 'default': True},
'USE_SEARCHING_BACKEND': {'type': bool, 'default': True},
'SEARCH_BACKEND_ENGINE': {'type': str, 'default': 'ripgrep'},
'SEARCH_BACKEND_HOST_NAME': {'type': str, 'default': 'localhost'},
'SEARCH_BACKEND_PORT': {'type': int, 'default': 1491},
'SEARCH_BACKEND_PASSWORD': {'type': str, 'default': 'SecretPassword'},
'SEARCH_PROCESS_HTML': {'type': bool, 'default': True},
# SONIC
'SONIC_COLLECTION': {'type': str, 'default': 'archivebox'},
'SONIC_BUCKET': {'type': str, 'default': 'snapshots'},
'SEARCH_BACKEND_TIMEOUT': {'type': int, 'default': 90},
# SQLite3 FTS5
'FTS_SEPARATE_DATABASE': {'type': bool, 'default': True},
'FTS_TOKENIZERS': {'type': str, 'default': 'porter unicode61 remove_diacritics 2'},
# Default from https://www.sqlite.org/limits.html#max_length
'FTS_SQLITE_MAX_LENGTH': {'type': int, 'default': int(1e9)},
}, },
'DEPENDENCY_CONFIG': { 'DEPENDENCY_CONFIG': {
@ -242,7 +176,6 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'USE_MERCURY': {'type': bool, 'default': True}, 'USE_MERCURY': {'type': bool, 'default': True},
'USE_GIT': {'type': bool, 'default': True}, 'USE_GIT': {'type': bool, 'default': True},
'USE_CHROME': {'type': bool, 'default': True}, 'USE_CHROME': {'type': bool, 'default': True},
'USE_NODE': {'type': bool, 'default': True},
'USE_YOUTUBEDL': {'type': bool, 'default': True}, 'USE_YOUTUBEDL': {'type': bool, 'default': True},
'USE_RIPGREP': {'type': bool, 'default': True}, 'USE_RIPGREP': {'type': bool, 'default': True},
@ -282,60 +215,16 @@ def get_real_name(key: str) -> str:
################################ Constants #####################################
PACKAGE_DIR_NAME = 'archivebox'
TEMPLATES_DIR_NAME = 'templates'
ARCHIVE_DIR_NAME = 'archive'
SOURCES_DIR_NAME = 'sources'
LOGS_DIR_NAME = 'logs'
CACHE_DIR_NAME = 'cache'
LIB_DIR_NAME = 'lib'
PERSONAS_DIR_NAME = 'personas'
CRONTABS_DIR_NAME = 'crontabs'
SQL_INDEX_FILENAME = 'index.sqlite3'
JSON_INDEX_FILENAME = 'index.json'
HTML_INDEX_FILENAME = 'index.html'
ROBOTS_TXT_FILENAME = 'robots.txt'
FAVICON_FILENAME = 'favicon.ico'
CONFIG_FILENAME = 'ArchiveBox.conf'
ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE
CONSTANTS = archivebox.CONSTANTS._asdict()
############################## Version Config ##################################
############################## Derived Config ##################################
# These are derived/computed values calculated *after* all user-provided config values are ingested # These are derived/computed values calculated *after* all user-provided config values are ingested
# they appear in `archivebox config` output and are intended to be read-only for the user # they appear in `archivebox config` output and are intended to be read-only for the user
DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = { DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
**{
key: {'default': lambda c: val}
for key, val in archivebox.CONSTANTS.items()
},
'PACKAGE_DIR': {'default': lambda c: archivebox.PACKAGE_DIR.resolve()}, 'PACKAGE_DIR': {'default': lambda c: archivebox.PACKAGE_DIR.resolve()},
'TEMPLATES_DIR': {'default': lambda c: c['PACKAGE_DIR'] / TEMPLATES_DIR_NAME}, 'TEMPLATES_DIR': {'default': lambda c: c['PACKAGE_DIR'] / CONSTANTS.TEMPLATES_DIR_NAME},
'CUSTOM_TEMPLATES_DIR': {'default': lambda c: c['CUSTOM_TEMPLATES_DIR'] and Path(c['CUSTOM_TEMPLATES_DIR'])}, 'CUSTOM_TEMPLATES_DIR': {'default': lambda c: c['CUSTOM_TEMPLATES_DIR'] and Path(c['CUSTOM_TEMPLATES_DIR'])},
'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)}, 'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)},
'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)}, 'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)},
'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')}, # exec is always needed to list directories
'USE_CURL': {'default': lambda c: c['USE_CURL'] and (c['SAVE_FAVICON'] or c['SAVE_TITLE'] or c['SAVE_ARCHIVE_DOT_ORG'])}, 'USE_CURL': {'default': lambda c: c['USE_CURL'] and (c['SAVE_FAVICON'] or c['SAVE_TITLE'] or c['SAVE_ARCHIVE_DOT_ORG'])},
@ -356,7 +245,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'WGET_EXTRA_ARGS': {'default': lambda c: c['WGET_EXTRA_ARGS'] or []}, 'WGET_EXTRA_ARGS': {'default': lambda c: c['WGET_EXTRA_ARGS'] or []},
'USE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['SAVE_MERCURY']}, 'USE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['SAVE_MERCURY']},
'SAVE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['USE_NODE']}, 'SAVE_MERCURY': {'default': lambda c: c['USE_MERCURY']},
'MERCURY_VERSION': {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None}, # mercury doesnt expose version info until this is merged https://github.com/postlight/parser/pull/750 'MERCURY_VERSION': {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None}, # mercury doesnt expose version info until this is merged https://github.com/postlight/parser/pull/750
'MERCURY_ARGS': {'default': lambda c: c['MERCURY_ARGS'] or []}, 'MERCURY_ARGS': {'default': lambda c: c['MERCURY_ARGS'] or []},
'MERCURY_EXTRA_ARGS': {'default': lambda c: c['MERCURY_EXTRA_ARGS'] or []}, 'MERCURY_EXTRA_ARGS': {'default': lambda c: c['MERCURY_EXTRA_ARGS'] or []},
@ -365,8 +254,6 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'GIT_VERSION': {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None}, 'GIT_VERSION': {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None},
'SAVE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']}, 'SAVE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},
'USE_NODE': {'default': lambda c: True},
'NODE_VERSION': {'default': lambda c: bin_version(c['NODE_BINARY']) if c['USE_NODE'] else None},
'DEPENDENCIES': {'default': lambda c: get_dependency_info(c)}, 'DEPENDENCIES': {'default': lambda c: get_dependency_info(c)},
# 'CODE_LOCATIONS': {'default': lambda c: get_code_locations(c)}, # 'CODE_LOCATIONS': {'default': lambda c: get_code_locations(c)},
@ -550,7 +437,7 @@ def load_config(defaults: ConfigDefaultDict,
config: Optional[ConfigDict]=None, config: Optional[ConfigDict]=None,
out_dir: Optional[str]=None, out_dir: Optional[str]=None,
env_vars: Optional[os._Environ]=None, env_vars: Optional[os._Environ]=None,
config_file_vars: Optional[Dict[str, str]]=None) -> ConfigDict: config_file_vars: Optional[Dict[str, str]]=None) -> benedict:
env_vars = env_vars or os.environ env_vars = env_vars or os.environ
config_file_vars = config_file_vars or load_config_file(out_dir=out_dir) config_file_vars = config_file_vars or load_config_file(out_dir=out_dir)
@ -583,13 +470,7 @@ def load_config(defaults: ConfigDefaultDict,
# raise # raise
# raise SystemExit(2) # raise SystemExit(2)
return AttrDict(extended_config) return benedict(extended_config)
def parse_version_string(version: str) -> Tuple[int, int, int]:
"""parses a version tag string formatted like 'vx.x.x' into (major, minor, patch) ints"""
base = version.split('+')[0].split('v')[-1] # remove 'v' prefix and '+editable' suffix
return tuple(int(part) for part in base.split('.'))[:3]
@ -778,13 +659,13 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
'enabled': config['USE_WGET'], 'enabled': config['USE_WGET'],
'is_valid': bool(config['WGET_VERSION']), 'is_valid': bool(config['WGET_VERSION']),
}, },
'NODE_BINARY': { # 'NODE_BINARY': {
'path': bin_path(config['NODE_BINARY']), # 'path': bin_path(config['NODE_BINARY']),
'version': config['NODE_VERSION'], # 'version': config['NODE_VERSION'],
'hash': bin_hash(config['NODE_BINARY']), # 'hash': bin_hash(config['NODE_BINARY']),
'enabled': config['USE_NODE'], # 'enabled': config['USE_NODE'],
'is_valid': bool(config['NODE_VERSION']), # 'is_valid': bool(config['NODE_VERSION']),
}, # },
'MERCURY_BINARY': { 'MERCURY_BINARY': {
'path': bin_path(config['MERCURY_BINARY']), 'path': bin_path(config['MERCURY_BINARY']),
'version': config['MERCURY_VERSION'], 'version': config['MERCURY_VERSION'],
@ -879,15 +760,15 @@ globals().update(CONFIG)
# Set timezone to UTC and umask to OUTPUT_PERMISSIONS # Set timezone to UTC and umask to OUTPUT_PERMISSIONS
assert TIMEZONE == 'UTC', 'The server timezone should always be set to UTC' # noqa: F821 assert TIMEZONE == 'UTC', f'The server timezone should always be set to UTC (got {TIMEZONE})' # noqa: F821
os.environ["TZ"] = TIMEZONE # noqa: F821 os.environ["TZ"] = TIMEZONE # noqa: F821
os.umask(0o777 - int(DIR_OUTPUT_PERMISSIONS, base=8)) # noqa: F821 os.umask(0o777 - int(STORAGE_CONFIG.DIR_OUTPUT_PERMISSIONS, base=8)) # noqa: F821
########################### Config Validity Checkers ########################### ########################### Config Validity Checkers ###########################
if not CONFIG.USE_COLOR: if not SHELL_CONFIG.USE_COLOR:
os.environ['NO_COLOR'] = '1' os.environ['NO_COLOR'] = '1'
if not CONFIG.SHOW_PROGRESS: if not SHELL_CONFIG.SHOW_PROGRESS:
os.environ['TERM'] = 'dumb' os.environ['TERM'] = 'dumb'
# recreate rich console obj based on new config values # recreate rich console obj based on new config values
@ -913,7 +794,7 @@ def setup_django_minimal():
django.setup() django.setup()
def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None: def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
global INITIAL_STARTUP_PROGRESS global INITIAL_STARTUP_PROGRESS
global INITIAL_STARTUP_PROGRESS_TASK global INITIAL_STARTUP_PROGRESS_TASK
@ -930,7 +811,6 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
sys.path.append(str(archivebox.PACKAGE_DIR)) sys.path.append(str(archivebox.PACKAGE_DIR))
os.environ.setdefault('OUTPUT_DIR', str(archivebox.DATA_DIR)) os.environ.setdefault('OUTPUT_DIR', str(archivebox.DATA_DIR))
os.environ.setdefault("ARCHIVEBOX_DATABASE_NAME", ":memory:")
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings') os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
bump_startup_progress_bar() bump_startup_progress_bar()

View file

@ -4,13 +4,13 @@ import os
import sys import sys
import inspect import inspect
from typing import Dict
from pathlib import Path from pathlib import Path
from benedict import benedict
from django.utils.crypto import get_random_string from django.utils.crypto import get_random_string
import abx
import archivebox import archivebox
from archivebox.constants import CONSTANTS
from ..config import CONFIG from ..config import CONFIG
@ -28,39 +28,47 @@ ARCHIVE_DIR = archivebox.DATA_DIR / 'archive'
### ArchiveBox Plugin Settings ### ArchiveBox Plugin Settings
################################################################################ ################################################################################
PLUGIN_HOOKSPECS = [
'abx.hookspec_django_settings',
'abx.hookspec_django_apps',
'abx.hookspec_django_urls',
'abx.hookspec_pydantic_pkgr',
'abx.hookspec_archivebox',
'plugantic.base_check',
]
abx.register_hookspecs(PLUGIN_HOOKSPECS)
def find_plugins_in_dir(plugins_dir: Path, prefix: str) -> Dict[str, Path]: BUILTIN_PLUGIN_DIRS = {
return { 'plugins_sys': archivebox.PACKAGE_DIR / 'plugins_sys',
f"{prefix}.{plugin_entrypoint.parent.name}": plugin_entrypoint.parent 'plugins_pkg': archivebox.PACKAGE_DIR / 'plugins_pkg',
for plugin_entrypoint in sorted(plugins_dir.glob("*/apps.py")) # key=get_plugin_order # Someday enforcing plugin import order may be required, but right now it's not needed 'plugins_auth': archivebox.PACKAGE_DIR / 'plugins_auth',
} # "plugins_pkg.pip": "/app/archivebox/plugins_pkg/pip" 'plugins_search': archivebox.PACKAGE_DIR / 'plugins_search',
'plugins_extractor': archivebox.PACKAGE_DIR / 'plugins_extractor',
PLUGIN_DIRS = { }
'plugins_sys': PACKAGE_DIR / 'plugins_sys', USER_PLUGIN_DIRS = {
'plugins_pkg': PACKAGE_DIR / 'plugins_pkg', 'user_plugins': archivebox.DATA_DIR / 'user_plugins',
'plugins_auth': PACKAGE_DIR / 'plugins_auth',
'plugins_search': PACKAGE_DIR / 'plugins_search',
'plugins_extractor': PACKAGE_DIR / 'plugins_extractor',
'user_plugins': DATA_DIR / 'user_plugins',
} }
INSTALLED_PLUGINS = {}
for plugin_prefix, plugin_dir in PLUGIN_DIRS.items():
INSTALLED_PLUGINS.update(find_plugins_in_dir(plugin_dir, prefix=plugin_prefix))
# Discover plugins from each source; every helper returns a {plugin_name: plugin_dir} mapping.
BUILTIN_PLUGINS = abx.get_plugins_in_dirs(BUILTIN_PLUGIN_DIRS)
# NOTE(review): get_pip_installed_plugins() defaults to group='abx', but 'archivebox' is passed
# here -- confirm which entry-point group pip-installed plugins are expected to declare.
PIP_PLUGINS = abx.get_pip_installed_plugins(group='archivebox')
USER_PLUGINS = abx.get_plugins_in_dirs(USER_PLUGIN_DIRS)
# Later mappings win on duplicate keys, so a user plugin replaces a pip or builtin plugin
# registered under the same key, and a pip plugin replaces a builtin one.
ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS}
### Plugins Globals (filled by plugin_type.pluginname.apps.PluginName.register() after Django startup) PLUGIN_MANAGER = abx.pm
PLUGINS = benedict({}) PLUGINS = abx.load_plugins(ALL_PLUGINS)
HOOKS = benedict({}) HOOKS = abx.get_plugins_HOOKS(PLUGINS)
# Created later by Plugin.register(settings) -> Hook.register(settings):
# CONFIGS = benedict({})
# BINPROVIDERS = benedict({})
# BINARIES = benedict({})
# EXTRACTORS = benedict({})
# REPLAYERS = benedict({})
# CHECKS = benedict({})
# ADMINDATAVIEWS = benedict({})
# Module-level registries, one per hook type, populated at settings import time by the
# abx.get_plugins_* helpers (those helpers are defined outside this view).
CONFIGS = abx.get_plugins_CONFIGS()
# FLAT_CONFIG = abx.get_plugins_FLAT_CONFIG(CONFIGS)
# Until the helper above is enabled, FLAT_CONFIG is simply another name for the
# legacy CONFIG object imported from ..config.
FLAT_CONFIG = CONFIG
BINPROVIDERS = abx.get_plugins_BINPROVIDERS()
BINARIES = abx.get_plugins_BINARIES()
EXTRACTORS = abx.get_plugins_EXTRACTORS()
REPLAYERS = abx.get_plugins_REPLAYERS()
CHECKS = abx.get_plugins_CHECKS()
ADMINDATAVIEWS = abx.get_plugins_ADMINDATAVIEWS()
QUEUES = abx.get_plugins_QUEUES()
SEARCHBACKENDS = abx.get_plugins_SEARCHBACKENDS()
################################################################################ ################################################################################
### Django Core Settings ### Django Core Settings
@ -96,15 +104,14 @@ INSTALLED_APPS = [
'django_object_actions', # provides easy Django Admin action buttons on change views https://github.com/crccheck/django-object-actions 'django_object_actions', # provides easy Django Admin action buttons on change views https://github.com/crccheck/django-object-actions
# Our ArchiveBox-provided apps # Our ArchiveBox-provided apps
# 'plugantic', # ArchiveBox plugin API definition + finding/registering/calling interface
'queues', # handles starting and managing background workers and processes 'queues', # handles starting and managing background workers and processes
'abid_utils', # handles ABID ID creation, handling, and models 'abid_utils', # handles ABID ID creation, handling, and models
'plugantic', # ArchiveBox plugin API definition + finding/registering/calling interface
'core', # core django model with Snapshot, ArchiveResult, etc. 'core', # core django model with Snapshot, ArchiveResult, etc.
'api', # Django-Ninja-based Rest API interfaces, config, APIToken model, etc. 'api', # Django-Ninja-based Rest API interfaces, config, APIToken model, etc.
# ArchiveBox plugins # ArchiveBox plugins
*INSTALLED_PLUGINS.keys(), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins, *abx.get_plugins_INSTALLLED_APPS(), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,
# plugin.register(settings) is called at import of each plugin (in the order they are listed here), then plugin.ready() is called at AppConfig.ready() time
# 3rd-party apps from PyPI that need to be loaded last # 3rd-party apps from PyPI that need to be loaded last
'admin_data_views', # handles rendering some convenient automatic read-only views of data in Django admin 'admin_data_views', # handles rendering some convenient automatic read-only views of data in Django admin
@ -112,9 +119,13 @@ INSTALLED_APPS = [
'django_huey', # provides multi-queue support for django huey https://github.com/gaiacoop/django-huey 'django_huey', # provides multi-queue support for django huey https://github.com/gaiacoop/django-huey
'bx_django_utils', # needed for huey_monitor https://github.com/boxine/bx_django_utils 'bx_django_utils', # needed for huey_monitor https://github.com/boxine/bx_django_utils
'huey_monitor', # adds an admin UI for monitoring background huey tasks https://github.com/boxine/django-huey-monitor 'huey_monitor', # adds an admin UI for monitoring background huey tasks https://github.com/boxine/django-huey-monitor
# load plugins last so all other apps are already .ready() when we call plugins.ready()
'abx',
] ]
MIDDLEWARE = [ MIDDLEWARE = [
'core.middleware.TimezoneMiddleware', 'core.middleware.TimezoneMiddleware',
'django.middleware.security.SecurityMiddleware', 'django.middleware.security.SecurityMiddleware',
@ -125,8 +136,10 @@ MIDDLEWARE = [
'core.middleware.ReverseProxyAuthMiddleware', 'core.middleware.ReverseProxyAuthMiddleware',
'django.contrib.messages.middleware.MessageMiddleware', 'django.contrib.messages.middleware.MessageMiddleware',
'core.middleware.CacheControlMiddleware', 'core.middleware.CacheControlMiddleware',
*abx.get_plugins_MIDDLEWARE(),
] ]
################################################################################ ################################################################################
### Authentication Settings ### Authentication Settings
################################################################################ ################################################################################
@ -136,18 +149,20 @@ MIDDLEWARE = [
AUTHENTICATION_BACKENDS = [ AUTHENTICATION_BACKENDS = [
'django.contrib.auth.backends.RemoteUserBackend', 'django.contrib.auth.backends.RemoteUserBackend',
'django.contrib.auth.backends.ModelBackend', 'django.contrib.auth.backends.ModelBackend',
*abx.get_plugins_AUTHENTICATION_BACKENDS(),
] ]
from ..plugins_auth.ldap.settings import LDAP_CONFIG
if LDAP_CONFIG.LDAP_ENABLED: # from ..plugins_auth.ldap.settings import LDAP_CONFIG
AUTH_LDAP_BIND_DN = LDAP_CONFIG.LDAP_BIND_DN
AUTH_LDAP_SERVER_URI = LDAP_CONFIG.LDAP_SERVER_URI # if LDAP_CONFIG.LDAP_ENABLED:
AUTH_LDAP_BIND_PASSWORD = LDAP_CONFIG.LDAP_BIND_PASSWORD # AUTH_LDAP_BIND_DN = LDAP_CONFIG.LDAP_BIND_DN
AUTH_LDAP_USER_ATTR_MAP = LDAP_CONFIG.LDAP_USER_ATTR_MAP # AUTH_LDAP_SERVER_URI = LDAP_CONFIG.LDAP_SERVER_URI
AUTH_LDAP_USER_SEARCH = LDAP_CONFIG.AUTH_LDAP_USER_SEARCH # AUTH_LDAP_BIND_PASSWORD = LDAP_CONFIG.LDAP_BIND_PASSWORD
# AUTH_LDAP_USER_ATTR_MAP = LDAP_CONFIG.LDAP_USER_ATTR_MAP
# AUTH_LDAP_USER_SEARCH = LDAP_CONFIG.AUTH_LDAP_USER_SEARCH
AUTHENTICATION_BACKENDS = LDAP_CONFIG.AUTHENTICATION_BACKENDS # AUTHENTICATION_BACKENDS = LDAP_CONFIG.AUTHENTICATION_BACKENDS
################################################################################ ################################################################################
### Staticfile and Template Settings ### Staticfile and Template Settings
@ -156,22 +171,24 @@ if LDAP_CONFIG.LDAP_ENABLED:
STATIC_URL = '/static/' STATIC_URL = '/static/'
TEMPLATES_DIR_NAME = 'templates' TEMPLATES_DIR_NAME = 'templates'
STATICFILES_DIRS = [ STATICFILES_DIRS = [
*([str(CONFIG.CUSTOM_TEMPLATES_DIR / 'static')] if CONFIG.CUSTOM_TEMPLATES_DIR else []), *([str(CONSTANTS.CUSTOM_TEMPLATES_DIR / 'static')] if CONSTANTS.CUSTOM_TEMPLATES_DIR.is_dir() else []),
*[ # *[
str(plugin_dir / 'static') # str(plugin_dir / 'static')
for plugin_dir in PLUGIN_DIRS.values() # for plugin_dir in PLUGIN_DIRS.values()
if (plugin_dir / 'static').is_dir() # if (plugin_dir / 'static').is_dir()
], # ],
*abx.get_plugins_STATICFILES_DIRS(),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'static'), str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'static'),
] ]
TEMPLATE_DIRS = [ TEMPLATE_DIRS = [
*([str(CONFIG.CUSTOM_TEMPLATES_DIR)] if CONFIG.CUSTOM_TEMPLATES_DIR else []), *([str(CONSTANTS.CUSTOM_TEMPLATES_DIR)] if CONSTANTS.CUSTOM_TEMPLATES_DIR.is_dir() else []),
*[ # *[
str(plugin_dir / 'templates') # str(plugin_dir / 'templates')
for plugin_dir in PLUGIN_DIRS.values() # for plugin_dir in PLUGIN_DIRS.values()
if (plugin_dir / 'templates').is_dir() # if (plugin_dir / 'templates').is_dir()
], # ],
*abx.get_plugins_TEMPLATE_DIRS(),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'core'), str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'core'),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'admin'), str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'admin'),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME), str(PACKAGE_DIR / TEMPLATES_DIR_NAME),
@ -198,8 +215,6 @@ TEMPLATES = [
### External Service Settings ### External Service Settings
################################################################################ ################################################################################
from ..plugins_sys.config.constants import CONSTANTS
# CACHE_DB_FILENAME = 'cache.sqlite3' # CACHE_DB_FILENAME = 'cache.sqlite3'
# CACHE_DB_PATH = CONSTANTS.CACHE_DIR / CACHE_DB_FILENAME # CACHE_DB_PATH = CONSTANTS.CACHE_DIR / CACHE_DB_FILENAME
# CACHE_DB_TABLE = 'django_cache' # CACHE_DB_TABLE = 'django_cache'
@ -210,7 +225,7 @@ DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", str(CONSTANTS.DATABAS
QUEUE_DATABASE_NAME = DATABASE_NAME.replace('index.sqlite3', 'queue.sqlite3') QUEUE_DATABASE_NAME = DATABASE_NAME.replace('index.sqlite3', 'queue.sqlite3')
SQLITE_CONNECTION_OPTIONS = { SQLITE_CONNECTION_OPTIONS = {
"TIME_ZONE": CONFIG.TIMEZONE, "TIME_ZONE": CONSTANTS.TIMEZONE,
"OPTIONS": { "OPTIONS": {
# https://gcollazo.com/optimal-sqlite-settings-for-django/ # https://gcollazo.com/optimal-sqlite-settings-for-django/
# # https://litestream.io/tips/#busy-timeout # # https://litestream.io/tips/#busy-timeout
@ -280,6 +295,7 @@ DJANGO_HUEY = {
"queues": { "queues": {
HUEY["name"]: HUEY.copy(), HUEY["name"]: HUEY.copy(),
# more registered here at plugin import-time by BaseQueue.register() # more registered here at plugin import-time by BaseQueue.register()
**abx.get_plugins_DJANGO_HUEY_QUEUES(),
}, },
} }
@ -411,7 +427,7 @@ USE_I18N = True
USE_TZ = True USE_TZ = True
DATETIME_FORMAT = 'Y-m-d h:i:s A' DATETIME_FORMAT = 'Y-m-d h:i:s A'
SHORT_DATETIME_FORMAT = 'Y-m-d h:i:s A' SHORT_DATETIME_FORMAT = 'Y-m-d h:i:s A'
TIME_ZONE = CONFIG.TIMEZONE # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent TIME_ZONE = CONSTANTS.TIMEZONE # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent
from django.conf.locale.en import formats as en_formats # type: ignore from django.conf.locale.en import formats as en_formats # type: ignore
@ -504,6 +520,7 @@ ADMIN_DATA_VIEWS = {
"name": "log", "name": "log",
}, },
}, },
*abx.get_plugins_ADMIN_DATA_VIEWS_URLS(),
], ],
} }
@ -595,3 +612,7 @@ DEBUG_LOGFIRE = DEBUG_LOGFIRE and (DATA_DIR / '.logfire').is_dir()
# INSTALLED_APPS += ['jet_django'] # INSTALLED_APPS += ['jet_django']
# JET_PROJECT = 'archivebox' # JET_PROJECT = 'archivebox'
# JET_TOKEN = 'some-api-token-here' # JET_TOKEN = 'some-api-token-here'
abx.register_plugins_settings(globals())

View file

@ -533,6 +533,8 @@ def key_is_safe(key: str) -> bool:
@render_with_table_view @render_with_table_view
def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext: def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
CONFIG = settings.FLAT_CONFIG
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.' assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
rows = { rows = {

View file

@ -1,11 +1,12 @@
__package__ = "archivebox.plugantic" __package__ = "archivebox.plugantic"
import abx
from typing import List from typing import List
from django.core.checks import Warning, Tags, register from django.core.checks import Warning, Tags, register
from .base_hook import BaseHook, HookType from .base_hook import BaseHook, HookType
from ..config_stubs import AttrDict
class BaseCheck(BaseHook): class BaseCheck(BaseHook):
hook_type: HookType = "CHECK" hook_type: HookType = "CHECK"
@ -28,21 +29,18 @@ class BaseCheck(BaseHook):
def register(self, settings, parent_plugin=None): def register(self, settings, parent_plugin=None):
# self._plugin = parent_plugin # backref to parent is for debugging only, never rely on this! # self._plugin = parent_plugin # backref to parent is for debugging only, never rely on this!
self.register_with_django_check_system(settings) # (SIDE EFFECT) abx.pm.hook.register_django_check(check=self, settings=settings)
# install hook into settings.CHECKS
settings.CHECKS = getattr(settings, "CHECKS", None) or AttrDict({})
settings.CHECKS[self.id] = self
# record installed hook in settings.HOOKS
super().register(settings, parent_plugin=parent_plugin)
def register_with_django_check_system(self, settings): @abx.hookspec
def run_check(app_configs, **kwargs) -> List[Warning]: @abx.hookimpl
import logging def register_django_check(check: BaseCheck, settings):
return self.check(settings, logging.getLogger("checks")) def run_check(app_configs, **kwargs) -> List[Warning]:
import logging
return check.check(settings, logging.getLogger("checks"))
run_check.__name__ = self.id run_check.__name__ = check.id
run_check.tags = [self.tag] run_check.tags = [check.tag]
register(self.tag)(run_check) register(check.tag)(run_check)

View file

@ -227,26 +227,34 @@ class ArchiveBoxBaseConfig(BaseSettings):
print(f' {key}={original_value} -> {value}') print(f' {key}={original_value} -> {value}')
self.__init__() self.__init__()
return self return self
def as_legacy_config_schema(self):
    """Return this config in the old-style schema layout.

    Backwards-compatibility shim: every model field becomes an entry of the
    form ``{'type': <field annotation>, 'default': <current dumped value>}``,
    keyed by field name and wrapped in a ``benedict``.
    """
    dumped_values = self.model_dump()
    legacy_schema = {}
    for field_name, field_info in self.model_fields.items():
        legacy_schema[field_name] = {
            'type': field_info.annotation,
            'default': dumped_values[field_name],
        }
    return benedict(legacy_schema)
class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook): # type: ignore[type-arg] class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook): # type: ignore[type-arg]
hook_type: ClassVar[HookType] = 'CONFIG' hook_type: ClassVar[HookType] = 'CONFIG'
section: ClassVar[ConfigSectionName] = 'GENERAL_CONFIG' section: ClassVar[ConfigSectionName] = 'GENERAL_CONFIG'
def register(self, settings, parent_plugin=None): # def register(self, settings, parent_plugin=None):
# self._plugin = parent_plugin # for debugging only, never rely on this! # # self._plugin = parent_plugin # for debugging only, never rely on this!
# settings.FLAT_CONFIG = benedict(getattr(settings, "FLAT_CONFIG", settings.CONFIG)) # settings.FLAT_CONFIG = benedict(getattr(settings, "FLAT_CONFIG", {}))
# # pass FLAT_CONFIG so far into our config model to load it # # pass FLAT_CONFIG so far into our config model to load it
# loaded_config = self.__class__(**settings.FLAT_CONFIG) # loaded_config = self
# # then dump our parsed config back into FLAT_CONFIG for the next plugin to use # # then dump our parsed config back into FLAT_CONFIG for the next plugin to use
# settings.FLAT_CONFIG.merge(loaded_config.model_dump(include=set(self.model_fields.keys()))) # settings.FLAT_CONFIG.merge(loaded_config.model_dump(include=set(self.model_fields.keys())))
settings.CONFIGS = getattr(settings, "CONFIGS", None) or benedict({}) # settings.REGISTERED_CONFIGS = getattr(settings, "REGISTERED_CONFIGS", None) or benedict({})
settings.CONFIGS[self.id] = self # settings.REGISTERED_CONFIGS[self.id] = self
self._original_id = id(self) # self._original_id = id(self)
super().register(settings, parent_plugin=parent_plugin) # super().register(settings, parent_plugin=parent_plugin)
# def ready(self, settings): # def ready(self, settings):
# # reload config from environment, in case it's been changed by any other plugins # # reload config from environment, in case it's been changed by any other plugins

View file

@ -96,32 +96,32 @@ class BaseHook(BaseModel):
# e.g. /admin/environment/config/LdapConfig/ # e.g. /admin/environment/config/LdapConfig/
return f"/admin/environment/{self.hook_type.lower()}/{self.id}/" return f"/admin/environment/{self.hook_type.lower()}/{self.id}/"
def register(self, settings, parent_plugin=None): # def register(self, settings, parent_plugin=None):
"""Load a record of an installed hook into global Django settings.HOOKS at runtime.""" # """Load a record of an installed hook into global Django settings.HOOKS at runtime."""
self._plugin = parent_plugin # for debugging only, never rely on this! # self._plugin = parent_plugin # for debugging only, never rely on this!
# assert json.dumps(self.model_json_schema(), indent=4), f"Hook {self.hook_module} has invalid JSON schema." # # assert json.dumps(self.model_json_schema(), indent=4), f"Hook {self.hook_module} has invalid JSON schema."
# print(' -', self.hook_module, '.register()') # # print(' -', self.hook_module, '.register()')
# record installed hook in settings.HOOKS # # record installed hook in settings.HOOKS
settings.HOOKS[self.id] = self # settings.REGISTERED_HOOKS[self.id] = self
if settings.HOOKS[self.id]._is_registered: # if settings.REGISTERED_HOOKS[self.id]._is_registered:
raise Exception(f"Tried to run {self.hook_module}.register() but its already been called!") # raise Exception(f"Tried to run {self.hook_module}.register() but its already been called!")
settings.HOOKS[self.id]._is_registered = True # settings.REGISTERED_HOOKS[self.id]._is_registered = True
# print("REGISTERED HOOK:", self.hook_module) # # print("REGISTERED HOOK:", self.hook_module)
def ready(self, settings): # def ready(self, settings):
"""Runs any runtime code needed when AppConfig.ready() is called (after all models are imported).""" # """Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
# print(' -', self.hook_module, '.ready()') # # print(' -', self.hook_module, '.ready()')
assert self.id in settings.HOOKS, f"Tried to ready hook {self.hook_module} but it is not registered in settings.HOOKS." # assert self.id in settings.REGISTERED_HOOKS, f"Tried to ready hook {self.hook_module} but it is not registered in settings.REGISTERED_HOOKS."
if settings.HOOKS[self.id]._is_ready: # if settings.REGISTERED_HOOKS[self.id]._is_ready:
raise Exception(f"Tried to run {self.hook_module}.ready() but its already been called!") # raise Exception(f"Tried to run {self.hook_module}.ready() but its already been called!")
settings.HOOKS[self.id]._is_ready = True # settings.REGISTERED_HOOKS[self.id]._is_ready = True

View file

@ -1,5 +1,6 @@
__package__ = 'archivebox.plugantic' __package__ = 'archivebox.plugantic'
import abx
import inspect import inspect
from pathlib import Path from pathlib import Path
@ -21,9 +22,6 @@ from benedict import benedict
from .base_hook import BaseHook, HookType from .base_hook import BaseHook, HookType
from ..config import bump_startup_progress_bar
class BasePlugin(BaseModel): class BasePlugin(BaseModel):
model_config = ConfigDict( model_config = ConfigDict(
extra='forbid', extra='forbid',
@ -107,9 +105,10 @@ class BasePlugin(BaseModel):
default_auto_field = 'django.db.models.AutoField' default_auto_field = 'django.db.models.AutoField'
def ready(self): # handled by abx.hookimpl ready()
from django.conf import settings # def ready(self):
plugin_self.ready(settings) # from django.conf import settings
# plugin_self.ready(settings)
return PluginAppConfig return PluginAppConfig
@ -125,64 +124,60 @@ class BasePlugin(BaseModel):
hooks[hook.hook_type][hook.id] = hook hooks[hook.hook_type][hook.id] = hook
return hooks return hooks
def register(self, settings=None): def register(self, settings):
"""Loads this plugin's configs, binaries, extractors, and replayers into global Django settings at import time (before models are imported or any AppConfig.ready() are called).""" """Loads this plugin's configs, binaries, extractors, and replayers into global Django settings at import time (before models are imported or any AppConfig.ready() are called)."""
if settings is None: from ..config import bump_startup_progress_bar
from django.conf import settings as django_settings
settings = django_settings
# print()
# print(self.plugin_module_full, '.register()')
# assert json.dumps(self.model_json_schema(), indent=4), f'Plugin {self.plugin_module} has invalid JSON schema.' # assert settings.PLUGINS[self.id] == self
# # assert self.id not in settings.PLUGINS, f'Tried to register plugin {self.plugin_module} but it conflicts with existing plugin of the same name ({self.app_label}).'
assert self.id not in settings.PLUGINS, f'Tried to register plugin {self.plugin_module} but it conflicts with existing plugin of the same name ({self.app_label}).' # ### Mutate django.conf.settings... values in-place to include plugin-provided overrides
### Mutate django.conf.settings... values in-place to include plugin-provided overrides # if settings.PLUGINS[self.id]._is_registered:
settings.PLUGINS[self.id] = self # raise Exception(f"Tried to run {self.plugin_module}.register() but its already been called!")
if settings.PLUGINS[self.id]._is_registered: # for hook in self.hooks:
raise Exception(f"Tried to run {self.plugin_module}.register() but its already been called!") # hook.register(settings, parent_plugin=self)
for hook in self.hooks: # settings.PLUGINS[self.id]._is_registered = True
hook.register(settings, parent_plugin=self) # # print('√ REGISTERED PLUGIN:', self.plugin_module)
settings.PLUGINS[self.id]._is_registered = True
# print('√ REGISTERED PLUGIN:', self.plugin_module)
bump_startup_progress_bar() bump_startup_progress_bar()
def ready(self, settings=None): def ready(self, settings=None):
"""Runs any runtime code needed when AppConfig.ready() is called (after all models are imported).""" """Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
if settings is None: from ..config import bump_startup_progress_bar
from django.conf import settings as django_settings
settings = django_settings
# print()
# print(self.plugin_module_full, '.ready()')
assert ( # if settings is None:
self.id in settings.PLUGINS and settings.PLUGINS[self.id]._is_registered # from django.conf import settings as django_settings
), f"Tried to run plugin.ready() for {self.plugin_module} but plugin is not yet registered in settings.PLUGINS." # settings = django_settings
if settings.PLUGINS[self.id]._is_ready: # # print()
raise Exception(f"Tried to run {self.plugin_module}.ready() but its already been called!") # # print(self.plugin_module_full, '.ready()')
for hook in self.hooks: # assert (
hook.ready(settings) # self.id in settings.PLUGINS and settings.PLUGINS[self.id]._is_registered
# ), f"Tried to run plugin.ready() for {self.plugin_module} but plugin is not yet registered in settings.PLUGINS."
# if settings.PLUGINS[self.id]._is_ready:
# raise Exception(f"Tried to run {self.plugin_module}.ready() but its already been called!")
# for hook in self.hooks:
# hook.ready(settings)
settings.PLUGINS[self.id]._is_ready = True # settings.PLUGINS[self.id]._is_ready = True
bump_startup_progress_bar() bump_startup_progress_bar()
# @validate_call @validate_call
# def install_binaries(self) -> Self: def install_binaries(self) -> Self:
# new_binaries = [] new_binaries = []
# for idx, binary in enumerate(self.binaries): for idx, binary in enumerate(self.binaries):
# new_binaries.append(binary.install() or binary) new_binaries.append(binary.install() or binary)
# return self.model_copy(update={ return self.model_copy(update={
# 'binaries': new_binaries, 'binaries': new_binaries,
# }) })
@validate_call @validate_call
def load_binaries(self, cache=True) -> Self: def load_binaries(self, cache=True) -> Self:

View file

@ -51,5 +51,5 @@ class LdapAuthPlugin(BasePlugin):
PLUGIN = LdapAuthPlugin() PLUGIN = LdapAuthPlugin()
PLUGIN.register(settings) # PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig DJANGO_APP = PLUGIN.AppConfig

View file

@ -259,5 +259,5 @@ class ChromePlugin(BasePlugin):
PLUGIN = ChromePlugin() PLUGIN = ChromePlugin()
PLUGIN.register(settings) # PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig DJANGO_APP = PLUGIN.AppConfig

View file

@ -99,5 +99,5 @@ class ReadabilityPlugin(BasePlugin):
PLUGIN = ReadabilityPlugin() PLUGIN = ReadabilityPlugin()
PLUGIN.register(settings) # PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig DJANGO_APP = PLUGIN.AppConfig

View file

@ -134,5 +134,5 @@ class SinglefilePlugin(BasePlugin):
PLUGIN = SinglefilePlugin() PLUGIN = SinglefilePlugin()
PLUGIN.register(settings) # PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig DJANGO_APP = PLUGIN.AppConfig

View file

@ -97,5 +97,5 @@ class YtdlpPlugin(BasePlugin):
PLUGIN = YtdlpPlugin() PLUGIN = YtdlpPlugin()
PLUGIN.register(settings) # PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig DJANGO_APP = PLUGIN.AppConfig

View file

@ -90,5 +90,5 @@ class NpmPlugin(BasePlugin):
PLUGIN = NpmPlugin() PLUGIN = NpmPlugin()
PLUGIN.register(settings) # PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig DJANGO_APP = PLUGIN.AppConfig

View file

@ -8,10 +8,11 @@ from pathlib import Path
from typing import List, Dict, Optional, ClassVar from typing import List, Dict, Optional, ClassVar
from pydantic import InstanceOf, Field, model_validator from pydantic import InstanceOf, Field, model_validator
import abx
import django import django
from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type] from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type]
from django.core.checks import Error, Tags from django.core.checks import Error, Tags
from django.conf import settings
from pydantic_pkgr import BinProvider, PipProvider, BinName, BinProviderName, ProviderLookupDict, SemVer from pydantic_pkgr import BinProvider, PipProvider, BinName, BinProviderName, ProviderLookupDict, SemVer
from plugantic.base_plugin import BasePlugin from plugantic.base_plugin import BasePlugin
@ -240,5 +241,11 @@ class PipPlugin(BasePlugin):
] ]
PLUGIN = PipPlugin() PLUGIN = PipPlugin()
PLUGIN.register(settings) # PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig DJANGO_APP = PLUGIN.AppConfig
@abx.hookimpl
def register_django_checks(settings):
    """Register the pip plugin's checks with Django's system-check framework.

    Args:
        settings: the settings object handed to each check's ``register()``.
    """
    # BaseCheck.register_with_django_check_system() no longer exists after this
    # change; BaseCheck.register() now does the registration by dispatching the
    # `register_django_check` hook, so go through register() instead of calling
    # the removed method (which would raise AttributeError).
    USER_IS_NOT_ROOT_CHECK.register(settings)
    PIP_ENVIRONMENT_CHECK.register(settings)

View file

@ -72,7 +72,7 @@ class PlaywrightBinProvider(BaseBinProvider):
if OPERATING_SYSTEM == "darwin" else if OPERATING_SYSTEM == "darwin" else
Path("~/.cache/ms-playwright").expanduser() # linux playwright cache dir Path("~/.cache/ms-playwright").expanduser() # linux playwright cache dir
) )
puppeteer_install_args: List[str] = ["install"] # --with-deps puppeteer_install_args: List[str] = ["install"] # --with-deps
packages_handler: ProviderLookupDict = Field(default={ packages_handler: ProviderLookupDict = Field(default={
"chrome": lambda: ["chromium"], "chrome": lambda: ["chromium"],
@ -177,5 +177,5 @@ class PlaywrightPlugin(BasePlugin):
PLUGIN = PlaywrightPlugin() PLUGIN = PlaywrightPlugin()
PLUGIN.register(settings) # PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig DJANGO_APP = PLUGIN.AppConfig

View file

@ -165,5 +165,5 @@ class PuppeteerPlugin(BasePlugin):
PLUGIN = PuppeteerPlugin() PLUGIN = PuppeteerPlugin()
PLUGIN.register(settings) # PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig DJANGO_APP = PLUGIN.AppConfig

View file

@ -113,5 +113,5 @@ class RipgrepSearchPlugin(BasePlugin):
PLUGIN = RipgrepSearchPlugin() PLUGIN = RipgrepSearchPlugin()
PLUGIN.register(settings) # PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig DJANGO_APP = PLUGIN.AppConfig

View file

@ -133,5 +133,5 @@ class SonicSearchPlugin(BasePlugin):
PLUGIN = SonicSearchPlugin() PLUGIN = SonicSearchPlugin()
PLUGIN.register(settings) # PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig DJANGO_APP = PLUGIN.AppConfig

View file

@ -255,5 +255,5 @@ class SqliteftsSearchPlugin(BasePlugin):
PLUGIN = SqliteftsSearchPlugin() PLUGIN = SqliteftsSearchPlugin()
PLUGIN.register(settings) # PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig DJANGO_APP = PLUGIN.AppConfig

View file

@ -245,5 +245,17 @@ class ConfigPlugin(BasePlugin):
PLUGIN = ConfigPlugin() PLUGIN = ConfigPlugin()
PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig DJANGO_APP = PLUGIN.AppConfig
# register django apps
# Hook implementation that contributes this plugin's Django app to INSTALLED_APPS.
# NOTE(review): the other hook implementations in this change are decorated with
# `@abx.hookimpl`; confirm that `archivebox.plugin.hookimpl` resolves to the same marker.
@archivebox.plugin.hookimpl
def get_INSTALLED_APPS():
# Returns a one-element list holding the name of this plugin's Django AppConfig.
return [DJANGO_APP.name]
# register configs
# Hook implementation that exposes this plugin's CONFIG-type hooks.
# NOTE(review): the other hook implementations in this change are decorated with
# `@abx.hookimpl`; confirm that `archivebox.plugin.hookimpl` resolves to the same marker.
@archivebox.plugin.hookimpl
def register_CONFIG():
# Returns the values view of the plugin's hooks filed under the 'CONFIG' hook type.
return PLUGIN.HOOKS_BY_TYPE['CONFIG'].values()