migrate plugin loading process to new pluggy-powered system based on djp

This commit is contained in:
Nick Sweeting 2024-09-26 02:43:12 -07:00
parent efd341d8ad
commit 8ed3155ec5
No known key found for this signature in database
28 changed files with 690 additions and 321 deletions

271
archivebox/abx/__init__.py Normal file
View file

@ -0,0 +1,271 @@
import itertools
import importlib
from pathlib import Path
from typing import Dict
from benedict import benedict
import pluggy
import archivebox
from . import hookspec as base_spec
from .hookspec import hookimpl, hookspec # noqa
# Module-level plugin manager for the "abx" project name; the base hookspecs
# from .hookspec are registered on it immediately at import time.
pm = pluggy.PluginManager("abx")
pm.add_hookspecs(base_spec)
def register_hookspecs(hookspecs):
    """Import every dotted module path in ``hookspecs`` and add it to ``pm`` as a hookspec module."""
    for module_path in hookspecs:
        pm.add_hookspecs(importlib.import_module(module_path))
def find_plugins_in_dir(plugins_dir: Path, prefix: str) -> Dict[str, Path]:
    """Find plugin folders directly inside ``plugins_dir``.

    A plugin is any immediate subdirectory that contains an ``apps.py`` file.

    Returns:
        ``{"<prefix>.<folder_name>": <folder_path>}``, inserted in sorted path order,
        e.g. ``{"plugins_pkg.pip": "/app/archivebox/plugins_pkg/pip"}``.
    """
    found: Dict[str, Path] = {}
    # Plain sorted order for now; someday enforcing a plugin import order
    # (e.g. key=get_plugin_order) may be required, but right now it's not needed.
    for apps_file in sorted(plugins_dir.glob("*/apps.py")):
        plugin_root = apps_file.parent
        found[f"{prefix}.{plugin_root.name}"] = plugin_root
    return found
def get_pip_installed_plugins(group='abx'):
    """Discover plugins exposed through installed distributions' entry points.

    Stands in for ``pm.load_setuptools_entrypoints("abx")``, but only collects
    the plugins; nothing is registered with ``pm`` here.

    Args:
        group: entry point group name to match.

    Returns:
        ``{entrypoint_name: directory containing the loaded entry point's file}``
        for every entry point in ``group`` whose name is not blocked in ``pm``.
    """
    import importlib.metadata

    detected = {}   # module_name: module_dir_path
    for dist in list(importlib.metadata.distributions()):
        for entrypoint in dist.entry_points:
            # is_blocked() is only consulted for entry points in the requested group
            if entrypoint.group == group and not pm.is_blocked(entrypoint.name):
                loaded = entrypoint.load()
                detected[entrypoint.name] = Path(loaded.__file__).parent
    return detected
def get_plugins_in_dirs(plugin_dirs: Dict[str, Path]):
    """Merge the plugins found under every ``{prefix: directory}`` entry into one dict.

    Later directories override earlier ones if they produce the same module name.
    """
    merged = {}
    for prefix, directory in plugin_dirs.items():
        for module_name, module_dir in find_plugins_in_dir(directory, prefix=prefix).items():
            merged[module_name] = module_dir
    return merged
def get_builtin_plugins():
    """Return the plugins found in the built-in ``plugins_*`` folders under ``archivebox.PACKAGE_DIR``."""
    builtin_prefixes = (
        'plugins_sys',
        'plugins_pkg',
        'plugins_auth',
        'plugins_search',
        'plugins_extractor',
    )
    detected = {}
    for prefix in builtin_prefixes:
        # each built-in folder name doubles as the module prefix of the plugins inside it
        detected.update(find_plugins_in_dir(archivebox.PACKAGE_DIR / prefix, prefix=prefix))
    return detected
def get_user_plugins():
    """Return the plugins found in the ``user_plugins`` folder under ``archivebox.DATA_DIR``."""
    user_plugins_dir = archivebox.DATA_DIR / 'user_plugins'
    return find_plugins_in_dir(user_plugins_dir, prefix='user_plugins')
# BUILTIN_PLUGINS = get_builtin_plugins()
# PIP_PLUGINS = get_pip_installed_plugins()
# USER_PLUGINS = get_user_plugins()
# ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS}
# Load all plugins from pip packages, archivebox built-ins, and user plugins
def load_plugins(plugins_dict: Dict[str, Path]):
    """Import and register each plugin's ``apps`` module.

    Args:
        plugins_dict: ``{plugin_module_name: plugin_dir}``; only the module
            names are used here.

    Returns:
        ``{plugin_module_name: <module>.apps.PLUGIN}`` for every loaded plugin.
    """
    loaded = {}
    for module_name in plugins_dict:
        apps_module = importlib.import_module(module_name + '.apps')
        pm.register(apps_module)
        loaded[module_name] = apps_module.PLUGIN
    return loaded
def get_registered_plugins():
    """Describe every plugin currently registered with ``pm``.

    Returns:
        A dict keyed by plugin name. Each value holds ``"name"`` and ``"hooks"``
        (names of the plugin's hook callers). When ``pm`` has distribution info
        for the plugin, ``"version"`` is added and the name is taken from the
        distribution instead of the plugin's ``__name__``.
    """
    distinfo_by_plugin = dict(pm.list_plugin_distinfo())
    registered = {}
    for plugin in pm.get_plugins():
        info = {
            "name": plugin.__name__,
            "hooks": [caller.name for caller in pm.get_hookcallers(plugin) or ()],
        }
        distinfo = distinfo_by_plugin.get(plugin)
        if distinfo:
            info["version"] = distinfo.version
            info["name"] = getattr(distinfo, "name", None) or distinfo.project_name
        registered[info["name"]] = info
    return registered
def get_plugins_INSTALLLED_APPS():
    """Return an iterator over the apps from every plugin's ``get_INSTALLED_APPS`` hook result.

    The triple-L spelling of the function name is kept because callers use it.
    """
    per_plugin_apps = pm.hook.get_INSTALLED_APPS()
    return itertools.chain.from_iterable(per_plugin_apps)
def register_plugins_INSTALLLED_APPS(INSTALLED_APPS):
    """Call every plugin's ``register_INSTALLED_APPS`` hook, passing ``INSTALLED_APPS`` for the plugins to mutate."""
    register_hook = pm.hook.register_INSTALLED_APPS
    register_hook(INSTALLED_APPS=INSTALLED_APPS)
def get_plugins_MIDDLEWARE():
    """Return an iterator over the middleware from every plugin's ``get_MIDDLEWARE`` hook result."""
    per_plugin_middleware = pm.hook.get_MIDDLEWARE()
    return itertools.chain.from_iterable(per_plugin_middleware)
def register_plugins_MIDDLEWARE(MIDDLEWARE):
    """Call every plugin's ``register_MIDDLEWARE`` hook, passing ``MIDDLEWARE`` for the plugins to mutate."""
    register_hook = pm.hook.register_MIDDLEWARE
    register_hook(MIDDLEWARE=MIDDLEWARE)
def get_plugins_AUTHENTICATION_BACKENDS():
    """Return an iterator over the backends from every plugin's ``get_AUTHENTICATION_BACKENDS`` hook result."""
    per_plugin_backends = pm.hook.get_AUTHENTICATION_BACKENDS()
    return itertools.chain.from_iterable(per_plugin_backends)
def register_plugins_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
    """Call every plugin's ``register_AUTHENTICATION_BACKENDS`` hook, passing the list for the plugins to mutate."""
    register_hook = pm.hook.register_AUTHENTICATION_BACKENDS
    register_hook(AUTHENTICATION_BACKENDS=AUTHENTICATION_BACKENDS)
def get_plugins_STATICFILES_DIRS():
    """Return an iterator over the dirs from every plugin's ``get_STATICFILES_DIRS`` hook result."""
    per_plugin_dirs = pm.hook.get_STATICFILES_DIRS()
    return itertools.chain.from_iterable(per_plugin_dirs)
def register_plugins_STATICFILES_DIRS(STATICFILES_DIRS):
    """Call every plugin's ``register_STATICFILES_DIRS`` hook, passing ``STATICFILES_DIRS`` for the plugins to mutate."""
    register_hook = pm.hook.register_STATICFILES_DIRS
    register_hook(STATICFILES_DIRS=STATICFILES_DIRS)
def get_plugins_TEMPLATE_DIRS():
    """Return an iterator over the dirs from every plugin's ``get_TEMPLATE_DIRS`` hook result."""
    per_plugin_dirs = pm.hook.get_TEMPLATE_DIRS()
    return itertools.chain.from_iterable(per_plugin_dirs)
def register_plugins_TEMPLATE_DIRS(TEMPLATE_DIRS):
    """Call every plugin's ``register_TEMPLATE_DIRS`` hook, passing ``TEMPLATE_DIRS`` for the plugins to mutate."""
    register_hook = pm.hook.register_TEMPLATE_DIRS
    register_hook(TEMPLATE_DIRS=TEMPLATE_DIRS)
def get_plugins_DJANGO_HUEY_QUEUES():
    """Merge every plugin's ``get_DJANGO_HUEY_QUEUES`` hook result into a single dict.

    Results are merged with ``dict.update``, so a later result overrides an
    earlier one when both provide the same key.
    """
    merged_queues = {}
    for plugin_queues in pm.hook.get_DJANGO_HUEY_QUEUES():
        merged_queues.update(plugin_queues)
    return merged_queues
def register_plugins_DJANGO_HUEY(DJANGO_HUEY):
    """Call every plugin's ``register_DJANGO_HUEY`` hook, passing ``DJANGO_HUEY`` for the plugins to mutate."""
    register_hook = pm.hook.register_DJANGO_HUEY
    register_hook(DJANGO_HUEY=DJANGO_HUEY)
def get_plugins_ADMIN_DATA_VIEWS_URLS():
    """Return an iterator over the URLs from every plugin's ``get_ADMIN_DATA_VIEWS_URLS`` hook result."""
    per_plugin_urls = pm.hook.get_ADMIN_DATA_VIEWS_URLS()
    return itertools.chain.from_iterable(per_plugin_urls)
def register_plugins_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
    """Call every plugin's ``register_ADMIN_DATA_VIEWS`` hook, passing ``ADMIN_DATA_VIEWS`` for the plugins to mutate."""
    register_hook = pm.hook.register_ADMIN_DATA_VIEWS
    register_hook(ADMIN_DATA_VIEWS=ADMIN_DATA_VIEWS)
def register_plugins_settings(settings):
    """Let every registered plugin extend the Django settings mapping in place.

    Steps, in order:
      1. wrap ``settings`` in a benedict and give the plugin-extensible
         settings an empty default when they are missing;
      2. call each ``register_<SETTING>`` hook with the matching value so
         plugins can mutate it;
      3. call the ``register_settings`` hook with the whole settings object;
      4. copy everything back into ``settings``.

    Args:
        settings: mutable mapping of Django settings (the settings module's
            ``globals()``, per the inline comment on the final update).
    """
    # convert the settings dict to a benedict so values can be set using settings.attr = xyz notation
    settings_as_obj = benedict(settings, keypath_separator=None)

    # set default values for settings that are used by plugins
    plugin_setting_defaults = (
        ('INSTALLED_APPS', []),
        ('MIDDLEWARE', []),
        ('AUTHENTICATION_BACKENDS', []),
        ('STATICFILES_DIRS', []),
        ('TEMPLATE_DIRS', []),
        ('DJANGO_HUEY', {'queues': {}}),
        ('ADMIN_DATA_VIEWS', {'URLS': []}),
    )
    for setting_name, default in plugin_setting_defaults:
        setattr(settings_as_obj, setting_name, settings_as_obj.get(setting_name, default))

    # call all the hook functions to mutate the settings values in-place
    register_plugins_INSTALLLED_APPS(settings_as_obj.INSTALLED_APPS)
    register_plugins_MIDDLEWARE(settings_as_obj.MIDDLEWARE)
    register_plugins_AUTHENTICATION_BACKENDS(settings_as_obj.AUTHENTICATION_BACKENDS)
    register_plugins_STATICFILES_DIRS(settings_as_obj.STATICFILES_DIRS)
    register_plugins_TEMPLATE_DIRS(settings_as_obj.TEMPLATE_DIRS)
    register_plugins_DJANGO_HUEY(settings_as_obj.DJANGO_HUEY)
    register_plugins_ADMIN_DATA_VIEWS(settings_as_obj.ADMIN_DATA_VIEWS)

    # calls the register_settings(settings) hook on each registered plugin
    pm.hook.register_settings(settings=settings_as_obj)

    # then finally update the settings globals() object with all the new settings
    settings.update(settings_as_obj)
def get_plugins_urlpatterns():
    """Return a flat list of the urlpatterns contributed by every plugin.

    Calls the ``get_urlpatterns`` hook, which is the name declared in the urls
    hookspec and matches the ``get_*`` naming used by every other getter here.
    The previous ``pm.hook.urlpatterns()`` call referred to a hook that has no
    hookspec.
    """
    per_plugin_patterns = pm.hook.get_urlpatterns()
    return list(itertools.chain.from_iterable(per_plugin_patterns))
def register_plugins_urlpatterns(urlpatterns):
    """Call every plugin's ``register_urlpatterns`` hook, passing ``urlpatterns`` for the plugins to mutate."""
    register_hook = pm.hook.register_urlpatterns
    register_hook(urlpatterns=urlpatterns)
# PLUGANTIC HOOKS
def get_plugins_PLUGINS():
    """Return a benedict mapping ``PLUGIN.id`` to ``PLUGIN`` for every plugin object registered with ``pm``."""
    plugins_by_id = {}
    for registered in pm.get_plugins():
        plugins_by_id[registered.PLUGIN.id] = registered.PLUGIN
    return benedict(plugins_by_id)
def get_plugins_HOOKS(PLUGINS):
    """Return a benedict mapping ``hook.id`` to ``hook`` for every hook listed in ``.hooks`` of each value in ``PLUGINS``."""
    hooks_by_id = {}
    for plugin in PLUGINS.values():
        for hook in plugin.hooks:
            hooks_by_id[hook.id] = hook
    return benedict(hooks_by_id)
def get_plugins_CONFIGS():
    """Collect the items of every plugin's ``get_CONFIGS`` hook result into a benedict keyed by each item's ``id``."""
    configs_by_id = {}
    for plugin_configs in pm.hook.get_CONFIGS():
        for config in plugin_configs:
            configs_by_id[config.id] = config
    return benedict(configs_by_id)
def get_plugins_FLAT_CONFIG(CONFIGS):
    """Flatten all configs into one benedict by merging each config's ``model_dump()`` output.

    Later configs override earlier ones when they dump the same key.
    """
    flattened = {}
    for config in CONFIGS.values():
        dumped = config.model_dump()
        flattened.update(dumped)
    return benedict(flattened)
def get_plugins_BINPROVIDERS():
    """Collect the items of every plugin's ``get_BINPROVIDERS`` hook result into a benedict keyed by each item's ``id``."""
    binproviders_by_id = {}
    for plugin_binproviders in pm.hook.get_BINPROVIDERS():
        for binprovider in plugin_binproviders:
            binproviders_by_id[binprovider.id] = binprovider
    return benedict(binproviders_by_id)
def get_plugins_BINARIES():
    """Collect the items of every plugin's ``get_BINARIES`` hook result into a benedict keyed by each item's ``id``."""
    binaries_by_id = {}
    for plugin_binaries in pm.hook.get_BINARIES():
        for binary in plugin_binaries:
            binaries_by_id[binary.id] = binary
    return benedict(binaries_by_id)
def get_plugins_EXTRACTORS():
    """Collect the items of every plugin's ``get_EXTRACTORS`` hook result into a benedict keyed by each item's ``id``."""
    extractors_by_id = {}
    for plugin_extractors in pm.hook.get_EXTRACTORS():
        for extractor in plugin_extractors:
            extractors_by_id[extractor.id] = extractor
    return benedict(extractors_by_id)
def get_plugins_REPLAYERS():
    """Collect the items of every plugin's ``get_REPLAYERS`` hook result into a benedict keyed by each item's ``id``."""
    replayers_by_id = {}
    for plugin_replayers in pm.hook.get_REPLAYERS():
        for replayer in plugin_replayers:
            replayers_by_id[replayer.id] = replayer
    return benedict(replayers_by_id)
def get_plugins_CHECKS():
    """Collect the items of every plugin's ``get_CHECKS`` hook result into a benedict keyed by each item's ``id``."""
    checks_by_id = {}
    for plugin_checks in pm.hook.get_CHECKS():
        for check in plugin_checks:
            checks_by_id[check.id] = check
    return benedict(checks_by_id)
def get_plugins_ADMINDATAVIEWS():
    """Collect the items of every plugin's ``get_ADMINDATAVIEWS`` hook result into a benedict keyed by each item's ``id``."""
    dataviews_by_id = {}
    for plugin_admin_dataviews in pm.hook.get_ADMINDATAVIEWS():
        for admin_dataview in plugin_admin_dataviews:
            dataviews_by_id[admin_dataview.id] = admin_dataview
    return benedict(dataviews_by_id)
def get_plugins_QUEUES():
    """Collect the items of every plugin's ``get_QUEUES`` hook result into a benedict keyed by each item's ``id``."""
    queues_by_id = {}
    for plugin_queues in pm.hook.get_QUEUES():
        for queue in plugin_queues:
            queues_by_id[queue.id] = queue
    return benedict(queues_by_id)
def get_plugins_SEARCHBACKENDS():
    """Collect the items of every plugin's ``get_SEARCHBACKENDS`` hook result into a benedict keyed by each item's ``id``."""
    searchbackends_by_id = {}
    for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS():
        for searchbackend in plugin_searchbackends:
            searchbackends_by_id[searchbackend.id] = searchbackend
    return benedict(searchbackends_by_id)

12
archivebox/abx/apps.py Normal file
View file

@ -0,0 +1,12 @@
from django.apps import AppConfig
class ABXConfig(AppConfig):
    """Django ``AppConfig`` for the ``abx`` app; forwards Django's ``ready()`` to the plugins' ``ready`` hook."""

    default_auto_field = 'django.db.models.BigAutoField'
    name = 'abx'

    def ready(self):
        """Call the ``ready`` hook on the abx plugin manager, passing the Django settings object."""
        # both imports are deferred until ready() runs rather than done at module import time
        import abx
        from django.conf import settings

        ready_hook = abx.pm.hook.ready
        ready_hook(settings=settings)

View file

@ -0,0 +1,12 @@
from pathlib import Path
from pluggy import HookimplMarker
from pluggy import HookspecMarker
# Decorator marker for declaring hook specifications under the "abx" project name
hookspec = HookspecMarker("abx")
# Decorator marker for declaring hook implementations under the "abx" project name
hookimpl = HookimplMarker("abx")
@hookspec
def get_system_user() -> str:
    """Hookspec: return the system user's name.

    The body here returns the final path component of the expanded ``~`` home directory.
    """
    home_dir = Path('~').expanduser()
    return home_dir.name

View file

@ -0,0 +1,30 @@
from .hookspec import hookspec
@hookspec
def get_CONFIGS():
    """Hookspec: return the CONFIGS provided by a plugin (placeholder body returns an empty dict)."""
    return {}
@hookspec
def get_EXTRACTORS():
    """Hookspec: return the EXTRACTORS provided by a plugin (placeholder body returns an empty dict)."""
    return {}
@hookspec
def get_REPLAYERS():
    """Hookspec: return the REPLAYERS provided by a plugin (placeholder body returns an empty dict)."""
    return {}
@hookspec
def get_CHECKS():
    """Hookspec: return the CHECKS provided by a plugin (placeholder body returns an empty dict)."""
    return {}
@hookspec
def get_ADMINDATAVIEWS():
    """Hookspec: return the ADMINDATAVIEWS provided by a plugin (placeholder body returns an empty dict)."""
    return {}
@hookspec
def get_QUEUES():
    """Hookspec: return the QUEUES provided by a plugin (placeholder body returns an empty dict)."""
    return {}
@hookspec
def get_SEARCHBACKENDS():
    """Hookspec: return the SEARCHBACKENDS provided by a plugin (placeholder body returns an empty dict)."""
    return {}

View file

@ -0,0 +1,6 @@
from .hookspec import hookspec
@hookspec
def ready(settings):
    """Hookspec: called when the Django app.ready() is triggered.

    Args:
        settings: the settings object handed over by the caller of the hook.
    """

View file

@ -0,0 +1,90 @@
from .hookspec import hookspec
###########################################################################################
@hookspec
def get_INSTALLED_APPS():
    """Hookspec: return a list of apps to add to INSTALLED_APPS.

    Example return value: ``['your_plugin_type.plugin_name']``
    """
    return []
@hookspec
def register_INSTALLED_APPS(INSTALLED_APPS):
    """Hookspec: mutate INSTALLED_APPS in place to add your app in a specific position.

    Example implementation::

        idx_of_contrib = INSTALLED_APPS.index('django.contrib.auth')
        INSTALLED_APPS.insert(idx_of_contrib + 1, 'your_plugin_type.plugin_name')
    """
@hookspec
def get_TEMPLATE_DIRS():
    """Hookspec: return a list of template dirs to add to TEMPLATE_DIRS.

    Example return value: ``['your_plugin_type/plugin_name/templates']``
    """
    return []
@hookspec
def register_TEMPLATE_DIRS(TEMPLATE_DIRS):
    """Hookspec: mutate TEMPLATE_DIRS in place to add your template dirs in a specific position.

    Example implementation::

        TEMPLATE_DIRS.insert(0, 'your_plugin_type/plugin_name/templates')
    """
@hookspec
def get_STATICFILES_DIRS():
    """Hookspec: return a list of static dirs to add to STATICFILES_DIRS.

    Example return value: ``['your_plugin_type/plugin_name/static']``
    """
    return []
@hookspec
def register_STATICFILES_DIRS(STATICFILES_DIRS):
    """Hookspec: mutate STATICFILES_DIRS in place to add your static dirs in a specific position.

    Example implementation::

        STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static')
    """
@hookspec
def get_MIDDLEWARE():
    """Hookspec: return a list of middleware to add to MIDDLEWARE.

    Example return value: ``['your_plugin_type.plugin_name.middleware.YourMiddleware']``
    """
    return []
@hookspec
def register_MIDDLEWARE(MIDDLEWARE):
    """Hookspec: mutate MIDDLEWARE in place to add your middleware in a specific position.

    Example implementation::

        MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware')
    """
@hookspec
def get_AUTHENTICATION_BACKENDS():
    """Hookspec: return a list of auth backends to add to AUTHENTICATION_BACKENDS.

    Example return value: ``['django_auth_ldap.backend.LDAPBackend']``
    """
    return []
@hookspec
def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
    """Hookspec: mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position.

    Example implementation::

        AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend')
    """
@hookspec
def get_DJANGO_HUEY_QUEUES():
    """Hookspec: return the huey queues a plugin wants added to the DJANGO_HUEY settings.

    Example from the original comment: ``[{'name': 'your_plugin_type.plugin_name', 'HUEY': {...}}]``
    """
    # NOTE(review): abx.get_plugins_DJANGO_HUEY_QUEUES() merges each result with
    # dict.update(), so implementations presumably need to return a mapping
    # rather than the list of dicts shown in the example above -- confirm.
    return []
@hookspec
def register_DJANGO_HUEY(DJANGO_HUEY):
    """Hookspec: mutate DJANGO_HUEY in place to add your huey queues in a specific position.

    Example implementation::

        DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value'
    """
@hookspec
def get_ADMIN_DATA_VIEWS_URLS():
    """Hookspec: return a list of URL entries a plugin wants added to the admin data views."""
    return []
@hookspec
def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
    """Hookspec: mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position.

    Example implementation::

        ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py')
    """
@hookspec
def register_settings(settings):
    """Hookspec: mutate settings in place to add your settings / modify existing settings.

    Example implementation::

        settings.SOME_KEY = 'some_value'
    """

View file

@ -0,0 +1,12 @@
from .hookspec import hookspec
@hookspec
def get_urlpatterns():
    """Hookspec: return a list of urlpatterns to add.

    Example return value: ``[path('your_plugin_type/plugin_name/url.py', your_view)]``
    """
    return []
@hookspec
def register_urlpatterns(urlpatterns):
    """Hookspec: mutate urlpatterns in place to add your urlpatterns in a specific position.

    Example implementation::

        urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view))
    """

View file

@ -0,0 +1,13 @@
from .hookspec import hookspec
###########################################################################################
@hookspec
def get_BINPROVIDERS():
    """Hookspec: return the BINPROVIDERS provided by a plugin (placeholder body returns an empty dict)."""
    return {}
@hookspec
def get_BINARIES():
    """Hookspec: return the BINARIES provided by a plugin (placeholder body returns an empty dict)."""
    return {}

View file

@ -27,7 +27,6 @@ import re
import sys
import json
import shutil
import archivebox
from hashlib import md5
from pathlib import Path
@ -36,15 +35,20 @@ from typing import Optional, Type, Tuple, Dict
from subprocess import run, PIPE, DEVNULL, STDOUT, TimeoutExpired
from configparser import ConfigParser
from pydantic_pkgr import SemVer
from rich.progress import Progress
from rich.console import Console
from benedict import benedict
import django
from django.db.backends.sqlite3.base import Database as sqlite3
import archivebox
from archivebox.constants import CONSTANTS
from archivebox.constants import *
from pydantic_pkgr import SemVer
from .config_stubs import (
AttrDict,
ConfigValue,
ConfigDict,
ConfigDefaultValue,
@ -52,85 +56,35 @@ from .config_stubs import (
)
from .misc.logging import (
DEFAULT_CLI_COLORS,
ANSI,
COLOR_DICT,
stderr,
hint, # noqa
)
# print('STARTING CONFIG LOADING')
# load fallback libraries from vendor dir
from .vendor import load_vendored_libs
load_vendored_libs()
# print("LOADED VENDOR LIBS")
from .plugins_sys.config.apps import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG
from .plugins_auth.ldap.apps import LDAP_CONFIG
from .plugins_extractor.favicon.apps import FAVICON_CONFIG
ANSI = SHELL_CONFIG.ANSI
LDAP = LDAP_CONFIG.LDAP_ENABLED
############################### Config Schema ##################################
CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'SHELL_CONFIG': {
'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()},
'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']},
'SHOW_PROGRESS': {'type': bool, 'default': lambda c: c['IS_TTY']}, # progress bars are buggy on mac, disable for now
'IN_DOCKER': {'type': bool, 'default': False},
'IN_QEMU': {'type': bool, 'default': False},
'PUID': {'type': int, 'default': os.getuid()},
'PGID': {'type': int, 'default': os.getgid()},
},
'SHELL_CONFIG': SHELL_CONFIG.as_legacy_config_schema(),
'GENERAL_CONFIG': {
'OUTPUT_DIR': {'type': str, 'default': None},
'CONFIG_FILE': {'type': str, 'default': None},
'ONLY_NEW': {'type': bool, 'default': True},
'TIMEOUT': {'type': int, 'default': 60},
'MEDIA_TIMEOUT': {'type': int, 'default': 3600},
'OUTPUT_PERMISSIONS': {'type': str, 'default': '644'},
'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'}, # TODO: move this to be a default WGET_ARGS
'SERVER_CONFIG': SERVER_CONFIG.as_legacy_config_schema(),
'GENERAL_CONFIG': GENERAL_CONFIG.as_legacy_config_schema(),
'URL_DENYLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$', 'aliases': ('URL_BLACKLIST',)}, # to avoid downloading code assets as their own pages
'URL_ALLOWLIST': {'type': str, 'default': None, 'aliases': ('URL_WHITELIST',)},
'ARCHIVING_CONFIG': ARCHIVING_CONFIG.as_legacy_config_schema(),
'SEARCH_BACKEND_CONFIG': SEARCH_BACKEND_CONFIG.as_legacy_config_schema(),
'ENFORCE_ATOMIC_WRITES': {'type': bool, 'default': True},
'TAG_SEPARATOR_PATTERN': {'type': str, 'default': r'[,]'},
},
'STORAGE_CONFIG': STORAGE_CONFIG.as_legacy_config_schema(),
'LDAP_CONFIG': LDAP_CONFIG.as_legacy_config_schema(),
'FAVICON_CONFIG': FAVICON_CONFIG.as_legacy_config_schema(),
'SERVER_CONFIG': {
'ADMIN_USERNAME': {'type': str, 'default': None},
'ADMIN_PASSWORD': {'type': str, 'default': None},
'SECRET_KEY': {'type': str, 'default': None},
'BIND_ADDR': {'type': str, 'default': lambda c: ['127.0.0.1:8000', '0.0.0.0:8000'][c['IN_DOCKER']]},
'ALLOWED_HOSTS': {'type': str, 'default': '*'}, # e.g. archivebox.example.com,archivebox2.example.com
'CSRF_TRUSTED_ORIGINS': {'type': str, 'default': lambda c: 'http://localhost:8000,http://127.0.0.1:8000,http://0.0.0.0:8000,http://{}'.format(c['BIND_ADDR'])}, # e.g. https://archivebox.example.com,https://archivebox2.example.com:8080
'DEBUG': {'type': bool, 'default': False},
'PUBLIC_INDEX': {'type': bool, 'default': True},
'PUBLIC_SNAPSHOTS': {'type': bool, 'default': True},
'PUBLIC_ADD_VIEW': {'type': bool, 'default': False},
'FOOTER_INFO': {'type': str, 'default': 'Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.'},
'SNAPSHOTS_PER_PAGE': {'type': int, 'default': 40},
'CUSTOM_TEMPLATES_DIR': {'type': str, 'default': None},
'TIME_ZONE': {'type': str, 'default': 'UTC'},
'TIMEZONE': {'type': str, 'default': 'UTC'},
'REVERSE_PROXY_USER_HEADER': {'type': str, 'default': 'Remote-User'},
'REVERSE_PROXY_WHITELIST': {'type': str, 'default': ''},
'LOGOUT_REDIRECT_URL': {'type': str, 'default': '/'},
'PREVIEW_ORIGINALS': {'type': bool, 'default': True},
'LDAP': {'type': bool, 'default': False},
'LDAP_SERVER_URI': {'type': str, 'default': None},
'LDAP_BIND_DN': {'type': str, 'default': None},
'LDAP_BIND_PASSWORD': {'type': str, 'default': None},
'LDAP_USER_BASE': {'type': str, 'default': None},
'LDAP_USER_FILTER': {'type': str, 'default': None},
'LDAP_USERNAME_ATTR': {'type': str, 'default': None},
'LDAP_FIRSTNAME_ATTR': {'type': str, 'default': None},
'LDAP_LASTNAME_ATTR': {'type': str, 'default': None},
'LDAP_EMAIL_ATTR': {'type': str, 'default': None},
'LDAP_CREATE_SUPERUSER': {'type': bool, 'default': False},
},
'ARCHIVE_METHOD_TOGGLES': {
'SAVE_TITLE': {'type': bool, 'default': True, 'aliases': ('FETCH_TITLE',)},
@ -212,26 +166,6 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'SINGLEFILE_EXTRA_ARGS': {'type': list, 'default': None},
'MERCURY_ARGS': {'type': list, 'default': ['--format=text']},
'MERCURY_EXTRA_ARGS': {'type': list, 'default': None},
'FAVICON_PROVIDER': {'type': str, 'default': 'https://www.google.com/s2/favicons?domain={}'},
},
'SEARCH_BACKEND_CONFIG' : {
'USE_INDEXING_BACKEND': {'type': bool, 'default': True},
'USE_SEARCHING_BACKEND': {'type': bool, 'default': True},
'SEARCH_BACKEND_ENGINE': {'type': str, 'default': 'ripgrep'},
'SEARCH_BACKEND_HOST_NAME': {'type': str, 'default': 'localhost'},
'SEARCH_BACKEND_PORT': {'type': int, 'default': 1491},
'SEARCH_BACKEND_PASSWORD': {'type': str, 'default': 'SecretPassword'},
'SEARCH_PROCESS_HTML': {'type': bool, 'default': True},
# SONIC
'SONIC_COLLECTION': {'type': str, 'default': 'archivebox'},
'SONIC_BUCKET': {'type': str, 'default': 'snapshots'},
'SEARCH_BACKEND_TIMEOUT': {'type': int, 'default': 90},
# SQLite3 FTS5
'FTS_SEPARATE_DATABASE': {'type': bool, 'default': True},
'FTS_TOKENIZERS': {'type': str, 'default': 'porter unicode61 remove_diacritics 2'},
# Default from https://www.sqlite.org/limits.html#max_length
'FTS_SQLITE_MAX_LENGTH': {'type': int, 'default': int(1e9)},
},
'DEPENDENCY_CONFIG': {
@ -242,7 +176,6 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'USE_MERCURY': {'type': bool, 'default': True},
'USE_GIT': {'type': bool, 'default': True},
'USE_CHROME': {'type': bool, 'default': True},
'USE_NODE': {'type': bool, 'default': True},
'USE_YOUTUBEDL': {'type': bool, 'default': True},
'USE_RIPGREP': {'type': bool, 'default': True},
@ -282,60 +215,16 @@ def get_real_name(key: str) -> str:
################################ Constants #####################################
PACKAGE_DIR_NAME = 'archivebox'
TEMPLATES_DIR_NAME = 'templates'
ARCHIVE_DIR_NAME = 'archive'
SOURCES_DIR_NAME = 'sources'
LOGS_DIR_NAME = 'logs'
CACHE_DIR_NAME = 'cache'
LIB_DIR_NAME = 'lib'
PERSONAS_DIR_NAME = 'personas'
CRONTABS_DIR_NAME = 'crontabs'
SQL_INDEX_FILENAME = 'index.sqlite3'
JSON_INDEX_FILENAME = 'index.json'
HTML_INDEX_FILENAME = 'index.html'
ROBOTS_TXT_FILENAME = 'robots.txt'
FAVICON_FILENAME = 'favicon.ico'
CONFIG_FILENAME = 'ArchiveBox.conf'
ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE
CONSTANTS = archivebox.CONSTANTS._asdict()
############################## Version Config ##################################
############################## Derived Config ##################################
# These are derived/computed values calculated *after* all user-provided config values are ingested
# they appear in `archivebox config` output and are intended to be read-only for the user
DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
**{
key: {'default': lambda c: val}
for key, val in archivebox.CONSTANTS.items()
},
'PACKAGE_DIR': {'default': lambda c: archivebox.PACKAGE_DIR.resolve()},
'TEMPLATES_DIR': {'default': lambda c: c['PACKAGE_DIR'] / TEMPLATES_DIR_NAME},
'TEMPLATES_DIR': {'default': lambda c: c['PACKAGE_DIR'] / CONSTANTS.TEMPLATES_DIR_NAME},
'CUSTOM_TEMPLATES_DIR': {'default': lambda c: c['CUSTOM_TEMPLATES_DIR'] and Path(c['CUSTOM_TEMPLATES_DIR'])},
'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')}, # exec is always needed to list directories
'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)},
'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)},
'USE_CURL': {'default': lambda c: c['USE_CURL'] and (c['SAVE_FAVICON'] or c['SAVE_TITLE'] or c['SAVE_ARCHIVE_DOT_ORG'])},
@ -356,7 +245,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'WGET_EXTRA_ARGS': {'default': lambda c: c['WGET_EXTRA_ARGS'] or []},
'USE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['SAVE_MERCURY']},
'SAVE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['USE_NODE']},
'SAVE_MERCURY': {'default': lambda c: c['USE_MERCURY']},
'MERCURY_VERSION': {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None}, # mercury doesnt expose version info until this is merged https://github.com/postlight/parser/pull/750
'MERCURY_ARGS': {'default': lambda c: c['MERCURY_ARGS'] or []},
'MERCURY_EXTRA_ARGS': {'default': lambda c: c['MERCURY_EXTRA_ARGS'] or []},
@ -365,8 +254,6 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'GIT_VERSION': {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None},
'SAVE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},
'USE_NODE': {'default': lambda c: True},
'NODE_VERSION': {'default': lambda c: bin_version(c['NODE_BINARY']) if c['USE_NODE'] else None},
'DEPENDENCIES': {'default': lambda c: get_dependency_info(c)},
# 'CODE_LOCATIONS': {'default': lambda c: get_code_locations(c)},
@ -550,7 +437,7 @@ def load_config(defaults: ConfigDefaultDict,
config: Optional[ConfigDict]=None,
out_dir: Optional[str]=None,
env_vars: Optional[os._Environ]=None,
config_file_vars: Optional[Dict[str, str]]=None) -> ConfigDict:
config_file_vars: Optional[Dict[str, str]]=None) -> benedict:
env_vars = env_vars or os.environ
config_file_vars = config_file_vars or load_config_file(out_dir=out_dir)
@ -583,13 +470,7 @@ def load_config(defaults: ConfigDefaultDict,
# raise
# raise SystemExit(2)
return AttrDict(extended_config)
def parse_version_string(version: str) -> Tuple[int, int, int]:
    """Parse a version tag formatted like 'vx.x.x' into (major, minor, patch) ints.

    Anything from the first '+' onwards (e.g. '+editable') is dropped, as is
    everything up to and including the last 'v'. Every dot-separated part is
    converted to int (a non-numeric part raises ValueError), and at most the
    first three are returned.
    """
    without_suffix = version.partition('+')[0]      # remove '+editable' suffix
    core = without_suffix.rpartition('v')[2]        # remove 'v' prefix
    numbers = [int(piece) for piece in core.split('.')]
    return tuple(numbers[:3])
return benedict(extended_config)
@ -778,13 +659,13 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
'enabled': config['USE_WGET'],
'is_valid': bool(config['WGET_VERSION']),
},
'NODE_BINARY': {
'path': bin_path(config['NODE_BINARY']),
'version': config['NODE_VERSION'],
'hash': bin_hash(config['NODE_BINARY']),
'enabled': config['USE_NODE'],
'is_valid': bool(config['NODE_VERSION']),
},
# 'NODE_BINARY': {
# 'path': bin_path(config['NODE_BINARY']),
# 'version': config['NODE_VERSION'],
# 'hash': bin_hash(config['NODE_BINARY']),
# 'enabled': config['USE_NODE'],
# 'is_valid': bool(config['NODE_VERSION']),
# },
'MERCURY_BINARY': {
'path': bin_path(config['MERCURY_BINARY']),
'version': config['MERCURY_VERSION'],
@ -879,15 +760,15 @@ globals().update(CONFIG)
# Set timezone to UTC and umask to OUTPUT_PERMISSIONS
assert TIMEZONE == 'UTC', 'The server timezone should always be set to UTC' # noqa: F821
assert TIMEZONE == 'UTC', f'The server timezone should always be set to UTC (got {TIMEZONE})' # noqa: F821
os.environ["TZ"] = TIMEZONE # noqa: F821
os.umask(0o777 - int(DIR_OUTPUT_PERMISSIONS, base=8)) # noqa: F821
os.umask(0o777 - int(STORAGE_CONFIG.DIR_OUTPUT_PERMISSIONS, base=8)) # noqa: F821
########################### Config Validity Checkers ###########################
if not CONFIG.USE_COLOR:
if not SHELL_CONFIG.USE_COLOR:
os.environ['NO_COLOR'] = '1'
if not CONFIG.SHOW_PROGRESS:
if not SHELL_CONFIG.SHOW_PROGRESS:
os.environ['TERM'] = 'dumb'
# recreate rich console obj based on new config values
@ -913,7 +794,7 @@ def setup_django_minimal():
django.setup()
def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
global INITIAL_STARTUP_PROGRESS
global INITIAL_STARTUP_PROGRESS_TASK
@ -930,7 +811,6 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
sys.path.append(str(archivebox.PACKAGE_DIR))
os.environ.setdefault('OUTPUT_DIR', str(archivebox.DATA_DIR))
os.environ.setdefault("ARCHIVEBOX_DATABASE_NAME", ":memory:")
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
bump_startup_progress_bar()

View file

@ -4,13 +4,13 @@ import os
import sys
import inspect
from typing import Dict
from pathlib import Path
from benedict import benedict
from django.utils.crypto import get_random_string
import abx
import archivebox
from archivebox.constants import CONSTANTS
from ..config import CONFIG
@ -28,39 +28,47 @@ ARCHIVE_DIR = archivebox.DATA_DIR / 'archive'
### ArchiveBox Plugin Settings
################################################################################
PLUGIN_HOOKSPECS = [
'abx.hookspec_django_settings',
'abx.hookspec_django_apps',
'abx.hookspec_django_urls',
'abx.hookspec_pydantic_pkgr',
'abx.hookspec_archivebox',
'plugantic.base_check',
]
abx.register_hookspecs(PLUGIN_HOOKSPECS)
def find_plugins_in_dir(plugins_dir: Path, prefix: str) -> Dict[str, Path]:
    """Map ``"<prefix>.<folder_name>"`` to the folder path for each immediate subfolder of ``plugins_dir`` containing ``apps.py``.

    Example entry: ``"plugins_pkg.pip": "/app/archivebox/plugins_pkg/pip"``
    """
    discovered: Dict[str, Path] = {}
    # Plain sorted order; someday enforcing a plugin import order (key=get_plugin_order)
    # may be required, but right now it's not needed.
    for entrypoint in sorted(plugins_dir.glob("*/apps.py")):
        plugin_folder = entrypoint.parent
        discovered[f"{prefix}.{plugin_folder.name}"] = plugin_folder
    return discovered
PLUGIN_DIRS = {
'plugins_sys': PACKAGE_DIR / 'plugins_sys',
'plugins_pkg': PACKAGE_DIR / 'plugins_pkg',
'plugins_auth': PACKAGE_DIR / 'plugins_auth',
'plugins_search': PACKAGE_DIR / 'plugins_search',
'plugins_extractor': PACKAGE_DIR / 'plugins_extractor',
'user_plugins': DATA_DIR / 'user_plugins',
BUILTIN_PLUGIN_DIRS = {
'plugins_sys': archivebox.PACKAGE_DIR / 'plugins_sys',
'plugins_pkg': archivebox.PACKAGE_DIR / 'plugins_pkg',
'plugins_auth': archivebox.PACKAGE_DIR / 'plugins_auth',
'plugins_search': archivebox.PACKAGE_DIR / 'plugins_search',
'plugins_extractor': archivebox.PACKAGE_DIR / 'plugins_extractor',
}
USER_PLUGIN_DIRS = {
'user_plugins': archivebox.DATA_DIR / 'user_plugins',
}
INSTALLED_PLUGINS = {}
for plugin_prefix, plugin_dir in PLUGIN_DIRS.items():
INSTALLED_PLUGINS.update(find_plugins_in_dir(plugin_dir, prefix=plugin_prefix))
BUILTIN_PLUGINS = abx.get_plugins_in_dirs(BUILTIN_PLUGIN_DIRS)
PIP_PLUGINS = abx.get_pip_installed_plugins(group='archivebox')
USER_PLUGINS = abx.get_plugins_in_dirs(USER_PLUGIN_DIRS)
ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS}
### Plugins Globals (filled by plugin_type.pluginname.apps.PluginName.register() after Django startup)
PLUGINS = benedict({})
HOOKS = benedict({})
# Created later by Plugin.register(settings) -> Hook.register(settings):
# CONFIGS = benedict({})
# BINPROVIDERS = benedict({})
# BINARIES = benedict({})
# EXTRACTORS = benedict({})
# REPLAYERS = benedict({})
# CHECKS = benedict({})
# ADMINDATAVIEWS = benedict({})
PLUGIN_MANAGER = abx.pm
PLUGINS = abx.load_plugins(ALL_PLUGINS)
HOOKS = abx.get_plugins_HOOKS(PLUGINS)
CONFIGS = abx.get_plugins_CONFIGS()
# FLAT_CONFIG = abx.get_plugins_FLAT_CONFIG(CONFIGS)
FLAT_CONFIG = CONFIG
BINPROVIDERS = abx.get_plugins_BINPROVIDERS()
BINARIES = abx.get_plugins_BINARIES()
EXTRACTORS = abx.get_plugins_EXTRACTORS()
REPLAYERS = abx.get_plugins_REPLAYERS()
CHECKS = abx.get_plugins_CHECKS()
ADMINDATAVIEWS = abx.get_plugins_ADMINDATAVIEWS()
QUEUES = abx.get_plugins_QUEUES()
SEARCHBACKENDS = abx.get_plugins_SEARCHBACKENDS()
################################################################################
### Django Core Settings
@ -96,15 +104,14 @@ INSTALLED_APPS = [
'django_object_actions', # provides easy Django Admin action buttons on change views https://github.com/crccheck/django-object-actions
# Our ArchiveBox-provided apps
# 'plugantic', # ArchiveBox plugin API definition + finding/registering/calling interface
'queues', # handles starting and managing background workers and processes
'abid_utils', # handles ABID ID creation, handling, and models
'plugantic', # ArchiveBox plugin API definition + finding/registering/calling interface
'core', # core django model with Snapshot, ArchiveResult, etc.
'api', # Django-Ninja-based Rest API interfaces, config, APIToken model, etc.
# ArchiveBox plugins
*INSTALLED_PLUGINS.keys(), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,
# plugin.register(settings) is called at import of each plugin (in the order they are listed here), then plugin.ready() is called at AppConfig.ready() time
*abx.get_plugins_INSTALLLED_APPS(), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,
# 3rd-party apps from PyPI that need to be loaded last
'admin_data_views', # handles rendering some convenient automatic read-only views of data in Django admin
@ -112,9 +119,13 @@ INSTALLED_APPS = [
'django_huey', # provides multi-queue support for django huey https://github.com/gaiacoop/django-huey
'bx_django_utils', # needed for huey_monitor https://github.com/boxine/bx_django_utils
'huey_monitor', # adds an admin UI for monitoring background huey tasks https://github.com/boxine/django-huey-monitor
# load plugins last so all other apps are already .ready() when we call plugins.ready()
'abx',
]
MIDDLEWARE = [
'core.middleware.TimezoneMiddleware',
'django.middleware.security.SecurityMiddleware',
@ -125,8 +136,10 @@ MIDDLEWARE = [
'core.middleware.ReverseProxyAuthMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'core.middleware.CacheControlMiddleware',
*abx.get_plugins_MIDDLEWARE(),
]
################################################################################
### Authentication Settings
################################################################################
@ -136,18 +149,20 @@ MIDDLEWARE = [
AUTHENTICATION_BACKENDS = [
'django.contrib.auth.backends.RemoteUserBackend',
'django.contrib.auth.backends.ModelBackend',
*abx.get_plugins_AUTHENTICATION_BACKENDS(),
]
from ..plugins_auth.ldap.settings import LDAP_CONFIG
if LDAP_CONFIG.LDAP_ENABLED:
AUTH_LDAP_BIND_DN = LDAP_CONFIG.LDAP_BIND_DN
AUTH_LDAP_SERVER_URI = LDAP_CONFIG.LDAP_SERVER_URI
AUTH_LDAP_BIND_PASSWORD = LDAP_CONFIG.LDAP_BIND_PASSWORD
AUTH_LDAP_USER_ATTR_MAP = LDAP_CONFIG.LDAP_USER_ATTR_MAP
AUTH_LDAP_USER_SEARCH = LDAP_CONFIG.AUTH_LDAP_USER_SEARCH
# from ..plugins_auth.ldap.settings import LDAP_CONFIG
# if LDAP_CONFIG.LDAP_ENABLED:
# AUTH_LDAP_BIND_DN = LDAP_CONFIG.LDAP_BIND_DN
# AUTH_LDAP_SERVER_URI = LDAP_CONFIG.LDAP_SERVER_URI
# AUTH_LDAP_BIND_PASSWORD = LDAP_CONFIG.LDAP_BIND_PASSWORD
# AUTH_LDAP_USER_ATTR_MAP = LDAP_CONFIG.LDAP_USER_ATTR_MAP
# AUTH_LDAP_USER_SEARCH = LDAP_CONFIG.AUTH_LDAP_USER_SEARCH
AUTHENTICATION_BACKENDS = LDAP_CONFIG.AUTHENTICATION_BACKENDS
# AUTHENTICATION_BACKENDS = LDAP_CONFIG.AUTHENTICATION_BACKENDS
################################################################################
### Staticfile and Template Settings
@ -156,22 +171,24 @@ if LDAP_CONFIG.LDAP_ENABLED:
STATIC_URL = '/static/'
TEMPLATES_DIR_NAME = 'templates'
STATICFILES_DIRS = [
*([str(CONFIG.CUSTOM_TEMPLATES_DIR / 'static')] if CONFIG.CUSTOM_TEMPLATES_DIR else []),
*[
str(plugin_dir / 'static')
for plugin_dir in PLUGIN_DIRS.values()
if (plugin_dir / 'static').is_dir()
],
*([str(CONSTANTS.CUSTOM_TEMPLATES_DIR / 'static')] if CONSTANTS.CUSTOM_TEMPLATES_DIR.is_dir() else []),
# *[
# str(plugin_dir / 'static')
# for plugin_dir in PLUGIN_DIRS.values()
# if (plugin_dir / 'static').is_dir()
# ],
*abx.get_plugins_STATICFILES_DIRS(),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'static'),
]
TEMPLATE_DIRS = [
*([str(CONFIG.CUSTOM_TEMPLATES_DIR)] if CONFIG.CUSTOM_TEMPLATES_DIR else []),
*[
str(plugin_dir / 'templates')
for plugin_dir in PLUGIN_DIRS.values()
if (plugin_dir / 'templates').is_dir()
],
*([str(CONSTANTS.CUSTOM_TEMPLATES_DIR)] if CONSTANTS.CUSTOM_TEMPLATES_DIR.is_dir() else []),
# *[
# str(plugin_dir / 'templates')
# for plugin_dir in PLUGIN_DIRS.values()
# if (plugin_dir / 'templates').is_dir()
# ],
*abx.get_plugins_TEMPLATE_DIRS(),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'core'),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'admin'),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME),
@ -198,8 +215,6 @@ TEMPLATES = [
### External Service Settings
################################################################################
from ..plugins_sys.config.constants import CONSTANTS
# CACHE_DB_FILENAME = 'cache.sqlite3'
# CACHE_DB_PATH = CONSTANTS.CACHE_DIR / CACHE_DB_FILENAME
# CACHE_DB_TABLE = 'django_cache'
@ -210,7 +225,7 @@ DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", str(CONSTANTS.DATABAS
QUEUE_DATABASE_NAME = DATABASE_NAME.replace('index.sqlite3', 'queue.sqlite3')
SQLITE_CONNECTION_OPTIONS = {
"TIME_ZONE": CONFIG.TIMEZONE,
"TIME_ZONE": CONSTANTS.TIMEZONE,
"OPTIONS": {
# https://gcollazo.com/optimal-sqlite-settings-for-django/
# # https://litestream.io/tips/#busy-timeout
@ -280,6 +295,7 @@ DJANGO_HUEY = {
"queues": {
HUEY["name"]: HUEY.copy(),
# more registered here at plugin import-time by BaseQueue.register()
**abx.get_plugins_DJANGO_HUEY_QUEUES(),
},
}
@ -411,7 +427,7 @@ USE_I18N = True
USE_TZ = True
DATETIME_FORMAT = 'Y-m-d h:i:s A'
SHORT_DATETIME_FORMAT = 'Y-m-d h:i:s A'
TIME_ZONE = CONFIG.TIMEZONE # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent
TIME_ZONE = CONSTANTS.TIMEZONE # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent
from django.conf.locale.en import formats as en_formats # type: ignore
@ -504,6 +520,7 @@ ADMIN_DATA_VIEWS = {
"name": "log",
},
},
*abx.get_plugins_ADMIN_DATA_VIEWS_URLS(),
],
}
@ -595,3 +612,7 @@ DEBUG_LOGFIRE = DEBUG_LOGFIRE and (DATA_DIR / '.logfire').is_dir()
# INSTALLED_APPS += ['jet_django']
# JET_PROJECT = 'archivebox'
# JET_TOKEN = 'some-api-token-here'
abx.register_plugins_settings(globals())

View file

@ -533,6 +533,8 @@ def key_is_safe(key: str) -> bool:
@render_with_table_view
def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
CONFIG = settings.FLAT_CONFIG
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
rows = {

View file

@ -1,11 +1,12 @@
__package__ = "archivebox.plugantic"
import abx
from typing import List
from django.core.checks import Warning, Tags, register
from .base_hook import BaseHook, HookType
from ..config_stubs import AttrDict
class BaseCheck(BaseHook):
hook_type: HookType = "CHECK"
@ -28,21 +29,18 @@ class BaseCheck(BaseHook):
def register(self, settings, parent_plugin=None):
# self._plugin = parent_plugin # backref to parent is for debugging only, never rely on this!
self.register_with_django_check_system(settings) # (SIDE EFFECT)
abx.pm.hook.register_django_check(check=self, settings=settings)
# install hook into settings.CHECKS
settings.CHECKS = getattr(settings, "CHECKS", None) or AttrDict({})
settings.CHECKS[self.id] = self
# record installed hook in settings.HOOKS
super().register(settings, parent_plugin=parent_plugin)
def register_with_django_check_system(self, settings):
def run_check(app_configs, **kwargs) -> List[Warning]:
import logging
return self.check(settings, logging.getLogger("checks"))
@abx.hookspec
@abx.hookimpl
def register_django_check(check: BaseCheck, settings):
def run_check(app_configs, **kwargs) -> List[Warning]:
import logging
return check.check(settings, logging.getLogger("checks"))
run_check.__name__ = self.id
run_check.tags = [self.tag]
register(self.tag)(run_check)
run_check.__name__ = check.id
run_check.tags = [check.tag]
register(check.tag)(run_check)

View file

@ -227,26 +227,34 @@ class ArchiveBoxBaseConfig(BaseSettings):
print(f' {key}={original_value} -> {value}')
self.__init__()
return self
def as_legacy_config_schema(self):
    """Return this config in the legacy schema layout.

    Backwards-compatibility shim: every declared model field name is mapped
    to ``{'type': <field annotation>, 'default': <value from model_dump()>}``
    and the resulting mapping is wrapped in a ``benedict``.
    """
    dumped_values = self.model_dump()
    legacy_schema = {}
    for field_name, field_info in self.model_fields.items():
        legacy_schema[field_name] = {
            'type': field_info.annotation,
            'default': dumped_values[field_name],
        }
    return benedict(legacy_schema)
class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook): # type: ignore[type-arg]
hook_type: ClassVar[HookType] = 'CONFIG'
section: ClassVar[ConfigSectionName] = 'GENERAL_CONFIG'
def register(self, settings, parent_plugin=None):
# self._plugin = parent_plugin # for debugging only, never rely on this!
# def register(self, settings, parent_plugin=None):
# # self._plugin = parent_plugin # for debugging only, never rely on this!
# settings.FLAT_CONFIG = benedict(getattr(settings, "FLAT_CONFIG", settings.CONFIG))
# # pass FLAT_CONFIG so far into our config model to load it
# loaded_config = self.__class__(**settings.FLAT_CONFIG)
# # then dump our parsed config back into FLAT_CONFIG for the next plugin to use
# settings.FLAT_CONFIG.merge(loaded_config.model_dump(include=set(self.model_fields.keys())))
# settings.FLAT_CONFIG = benedict(getattr(settings, "FLAT_CONFIG", {}))
# # pass FLAT_CONFIG so far into our config model to load it
# loaded_config = self
# # then dump our parsed config back into FLAT_CONFIG for the next plugin to use
# settings.FLAT_CONFIG.merge(loaded_config.model_dump(include=set(self.model_fields.keys())))
settings.CONFIGS = getattr(settings, "CONFIGS", None) or benedict({})
settings.CONFIGS[self.id] = self
self._original_id = id(self)
# settings.REGISTERED_CONFIGS = getattr(settings, "REGISTERED_CONFIGS", None) or benedict({})
# settings.REGISTERED_CONFIGS[self.id] = self
# self._original_id = id(self)
super().register(settings, parent_plugin=parent_plugin)
# super().register(settings, parent_plugin=parent_plugin)
# def ready(self, settings):
# # reload config from environment, in case it's been changed by any other plugins

View file

@ -96,32 +96,32 @@ class BaseHook(BaseModel):
# e.g. /admin/environment/config/LdapConfig/
return f"/admin/environment/{self.hook_type.lower()}/{self.id}/"
def register(self, settings, parent_plugin=None):
"""Load a record of an installed hook into global Django settings.HOOKS at runtime."""
self._plugin = parent_plugin # for debugging only, never rely on this!
# def register(self, settings, parent_plugin=None):
# """Load a record of an installed hook into global Django settings.HOOKS at runtime."""
# self._plugin = parent_plugin # for debugging only, never rely on this!
# assert json.dumps(self.model_json_schema(), indent=4), f"Hook {self.hook_module} has invalid JSON schema."
# # assert json.dumps(self.model_json_schema(), indent=4), f"Hook {self.hook_module} has invalid JSON schema."
# print(' -', self.hook_module, '.register()')
# # print(' -', self.hook_module, '.register()')
# record installed hook in settings.HOOKS
settings.HOOKS[self.id] = self
# # record installed hook in settings.HOOKS
# settings.REGISTERED_HOOKS[self.id] = self
if settings.HOOKS[self.id]._is_registered:
raise Exception(f"Tried to run {self.hook_module}.register() but its already been called!")
# if settings.REGISTERED_HOOKS[self.id]._is_registered:
# raise Exception(f"Tried to run {self.hook_module}.register() but its already been called!")
settings.HOOKS[self.id]._is_registered = True
# settings.REGISTERED_HOOKS[self.id]._is_registered = True
# print("REGISTERED HOOK:", self.hook_module)
# # print("REGISTERED HOOK:", self.hook_module)
def ready(self, settings):
"""Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
# def ready(self, settings):
# """Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
# print(' -', self.hook_module, '.ready()')
# # print(' -', self.hook_module, '.ready()')
assert self.id in settings.HOOKS, f"Tried to ready hook {self.hook_module} but it is not registered in settings.HOOKS."
# assert self.id in settings.REGISTERED_HOOKS, f"Tried to ready hook {self.hook_module} but it is not registered in settings.REGISTERED_HOOKS."
if settings.HOOKS[self.id]._is_ready:
raise Exception(f"Tried to run {self.hook_module}.ready() but its already been called!")
# if settings.REGISTERED_HOOKS[self.id]._is_ready:
# raise Exception(f"Tried to run {self.hook_module}.ready() but its already been called!")
settings.HOOKS[self.id]._is_ready = True
# settings.REGISTERED_HOOKS[self.id]._is_ready = True

View file

@ -1,5 +1,6 @@
__package__ = 'archivebox.plugantic'
import abx
import inspect
from pathlib import Path
@ -21,9 +22,6 @@ from benedict import benedict
from .base_hook import BaseHook, HookType
from ..config import bump_startup_progress_bar
class BasePlugin(BaseModel):
model_config = ConfigDict(
extra='forbid',
@ -107,9 +105,10 @@ class BasePlugin(BaseModel):
default_auto_field = 'django.db.models.AutoField'
def ready(self):
from django.conf import settings
plugin_self.ready(settings)
# handled by abx.hookimpl ready()
# def ready(self):
# from django.conf import settings
# plugin_self.ready(settings)
return PluginAppConfig
@ -125,64 +124,60 @@ class BasePlugin(BaseModel):
hooks[hook.hook_type][hook.id] = hook
return hooks
def register(self, settings=None):
def register(self, settings):
"""Loads this plugin's configs, binaries, extractors, and replayers into global Django settings at import time (before models are imported or any AppConfig.ready() are called)."""
if settings is None:
from django.conf import settings as django_settings
settings = django_settings
# print()
# print(self.plugin_module_full, '.register()')
from ..config import bump_startup_progress_bar
# assert json.dumps(self.model_json_schema(), indent=4), f'Plugin {self.plugin_module} has invalid JSON schema.'
# assert settings.PLUGINS[self.id] == self
# # assert self.id not in settings.PLUGINS, f'Tried to register plugin {self.plugin_module} but it conflicts with existing plugin of the same name ({self.app_label}).'
assert self.id not in settings.PLUGINS, f'Tried to register plugin {self.plugin_module} but it conflicts with existing plugin of the same name ({self.app_label}).'
# ### Mutate django.conf.settings... values in-place to include plugin-provided overrides
### Mutate django.conf.settings... values in-place to include plugin-provided overrides
settings.PLUGINS[self.id] = self
# if settings.PLUGINS[self.id]._is_registered:
# raise Exception(f"Tried to run {self.plugin_module}.register() but its already been called!")
if settings.PLUGINS[self.id]._is_registered:
raise Exception(f"Tried to run {self.plugin_module}.register() but its already been called!")
# for hook in self.hooks:
# hook.register(settings, parent_plugin=self)
for hook in self.hooks:
hook.register(settings, parent_plugin=self)
settings.PLUGINS[self.id]._is_registered = True
# print('√ REGISTERED PLUGIN:', self.plugin_module)
# settings.PLUGINS[self.id]._is_registered = True
# # print('√ REGISTERED PLUGIN:', self.plugin_module)
bump_startup_progress_bar()
def ready(self, settings=None):
"""Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
if settings is None:
from django.conf import settings as django_settings
settings = django_settings
from ..config import bump_startup_progress_bar
# print()
# print(self.plugin_module_full, '.ready()')
assert (
self.id in settings.PLUGINS and settings.PLUGINS[self.id]._is_registered
), f"Tried to run plugin.ready() for {self.plugin_module} but plugin is not yet registered in settings.PLUGINS."
# if settings is None:
# from django.conf import settings as django_settings
# settings = django_settings
if settings.PLUGINS[self.id]._is_ready:
raise Exception(f"Tried to run {self.plugin_module}.ready() but its already been called!")
# # print()
# # print(self.plugin_module_full, '.ready()')
for hook in self.hooks:
hook.ready(settings)
# assert (
# self.id in settings.PLUGINS and settings.PLUGINS[self.id]._is_registered
# ), f"Tried to run plugin.ready() for {self.plugin_module} but plugin is not yet registered in settings.PLUGINS."
# if settings.PLUGINS[self.id]._is_ready:
# raise Exception(f"Tried to run {self.plugin_module}.ready() but its already been called!")
# for hook in self.hooks:
# hook.ready(settings)
settings.PLUGINS[self.id]._is_ready = True
# settings.PLUGINS[self.id]._is_ready = True
bump_startup_progress_bar()
# @validate_call
# def install_binaries(self) -> Self:
# new_binaries = []
# for idx, binary in enumerate(self.binaries):
# new_binaries.append(binary.install() or binary)
# return self.model_copy(update={
# 'binaries': new_binaries,
# })
@validate_call
def install_binaries(self) -> Self:
    """Install each binary in ``self.binaries`` and return an updated copy of the plugin.

    For every binary, the result of ``binary.install()`` is used; when that
    result is falsy the original binary object is kept instead. The plugin
    itself is not mutated: the new list is applied via ``model_copy``.
    """
    installed_binaries = [binary.install() or binary for binary in self.binaries]
    return self.model_copy(update={'binaries': installed_binaries})
@validate_call
def load_binaries(self, cache=True) -> Self:

View file

@ -51,5 +51,5 @@ class LdapAuthPlugin(BasePlugin):
PLUGIN = LdapAuthPlugin()
PLUGIN.register(settings)
# PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig

View file

@ -259,5 +259,5 @@ class ChromePlugin(BasePlugin):
PLUGIN = ChromePlugin()
PLUGIN.register(settings)
# PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig

View file

@ -99,5 +99,5 @@ class ReadabilityPlugin(BasePlugin):
PLUGIN = ReadabilityPlugin()
PLUGIN.register(settings)
# PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig

View file

@ -134,5 +134,5 @@ class SinglefilePlugin(BasePlugin):
PLUGIN = SinglefilePlugin()
PLUGIN.register(settings)
# PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig

View file

@ -97,5 +97,5 @@ class YtdlpPlugin(BasePlugin):
PLUGIN = YtdlpPlugin()
PLUGIN.register(settings)
# PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig

View file

@ -90,5 +90,5 @@ class NpmPlugin(BasePlugin):
PLUGIN = NpmPlugin()
PLUGIN.register(settings)
# PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig

View file

@ -8,10 +8,11 @@ from pathlib import Path
from typing import List, Dict, Optional, ClassVar
from pydantic import InstanceOf, Field, model_validator
import abx
import django
from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type]
from django.core.checks import Error, Tags
from django.conf import settings
from pydantic_pkgr import BinProvider, PipProvider, BinName, BinProviderName, ProviderLookupDict, SemVer
from plugantic.base_plugin import BasePlugin
@ -240,5 +241,11 @@ class PipPlugin(BasePlugin):
]
PLUGIN = PipPlugin()
PLUGIN.register(settings)
# PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig
@abx.hookimpl
def register_django_checks(settings):
    """Hook implementation: call register_with_django_check_system(settings) on each pip check.

    The checks are processed in a fixed order: USER_IS_NOT_ROOT_CHECK first,
    then PIP_ENVIRONMENT_CHECK.
    """
    # NOTE(review): confirm BaseCheck still defines register_with_django_check_system();
    # the plugantic base class now dispatches via abx.pm.hook.register_django_check.
    for pip_check in (USER_IS_NOT_ROOT_CHECK, PIP_ENVIRONMENT_CHECK):
        pip_check.register_with_django_check_system(settings)

View file

@ -72,7 +72,7 @@ class PlaywrightBinProvider(BaseBinProvider):
if OPERATING_SYSTEM == "darwin" else
Path("~/.cache/ms-playwright").expanduser() # linux playwright cache dir
)
puppeteer_install_args: List[str] = ["install"] # --with-deps
puppeteer_install_args: List[str] = ["install"] # --with-deps
packages_handler: ProviderLookupDict = Field(default={
"chrome": lambda: ["chromium"],
@ -177,5 +177,5 @@ class PlaywrightPlugin(BasePlugin):
PLUGIN = PlaywrightPlugin()
PLUGIN.register(settings)
# PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig

View file

@ -165,5 +165,5 @@ class PuppeteerPlugin(BasePlugin):
PLUGIN = PuppeteerPlugin()
PLUGIN.register(settings)
# PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig

View file

@ -113,5 +113,5 @@ class RipgrepSearchPlugin(BasePlugin):
PLUGIN = RipgrepSearchPlugin()
PLUGIN.register(settings)
# PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig

View file

@ -133,5 +133,5 @@ class SonicSearchPlugin(BasePlugin):
PLUGIN = SonicSearchPlugin()
PLUGIN.register(settings)
# PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig

View file

@ -255,5 +255,5 @@ class SqliteftsSearchPlugin(BasePlugin):
PLUGIN = SqliteftsSearchPlugin()
PLUGIN.register(settings)
# PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig

View file

@ -245,5 +245,17 @@ class ConfigPlugin(BasePlugin):
PLUGIN = ConfigPlugin()
PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig
# register django apps
# NOTE(review): the pip plugin marks its hooks with @abx.hookimpl -- confirm that
# archivebox.plugin.hookimpl resolves to the same pluggy marker.
@archivebox.plugin.hookimpl
def get_INSTALLED_APPS():
    """Return a one-element list holding the name of this plugin's DJANGO_APP."""
    app_names = [DJANGO_APP.name]
    return app_names
# register configs
# NOTE(review): the pip plugin marks its hooks with @abx.hookimpl -- confirm that
# archivebox.plugin.hookimpl resolves to the same pluggy marker.
@archivebox.plugin.hookimpl
def register_CONFIG():
    """Return the values of PLUGIN.HOOKS_BY_TYPE['CONFIG'] (this plugin's CONFIG hooks)."""
    config_hooks = PLUGIN.HOOKS_BY_TYPE['CONFIG']
    return config_hooks.values()