merge plugantic and abx, all praise be to praise our glorious pluggy gods

This commit is contained in:
Nick Sweeting 2024-09-27 00:41:21 -07:00
parent 4f42eb0313
commit 8d3f45b720
No known key found for this signature in database
59 changed files with 870 additions and 1343 deletions

View file

@ -1,19 +1,19 @@
import itertools
__package__ = 'abx'
import importlib
from pathlib import Path
from typing import Dict
from benedict import benedict
import pluggy
import archivebox
from . import hookspec as base_spec
from .hookspec import hookimpl, hookspec # noqa
from .manager import pm, PluginManager # noqa
pm = pluggy.PluginManager("abx")
pm.add_hookspecs(base_spec)
###### PLUGIN DISCOVERY AND LOADING ########################################################
def register_hookspecs(hookspecs):
for hookspec_import_path in hookspecs:
hookspec_module = importlib.import_module(hookspec_import_path)
@ -48,27 +48,6 @@ def get_plugins_in_dirs(plugin_dirs: Dict[str, Path]):
DETECTED_PLUGINS.update(find_plugins_in_dir(plugin_dir, prefix=plugin_prefix))
return DETECTED_PLUGINS
def get_builtin_plugins():
    """Discover the plugins bundled inside the archivebox package.

    Each built-in plugin category lives in a directory named after its prefix
    under ``archivebox.PACKAGE_DIR``; whatever ``find_plugins_in_dir`` reports
    for each one is merged into a single dict (later categories overwrite
    earlier ones on key clashes).
    """
    builtin_prefixes = (
        'plugins_sys',
        'plugins_pkg',
        'plugins_auth',
        'plugins_search',
        'plugins_extractor',
    )
    detected = {}
    for prefix in builtin_prefixes:
        category_dir = archivebox.PACKAGE_DIR / prefix
        detected.update(find_plugins_in_dir(category_dir, prefix=prefix))
    return detected
def get_user_plugins():
    """Discover plugins located in the ``user_plugins`` folder of the data dir."""
    user_plugins_dir = archivebox.DATA_DIR / 'user_plugins'
    return find_plugins_in_dir(user_plugins_dir, prefix='user_plugins')
# BUILTIN_PLUGINS = get_builtin_plugins()
# PIP_PLUGINS = get_pip_installed_plugins()
# USER_PLUGINS = get_user_plugins()
# ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS}
# Load all plugins from pip packages, archivebox built-ins, and user plugins
@ -76,7 +55,7 @@ def load_plugins(plugins_dict: Dict[str, Path]):
LOADED_PLUGINS = {}
for plugin_module, plugin_dir in plugins_dict.items():
# print(f'Loading plugin: {plugin_module} from {plugin_dir}')
plugin_module_loaded = importlib.import_module(plugin_module + '.apps')
plugin_module_loaded = importlib.import_module(plugin_module)
pm.register(plugin_module_loaded)
LOADED_PLUGINS[plugin_module] = plugin_module_loaded.PLUGIN
# print(f' √ Loaded plugin: {plugin_module}')
@ -100,172 +79,3 @@ def get_registered_plugins():
return plugins
def get_plugins_INSTALLLED_APPS():
    """Chain together the results of every plugin's get_INSTALLED_APPS hookimpl."""
    per_plugin_results = pm.hook.get_INSTALLED_APPS()
    return itertools.chain.from_iterable(per_plugin_results)
def register_plugins_INSTALLLED_APPS(INSTALLED_APPS):
    """Pass INSTALLED_APPS to every plugin's register_INSTALLED_APPS hookimpl."""
    call_hook = pm.hook.register_INSTALLED_APPS
    call_hook(INSTALLED_APPS=INSTALLED_APPS)
def get_plugins_MIDDLEWARE():
    """Chain together the results of every plugin's get_MIDDLEWARE hookimpl."""
    per_plugin_results = pm.hook.get_MIDDLEWARE()
    return itertools.chain.from_iterable(per_plugin_results)
def register_plugins_MIDDLEWARE(MIDDLEWARE):
    """Pass MIDDLEWARE to every plugin's register_MIDDLEWARE hookimpl."""
    call_hook = pm.hook.register_MIDDLEWARE
    call_hook(MIDDLEWARE=MIDDLEWARE)
def get_plugins_AUTHENTICATION_BACKENDS():
    """Chain together the results of every plugin's get_AUTHENTICATION_BACKENDS hookimpl."""
    per_plugin_results = pm.hook.get_AUTHENTICATION_BACKENDS()
    return itertools.chain.from_iterable(per_plugin_results)
def register_plugins_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
    """Pass AUTHENTICATION_BACKENDS to every plugin's register_AUTHENTICATION_BACKENDS hookimpl."""
    call_hook = pm.hook.register_AUTHENTICATION_BACKENDS
    call_hook(AUTHENTICATION_BACKENDS=AUTHENTICATION_BACKENDS)
def get_plugins_STATICFILES_DIRS():
    """Chain together the results of every plugin's get_STATICFILES_DIRS hookimpl."""
    per_plugin_results = pm.hook.get_STATICFILES_DIRS()
    return itertools.chain.from_iterable(per_plugin_results)
def register_plugins_STATICFILES_DIRS(STATICFILES_DIRS):
    """Pass STATICFILES_DIRS to every plugin's register_STATICFILES_DIRS hookimpl."""
    call_hook = pm.hook.register_STATICFILES_DIRS
    call_hook(STATICFILES_DIRS=STATICFILES_DIRS)
def get_plugins_TEMPLATE_DIRS():
    """Chain together the results of every plugin's get_TEMPLATE_DIRS hookimpl."""
    per_plugin_results = pm.hook.get_TEMPLATE_DIRS()
    return itertools.chain.from_iterable(per_plugin_results)
def register_plugins_TEMPLATE_DIRS(TEMPLATE_DIRS):
    """Pass TEMPLATE_DIRS to every plugin's register_TEMPLATE_DIRS hookimpl."""
    call_hook = pm.hook.register_TEMPLATE_DIRS
    call_hook(TEMPLATE_DIRS=TEMPLATE_DIRS)
def get_plugins_DJANGO_HUEY_QUEUES():
    """Merge the results of every get_DJANGO_HUEY_QUEUES hookimpl into one dict.

    Results are applied in the order the hook call returns them, so a later
    result overwrites an earlier one that uses the same key.
    """
    merged_queues = {}
    hook_results = pm.hook.get_DJANGO_HUEY_QUEUES()
    for plugin_queues in hook_results:
        merged_queues.update(plugin_queues)
    return merged_queues
def register_plugins_DJANGO_HUEY(DJANGO_HUEY):
    """Pass DJANGO_HUEY to every plugin's register_DJANGO_HUEY hookimpl."""
    call_hook = pm.hook.register_DJANGO_HUEY
    call_hook(DJANGO_HUEY=DJANGO_HUEY)
def get_plugins_ADMIN_DATA_VIEWS_URLS():
    """Chain together the results of every plugin's get_ADMIN_DATA_VIEWS_URLS hookimpl."""
    per_plugin_results = pm.hook.get_ADMIN_DATA_VIEWS_URLS()
    return itertools.chain.from_iterable(per_plugin_results)
def register_plugins_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
    """Pass ADMIN_DATA_VIEWS to every plugin's register_ADMIN_DATA_VIEWS hookimpl."""
    call_hook = pm.hook.register_ADMIN_DATA_VIEWS
    call_hook(ADMIN_DATA_VIEWS=ADMIN_DATA_VIEWS)
def register_plugins_settings(settings):
    """Let plugins extend a settings mapping, then write the result back into it.

    ``settings`` must support ``.update()`` (the closing comment suggests it is
    the settings module's ``globals()`` dict -- confirm against the caller).
    It is updated in place; nothing is returned.
    """
    # convert settings dict to a benedict so we can set values using settings.attr = xyz notation
    settings_as_obj = benedict(settings, keypath_separator=None)

    # set default values for settings that are used by plugins
    settings_as_obj.INSTALLED_APPS = settings_as_obj.get('INSTALLED_APPS', [])
    settings_as_obj.MIDDLEWARE = settings_as_obj.get('MIDDLEWARE', [])
    settings_as_obj.AUTHENTICATION_BACKENDS = settings_as_obj.get('AUTHENTICATION_BACKENDS', [])
    settings_as_obj.STATICFILES_DIRS = settings_as_obj.get('STATICFILES_DIRS', [])
    settings_as_obj.TEMPLATE_DIRS = settings_as_obj.get('TEMPLATE_DIRS', [])
    settings_as_obj.DJANGO_HUEY = settings_as_obj.get('DJANGO_HUEY', {'queues': {}})
    settings_as_obj.ADMIN_DATA_VIEWS = settings_as_obj.get('ADMIN_DATA_VIEWS', {'URLS': []})

    # call all the hook functions to mutate the settings values in-place
    register_plugins_INSTALLLED_APPS(settings_as_obj.INSTALLED_APPS)
    register_plugins_MIDDLEWARE(settings_as_obj.MIDDLEWARE)
    register_plugins_AUTHENTICATION_BACKENDS(settings_as_obj.AUTHENTICATION_BACKENDS)
    register_plugins_STATICFILES_DIRS(settings_as_obj.STATICFILES_DIRS)
    register_plugins_TEMPLATE_DIRS(settings_as_obj.TEMPLATE_DIRS)
    register_plugins_DJANGO_HUEY(settings_as_obj.DJANGO_HUEY)
    register_plugins_ADMIN_DATA_VIEWS(settings_as_obj.ADMIN_DATA_VIEWS)

    # calls Plugin.settings(settings) on each registered plugin
    pm.hook.register_settings(settings=settings_as_obj)

    # then finally update the settings globals() object with all the new settings
    settings.update(settings_as_obj)
def get_plugins_urlpatterns():
    """Return one flat list of everything the plugins' urlpatterns hookimpls returned."""
    return [
        pattern
        for plugin_patterns in pm.hook.urlpatterns()
        for pattern in plugin_patterns
    ]
def register_plugins_urlpatterns(urlpatterns):
    """Pass urlpatterns to every plugin's register_urlpatterns hookimpl."""
    call_hook = pm.hook.register_urlpatterns
    call_hook(urlpatterns=urlpatterns)
# PLUGANTIC HOOKS
def get_plugins_PLUGINS():
    """Return a benedict mapping PLUGIN.id -> PLUGIN for every object registered with pm."""
    plugins_by_id = {}
    for registered in pm.get_plugins():
        plugin_obj = registered.PLUGIN
        plugins_by_id[plugin_obj.id] = plugin_obj
    return benedict(plugins_by_id)
def get_plugins_HOOKS(PLUGINS):
    """Return a benedict mapping hook.id -> hook across the ``hooks`` of every plugin in PLUGINS."""
    hooks_by_id = {}
    for plugin in PLUGINS.values():
        for hook in plugin.hooks:
            hooks_by_id[hook.id] = hook
    return benedict(hooks_by_id)
def get_plugins_CONFIGS():
    """Return a benedict mapping config.id -> config gathered from the get_CONFIGS hook."""
    configs_by_id = {}
    for plugin_configs in pm.hook.get_CONFIGS():
        for config in plugin_configs:
            configs_by_id[config.id] = config
    return benedict(configs_by_id)
def get_plugins_FLAT_CONFIG(CONFIGS):
    """Flatten every config's ``model_dump()`` in CONFIGS into one benedict (later configs win)."""
    return benedict({
        key: value
        for config in CONFIGS.values()
        for key, value in config.model_dump().items()
    })
def get_plugins_BINPROVIDERS():
    """Return a benedict mapping binprovider.id -> binprovider gathered from the get_BINPROVIDERS hook."""
    binproviders_by_id = {}
    for plugin_binproviders in pm.hook.get_BINPROVIDERS():
        for binprovider in plugin_binproviders:
            binproviders_by_id[binprovider.id] = binprovider
    return benedict(binproviders_by_id)
def get_plugins_BINARIES():
    """Return a benedict mapping binary.id -> binary gathered from the get_BINARIES hook."""
    binaries_by_id = {}
    for plugin_binaries in pm.hook.get_BINARIES():
        for binary in plugin_binaries:
            binaries_by_id[binary.id] = binary
    return benedict(binaries_by_id)
def get_plugins_EXTRACTORS():
    """Return a benedict mapping extractor.id -> extractor gathered from the get_EXTRACTORS hook."""
    extractors_by_id = {}
    for plugin_extractors in pm.hook.get_EXTRACTORS():
        for extractor in plugin_extractors:
            extractors_by_id[extractor.id] = extractor
    return benedict(extractors_by_id)
def get_plugins_REPLAYERS():
    """Return a benedict mapping replayer.id -> replayer gathered from the get_REPLAYERS hook."""
    replayers_by_id = {}
    for plugin_replayers in pm.hook.get_REPLAYERS():
        for replayer in plugin_replayers:
            replayers_by_id[replayer.id] = replayer
    return benedict(replayers_by_id)
def get_plugins_CHECKS():
    """Return a benedict mapping check.id -> check gathered from the get_CHECKS hook."""
    checks_by_id = {}
    for plugin_checks in pm.hook.get_CHECKS():
        for check in plugin_checks:
            checks_by_id[check.id] = check
    return benedict(checks_by_id)
def get_plugins_ADMINDATAVIEWS():
    """Return a benedict mapping admin_dataview.id -> admin_dataview gathered from the get_ADMINDATAVIEWS hook."""
    dataviews_by_id = {}
    for plugin_admin_dataviews in pm.hook.get_ADMINDATAVIEWS():
        for admin_dataview in plugin_admin_dataviews:
            dataviews_by_id[admin_dataview.id] = admin_dataview
    return benedict(dataviews_by_id)
def get_plugins_QUEUES():
    """Return a benedict mapping queue.id -> queue gathered from the get_QUEUES hook."""
    queues_by_id = {}
    for plugin_queues in pm.hook.get_QUEUES():
        for queue in plugin_queues:
            queues_by_id[queue.id] = queue
    return benedict(queues_by_id)
def get_plugins_SEARCHBACKENDS():
    """Return a benedict mapping searchbackend.id -> searchbackend gathered from the get_SEARCHBACKENDS hook."""
    backends_by_id = {}
    for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS():
        for searchbackend in plugin_searchbackends:
            backends_by_id[searchbackend.id] = searchbackend
    return benedict(backends_by_id)

View file

@ -0,0 +1,39 @@
__package__ = 'abx.archivebox'
import importlib
from typing import Dict
from pathlib import Path
def load_archivebox_plugins(pm, plugins_dict: Dict[str, Path]):
    """Import, register, and collect the ArchiveBox plugins named in ``plugins_dict``.

    Very similar to abx.load_plugins, but it also looks for a pydantic PLUGIN
    model (and its hooks) on the plugin package and on its optional apps.py.

    Args:
        pm: plugin manager; only ``pm.register(obj)`` is called on it.
        plugins_dict: maps an importable plugin module path to that plugin's
            directory on disk.

    Returns:
        Dict mapping each plugin module path to the PLUGIN object found for it
        (the last distinct one when a package exposes several). A module that
        exposes no PLUGIN gets no entry.
    """
    LOADED_PLUGINS = {}
    for plugin_module, plugin_dir in plugins_dict.items():
        # print(f'Loading plugin: {plugin_module} from {plugin_dir}')

        archivebox_plugins_found = []

        # 1. register the plugin module directly in case it contains any loose hookimpls (e.g. in __init__.py)
        plugin_module_loaded = importlib.import_module(plugin_module)
        pm.register(plugin_module_loaded)
        if hasattr(plugin_module_loaded, 'PLUGIN'):
            archivebox_plugins_found.append(plugin_module_loaded.PLUGIN)

        # 2. then try to import plugin_module.apps as well
        if (plugin_dir / 'apps.py').exists():
            plugin_apps = importlib.import_module(plugin_module + '.apps')
            pm.register(plugin_apps)  # register the whole .apps in case it contains loose hookimpls (not in a class)
            if hasattr(plugin_apps, 'PLUGIN'):
                apps_plugin = plugin_apps.PLUGIN
                # The package may simply re-export apps.PLUGIN; registering the
                # very same object twice makes the plugin manager raise, so
                # compare by identity (pydantic models compare by value).
                if not any(apps_plugin is found for found in archivebox_plugins_found):
                    archivebox_plugins_found.append(apps_plugin)

        # 3. register every distinct PLUGIN object that was found + all its hooks
        for ab_plugin in archivebox_plugins_found:
            pm.register(ab_plugin)
            for hook in ab_plugin.hooks:
                hook.__signature__ = hook.__class__.__signature__  # fix to make pydantic model usable as Pluggy plugin
                pm.register(hook)
            LOADED_PLUGINS[plugin_module] = ab_plugin

        # print(f'    √ Loaded plugin: {LOADED_PLUGINS}')
    return LOADED_PLUGINS

View file

@ -0,0 +1,38 @@
__package__ = 'abx.archivebox'
from typing import Dict
import abx
from .base_hook import BaseHook, HookType
# Hook describing an admin data view: a list route plus the per-item route
# nested under it. The defaults below are placeholders meant to be overridden.
class BaseAdminDataView(BaseHook):
    hook_type: HookType = "ADMINDATAVIEW"

    name: str = 'example_admin_data_view_list'
    verbose_name: str = 'Data View'
    route: str = '/__OVERRIDE_THIS__/'
    view: str = 'plugins_example.example.views.example_view_list'

    # settings for the detail page of a single item of the list view
    items: Dict[str, str] = {
        'route': '<str:key>/',
        'name': 'example_admin_data_view_item',
        'view': 'plugins_example.example.views.example_view_item',
    }

    @abx.hookimpl
    def get_ADMINDATAVIEWS(self):
        """Return this data view as a one-element list."""
        return [self]

    @abx.hookimpl
    def get_ADMIN_DATA_VIEWS_URLS(self):
        """Return this view's route entry (a one-element list) for django.conf.settings.ADMIN_DATA_VIEWS."""
        # note: the entry's "name" is filled from verbose_name, not from self.name
        return [
            {
                'route': self.route,
                'view': self.view,
                'name': self.verbose_name,
                'items': self.items,
            },
        ]

View file

@ -1,9 +1,8 @@
__package__ = "archivebox.plugantic"
__package__ = "abx.archivebox"
from typing import Dict, List
from typing_extensions import Self
from benedict import benedict
from pydantic import Field, InstanceOf, validate_call
from pydantic_pkgr import (
Binary,
@ -15,10 +14,8 @@ from pydantic_pkgr import (
EnvProvider,
)
from django.conf import settings
import abx
import archivebox
from .base_hook import BaseHook, HookType
@ -37,19 +34,17 @@ class BaseBinProvider(BaseHook, BinProvider):
# # return cache.get_or_set(f'bin:version:{bin_name}:{abspath}', get_version_func)
# return get_version_func()
def register(self, settings, parent_plugin=None):
# self._plugin = parent_plugin # for debugging only, never rely on this!
settings.BINPROVIDERS = getattr(settings, "BINPROVIDERS", None) or benedict({})
settings.BINPROVIDERS[self.id] = self
super().register(settings, parent_plugin=parent_plugin)
# TODO: add install/load/load_or_install methods as abx.hookimpl methods
@property
def admin_url(self) -> str:
# e.g. /admin/environment/binproviders/NpmBinProvider/ TODO
return "/admin/environment/binaries/"
@abx.hookimpl
def get_BINPROVIDERS(self):
return [self]
class BaseBinary(BaseHook, Binary):
hook_type: HookType = "BINARY"
@ -57,14 +52,6 @@ class BaseBinary(BaseHook, Binary):
binproviders_supported: List[InstanceOf[BinProvider]] = Field(default_factory=list, alias="binproviders")
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = Field(default_factory=dict, alias="overrides")
def register(self, settings, parent_plugin=None):
# self._plugin = parent_plugin # for debugging only, never rely on this!
settings.BINARIES = getattr(settings, "BINARIES", None) or benedict({})
settings.BINARIES[self.id] = self
super().register(settings, parent_plugin=parent_plugin)
@staticmethod
def symlink_to_lib(binary, bin_dir=None) -> None:
bin_dir = bin_dir or archivebox.CONSTANTS.LIB_BIN_DIR
@ -101,6 +88,12 @@ class BaseBinary(BaseHook, Binary):
# e.g. /admin/environment/config/LdapConfig/
return f"/admin/environment/binaries/{self.name}/"
@abx.hookimpl
def get_BINARIES(self):
return [self]
apt = AptProvider()
brew = BrewProvider()
env = EnvProvider()

View file

@ -1,10 +1,11 @@
__package__ = "archivebox.plugantic"
__package__ = "abx.archivebox"
import abx
from typing import List
from django.core.checks import Warning, Tags, register
import abx
from .base_hook import BaseHook, HookType
@ -26,21 +27,18 @@ class BaseCheck(BaseHook):
# logger.debug('[√] Loaded settings.PLUGINS succesfully.')
return errors
def register(self, settings, parent_plugin=None):
# self._plugin = parent_plugin # backref to parent is for debugging only, never rely on this!
abx.pm.hook.register_django_check(check=self, settings=settings)
@abx.hookspec
@abx.hookimpl
def register_django_check(check: BaseCheck, settings):
def run_check(app_configs, **kwargs) -> List[Warning]:
import logging
return check.check(settings, logging.getLogger("checks"))
run_check.__name__ = check.id
run_check.tags = [check.tag]
register(check.tag)(run_check)
@abx.hookimpl
def get_CHECKS(self):
return [self]
@abx.hookimpl
def register_checks(self):
    """Register this check with django's check framework under ``self.tag``.

    Builds a wrapper that runs ``self.check`` with the live django settings
    and the "checks" logger, names it after ``self.id``, and registers it.
    """
    def run_check(**kwargs):
        # imported lazily, only when the check actually runs
        import logging
        from django.conf import settings

        checks_logger = logging.getLogger("checks")
        return self.check(settings, checks_logger)

    run_check.__name__ = self.id
    run_check.tags = [self.tag]

    tag_decorator = register(self.tag)
    tag_decorator(run_check)

View file

@ -1,4 +1,4 @@
__package__ = 'archivebox.plugantic'
__package__ = 'abx.archivebox'
import os
import re
@ -14,8 +14,10 @@ from pydantic_settings.sources import TomlConfigSettingsSource
from pydantic_pkgr.base_types import func_takes_args_or_kwargs
import abx
from .base_hook import BaseHook, HookType
from . import ini_to_toml
from archivebox.misc import ini_to_toml
PACKAGE_DIR = Path(__file__).resolve().parent.parent
@ -236,6 +238,7 @@ class ArchiveBoxBaseConfig(BaseSettings):
for key, field in self.model_fields.items()
})
class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook): # type: ignore[type-arg]
hook_type: ClassVar[HookType] = 'CONFIG'
@ -261,42 +264,20 @@ class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook): # type: ignore[type-ar
# self.__init__()
# class WgetToggleConfig(ConfigSet):
# section: ConfigSectionName = 'ARCHIVE_METHOD_TOGGLES'
@abx.hookimpl
def get_CONFIGS(self):
    """Return ``{self.id: self}``, or an empty dict (after printing the error) if that fails."""
    try:
        configs = {self.id: self}
    except Exception as e:
        # best-effort: report the failure and contribute nothing instead of raising
        # raise Exception(f'Error computing CONFIGS for {type(self)}: {e.__class__.__name__}: {e}')
        print(f'Error computing CONFIGS for {type(self)}: {e.__class__.__name__}: {e}')
        configs = {}
    return configs
# SAVE_WGET: bool = True
# SAVE_WARC: bool = True
# class WgetDependencyConfig(ConfigSet):
# section: ConfigSectionName = 'DEPENDENCY_CONFIG'
# WGET_BINARY: str = Field(default='wget')
# WGET_ARGS: Optional[List[str]] = Field(default=None)
# WGET_EXTRA_ARGS: List[str] = []
# WGET_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
# class WgetOptionsConfig(ConfigSet):
# section: ConfigSectionName = 'ARCHIVE_METHOD_OPTIONS'
# # loaded from shared config
# WGET_AUTO_COMPRESSION: bool = Field(default=True)
# SAVE_WGET_REQUISITES: bool = Field(default=True)
# WGET_USER_AGENT: str = Field(default='', alias='USER_AGENT')
# WGET_TIMEOUT: int = Field(default=60, alias='TIMEOUT')
# WGET_CHECK_SSL_VALIDITY: bool = Field(default=True, alias='CHECK_SSL_VALIDITY')
# WGET_RESTRICT_FILE_NAMES: str = Field(default='windows', alias='RESTRICT_FILE_NAMES')
# WGET_COOKIES_FILE: Optional[Path] = Field(default=None, alias='COOKIES_FILE')
# CONFIG = {
# 'CHECK_SSL_VALIDITY': False,
# 'SAVE_WARC': False,
# 'TIMEOUT': 999,
# }
# WGET_CONFIG = [
# WgetToggleConfig(**CONFIG),
# WgetDependencyConfig(**CONFIG),
# WgetOptionsConfig(**CONFIG),
# ]
@abx.hookimpl
def get_FLAT_CONFIG(self):
    """Return ``self.model_dump()``, or an empty dict (after printing the error) if that fails."""
    try:
        flat_config = self.model_dump()
    except Exception as e:
        # best-effort: report the failure and contribute nothing instead of raising
        # raise Exception(f'Error computing FLAT_CONFIG for {type(self)}: {e.__class__.__name__}: {e}')
        print(f'Error computing FLAT_CONFIG for {type(self)}: {e.__class__.__name__}: {e}')
        flat_config = {}
    return flat_config

View file

@ -1,4 +1,4 @@
__package__ = 'archivebox.plugantic'
__package__ = 'abx.archivebox'
from typing import Optional, List, Literal, Annotated, Dict, Any
from typing_extensions import Self
@ -8,9 +8,9 @@ from pathlib import Path
from pydantic import model_validator, AfterValidator
from pydantic_pkgr import BinName
from .base_hook import BaseHook, HookType
from ..config_stubs import AttrDict
import abx
from .base_hook import BaseHook, HookType
def no_empty_args(args: List[str]) -> List[str]:
@ -45,16 +45,6 @@ class BaseExtractor(BaseHook):
return self
def register(self, settings, parent_plugin=None):
# self._plugin = parent_plugin # for debugging only, never rely on this!
settings.EXTRACTORS = getattr(settings, "EXTRACTORS", None) or AttrDict({})
settings.EXTRACTORS[self.id] = self
super().register(settings, parent_plugin=parent_plugin)
def get_output_path(self, snapshot) -> Path:
return Path(self.id.lower())
@ -64,7 +54,7 @@ class BaseExtractor(BaseHook):
return False
return True
# TODO: move this to a hookimpl
def extract(self, url: str, **kwargs) -> Dict[str, Any]:
output_dir = self.get_output_path(url, **kwargs)
@ -81,6 +71,7 @@ class BaseExtractor(BaseHook):
'returncode': proc.returncode,
}
# TODO: move this to a hookimpl
def exec(self, args: CmdArgsList, pwd: Optional[Path]=None, settings=None):
pwd = pwd or Path('.')
if settings is None:
@ -90,28 +81,6 @@ class BaseExtractor(BaseHook):
binary = settings.BINARIES[self.binary]
return binary.exec(args, pwd=pwd)
# class YtdlpExtractor(Extractor):
# name: ExtractorName = 'media'
# binary: Binary = YtdlpBinary()
# def get_output_path(self, snapshot) -> Path:
# return 'media/'
# class WgetExtractor(Extractor):
# name: ExtractorName = 'wget'
# binary: Binary = WgetBinary()
# def get_output_path(self, snapshot) -> Path:
# return get_wget_output_path(snapshot)
# class WarcExtractor(Extractor):
# name: ExtractorName = 'warc'
# binary: Binary = WgetBinary()
# def get_output_path(self, snapshot) -> Path:
# return get_wget_output_path(snapshot)
@abx.hookimpl
def get_EXTRACTORS(self):
return [self]

View file

@ -1,4 +1,4 @@
__package__ = 'archivebox.plugantic'
__package__ = 'abx.archivebox'
import inspect
from huey.api import TaskWrapper
@ -7,6 +7,7 @@ from pathlib import Path
from typing import Tuple, Literal, ClassVar, get_args
from pydantic import BaseModel, ConfigDict
import abx
HookType = Literal['CONFIG', 'BINPROVIDER', 'BINARY', 'EXTRACTOR', 'REPLAYER', 'CHECK', 'ADMINDATAVIEW', 'QUEUE', 'SEARCHBACKEND']
hook_type_names: Tuple[HookType] = get_args(HookType)
@ -29,8 +30,8 @@ class BaseHook(BaseModel):
plugins_pkg.npm.NpmPlugin().AppConfig.ready() # called by django
plugins_pkg.npm.NpmPlugin().register(settings) ->
plugins_pkg.npm.NpmConfigSet().register(settings)
plugantic.base_configset.BaseConfigSet().register(settings)
plugantic.base_hook.BaseHook().register(settings, parent_plugin=plugins_pkg.npm.NpmPlugin())
abx.archivebox.base_configset.BaseConfigSet().register(settings)
abx.archivebox.base_hook.BaseHook().register(settings, parent_plugin=plugins_pkg.npm.NpmPlugin())
...
...
@ -96,32 +97,20 @@ class BaseHook(BaseModel):
# e.g. /admin/environment/config/LdapConfig/
return f"/admin/environment/{self.hook_type.lower()}/{self.id}/"
# def register(self, settings, parent_plugin=None):
# """Load a record of an installed hook into global Django settings.HOOKS at runtime."""
# self._plugin = parent_plugin # for debugging only, never rely on this!
# # assert json.dumps(self.model_json_schema(), indent=4), f"Hook {self.hook_module} has invalid JSON schema."
@abx.hookimpl
def register(self, settings):
    """Mark this hook as registered (hookimpl for the ``register`` hook).

    ``settings`` is accepted but not used here.
    NOTE(review): the previous docstring said "Called when
    django.apps.AppConfig.ready() is called", the same wording used for the
    ``ready`` hook -- confirm when this hook is actually invoked.
    """
    # debug trace printed on every registration
    print("REGISTERED HOOK:", self.hook_module)
    self._is_registered = True
# # print(' -', self.hook_module, '.register()')
# # record installed hook in settings.HOOKS
# settings.REGISTERED_HOOKS[self.id] = self
# if settings.REGISTERED_HOOKS[self.id]._is_registered:
# raise Exception(f"Tried to run {self.hook_module}.register() but its already been called!")
# settings.REGISTERED_HOOKS[self.id]._is_registered = True
# # print("REGISTERED HOOK:", self.hook_module)
# def ready(self, settings):
# """Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
# # print(' -', self.hook_module, '.ready()')
# assert self.id in settings.REGISTERED_HOOKS, f"Tried to ready hook {self.hook_module} but it is not registered in settings.REGISTERED_HOOKS."
# if settings.REGISTERED_HOOKS[self.id]._is_ready:
# raise Exception(f"Tried to run {self.hook_module}.ready() but its already been called!")
# settings.REGISTERED_HOOKS[self.id]._is_ready = True
@abx.hookimpl
def ready(self):
    """Called when django.apps.AppConfig.ready() is called.

    Marks this hook as ready. Raises AssertionError when ``_is_registered``
    is falsy, i.e. ready() ran without the hook being marked registered first.
    """
    assert self._is_registered, f"Tried to run {self.hook_module}.ready() but it was never registered!"
    # print("READY HOOK:", self.hook_module)
    self._is_ready = True

View file

@ -1,4 +1,4 @@
__package__ = 'archivebox.plugantic'
__package__ = 'abx.archivebox'
import abx
import inspect
@ -16,7 +16,6 @@ from pydantic import (
model_validator,
InstanceOf,
computed_field,
validate_call,
)
from benedict import benedict
@ -124,91 +123,32 @@ class BasePlugin(BaseModel):
hooks[hook.hook_type][hook.id] = hook
return hooks
@abx.hookimpl
def register(self, settings):
"""Loads this plugin's configs, binaries, extractors, and replayers into global Django settings at import time (before models are imported or any AppConfig.ready() are called)."""
from archivebox.config import bump_startup_progress_bar
from ..config import bump_startup_progress_bar
# assert settings.PLUGINS[self.id] == self
# # assert self.id not in settings.PLUGINS, f'Tried to register plugin {self.plugin_module} but it conflicts with existing plugin of the same name ({self.app_label}).'
# ### Mutate django.conf.settings... values in-place to include plugin-provided overrides
# if settings.PLUGINS[self.id]._is_registered:
# raise Exception(f"Tried to run {self.plugin_module}.register() but its already been called!")
# for hook in self.hooks:
# hook.register(settings, parent_plugin=self)
# settings.PLUGINS[self.id]._is_registered = True
# # print('√ REGISTERED PLUGIN:', self.plugin_module)
self._is_registered = True
bump_startup_progress_bar()
print('◣----------------- REGISTERED PLUGIN:', self.plugin_module, '-----------------◢')
print()
@abx.hookimpl
def ready(self, settings=None):
"""Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
from ..config import bump_startup_progress_bar
from archivebox.config import bump_startup_progress_bar
assert self._is_registered, f"Tried to run {self.plugin_module}.ready() but it was never registered!"
self._is_ready = True
# if settings is None:
# from django.conf import settings as django_settings
# settings = django_settings
# # print()
# # print(self.plugin_module_full, '.ready()')
# assert (
# self.id in settings.PLUGINS and settings.PLUGINS[self.id]._is_registered
# ), f"Tried to run plugin.ready() for {self.plugin_module} but plugin is not yet registered in settings.PLUGINS."
# if settings.PLUGINS[self.id]._is_ready:
# raise Exception(f"Tried to run {self.plugin_module}.ready() but its already been called!")
# for hook in self.hooks:
# hook.ready(settings)
# settings.PLUGINS[self.id]._is_ready = True
bump_startup_progress_bar()
@validate_call
def install_binaries(self) -> Self:
new_binaries = []
for idx, binary in enumerate(self.binaries):
new_binaries.append(binary.install() or binary)
return self.model_copy(update={
'binaries': new_binaries,
})
@validate_call
def load_binaries(self, cache=True) -> Self:
new_binaries = []
for idx, binary in enumerate(self.HOOKS_BY_TYPE['BINARY'].values()):
new_binaries.append(binary.load(cache=cache) or binary)
return self.model_copy(update={
'binaries': new_binaries,
})
@abx.hookimpl
def get_INSTALLED_APPS(self):
return [self.plugin_module]
# @validate_call
# def load_or_install_binaries(self, cache=True) -> Self:
# new_binaries = []
# for idx, binary in enumerate(self.binaries):
# new_binaries.append(binary.load_or_install(cache=cache) or binary)
# return self.model_copy(update={
# 'binaries': new_binaries,
# })
# class YtdlpPlugin(BasePlugin):
# name: str = 'ytdlp'
# configs: List[SerializeAsAny[BaseConfigSet]] = []
# binaries: List[SerializeAsAny[BaseBinary]] = [YtdlpBinary()]
# extractors: List[SerializeAsAny[BaseExtractor]] = [YtdlpExtractor()]
# replayers: List[SerializeAsAny[BaseReplayer]] = [MEDIA_REPLAYER]
# class WgetPlugin(BasePlugin):
# name: str = 'wget'
# configs: List[SerializeAsAny[BaseConfigSet]] = [*WGET_CONFIG]
# binaries: List[SerializeAsAny[BaseBinary]] = [WgetBinary()]
# extractors: List[SerializeAsAny[BaseExtractor]] = [WgetExtractor(), WarcExtractor()]

View file

@ -1,16 +1,18 @@
__package__ = 'archivebox.plugantic'
__package__ = 'abx.archivebox'
import importlib
from typing import Dict, List, TYPE_CHECKING
from pydantic import Field, InstanceOf
from benedict import benedict
if TYPE_CHECKING:
from huey.api import TaskWrapper
import abx
from .base_hook import BaseHook, HookType
from .base_binary import BaseBinary
from ..config_stubs import AttrDict
@ -33,13 +35,13 @@ class BaseQueue(BaseHook):
if hasattr(task, "task_class") and task.huey.name == self.name:
all_tasks[task_name] = task
return AttrDict(all_tasks)
return benedict(all_tasks)
def get_huey_config(self, settings) -> dict:
def get_django_huey_config(self, QUEUE_DATABASE_NAME) -> dict:
"""Get the config dict to insert into django.conf.settings.DJANGO_HUEY['queues']."""
return {
"huey_class": "huey.SqliteHuey",
"filename": settings.QUEUE_DATABASE_NAME,
"filename": QUEUE_DATABASE_NAME,
"name": self.name,
"results": True,
"store_none": True,
@ -58,7 +60,7 @@ class BaseQueue(BaseHook):
},
}
def get_supervisor_config(self, settings) -> dict:
def get_supervisord_config(self, settings) -> dict:
"""Get the config dict used to tell supervisord to start a huey consumer for this queue."""
return {
"name": f"worker_{self.name}",
@ -78,7 +80,7 @@ class BaseQueue(BaseHook):
print(f"Error starting worker for queue {self.name}: {e}")
return None
print()
worker = start_worker(supervisor, self.get_supervisor_config(settings), lazy=lazy)
worker = start_worker(supervisor, self.get_supervisord_config(settings), lazy=lazy)
# Update settings.WORKERS to include this worker
settings.WORKERS = getattr(settings, "WORKERS", None) or AttrDict({})
@ -86,65 +88,19 @@ class BaseQueue(BaseHook):
return worker
def register(self, settings, parent_plugin=None):
# self._plugin = parent_plugin # for debugging only, never rely on this!
# Side effect: register queue with django-huey multiqueue dict
settings.DJANGO_HUEY = getattr(settings, "DJANGO_HUEY", None) or AttrDict({"queues": {}})
settings.DJANGO_HUEY["queues"][self.name] = self.get_huey_config(settings)
# Side effect: register some extra tasks with huey
# on_startup(queue=self.name)(self.on_startup_task)
# db_periodic_task(crontab(minute='*/5'))(self.on_periodic_task)
# Install queue into settings.QUEUES
settings.QUEUES = getattr(settings, "QUEUES", None) or AttrDict({})
settings.QUEUES[self.id] = self
# Record installed hook into settings.HOOKS
super().register(settings, parent_plugin=parent_plugin)
@abx.hookimpl
def get_QUEUES(self):
return [self]
@abx.hookimpl
def get_DJANGO_HUEY_QUEUES(self, QUEUE_DATABASE_NAME):
    """Return ``{self.name: config}`` to be added to django.conf.settings.DJANGO_HUEY['queues']."""
    queue_config = self.get_django_huey_config(QUEUE_DATABASE_NAME)
    return {self.name: queue_config}
# @abx.hookimpl
# def ready(self, settings):
# self.start_supervisord_worker(settings, lazy=True)
# super().ready(settings)
# class WgetToggleConfig(ConfigSet):
# section: ConfigSectionName = 'ARCHIVE_METHOD_TOGGLES'
# SAVE_WGET: bool = True
# SAVE_WARC: bool = True
# class WgetDependencyConfig(ConfigSet):
# section: ConfigSectionName = 'DEPENDENCY_CONFIG'
# WGET_BINARY: str = Field(default='wget')
# WGET_ARGS: Optional[List[str]] = Field(default=None)
# WGET_EXTRA_ARGS: List[str] = []
# WGET_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
# class WgetOptionsConfig(ConfigSet):
# section: ConfigSectionName = 'ARCHIVE_METHOD_OPTIONS'
# # loaded from shared config
# WGET_AUTO_COMPRESSION: bool = Field(default=True)
# SAVE_WGET_REQUISITES: bool = Field(default=True)
# WGET_USER_AGENT: str = Field(default='', alias='USER_AGENT')
# WGET_TIMEOUT: int = Field(default=60, alias='TIMEOUT')
# WGET_CHECK_SSL_VALIDITY: bool = Field(default=True, alias='CHECK_SSL_VALIDITY')
# WGET_RESTRICT_FILE_NAMES: str = Field(default='windows', alias='RESTRICT_FILE_NAMES')
# WGET_COOKIES_FILE: Optional[Path] = Field(default=None, alias='COOKIES_FILE')
# CONFIG = {
# 'CHECK_SSL_VALIDITY': False,
# 'SAVE_WARC': False,
# 'TIMEOUT': 999,
# }
# WGET_CONFIG = [
# WgetToggleConfig(**CONFIG),
# WgetDependencyConfig(**CONFIG),
# WgetOptionsConfig(**CONFIG),
# ]

View file

@ -1,8 +1,8 @@
__package__ = 'archivebox.plugantic'
__package__ = 'abx.archivebox'
import abx
from .base_hook import BaseHook, HookType
from ..config_stubs import AttrDict
class BaseReplayer(BaseHook):
@ -22,16 +22,8 @@ class BaseReplayer(BaseHook):
# icon_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon'
# thumbnail_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon'
def register(self, settings, parent_plugin=None):
# self._plugin = parent_plugin # for debugging only, never rely on this!
@abx.hookimpl
def get_REPLAYERS(self):
return [self]
settings.REPLAYERS = getattr(settings, 'REPLAYERS', None) or AttrDict({})
settings.REPLAYERS[self.id] = self
super().register(settings, parent_plugin=parent_plugin)
# class MediaReplayer(BaseReplayer):
# name: str = 'MediaReplayer'
# MEDIA_REPLAYER = MediaReplayer()
# TODO: add hookimpl methods for get_row_template, get_embed_template, get_fullpage_template, etc...

View file

@ -0,0 +1,33 @@
__package__ = 'abx.archivebox'
from typing import Iterable, List
from pydantic import Field
import abx
from .base_hook import BaseHook, HookType
# Base hook for search backends. index() and flush() are no-ops by default;
# search() must be provided by the subclass.
class BaseSearchBackend(BaseHook):
    hook_type: HookType = 'SEARCHBACKEND'

    name: str = Field()       # e.g. 'singlefile'

    # TODO: move these to a hookimpl

    @staticmethod
    def index(snapshot_id: str, texts: List[str]):
        """Default no-op; always returns None."""
        return None

    @staticmethod
    def flush(snapshot_ids: Iterable[str]):
        """Default no-op; always returns None."""
        return None

    @staticmethod
    def search(text: str) -> List[str]:
        """Always raises NotImplementedError; subclasses supply the real search."""
        raise NotImplementedError("search method must be implemented by subclass")

    @abx.hookimpl
    def get_SEARCHBACKENDS(self):
        """Return this backend as a one-element list."""
        return [self]

View file

@ -1,4 +1,6 @@
from .hookspec import hookspec
__package__ = 'abx.archivebox'
from .. import hookspec
@hookspec

View file

@ -0,0 +1,98 @@
__package__ = 'abx.archivebox'
from benedict import benedict
from .. import pm
# API exposed to ArchiveBox code
def get_PLUGINS():
    """Return a benedict mapping PLUGIN.id -> PLUGIN for everything registered with pm.

    ``pm.get_plugins()`` yields every registered object. Objects that do not
    expose a ``PLUGIN`` attribute are skipped instead of raising
    AttributeError. Several registered objects may expose the same PLUGIN;
    they collapse onto one key.
    """
    return benedict({
        plugin.PLUGIN.id: plugin.PLUGIN
        for plugin in pm.get_plugins()
        if hasattr(plugin, 'PLUGIN')
    })
def get_HOOKS(PLUGINS):
    """Return a benedict mapping hook.id -> hook across the ``hooks`` of every plugin in PLUGINS."""
    all_hooks = {}
    for plugin in PLUGINS.values():
        for hook in plugin.hooks:
            all_hooks[hook.id] = hook
    return benedict(all_hooks)
def get_CONFIGS():
    """Merge the ``{config_id: config}`` dicts returned by the get_CONFIGS hook into one benedict."""
    all_configs = {}
    for plugin_configs in pm.hook.get_CONFIGS():
        for config_id, config in plugin_configs.items():
            all_configs[config_id] = config
    return benedict(all_configs)
def get_FLAT_CONFIG():
    """Merge the flat key/value dicts returned by the get_FLAT_CONFIG hook into one benedict."""
    flat_config = {}
    for plugin_config_dict in pm.hook.get_FLAT_CONFIG():
        for key, value in plugin_config_dict.items():
            flat_config[key] = value
    return benedict(flat_config)
def get_BINPROVIDERS():
    """Return a benedict mapping binprovider.id -> binprovider gathered from the get_BINPROVIDERS hook."""
    all_binproviders = {}
    for plugin_binproviders in pm.hook.get_BINPROVIDERS():
        for binprovider in plugin_binproviders:
            all_binproviders[binprovider.id] = binprovider
    return benedict(all_binproviders)
def get_BINARIES():
    """Return a benedict mapping binary.id -> binary gathered from the get_BINARIES hook."""
    all_binaries = {}
    for plugin_binaries in pm.hook.get_BINARIES():
        for binary in plugin_binaries:
            all_binaries[binary.id] = binary
    return benedict(all_binaries)
def get_EXTRACTORS():
    """Return a benedict mapping extractor.id -> extractor gathered from the get_EXTRACTORS hook."""
    all_extractors = {}
    for plugin_extractors in pm.hook.get_EXTRACTORS():
        for extractor in plugin_extractors:
            all_extractors[extractor.id] = extractor
    return benedict(all_extractors)
def get_REPLAYERS():
    """Return a benedict mapping replayer.id -> replayer gathered from the get_REPLAYERS hook."""
    all_replayers = {}
    for plugin_replayers in pm.hook.get_REPLAYERS():
        for replayer in plugin_replayers:
            all_replayers[replayer.id] = replayer
    return benedict(all_replayers)
def get_CHECKS():
    """Return a benedict mapping check.id -> check gathered from the get_CHECKS hook."""
    all_checks = {}
    for plugin_checks in pm.hook.get_CHECKS():
        for check in plugin_checks:
            all_checks[check.id] = check
    return benedict(all_checks)
def get_ADMINDATAVIEWS():
    """Return a benedict of every admin data view from the ``get_ADMINDATAVIEWS`` hook, keyed by ``.id``."""
    by_id = {}
    for plugin_admin_dataviews in pm.hook.get_ADMINDATAVIEWS():
        for admin_dataview in plugin_admin_dataviews:
            by_id[admin_dataview.id] = admin_dataview
    return benedict(by_id)
def get_QUEUES():
    """Return a benedict of every queue from the ``get_QUEUES`` hook, keyed by ``.id``."""
    by_id = {}
    for plugin_queues in pm.hook.get_QUEUES():
        for queue in plugin_queues:
            by_id[queue.id] = queue
    return benedict(by_id)
def get_SEARCHBACKENDS():
    """Return a benedict of every search backend from the ``get_SEARCHBACKENDS`` hook, keyed by ``.id``."""
    by_id = {}
    for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS():
        for searchbackend in plugin_searchbackends:
            by_id[searchbackend.id] = searchbackend
    return benedict(by_id)
###########################
def register_all_hooks(settings):
    """Invoke the ``register`` hook on all plugins, passing ``settings`` as a keyword argument.

    NOTE(review): the settings module calls this with ``globals()``, so ``settings``
    is presumably a plain dict of module globals there -- confirm what implementations expect.
    """
    pm.hook.register(settings=settings)

View file

@ -0,0 +1 @@
__package__ = 'abx.django'

View file

@ -1,8 +1,9 @@
__package__ = 'abx.django'
from django.apps import AppConfig
class ABXConfig(AppConfig):
default_auto_field = 'django.db.models.BigAutoField'
name = 'abx'
def ready(self):

View file

@ -0,0 +1,120 @@
__package__ = 'abx.django'
from ..hookspec import hookspec
###########################################################################################
@hookspec
def get_INSTALLED_APPS():
    """Return a list of Django app import paths to add to INSTALLED_APPS."""
    # e.g. ['your_plugin_type.plugin_name']
    return []
# @hookspec
# def register_INSTALLED_APPS(INSTALLED_APPS):
# """Mutate INSTALLED_APPS in place to add your app in a specific position"""
# # idx_of_contrib = INSTALLED_APPS.index('django.contrib.auth')
# # INSTALLED_APPS.insert(idx_of_contrib + 1, 'your_plugin_type.plugin_name')
# pass
@hookspec
def get_TEMPLATE_DIRS():
    """Return a list of template directories to add to TEMPLATE_DIRS."""
    return [] # e.g. ['your_plugin_type/plugin_name/templates']
# @hookspec
# def register_TEMPLATE_DIRS(TEMPLATE_DIRS):
# """Install django settings"""
# # e.g. TEMPLATE_DIRS.insert(0, 'your_plugin_type/plugin_name/templates')
# pass
@hookspec
def get_STATICFILES_DIRS():
    """Return a list of static file directories to add to STATICFILES_DIRS."""
    return [] # e.g. ['your_plugin_type/plugin_name/static']
# @hookspec
# def register_STATICFILES_DIRS(STATICFILES_DIRS):
# """Mutate STATICFILES_DIRS in place to add your static dirs in a specific position"""
# # e.g. STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static')
# pass
@hookspec
def get_MIDDLEWARE():
    """Return a list of middleware import paths to add to MIDDLEWARE."""
    return [] # e.g. ['your_plugin_type.plugin_name.middleware.YourMiddleware']
# @hookspec
# def register_MIDDLEWARE(MIDDLEWARE):
# """Mutate MIDDLEWARE in place to add your middleware in a specific position"""
# # e.g. MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware')
# pass
@hookspec
def get_AUTHENTICATION_BACKENDS():
    """Return a list of auth backend import paths to add to AUTHENTICATION_BACKENDS."""
    return [] # e.g. ['django_auth_ldap.backend.LDAPBackend']
# @hookspec
# def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
# """Mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position"""
# # e.g. AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend')
# pass
@hookspec
def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME):
    """Return the huey queue definitions this plugin contributes.

    Args:
        QUEUE_DATABASE_NAME: value supplied by the caller of the hook.

    NOTE(review): the example below shows a list of dicts, but the consumer in
    abx.django.use merges each result with ``dict.update()`` -- implementations
    presumably need to return a mapping (or iterable of key/value pairs); confirm.
    """
    return [] # e.g. [{'name': 'your_plugin_type.plugin_name', 'HUEY': {...}}]
# @hookspec
# def register_DJANGO_HUEY(DJANGO_HUEY):
# """Mutate DJANGO_HUEY in place to add your huey queues in a specific position"""
# # e.g. DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value'
# pass
@hookspec
def get_ADMIN_DATA_VIEWS_URLS():
    """Return a list of entries to add to the admin data views URL list."""
    return []
# @hookspec
# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
# """Mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position"""
# # e.g. ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py')
# pass
# @hookspec
# def register_settings(settings):
# """Mutate settings in place to add your settings / modify existing settings"""
# # settings.SOME_KEY = 'some_value'
# pass
###########################################################################################
@hookspec
def get_urlpatterns():
    """Return a list of URL patterns contributed by this plugin."""
    return [] # e.g. [path('your_plugin_type/plugin_name/url.py', your_view)]
# @hookspec
# def register_urlpatterns(urlpatterns):
# """Mutate urlpatterns in place to add your urlpatterns in a specific position"""
# # e.g. urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view))
# pass
###########################################################################################
@hookspec
def register_checks():
    """Register this plugin's checks with Django's system check framework."""
    pass
###########################################################################################
@hookspec
def ready():
    """Called when a Django app's ``ready()`` is triggered."""
    pass

View file

@ -0,0 +1,98 @@
__package__ = 'abx.django'
import itertools
from benedict import benedict
from .. import pm
def get_INSTALLED_APPS():
    """Chain every plugin's ``get_INSTALLED_APPS`` result into one iterator.

    The per-plugin results are reversed before chaining. The return value is
    an ``itertools.chain`` object, so it can only be iterated once.
    """
    per_plugin_apps = pm.hook.get_INSTALLED_APPS()
    return itertools.chain(*reversed(per_plugin_apps))
# def register_INSTALLLED_APPS(INSTALLED_APPS):
# pm.hook.register_INSTALLED_APPS(INSTALLED_APPS=INSTALLED_APPS)
def get_MIDDLEWARES():
    """Chain every plugin's ``get_MIDDLEWARE`` result into one iterator.

    The per-plugin results are reversed before chaining. The return value is
    an ``itertools.chain`` object, so it can only be iterated once.
    """
    per_plugin_middleware = pm.hook.get_MIDDLEWARE()
    return itertools.chain(*reversed(per_plugin_middleware))
# def register_MIDDLEWARES(MIDDLEWARE):
# pm.hook.register_MIDDLEWARE(MIDDLEWARE=MIDDLEWARE)
def get_AUTHENTICATION_BACKENDS():
    """Chain every plugin's ``get_AUTHENTICATION_BACKENDS`` result into one iterator.

    The per-plugin results are reversed before chaining. The return value is
    an ``itertools.chain`` object, so it can only be iterated once.
    """
    per_plugin_backends = pm.hook.get_AUTHENTICATION_BACKENDS()
    return itertools.chain(*reversed(per_plugin_backends))
# def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
# pm.hook.register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS=AUTHENTICATION_BACKENDS)
def get_STATICFILES_DIRS():
    """Chain every plugin's ``get_STATICFILES_DIRS`` result into one iterator.

    The per-plugin results are reversed before chaining. The return value is
    an ``itertools.chain`` object, so it can only be iterated once.
    """
    per_plugin_dirs = pm.hook.get_STATICFILES_DIRS()
    return itertools.chain(*reversed(per_plugin_dirs))
# def register_STATICFILES_DIRS(STATICFILES_DIRS):
# pm.hook.register_STATICFILES_DIRS(STATICFILES_DIRS=STATICFILES_DIRS)
def get_TEMPLATE_DIRS():
    """Chain every plugin's ``get_TEMPLATE_DIRS`` result into one iterator.

    The per-plugin results are reversed before chaining. The return value is
    an ``itertools.chain`` object, so it can only be iterated once.
    """
    per_plugin_dirs = pm.hook.get_TEMPLATE_DIRS()
    return itertools.chain(*reversed(per_plugin_dirs))
# def register_TEMPLATE_DIRS(TEMPLATE_DIRS):
# pm.hook.register_TEMPLATE_DIRS(TEMPLATE_DIRS=TEMPLATE_DIRS)
def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME='queue.sqlite3'):
    """Merge every plugin's ``get_DJANGO_HUEY_QUEUES`` result into a single dict.

    Args:
        QUEUE_DATABASE_NAME: forwarded unchanged to each plugin's hook implementation.

    Returns:
        A plain dict built by applying ``dict.update()`` with each plugin's
        result in turn, so later results overwrite earlier keys.
    """
    merged_queues = {}
    hook_results = pm.hook.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=QUEUE_DATABASE_NAME)
    for queues_from_plugin in hook_results:
        merged_queues.update(queues_from_plugin)
    return merged_queues
# def register_DJANGO_HUEY(DJANGO_HUEY):
# pm.hook.register_DJANGO_HUEY(DJANGO_HUEY=DJANGO_HUEY)
def get_ADMIN_DATA_VIEWS_URLS():
    """Chain every plugin's ``get_ADMIN_DATA_VIEWS_URLS`` result into one iterator.

    The per-plugin results are reversed before chaining. The return value is
    an ``itertools.chain`` object, so it can only be iterated once.
    """
    per_plugin_urls = pm.hook.get_ADMIN_DATA_VIEWS_URLS()
    return itertools.chain(*reversed(per_plugin_urls))
# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
# pm.hook.register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS=ADMIN_DATA_VIEWS)
# def register_settings(settings):
#     # convert settings dict to a benedict so we can set values using settings.attr = xyz notation
# settings_as_obj = benedict(settings, keypath_separator=None)
# # set default values for settings that are used by plugins
# # settings_as_obj.INSTALLED_APPS = settings_as_obj.get('INSTALLED_APPS', [])
# # settings_as_obj.MIDDLEWARE = settings_as_obj.get('MIDDLEWARE', [])
# # settings_as_obj.AUTHENTICATION_BACKENDS = settings_as_obj.get('AUTHENTICATION_BACKENDS', [])
# # settings_as_obj.STATICFILES_DIRS = settings_as_obj.get('STATICFILES_DIRS', [])
# # settings_as_obj.TEMPLATE_DIRS = settings_as_obj.get('TEMPLATE_DIRS', [])
# # settings_as_obj.DJANGO_HUEY = settings_as_obj.get('DJANGO_HUEY', {'queues': {}})
# # settings_as_obj.ADMIN_DATA_VIEWS = settings_as_obj.get('ADMIN_DATA_VIEWS', {'URLS': []})
# # # call all the hook functions to mutate the settings values in-place
# # register_INSTALLLED_APPS(settings_as_obj.INSTALLED_APPS)
# # register_MIDDLEWARES(settings_as_obj.MIDDLEWARE)
# # register_AUTHENTICATION_BACKENDS(settings_as_obj.AUTHENTICATION_BACKENDS)
# # register_STATICFILES_DIRS(settings_as_obj.STATICFILES_DIRS)
# # register_TEMPLATE_DIRS(settings_as_obj.TEMPLATE_DIRS)
# # register_DJANGO_HUEY(settings_as_obj.DJANGO_HUEY)
# # register_ADMIN_DATA_VIEWS(settings_as_obj.ADMIN_DATA_VIEWS)
# # calls Plugin.settings(settings) on each registered plugin
# pm.hook.register_settings(settings=settings_as_obj)
#     # then finally update the settings globals() object with all the new settings
# # settings.update(settings_as_obj)
def get_urlpatterns():
    """Return a flat list of the URL patterns contributed by every plugin.

    Calls the ``get_urlpatterns`` hook and concatenates each plugin's returned
    list, in the order the plugin manager returns the results.
    """
    # The hook declared in the django hookspec is named `get_urlpatterns`.
    # pluggy only exposes hook callers under their registered names, so the
    # previous `pm.hook.urlpatterns()` lookup would fail with AttributeError
    # unless some plugin happened to register a hook literally named `urlpatterns`.
    return list(itertools.chain(*pm.hook.get_urlpatterns()))
def register_urlpatterns(urlpatterns):
    """Invoke the ``register_urlpatterns`` hook on all plugins with the given ``urlpatterns``.

    NOTE(review): the ``register_urlpatterns`` hookspec is commented out in
    abx.django.hookspec, so this lookup presumably only works if a plugin
    registers an implementation under that name -- confirm before relying on it.
    """
    pm.hook.register_urlpatterns(urlpatterns=urlpatterns)
def register_checks():
    """Invoke the ``register_checks`` hook on all plugins so they can register Django system checks."""
    pm.hook.register_checks()

View file

@ -3,10 +3,12 @@ from pathlib import Path
from pluggy import HookimplMarker
from pluggy import HookspecMarker
hookspec = HookspecMarker("abx")
hookimpl = HookimplMarker("abx")
spec = hookspec = HookspecMarker("abx")
impl = hookimpl = HookimplMarker("abx")
@hookspec
@hookimpl
def get_system_user() -> str:
return Path('~').expanduser().name

View file

@ -1,6 +0,0 @@
from .hookspec import hookspec
@hookspec
def ready(settings):
"""Called when the Django app.ready() is triggered"""
pass

View file

@ -1,90 +0,0 @@
from .hookspec import hookspec
###########################################################################################
@hookspec
def get_INSTALLED_APPS():
"""Return a list of apps to add to INSTALLED_APPS"""
# e.g. ['your_plugin_type.plugin_name']
return []
@hookspec
def register_INSTALLED_APPS(INSTALLED_APPS):
"""Mutate INSTALLED_APPS in place to add your app in a specific position"""
# idx_of_contrib = INSTALLED_APPS.index('django.contrib.auth')
# INSTALLED_APPS.insert(idx_of_contrib + 1, 'your_plugin_type.plugin_name')
pass
@hookspec
def get_TEMPLATE_DIRS():
return [] # e.g. ['your_plugin_type/plugin_name/templates']
@hookspec
def register_TEMPLATE_DIRS(TEMPLATE_DIRS):
"""Install django settings"""
# e.g. TEMPLATE_DIRS.insert(0, 'your_plugin_type/plugin_name/templates')
pass
@hookspec
def get_STATICFILES_DIRS():
return [] # e.g. ['your_plugin_type/plugin_name/static']
@hookspec
def register_STATICFILES_DIRS(STATICFILES_DIRS):
"""Mutate STATICFILES_DIRS in place to add your static dirs in a specific position"""
# e.g. STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static')
pass
@hookspec
def get_MIDDLEWARE():
return [] # e.g. ['your_plugin_type.plugin_name.middleware.YourMiddleware']
@hookspec
def register_MIDDLEWARE(MIDDLEWARE):
"""Mutate MIDDLEWARE in place to add your middleware in a specific position"""
# e.g. MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware')
pass
@hookspec
def get_AUTHENTICATION_BACKENDS():
return [] # e.g. ['django_auth_ldap.backend.LDAPBackend']
@hookspec
def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
"""Mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position"""
# e.g. AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend')
pass
@hookspec
def get_DJANGO_HUEY_QUEUES():
return [] # e.g. [{'name': 'your_plugin_type.plugin_name', 'HUEY': {...}}]
@hookspec
def register_DJANGO_HUEY(DJANGO_HUEY):
"""Mutate DJANGO_HUEY in place to add your huey queues in a specific position"""
# e.g. DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value'
pass
@hookspec
def get_ADMIN_DATA_VIEWS_URLS():
return []
@hookspec
def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
"""Mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position"""
# e.g. ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py')
pass
@hookspec
def register_settings(settings):
"""Mutate settings in place to add your settings / modify existing settings"""
# settings.SOME_KEY = 'some_value'
pass

View file

@ -1,12 +0,0 @@
from .hookspec import hookspec
@hookspec
def get_urlpatterns():
return [] # e.g. [path('your_plugin_type/plugin_name/url.py', your_view)]
@hookspec
def register_urlpatterns(urlpatterns):
"""Mutate urlpatterns in place to add your urlpatterns in a specific position"""
# e.g. urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view))
pass

30
archivebox/abx/manager.py Normal file
View file

@ -0,0 +1,30 @@
import inspect
import pluggy
class PluginManager(pluggy.PluginManager):
    """
    pluggy.PluginManager subclass patched to cope with pydantic models as plugins.
    See: https://github.com/pytest-dev/pluggy/pull/536
    """

    def parse_hookimpl_opts(self, plugin, name: str) -> pluggy.HookimplOpts | None:
        """Return the hookimpl options for ``plugin.<name>``, or None when it cannot be a hookimpl.

        Properties and pydantic model fields are rejected up front, before the
        attribute is ever read from the plugin object itself.
        """
        # Look the attribute up on the class (not the instance) so that a
        # @property is detected without running its getter, which may have side effects.
        owner = plugin if inspect.isclass(plugin) else type(plugin)
        class_level_attr = getattr(owner, name, None)
        if isinstance(class_level_attr, property):
            return None

        # pydantic model fields behave like plain attrs and are never hookimpls.
        # pydantic also alters class/attr __signature__, so inspect.isroutine(...)
        # can raise on them and cannot be used to filter here.
        if hasattr(plugin, "__pydantic_core_schema__"):
            if name in getattr(plugin, "model_fields", {}):
                return None

        try:
            return super().parse_hookimpl_opts(plugin, name)
        except AttributeError:
            # retry against the plugin's class when the instance lookup fails
            return super().parse_hookimpl_opts(type(plugin), name)
pm = PluginManager("abx")

View file

@ -0,0 +1 @@
__package__ = 'abx.pydantic_pkgr'

View file

@ -1,5 +1,5 @@
from .hookspec import hookspec
from ..hookspec import hookspec
###########################################################################################

View file

@ -12,7 +12,6 @@ from collections.abc import Mapping
from typing import Optional, List, IO, Union, Iterable
from pathlib import Path
from ..misc.checks import check_data_folder, check_migrations
from ..misc.logging import stderr

View file

@ -788,16 +788,23 @@ def bump_startup_progress_bar():
def setup_django_minimal():
sys.path.append(str(archivebox.PACKAGE_DIR))
os.environ.setdefault('OUTPUT_DIR', str(archivebox.DATA_DIR))
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
django.setup()
# sys.path.append(str(archivebox.PACKAGE_DIR))
# os.environ.setdefault('OUTPUT_DIR', str(archivebox.DATA_DIR))
# os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
# django.setup()
raise Exception('dont use this anymore')
DJANGO_SET_UP = False
def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
global INITIAL_STARTUP_PROGRESS
global INITIAL_STARTUP_PROGRESS_TASK
global DJANGO_SET_UP
if DJANGO_SET_UP:
raise Exception('django is already set up!')
with Progress(transient=True, expand=True, console=CONSOLE) as INITIAL_STARTUP_PROGRESS:
INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25)
@ -808,14 +815,12 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C
bump_startup_progress_bar()
try:
from django.core.management import call_command
sys.path.append(str(archivebox.PACKAGE_DIR))
os.environ.setdefault('OUTPUT_DIR', str(archivebox.DATA_DIR))
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
bump_startup_progress_bar()
if in_memory_db:
raise Exception('dont use this anymore')
# some commands (e.g. oneshot) dont store a long-lived sqlite3 db file on disk.
# in those cases we create a temporary in-memory db and run the migrations
# immediately to get a usable in-memory-database at startup
@ -833,8 +838,6 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C
from django.conf import settings
from plugins_sys.config.apps import SHELL_CONFIG
# log startup message to the error log
with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
command = ' '.join(sys.argv)
@ -877,6 +880,8 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C
except KeyboardInterrupt:
raise SystemExit(2)
DJANGO_SET_UP = True
INITIAL_STARTUP_PROGRESS = None
INITIAL_STARTUP_PROGRESS_TASK = None

View file

@ -22,7 +22,7 @@ import archivebox
from signal_webhooks.admin import WebhookAdmin
from signal_webhooks.utils import get_webhook_model
# from plugantic.admin import CustomPlugin
# from abx.archivebox.admin import CustomPlugin
from ..util import htmldecode, urldecode

View file

@ -9,6 +9,10 @@ from pathlib import Path
from django.utils.crypto import get_random_string
import abx
import abx.archivebox
import abx.archivebox.use
import abx.django.use
import archivebox
from archivebox.constants import CONSTANTS
@ -19,22 +23,19 @@ IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3]
VERSION = archivebox.__version__
VERSION = archivebox.VERSION
PACKAGE_DIR = archivebox.PACKAGE_DIR
DATA_DIR = archivebox.DATA_DIR
ARCHIVE_DIR = archivebox.DATA_DIR / 'archive'
ARCHIVE_DIR = archivebox.ARCHIVE_DIR
################################################################################
### ArchiveBox Plugin Settings
################################################################################
PLUGIN_HOOKSPECS = [
'abx.hookspec_django_settings',
'abx.hookspec_django_apps',
'abx.hookspec_django_urls',
'abx.hookspec_pydantic_pkgr',
'abx.hookspec_archivebox',
'plugantic.base_check',
'abx.django.hookspec',
'abx.pydantic_pkgr.hookspec',
'abx.archivebox.hookspec',
]
abx.register_hookspecs(PLUGIN_HOOKSPECS)
@ -55,20 +56,20 @@ USER_PLUGINS = abx.get_plugins_in_dirs(USER_PLUGIN_DIRS)
ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS}
PLUGIN_MANAGER = abx.pm
PLUGINS = abx.load_plugins(ALL_PLUGINS)
HOOKS = abx.get_plugins_HOOKS(PLUGINS)
PLUGINS = abx.archivebox.load_archivebox_plugins(PLUGIN_MANAGER, ALL_PLUGINS)
HOOKS = abx.archivebox.use.get_HOOKS(PLUGINS)
CONFIGS = abx.archivebox.use.get_CONFIGS()
FLAT_CONFIG = abx.archivebox.use.get_FLAT_CONFIG()
BINPROVIDERS = abx.archivebox.use.get_BINPROVIDERS()
BINARIES = abx.archivebox.use.get_BINARIES()
EXTRACTORS = abx.archivebox.use.get_EXTRACTORS()
REPLAYERS = abx.archivebox.use.get_REPLAYERS()
CHECKS = abx.archivebox.use.get_CHECKS()
ADMINDATAVIEWS = abx.archivebox.use.get_ADMINDATAVIEWS()
QUEUES = abx.archivebox.use.get_QUEUES()
SEARCHBACKENDS = abx.archivebox.use.get_SEARCHBACKENDS()
CONFIGS = abx.get_plugins_CONFIGS()
# FLAT_CONFIG = abx.get_plugins_FLAT_CONFIG(CONFIGS)
FLAT_CONFIG = CONFIG
BINPROVIDERS = abx.get_plugins_BINPROVIDERS()
BINARIES = abx.get_plugins_BINARIES()
EXTRACTORS = abx.get_plugins_EXTRACTORS()
REPLAYERS = abx.get_plugins_REPLAYERS()
CHECKS = abx.get_plugins_CHECKS()
ADMINDATAVIEWS = abx.get_plugins_ADMINDATAVIEWS()
QUEUES = abx.get_plugins_QUEUES()
SEARCHBACKENDS = abx.get_plugins_SEARCHBACKENDS()
################################################################################
### Django Core Settings
@ -104,14 +105,13 @@ INSTALLED_APPS = [
'django_object_actions', # provides easy Django Admin action buttons on change views https://github.com/crccheck/django-object-actions
# Our ArchiveBox-provided apps
# 'plugantic', # ArchiveBox plugin API definition + finding/registering/calling interface
'queues', # handles starting and managing background workers and processes
'abid_utils', # handles ABID ID creation, handling, and models
'core', # core django model with Snapshot, ArchiveResult, etc.
'api', # Django-Ninja-based Rest API interfaces, config, APIToken model, etc.
# ArchiveBox plugins
*abx.get_plugins_INSTALLLED_APPS(), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,
*abx.django.use.get_INSTALLED_APPS(), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,
# 3rd-party apps from PyPI that need to be loaded last
'admin_data_views', # handles rendering some convenient automatic read-only views of data in Django admin
@ -136,7 +136,7 @@ MIDDLEWARE = [
'core.middleware.ReverseProxyAuthMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'core.middleware.CacheControlMiddleware',
*abx.get_plugins_MIDDLEWARE(),
*abx.django.use.get_MIDDLEWARES(),
]
@ -149,7 +149,7 @@ MIDDLEWARE = [
AUTHENTICATION_BACKENDS = [
'django.contrib.auth.backends.RemoteUserBackend',
'django.contrib.auth.backends.ModelBackend',
*abx.get_plugins_AUTHENTICATION_BACKENDS(),
*abx.django.use.get_AUTHENTICATION_BACKENDS(),
]
@ -177,7 +177,7 @@ STATICFILES_DIRS = [
# for plugin_dir in PLUGIN_DIRS.values()
# if (plugin_dir / 'static').is_dir()
# ],
*abx.get_plugins_STATICFILES_DIRS(),
*abx.django.use.get_STATICFILES_DIRS(),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'static'),
]
@ -188,7 +188,7 @@ TEMPLATE_DIRS = [
# for plugin_dir in PLUGIN_DIRS.values()
# if (plugin_dir / 'templates').is_dir()
# ],
*abx.get_plugins_TEMPLATE_DIRS(),
*abx.django.use.get_TEMPLATE_DIRS(),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'core'),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'admin'),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME),
@ -225,10 +225,12 @@ DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", str(CONSTANTS.DATABAS
QUEUE_DATABASE_NAME = DATABASE_NAME.replace('index.sqlite3', 'queue.sqlite3')
SQLITE_CONNECTION_OPTIONS = {
"ENGINE": "django.db.backends.sqlite3",
"TIME_ZONE": CONSTANTS.TIMEZONE,
"OPTIONS": {
# https://gcollazo.com/optimal-sqlite-settings-for-django/
# # https://litestream.io/tips/#busy-timeout
# https://litestream.io/tips/#busy-timeout
# https://docs.djangoproject.com/en/5.1/ref/databases/#setting-pragma-options
"timeout": 5,
"check_same_thread": False,
"transaction_mode": "IMMEDIATE",
@ -246,17 +248,14 @@ SQLITE_CONNECTION_OPTIONS = {
DATABASES = {
"default": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": DATABASE_NAME,
# DB setup is sometimes modified at runtime by setup_django() in config.py
**SQLITE_CONNECTION_OPTIONS,
},
"queue": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": QUEUE_DATABASE_NAME,
**SQLITE_CONNECTION_OPTIONS,
},
# 'cache': {
# 'ENGINE': 'django.db.backends.sqlite3',
# 'NAME': CACHE_DB_PATH,
# **SQLITE_CONNECTION_OPTIONS,
# },
@ -295,7 +294,7 @@ DJANGO_HUEY = {
"queues": {
HUEY["name"]: HUEY.copy(),
# more registered here at plugin import-time by BaseQueue.register()
**abx.get_plugins_DJANGO_HUEY_QUEUES(),
**abx.django.use.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=QUEUE_DATABASE_NAME),
},
}
@ -482,45 +481,45 @@ ADMIN_DATA_VIEWS = {
},
{
"route": "binaries/",
"view": "plugantic.views.binaries_list_view",
"view": "plugins_sys.config.views.binaries_list_view",
"name": "Binaries",
"items": {
"route": "<str:key>/",
"view": "plugantic.views.binary_detail_view",
"view": "plugins_sys.config.views.binary_detail_view",
"name": "binary",
},
},
{
"route": "plugins/",
"view": "plugantic.views.plugins_list_view",
"view": "plugins_sys.config.views.plugins_list_view",
"name": "Plugins",
"items": {
"route": "<str:key>/",
"view": "plugantic.views.plugin_detail_view",
"view": "plugins_sys.config.views.plugin_detail_view",
"name": "plugin",
},
},
{
"route": "workers/",
"view": "plugantic.views.worker_list_view",
"view": "plugins_sys.config.views.worker_list_view",
"name": "Workers",
"items": {
"route": "<str:key>/",
"view": "plugantic.views.worker_detail_view",
"view": "plugins_sys.config.views.worker_detail_view",
"name": "worker",
},
},
{
"route": "logs/",
"view": "plugantic.views.log_list_view",
"view": "plugins_sys.config.views.log_list_view",
"name": "Logs",
"items": {
"route": "<str:key>/",
"view": "plugantic.views.log_detail_view",
"view": "plugins_sys.config.views.log_detail_view",
"name": "log",
},
},
*abx.get_plugins_ADMIN_DATA_VIEWS_URLS(),
*abx.django.use.get_ADMIN_DATA_VIEWS_URLS(),
],
}
@ -614,5 +613,7 @@ DEBUG_LOGFIRE = DEBUG_LOGFIRE and (DATA_DIR / '.logfire').is_dir()
# JET_TOKEN = 'some-api-token-here'
abx.register_plugins_settings(globals())
abx.django.use.register_checks()
abx.archivebox.use.register_all_hooks(globals())
# import ipdb; ipdb.set_trace()

View file

@ -1,7 +1,7 @@
__package__ = 'archivebox.core'
from typing import Callable
from benedict import benedict
from pathlib import Path
from django.shortcuts import render, redirect
@ -36,12 +36,15 @@ from ..config import (
CONFIG_SCHEMA,
DYNAMIC_CONFIG_SCHEMA,
USER_CONFIG,
CONFIG,
)
from ..logging_util import printable_filesize
from ..util import base_url, htmlencode, ts_to_date_str
from ..search import query_search_index
from .serve_static import serve_static_with_byterange_support
CONFIG = benedict({**CONSTANTS, **CONFIG, **settings.FLAT_CONFIG})
class HomepageView(View):
def get(self, request):
@ -533,8 +536,6 @@ def key_is_safe(key: str) -> bool:
@render_with_table_view
def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
CONFIG = settings.FLAT_CONFIG
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
rows = {

View file

@ -6,8 +6,6 @@ import shutil
import platform
import archivebox
CONSTANTS = archivebox.CONSTANTS
from typing import Dict, List, Optional, Iterable, IO, Union
from pathlib import Path
from datetime import date, datetime
@ -69,9 +67,8 @@ from .index.html import (
from .index.csv import links_to_csv
from .extractors import archive_links, archive_link, ignore_methods
from .misc.logging import stderr, hint, ANSI
from .misc.checks import check_data_folder, check_dependencies
from .misc.checks import check_data_folder
from .config import (
setup_django_minimal,
ConfigDict,
IS_TTY,
DEBUG,
@ -91,7 +88,6 @@ from .config import (
CONFIG,
USER_CONFIG,
get_real_name,
setup_django,
)
from .logging_util import (
TimedProgress,
@ -108,6 +104,7 @@ from .logging_util import (
printable_dependency_version,
)
CONSTANTS = archivebox.CONSTANTS
VERSION = archivebox.VERSION
PACKAGE_DIR = archivebox.PACKAGE_DIR
OUTPUT_DIR = archivebox.DATA_DIR
@ -190,7 +187,6 @@ def version(quiet: bool=False,
out_dir: Path=OUTPUT_DIR) -> None:
"""Print the ArchiveBox version and dependency information"""
setup_django_minimal()
from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SHELL_CONFIG
from plugins_auth.ldap.apps import LDAP_CONFIG
from django.conf import settings
@ -270,7 +266,6 @@ def version(quiet: bool=False,
print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**ANSI))
print()
check_dependencies(CONFIG)
@enforce_types
@ -461,7 +456,7 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
check_data_folder(CONFIG)
from core.models import Snapshot
from django.contrib.auth import get_user_mod, SHELL_CONFIG
from django.contrib.auth import get_user_model
User = get_user_model()
print('{green}[*] Scanning archive main index...{reset}'.format(**ANSI))
@ -602,7 +597,7 @@ def add(urls: Union[str, List[str]],
# Load list of links from the existing index
check_data_folder(CONFIG)
check_dependencies(CONFIG)
# worker = start_cli_workers()
new_links: List[Link] = []
@ -791,7 +786,6 @@ def update(resume: Optional[float]=None,
check_data_folder(CONFIG)
check_dependencies(CONFIG)
# start_cli_workers()
new_links: List[Link] = [] # TODO: Remove input argument: only_new
@ -963,8 +957,6 @@ def setup(out_dir: Path=OUTPUT_DIR) -> None:
if not ARCHIVE_DIR.exists():
run_subcommand('init', stdin=None, pwd=out_dir)
setup_django(out_dir=out_dir, check_db=True)
stderr('\n[+] Installing ArchiveBox dependencies automatically...', color='green')
from plugins_extractor.ytdlp.apps import YTDLP_BINARY
@ -1109,7 +1101,6 @@ def schedule(add: bool=False,
"""Set ArchiveBox to regularly import URLs at specific times using cron"""
check_data_folder(CONFIG)
setup_django_minimal()
from plugins_pkg.pip.apps import ARCHIVEBOX_BINARY
from plugins_sys.config.apps import SHELL_CONFIG, CONSTANTS
@ -1256,6 +1247,8 @@ def server(runserver_args: Optional[List[str]]=None,
from django.core.management import call_command
from django.contrib.auth.models import User
print('{green}[+] Starting ArchiveBox webserver... {reset}'.format(**ANSI))
print(' > Logging errors to ./logs/errors.log')
@ -1306,7 +1299,6 @@ def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
"""Run an ArchiveBox Django management command"""
check_data_folder(CONFIG)
setup_django_minimal()
from django.core.management import execute_from_command_line
if (args and "createsuperuser" in args) and (IN_DOCKER and not IS_TTY):

View file

@ -1,38 +1,10 @@
__package__ = 'archivebox.misc'
# TODO: migrate all of these to new plugantic/base_check.py Check system
from benedict import benedict
from pathlib import Path
import archivebox
from .logging import stderr, hint, ANSI
def check_dependencies(config: benedict, show_help: bool=True) -> None:
# dont do this on startup anymore, it's too slow
pass
# invalid_dependencies = [
# (name, binary) for name, info in settings.BINARIES.items()
# if not binary.
# ]
# if invalid_dependencies and show_help:
# stderr(f'[!] Warning: Missing {len(invalid_dependencies)} recommended dependencies', color='lightyellow')
# for dependency, info in invalid_dependencies:
# stderr(
# ' ! {}: {} ({})'.format(
# dependency,
# info['path'] or 'unable to find binary',
# info['version'] or 'unable to detect version',
# )
# )
# if dependency in ('YOUTUBEDL_BINARY', 'CHROME_BINARY', 'SINGLEFILE_BINARY', 'READABILITY_BINARY', 'MERCURY_BINARY'):
# hint(('To install all packages automatically run: archivebox setup',
# f'or to disable it and silence this warning: archivebox config --set SAVE_{dependency.rsplit("_", 1)[0]}=False',
# ''), prefix=' ')
# stderr('')
from .logging import stderr, ANSI
def check_data_folder(config: benedict) -> None:

View file

@ -1 +0,0 @@
__package__ = 'archivebox.plugantic'

View file

@ -1,12 +0,0 @@
__package__ = 'archivebox.plugantic'
from django.apps import AppConfig
class PluganticConfig(AppConfig):
default_auto_field = 'django.db.models.BigAutoField'
name = 'plugantic'
def ready(self) -> None:
pass
# from django.conf import settings
# print(f'[🧩] Detected {len(settings.INSTALLED_PLUGINS)} settings.INSTALLED_PLUGINS to load...')

View file

@ -1,39 +0,0 @@
__package__ = 'archivebox.plugantic'
# from typing import Dict
from .base_hook import BaseHook, HookType
from ..config_stubs import AttrDict
class BaseAdminDataView(BaseHook):
hook_type: HookType = "ADMINDATAVIEW"
# verbose_name: str = 'Data View'
# route: str = '/npm/installed/'
# view: str = 'plugins_pkg.npm.admin.installed_list_view'
# items: Dict[str, str] = {
# "name": "installed_npm_pkg",
# 'route': '<str:key>/',
# 'view': 'plugins_pkg.npm.admin.installed_detail_view',
# }
def register(self, settings, parent_plugin=None):
# self._plugin = parent_plugin # circular ref to parent only here for easier debugging! never depend on circular backref to parent in real code!
self.register_route_in_admin_data_view_urls(settings)
settings.ADMINDATAVIEWS = getattr(settings, "ADMINDATAVIEWS", None) or AttrDict({})
settings.ADMINDATAVIEWS[self.id] = self
super().register(settings, parent_plugin)
def register_route_in_admin_data_view_urls(self, settings):
route = {
"route": self.route,
"view": self.view,
"name": self.verbose_name,
"items": self.items,
}
if route not in settings.ADMIN_DATA_VIEWS.URLS:
settings.ADMIN_DATA_VIEWS.URLS += [route] # append our route (update in place)

View file

@ -1,39 +0,0 @@
__package__ = 'archivebox.plugantic'
from typing import Iterable, List
from benedict import benedict
from pydantic import Field
from .base_hook import BaseHook, HookType
class BaseSearchBackend(BaseHook):
hook_type: HookType = 'SEARCHBACKEND'
name: str = Field() # e.g. 'singlefile'
@staticmethod
def index(snapshot_id: str, texts: List[str]):
return
@staticmethod
def flush(snapshot_ids: Iterable[str]):
return
@staticmethod
def search(text: str) -> List[str]:
raise NotImplementedError("search method must be implemented by subclass")
def register(self, settings, parent_plugin=None):
# self._plugin = parent_plugin # for debugging only, never rely on this!
# Install queue into settings.SEARCH_BACKENDS
settings.SEARCH_BACKENDS = getattr(settings, "SEARCH_BACKENDS", None) or benedict({})
settings.SEARCH_BACKENDS[self.id] = self
# Record installed hook into settings.HOOKS
super().register(settings, parent_plugin=parent_plugin)

View file

@ -1,72 +0,0 @@
# __package__ = 'archivebox.plugantic.management.commands'
# from django.core.management.base import BaseCommand
# from django.conf import settings
# from pydantic_pkgr import Binary, BinProvider, BrewProvider, EnvProvider, SemVer
# from pydantic_pkgr.binprovider import bin_abspath
# from ....config import bin_path
# from ...base_binary import env
# class Command(BaseCommand):
# def handle(self, *args, method, **options):
# method(*args, **options)
# def add_arguments(self, parser):
# subparsers = parser.add_subparsers(title="sub-commands", required=True)
# list_parser = subparsers.add_parser("list", help="List archivebox runtime dependencies.")
# list_parser.set_defaults(method=self.list)
# install_parser = subparsers.add_parser("install", help="Install archivebox runtime dependencies.")
# install_parser.add_argument("--update", action="store_true", help="Update dependencies to latest versions.")
# install_parser.add_argument("package_names", nargs="+", type=str)
# install_parser.set_defaults(method=self.install)
# def list(self, *args, **options):
# self.stdout.write('################# PLUGINS ####################')
# for plugin in settings.PLUGINS.values():
# self.stdout.write(f'{plugin.name}:')
# for binary in plugin.binaries:
# try:
# binary = binary.load()
# except Exception as e:
# # import ipdb; ipdb.set_trace()
# raise
# self.stdout.write(f' {binary.name.ljust(14)} {str(binary.version).ljust(11)} {binary.binprovider.INSTALLER_BIN.ljust(5)} {binary.abspath}')
# self.stdout.write('\n################# LEGACY ####################')
# for bin_key, dependency in settings.CONFIG.DEPENDENCIES.items():
# bin_name = settings.CONFIG[bin_key]
# self.stdout.write(f'{bin_key}: {bin_name}')
# # binary = Binary(name=package_name, providers=[env])
# # print(binary)
# # try:
# # loaded_bin = binary.load()
# # self.stdout.write(
# # self.style.SUCCESS(f'Successfully loaded {package_name}:') + str(loaded_bin)
# # )
# # except Exception as e:
# # self.stderr.write(
# # self.style.ERROR(f"Error loading {package_name}: {e}")
# # )
# def install(self, *args, bright, **options):
# for package_name in options["package_names"]:
# binary = Binary(name=package_name, providers=[env])
# print(binary)
# try:
# loaded_bin = binary.load()
# self.stdout.write(
# self.style.SUCCESS(f'Successfully loaded {package_name}:') + str(loaded_bin)
# )
# except Exception as e:
# self.stderr.write(
# self.style.ERROR(f"Error loading {package_name}: {e}")
# )

View file

@ -1,337 +0,0 @@
__package__ = 'archivebox.plugantic'
from django.test import TestCase
from .ini_to_toml import convert, TOML_HEADER
TEST_INPUT = """
[SERVER_CONFIG]
IS_TTY=False
USE_COLOR=False
SHOW_PROGRESS=False
IN_DOCKER=False
IN_QEMU=False
PUID=501
PGID=20
OUTPUT_DIR=/opt/archivebox/data
CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf
ONLY_NEW=True
TIMEOUT=60
MEDIA_TIMEOUT=3600
OUTPUT_PERMISSIONS=644
RESTRICT_FILE_NAMES=windows
URL_DENYLIST=\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$
URL_ALLOWLIST=None
ADMIN_USERNAME=None
ADMIN_PASSWORD=None
ENFORCE_ATOMIC_WRITES=True
TAG_SEPARATOR_PATTERN=[,]
SECRET_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
BIND_ADDR=127.0.0.1:8000
ALLOWED_HOSTS=*
DEBUG=False
PUBLIC_INDEX=True
PUBLIC_SNAPSHOTS=True
PUBLIC_ADD_VIEW=False
FOOTER_INFO=Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.
SNAPSHOTS_PER_PAGE=40
CUSTOM_TEMPLATES_DIR=None
TIME_ZONE=UTC
TIMEZONE=UTC
REVERSE_PROXY_USER_HEADER=Remote-User
REVERSE_PROXY_WHITELIST=
LOGOUT_REDIRECT_URL=/
PREVIEW_ORIGINALS=True
LDAP=False
LDAP_SERVER_URI=None
LDAP_BIND_DN=None
LDAP_BIND_PASSWORD=None
LDAP_USER_BASE=None
LDAP_USER_FILTER=None
LDAP_USERNAME_ATTR=None
LDAP_FIRSTNAME_ATTR=None
LDAP_LASTNAME_ATTR=None
LDAP_EMAIL_ATTR=None
LDAP_CREATE_SUPERUSER=False
SAVE_TITLE=True
SAVE_FAVICON=True
SAVE_WGET=True
SAVE_WGET_REQUISITES=True
SAVE_SINGLEFILE=True
SAVE_READABILITY=True
SAVE_MERCURY=True
SAVE_HTMLTOTEXT=True
SAVE_PDF=True
SAVE_SCREENSHOT=True
SAVE_DOM=True
SAVE_HEADERS=True
SAVE_WARC=True
SAVE_GIT=True
SAVE_MEDIA=True
SAVE_ARCHIVE_DOT_ORG=True
RESOLUTION=1440,2000
GIT_DOMAINS=github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht
CHECK_SSL_VALIDITY=True
MEDIA_MAX_SIZE=750m
USER_AGENT=None
CURL_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)
WGET_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5
CHROME_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)
COOKIES_FILE=None
CHROME_USER_DATA_DIR=None
CHROME_TIMEOUT=0
CHROME_HEADLESS=True
CHROME_SANDBOX=True
CHROME_EXTRA_ARGS=[]
YOUTUBEDL_ARGS=['--restrict-filenames', '--trim-filenames', '128', '--write-description', '--write-info-json', '--write-annotations', '--write-thumbnail', '--no-call-home', '--write-sub', '--write-auto-subs', '--convert-subs=srt', '--yes-playlist', '--continue', '--no-abort-on-error', '--ignore-errors', '--geo-bypass', '--add-metadata', '--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)']
YOUTUBEDL_EXTRA_ARGS=[]
WGET_ARGS=['--no-verbose', '--adjust-extension', '--convert-links', '--force-directories', '--backup-converted', '--span-hosts', '--no-parent', '-e', 'robots=off']
WGET_EXTRA_ARGS=[]
CURL_ARGS=['--silent', '--location', '--compressed']
CURL_EXTRA_ARGS=[]
GIT_ARGS=['--recursive']
SINGLEFILE_ARGS=[]
SINGLEFILE_EXTRA_ARGS=[]
MERCURY_ARGS=['--format=text']
MERCURY_EXTRA_ARGS=[]
FAVICON_PROVIDER=https://www.google.com/s2/favicons?domain={}
USE_INDEXING_BACKEND=True
USE_SEARCHING_BACKEND=True
SEARCH_BACKEND_ENGINE=ripgrep
SEARCH_BACKEND_HOST_NAME=localhost
SEARCH_BACKEND_PORT=1491
SEARCH_BACKEND_PASSWORD=SecretPassword
SEARCH_PROCESS_HTML=True
SONIC_COLLECTION=archivebox
SONIC_BUCKET=snapshots
SEARCH_BACKEND_TIMEOUT=90
FTS_SEPARATE_DATABASE=True
FTS_TOKENIZERS=porter unicode61 remove_diacritics 2
FTS_SQLITE_MAX_LENGTH=1000000000
USE_CURL=True
USE_WGET=True
USE_SINGLEFILE=True
USE_READABILITY=True
USE_MERCURY=True
USE_GIT=True
USE_CHROME=True
USE_NODE=True
USE_YOUTUBEDL=True
USE_RIPGREP=True
CURL_BINARY=curl
GIT_BINARY=git
WGET_BINARY=wget
SINGLEFILE_BINARY=single-file
READABILITY_BINARY=readability-extractor
MERCURY_BINARY=postlight-parser
YOUTUBEDL_BINARY=yt-dlp
NODE_BINARY=node
RIPGREP_BINARY=rg
CHROME_BINARY=chrome
POCKET_CONSUMER_KEY=None
USER=squash
PACKAGE_DIR=/opt/archivebox/archivebox
TEMPLATES_DIR=/opt/archivebox/archivebox/templates
ARCHIVE_DIR=/opt/archivebox/data/archive
SOURCES_DIR=/opt/archivebox/data/sources
LOGS_DIR=/opt/archivebox/data/logs
PERSONAS_DIR=/opt/archivebox/data/personas
URL_DENYLIST_PTN=re.compile('\\.(css|js|otf|ttf|woff|woff2|gstatic\\.com|googleapis\\.com/css)(\\?.*)?$', re.IGNORECASE|re.MULTILINE)
URL_ALLOWLIST_PTN=None
DIR_OUTPUT_PERMISSIONS=755
ARCHIVEBOX_BINARY=/opt/archivebox/.venv/bin/archivebox
VERSION=0.8.0
COMMIT_HASH=102e87578c6036bb0132dd1ebd17f8f05ffc880f
BUILD_TIME=2024-05-15 03:28:05 1715768885
VERSIONS_AVAILABLE=None
CAN_UPGRADE=False
PYTHON_BINARY=/opt/archivebox/.venv/bin/python3.10
PYTHON_VERSION=3.10.14
DJANGO_BINARY=/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py
DJANGO_VERSION=5.0.6 final (0)
SQLITE_BINARY=/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py
SQLITE_VERSION=2.6.0
CURL_VERSION=curl 8.4.0 (x86_64-apple-darwin23.0)
WGET_VERSION=GNU Wget 1.24.5
WGET_AUTO_COMPRESSION=True
RIPGREP_VERSION=ripgrep 14.1.0
SINGLEFILE_VERSION=None
READABILITY_VERSION=None
MERCURY_VERSION=None
GIT_VERSION=git version 2.44.0
YOUTUBEDL_VERSION=2024.04.09
CHROME_VERSION=Google Chrome 124.0.6367.207
NODE_VERSION=v21.7.3
"""
EXPECTED_OUTPUT = TOML_HEADER + '''[SERVER_CONFIG]
IS_TTY = false
USE_COLOR = false
SHOW_PROGRESS = false
IN_DOCKER = false
IN_QEMU = false
PUID = 501
PGID = 20
OUTPUT_DIR = "/opt/archivebox/data"
CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf"
ONLY_NEW = true
TIMEOUT = 60
MEDIA_TIMEOUT = 3600
OUTPUT_PERMISSIONS = 644
RESTRICT_FILE_NAMES = "windows"
URL_DENYLIST = "\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$"
URL_ALLOWLIST = null
ADMIN_USERNAME = null
ADMIN_PASSWORD = null
ENFORCE_ATOMIC_WRITES = true
TAG_SEPARATOR_PATTERN = "[,]"
SECRET_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
BIND_ADDR = "127.0.0.1:8000"
ALLOWED_HOSTS = "*"
DEBUG = false
PUBLIC_INDEX = true
PUBLIC_SNAPSHOTS = true
PUBLIC_ADD_VIEW = false
FOOTER_INFO = "Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests."
SNAPSHOTS_PER_PAGE = 40
CUSTOM_TEMPLATES_DIR = null
TIME_ZONE = "UTC"
TIMEZONE = "UTC"
REVERSE_PROXY_USER_HEADER = "Remote-User"
REVERSE_PROXY_WHITELIST = ""
LOGOUT_REDIRECT_URL = "/"
PREVIEW_ORIGINALS = true
LDAP = false
LDAP_SERVER_URI = null
LDAP_BIND_DN = null
LDAP_BIND_PASSWORD = null
LDAP_USER_BASE = null
LDAP_USER_FILTER = null
LDAP_USERNAME_ATTR = null
LDAP_FIRSTNAME_ATTR = null
LDAP_LASTNAME_ATTR = null
LDAP_EMAIL_ATTR = null
LDAP_CREATE_SUPERUSER = false
SAVE_TITLE = true
SAVE_FAVICON = true
SAVE_WGET = true
SAVE_WGET_REQUISITES = true
SAVE_SINGLEFILE = true
SAVE_READABILITY = true
SAVE_MERCURY = true
SAVE_HTMLTOTEXT = true
SAVE_PDF = true
SAVE_SCREENSHOT = true
SAVE_DOM = true
SAVE_HEADERS = true
SAVE_WARC = true
SAVE_GIT = true
SAVE_MEDIA = true
SAVE_ARCHIVE_DOT_ORG = true
RESOLUTION = [1440, 2000]
GIT_DOMAINS = "github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht"
CHECK_SSL_VALIDITY = true
MEDIA_MAX_SIZE = "750m"
USER_AGENT = null
CURL_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)"
WGET_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5"
CHROME_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)"
COOKIES_FILE = null
CHROME_USER_DATA_DIR = null
CHROME_TIMEOUT = false
CHROME_HEADLESS = true
CHROME_SANDBOX = true
CHROME_EXTRA_ARGS = []
YOUTUBEDL_ARGS = ["--restrict-filenames", "--trim-filenames", "128", "--write-description", "--write-info-json", "--write-annotations", "--write-thumbnail", "--no-call-home", "--write-sub", "--write-auto-subs", "--convert-subs=srt", "--yes-playlist", "--continue", "--no-abort-on-error", "--ignore-errors", "--geo-bypass", "--add-metadata", "--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)"]
YOUTUBEDL_EXTRA_ARGS = []
WGET_ARGS = ["--no-verbose", "--adjust-extension", "--convert-links", "--force-directories", "--backup-converted", "--span-hosts", "--no-parent", "-e", "robots=off"]
WGET_EXTRA_ARGS = []
CURL_ARGS = ["--silent", "--location", "--compressed"]
CURL_EXTRA_ARGS = []
GIT_ARGS = ["--recursive"]
SINGLEFILE_ARGS = []
SINGLEFILE_EXTRA_ARGS = []
MERCURY_ARGS = ["--format=text"]
MERCURY_EXTRA_ARGS = []
FAVICON_PROVIDER = "https://www.google.com/s2/favicons?domain={}"
USE_INDEXING_BACKEND = true
USE_SEARCHING_BACKEND = true
SEARCH_BACKEND_ENGINE = "ripgrep"
SEARCH_BACKEND_HOST_NAME = "localhost"
SEARCH_BACKEND_PORT = 1491
SEARCH_BACKEND_PASSWORD = "SecretPassword"
SEARCH_PROCESS_HTML = true
SONIC_COLLECTION = "archivebox"
SONIC_BUCKET = "snapshots"
SEARCH_BACKEND_TIMEOUT = 90
FTS_SEPARATE_DATABASE = true
FTS_TOKENIZERS = "porter unicode61 remove_diacritics 2"
FTS_SQLITE_MAX_LENGTH = 1000000000
USE_CURL = true
USE_WGET = true
USE_SINGLEFILE = true
USE_READABILITY = true
USE_MERCURY = true
USE_GIT = true
USE_CHROME = true
USE_NODE = true
USE_YOUTUBEDL = true
USE_RIPGREP = true
CURL_BINARY = "curl"
GIT_BINARY = "git"
WGET_BINARY = "wget"
SINGLEFILE_BINARY = "single-file"
READABILITY_BINARY = "readability-extractor"
MERCURY_BINARY = "postlight-parser"
YOUTUBEDL_BINARY = "yt-dlp"
NODE_BINARY = "node"
RIPGREP_BINARY = "rg"
CHROME_BINARY = "chrome"
POCKET_CONSUMER_KEY = null
USER = "squash"
PACKAGE_DIR = "/opt/archivebox/archivebox"
TEMPLATES_DIR = "/opt/archivebox/archivebox/templates"
ARCHIVE_DIR = "/opt/archivebox/data/archive"
SOURCES_DIR = "/opt/archivebox/data/sources"
LOGS_DIR = "/opt/archivebox/data/logs"
PERSONAS_DIR = "/opt/archivebox/data/personas"
URL_DENYLIST_PTN = "re.compile(\'\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$\', re.IGNORECASE|re.MULTILINE)"
URL_ALLOWLIST_PTN = null
DIR_OUTPUT_PERMISSIONS = 755
ARCHIVEBOX_BINARY = "/opt/archivebox/.venv/bin/archivebox"
VERSION = "0.8.0"
COMMIT_HASH = "102e87578c6036bb0132dd1ebd17f8f05ffc880f"
BUILD_TIME = "2024-05-15 03:28:05 1715768885"
VERSIONS_AVAILABLE = null
CAN_UPGRADE = false
PYTHON_BINARY = "/opt/archivebox/.venv/bin/python3.10"
PYTHON_VERSION = "3.10.14"
DJANGO_BINARY = "/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py"
DJANGO_VERSION = "5.0.6 final (0)"
SQLITE_BINARY = "/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py"
SQLITE_VERSION = "2.6.0"
CURL_VERSION = "curl 8.4.0 (x86_64-apple-darwin23.0)"
WGET_VERSION = "GNU Wget 1.24.5"
WGET_AUTO_COMPRESSION = true
RIPGREP_VERSION = "ripgrep 14.1.0"
SINGLEFILE_VERSION = null
READABILITY_VERSION = null
MERCURY_VERSION = null
GIT_VERSION = "git version 2.44.0"
YOUTUBEDL_VERSION = "2024.04.09"
CHROME_VERSION = "Google Chrome 124.0.6367.207"
NODE_VERSION = "v21.7.3"'''
class IniToTomlTests(TestCase):
    """Checks that ``convert`` turns TEST_INPUT into EXPECTED_OUTPUT and is idempotent."""

    # Show the complete diff when an assertion fails: the fixtures are long
    # multi-line strings, and the default truncated diff hides the mismatch.
    maxDiff = None

    def test_convert(self):
        """Convert ini -> toml, then feed the result back through ``convert``."""
        first_output = convert(TEST_INPUT)      # make sure ini -> toml parses correctly
        second_output = convert(first_output)   # make sure toml -> toml parses/dumps consistently

        # assertEqual (rather than a bare ``assert``) still runs under ``python -O``
        # and prints a line-by-line diff of the two strings on failure.
        self.assertEqual(first_output, EXPECTED_OUTPUT)
        self.assertEqual(second_output, first_output)   # make sure parsing is idempotent

View file

@ -1,4 +1,4 @@
__package__ = 'archivebox.plugins_auth.ldap'
__package__ = 'plugins_auth.ldap'
import inspect
@ -6,13 +6,11 @@ from typing import List, Dict
from pathlib import Path
from pydantic import InstanceOf
from django.conf import settings
from pydantic_pkgr import BinProviderName, ProviderLookupDict, SemVer
from plugantic.base_plugin import BasePlugin
from plugantic.base_hook import BaseHook
from plugantic.base_binary import BaseBinary, BaseBinProvider
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_hook import BaseHook
from abx.archivebox.base_binary import BaseBinary, BaseBinProvider
from plugins_pkg.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER
from .settings import LDAP_CONFIG, LDAP_LIB
@ -51,5 +49,4 @@ class LdapAuthPlugin(BasePlugin):
PLUGIN = LdapAuthPlugin()
# PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig

View file

@ -3,9 +3,9 @@ __package__ = 'archivebox.plugins_auth.ldap'
import sys
from typing import Dict, List, ClassVar, Optional
from pydantic import Field, model_validator
from pydantic import Field, model_validator, computed_field
from ...plugantic.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
LDAP_LIB = None
try:
@ -35,10 +35,10 @@ class LdapConfig(BaseConfigSet):
LDAP_USER_FILTER: str = Field(default=None)
LDAP_CREATE_SUPERUSER: bool = Field(default=False)
LDAP_USERNAME_ATTR: str = Field(default=None)
LDAP_FIRSTNAME_ATTR: str = Field(default=None)
LDAP_LASTNAME_ATTR: str = Field(default=None)
LDAP_EMAIL_ATTR: str = Field(default=None)
LDAP_USERNAME_ATTR: str = Field(default='username')
LDAP_FIRSTNAME_ATTR: str = Field(default='first_name')
LDAP_LASTNAME_ATTR: str = Field(default='last_name')
LDAP_EMAIL_ATTR: str = Field(default='email')
@model_validator(mode='after')
def validate_ldap_config(self):
@ -50,14 +50,7 @@ class LdapConfig(BaseConfigSet):
self.update(LDAP_ENABLED=False)
# Check that all required LDAP config options are set
all_config_is_set = (
self.LDAP_SERVER_URI
and self.LDAP_BIND_DN
and self.LDAP_BIND_PASSWORD
and self.LDAP_USER_BASE
and self.LDAP_USER_FILTER
)
if self.LDAP_ENABLED and not all_config_is_set:
if self.LDAP_ENABLED and not self.LDAP_CONFIG_IS_SET:
missing_config_options = [
key for key, value in self.model_dump().items()
if value is None and key != 'LDAP_ENABLED'
@ -66,7 +59,20 @@ class LdapConfig(BaseConfigSet):
sys.stderr.write(f' Missing: {", ".join(missing_config_options)}\n')
self.update(LDAP_ENABLED=False)
return self
@computed_field
@property
def LDAP_CONFIG_IS_SET(self) -> bool:
    """Tell whether LDAP is usable: library present, enabled, and every required option set."""
    # Without the LDAP library nothing else matters.
    if not LDAP_LIB:
        return False

    # When LDAP is switched off, hand back that (falsy) flag value itself.
    ldap_enabled = self.LDAP_ENABLED
    if not ldap_enabled:
        return ldap_enabled

    required_options = (
        'LDAP_SERVER_URI',
        'LDAP_BIND_DN',
        'LDAP_BIND_PASSWORD',
        'LDAP_USER_BASE',
        'LDAP_USER_FILTER',
    )
    # Lazy generator: stops at the first unset option, in the order listed above.
    return all(getattr(self, option) for option in required_options)
@computed_field
@property
def LDAP_USER_ATTR_MAP(self) -> Dict[str, str]:
return {
@ -76,6 +82,7 @@ class LdapConfig(BaseConfigSet):
'email': self.LDAP_EMAIL_ATTR,
}
@computed_field
@property
def AUTHENTICATION_BACKENDS(self) -> List[str]:
return [
@ -83,9 +90,10 @@ class LdapConfig(BaseConfigSet):
'django_auth_ldap.backend.LDAPBackend',
]
@computed_field
@property
def AUTH_LDAP_USER_SEARCH(self) -> Optional[object]:
return LDAP_LIB and LDAPSearch(
return self.LDAP_USER_FILTER and LDAPSearch(
self.LDAP_USER_BASE,
LDAP_LIB.SCOPE_SUBTREE, # type: ignore
'(&(' + self.LDAP_USERNAME_ATTR + '=%(user)s)' + self.LDAP_USER_FILTER + ')',

View file

@ -2,9 +2,9 @@ __package__ = 'archivebox.plugins_extractor.archivedotorg'
from typing import List
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet
from plugantic.base_hook import BaseHook
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet
from abx.archivebox.base_hook import BaseHook
###################### Config ##########################

View file

@ -21,12 +21,12 @@ from pydantic_pkgr import (
import archivebox
# Depends on other Django apps:
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_binary import BaseBinary, env
# from plugantic.base_extractor import BaseExtractor
# from plugantic.base_queue import BaseQueue
from plugantic.base_hook import BaseHook
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_binary import BaseBinary, env
# from abx.archivebox.base_extractor import BaseExtractor
# from abx.archivebox.base_queue import BaseQueue
from abx.archivebox.base_hook import BaseHook
# Depends on Other Plugins:
from plugins_sys.config.apps import ARCHIVING_CONFIG, SHELL_CONFIG

View file

@ -2,9 +2,9 @@ __package__ = 'archivebox.plugins_extractor.favicon'
from typing import List
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet
from plugantic.base_hook import BaseHook
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet
from abx.archivebox.base_hook import BaseHook
###################### Config ##########################

View file

@ -11,11 +11,11 @@ from pydantic import InstanceOf, Field, validate_call
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, ShallowBinary
# Depends on other Django apps:
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_binary import BaseBinary, env
from plugantic.base_extractor import BaseExtractor
from plugantic.base_hook import BaseHook
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_binary import BaseBinary, env
from abx.archivebox.base_extractor import BaseExtractor
from abx.archivebox.base_hook import BaseHook
# Depends on Other Plugins:
from plugins_sys.config.apps import ARCHIVING_CONFIG

View file

@ -11,12 +11,12 @@ from pydantic import InstanceOf, Field, validate_call
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, bin_abspath, ShallowBinary
# Depends on other Django apps:
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_binary import BaseBinary, env
from plugantic.base_extractor import BaseExtractor
from plugantic.base_queue import BaseQueue
from plugantic.base_hook import BaseHook
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_binary import BaseBinary, env
from abx.archivebox.base_extractor import BaseExtractor
from abx.archivebox.base_queue import BaseQueue
from abx.archivebox.base_hook import BaseHook
# Depends on Other Plugins:
from plugins_sys.config.apps import ARCHIVING_CONFIG
@ -84,19 +84,6 @@ class SinglefileBinary(BaseBinary):
return BaseBinary.install(self, binprovider_name=binprovider_name or LIB_NPM_BINPROVIDER.name)
# ALTERNATIVE INSTALL METHOD using Ansible:
# install_playbook = PLUGANTIC_DIR / 'ansible' / 'install_singlefile.yml'
# singlefile_bin = run_playbook(install_playbook, data_dir=settings.CONFIG.OUTPUT_DIR, quiet=quiet).BINARIES.singlefile
# return self.__class__.model_validate(
# {
# **self.model_dump(),
# "loaded_abspath": singlefile_bin.abspath,
# "loaded_version": singlefile_bin.version,
# "loaded_binprovider": env,
# "binproviders_supported": self.binproviders_supported,
# }
# )
SINGLEFILE_BINARY = SinglefileBinary()

View file

@ -0,0 +1,74 @@
from typing import List
from abx.archivebox.base_plugin import BasePlugin, InstanceOf, BaseHook
# class WgetToggleConfig(ConfigSet):
# section: ConfigSectionName = 'ARCHIVE_METHOD_TOGGLES'
# SAVE_WGET: bool = True
# SAVE_WARC: bool = True
# class WgetDependencyConfig(ConfigSet):
# section: ConfigSectionName = 'DEPENDENCY_CONFIG'
# WGET_BINARY: str = Field(default='wget')
# WGET_ARGS: Optional[List[str]] = Field(default=None)
# WGET_EXTRA_ARGS: List[str] = []
# WGET_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
# class WgetOptionsConfig(ConfigSet):
# section: ConfigSectionName = 'ARCHIVE_METHOD_OPTIONS'
# # loaded from shared config
# WGET_AUTO_COMPRESSION: bool = Field(default=True)
# SAVE_WGET_REQUISITES: bool = Field(default=True)
# WGET_USER_AGENT: str = Field(default='', alias='USER_AGENT')
# WGET_TIMEOUT: int = Field(default=60, alias='TIMEOUT')
# WGET_CHECK_SSL_VALIDITY: bool = Field(default=True, alias='CHECK_SSL_VALIDITY')
# WGET_RESTRICT_FILE_NAMES: str = Field(default='windows', alias='RESTRICT_FILE_NAMES')
# WGET_COOKIES_FILE: Optional[Path] = Field(default=None, alias='COOKIES_FILE')
# CONFIG = {
# 'CHECK_SSL_VALIDITY': False,
# 'SAVE_WARC': False,
# 'TIMEOUT': 999,
# }
# WGET_CONFIG = [
# WgetToggleConfig(**CONFIG),
# WgetDependencyConfig(**CONFIG),
# WgetOptionsConfig(**CONFIG),
# ]
# class WgetExtractor(Extractor):
# name: ExtractorName = 'wget'
# binary: Binary = WgetBinary()
# def get_output_path(self, snapshot) -> Path:
# return get_wget_output_path(snapshot)
# class WarcExtractor(Extractor):
# name: ExtractorName = 'warc'
# binary: Binary = WgetBinary()
# def get_output_path(self, snapshot) -> Path:
# return get_wget_output_path(snapshot)
# Plugin declaration for wget. It currently registers no hooks: the config
# and extractor classes sketched in the commented-out code above are not wired in.
class WgetPlugin(BasePlugin):
    app_label: str = 'wget'        # label assigned to this plugin
    verbose_name: str = 'WGET'     # display name assigned to this plugin
    hooks: List[InstanceOf[BaseHook]] = []   # empty: nothing is registered by this plugin yet
PLUGIN = WgetPlugin()
DJANGO_APP = PLUGIN.AppConfig

View file

@ -7,10 +7,10 @@ from pydantic import InstanceOf, Field, model_validator, AliasChoices
from django.conf import settings
from pydantic_pkgr import BinProvider, BinName, BinProviderName, ProviderLookupDict
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_binary import BaseBinary, env, apt, brew
from plugantic.base_hook import BaseHook
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
from abx.archivebox.base_hook import BaseHook
from plugins_sys.config.apps import ARCHIVING_CONFIG
from plugins_pkg.pip.apps import pip

View file

@ -11,10 +11,10 @@ from pydantic import InstanceOf, model_validator
from pydantic_pkgr import BinProvider, NpmProvider, BinName, PATHStr, BinProviderName
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet
from plugantic.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
from plugantic.base_hook import BaseHook
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet
from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
from abx.archivebox.base_hook import BaseHook
###################### Config ##########################

View file

@ -15,11 +15,11 @@ from django.db.backends.sqlite3.base import Database as django_sqlite3 # typ
from django.core.checks import Error, Tags
from pydantic_pkgr import BinProvider, PipProvider, BinName, BinProviderName, ProviderLookupDict, SemVer
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_check import BaseCheck
from plugantic.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
from plugantic.base_hook import BaseHook
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_check import BaseCheck
from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
from abx.archivebox.base_hook import BaseHook
from ...misc.logging import hint

View file

@ -22,12 +22,12 @@ from pydantic_pkgr import (
import archivebox
# Depends on other Django apps:
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet
from plugantic.base_binary import BaseBinary, BaseBinProvider, env
# from plugantic.base_extractor import BaseExtractor
# from plugantic.base_queue import BaseQueue
from plugantic.base_hook import BaseHook
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet
from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env
# from abx.archivebox.base_extractor import BaseExtractor
# from abx.archivebox.base_queue import BaseQueue
from abx.archivebox.base_hook import BaseHook
from plugins_pkg.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER

View file

@ -19,12 +19,12 @@ from pydantic_pkgr import (
import archivebox
# Depends on other Django apps:
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet
from plugantic.base_binary import BaseBinary, BaseBinProvider, env
# from plugantic.base_extractor import BaseExtractor
# from plugantic.base_queue import BaseQueue
from plugantic.base_hook import BaseHook
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet
from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env
# from abx.archivebox.base_extractor import BaseExtractor
# from abx.archivebox.base_queue import BaseQueue
from abx.archivebox.base_hook import BaseHook
# Depends on Other Plugins:
from plugins_pkg.npm.apps import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER

View file

@ -13,11 +13,11 @@ from pydantic import InstanceOf, Field
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName
# Depends on other Django apps:
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_binary import BaseBinary, env, apt, brew
from plugantic.base_hook import BaseHook
from plugantic.base_searchbackend import BaseSearchBackend
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
from abx.archivebox.base_hook import BaseHook
from abx.archivebox.base_searchbackend import BaseSearchBackend
# Depends on Other Plugins:
from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG

View file

@ -11,11 +11,11 @@ from pydantic import InstanceOf, Field, model_validator
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName
# Depends on other Django apps:
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_binary import BaseBinary, env, brew
from plugantic.base_hook import BaseHook
from plugantic.base_searchbackend import BaseSearchBackend
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_binary import BaseBinary, env, brew
from abx.archivebox.base_hook import BaseHook
from abx.archivebox.base_searchbackend import BaseSearchBackend
# Depends on Other Plugins:
from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG

View file

@ -1,21 +1,20 @@
__package__ = 'archivebox.plugins_search.sqlite'
import sys
import sqlite3
import codecs
import sqlite3
from typing import List, ClassVar, Iterable, Callable
from django.conf import settings
from django.db import connection as database
from django.core.exceptions import ImproperlyConfigured
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field, model_validator
# Depends on other Django apps:
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_hook import BaseHook
from plugantic.base_searchbackend import BaseSearchBackend
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_hook import BaseHook
from abx.archivebox.base_searchbackend import BaseSearchBackend
# Depends on Other Plugins:
from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG
@ -52,6 +51,7 @@ class SqliteftsConfig(BaseConfigSet):
if self.SQLITEFTS_SEPARATE_DATABASE:
return lambda: sqlite3.connect(self.SQLITEFTS_DB)
else:
from django.db import connection as database
return database.cursor
@property
@ -63,16 +63,20 @@ class SqliteftsConfig(BaseConfigSet):
@property
def SQLITE_LIMIT_LENGTH(self) -> int:
from django.db import connection as database
# Only Python >= 3.11 supports sqlite3.Connection.getlimit(),
# so fall back to the default if the API to get the real value isn't present
try:
limit_id = sqlite3.SQLITE_LIMIT_LENGTH
try:
if self.SQLITEFTS_SEPARATE_DATABASE:
cursor = self.get_connection()
return cursor.connection.getlimit(limit_id)
else:
with database.temporary_connection() as cursor: # type: ignore[attr-defined]
return cursor.connection.getlimit(limit_id)
except AttributeError:
return database.getlimit(limit_id)
except AttributeError:
except (AttributeError, ImproperlyConfigured):
return self.SQLITEFTS_MAX_LENGTH
SQLITEFTS_CONFIG = SqliteftsConfig()

View file

@ -1,21 +1,24 @@
__package__ = 'archivebox.plugins_sys.config'
__package__ = 'plugins_sys.config'
import os
import sys
import shutil
import archivebox
from typing import List, ClassVar, Dict, Optional
from datetime import datetime
from pathlib import Path
from pydantic import InstanceOf, Field, field_validator, model_validator, computed_field
from rich import print
from pydantic import InstanceOf, Field, field_validator, model_validator, computed_field
from django.utils.crypto import get_random_string
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_hook import BaseHook, HookType
from .constants import CONSTANTS, CONSTANTS_CONFIG
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_hook import BaseHook
import archivebox
from archivebox.constants import CONSTANTS, CONSTANTS_CONFIG # noqa
###################### Config ##########################
@ -123,6 +126,7 @@ class StorageConfig(BaseConfigSet):
# not supposed to be user settable:
DIR_OUTPUT_PERMISSIONS: str = Field(default=lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5'))
STORAGE_CONFIG = StorageConfig()
@ -249,13 +253,13 @@ DJANGO_APP = PLUGIN.AppConfig
# register django apps
@archivebox.plugin.hookimpl
def get_INSTALLED_APPS():
return [DJANGO_APP.name]
# # register django apps
# @abx.hookimpl
# def get_INSTALLED_APPS():
# return [DJANGO_APP.name]
# register configs
@archivebox.plugin.hookimpl
def register_CONFIG():
return PLUGIN.HOOKS_BY_TYPE['CONFIG'].values()
# # register configs
# @abx.hookimpl
# def register_CONFIG():
# return PLUGIN.HOOKS_BY_TYPE['CONFIG'].values()

View file

@ -1,8 +1,9 @@
__package__ = 'archivebox.plugantic'
__package__ = 'abx.archivebox'
import os
import inspect
from typing import Any, List, Dict, cast
from benedict import benedict
from django.http import HttpRequest
from django.conf import settings
@ -14,8 +15,7 @@ from admin_data_views.utils import render_with_table_view, render_with_item_view
import archivebox
from ..config_stubs import AttrDict
from ..util import parse_date
from archivebox.util import parse_date
def obj_to_yaml(obj: Any, indent: int=0) -> str:
@ -255,7 +255,7 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
)
all_config_entries = cast(List[Dict[str, Any]], supervisor.getAllConfigInfo() or [])
all_config = {config["name"]: AttrDict(config) for config in all_config_entries}
all_config = {config["name"]: benedict(config) for config in all_config_entries}
# Add top row for supervisord process manager
rows["Name"].append(ItemLink('supervisord', key='supervisord'))
@ -274,7 +274,7 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
# Add a row for each worker process managed by supervisord
for proc in cast(List[Dict[str, Any]], supervisor.getAllProcessInfo()):
proc = AttrDict(proc)
proc = benedict(proc)
# {
# "name": "daphne",
# "group": "daphne",
@ -334,7 +334,7 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
start_ts = [line for line in relevant_logs.split("\n") if "RPC interface 'supervisor' initialized" in line][-1].split(",", 1)[0]
uptime = str(timezone.now() - parse_date(start_ts)).split(".")[0]
proc = AttrDict(
proc = benedict(
{
"name": "supervisord",
"pid": supervisor.getPID(),
@ -347,7 +347,7 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
}
)
else:
proc = AttrDict(get_worker(supervisor, key) or {})
proc = benedict(get_worker(supervisor, key) or {})
relevant_config = [config for config in all_config if config['name'] == key][0]
relevant_logs = supervisor.tailProcessStdoutLog(key, 0, 10_000_000)[0]

View file

@ -26,7 +26,7 @@ except ImportError:
from archivebox.constants import STATICFILE_EXTENSIONS
from archivebox.plugins_sys.config.apps import ARCHIVING_CONFIG
from plugins_sys.config.apps import ARCHIVING_CONFIG
from .misc.logging import COLOR_DICT