merge plugantic and abx, all praise be to praise our glorious pluggy gods

This commit is contained in:
Nick Sweeting 2024-09-27 00:41:21 -07:00
parent 4f42eb0313
commit 8d3f45b720
No known key found for this signature in database
59 changed files with 870 additions and 1343 deletions

View file

@ -1,19 +1,19 @@
import itertools __package__ = 'abx'
import importlib import importlib
from pathlib import Path from pathlib import Path
from typing import Dict from typing import Dict
from benedict import benedict
import pluggy
import archivebox
from . import hookspec as base_spec from . import hookspec as base_spec
from .hookspec import hookimpl, hookspec # noqa from .hookspec import hookimpl, hookspec # noqa
from .manager import pm, PluginManager # noqa
pm = pluggy.PluginManager("abx")
pm.add_hookspecs(base_spec) pm.add_hookspecs(base_spec)
###### PLUGIN DISCOVERY AND LOADING ########################################################
def register_hookspecs(hookspecs): def register_hookspecs(hookspecs):
for hookspec_import_path in hookspecs: for hookspec_import_path in hookspecs:
hookspec_module = importlib.import_module(hookspec_import_path) hookspec_module = importlib.import_module(hookspec_import_path)
@ -48,27 +48,6 @@ def get_plugins_in_dirs(plugin_dirs: Dict[str, Path]):
DETECTED_PLUGINS.update(find_plugins_in_dir(plugin_dir, prefix=plugin_prefix)) DETECTED_PLUGINS.update(find_plugins_in_dir(plugin_dir, prefix=plugin_prefix))
return DETECTED_PLUGINS return DETECTED_PLUGINS
def get_builtin_plugins():
    """Find the plugins that live in the built-in plugin folders of the archivebox package.

    Each built-in category folder under ``archivebox.PACKAGE_DIR`` is scanned with
    ``find_plugins_in_dir`` (using the folder name as prefix) and the results are
    merged into a single dict, in category order.
    """
    builtin_categories = (
        'plugins_sys',
        'plugins_pkg',
        'plugins_auth',
        'plugins_search',
        'plugins_extractor',
    )
    detected = {}
    for category in builtin_categories:
        category_dir = archivebox.PACKAGE_DIR / category
        detected.update(find_plugins_in_dir(category_dir, prefix=category))
    return detected
def get_user_plugins():
    """Find the plugins located in the ``user_plugins`` folder of ``archivebox.DATA_DIR``."""
    user_plugins_dir = archivebox.DATA_DIR / 'user_plugins'
    return find_plugins_in_dir(user_plugins_dir, prefix='user_plugins')
# BUILTIN_PLUGINS = get_builtin_plugins()
# PIP_PLUGINS = get_pip_installed_plugins()
# USER_PLUGINS = get_user_plugins()
# ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS}
# Load all plugins from pip packages, archivebox built-ins, and user plugins # Load all plugins from pip packages, archivebox built-ins, and user plugins
@ -76,7 +55,7 @@ def load_plugins(plugins_dict: Dict[str, Path]):
LOADED_PLUGINS = {} LOADED_PLUGINS = {}
for plugin_module, plugin_dir in plugins_dict.items(): for plugin_module, plugin_dir in plugins_dict.items():
# print(f'Loading plugin: {plugin_module} from {plugin_dir}') # print(f'Loading plugin: {plugin_module} from {plugin_dir}')
plugin_module_loaded = importlib.import_module(plugin_module + '.apps') plugin_module_loaded = importlib.import_module(plugin_module)
pm.register(plugin_module_loaded) pm.register(plugin_module_loaded)
LOADED_PLUGINS[plugin_module] = plugin_module_loaded.PLUGIN LOADED_PLUGINS[plugin_module] = plugin_module_loaded.PLUGIN
# print(f' √ Loaded plugin: {plugin_module}') # print(f' √ Loaded plugin: {plugin_module}')
@ -100,172 +79,3 @@ def get_registered_plugins():
return plugins return plugins
def get_plugins_INSTALLLED_APPS():
    """Return one flat iterator over the results of every get_INSTALLED_APPS hook."""
    per_plugin_results = pm.hook.get_INSTALLED_APPS()
    return itertools.chain.from_iterable(per_plugin_results)
def register_plugins_INSTALLLED_APPS(INSTALLED_APPS):
    """Hand INSTALLED_APPS to every plugin's register_INSTALLED_APPS hook."""
    call_hook = pm.hook.register_INSTALLED_APPS
    call_hook(INSTALLED_APPS=INSTALLED_APPS)
def get_plugins_MIDDLEWARE():
    """Return one flat iterator over the results of every get_MIDDLEWARE hook."""
    per_plugin_results = pm.hook.get_MIDDLEWARE()
    return itertools.chain.from_iterable(per_plugin_results)
def register_plugins_MIDDLEWARE(MIDDLEWARE):
    """Hand MIDDLEWARE to every plugin's register_MIDDLEWARE hook."""
    call_hook = pm.hook.register_MIDDLEWARE
    call_hook(MIDDLEWARE=MIDDLEWARE)
def get_plugins_AUTHENTICATION_BACKENDS():
    """Return one flat iterator over the results of every get_AUTHENTICATION_BACKENDS hook."""
    per_plugin_results = pm.hook.get_AUTHENTICATION_BACKENDS()
    return itertools.chain.from_iterable(per_plugin_results)
def register_plugins_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
    """Hand AUTHENTICATION_BACKENDS to every plugin's register_AUTHENTICATION_BACKENDS hook."""
    call_hook = pm.hook.register_AUTHENTICATION_BACKENDS
    call_hook(AUTHENTICATION_BACKENDS=AUTHENTICATION_BACKENDS)
def get_plugins_STATICFILES_DIRS():
    """Return one flat iterator over the results of every get_STATICFILES_DIRS hook."""
    per_plugin_results = pm.hook.get_STATICFILES_DIRS()
    return itertools.chain.from_iterable(per_plugin_results)
def register_plugins_STATICFILES_DIRS(STATICFILES_DIRS):
    """Hand STATICFILES_DIRS to every plugin's register_STATICFILES_DIRS hook."""
    call_hook = pm.hook.register_STATICFILES_DIRS
    call_hook(STATICFILES_DIRS=STATICFILES_DIRS)
def get_plugins_TEMPLATE_DIRS():
    """Return one flat iterator over the results of every get_TEMPLATE_DIRS hook."""
    per_plugin_results = pm.hook.get_TEMPLATE_DIRS()
    return itertools.chain.from_iterable(per_plugin_results)
def register_plugins_TEMPLATE_DIRS(TEMPLATE_DIRS):
    """Hand TEMPLATE_DIRS to every plugin's register_TEMPLATE_DIRS hook."""
    call_hook = pm.hook.register_TEMPLATE_DIRS
    call_hook(TEMPLATE_DIRS=TEMPLATE_DIRS)
def get_plugins_DJANGO_HUEY_QUEUES():
    """Merge the results of every get_DJANGO_HUEY_QUEUES hook into a single dict.

    Results are applied in the order the hook returns them, so a later result
    overwrites an earlier one that uses the same key.
    """
    merged_queues = {}
    per_plugin_queues = pm.hook.get_DJANGO_HUEY_QUEUES()
    for queues in per_plugin_queues:
        merged_queues.update(queues)
    return merged_queues
def register_plugins_DJANGO_HUEY(DJANGO_HUEY):
    """Hand DJANGO_HUEY to every plugin's register_DJANGO_HUEY hook."""
    call_hook = pm.hook.register_DJANGO_HUEY
    call_hook(DJANGO_HUEY=DJANGO_HUEY)
def get_plugins_ADMIN_DATA_VIEWS_URLS():
    """Return one flat iterator over the results of every get_ADMIN_DATA_VIEWS_URLS hook."""
    per_plugin_results = pm.hook.get_ADMIN_DATA_VIEWS_URLS()
    return itertools.chain.from_iterable(per_plugin_results)
def register_plugins_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
    """Hand ADMIN_DATA_VIEWS to every plugin's register_ADMIN_DATA_VIEWS hook."""
    call_hook = pm.hook.register_ADMIN_DATA_VIEWS
    call_hook(ADMIN_DATA_VIEWS=ADMIN_DATA_VIEWS)
def register_plugins_settings(settings):
    """Let registered plugins extend the given settings mapping.

    The mapping is wrapped in a benedict, the plugin-extensible settings are
    given default values when missing, each one is passed to its matching
    ``register_plugins_*`` helper, the generic ``register_settings`` hook is
    called with the wrapped object, and the result is finally written back into
    ``settings`` with ``settings.update()``.
    """
    # wrap the settings dict in a benedict so values can be set using settings.attr = xyz notation
    conf = benedict(settings, keypath_separator=None)

    # (setting name, value used when the setting is missing, helper that passes it to the plugins)
    plugin_extensible = (
        ('INSTALLED_APPS', [], register_plugins_INSTALLLED_APPS),
        ('MIDDLEWARE', [], register_plugins_MIDDLEWARE),
        ('AUTHENTICATION_BACKENDS', [], register_plugins_AUTHENTICATION_BACKENDS),
        ('STATICFILES_DIRS', [], register_plugins_STATICFILES_DIRS),
        ('TEMPLATE_DIRS', [], register_plugins_TEMPLATE_DIRS),
        ('DJANGO_HUEY', {'queues': {}}, register_plugins_DJANGO_HUEY),
        ('ADMIN_DATA_VIEWS', {'URLS': []}, register_plugins_ADMIN_DATA_VIEWS),
    )

    # first pass: make sure every plugin-extensible setting exists
    for key, fallback, _ in plugin_extensible:
        setattr(conf, key, conf.get(key, fallback))

    # second pass: call the hook helpers so plugins can mutate the values in-place
    for key, _, registrar in plugin_extensible:
        registrar(getattr(conf, key))

    # give every registered plugin a chance to adjust the settings as a whole
    pm.hook.register_settings(settings=conf)

    # finally copy all the new settings back into the caller's settings mapping
    settings.update(conf)
def get_plugins_urlpatterns():
    """Return a flat list built from the results of every urlpatterns hook."""
    per_plugin_patterns = pm.hook.urlpatterns()
    return list(itertools.chain.from_iterable(per_plugin_patterns))
def register_plugins_urlpatterns(urlpatterns):
    """Hand urlpatterns to every plugin's register_urlpatterns hook."""
    call_hook = pm.hook.register_urlpatterns
    call_hook(urlpatterns=urlpatterns)
# PLUGANTIC HOOKS
def get_plugins_PLUGINS():
    """Return a benedict mapping ``PLUGIN.id`` to ``PLUGIN`` for every object registered with pm."""
    plugins_by_id = {}
    for registered in pm.get_plugins():
        plugins_by_id[registered.PLUGIN.id] = registered.PLUGIN
    return benedict(plugins_by_id)
def get_plugins_HOOKS(PLUGINS):
    """Return a benedict mapping ``hook.id`` to hook for every hook of every plugin in PLUGINS."""
    hooks_by_id = {}
    for plugin in PLUGINS.values():
        for hook in plugin.hooks:
            hooks_by_id[hook.id] = hook
    return benedict(hooks_by_id)
def get_plugins_CONFIGS():
    """Return a benedict mapping ``config.id`` to config, collected from every get_CONFIGS hook."""
    configs_by_id = {}
    for plugin_configs in pm.hook.get_CONFIGS():
        for config in plugin_configs:
            configs_by_id[config.id] = config
    return benedict(configs_by_id)
def get_plugins_FLAT_CONFIG(CONFIGS):
    """Merge ``model_dump()`` of every config in CONFIGS into one flat benedict.

    Configs are applied in iteration order, so a later config overwrites an
    earlier one that dumps the same key.
    """
    flat = {}
    for config in CONFIGS.values():
        dumped = config.model_dump()
        flat.update(dumped)
    return benedict(flat)
def get_plugins_BINPROVIDERS():
    """Return a benedict mapping ``binprovider.id`` to binprovider, collected from every get_BINPROVIDERS hook."""
    binproviders_by_id = {}
    for plugin_binproviders in pm.hook.get_BINPROVIDERS():
        for binprovider in plugin_binproviders:
            binproviders_by_id[binprovider.id] = binprovider
    return benedict(binproviders_by_id)
def get_plugins_BINARIES():
    """Return a benedict mapping ``binary.id`` to binary, collected from every get_BINARIES hook."""
    binaries_by_id = {}
    for plugin_binaries in pm.hook.get_BINARIES():
        for binary in plugin_binaries:
            binaries_by_id[binary.id] = binary
    return benedict(binaries_by_id)
def get_plugins_EXTRACTORS():
    """Return a benedict mapping ``extractor.id`` to extractor, collected from every get_EXTRACTORS hook."""
    extractors_by_id = {}
    for plugin_extractors in pm.hook.get_EXTRACTORS():
        for extractor in plugin_extractors:
            extractors_by_id[extractor.id] = extractor
    return benedict(extractors_by_id)
def get_plugins_REPLAYERS():
    """Return a benedict mapping ``replayer.id`` to replayer, collected from every get_REPLAYERS hook."""
    replayers_by_id = {}
    for plugin_replayers in pm.hook.get_REPLAYERS():
        for replayer in plugin_replayers:
            replayers_by_id[replayer.id] = replayer
    return benedict(replayers_by_id)
def get_plugins_CHECKS():
    """Return a benedict mapping ``check.id`` to check, collected from every get_CHECKS hook."""
    checks_by_id = {}
    for plugin_checks in pm.hook.get_CHECKS():
        for check in plugin_checks:
            checks_by_id[check.id] = check
    return benedict(checks_by_id)
def get_plugins_ADMINDATAVIEWS():
    """Return a benedict mapping ``admin_dataview.id`` to admin data view, collected from every get_ADMINDATAVIEWS hook."""
    dataviews_by_id = {}
    for plugin_admin_dataviews in pm.hook.get_ADMINDATAVIEWS():
        for admin_dataview in plugin_admin_dataviews:
            dataviews_by_id[admin_dataview.id] = admin_dataview
    return benedict(dataviews_by_id)
def get_plugins_QUEUES():
    """Return a benedict mapping ``queue.id`` to queue, collected from every get_QUEUES hook."""
    queues_by_id = {}
    for plugin_queues in pm.hook.get_QUEUES():
        for queue in plugin_queues:
            queues_by_id[queue.id] = queue
    return benedict(queues_by_id)
def get_plugins_SEARCHBACKENDS():
    """Return a benedict mapping ``searchbackend.id`` to search backend, collected from every get_SEARCHBACKENDS hook."""
    searchbackends_by_id = {}
    for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS():
        for searchbackend in plugin_searchbackends:
            searchbackends_by_id[searchbackend.id] = searchbackend
    return benedict(searchbackends_by_id)

View file

@ -0,0 +1,39 @@
__package__ = 'abx.archivebox'
import importlib
from typing import Dict
from pathlib import Path
def load_archivebox_plugins(pm, plugins_dict: Dict[str, Path]):
    """Import ArchiveBox plugins and register them with the given plugin manager.

    Very similar to abx.load_plugins, but it additionally looks for a pydantic
    PLUGIN model (+ its hooks) on the plugin package and on its optional apps.py.

    Args:
        pm: plugin manager; only its ``register(obj)`` method is used here.
        plugins_dict: maps each importable plugin module name to the directory
            that contains that plugin.

    Returns:
        Dict mapping each plugin module name to the last PLUGIN object found
        for it. Modules that expose no PLUGIN are still registered with ``pm``
        but do not appear in the result.
    """
    LOADED_PLUGINS = {}
    for plugin_module, plugin_dir in plugins_dict.items():
        # print(f'Loading plugin: {plugin_module} from {plugin_dir}')

        # 1. register the plugin module directly in case it contains any loose hookimpls (e.g. in __init__.py)
        plugin_module_loaded = importlib.import_module(plugin_module)
        pm.register(plugin_module_loaded)
        modules_to_inspect = [plugin_module_loaded]

        # 2. then try to import plugin_module.apps as well
        if (plugin_dir / 'apps.py').exists():
            plugin_apps = importlib.import_module(plugin_module + '.apps')
            pm.register(plugin_apps)  # register the whole .apps in case it contains loose hookimpls (not in a class)
            modules_to_inspect.append(plugin_apps)

        # 3. collect every distinct PLUGIN object exposed by those modules.
        # A package may re-export the very same PLUGIN from both __init__.py and
        # apps.py; registering one object twice makes pluggy raise ValueError,
        # so de-duplicate by identity (not ==, which compares model fields).
        archivebox_plugins_found = []
        for module in modules_to_inspect:
            if not hasattr(module, 'PLUGIN'):
                continue
            candidate = module.PLUGIN
            if any(candidate is seen for seen in archivebox_plugins_found):
                continue
            archivebox_plugins_found.append(candidate)

        # 4. register each PLUGIN + all its hooks
        for ab_plugin in archivebox_plugins_found:
            pm.register(ab_plugin)
            for hook in ab_plugin.hooks:
                hook.__signature__ = hook.__class__.__signature__  # fix to make pydantic model usable as Pluggy plugin
                pm.register(hook)
            LOADED_PLUGINS[plugin_module] = ab_plugin

        # print(f'    √ Loaded plugin: {LOADED_PLUGINS}')
    return LOADED_PLUGINS

View file

@ -0,0 +1,38 @@
__package__ = 'abx.archivebox'
from typing import Dict
import abx
from .base_hook import BaseHook, HookType
class BaseAdminDataView(BaseHook):
    # Hook describing one admin data view: a list route plus a nested per-item route.
    # Subclasses are expected to override the placeholder defaults below.
    hook_type: HookType = "ADMINDATAVIEW"

    name: str = 'example_admin_data_view_list'
    verbose_name: str = 'Data View'
    route: str = '/__OVERRIDE_THIS__/'
    view: str = 'plugins_example.example.views.example_view_list'

    # route / name / view of the per-item page nested under the list route
    items: Dict[str, str] = dict(
        route='<str:key>/',
        name='example_admin_data_view_item',
        view='plugins_example.example.views.example_view_item',
    )

    @abx.hookimpl
    def get_ADMINDATAVIEWS(self):
        """Contribute this data view to the get_ADMINDATAVIEWS hook results."""
        return [self]

    @abx.hookimpl
    def get_ADMIN_DATA_VIEWS_URLS(self):
        """routes to be added to django.conf.settings.ADMIN_DATA_VIEWS['urls']"""
        # note: the entry's "name" is filled from verbose_name, not from name
        return [
            dict(
                route=self.route,
                view=self.view,
                name=self.verbose_name,
                items=self.items,
            ),
        ]

View file

@ -1,9 +1,8 @@
__package__ = "archivebox.plugantic" __package__ = "abx.archivebox"
from typing import Dict, List from typing import Dict, List
from typing_extensions import Self from typing_extensions import Self
from benedict import benedict
from pydantic import Field, InstanceOf, validate_call from pydantic import Field, InstanceOf, validate_call
from pydantic_pkgr import ( from pydantic_pkgr import (
Binary, Binary,
@ -15,10 +14,8 @@ from pydantic_pkgr import (
EnvProvider, EnvProvider,
) )
from django.conf import settings import abx
import archivebox import archivebox
from .base_hook import BaseHook, HookType from .base_hook import BaseHook, HookType
@ -37,19 +34,17 @@ class BaseBinProvider(BaseHook, BinProvider):
# # return cache.get_or_set(f'bin:version:{bin_name}:{abspath}', get_version_func) # # return cache.get_or_set(f'bin:version:{bin_name}:{abspath}', get_version_func)
# return get_version_func() # return get_version_func()
def register(self, settings, parent_plugin=None):
# self._plugin = parent_plugin # for debugging only, never rely on this!
settings.BINPROVIDERS = getattr(settings, "BINPROVIDERS", None) or benedict({}) # TODO: add install/load/load_or_install methods as abx.hookimpl methods
settings.BINPROVIDERS[self.id] = self
super().register(settings, parent_plugin=parent_plugin)
@property @property
def admin_url(self) -> str: def admin_url(self) -> str:
# e.g. /admin/environment/binproviders/NpmBinProvider/ TODO # e.g. /admin/environment/binproviders/NpmBinProvider/ TODO
return "/admin/environment/binaries/" return "/admin/environment/binaries/"
@abx.hookimpl
def get_BINPROVIDERS(self):
return [self]
class BaseBinary(BaseHook, Binary): class BaseBinary(BaseHook, Binary):
hook_type: HookType = "BINARY" hook_type: HookType = "BINARY"
@ -57,14 +52,6 @@ class BaseBinary(BaseHook, Binary):
binproviders_supported: List[InstanceOf[BinProvider]] = Field(default_factory=list, alias="binproviders") binproviders_supported: List[InstanceOf[BinProvider]] = Field(default_factory=list, alias="binproviders")
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = Field(default_factory=dict, alias="overrides") provider_overrides: Dict[BinProviderName, ProviderLookupDict] = Field(default_factory=dict, alias="overrides")
def register(self, settings, parent_plugin=None):
# self._plugin = parent_plugin # for debugging only, never rely on this!
settings.BINARIES = getattr(settings, "BINARIES", None) or benedict({})
settings.BINARIES[self.id] = self
super().register(settings, parent_plugin=parent_plugin)
@staticmethod @staticmethod
def symlink_to_lib(binary, bin_dir=None) -> None: def symlink_to_lib(binary, bin_dir=None) -> None:
bin_dir = bin_dir or archivebox.CONSTANTS.LIB_BIN_DIR bin_dir = bin_dir or archivebox.CONSTANTS.LIB_BIN_DIR
@ -101,6 +88,12 @@ class BaseBinary(BaseHook, Binary):
# e.g. /admin/environment/config/LdapConfig/ # e.g. /admin/environment/config/LdapConfig/
return f"/admin/environment/binaries/{self.name}/" return f"/admin/environment/binaries/{self.name}/"
@abx.hookimpl
def get_BINARIES(self):
return [self]
apt = AptProvider() apt = AptProvider()
brew = BrewProvider() brew = BrewProvider()
env = EnvProvider() env = EnvProvider()

View file

@ -1,10 +1,11 @@
__package__ = "archivebox.plugantic" __package__ = "abx.archivebox"
import abx
from typing import List from typing import List
from django.core.checks import Warning, Tags, register from django.core.checks import Warning, Tags, register
import abx
from .base_hook import BaseHook, HookType from .base_hook import BaseHook, HookType
@ -26,21 +27,18 @@ class BaseCheck(BaseHook):
# logger.debug('[√] Loaded settings.PLUGINS succesfully.') # logger.debug('[√] Loaded settings.PLUGINS succesfully.')
return errors return errors
def register(self, settings, parent_plugin=None):
# self._plugin = parent_plugin # backref to parent is for debugging only, never rely on this!
abx.pm.hook.register_django_check(check=self, settings=settings)
@abx.hookspec
@abx.hookimpl @abx.hookimpl
def register_django_check(check: BaseCheck, settings): def get_CHECKS(self):
def run_check(app_configs, **kwargs) -> List[Warning]: return [self]
@abx.hookimpl
def register_checks(self):
"""Tell django that this check exists so it can be run automatically by django."""
def run_check(**kwargs):
from django.conf import settings
import logging import logging
return check.check(settings, logging.getLogger("checks")) return self.check(settings, logging.getLogger("checks"))
run_check.__name__ = check.id
run_check.tags = [check.tag]
register(check.tag)(run_check)
run_check.__name__ = self.id
run_check.tags = [self.tag]
register(self.tag)(run_check)

View file

@ -1,4 +1,4 @@
__package__ = 'archivebox.plugantic' __package__ = 'abx.archivebox'
import os import os
import re import re
@ -14,8 +14,10 @@ from pydantic_settings.sources import TomlConfigSettingsSource
from pydantic_pkgr.base_types import func_takes_args_or_kwargs from pydantic_pkgr.base_types import func_takes_args_or_kwargs
import abx
from .base_hook import BaseHook, HookType from .base_hook import BaseHook, HookType
from . import ini_to_toml from archivebox.misc import ini_to_toml
PACKAGE_DIR = Path(__file__).resolve().parent.parent PACKAGE_DIR = Path(__file__).resolve().parent.parent
@ -236,6 +238,7 @@ class ArchiveBoxBaseConfig(BaseSettings):
for key, field in self.model_fields.items() for key, field in self.model_fields.items()
}) })
class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook): # type: ignore[type-arg] class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook): # type: ignore[type-arg]
hook_type: ClassVar[HookType] = 'CONFIG' hook_type: ClassVar[HookType] = 'CONFIG'
@ -261,42 +264,20 @@ class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook): # type: ignore[type-ar
# self.__init__() # self.__init__()
# class WgetToggleConfig(ConfigSet): @abx.hookimpl
# section: ConfigSectionName = 'ARCHIVE_METHOD_TOGGLES' def get_CONFIGS(self):
try:
return {self.id: self}
except Exception as e:
# raise Exception(f'Error computing CONFIGS for {type(self)}: {e.__class__.__name__}: {e}')
print(f'Error computing CONFIGS for {type(self)}: {e.__class__.__name__}: {e}')
return {}
# SAVE_WGET: bool = True @abx.hookimpl
# SAVE_WARC: bool = True def get_FLAT_CONFIG(self):
try:
# class WgetDependencyConfig(ConfigSet): return self.model_dump()
# section: ConfigSectionName = 'DEPENDENCY_CONFIG' except Exception as e:
# raise Exception(f'Error computing FLAT_CONFIG for {type(self)}: {e.__class__.__name__}: {e}')
# WGET_BINARY: str = Field(default='wget') print(f'Error computing FLAT_CONFIG for {type(self)}: {e.__class__.__name__}: {e}')
# WGET_ARGS: Optional[List[str]] = Field(default=None) return {}
# WGET_EXTRA_ARGS: List[str] = []
# WGET_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
# class WgetOptionsConfig(ConfigSet):
# section: ConfigSectionName = 'ARCHIVE_METHOD_OPTIONS'
# # loaded from shared config
# WGET_AUTO_COMPRESSION: bool = Field(default=True)
# SAVE_WGET_REQUISITES: bool = Field(default=True)
# WGET_USER_AGENT: str = Field(default='', alias='USER_AGENT')
# WGET_TIMEOUT: int = Field(default=60, alias='TIMEOUT')
# WGET_CHECK_SSL_VALIDITY: bool = Field(default=True, alias='CHECK_SSL_VALIDITY')
# WGET_RESTRICT_FILE_NAMES: str = Field(default='windows', alias='RESTRICT_FILE_NAMES')
# WGET_COOKIES_FILE: Optional[Path] = Field(default=None, alias='COOKIES_FILE')
# CONFIG = {
# 'CHECK_SSL_VALIDITY': False,
# 'SAVE_WARC': False,
# 'TIMEOUT': 999,
# }
# WGET_CONFIG = [
# WgetToggleConfig(**CONFIG),
# WgetDependencyConfig(**CONFIG),
# WgetOptionsConfig(**CONFIG),
# ]

View file

@ -1,4 +1,4 @@
__package__ = 'archivebox.plugantic' __package__ = 'abx.archivebox'
from typing import Optional, List, Literal, Annotated, Dict, Any from typing import Optional, List, Literal, Annotated, Dict, Any
from typing_extensions import Self from typing_extensions import Self
@ -8,9 +8,9 @@ from pathlib import Path
from pydantic import model_validator, AfterValidator from pydantic import model_validator, AfterValidator
from pydantic_pkgr import BinName from pydantic_pkgr import BinName
from .base_hook import BaseHook, HookType import abx
from ..config_stubs import AttrDict
from .base_hook import BaseHook, HookType
def no_empty_args(args: List[str]) -> List[str]: def no_empty_args(args: List[str]) -> List[str]:
@ -45,16 +45,6 @@ class BaseExtractor(BaseHook):
return self return self
def register(self, settings, parent_plugin=None):
# self._plugin = parent_plugin # for debugging only, never rely on this!
settings.EXTRACTORS = getattr(settings, "EXTRACTORS", None) or AttrDict({})
settings.EXTRACTORS[self.id] = self
super().register(settings, parent_plugin=parent_plugin)
def get_output_path(self, snapshot) -> Path: def get_output_path(self, snapshot) -> Path:
return Path(self.id.lower()) return Path(self.id.lower())
@ -64,7 +54,7 @@ class BaseExtractor(BaseHook):
return False return False
return True return True
# TODO: move this to a hookimpl
def extract(self, url: str, **kwargs) -> Dict[str, Any]: def extract(self, url: str, **kwargs) -> Dict[str, Any]:
output_dir = self.get_output_path(url, **kwargs) output_dir = self.get_output_path(url, **kwargs)
@ -81,6 +71,7 @@ class BaseExtractor(BaseHook):
'returncode': proc.returncode, 'returncode': proc.returncode,
} }
# TODO: move this to a hookimpl
def exec(self, args: CmdArgsList, pwd: Optional[Path]=None, settings=None): def exec(self, args: CmdArgsList, pwd: Optional[Path]=None, settings=None):
pwd = pwd or Path('.') pwd = pwd or Path('.')
if settings is None: if settings is None:
@ -90,28 +81,6 @@ class BaseExtractor(BaseHook):
binary = settings.BINARIES[self.binary] binary = settings.BINARIES[self.binary]
return binary.exec(args, pwd=pwd) return binary.exec(args, pwd=pwd)
@abx.hookimpl
# class YtdlpExtractor(Extractor): def get_EXTRACTORS(self):
# name: ExtractorName = 'media' return [self]
# binary: Binary = YtdlpBinary()
# def get_output_path(self, snapshot) -> Path:
# return 'media/'
# class WgetExtractor(Extractor):
# name: ExtractorName = 'wget'
# binary: Binary = WgetBinary()
# def get_output_path(self, snapshot) -> Path:
# return get_wget_output_path(snapshot)
# class WarcExtractor(Extractor):
# name: ExtractorName = 'warc'
# binary: Binary = WgetBinary()
# def get_output_path(self, snapshot) -> Path:
# return get_wget_output_path(snapshot)

View file

@ -1,4 +1,4 @@
__package__ = 'archivebox.plugantic' __package__ = 'abx.archivebox'
import inspect import inspect
from huey.api import TaskWrapper from huey.api import TaskWrapper
@ -7,6 +7,7 @@ from pathlib import Path
from typing import Tuple, Literal, ClassVar, get_args from typing import Tuple, Literal, ClassVar, get_args
from pydantic import BaseModel, ConfigDict from pydantic import BaseModel, ConfigDict
import abx
HookType = Literal['CONFIG', 'BINPROVIDER', 'BINARY', 'EXTRACTOR', 'REPLAYER', 'CHECK', 'ADMINDATAVIEW', 'QUEUE', 'SEARCHBACKEND'] HookType = Literal['CONFIG', 'BINPROVIDER', 'BINARY', 'EXTRACTOR', 'REPLAYER', 'CHECK', 'ADMINDATAVIEW', 'QUEUE', 'SEARCHBACKEND']
hook_type_names: Tuple[HookType] = get_args(HookType) hook_type_names: Tuple[HookType] = get_args(HookType)
@ -29,8 +30,8 @@ class BaseHook(BaseModel):
plugins_pkg.npm.NpmPlugin().AppConfig.ready() # called by django plugins_pkg.npm.NpmPlugin().AppConfig.ready() # called by django
plugins_pkg.npm.NpmPlugin().register(settings) -> plugins_pkg.npm.NpmPlugin().register(settings) ->
plugins_pkg.npm.NpmConfigSet().register(settings) plugins_pkg.npm.NpmConfigSet().register(settings)
plugantic.base_configset.BaseConfigSet().register(settings) abx.archivebox.base_configset.BaseConfigSet().register(settings)
plugantic.base_hook.BaseHook().register(settings, parent_plugin=plugins_pkg.npm.NpmPlugin()) abx.archivebox.base_hook.BaseHook().register(settings, parent_plugin=plugins_pkg.npm.NpmPlugin())
... ...
... ...
@ -96,32 +97,20 @@ class BaseHook(BaseModel):
# e.g. /admin/environment/config/LdapConfig/ # e.g. /admin/environment/config/LdapConfig/
return f"/admin/environment/{self.hook_type.lower()}/{self.id}/" return f"/admin/environment/{self.hook_type.lower()}/{self.id}/"
# def register(self, settings, parent_plugin=None):
# """Load a record of an installed hook into global Django settings.HOOKS at runtime."""
# self._plugin = parent_plugin # for debugging only, never rely on this!
# # assert json.dumps(self.model_json_schema(), indent=4), f"Hook {self.hook_module} has invalid JSON schema." @abx.hookimpl
def register(self, settings):
"""Called when django.apps.AppConfig.ready() is called"""
# # print(' -', self.hook_module, '.register()') print("REGISTERED HOOK:", self.hook_module)
self._is_registered = True
# # record installed hook in settings.HOOKS
# settings.REGISTERED_HOOKS[self.id] = self
# if settings.REGISTERED_HOOKS[self.id]._is_registered: @abx.hookimpl
# raise Exception(f"Tried to run {self.hook_module}.register() but its already been called!") def ready(self):
"""Called when django.apps.AppConfig.ready() is called"""
# settings.REGISTERED_HOOKS[self.id]._is_registered = True assert self._is_registered, f"Tried to run {self.hook_module}.ready() but it was never registered!"
# # print("REGISTERED HOOK:", self.hook_module) # print("READY HOOK:", self.hook_module)
self._is_ready = True
# def ready(self, settings):
# """Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
# # print(' -', self.hook_module, '.ready()')
# assert self.id in settings.REGISTERED_HOOKS, f"Tried to ready hook {self.hook_module} but it is not registered in settings.REGISTERED_HOOKS."
# if settings.REGISTERED_HOOKS[self.id]._is_ready:
# raise Exception(f"Tried to run {self.hook_module}.ready() but its already been called!")
# settings.REGISTERED_HOOKS[self.id]._is_ready = True

View file

@ -1,4 +1,4 @@
__package__ = 'archivebox.plugantic' __package__ = 'abx.archivebox'
import abx import abx
import inspect import inspect
@ -16,7 +16,6 @@ from pydantic import (
model_validator, model_validator,
InstanceOf, InstanceOf,
computed_field, computed_field,
validate_call,
) )
from benedict import benedict from benedict import benedict
@ -124,91 +123,32 @@ class BasePlugin(BaseModel):
hooks[hook.hook_type][hook.id] = hook hooks[hook.hook_type][hook.id] = hook
return hooks return hooks
@abx.hookimpl
def register(self, settings): def register(self, settings):
"""Loads this plugin's configs, binaries, extractors, and replayers into global Django settings at import time (before models are imported or any AppConfig.ready() are called).""" from archivebox.config import bump_startup_progress_bar
from ..config import bump_startup_progress_bar self._is_registered = True
# assert settings.PLUGINS[self.id] == self
# # assert self.id not in settings.PLUGINS, f'Tried to register plugin {self.plugin_module} but it conflicts with existing plugin of the same name ({self.app_label}).'
# ### Mutate django.conf.settings... values in-place to include plugin-provided overrides
# if settings.PLUGINS[self.id]._is_registered:
# raise Exception(f"Tried to run {self.plugin_module}.register() but its already been called!")
# for hook in self.hooks:
# hook.register(settings, parent_plugin=self)
# settings.PLUGINS[self.id]._is_registered = True
# # print('√ REGISTERED PLUGIN:', self.plugin_module)
bump_startup_progress_bar() bump_startup_progress_bar()
print('◣----------------- REGISTERED PLUGIN:', self.plugin_module, '-----------------◢')
print()
@abx.hookimpl
def ready(self, settings=None): def ready(self, settings=None):
"""Runs any runtime code needed when AppConfig.ready() is called (after all models are imported).""" """Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
from ..config import bump_startup_progress_bar from archivebox.config import bump_startup_progress_bar
assert self._is_registered, f"Tried to run {self.plugin_module}.ready() but it was never registered!"
# if settings is None: self._is_ready = True
# from django.conf import settings as django_settings
# settings = django_settings
# # print()
# # print(self.plugin_module_full, '.ready()')
# assert (
# self.id in settings.PLUGINS and settings.PLUGINS[self.id]._is_registered
# ), f"Tried to run plugin.ready() for {self.plugin_module} but plugin is not yet registered in settings.PLUGINS."
# if settings.PLUGINS[self.id]._is_ready:
# raise Exception(f"Tried to run {self.plugin_module}.ready() but its already been called!")
# for hook in self.hooks:
# hook.ready(settings)
# settings.PLUGINS[self.id]._is_ready = True # settings.PLUGINS[self.id]._is_ready = True
bump_startup_progress_bar() bump_startup_progress_bar()
@validate_call
def install_binaries(self) -> Self:
new_binaries = []
for idx, binary in enumerate(self.binaries):
new_binaries.append(binary.install() or binary)
return self.model_copy(update={
'binaries': new_binaries,
})
@validate_call @abx.hookimpl
def load_binaries(self, cache=True) -> Self: def get_INSTALLED_APPS(self):
new_binaries = [] return [self.plugin_module]
for idx, binary in enumerate(self.HOOKS_BY_TYPE['BINARY'].values()):
new_binaries.append(binary.load(cache=cache) or binary)
return self.model_copy(update={
'binaries': new_binaries,
})
# @validate_call
# def load_or_install_binaries(self, cache=True) -> Self:
# new_binaries = []
# for idx, binary in enumerate(self.binaries):
# new_binaries.append(binary.load_or_install(cache=cache) or binary)
# return self.model_copy(update={
# 'binaries': new_binaries,
# })
# class YtdlpPlugin(BasePlugin):
# name: str = 'ytdlp'
# configs: List[SerializeAsAny[BaseConfigSet]] = []
# binaries: List[SerializeAsAny[BaseBinary]] = [YtdlpBinary()]
# extractors: List[SerializeAsAny[BaseExtractor]] = [YtdlpExtractor()]
# replayers: List[SerializeAsAny[BaseReplayer]] = [MEDIA_REPLAYER]
# class WgetPlugin(BasePlugin):
# name: str = 'wget'
# configs: List[SerializeAsAny[BaseConfigSet]] = [*WGET_CONFIG]
# binaries: List[SerializeAsAny[BaseBinary]] = [WgetBinary()]
# extractors: List[SerializeAsAny[BaseExtractor]] = [WgetExtractor(), WarcExtractor()]

View file

@ -1,16 +1,18 @@
__package__ = 'archivebox.plugantic' __package__ = 'abx.archivebox'
import importlib import importlib
from typing import Dict, List, TYPE_CHECKING from typing import Dict, List, TYPE_CHECKING
from pydantic import Field, InstanceOf from pydantic import Field, InstanceOf
from benedict import benedict
if TYPE_CHECKING: if TYPE_CHECKING:
from huey.api import TaskWrapper from huey.api import TaskWrapper
import abx
from .base_hook import BaseHook, HookType from .base_hook import BaseHook, HookType
from .base_binary import BaseBinary from .base_binary import BaseBinary
from ..config_stubs import AttrDict
@ -33,13 +35,13 @@ class BaseQueue(BaseHook):
if hasattr(task, "task_class") and task.huey.name == self.name: if hasattr(task, "task_class") and task.huey.name == self.name:
all_tasks[task_name] = task all_tasks[task_name] = task
return AttrDict(all_tasks) return benedict(all_tasks)
def get_huey_config(self, settings) -> dict: def get_django_huey_config(self, QUEUE_DATABASE_NAME) -> dict:
"""Get the config dict to insert into django.conf.settings.DJANGO_HUEY['queues'].""" """Get the config dict to insert into django.conf.settings.DJANGO_HUEY['queues']."""
return { return {
"huey_class": "huey.SqliteHuey", "huey_class": "huey.SqliteHuey",
"filename": settings.QUEUE_DATABASE_NAME, "filename": QUEUE_DATABASE_NAME,
"name": self.name, "name": self.name,
"results": True, "results": True,
"store_none": True, "store_none": True,
@ -58,7 +60,7 @@ class BaseQueue(BaseHook):
}, },
} }
def get_supervisor_config(self, settings) -> dict: def get_supervisord_config(self, settings) -> dict:
"""Ge the config dict used to tell sueprvisord to start a huey consumer for this queue.""" """Ge the config dict used to tell sueprvisord to start a huey consumer for this queue."""
return { return {
"name": f"worker_{self.name}", "name": f"worker_{self.name}",
@ -78,7 +80,7 @@ class BaseQueue(BaseHook):
print(f"Error starting worker for queue {self.name}: {e}") print(f"Error starting worker for queue {self.name}: {e}")
return None return None
print() print()
worker = start_worker(supervisor, self.get_supervisor_config(settings), lazy=lazy) worker = start_worker(supervisor, self.get_supervisord_config(settings), lazy=lazy)
# Update settings.WORKERS to include this worker # Update settings.WORKERS to include this worker
settings.WORKERS = getattr(settings, "WORKERS", None) or AttrDict({}) settings.WORKERS = getattr(settings, "WORKERS", None) or AttrDict({})
@ -86,65 +88,19 @@ class BaseQueue(BaseHook):
return worker return worker
def register(self, settings, parent_plugin=None): @abx.hookimpl
# self._plugin = parent_plugin # for debugging only, never rely on this! def get_QUEUES(self):
return [self]
# Side effect: register queue with django-huey multiqueue dict @abx.hookimpl
settings.DJANGO_HUEY = getattr(settings, "DJANGO_HUEY", None) or AttrDict({"queues": {}}) def get_DJANGO_HUEY_QUEUES(self, QUEUE_DATABASE_NAME):
settings.DJANGO_HUEY["queues"][self.name] = self.get_huey_config(settings) """queue configs to be added to django.conf.settings.DJANGO_HUEY['queues']"""
return {
self.name: self.get_django_huey_config(QUEUE_DATABASE_NAME)
}
# Side effect: register some extra tasks with huey
# on_startup(queue=self.name)(self.on_startup_task)
# db_periodic_task(crontab(minute='*/5'))(self.on_periodic_task)
# Install queue into settings.QUEUES
settings.QUEUES = getattr(settings, "QUEUES", None) or AttrDict({})
settings.QUEUES[self.id] = self
# Record installed hook into settings.HOOKS
super().register(settings, parent_plugin=parent_plugin)
# @abx.hookimpl
# def ready(self, settings): # def ready(self, settings):
# self.start_supervisord_worker(settings, lazy=True) # self.start_supervisord_worker(settings, lazy=True)
# super().ready(settings) # super().ready(settings)
# class WgetToggleConfig(ConfigSet):
# section: ConfigSectionName = 'ARCHIVE_METHOD_TOGGLES'
# SAVE_WGET: bool = True
# SAVE_WARC: bool = True
# class WgetDependencyConfig(ConfigSet):
# section: ConfigSectionName = 'DEPENDENCY_CONFIG'
# WGET_BINARY: str = Field(default='wget')
# WGET_ARGS: Optional[List[str]] = Field(default=None)
# WGET_EXTRA_ARGS: List[str] = []
# WGET_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
# class WgetOptionsConfig(ConfigSet):
# section: ConfigSectionName = 'ARCHIVE_METHOD_OPTIONS'
# # loaded from shared config
# WGET_AUTO_COMPRESSION: bool = Field(default=True)
# SAVE_WGET_REQUISITES: bool = Field(default=True)
# WGET_USER_AGENT: str = Field(default='', alias='USER_AGENT')
# WGET_TIMEOUT: int = Field(default=60, alias='TIMEOUT')
# WGET_CHECK_SSL_VALIDITY: bool = Field(default=True, alias='CHECK_SSL_VALIDITY')
# WGET_RESTRICT_FILE_NAMES: str = Field(default='windows', alias='RESTRICT_FILE_NAMES')
# WGET_COOKIES_FILE: Optional[Path] = Field(default=None, alias='COOKIES_FILE')
# CONFIG = {
# 'CHECK_SSL_VALIDITY': False,
# 'SAVE_WARC': False,
# 'TIMEOUT': 999,
# }
# WGET_CONFIG = [
# WgetToggleConfig(**CONFIG),
# WgetDependencyConfig(**CONFIG),
# WgetOptionsConfig(**CONFIG),
# ]

View file

@ -1,8 +1,8 @@
__package__ = 'archivebox.plugantic' __package__ = 'abx.archivebox'
import abx
from .base_hook import BaseHook, HookType from .base_hook import BaseHook, HookType
from ..config_stubs import AttrDict
class BaseReplayer(BaseHook): class BaseReplayer(BaseHook):
@ -22,16 +22,8 @@ class BaseReplayer(BaseHook):
# icon_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon' # icon_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon'
# thumbnail_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon' # thumbnail_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon'
def register(self, settings, parent_plugin=None): @abx.hookimpl
# self._plugin = parent_plugin # for debugging only, never rely on this! def get_REPLAYERS(self):
return [self]
settings.REPLAYERS = getattr(settings, 'REPLAYERS', None) or AttrDict({}) # TODO: add hookimpl methods for get_row_template, get_embed_template, get_fullpage_template, etc...
settings.REPLAYERS[self.id] = self
super().register(settings, parent_plugin=parent_plugin)
# class MediaReplayer(BaseReplayer):
# name: str = 'MediaReplayer'
# MEDIA_REPLAYER = MediaReplayer()

View file

@ -0,0 +1,33 @@
__package__ = 'abx.archivebox'
from typing import Iterable, List
from pydantic import Field
import abx
from .base_hook import BaseHook, HookType
class BaseSearchBackend(BaseHook):
    """Base hook class for search backends (``hook_type`` is 'SEARCHBACKEND').

    In this base class ``index`` and ``flush`` do nothing and ``search``
    raises NotImplementedError; presumably concrete backends override all
    three (TODO confirm against the plugins that subclass this).
    """
    hook_type: HookType = 'SEARCHBACKEND'

    name: str = Field()       # e.g. 'singlefile'

    # TODO: move these to a hookimpl

    @staticmethod
    def index(snapshot_id: str, texts: List[str]):
        """Base implementation is a no-op and returns None."""
        return

    @staticmethod
    def flush(snapshot_ids: Iterable[str]):
        """Base implementation is a no-op and returns None."""
        return

    @staticmethod
    def search(text: str) -> List[str]:
        """Always raises NotImplementedError in the base class."""
        raise NotImplementedError("search method must be implemented by subclass")

    @abx.hookimpl
    def get_SEARCHBACKENDS(self):
        """Hook implementation: return this backend as a one-element list."""
        return [self]

View file

@ -1,4 +1,6 @@
from .hookspec import hookspec __package__ = 'abx.archivebox'
from .. import hookspec
@hookspec @hookspec

View file

@ -0,0 +1,98 @@
__package__ = 'abx.archivebox'
from benedict import benedict
from .. import pm
# API exposed to ArchiveBox code
def get_PLUGINS():
    """Return a benedict mapping ``PLUGIN.id`` -> ``PLUGIN`` for every object registered with ``pm``."""
    plugins_by_id = {}
    for registered in pm.get_plugins():
        plugins_by_id[registered.PLUGIN.id] = registered.PLUGIN
    return benedict(plugins_by_id)
def get_HOOKS(PLUGINS):
    """Return a benedict mapping ``hook.id`` -> hook for every hook of every plugin in ``PLUGINS``.

    ``PLUGINS`` is a mapping whose values each expose a ``hooks`` iterable.
    When two hooks share an id, the one encountered last wins.
    """
    hooks_by_id = {}
    for plugin_obj in PLUGINS.values():
        for plugin_hook in plugin_obj.hooks:
            hooks_by_id[plugin_hook.id] = plugin_hook
    return benedict(hooks_by_id)
def get_CONFIGS():
    """Merge the dicts returned by every ``get_CONFIGS`` hook implementation into one benedict.

    Later hook results overwrite earlier ones on duplicate config ids.
    """
    merged_configs = {}
    for configs_from_plugin in pm.hook.get_CONFIGS():
        for config_id, config in configs_from_plugin.items():
            merged_configs[config_id] = config
    return benedict(merged_configs)
def get_FLAT_CONFIG():
    """Merge the dicts returned by every ``get_FLAT_CONFIG`` hook implementation into one benedict.

    Later hook results overwrite earlier ones on duplicate keys.
    """
    flat_config = {}
    for config_dict_from_plugin in pm.hook.get_FLAT_CONFIG():
        for key, value in config_dict_from_plugin.items():
            flat_config[key] = value
    return benedict(flat_config)
def _index_by_id(per_plugin_results):
    """Flatten a hook call's per-plugin lists into a benedict keyed by each item's ``.id``.

    ``per_plugin_results`` is the list returned by calling a pluggy hook: one
    iterable of hook objects per implementing plugin. When two items share an
    id, the one encountered last wins.
    """
    return benedict({
        item.id: item
        for plugin_items in per_plugin_results
        for item in plugin_items
    })


def get_BINPROVIDERS():
    """Return all binproviders from ``get_BINPROVIDERS`` hook implementations, keyed by id."""
    return _index_by_id(pm.hook.get_BINPROVIDERS())


def get_BINARIES():
    """Return all binaries from ``get_BINARIES`` hook implementations, keyed by id."""
    return _index_by_id(pm.hook.get_BINARIES())


def get_EXTRACTORS():
    """Return all extractors from ``get_EXTRACTORS`` hook implementations, keyed by id."""
    return _index_by_id(pm.hook.get_EXTRACTORS())


def get_REPLAYERS():
    """Return all replayers from ``get_REPLAYERS`` hook implementations, keyed by id."""
    return _index_by_id(pm.hook.get_REPLAYERS())


def get_CHECKS():
    """Return all checks from ``get_CHECKS`` hook implementations, keyed by id."""
    return _index_by_id(pm.hook.get_CHECKS())


def get_ADMINDATAVIEWS():
    """Return all admin dataviews from ``get_ADMINDATAVIEWS`` hook implementations, keyed by id."""
    return _index_by_id(pm.hook.get_ADMINDATAVIEWS())


def get_QUEUES():
    """Return all queues from ``get_QUEUES`` hook implementations, keyed by id."""
    return _index_by_id(pm.hook.get_QUEUES())


def get_SEARCHBACKENDS():
    """Return all search backends from ``get_SEARCHBACKENDS`` hook implementations, keyed by id."""
    return _index_by_id(pm.hook.get_SEARCHBACKENDS())
###########################
def register_all_hooks(settings):
    """Call the ``register`` hook on the plugin manager, passing ``settings`` through as a keyword argument."""
    # NOTE(review): the `register` hookspec is not defined in this module -- confirm it
    # is still declared (or implemented by a plugin), otherwise `pm.hook.register` does not exist.
    pm.hook.register(settings=settings)

View file

@ -0,0 +1 @@
__package__ = 'abx.django'

View file

@ -1,8 +1,9 @@
__package__ = 'abx.django'
from django.apps import AppConfig from django.apps import AppConfig
class ABXConfig(AppConfig): class ABXConfig(AppConfig):
default_auto_field = 'django.db.models.BigAutoField'
name = 'abx' name = 'abx'
def ready(self): def ready(self):

View file

@ -0,0 +1,120 @@
__package__ = 'abx.django'
from ..hookspec import hookspec
###########################################################################################
@hookspec
def get_INSTALLED_APPS():
    """Return a list of apps to add to INSTALLED_APPS.

    Hook specification only: plugins provide the real implementation.
    """
    # e.g. ['your_plugin_type.plugin_name']
    return []
# @hookspec
# def register_INSTALLED_APPS(INSTALLED_APPS):
# """Mutate INSTALLED_APPS in place to add your app in a specific position"""
# # idx_of_contrib = INSTALLED_APPS.index('django.contrib.auth')
# # INSTALLED_APPS.insert(idx_of_contrib + 1, 'your_plugin_type.plugin_name')
# pass
@hookspec
def get_TEMPLATE_DIRS():
    """Return a list of template directories to add to TEMPLATE_DIRS (hook specification only)."""
    return []     # e.g. ['your_plugin_type/plugin_name/templates']
# @hookspec
# def register_TEMPLATE_DIRS(TEMPLATE_DIRS):
# """Install django settings"""
# # e.g. TEMPLATE_DIRS.insert(0, 'your_plugin_type/plugin_name/templates')
# pass
@hookspec
def get_STATICFILES_DIRS():
    """Return a list of static-file directories to add to STATICFILES_DIRS (hook specification only)."""
    return []     # e.g. ['your_plugin_type/plugin_name/static']
# @hookspec
# def register_STATICFILES_DIRS(STATICFILES_DIRS):
# """Mutate STATICFILES_DIRS in place to add your static dirs in a specific position"""
# # e.g. STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static')
# pass
@hookspec
def get_MIDDLEWARE():
    """Return a list of middleware import paths to add to MIDDLEWARE (hook specification only)."""
    return []     # e.g. ['your_plugin_type.plugin_name.middleware.YourMiddleware']
# @hookspec
# def register_MIDDLEWARE(MIDDLEWARE):
# """Mutate MIDDLEWARE in place to add your middleware in a specific position"""
# # e.g. MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware')
# pass
@hookspec
def get_AUTHENTICATION_BACKENDS():
    """Return a list of auth backend import paths to add to AUTHENTICATION_BACKENDS (hook specification only)."""
    return []     # e.g. ['django_auth_ldap.backend.LDAPBackend']
# @hookspec
# def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
# """Mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position"""
# # e.g. AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend')
# pass
@hookspec
def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME):
    """Return a dict of huey queue configs to add to DJANGO_HUEY['queues'].

    Hook specification only. Implementations return a mapping of
    ``{queue_name: huey_config_dict}``; the results from all plugins are
    merged with ``dict.update``, so a list of dicts would not merge correctly.

    ``QUEUE_DATABASE_NAME`` is the queue database filename passed in by the caller.
    """
    # e.g. {'your_queue_name': {'huey_class': 'huey.SqliteHuey', 'filename': QUEUE_DATABASE_NAME, 'name': 'your_queue_name', ...}}
    return {}
# @hookspec
# def register_DJANGO_HUEY(DJANGO_HUEY):
# """Mutate DJANGO_HUEY in place to add your huey queues in a specific position"""
# # e.g. DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value'
# pass
@hookspec
def get_ADMIN_DATA_VIEWS_URLS():
    """Return a list of extra admin data view URL entries (hook specification only).

    NOTE(review): presumably each entry is a dict shaped like the existing
    ADMIN_DATA_VIEWS['URLS'] items (route / view / name / items) -- confirm.
    """
    return []
# @hookspec
# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
# """Mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position"""
# # e.g. ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py')
# pass
# @hookspec
# def register_settings(settings):
# """Mutate settings in place to add your settings / modify existing settings"""
# # settings.SOME_KEY = 'some_value'
# pass
###########################################################################################
@hookspec
def get_urlpatterns():
    """Return a list of url patterns contributed by the plugin (hook specification only)."""
    return []     # e.g. [path('your_plugin_type/plugin_name/url.py', your_view)]
# @hookspec
# def register_urlpatterns(urlpatterns):
# """Mutate urlpatterns in place to add your urlpatterns in a specific position"""
# # e.g. urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view))
# pass
###########################################################################################
@hookspec
def register_checks():
    """Register the plugin's checks with Django's system check framework (hook specification only)."""
    pass
###########################################################################################
@hookspec
def ready():
    """Called when the Django apps' ready() methods are triggered (hook specification only; takes no arguments)."""
    pass

View file

@ -0,0 +1,98 @@
__package__ = 'abx.django'
import itertools
from benedict import benedict
from .. import pm
def get_INSTALLED_APPS():
    """Chain every plugin's ``get_INSTALLED_APPS`` hook result, in reversed hook-result order.

    Returns a one-shot ``itertools.chain`` iterator, not a list.
    """
    per_plugin_apps = pm.hook.get_INSTALLED_APPS()
    ordered_results = reversed(per_plugin_apps)
    return itertools.chain(*ordered_results)
# def register_INSTALLLED_APPS(INSTALLED_APPS):
# pm.hook.register_INSTALLED_APPS(INSTALLED_APPS=INSTALLED_APPS)
def get_MIDDLEWARES():
    """Chain every plugin's ``get_MIDDLEWARE`` hook result, in reversed hook-result order.

    Returns a one-shot ``itertools.chain`` iterator, not a list.
    """
    per_plugin_middleware = pm.hook.get_MIDDLEWARE()
    ordered_results = reversed(per_plugin_middleware)
    return itertools.chain(*ordered_results)
# def register_MIDDLEWARES(MIDDLEWARE):
# pm.hook.register_MIDDLEWARE(MIDDLEWARE=MIDDLEWARE)
def get_AUTHENTICATION_BACKENDS():
    """Chain every plugin's ``get_AUTHENTICATION_BACKENDS`` hook result, in reversed hook-result order.

    Returns a one-shot ``itertools.chain`` iterator, not a list.
    """
    per_plugin_backends = pm.hook.get_AUTHENTICATION_BACKENDS()
    ordered_results = reversed(per_plugin_backends)
    return itertools.chain(*ordered_results)
# def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
# pm.hook.register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS=AUTHENTICATION_BACKENDS)
def get_STATICFILES_DIRS():
    """Chain every plugin's ``get_STATICFILES_DIRS`` hook result, in reversed hook-result order.

    Returns a one-shot ``itertools.chain`` iterator, not a list.
    """
    per_plugin_static_dirs = pm.hook.get_STATICFILES_DIRS()
    ordered_results = reversed(per_plugin_static_dirs)
    return itertools.chain(*ordered_results)
# def register_STATICFILES_DIRS(STATICFILES_DIRS):
# pm.hook.register_STATICFILES_DIRS(STATICFILES_DIRS=STATICFILES_DIRS)
def get_TEMPLATE_DIRS():
    """Chain every plugin's ``get_TEMPLATE_DIRS`` hook result, in reversed hook-result order.

    Returns a one-shot ``itertools.chain`` iterator, not a list.
    """
    per_plugin_template_dirs = pm.hook.get_TEMPLATE_DIRS()
    ordered_results = reversed(per_plugin_template_dirs)
    return itertools.chain(*ordered_results)
# def register_TEMPLATE_DIRS(TEMPLATE_DIRS):
# pm.hook.register_TEMPLATE_DIRS(TEMPLATE_DIRS=TEMPLATE_DIRS)
def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME='queue.sqlite3'):
    """Merge the queue configs returned by every ``get_DJANGO_HUEY_QUEUES`` hook implementation.

    ``QUEUE_DATABASE_NAME`` is forwarded to each implementation as a keyword
    argument. Results are folded into a plain dict with ``dict.update``, so
    later results overwrite earlier ones on duplicate queue names.
    """
    merged_queues = {}
    plugin_results = pm.hook.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=QUEUE_DATABASE_NAME)
    for queue_configs in plugin_results:
        merged_queues.update(queue_configs)
    return merged_queues
# def register_DJANGO_HUEY(DJANGO_HUEY):
# pm.hook.register_DJANGO_HUEY(DJANGO_HUEY=DJANGO_HUEY)
def get_ADMIN_DATA_VIEWS_URLS():
    """Chain every plugin's ``get_ADMIN_DATA_VIEWS_URLS`` hook result, in reversed hook-result order.

    Returns a one-shot ``itertools.chain`` iterator, not a list.
    """
    per_plugin_urls = pm.hook.get_ADMIN_DATA_VIEWS_URLS()
    ordered_results = reversed(per_plugin_urls)
    return itertools.chain(*ordered_results)
# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
# pm.hook.register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS=ADMIN_DATA_VIEWS)
# def register_settings(settings):
# # convert settings dict to an benedict so we can set values using settings.attr = xyz notation
# settings_as_obj = benedict(settings, keypath_separator=None)
# # set default values for settings that are used by plugins
# # settings_as_obj.INSTALLED_APPS = settings_as_obj.get('INSTALLED_APPS', [])
# # settings_as_obj.MIDDLEWARE = settings_as_obj.get('MIDDLEWARE', [])
# # settings_as_obj.AUTHENTICATION_BACKENDS = settings_as_obj.get('AUTHENTICATION_BACKENDS', [])
# # settings_as_obj.STATICFILES_DIRS = settings_as_obj.get('STATICFILES_DIRS', [])
# # settings_as_obj.TEMPLATE_DIRS = settings_as_obj.get('TEMPLATE_DIRS', [])
# # settings_as_obj.DJANGO_HUEY = settings_as_obj.get('DJANGO_HUEY', {'queues': {}})
# # settings_as_obj.ADMIN_DATA_VIEWS = settings_as_obj.get('ADMIN_DATA_VIEWS', {'URLS': []})
# # # call all the hook functions to mutate the settings values in-place
# # register_INSTALLLED_APPS(settings_as_obj.INSTALLED_APPS)
# # register_MIDDLEWARES(settings_as_obj.MIDDLEWARE)
# # register_AUTHENTICATION_BACKENDS(settings_as_obj.AUTHENTICATION_BACKENDS)
# # register_STATICFILES_DIRS(settings_as_obj.STATICFILES_DIRS)
# # register_TEMPLATE_DIRS(settings_as_obj.TEMPLATE_DIRS)
# # register_DJANGO_HUEY(settings_as_obj.DJANGO_HUEY)
# # register_ADMIN_DATA_VIEWS(settings_as_obj.ADMIN_DATA_VIEWS)
# # calls Plugin.settings(settings) on each registered plugin
# pm.hook.register_settings(settings=settings_as_obj)
# # then finally update the settings globals() object will all the new settings
# # settings.update(settings_as_obj)
def get_urlpatterns():
    """Return a flat list of the url patterns contributed by every plugin.

    Calls the ``get_urlpatterns`` hook (the name declared in the django
    hookspec) and concatenates the per-plugin lists in hook-result order.
    """
    # The hook is named `get_urlpatterns`; `pm.hook.urlpatterns` does not exist
    # and would raise AttributeError.
    return list(itertools.chain.from_iterable(pm.hook.get_urlpatterns()))
def register_urlpatterns(urlpatterns):
    """Let plugins mutate ``urlpatterns`` in place via the ``register_urlpatterns`` hook.

    The ``register_urlpatterns`` hookspec is currently commented out, so the
    hook caller only exists on ``pm.hook`` when at least one plugin implements
    it. Treat a missing hook as "no plugins registered" instead of raising
    AttributeError.
    """
    hook_caller = getattr(pm.hook, 'register_urlpatterns', None)
    if hook_caller is not None:
        hook_caller(urlpatterns=urlpatterns)
def register_checks():
    """Register any Django system checks by calling the ``register_checks`` hook on the plugin manager."""
    pm.hook.register_checks()

View file

@ -3,10 +3,12 @@ from pathlib import Path
from pluggy import HookimplMarker from pluggy import HookimplMarker
from pluggy import HookspecMarker from pluggy import HookspecMarker
hookspec = HookspecMarker("abx") spec = hookspec = HookspecMarker("abx")
hookimpl = HookimplMarker("abx") impl = hookimpl = HookimplMarker("abx")
@hookspec @hookspec
@hookimpl
def get_system_user() -> str: def get_system_user() -> str:
return Path('~').expanduser().name return Path('~').expanduser().name

View file

@ -1,6 +0,0 @@
from .hookspec import hookspec
@hookspec
def ready(settings):
"""Called when the Django app.ready() is triggered"""
pass

View file

@ -1,90 +0,0 @@
from .hookspec import hookspec
###########################################################################################
@hookspec
def get_INSTALLED_APPS():
"""Return a list of apps to add to INSTALLED_APPS"""
# e.g. ['your_plugin_type.plugin_name']
return []
@hookspec
def register_INSTALLED_APPS(INSTALLED_APPS):
"""Mutate INSTALLED_APPS in place to add your app in a specific position"""
# idx_of_contrib = INSTALLED_APPS.index('django.contrib.auth')
# INSTALLED_APPS.insert(idx_of_contrib + 1, 'your_plugin_type.plugin_name')
pass
@hookspec
def get_TEMPLATE_DIRS():
return [] # e.g. ['your_plugin_type/plugin_name/templates']
@hookspec
def register_TEMPLATE_DIRS(TEMPLATE_DIRS):
"""Install django settings"""
# e.g. TEMPLATE_DIRS.insert(0, 'your_plugin_type/plugin_name/templates')
pass
@hookspec
def get_STATICFILES_DIRS():
return [] # e.g. ['your_plugin_type/plugin_name/static']
@hookspec
def register_STATICFILES_DIRS(STATICFILES_DIRS):
"""Mutate STATICFILES_DIRS in place to add your static dirs in a specific position"""
# e.g. STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static')
pass
@hookspec
def get_MIDDLEWARE():
return [] # e.g. ['your_plugin_type.plugin_name.middleware.YourMiddleware']
@hookspec
def register_MIDDLEWARE(MIDDLEWARE):
"""Mutate MIDDLEWARE in place to add your middleware in a specific position"""
# e.g. MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware')
pass
@hookspec
def get_AUTHENTICATION_BACKENDS():
return [] # e.g. ['django_auth_ldap.backend.LDAPBackend']
@hookspec
def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
"""Mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position"""
# e.g. AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend')
pass
@hookspec
def get_DJANGO_HUEY_QUEUES():
return [] # e.g. [{'name': 'your_plugin_type.plugin_name', 'HUEY': {...}}]
@hookspec
def register_DJANGO_HUEY(DJANGO_HUEY):
"""Mutate DJANGO_HUEY in place to add your huey queues in a specific position"""
# e.g. DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value'
pass
@hookspec
def get_ADMIN_DATA_VIEWS_URLS():
return []
@hookspec
def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
"""Mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position"""
# e.g. ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py')
pass
@hookspec
def register_settings(settings):
"""Mutate settings in place to add your settings / modify existing settings"""
# settings.SOME_KEY = 'some_value'
pass

View file

@ -1,12 +0,0 @@
from .hookspec import hookspec
@hookspec
def get_urlpatterns():
return [] # e.g. [path('your_plugin_type/plugin_name/url.py', your_view)]
@hookspec
def register_urlpatterns(urlpatterns):
"""Mutate urlpatterns in place to add your urlpatterns in a specific position"""
# e.g. urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view))
pass

30
archivebox/abx/manager.py Normal file
View file

@ -0,0 +1,30 @@
import inspect
import pluggy
class PluginManager(pluggy.PluginManager):
    """
    Patch to fix pluggy's PluginManager to work with pydantic models.
    See: https://github.com/pytest-dev/pluggy/pull/536
    """
    def parse_hookimpl_opts(self, plugin, name: str) -> pluggy.HookimplOpts | None:
        """Return the hookimpl options for attribute ``name`` of ``plugin``, or None.

        Returns None without touching the attribute on the instance when
        ``name`` is a property on the plugin's class, or when the plugin has
        ``__pydantic_core_schema__`` and ``name`` is one of its ``model_fields``.
        Otherwise defers to pluggy's implementation, retrying against
        ``type(plugin)`` if the first attempt raises AttributeError.
        """
        # IMPORTANT: @property methods can have side effects, and are never hookimpl
        # if attr is a property, skip it in advance
        # (looked up on the class so the property getter is never invoked)
        plugin_class = plugin if inspect.isclass(plugin) else type(plugin)
        if isinstance(getattr(plugin_class, name, None), property):
            return None

        # pydantic model fields are like attrs and also can never be hookimpls
        plugin_is_pydantic_obj = hasattr(plugin, "__pydantic_core_schema__")
        if plugin_is_pydantic_obj and name in getattr(plugin, "model_fields", {}):
            # pydantic models mess with the class and attr __signature__
            # so inspect.isroutine(...) throws exceptions and cant be used
            return None

        try:
            return super().parse_hookimpl_opts(plugin, name)
        except AttributeError:
            # fall back to inspecting the attribute on the plugin's class instead of the instance
            return super().parse_hookimpl_opts(type(plugin), name)
pm = PluginManager("abx")

View file

@ -0,0 +1 @@
__package__ = 'abx.pydantic_pkgr'

View file

@ -1,5 +1,5 @@
from .hookspec import hookspec from ..hookspec import hookspec
########################################################################################### ###########################################################################################

View file

@ -12,7 +12,6 @@ from collections.abc import Mapping
from typing import Optional, List, IO, Union, Iterable from typing import Optional, List, IO, Union, Iterable
from pathlib import Path from pathlib import Path
from ..misc.checks import check_data_folder, check_migrations from ..misc.checks import check_data_folder, check_migrations
from ..misc.logging import stderr from ..misc.logging import stderr

View file

@ -788,15 +788,22 @@ def bump_startup_progress_bar():
def setup_django_minimal(): def setup_django_minimal():
sys.path.append(str(archivebox.PACKAGE_DIR)) # sys.path.append(str(archivebox.PACKAGE_DIR))
os.environ.setdefault('OUTPUT_DIR', str(archivebox.DATA_DIR)) # os.environ.setdefault('OUTPUT_DIR', str(archivebox.DATA_DIR))
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings') # os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
django.setup() # django.setup()
raise Exception('dont use this anymore')
DJANGO_SET_UP = False
def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None: def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
global INITIAL_STARTUP_PROGRESS global INITIAL_STARTUP_PROGRESS
global INITIAL_STARTUP_PROGRESS_TASK global INITIAL_STARTUP_PROGRESS_TASK
global DJANGO_SET_UP
if DJANGO_SET_UP:
raise Exception('django is already set up!')
with Progress(transient=True, expand=True, console=CONSOLE) as INITIAL_STARTUP_PROGRESS: with Progress(transient=True, expand=True, console=CONSOLE) as INITIAL_STARTUP_PROGRESS:
INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25) INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25)
@ -809,13 +816,11 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C
try: try:
from django.core.management import call_command from django.core.management import call_command
sys.path.append(str(archivebox.PACKAGE_DIR))
os.environ.setdefault('OUTPUT_DIR', str(archivebox.DATA_DIR))
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
bump_startup_progress_bar() bump_startup_progress_bar()
if in_memory_db: if in_memory_db:
raise Exception('dont use this anymore')
# some commands (e.g. oneshot) dont store a long-lived sqlite3 db file on disk. # some commands (e.g. oneshot) dont store a long-lived sqlite3 db file on disk.
# in those cases we create a temporary in-memory db and run the migrations # in those cases we create a temporary in-memory db and run the migrations
# immediately to get a usable in-memory-database at startup # immediately to get a usable in-memory-database at startup
@ -833,8 +838,6 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C
from django.conf import settings from django.conf import settings
from plugins_sys.config.apps import SHELL_CONFIG
# log startup message to the error log # log startup message to the error log
with open(settings.ERROR_LOG, "a", encoding='utf-8') as f: with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
command = ' '.join(sys.argv) command = ' '.join(sys.argv)
@ -878,5 +881,7 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C
except KeyboardInterrupt: except KeyboardInterrupt:
raise SystemExit(2) raise SystemExit(2)
DJANGO_SET_UP = True
INITIAL_STARTUP_PROGRESS = None INITIAL_STARTUP_PROGRESS = None
INITIAL_STARTUP_PROGRESS_TASK = None INITIAL_STARTUP_PROGRESS_TASK = None

View file

@ -22,7 +22,7 @@ import archivebox
from signal_webhooks.admin import WebhookAdmin from signal_webhooks.admin import WebhookAdmin
from signal_webhooks.utils import get_webhook_model from signal_webhooks.utils import get_webhook_model
# from plugantic.admin import CustomPlugin # from abx.archivebox.admin import CustomPlugin
from ..util import htmldecode, urldecode from ..util import htmldecode, urldecode

View file

@ -9,6 +9,10 @@ from pathlib import Path
from django.utils.crypto import get_random_string from django.utils.crypto import get_random_string
import abx import abx
import abx.archivebox
import abx.archivebox.use
import abx.django.use
import archivebox import archivebox
from archivebox.constants import CONSTANTS from archivebox.constants import CONSTANTS
@ -19,22 +23,19 @@ IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3] IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3]
VERSION = archivebox.__version__ VERSION = archivebox.VERSION
PACKAGE_DIR = archivebox.PACKAGE_DIR PACKAGE_DIR = archivebox.PACKAGE_DIR
DATA_DIR = archivebox.DATA_DIR DATA_DIR = archivebox.DATA_DIR
ARCHIVE_DIR = archivebox.DATA_DIR / 'archive' ARCHIVE_DIR = archivebox.ARCHIVE_DIR
################################################################################ ################################################################################
### ArchiveBox Plugin Settings ### ArchiveBox Plugin Settings
################################################################################ ################################################################################
PLUGIN_HOOKSPECS = [ PLUGIN_HOOKSPECS = [
'abx.hookspec_django_settings', 'abx.django.hookspec',
'abx.hookspec_django_apps', 'abx.pydantic_pkgr.hookspec',
'abx.hookspec_django_urls', 'abx.archivebox.hookspec',
'abx.hookspec_pydantic_pkgr',
'abx.hookspec_archivebox',
'plugantic.base_check',
] ]
abx.register_hookspecs(PLUGIN_HOOKSPECS) abx.register_hookspecs(PLUGIN_HOOKSPECS)
@ -55,20 +56,20 @@ USER_PLUGINS = abx.get_plugins_in_dirs(USER_PLUGIN_DIRS)
ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS} ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS}
PLUGIN_MANAGER = abx.pm PLUGIN_MANAGER = abx.pm
PLUGINS = abx.load_plugins(ALL_PLUGINS) PLUGINS = abx.archivebox.load_archivebox_plugins(PLUGIN_MANAGER, ALL_PLUGINS)
HOOKS = abx.get_plugins_HOOKS(PLUGINS) HOOKS = abx.archivebox.use.get_HOOKS(PLUGINS)
CONFIGS = abx.archivebox.use.get_CONFIGS()
FLAT_CONFIG = abx.archivebox.use.get_FLAT_CONFIG()
BINPROVIDERS = abx.archivebox.use.get_BINPROVIDERS()
BINARIES = abx.archivebox.use.get_BINARIES()
EXTRACTORS = abx.archivebox.use.get_EXTRACTORS()
REPLAYERS = abx.archivebox.use.get_REPLAYERS()
CHECKS = abx.archivebox.use.get_CHECKS()
ADMINDATAVIEWS = abx.archivebox.use.get_ADMINDATAVIEWS()
QUEUES = abx.archivebox.use.get_QUEUES()
SEARCHBACKENDS = abx.archivebox.use.get_SEARCHBACKENDS()
CONFIGS = abx.get_plugins_CONFIGS()
# FLAT_CONFIG = abx.get_plugins_FLAT_CONFIG(CONFIGS)
FLAT_CONFIG = CONFIG
BINPROVIDERS = abx.get_plugins_BINPROVIDERS()
BINARIES = abx.get_plugins_BINARIES()
EXTRACTORS = abx.get_plugins_EXTRACTORS()
REPLAYERS = abx.get_plugins_REPLAYERS()
CHECKS = abx.get_plugins_CHECKS()
ADMINDATAVIEWS = abx.get_plugins_ADMINDATAVIEWS()
QUEUES = abx.get_plugins_QUEUES()
SEARCHBACKENDS = abx.get_plugins_SEARCHBACKENDS()
################################################################################ ################################################################################
### Django Core Settings ### Django Core Settings
@ -104,14 +105,13 @@ INSTALLED_APPS = [
'django_object_actions', # provides easy Django Admin action buttons on change views https://github.com/crccheck/django-object-actions 'django_object_actions', # provides easy Django Admin action buttons on change views https://github.com/crccheck/django-object-actions
# Our ArchiveBox-provided apps # Our ArchiveBox-provided apps
# 'plugantic', # ArchiveBox plugin API definition + finding/registering/calling interface
'queues', # handles starting and managing background workers and processes 'queues', # handles starting and managing background workers and processes
'abid_utils', # handles ABID ID creation, handling, and models 'abid_utils', # handles ABID ID creation, handling, and models
'core', # core django model with Snapshot, ArchiveResult, etc. 'core', # core django model with Snapshot, ArchiveResult, etc.
'api', # Django-Ninja-based Rest API interfaces, config, APIToken model, etc. 'api', # Django-Ninja-based Rest API interfaces, config, APIToken model, etc.
# ArchiveBox plugins # ArchiveBox plugins
*abx.get_plugins_INSTALLLED_APPS(), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins, *abx.django.use.get_INSTALLED_APPS(), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,
# 3rd-party apps from PyPI that need to be loaded last # 3rd-party apps from PyPI that need to be loaded last
'admin_data_views', # handles rendering some convenient automatic read-only views of data in Django admin 'admin_data_views', # handles rendering some convenient automatic read-only views of data in Django admin
@ -136,7 +136,7 @@ MIDDLEWARE = [
'core.middleware.ReverseProxyAuthMiddleware', 'core.middleware.ReverseProxyAuthMiddleware',
'django.contrib.messages.middleware.MessageMiddleware', 'django.contrib.messages.middleware.MessageMiddleware',
'core.middleware.CacheControlMiddleware', 'core.middleware.CacheControlMiddleware',
*abx.get_plugins_MIDDLEWARE(), *abx.django.use.get_MIDDLEWARES(),
] ]
@ -149,7 +149,7 @@ MIDDLEWARE = [
AUTHENTICATION_BACKENDS = [ AUTHENTICATION_BACKENDS = [
'django.contrib.auth.backends.RemoteUserBackend', 'django.contrib.auth.backends.RemoteUserBackend',
'django.contrib.auth.backends.ModelBackend', 'django.contrib.auth.backends.ModelBackend',
*abx.get_plugins_AUTHENTICATION_BACKENDS(), *abx.django.use.get_AUTHENTICATION_BACKENDS(),
] ]
@ -177,7 +177,7 @@ STATICFILES_DIRS = [
# for plugin_dir in PLUGIN_DIRS.values() # for plugin_dir in PLUGIN_DIRS.values()
# if (plugin_dir / 'static').is_dir() # if (plugin_dir / 'static').is_dir()
# ], # ],
*abx.get_plugins_STATICFILES_DIRS(), *abx.django.use.get_STATICFILES_DIRS(),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'static'), str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'static'),
] ]
@ -188,7 +188,7 @@ TEMPLATE_DIRS = [
# for plugin_dir in PLUGIN_DIRS.values() # for plugin_dir in PLUGIN_DIRS.values()
# if (plugin_dir / 'templates').is_dir() # if (plugin_dir / 'templates').is_dir()
# ], # ],
*abx.get_plugins_TEMPLATE_DIRS(), *abx.django.use.get_TEMPLATE_DIRS(),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'core'), str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'core'),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'admin'), str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'admin'),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME), str(PACKAGE_DIR / TEMPLATES_DIR_NAME),
@ -225,10 +225,12 @@ DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", str(CONSTANTS.DATABAS
QUEUE_DATABASE_NAME = DATABASE_NAME.replace('index.sqlite3', 'queue.sqlite3') QUEUE_DATABASE_NAME = DATABASE_NAME.replace('index.sqlite3', 'queue.sqlite3')
SQLITE_CONNECTION_OPTIONS = { SQLITE_CONNECTION_OPTIONS = {
"ENGINE": "django.db.backends.sqlite3",
"TIME_ZONE": CONSTANTS.TIMEZONE, "TIME_ZONE": CONSTANTS.TIMEZONE,
"OPTIONS": { "OPTIONS": {
# https://gcollazo.com/optimal-sqlite-settings-for-django/ # https://gcollazo.com/optimal-sqlite-settings-for-django/
# # https://litestream.io/tips/#busy-timeout # https://litestream.io/tips/#busy-timeout
# https://docs.djangoproject.com/en/5.1/ref/databases/#setting-pragma-options
"timeout": 5, "timeout": 5,
"check_same_thread": False, "check_same_thread": False,
"transaction_mode": "IMMEDIATE", "transaction_mode": "IMMEDIATE",
@ -246,17 +248,14 @@ SQLITE_CONNECTION_OPTIONS = {
DATABASES = { DATABASES = {
"default": { "default": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": DATABASE_NAME, "NAME": DATABASE_NAME,
# DB setup is sometimes modified at runtime by setup_django() in config.py **SQLITE_CONNECTION_OPTIONS,
}, },
"queue": { "queue": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": QUEUE_DATABASE_NAME, "NAME": QUEUE_DATABASE_NAME,
**SQLITE_CONNECTION_OPTIONS, **SQLITE_CONNECTION_OPTIONS,
}, },
# 'cache': { # 'cache': {
# 'ENGINE': 'django.db.backends.sqlite3',
# 'NAME': CACHE_DB_PATH, # 'NAME': CACHE_DB_PATH,
# **SQLITE_CONNECTION_OPTIONS, # **SQLITE_CONNECTION_OPTIONS,
# }, # },
@ -295,7 +294,7 @@ DJANGO_HUEY = {
"queues": { "queues": {
HUEY["name"]: HUEY.copy(), HUEY["name"]: HUEY.copy(),
# more registered here at plugin import-time by BaseQueue.register() # more registered here at plugin import-time by BaseQueue.register()
**abx.get_plugins_DJANGO_HUEY_QUEUES(), **abx.django.use.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=QUEUE_DATABASE_NAME),
}, },
} }
@ -482,45 +481,45 @@ ADMIN_DATA_VIEWS = {
}, },
{ {
"route": "binaries/", "route": "binaries/",
"view": "plugantic.views.binaries_list_view", "view": "plugins_sys.config.views.binaries_list_view",
"name": "Binaries", "name": "Binaries",
"items": { "items": {
"route": "<str:key>/", "route": "<str:key>/",
"view": "plugantic.views.binary_detail_view", "view": "plugins_sys.config.views.binary_detail_view",
"name": "binary", "name": "binary",
}, },
}, },
{ {
"route": "plugins/", "route": "plugins/",
"view": "plugantic.views.plugins_list_view", "view": "plugins_sys.config.views.plugins_list_view",
"name": "Plugins", "name": "Plugins",
"items": { "items": {
"route": "<str:key>/", "route": "<str:key>/",
"view": "plugantic.views.plugin_detail_view", "view": "plugins_sys.config.views.plugin_detail_view",
"name": "plugin", "name": "plugin",
}, },
}, },
{ {
"route": "workers/", "route": "workers/",
"view": "plugantic.views.worker_list_view", "view": "plugins_sys.config.views.worker_list_view",
"name": "Workers", "name": "Workers",
"items": { "items": {
"route": "<str:key>/", "route": "<str:key>/",
"view": "plugantic.views.worker_detail_view", "view": "plugins_sys.config.views.worker_detail_view",
"name": "worker", "name": "worker",
}, },
}, },
{ {
"route": "logs/", "route": "logs/",
"view": "plugantic.views.log_list_view", "view": "plugins_sys.config.views.log_list_view",
"name": "Logs", "name": "Logs",
"items": { "items": {
"route": "<str:key>/", "route": "<str:key>/",
"view": "plugantic.views.log_detail_view", "view": "plugins_sys.config.views.log_detail_view",
"name": "log", "name": "log",
}, },
}, },
*abx.get_plugins_ADMIN_DATA_VIEWS_URLS(), *abx.django.use.get_ADMIN_DATA_VIEWS_URLS(),
], ],
} }
@ -614,5 +613,7 @@ DEBUG_LOGFIRE = DEBUG_LOGFIRE and (DATA_DIR / '.logfire').is_dir()
# JET_TOKEN = 'some-api-token-here' # JET_TOKEN = 'some-api-token-here'
abx.register_plugins_settings(globals()) abx.django.use.register_checks()
abx.archivebox.use.register_all_hooks(globals())
# import ipdb; ipdb.set_trace()

View file

@ -1,7 +1,7 @@
__package__ = 'archivebox.core' __package__ = 'archivebox.core'
from typing import Callable from typing import Callable
from benedict import benedict
from pathlib import Path from pathlib import Path
from django.shortcuts import render, redirect from django.shortcuts import render, redirect
@ -36,12 +36,15 @@ from ..config import (
CONFIG_SCHEMA, CONFIG_SCHEMA,
DYNAMIC_CONFIG_SCHEMA, DYNAMIC_CONFIG_SCHEMA,
USER_CONFIG, USER_CONFIG,
CONFIG,
) )
from ..logging_util import printable_filesize from ..logging_util import printable_filesize
from ..util import base_url, htmlencode, ts_to_date_str from ..util import base_url, htmlencode, ts_to_date_str
from ..search import query_search_index from ..search import query_search_index
from .serve_static import serve_static_with_byterange_support from .serve_static import serve_static_with_byterange_support
CONFIG = benedict({**CONSTANTS, **CONFIG, **settings.FLAT_CONFIG})
class HomepageView(View): class HomepageView(View):
def get(self, request): def get(self, request):
@ -533,8 +536,6 @@ def key_is_safe(key: str) -> bool:
@render_with_table_view @render_with_table_view
def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext: def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
CONFIG = settings.FLAT_CONFIG
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.' assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
rows = { rows = {

View file

@ -6,8 +6,6 @@ import shutil
import platform import platform
import archivebox import archivebox
CONSTANTS = archivebox.CONSTANTS
from typing import Dict, List, Optional, Iterable, IO, Union from typing import Dict, List, Optional, Iterable, IO, Union
from pathlib import Path from pathlib import Path
from datetime import date, datetime from datetime import date, datetime
@ -69,9 +67,8 @@ from .index.html import (
from .index.csv import links_to_csv from .index.csv import links_to_csv
from .extractors import archive_links, archive_link, ignore_methods from .extractors import archive_links, archive_link, ignore_methods
from .misc.logging import stderr, hint, ANSI from .misc.logging import stderr, hint, ANSI
from .misc.checks import check_data_folder, check_dependencies from .misc.checks import check_data_folder
from .config import ( from .config import (
setup_django_minimal,
ConfigDict, ConfigDict,
IS_TTY, IS_TTY,
DEBUG, DEBUG,
@ -91,7 +88,6 @@ from .config import (
CONFIG, CONFIG,
USER_CONFIG, USER_CONFIG,
get_real_name, get_real_name,
setup_django,
) )
from .logging_util import ( from .logging_util import (
TimedProgress, TimedProgress,
@ -108,6 +104,7 @@ from .logging_util import (
printable_dependency_version, printable_dependency_version,
) )
CONSTANTS = archivebox.CONSTANTS
VERSION = archivebox.VERSION VERSION = archivebox.VERSION
PACKAGE_DIR = archivebox.PACKAGE_DIR PACKAGE_DIR = archivebox.PACKAGE_DIR
OUTPUT_DIR = archivebox.DATA_DIR OUTPUT_DIR = archivebox.DATA_DIR
@ -190,7 +187,6 @@ def version(quiet: bool=False,
out_dir: Path=OUTPUT_DIR) -> None: out_dir: Path=OUTPUT_DIR) -> None:
"""Print the ArchiveBox version and dependency information""" """Print the ArchiveBox version and dependency information"""
setup_django_minimal()
from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SHELL_CONFIG from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SHELL_CONFIG
from plugins_auth.ldap.apps import LDAP_CONFIG from plugins_auth.ldap.apps import LDAP_CONFIG
from django.conf import settings from django.conf import settings
@ -270,7 +266,6 @@ def version(quiet: bool=False,
print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**ANSI)) print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**ANSI))
print() print()
check_dependencies(CONFIG)
@enforce_types @enforce_types
@ -461,7 +456,7 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
check_data_folder(CONFIG) check_data_folder(CONFIG)
from core.models import Snapshot from core.models import Snapshot
from django.contrib.auth import get_user_mod, SHELL_CONFIG from django.contrib.auth import get_user_model
User = get_user_model() User = get_user_model()
print('{green}[*] Scanning archive main index...{reset}'.format(**ANSI)) print('{green}[*] Scanning archive main index...{reset}'.format(**ANSI))
@ -602,7 +597,7 @@ def add(urls: Union[str, List[str]],
# Load list of links from the existing index # Load list of links from the existing index
check_data_folder(CONFIG) check_data_folder(CONFIG)
check_dependencies(CONFIG)
# worker = start_cli_workers() # worker = start_cli_workers()
new_links: List[Link] = [] new_links: List[Link] = []
@ -791,7 +786,6 @@ def update(resume: Optional[float]=None,
check_data_folder(CONFIG) check_data_folder(CONFIG)
check_dependencies(CONFIG)
# start_cli_workers() # start_cli_workers()
new_links: List[Link] = [] # TODO: Remove input argument: only_new new_links: List[Link] = [] # TODO: Remove input argument: only_new
@ -963,8 +957,6 @@ def setup(out_dir: Path=OUTPUT_DIR) -> None:
if not ARCHIVE_DIR.exists(): if not ARCHIVE_DIR.exists():
run_subcommand('init', stdin=None, pwd=out_dir) run_subcommand('init', stdin=None, pwd=out_dir)
setup_django(out_dir=out_dir, check_db=True)
stderr('\n[+] Installing ArchiveBox dependencies automatically...', color='green') stderr('\n[+] Installing ArchiveBox dependencies automatically...', color='green')
from plugins_extractor.ytdlp.apps import YTDLP_BINARY from plugins_extractor.ytdlp.apps import YTDLP_BINARY
@ -1109,7 +1101,6 @@ def schedule(add: bool=False,
"""Set ArchiveBox to regularly import URLs at specific times using cron""" """Set ArchiveBox to regularly import URLs at specific times using cron"""
check_data_folder(CONFIG) check_data_folder(CONFIG)
setup_django_minimal()
from plugins_pkg.pip.apps import ARCHIVEBOX_BINARY from plugins_pkg.pip.apps import ARCHIVEBOX_BINARY
from plugins_sys.config.apps import SHELL_CONFIG, CONSTANTS from plugins_sys.config.apps import SHELL_CONFIG, CONSTANTS
@ -1257,6 +1248,8 @@ def server(runserver_args: Optional[List[str]]=None,
from django.core.management import call_command from django.core.management import call_command
from django.contrib.auth.models import User from django.contrib.auth.models import User
print('{green}[+] Starting ArchiveBox webserver... {reset}'.format(**ANSI)) print('{green}[+] Starting ArchiveBox webserver... {reset}'.format(**ANSI))
print(' > Logging errors to ./logs/errors.log') print(' > Logging errors to ./logs/errors.log')
if not User.objects.filter(is_superuser=True).exists(): if not User.objects.filter(is_superuser=True).exists():
@ -1306,7 +1299,6 @@ def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
"""Run an ArchiveBox Django management command""" """Run an ArchiveBox Django management command"""
check_data_folder(CONFIG) check_data_folder(CONFIG)
setup_django_minimal()
from django.core.management import execute_from_command_line from django.core.management import execute_from_command_line
if (args and "createsuperuser" in args) and (IN_DOCKER and not IS_TTY): if (args and "createsuperuser" in args) and (IN_DOCKER and not IS_TTY):

View file

@ -1,38 +1,10 @@
__package__ = 'archivebox.misc' __package__ = 'archivebox.misc'
# TODO: migrate all of these to new plugantic/base_check.py Check system
from benedict import benedict from benedict import benedict
from pathlib import Path
import archivebox import archivebox
from .logging import stderr, hint, ANSI from .logging import stderr, ANSI
def check_dependencies(config: benedict, show_help: bool=True) -> None:
    """Do nothing: the startup dependency check is intentionally disabled.

    Both ``config`` and ``show_help`` are accepted but ignored, and the
    function always returns ``None``. The commented-out draft below shows
    the warning output this check was meant to print for missing binaries.
    """
    # don't do this on startup anymore, it's too slow
    pass
    # NOTE(review): the draft below is unfinished (the `if not binary.` filter is
    # incomplete) and is kept for reference only.
    # invalid_dependencies = [
    #     (name, binary) for name, info in settings.BINARIES.items()
    #     if not binary.
    # ]
    # if invalid_dependencies and show_help:
    #     stderr(f'[!] Warning: Missing {len(invalid_dependencies)} recommended dependencies', color='lightyellow')
    #     for dependency, info in invalid_dependencies:
    #         stderr(
    #             ' ! {}: {} ({})'.format(
    #                 dependency,
    #                 info['path'] or 'unable to find binary',
    #                 info['version'] or 'unable to detect version',
    #             )
    #         )
    #         if dependency in ('YOUTUBEDL_BINARY', 'CHROME_BINARY', 'SINGLEFILE_BINARY', 'READABILITY_BINARY', 'MERCURY_BINARY'):
    #             hint(('To install all packages automatically run: archivebox setup',
    #                   f'or to disable it and silence this warning: archivebox config --set SAVE_{dependency.rsplit("_", 1)[0]}=False',
    #                   ''), prefix=' ')
    #     stderr('')
def check_data_folder(config: benedict) -> None: def check_data_folder(config: benedict) -> None:

View file

@ -1 +0,0 @@
__package__ = 'archivebox.plugantic'

View file

@ -1,12 +0,0 @@
__package__ = 'archivebox.plugantic'
from django.apps import AppConfig
class PluganticConfig(AppConfig):
    """Django app configuration for the ``plugantic`` app."""

    name = 'plugantic'
    default_auto_field = 'django.db.models.BigAutoField'

    def ready(self) -> None:
        """App-ready hook; currently does nothing."""
        # A plugin-count log line was sketched here but never enabled:
        #   from django.conf import settings
        #   print(f'[🧩] Detected {len(settings.INSTALLED_PLUGINS)} settings.INSTALLED_PLUGINS to load...')
        return None

View file

@ -1,39 +0,0 @@
__package__ = 'archivebox.plugantic'
# from typing import Dict
from .base_hook import BaseHook, HookType
from ..config_stubs import AttrDict
class BaseAdminDataView(BaseHook):
    """Hook that adds a plugin-provided page to the admin data views."""

    hook_type: HookType = "ADMINDATAVIEW"

    # The registration code below reads self.verbose_name, self.route, self.view
    # and self.items; they are not declared here. Example values:
    #
    #   verbose_name: str = 'Data View'
    #   route: str = '/npm/installed/'
    #   view: str = 'plugins_pkg.npm.admin.installed_list_view'
    #   items: Dict[str, str] = {
    #       "name": "installed_npm_pkg",
    #       'route': '<str:key>/',
    #       'view': 'plugins_pkg.npm.admin.installed_detail_view',
    #   }

    def register(self, settings, parent_plugin=None):
        """Register the route, record this hook in ``settings.ADMINDATAVIEWS``, then defer to the base hook.

        Args:
            settings: settings object that is mutated in place.
            parent_plugin: forwarded unchanged to ``BaseHook.register``.
        """
        # No back-reference to parent_plugin is stored on the hook: a circular
        # ref to the parent would only help debugging and must never be relied on.
        self.register_route_in_admin_data_view_urls(settings)

        # Reuse the existing registry when it is set and truthy, else start a new one.
        installed_views = getattr(settings, "ADMINDATAVIEWS", None)
        if not installed_views:
            installed_views = AttrDict({})
        settings.ADMINDATAVIEWS = installed_views
        settings.ADMINDATAVIEWS[self.id] = self

        super().register(settings, parent_plugin)

    def register_route_in_admin_data_view_urls(self, settings):
        """Append this view's route entry to ``settings.ADMIN_DATA_VIEWS.URLS`` unless an equal entry is already there."""
        url_entry = {
            "route": self.route,
            "view": self.view,
            "name": self.verbose_name,
            "items": self.items,
        }
        if url_entry in settings.ADMIN_DATA_VIEWS.URLS:
            return
        # augmented assignment so the existing URLS value is extended and stored back
        settings.ADMIN_DATA_VIEWS.URLS += [url_entry]

View file

@ -1,39 +0,0 @@
__package__ = 'archivebox.plugantic'
from typing import Iterable, List
from benedict import benedict
from pydantic import Field
from .base_hook import BaseHook, HookType
class BaseSearchBackend(BaseHook):
    """Base hook for search backends.

    ``index`` and ``flush`` default to no-ops; ``search`` raises
    ``NotImplementedError`` until a subclass overrides it.
    """

    hook_type: HookType = 'SEARCHBACKEND'

    name: str = Field()       # e.g. 'singlefile'

    @staticmethod
    def index(snapshot_id: str, texts: List[str]):
        """Default implementation: ignore the arguments and return ``None``."""
        return None

    @staticmethod
    def flush(snapshot_ids: Iterable[str]):
        """Default implementation: ignore the arguments and return ``None``."""
        return None

    @staticmethod
    def search(text: str) -> List[str]:
        """Must be overridden; the base implementation always raises ``NotImplementedError``."""
        raise NotImplementedError("search method must be implemented by subclass")

    def register(self, settings, parent_plugin=None):
        """Record this backend in ``settings.SEARCH_BACKENDS`` and then in the base hook registry.

        Args:
            settings: settings object that is mutated in place.
            parent_plugin: forwarded to ``BaseHook.register``; not stored on
                the hook (a back-reference would be for debugging only).
        """
        # Install this search backend into settings.SEARCH_BACKENDS, reusing the
        # existing registry when it is set and truthy.
        backends = getattr(settings, "SEARCH_BACKENDS", None)
        if not backends:
            backends = benedict({})
        settings.SEARCH_BACKENDS = backends
        settings.SEARCH_BACKENDS[self.id] = self

        # Let the base class record the installed hook.
        super().register(settings, parent_plugin=parent_plugin)

View file

@ -1,72 +0,0 @@
# __package__ = 'archivebox.plugantic.management.commands'
# from django.core.management.base import BaseCommand
# from django.conf import settings
# from pydantic_pkgr import Binary, BinProvider, BrewProvider, EnvProvider, SemVer
# from pydantic_pkgr.binprovider import bin_abspath
# from ....config import bin_path
# from ...base_binary import env
# class Command(BaseCommand):
# def handle(self, *args, method, **options):
# method(*args, **options)
# def add_arguments(self, parser):
# subparsers = parser.add_subparsers(title="sub-commands", required=True)
# list_parser = subparsers.add_parser("list", help="List archivebox runtime dependencies.")
# list_parser.set_defaults(method=self.list)
# install_parser = subparsers.add_parser("install", help="Install archivebox runtime dependencies.")
# install_parser.add_argument("--update", action="store_true", help="Update dependencies to latest versions.")
# install_parser.add_argument("package_names", nargs="+", type=str)
# install_parser.set_defaults(method=self.install)
# def list(self, *args, **options):
# self.stdout.write('################# PLUGINS ####################')
# for plugin in settings.PLUGINS.values():
# self.stdout.write(f'{plugin.name}:')
# for binary in plugin.binaries:
# try:
# binary = binary.load()
# except Exception as e:
# # import ipdb; ipdb.set_trace()
# raise
# self.stdout.write(f' {binary.name.ljust(14)} {str(binary.version).ljust(11)} {binary.binprovider.INSTALLER_BIN.ljust(5)} {binary.abspath}')
# self.stdout.write('\n################# LEGACY ####################')
# for bin_key, dependency in settings.CONFIG.DEPENDENCIES.items():
# bin_name = settings.CONFIG[bin_key]
# self.stdout.write(f'{bin_key}: {bin_name}')
# # binary = Binary(name=package_name, providers=[env])
# # print(binary)
# # try:
# # loaded_bin = binary.load()
# # self.stdout.write(
# # self.style.SUCCESS(f'Successfully loaded {package_name}:') + str(loaded_bin)
# # )
# # except Exception as e:
# # self.stderr.write(
# # self.style.ERROR(f"Error loading {package_name}: {e}")
# # )
# def install(self, *args, bright, **options):
# for package_name in options["package_names"]:
# binary = Binary(name=package_name, providers=[env])
# print(binary)
# try:
# loaded_bin = binary.load()
# self.stdout.write(
# self.style.SUCCESS(f'Successfully loaded {package_name}:') + str(loaded_bin)
# )
# except Exception as e:
# self.stderr.write(
# self.style.ERROR(f"Error loading {package_name}: {e}")
# )

View file

@ -1,337 +0,0 @@
__package__ = 'archivebox.plugantic'
from django.test import TestCase
from .ini_to_toml import convert, TOML_HEADER
TEST_INPUT = """
[SERVER_CONFIG]
IS_TTY=False
USE_COLOR=False
SHOW_PROGRESS=False
IN_DOCKER=False
IN_QEMU=False
PUID=501
PGID=20
OUTPUT_DIR=/opt/archivebox/data
CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf
ONLY_NEW=True
TIMEOUT=60
MEDIA_TIMEOUT=3600
OUTPUT_PERMISSIONS=644
RESTRICT_FILE_NAMES=windows
URL_DENYLIST=\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$
URL_ALLOWLIST=None
ADMIN_USERNAME=None
ADMIN_PASSWORD=None
ENFORCE_ATOMIC_WRITES=True
TAG_SEPARATOR_PATTERN=[,]
SECRET_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
BIND_ADDR=127.0.0.1:8000
ALLOWED_HOSTS=*
DEBUG=False
PUBLIC_INDEX=True
PUBLIC_SNAPSHOTS=True
PUBLIC_ADD_VIEW=False
FOOTER_INFO=Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.
SNAPSHOTS_PER_PAGE=40
CUSTOM_TEMPLATES_DIR=None
TIME_ZONE=UTC
TIMEZONE=UTC
REVERSE_PROXY_USER_HEADER=Remote-User
REVERSE_PROXY_WHITELIST=
LOGOUT_REDIRECT_URL=/
PREVIEW_ORIGINALS=True
LDAP=False
LDAP_SERVER_URI=None
LDAP_BIND_DN=None
LDAP_BIND_PASSWORD=None
LDAP_USER_BASE=None
LDAP_USER_FILTER=None
LDAP_USERNAME_ATTR=None
LDAP_FIRSTNAME_ATTR=None
LDAP_LASTNAME_ATTR=None
LDAP_EMAIL_ATTR=None
LDAP_CREATE_SUPERUSER=False
SAVE_TITLE=True
SAVE_FAVICON=True
SAVE_WGET=True
SAVE_WGET_REQUISITES=True
SAVE_SINGLEFILE=True
SAVE_READABILITY=True
SAVE_MERCURY=True
SAVE_HTMLTOTEXT=True
SAVE_PDF=True
SAVE_SCREENSHOT=True
SAVE_DOM=True
SAVE_HEADERS=True
SAVE_WARC=True
SAVE_GIT=True
SAVE_MEDIA=True
SAVE_ARCHIVE_DOT_ORG=True
RESOLUTION=1440,2000
GIT_DOMAINS=github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht
CHECK_SSL_VALIDITY=True
MEDIA_MAX_SIZE=750m
USER_AGENT=None
CURL_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)
WGET_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5
CHROME_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)
COOKIES_FILE=None
CHROME_USER_DATA_DIR=None
CHROME_TIMEOUT=0
CHROME_HEADLESS=True
CHROME_SANDBOX=True
CHROME_EXTRA_ARGS=[]
YOUTUBEDL_ARGS=['--restrict-filenames', '--trim-filenames', '128', '--write-description', '--write-info-json', '--write-annotations', '--write-thumbnail', '--no-call-home', '--write-sub', '--write-auto-subs', '--convert-subs=srt', '--yes-playlist', '--continue', '--no-abort-on-error', '--ignore-errors', '--geo-bypass', '--add-metadata', '--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)']
YOUTUBEDL_EXTRA_ARGS=[]
WGET_ARGS=['--no-verbose', '--adjust-extension', '--convert-links', '--force-directories', '--backup-converted', '--span-hosts', '--no-parent', '-e', 'robots=off']
WGET_EXTRA_ARGS=[]
CURL_ARGS=['--silent', '--location', '--compressed']
CURL_EXTRA_ARGS=[]
GIT_ARGS=['--recursive']
SINGLEFILE_ARGS=[]
SINGLEFILE_EXTRA_ARGS=[]
MERCURY_ARGS=['--format=text']
MERCURY_EXTRA_ARGS=[]
FAVICON_PROVIDER=https://www.google.com/s2/favicons?domain={}
USE_INDEXING_BACKEND=True
USE_SEARCHING_BACKEND=True
SEARCH_BACKEND_ENGINE=ripgrep
SEARCH_BACKEND_HOST_NAME=localhost
SEARCH_BACKEND_PORT=1491
SEARCH_BACKEND_PASSWORD=SecretPassword
SEARCH_PROCESS_HTML=True
SONIC_COLLECTION=archivebox
SONIC_BUCKET=snapshots
SEARCH_BACKEND_TIMEOUT=90
FTS_SEPARATE_DATABASE=True
FTS_TOKENIZERS=porter unicode61 remove_diacritics 2
FTS_SQLITE_MAX_LENGTH=1000000000
USE_CURL=True
USE_WGET=True
USE_SINGLEFILE=True
USE_READABILITY=True
USE_MERCURY=True
USE_GIT=True
USE_CHROME=True
USE_NODE=True
USE_YOUTUBEDL=True
USE_RIPGREP=True
CURL_BINARY=curl
GIT_BINARY=git
WGET_BINARY=wget
SINGLEFILE_BINARY=single-file
READABILITY_BINARY=readability-extractor
MERCURY_BINARY=postlight-parser
YOUTUBEDL_BINARY=yt-dlp
NODE_BINARY=node
RIPGREP_BINARY=rg
CHROME_BINARY=chrome
POCKET_CONSUMER_KEY=None
USER=squash
PACKAGE_DIR=/opt/archivebox/archivebox
TEMPLATES_DIR=/opt/archivebox/archivebox/templates
ARCHIVE_DIR=/opt/archivebox/data/archive
SOURCES_DIR=/opt/archivebox/data/sources
LOGS_DIR=/opt/archivebox/data/logs
PERSONAS_DIR=/opt/archivebox/data/personas
URL_DENYLIST_PTN=re.compile('\\.(css|js|otf|ttf|woff|woff2|gstatic\\.com|googleapis\\.com/css)(\\?.*)?$', re.IGNORECASE|re.MULTILINE)
URL_ALLOWLIST_PTN=None
DIR_OUTPUT_PERMISSIONS=755
ARCHIVEBOX_BINARY=/opt/archivebox/.venv/bin/archivebox
VERSION=0.8.0
COMMIT_HASH=102e87578c6036bb0132dd1ebd17f8f05ffc880f
BUILD_TIME=2024-05-15 03:28:05 1715768885
VERSIONS_AVAILABLE=None
CAN_UPGRADE=False
PYTHON_BINARY=/opt/archivebox/.venv/bin/python3.10
PYTHON_VERSION=3.10.14
DJANGO_BINARY=/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py
DJANGO_VERSION=5.0.6 final (0)
SQLITE_BINARY=/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py
SQLITE_VERSION=2.6.0
CURL_VERSION=curl 8.4.0 (x86_64-apple-darwin23.0)
WGET_VERSION=GNU Wget 1.24.5
WGET_AUTO_COMPRESSION=True
RIPGREP_VERSION=ripgrep 14.1.0
SINGLEFILE_VERSION=None
READABILITY_VERSION=None
MERCURY_VERSION=None
GIT_VERSION=git version 2.44.0
YOUTUBEDL_VERSION=2024.04.09
CHROME_VERSION=Google Chrome 124.0.6367.207
NODE_VERSION=v21.7.3
"""
EXPECTED_OUTPUT = TOML_HEADER + '''[SERVER_CONFIG]
IS_TTY = false
USE_COLOR = false
SHOW_PROGRESS = false
IN_DOCKER = false
IN_QEMU = false
PUID = 501
PGID = 20
OUTPUT_DIR = "/opt/archivebox/data"
CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf"
ONLY_NEW = true
TIMEOUT = 60
MEDIA_TIMEOUT = 3600
OUTPUT_PERMISSIONS = 644
RESTRICT_FILE_NAMES = "windows"
URL_DENYLIST = "\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$"
URL_ALLOWLIST = null
ADMIN_USERNAME = null
ADMIN_PASSWORD = null
ENFORCE_ATOMIC_WRITES = true
TAG_SEPARATOR_PATTERN = "[,]"
SECRET_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
BIND_ADDR = "127.0.0.1:8000"
ALLOWED_HOSTS = "*"
DEBUG = false
PUBLIC_INDEX = true
PUBLIC_SNAPSHOTS = true
PUBLIC_ADD_VIEW = false
FOOTER_INFO = "Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests."
SNAPSHOTS_PER_PAGE = 40
CUSTOM_TEMPLATES_DIR = null
TIME_ZONE = "UTC"
TIMEZONE = "UTC"
REVERSE_PROXY_USER_HEADER = "Remote-User"
REVERSE_PROXY_WHITELIST = ""
LOGOUT_REDIRECT_URL = "/"
PREVIEW_ORIGINALS = true
LDAP = false
LDAP_SERVER_URI = null
LDAP_BIND_DN = null
LDAP_BIND_PASSWORD = null
LDAP_USER_BASE = null
LDAP_USER_FILTER = null
LDAP_USERNAME_ATTR = null
LDAP_FIRSTNAME_ATTR = null
LDAP_LASTNAME_ATTR = null
LDAP_EMAIL_ATTR = null
LDAP_CREATE_SUPERUSER = false
SAVE_TITLE = true
SAVE_FAVICON = true
SAVE_WGET = true
SAVE_WGET_REQUISITES = true
SAVE_SINGLEFILE = true
SAVE_READABILITY = true
SAVE_MERCURY = true
SAVE_HTMLTOTEXT = true
SAVE_PDF = true
SAVE_SCREENSHOT = true
SAVE_DOM = true
SAVE_HEADERS = true
SAVE_WARC = true
SAVE_GIT = true
SAVE_MEDIA = true
SAVE_ARCHIVE_DOT_ORG = true
RESOLUTION = [1440, 2000]
GIT_DOMAINS = "github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht"
CHECK_SSL_VALIDITY = true
MEDIA_MAX_SIZE = "750m"
USER_AGENT = null
CURL_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)"
WGET_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5"
CHROME_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)"
COOKIES_FILE = null
CHROME_USER_DATA_DIR = null
CHROME_TIMEOUT = false
CHROME_HEADLESS = true
CHROME_SANDBOX = true
CHROME_EXTRA_ARGS = []
YOUTUBEDL_ARGS = ["--restrict-filenames", "--trim-filenames", "128", "--write-description", "--write-info-json", "--write-annotations", "--write-thumbnail", "--no-call-home", "--write-sub", "--write-auto-subs", "--convert-subs=srt", "--yes-playlist", "--continue", "--no-abort-on-error", "--ignore-errors", "--geo-bypass", "--add-metadata", "--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)"]
YOUTUBEDL_EXTRA_ARGS = []
WGET_ARGS = ["--no-verbose", "--adjust-extension", "--convert-links", "--force-directories", "--backup-converted", "--span-hosts", "--no-parent", "-e", "robots=off"]
WGET_EXTRA_ARGS = []
CURL_ARGS = ["--silent", "--location", "--compressed"]
CURL_EXTRA_ARGS = []
GIT_ARGS = ["--recursive"]
SINGLEFILE_ARGS = []
SINGLEFILE_EXTRA_ARGS = []
MERCURY_ARGS = ["--format=text"]
MERCURY_EXTRA_ARGS = []
FAVICON_PROVIDER = "https://www.google.com/s2/favicons?domain={}"
USE_INDEXING_BACKEND = true
USE_SEARCHING_BACKEND = true
SEARCH_BACKEND_ENGINE = "ripgrep"
SEARCH_BACKEND_HOST_NAME = "localhost"
SEARCH_BACKEND_PORT = 1491
SEARCH_BACKEND_PASSWORD = "SecretPassword"
SEARCH_PROCESS_HTML = true
SONIC_COLLECTION = "archivebox"
SONIC_BUCKET = "snapshots"
SEARCH_BACKEND_TIMEOUT = 90
FTS_SEPARATE_DATABASE = true
FTS_TOKENIZERS = "porter unicode61 remove_diacritics 2"
FTS_SQLITE_MAX_LENGTH = 1000000000
USE_CURL = true
USE_WGET = true
USE_SINGLEFILE = true
USE_READABILITY = true
USE_MERCURY = true
USE_GIT = true
USE_CHROME = true
USE_NODE = true
USE_YOUTUBEDL = true
USE_RIPGREP = true
CURL_BINARY = "curl"
GIT_BINARY = "git"
WGET_BINARY = "wget"
SINGLEFILE_BINARY = "single-file"
READABILITY_BINARY = "readability-extractor"
MERCURY_BINARY = "postlight-parser"
YOUTUBEDL_BINARY = "yt-dlp"
NODE_BINARY = "node"
RIPGREP_BINARY = "rg"
CHROME_BINARY = "chrome"
POCKET_CONSUMER_KEY = null
USER = "squash"
PACKAGE_DIR = "/opt/archivebox/archivebox"
TEMPLATES_DIR = "/opt/archivebox/archivebox/templates"
ARCHIVE_DIR = "/opt/archivebox/data/archive"
SOURCES_DIR = "/opt/archivebox/data/sources"
LOGS_DIR = "/opt/archivebox/data/logs"
PERSONAS_DIR = "/opt/archivebox/data/personas"
URL_DENYLIST_PTN = "re.compile(\'\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$\', re.IGNORECASE|re.MULTILINE)"
URL_ALLOWLIST_PTN = null
DIR_OUTPUT_PERMISSIONS = 755
ARCHIVEBOX_BINARY = "/opt/archivebox/.venv/bin/archivebox"
VERSION = "0.8.0"
COMMIT_HASH = "102e87578c6036bb0132dd1ebd17f8f05ffc880f"
BUILD_TIME = "2024-05-15 03:28:05 1715768885"
VERSIONS_AVAILABLE = null
CAN_UPGRADE = false
PYTHON_BINARY = "/opt/archivebox/.venv/bin/python3.10"
PYTHON_VERSION = "3.10.14"
DJANGO_BINARY = "/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py"
DJANGO_VERSION = "5.0.6 final (0)"
SQLITE_BINARY = "/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py"
SQLITE_VERSION = "2.6.0"
CURL_VERSION = "curl 8.4.0 (x86_64-apple-darwin23.0)"
WGET_VERSION = "GNU Wget 1.24.5"
WGET_AUTO_COMPRESSION = true
RIPGREP_VERSION = "ripgrep 14.1.0"
SINGLEFILE_VERSION = null
READABILITY_VERSION = null
MERCURY_VERSION = null
GIT_VERSION = "git version 2.44.0"
YOUTUBEDL_VERSION = "2024.04.09"
CHROME_VERSION = "Google Chrome 124.0.6367.207"
NODE_VERSION = "v21.7.3"'''
class IniToTomlTests(TestCase):
    """Round-trip test for ``convert`` using the module's INI fixture and expected TOML text."""

    # Show the complete diff on failure; the fixtures are long and the default
    # unittest diff is truncated.
    maxDiff = None

    def test_convert(self):
        """Converting the fixture, then converting the result again, must both yield EXPECTED_OUTPUT."""
        first_output = convert(TEST_INPUT)       # make sure ini -> toml parses correctly
        second_output = convert(first_output)    # make sure toml -> toml parses/dumps consistently

        # assertEqual replaces the former bare `assert`: it still runs under
        # `python -O` and reports a diff on failure, so no ad-hoc difflib
        # debugging code is needed.
        self.assertEqual(first_output, second_output)      # make sure parsing is idempotent
        self.assertEqual(second_output, EXPECTED_OUTPUT)   # and matches the expected TOML

View file

@ -1,4 +1,4 @@
__package__ = 'archivebox.plugins_auth.ldap' __package__ = 'plugins_auth.ldap'
import inspect import inspect
@ -6,13 +6,11 @@ from typing import List, Dict
from pathlib import Path from pathlib import Path
from pydantic import InstanceOf from pydantic import InstanceOf
from django.conf import settings
from pydantic_pkgr import BinProviderName, ProviderLookupDict, SemVer from pydantic_pkgr import BinProviderName, ProviderLookupDict, SemVer
from plugantic.base_plugin import BasePlugin from abx.archivebox.base_plugin import BasePlugin
from plugantic.base_hook import BaseHook from abx.archivebox.base_hook import BaseHook
from plugantic.base_binary import BaseBinary, BaseBinProvider from abx.archivebox.base_binary import BaseBinary, BaseBinProvider
from plugins_pkg.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER from plugins_pkg.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER
from .settings import LDAP_CONFIG, LDAP_LIB from .settings import LDAP_CONFIG, LDAP_LIB
@ -51,5 +49,4 @@ class LdapAuthPlugin(BasePlugin):
PLUGIN = LdapAuthPlugin() PLUGIN = LdapAuthPlugin()
# PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig DJANGO_APP = PLUGIN.AppConfig

View file

@ -3,9 +3,9 @@ __package__ = 'archivebox.plugins_auth.ldap'
import sys import sys
from typing import Dict, List, ClassVar, Optional from typing import Dict, List, ClassVar, Optional
from pydantic import Field, model_validator from pydantic import Field, model_validator, computed_field
from ...plugantic.base_configset import BaseConfigSet, ConfigSectionName from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
LDAP_LIB = None LDAP_LIB = None
try: try:
@ -35,10 +35,10 @@ class LdapConfig(BaseConfigSet):
LDAP_USER_FILTER: str = Field(default=None) LDAP_USER_FILTER: str = Field(default=None)
LDAP_CREATE_SUPERUSER: bool = Field(default=False) LDAP_CREATE_SUPERUSER: bool = Field(default=False)
LDAP_USERNAME_ATTR: str = Field(default=None) LDAP_USERNAME_ATTR: str = Field(default='username')
LDAP_FIRSTNAME_ATTR: str = Field(default=None) LDAP_FIRSTNAME_ATTR: str = Field(default='first_name')
LDAP_LASTNAME_ATTR: str = Field(default=None) LDAP_LASTNAME_ATTR: str = Field(default='last_name')
LDAP_EMAIL_ATTR: str = Field(default=None) LDAP_EMAIL_ATTR: str = Field(default='email')
@model_validator(mode='after') @model_validator(mode='after')
def validate_ldap_config(self): def validate_ldap_config(self):
@ -50,14 +50,7 @@ class LdapConfig(BaseConfigSet):
self.update(LDAP_ENABLED=False) self.update(LDAP_ENABLED=False)
# Check that all required LDAP config options are set # Check that all required LDAP config options are set
all_config_is_set = ( if self.LDAP_ENABLED and not self.LDAP_CONFIG_IS_SET:
self.LDAP_SERVER_URI
and self.LDAP_BIND_DN
and self.LDAP_BIND_PASSWORD
and self.LDAP_USER_BASE
and self.LDAP_USER_FILTER
)
if self.LDAP_ENABLED and not all_config_is_set:
missing_config_options = [ missing_config_options = [
key for key, value in self.model_dump().items() key for key, value in self.model_dump().items()
if value is None and key != 'LDAP_ENABLED' if value is None and key != 'LDAP_ENABLED'
@ -67,6 +60,19 @@ class LdapConfig(BaseConfigSet):
self.update(LDAP_ENABLED=False) self.update(LDAP_ENABLED=False)
return self return self
@computed_field
@property
def LDAP_CONFIG_IS_SET(self) -> bool:
"""Check that all required LDAP config options are set"""
return bool(LDAP_LIB) and self.LDAP_ENABLED and bool(
self.LDAP_SERVER_URI
and self.LDAP_BIND_DN
and self.LDAP_BIND_PASSWORD
and self.LDAP_USER_BASE
and self.LDAP_USER_FILTER
)
@computed_field
@property @property
def LDAP_USER_ATTR_MAP(self) -> Dict[str, str]: def LDAP_USER_ATTR_MAP(self) -> Dict[str, str]:
return { return {
@ -76,6 +82,7 @@ class LdapConfig(BaseConfigSet):
'email': self.LDAP_EMAIL_ATTR, 'email': self.LDAP_EMAIL_ATTR,
} }
@computed_field
@property @property
def AUTHENTICATION_BACKENDS(self) -> List[str]: def AUTHENTICATION_BACKENDS(self) -> List[str]:
return [ return [
@ -83,9 +90,10 @@ class LdapConfig(BaseConfigSet):
'django_auth_ldap.backend.LDAPBackend', 'django_auth_ldap.backend.LDAPBackend',
] ]
@computed_field
@property @property
def AUTH_LDAP_USER_SEARCH(self) -> Optional[object]: def AUTH_LDAP_USER_SEARCH(self) -> Optional[object]:
return LDAP_LIB and LDAPSearch( return self.LDAP_USER_FILTER and LDAPSearch(
self.LDAP_USER_BASE, self.LDAP_USER_BASE,
LDAP_LIB.SCOPE_SUBTREE, # type: ignore LDAP_LIB.SCOPE_SUBTREE, # type: ignore
'(&(' + self.LDAP_USERNAME_ATTR + '=%(user)s)' + self.LDAP_USER_FILTER + ')', '(&(' + self.LDAP_USERNAME_ATTR + '=%(user)s)' + self.LDAP_USER_FILTER + ')',

View file

@ -2,9 +2,9 @@ __package__ = 'archivebox.plugins_extractor.archivedotorg'
from typing import List from typing import List
from plugantic.base_plugin import BasePlugin from abx.archivebox.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet from abx.archivebox.base_configset import BaseConfigSet
from plugantic.base_hook import BaseHook from abx.archivebox.base_hook import BaseHook
###################### Config ########################## ###################### Config ##########################

View file

@ -21,12 +21,12 @@ from pydantic_pkgr import (
import archivebox import archivebox
# Depends on other Django apps: # Depends on other Django apps:
from plugantic.base_plugin import BasePlugin from abx.archivebox.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_binary import BaseBinary, env from abx.archivebox.base_binary import BaseBinary, env
# from plugantic.base_extractor import BaseExtractor # from abx.archivebox.base_extractor import BaseExtractor
# from plugantic.base_queue import BaseQueue # from abx.archivebox.base_queue import BaseQueue
from plugantic.base_hook import BaseHook from abx.archivebox.base_hook import BaseHook
# Depends on Other Plugins: # Depends on Other Plugins:
from plugins_sys.config.apps import ARCHIVING_CONFIG, SHELL_CONFIG from plugins_sys.config.apps import ARCHIVING_CONFIG, SHELL_CONFIG

View file

@ -2,9 +2,9 @@ __package__ = 'archivebox.plugins_extractor.favicon'
from typing import List from typing import List
from plugantic.base_plugin import BasePlugin from abx.archivebox.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet from abx.archivebox.base_configset import BaseConfigSet
from plugantic.base_hook import BaseHook from abx.archivebox.base_hook import BaseHook
###################### Config ########################## ###################### Config ##########################

View file

@ -11,11 +11,11 @@ from pydantic import InstanceOf, Field, validate_call
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, ShallowBinary from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, ShallowBinary
# Depends on other Django apps: # Depends on other Django apps:
from plugantic.base_plugin import BasePlugin from abx.archivebox.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_binary import BaseBinary, env from abx.archivebox.base_binary import BaseBinary, env
from plugantic.base_extractor import BaseExtractor from abx.archivebox.base_extractor import BaseExtractor
from plugantic.base_hook import BaseHook from abx.archivebox.base_hook import BaseHook
# Depends on Other Plugins: # Depends on Other Plugins:
from plugins_sys.config.apps import ARCHIVING_CONFIG from plugins_sys.config.apps import ARCHIVING_CONFIG

View file

@ -11,12 +11,12 @@ from pydantic import InstanceOf, Field, validate_call
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, bin_abspath, ShallowBinary from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, bin_abspath, ShallowBinary
# Depends on other Django apps: # Depends on other Django apps:
from plugantic.base_plugin import BasePlugin from abx.archivebox.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_binary import BaseBinary, env from abx.archivebox.base_binary import BaseBinary, env
from plugantic.base_extractor import BaseExtractor from abx.archivebox.base_extractor import BaseExtractor
from plugantic.base_queue import BaseQueue from abx.archivebox.base_queue import BaseQueue
from plugantic.base_hook import BaseHook from abx.archivebox.base_hook import BaseHook
# Depends on Other Plugins: # Depends on Other Plugins:
from plugins_sys.config.apps import ARCHIVING_CONFIG from plugins_sys.config.apps import ARCHIVING_CONFIG
@ -84,19 +84,6 @@ class SinglefileBinary(BaseBinary):
return BaseBinary.install(self, binprovider_name=binprovider_name or LIB_NPM_BINPROVIDER.name) return BaseBinary.install(self, binprovider_name=binprovider_name or LIB_NPM_BINPROVIDER.name)
# ALTERNATIVE INSTALL METHOD using Ansible:
# install_playbook = PLUGANTIC_DIR / 'ansible' / 'install_singlefile.yml'
# singlefile_bin = run_playbook(install_playbook, data_dir=settings.CONFIG.OUTPUT_DIR, quiet=quiet).BINARIES.singlefile
# return self.__class__.model_validate(
# {
# **self.model_dump(),
# "loaded_abspath": singlefile_bin.abspath,
# "loaded_version": singlefile_bin.version,
# "loaded_binprovider": env,
# "binproviders_supported": self.binproviders_supported,
# }
# )
SINGLEFILE_BINARY = SinglefileBinary() SINGLEFILE_BINARY = SinglefileBinary()

View file

@ -0,0 +1,74 @@
from typing import List
from abx.archivebox.base_plugin import BasePlugin, InstanceOf, BaseHook
# class WgetToggleConfig(ConfigSet):
# section: ConfigSectionName = 'ARCHIVE_METHOD_TOGGLES'
# SAVE_WGET: bool = True
# SAVE_WARC: bool = True
# class WgetDependencyConfig(ConfigSet):
# section: ConfigSectionName = 'DEPENDENCY_CONFIG'
# WGET_BINARY: str = Field(default='wget')
# WGET_ARGS: Optional[List[str]] = Field(default=None)
# WGET_EXTRA_ARGS: List[str] = []
# WGET_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
# class WgetOptionsConfig(ConfigSet):
# section: ConfigSectionName = 'ARCHIVE_METHOD_OPTIONS'
# # loaded from shared config
# WGET_AUTO_COMPRESSION: bool = Field(default=True)
# SAVE_WGET_REQUISITES: bool = Field(default=True)
# WGET_USER_AGENT: str = Field(default='', alias='USER_AGENT')
# WGET_TIMEOUT: int = Field(default=60, alias='TIMEOUT')
# WGET_CHECK_SSL_VALIDITY: bool = Field(default=True, alias='CHECK_SSL_VALIDITY')
# WGET_RESTRICT_FILE_NAMES: str = Field(default='windows', alias='RESTRICT_FILE_NAMES')
# WGET_COOKIES_FILE: Optional[Path] = Field(default=None, alias='COOKIES_FILE')
# CONFIG = {
# 'CHECK_SSL_VALIDITY': False,
# 'SAVE_WARC': False,
# 'TIMEOUT': 999,
# }
# WGET_CONFIG = [
# WgetToggleConfig(**CONFIG),
# WgetDependencyConfig(**CONFIG),
# WgetOptionsConfig(**CONFIG),
# ]
# class WgetExtractor(Extractor):
# name: ExtractorName = 'wget'
# binary: Binary = WgetBinary()
# def get_output_path(self, snapshot) -> Path:
# return get_wget_output_path(snapshot)
# class WarcExtractor(Extractor):
# name: ExtractorName = 'warc'
# binary: Binary = WgetBinary()
# def get_output_path(self, snapshot) -> Path:
# return get_wget_output_path(snapshot)
# Plugin declaration for wget. It currently carries no hooks: the config and
# extractor classes sketched in the commented-out code above are not wired in.
class WgetPlugin(BasePlugin):
    # Short identifier for this plugin (presumably used as the Django app
    # label -- confirm against BasePlugin).
    app_label: str = 'wget'
    # Human-readable display name.
    verbose_name: str = 'WGET'

    # Hooks contributed by this plugin; empty for now.
    hooks: List[InstanceOf[BaseHook]] = []
# Module-level plugin instance, created once at import time.
PLUGIN = WgetPlugin()
# Expose the instance's AppConfig attribute under the DJANGO_APP name
# (presumably what Django loads for this app -- confirm against BasePlugin).
DJANGO_APP = PLUGIN.AppConfig

View file

@ -7,10 +7,10 @@ from pydantic import InstanceOf, Field, model_validator, AliasChoices
from django.conf import settings from django.conf import settings
from pydantic_pkgr import BinProvider, BinName, BinProviderName, ProviderLookupDict from pydantic_pkgr import BinProvider, BinName, BinProviderName, ProviderLookupDict
from plugantic.base_plugin import BasePlugin from abx.archivebox.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_binary import BaseBinary, env, apt, brew from abx.archivebox.base_binary import BaseBinary, env, apt, brew
from plugantic.base_hook import BaseHook from abx.archivebox.base_hook import BaseHook
from plugins_sys.config.apps import ARCHIVING_CONFIG from plugins_sys.config.apps import ARCHIVING_CONFIG
from plugins_pkg.pip.apps import pip from plugins_pkg.pip.apps import pip

View file

@ -11,10 +11,10 @@ from pydantic import InstanceOf, model_validator
from pydantic_pkgr import BinProvider, NpmProvider, BinName, PATHStr, BinProviderName from pydantic_pkgr import BinProvider, NpmProvider, BinName, PATHStr, BinProviderName
from plugantic.base_plugin import BasePlugin from abx.archivebox.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet from abx.archivebox.base_configset import BaseConfigSet
from plugantic.base_binary import BaseBinary, BaseBinProvider, env, apt, brew from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
from plugantic.base_hook import BaseHook from abx.archivebox.base_hook import BaseHook
###################### Config ########################## ###################### Config ##########################

View file

@ -15,11 +15,11 @@ from django.db.backends.sqlite3.base import Database as django_sqlite3 # typ
from django.core.checks import Error, Tags from django.core.checks import Error, Tags
from pydantic_pkgr import BinProvider, PipProvider, BinName, BinProviderName, ProviderLookupDict, SemVer from pydantic_pkgr import BinProvider, PipProvider, BinName, BinProviderName, ProviderLookupDict, SemVer
from plugantic.base_plugin import BasePlugin from abx.archivebox.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_check import BaseCheck from abx.archivebox.base_check import BaseCheck
from plugantic.base_binary import BaseBinary, BaseBinProvider, env, apt, brew from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
from plugantic.base_hook import BaseHook from abx.archivebox.base_hook import BaseHook
from ...misc.logging import hint from ...misc.logging import hint

View file

@ -22,12 +22,12 @@ from pydantic_pkgr import (
import archivebox import archivebox
# Depends on other Django apps: # Depends on other Django apps:
from plugantic.base_plugin import BasePlugin from abx.archivebox.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet from abx.archivebox.base_configset import BaseConfigSet
from plugantic.base_binary import BaseBinary, BaseBinProvider, env from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env
# from plugantic.base_extractor import BaseExtractor # from abx.archivebox.base_extractor import BaseExtractor
# from plugantic.base_queue import BaseQueue # from abx.archivebox.base_queue import BaseQueue
from plugantic.base_hook import BaseHook from abx.archivebox.base_hook import BaseHook
from plugins_pkg.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER from plugins_pkg.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER

View file

@ -19,12 +19,12 @@ from pydantic_pkgr import (
import archivebox import archivebox
# Depends on other Django apps: # Depends on other Django apps:
from plugantic.base_plugin import BasePlugin from abx.archivebox.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet from abx.archivebox.base_configset import BaseConfigSet
from plugantic.base_binary import BaseBinary, BaseBinProvider, env from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env
# from plugantic.base_extractor import BaseExtractor # from abx.archivebox.base_extractor import BaseExtractor
# from plugantic.base_queue import BaseQueue # from abx.archivebox.base_queue import BaseQueue
from plugantic.base_hook import BaseHook from abx.archivebox.base_hook import BaseHook
# Depends on Other Plugins: # Depends on Other Plugins:
from plugins_pkg.npm.apps import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER from plugins_pkg.npm.apps import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER

View file

@ -13,11 +13,11 @@ from pydantic import InstanceOf, Field
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName
# Depends on other Django apps: # Depends on other Django apps:
from plugantic.base_plugin import BasePlugin from abx.archivebox.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_binary import BaseBinary, env, apt, brew from abx.archivebox.base_binary import BaseBinary, env, apt, brew
from plugantic.base_hook import BaseHook from abx.archivebox.base_hook import BaseHook
from plugantic.base_searchbackend import BaseSearchBackend from abx.archivebox.base_searchbackend import BaseSearchBackend
# Depends on Other Plugins: # Depends on Other Plugins:
from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG

View file

@ -11,11 +11,11 @@ from pydantic import InstanceOf, Field, model_validator
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName
# Depends on other Django apps: # Depends on other Django apps:
from plugantic.base_plugin import BasePlugin from abx.archivebox.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_binary import BaseBinary, env, brew from abx.archivebox.base_binary import BaseBinary, env, brew
from plugantic.base_hook import BaseHook from abx.archivebox.base_hook import BaseHook
from plugantic.base_searchbackend import BaseSearchBackend from abx.archivebox.base_searchbackend import BaseSearchBackend
# Depends on Other Plugins: # Depends on Other Plugins:
from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG

View file

@ -1,21 +1,20 @@
__package__ = 'archivebox.plugins_search.sqlite' __package__ = 'archivebox.plugins_search.sqlite'
import sys import sys
import sqlite3
import codecs import codecs
import sqlite3
from typing import List, ClassVar, Iterable, Callable from typing import List, ClassVar, Iterable, Callable
from django.conf import settings from django.core.exceptions import ImproperlyConfigured
from django.db import connection as database
# Depends on other PyPI/vendor packages: # Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field, model_validator from pydantic import InstanceOf, Field, model_validator
# Depends on other Django apps: # Depends on other Django apps:
from plugantic.base_plugin import BasePlugin from abx.archivebox.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_hook import BaseHook from abx.archivebox.base_hook import BaseHook
from plugantic.base_searchbackend import BaseSearchBackend from abx.archivebox.base_searchbackend import BaseSearchBackend
# Depends on Other Plugins: # Depends on Other Plugins:
from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG
@ -52,6 +51,7 @@ class SqliteftsConfig(BaseConfigSet):
if self.SQLITEFTS_SEPARATE_DATABASE: if self.SQLITEFTS_SEPARATE_DATABASE:
return lambda: sqlite3.connect(self.SQLITEFTS_DB) return lambda: sqlite3.connect(self.SQLITEFTS_DB)
else: else:
from django.db import connection as database
return database.cursor return database.cursor
@property @property
@ -63,16 +63,20 @@ class SqliteftsConfig(BaseConfigSet):
@property @property
def SQLITE_LIMIT_LENGTH(self) -> int: def SQLITE_LIMIT_LENGTH(self) -> int:
from django.db import connection as database
# Only Python >= 3.11 supports sqlite3.Connection.getlimit(), # Only Python >= 3.11 supports sqlite3.Connection.getlimit(),
# so fall back to the default if the API to get the real value isn't present # so fall back to the default if the API to get the real value isn't present
try: try:
limit_id = sqlite3.SQLITE_LIMIT_LENGTH limit_id = sqlite3.SQLITE_LIMIT_LENGTH
try:
if self.SQLITEFTS_SEPARATE_DATABASE:
cursor = self.get_connection()
return cursor.connection.getlimit(limit_id)
else:
with database.temporary_connection() as cursor: # type: ignore[attr-defined] with database.temporary_connection() as cursor: # type: ignore[attr-defined]
return cursor.connection.getlimit(limit_id) return cursor.connection.getlimit(limit_id)
except AttributeError: except (AttributeError, ImproperlyConfigured):
return database.getlimit(limit_id)
except AttributeError:
return self.SQLITEFTS_MAX_LENGTH return self.SQLITEFTS_MAX_LENGTH
SQLITEFTS_CONFIG = SqliteftsConfig() SQLITEFTS_CONFIG = SqliteftsConfig()

View file

@ -1,21 +1,24 @@
__package__ = 'archivebox.plugins_sys.config' __package__ = 'plugins_sys.config'
import os import os
import sys import sys
import shutil import shutil
import archivebox
from typing import List, ClassVar, Dict, Optional from typing import List, ClassVar, Dict, Optional
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from pydantic import InstanceOf, Field, field_validator, model_validator, computed_field
from rich import print from rich import print
from pydantic import InstanceOf, Field, field_validator, model_validator, computed_field
from django.utils.crypto import get_random_string from django.utils.crypto import get_random_string
from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_hook import BaseHook, HookType
from .constants import CONSTANTS, CONSTANTS_CONFIG from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_hook import BaseHook
import archivebox
from archivebox.constants import CONSTANTS, CONSTANTS_CONFIG # noqa
###################### Config ########################## ###################### Config ##########################
@ -123,6 +126,7 @@ class StorageConfig(BaseConfigSet):
# not supposed to be user settable: # not supposed to be user settable:
DIR_OUTPUT_PERMISSIONS: str = Field(default=lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')) DIR_OUTPUT_PERMISSIONS: str = Field(default=lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5'))
STORAGE_CONFIG = StorageConfig() STORAGE_CONFIG = StorageConfig()
@ -249,13 +253,13 @@ DJANGO_APP = PLUGIN.AppConfig
# register django apps # # register django apps
@archivebox.plugin.hookimpl # @abx.hookimpl
def get_INSTALLED_APPS(): # def get_INSTALLED_APPS():
return [DJANGO_APP.name] # return [DJANGO_APP.name]
# register configs # # register configs
@archivebox.plugin.hookimpl # @abx.hookimpl
def register_CONFIG(): # def register_CONFIG():
return PLUGIN.HOOKS_BY_TYPE['CONFIG'].values() # return PLUGIN.HOOKS_BY_TYPE['CONFIG'].values()

View file

@ -1,8 +1,9 @@
__package__ = 'archivebox.plugantic' __package__ = 'abx.archivebox'
import os import os
import inspect import inspect
from typing import Any, List, Dict, cast from typing import Any, List, Dict, cast
from benedict import benedict
from django.http import HttpRequest from django.http import HttpRequest
from django.conf import settings from django.conf import settings
@ -14,8 +15,7 @@ from admin_data_views.utils import render_with_table_view, render_with_item_view
import archivebox import archivebox
from ..config_stubs import AttrDict from archivebox.util import parse_date
from ..util import parse_date
def obj_to_yaml(obj: Any, indent: int=0) -> str: def obj_to_yaml(obj: Any, indent: int=0) -> str:
@ -255,7 +255,7 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
) )
all_config_entries = cast(List[Dict[str, Any]], supervisor.getAllConfigInfo() or []) all_config_entries = cast(List[Dict[str, Any]], supervisor.getAllConfigInfo() or [])
all_config = {config["name"]: AttrDict(config) for config in all_config_entries} all_config = {config["name"]: benedict(config) for config in all_config_entries}
# Add top row for supervisord process manager # Add top row for supervisord process manager
rows["Name"].append(ItemLink('supervisord', key='supervisord')) rows["Name"].append(ItemLink('supervisord', key='supervisord'))
@ -274,7 +274,7 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
# Add a row for each worker process managed by supervisord # Add a row for each worker process managed by supervisord
for proc in cast(List[Dict[str, Any]], supervisor.getAllProcessInfo()): for proc in cast(List[Dict[str, Any]], supervisor.getAllProcessInfo()):
proc = AttrDict(proc) proc = benedict(proc)
# { # {
# "name": "daphne", # "name": "daphne",
# "group": "daphne", # "group": "daphne",
@ -334,7 +334,7 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
start_ts = [line for line in relevant_logs.split("\n") if "RPC interface 'supervisor' initialized" in line][-1].split(",", 1)[0] start_ts = [line for line in relevant_logs.split("\n") if "RPC interface 'supervisor' initialized" in line][-1].split(",", 1)[0]
uptime = str(timezone.now() - parse_date(start_ts)).split(".")[0] uptime = str(timezone.now() - parse_date(start_ts)).split(".")[0]
proc = AttrDict( proc = benedict(
{ {
"name": "supervisord", "name": "supervisord",
"pid": supervisor.getPID(), "pid": supervisor.getPID(),
@ -347,7 +347,7 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
} }
) )
else: else:
proc = AttrDict(get_worker(supervisor, key) or {}) proc = benedict(get_worker(supervisor, key) or {})
relevant_config = [config for config in all_config if config['name'] == key][0] relevant_config = [config for config in all_config if config['name'] == key][0]
relevant_logs = supervisor.tailProcessStdoutLog(key, 0, 10_000_000)[0] relevant_logs = supervisor.tailProcessStdoutLog(key, 0, 10_000_000)[0]

View file

@ -26,7 +26,7 @@ except ImportError:
from archivebox.constants import STATICFILE_EXTENSIONS from archivebox.constants import STATICFILE_EXTENSIONS
from archivebox.plugins_sys.config.apps import ARCHIVING_CONFIG from plugins_sys.config.apps import ARCHIVING_CONFIG
from .misc.logging import COLOR_DICT from .misc.logging import COLOR_DICT