diff --git a/archivebox/builtin_plugins/__init__.py b/archivebox/auth_plugins/ldap/__init__.py similarity index 100% rename from archivebox/builtin_plugins/__init__.py rename to archivebox/auth_plugins/ldap/__init__.py diff --git a/archivebox/auth_plugins/ldap/apps.py b/archivebox/auth_plugins/ldap/apps.py new file mode 100644 index 00000000..5cf18fec --- /dev/null +++ b/archivebox/auth_plugins/ldap/apps.py @@ -0,0 +1,55 @@ +__package__ = 'archivebox.auth_plugins.ldap' + +import inspect + +from typing import List, Dict +from pathlib import Path +from pydantic import InstanceOf + +from django.conf import settings + +from pydantic_pkgr import BinProviderName, ProviderLookupDict, SemVer + +from plugantic.base_plugin import BasePlugin +from plugantic.base_hook import BaseHook +from plugantic.base_binary import BaseBinary, BaseBinProvider + +from pkg_plugins.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER +from .settings import LDAP_CONFIG, LDAP_LIB + + +###################### Config ########################## + + +class LdapBinary(BaseBinary): + name: str = 'ldap' + description: str = 'LDAP Authentication' + binproviders_supported: List[InstanceOf[BaseBinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER] + + provider_overrides: Dict[BinProviderName, ProviderLookupDict] = { + VENV_PIP_BINPROVIDER.name: { + "abspath": lambda: LDAP_LIB and Path(inspect.getfile(LDAP_LIB)), + "version": lambda: LDAP_LIB and SemVer(LDAP_LIB.__version__), + }, + SYS_PIP_BINPROVIDER.name: { + "abspath": lambda: LDAP_LIB and Path(inspect.getfile(LDAP_LIB)), + "version": lambda: LDAP_LIB and SemVer(LDAP_LIB.__version__), + }, + } + +LDAP_BINARY = LdapBinary() + + +class LdapAuthPlugin(BasePlugin): + app_label: str = 'ldap' + verbose_name: str = 'LDAP Authentication' + + hooks: List[InstanceOf[BaseHook]] = [ + LDAP_CONFIG, + LDAP_BINARY, + ] + + +PLUGIN = LdapAuthPlugin() +PLUGIN.register(settings) +DJANGO_APP = PLUGIN.AppConfig diff --git 
a/archivebox/auth_plugins/ldap/settings.py b/archivebox/auth_plugins/ldap/settings.py new file mode 100644 index 00000000..f7320ee0 --- /dev/null +++ b/archivebox/auth_plugins/ldap/settings.py @@ -0,0 +1,85 @@ +__package__ = 'archivebox.auth_plugins.ldap' + +import sys + +from typing import Dict, List, ClassVar, Optional +from pydantic import Field, model_validator + +from ...plugantic.base_configset import BaseConfigSet, ConfigSectionName + +LDAP_LIB = None +try: + import ldap + from django_auth_ldap.config import LDAPSearch + LDAP_LIB = ldap +except ImportError: + pass + +###################### Config ########################## + + +class LdapConfig(BaseConfigSet): + """ + LDAP Config gets imported by core/settings.py very early during startup, so it needs to be in a separate file from apps.py + so that it can be imported during settings.py initialization before the apps are loaded. + """ + section: ClassVar[ConfigSectionName] = 'LDAP_CONFIG' + + LDAP_ENABLED: bool = Field(default=False, alias='LDAP') + + LDAP_SERVER_URI: str = Field(default=None) + LDAP_BIND_DN: str = Field(default=None) + LDAP_BIND_PASSWORD: str = Field(default=None) + LDAP_USER_BASE: str = Field(default=None) + LDAP_USER_FILTER: str = Field(default=None) + LDAP_CREATE_SUPERUSER: bool = Field(default=False) + + LDAP_USERNAME_ATTR: str = Field(default=None) + LDAP_FIRSTNAME_ATTR: str = Field(default=None) + LDAP_LASTNAME_ATTR: str = Field(default=None) + LDAP_EMAIL_ATTR: str = Field(default=None) + + @model_validator(mode='after') + def validate_ldap_config(self): + if self.LDAP_ENABLED and LDAP_LIB is None: + sys.stderr.write('[X] Error: Found LDAP=True config but LDAP packages not installed. You may need to run: pip install archivebox[ldap]\n\n') + # dont hard exit here. 
in case the user is just running "archivebox version" or "archivebox help", we still want those to work despite broken ldap + # sys.exit(1) + self.LDAP_ENABLED = False + + if self.LDAP_ENABLED: + assert ( + self.LDAP_SERVER_URI + and self.LDAP_BIND_DN + and self.LDAP_BIND_PASSWORD + and self.LDAP_USER_BASE + and self.LDAP_USER_FILTER + ), 'LDAP_* config options must all be set if LDAP_ENABLED=True' + return self + + @property + def LDAP_USER_ATTR_MAP(self) -> Dict[str, str]: + return { + 'username': self.LDAP_USERNAME_ATTR, + 'first_name': self.LDAP_FIRSTNAME_ATTR, + 'last_name': self.LDAP_LASTNAME_ATTR, + 'email': self.LDAP_EMAIL_ATTR, + } + + @property + def AUTHENTICATION_BACKENDS(self) -> List[str]: + return [ + 'django.contrib.auth.backends.ModelBackend', + 'django_auth_ldap.backend.LDAPBackend', + ] + + @property + def AUTH_LDAP_USER_SEARCH(self) -> Optional[object]: + return LDAP_LIB and LDAPSearch( + self.LDAP_USER_BASE, + LDAP_LIB.SCOPE_SUBTREE, # type: ignore + '(&(' + self.LDAP_USERNAME_ATTR + '=%(user)s)' + self.LDAP_USER_FILTER + ')', + ) + + +LDAP_CONFIG = LdapConfig() diff --git a/archivebox/config.py b/archivebox/config.py index a0a61f6f..53c23b2e 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -89,14 +89,15 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = { 'URL_DENYLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$', 'aliases': ('URL_BLACKLIST',)}, # to avoid downloading code assets as their own pages 'URL_ALLOWLIST': {'type': str, 'default': None, 'aliases': ('URL_WHITELIST',)}, - 'ADMIN_USERNAME': {'type': str, 'default': None}, - 'ADMIN_PASSWORD': {'type': str, 'default': None}, 'ENFORCE_ATOMIC_WRITES': {'type': bool, 'default': True}, 'TAG_SEPARATOR_PATTERN': {'type': str, 'default': r'[,]'}, }, 'SERVER_CONFIG': { + 'ADMIN_USERNAME': {'type': str, 'default': None}, + 'ADMIN_PASSWORD': {'type': str, 'default': None}, + 'SECRET_KEY': {'type': str, 'default': None}, 
'BIND_ADDR': {'type': str, 'default': lambda c: ['127.0.0.1:8000', '0.0.0.0:8000'][c['IN_DOCKER']]}, 'ALLOWED_HOSTS': {'type': str, 'default': '*'}, # e.g. archivebox.example.com,archivebox2.example.com @@ -420,7 +421,7 @@ CONSTANTS = { "COLOR_DICT": {'default': lambda c: COLOR_DICT}, "STATICFILE_EXTENSIONS": {'default': lambda c: STATICFILE_EXTENSIONS}, "ALLOWED_IN_OUTPUT_DIR": {'default': lambda c: ALLOWED_IN_OUTPUT_DIR}, - "ALLOWDENYLIST_REGEX_FLAGS": {'default': lambda c: ALLOWDENYLIST_REGEX_FLAGS}, + # "ALLOWDENYLIST_REGEX_FLAGS": {'default': lambda c: ALLOWDENYLIST_REGEX_FLAGS}, } ############################## Version Config ################################## @@ -579,8 +580,8 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = { 'COMMIT_HASH': {'default': lambda c: get_commit_hash(c)}, # short git commit hash of codebase HEAD commit 'BUILD_TIME': {'default': lambda c: get_build_time(c)}, # docker build completed time or python src last modified time - 'VERSIONS_AVAILABLE': {'default': lambda c: get_versions_available_on_github(c)}, - 'CAN_UPGRADE': {'default': lambda c: can_upgrade(c)}, + 'VERSIONS_AVAILABLE': {'default': lambda c: False}, # get_versions_available_on_github(c)}, + 'CAN_UPGRADE': {'default': lambda c: False}, # can_upgrade(c)}, 'PYTHON_BINARY': {'default': lambda c: sys.executable}, 'PYTHON_ENCODING': {'default': lambda c: sys.stdout.encoding.upper()}, diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index 62e2d241..b055096b 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -21,37 +21,40 @@ IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3] IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3] -DATA_DIR = Path(os.curdir).resolve() -assert DATA_DIR == CONFIG.OUTPUT_DIR PACKAGE_DIR = Path(__file__).resolve().parent.parent assert PACKAGE_DIR == CONFIG.PACKAGE_DIR +DATA_DIR = 
Path(os.curdir).resolve() +assert DATA_DIR == CONFIG.OUTPUT_DIR +ARCHIVE_DIR = DATA_DIR / 'archive' +assert ARCHIVE_DIR == CONFIG.ARCHIVE_DIR + ################################################################################ ### ArchiveBox Plugin Settings ################################################################################ -BUILTIN_PLUGINS_DIR = PACKAGE_DIR / 'builtin_plugins' # /app/archivebox/builtin_plugins -USERDATA_PLUGINS_DIR = DATA_DIR / 'user_plugins' # /data/user_plugins - -# PLUGIN_IMPORT_ORDER = ['base', 'pip', 'npm', 'ytdlp'] -# -# def get_plugin_order(p: Path) -> str: -# return str(PLUGIN_IMPORT_ORDER.index(p.parent.name)) if p.parent.name in PLUGIN_IMPORT_ORDER else str(p) def find_plugins_in_dir(plugins_dir: Path, prefix: str) -> Dict[str, Path]: - """{"builtin_plugins.pip": "/app/archivebox/builtin_plugins/pip", "user_plugins.other": "/data/user_plugins/other",...}""" + """{"pkg_plugins.pip": "/app/archivebox/pkg_plugins/pip", "user_plugins.other": "/data/user_plugins/other",...}""" return { f"{prefix}.{plugin_entrypoint.parent.name}": plugin_entrypoint.parent for plugin_entrypoint in sorted(plugins_dir.glob("*/apps.py")) # key=get_plugin_order # Someday enforcing plugin import order may be required, but right now it's not needed } - -INSTALLED_PLUGINS = { - **find_plugins_in_dir(BUILTIN_PLUGINS_DIR, prefix='builtin_plugins'), - **find_plugins_in_dir(USERDATA_PLUGINS_DIR, prefix='user_plugins'), + +PLUGIN_DIRS = { + 'sys_plugins': PACKAGE_DIR / 'sys_plugins', + 'pkg_plugins': PACKAGE_DIR / 'pkg_plugins', + 'auth_plugins': PACKAGE_DIR / 'auth_plugins', + 'extractor_plugins': PACKAGE_DIR / 'extractor_plugins', + 'user_plugins': DATA_DIR / 'user_plugins', } +INSTALLED_PLUGINS = {} +for plugin_prefix, plugin_dir in PLUGIN_DIRS.items(): + INSTALLED_PLUGINS.update(find_plugins_in_dir(plugin_dir, prefix=plugin_prefix)) -### Plugins Globals (filled by builtin_plugins.npm.apps.NpmPlugin.register() after Django startup) + +### Plugins Globals 
(filled by plugin_type.pluginname.apps.PluginName.register() after Django startup) PLUGINS = AttrDict({}) HOOKS = AttrDict({}) @@ -106,7 +109,7 @@ INSTALLED_APPS = [ 'api', # Django-Ninja-based Rest API interfaces, config, APIToken model, etc. # ArchiveBox plugins - *INSTALLED_PLUGINS.keys(), # all plugin django-apps found in archivebox/builtin_plugins and data/user_plugins, + *INSTALLED_PLUGINS.keys(), # all plugin django-apps found in archivebox/*_plugins and data/user_plugins, # plugin.register(settings) is called at import of each plugin (in the order they are listed here), then plugin.ready() is called at AppConfig.ready() time # 3rd-party apps from PyPI that need to be loaded last @@ -141,46 +144,16 @@ AUTHENTICATION_BACKENDS = [ 'django.contrib.auth.backends.ModelBackend', ] -if CONFIG.LDAP: - try: - import ldap - from django_auth_ldap.config import LDAPSearch - - global AUTH_LDAP_SERVER_URI - global AUTH_LDAP_BIND_DN - global AUTH_LDAP_BIND_PASSWORD - global AUTH_LDAP_USER_SEARCH - global AUTH_LDAP_USER_ATTR_MAP - - AUTH_LDAP_SERVER_URI = CONFIG.LDAP_SERVER_URI - AUTH_LDAP_BIND_DN = CONFIG.LDAP_BIND_DN - AUTH_LDAP_BIND_PASSWORD = CONFIG.LDAP_BIND_PASSWORD - - assert AUTH_LDAP_SERVER_URI and CONFIG.LDAP_USERNAME_ATTR and CONFIG.LDAP_USER_FILTER, 'LDAP_* config options must all be set if LDAP=True' - - AUTH_LDAP_USER_SEARCH = LDAPSearch( - CONFIG.LDAP_USER_BASE, - ldap.SCOPE_SUBTREE, - '(&(' + CONFIG.LDAP_USERNAME_ATTR + '=%(user)s)' + CONFIG.LDAP_USER_FILTER + ')', - ) - - AUTH_LDAP_USER_ATTR_MAP = { - 'username': CONFIG.LDAP_USERNAME_ATTR, - 'first_name': CONFIG.LDAP_FIRSTNAME_ATTR, - 'last_name': CONFIG.LDAP_LASTNAME_ATTR, - 'email': CONFIG.LDAP_EMAIL_ATTR, - } - - AUTHENTICATION_BACKENDS = [ - 'django.contrib.auth.backends.ModelBackend', - 'django_auth_ldap.backend.LDAPBackend', - ] - except ModuleNotFoundError: - sys.stderr.write('[X] Error: Found LDAP=True config but LDAP packages not installed. 
You may need to run: pip install archivebox[ldap]\n\n') - # dont hard exit here. in case the user is just running "archivebox version" or "archivebox help", we still want those to work despite broken ldap - # sys.exit(1) - +from ..auth_plugins.ldap.settings import LDAP_CONFIG +if LDAP_CONFIG.LDAP_ENABLED: + AUTH_LDAP_BIND_DN = LDAP_CONFIG.LDAP_BIND_DN + AUTH_LDAP_SERVER_URI = LDAP_CONFIG.LDAP_SERVER_URI + AUTH_LDAP_BIND_PASSWORD = LDAP_CONFIG.LDAP_BIND_PASSWORD + AUTH_LDAP_USER_ATTR_MAP = LDAP_CONFIG.LDAP_USER_ATTR_MAP + AUTH_LDAP_USER_SEARCH = LDAP_CONFIG.AUTH_LDAP_USER_SEARCH + + AUTHENTICATION_BACKENDS = LDAP_CONFIG.AUTHENTICATION_BACKENDS ################################################################################ ### Staticfile and Template Settings @@ -496,6 +469,7 @@ else: LOG_LEVEL_DATABASE = 'DEBUG' if DEBUG else 'WARNING' LOG_LEVEL_REQUEST = 'DEBUG' if DEBUG else 'WARNING' + import pydantic import django.template @@ -585,7 +559,7 @@ LOGGING = { "handlers": ["default", "logfile"], "level": "DEBUG", }, - "builtin_plugins": { + "extractor_plugins": { "handlers": ["default", "logfile"], "level": "DEBUG", }, diff --git a/archivebox/builtin_plugins/chrome/__init__.py b/archivebox/extractor_plugins/__init__.py similarity index 100% rename from archivebox/builtin_plugins/chrome/__init__.py rename to archivebox/extractor_plugins/__init__.py diff --git a/archivebox/builtin_plugins/npm/__init__.py b/archivebox/extractor_plugins/chrome/__init__.py similarity index 100% rename from archivebox/builtin_plugins/npm/__init__.py rename to archivebox/extractor_plugins/chrome/__init__.py diff --git a/archivebox/builtin_plugins/chrome/apps.py b/archivebox/extractor_plugins/chrome/apps.py similarity index 97% rename from archivebox/builtin_plugins/chrome/apps.py rename to archivebox/extractor_plugins/chrome/apps.py index 56eb48de..7f197e26 100644 --- a/archivebox/builtin_plugins/chrome/apps.py +++ b/archivebox/extractor_plugins/chrome/apps.py @@ -23,8 +23,8 @@ from 
plugantic.base_binary import BaseBinary, env from plugantic.base_hook import BaseHook # Depends on Other Plugins: -from builtin_plugins.puppeteer.apps import PUPPETEER_BINPROVIDER -from builtin_plugins.playwright.apps import PLAYWRIGHT_BINPROVIDER +from pkg_plugins.puppeteer.apps import PUPPETEER_BINPROVIDER +from pkg_plugins.playwright.apps import PLAYWRIGHT_BINPROVIDER CHROMIUM_BINARY_NAMES_LINUX = [ diff --git a/archivebox/builtin_plugins/pip/__init__.py b/archivebox/extractor_plugins/singlefile/__init__.py similarity index 100% rename from archivebox/builtin_plugins/pip/__init__.py rename to archivebox/extractor_plugins/singlefile/__init__.py diff --git a/archivebox/builtin_plugins/singlefile/apps.py b/archivebox/extractor_plugins/singlefile/apps.py similarity index 90% rename from archivebox/builtin_plugins/singlefile/apps.py rename to archivebox/extractor_plugins/singlefile/apps.py index 6fcc5051..e5386969 100644 --- a/archivebox/builtin_plugins/singlefile/apps.py +++ b/archivebox/extractor_plugins/singlefile/apps.py @@ -1,14 +1,14 @@ -__package__ = 'archivebox.builtin_plugins.singlefile' +__package__ = 'archivebox.extractor_plugins.singlefile' from pathlib import Path from typing import List, Dict, Optional, ClassVar -from typing_extensions import Self +# from typing_extensions import Self from django.conf import settings # Depends on other PyPI/vendor packages: from pydantic import InstanceOf, Field, validate_call -from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, bin_abspath +from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, bin_abspath, ShallowBinary # Depends on other Django apps: from plugantic.base_plugin import BasePlugin @@ -19,8 +19,8 @@ from plugantic.base_queue import BaseQueue from plugantic.base_hook import BaseHook # Depends on Other Plugins: -from builtin_plugins.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER -from builtin_plugins.base.apps import CORE_CONFIG +from 
sys_plugins.base.apps import ARCHIVING_CONFIG +from pkg_plugins.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER ###################### Config ########################## @@ -33,11 +33,10 @@ class SinglefileToggleConfigs(BaseConfigSet): class SinglefileOptionsConfigs(BaseConfigSet): section: ClassVar[ConfigSectionName] = 'ARCHIVE_METHOD_OPTIONS' - SINGLEFILE_USER_AGENT: str = Field(default=lambda: CORE_CONFIG.USER_AGENT) - SINGLEFILE_TIMEOUT: int = Field(default=lambda: CORE_CONFIG.TIMEOUT) - SINGLEFILE_CHECK_SSL_VALIDITY: bool = Field(default=lambda: CORE_CONFIG.CHECK_SSL_VALIDITY) - SINGLEFILE_RESTRICT_FILE_NAMES: str = Field(default=lambda: CORE_CONFIG.RESTRICT_FILE_NAMES) - SINGLEFILE_COOKIES_FILE: Optional[Path] = Field(default=lambda: CORE_CONFIG.COOKIES_FILE) + SINGLEFILE_USER_AGENT: str = Field(default=lambda: ARCHIVING_CONFIG.USER_AGENT) + SINGLEFILE_TIMEOUT: int = Field(default=lambda: ARCHIVING_CONFIG.TIMEOUT) + SINGLEFILE_CHECK_SSL_VALIDITY: bool = Field(default=lambda: ARCHIVING_CONFIG.CHECK_SSL_VALIDITY) + SINGLEFILE_COOKIES_FILE: Optional[Path] = Field(default=lambda: ARCHIVING_CONFIG.COOKIES_FILE) class SinglefileDependencyConfigs(BaseConfigSet): @@ -87,12 +86,12 @@ class SinglefileBinary(BaseBinary): } @validate_call - def install(self, binprovider_name: Optional[BinProviderName]=None) -> Self: + def install(self, binprovider_name: Optional[BinProviderName]=None) -> ShallowBinary: # force install to only use lib/npm provider, we never want to modify global NPM packages return BaseBinary.install(self, binprovider_name=binprovider_name or LIB_NPM_BINPROVIDER.name) @validate_call - def load_or_install(self, binprovider_name: Optional[BinProviderName] = None) -> Self: + def load_or_install(self, binprovider_name: Optional[BinProviderName] = None) -> ShallowBinary: # force install to only use lib/npm provider, we never want to modify global NPM packages try: return self.load() diff --git 
a/archivebox/builtin_plugins/singlefile/migrations/0001_initial.py b/archivebox/extractor_plugins/singlefile/migrations/0001_initial.py similarity index 100% rename from archivebox/builtin_plugins/singlefile/migrations/0001_initial.py rename to archivebox/extractor_plugins/singlefile/migrations/0001_initial.py diff --git a/archivebox/builtin_plugins/playwright/__init__.py b/archivebox/extractor_plugins/singlefile/migrations/__init__.py similarity index 100% rename from archivebox/builtin_plugins/playwright/__init__.py rename to archivebox/extractor_plugins/singlefile/migrations/__init__.py diff --git a/archivebox/builtin_plugins/singlefile/models.py b/archivebox/extractor_plugins/singlefile/models.py similarity index 100% rename from archivebox/builtin_plugins/singlefile/models.py rename to archivebox/extractor_plugins/singlefile/models.py diff --git a/archivebox/builtin_plugins/singlefile/tasks.py b/archivebox/extractor_plugins/singlefile/tasks.py similarity index 100% rename from archivebox/builtin_plugins/singlefile/tasks.py rename to archivebox/extractor_plugins/singlefile/tasks.py diff --git a/archivebox/builtin_plugins/puppeteer/__init__.py b/archivebox/extractor_plugins/ytdlp/__init__.py similarity index 100% rename from archivebox/builtin_plugins/puppeteer/__init__.py rename to archivebox/extractor_plugins/ytdlp/__init__.py diff --git a/archivebox/builtin_plugins/ytdlp/apps.py b/archivebox/extractor_plugins/ytdlp/apps.py similarity index 94% rename from archivebox/builtin_plugins/ytdlp/apps.py rename to archivebox/extractor_plugins/ytdlp/apps.py index f88cf6d8..a513119c 100644 --- a/archivebox/builtin_plugins/ytdlp/apps.py +++ b/archivebox/extractor_plugins/ytdlp/apps.py @@ -10,7 +10,7 @@ from plugantic.base_configset import BaseConfigSet, ConfigSectionName from plugantic.base_binary import BaseBinary, env, apt, brew from plugantic.base_hook import BaseHook -from builtin_plugins.pip.apps import pip +from pkg_plugins.pip.apps import pip 
###################### Config ########################## @@ -65,7 +65,8 @@ FFMPEG_BINARY = FfmpegBinary() class YtdlpPlugin(BasePlugin): app_label: str = 'ytdlp' - verbose_name: str = 'YTDLP' + verbose_name: str = 'YT-DLP' + docs_url: str = 'https://github.com/yt-dlp/yt-dlp' hooks: List[InstanceOf[BaseHook]] = [ YTDLP_CONFIG, diff --git a/archivebox/package-lock.json b/archivebox/package-lock.json index 42bd3256..db0ac368 100644 --- a/archivebox/package-lock.json +++ b/archivebox/package-lock.json @@ -242,9 +242,9 @@ "license": "MIT" }, "node_modules/@types/node": { - "version": "22.5.5", - "resolved": "https://registry.npmjs.org/@types/node/-/node-22.5.5.tgz", - "integrity": "sha512-Xjs4y5UPO/CLdzpgR6GirZJx36yScjh73+2NlLlkFRSoQN8B0DpfXPdZGnvVmLRLOsqDpOfTNv7D9trgGhmOIA==", + "version": "22.6.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.6.1.tgz", + "integrity": "sha512-V48tCfcKb/e6cVUigLAaJDAILdMP0fUW6BidkPK4GpGjXcfbnoHasCZDwz3N3yVt5we2RHm4XTQCpv0KJz9zqw==", "license": "MIT", "optional": true, "dependencies": { diff --git a/archivebox/builtin_plugins/singlefile/__init__.py b/archivebox/pkg_plugins/npm/__init__.py similarity index 100% rename from archivebox/builtin_plugins/singlefile/__init__.py rename to archivebox/pkg_plugins/npm/__init__.py diff --git a/archivebox/builtin_plugins/npm/apps.py b/archivebox/pkg_plugins/npm/apps.py similarity index 97% rename from archivebox/builtin_plugins/npm/apps.py rename to archivebox/pkg_plugins/npm/apps.py index 44e54428..96585ba2 100644 --- a/archivebox/builtin_plugins/npm/apps.py +++ b/archivebox/pkg_plugins/npm/apps.py @@ -1,4 +1,4 @@ -__package__ = 'archivebox.builtin_plugins.npm' +__package__ = 'archivebox.pkg_plugins.npm' from pathlib import Path from typing import List, Optional diff --git a/archivebox/builtin_plugins/singlefile/migrations/__init__.py b/archivebox/pkg_plugins/pip/__init__.py similarity index 100% rename from archivebox/builtin_plugins/singlefile/migrations/__init__.py rename to 
archivebox/pkg_plugins/pip/__init__.py diff --git a/archivebox/builtin_plugins/pip/apps.py b/archivebox/pkg_plugins/pip/apps.py similarity index 100% rename from archivebox/builtin_plugins/pip/apps.py rename to archivebox/pkg_plugins/pip/apps.py diff --git a/archivebox/builtin_plugins/ytdlp/__init__.py b/archivebox/pkg_plugins/playwright/__init__.py similarity index 100% rename from archivebox/builtin_plugins/ytdlp/__init__.py rename to archivebox/pkg_plugins/playwright/__init__.py diff --git a/archivebox/builtin_plugins/playwright/apps.py b/archivebox/pkg_plugins/playwright/apps.py similarity index 98% rename from archivebox/builtin_plugins/playwright/apps.py rename to archivebox/pkg_plugins/playwright/apps.py index 0559dd2a..cd606884 100644 --- a/archivebox/builtin_plugins/playwright/apps.py +++ b/archivebox/pkg_plugins/playwright/apps.py @@ -27,8 +27,7 @@ from plugantic.base_binary import BaseBinary, BaseBinProvider, env # from plugantic.base_queue import BaseQueue from plugantic.base_hook import BaseHook -# Depends on Other Plugins: -from builtin_plugins.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER +from pkg_plugins.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER ###################### Config ########################## diff --git a/archivebox/pkg_plugins/puppeteer/__init__.py b/archivebox/pkg_plugins/puppeteer/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/builtin_plugins/puppeteer/apps.py b/archivebox/pkg_plugins/puppeteer/apps.py similarity index 98% rename from archivebox/builtin_plugins/puppeteer/apps.py rename to archivebox/pkg_plugins/puppeteer/apps.py index f6992611..a6bf67da 100644 --- a/archivebox/builtin_plugins/puppeteer/apps.py +++ b/archivebox/pkg_plugins/puppeteer/apps.py @@ -25,7 +25,7 @@ from plugantic.base_binary import BaseBinary, BaseBinProvider, env from plugantic.base_hook import BaseHook # Depends on Other Plugins: -from 
builtin_plugins.npm.apps import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER +from pkg_plugins.npm.apps import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER ###################### Config ########################## diff --git a/archivebox/plugantic/__init__.py b/archivebox/plugantic/__init__.py index 950a947c..98372eb4 100644 --- a/archivebox/plugantic/__init__.py +++ b/archivebox/plugantic/__init__.py @@ -1,9 +1 @@ __package__ = 'archivebox.plugantic' - -from .base_plugin import BasePlugin -from .base_configset import BaseConfigSet -from .base_binary import BaseBinary -from .base_extractor import BaseExtractor -from .base_replayer import BaseReplayer -from .base_check import BaseCheck - diff --git a/archivebox/plugantic/ansible_utils.py b/archivebox/plugantic/ansible_utils.py index 7288c971..8957fe5d 100644 --- a/archivebox/plugantic/ansible_utils.py +++ b/archivebox/plugantic/ansible_utils.py @@ -1,59 +1,59 @@ -import os +# import os -from pathlib import Path +# from pathlib import Path -from benedict import benedict -from rich.pretty import pprint +# from benedict import benedict +# from rich.pretty import pprint -from ansible_runner import Runner, RunnerConfig +# from ansible_runner import Runner, RunnerConfig -GLOBAL_CACHE = {} +# GLOBAL_CACHE = {} -def run_playbook(playbook_path, data_dir, quiet=False, **kwargs): - ANSIBLE_TMP_DIR = str(Path(data_dir) / "tmp" / "ansible") - os.environ['ANSIBLE_INVENTORY_UNPARSED_WARNING'] = 'False' - os.environ['ANSIBLE_LOCALHOST_WARNING'] = 'False' - os.environ["ANSIBLE_HOME"] = ANSIBLE_TMP_DIR - # os.environ["ANSIBLE_COLLECTIONS_PATH"] = str(Path(data_dir).parent / 'archivebox') - os.environ["ANSIBLE_ROLES_PATH"] = ( - '/Volumes/NVME/Users/squash/Code/archiveboxes/archivebox7/archivebox/builtin_plugins/ansible/roles' - ) +# def run_playbook(playbook_path, data_dir, quiet=False, **kwargs): +# ANSIBLE_TMP_DIR = str(Path(data_dir) / "tmp" / "ansible") +# os.environ['ANSIBLE_INVENTORY_UNPARSED_WARNING'] = 'False' +# 
os.environ['ANSIBLE_LOCALHOST_WARNING'] = 'False' +# os.environ["ANSIBLE_HOME"] = ANSIBLE_TMP_DIR +# # os.environ["ANSIBLE_COLLECTIONS_PATH"] = str(Path(data_dir).parent / 'archivebox') +# os.environ["ANSIBLE_ROLES_PATH"] = ( +# './roles' +# ) - rc = RunnerConfig( - private_data_dir=ANSIBLE_TMP_DIR, - playbook=str(playbook_path), - rotate_artifacts=50000, - host_pattern="localhost", - extravars={ - "DATA_DIR": str(data_dir), - **kwargs, - }, - quiet=quiet, - ) - rc.prepare() - r = Runner(config=rc) - r.set_fact_cache('localhost', GLOBAL_CACHE) - r.run() - last_run_facts = r.get_fact_cache('localhost') - GLOBAL_CACHE.update(filtered_facts(last_run_facts)) - return benedict({ - key: val - for key, val in last_run_facts.items() - if not (key.startswith('ansible_') or key in ('gather_subset', 'module_setup')) - }) +# rc = RunnerConfig( +# private_data_dir=ANSIBLE_TMP_DIR, +# playbook=str(playbook_path), +# rotate_artifacts=50000, +# host_pattern="localhost", +# extravars={ +# "DATA_DIR": str(data_dir), +# **kwargs, +# }, +# quiet=quiet, +# ) +# rc.prepare() +# r = Runner(config=rc) +# r.set_fact_cache('localhost', GLOBAL_CACHE) +# r.run() +# last_run_facts = r.get_fact_cache('localhost') +# GLOBAL_CACHE.update(filtered_facts(last_run_facts)) +# return benedict({ +# key: val +# for key, val in last_run_facts.items() +# if not (key.startswith('ansible_') or key in ('gather_subset', 'module_setup')) +# }) -def filtered_facts(facts): - return benedict({ - key: val - for key, val in facts.items() - if not (key.startswith('ansible_') or key in ('gather_subset', 'module_setup')) - }) +# def filtered_facts(facts): +# return benedict({ +# key: val +# for key, val in facts.items() +# if not (key.startswith('ansible_') or key in ('gather_subset', 'module_setup')) +# }) -def print_globals(): - pprint(filtered_facts(GLOBAL_CACHE), expand_all=True) +# def print_globals(): +# pprint(filtered_facts(GLOBAL_CACHE), expand_all=True) -# YTDLP_OUTPUT = run_playbook('extract.yml', {'url': 
'https://www.youtube.com/watch?v=cK4REjqGc9w&t=27s'}) -# pprint(YTDLP_OUTPUT) +# # YTDLP_OUTPUT = run_playbook('extract.yml', {'url': 'https://www.youtube.com/watch?v=cK4REjqGc9w&t=27s'}) +# # pprint(YTDLP_OUTPUT) diff --git a/archivebox/plugantic/base_admindataview.py b/archivebox/plugantic/base_admindataview.py index 34914203..b5cac1de 100644 --- a/archivebox/plugantic/base_admindataview.py +++ b/archivebox/plugantic/base_admindataview.py @@ -1,21 +1,22 @@ __package__ = 'archivebox.plugantic' -from typing import Dict +# from typing import Dict from .base_hook import BaseHook, HookType from ..config_stubs import AttrDict + class BaseAdminDataView(BaseHook): hook_type: HookType = "ADMINDATAVIEW" - verbose_name: str = 'NPM Installed Packages' - route: str = '/npm/installed/' - view: str = 'builtin_plugins.npm.admin.installed_list_view' - items: Dict[str, str] = { - "name": "installed_npm_pkg", - 'route': '/', - 'view': 'builtin_plugins.npm.admin.installed_detail_view', - } + # verbose_name: str = 'Data View' + # route: str = '/npm/installed/' + # view: str = 'pkg_plugins.npm.admin.installed_list_view' + # items: Dict[str, str] = { + # "name": "installed_npm_pkg", + # 'route': '/', + # 'view': 'pkg_plugins.npm.admin.installed_detail_view', + # } def register(self, settings, parent_plugin=None): # self._plugin = parent_plugin # circular ref to parent only here for easier debugging! never depend on circular backref to parent in real code! diff --git a/archivebox/plugantic/base_binary.py b/archivebox/plugantic/base_binary.py index 810f56b9..4aa96ac2 100644 --- a/archivebox/plugantic/base_binary.py +++ b/archivebox/plugantic/base_binary.py @@ -42,7 +42,11 @@ class BaseBinProvider(BaseHook, BinProvider): settings.BINPROVIDERS[self.id] = self super().register(settings, parent_plugin=parent_plugin) - + + @property + def admin_url(self) -> str: + # e.g. 
/admin/environment/binproviders/NpmBinProvider/ TODO + return "/admin/environment/binaries/" class BaseBinary(BaseHook, Binary): @@ -87,6 +91,11 @@ class BaseBinary(BaseHook, Binary): binary = super().load_or_install(**kwargs) self.symlink_to_lib(binary=binary, bin_dir=settings.CONFIG.BIN_DIR) return binary + + @property + def admin_url(self) -> str: + # e.g. /admin/environment/config/LdapConfig/ + return f"/admin/environment/binaries/{self.name}/" apt = AptProvider() brew = BrewProvider() diff --git a/archivebox/plugantic/base_configset.py b/archivebox/plugantic/base_configset.py index 09d6fbfa..d104afd5 100644 --- a/archivebox/plugantic/base_configset.py +++ b/archivebox/plugantic/base_configset.py @@ -1,9 +1,10 @@ __package__ = 'archivebox.plugantic' +import os import re import json from pathlib import Path -from typing import List, Literal, Type, Tuple, Callable, ClassVar, Any +from typing import Literal, Type, Tuple, Callable, ClassVar, Any, get_args import toml from benedict import benedict @@ -13,29 +14,27 @@ from pydantic_settings.sources import TomlConfigSettingsSource from pydantic_pkgr.base_types import func_takes_args_or_kwargs -from django.conf import settings - from .base_hook import BaseHook, HookType from . import ini_to_toml + +PACKAGE_DIR = Path(__file__).resolve().parent.parent +DATA_DIR = Path(os.curdir).resolve() + + ConfigSectionName = Literal[ 'SHELL_CONFIG', 'GENERAL_CONFIG', + 'STORAGE_CONFIG', 'SERVER_CONFIG', + 'ARCHIVING_CONFIG', + 'LDAP_CONFIG', 'ARCHIVE_METHOD_TOGGLES', 'ARCHIVE_METHOD_OPTIONS', 'SEARCH_BACKEND_CONFIG', 'DEPENDENCY_CONFIG', ] -ConfigSectionNames: List[ConfigSectionName] = [ - 'SHELL_CONFIG', - 'GENERAL_CONFIG', - 'SERVER_CONFIG', - 'ARCHIVE_METHOD_TOGGLES', - 'ARCHIVE_METHOD_OPTIONS', - 'SEARCH_BACKEND_CONFIG', - 'DEPENDENCY_CONFIG', -] +ConfigSectionNames: Tuple[ConfigSectionName, ...] 
= get_args(ConfigSectionName) # just gets the list of values from the Literal type def better_toml_dump_str(val: Any) -> str: @@ -136,7 +135,7 @@ class ArchiveBoxBaseConfig(BaseSettings): ) -> Tuple[PydanticBaseSettingsSource, ...]: """Defines the config precedence order: Schema defaults -> ArchiveBox.conf (TOML) -> Environment variables""" - ARCHIVEBOX_CONFIG_FILE = settings.DATA_DIR / "ArchiveBox.conf" + ARCHIVEBOX_CONFIG_FILE = DATA_DIR / "ArchiveBox.conf" ARCHIVEBOX_CONFIG_FILE_BAK = ARCHIVEBOX_CONFIG_FILE.parent / ".ArchiveBox.conf.bak" # import ipdb; ipdb.set_trace() @@ -177,7 +176,7 @@ class ArchiveBoxBaseConfig(BaseSettings): """Populate any unset values using function provided as their default""" for key, field in self.model_fields.items(): - config_so_far = self.model_dump(include=set(self.model_fields.keys()), warnings=False) + config_so_far = benedict(self.model_dump(include=set(self.model_fields.keys()), warnings=False)) value = getattr(self, key) if isinstance(value, Callable): # if value is a function, execute it to get the actual value, passing existing config as a dict arg diff --git a/archivebox/plugantic/base_hook.py b/archivebox/plugantic/base_hook.py index 2aaae2ba..aab612ae 100644 --- a/archivebox/plugantic/base_hook.py +++ b/archivebox/plugantic/base_hook.py @@ -5,7 +5,7 @@ from huey.api import TaskWrapper from pathlib import Path from typing import List, Literal, ClassVar -from pydantic import BaseModel, ConfigDict, Field, computed_field +from pydantic import BaseModel, ConfigDict HookType = Literal['CONFIG', 'BINPROVIDER', 'BINARY', 'EXTRACTOR', 'REPLAYER', 'CHECK', 'ADMINDATAVIEW', 'QUEUE'] @@ -26,11 +26,11 @@ class BaseHook(BaseModel): # django imports AppConfig, models, migrations, admins, etc. for all installed apps # django then calls AppConfig.ready() on each installed app... 
- builtin_plugins.npm.NpmPlugin().AppConfig.ready() # called by django - builtin_plugins.npm.NpmPlugin().register(settings) -> - builtin_plugins.npm.NpmConfigSet().register(settings) + pkg_plugins.npm.NpmPlugin().AppConfig.ready() # called by django + pkg_plugins.npm.NpmPlugin().register(settings) -> + pkg_plugins.npm.NpmConfigSet().register(settings) plugantic.base_configset.BaseConfigSet().register(settings) - plugantic.base_hook.BaseHook().register(settings, parent_plugin=builtin_plugins.npm.NpmPlugin()) + plugantic.base_hook.BaseHook().register(settings, parent_plugin=pkg_plugins.npm.NpmPlugin()) ... ... @@ -74,22 +74,27 @@ class BaseHook(BaseModel): @property def hook_module(self) -> str: - """e.g. builtin_plugins.singlefile.apps.SinglefileConfigSet""" + """e.g. extractor_plugins.singlefile.apps.SinglefileConfigSet""" return f'{self.__module__}.{self.__class__.__name__}' @property def hook_file(self) -> Path: - """e.g. builtin_plugins.singlefile.apps.SinglefileConfigSet""" + """e.g. .../archivebox/extractor_plugins/singlefile/apps.py (path of the file that defines the hook class)""" return Path(inspect.getfile(self.__class__)) @property def plugin_module(self) -> str: - """e.g. builtin_plugins.singlefile""" + """e.g. extractor_plugins.singlefile""" return f"{self.__module__}.{self.__class__.__name__}".split("archivebox.", 1)[-1].rsplit(".apps.", 1)[0] @property def plugin_dir(self) -> Path: return Path(inspect.getfile(self.__class__)).parent.resolve() + + @property + def admin_url(self) -> str: + # e.g. /admin/environment/config/LdapConfig/ + return f"/admin/environment/{self.hook_type.lower()}/{self.id}/" def register(self, settings, parent_plugin=None): diff --git a/archivebox/plugantic/base_plugin.py b/archivebox/plugantic/base_plugin.py index a78f5ffc..eb8e0161 100644 --- a/archivebox/plugantic/base_plugin.py +++ b/archivebox/plugantic/base_plugin.py @@ -39,6 +39,7 @@ class BasePlugin(BaseModel): # Required by AppConfig: app_label: str = Field() # e.g. 
'singlefile' (one-word machine-readable representation, to use as url-safe id/db-table prefix_/attr name) verbose_name: str = Field() # e.g. 'SingleFile' (human-readable *short* label, for use in column names, form labels, etc.) + docs_url: str = Field(default=None) # e.g. 'https://github.com/...' # All the hooks the plugin will install: hooks: List[InstanceOf[BaseHook]] = Field(default=[]) @@ -60,10 +61,16 @@ class BasePlugin(BaseModel): def plugin_module(self) -> str: # DottedImportPath """ " Dotted import path of the plugin's module (after its loaded via settings.INSTALLED_APPS). - e.g. 'archivebox.builtin_plugins.npm.apps.NpmPlugin' -> 'builtin_plugins.npm' + e.g. 'archivebox.pkg_plugins.npm.apps.NpmPlugin' -> 'pkg_plugins.npm' """ return f"{self.__module__}.{self.__class__.__name__}".split("archivebox.", 1)[-1].rsplit('.apps.', 1)[0] + + @property + def plugin_module_full(self) -> str: # DottedImportPath + """e.g. 'archivebox.pkg_plugins.npm.apps.NpmPlugin'""" + return f"{self.__module__}.{self.__class__.__name__}" + # @computed_field @property def plugin_dir(self) -> Path: @@ -77,7 +84,7 @@ class BasePlugin(BaseModel): # preserve references to original default objects, # pydantic deepcopies them by default which breaks mutability # see https://github.com/pydantic/pydantic/issues/7608 - # if we dont do this, then builtin_plugins.base.CORE_CONFIG != settings.CONFIGS.CoreConfig for example + # if we dont do this, then sys_plugins.base.CORE_CONFIG != settings.CONFIGS.CoreConfig for example # and calling .__init__() on one of them will not update the other self.hooks = self.model_fields['hooks'].default diff --git a/archivebox/plugantic/views.py b/archivebox/plugantic/views.py index 79146a88..20eebcc0 100644 --- a/archivebox/plugantic/views.py +++ b/archivebox/plugantic/views.py @@ -81,7 +81,7 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext: } for plugin in settings.PLUGINS.values(): - for binary in plugin.HOOKS_BY_TYPE.BINARY.values(): + 
for binary in plugin.HOOKS_BY_TYPE.get('BINARY', {}).values(): try: binary = binary.load() except Exception as e: @@ -125,7 +125,7 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext: binary = None plugin = None for loaded_plugin in settings.PLUGINS.values(): - for loaded_binary in loaded_plugin.HOOKS_BY_TYPE.BINARY.values(): + for loaded_binary in loaded_plugin.HOOKS_BY_TYPE.get('BINARY', {}).values(): if loaded_binary.name == key: binary = loaded_binary plugin = loaded_plugin @@ -175,17 +175,17 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext: for plugin in settings.PLUGINS.values(): - try: - plugin = plugin.load_binaries() - except Exception as e: - print(e) + # try: + # plugin.load_binaries() + # except Exception as e: + # print(e) rows['Name'].append(ItemLink(plugin.id, key=plugin.id)) - rows['verbose_name'].append(str(plugin.verbose_name)) + rows['verbose_name'].append(mark_safe(f'{plugin.verbose_name}')) rows['module'].append(str(plugin.plugin_module)) rows['source_code'].append(str(plugin.plugin_dir)) rows['hooks'].append(mark_safe(', '.join( - f'{hook.id}' + f'{hook.id}' for hook in plugin.hooks ))) diff --git a/archivebox/sys_plugins/base/__init__.py b/archivebox/sys_plugins/base/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/sys_plugins/base/apps.py b/archivebox/sys_plugins/base/apps.py new file mode 100644 index 00000000..3929b40f --- /dev/null +++ b/archivebox/sys_plugins/base/apps.py @@ -0,0 +1,142 @@ +import os +import sys +import platform + +from typing import List, ClassVar +from pathlib import Path +from pydantic import InstanceOf, Field + +from django.conf import settings + +from plugantic.base_plugin import BasePlugin +from plugantic.base_configset import BaseConfigSet, ConfigSectionName +from plugantic.base_hook import BaseHook + + +###################### Config ########################## + + +class ShellConfig(BaseConfigSet): + section: 
ClassVar[ConfigSectionName] = 'SHELL_CONFIG' + + DEBUG: bool = Field(default=False) + + IS_TTY: bool = Field(default=sys.stdout.isatty()) + USE_COLOR: bool = Field(default=lambda c: c.IS_TTY) + SHOW_PROGRESS: bool = Field(default=lambda c: (c.IS_TTY and platform.system() != 'darwin')) # progress bars are buggy on mac, disable for now + + IN_DOCKER: bool = Field(default=False) + IN_QEMU: bool = Field(default=False) + + PUID: int = Field(default=os.getuid()) + PGID: int = Field(default=os.getgid()) + +SHELL_CONFIG = ShellConfig() + + +class StorageConfig(BaseConfigSet): + section: ClassVar[ConfigSectionName] = 'STORAGE_CONFIG' + + OUTPUT_PERMISSIONS: str = Field(default='644') + RESTRICT_FILE_NAMES: str = Field(default='windows') + ENFORCE_ATOMIC_WRITES: bool = Field(default=True) + +STORAGE_CONFIG = StorageConfig() + + +class GeneralConfig(BaseConfigSet): + section: ClassVar[ConfigSectionName] = 'GENERAL_CONFIG' + + TAG_SEPARATOR_PATTERN: str = Field(default=r'[,]') + + +GENERAL_CONFIG = GeneralConfig() + + +class ServerConfig(BaseConfigSet): + section: ClassVar[ConfigSectionName] = 'SERVER_CONFIG' + + SECRET_KEY: str = Field(default=None) + BIND_ADDR: str = Field(default=lambda: ['127.0.0.1:8000', '0.0.0.0:8000'][SHELL_CONFIG.IN_DOCKER]) + ALLOWED_HOSTS: str = Field(default='*') + CSRF_TRUSTED_ORIGINS: str = Field(default=lambda c: 'http://localhost:8000,http://127.0.0.1:8000,http://0.0.0.0:8000,http://{}'.format(c.BIND_ADDR)) + + SNAPSHOTS_PER_PAGE: int = Field(default=40) + FOOTER_INFO: str = Field(default='Content is hosted for personal archiving purposes only. 
Contact server owner for any takedown requests.') + CUSTOM_TEMPLATES_DIR: Path = Field(default=None) + + PUBLIC_INDEX: bool = Field(default=True) + PUBLIC_SNAPSHOTS: bool = Field(default=True) + PUBLIC_ADD_VIEW: bool = Field(default=False) + + ADMIN_USERNAME: str = Field(default=None) + ADMIN_PASSWORD: str = Field(default=None) + REVERSE_PROXY_USER_HEADER: str = Field(default='Remote-User') + REVERSE_PROXY_WHITELIST: str = Field(default='') + LOGOUT_REDIRECT_URL: str = Field(default='/') + PREVIEW_ORIGINALS: bool = Field(default=True) + +SERVER_CONFIG = ServerConfig() + + +class ArchivingConfig(BaseConfigSet): + section: ClassVar[ConfigSectionName] = 'ARCHIVING_CONFIG' + + ONLY_NEW: bool = Field(default=True) + + TIMEOUT: int = Field(default=60) + MEDIA_TIMEOUT: int = Field(default=3600) + + MEDIA_MAX_SIZE: str = Field(default='750m') + RESOLUTION: str = Field(default='1440,2000') + CHECK_SSL_VALIDITY: bool = Field(default=True) + USER_AGENT: str = Field(default='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)') + COOKIES_FILE: Path | None = Field(default=None) + + URL_DENYLIST: str = Field(default=r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$', alias='URL_BLACKLIST') + URL_ALLOWLIST: str | None = Field(default=None, alias='URL_WHITELIST') + + # GIT_DOMAINS: str = Field(default='github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht') + # WGET_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'] + ' wget/{WGET_VERSION}') + # CURL_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'] + ' curl/{CURL_VERSION}') + # CHROME_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT']) + # CHROME_USER_DATA_DIR: str | None = Field(default=None) + # CHROME_TIMEOUT: int = Field(default=0) + # CHROME_HEADLESS: bool = Field(default=True) + # CHROME_SANDBOX: bool = 
Field(default=lambda: not SHELL_CONFIG.IN_DOCKER) + +ARCHIVING_CONFIG = ArchivingConfig() + + +class SearchBackendConfig(BaseConfigSet): + section: ClassVar[ConfigSectionName] = 'SEARCH_BACKEND_CONFIG' + + USE_INDEXING_BACKEND: bool = Field(default=True) + USE_SEARCHING_BACKEND: bool = Field(default=True) + + SEARCH_BACKEND_ENGINE: str = Field(default='ripgrep') + SEARCH_BACKEND_HOST_NAME: str = Field(default='localhost') + SEARCH_BACKEND_PORT: int = Field(default=1491) + SEARCH_BACKEND_PASSWORD: str = Field(default='SecretPassword') + SEARCH_PROCESS_HTML: bool = Field(default=True) + +SEARCH_BACKEND_CONFIG = SearchBackendConfig() + + +class CorePlugin(BasePlugin): + app_label: str = 'core' + verbose_name: str = 'Core' + + hooks: List[InstanceOf[BaseHook]] = [ + SHELL_CONFIG, + GENERAL_CONFIG, + STORAGE_CONFIG, + SERVER_CONFIG, + ARCHIVING_CONFIG, + SEARCH_BACKEND_CONFIG, + ] + + +PLUGIN = CorePlugin() +PLUGIN.register(settings) +DJANGO_APP = PLUGIN.AppConfig diff --git a/package-lock.json b/package-lock.json index 42bd3256..db0ac368 100644 --- a/package-lock.json +++ b/package-lock.json @@ -242,9 +242,9 @@ "license": "MIT" }, "node_modules/@types/node": { - "version": "22.5.5", - "resolved": "https://registry.npmjs.org/@types/node/-/node-22.5.5.tgz", - "integrity": "sha512-Xjs4y5UPO/CLdzpgR6GirZJx36yScjh73+2NlLlkFRSoQN8B0DpfXPdZGnvVmLRLOsqDpOfTNv7D9trgGhmOIA==", + "version": "22.6.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.6.1.tgz", + "integrity": "sha512-V48tCfcKb/e6cVUigLAaJDAILdMP0fUW6BidkPK4GpGjXcfbnoHasCZDwz3N3yVt5we2RHm4XTQCpv0KJz9zqw==", "license": "MIT", "optional": true, "dependencies": { diff --git a/pdm.lock b/pdm.lock index ff81362e..46e09094 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "all", "ldap", "sonic"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:6b062624538c5dfe6b1bd5be32546fef02b70ee73c4a1710a8eea9764bdd21d8" +content_hash = 
"sha256:c6898f1602f4760763b438a54b5a7e74833755c083718d56c27abcd765d7f0de" [[metadata.targets]] requires_python = "==3.11.*" @@ -208,14 +208,14 @@ files = [ [[package]] name = "bx-py-utils" -version = "102" +version = "103" requires_python = "<4,>=3.10" summary = "Various Python utility functions" groups = ["default"] marker = "python_version == \"3.11\"" files = [ - {file = "bx_py_utils-102-py3-none-any.whl", hash = "sha256:961a0abf31b512f72c1473a4d115096b0c5becd32d08338ac62adbf5b217b680"}, - {file = "bx_py_utils-102.tar.gz", hash = "sha256:6d131d40394b477de715169e80067a0ab4891c8f04afd33fbd7ca00e2faf21ae"}, + {file = "bx_py_utils-103-py3-none-any.whl", hash = "sha256:706291bdbc430655d78628ca3af037cff7dd5e2003136fd4ff4249adb3ab6228"}, + {file = "bx_py_utils-103.tar.gz", hash = "sha256:9aa162f7a1b81430811f2e7ce1a76ba4562e47d472b0e13cb8c8e055076d45d5"}, ] [[package]] @@ -593,7 +593,7 @@ files = [ [[package]] name = "django-stubs" -version = "5.0.4" +version = "5.1.0" requires_python = ">=3.8" summary = "Mypy stubs for Django" groups = ["default"] @@ -601,19 +601,19 @@ marker = "python_version == \"3.11\"" dependencies = [ "asgiref", "django", - "django-stubs-ext>=5.0.4", + "django-stubs-ext>=5.1.0", "tomli; python_version < \"3.11\"", "types-PyYAML", "typing-extensions>=4.11.0", ] files = [ - {file = "django_stubs-5.0.4-py3-none-any.whl", hash = "sha256:c2502f5ecbae50c68f9a86d52b5b2447d8648fd205036dad0ccb41e19a445927"}, - {file = "django_stubs-5.0.4.tar.gz", hash = "sha256:78e3764488fdfd2695f12502136548ec22f8d4b1780541a835042b8238d11514"}, + {file = "django_stubs-5.1.0-py3-none-any.whl", hash = "sha256:b98d49a80aa4adf1433a97407102d068de26c739c405431d93faad96dd282c40"}, + {file = "django_stubs-5.1.0.tar.gz", hash = "sha256:86128c228b65e6c9a85e5dc56eb1c6f41125917dae0e21e6cfecdf1b27e630c5"}, ] [[package]] name = "django-stubs-ext" -version = "5.0.4" +version = "5.1.0" requires_python = ">=3.8" summary = "Monkey-patching and extensions for django-stubs" groups = 
["default"] @@ -623,8 +623,8 @@ dependencies = [ "typing-extensions", ] files = [ - {file = "django_stubs_ext-5.0.4-py3-none-any.whl", hash = "sha256:910cbaff3d1e8e806a5c27d5ddd4088535aae8371ea921b7fd680fdfa5f14e30"}, - {file = "django_stubs_ext-5.0.4.tar.gz", hash = "sha256:85da065224204774208be29c7d02b4482d5a69218a728465c2fbe41725fdc819"}, + {file = "django_stubs_ext-5.1.0-py3-none-any.whl", hash = "sha256:a455fc222c90b30b29ad8c53319559f5b54a99b4197205ddbb385aede03b395d"}, + {file = "django_stubs_ext-5.1.0.tar.gz", hash = "sha256:ed7d51c0b731651879fc75f331fb0806d98b67bfab464e96e2724db6b46ef926"}, ] [[package]] diff --git a/pyproject.toml b/pyproject.toml index 49cd31cc..b29555d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -136,6 +136,7 @@ test = [ "bottle>=0.13.1", ] lint = [ + "ruff>=0.6.6", "flake8>=7.1.1", "mypy>=1.11.2", "django-autotyping>=0.5.1", @@ -158,7 +159,7 @@ exclude = ["*.pyi", "typings/", "migrations/", "vendor/"] # https://docs.astral.sh/ruff/rules/ [tool.ruff.lint] -ignore = ["E731", "E303", "E266"] +ignore = ["E731", "E303", "E266", "E241", "E222"] [tool.pytest.ini_options] testpaths = [ "tests" ] diff --git a/requirements.txt b/requirements.txt index 27cba2b9..d37acbe8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ beautifulsoup4==4.12.3; python_version == "3.11" brotli==1.1.0; implementation_name == "cpython" and python_version == "3.11" brotlicffi==1.1.0.0; implementation_name != "cpython" and python_version == "3.11" bx-django-utils==79; python_version == "3.11" -bx-py-utils==102; python_version == "3.11" +bx-py-utils==103; python_version == "3.11" certifi==2024.8.30; python_version == "3.11" cffi==1.17.1; platform_python_implementation != "PyPy" and python_version == "3.11" or implementation_name != "cpython" and python_version == "3.11" channels[daphne]==4.1.0; python_version == "3.11" @@ -38,8 +38,8 @@ django-object-actions==4.3.0; python_version == "3.11" django-pydantic-field==0.3.10; python_version == 
"3.11" django-settings-holder==0.1.2; python_version == "3.11" django-signal-webhooks==0.3.0; python_version == "3.11" -django-stubs==5.0.4; python_version == "3.11" -django-stubs-ext==5.0.4; python_version == "3.11" +django-stubs==5.1.0; python_version == "3.11" +django-stubs-ext==5.1.0; python_version == "3.11" django-taggit==1.3.0; python_version == "3.11" et-xmlfile==1.1.0; python_version == "3.11" executing==2.1.0; python_version == "3.11"