From 267fde0138de782d9b486b6d02c01c0927b7bdab Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 21 Oct 2024 01:32:53 -0700 Subject: [PATCH] call validate functions at AppConfig.ready time manually --- archivebox/plugins_auth/ldap/__init__.py | 5 +++++ archivebox/plugins_auth/ldap/config.py | 3 +-- archivebox/plugins_extractor/chrome/__init__.py | 6 ++++++ archivebox/plugins_extractor/chrome/config.py | 5 +++-- archivebox/plugins_extractor/wget/__init__.py | 7 ++++++- archivebox/plugins_extractor/wget/config.py | 5 ++--- archivebox/plugins_extractor/ytdlp/__init__.py | 5 +++++ archivebox/plugins_extractor/ytdlp/config.py | 5 ++--- archivebox/plugins_pkg/pip/binaries.py | 2 +- archivebox/plugins_search/sonic/__init__.py | 5 +++++ archivebox/plugins_search/sonic/config.py | 9 +++------ archivebox/plugins_search/sqlitefts/config.py | 10 ++++------ 12 files changed, 43 insertions(+), 24 deletions(-) diff --git a/archivebox/plugins_auth/ldap/__init__.py b/archivebox/plugins_auth/ldap/__init__.py index 66d5ad88..6ba43b90 100644 --- a/archivebox/plugins_auth/ldap/__init__.py +++ b/archivebox/plugins_auth/ldap/__init__.py @@ -28,6 +28,7 @@ def get_PLUGIN(): @abx.hookimpl def get_CONFIG(): from .config import LDAP_CONFIG + return { __id__: LDAP_CONFIG } @@ -64,6 +65,10 @@ def ready(): """ Called at AppConfig.ready() time (settings + models are all loaded) """ + from .config import LDAP_CONFIG + + LDAP_CONFIG.validate() + from django.conf import settings if settings.CONFIGS.ldap.LDAP_ENABLED: diff --git a/archivebox/plugins_auth/ldap/config.py b/archivebox/plugins_auth/ldap/config.py index fb124273..2094dc68 100644 --- a/archivebox/plugins_auth/ldap/config.py +++ b/archivebox/plugins_auth/ldap/config.py @@ -50,8 +50,7 @@ class LdapConfig(BaseConfigSet): LDAP_LASTNAME_ATTR: str = Field(default='last_name') LDAP_EMAIL_ATTR: str = Field(default='email') - @model_validator(mode='after') - def validate_ldap_config(self): + def validate(self): if self.LDAP_ENABLED: LDAP_LIB, _LDAPSearch = get_ldap_lib() # Check that LDAP libraries are installed diff --git a/archivebox/plugins_extractor/chrome/__init__.py b/archivebox/plugins_extractor/chrome/__init__.py index 9b254655..f46ea8e0 100644 --- a/archivebox/plugins_extractor/chrome/__init__.py +++ b/archivebox/plugins_extractor/chrome/__init__.py @@ -37,6 +37,12 @@ def get_BINARIES(): 'chrome': CHROME_BINARY, } +@abx.hookimpl +def ready(): + from .config import CHROME_CONFIG + CHROME_CONFIG.validate() + + # @abx.hookimpl # def get_EXTRACTORS(): # return { diff --git a/archivebox/plugins_extractor/chrome/config.py b/archivebox/plugins_extractor/chrome/config.py index be943a94..a656f234 100644 --- a/archivebox/plugins_extractor/chrome/config.py +++ b/archivebox/plugins_extractor/chrome/config.py @@ -1,19 +1,20 @@ __package__ = 'plugins_extractor.chrome' import os - from pathlib import Path from typing import List, Optional -from pydantic import Field, model_validator +from pydantic import Field from pydantic_pkgr import bin_abspath from abx.archivebox.base_configset import BaseConfigSet from abx.archivebox.base_binary import env +from archivebox.config import CONSTANTS from archivebox.config.common import ARCHIVING_CONFIG, SHELL_CONFIG from archivebox.misc.logging import STDERR from archivebox.misc.util import dedupe +from archivebox.logging_util import pretty_path CHROMIUM_BINARY_NAMES_LINUX = [ diff --git a/archivebox/plugins_extractor/wget/__init__.py b/archivebox/plugins_extractor/wget/__init__.py index 506ad7bf..2b546836 100644 --- a/archivebox/plugins_extractor/wget/__init__.py +++ b/archivebox/plugins_extractor/wget/__init__.py @@ -24,7 +24,7 @@ def get_PLUGIN(): @abx.hookimpl def get_CONFIG(): from .config import WGET_CONFIG - + return { 'wget': WGET_CONFIG } @@ -45,3 +45,8 @@ def get_EXTRACTORS(): 'wget': WGET_EXTRACTOR, 'warc': WARC_EXTRACTOR, } + +@abx.hookimpl +def ready(): + from .config import WGET_CONFIG + WGET_CONFIG.validate() diff --git a/archivebox/plugins_extractor/wget/config.py b/archivebox/plugins_extractor/wget/config.py index 2cc99668..12edf672 100644 --- a/archivebox/plugins_extractor/wget/config.py +++ b/archivebox/plugins_extractor/wget/config.py @@ -4,7 +4,7 @@ import subprocess from typing import List, Optional from pathlib import Path -from pydantic import Field, model_validator +from pydantic import Field from abx.archivebox.base_configset import BaseConfigSet @@ -40,8 +40,7 @@ class WgetConfig(BaseConfigSet): WGET_USER_AGENT: str = Field(default=lambda: ARCHIVING_CONFIG.USER_AGENT) WGET_COOKIES_FILE: Optional[Path] = Field(default=lambda: ARCHIVING_CONFIG.COOKIES_FILE) - @model_validator(mode='after') - def validate_use_ytdlp(self): + def validate(self): if self.USE_WGET and self.WGET_TIMEOUT < 10: STDERR.print(f'[red][!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={self.WGET_TIMEOUT} seconds)[/red]') STDERR.print(' wget will fail to archive any sites if set to less than ~20 seconds.') diff --git a/archivebox/plugins_extractor/ytdlp/__init__.py b/archivebox/plugins_extractor/ytdlp/__init__.py index 26157c24..1dc9ef99 100644 --- a/archivebox/plugins_extractor/ytdlp/__init__.py +++ b/archivebox/plugins_extractor/ytdlp/__init__.py @@ -35,3 +35,8 @@ def get_BINARIES(): 'ytdlp': YTDLP_BINARY, 'ffmpeg': FFMPEG_BINARY, } + +@abx.hookimpl +def ready(): + from .config import YTDLP_CONFIG + YTDLP_CONFIG.validate() diff --git a/archivebox/plugins_extractor/ytdlp/config.py b/archivebox/plugins_extractor/ytdlp/config.py index abe442bf..29dd6ab4 100644 --- a/archivebox/plugins_extractor/ytdlp/config.py +++ b/archivebox/plugins_extractor/ytdlp/config.py @@ -2,7 +2,7 @@ __package__ = 'plugins_extractor.ytdlp' from typing import List -from pydantic import Field, model_validator, AliasChoices +from pydantic import Field, AliasChoices from abx.archivebox.base_configset import BaseConfigSet @@ -19,8 +19,7 @@ class YtdlpConfig(BaseConfigSet): YTDLP_CHECK_SSL_VALIDITY: bool = Field(default=lambda: ARCHIVING_CONFIG.CHECK_SSL_VALIDITY) YTDLP_TIMEOUT: int = Field(default=lambda: ARCHIVING_CONFIG.MEDIA_TIMEOUT) - @model_validator(mode='after') - def validate_use_ytdlp(self): + def validate(self): if self.USE_YTDLP and self.YTDLP_TIMEOUT < 20: STDERR.print(f'[red][!] Warning: MEDIA_TIMEOUT is set too low! (currently set to MEDIA_TIMEOUT={self.YTDLP_TIMEOUT} seconds)[/red]') STDERR.print(' youtube-dl/yt-dlp will fail to archive any media if set to less than ~20 seconds.') diff --git a/archivebox/plugins_pkg/pip/binaries.py b/archivebox/plugins_pkg/pip/binaries.py index d4709edb..3e451cfe 100644 --- a/archivebox/plugins_pkg/pip/binaries.py +++ b/archivebox/plugins_pkg/pip/binaries.py @@ -112,7 +112,7 @@ SQLITE_BINARY = SqliteBinary() LOADED_DJANGO_PATH = Path(django.__file__) LOADED_DJANGO_VERSION = SemVer(django.VERSION[:3]) -LOADED_DJANGO_FROM_VENV = str(LOADED_DJANGO_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve())) +LOADED_DJANGO_FROM_VENV = str(LOADED_DJANGO_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv and VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve())) class DjangoBinary(BaseBinary): name: BinName = 'django' diff --git a/archivebox/plugins_search/sonic/__init__.py b/archivebox/plugins_search/sonic/__init__.py index 4b81b0be..a899679b 100644 --- a/archivebox/plugins_search/sonic/__init__.py +++ b/archivebox/plugins_search/sonic/__init__.py @@ -46,3 +46,8 @@ def get_SEARCHBACKENDS(): return { 'sonic': SONIC_SEARCH_BACKEND, } + +@abx.hookimpl +def ready(): + from .config import SONIC_CONFIG + SONIC_CONFIG.validate() diff --git a/archivebox/plugins_search/sonic/config.py b/archivebox/plugins_search/sonic/config.py index a16c8c42..d54ed568 100644 --- a/archivebox/plugins_search/sonic/config.py +++ b/archivebox/plugins_search/sonic/config.py @@ -2,7 +2,7 @@ __package__ = 'plugins_search.sonic' import sys -from pydantic import Field, model_validator +from pydantic import Field from abx.archivebox.base_configset import BaseConfigSet @@ -31,14 +31,11 @@ class SonicConfig(BaseConfigSet): SONIC_MAX_CHUNK_LENGTH: int = Field(default=2000) SONIC_MAX_TEXT_LENGTH: int = Field(default=100000000) SONIC_MAX_RETRIES: int = Field(default=5) - - @model_validator(mode='after') - def validate_sonic_port(self): + + def validate(self): if SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE == 'sonic' and SONIC_LIB is None: sys.stderr.write('[X] Error: Sonic search backend is enabled but sonic-client lib is not installed. You may need to run: pip install archivebox[sonic]\n') # dont hard exit here. in case the user is just running "archivebox version" or "archivebox help", we still want those to work despite broken ldap - # sys.exit(1) SEARCH_BACKEND_CONFIG.update_in_place(SEARCH_BACKEND_ENGINE='ripgrep') - return self SONIC_CONFIG = SonicConfig() diff --git a/archivebox/plugins_search/sqlitefts/config.py b/archivebox/plugins_search/sqlitefts/config.py index 77209f27..5690dc6c 100644 --- a/archivebox/plugins_search/sqlitefts/config.py +++ b/archivebox/plugins_search/sqlitefts/config.py @@ -6,7 +6,7 @@ from typing import Callable from django.core.exceptions import ImproperlyConfigured -from pydantic import Field, model_validator +from pydantic import Field from abx.archivebox.base_configset import BaseConfigSet @@ -26,14 +26,12 @@ class SqliteftsConfig(BaseConfigSet): SQLITEFTS_TABLE: str = Field(default='snapshot_fts') SQLITEFTS_ID_TABLE: str = Field(default='snapshot_id_fts') SQLITEFTS_COLUMN: str = Field(default='texts') - - @model_validator(mode='after') - def validate_fts_separate_database(self): + + def validate(self): if SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE == 'sqlite' and self.SQLITEFTS_SEPARATE_DATABASE and not self.SQLITEFTS_DB: sys.stderr.write('[X] Error: SQLITEFTS_DB must be set if SQLITEFTS_SEPARATE_DATABASE is True\n') SEARCH_BACKEND_CONFIG.update_in_place(SEARCH_BACKEND_ENGINE='ripgrep') - return self - + @property def get_connection(self) -> Callable[[], sqlite3.Connection]: # Make get_connection callable, because `django.db.connection.cursor()`