move final legacy config to plugins and fix archivebox config cmd and add search opt

Nick Sweeting 2024-10-21 02:56:00 -07:00
parent 115f89fd8b
commit b3107ab830
20 changed files with 379 additions and 275 deletions

View file

@@ -31,6 +31,7 @@ PACKAGE_DIR = Path(__file__).resolve().parent
 if str(PACKAGE_DIR) not in sys.path:
     sys.path.append(str(PACKAGE_DIR))
 os.environ['DJANGO_SETTINGS_MODULE'] = 'core.settings'
+os.environ['TZ'] = 'UTC'
 
 # detect ArchiveBox user's UID/GID based on data dir ownership
 from .config.permissions import drop_privileges  # noqa

View file

@@ -10,7 +10,7 @@ import toml
 from rich import print
 from benedict import benedict
-from pydantic import model_validator, TypeAdapter
+from pydantic import model_validator, TypeAdapter, AliasChoices
 from pydantic_settings import BaseSettings, SettingsConfigDict, PydanticBaseSettingsSource
 from pydantic_settings.sources import TomlConfigSettingsSource
 
@@ -247,6 +247,26 @@ class BaseConfigSet(BaseSettings):
         return self
 
+    @property
+    def aliases(self) -> Dict[str, str]:
+        alias_map = {}
+        for key, field in self.model_fields.items():
+            alias_map[key] = key
+            if field.validation_alias is None:
+                continue
+
+            if isinstance(field.validation_alias, AliasChoices):
+                for alias in field.validation_alias.choices:
+                    alias_map[alias] = key
+            elif isinstance(field.alias, str):
+                alias_map[field.alias] = key
+            else:
+                raise ValueError(f'Unknown alias type for field {key}: {field.alias}')
+        return benedict(alias_map)
+
     @property
     def toml_section_header(self):
         """Convert the class name to a TOML section header e.g. ShellConfig -> SHELL_CONFIG"""

View file

@@ -24,6 +24,11 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         formatter_class=SmartFormatter,
     )
     group = parser.add_mutually_exclusive_group()
+    parser.add_argument(
+        '--search',
+        action='store_true',
+        help="Search for KEYs that match the given search terms",
+    )
     group.add_argument(
         '--get', #'-g',
         action='store_true',
@@ -54,6 +59,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     config(
         config_options_str=config_options_str,
         config_options=command.config_options,
+        search=command.search,
         get=command.get,
         set=command.set,
         reset=command.reset,

View file

@@ -1,8 +1,9 @@
 __package__ = 'archivebox.config'
 
+import re
 import sys
 import shutil
-from typing import Dict, Optional
+from typing import Dict, Optional, List
 from pathlib import Path
 
 from rich import print
@@ -121,6 +122,9 @@ class ArchivingConfig(BaseConfigSet):
     URL_DENYLIST: str = Field(default=r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$', alias='URL_BLACKLIST')
     URL_ALLOWLIST: str | None = Field(default=None, alias='URL_WHITELIST')
+    SAVE_ALLOWLIST: Dict[str, List[str]] = Field(default={})   # mapping of regex patterns to list of archive methods
+    SAVE_DENYLIST: Dict[str, List[str]] = Field(default={})
+
     # GIT_DOMAINS: str = Field(default='github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht')
     # WGET_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'] + ' wget/{WGET_VERSION}')
     # CURL_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'] + ' curl/{CURL_VERSION}')
@@ -152,6 +156,28 @@ class ArchivingConfig(BaseConfigSet):
             urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
         return v
 
+    @property
+    def URL_ALLOWLIST_PTN(self) -> re.Pattern | None:
+        return re.compile(self.URL_ALLOWLIST, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS) if self.URL_ALLOWLIST else None
+
+    @property
+    def URL_DENYLIST_PTN(self) -> re.Pattern:
+        return re.compile(self.URL_DENYLIST, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)
+
+    @property
+    def SAVE_ALLOWLIST_PTNS(self) -> Dict[re.Pattern, List[str]]:
+        return {
+            re.compile(k, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS): v
+            for k, v in self.SAVE_ALLOWLIST.items()
+        } if self.SAVE_ALLOWLIST else {}
+
+    @property
+    def SAVE_DENYLIST_PTNS(self) -> Dict[re.Pattern, List[str]]:
+        return {
+            re.compile(k, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS): v
+            for k, v in self.SAVE_DENYLIST.items()
+        } if self.SAVE_DENYLIST else {}
+
 
 ARCHIVING_CONFIG = ArchivingConfig()
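Note: SAVE_ALLOWLIST/SAVE_DENYLIST map URL regexes to lists of archive method names, and the *_PTNS properties compile them on access. A rough sketch of the compile-and-match behavior (the regex flags and example values are assumptions, standing in for CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS and real user config):

    import re

    ALLOWDENYLIST_REGEX_FLAGS = re.IGNORECASE | re.UNICODE | re.MULTILINE   # assumption: stand-in flags

    SAVE_DENYLIST = {r'\.pdf$': ['screenshot', 'media']}                    # hypothetical user-supplied config
    SAVE_DENYLIST_PTNS = {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in SAVE_DENYLIST.items()}

    url = 'https://example.com/paper.pdf'
    skipped = {m for pat, methods in SAVE_DENYLIST_PTNS.items() if pat.search(url) for m in methods}
    print(skipped)   # -> {'screenshot', 'media'}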

View file

@@ -22,7 +22,6 @@ Documentation:
 __package__ = 'archivebox.config'
 
 import os
-import re
 import sys
 import json
 import shutil
@@ -49,152 +48,20 @@ from ..misc.logging import (
     hint,      # noqa
 )
 
-from .common import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG
-from archivebox.plugins_extractor.favicon.config import FAVICON_CONFIG
-from archivebox.plugins_extractor.wget.config import WGET_CONFIG
-from archivebox.plugins_extractor.curl.config import CURL_CONFIG
+from .common import SHELL_CONFIG
 
 ANSI = SHELL_CONFIG.ANSI
 
-############################### Config Schema ##################################
-
-CONFIG_SCHEMA: Dict[str, Dict[str, Any]] = {
-    'SHELL_CONFIG': SHELL_CONFIG.as_legacy_config_schema(),
-    'SERVER_CONFIG': SERVER_CONFIG.as_legacy_config_schema(),
-    'GENERAL_CONFIG': GENERAL_CONFIG.as_legacy_config_schema(),
-    'ARCHIVING_CONFIG': ARCHIVING_CONFIG.as_legacy_config_schema(),
-    'SEARCH_BACKEND_CONFIG': SEARCH_BACKEND_CONFIG.as_legacy_config_schema(),
-    'STORAGE_CONFIG': STORAGE_CONFIG.as_legacy_config_schema(),
-    # 'FAVICON_CONFIG': FAVICON_CONFIG.as_legacy_config_schema(),
-    # 'WGET_CONFIG': WGET_CONFIG.as_legacy_config_schema(),
-    # 'CURL_CONFIG': CURL_CONFIG.as_legacy_config_schema(),
-
-    'ARCHIVE_METHOD_TOGGLES': {
-        'SAVE_TITLE': {'type': bool, 'default': True, 'aliases': ('FETCH_TITLE',)},
-        'SAVE_FAVICON': {'type': bool, 'default': True, 'aliases': ('FETCH_FAVICON',)},
-        'SAVE_WGET': {'type': bool, 'default': True, 'aliases': ('FETCH_WGET',)},
-        'SAVE_WGET_REQUISITES': {'type': bool, 'default': True, 'aliases': ('FETCH_WGET_REQUISITES',)},
-        'SAVE_SINGLEFILE': {'type': bool, 'default': True, 'aliases': ('FETCH_SINGLEFILE',)},
-        'SAVE_READABILITY': {'type': bool, 'default': True, 'aliases': ('FETCH_READABILITY',)},
-        'SAVE_MERCURY': {'type': bool, 'default': True, 'aliases': ('FETCH_MERCURY',)},
-        'SAVE_HTMLTOTEXT': {'type': bool, 'default': True, 'aliases': ('FETCH_HTMLTOTEXT',)},
-        'SAVE_PDF': {'type': bool, 'default': True, 'aliases': ('FETCH_PDF',)},
-        'SAVE_SCREENSHOT': {'type': bool, 'default': True, 'aliases': ('FETCH_SCREENSHOT',)},
-        'SAVE_DOM': {'type': bool, 'default': True, 'aliases': ('FETCH_DOM',)},
-        'SAVE_HEADERS': {'type': bool, 'default': True, 'aliases': ('FETCH_HEADERS',)},
-        'SAVE_WARC': {'type': bool, 'default': True, 'aliases': ('FETCH_WARC',)},
-        'SAVE_GIT': {'type': bool, 'default': True, 'aliases': ('FETCH_GIT',)},
-        'SAVE_MEDIA': {'type': bool, 'default': True, 'aliases': ('FETCH_MEDIA',)},
-        'SAVE_ARCHIVE_DOT_ORG': {'type': bool, 'default': True, 'aliases': ('SUBMIT_ARCHIVE_DOT_ORG',)},
-        'SAVE_ALLOWLIST': {'type': dict, 'default': {},},
-        'SAVE_DENYLIST': {'type': dict, 'default': {},},
-    },
-
-    'ARCHIVE_METHOD_OPTIONS': {
-        'RESOLUTION': {'type': str, 'default': '1440,2000', 'aliases': ('SCREENSHOT_RESOLUTION','WINDOW_SIZE')},
-        # 'GIT_DOMAINS': {'type': str, 'default': 'github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht'},
-        'CHECK_SSL_VALIDITY': {'type': bool, 'default': True},
-        'MEDIA_MAX_SIZE': {'type': str, 'default': '750m'},
-
-        'USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)'},
-        'CURL_USER_AGENT': {'type': str, 'default': lambda c: c['USER_AGENT']},  # + ' curl/{CURL_VERSION}'},
-        'COOKIES_FILE': {'type': str, 'default': None},
-
-        'YOUTUBEDL_ARGS': {'type': list, 'default': lambda c: [
-            '--restrict-filenames',
-            '--trim-filenames', '128',
-            '--write-description',
-            '--write-info-json',
-            '--write-annotations',
-            '--write-thumbnail',
-            '--no-call-home',
-            '--write-sub',
-            '--write-auto-subs',
-            '--convert-subs=srt',
-            '--yes-playlist',
-            '--continue',
-            # This flag doesn't exist in youtube-dl
-            # only in yt-dlp
-            '--no-abort-on-error',
-            # --ignore-errors must come AFTER
-            # --no-abort-on-error
-            # https://github.com/yt-dlp/yt-dlp/issues/4914
-            '--ignore-errors',
-            '--geo-bypass',
-            '--add-metadata',
-            '--format=(bv*+ba/b)[filesize<={}][filesize_approx<=?{}]/(bv*+ba/b)'.format(c['MEDIA_MAX_SIZE'], c['MEDIA_MAX_SIZE']),
-        ]},
-        'YOUTUBEDL_EXTRA_ARGS': {'type': list, 'default': None},
-    },
-
-    'DEPENDENCY_CONFIG': {
-        'USE_CURL': {'type': bool, 'default': True},
-        'USE_SINGLEFILE': {'type': bool, 'default': True},
-        'USE_READABILITY': {'type': bool, 'default': True},
-        'USE_GIT': {'type': bool, 'default': True},
-        'USE_CHROME': {'type': bool, 'default': True},
-        'USE_YOUTUBEDL': {'type': bool, 'default': True},
-        'USE_RIPGREP': {'type': bool, 'default': True},
-
-        # 'GIT_BINARY': {'type': str, 'default': 'git'},
-        # 'CURL_BINARY': {'type': str, 'default': 'curl'},
-        # 'NODE_BINARY': {'type': str, 'default': 'node'},
-        # 'YOUTUBEDL_BINARY': {'type': str, 'default': 'yt-dlp'},   # also can accept youtube-dl
-        # 'SINGLEFILE_BINARY': {'type': str, 'default': lambda c: bin_path('single-file')},
-        # 'READABILITY_BINARY': {'type': str, 'default': lambda c: bin_path('readability-extractor')},
-        # 'RIPGREP_BINARY': {'type': str, 'default': 'rg'},
-
-        'POCKET_CONSUMER_KEY': {'type': str, 'default': None},
-        'POCKET_ACCESS_TOKENS': {'type': dict, 'default': {}},
-        'READWISE_READER_TOKENS': {'type': dict, 'default': {}},
-    },
-}
-
-
-########################## Backwards-Compatibility #############################
-
-# for backwards compatibility with old config files, check old/deprecated names for each key
-CONFIG_ALIASES = {
-    alias: key
-    for section in CONFIG_SCHEMA.values()
-        for key, default in section.items()
-            for alias in default.get('aliases', ())
-}
-USER_CONFIG = {key: section[key] for section in CONFIG_SCHEMA.values() for key in section.keys()}
-
 def get_real_name(key: str) -> str:
     """get the current canonical name for a given deprecated config key"""
-    return CONFIG_ALIASES.get(key.upper().strip(), key.upper().strip())
+    from django.conf import settings
+
+    for section in settings.CONFIGS.values():
+        try:
+            return section.aliases[key]
+        except KeyError:
+            pass
+    return key
 
-
-# These are derived/computed values calculated *after* all user-provided config values are ingested
-# they appear in `archivebox config` output and are intended to be read-only for the user
-DYNAMIC_CONFIG_SCHEMA: Dict[str, Any] = {
-    'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)},
-    'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)},
-    'SAVE_ALLOWLIST_PTN': {'default': lambda c: c['SAVE_ALLOWLIST'] and {re.compile(k, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_ALLOWLIST'].items()}},
-    'SAVE_DENYLIST_PTN': {'default': lambda c: c['SAVE_DENYLIST'] and {re.compile(k, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_DENYLIST'].items()}},
-}
-
-# print("FINISHED DEFINING SCHEMAS")
 
 ################################### Helpers ####################################
 
 def load_config_val(key: str,
@@ -265,7 +132,7 @@ def load_config_val(key: str,
         raise Exception('Config values can only be str, bool, int, or json')
 
-def load_config_file(out_dir: str | None=CONSTANTS.DATA_DIR) -> Optional[benedict]:
+def load_config_file() -> Optional[benedict]:
     """load the ini-formatted config file from DATA_DIR/Archivebox.conf"""
 
     config_path = CONSTANTS.CONFIG_FILE
@@ -285,9 +152,18 @@ def load_config_file(out_dir: str | None=CONSTANTS.DATA_DIR) -> Optional[benedic
     return None
 
-def write_config_file(config: Dict[str, str], out_dir: str | None=CONSTANTS.DATA_DIR) -> benedict:
+def section_for_key(key: str) -> Any:
+    from django.conf import settings
+    for config_section in settings.CONFIGS.values():
+        if hasattr(config_section, key):
+            return config_section
+    return None
+
+
+def write_config_file(config: Dict[str, str]) -> benedict:
     """load the ini-formatted config file from DATA_DIR/Archivebox.conf"""
 
+    import abx.archivebox.reads
     from archivebox.misc.system import atomic_write
 
     CONFIG_HEADER = (
@@ -316,39 +192,30 @@ def write_config_file(config: Dict[str, str], out_dir: str | None=CONSTANTS.DATA
     with open(config_path, 'r', encoding='utf-8') as old:
         atomic_write(f'{config_path}.bak', old.read())
 
-    find_section = lambda key: [name for name, opts in CONFIG_SCHEMA.items() if key in opts][0]
-
     # Set up sections in empty config file
     for key, val in config.items():
-        section = find_section(key)
-        if section in config_file:
-            existing_config = dict(config_file[section])
+        section = section_for_key(key)
+        assert section is not None
+
+        section_name = section.toml_section_header
+
+        if section_name in config_file:
+            existing_config = dict(config_file[section_name])
         else:
             existing_config = {}
-        config_file[section] = benedict({**existing_config, key: val})
 
-    # always make sure there's a SECRET_KEY defined for Django
-    existing_secret_key = None
-    if 'SERVER_CONFIG' in config_file and 'SECRET_KEY' in config_file['SERVER_CONFIG']:
-        existing_secret_key = config_file['SERVER_CONFIG']['SECRET_KEY']
-
-    if (not existing_secret_key) or ('not a valid secret' in existing_secret_key):
-        from django.utils.crypto import get_random_string
-        chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
-        random_secret_key = get_random_string(50, chars)
-        if 'SERVER_CONFIG' in config_file:
-            config_file['SERVER_CONFIG']['SECRET_KEY'] = random_secret_key
-        else:
-            config_file['SERVER_CONFIG'] = {'SECRET_KEY': random_secret_key}
+        config_file[section_name] = benedict({**existing_config, key: val})
+        section.update_in_place(warn=False, persist=False, **{key: val})
 
     with open(config_path, 'w+', encoding='utf-8') as new:
         config_file.write(new)
 
+    updated_config = {}
     try:
-        # validate the config by attempting to re-parse it
-        CONFIG = load_all_config()
+        # validate the updated_config by attempting to re-parse it
+        updated_config = {**load_all_config(), **abx.archivebox.reads.get_FLAT_CONFIG()}
     except BaseException:                                                       # lgtm [py/catch-base-exception]
-        # something went horribly wrong, rever to the previous version
+        # something went horribly wrong, revert to the previous version
         with open(f'{config_path}.bak', 'r', encoding='utf-8') as old:
             atomic_write(config_path, old.read())
@@ -358,7 +225,7 @@ def write_config_file(config: Dict[str, str], out_dir: str | None=CONSTANTS.DATA
         os.remove(f'{config_path}.bak')
 
     return benedict({
-        key.upper(): CONFIG.get(key.upper())
+        key.upper(): updated_config.get(key.upper())
         for key in config.keys()
     })
@@ -371,7 +238,7 @@ def load_config(defaults: Dict[str, Any],
                 config_file_vars: Optional[Dict[str, str]]=None) -> benedict:
 
     env_vars = env_vars or os.environ
-    config_file_vars = config_file_vars or load_config_file(out_dir=out_dir)
+    config_file_vars = config_file_vars or load_config_file()
 
     extended_config = benedict(config.copy() if config else {})
     for key, default in defaults.items():
@@ -486,17 +353,19 @@ def wget_supports_compression(config):
 def load_all_config():
-    CONFIG = benedict()
-    for section_name, section_config in CONFIG_SCHEMA.items():
-        # print('LOADING CONFIG SECTION:', section_name)
-        CONFIG = load_config(section_config, CONFIG)
-    # print("LOADING CONFIG SECTION:", 'DYNAMIC')
-    return load_config(DYNAMIC_CONFIG_SCHEMA, CONFIG)
+    import abx.archivebox.reads
+
+    flat_config = benedict()
+
+    for config_section in abx.archivebox.reads.get_CONFIGS().values():
+        config_section.__init__()
+        flat_config.update(config_section.model_dump())
+
+    return flat_config
 
 # add all final config values in CONFIG to globals in this file
-CONFIG: benedict = load_all_config()
-globals().update(CONFIG)
+# CONFIG: benedict = {}
+# globals().update(CONFIG)
 
 # print("FINISHED LOADING CONFIG USING SCHEMAS + FILE + ENV")
@@ -508,15 +377,6 @@ globals().update(CONFIG)
 # ******************************************************************************
 
-########################### System Environment Setup ###########################
-
-# Set timezone to UTC and umask to OUTPUT_PERMISSIONS
-assert CONSTANTS.TIMEZONE == 'UTC', f'The server timezone should always be set to UTC (got {CONSTANTS.TIMEZONE})'  # noqa: F821
-os.environ["TZ"] = CONSTANTS.TIMEZONE                  # noqa: F821
-os.umask(0o777 - int(STORAGE_CONFIG.DIR_OUTPUT_PERMISSIONS, base=8))           # noqa: F821
-
 
 ########################### Config Validity Checkers ###########################
 
 if not SHELL_CONFIG.USE_COLOR:
@@ -551,7 +411,7 @@ def setup_django_minimal():
 DJANGO_SET_UP = False
 
-def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CONFIG, in_memory_db=False) -> None:
+def setup_django(check_db=False, in_memory_db=False) -> None:
     from rich.panel import Panel
 
     global INITIAL_STARTUP_PROGRESS
@@ -566,10 +426,6 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CON
     with Progress(transient=True, expand=True, console=STDERR) as INITIAL_STARTUP_PROGRESS:
         INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25)
 
-        output_dir = out_dir or CONSTANTS.DATA_DIR
-
-        assert isinstance(output_dir, Path) and isinstance(CONSTANTS.PACKAGE_DIR, Path)
-
         from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, SudoPermission
 
         # if running as root, chown the data dir to the archivebox user to make sure it's accessible to the archivebox user
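Note: load_all_config() no longer walks a static CONFIG_SCHEMA; it asks the abx plugin registry for every registered ConfigSet and flattens them into a single namespace. A roughly equivalent sketch (the section names and values below are hypothetical stand-ins for real plugin ConfigSets):

    from benedict import benedict

    configs = {
        'shell': {'USE_COLOR': True},
        'archiving': {'TIMEOUT': 60, 'MEDIA_MAX_SIZE': '750m'},
    }

    flat_config = benedict()
    for section in configs.values():
        flat_config.update(section)   # later sections win on key collisions

    print(flat_config['MEDIA_MAX_SIZE'])   # -> '750m'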

View file

@@ -10,10 +10,6 @@ from datetime import datetime, timezone
 from django.db.models import QuerySet
 
-from archivebox.config.legacy import (
-    SAVE_ALLOWLIST_PTN,
-    SAVE_DENYLIST_PTN,
-)
 
 from ..index.schema import ArchiveResult, Link
 from ..index.sql import write_link_to_sql_index
 from ..index import (
@@ -82,27 +78,30 @@ ARCHIVE_METHODS_INDEXING_PRECEDENCE = [
 @enforce_types
 def get_archive_methods_for_link(link: Link) -> Iterable[ArchiveMethodEntry]:
+    from archivebox.config.common import ARCHIVING_CONFIG
+
     DEFAULT_METHODS = get_default_archive_methods()
     allowed_methods = {
-        m for pat, methods in
-        SAVE_ALLOWLIST_PTN.items()
-        if pat.search(link.url)
-        for m in methods
-    } or { m[0] for m in DEFAULT_METHODS }
+        method_name
+        for url_pattern, methods in ARCHIVING_CONFIG.SAVE_ALLOWLIST_PTNS.items()
+        for method_name in methods
+        if url_pattern.search(link.url)
+    } or { method[0] for method in DEFAULT_METHODS }
     denied_methods = {
-        m for pat, methods in
-        SAVE_DENYLIST_PTN.items()
-        if pat.search(link.url)
-        for m in methods
+        method_name
+        for url_pattern, methods in ARCHIVING_CONFIG.SAVE_DENYLIST_PTNS.items()
+        for method_name in methods
+        if url_pattern.search(link.url)
     }
     allowed_methods -= denied_methods
 
-    return (m for m in DEFAULT_METHODS if m[0] in allowed_methods)
+    return [method for method in DEFAULT_METHODS if method[0] in allowed_methods]
 
 
 @enforce_types
 def ignore_methods(to_ignore: List[str]) -> Iterable[str]:
     ARCHIVE_METHODS = get_default_archive_methods()
-    return [x[0] for x in ARCHIVE_METHODS if x[0] not in to_ignore]
+    return [method[0] for method in ARCHIVE_METHODS if method[0] not in to_ignore]
 
 
 @enforce_types
 def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None, created_by_id: int | None=None) -> Link:
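Note: with the compiled patterns now living on ARCHIVING_CONFIG, method selection works as in this rough sketch (the config values are hypothetical): an allowlist match restricts a URL to the listed methods, no allowlist match falls back to all default methods, and denylist matches are subtracted last:

    import re

    allowlist = {re.compile(r'youtube\.com'): ['media', 'title']}   # hypothetical SAVE_ALLOWLIST_PTNS
    denylist  = {re.compile(r'\.com'): ['title']}                   # hypothetical SAVE_DENYLIST_PTNS
    default_methods = ['title', 'favicon', 'media']

    url = 'https://youtube.com/watch?v=abc123'
    allowed = {m for pat, methods in allowlist.items() if pat.search(url) for m in methods} or set(default_methods)
    denied  = {m for pat, methods in denylist.items() if pat.search(url) for m in methods}
    print(allowed - denied)   # -> {'media'}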

View file

@@ -7,10 +7,11 @@ from typing import Optional
 from archivebox.config import VERSION
 from archivebox.config.common import ARCHIVING_CONFIG
-from archivebox.config.legacy import SAVE_HTMLTOTEXT
 from archivebox.misc.system import atomic_write
 from archivebox.misc.util import enforce_types, is_static_file
+from archivebox.plugins_extractor.htmltotext.config import HTMLTOTEXT_CONFIG
+
 
 from ..logging_util import TimedProgress
 from ..index.schema import Link, ArchiveResult, ArchiveError
 from .title import get_html
@@ -114,7 +115,7 @@ def should_save_htmltotext(link: Link, out_dir: Optional[Path]=None, overwrite:
     if not overwrite and (out_dir / get_output_path()).exists():
         return False
 
-    return SAVE_HTMLTOTEXT
+    return HTMLTOTEXT_CONFIG.SAVE_HTMLTOTEXT

View file

@@ -17,7 +17,6 @@ from archivebox.misc.util import scheme, enforce_types, ExtendedEncoder
 from archivebox.config import DATA_DIR, CONSTANTS
 from archivebox.config.common import ARCHIVING_CONFIG, STORAGE_CONFIG, SEARCH_BACKEND_CONFIG
-from archivebox.config.legacy import URL_DENYLIST_PTN, URL_ALLOWLIST_PTN
 
 from ..logging_util import (
     TimedProgress,
@@ -126,6 +125,7 @@ def validate_links(links: Iterable[Link]) -> List[Link]:
 @enforce_types
 def archivable_links(links: Iterable[Link]) -> Iterable[Link]:
     """remove chrome://, about:// or other schemed links that cant be archived"""
+
     for link in links:
         try:
             urlparse(link.url)
@@ -133,9 +133,9 @@ def archivable_links(links: Iterable[Link]) -> Iterable[Link]:
             continue
         if scheme(link.url) not in ('http', 'https', 'ftp'):
             continue
-        if URL_DENYLIST_PTN and URL_DENYLIST_PTN.search(link.url):
+        if ARCHIVING_CONFIG.URL_DENYLIST_PTN and ARCHIVING_CONFIG.URL_DENYLIST_PTN.search(link.url):
             continue
-        if URL_ALLOWLIST_PTN and (not URL_ALLOWLIST_PTN.search(link.url)):
+        if ARCHIVING_CONFIG.URL_ALLOWLIST_PTN and (not ARCHIVING_CONFIG.URL_ALLOWLIST_PTN.search(link.url)):
             continue
         yield link

View file

@@ -396,8 +396,16 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
     Path(CONSTANTS.SOURCES_DIR).mkdir(exist_ok=True)
     Path(CONSTANTS.ARCHIVE_DIR).mkdir(exist_ok=True)
     Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
+
     print(f'    + ./{CONSTANTS.CONFIG_FILE.relative_to(DATA_DIR)}...')
-    write_config_file({}, out_dir=str(out_dir))
+
+    # create the .archivebox_id file with a unique ID for this collection
+    from archivebox.config.paths import _get_collection_id
+    _get_collection_id(CONSTANTS.DATA_DIR, force_create=True)
+
+    # create the ArchiveBox.conf file
+    write_config_file({'SECRET_KEY': SERVER_CONFIG.SECRET_KEY})
 
     if os.access(CONSTANTS.DATABASE_FILE, os.F_OK):
         print('\n[green][*] Verifying main SQL index and running any migrations needed...[/green]')
@@ -1164,10 +1172,13 @@ def config(config_options_str: Optional[str]=None,
            config_options: Optional[List[str]]=None,
            get: bool=False,
            set: bool=False,
+           search: bool=False,
            reset: bool=False,
            out_dir: Path=DATA_DIR) -> None:
     """Get and set your ArchiveBox project configuration values"""
 
+    import abx.archivebox.reads
+
     from rich import print
 
     check_data_folder()
@@ -1188,7 +1199,27 @@ def config(config_options_str: Optional[str]=None,
     no_args = not (get or set or reset or config_options)
 
     matching_config = {}
-    if get or no_args:
+    if search:
+        if config_options:
+            config_options = [get_real_name(key) for key in config_options]
+            matching_config = {key: settings.FLAT_CONFIG[key] for key in config_options if key in settings.FLAT_CONFIG}
+            for config_section in settings.CONFIGS.values():
+                aliases = config_section.aliases
+
+                for search_key in config_options:
+                    # search all aliases in the section
+                    for alias_key, key in aliases.items():
+                        if search_key.lower() in alias_key.lower():
+                            matching_config[key] = config_section.model_dump()[key]
+
+                    # search all keys and values in the section
+                    for existing_key, value in config_section.model_dump().items():
+                        if search_key.lower() in existing_key.lower() or search_key.lower() in str(value).lower():
+                            matching_config[existing_key] = value
+
+        print(printable_config(matching_config))
+        raise SystemExit(not matching_config)
+    elif get or no_args:
         if config_options:
             config_options = [get_real_name(key) for key in config_options]
             matching_config = {key: settings.FLAT_CONFIG[key] for key in config_options if key in settings.FLAT_CONFIG}
@@ -1227,14 +1258,15 @@ def config(config_options_str: Optional[str]=None,
     if new_config:
         before = settings.FLAT_CONFIG
-        matching_config = write_config_file(new_config, out_dir=DATA_DIR)
-        after = load_all_config()
+        matching_config = write_config_file(new_config)
+        after = {**load_all_config(), **abx.archivebox.reads.get_FLAT_CONFIG()}
         print(printable_config(matching_config))
 
         side_effect_changes = {}
         for key, val in after.items():
-            if key in settings.FLAT_CONFIG and (before[key] != after[key]) and (key not in matching_config):
+            if key in settings.FLAT_CONFIG and (str(before[key]) != str(after[key])) and (key not in matching_config):
                 side_effect_changes[key] = after[key]
+        # import ipdb; ipdb.set_trace()
 
         if side_effect_changes:
             stderr()
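Note: example usage of the new flag (output shape is illustrative, not verbatim): running archivebox config --search timeout fuzzy-matches the term against every key, deprecated alias, and current value across all plugin ConfigSets, prints the matches (e.g. TIMEOUT=60, MEDIA_TIMEOUT=3600), and exits non-zero if nothing matched, whereas --get still requires exact key names.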

View file

@@ -51,6 +51,7 @@ def check_data_folder() -> None:
     # Check data dir permissions, /tmp, and /lib permissions
     check_data_dir_permissions()
 
+
 def check_migrations():
     from archivebox import DATA_DIR
     from ..index.sql import list_migrations
@@ -66,6 +67,7 @@ def check_migrations():
         print('    archivebox init', file=sys.stderr)
         raise SystemExit(3)
 
+
 def check_io_encoding():
     PYTHON_ENCODING = (sys.__stdout__ or sys.stdout or sys.__stderr__ or sys.stderr).encoding.upper().replace('UTF8', 'UTF-8')
@@ -151,6 +153,8 @@ def check_data_dir_permissions():
     # Check /lib dir permissions
     check_lib_dir(STORAGE_CONFIG.LIB_DIR, throw=False, must_exist=True)
 
+    os.umask(0o777 - int(STORAGE_CONFIG.DIR_OUTPUT_PERMISSIONS, base=8))           # noqa: F821
+
 def check_tmp_dir(tmp_dir=None, throw=False, quiet=False, must_exist=True):
     from archivebox.config.paths import assert_dir_can_contain_unix_sockets, dir_is_writable, get_or_create_working_tmp_dir

View file

@@ -11,10 +11,6 @@ from pocket import Pocket
 from archivebox.config import CONSTANTS
 from archivebox.misc.util import enforce_types
 from archivebox.misc.system import atomic_write
-from archivebox.config.legacy import (
-    POCKET_CONSUMER_KEY,
-    POCKET_ACCESS_TOKENS,
-)
 
 from ..index.schema import Link
@@ -98,13 +94,15 @@ def should_parse_as_pocket_api(text: str) -> bool:
 def parse_pocket_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
     """Parse bookmarks from the Pocket API"""
 
+    from archivebox.plugins_extractor.pocket.config import POCKET_CONFIG
+
     input_buffer.seek(0)
     pattern = re.compile(r"^pocket:\/\/(\w+)")
     for line in input_buffer:
         if should_parse_as_pocket_api(line):
             username = pattern.search(line).group(1)
-            api = Pocket(POCKET_CONSUMER_KEY, POCKET_ACCESS_TOKENS[username])
+            api = Pocket(POCKET_CONFIG.POCKET_CONSUMER_KEY, POCKET_CONFIG.POCKET_ACCESS_TOKENS[username])
             api.last_since = None
             for article in get_pocket_articles(api, since=read_since(username)):

View file

@@ -8,15 +8,12 @@ from datetime import datetime
 from typing import IO, Iterable, Optional
 from configparser import ConfigParser
 
-from archivebox.config import CONSTANTS
 from archivebox.misc.util import enforce_types
 from archivebox.misc.system import atomic_write
-from archivebox.config.legacy import READWISE_READER_TOKENS
+from archivebox.plugins_extractor.readwise.config import READWISE_CONFIG
 
 from ..index.schema import Link
 
-API_DB_PATH = CONSTANTS.SOURCES_DIR / "readwise_reader_api.db"
 
 class ReadwiseReaderAPI:
     cursor: Optional[str]
@@ -65,26 +62,26 @@ def link_from_article(article: dict, sources: list):
 def write_cursor(username: str, since: str):
-    if not API_DB_PATH.exists():
-        atomic_write(API_DB_PATH, "")
+    if not READWISE_CONFIG.READWISE_DB_PATH.exists():
+        atomic_write(READWISE_CONFIG.READWISE_DB_PATH, "")
 
     since_file = ConfigParser()
     since_file.optionxform = str
-    since_file.read(API_DB_PATH)
+    since_file.read(READWISE_CONFIG.READWISE_DB_PATH)
 
     since_file[username] = {"since": since}
 
-    with open(API_DB_PATH, "w+") as new:
+    with open(READWISE_CONFIG.READWISE_DB_PATH, "w+") as new:
         since_file.write(new)
 
 
 def read_cursor(username: str) -> Optional[str]:
-    if not API_DB_PATH.exists():
-        atomic_write(API_DB_PATH, "")
+    if not READWISE_CONFIG.READWISE_DB_PATH.exists():
+        atomic_write(READWISE_CONFIG.READWISE_DB_PATH, "")
 
     config_file = ConfigParser()
     config_file.optionxform = str
-    config_file.read(API_DB_PATH)
+    config_file.read(READWISE_CONFIG.READWISE_DB_PATH)
 
     return config_file.get(username, "since", fallback=None)
@@ -105,7 +102,7 @@ def parse_readwise_reader_api_export(input_buffer: IO[str], **_kwargs) -> Iterab
     for line in input_buffer:
         if should_parse_as_readwise_reader_api(line):
             username = pattern.search(line).group(1)
-            api = ReadwiseReaderAPI(READWISE_READER_TOKENS[username], cursor=read_cursor(username))
+            api = ReadwiseReaderAPI(READWISE_CONFIG.READWISE_READER_TOKENS[username], cursor=read_cursor(username))
             for article in get_readwise_reader_articles(api):
                 yield link_from_article(article, sources=[line])

View file

@@ -1,5 +1,6 @@
 __package__ = 'plugins_extractor.chrome'
-__label__ = 'chrome'
+__id__ = 'chrome'
+__label__ = 'Chrome'
 __version__ = '2024.10.14'
 __author__ = 'ArchiveBox'
 __homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/main/archivebox/plugins_extractor/chrome'
@@ -11,13 +12,14 @@ import abx
 @abx.hookimpl
 def get_PLUGIN():
     return {
-        'chrome': {
-            'PACKAGE': __package__,
-            'LABEL': __label__,
-            'VERSION': __version__,
-            'AUTHOR': __author__,
-            'HOMEPAGE': __homepage__,
-            'DEPENDENCIES': __dependencies__,
+        __id__: {
+            'id': __id__,
+            'package': __package__,
+            'label': __label__,
+            'version': __version__,
+            'author': __author__,
+            'homepage': __homepage__,
+            'dependencies': __dependencies__,
         }
     }
@@ -26,7 +28,7 @@ def get_CONFIG():
     from .config import CHROME_CONFIG
     return {
-        'chrome': CHROME_CONFIG
+        __id__: CHROME_CONFIG
     }
 
 @abx.hookimpl
@@ -50,22 +52,3 @@ def ready():
 #     'screenshot': SCREENSHOT_EXTRACTOR,
 #     'dom': DOM_EXTRACTOR,
 # }
-
-
-# Hooks Available:
-
-# Events:
-# on_crawl_schedule_tick
-# on_seed_post_save
-# on_crawl_post_save
-# on_snapshot_post_save
-# on_archiveresult_post_save
-
-# create_root_snapshot_from_seed
-# create_archiveresults_pending_from_snapshot
-# create_crawl_from_crawlschedule_if_due
-# create_crawl_copy_from_template
-#
-# create_crawl_from_crawlschedule_if_due
View file

@@ -0,0 +1,41 @@
+__package__ = 'plugins_extractor.htmltotext'
+__id__ = 'htmltotext'
+__label__ = 'HTML-to-Text'
+__version__ = '2024.10.14'
+__author__ = 'ArchiveBox'
+__homepage__ = 'https://github.com/ArchiveBox/archivebox'
+__dependencies__ = []
+
+import abx
+
+
+@abx.hookimpl
+def get_PLUGIN():
+    return {
+        __id__: {
+            'id': __id__,
+            'package': __package__,
+            'label': __label__,
+            'version': __version__,
+            'author': __author__,
+            'homepage': __homepage__,
+            'dependencies': __dependencies__,
+        }
+    }
+
+
+@abx.hookimpl
+def get_CONFIG():
+    from .config import HTMLTOTEXT_CONFIG
+    return {
+        __id__: HTMLTOTEXT_CONFIG
+    }
+
+
+# @abx.hookimpl
+# def get_EXTRACTORS():
+#     from .extractors import FAVICON_EXTRACTOR
+#     return {
+#         'htmltotext': FAVICON_EXTRACTOR,
+#     }

View file

@@ -0,0 +1,11 @@
+__package__ = 'plugins_extractor.htmltotext'
+
+from abx.archivebox.base_configset import BaseConfigSet
+
+
+class HtmltotextConfig(BaseConfigSet):
+    SAVE_HTMLTOTEXT: bool = True
+
+
+HTMLTOTEXT_CONFIG = HtmltotextConfig()

View file

@@ -0,0 +1,37 @@
+__package__ = 'plugins_extractor.pocket'
+__id__ = 'pocket'
+__label__ = 'pocket'
+__version__ = '2024.10.21'
+__author__ = 'ArchiveBox'
+__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/pocket'
+__dependencies__ = []
+
+import abx
+
+
+@abx.hookimpl
+def get_PLUGIN():
+    return {
+        __id__: {
+            'id': __id__,
+            'package': __package__,
+            'label': __label__,
+            'version': __version__,
+            'author': __author__,
+            'homepage': __homepage__,
+            'dependencies': __dependencies__,
+        }
+    }
+
+
+@abx.hookimpl
+def get_CONFIG():
+    from .config import POCKET_CONFIG
+    return {
+        __id__: POCKET_CONFIG
+    }
+
+
+@abx.hookimpl
+def ready():
+    from .config import POCKET_CONFIG
+    POCKET_CONFIG.validate()

View file

@@ -0,0 +1,15 @@
+__package__ = 'plugins_extractor.pocket'
+
+from typing import Dict
+
+from pydantic import Field
+
+from abx.archivebox.base_configset import BaseConfigSet
+
+
+class PocketConfig(BaseConfigSet):
+    POCKET_CONSUMER_KEY: str | None = Field(default=None)
+    POCKET_ACCESS_TOKENS: Dict[str, str] = Field(default=lambda: {})   # {<username>: <access_token>, ...}
+
+
+POCKET_CONFIG = PocketConfig()

View file

@@ -0,0 +1,37 @@
+__package__ = 'plugins_extractor.readwise'
+__id__ = 'readwise'
+__label__ = 'readwise'
+__version__ = '2024.10.21'
+__author__ = 'ArchiveBox'
+__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/readwise'
+__dependencies__ = []
+
+import abx
+
+
+@abx.hookimpl
+def get_PLUGIN():
+    return {
+        __id__: {
+            'id': __id__,
+            'package': __package__,
+            'label': __label__,
+            'version': __version__,
+            'author': __author__,
+            'homepage': __homepage__,
+            'dependencies': __dependencies__,
+        }
+    }
+
+
+@abx.hookimpl
+def get_CONFIG():
+    from .config import READWISE_CONFIG
+    return {
+        __id__: READWISE_CONFIG
+    }
+
+
+@abx.hookimpl
+def ready():
+    from .config import READWISE_CONFIG
+    READWISE_CONFIG.validate()

View file

@@ -0,0 +1,17 @@
+__package__ = 'plugins_extractor.readwise'
+
+from typing import Dict
+from pathlib import Path
+
+from pydantic import Field
+
+from abx.archivebox.base_configset import BaseConfigSet
+
+from archivebox.config import CONSTANTS
+
+
+class ReadwiseConfig(BaseConfigSet):
+    READWISE_DB_PATH: Path = Field(default=CONSTANTS.SOURCES_DIR / "readwise_reader_api.db")
+    READWISE_READER_TOKENS: Dict[str, str] = Field(default=lambda: {})   # {<username>: <access_token>, ...}
+
+
+READWISE_CONFIG = ReadwiseConfig()

View file

@@ -14,7 +14,30 @@ class YtdlpConfig(BaseConfigSet):
     USE_YTDLP: bool = Field(default=True, validation_alias=AliasChoices('USE_YOUTUBEDL', 'SAVE_MEDIA'))
 
     YTDLP_BINARY: str = Field(default='yt-dlp', alias='YOUTUBEDL_BINARY')
-    YTDLP_EXTRA_ARGS: List[str] = Field(default=[], alias='YOUTUBEDL_EXTRA_ARGS')
+    YTDLP_EXTRA_ARGS: List[str] = Field(default=lambda: [
+        '--restrict-filenames',
+        '--trim-filenames', '128',
+        '--write-description',
+        '--write-info-json',
+        '--write-annotations',
+        '--write-thumbnail',
+        '--no-call-home',
+        '--write-sub',
+        '--write-auto-subs',
+        '--convert-subs=srt',
+        '--yes-playlist',
+        '--continue',
+        # This flag doesn't exist in youtube-dl
+        # only in yt-dlp
+        '--no-abort-on-error',
+        # --ignore-errors must come AFTER
+        # --no-abort-on-error
+        # https://github.com/yt-dlp/yt-dlp/issues/4914
+        '--ignore-errors',
+        '--geo-bypass',
+        '--add-metadata',
+        '--format=(bv*+ba/b)[filesize<={}][filesize_approx<=?{}]/(bv*+ba/b)'.format(ARCHIVING_CONFIG.MEDIA_MAX_SIZE, ARCHIVING_CONFIG.MEDIA_MAX_SIZE),
+    ], alias='YOUTUBEDL_EXTRA_ARGS')
 
     YTDLP_CHECK_SSL_VALIDITY: bool = Field(default=lambda: ARCHIVING_CONFIG.CHECK_SSL_VALIDITY)
     YTDLP_TIMEOUT: int = Field(default=lambda: ARCHIVING_CONFIG.MEDIA_TIMEOUT)
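Note: this argument list moved here from the legacy ARCHIVE_METHOD_OPTIONS['YOUTUBEDL_ARGS'] default deleted above; the --format selector is built from ARCHIVING_CONFIG.MEDIA_MAX_SIZE when the default is resolved, so with the previous default of '750m' it comes out as --format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b).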