__package__ = 'archivebox.plugins_sys.config'

# Core ArchiveBox configuration sections (shell, storage, general, server,
# archiving, search backend) declared as config sets, plus the ConfigPlugin
# that bundles them and registers them with the Django settings at import time.

import os
import sys
import shutil
import platform
import archivebox

from typing import List, ClassVar, Dict, Optional
from datetime import datetime
from pathlib import Path

from pydantic import InstanceOf, Field, field_validator, model_validator, computed_field
from benedict import benedict
from rich import print
from django.conf import settings
from django.utils.crypto import get_random_string

from plugantic.base_plugin import BasePlugin
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
from plugantic.base_hook import BaseHook

from .constants import CONSTANTS, CONSTANTS_CONFIG


###################### Config ##########################


# Settings describing the process environment: terminal capabilities, docker
# flags, the current user/ids and the python I/O encoding. Instantiating this
# config set runs the root-user and locale checks in its model validator.
class ShellConfig(BaseConfigSet):
    section: ClassVar[ConfigSectionName] = 'SHELL_CONFIG'

    DEBUG: bool = Field(default=False)

    # NOTE: defaults written as plain expressions (isatty(), getuid(), ...) are
    # evaluated once, when this class body executes at import time.
    IS_TTY: bool = Field(default=sys.stdout.isatty())
    # Callable defaults taking `c` are presumably resolved by BaseConfigSet
    # against the loaded config -- TODO confirm in plugantic.base_configset.
    USE_COLOR: bool = Field(default=lambda c: c.IS_TTY)
    SHOW_PROGRESS: bool = Field(default=lambda c: c.IS_TTY)

    IN_DOCKER: bool = Field(default=False)
    IN_QEMU: bool = Field(default=False)

    # Name of the last path component of the resolved home directory.
    USER: str = Field(default=Path('~').expanduser().resolve().name)
    PUID: int = Field(default=os.getuid())
    PGID: int = Field(default=os.getgid())

    # Encoding of the first available standard stream, upper-cased, with the
    # 'UTF8' spelling normalized to 'UTF-8'.
    PYTHON_ENCODING: str = Field(default=(sys.__stdout__ or sys.stdout or sys.__stderr__ or sys.stderr).encoding.upper().replace('UTF8', 'UTF-8'))

    ANSI: Dict[str, str] = Field(default=lambda c: CONSTANTS.DEFAULT_CLI_COLORS if c.USE_COLOR else CONSTANTS.DISABLED_CLI_COLORS)

    VERSIONS_AVAILABLE: bool = False             # .check_for_update.get_versions_available_on_github(c)},
    CAN_UPGRADE: bool = False                    # .check_for_update.can_upgrade(c)},

    # Current terminal width in columns; falls back to 100 when the size
    # cannot be determined.
    @computed_field
    @property
    def TERM_WIDTH(self) -> int:
        return shutil.get_terminal_size((100, 10)).columns

    # Best-effort lookup of the git commit hash of the source checkout.
    # First follows the ref named in .git/HEAD; if that fails, reads the first
    # file found under .git/refs/heads/. Returns None when both attempts fail
    # (errors are deliberately swallowed: a missing .git dir is not fatal).
    @computed_field
    @property
    def COMMIT_HASH(self) -> Optional[str]:
        try:
            git_dir = archivebox.PACKAGE_DIR / '../.git'
            # HEAD looks like "ref: refs/heads/<branch>"; keep the last token
            ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1]
            commit_hash = git_dir.joinpath(ref).read_text().strip()
            return commit_hash
        except Exception:
            pass

        try:
            return list((archivebox.PACKAGE_DIR / '../.git/refs/heads/').glob('*'))[0].read_text().strip()
        except Exception:
            pass

        return None

    # Build timestamp string: inside docker, the BUILD_END_TIME= value read
    # from /VERSION.txt; otherwise the mtime of the package's config.py.
    @computed_field
    @property
    def BUILD_TIME(self) -> str:
        if self.IN_DOCKER:
            docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0]
            return docker_build_end_time

        src_last_modified_unix_timestamp = (archivebox.PACKAGE_DIR / 'config.py').stat().st_mtime
        # NOTE(review): '%s' is not one of the strftime directives documented
        # by Python; its output is platform-dependent -- confirm it is wanted.
        return datetime.fromtimestamp(src_last_modified_unix_timestamp).strftime('%Y-%m-%d %H:%M:%S %s')

    @model_validator(mode='after')
    def validate_not_running_as_root(self):
        """Abort with exit code 2 when running as root or with a non-UTF-8 encoding.

        Prints an explanation to stderr before raising SystemExit(2) in either
        case; otherwise returns the validated instance unchanged.
        """
        # NOTE(review): sys.argv[:3] includes argv[0] (the program path), so the
        # comparison against 'setup' below can presumably never match -- confirm
        # whether sys.argv[1:3] was intended before relaxing this check.
        attempted_command = ' '.join(sys.argv[:3])
        if self.PUID == 0 and attempted_command != 'setup':
            # stderr('[!] ArchiveBox should never be run as root!', color='red')
            # stderr(' For more information, see the security overview documentation:')
            # stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
            print('[red][!] ArchiveBox should never be run as root![/red]', file=sys.stderr)
            print(' For more information, see the security overview documentation:', file=sys.stderr)
            print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root', file=sys.stderr)

            if self.IN_DOCKER:
                print('[red][!] When using Docker, you must run commands with [green]docker run[/green] instead of [yellow3]docker exec[/yellow3], e.g.:', file=sys.stderr)
                # f-prefix is required so the attempted command is interpolated
                # like in the sibling hints below
                print(f' docker compose run archivebox {attempted_command}', file=sys.stderr)
                print(f' docker run -it -v $PWD/data:/data archivebox/archivebox {attempted_command}', file=sys.stderr)
                print(' or:', file=sys.stderr)
                print(f' docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
                print(f' docker exec -it --user=archivebox /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)

            raise SystemExit(2)

        # check python locale
        if self.PYTHON_ENCODING != 'UTF-8':
            print(f'[red][X] Your system is running python3 scripts with a bad locale setting: {self.PYTHON_ENCODING} (it should be UTF-8).[/red]', file=sys.stderr)
            print(' To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)', file=sys.stderr)
            print(' Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"', file=sys.stderr)
            # keep the blank separator on stderr with the rest of the message
            print('', file=sys.stderr)
            print(' Confirm that it\'s fixed by opening a new shell and running:', file=sys.stderr)
            print(' python3 -c "import sys; print(sys.stdout.encoding)" # should output UTF-8', file=sys.stderr)
            raise SystemExit(2)

        return self


SHELL_CONFIG = ShellConfig()


# Settings for how output files are written.
class StorageConfig(BaseConfigSet):
    section: ClassVar[ConfigSectionName] = 'STORAGE_CONFIG'

    OUTPUT_PERMISSIONS: str = Field(default='644')
    RESTRICT_FILE_NAMES: str = Field(default='windows')
    ENFORCE_ATOMIC_WRITES: bool = Field(default=True)


STORAGE_CONFIG = StorageConfig()


# Miscellaneous settings that belong to no other section.
class GeneralConfig(BaseConfigSet):
    section: ClassVar[ConfigSectionName] = 'GENERAL_CONFIG'

    TAG_SEPARATOR_PATTERN: str = Field(default=r'[,]')


GENERAL_CONFIG = GeneralConfig()


# Settings for the web server / UI.
class ServerConfig(BaseConfigSet):
    section: ClassVar[ConfigSectionName] = 'SERVER_CONFIG'

    SECRET_KEY: str = Field(default=lambda: get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789_'))
    # Indexing the list with the IN_DOCKER bool picks element 0 (False) or 1 (True).
    BIND_ADDR: str = Field(default=lambda: ['127.0.0.1:8000', '0.0.0.0:8000'][SHELL_CONFIG.IN_DOCKER])
    ALLOWED_HOSTS: str = Field(default='*')
    CSRF_TRUSTED_ORIGINS: str = Field(default=lambda c: 'http://localhost:8000,http://127.0.0.1:8000,http://0.0.0.0:8000,http://{}'.format(c.BIND_ADDR))

    SNAPSHOTS_PER_PAGE: int = Field(default=40)
    FOOTER_INFO: str = Field(default='Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.')
    # NOTE(review): annotated as Path but defaults to None -- confirm whether
    # an optional type is intended here.
    CUSTOM_TEMPLATES_DIR: Path = Field(default=None)

    PUBLIC_INDEX: bool = Field(default=True)
    PUBLIC_SNAPSHOTS: bool = Field(default=True)
    PUBLIC_ADD_VIEW: bool = Field(default=False)

    # NOTE(review): annotated as str but default to None -- confirm intent.
    ADMIN_USERNAME: str = Field(default=None)
    ADMIN_PASSWORD: str = Field(default=None)

    REVERSE_PROXY_USER_HEADER: str = Field(default='Remote-User')
    REVERSE_PROXY_WHITELIST: str = Field(default='')
    LOGOUT_REDIRECT_URL: str = Field(default='/')
    PREVIEW_ORIGINALS: bool = Field(default=True)


SERVER_CONFIG = ServerConfig()


# Settings controlling archiving behaviour: timeouts, resolution, SSL
# checking, user agent, cookies and URL allow/deny patterns.
class ArchivingConfig(BaseConfigSet):
    section: ClassVar[ConfigSectionName] = 'ARCHIVING_CONFIG'

    ONLY_NEW: bool = Field(default=True)

    TIMEOUT: int = Field(default=60)
    MEDIA_TIMEOUT: int = Field(default=3600)

    MEDIA_MAX_SIZE: str = Field(default='750m')
    RESOLUTION: str = Field(default='1440,2000')
    CHECK_SSL_VALIDITY: bool = Field(default=True)
    USER_AGENT: str = Field(default='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)')
    COOKIES_FILE: Path | None = Field(default=None)

    URL_DENYLIST: str = Field(default=r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$', alias='URL_BLACKLIST')
    URL_ALLOWLIST: str | None = Field(default=None, alias='URL_WHITELIST')

    # GIT_DOMAINS: str = Field(default='github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht')
    # WGET_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'] + ' wget/{WGET_VERSION}')
    # CURL_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'] + ' curl/{CURL_VERSION}')
    # CHROME_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'])
    # CHROME_USER_DATA_DIR: str | None = Field(default=None)
    # CHROME_TIMEOUT: int = Field(default=0)
    # CHROME_HEADLESS: bool = Field(default=True)
    # CHROME_SANDBOX: bool = Field(default=lambda: not SHELL_CONFIG.IN_DOCKER)

    @field_validator('TIMEOUT', mode='after')
    @classmethod
    def validate_timeout(cls, v):
        """Warn on stderr when TIMEOUT is below 5 seconds; never rejects the value.

        Returns `v` unchanged in all cases.
        """
        # Only warn for values under the 5 second minimum the message describes.
        if int(v) < 5:
            print(f'[red][!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={v} seconds)[/red]', file=sys.stderr)
            print(' You must allow *at least* 5 seconds for indexing and archive methods to run succesfully.', file=sys.stderr)
            print(' (Setting it to somewhere between 30 and 3000 seconds is recommended)', file=sys.stderr)
            print(file=sys.stderr)
            print(' If you want to make ArchiveBox run faster, disable specific archive methods instead:', file=sys.stderr)
            print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles', file=sys.stderr)
            print(file=sys.stderr)
        return v

    @field_validator('CHECK_SSL_VALIDITY', mode='after')
    @classmethod
    def validate_check_ssl_validity(cls, v):
        """SIDE EFFECT: disable "you really shouldnt disable ssl" warnings emitted by requests"""
        if not v:
            # imported lazily so the libraries are only touched when needed
            import requests
            import urllib3
            requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)
            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        return v


ARCHIVING_CONFIG = ArchivingConfig()


# Settings for the full-text search backend.
class SearchBackendConfig(BaseConfigSet):
    section: ClassVar[ConfigSectionName] = 'SEARCH_BACKEND_CONFIG'

    USE_INDEXING_BACKEND: bool = Field(default=True)
    USE_SEARCHING_BACKEND: bool = Field(default=True)

    SEARCH_BACKEND_ENGINE: str = Field(default='ripgrep')
    SEARCH_PROCESS_HTML: bool = Field(default=True)
    SEARCH_BACKEND_TIMEOUT: int = Field(default=10)


SEARCH_BACKEND_CONFIG = SearchBackendConfig()


# Plugin that exposes every config set instantiated above as its hooks.
class ConfigPlugin(BasePlugin):
    app_label: str = 'CONFIG'
    verbose_name: str = 'Configuration'

    hooks: List[InstanceOf[BaseHook]] = [
        SHELL_CONFIG,
        GENERAL_CONFIG,
        STORAGE_CONFIG,
        SERVER_CONFIG,
        ARCHIVING_CONFIG,
        SEARCH_BACKEND_CONFIG,
    ]

    # def register(self, settings, parent_plugin=None):
    #     try:
    #         super().register(settings, parent_plugin=parent_plugin)
    #     except Exception as e:
    #         print(f'[red][X] Error registering config plugin: {e}[/red]', file=sys.stderr)


# Module-level side effect: the plugin is instantiated and registered against
# the Django settings as soon as this module is imported.
PLUGIN = ConfigPlugin()
PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig