From 5b6cf68d988fcf9be7b6fad5f7181edbd49046d6 Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Wed, 25 Sep 2024 00:41:24 -0700
Subject: [PATCH] move system startup checks to pip and plugins_sys config model validation

---
Reviewer amendments on top of the original commit (hunk sizes and diffstat
updated accordingly; "index" hashes are still those of the original commit):
 * plugins_pkg/pip/apps.py: the python/django/locale checks now run before
   `return errors` instead of after it, the two messages that interpolate
   values got their missing f-string prefix, the blank separator line goes
   to stderr like the rest of the message, and the python hint says >=3.10.
 * plugins_sys/config/apps.py: the root check compares the subcommand
   (sys.argv[1]) with 'setup' instead of the joined argv (which always
   includes argv[0]), the `docker compose run` hint got its missing f-string
   prefix, and the TIMEOUT warning is only printed when TIMEOUT < 5.

 archivebox/misc/checks.py             | 89 ---------------------------
 archivebox/plugins_pkg/pip/apps.py    | 25 +++++++
 archivebox/plugins_sys/config/apps.py | 47 +++++++++++++-
 3 files changed, 71 insertions(+), 90 deletions(-)

diff --git a/archivebox/misc/checks.py b/archivebox/misc/checks.py
index e0b7016a..3d0e4493 100644
--- a/archivebox/misc/checks.py
+++ b/archivebox/misc/checks.py
@@ -2,73 +2,12 @@ __package__ = 'archivebox.misc'
 
 # TODO: migrate all of these to new plugantic/base_check.py Check system
 
-import sys
 from benedict import benedict
 from pathlib import Path
 
 from .logging import stderr, hint
 
 
-def check_system_config(config: benedict) -> None:
-    ### Check system environment
-    if config['USER'] == 'root' or str(config['PUID']) == "0":
-        stderr('[!] ArchiveBox should never be run as root!', color='red')
-        stderr(' For more information, see the security overview documentation:')
-        stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
-
-        if config['IN_DOCKER']:
-            attempted_command = ' '.join(sys.argv[:3])
-            stderr('')
-            stderr(' {lightred}Hint{reset}: When using Docker, you must run commands with {green}docker run{reset} instead of {lightyellow}docker exec{reset}, e.g.:'.format(**config['ANSI']))
-            stderr(f' docker compose run archivebox {attempted_command}')
-            stderr(f' docker run -it -v $PWD/data:/data archivebox/archivebox {attempted_command}')
-            stderr(' or:')
-            stderr(f' docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"')
-            stderr(f' docker exec -it --user=archivebox /bin/bash -c "archivebox {attempted_command}"')
-
-        raise SystemExit(2)
-
-    ### Check Python environment
-    if sys.version_info[:3] < (3, 7, 0):
-        stderr(f'[X] Python version is not new enough: {config["PYTHON_VERSION"]} (>3.6 is required)', color='red')
-        stderr(' See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
-        raise SystemExit(2)
-
-    if int(config['DJANGO_VERSION'].split('.')[0]) < 3:
-        stderr(f'[X] Django version is not new enough: {config["DJANGO_VERSION"]} (>3.0 is required)', color='red')
-        stderr(' Upgrade django using pip or your system package manager: pip3 install --upgrade django')
-        raise SystemExit(2)
-
-    if config['PYTHON_ENCODING'] not in ('UTF-8', 'UTF8'):
-        stderr(f'[X] Your system is running python3 scripts with a bad locale setting: {config["PYTHON_ENCODING"]} (it should be UTF-8).', color='red')
-        stderr(' To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)')
-        stderr(' Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"')
-        stderr('')
-        stderr(' Confirm that it\'s fixed by opening a new shell and running:')
-        stderr(' python3 -c "import sys; print(sys.stdout.encoding)" # should output UTF-8')
-        raise SystemExit(2)
-
-    # stderr('[i] Using Chrome binary: {}'.format(shutil.which(CHROME_BINARY) or CHROME_BINARY))
-    # stderr('[i] Using Chrome data dir: {}'.format(os.path.abspath(CHROME_USER_DATA_DIR)))
-    if config['CHROME_USER_DATA_DIR'] is not None and Path(config['CHROME_USER_DATA_DIR']).exists():
-        if not (Path(config['CHROME_USER_DATA_DIR']) / 'Default').exists():
-            stderr('[X] Could not find profile "Default" in CHROME_USER_DATA_DIR.', color='red')
-            stderr(f' {config["CHROME_USER_DATA_DIR"]}')
-            stderr(' Make sure you set it to a Chrome user data directory containing a Default profile folder.')
-            stderr(' For more info see:')
-            stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#CHROME_USER_DATA_DIR')
-            if '/Default' in str(config['CHROME_USER_DATA_DIR']):
-                stderr()
-                stderr(' Try removing /Default from the end e.g.:')
-                stderr(' CHROME_USER_DATA_DIR="{}"'.format(str(config['CHROME_USER_DATA_DIR']).split('/Default')[0]))
-
-            # hard error is too annoying here, instead just set it to nothing
-            # raise SystemExit(2)
-            config['CHROME_USER_DATA_DIR'] = None
-    else:
-        config['CHROME_USER_DATA_DIR'] = None
-
-
 def check_dependencies(config: benedict, show_help: bool=True) -> None:
     invalid_dependencies = [
         (name, info) for name, info in config['DEPENDENCIES'].items()
@@ -90,34 +29,6 @@ def check_dependencies(config: benedict, show_help: bool=True) -> None:
             ''), prefix=' ')
         stderr('')
 
-    if config['TIMEOUT'] < 5:
-        stderr(f'[!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={config["TIMEOUT"]} seconds)', color='red')
-        stderr(' You must allow *at least* 5 seconds for indexing and archive methods to run succesfully.')
-        stderr(' (Setting it to somewhere between 30 and 3000 seconds is recommended)')
-        stderr()
-        stderr(' If you want to make ArchiveBox run faster, disable specific archive methods instead:')
-        stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
-        stderr()
-
-    elif config['USE_CHROME'] and config['TIMEOUT'] < 15:
-        stderr(f'[!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={config["TIMEOUT"]} seconds)', color='red')
-        stderr(' Chrome will fail to archive all sites if set to less than ~15 seconds.')
-        stderr(' (Setting it to somewhere between 30 and 300 seconds is recommended)')
-        stderr()
-        stderr(' If you want to make ArchiveBox run faster, disable specific archive methods instead:')
-        stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
-        stderr()
-
-    if config['USE_YOUTUBEDL'] and config['MEDIA_TIMEOUT'] < 20:
-        stderr(f'[!] Warning: MEDIA_TIMEOUT is set too low! (currently set to MEDIA_TIMEOUT={config["MEDIA_TIMEOUT"]} seconds)', color='red')
-        stderr(' youtube-dl/yt-dlp will fail to archive any media if set to less than ~20 seconds.')
-        stderr(' (Setting it somewhere over 60 seconds is recommended)')
-        stderr()
-        stderr(' If you want to disable media archiving entirely, set SAVE_MEDIA=False instead:')
-        stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_media')
-        stderr()
-
-
 
 
 def check_data_folder(config: benedict) -> None:
diff --git a/archivebox/plugins_pkg/pip/apps.py b/archivebox/plugins_pkg/pip/apps.py
index 185faaea..0a96db90 100644
--- a/archivebox/plugins_pkg/pip/apps.py
+++ b/archivebox/plugins_pkg/pip/apps.py
@@ -19,6 +19,7 @@ from plugantic.base_check import BaseCheck
 from plugantic.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
 from plugantic.base_hook import BaseHook
 
+PYTHON_ENCODING = sys.__stdout__.encoding.upper()
 
 ###################### Config ##########################
 
@@ -184,6 +185,30 @@ class CheckPipEnvironment(BaseCheck):
             )
         # logger.debug("[√] CheckPipEnvironment: data/lib/pip virtualenv is setup properly")
+
+        # check python version
+        if sys.version_info[:3] < (3, 10, 0):
+            print(f'[red][X] Python version is not new enough: {sys.version} (>=3.10 is required)[/red]', file=sys.stderr)
+            print(' See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.', file=sys.stderr)
+            raise SystemExit(2)
+
+        # check django version
+        import django  # local import so this check does not depend on a module-level import
+        if int(django.VERSION[0]) < 5:
+            print(f'[red][X] Django version is not new enough: {django.VERSION[:3]} (>=5.0 is required)[/red]', file=sys.stderr)
+            print(' Upgrade django using pip or your system package manager: pip3 install --upgrade django', file=sys.stderr)
+            raise SystemExit(2)
+
+        # check python locale
+        if PYTHON_ENCODING not in ('UTF-8', 'UTF8'):
+            print(f'[red][X] Your system is running python3 scripts with a bad locale setting: {PYTHON_ENCODING} (it should be UTF-8).[/red]', file=sys.stderr)
+            print(' To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)', file=sys.stderr)
+            print(' Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"', file=sys.stderr)
+            print('', file=sys.stderr)
+            print(' Confirm that it\'s fixed by opening a new shell and running:', file=sys.stderr)
+            print(' python3 -c "import sys; print(sys.stdout.encoding)" # should output UTF-8', file=sys.stderr)
+            raise SystemExit(2)
+
         return errors
 
 
 USER_IS_NOT_ROOT_CHECK = CheckUserIsNotRoot()
diff --git a/archivebox/plugins_sys/config/apps.py b/archivebox/plugins_sys/config/apps.py
index ecd905f7..61e6ea8b 100644
--- a/archivebox/plugins_sys/config/apps.py
+++ b/archivebox/plugins_sys/config/apps.py
@@ -4,7 +4,8 @@ import platform
 
 from typing import List, ClassVar
 from pathlib import Path
-from pydantic import InstanceOf, Field
+from pydantic import InstanceOf, Field, field_validator, model_validator
+from rich import print
 
 from django.conf import settings
 
@@ -30,6 +31,28 @@ class ShellConfig(BaseConfigSet):
 
     PUID: int = Field(default=os.getuid())
     PGID: int = Field(default=os.getgid())
+
+    @model_validator(mode='after')
+    def validate_not_running_as_root(self):
+        attempted_command = ' '.join(sys.argv[:3])
+        if self.PUID == 0 and sys.argv[1:2] != ['setup']:
+            # stderr('[!] ArchiveBox should never be run as root!', color='red')
+            # stderr(' For more information, see the security overview documentation:')
+            # stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
+            print('[red][!] ArchiveBox should never be run as root![/red]', file=sys.stderr)
+            print(' For more information, see the security overview documentation:', file=sys.stderr)
+            print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root', file=sys.stderr)
+
+            if self.IN_DOCKER:
+                print('[red][!] When using Docker, you must run commands with [green]docker run[/green] instead of [yellow3]docker exec[/yellow3], e.g.:', file=sys.stderr)
+                print(f' docker compose run archivebox {attempted_command}', file=sys.stderr)
+                print(f' docker run -it -v $PWD/data:/data archivebox/archivebox {attempted_command}', file=sys.stderr)
+                print(' or:', file=sys.stderr)
+                print(f' docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
+                print(f' docker exec -it --user=archivebox /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
+            raise SystemExit(2)
+
+        return self
 
 SHELL_CONFIG = ShellConfig()
 
@@ -105,6 +128,28 @@ class ArchivingConfig(BaseConfigSet):
     # CHROME_HEADLESS: bool = Field(default=True)
     # CHROME_SANDBOX: bool = Field(default=lambda: not SHELL_CONFIG.IN_DOCKER)
 
+    @field_validator('TIMEOUT', mode='after')
+    def validate_timeout(cls, v):
+        if v < 5:
+            print(f'[red][!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={v} seconds)[/red]', file=sys.stderr)
+            print(' You must allow *at least* 5 seconds for indexing and archive methods to run succesfully.', file=sys.stderr)
+            print(' (Setting it to somewhere between 30 and 3000 seconds is recommended)', file=sys.stderr)
+            print(file=sys.stderr)
+            print(' If you want to make ArchiveBox run faster, disable specific archive methods instead:', file=sys.stderr)
+            print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles', file=sys.stderr)
+            print(file=sys.stderr)
+        return v
+
+    @field_validator('CHECK_SSL_VALIDITY', mode='after')
+    def validate_check_ssl_validity(cls, v):
+        """SIDE EFFECT: disable "you really shouldnt disable ssl" warnings emitted by requests"""
+        if not v:
+            import requests
+            import urllib3
+            requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)
+            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+        return v
+
 
 ARCHIVING_CONFIG = ArchivingConfig()
 