move system startup checks to pip and plugins_sys config model validation

This commit is contained in:
Nick Sweeting 2024-09-25 00:41:24 -07:00
parent 2fd837f254
commit 5b6cf68d98
No known key found for this signature in database
3 changed files with 68 additions and 90 deletions

View file

@ -2,73 +2,12 @@ __package__ = 'archivebox.misc'
# TODO: migrate all of these to new plugantic/base_check.py Check system
import sys
from benedict import benedict
from pathlib import Path
from .logging import stderr, hint
def check_system_config(config: benedict) -> None:
### Check system environment
if config['USER'] == 'root' or str(config['PUID']) == "0":
stderr('[!] ArchiveBox should never be run as root!', color='red')
stderr(' For more information, see the security overview documentation:')
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
if config['IN_DOCKER']:
attempted_command = ' '.join(sys.argv[:3])
stderr('')
stderr(' {lightred}Hint{reset}: When using Docker, you must run commands with {green}docker run{reset} instead of {lightyellow}docker exec{reset}, e.g.:'.format(**config['ANSI']))
stderr(f' docker compose run archivebox {attempted_command}')
stderr(f' docker run -it -v $PWD/data:/data archivebox/archivebox {attempted_command}')
stderr(' or:')
stderr(f' docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"')
stderr(f' docker exec -it --user=archivebox <container id> /bin/bash -c "archivebox {attempted_command}"')
raise SystemExit(2)
### Check Python environment
if sys.version_info[:3] < (3, 7, 0):
stderr(f'[X] Python version is not new enough: {config["PYTHON_VERSION"]} (>3.6 is required)', color='red')
stderr(' See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
raise SystemExit(2)
if int(config['DJANGO_VERSION'].split('.')[0]) < 3:
stderr(f'[X] Django version is not new enough: {config["DJANGO_VERSION"]} (>3.0 is required)', color='red')
stderr(' Upgrade django using pip or your system package manager: pip3 install --upgrade django')
raise SystemExit(2)
if config['PYTHON_ENCODING'] not in ('UTF-8', 'UTF8'):
stderr(f'[X] Your system is running python3 scripts with a bad locale setting: {config["PYTHON_ENCODING"]} (it should be UTF-8).', color='red')
stderr(' To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)')
stderr(' Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"')
stderr('')
stderr(' Confirm that it\'s fixed by opening a new shell and running:')
stderr(' python3 -c "import sys; print(sys.stdout.encoding)" # should output UTF-8')
raise SystemExit(2)
# stderr('[i] Using Chrome binary: {}'.format(shutil.which(CHROME_BINARY) or CHROME_BINARY))
# stderr('[i] Using Chrome data dir: {}'.format(os.path.abspath(CHROME_USER_DATA_DIR)))
if config['CHROME_USER_DATA_DIR'] is not None and Path(config['CHROME_USER_DATA_DIR']).exists():
if not (Path(config['CHROME_USER_DATA_DIR']) / 'Default').exists():
stderr('[X] Could not find profile "Default" in CHROME_USER_DATA_DIR.', color='red')
stderr(f' {config["CHROME_USER_DATA_DIR"]}')
stderr(' Make sure you set it to a Chrome user data directory containing a Default profile folder.')
stderr(' For more info see:')
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#CHROME_USER_DATA_DIR')
if '/Default' in str(config['CHROME_USER_DATA_DIR']):
stderr()
stderr(' Try removing /Default from the end e.g.:')
stderr(' CHROME_USER_DATA_DIR="{}"'.format(str(config['CHROME_USER_DATA_DIR']).split('/Default')[0]))
# hard error is too annoying here, instead just set it to nothing
# raise SystemExit(2)
config['CHROME_USER_DATA_DIR'] = None
else:
config['CHROME_USER_DATA_DIR'] = None
def check_dependencies(config: benedict, show_help: bool=True) -> None:
invalid_dependencies = [
(name, info) for name, info in config['DEPENDENCIES'].items()
@ -90,34 +29,6 @@ def check_dependencies(config: benedict, show_help: bool=True) -> None:
''), prefix=' ')
stderr('')
if config['TIMEOUT'] < 5:
stderr(f'[!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={config["TIMEOUT"]} seconds)', color='red')
stderr(' You must allow *at least* 5 seconds for indexing and archive methods to run succesfully.')
stderr(' (Setting it to somewhere between 30 and 3000 seconds is recommended)')
stderr()
stderr(' If you want to make ArchiveBox run faster, disable specific archive methods instead:')
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
stderr()
elif config['USE_CHROME'] and config['TIMEOUT'] < 15:
stderr(f'[!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={config["TIMEOUT"]} seconds)', color='red')
stderr(' Chrome will fail to archive all sites if set to less than ~15 seconds.')
stderr(' (Setting it to somewhere between 30 and 300 seconds is recommended)')
stderr()
stderr(' If you want to make ArchiveBox run faster, disable specific archive methods instead:')
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
stderr()
if config['USE_YOUTUBEDL'] and config['MEDIA_TIMEOUT'] < 20:
stderr(f'[!] Warning: MEDIA_TIMEOUT is set too low! (currently set to MEDIA_TIMEOUT={config["MEDIA_TIMEOUT"]} seconds)', color='red')
stderr(' youtube-dl/yt-dlp will fail to archive any media if set to less than ~20 seconds.')
stderr(' (Setting it somewhere over 60 seconds is recommended)')
stderr()
stderr(' If you want to disable media archiving entirely, set SAVE_MEDIA=False instead:')
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_media')
stderr()
def check_data_folder(config: benedict) -> None:

View file

@ -19,6 +19,7 @@ from plugantic.base_check import BaseCheck
from plugantic.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
from plugantic.base_hook import BaseHook
PYTHON_ENCODING = sys.__stdout__.encoding.upper()
###################### Config ##########################
@ -185,6 +186,28 @@ class CheckPipEnvironment(BaseCheck):
# logger.debug("[√] CheckPipEnvironment: data/lib/pip virtualenv is setup properly")
return errors
# check python version
if sys.version_info[:3] < (3, 10, 0):
print('[red][X] Python version is not new enough: {sys.version} (>3.10 is required)[/red]', file=sys.stderr)
print(' See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.', file=sys.stderr)
raise SystemExit(2)
# check django version
if int(django.VERSION[0]) < 5:
print('[red][X] Django version is not new enough: {django.VERSION[:3]} (>=5.0 is required)[/red]', file=sys.stderr)
print(' Upgrade django using pip or your system package manager: pip3 install --upgrade django', file=sys.stderr)
raise SystemExit(2)
# check python locale
if PYTHON_ENCODING not in ('UTF-8', 'UTF8'):
print(f'[red][X] Your system is running python3 scripts with a bad locale setting: {PYTHON_ENCODING} (it should be UTF-8).[/red]', file=sys.stderr)
print(' To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)', file=sys.stderr)
print(' Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"', file=sys.stderr)
print('')
print(' Confirm that it\'s fixed by opening a new shell and running:', file=sys.stderr)
print(' python3 -c "import sys; print(sys.stdout.encoding)" # should output UTF-8', file=sys.stderr)
raise SystemExit(2)
USER_IS_NOT_ROOT_CHECK = CheckUserIsNotRoot()
PIP_ENVIRONMENT_CHECK = CheckPipEnvironment()

View file

@ -4,7 +4,8 @@ import platform
from typing import List, ClassVar
from pathlib import Path
from pydantic import InstanceOf, Field
from pydantic import InstanceOf, Field, field_validator, model_validator
from rich import print
from django.conf import settings
@ -31,6 +32,28 @@ class ShellConfig(BaseConfigSet):
PUID: int = Field(default=os.getuid())
PGID: int = Field(default=os.getgid())
@model_validator(mode='after')
def validate_not_running_as_root(self):
attempted_command = ' '.join(sys.argv[:3])
if self.PUID == 0 and attempted_command != 'setup':
# stderr('[!] ArchiveBox should never be run as root!', color='red')
# stderr(' For more information, see the security overview documentation:')
# stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
print('[red][!] ArchiveBox should never be run as root![/red]', file=sys.stderr)
print(' For more information, see the security overview documentation:', file=sys.stderr)
print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root', file=sys.stderr)
if self.IN_DOCKER:
print('[red][!] When using Docker, you must run commands with [green]docker run[/green] instead of [yellow3]docker exec[/yellow3], e.g.:', file=sys.stderr)
print(' docker compose run archivebox {attempted_command}', file=sys.stderr)
print(f' docker run -it -v $PWD/data:/data archivebox/archivebox {attempted_command}', file=sys.stderr)
print(' or:', file=sys.stderr)
print(f' docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
print(f' docker exec -it --user=archivebox <container id> /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
raise SystemExit(2)
return self
SHELL_CONFIG = ShellConfig()
@ -105,6 +128,27 @@ class ArchivingConfig(BaseConfigSet):
# CHROME_HEADLESS: bool = Field(default=True)
# CHROME_SANDBOX: bool = Field(default=lambda: not SHELL_CONFIG.IN_DOCKER)
@field_validator('TIMEOUT', mode='after')
def validate_timeout(cls, v):
print(f'[red][!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={v} seconds)[/red]', file=sys.stderr)
print(' You must allow *at least* 5 seconds for indexing and archive methods to run succesfully.', file=sys.stderr)
print(' (Setting it to somewhere between 30 and 3000 seconds is recommended)', file=sys.stderr)
print(file=sys.stderr)
print(' If you want to make ArchiveBox run faster, disable specific archive methods instead:', file=sys.stderr)
print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles', file=sys.stderr)
print(file=sys.stderr)
return v
@field_validator('CHECK_SSL_VALIDITY', mode='after')
def validate_check_ssl_validity(cls, v):
"""SIDE EFFECT: disable "you really shouldnt disable ssl" warnings emitted by requests"""
if not v:
import requests
import urllib3
requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
return v
ARCHIVING_CONFIG = ArchivingConfig()