mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-21 02:15:10 -04:00
fix LIB_DIR and TMP_DIR loading when primary option isnt available
This commit is contained in:
parent
deb116eed4
commit
a211461ffc
21 changed files with 712 additions and 303 deletions
|
@ -5,16 +5,24 @@ import sys
|
|||
from pathlib import Path
|
||||
|
||||
from rich import print
|
||||
from rich.panel import Panel
|
||||
|
||||
# DO NOT ADD ANY TOP-LEVEL IMPORTS HERE
|
||||
# DO NOT ADD ANY TOP-LEVEL IMPORTS HERE to anything other than builtin python libraries
|
||||
# this file is imported by archivebox/__init__.py
|
||||
# and any imports here will be imported by EVERYTHING else
|
||||
# so this file should only be used for pure python checks
|
||||
# that don't need to import other parts of ArchiveBox
|
||||
|
||||
# if a check needs to import other parts of ArchiveBox,
|
||||
# the imports should be done inside the check function
|
||||
# and you should make sure if you need to import any django stuff
|
||||
# that the check is called after django.setup() has been called
|
||||
|
||||
|
||||
def check_data_folder() -> None:
|
||||
from archivebox import DATA_DIR, ARCHIVE_DIR
|
||||
from archivebox.config import CONSTANTS
|
||||
from archivebox.config.paths import create_and_chown_dir, get_or_create_working_tmp_dir, get_or_create_working_lib_dir
|
||||
|
||||
archive_dir_exists = os.access(ARCHIVE_DIR, os.R_OK) and ARCHIVE_DIR.is_dir()
|
||||
if not archive_dir_exists:
|
||||
|
@ -30,8 +38,21 @@ def check_data_folder() -> None:
|
|||
raise SystemExit(2)
|
||||
|
||||
|
||||
# Create data dir subdirs
|
||||
create_and_chown_dir(CONSTANTS.SOURCES_DIR)
|
||||
create_and_chown_dir(CONSTANTS.PERSONAS_DIR / 'Default')
|
||||
create_and_chown_dir(CONSTANTS.LOGS_DIR)
|
||||
# create_and_chown_dir(CONSTANTS.CACHE_DIR)
|
||||
|
||||
# Create /tmp and /lib dirs if they don't exist
|
||||
get_or_create_working_tmp_dir(autofix=True, quiet=False)
|
||||
get_or_create_working_lib_dir(autofix=True, quiet=False)
|
||||
|
||||
# Check data dir permissions, /tmp, and /lib permissions
|
||||
check_data_dir_permissions()
|
||||
|
||||
def check_migrations():
|
||||
from archivebox import DATA_DIR, CONSTANTS
|
||||
from archivebox import DATA_DIR
|
||||
from ..index.sql import list_migrations
|
||||
|
||||
pending_migrations = [name for status, name in list_migrations() if not status]
|
||||
|
@ -45,13 +66,6 @@ def check_migrations():
|
|||
print(' archivebox init', file=sys.stderr)
|
||||
raise SystemExit(3)
|
||||
|
||||
CONSTANTS.SOURCES_DIR.mkdir(exist_ok=True)
|
||||
CONSTANTS.LOGS_DIR.mkdir(exist_ok=True)
|
||||
# CONSTANTS.CACHE_DIR.mkdir(exist_ok=True)
|
||||
(CONSTANTS.LIB_DIR / 'bin').mkdir(exist_ok=True, parents=True)
|
||||
(CONSTANTS.PERSONAS_DIR / 'Default').mkdir(exist_ok=True, parents=True)
|
||||
|
||||
|
||||
def check_io_encoding():
|
||||
PYTHON_ENCODING = (sys.__stdout__ or sys.stdout or sys.__stderr__ or sys.stderr).encoding.upper().replace('UTF8', 'UTF-8')
|
||||
|
||||
|
@ -128,3 +142,98 @@ def check_data_dir_permissions():
|
|||
STDERR.print(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#permissions]https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#permissions[/link]')
|
||||
STDERR.print(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid]https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid[/link]')
|
||||
STDERR.print(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#filesystem-doesnt-support-fsync-eg-network-mounts]https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#filesystem-doesnt-support-fsync-eg-network-mounts[/link]')
|
||||
|
||||
from archivebox.config.common import STORAGE_CONFIG
|
||||
|
||||
# Check /tmp dir permissions
|
||||
check_tmp_dir(STORAGE_CONFIG.TMP_DIR, throw=False, must_exist=True)
|
||||
|
||||
# Check /lib dir permissions
|
||||
check_lib_dir(STORAGE_CONFIG.LIB_DIR, throw=False, must_exist=True)
|
||||
|
||||
|
||||
def check_tmp_dir(tmp_dir=None, throw=False, quiet=False, must_exist=True):
|
||||
from archivebox.config.paths import assert_dir_can_contain_unix_sockets, dir_is_writable, get_or_create_working_tmp_dir
|
||||
from archivebox.misc.logging import STDERR
|
||||
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
|
||||
from archivebox.config.common import STORAGE_CONFIG
|
||||
from archivebox.logging_util import pretty_path
|
||||
|
||||
tmp_dir = tmp_dir or STORAGE_CONFIG.TMP_DIR
|
||||
socket_file = tmp_dir.absolute().resolve() / "supervisord.sock"
|
||||
|
||||
if not must_exist and not os.path.isdir(tmp_dir):
|
||||
# just check that its viable based on its length (because dir may not exist yet, we cant check if its writable)
|
||||
return len(f'file://{socket_file}') <= 96
|
||||
|
||||
tmp_is_valid = False
|
||||
try:
|
||||
tmp_is_valid = dir_is_writable(tmp_dir)
|
||||
tmp_is_valid = tmp_is_valid and assert_dir_can_contain_unix_sockets(tmp_dir)
|
||||
assert tmp_is_valid, f'ArchiveBox user PUID={ARCHIVEBOX_USER} PGID={ARCHIVEBOX_GROUP} is unable to write to TMP_DIR={tmp_dir}'
|
||||
assert len(f'file://{socket_file}') <= 96, f'ArchiveBox TMP_DIR={tmp_dir} is too long, dir containing unix socket files must be <90 chars.'
|
||||
return True
|
||||
except Exception as e:
|
||||
if not quiet:
|
||||
STDERR.print()
|
||||
ERROR_TEXT = '\n'.join((
|
||||
'',
|
||||
f'[red]:cross_mark: ArchiveBox is unable to use TMP_DIR={pretty_path(tmp_dir)}[/red]',
|
||||
f' [yellow]{e}[/yellow]',
|
||||
'',
|
||||
'[blue]Info:[/blue] [grey53]The TMP_DIR is used for the supervisord unix socket file and other temporary files.',
|
||||
' - It [red]must[/red] be on a local drive (not inside a docker volume, remote network drive, or FUSE mount).',
|
||||
f' - It [red]must[/red] be readable and writable by the ArchiveBox user (PUID={ARCHIVEBOX_USER}, PGID={ARCHIVEBOX_GROUP}).',
|
||||
' - It [red]must[/red] be a *short* path (less than 90 characters) due to UNIX path length restrictions for sockets.',
|
||||
' - It [yellow]should[/yellow] be able to hold at least 200MB of data (in-progress downloads can be large).[/grey53]',
|
||||
'',
|
||||
'[violet]Hint:[/violet] Fix it by setting TMP_DIR to a path that meets these requirements, e.g.:',
|
||||
f' [green]archivebox config --set TMP_DIR={get_or_create_working_tmp_dir(autofix=False, quiet=True) or "/tmp/archivebox"}[/green]',
|
||||
'',
|
||||
))
|
||||
STDERR.print(Panel(ERROR_TEXT, expand=False, border_style='red', title='[red]:cross_mark: Error with configured TMP_DIR[/red]', subtitle='Background workers may fail to start until fixed.'))
|
||||
STDERR.print()
|
||||
if throw:
|
||||
raise OSError(f'TMP_DIR={tmp_dir} is invalid, ArchiveBox is unable to use it and the server will fail to start!') from e
|
||||
return False
|
||||
|
||||
|
||||
def check_lib_dir(lib_dir: Path | None = None, throw=False, quiet=False, must_exist=True):
|
||||
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
|
||||
from archivebox.misc.logging import STDERR
|
||||
from archivebox.config.paths import dir_is_writable, get_or_create_working_lib_dir
|
||||
from archivebox.config.common import STORAGE_CONFIG
|
||||
from archivebox.logging_util import pretty_path
|
||||
|
||||
lib_dir = lib_dir or STORAGE_CONFIG.LIB_DIR
|
||||
|
||||
if not must_exist and not os.path.isdir(lib_dir):
|
||||
return True
|
||||
|
||||
lib_is_valid = False
|
||||
try:
|
||||
lib_is_valid = dir_is_writable(lib_dir)
|
||||
assert lib_is_valid, f'ArchiveBox user PUID={ARCHIVEBOX_USER} PGID={ARCHIVEBOX_GROUP} is unable to write to LIB_DIR={lib_dir}'
|
||||
return True
|
||||
except Exception as e:
|
||||
if not quiet:
|
||||
STDERR.print()
|
||||
ERROR_TEXT = '\n'.join((
|
||||
'',
|
||||
f'[red]:cross_mark: ArchiveBox is unable to use LIB_DIR={pretty_path(lib_dir)}[/red]',
|
||||
f' [yellow]{e}[/yellow]',
|
||||
'',
|
||||
'[blue]Info:[/blue] [grey53]The LIB_DIR is used to store ArchiveBox auto-installed plugin library and binary dependencies.',
|
||||
f' - It [red]must[/red] be readable and writable by the ArchiveBox user (PUID={ARCHIVEBOX_USER}, PGID={ARCHIVEBOX_GROUP}).',
|
||||
' - It [yellow]should[/yellow] be on a local (ideally fast) drive like an SSD or HDD (not on a network drive or external HDD).',
|
||||
' - It [yellow]should[/yellow] be able to hold at least 1GB of data (some dependencies like Chrome can be large).[/grey53]',
|
||||
'',
|
||||
'[violet]Hint:[/violet] Fix it by setting LIB_DIR to a path that meets these requirements, e.g.:',
|
||||
f' [green]archivebox config --set LIB_DIR={get_or_create_working_lib_dir(autofix=False, quiet=True) or "/usr/local/share/archivebox"}[/green]',
|
||||
'',
|
||||
))
|
||||
STDERR.print(Panel(ERROR_TEXT, expand=False, border_style='red', title='[red]:cross_mark: Error with configured LIB_DIR[/red]', subtitle='[yellow]Dependencies may not auto-install properly until fixed.[/yellow]'))
|
||||
STDERR.print()
|
||||
if throw:
|
||||
raise OSError(f'LIB_DIR={lib_dir} is invalid, ArchiveBox is unable to use it and dependencies will fail to install.') from e
|
||||
return False
|
||||
|
|
|
@ -49,7 +49,7 @@ if __name__ == '__main__':
|
|||
|
||||
prnt('[i] :heavy_dollar_sign: Welcome to the ArchiveBox Shell!')
|
||||
prnt(' [deep_sky_blue4]Docs:[/deep_sky_blue4] [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage[/link]')
|
||||
prnt(' [link=https://docs.archivebox.io/en/latest/modules.html]https://docs.archivebox.io/en/latest/modules.html[/link]')
|
||||
prnt(' [link=https://docs.archivebox.io/en/dev/apidocs/archivebox/archivebox.html]https://docs.archivebox.io/en/dev/apidocs/archivebox/archivebox.html[/link]')
|
||||
prnt()
|
||||
prnt(' :grey_question: [violet]Hint[/] [i]Here are some examples to get started:[/]')
|
||||
prnt(' add[blink][deep_sky_blue4]?[/deep_sky_blue4][/blink] [grey53]# add ? after anything to get help[/]')
|
||||
|
|
|
@ -82,10 +82,10 @@ class JSONSchemaWithLambdas(GenerateJsonSchema):
|
|||
if isinstance(default, Callable):
|
||||
return '{{lambda ' + inspect.getsource(default).split('=lambda ')[-1].strip()[:-1] + '}}'
|
||||
return to_jsonable_python(
|
||||
default,
|
||||
timedelta_mode=config.ser_json_timedelta,
|
||||
bytes_mode=config.ser_json_bytes,
|
||||
serialize_unknown=True
|
||||
default,
|
||||
timedelta_mode=config.ser_json_timedelta,
|
||||
bytes_mode=config.ser_json_bytes,
|
||||
serialize_unknown=True
|
||||
)
|
||||
|
||||
# for computed_field properties render them like this instead:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue