handle failure on tmp_dir and lib_dir detection better

This commit is contained in:
Nick Sweeting 2024-10-08 16:55:28 -07:00
parent a33da44492
commit 35c7019772
No known key found for this signature in database
9 changed files with 191 additions and 167 deletions

View file

@ -193,6 +193,7 @@ class ConstantsDict(Mapping):
".DS_Store",
".env",
".collection_id",
".archivebox_id",
"Dockerfile",
))

View file

@ -596,8 +596,15 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CON
except Exception as e:
bump_startup_progress_bar(advance=1000)
STDERR.print()
STDERR.print(Panel(f'\n[red]{e.__class__.__name__}[/red]: [yellow]{e}[/yellow]\nPlease check your config and [blue]DATA_DIR[/blue] permissions.\n', title='\n\n[red][X] Error while trying to load database!', subtitle='[grey53]NO WRITES CAN BE PERFORMED[/grey53]', expand=False, style='bold red'))
STDERR.print(Panel(
f'\n[red]{e.__class__.__name__}[/red]: [yellow]{e}[/yellow]\nPlease check your config and [blue]DATA_DIR[/blue] permissions.\n',
title='\n\n[red][X] Error while trying to load database![/red]',
subtitle='[grey53]NO WRITES CAN BE PERFORMED[/grey53]',
expand=False,
style='bold red',
))
STDERR.print()
STDERR.print_exception(show_locals=False)
return
bump_startup_progress_bar()

View file

@ -1,6 +1,7 @@
__package__ = 'archivebox.config'
import os
import sys
import tempfile
import hashlib
from pathlib import Path
@ -21,7 +22,7 @@ ARCHIVE_DIR: Path = DATA_DIR / 'archive' # archivebox snaps
@cache
def get_collection_id(DATA_DIR=DATA_DIR):
"""Get a short, stable, unique ID for the current collection"""
collection_id_file = DATA_DIR / '.collection_id'
collection_id_file = DATA_DIR / '.archivebox_id'
try:
return collection_id_file.read_text().strip()
@ -71,30 +72,35 @@ def get_LIB_DIR():
HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)
if 'SYSTEM_LIB_DIR' in os.environ:
lib_dir = Path(os.environ['SYSTEM_LIB_DIR'])
else:
with SudoPermission(uid=ARCHIVEBOX_USER, fallback=True):
lib_dir = HOST_DIRS.site_data_path
# Docker: /usr/local/share/archivebox/0.8.5
# Ubuntu: /usr/local/share/archivebox/0.8.5
# macOS: /Library/Application Support/archivebox
lib_dir = tempfile.gettempdir()
try:
with SudoPermission(uid=0, fallback=True):
lib_dir.mkdir(parents=True, exist_ok=True)
except PermissionError:
# our user cannot
lib_dir = HOST_DIRS.user_data_path
lib_dir.mkdir(parents=True, exist_ok=True)
if not dir_is_writable(lib_dir):
if IS_ROOT:
# make sure lib dir is owned by the archivebox user, not root
with SudoPermission(uid=0):
os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{lib_dir}"')
if 'SYSTEM_LIB_DIR' in os.environ:
lib_dir = Path(os.environ['SYSTEM_LIB_DIR'])
else:
raise PermissionError(f'SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
with SudoPermission(uid=ARCHIVEBOX_USER, fallback=True):
lib_dir = HOST_DIRS.site_data_path
# Docker: /usr/local/share/archivebox/0.8.5
# Ubuntu: /usr/local/share/archivebox/0.8.5
# macOS: /Library/Application Support/archivebox
try:
with SudoPermission(uid=0, fallback=True):
lib_dir.mkdir(parents=True, exist_ok=True)
except PermissionError:
# our user cannot
lib_dir = HOST_DIRS.user_data_path
lib_dir.mkdir(parents=True, exist_ok=True)
if not dir_is_writable(lib_dir):
if IS_ROOT:
# make sure lib dir is owned by the archivebox user, not root
with SudoPermission(uid=0):
os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{lib_dir}"')
else:
raise PermissionError()
except (PermissionError, AssertionError):
# raise PermissionError(f'SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
print(f'[red]:cross_mark: ERROR: SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr)
return lib_dir
@ -114,39 +120,41 @@ def get_TMP_DIR():
# print('DATA_DIR OWNED BY:', ARCHIVEBOX_USER, ARCHIVEBOX_GROUP)
# print('RUNNING AS:', self.PUID, self.PGID)
if 'SYSTEM_TMP_DIR' in os.environ:
run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR)
run_dir = tempfile.gettempdir()
try:
if 'SYSTEM_TMP_DIR' in os.environ:
run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR)
with SudoPermission(uid=0, fallback=True):
run_dir.mkdir(parents=True, exist_ok=True)
if not dir_is_writable(run_dir):
if IS_ROOT:
with SudoPermission(uid=0, fallback=False):
os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
else:
raise PermissionError()
assert len(str(run_dir / 'supervisord.conf')) < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
return run_dir
run_dir = (HOST_DIRS.site_runtime_path / get_collection_id(DATA_DIR=DATA_DIR)).resolve()
try:
assert len(str(run_dir)) + len('/supervisord.sock') < 95
except AssertionError:
run_dir = Path(tempfile.gettempdir()).resolve() / 'archivebox' / get_collection_id(DATA_DIR=DATA_DIR)
assert len(str(run_dir)) + len('/supervisord.sock') < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
with SudoPermission(uid=0, fallback=True):
run_dir.mkdir(parents=True, exist_ok=True)
if not dir_is_writable(run_dir):
if IS_ROOT:
with SudoPermission(uid=0, fallback=False):
with SudoPermission(uid=0):
os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
else:
raise PermissionError(f'SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
assert len(str(run_dir / 'supervisord.conf')) < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
return run_dir
run_dir = (HOST_DIRS.site_runtime_path / get_collection_id(DATA_DIR=DATA_DIR)).resolve()
try:
assert len(str(run_dir)) + len('/supervisord.sock') < 95
except AssertionError:
run_dir = Path(tempfile.gettempdir()).resolve() / 'archivebox' / get_collection_id(DATA_DIR=DATA_DIR)
assert len(str(run_dir)) + len('/supervisord.sock') < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
with SudoPermission(uid=0, fallback=True):
run_dir.mkdir(parents=True, exist_ok=True)
raise PermissionError()
except (PermissionError, AssertionError):
# raise PermissionError(f'SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
print(f'[red]:cross_mark: ERROR: SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr)
if not dir_is_writable(run_dir):
if IS_ROOT:
with SudoPermission(uid=0):
os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
else:
raise PermissionError(f'SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
# Docker: /tmp/archivebox/0.8.5/abc324235
# Ubuntu: /tmp/archivebox/0.8.5/abc324235
# macOS: /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/archivebox/0.8.5/abc324235
return run_dir

View file

@ -104,7 +104,7 @@ def drop_privileges():
# os.system(f'chown -R :{ARCHIVEBOX_GROUP} "{PACKAGE_DIR}"')
# if we need sudo (e.g. for installing dependencies) code should use SudoPermissions() context manager to regain root
if ARCHIVEBOX_USER == 0 or not ARCHIVEBOX_USER_EXISTS:
print('[yellow]:warning: Running as root is not recommended and may make your [blue]DATA_DIR[/blue] inaccessible to other users on your system.[/yellow]', file=sys.stderr)
print('[yellow]:warning: Running as [red]root[/red] is not recommended and may make your [blue]DATA_DIR[/blue] inaccessible to other users on your system.[/yellow]', file=sys.stderr)
@contextmanager
@ -132,4 +132,3 @@ def SudoPermission(uid=0, fallback=False):
except PermissionError as err:
if not fallback:
raise PermissionError(f'Failed to revert uid={uid} back to {ARCHIVEBOX_USER} after running code with sudo') from err

View file

@ -269,31 +269,9 @@ def version(quiet: bool=False,
for name, path in CONSTANTS.DATA_LOCATIONS.items():
prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, DEFAULT_PUID, DEFAULT_PGID, IS_ROOT, USER
from archivebox.misc.checks import check_data_dir_permissions
data_dir_stat = Path(DATA_DIR).stat()
data_dir_uid, data_dir_gid = data_dir_stat.st_uid, data_dir_stat.st_gid
data_owned_by_root = data_dir_uid == 0
# data_owned_by_default_user = data_dir_uid == DEFAULT_PUID or data_dir_gid == DEFAULT_PGID
data_owner_doesnt_match = (data_dir_uid != ARCHIVEBOX_USER and data_dir_gid != ARCHIVEBOX_GROUP) and not IS_ROOT
data_not_writable = not (os.access(DATA_DIR, os.W_OK) and os.access(CONSTANTS.LIB_DIR, os.W_OK) and os.access(CONSTANTS.TMP_DIR, os.W_OK))
if data_owned_by_root:
prnt('[yellow]:warning: Warning: ArchiveBox [blue]DATA_DIR[/blue] is currently owned by [red]root[/red], ArchiveBox will refuse to run![/yellow]')
elif data_owner_doesnt_match or data_not_writable:
prnt(f'[yellow]:warning: Warning: ArchiveBox [blue]DATA_DIR[/blue] is currently owned by [red]{data_dir_uid}:{data_dir_gid}[/red], but ArchiveBox user is [blue]{ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/blue] ({USER})! (ArchiveBox may not be able to write to the data dir)[/yellow]')
if data_owned_by_root or data_owner_doesnt_match or data_not_writable:
prnt(f'[violet]Hint:[/violet] If you encounter permissions errors, change [red]{data_dir_uid}[/red]:{data_dir_gid} (PUID:PGID) to match the user that will run ArchiveBox, e.g.:')
prnt(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {DATA_DIR.resolve()}')
prnt(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.LIB_DIR.resolve()}')
prnt(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.TMP_DIR.resolve()}')
prnt()
prnt('[blue]More info:[/blue]')
prnt(' [link=https://github.com/ArchiveBox/ArchiveBox#storage-requirements]https://github.com/ArchiveBox/ArchiveBox#storage-requirements[/link]')
prnt(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#permissions]https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#permissions[/link]')
prnt(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid]https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid[/link]')
prnt(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#filesystem-doesnt-support-fsync-eg-network-mounts]https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#filesystem-doesnt-support-fsync-eg-network-mounts[/link]')
check_data_dir_permissions()
else:
prnt()
prnt('[red][i] Data locations:[/red] (not in a data directory)')

View file

@ -2,6 +2,7 @@ __package__ = 'archivebox.misc'
import os
import sys
from pathlib import Path
from rich import print
@ -96,3 +97,33 @@ def check_not_root():
print(f' docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
print(f' docker exec -it --user=archivebox <container id> /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
raise SystemExit(2)
def check_data_dir_permissions():
from archivebox import DATA_DIR, CONSTANTS
from archivebox.misc.logging import STDERR
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, DEFAULT_PUID, DEFAULT_PGID, IS_ROOT, USER
data_dir_stat = Path(DATA_DIR).stat()
data_dir_uid, data_dir_gid = data_dir_stat.st_uid, data_dir_stat.st_gid
data_owned_by_root = data_dir_uid == 0
# data_owned_by_default_user = data_dir_uid == DEFAULT_PUID or data_dir_gid == DEFAULT_PGID
data_owner_doesnt_match = (data_dir_uid != ARCHIVEBOX_USER and data_dir_gid != ARCHIVEBOX_GROUP) and not IS_ROOT
data_not_writable = not (os.access(DATA_DIR, os.W_OK) and os.access(CONSTANTS.LIB_DIR, os.W_OK) and os.access(CONSTANTS.TMP_DIR, os.W_OK))
if data_owned_by_root:
STDERR.print('\n[yellow]:warning: Warning: ArchiveBox [blue]DATA_DIR[/blue] is currently owned by [red]root[/red], ArchiveBox will refuse to run![/yellow]')
elif data_owner_doesnt_match or data_not_writable:
STDERR.print(f'\n[yellow]:warning: Warning: ArchiveBox [blue]DATA_DIR[/blue] is currently owned by [red]{data_dir_uid}:{data_dir_gid}[/red], but ArchiveBox user is [blue]{ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/blue] ({USER})! (ArchiveBox may not be able to write to the data dir)[/yellow]')
if data_owned_by_root or data_owner_doesnt_match or data_not_writable:
STDERR.print(f'[violet]Hint:[/violet] Change the current ownership [red]{data_dir_uid}[/red]:{data_dir_gid} (PUID:PGID) to a non-user & group that will run ArchiveBox, e.g.:')
STDERR.print(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {DATA_DIR.resolve()}')
STDERR.print(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.LIB_DIR.resolve()}')
STDERR.print(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.TMP_DIR.resolve()}')
STDERR.print()
STDERR.print('[blue]More info:[/blue]')
STDERR.print(' [link=https://github.com/ArchiveBox/ArchiveBox#storage-requirements]https://github.com/ArchiveBox/ArchiveBox#storage-requirements[/link]')
STDERR.print(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#permissions]https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#permissions[/link]')
STDERR.print(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid]https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid[/link]')
STDERR.print(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#filesystem-doesnt-support-fsync-eg-network-mounts]https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#filesystem-doesnt-support-fsync-eg-network-mounts[/link]')