fix LIB_DIR and TMP_DIR loading when primary option isn't available

This commit is contained in:
Nick Sweeting 2024-10-21 00:35:25 -07:00
parent deb116eed4
commit a211461ffc
No known key found for this signature in database
21 changed files with 712 additions and 303 deletions
archivebox/config

View file

@ -1,12 +1,16 @@
__package__ = 'archivebox.config'
import os
import socket
import hashlib
import tempfile
import platform
from pathlib import Path
from functools import cache
from datetime import datetime
from benedict import benedict
from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER
#############################################################################################
@ -88,7 +92,7 @@ def get_machine_type() -> str:
return LIB_DIR_SCOPE
def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = None, fallback=True) -> bool:
def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = None, fallback=True, chown=True) -> bool:
"""Check if a given directory is writable by a specific user and group (fallback=try as current user is unable to check with provided uid)"""
current_uid, current_gid = os.geteuid(), os.getegid()
uid, gid = uid or current_uid, gid or current_gid
@ -101,10 +105,197 @@ def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = No
test_file.unlink()
return True
except (IOError, OSError, PermissionError):
pass
if chown:
# try fixing it using sudo permissions
with SudoPermission(uid=uid, fallback=fallback):
os.system(f'chown {uid}:{gid} "{dir_path}" 2>/dev/null')
return dir_is_writable(dir_path, uid=uid, gid=gid, fallback=fallback, chown=False)
return False
def assert_dir_can_contain_unix_sockets(dir_path: Path) -> bool:
    """Check if a given directory can contain unix sockets (e.g. /tmp/supervisord.sock)

    Binds a throwaway AF_UNIX socket at ``<dir_path>/.test_socket.sock`` and
    removes the socket file again afterwards.

    Returns:
        True if the test socket could be bound.

    Raises:
        Exception: if the test socket could not be created for any reason
            (the underlying error is chained as the cause).
    """

    def _remove_if_present(path: str) -> None:
        # best-effort cleanup: a missing (or undeletable) file is not an error here
        try:
            os.remove(path)
        except OSError:
            pass

    try:
        socket_path = str(dir_path / '.test_socket.sock')
        _remove_if_present(socket_path)      # clear any stale file left at the test path
        try:
            # the context manager closes the socket even when bind() raises,
            # so a failed check no longer leaks a file descriptor
            with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as test_socket:
                test_socket.bind(socket_path)
        finally:
            _remove_if_present(socket_path)
    except Exception as e:
        # only needed to format the error message, so import it on the failure path
        from archivebox.logging_util import pretty_path
        raise Exception(f'ArchiveBox failed to create a test UNIX socket file in {pretty_path(dir_path, color=False)}') from e
    return True
def create_and_chown_dir(dir_path: Path) -> None:
    """Create dir_path (including missing parents) and chown it to ARCHIVEBOX_USER.

    Everything runs inside a ``SudoPermission(uid=0, fallback=True)`` context.
    The chown commands are best-effort: their stderr is discarded and their
    exit status is ignored. The chown of the directory's direct children is
    launched in the background (trailing ``&``) and is not waited for.
    """
    import shlex    # local import: only needed here to escape the path for the shell

    # shlex.quote() instead of bare double quotes, so a path containing ", $, ` or \
    # reaches chown literally instead of being interpreted by the shell
    quoted_path = shlex.quote(str(dir_path))

    with SudoPermission(uid=0, fallback=True):
        dir_path.mkdir(parents=True, exist_ok=True)
        os.system(f'chown {ARCHIVEBOX_USER} {quoted_path} 2>/dev/null')
        # the /* glob stays outside the quoting so the shell still expands it
        os.system(f'chown {ARCHIVEBOX_USER} {quoted_path}/* 2>/dev/null &')
@cache
def get_or_create_working_tmp_dir(autofix=True, quiet=False):
    """Return the first TMP_DIR candidate that passes check_tmp_dir().

    Candidates are tried in order of preference. Each one is created/chowned on
    a best-effort basis (errors are ignored) and then validated. When autofix
    is set and the chosen directory differs from STORAGE_CONFIG.TMP_DIR, the
    config is updated in place (warning unless quiet).

    If no candidate passes, OSError is raised, unless quiet is set, in which
    case None is returned. Results are memoized by @cache.
    """
    from archivebox import CONSTANTS
    from archivebox.config.common import STORAGE_CONFIG
    from archivebox.misc.checks import check_tmp_dir

    # potential directories, most preferred first
    tmp_dir_options = [
        STORAGE_CONFIG.TMP_DIR,                                                 # <user-specified>
        CONSTANTS.DEFAULT_TMP_DIR,                                              # ./data/tmp/<machine_id>
        Path('/var/run/archivebox') / get_collection_id(),                      # /var/run/archivebox/abc5d8512
        Path('/tmp') / 'archivebox' / get_collection_id(),                      # /tmp/archivebox/abc5d8512
        Path('~/.tmp/archivebox').expanduser() / get_collection_id(),           # ~/.tmp/archivebox/abc5d8512
        Path(tempfile.gettempdir()) / 'archivebox' / get_collection_id(),       # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/archivebox/abc5d8512
        Path(tempfile.gettempdir()) / 'archivebox' / get_collection_id()[:4],   # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/archivebox/abc5d
        Path(tempfile.gettempdir()) / 'abx' / get_collection_id()[:4],          # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/abx/abc5
    ]

    for tmp_dir in tmp_dir_options:
        try:
            create_and_chown_dir(tmp_dir)
        except Exception:
            pass    # creation is best-effort; the check below decides if it is usable

        if not check_tmp_dir(tmp_dir, throw=False, quiet=True, must_exist=True):
            continue

        config_is_stale = autofix and STORAGE_CONFIG.TMP_DIR != tmp_dir
        if config_is_stale:
            STORAGE_CONFIG.update_in_place(TMP_DIR=tmp_dir, warn=not quiet)
        return tmp_dir

    if quiet:
        return None
    raise OSError(f'ArchiveBox is unable to find a writable TMP_DIR, tried {tmp_dir_options}!')
@cache
def get_or_create_working_lib_dir(autofix=True, quiet=False):
    """Return the first LIB_DIR candidate that passes check_lib_dir().

    Candidates are tried in order of preference. Each one is created/chowned on
    a best-effort basis (errors are ignored) and then validated. When autofix
    is set and the chosen directory differs from STORAGE_CONFIG.LIB_DIR, the
    config is updated in place (warning unless quiet).

    If no candidate passes, OSError is raised, unless quiet is set, in which
    case None is returned. Results are memoized by @cache.
    """
    from archivebox import CONSTANTS
    from archivebox.config.common import STORAGE_CONFIG
    from archivebox.misc.checks import check_lib_dir

    # potential directories, most preferred first
    lib_dir_options = [
        STORAGE_CONFIG.LIB_DIR,                                       # <user-specified>
        CONSTANTS.DEFAULT_LIB_DIR,                                    # ./data/lib/arm64-linux-docker
        Path('/usr/local/share/archivebox') / get_collection_id(),    # /usr/local/share/archivebox/abc5
    ]
    # the homebrew location is only offered when archivebox exists at the homebrew bin path
    if os.path.isfile('/opt/homebrew/bin/archivebox'):
        lib_dir_options.append(Path('/opt/homebrew/share/archivebox') / get_collection_id())   # /opt/homebrew/share/archivebox/abc5
    lib_dir_options.append(Path('~/.local/share/archivebox').expanduser() / get_collection_id())   # ~/.local/share/archivebox/abc5

    for lib_dir in lib_dir_options:
        try:
            create_and_chown_dir(lib_dir)
        except Exception:
            pass    # creation is best-effort; the check below decides if it is usable

        if not check_lib_dir(lib_dir, throw=False, quiet=True, must_exist=True):
            continue

        config_is_stale = autofix and STORAGE_CONFIG.LIB_DIR != lib_dir
        if config_is_stale:
            STORAGE_CONFIG.update_in_place(LIB_DIR=lib_dir, warn=not quiet)
        return lib_dir

    if quiet:
        return None
    raise OSError(f'ArchiveBox is unable to find a writable LIB_DIR, tried {lib_dir_options}!')
@cache
def get_data_locations():
    """Describe the data locations as a benedict keyed by location name.

    Every entry has a "path", an "enabled" flag and an "is_valid" flag
    (the path exists as the expected kind and is readable + writable); some
    entries also carry "is_mount". The result is memoized by @cache.
    """
    from archivebox.config import CONSTANTS
    from archivebox.config.common import STORAGE_CONFIG

    def can_read_write(path) -> bool:
        return os.access(path, os.R_OK) and os.access(path, os.W_OK)

    def is_rw_dir(path) -> bool:
        return os.path.isdir(path) and can_read_write(path)

    def is_rw_file(path) -> bool:
        return os.path.isfile(path) and can_read_write(path)

    locations = {
        "DATA_DIR": {
            "path": DATA_DIR.resolve(),
            "enabled": True,
            "is_valid": is_rw_dir(DATA_DIR),
            "is_mount": os.path.ismount(DATA_DIR.resolve()),
        },
        "CONFIG_FILE": {
            "path": CONSTANTS.CONFIG_FILE.resolve(),
            "enabled": True,
            "is_valid": is_rw_file(CONSTANTS.CONFIG_FILE),
        },
        "SQL_INDEX": {
            "path": DATABASE_FILE.resolve(),
            "enabled": True,
            "is_valid": is_rw_file(DATABASE_FILE),
            "is_mount": os.path.ismount(DATABASE_FILE.resolve()),
        },
        "QUEUE_DATABASE": {
            # unlike the other entries, this path is used as-is (not resolved)
            "path": CONSTANTS.QUEUE_DATABASE_FILE,
            "enabled": True,
            "is_valid": is_rw_file(CONSTANTS.QUEUE_DATABASE_FILE),
            "is_mount": os.path.ismount(CONSTANTS.QUEUE_DATABASE_FILE),
        },
        "ARCHIVE_DIR": {
            "path": ARCHIVE_DIR.resolve(),
            "enabled": True,
            "is_valid": is_rw_dir(ARCHIVE_DIR),
            "is_mount": os.path.ismount(ARCHIVE_DIR.resolve()),
        },
        "SOURCES_DIR": {
            "path": CONSTANTS.SOURCES_DIR.resolve(),
            "enabled": True,
            "is_valid": is_rw_dir(CONSTANTS.SOURCES_DIR),
        },
        "PERSONAS_DIR": {
            "path": CONSTANTS.PERSONAS_DIR.resolve(),
            "enabled": os.path.isdir(CONSTANTS.PERSONAS_DIR),     # only enabled when the directory exists
            "is_valid": is_rw_dir(CONSTANTS.PERSONAS_DIR),        # read + write
        },
        "LOGS_DIR": {
            "path": CONSTANTS.LOGS_DIR.resolve(),
            "enabled": True,
            "is_valid": is_rw_dir(CONSTANTS.LOGS_DIR),            # read + write
        },
        'TMP_DIR': {
            'path': STORAGE_CONFIG.TMP_DIR.resolve(),
            'enabled': True,
            'is_valid': is_rw_dir(STORAGE_CONFIG.TMP_DIR),        # read + write
        },
        # "CACHE_DIR": {
        #     "path": CACHE_DIR.resolve(),
        #     "enabled": True,
        #     "is_valid": os.access(CACHE_DIR, os.R_OK) and os.access(CACHE_DIR, os.W_OK),  # read + write
        # },
    }
    return benedict(locations)
@cache
def get_code_locations():
    """Describe the code locations as a benedict keyed by location name.

    Every entry has a 'path', an 'enabled' flag and an 'is_valid' flag whose
    required access level is noted per entry. The result is memoized by @cache.
    """
    from archivebox.config import CONSTANTS
    from archivebox.config.common import STORAGE_CONFIG

    return benedict({
        'PACKAGE_DIR': {
            'path': (PACKAGE_DIR).resolve(),
            'enabled': True,
            'is_valid': os.access(PACKAGE_DIR / '__main__.py', os.X_OK),                                                               # executable
        },
        'TEMPLATES_DIR': {
            'path': CONSTANTS.TEMPLATES_DIR.resolve(),
            'enabled': True,
            # validate the same directory that is reported in 'path'
            # (this entry previously checked CONSTANTS.STATIC_DIR instead)
            'is_valid': os.access(CONSTANTS.TEMPLATES_DIR, os.R_OK) and os.access(CONSTANTS.TEMPLATES_DIR, os.X_OK),                   # read + list
        },
        'CUSTOM_TEMPLATES_DIR': {
            'path': CONSTANTS.CUSTOM_TEMPLATES_DIR.resolve(),
            'enabled': os.path.isdir(CONSTANTS.CUSTOM_TEMPLATES_DIR),                                                                  # only enabled when the directory exists
            'is_valid': os.path.isdir(CONSTANTS.CUSTOM_TEMPLATES_DIR) and os.access(CONSTANTS.CUSTOM_TEMPLATES_DIR, os.R_OK),          # read
        },
        'USER_PLUGINS_DIR': {
            'path': CONSTANTS.USER_PLUGINS_DIR.resolve(),
            'enabled': os.path.isdir(CONSTANTS.USER_PLUGINS_DIR),                                                                      # only enabled when the directory exists
            'is_valid': os.path.isdir(CONSTANTS.USER_PLUGINS_DIR) and os.access(CONSTANTS.USER_PLUGINS_DIR, os.R_OK),                  # read
        },
        'LIB_DIR': {
            'path': STORAGE_CONFIG.LIB_DIR.resolve(),
            'enabled': True,
            'is_valid': os.path.isdir(STORAGE_CONFIG.LIB_DIR) and os.access(STORAGE_CONFIG.LIB_DIR, os.R_OK) and os.access(STORAGE_CONFIG.LIB_DIR, os.W_OK),   # read + write
        },
    })
# @cache