From 4b34b729ab1530bb4319c82e2405ffe04bf31dd5 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 8 Oct 2024 17:48:59 -0700 Subject: [PATCH] fuck it go back to nested lib and tmp dirs with supervisord sock workaround --- archivebox/config/constants.py | 15 +++++++++------ archivebox/config/paths.py | 9 +++++---- archivebox/main.py | 13 +++++++++++-- archivebox/misc/checks.py | 4 ++-- archivebox/queues/settings.py | 21 ++++++++++++++++++++- archivebox/queues/supervisor_util.py | 11 +++++------ 6 files changed, 52 insertions(+), 21 deletions(-) diff --git a/archivebox/config/constants.py b/archivebox/config/constants.py index 5c2d4b81..e85f4447 100644 --- a/archivebox/config/constants.py +++ b/archivebox/config/constants.py @@ -16,9 +16,9 @@ from .paths import ( PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR, - get_collection_id, - get_LIB_DIR, - get_TMP_DIR, + # get_collection_id, + # get_LIB_DIR, + # get_TMP_DIR, ) from .permissions import ( IS_ROOT, @@ -39,13 +39,14 @@ class ConstantsDict(Mapping): PACKAGE_DIR: Path = PACKAGE_DIR DATA_DIR: Path = DATA_DIR ARCHIVE_DIR: Path = ARCHIVE_DIR - COLLECTION_ID: str = get_collection_id(DATA_DIR) + # COLLECTION_ID: str = get_collection_id(DATA_DIR) # Host system VERSION: str = detect_installed_version(PACKAGE_DIR) OS: str = platform.system().lower() # darwin, linux, etc. ARCH: str = platform.machine().lower() # arm64, x86_64, aarch64, etc. IN_DOCKER: bool = IN_DOCKER + LIB_DIR_SCOPE: str = f'{ARCH}-{OS}-docker' if IN_DOCKER else f'{ARCH}-{OS}' # Permissions IS_ROOT: bool = IS_ROOT @@ -95,9 +96,11 @@ class ConstantsDict(Mapping): # Runtime dirs TMP_DIR_NAME: str = 'tmp' - TMP_DIR: Path = get_TMP_DIR() + # TMP_DIR: Path = get_TMP_DIR() + TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME LIB_DIR_NAME: str = 'lib' - LIB_DIR: Path = get_LIB_DIR() + # LIB_DIR: Path = get_LIB_DIR() + LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / LIB_DIR_SCOPE LIB_PIP_DIR: Path = LIB_DIR / 'pip' LIB_NPM_DIR: Path = LIB_DIR / 'npm' LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers' diff --git a/archivebox/config/paths.py b/archivebox/config/paths.py index b4aae940..e25078ac 100644 --- a/archivebox/config/paths.py +++ b/archivebox/config/paths.py @@ -5,9 +5,10 @@ import sys import tempfile import hashlib from pathlib import Path - from functools import cache + from platformdirs import PlatformDirs +from rich import print from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP @@ -91,7 +92,7 @@ def get_LIB_DIR(): lib_dir = HOST_DIRS.user_data_path lib_dir.mkdir(parents=True, exist_ok=True) - if not dir_is_writable(lib_dir): + if IS_ROOT or not dir_is_writable(lib_dir, uid=ARCHIVEBOX_USER): if IS_ROOT: # make sure lib dir is owned by the archivebox user, not root with SudoPermission(uid=0): @@ -130,7 +131,7 @@ def get_TMP_DIR(): run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR) with SudoPermission(uid=0, fallback=True): run_dir.mkdir(parents=True, exist_ok=True) - if not dir_is_writable(run_dir): + if not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER): if IS_ROOT: with SudoPermission(uid=0, fallback=False): if ARCHIVEBOX_USER == 0: @@ -153,7 +154,7 @@ def get_TMP_DIR(): with SudoPermission(uid=0, fallback=True): run_dir.mkdir(parents=True, exist_ok=True) - if not dir_is_writable(run_dir): + if IS_ROOT or not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER): if IS_ROOT: with SudoPermission(uid=0): if ARCHIVEBOX_USER == 0: diff --git a/archivebox/main.py b/archivebox/main.py index 7654585b..3e679da1 100755 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -450,6 +450,9 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat json_index.rename(f"{index_name}.json") if os.access(html_index, os.F_OK): html_index.rename(f"{index_name}.html") + + CONSTANTS.TMP_DIR.mkdir(parents=True, exist_ok=True) + CONSTANTS.LIB_DIR.mkdir(parents=True, exist_ok=True) if install: run_subcommand('install', pwd=out_dir) @@ -1004,14 +1007,20 @@ def install(out_dir: Path=DATA_DIR) -> None: print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'})) if IS_ROOT: with SudoPermission(uid=0): - os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"') + if ARCHIVEBOX_USER == 0: + os.system(f'chmod -R 777 "{CONSTANTS.LIB_DIR.resolve()}"') + else: + os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"') except Exception as e: if IS_ROOT: print(f'[yellow]:warning: Retrying {binary.name} installation with [red]sudo[/red]...[/yellow]') with SudoPermission(uid=0): try: print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'})) - os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"') + if ARCHIVEBOX_USER == 0: + os.system(f'chmod -R 777 "{CONSTANTS.LIB_DIR.resolve()}"') + else: + os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"') except Exception as e: print(f'[red]:cross_mark: Failed to install {binary.name} as root: {e}[/red]') else: diff --git a/archivebox/misc/checks.py b/archivebox/misc/checks.py index 34fca692..ffab850b 100644 --- a/archivebox/misc/checks.py +++ b/archivebox/misc/checks.py @@ -109,8 +109,8 @@ def check_data_dir_permissions(): data_owned_by_root = data_dir_uid == 0 # data_owned_by_default_user = data_dir_uid == DEFAULT_PUID or data_dir_gid == DEFAULT_PGID - data_owner_doesnt_match = (data_dir_uid != ARCHIVEBOX_USER and data_dir_gid != ARCHIVEBOX_GROUP) and not IS_ROOT - data_not_writable = not (os.access(DATA_DIR, os.W_OK) and os.access(CONSTANTS.LIB_DIR, os.W_OK) and os.access(CONSTANTS.TMP_DIR, os.W_OK)) + data_owner_doesnt_match = (data_dir_uid != ARCHIVEBOX_USER and data_dir_gid != ARCHIVEBOX_GROUP) if not IS_ROOT else False + data_not_writable = not (os.isdir(DATA_DIR) and os.access(DATA_DIR, os.W_OK)) # and os.access(CONSTANTS.LIB_DIR, os.W_OK) and os.access(CONSTANTS.TMP_DIR, os.W_OK)) if data_owned_by_root: STDERR.print('\n[yellow]:warning: Warning: ArchiveBox [blue]DATA_DIR[/blue] is currently owned by [red]root[/red], ArchiveBox will refuse to run![/yellow]') elif data_owner_doesnt_match or data_not_writable: diff --git a/archivebox/queues/settings.py b/archivebox/queues/settings.py index d9566d27..ab1a975c 100644 --- a/archivebox/queues/settings.py +++ b/archivebox/queues/settings.py @@ -1,14 +1,33 @@ +import tempfile from pathlib import Path from archivebox.config import CONSTANTS +from archivebox.config.paths import get_collection_id DATA_DIR = CONSTANTS.DATA_DIR LOGS_DIR = CONSTANTS.LOGS_DIR TMP_DIR = CONSTANTS.TMP_DIR -Path.mkdir(TMP_DIR, exist_ok=True) SUPERVISORD_CONFIG_FILE = TMP_DIR / "supervisord.conf" PID_FILE = TMP_DIR / "supervisord.pid" SOCK_FILE = TMP_DIR / "supervisord.sock" LOG_FILE = TMP_DIR / "supervisord.log" WORKERS_DIR = TMP_DIR / "workers" + + +def get_sock_file(): + TMP_DIR.mkdir(parents=True, exist_ok=True) + + if len(str(SOCK_FILE)) > 100: + # socket absolute paths cannot be longer than 108 characters on some systems + # symlink it to a shorter path and use that instead + + # use tmpfile to atomically overwrite any existing symlink + symlink = Path(tempfile.gettempdir()) / f"archivebox_supervisord_{get_collection_id()}.sock.tmp" + symlink.unlink(missing_ok=True) + symlink.symlink_to(SOCK_FILE) + symlink.rename(str(symlink).replace('.sock.tmp', '.sock')) + assert len(str(symlink)) <= 100, f'Failed to create supervisord SOCK_FILE, system tmp dir location is too long {symlink} (unix only allows 108 characters for socket paths)' + return symlink + + return SOCK_FILE diff --git a/archivebox/queues/supervisor_util.py b/archivebox/queues/supervisor_util.py index 035f1e40..99de5e78 100644 --- a/archivebox/queues/supervisor_util.py +++ b/archivebox/queues/supervisor_util.py @@ -1,6 +1,5 @@ __package__ = 'archivebox.queues' -import os import time import signal import psutil @@ -15,7 +14,7 @@ from xmlrpc.client import ServerProxy from archivebox.config.permissions import ARCHIVEBOX_USER -from .settings import SUPERVISORD_CONFIG_FILE, DATA_DIR, PID_FILE, SOCK_FILE, LOG_FILE, WORKERS_DIR, TMP_DIR, LOGS_DIR +from .settings import SUPERVISORD_CONFIG_FILE, DATA_DIR, PID_FILE, get_sock_file, LOG_FILE, WORKERS_DIR, TMP_DIR, LOGS_DIR from typing import Iterator @@ -48,11 +47,11 @@ nocleanup = true user = {ARCHIVEBOX_USER} [unix_http_server] -file = {TMP_DIR}/{SOCK_FILE.name} +file = {get_sock_file()} chmod = 0700 [supervisorctl] -serverurl = unix://{TMP_DIR}/{SOCK_FILE.name} +serverurl = unix://{get_sock_file()} [rpcinterface:supervisor] supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface @@ -81,12 +80,12 @@ def create_worker_config(daemon): def get_existing_supervisord_process(): try: - transport = SupervisorTransport(None, None, f"unix://{SOCK_FILE}") + transport = SupervisorTransport(None, None, f"unix://{get_sock_file()}") server = ServerProxy("http://localhost", transport=transport) current_state = cast(Dict[str, int | str], server.supervisor.getState()) if current_state["statename"] == "RUNNING": pid = server.supervisor.getPID() - print(f"[🦸‍♂️] Supervisord connected (pid={pid}) via unix://{str(SOCK_FILE).replace(str(TMP_DIR), 'tmp')}.") + print(f"[🦸‍♂️] Supervisord connected (pid={pid}) via unix://{str(get_sock_file()).replace(str(TMP_DIR), 'tmp')}.") return server.supervisor except FileNotFoundError: return None