fuck it go back to nested lib and tmp dirs with supervisord sock workaround

This commit is contained in:
Nick Sweeting 2024-10-08 17:48:59 -07:00
parent df68f416fb
commit 4b34b729ab
No known key found for this signature in database
6 changed files with 52 additions and 21 deletions

View file

@ -16,9 +16,9 @@ from .paths import (
PACKAGE_DIR, PACKAGE_DIR,
DATA_DIR, DATA_DIR,
ARCHIVE_DIR, ARCHIVE_DIR,
get_collection_id, # get_collection_id,
get_LIB_DIR, # get_LIB_DIR,
get_TMP_DIR, # get_TMP_DIR,
) )
from .permissions import ( from .permissions import (
IS_ROOT, IS_ROOT,
@ -39,13 +39,14 @@ class ConstantsDict(Mapping):
PACKAGE_DIR: Path = PACKAGE_DIR PACKAGE_DIR: Path = PACKAGE_DIR
DATA_DIR: Path = DATA_DIR DATA_DIR: Path = DATA_DIR
ARCHIVE_DIR: Path = ARCHIVE_DIR ARCHIVE_DIR: Path = ARCHIVE_DIR
COLLECTION_ID: str = get_collection_id(DATA_DIR) # COLLECTION_ID: str = get_collection_id(DATA_DIR)
# Host system # Host system
VERSION: str = detect_installed_version(PACKAGE_DIR) VERSION: str = detect_installed_version(PACKAGE_DIR)
OS: str = platform.system().lower() # darwin, linux, etc. OS: str = platform.system().lower() # darwin, linux, etc.
ARCH: str = platform.machine().lower() # arm64, x86_64, aarch64, etc. ARCH: str = platform.machine().lower() # arm64, x86_64, aarch64, etc.
IN_DOCKER: bool = IN_DOCKER IN_DOCKER: bool = IN_DOCKER
LIB_DIR_SCOPE: str = f'{ARCH}-{OS}-docker' if IN_DOCKER else f'{ARCH}-{OS}'
# Permissions # Permissions
IS_ROOT: bool = IS_ROOT IS_ROOT: bool = IS_ROOT
@ -95,9 +96,11 @@ class ConstantsDict(Mapping):
# Runtime dirs # Runtime dirs
TMP_DIR_NAME: str = 'tmp' TMP_DIR_NAME: str = 'tmp'
TMP_DIR: Path = get_TMP_DIR() # TMP_DIR: Path = get_TMP_DIR()
TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME
LIB_DIR_NAME: str = 'lib' LIB_DIR_NAME: str = 'lib'
LIB_DIR: Path = get_LIB_DIR() # LIB_DIR: Path = get_LIB_DIR()
LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / LIB_DIR_SCOPE
LIB_PIP_DIR: Path = LIB_DIR / 'pip' LIB_PIP_DIR: Path = LIB_DIR / 'pip'
LIB_NPM_DIR: Path = LIB_DIR / 'npm' LIB_NPM_DIR: Path = LIB_DIR / 'npm'
LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers' LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers'

View file

@ -5,9 +5,10 @@ import sys
import tempfile import tempfile
import hashlib import hashlib
from pathlib import Path from pathlib import Path
from functools import cache from functools import cache
from platformdirs import PlatformDirs from platformdirs import PlatformDirs
from rich import print
from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
@ -91,7 +92,7 @@ def get_LIB_DIR():
lib_dir = HOST_DIRS.user_data_path lib_dir = HOST_DIRS.user_data_path
lib_dir.mkdir(parents=True, exist_ok=True) lib_dir.mkdir(parents=True, exist_ok=True)
if not dir_is_writable(lib_dir): if IS_ROOT or not dir_is_writable(lib_dir, uid=ARCHIVEBOX_USER):
if IS_ROOT: if IS_ROOT:
# make sure lib dir is owned by the archivebox user, not root # make sure lib dir is owned by the archivebox user, not root
with SudoPermission(uid=0): with SudoPermission(uid=0):
@ -130,7 +131,7 @@ def get_TMP_DIR():
run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR) run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR)
with SudoPermission(uid=0, fallback=True): with SudoPermission(uid=0, fallback=True):
run_dir.mkdir(parents=True, exist_ok=True) run_dir.mkdir(parents=True, exist_ok=True)
if not dir_is_writable(run_dir): if not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
if IS_ROOT: if IS_ROOT:
with SudoPermission(uid=0, fallback=False): with SudoPermission(uid=0, fallback=False):
if ARCHIVEBOX_USER == 0: if ARCHIVEBOX_USER == 0:
@ -153,7 +154,7 @@ def get_TMP_DIR():
with SudoPermission(uid=0, fallback=True): with SudoPermission(uid=0, fallback=True):
run_dir.mkdir(parents=True, exist_ok=True) run_dir.mkdir(parents=True, exist_ok=True)
if not dir_is_writable(run_dir): if IS_ROOT or not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
if IS_ROOT: if IS_ROOT:
with SudoPermission(uid=0): with SudoPermission(uid=0):
if ARCHIVEBOX_USER == 0: if ARCHIVEBOX_USER == 0:

View file

@ -451,6 +451,9 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
if os.access(html_index, os.F_OK): if os.access(html_index, os.F_OK):
html_index.rename(f"{index_name}.html") html_index.rename(f"{index_name}.html")
CONSTANTS.TMP_DIR.mkdir(parents=True, exist_ok=True)
CONSTANTS.LIB_DIR.mkdir(parents=True, exist_ok=True)
if install: if install:
run_subcommand('install', pwd=out_dir) run_subcommand('install', pwd=out_dir)
@ -1004,14 +1007,20 @@ def install(out_dir: Path=DATA_DIR) -> None:
print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'})) print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
if IS_ROOT: if IS_ROOT:
with SudoPermission(uid=0): with SudoPermission(uid=0):
os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"') if ARCHIVEBOX_USER == 0:
os.system(f'chmod -R 777 "{CONSTANTS.LIB_DIR.resolve()}"')
else:
os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
except Exception as e: except Exception as e:
if IS_ROOT: if IS_ROOT:
print(f'[yellow]:warning: Retrying {binary.name} installation with [red]sudo[/red]...[/yellow]') print(f'[yellow]:warning: Retrying {binary.name} installation with [red]sudo[/red]...[/yellow]')
with SudoPermission(uid=0): with SudoPermission(uid=0):
try: try:
print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'})) print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"') if ARCHIVEBOX_USER == 0:
os.system(f'chmod -R 777 "{CONSTANTS.LIB_DIR.resolve()}"')
else:
os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
except Exception as e: except Exception as e:
print(f'[red]:cross_mark: Failed to install {binary.name} as root: {e}[/red]') print(f'[red]:cross_mark: Failed to install {binary.name} as root: {e}[/red]')
else: else:

View file

@ -109,8 +109,8 @@ def check_data_dir_permissions():
data_owned_by_root = data_dir_uid == 0 data_owned_by_root = data_dir_uid == 0
# data_owned_by_default_user = data_dir_uid == DEFAULT_PUID or data_dir_gid == DEFAULT_PGID # data_owned_by_default_user = data_dir_uid == DEFAULT_PUID or data_dir_gid == DEFAULT_PGID
data_owner_doesnt_match = (data_dir_uid != ARCHIVEBOX_USER and data_dir_gid != ARCHIVEBOX_GROUP) and not IS_ROOT data_owner_doesnt_match = (data_dir_uid != ARCHIVEBOX_USER and data_dir_gid != ARCHIVEBOX_GROUP) if not IS_ROOT else False
data_not_writable = not (os.access(DATA_DIR, os.W_OK) and os.access(CONSTANTS.LIB_DIR, os.W_OK) and os.access(CONSTANTS.TMP_DIR, os.W_OK)) data_not_writable = not (os.isdir(DATA_DIR) and os.access(DATA_DIR, os.W_OK)) # and os.access(CONSTANTS.LIB_DIR, os.W_OK) and os.access(CONSTANTS.TMP_DIR, os.W_OK))
if data_owned_by_root: if data_owned_by_root:
STDERR.print('\n[yellow]:warning: Warning: ArchiveBox [blue]DATA_DIR[/blue] is currently owned by [red]root[/red], ArchiveBox will refuse to run![/yellow]') STDERR.print('\n[yellow]:warning: Warning: ArchiveBox [blue]DATA_DIR[/blue] is currently owned by [red]root[/red], ArchiveBox will refuse to run![/yellow]')
elif data_owner_doesnt_match or data_not_writable: elif data_owner_doesnt_match or data_not_writable:

View file

@ -1,14 +1,33 @@
import tempfile
from pathlib import Path from pathlib import Path
from archivebox.config import CONSTANTS from archivebox.config import CONSTANTS
from archivebox.config.paths import get_collection_id
DATA_DIR = CONSTANTS.DATA_DIR DATA_DIR = CONSTANTS.DATA_DIR
LOGS_DIR = CONSTANTS.LOGS_DIR LOGS_DIR = CONSTANTS.LOGS_DIR
TMP_DIR = CONSTANTS.TMP_DIR TMP_DIR = CONSTANTS.TMP_DIR
Path.mkdir(TMP_DIR, exist_ok=True)
SUPERVISORD_CONFIG_FILE = TMP_DIR / "supervisord.conf" SUPERVISORD_CONFIG_FILE = TMP_DIR / "supervisord.conf"
PID_FILE = TMP_DIR / "supervisord.pid" PID_FILE = TMP_DIR / "supervisord.pid"
SOCK_FILE = TMP_DIR / "supervisord.sock" SOCK_FILE = TMP_DIR / "supervisord.sock"
LOG_FILE = TMP_DIR / "supervisord.log" LOG_FILE = TMP_DIR / "supervisord.log"
WORKERS_DIR = TMP_DIR / "workers" WORKERS_DIR = TMP_DIR / "workers"
def get_sock_file():
    """Return the path to use for the supervisord unix socket.

    Ensures TMP_DIR exists. If the full SOCK_FILE path is longer than 100
    characters, a symlink pointing at SOCK_FILE is created under the system
    temp dir (named after the collection id) and that shorter path is
    returned instead; otherwise SOCK_FILE itself is returned.

    Returns:
        Path: SOCK_FILE, or the short symlink path that points to it.

    Raises:
        AssertionError: if even the short symlink path exceeds 100 characters.
    """
    TMP_DIR.mkdir(parents=True, exist_ok=True)

    if len(str(SOCK_FILE)) <= 100:
        return SOCK_FILE

    # socket absolute paths cannot be longer than 108 characters on some systems,
    # so symlink it to a shorter path and use that instead
    short_path = Path(tempfile.gettempdir()) / f"archivebox_supervisord_{get_collection_id()}.sock"

    # Validate the final path *before* touching the filesystem, and raise
    # explicitly so the check is not stripped when Python runs with -O.
    if len(str(short_path)) > 100:
        raise AssertionError(
            f'Failed to create supervisord SOCK_FILE, system tmp dir location is too long {short_path} (unix only allows 108 characters for socket paths)'
        )

    # Build the symlink under a temporary name, then rename it over the final
    # name so any existing symlink is overwritten atomically.
    staging_path = short_path.with_name(short_path.name + '.tmp')
    staging_path.unlink(missing_ok=True)
    staging_path.symlink_to(SOCK_FILE)
    staging_path.rename(short_path)

    # Return the final name: the staging path no longer exists after rename().
    return short_path

View file

@ -1,6 +1,5 @@
__package__ = 'archivebox.queues' __package__ = 'archivebox.queues'
import os
import time import time
import signal import signal
import psutil import psutil
@ -15,7 +14,7 @@ from xmlrpc.client import ServerProxy
from archivebox.config.permissions import ARCHIVEBOX_USER from archivebox.config.permissions import ARCHIVEBOX_USER
from .settings import SUPERVISORD_CONFIG_FILE, DATA_DIR, PID_FILE, SOCK_FILE, LOG_FILE, WORKERS_DIR, TMP_DIR, LOGS_DIR from .settings import SUPERVISORD_CONFIG_FILE, DATA_DIR, PID_FILE, get_sock_file, LOG_FILE, WORKERS_DIR, TMP_DIR, LOGS_DIR
from typing import Iterator from typing import Iterator
@ -48,11 +47,11 @@ nocleanup = true
user = {ARCHIVEBOX_USER} user = {ARCHIVEBOX_USER}
[unix_http_server] [unix_http_server]
file = {TMP_DIR}/{SOCK_FILE.name} file = {get_sock_file()}
chmod = 0700 chmod = 0700
[supervisorctl] [supervisorctl]
serverurl = unix://{TMP_DIR}/{SOCK_FILE.name} serverurl = unix://{get_sock_file()}
[rpcinterface:supervisor] [rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
@ -81,12 +80,12 @@ def create_worker_config(daemon):
def get_existing_supervisord_process(): def get_existing_supervisord_process():
try: try:
transport = SupervisorTransport(None, None, f"unix://{SOCK_FILE}") transport = SupervisorTransport(None, None, f"unix://{get_sock_file()}")
server = ServerProxy("http://localhost", transport=transport) server = ServerProxy("http://localhost", transport=transport)
current_state = cast(Dict[str, int | str], server.supervisor.getState()) current_state = cast(Dict[str, int | str], server.supervisor.getState())
if current_state["statename"] == "RUNNING": if current_state["statename"] == "RUNNING":
pid = server.supervisor.getPID() pid = server.supervisor.getPID()
print(f"[🦸‍♂️] Supervisord connected (pid={pid}) via unix://{str(SOCK_FILE).replace(str(TMP_DIR), 'tmp')}.") print(f"[🦸‍♂️] Supervisord connected (pid={pid}) via unix://{str(get_sock_file()).replace(str(TMP_DIR), 'tmp')}.")
return server.supervisor return server.supervisor
except FileNotFoundError: except FileNotFoundError:
return None return None