mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 06:34:25 -04:00
fuck it go back to nested lib and tmp dirs with supervisord sock workaround
This commit is contained in:
parent
df68f416fb
commit
4b34b729ab
6 changed files with 52 additions and 21 deletions
|
@ -16,9 +16,9 @@ from .paths import (
|
||||||
PACKAGE_DIR,
|
PACKAGE_DIR,
|
||||||
DATA_DIR,
|
DATA_DIR,
|
||||||
ARCHIVE_DIR,
|
ARCHIVE_DIR,
|
||||||
get_collection_id,
|
# get_collection_id,
|
||||||
get_LIB_DIR,
|
# get_LIB_DIR,
|
||||||
get_TMP_DIR,
|
# get_TMP_DIR,
|
||||||
)
|
)
|
||||||
from .permissions import (
|
from .permissions import (
|
||||||
IS_ROOT,
|
IS_ROOT,
|
||||||
|
@ -39,13 +39,14 @@ class ConstantsDict(Mapping):
|
||||||
PACKAGE_DIR: Path = PACKAGE_DIR
|
PACKAGE_DIR: Path = PACKAGE_DIR
|
||||||
DATA_DIR: Path = DATA_DIR
|
DATA_DIR: Path = DATA_DIR
|
||||||
ARCHIVE_DIR: Path = ARCHIVE_DIR
|
ARCHIVE_DIR: Path = ARCHIVE_DIR
|
||||||
COLLECTION_ID: str = get_collection_id(DATA_DIR)
|
# COLLECTION_ID: str = get_collection_id(DATA_DIR)
|
||||||
|
|
||||||
# Host system
|
# Host system
|
||||||
VERSION: str = detect_installed_version(PACKAGE_DIR)
|
VERSION: str = detect_installed_version(PACKAGE_DIR)
|
||||||
OS: str = platform.system().lower() # darwin, linux, etc.
|
OS: str = platform.system().lower() # darwin, linux, etc.
|
||||||
ARCH: str = platform.machine().lower() # arm64, x86_64, aarch64, etc.
|
ARCH: str = platform.machine().lower() # arm64, x86_64, aarch64, etc.
|
||||||
IN_DOCKER: bool = IN_DOCKER
|
IN_DOCKER: bool = IN_DOCKER
|
||||||
|
LIB_DIR_SCOPE: str = f'{ARCH}-{OS}-docker' if IN_DOCKER else f'{ARCH}-{OS}'
|
||||||
|
|
||||||
# Permissions
|
# Permissions
|
||||||
IS_ROOT: bool = IS_ROOT
|
IS_ROOT: bool = IS_ROOT
|
||||||
|
@ -95,9 +96,11 @@ class ConstantsDict(Mapping):
|
||||||
|
|
||||||
# Runtime dirs
|
# Runtime dirs
|
||||||
TMP_DIR_NAME: str = 'tmp'
|
TMP_DIR_NAME: str = 'tmp'
|
||||||
TMP_DIR: Path = get_TMP_DIR()
|
# TMP_DIR: Path = get_TMP_DIR()
|
||||||
|
TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME
|
||||||
LIB_DIR_NAME: str = 'lib'
|
LIB_DIR_NAME: str = 'lib'
|
||||||
LIB_DIR: Path = get_LIB_DIR()
|
# LIB_DIR: Path = get_LIB_DIR()
|
||||||
|
LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / LIB_DIR_SCOPE
|
||||||
LIB_PIP_DIR: Path = LIB_DIR / 'pip'
|
LIB_PIP_DIR: Path = LIB_DIR / 'pip'
|
||||||
LIB_NPM_DIR: Path = LIB_DIR / 'npm'
|
LIB_NPM_DIR: Path = LIB_DIR / 'npm'
|
||||||
LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers'
|
LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers'
|
||||||
|
|
|
@ -5,9 +5,10 @@ import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import hashlib
|
import hashlib
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from functools import cache
|
from functools import cache
|
||||||
|
|
||||||
from platformdirs import PlatformDirs
|
from platformdirs import PlatformDirs
|
||||||
|
from rich import print
|
||||||
|
|
||||||
from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
|
from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
|
||||||
|
|
||||||
|
@ -91,7 +92,7 @@ def get_LIB_DIR():
|
||||||
lib_dir = HOST_DIRS.user_data_path
|
lib_dir = HOST_DIRS.user_data_path
|
||||||
lib_dir.mkdir(parents=True, exist_ok=True)
|
lib_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
if not dir_is_writable(lib_dir):
|
if IS_ROOT or not dir_is_writable(lib_dir, uid=ARCHIVEBOX_USER):
|
||||||
if IS_ROOT:
|
if IS_ROOT:
|
||||||
# make sure lib dir is owned by the archivebox user, not root
|
# make sure lib dir is owned by the archivebox user, not root
|
||||||
with SudoPermission(uid=0):
|
with SudoPermission(uid=0):
|
||||||
|
@ -130,7 +131,7 @@ def get_TMP_DIR():
|
||||||
run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR)
|
run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR)
|
||||||
with SudoPermission(uid=0, fallback=True):
|
with SudoPermission(uid=0, fallback=True):
|
||||||
run_dir.mkdir(parents=True, exist_ok=True)
|
run_dir.mkdir(parents=True, exist_ok=True)
|
||||||
if not dir_is_writable(run_dir):
|
if not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
|
||||||
if IS_ROOT:
|
if IS_ROOT:
|
||||||
with SudoPermission(uid=0, fallback=False):
|
with SudoPermission(uid=0, fallback=False):
|
||||||
if ARCHIVEBOX_USER == 0:
|
if ARCHIVEBOX_USER == 0:
|
||||||
|
@ -153,7 +154,7 @@ def get_TMP_DIR():
|
||||||
with SudoPermission(uid=0, fallback=True):
|
with SudoPermission(uid=0, fallback=True):
|
||||||
run_dir.mkdir(parents=True, exist_ok=True)
|
run_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
if not dir_is_writable(run_dir):
|
if IS_ROOT or not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
|
||||||
if IS_ROOT:
|
if IS_ROOT:
|
||||||
with SudoPermission(uid=0):
|
with SudoPermission(uid=0):
|
||||||
if ARCHIVEBOX_USER == 0:
|
if ARCHIVEBOX_USER == 0:
|
||||||
|
|
|
@ -451,6 +451,9 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
|
||||||
if os.access(html_index, os.F_OK):
|
if os.access(html_index, os.F_OK):
|
||||||
html_index.rename(f"{index_name}.html")
|
html_index.rename(f"{index_name}.html")
|
||||||
|
|
||||||
|
CONSTANTS.TMP_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
CONSTANTS.LIB_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
if install:
|
if install:
|
||||||
run_subcommand('install', pwd=out_dir)
|
run_subcommand('install', pwd=out_dir)
|
||||||
|
|
||||||
|
@ -1004,14 +1007,20 @@ def install(out_dir: Path=DATA_DIR) -> None:
|
||||||
print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
|
print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
|
||||||
if IS_ROOT:
|
if IS_ROOT:
|
||||||
with SudoPermission(uid=0):
|
with SudoPermission(uid=0):
|
||||||
os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
|
if ARCHIVEBOX_USER == 0:
|
||||||
|
os.system(f'chmod -R 777 "{CONSTANTS.LIB_DIR.resolve()}"')
|
||||||
|
else:
|
||||||
|
os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if IS_ROOT:
|
if IS_ROOT:
|
||||||
print(f'[yellow]:warning: Retrying {binary.name} installation with [red]sudo[/red]...[/yellow]')
|
print(f'[yellow]:warning: Retrying {binary.name} installation with [red]sudo[/red]...[/yellow]')
|
||||||
with SudoPermission(uid=0):
|
with SudoPermission(uid=0):
|
||||||
try:
|
try:
|
||||||
print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
|
print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
|
||||||
os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
|
if ARCHIVEBOX_USER == 0:
|
||||||
|
os.system(f'chmod -R 777 "{CONSTANTS.LIB_DIR.resolve()}"')
|
||||||
|
else:
|
||||||
|
os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f'[red]:cross_mark: Failed to install {binary.name} as root: {e}[/red]')
|
print(f'[red]:cross_mark: Failed to install {binary.name} as root: {e}[/red]')
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -109,8 +109,8 @@ def check_data_dir_permissions():
|
||||||
data_owned_by_root = data_dir_uid == 0
|
data_owned_by_root = data_dir_uid == 0
|
||||||
|
|
||||||
# data_owned_by_default_user = data_dir_uid == DEFAULT_PUID or data_dir_gid == DEFAULT_PGID
|
# data_owned_by_default_user = data_dir_uid == DEFAULT_PUID or data_dir_gid == DEFAULT_PGID
|
||||||
data_owner_doesnt_match = (data_dir_uid != ARCHIVEBOX_USER and data_dir_gid != ARCHIVEBOX_GROUP) and not IS_ROOT
|
data_owner_doesnt_match = (data_dir_uid != ARCHIVEBOX_USER and data_dir_gid != ARCHIVEBOX_GROUP) if not IS_ROOT else False
|
||||||
data_not_writable = not (os.access(DATA_DIR, os.W_OK) and os.access(CONSTANTS.LIB_DIR, os.W_OK) and os.access(CONSTANTS.TMP_DIR, os.W_OK))
|
data_not_writable = not (os.isdir(DATA_DIR) and os.access(DATA_DIR, os.W_OK)) # and os.access(CONSTANTS.LIB_DIR, os.W_OK) and os.access(CONSTANTS.TMP_DIR, os.W_OK))
|
||||||
if data_owned_by_root:
|
if data_owned_by_root:
|
||||||
STDERR.print('\n[yellow]:warning: Warning: ArchiveBox [blue]DATA_DIR[/blue] is currently owned by [red]root[/red], ArchiveBox will refuse to run![/yellow]')
|
STDERR.print('\n[yellow]:warning: Warning: ArchiveBox [blue]DATA_DIR[/blue] is currently owned by [red]root[/red], ArchiveBox will refuse to run![/yellow]')
|
||||||
elif data_owner_doesnt_match or data_not_writable:
|
elif data_owner_doesnt_match or data_not_writable:
|
||||||
|
|
|
@ -1,14 +1,33 @@
|
||||||
|
import tempfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from archivebox.config import CONSTANTS
|
from archivebox.config import CONSTANTS
|
||||||
|
from archivebox.config.paths import get_collection_id
|
||||||
|
|
||||||
DATA_DIR = CONSTANTS.DATA_DIR
|
DATA_DIR = CONSTANTS.DATA_DIR
|
||||||
LOGS_DIR = CONSTANTS.LOGS_DIR
|
LOGS_DIR = CONSTANTS.LOGS_DIR
|
||||||
TMP_DIR = CONSTANTS.TMP_DIR
|
TMP_DIR = CONSTANTS.TMP_DIR
|
||||||
|
|
||||||
Path.mkdir(TMP_DIR, exist_ok=True)
|
|
||||||
SUPERVISORD_CONFIG_FILE = TMP_DIR / "supervisord.conf"
|
SUPERVISORD_CONFIG_FILE = TMP_DIR / "supervisord.conf"
|
||||||
PID_FILE = TMP_DIR / "supervisord.pid"
|
PID_FILE = TMP_DIR / "supervisord.pid"
|
||||||
SOCK_FILE = TMP_DIR / "supervisord.sock"
|
SOCK_FILE = TMP_DIR / "supervisord.sock"
|
||||||
LOG_FILE = TMP_DIR / "supervisord.log"
|
LOG_FILE = TMP_DIR / "supervisord.log"
|
||||||
WORKERS_DIR = TMP_DIR / "workers"
|
WORKERS_DIR = TMP_DIR / "workers"
|
||||||
|
|
||||||
|
|
||||||
|
def get_sock_file():
    """Return the filesystem path supervisord should use for its unix socket.

    Makes sure TMP_DIR exists. When the path of SOCK_FILE is at most 100
    characters long it is returned unchanged. Otherwise a symlink pointing at
    SOCK_FILE is (re)created inside the system temp dir and the path of that
    symlink is returned instead.

    Raises:
        AssertionError: if the short symlink path itself is longer than 100
            characters (i.e. the system temp dir location is too long).
    """
    TMP_DIR.mkdir(parents=True, exist_ok=True)

    if len(str(SOCK_FILE)) <= 100:
        return SOCK_FILE

    # socket absolute paths cannot be longer than 108 characters on some systems,
    # so symlink it to a shorter path and use that instead
    staging_link = Path(tempfile.gettempdir()) / f"archivebox_supervisord_{get_collection_id()}.sock.tmp"
    # drop only the trailing ".tmp" from the file name (never touches parent dirs)
    short_link = staging_link.with_suffix('')

    # validate before touching the filesystem so a failure leaves no stray symlink;
    # raised explicitly (not via `assert`) so the check survives `python -O`
    if len(str(short_link)) > 100:
        raise AssertionError(
            f'Failed to create supervisord SOCK_FILE, system tmp dir location is too long {short_link} (unix only allows 108 characters for socket paths)'
        )

    # build the link under a temporary name, then move it over any existing
    # symlink in one step so readers never observe a missing link
    staging_link.unlink(missing_ok=True)
    staging_link.symlink_to(SOCK_FILE)
    staging_link.replace(short_link)

    # return the final name: the staging name no longer exists after the move
    return short_link
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
__package__ = 'archivebox.queues'
|
__package__ = 'archivebox.queues'
|
||||||
|
|
||||||
import os
|
|
||||||
import time
|
import time
|
||||||
import signal
|
import signal
|
||||||
import psutil
|
import psutil
|
||||||
|
@ -15,7 +14,7 @@ from xmlrpc.client import ServerProxy
|
||||||
|
|
||||||
from archivebox.config.permissions import ARCHIVEBOX_USER
|
from archivebox.config.permissions import ARCHIVEBOX_USER
|
||||||
|
|
||||||
from .settings import SUPERVISORD_CONFIG_FILE, DATA_DIR, PID_FILE, SOCK_FILE, LOG_FILE, WORKERS_DIR, TMP_DIR, LOGS_DIR
|
from .settings import SUPERVISORD_CONFIG_FILE, DATA_DIR, PID_FILE, get_sock_file, LOG_FILE, WORKERS_DIR, TMP_DIR, LOGS_DIR
|
||||||
|
|
||||||
from typing import Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
|
@ -48,11 +47,11 @@ nocleanup = true
|
||||||
user = {ARCHIVEBOX_USER}
|
user = {ARCHIVEBOX_USER}
|
||||||
|
|
||||||
[unix_http_server]
|
[unix_http_server]
|
||||||
file = {TMP_DIR}/{SOCK_FILE.name}
|
file = {get_sock_file()}
|
||||||
chmod = 0700
|
chmod = 0700
|
||||||
|
|
||||||
[supervisorctl]
|
[supervisorctl]
|
||||||
serverurl = unix://{TMP_DIR}/{SOCK_FILE.name}
|
serverurl = unix://{get_sock_file()}
|
||||||
|
|
||||||
[rpcinterface:supervisor]
|
[rpcinterface:supervisor]
|
||||||
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
|
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
|
||||||
|
@ -81,12 +80,12 @@ def create_worker_config(daemon):
|
||||||
|
|
||||||
def get_existing_supervisord_process():
|
def get_existing_supervisord_process():
|
||||||
try:
|
try:
|
||||||
transport = SupervisorTransport(None, None, f"unix://{SOCK_FILE}")
|
transport = SupervisorTransport(None, None, f"unix://{get_sock_file()}")
|
||||||
server = ServerProxy("http://localhost", transport=transport)
|
server = ServerProxy("http://localhost", transport=transport)
|
||||||
current_state = cast(Dict[str, int | str], server.supervisor.getState())
|
current_state = cast(Dict[str, int | str], server.supervisor.getState())
|
||||||
if current_state["statename"] == "RUNNING":
|
if current_state["statename"] == "RUNNING":
|
||||||
pid = server.supervisor.getPID()
|
pid = server.supervisor.getPID()
|
||||||
print(f"[🦸♂️] Supervisord connected (pid={pid}) via unix://{str(SOCK_FILE).replace(str(TMP_DIR), 'tmp')}.")
|
print(f"[🦸♂️] Supervisord connected (pid={pid}) via unix://{str(get_sock_file()).replace(str(TMP_DIR), 'tmp')}.")
|
||||||
return server.supervisor
|
return server.supervisor
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
return None
|
return None
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue