From 613caec8ebcb5ecb1ca65fef87ef343d090c5ec0 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Wed, 9 Oct 2024 00:41:16 -0700 Subject: [PATCH] improve install flow with sudo, check package managers, and fix docker build --- archivebox/config/constants.py | 61 ++++++++--------- archivebox/config/paths.py | 21 +++++- archivebox/main.py | 65 +++++++++++-------- archivebox/misc/logging.py | 2 +- .../plugins_extractor/singlefile/apps.py | 18 +++-- archivebox/plugins_pkg/npm/apps.py | 26 +++++--- archivebox/plugins_pkg/pip/apps.py | 45 +++++++++---- archivebox/vendor/pydantic-pkgr | 2 +- bin/docker_entrypoint.sh | 14 ++-- pyproject.toml | 2 +- 10 files changed, 161 insertions(+), 95 deletions(-) diff --git a/archivebox/config/constants.py b/archivebox/config/constants.py index a8a8518f..e8ea9958 100644 --- a/archivebox/config/constants.py +++ b/archivebox/config/constants.py @@ -2,7 +2,7 @@ __package__ = 'archivebox.config' import os import re -import platform +import sys from typing import Dict from pathlib import Path @@ -56,6 +56,7 @@ class ConstantsDict(Mapping): RUNNING_AS_GID: int = RUNNING_AS_GID DEFAULT_PUID: int = DEFAULT_PUID DEFAULT_PGID: int = DEFAULT_PGID + IS_INSIDE_VENV: bool = sys.prefix != sys.base_prefix # Source code dirs PACKAGE_DIR_NAME: str = PACKAGE_DIR.name @@ -209,15 +210,20 @@ class ConstantsDict(Mapping): 'enabled': True, 'is_valid': os.access(STATIC_DIR, os.R_OK) and os.access(STATIC_DIR, os.X_OK), # read + list }, + 'CUSTOM_TEMPLATES_DIR': { + 'path': CUSTOM_TEMPLATES_DIR.resolve(), + 'enabled': os.path.isdir(CUSTOM_TEMPLATES_DIR), + 'is_valid': os.path.isdir(CUSTOM_TEMPLATES_DIR) and os.access(CUSTOM_TEMPLATES_DIR, os.R_OK), # read + }, + 'USER_PLUGINS_DIR': { + 'path': USER_PLUGINS_DIR.resolve(), + 'enabled': os.path.isdir(USER_PLUGINS_DIR), + 'is_valid': os.path.isdir(USER_PLUGINS_DIR) and os.access(USER_PLUGINS_DIR, os.R_OK), # read + }, 'LIB_DIR': { 'path': LIB_DIR.resolve(), 'enabled': True, - 'is_valid': os.access(LIB_DIR, os.R_OK) and os.access(LIB_DIR, os.X_OK) and os.access(LIB_DIR, os.W_OK), # read + write - }, - 'TMP_DIR': { - 'path': TMP_DIR.resolve(), - 'enabled': True, - 'is_valid': os.access(TMP_DIR, os.R_OK) and os.access(TMP_DIR, os.X_OK) and os.access(TMP_DIR, os.W_OK), # read + write + 'is_valid': os.path.isdir(LIB_DIR) and os.access(LIB_DIR, os.R_OK) and os.access(LIB_DIR, os.W_OK), # read + write }, }) @@ -225,62 +231,57 @@ class ConstantsDict(Mapping): "DATA_DIR": { "path": DATA_DIR.resolve(), "enabled": True, - "is_valid": os.access(DATA_DIR, os.R_OK) and os.access(DATA_DIR, os.W_OK) and os.access(DATA_DIR, os.X_OK), + "is_valid": os.path.isdir(DATA_DIR) and os.access(DATA_DIR, os.R_OK) and os.access(DATA_DIR, os.W_OK), "is_mount": os.path.ismount(DATA_DIR.resolve()), }, "CONFIG_FILE": { "path": CONFIG_FILE.resolve(), "enabled": True, - "is_valid": os.access(CONFIG_FILE, os.R_OK) and os.access(CONFIG_FILE, os.W_OK), + "is_valid": os.path.isfile(CONFIG_FILE) and os.access(CONFIG_FILE, os.R_OK) and os.access(CONFIG_FILE, os.W_OK), }, "SQL_INDEX": { "path": DATABASE_FILE.resolve(), "enabled": True, - "is_valid": os.access(DATABASE_FILE, os.R_OK) and os.access(DATABASE_FILE, os.W_OK), + "is_valid": os.path.isfile(DATABASE_FILE) and os.access(DATABASE_FILE, os.R_OK) and os.access(DATABASE_FILE, os.W_OK), "is_mount": os.path.ismount(DATABASE_FILE.resolve()), }, "QUEUE_DATABASE": { "path": QUEUE_DATABASE_FILE.resolve(), "enabled": True, - "is_valid": os.access(QUEUE_DATABASE_FILE, os.R_OK) and os.access(QUEUE_DATABASE_FILE, os.W_OK), + "is_valid": os.path.isfile(QUEUE_DATABASE_FILE) and os.access(QUEUE_DATABASE_FILE, os.R_OK) and os.access(QUEUE_DATABASE_FILE, os.W_OK), "is_mount": os.path.ismount(QUEUE_DATABASE_FILE.resolve()), }, "ARCHIVE_DIR": { "path": ARCHIVE_DIR.resolve(), "enabled": True, - "is_valid": os.access(ARCHIVE_DIR, os.R_OK) and os.access(ARCHIVE_DIR, os.W_OK) and os.access(ARCHIVE_DIR, os.X_OK), + "is_valid": os.path.isdir(ARCHIVE_DIR) and os.access(ARCHIVE_DIR, os.R_OK) and os.access(ARCHIVE_DIR, os.W_OK), "is_mount": os.path.ismount(ARCHIVE_DIR.resolve()), }, "SOURCES_DIR": { "path": SOURCES_DIR.resolve(), "enabled": True, - "is_valid": os.access(SOURCES_DIR, os.R_OK) and os.access(SOURCES_DIR, os.W_OK) and os.access(SOURCES_DIR, os.X_OK), + "is_valid": os.path.isdir(SOURCES_DIR) and os.access(SOURCES_DIR, os.R_OK) and os.access(SOURCES_DIR, os.W_OK), + }, + "PERSONAS_DIR": { + "path": PERSONAS_DIR.resolve(), + "enabled": os.path.isdir(PERSONAS_DIR), + "is_valid": os.path.isdir(PERSONAS_DIR) and os.access(PERSONAS_DIR, os.R_OK) and os.access(PERSONAS_DIR, os.W_OK), # read + write }, "LOGS_DIR": { "path": LOGS_DIR.resolve(), "enabled": True, - "is_valid": os.access(LOGS_DIR, os.R_OK) and os.access(LOGS_DIR, os.W_OK) and os.access(LOGS_DIR, os.X_OK), # read + write + "is_valid": os.path.isdir(LOGS_DIR) and os.access(LOGS_DIR, os.R_OK) and os.access(LOGS_DIR, os.W_OK), # read + write + }, + 'TMP_DIR': { + 'path': TMP_DIR.resolve(), + 'enabled': True, + 'is_valid': os.path.isdir(TMP_DIR) and os.access(TMP_DIR, os.R_OK) and os.access(TMP_DIR, os.W_OK), # read + write }, # "CACHE_DIR": { # "path": CACHE_DIR.resolve(), # "enabled": True, - # "is_valid": os.access(CACHE_DIR, os.R_OK) and os.access(CACHE_DIR, os.W_OK) and os.access(CACHE_DIR, os.X_OK), # read + write + # "is_valid": os.access(CACHE_DIR, os.R_OK) and os.access(CACHE_DIR, os.W_OK), # read + write # }, - "PERSONAS_DIR": { - "path": PERSONAS_DIR.resolve(), - "enabled": os.access(PERSONAS_DIR, os.R_OK), - "is_valid": os.access(PERSONAS_DIR, os.R_OK) and os.access(PERSONAS_DIR, os.W_OK) and os.access(PERSONAS_DIR, os.X_OK), # read + write - }, - 'CUSTOM_TEMPLATES_DIR': { - 'path': CUSTOM_TEMPLATES_DIR.resolve(), - 'enabled': os.access(CUSTOM_TEMPLATES_DIR, os.R_OK), - 'is_valid': os.access(CUSTOM_TEMPLATES_DIR, os.R_OK) and os.access(CUSTOM_TEMPLATES_DIR, os.X_OK), # read - }, - 'USER_PLUGINS_DIR': { - 'path': USER_PLUGINS_DIR.resolve(), - 'enabled': os.access(USER_PLUGINS_DIR, os.R_OK), - 'is_valid': os.access(USER_PLUGINS_DIR, os.R_OK) and os.access(USER_PLUGINS_DIR, os.X_OK), # read - }, }) @classmethod diff --git a/archivebox/config/paths.py b/archivebox/config/paths.py index c66feea0..c3b76e88 100644 --- a/archivebox/config/paths.py +++ b/archivebox/config/paths.py @@ -5,8 +5,9 @@ import hashlib import platform from pathlib import Path from functools import cache +from datetime import datetime -from .permissions import SudoPermission +from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER ############################################################################################# @@ -30,14 +31,28 @@ def get_collection_id(DATA_DIR=DATA_DIR) -> str: except (OSError, FileNotFoundError, PermissionError): pass - hash_key = str(DATA_DIR.resolve()).encode() - collection_id = hashlib.sha256(hash_key).hexdigest()[:8] + # hash the machine_id + collection dir path + creation time to get a unique collection_id + machine_id = get_machine_id() + collection_path = DATA_DIR.resolve() + try: + creation_date = DATA_DIR.stat().st_ctime + except Exception: + creation_date = datetime.now().isoformat() + collection_id = hashlib.sha256(f'{machine_id}:{collection_path}@{creation_date}'.encode()).hexdigest()[:8] try: # only persist collection_id file if we already have an index.sqlite3 file present # otherwise we might be running in a directory that is not a collection, no point creating cruft files if os.path.isfile(DATABASE_FILE) and os.access(DATA_DIR, os.W_OK): collection_id_file.write_text(collection_id) + + # if we're running as root right now, make sure the collection_id file is owned by the archivebox user + if IS_ROOT: + with SudoPermission(uid=0): + if ARCHIVEBOX_USER == 0: + os.system(f'chmod 777 "{collection_id_file}"') + else: + os.system(f'chown {ARCHIVEBOX_USER} "{collection_id_file}"') except (OSError, FileNotFoundError, PermissionError): pass return collection_id diff --git a/archivebox/main.py b/archivebox/main.py index 1a059a7c..eb68653e 100755 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -195,6 +195,8 @@ def version(quiet: bool=False, from archivebox.config.version import get_COMMIT_HASH, get_BUILD_TIME from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, RUNNING_AS_UID, RUNNING_AS_GID + from abx.archivebox.base_binary import BaseBinary, apt, brew, env + # 0.7.1 # ArchiveBox v0.7.1+editable COMMIT_HASH=951bba5 BUILD_TIME=2023-12-17 16:46:05 1702860365 # IN_DOCKER=False IN_QEMU=False ARCH=arm64 OS=Darwin PLATFORM=macOS-14.2-arm64-arm-64bit PYTHON=Cpython @@ -214,7 +216,7 @@ def version(quiet: bool=False, f'ARCH={p.machine}', f'OS={p.system}', f'PLATFORM={platform.platform()}', - f'PYTHON={sys.implementation.name.title()}', + f'PYTHON={sys.implementation.name.title()}' + (' (venv)' if CONSTANTS.IS_INSIDE_VENV else ''), ) OUTPUT_IS_REMOTE_FS = CONSTANTS.DATA_LOCATIONS.DATA_DIR.is_mount or CONSTANTS.DATA_LOCATIONS.ARCHIVE_DIR.is_mount DATA_DIR_STAT = CONSTANTS.DATA_DIR.stat() @@ -228,14 +230,15 @@ def version(quiet: bool=False, prnt( f'DEBUG={SHELL_CONFIG.DEBUG}', f'IS_TTY={SHELL_CONFIG.IS_TTY}', - f'TZ={CONSTANTS.TIMEZONE}', + f'SUDO={CONSTANTS.IS_ROOT}', + f'ID={CONSTANTS.MACHINE_ID}:{CONSTANTS.COLLECTION_ID}', f'SEARCH_BACKEND={SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}', f'LDAP={LDAP_CONFIG.LDAP_ENABLED}', #f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually ) prnt() - prnt('[pale_green1][i] Dependency versions:[/pale_green1]') + prnt('[pale_green1][i] Binary Dependencies:[/pale_green1]') failures = [] for name, binary in reversed(list(settings.BINARIES.items())): if binary.name == 'archivebox': @@ -247,7 +250,7 @@ def version(quiet: bool=False, except Exception as e: err = e loaded_bin = binary - provider_summary = f'[dark_sea_green3]{loaded_bin.binprovider.name.ljust(10)}[/dark_sea_green3]' if loaded_bin.binprovider else '[grey23]not found[/grey23]' + provider_summary = f'[dark_sea_green3]{loaded_bin.binprovider.name.ljust(10)}[/dark_sea_green3]' if loaded_bin.binprovider else '[grey23]not found[/grey23] ' if loaded_bin.abspath: abspath = str(loaded_bin.abspath).replace(str(DATA_DIR), '[light_slate_blue].[/light_slate_blue]').replace(str(Path('~').expanduser()), '~') if ' ' in abspath: @@ -257,6 +260,25 @@ def version(quiet: bool=False, prnt('', '[green]√[/green]' if loaded_bin.is_valid else '[red]X[/red]', '', loaded_bin.name.ljust(21), str(loaded_bin.version).ljust(12), provider_summary, abspath, overflow='ignore', crop=False) if not loaded_bin.is_valid: failures.append(loaded_bin.name) + + prnt() + prnt('[gold3][i] Package Managers:[/gold3]') + for name, binprovider in reversed(list(settings.BINPROVIDERS.items())): + err = None + + # TODO: implement a BinProvider.BINARY() method that gets the loaded binary for a binprovider's INSTALLER_BIN + loaded_bin = binprovider.INSTALLER_BINARY or BaseBinary(name=binprovider.INSTALLER_BIN, binproviders=[env, apt, brew]) + + abspath = None + if loaded_bin.abspath: + abspath = str(loaded_bin.abspath).replace(str(DATA_DIR), '.').replace(str(Path('~').expanduser()), '~') + if ' ' in abspath: + abspath = abspath.replace(' ', r'\ ') + + PATH = str(binprovider.PATH).replace(str(DATA_DIR), '[light_slate_blue].[/light_slate_blue]').replace(str(Path('~').expanduser()), '~') + ownership_summary = f'UID=[blue]{str(binprovider.euid).ljust(4)}[/blue]' + provider_summary = f'[dark_sea_green3]{str(abspath).ljust(52)}[/dark_sea_green3]' if abspath else f'[grey23]{"not available".ljust(52)}[/grey23]' + prnt('', '[green]√[/green]' if binprovider.is_valid else '[red]X[/red]', '', binprovider.name.ljust(11), provider_summary, ownership_summary, f'PATH={PATH}' if abspath else '', overflow='ellipsis', soft_wrap=True) prnt() prnt('[deep_sky_blue3][i] Source-code locations:[/deep_sky_blue3]') @@ -278,11 +300,9 @@ def version(quiet: bool=False, prnt() - if failures: raise SystemExit(1) - else: - raise SystemExit(0) + raise SystemExit(0) @enforce_types def run(subcommand: str, @@ -451,6 +471,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat if os.access(html_index, os.F_OK): html_index.rename(f"{index_name}.html") + CONSTANTS.PERSONAS_DIR.mkdir(parents=True, exist_ok=True) CONSTANTS.TMP_DIR.mkdir(parents=True, exist_ok=True) CONSTANTS.LIB_DIR.mkdir(parents=True, exist_ok=True) @@ -985,7 +1006,7 @@ def install(out_dir: Path=DATA_DIR) -> None: from django.conf import settings from archivebox import CONSTANTS - from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP + from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, USER if not (os.access(ARCHIVE_DIR, os.R_OK) and ARCHIVE_DIR.is_dir()): run_subcommand('init', stdin=None, pwd=out_dir) # must init full index because we need a db to store InstalledBinary entries in @@ -994,15 +1015,17 @@ def install(out_dir: Path=DATA_DIR) -> None: # we never want the data dir to be owned by root, detect owner of existing owner of DATA_DIR to try and guess desired non-root UID if IS_ROOT: + EUID = os.geteuid() + # if we have sudo/root permissions, take advantage of them just while installing dependencies print() - print('[yellow]:warning: Using [red]root[/red] privileges only to install dependencies that need it, all other operations should be done as a [blue]non-root[/blue] user.[/yellow]') + print(f'[yellow]:warning: Running as [blue]{USER}[/blue] ({EUID}) with [red]sudo[/red] only for dependencies that need it.[/yellow]') print(f' DATA_DIR, LIB_DIR, and TMP_DIR will be owned by [blue]{ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/blue].') print() - package_manager_names = ', '.join(binprovider.name for binprovider in reversed(list(settings.BINPROVIDERS.values()))) - print(f'[+] Setting up package managers [yellow]{package_manager_names}[/yellow]...') + package_manager_names = ', '.join(f'[yellow]{binprovider.name}[/yellow]' for binprovider in reversed(list(settings.BINPROVIDERS.values()))) + print(f'[+] Setting up package managers {package_manager_names}...') for binprovider in reversed(list(settings.BINPROVIDERS.values())): try: binprovider.setup() @@ -1016,9 +1039,11 @@ def install(out_dir: Path=DATA_DIR) -> None: for binary in reversed(list(settings.BINARIES.values())): providers = ' [grey53]or[/grey53] '.join(provider.name for provider in binary.binproviders_supported) - print(f'[+] Locating / Installing [yellow]{binary.name}[/yellow] using [red]{providers}[/red]...') + print(f'[+] Detecting / Installing [yellow]{binary.name.ljust(22)}[/yellow] using [red]{providers}[/red]...') try: - print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'})) + with SudoPermission(uid=0, fallback=True): + # print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'})) + binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}) if IS_ROOT: with SudoPermission(uid=0): if ARCHIVEBOX_USER == 0: @@ -1026,19 +1051,7 @@ def install(out_dir: Path=DATA_DIR) -> None: else: os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"') except Exception as e: - if IS_ROOT: - print(f'[yellow]:warning: Retrying {binary.name} installation with [red]sudo[/red]...[/yellow]') - with SudoPermission(uid=0): - try: - print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'})) - if ARCHIVEBOX_USER == 0: - os.system(f'chmod -R 777 "{CONSTANTS.LIB_DIR.resolve()}"') - else: - os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"') - except Exception as e: - print(f'[red]:cross_mark: Failed to install {binary.name} as root: {e}[/red]') - else: - print(f'[red]:cross_mark: Failed to install {binary.name} as user {ARCHIVEBOX_USER}: {e}[/red]') + print(f'[red]:cross_mark: Failed to install {binary.name} as user {ARCHIVEBOX_USER}: {e}[/red]') from django.contrib.auth import get_user_model diff --git a/archivebox/misc/logging.py b/archivebox/misc/logging.py index 86983176..daa30029 100644 --- a/archivebox/misc/logging.py +++ b/archivebox/misc/logging.py @@ -2,6 +2,7 @@ __package__ = 'archivebox.misc' # TODO: merge/dedupe this file with archivebox/logging_util.py + import sys from typing import Optional, Union, Tuple, List from collections import defaultdict @@ -16,7 +17,6 @@ CONSOLE = Console() STDERR = Console(stderr=True) IS_TTY = CONSOLE.is_interactive - class RainbowHighlighter(Highlighter): def highlight(self, text): for index in range(len(text)): diff --git a/archivebox/plugins_extractor/singlefile/apps.py b/archivebox/plugins_extractor/singlefile/apps.py index 8ebbc41c..c0e91116 100644 --- a/archivebox/plugins_extractor/singlefile/apps.py +++ b/archivebox/plugins_extractor/singlefile/apps.py @@ -46,24 +46,28 @@ class SinglefileBinary(BaseBinary): binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env] provider_overrides: Dict[BinProviderName, ProviderLookupDict] = { - env.name: { - 'abspath': lambda: - bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=env.PATH) - or bin_abspath('single-file', PATH=env.PATH) - or bin_abspath('single-file-node.js', PATH=env.PATH), - }, LIB_NPM_BINPROVIDER.name: { "abspath": lambda: - bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=env.PATH) + bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=LIB_NPM_BINPROVIDER.PATH) or bin_abspath("single-file", PATH=LIB_NPM_BINPROVIDER.PATH) or bin_abspath("single-file-node.js", PATH=LIB_NPM_BINPROVIDER.PATH), "packages": lambda: [f"single-file-cli@>={SINGLEFILE_MIN_VERSION} <{SINGLEFILE_MAX_VERSION}"], }, SYS_NPM_BINPROVIDER.name: { + "abspath": lambda: + bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=SYS_NPM_BINPROVIDER.PATH) + or bin_abspath("single-file", PATH=SYS_NPM_BINPROVIDER.PATH) + or bin_abspath("single-file-node.js", PATH=SYS_NPM_BINPROVIDER.PATH), "packages": lambda: [], # prevent modifying system global npm packages }, + env.name: { + 'abspath': lambda: + bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=env.PATH) + or bin_abspath('single-file', PATH=env.PATH) + or bin_abspath('single-file-node.js', PATH=env.PATH), + }, } def install(self, binprovider_name: Optional[BinProviderName]=None, **kwargs) -> ShallowBinary: diff --git a/archivebox/plugins_pkg/npm/apps.py b/archivebox/plugins_pkg/npm/apps.py index 2f8a4a73..4a0b555e 100644 --- a/archivebox/plugins_pkg/npm/apps.py +++ b/archivebox/plugins_pkg/npm/apps.py @@ -42,7 +42,7 @@ class SystemNpmProvider(NpmProvider, BaseBinProvider): class LibNpmProvider(NpmProvider, BaseBinProvider): name: BinProviderName = "lib_npm" - PATH: PATHStr = str(OLD_NODE_BIN_PATH) + PATH: PATHStr = f'{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}' npm_prefix: Optional[Path] = CONSTANTS.LIB_NPM_DIR @@ -56,13 +56,6 @@ SYS_NPM_BINPROVIDER = SystemNpmProvider() LIB_NPM_BINPROVIDER = LibNpmProvider() npm = LIB_NPM_BINPROVIDER -class NpmBinary(BaseBinary): - name: BinName = 'npm' - binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] - - -NPM_BINARY = NpmBinary() - class NodeBinary(BaseBinary): name: BinName = 'node' binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] @@ -71,6 +64,22 @@ class NodeBinary(BaseBinary): NODE_BINARY = NodeBinary() +class NpmBinary(BaseBinary): + name: BinName = 'npm' + binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] + +NPM_BINARY = NpmBinary() + + +class NpxBinary(BaseBinary): + name: BinName = 'npx' + binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] + +NPX_BINARY = NpxBinary() + + + + class NpmPlugin(BasePlugin): app_label: str = 'npm' @@ -82,6 +91,7 @@ class NpmPlugin(BasePlugin): LIB_NPM_BINPROVIDER, NODE_BINARY, NPM_BINARY, + NPX_BINARY, ] diff --git a/archivebox/plugins_pkg/pip/apps.py b/archivebox/plugins_pkg/pip/apps.py index 813317dc..a61de233 100644 --- a/archivebox/plugins_pkg/pip/apps.py +++ b/archivebox/plugins_pkg/pip/apps.py @@ -2,13 +2,13 @@ __package__ = 'archivebox.plugins_pkg.pip' import os import sys -import inspect from pathlib import Path from typing import List, Dict, Optional from pydantic import InstanceOf, Field, model_validator, validate_call import django +import django.db.backends.sqlite3.base from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type] from django.core.checks import Error, Tags from pydantic_pkgr import BinProvider, PipProvider, BinName, BinProviderName, ProviderLookupDict, SemVer, bin_abspath @@ -54,16 +54,18 @@ class SystemPipxBinProvider(PipProvider, BaseBinProvider): pip_venv: Optional[Path] = None # global pipx scope +IS_INSIDE_VENV = sys.prefix != sys.base_prefix + class VenvPipBinProvider(PipProvider, BaseBinProvider): name: BinProviderName = "venv_pip" INSTALLER_BIN: BinName = "pip" - pip_venv: Optional[Path] = Path(os.environ.get("VIRTUAL_ENV", None) or '/tmp/NotInsideAVenv') + pip_venv: Optional[Path] = Path(sys.prefix if IS_INSIDE_VENV else os.environ.get("VIRTUAL_ENV", '/tmp/NotInsideAVenv/lib')) def setup(self): """never attempt to create a venv here, this is just used to detect if we are inside an existing one""" return None - + class LibPipBinProvider(PipProvider, BaseBinProvider): name: BinProviderName = "lib_pip" @@ -78,6 +80,9 @@ LIB_PIP_BINPROVIDER = LibPipBinProvider() pip = LIB_PIP_BINPROVIDER # ensure python libraries are importable from these locations (if archivebox wasnt executed from one of these then they wont already be in sys.path) +assert VENV_PIP_BINPROVIDER.pip_venv is not None +assert LIB_PIP_BINPROVIDER.pip_venv is not None + site_packages_dir = 'lib/python{}.{}/site-packages'.format(*sys.version_info[:2]) if os.environ.get("VIRTUAL_ENV", None): sys.path.append(str(VENV_PIP_BINPROVIDER.pip_venv / site_packages_dir)) @@ -127,17 +132,22 @@ class PythonBinary(BaseBinary): PYTHON_BINARY = PythonBinary() + +LOADED_SQLITE_PATH = Path(django.db.backends.sqlite3.base.__file__) +LOADED_SQLITE_VERSION = SemVer(django_sqlite3.version) +LOADED_SQLITE_FROM_VENV = str(LOADED_SQLITE_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve())) + class SqliteBinary(BaseBinary): name: BinName = 'sqlite' binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER]) provider_overrides: Dict[BinProviderName, ProviderLookupDict] = { VENV_PIP_BINPROVIDER.name: { - "abspath": lambda: Path(inspect.getfile(django_sqlite3)), - "version": lambda: SemVer(django_sqlite3.version), + "abspath": lambda: LOADED_SQLITE_PATH if LOADED_SQLITE_FROM_VENV else None, + "version": lambda: LOADED_SQLITE_VERSION if LOADED_SQLITE_FROM_VENV else None, }, SYS_PIP_BINPROVIDER.name: { - "abspath": lambda: Path(inspect.getfile(django_sqlite3)), - "version": lambda: SemVer(django_sqlite3.version), + "abspath": lambda: LOADED_SQLITE_PATH if not LOADED_SQLITE_FROM_VENV else None, + "version": lambda: LOADED_SQLITE_VERSION if not LOADED_SQLITE_FROM_VENV else None, }, } @@ -166,18 +176,22 @@ class SqliteBinary(BaseBinary): SQLITE_BINARY = SqliteBinary() +LOADED_DJANGO_PATH = Path(django.__file__) +LOADED_DJANGO_VERSION = SemVer(django.VERSION[:3]) +LOADED_DJANGO_FROM_VENV = str(LOADED_DJANGO_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve())) + class DjangoBinary(BaseBinary): name: BinName = 'django' binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER]) provider_overrides: Dict[BinProviderName, ProviderLookupDict] = { VENV_PIP_BINPROVIDER.name: { - "abspath": lambda: inspect.getfile(django), - "version": lambda: django.VERSION[:3], + "abspath": lambda: LOADED_DJANGO_PATH if LOADED_DJANGO_FROM_VENV else None, + "version": lambda: LOADED_DJANGO_VERSION if LOADED_DJANGO_FROM_VENV else None, }, SYS_PIP_BINPROVIDER.name: { - "abspath": lambda: inspect.getfile(django), - "version": lambda: django.VERSION[:3], + "abspath": lambda: LOADED_DJANGO_PATH if not LOADED_DJANGO_FROM_VENV else None, + "version": lambda: LOADED_DJANGO_VERSION if not LOADED_DJANGO_FROM_VENV else None, }, } @@ -206,6 +220,13 @@ class PipBinary(BaseBinary): PIP_BINARY = PipBinary() +class PipxBinary(BaseBinary): + name: BinName = "pipx" + binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] + +PIPX_BINARY = PipxBinary() + + class CheckUserIsNotRoot(BaseCheck): label: str = 'CheckUserIsNotRoot' tag: str = Tags.database @@ -262,6 +283,7 @@ class PipPlugin(BasePlugin): VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, PIP_BINARY, + PIPX_BINARY, ARCHIVEBOX_BINARY, PYTHON_BINARY, SQLITE_BINARY, @@ -270,6 +292,7 @@ class PipPlugin(BasePlugin): PIP_ENVIRONMENT_CHECK, ] + PLUGIN = PipPlugin() # PLUGIN.register(settings) DJANGO_APP = PLUGIN.AppConfig diff --git a/archivebox/vendor/pydantic-pkgr b/archivebox/vendor/pydantic-pkgr index fa474024..830b3738 160000 --- a/archivebox/vendor/pydantic-pkgr +++ b/archivebox/vendor/pydantic-pkgr @@ -1 +1 @@ -Subproject commit fa47402471ccb1f2e5ed33806e3fd3e2dee590c8 +Subproject commit 830b3738f49109a05c8068df12f1e2167901953f diff --git a/bin/docker_entrypoint.sh b/bin/docker_entrypoint.sh index fb89f82f..f85b5043 100755 --- a/bin/docker_entrypoint.sh +++ b/bin/docker_entrypoint.sh @@ -110,11 +110,11 @@ if [[ -d "$PLAYWRIGHT_BROWSERS_PATH/.links" ]]; then chown -h $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"/.links/* fi -# also chown tmp dir and lib dir -mkdir -p "$SYSTEM_TMP_DIR" -chown $PUID:$PGID "$SYSTEM_TMP_DIR" -mkdir -p "$SYSTEM_LIB_DIR" -chown $PUID:$PGID "$SYSTEM_LIB_DIR" "$SYSTEM_LIB_DIR"/* +# also create and chown tmp dir and lib dirs +mkdir -p "$DATA_DIR"/lib/bin +chown $PUID:$PGID "$DATA_DIR"/lib "$DATA_DIR"/lib/* +mkdir -p "$DATA_DIR"/tmp/workers +chown $PUID:$PGID "$DATA_DIR"/tmp "$DATA_DIR"/tmp/* # (this check is written in blood in 2023, QEMU silently breaks things in ways that are not obvious) export IN_QEMU="$(pmap 1 | grep qemu >/dev/null && echo 'True' || echo 'False')" @@ -177,7 +177,7 @@ else fi # symlink etc crontabs into place -mkdir -p "$DATA_DIR/crontabs" +mkdir -p "$DATA_DIR"/crontabs if ! test -L /var/spool/cron/crontabs; then # move files from old location into new data dir location for existing_file in /var/spool/cron/crontabs/*; do @@ -187,7 +187,7 @@ if ! test -L /var/spool/cron/crontabs; then rm -Rf /var/spool/cron/crontabs ln -sf "$DATA_DIR/crontabs" /var/spool/cron/crontabs fi -chown -R $PUID "$DATA_DIR/crontabs" +chown -R $PUID "$DATA_DIR"/crontabs # set DBUS_SYSTEM_BUS_ADDRESS & DBUS_SESSION_BUS_ADDRESS # (dbus is not actually needed, it makes chrome log fewer warnings but isn't worth making our docker images bigger) diff --git a/pyproject.toml b/pyproject.toml index ebf81aea..d2502004 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,7 +78,7 @@ dependencies = [ "django-taggit==1.3.0", "base32-crockford==0.3.0", # "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7", - "pydantic-pkgr>=0.4.13", + "pydantic-pkgr>=0.4.16", ############# Plugin Dependencies ################ "sonic-client>=1.0.0", "yt-dlp>=2024.8.6", # for: media"