remove platformdirs dependency

This commit is contained in:
Nick Sweeting 2024-10-08 19:17:18 -07:00
parent 0b4cbb6415
commit 9f274cf9f4
No known key found for this signature in database
4 changed files with 151 additions and 124 deletions

View file

@ -16,9 +16,9 @@ from .paths import (
PACKAGE_DIR,
DATA_DIR,
ARCHIVE_DIR,
# get_collection_id,
# get_LIB_DIR,
# get_TMP_DIR,
get_collection_id,
get_machine_id,
get_machine_type,
)
from .permissions import (
IS_ROOT,
@ -39,14 +39,14 @@ class ConstantsDict(Mapping):
PACKAGE_DIR: Path = PACKAGE_DIR
DATA_DIR: Path = DATA_DIR
ARCHIVE_DIR: Path = ARCHIVE_DIR
# COLLECTION_ID: str = get_collection_id(DATA_DIR)
MACHINE_TYPE: str = get_machine_type()
MACHINE_ID: str = get_machine_id()
COLLECTION_ID: str = get_collection_id(DATA_DIR)
# Host system
VERSION: str = detect_installed_version(PACKAGE_DIR)
OS: str = platform.system().lower() # darwin, linux, etc.
ARCH: str = platform.machine().lower() # arm64, x86_64, aarch64, etc.
IN_DOCKER: bool = IN_DOCKER
LIB_DIR_SCOPE: str = f'{ARCH}-{OS}-docker' if IN_DOCKER else f'{ARCH}-{OS}'
# Permissions
IS_ROOT: bool = IS_ROOT
@ -96,11 +96,9 @@ class ConstantsDict(Mapping):
# Runtime dirs
TMP_DIR_NAME: str = 'tmp'
# TMP_DIR: Path = get_TMP_DIR()
TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME
TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME / MACHINE_ID
LIB_DIR_NAME: str = 'lib'
# LIB_DIR: Path = get_LIB_DIR()
LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / LIB_DIR_SCOPE
LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / MACHINE_TYPE
LIB_PIP_DIR: Path = LIB_DIR / 'pip'
LIB_NPM_DIR: Path = LIB_DIR / 'npm'
LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers'

View file

@ -1,16 +1,12 @@
__package__ = 'archivebox.config'
import os
import sys
import tempfile
import hashlib
import platform
from pathlib import Path
from functools import cache
from platformdirs import PlatformDirs
from rich import print
from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
from .permissions import SudoPermission
#############################################################################################
@ -18,11 +14,15 @@ PACKAGE_DIR: Path = Path(__file__).resolve().parent.parent # archivebox sourc
DATA_DIR: Path = Path(os.getcwd()).resolve() # archivebox user data dir
ARCHIVE_DIR: Path = DATA_DIR / 'archive' # archivebox snapshot data dir
IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')
DATABASE_FILE = DATA_DIR / 'index.sqlite3'
#############################################################################################
@cache
def get_collection_id(DATA_DIR=DATA_DIR):
"""Get a short, stable, unique ID for the current collection"""
def get_collection_id(DATA_DIR=DATA_DIR) -> str:
"""Get a short, stable, unique ID for the current collection (e.g. abc45678)"""
collection_id_file = DATA_DIR / '.archivebox_id'
try:
@ -32,12 +32,42 @@ def get_collection_id(DATA_DIR=DATA_DIR):
hash_key = str(DATA_DIR.resolve()).encode()
collection_id = hashlib.sha256(hash_key).hexdigest()[:8]
try:
collection_id_file.write_text(collection_id)
# only persist collection_id file if we already have an index.sqlite3 file present
# otherwise we might be running in a directory that is not a collection, no point creating cruft files
if os.path.isfile(DATABASE_FILE) and os.access(DATA_DIR, os.W_OK):
collection_id_file.write_text(collection_id)
except (OSError, FileNotFoundError, PermissionError):
pass
return collection_id
@cache
def get_machine_id() -> str:
    """Get a short, stable, unique ID for the current machine (e.g. abc45678).

    Tries the optional ``machineid`` package first, then falls back to a hash
    of the hardware (MAC) address reported by ``uuid.getnode()``.

    Returns:
        The first 8 hex characters of the hashed machine identifier, or
        ``'unknown'`` when no stable identifier can be determined.
    """
    try:
        import machineid
        return machineid.hashed_id('archivebox')[:8]
    except Exception:
        # deliberate best-effort: machineid may be missing or unable to read
        # the host ID, in which case we fall through to the MAC-based ID below
        pass

    try:
        import uuid
        node = uuid.getnode()
    except Exception:
        return 'unknown'

    # When no hardware address can be found, uuid.getnode() returns a *random*
    # 48-bit number with the multicast bit (lowest bit of the first octet) set.
    # Hashing that would yield a different ID on every process start, so treat
    # it as "unknown" to keep the returned value stable.
    if (node >> 40) & 1:
        return 'unknown'

    return hashlib.sha256(str(node).encode()).hexdigest()[:8]
@cache
def get_machine_type() -> str:
    """Get a short, stable platform label for the current machine.

    The label is formatted as ARCH-OS, with a '-docker' suffix appended when
    IN_DOCKER is truthy (e.g. x86_64-linux-docker, arm64-darwin).
    """
    arch_name = platform.machine().lower()   # arm64, x86_64, aarch64, etc.
    os_name = platform.system().lower()      # darwin, linux, etc.

    if IN_DOCKER:
        return f'{arch_name}-{os_name}-docker'
    return f'{arch_name}-{os_name}'
def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = None, fallback=True) -> bool:
"""Check if a given directory is writable by a specific user and group (fallback=try as current user is unable to check with provided uid)"""
@ -58,116 +88,116 @@ def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = No
@cache
def get_LIB_DIR():
"""
- should be shared with other collections on the same host
- must be scoped by CPU architecture, OS family, and archivebox version
- should not be shared with other hosts/archivebox versions
- must be writable by any archivebox user
- should be persistent across reboots
- can be on a docker bin mount but probably shouldnt be
- ok to have a long path (doesnt contain SOCKETS)
"""
from .version import detect_installed_version
# @cache
# def get_LIB_DIR():
# """
# - should be shared with other collections on the same host
# - must be scoped by CPU architecture, OS family, and archivebox version
# - should not be shared with other hosts/archivebox versions
# - must be writable by any archivebox user
# - should be persistent across reboots
# - can be on a docker bind mount but probably shouldn't be
# - ok to have a long path (doesn't contain SOCKETS)
# """
# from .version import detect_installed_version
HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)
# HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)
lib_dir = tempfile.gettempdir()
try:
if 'SYSTEM_LIB_DIR' in os.environ:
lib_dir = Path(os.environ['SYSTEM_LIB_DIR'])
else:
with SudoPermission(uid=ARCHIVEBOX_USER, fallback=True):
lib_dir = HOST_DIRS.site_data_path
# lib_dir = tempfile.gettempdir()
# try:
# if 'SYSTEM_LIB_DIR' in os.environ:
# lib_dir = Path(os.environ['SYSTEM_LIB_DIR'])
# else:
# with SudoPermission(uid=ARCHIVEBOX_USER, fallback=True):
# lib_dir = HOST_DIRS.site_data_path
# Docker: /usr/local/share/archivebox/0.8.5
# Ubuntu: /usr/local/share/archivebox/0.8.5
# macOS: /Library/Application Support/archivebox
try:
with SudoPermission(uid=0, fallback=True):
lib_dir.mkdir(parents=True, exist_ok=True)
except PermissionError:
# our user cannot
lib_dir = HOST_DIRS.user_data_path
lib_dir.mkdir(parents=True, exist_ok=True)
# # Docker: /usr/local/share/archivebox/0.8.5
# # Ubuntu: /usr/local/share/archivebox/0.8.5
# # macOS: /Library/Application Support/archivebox
# try:
# with SudoPermission(uid=0, fallback=True):
# lib_dir.mkdir(parents=True, exist_ok=True)
# except PermissionError:
# # our user cannot
# lib_dir = HOST_DIRS.user_data_path
# lib_dir.mkdir(parents=True, exist_ok=True)
if IS_ROOT or not dir_is_writable(lib_dir, uid=ARCHIVEBOX_USER):
if IS_ROOT:
# make sure lib dir is owned by the archivebox user, not root
with SudoPermission(uid=0):
if ARCHIVEBOX_USER == 0:
# print(f'[yellow]:warning: Waring: Creating SYSTEM_LIB_DIR {lib_dir} with mode 777 so that non-root archivebox users can share it.[/yellow] (caches shared libs used by archivebox for performance)', file=sys.stderr)
os.system(f'chmod -R 777 "{lib_dir}"')
else:
os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{lib_dir}"')
else:
raise PermissionError()
except (PermissionError, AssertionError):
# raise PermissionError(f'SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
print(f'[red]:cross_mark: ERROR: SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr)
# if IS_ROOT or not dir_is_writable(lib_dir, uid=ARCHIVEBOX_USER):
# if IS_ROOT:
# # make sure lib dir is owned by the archivebox user, not root
# with SudoPermission(uid=0):
# if ARCHIVEBOX_USER == 0:
# # print(f'[yellow]:warning: Waring: Creating SYSTEM_LIB_DIR {lib_dir} with mode 777 so that non-root archivebox users can share it.[/yellow] (caches shared libs used by archivebox for performance)', file=sys.stderr)
# os.system(f'chmod -R 777 "{lib_dir}"')
# else:
# os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{lib_dir}"')
# else:
# raise PermissionError()
# except (PermissionError, AssertionError):
# # raise PermissionError(f'SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
# print(f'[red]:cross_mark: ERROR: SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr)
return lib_dir
# return lib_dir
@cache
def get_TMP_DIR():
"""
- must NOT be inside DATA_DIR / inside a docker volume bind mount
- must NOT have a long PATH (UNIX socket path length restrictions)
- must NOT be shared with other collections/hosts
- must be writable by archivebox user & root
- must be cleared on every boot / not persisted
- must be cleared on every archivebox version upgrade
"""
from .version import detect_installed_version
# @cache
# def get_TMP_DIR():
# """
# - must NOT be inside DATA_DIR / inside a docker volume bind mount
# - must NOT have a long PATH (UNIX socket path length restrictions)
# - must NOT be shared with other collections/hosts
# - must be writable by archivebox user & root
# - must be cleared on every boot / not persisted
# - must be cleared on every archivebox version upgrade
# """
# from .version import detect_installed_version
HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)
# HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)
# print('DATA_DIR OWNED BY:', ARCHIVEBOX_USER, ARCHIVEBOX_GROUP)
# print('RUNNING AS:', self.PUID, self.PGID)
run_dir = tempfile.gettempdir()
try:
if 'SYSTEM_TMP_DIR' in os.environ:
run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR)
with SudoPermission(uid=0, fallback=True):
run_dir.mkdir(parents=True, exist_ok=True)
if not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
if IS_ROOT:
with SudoPermission(uid=0, fallback=False):
if ARCHIVEBOX_USER == 0:
# print(f'[yellow]:warning: Waring: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr)
os.system(f'chmod -R 777 "{run_dir}"')
else:
os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
else:
raise PermissionError()
assert len(str(run_dir / 'supervisord.conf')) < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
return run_dir
# # print('DATA_DIR OWNED BY:', ARCHIVEBOX_USER, ARCHIVEBOX_GROUP)
# # print('RUNNING AS:', self.PUID, self.PGID)
# run_dir = tempfile.gettempdir()
# try:
# if 'SYSTEM_TMP_DIR' in os.environ:
# run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR)
# with SudoPermission(uid=0, fallback=True):
# run_dir.mkdir(parents=True, exist_ok=True)
# if not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
# if IS_ROOT:
# with SudoPermission(uid=0, fallback=False):
# if ARCHIVEBOX_USER == 0:
# # print(f'[yellow]:warning: Waring: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr)
# os.system(f'chmod -R 777 "{run_dir}"')
# else:
# os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
# else:
# raise PermissionError()
# assert len(str(run_dir / 'supervisord.conf')) < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
# return run_dir
run_dir = (HOST_DIRS.site_runtime_path / get_collection_id(DATA_DIR=DATA_DIR)).resolve()
try:
assert len(str(run_dir)) + len('/supervisord.sock') < 95
except AssertionError:
run_dir = Path(tempfile.gettempdir()).resolve() / 'archivebox' / get_collection_id(DATA_DIR=DATA_DIR)
assert len(str(run_dir)) + len('/supervisord.sock') < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
# run_dir = (HOST_DIRS.site_runtime_path / get_collection_id(DATA_DIR=DATA_DIR)).resolve()
# try:
# assert len(str(run_dir)) + len('/supervisord.sock') < 95
# except AssertionError:
# run_dir = Path(tempfile.gettempdir()).resolve() / 'archivebox' / get_collection_id(DATA_DIR=DATA_DIR)
# assert len(str(run_dir)) + len('/supervisord.sock') < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
with SudoPermission(uid=0, fallback=True):
run_dir.mkdir(parents=True, exist_ok=True)
# with SudoPermission(uid=0, fallback=True):
# run_dir.mkdir(parents=True, exist_ok=True)
if IS_ROOT or not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
if IS_ROOT:
with SudoPermission(uid=0):
if ARCHIVEBOX_USER == 0:
# print(f'[yellow]:warning: Waring: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr)
os.system(f'chmod -R 777 "{run_dir}"')
else:
os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
else:
raise PermissionError()
# if IS_ROOT or not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
# if IS_ROOT:
# with SudoPermission(uid=0):
# if ARCHIVEBOX_USER == 0:
# # print(f'[yellow]:warning: Waring: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr)
# os.system(f'chmod -R 777 "{run_dir}"')
# else:
# os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
# else:
# raise PermissionError()
except (PermissionError, AssertionError):
# raise PermissionError(f'SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
print(f'[red]:cross_mark: ERROR: SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr)
# except (PermissionError, AssertionError):
# # raise PermissionError(f'SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
# print(f'[red]:cross_mark: ERROR: SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr)
return run_dir
# return run_dir

View file

@ -100,7 +100,7 @@ def check_not_root():
def check_data_dir_permissions():
from archivebox import DATA_DIR, CONSTANTS
from archivebox import DATA_DIR
from archivebox.misc.logging import STDERR
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, DEFAULT_PUID, DEFAULT_PGID, IS_ROOT, USER
@ -119,8 +119,8 @@ def check_data_dir_permissions():
if data_owned_by_root or data_owner_doesnt_match or data_not_writable:
STDERR.print(f'[violet]Hint:[/violet] Change the current ownership [red]{data_dir_uid}[/red]:{data_dir_gid} (PUID:PGID) to a non-user & group that will run ArchiveBox, e.g.:')
STDERR.print(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {DATA_DIR.resolve()}')
STDERR.print(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.LIB_DIR.resolve()}')
STDERR.print(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.TMP_DIR.resolve()}')
# STDERR.print(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.LIB_DIR.resolve()}')
# STDERR.print(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.TMP_DIR.resolve()}')
STDERR.print()
STDERR.print('[blue]More info:[/blue]')
STDERR.print(' [link=https://github.com/ArchiveBox/ArchiveBox#storage-requirements]https://github.com/ArchiveBox/ArchiveBox#storage-requirements[/link]')

View file

@ -1,6 +1,6 @@
[project]
name = "archivebox"
version = "0.8.5rc18"
version = "0.8.5rc19"
requires-python = ">=3.10"
description = "Self-hosted internet archiving solution."
authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}]
@ -77,7 +77,6 @@ dependencies = [
"atomicwrites==1.4.1",
"django-taggit==1.3.0",
"base32-crockford==0.3.0",
"platformdirs>=4.3.6",
# "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7",
"pydantic-pkgr>=0.4.13",
############# Plugin Dependencies ################