remove platformdirs dependency

This commit is contained in:
Nick Sweeting 2024-10-08 19:17:18 -07:00
parent 0b4cbb6415
commit 9f274cf9f4
No known key found for this signature in database
4 changed files with 151 additions and 124 deletions

View file

@ -16,9 +16,9 @@ from .paths import (
PACKAGE_DIR, PACKAGE_DIR,
DATA_DIR, DATA_DIR,
ARCHIVE_DIR, ARCHIVE_DIR,
# get_collection_id, get_collection_id,
# get_LIB_DIR, get_machine_id,
# get_TMP_DIR, get_machine_type,
) )
from .permissions import ( from .permissions import (
IS_ROOT, IS_ROOT,
@ -39,14 +39,14 @@ class ConstantsDict(Mapping):
PACKAGE_DIR: Path = PACKAGE_DIR PACKAGE_DIR: Path = PACKAGE_DIR
DATA_DIR: Path = DATA_DIR DATA_DIR: Path = DATA_DIR
ARCHIVE_DIR: Path = ARCHIVE_DIR ARCHIVE_DIR: Path = ARCHIVE_DIR
# COLLECTION_ID: str = get_collection_id(DATA_DIR)
MACHINE_TYPE: str = get_machine_type()
MACHINE_ID: str = get_machine_id()
COLLECTION_ID: str = get_collection_id(DATA_DIR)
# Host system # Host system
VERSION: str = detect_installed_version(PACKAGE_DIR) VERSION: str = detect_installed_version(PACKAGE_DIR)
OS: str = platform.system().lower() # darwin, linux, etc.
ARCH: str = platform.machine().lower() # arm64, x86_64, aarch64, etc.
IN_DOCKER: bool = IN_DOCKER IN_DOCKER: bool = IN_DOCKER
LIB_DIR_SCOPE: str = f'{ARCH}-{OS}-docker' if IN_DOCKER else f'{ARCH}-{OS}'
# Permissions # Permissions
IS_ROOT: bool = IS_ROOT IS_ROOT: bool = IS_ROOT
@ -96,11 +96,9 @@ class ConstantsDict(Mapping):
# Runtime dirs # Runtime dirs
TMP_DIR_NAME: str = 'tmp' TMP_DIR_NAME: str = 'tmp'
# TMP_DIR: Path = get_TMP_DIR() TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME / MACHINE_ID
TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME
LIB_DIR_NAME: str = 'lib' LIB_DIR_NAME: str = 'lib'
# LIB_DIR: Path = get_LIB_DIR() LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / MACHINE_TYPE
LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / LIB_DIR_SCOPE
LIB_PIP_DIR: Path = LIB_DIR / 'pip' LIB_PIP_DIR: Path = LIB_DIR / 'pip'
LIB_NPM_DIR: Path = LIB_DIR / 'npm' LIB_NPM_DIR: Path = LIB_DIR / 'npm'
LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers' LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers'

View file

@ -1,16 +1,12 @@
__package__ = 'archivebox.config' __package__ = 'archivebox.config'
import os import os
import sys
import tempfile
import hashlib import hashlib
import platform
from pathlib import Path from pathlib import Path
from functools import cache from functools import cache
from platformdirs import PlatformDirs from .permissions import SudoPermission
from rich import print
from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
############################################################################################# #############################################################################################
@ -18,11 +14,15 @@ PACKAGE_DIR: Path = Path(__file__).resolve().parent.parent # archivebox sourc
DATA_DIR: Path = Path(os.getcwd()).resolve() # archivebox user data dir DATA_DIR: Path = Path(os.getcwd()).resolve() # archivebox user data dir
ARCHIVE_DIR: Path = DATA_DIR / 'archive' # archivebox snapshot data dir ARCHIVE_DIR: Path = DATA_DIR / 'archive' # archivebox snapshot data dir
IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')
DATABASE_FILE = DATA_DIR / 'index.sqlite3'
############################################################################################# #############################################################################################
@cache @cache
def get_collection_id(DATA_DIR=DATA_DIR): def get_collection_id(DATA_DIR=DATA_DIR) -> str:
"""Get a short, stable, unique ID for the current collection""" """Get a short, stable, unique ID for the current collection (e.g. abc45678)"""
collection_id_file = DATA_DIR / '.archivebox_id' collection_id_file = DATA_DIR / '.archivebox_id'
try: try:
@ -32,12 +32,42 @@ def get_collection_id(DATA_DIR=DATA_DIR):
hash_key = str(DATA_DIR.resolve()).encode() hash_key = str(DATA_DIR.resolve()).encode()
collection_id = hashlib.sha256(hash_key).hexdigest()[:8] collection_id = hashlib.sha256(hash_key).hexdigest()[:8]
try: try:
collection_id_file.write_text(collection_id) # only persist collection_id file if we already have an index.sqlite3 file present
# otherwise we might be running in a directory that is not a collection, no point creating cruft files
if os.path.isfile(DATABASE_FILE) and os.access(DATA_DIR, os.W_OK):
collection_id_file.write_text(collection_id)
except (OSError, FileNotFoundError, PermissionError): except (OSError, FileNotFoundError, PermissionError):
pass pass
return collection_id return collection_id
@cache
def get_machine_id() -> str:
    """Get a short, stable, unique ID for the current machine (e.g. abc45678).

    Sources are tried in this order:
      1. ``machineid.hashed_id('archivebox')`` truncated to 8 chars, if the
         ``machineid`` package can be imported and the call succeeds.
      2. The first 8 hex chars of the SHA-256 of ``uuid.getnode()`` (the
         hardware MAC address as an integer).
      3. The literal string ``'unknown'`` when neither source is usable.

    The result is memoized by ``@cache`` for the lifetime of the process.
    """
    try:
        import machineid
        return machineid.hashed_id('archivebox')[:8]
    except Exception:
        # deliberate best-effort: any import/lookup failure falls through to the MAC-based ID
        pass

    try:
        import uuid
        node = uuid.getnode()
    except Exception:
        return 'unknown'

    # When no MAC address can be found, uuid.getnode() returns a *random* 48-bit
    # number with the multicast bit (lowest bit of the first octet) set.
    # That value differs between processes, so hashing it would not give a stable ID.
    if (node >> 40) & 0x01:
        return 'unknown'

    return hashlib.sha256(str(node).encode()).hexdigest()[:8]
@cache
def get_machine_type() -> str:
    """Get a short, stable type identifier for the current machine (e.g. x86_64-linux-docker).

    The value is built as ``{arch}-{os}``, both lowercased, taken from
    ``platform.machine()`` and ``platform.system()``. A ``-docker`` suffix is
    appended when the module-level ``IN_DOCKER`` flag is truthy.

    The result is memoized by ``@cache`` for the lifetime of the process.
    """
    os_name = platform.system().lower()     # darwin, linux, etc.
    arch = platform.machine().lower()       # arm64, x86_64, aarch64, etc.

    machine_type = f'{arch}-{os_name}'
    if IN_DOCKER:
        machine_type += '-docker'
    return machine_type
def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = None, fallback=True) -> bool: def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = None, fallback=True) -> bool:
"""Check if a given directory is writable by a specific user and group (fallback=try as current user is unable to check with provided uid)""" """Check if a given directory is writable by a specific user and group (fallback=try as current user is unable to check with provided uid)"""
@ -58,116 +88,116 @@ def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = No
@cache # @cache
def get_LIB_DIR(): # def get_LIB_DIR():
""" # """
- should be shared with other collections on the same host # - should be shared with other collections on the same host
- must be scoped by CPU architecture, OS family, and archivebox version # - must be scoped by CPU architecture, OS family, and archivebox version
- should not be shared with other hosts/archivebox versions # - should not be shared with other hosts/archivebox versions
- must be writable by any archivebox user # - must be writable by any archivebox user
- should be persistent across reboots # - should be persistent across reboots
- can be on a docker bind mount but probably shouldn't be # - can be on a docker bind mount but probably shouldn't be
- ok to have a long path (doesn't contain SOCKETS) # - ok to have a long path (doesn't contain SOCKETS)
""" # """
from .version import detect_installed_version # from .version import detect_installed_version
HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False) # HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)
lib_dir = tempfile.gettempdir() # lib_dir = tempfile.gettempdir()
try: # try:
if 'SYSTEM_LIB_DIR' in os.environ: # if 'SYSTEM_LIB_DIR' in os.environ:
lib_dir = Path(os.environ['SYSTEM_LIB_DIR']) # lib_dir = Path(os.environ['SYSTEM_LIB_DIR'])
else: # else:
with SudoPermission(uid=ARCHIVEBOX_USER, fallback=True): # with SudoPermission(uid=ARCHIVEBOX_USER, fallback=True):
lib_dir = HOST_DIRS.site_data_path # lib_dir = HOST_DIRS.site_data_path
# Docker: /usr/local/share/archivebox/0.8.5 # # Docker: /usr/local/share/archivebox/0.8.5
# Ubuntu: /usr/local/share/archivebox/0.8.5 # # Ubuntu: /usr/local/share/archivebox/0.8.5
# macOS: /Library/Application Support/archivebox # # macOS: /Library/Application Support/archivebox
try: # try:
with SudoPermission(uid=0, fallback=True): # with SudoPermission(uid=0, fallback=True):
lib_dir.mkdir(parents=True, exist_ok=True) # lib_dir.mkdir(parents=True, exist_ok=True)
except PermissionError: # except PermissionError:
# our user cannot # # our user cannot
lib_dir = HOST_DIRS.user_data_path # lib_dir = HOST_DIRS.user_data_path
lib_dir.mkdir(parents=True, exist_ok=True) # lib_dir.mkdir(parents=True, exist_ok=True)
if IS_ROOT or not dir_is_writable(lib_dir, uid=ARCHIVEBOX_USER): # if IS_ROOT or not dir_is_writable(lib_dir, uid=ARCHIVEBOX_USER):
if IS_ROOT: # if IS_ROOT:
# make sure lib dir is owned by the archivebox user, not root # # make sure lib dir is owned by the archivebox user, not root
with SudoPermission(uid=0): # with SudoPermission(uid=0):
if ARCHIVEBOX_USER == 0: # if ARCHIVEBOX_USER == 0:
# print(f'[yellow]:warning: Waring: Creating SYSTEM_LIB_DIR {lib_dir} with mode 777 so that non-root archivebox users can share it.[/yellow] (caches shared libs used by archivebox for performance)', file=sys.stderr) # # print(f'[yellow]:warning: Waring: Creating SYSTEM_LIB_DIR {lib_dir} with mode 777 so that non-root archivebox users can share it.[/yellow] (caches shared libs used by archivebox for performance)', file=sys.stderr)
os.system(f'chmod -R 777 "{lib_dir}"') # os.system(f'chmod -R 777 "{lib_dir}"')
else: # else:
os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{lib_dir}"') # os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{lib_dir}"')
else: # else:
raise PermissionError() # raise PermissionError()
except (PermissionError, AssertionError): # except (PermissionError, AssertionError):
# raise PermissionError(f'SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}') # # raise PermissionError(f'SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
print(f'[red]:cross_mark: ERROR: SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr) # print(f'[red]:cross_mark: ERROR: SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr)
return lib_dir # return lib_dir
@cache # @cache
def get_TMP_DIR(): # def get_TMP_DIR():
""" # """
- must NOT be inside DATA_DIR / inside a docker volume bind mount # - must NOT be inside DATA_DIR / inside a docker volume bind mount
- must NOT have a long PATH (UNIX socket path length restrictions) # - must NOT have a long PATH (UNIX socket path length restrictions)
- must NOT be shared with other collections/hosts # - must NOT be shared with other collections/hosts
- must be writable by archivebox user & root # - must be writable by archivebox user & root
- must be cleared on every boot / not persisted # - must be cleared on every boot / not persisted
- must be cleared on every archivebox version upgrade # - must be cleared on every archivebox version upgrade
""" # """
from .version import detect_installed_version # from .version import detect_installed_version
HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False) # HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)
# print('DATA_DIR OWNED BY:', ARCHIVEBOX_USER, ARCHIVEBOX_GROUP) # # print('DATA_DIR OWNED BY:', ARCHIVEBOX_USER, ARCHIVEBOX_GROUP)
# print('RUNNING AS:', self.PUID, self.PGID) # # print('RUNNING AS:', self.PUID, self.PGID)
run_dir = tempfile.gettempdir() # run_dir = tempfile.gettempdir()
try: # try:
if 'SYSTEM_TMP_DIR' in os.environ: # if 'SYSTEM_TMP_DIR' in os.environ:
run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR) # run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR)
with SudoPermission(uid=0, fallback=True): # with SudoPermission(uid=0, fallback=True):
run_dir.mkdir(parents=True, exist_ok=True) # run_dir.mkdir(parents=True, exist_ok=True)
if not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER): # if not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
if IS_ROOT: # if IS_ROOT:
with SudoPermission(uid=0, fallback=False): # with SudoPermission(uid=0, fallback=False):
if ARCHIVEBOX_USER == 0: # if ARCHIVEBOX_USER == 0:
# print(f'[yellow]:warning: Waring: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr) # # print(f'[yellow]:warning: Waring: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr)
os.system(f'chmod -R 777 "{run_dir}"') # os.system(f'chmod -R 777 "{run_dir}"')
else: # else:
os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"') # os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
else: # else:
raise PermissionError() # raise PermissionError()
assert len(str(run_dir / 'supervisord.conf')) < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)' # assert len(str(run_dir / 'supervisord.conf')) < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
return run_dir # return run_dir
run_dir = (HOST_DIRS.site_runtime_path / get_collection_id(DATA_DIR=DATA_DIR)).resolve() # run_dir = (HOST_DIRS.site_runtime_path / get_collection_id(DATA_DIR=DATA_DIR)).resolve()
try: # try:
assert len(str(run_dir)) + len('/supervisord.sock') < 95 # assert len(str(run_dir)) + len('/supervisord.sock') < 95
except AssertionError: # except AssertionError:
run_dir = Path(tempfile.gettempdir()).resolve() / 'archivebox' / get_collection_id(DATA_DIR=DATA_DIR) # run_dir = Path(tempfile.gettempdir()).resolve() / 'archivebox' / get_collection_id(DATA_DIR=DATA_DIR)
assert len(str(run_dir)) + len('/supervisord.sock') < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)' # assert len(str(run_dir)) + len('/supervisord.sock') < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
with SudoPermission(uid=0, fallback=True): # with SudoPermission(uid=0, fallback=True):
run_dir.mkdir(parents=True, exist_ok=True) # run_dir.mkdir(parents=True, exist_ok=True)
if IS_ROOT or not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER): # if IS_ROOT or not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
if IS_ROOT: # if IS_ROOT:
with SudoPermission(uid=0): # with SudoPermission(uid=0):
if ARCHIVEBOX_USER == 0: # if ARCHIVEBOX_USER == 0:
# print(f'[yellow]:warning: Waring: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr) # # print(f'[yellow]:warning: Waring: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr)
os.system(f'chmod -R 777 "{run_dir}"') # os.system(f'chmod -R 777 "{run_dir}"')
else: # else:
os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"') # os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
else: # else:
raise PermissionError() # raise PermissionError()
except (PermissionError, AssertionError): # except (PermissionError, AssertionError):
# raise PermissionError(f'SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}') # # raise PermissionError(f'SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
print(f'[red]:cross_mark: ERROR: SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr) # print(f'[red]:cross_mark: ERROR: SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr)
return run_dir # return run_dir

View file

@ -100,7 +100,7 @@ def check_not_root():
def check_data_dir_permissions(): def check_data_dir_permissions():
from archivebox import DATA_DIR, CONSTANTS from archivebox import DATA_DIR
from archivebox.misc.logging import STDERR from archivebox.misc.logging import STDERR
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, DEFAULT_PUID, DEFAULT_PGID, IS_ROOT, USER from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, DEFAULT_PUID, DEFAULT_PGID, IS_ROOT, USER
@ -119,8 +119,8 @@ def check_data_dir_permissions():
if data_owned_by_root or data_owner_doesnt_match or data_not_writable: if data_owned_by_root or data_owner_doesnt_match or data_not_writable:
STDERR.print(f'[violet]Hint:[/violet] Change the current ownership [red]{data_dir_uid}[/red]:{data_dir_gid} (PUID:PGID) to a non-user & group that will run ArchiveBox, e.g.:') STDERR.print(f'[violet]Hint:[/violet] Change the current ownership [red]{data_dir_uid}[/red]:{data_dir_gid} (PUID:PGID) to a non-user & group that will run ArchiveBox, e.g.:')
STDERR.print(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {DATA_DIR.resolve()}') STDERR.print(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {DATA_DIR.resolve()}')
STDERR.print(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.LIB_DIR.resolve()}') # STDERR.print(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.LIB_DIR.resolve()}')
STDERR.print(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.TMP_DIR.resolve()}') # STDERR.print(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.TMP_DIR.resolve()}')
STDERR.print() STDERR.print()
STDERR.print('[blue]More info:[/blue]') STDERR.print('[blue]More info:[/blue]')
STDERR.print(' [link=https://github.com/ArchiveBox/ArchiveBox#storage-requirements]https://github.com/ArchiveBox/ArchiveBox#storage-requirements[/link]') STDERR.print(' [link=https://github.com/ArchiveBox/ArchiveBox#storage-requirements]https://github.com/ArchiveBox/ArchiveBox#storage-requirements[/link]')

View file

@ -1,6 +1,6 @@
[project] [project]
name = "archivebox" name = "archivebox"
version = "0.8.5rc18" version = "0.8.5rc19"
requires-python = ">=3.10" requires-python = ">=3.10"
description = "Self-hosted internet archiving solution." description = "Self-hosted internet archiving solution."
authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}] authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}]
@ -77,7 +77,6 @@ dependencies = [
"atomicwrites==1.4.1", "atomicwrites==1.4.1",
"django-taggit==1.3.0", "django-taggit==1.3.0",
"base32-crockford==0.3.0", "base32-crockford==0.3.0",
"platformdirs>=4.3.6",
# "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7", # "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7",
"pydantic-pkgr>=0.4.13", "pydantic-pkgr>=0.4.13",
############# Plugin Dependencies ################ ############# Plugin Dependencies ################