diff --git a/archivebox/config/legacy.py b/archivebox/config/legacy.py index 33aeca2b..2a726c39 100644 --- a/archivebox/config/legacy.py +++ b/archivebox/config/legacy.py @@ -570,6 +570,18 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CON output_dir = out_dir or CONSTANTS.DATA_DIR assert isinstance(output_dir, Path) and isinstance(CONSTANTS.PACKAGE_DIR, Path) + + from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, SudoPermission + from archivebox.config.paths import _get_collection_id + + # if running as root, chown the data dir to the archivebox user to make sure it's accessible to the archivebox user + if IS_ROOT: + with SudoPermission(uid=0): + os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}"') + _get_collection_id(DATA_DIR=CONSTANTS.DATA_DIR, force_create=True) + if IS_ROOT: + with SudoPermission(uid=0): + os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}"/*') bump_startup_progress_bar() try: @@ -596,7 +608,7 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CON except Exception as e: bump_startup_progress_bar(advance=1000) - is_using_meta_cmd = any(ignored_subcommand in sys.argv for ignored_subcommand in ('help', 'version', '--help', '--version')) + is_using_meta_cmd = any(ignored_subcommand in sys.argv for ignored_subcommand in ('help', 'version', '--help', '--version', 'init')) if not is_using_meta_cmd: # show error message to user only if they're not running a meta command / just trying to get help STDERR.print() diff --git a/archivebox/config/paths.py b/archivebox/config/paths.py index c3b76e88..217dfbe9 100644 --- a/archivebox/config/paths.py +++ b/archivebox/config/paths.py @@ -21,9 +21,7 @@ DATABASE_FILE = DATA_DIR / 'index.sqlite3' ############################################################################################# -@cache -def get_collection_id(DATA_DIR=DATA_DIR) -> str: - """Get a short, stable, unique ID for the current collection (e.g. abc45678)""" +def _get_collection_id(DATA_DIR=DATA_DIR, force_create=False) -> str: collection_id_file = DATA_DIR / '.archivebox_id' try: @@ -43,7 +41,7 @@ def get_collection_id(DATA_DIR=DATA_DIR) -> str: try: # only persist collection_id file if we already have an index.sqlite3 file present # otherwise we might be running in a directory that is not a collection, no point creating cruft files - if os.path.isfile(DATABASE_FILE) and os.access(DATA_DIR, os.W_OK): + if os.path.isfile(DATABASE_FILE) and os.access(DATA_DIR, os.W_OK) or force_create: collection_id_file.write_text(collection_id) # if we're running as root right now, make sure the collection_id file is owned by the archivebox user @@ -57,6 +55,11 @@ def get_collection_id(DATA_DIR=DATA_DIR) -> str: pass return collection_id +@cache +def get_collection_id(DATA_DIR=DATA_DIR) -> str: + """Get a short, stable, unique ID for the current collection (e.g. abc45678)""" + return _get_collection_id(DATA_DIR=DATA_DIR) + @cache def get_machine_id() -> str: """Get a short, stable, unique ID for the current machine (e.g. abc45678)""" diff --git a/archivebox/main.py b/archivebox/main.py index 9a9e3ed5..87ed9aea 100755 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -324,16 +324,25 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat from core.models import Snapshot from rich import print + + from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP + from archivebox.config.paths import _get_collection_id + + # if running as root, chown the data dir to the archivebox user to make sure it's accessible to the archivebox user + if IS_ROOT: + with SudoPermission(uid=0): + os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}"') + _get_collection_id() + if IS_ROOT: + with SudoPermission(uid=0): + os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}"/*') + + # if os.access(out_dir / CONSTANTS.JSON_INDEX_FILENAME, os.F_OK): + # print("[red]:warning: This folder contains a JSON index. It is deprecated, and will no longer be kept up to date automatically.[/red]", file=sys.stderr) + # print("[red] You can run `archivebox list --json --with-headers > static_index.json` to manually generate it.[/red]", file=sys.stderr) - out_dir.mkdir(exist_ok=True) is_empty = not len(set(os.listdir(out_dir)) - CONSTANTS.ALLOWED_IN_DATA_DIR) - - if os.access(out_dir / CONSTANTS.JSON_INDEX_FILENAME, os.F_OK): - print("[red]:warning: This folder contains a JSON index. It is deprecated, and will no longer be kept up to date automatically.[/red]", file=sys.stderr) - print("[red] You can run `archivebox list --json --with-headers > static_index.json` to manually generate it.[/red]", file=sys.stderr) - - existing_index = os.access(CONSTANTS.DATABASE_FILE, os.F_OK) - + existing_index = os.path.isfile(CONSTANTS.DATABASE_FILE) if is_empty and not existing_index: print(f'[turquoise4][+] Initializing a new ArchiveBox v{VERSION} collection...[/turquoise4]') print('[green]----------------------------------------------------------------------[/green]') @@ -376,7 +385,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat for migration_line in apply_migrations(out_dir): sys.stdout.write(f' {migration_line}\n') - assert os.access(CONSTANTS.DATABASE_FILE, os.R_OK) + assert os.path.isfile(CONSTANTS.DATABASE_FILE) and os.access(CONSTANTS.DATABASE_FILE, os.R_OK) print() print(f' √ ./{CONSTANTS.DATABASE_FILE.relative_to(DATA_DIR)}') diff --git a/archivebox/plugins_auth/ldap/apps.py b/archivebox/plugins_auth/ldap/apps.py index a2d44706..d993bdb0 100644 --- a/archivebox/plugins_auth/ldap/apps.py +++ b/archivebox/plugins_auth/ldap/apps.py @@ -19,13 +19,16 @@ from .settings import LDAP_CONFIG, get_ldap_lib ###################### Config ########################## -def get_LDAP_LIB_path(paths): +def get_LDAP_LIB_path(paths=()): LDAP_LIB = get_ldap_lib()[0] if not LDAP_LIB: return None # check that LDAP_LIB path is in one of the specified site packages dirs lib_path = Path(inspect.getfile(LDAP_LIB)) + if not paths: + return lib_path + for site_packges_dir in paths: if str(lib_path.parent.parent.resolve()) == str(Path(site_packges_dir).resolve()): return lib_path @@ -57,7 +60,7 @@ class LdapBinary(BaseBinary): "packages": lambda: ['python-ldap>=3.4.3', 'django-auth-ldap>=4.1.0'], }, apt.name: { - "abspath": lambda: get_LDAP_LIB_path((*USER_SITE_PACKAGES, *SYS_SITE_PACKAGES)), + "abspath": lambda: get_LDAP_LIB_path(), "version": lambda: get_LDAP_LIB_version(), "packages": lambda: ['libssl-dev', 'libldap2-dev', 'libsasl2-dev', 'python3-ldap', 'python3-msgpack', 'python3-mutagen'], }, diff --git a/archivebox/vendor/pydantic-pkgr b/archivebox/vendor/pydantic-pkgr index ec4c2d5f..e2f6b105 160000 --- a/archivebox/vendor/pydantic-pkgr +++ b/archivebox/vendor/pydantic-pkgr @@ -1 +1 @@ -Subproject commit ec4c2d5f5a034ea6c10a5337c3115fbe1504f52b +Subproject commit e2f6b10550f41e64817908eef3feb0aa33071969 diff --git a/pyproject.toml b/pyproject.toml index f8353b23..7d22d047 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "archivebox" -version = "0.8.5rc28" +version = "0.8.5rc31" requires-python = ">=3.10" description = "Self-hosted internet archiving solution." authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}] diff --git a/uv.lock b/uv.lock index e3b66ecb..ef52a26a 100644 --- a/uv.lock +++ b/uv.lock @@ -41,7 +41,7 @@ wheels = [ [[package]] name = "archivebox" -version = "0.8.5rc28" +version = "0.8.5rc31" source = { editable = "." } dependencies = [ { name = "atomicwrites" },