From d747cf7f314c48549c1bc4ee90899ebf0985477f Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Fri, 4 Oct 2024 21:03:02 -0700 Subject: [PATCH] fix SYSTEM_TMP_DIR and SYSTEM_LIB_DIR in docker --- Dockerfile | 11 +++++++---- archivebox/config/constants.py | 25 +++++++++++-------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Dockerfile b/Dockerfile index 83a83b5e..fb363bc5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -227,7 +227,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T && rm -rf /var/lib/apt/lists/* \ && ln -s "$CHROME_BINARY" /usr/bin/chromium-browser \ && mkdir -p "/home/${ARCHIVEBOX_USER}/.config/chromium/Crash Reports/pending/" \ - && chown -R $ARCHIVEBOX_USER "/home/${ARCHIVEBOX_USER}/.config" \ + && chown -R "$DEFAULT_PUID:$DEFAULT_PGID" "/home/${ARCHIVEBOX_USER}/.config" \ && mkdir -p "$PLAYWRIGHT_BROWSERS_PATH" \ && chown -R $ARCHIVEBOX_USER "$PLAYWRIGHT_BROWSERS_PATH" \ # Save version info @@ -237,11 +237,12 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T ) | tee -a /VERSION.txt # Install Node dependencies -WORKDIR "$CODE_DIR" -COPY --chown=root:root --chmod=755 "package.json" "package-lock.json" "$CODE_DIR"/ +WORKDIR "$CODE_DIR/lib/npm" +COPY --chown=root:root --chmod=755 "package.json" "package-lock.json" "$CODE_DIR/lib/npm" RUN --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \ echo "[+] Installing NPM extractor dependencies from package.json..." \ - && npm ci --prefer-offline --no-audit --cache /root/.npm \ + && npm ci --prefix="$CODE_DIR/lib/npm" --prefer-offline --no-audit --cache /root/.npm \ + && chown -R "$DEFAULT_PUID:$DEFAULT_PGID" "$CODE_DIR/lib" \ && ( \ which node && node --version \ && which npm && npm version \ @@ -285,6 +286,8 @@ RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH WORKDIR "$DATA_DIR" RUN openssl rand -hex 16 > /etc/machine-id ENV IN_DOCKER=True \ + SYSTEM_LIB_DIR=/app/lib \ + SYSTEM_TMP_DIR=/tmp \ DISPLAY=novnc:0.0 \ CUSTOM_TEMPLATES_DIR=/data/user_templates \ GOOGLE_API_KEY=no \ diff --git a/archivebox/config/constants.py b/archivebox/config/constants.py index fbe0358e..be156127 100644 --- a/archivebox/config/constants.py +++ b/archivebox/config/constants.py @@ -55,15 +55,6 @@ def _detect_installed_version(PACKAGE_DIR: Path): VERSION: str = _detect_installed_version(PACKAGE_DIR) -# Ensure system tmp dir and data dir exist as we need them to run almost everything -if 'SYSTEM_TMP_DIR' in os.environ: - SYSTEM_TMP_DIR = Path(os.environ['SYSTEM_TMP_DIR']) -else: - SYSTEM_TMP_DIR = Path(tempfile.gettempdir()) / 'archivebox' - SYSTEM_TMP_DIR.mkdir(parents=True, exist_ok=True) - -DATA_DIR_TMP_DIR = DATA_DIR / 'tmp' / machineid.hashed_id('archivebox')[:16] -DATA_DIR_TMP_DIR.mkdir(parents=True, exist_ok=True) class ConstantsDict(Mapping): @@ -72,7 +63,6 @@ class ConstantsDict(Mapping): ARCH = platform.machine().lower() # arm64, x86_64, etc. LIB_DIR_SCOPE = f'{ARCH}-{OS}' + ('-docker' if IN_DOCKER else '') - PACKAGE_DIR: Path = PACKAGE_DIR # archivebox source code dir DATA_DIR: Path = DATA_DIR # archivebox user data dir ARCHIVE_DIR: Path = ARCHIVE_DIR # archivebox snapshot data dir @@ -94,16 +84,18 @@ class ConstantsDict(Mapping): LIB_DIR_NAME: str = 'lib' TMP_DIR_NAME: str = 'tmp' - SYSTEM_TMP_DIR: Path = SYSTEM_TMP_DIR - DATA_DIR_TMP_DIR: Path = DATA_DIR_TMP_DIR + SYSTEM_TMP_DIR: Path = Path(os.environ['SYSTEM_TMP_DIR']) if 'SYSTEM_TMP_DIR' in os.environ else (Path(tempfile.gettempdir()) / 'archivebox') + DATA_DIR_TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME / machineid.hashed_id('archivebox')[:16] + SYSTEM_LIB_DIR: Path = Path(os.environ['SYSTEM_LIB_DIR']) if 'SYSTEM_LIB_DIR' in os.environ else (PACKAGE_DIR / LIB_DIR_NAME) + DATA_DIR_LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / LIB_DIR_SCOPE ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME CACHE_DIR: Path = DATA_DIR / CACHE_DIR_NAME LOGS_DIR: Path = DATA_DIR / LOGS_DIR_NAME - LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / LIB_DIR_SCOPE # e.g. data/lib/arm64-darwin-docker - TMP_DIR: Path = SYSTEM_TMP_DIR if IN_DOCKER else DATA_DIR_TMP_DIR # e.g. /var/folders/bk/63jsns1s.../T/archivebox or ./data/tmp/abcwe324234 + LIB_DIR: Path = SYSTEM_LIB_DIR if IN_DOCKER else DATA_DIR_LIB_DIR # e.g. /app/lib or ./data/lib/arm64-darwin-docker + TMP_DIR: Path = SYSTEM_TMP_DIR if IN_DOCKER else DATA_DIR_TMP_DIR # e.g. /tmp/archivebox or ./data/tmp/abcwe324234 CUSTOM_TEMPLATES_DIR: Path = DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME USER_PLUGINS_DIR: Path = DATA_DIR / USER_PLUGINS_DIR_NAME @@ -323,3 +315,8 @@ CONSTANTS_CONFIG = CONSTANTS.__benedict__() # add all key: values to globals() for easier importing globals().update(CONSTANTS) + + +# these need to always exist as we need them to run almost everything +CONSTANTS.LIB_DIR.mkdir(parents=True, exist_ok=True) +CONSTANTS.TMP_DIR.mkdir(parents=True, exist_ok=True)