From 5e351f6ba6a9c5fe387855e0632c9531f9b9e201 Mon Sep 17 00:00:00 2001
From: Nick Sweeting <github@sweeting.me>
Date: Tue, 8 Oct 2024 01:47:38 -0700
Subject: [PATCH] more docker dependency tweaks

---
 Dockerfile         | 29 ++++++++++++++++-------------
 archivebox/main.py |  2 +-
 pyproject.toml     |  2 +-
 uv.lock            |  2 +-
 4 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 8b0a8fd0..325514b4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -18,6 +18,7 @@
 
 # Use Debian 12 w/ faster package updates: https://packages.debian.org/bookworm-backports/
 FROM python:3.11-slim-bookworm
+# Alternative base image (disabled): FROM debian:bookworm-backports
 
 LABEL name="archivebox" \
     maintainer="Nick Sweeting <dockerfile@archivebox.io>" \
@@ -65,16 +66,20 @@ ENV PYTHON_VERSION=3.11 \
 # User config
 ENV ARCHIVEBOX_USER="archivebox" \
     DEFAULT_PUID=911 \
-    DEFAULT_PGID=911
+    DEFAULT_PGID=911 \
+    IN_DOCKER=True
 
 # Global paths
 ENV CODE_DIR=/app \
     DATA_DIR=/data \
     GLOBAL_VENV=/venv \
+    SYSTEM_LIB_DIR=/usr/share/archivebox \
+    SYSTEM_TMP_DIR=/tmp/archivebox \
     PLAYWRIGHT_BROWSERS_PATH=/browsers
     # TODO: add TMP_DIR and LIB_DIR?
 
 # Build shell config
+# Disabled: ENV PATH="$SYSTEM_LIB_DIR/bin:$GLOBAL_VENV/bin:$PATH"
 SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "errtrace", "-o", "nounset", "-c"] 
 
 ######### System Environment ####################################
@@ -144,10 +149,11 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
     # NOT NEEDED because we're using a pre-built python image, keeping this here in case we switch back to custom-building our own:
     # && apt-get update -qq \
     # && apt-get install -qq -y -t bookworm-backports --no-upgrade \
-    #     python${PYTHON_VERSION} python${PYTHON_VERSION}-minimal python3-pip \
+    #     python${PYTHON_VERSION} python${PYTHON_VERSION}-minimal python3-pip python${PYTHON_VERSION}-venv pipx \
     # && rm -rf /var/lib/apt/lists/* \
     # tell PDM to allow using global system python site packages
     # && rm /usr/lib/python3*/EXTERNALLY-MANAGED \
+    # && ln -s "$(which python${PYTHON_VERSION})" /usr/bin/python \
     # create global virtual environment GLOBAL_VENV to use (better than using pip install --global)
     # && python3 -m venv --system-site-packages --symlinks $GLOBAL_VENV \
     # && python3 -m venv --system-site-packages $GLOBAL_VENV \
@@ -183,6 +189,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
     ) | tee -a /VERSION.txt
 
 
+
 ######### Extractor Dependencies ##################################
 
 # Install apt dependencies
@@ -190,7 +197,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
     echo "[+] Installing APT extractor dependencies globally using apt..." \
     && apt-get update -qq \
     && apt-get install -qq -y -t bookworm-backports \
-        curl wget git yt-dlp ffmpeg ripgrep \
+        curl wget git ffmpeg ripgrep \
         # Packages we have also needed in the past:
         # youtube-dl wget2 aria2 python3-pyxattr rtmpdump libfribidi-bin mpv \
     && rm -rf /var/lib/apt/lists/* \
@@ -198,7 +205,6 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
     && ( \
         which curl && curl --version | head -n1 \
         && which wget && wget --version 2>&1 | head -n1 \
-        && which yt-dlp && yt-dlp --version 2>&1 | head -n1 \
         && which git && git --version 2>&1 | head -n1 \
         && which rg && rg --version 2>&1 | head -n1 \
         && echo -e '\n\n' \
@@ -237,12 +243,12 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
     ) | tee -a /VERSION.txt
 
 # Install Node dependencies
-WORKDIR "/usr/share/archivebox/npm"
-COPY --chown=root:root --chmod=755 "etc/package.json" "/usr/share/archivebox/npm"
+WORKDIR "$SYSTEM_LIB_DIR/npm"
+COPY "etc/package.json" "$SYSTEM_LIB_DIR/npm/"
 RUN --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \
     echo "[+] Installing NPM extractor dependencies from package.json..." \
-    && npm install --prefix="/usr/share/archivebox/npm" --prefer-offline --no-fund --no-audit --cache /root/.npm \
-    && chown -R "$DEFAULT_PUID:$DEFAULT_PGID" "/usr/share/archivebox" \
+    && npm install --prefix="$SYSTEM_LIB_DIR/npm" --prefer-offline --no-fund --no-audit --cache /root/.npm \
+    && chown -R "$DEFAULT_PUID:$DEFAULT_PGID" "$SYSTEM_LIB_DIR" \
     && ( \
         which node && node --version \
         && which npm && npm version \
@@ -277,7 +283,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
 COPY --chown=root:root --chmod=755 "." "$CODE_DIR/"
 RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
     echo "[*] Installing PIP ArchiveBox package from $CODE_DIR..." \
-    && pip install -e "${CODE_DIR}[sonic,ldap]" \
+    && pip install -e "${CODE_DIR}[all]" \
     && rm -rf /var/lib/apt/lists/*
 
 ####################################################
@@ -286,10 +292,7 @@ RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH
 WORKDIR "$DATA_DIR"
 RUN openssl rand -hex 16 > /etc/machine-id \
     && chown -R "$DEFAULT_PUID:$DEFAULT_PGID" "/tmp"
-ENV IN_DOCKER=True \
-    SYSTEM_LIB_DIR=/usr/share/archivebox \
-    SYSTEM_TMP_DIR=/tmp/archivebox \
-    GOOGLE_API_KEY=no \
+ENV GOOGLE_API_KEY=no \
     GOOGLE_DEFAULT_CLIENT_ID=no \
     GOOGLE_DEFAULT_CLIENT_SECRET=no \
     ALLOWED_HOSTS=*
diff --git a/archivebox/main.py b/archivebox/main.py
index e1779b8b..f96b6205 100755
--- a/archivebox/main.py
+++ b/archivebox/main.py
@@ -275,7 +275,7 @@ def version(quiet: bool=False,
     
     data_dir_stat = Path(DATA_DIR).stat()
     data_dir_uid, data_dir_gid = data_dir_stat.st_uid, data_dir_stat.st_gid
-    data_owned_by_root = data_dir_uid == 0 or data_dir_gid == 0
+    data_owned_by_root = data_dir_uid == 0
     
     data_owned_by_default_user = data_dir_uid == DEFAULT_PUID or data_dir_gid == DEFAULT_PGID
     data_owner_doesnt_match = (data_dir_uid != ARCHIVEBOX_USER and data_dir_gid != ARCHIVEBOX_GROUP) and not IS_ROOT
diff --git a/pyproject.toml b/pyproject.toml
index ac7784c7..42558fd8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "archivebox"
-version = "0.8.5rc6"
+version = "0.8.5rc7"
 requires-python = ">=3.10"
 description = "Self-hosted internet archiving solution."
 authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}]
diff --git a/uv.lock b/uv.lock
index 366d9229..566a42cc 100644
--- a/uv.lock
+++ b/uv.lock
@@ -41,7 +41,7 @@ wheels = [
 
 [[package]]
 name = "archivebox"
-version = "0.8.5rc6"
+version = "0.8.5rc7"
 source = { editable = "." }
 dependencies = [
     { name = "atomicwrites" },