Merge branch 'dev' into specific-version-banner

2025-05-15 07:34:27 -04:00 · 2024-01-19 04:01:32 -08:00 · 2024-01-19 04:01:32 -08:00 · d0cd84a2af
commit d0cd84a2af
parent 5de45dbf30 1773146833
18 changed files with 293 additions and 1052 deletions
--- a/.github/workflows/pip.yml
+++ b/.github/workflows/pip.yml
@ -35,7 +35,7 @@ jobs:
          cache: true
      - name: Install dependencies
-        run: pdm install --fail-fast --no-lock --group :all --no-self
+        run: pdm install --fail-fast --no-lock --dev --group=':all' --no-self
      - name: Build package
        run: |
--- a/Dockerfile
+++ b/Dockerfile
@ -167,7 +167,6 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
        curl wget git yt-dlp ffmpeg ripgrep \
        # Packages we have also needed in the past:
        # youtube-dl wget2 aria2 python3-pyxattr rtmpdump libfribidi-bin mpv \
        # fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
    && rm -rf /var/lib/apt/lists/* \
    # Save version info
    && ( \
@ -183,6 +182,11 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/ms-playwright,sharing=locked,id=browsers-$TARGETARCH$TARGETVARIANT \
    echo "[+] Installing Browser binary dependencies to $PLAYWRIGHT_BROWSERS_PATH..." \
    && apt-get update -qq \
    && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
        fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-khmeros fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
        # chrome can run without dbus/upower technically, it complains about missing dbus but should run ok anyway
        # libxss1 dbus dbus-x11 upower \
    # && service dbus start \
    && if [[ "$TARGETPLATFORM" == *amd64* || "$TARGETPLATFORM" == *arm64* ]]; then \
        # install Chromium using playwright
        pip install playwright \
@ -192,7 +196,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
    else \
        # fall back to installing Chromium via apt-get on platforms not supported by playwright (e.g. risc, ARMv7, etc.) 
        apt-get install -qq -y -t bookworm-backports --no-install-recommends \
-            chromium fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
+            chromium \
        && export CHROME_BINARY="$(which chromium)"; \
    fi \
    && rm -rf /var/lib/apt/lists/* \
--- a/README.md
+++ b/README.md
@ -1,27 +1,16 @@
-<div align="center">
+<div align="center" style="text-align: center; width: 100%">
-<em><img src="https://archivebox.io/icon.png" height="90px"></em>
+<img src="https://archivebox.io/icon.png" height="90px"/>
 <h1>ArchiveBox<br/><sub>Open-source self-hosted web archiving.</sub></h1>
 <br/>
-▶️ <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart">Quickstart</a> |
+▶️ <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart">Quickstart</a> | <a href="https://demo.archivebox.io">Demo</a> | <a href="https://github.com/ArchiveBox/ArchiveBox">GitHub</a> | <a href="https://github.com/ArchiveBox/ArchiveBox/wiki">Documentation</a> | <a href="#background--motivation">Info & Motivation</a> | <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community">Community</a>
 <a href="https://demo.archivebox.io">Demo</a> |
 <a href="https://github.com/ArchiveBox/ArchiveBox">GitHub</a> |
 <a href="https://github.com/ArchiveBox/ArchiveBox/wiki">Documentation</a> |
 <a href="#background--motivation">Info & Motivation</a> |
 <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community">Community</a>
 <br/>
 <!--<a href="http://webchat.freenode.net?channels=ArchiveBox&uio=d4"><img src="https://img.shields.io/badge/Community_chat-IRC-%2328A745.svg"/></a>-->
-<a href="https://github.com/ArchiveBox/ArchiveBox/blob/dev/LICENSE"><img src="https://img.shields.io/badge/Open_source-MIT-green.svg?logo=git&logoColor=green"/></a>
+<a href="https://github.com/ArchiveBox/ArchiveBox/blob/dev/LICENSE"><img src="https://img.shields.io/badge/Open_source-MIT-green.svg?logo=git&logoColor=green"/></a> <a href="https://github.com/ArchiveBox/ArchiveBox/commits/dev"><img src="https://img.shields.io/github/last-commit/ArchiveBox/ArchiveBox.svg?logo=Sublime+Text&logoColor=green&label=Active"/></a> &nbsp; <a href="https://github.com/ArchiveBox/ArchiveBox"><img src="https://img.shields.io/github/stars/ArchiveBox/ArchiveBox.svg?logo=github&label=Stars&logoColor=blue"/></a> &nbsp; <a href="https://pypi.org/project/archivebox/"><img src="https://img.shields.io/pypi/dm/archivebox?label=PyPI%20Installs&labelColor=orange&color=yellow"/></a> <a href="https://chromewebstore.google.com/detail/archivebox-exporter/habonpimjphpdnmcfkaockjnffodikoj"><img src="https://img.shields.io/chrome-web-store/users/habonpimjphpdnmcfkaockjnffodikoj?label=Chrome%20Web%20Store&color=%231973e8"/></a> <a href="https://hub.docker.com/r/archivebox/archivebox"><img src="https://img.shields.io/docker/pulls/archivebox/archivebox.svg?label=Docker+Pulls"/></a>
 <a href="https://github.com/ArchiveBox/ArchiveBox"><img src="https://img.shields.io/github/stars/ArchiveBox/ArchiveBox.svg?logo=github&label=Stars&logoColor=blue"/></a>
 <a href="https://github.com/ArchiveBox/ArchiveBox/commits/dev"><img src="https://img.shields.io/github/last-commit/ArchiveBox/ArchiveBox.svg?logo=Sublime+Text&logoColor=green&label=Active"/></a> &nbsp;
 <a href="https://pypi.org/project/archivebox/"><img src="https://img.shields.io/badge/Python-yellow.svg?logo=python&logoColor=yellow"/></a>
 <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Install#dependencies"><img src="https://img.shields.io/badge/Chromium-orange.svg?logo=Google+Chrome&logoColor=orange"/></a>
 <a href="https://hub.docker.com/r/archivebox/archivebox"><img src="https://img.shields.io/badge/Docker-lightblue.svg?logo=docker&logoColor=lightblue"/></a>
 <!--<pre lang="bash" align="left"><code style="white-space: pre-line; text-align: left" align="left">
 curl -sSL 'https://get.archivebox.io' | sh    # (or see pip/brew/Docker instructions below)
@ -42,7 +31,7 @@ Without active preservation effort, everything on the internet eventually dissap
 📥 **You can feed ArchiveBox URLs one at a time, or schedule regular imports** from browser bookmarks or history, feeds like RSS, bookmark services like Pocket/Pinboard, and more. See <a href="#input-formats">input formats</a> for a full list.
-<img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/90f1ce3c-75bb-401d-88ed-6297694b76ae" alt="snapshot detail page" align="right" width="190px"/>
+<img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/90f1ce3c-75bb-401d-88ed-6297694b76ae" alt="snapshot detail page" align="right" width="190px" style="float: right"/>
 💾 **It saves snapshots of the URLs you feed it in several redundant formats.**  
 It also detects any content featured *inside* each webpage & extracts it out into a folder:
@ -69,7 +58,7 @@ It uses normal filesystem folders to organize archives (no complicated proprieta
 The goal is to sleep soundly knowing the part of the internet you care about will be automatically preserved in durable, easily accessible formats [for decades](#background--motivation) after it goes down.
-<div align="center">
+<div align="center" style="text-align: center">
 <br/><br/>
 <img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/5a7d95f2-6977-4de6-9f08-42851a1fe1d2" height="70px" alt="bookshelf graphic"> &nbsp; <img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/b2765a33-0d1e-4019-a1db-920c7e00e20e" height="75px" alt="logo" align="top"/> &nbsp; <img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/5a7d95f2-6977-4de6-9f08-42851a1fe1d2" height="70px" alt="bookshelf graphic">
 <br/><br/>
@ -85,10 +74,10 @@ The goal is to sleep soundly knowing the part of the internet you care about wil
 ```bash
 # Get ArchiveBox with Docker or Docker Compose (recommended)
-docker run -v $PWD/data:/data -it archivebox/archivebox:dev init --setup
+docker run -v $PWD/data:/data -p 8000:8000 -it archivebox/archivebox
 # Or install with your preferred package manager (see Quickstart below for apt, brew, and more)
-pip3 install archivebox
+pip install archivebox
 # Or use the optional auto setup script to install it
 curl -sSL 'https://get.archivebox.io' | sh
@ -107,7 +96,7 @@ archivebox list 'https://example.com'     # use the CLI commands (--help for mor
 ls ./archive/*/index.json                 # or browse directly via the filesystem
 ```
-<div align="center">
+<div align="center" style="text-align: center">
 <br/><br/>
 <img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/8d67382c-e0ce-4286-89f7-7915f09b930c" width="22%" alt="cli init screenshot" align="top">
 <img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/dad2bc51-e7e5-484e-bb26-f956ed692d16" width="22%" alt="cli init screenshot" align="top">
@ -143,7 +132,7 @@ ls ./archive/*/index.json                 # or browse directly via the filesyste
 <br/>
-<div align="center">
+<div align="center" style="text-align: center">
 <br/>
 <img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/0db52ea7-4a2c-441d-b47f-5553a5d8fe96" width="49%" alt="grass"/><img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/0db52ea7-4a2c-441d-b47f-5553a5d8fe96" width="49%" alt="grass"/>
 </div>
@ -327,6 +316,7 @@ See the <a href="https://github.com/ArchiveBox/pip-archivebox"><code>pip-archive
 <li>Arch: <a href="https://aur.archlinux.org/packages/archivebox/"><code>yay -S archivebox</code></a> (contributed by <a href="https://github.com/imlonghao"><code>@imlonghao</code></a>)</li>
 <li>FreeBSD: <a href="https://github.com/ArchiveBox/ArchiveBox#%EF%B8%8F-easy-setup"><code>curl -sSL 'https://get.archivebox.io' | sh</code></a> (uses <code>pkg</code> + <code>pip3</code> under-the-hood)</li>
 <li>Nix: <a href="https://github.com/NixOS/nixpkgs/blob/master/pkgs/applications/misc/archivebox/default.nix"><code>nix-env --install archivebox</code></a> (contributed by <a href="https://github.com/siraben"><code>@siraben</code></a>)</li>
 <li>Guix: <a href="https://packages.guix.gnu.org/packages/archivebox/"><code>guix install archivebox</code></a> (contributed by <a href="https://github.com/rakino"><code>@rakino</code></a>)</li>
 <li>More: <a href="https://github.com/ArchiveBox/ArchiveBox/issues/new"><i>contribute another distribution...!</i></a></li>
 </ul>
 See <a href="#%EF%B8%8F-cli-usage">below</a> for usage examples using the CLI, Web UI, or filesystem/SQL/Python to manage your archive.
@ -356,6 +346,27 @@ See <a href="#%EF%B8%8F-cli-usage">below</a> for usage examples using the CLI, W
  <br/>
 </details>
 <details>
 <summary><img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/0c46e949-00fe-49c8-a613-ee14501c014c" alt="Self-hosting Platforms" height="28px" align="top"/><b>TrueNAS / YunoHost / Cloudron / UNRAID / etc.</b> (self-hosting solutions)</summary>
 <br/>
 > [!WARNING]  
 > *These are contributed by external volunteers and may lag behind the official `pip` channel.*
 <ul>
 <li><a href="https://dev.to/finloop/setting-up-archivebox-on-truenas-scale-1788">TrueNAS</a></li>
 <li><a href="https://unraid.net/community/apps?q=archivebox#r">UNRAID</a></li>
 <li><a href="https://github.com/YunoHost-Apps/archivebox_ynh">YunoHost</a></li>
 <li><a href="https://www.cloudron.io/store/io.archivebox.cloudronapp.html">Cloudron</a></li>
 <li><a href="https://github.com/ArchiveBox/ArchiveBox/pull/922/files#diff-00f0606e18b2618c3cc1667ca7c2b703b537af690ca71eba1330633587dcb1ee">AppImage</a></li>
 <li><a href="https://github.com/ArchiveBox/ArchiveBox/issues/986">Umbrel</a> (need contributors...)</li>
 <li>More: <a href="https://github.com/ArchiveBox/ArchiveBox/issues/new"><i>contribute another distribution...!</i></a></li>
 </ul>
 See <a href="#%EF%B8%8F-cli-usage">below</a> for usage examples using the CLI, Web UI, or filesystem/SQL/Python to manage your archive.
 <br/><br/>
 </details>
 <details>
 <summary><img src="https://user-images.githubusercontent.com/511499/117448723-1663b180-af0d-11eb-837f-d43959227810.png" alt="paid" height="27px" align="top"/> Paid hosting solutions (cloud VPS)</summary>
 <br/>
@ -423,7 +434,7 @@ archivebox help
 #### 🖥&nbsp; Web UI Usage
 ```bash
-archivebox manage createsuperuser  # set an admin password
+archivebox manage createsuperuser  # create admin user via CLI (or use ADMIN_PASSWORD env variable)
 archivebox server 0.0.0.0:8000     # open http://127.0.0.1:8000 to view it
 # you can also configure whether or not login is required for most features
@ -441,12 +452,12 @@ ls ./archive/*/index.html  # or inspect snapshots on the filesystem
 ```
 <br/>
-<div align="center">
+<div align="center" style="text-align: center">
 <img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/65f82532-18dd-49c5-86f1-02b1f3100e1e" width="49%" alt="grass"/><img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/65f82532-18dd-49c5-86f1-02b1f3100e1e" width="49%" alt="grass"/>
 </div>
 <br/>
-<div align="center">
+<div align="center" style="text-align: center">
 <sub>. . . . . . . . . . . . . . . . . . . . . . . . . . . .</sub>
 <br/><br/>
 <a href="https://demo.archivebox.io">DEMO: <code>https://demo.archivebox.io</code></a><br/>
@ -458,7 +469,7 @@ ls ./archive/*/index.html  # or inspect snapshots on the filesystem
 ---
-<div align="center">
+<div align="center" style="text-align: center">
 <img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/ac1f897a-8baa-4f8b-8ee8-7443611f258b" width="96%" alt="lego">
 </div>
@ -476,9 +487,9 @@ ArchiveBox supports many input formats for URLs, including Pocket & Pinboard exp
 - <img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/64078483-21d7-4eb1-aa6e-9ad55afe45b8" height="22px"/> TXT, RSS, XML, JSON, CSV, SQL, HTML, Markdown, or [any other text-based format...](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Import-a-list-of-URLs-from-a-text-file)
 - <img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/32b494e6-4de1-4984-8d88-dc02f18e5c34" height="22px"/> [Browser history](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive) or [browser bookmarks](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive) (see instructions for: [Chrome](https://support.google.com/chrome/answer/96816?hl=en), [Firefox](https://support.mozilla.org/en-US/kb/export-firefox-bookmarks-to-backup-or-transfer), [Safari](https://github.com/ArchiveBox/ArchiveBox/assets/511499/24ad068e-0fa6-41f4-a7ff-4c26fc91f71a), [IE](https://support.microsoft.com/en-us/help/211089/how-to-import-and-export-the-internet-explorer-favorites-folder-to-a-32-bit-version-of-windows), [Opera](https://help.opera.com/en/latest/features/#bookmarks:~:text=Click%20the%20import/-,export%20button,-on%20the%20bottom), [and more...](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive))
 - <img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/ff20d251-5347-4b85-ae9b-83037d0ac01e" height="22px"/> Browser extension [`archivebox-exporter`](https://github.com/tjhorner/archivebox-exporter) (realtime archiving from Chrome/Chromium/Firefox)
- <img src="https://getpocket.com/favicon.ico" height="22px"/> [Pocket](https://getpocket.com/export), [Pinboard](https://pinboard.in/export/), [Instapaper](https://www.instapaper.com/user), [Shaarli](https://shaarli.readthedocs.io/en/master/Usage/#importexport), [Delicious](https://www.groovypost.com/howto/howto/export-delicious-bookmarks-xml/), [Reddit Saved](https://github.com/csu/export-saved-reddit), [Wallabag](https://doc.wallabag.org/en/user/import/wallabagv2.html), [Unmark.it](http://help.unmark.it/import-export), [OneTab](https://www.addictivetips.com/web/onetab-save-close-all-chrome-tabs-to-restore-export-or-import/), [and more...](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive)
+- <img src="https://getpocket.com/favicon.ico" height="22px"/> [Pocket](https://getpocket.com/export), [Pinboard](https://pinboard.in/export/), [Instapaper](https://www.instapaper.com/user), [Shaarli](https://shaarli.readthedocs.io/en/master/Usage/#importexport), [Delicious](https://www.groovypost.com/howto/howto/export-delicious-bookmarks-xml/), [Reddit Saved](https://github.com/csu/export-saved-reddit), [Wallabag](https://doc.wallabag.org/en/user/import/wallabagv2.html), [Unmark.it](http://help.unmark.it/import-export), [OneTab](https://www.addictivetips.com/web/onetab-save-close-all-chrome-tabs-to-restore-export-or-import/), [Firefox Sync](https://github.com/ArchiveBox/ArchiveBox/issues/648), [and more...](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive)
-<img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/e1e5bd78-b0b6-45dc-914c-e1046fee4bc4" width="330px" align="right">
+<img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/e1e5bd78-b0b6-45dc-914c-e1046fee4bc4" width="330px" align="right" style="float: right"/>
 ```bash
@ -505,14 +516,14 @@ It also includes a built-in scheduled import feature with `archivebox schedule`
 Inside each Snapshot folder, ArchiveBox saves these different types of extractor outputs as plain files:
-<img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/ace0954a-ddac-4520-9d18-1c77b1ec50b2" width="330px" align="right">
+<img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/ace0954a-ddac-4520-9d18-1c77b1ec50b2" width="330px" align="right" style="float: right"/>
-`./archive/<timestamp>/*`
+`./archive/TIMESTAMP/*`
 - **Index:** `index.html` & `index.json` HTML and JSON index files containing metadata and details
 - **Title**, **Favicon**, **Headers** Response headers, site favicon, and parsed site title
 - **SingleFile:** `singlefile.html` HTML snapshot rendered with headless Chrome using SingleFile
- **Wget Clone:** `example.com/page-name.html` wget clone of the site with  `warc/<timestamp>.gz`
+- **Wget Clone:** `example.com/page-name.html` wget clone of the site with  `warc/TIMESTAMP.gz`
 - Chrome Headless
  - **PDF:** `output.pdf` Printed PDF of site using headless chrome
  - **Screenshot:** `screenshot.png` 1440x900 screenshot of site using headless chrome
@ -529,7 +540,7 @@ It does everything out-of-the-box by default, but you can disable or tweak [indi
 ## Configuration
-<img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/ea672e6b-4df5-49d8-b550-7f450951fd27" width="330px" align="right">
+<img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/ea672e6b-4df5-49d8-b550-7f450951fd27" width="330px" align="right" style="float: right"/>
 ArchiveBox can be configured via environment variables, by using the `archivebox config` CLI, or by editing `./ArchiveBox.conf` directly.
@ -579,12 +590,11 @@ To achieve high-fidelity archives in as many situations as possible, ArchiveBox
 <details>
 <summary><i>Expand to learn more about ArchiveBox's dependencies...</i></summary><br/>
-> *TIP: For better security, easier updating, and to avoid polluting your host system with extra dependencies,*
+> *TIP: For better security, easier updating, and to avoid polluting your host system with extra dependencies, **it is strongly recommended to use the [⭐️ official Docker image](https://github.com/ArchiveBox/ArchiveBox/wiki/Docker)** with everything pre-installed for the best experience.*
 > ***it is strongly recommended to use the [⭐️ official Docker image](https://github.com/ArchiveBox/ArchiveBox/wiki/Docker)** with everything pre-installed for the best experience.*
 These optional dependencies used for archiving sites include:
-<img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/62a02155-05d7-4f3e-8de5-75a50a145c4f" alt="archivebox --version CLI output screenshot showing dependencies installed" width="330px" align="right">
+<img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/62a02155-05d7-4f3e-8de5-75a50a145c4f" alt="archivebox --version CLI output screenshot showing dependencies installed" width="330px" align="right" style="float: right"/>
 - `chromium` / `chrome` (for screenshots, PDF, DOM HTML, and headless JS scripts)
@ -630,24 +640,20 @@ Data folders can be created anywhere (`~/archivebox` or `$PWD/data` as seen in o
 <br/>
 <details>
-<summary><i>Expand to learn more about the layout of ArchiveBox's data on-disk...</i></summary>
+<summary><i>Expand to learn more about the layout of ArchiveBox's data on-disk...</i></summary><br/>
 <br/>
-All `archivebox` CLI commands are designed to be run from inside an ArchiveBox data folder, starting with `archivebox init` to initialize a new collection inside an empty directory.
+All <code>archivebox</code> CLI commands are designed to be run from inside an ArchiveBox data folder, starting with <code>archivebox init</code> to initialize a new collection inside an empty directory.
-```bash
+<pre lang="bash"><code style="white-space: pre-line">mkdir ~/archivebox && cd ~/archivebox   # just an example, can be anywhere
-mkdir ~/archivebox && cd ~/archivebox   # just an example, can be anywhere
+archivebox init</code></pre>
 archivebox init
 ```
-The on-disk layout is optimized to be easy to browse by hand and durable long-term. The main index is a standard `index.sqlite3` database in the root of the data folder (it can also be [exported as static JSON/HTML](https://github.com/ArchiveBox/ArchiveBox/wiki/Publishing-Your-Archive#2-export-and-host-it-as-static-html)), and the archive snapshots are organized by date-added timestamp in the `./archive/` subfolder.
+The on-disk layout is optimized to be easy to browse by hand and durable long-term. The main index is a standard <code>index.sqlite3</code> database in the root of the data folder (it can also be <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Publishing-Your-Archive#2-export-and-host-it-as-static-html">exported as static JSON/HTML</a>), and the archive snapshots are organized by date-added timestamp in the <code>./archive/</code> subfolder.
-<img src="https://user-images.githubusercontent.com/511499/117453293-c7b91600-af12-11eb-8a3f-aa48b0f9da3c.png" width="400px" align="right">
+<img src="https://user-images.githubusercontent.com/511499/117453293-c7b91600-af12-11eb-8a3f-aa48b0f9da3c.png" width="400px" align="right" style="float: right"/>
-```bash
+<pre lang="bash"><code style="white-space: pre-line">/data/
 /data/
    index.sqlite3
    ArchiveBox.conf
    archive/
@ -660,18 +666,18 @@ The on-disk layout is optimized to be easy to browse by hand and durable long-te
            warc/1617687755.warc.gz
            git/somerepo.git
            ...
-```
+</code></pre>
-Each snapshot subfolder `./archive/<timestamp>/` includes a static `index.json` and `index.html` describing its contents, and the snapshot extractor outputs are plain files within the folder.
+Each snapshot subfolder <code>./archive/TIMESTAMP/</code> includes a static <code>index.json</code> and <code>index.html</code> describing its contents, and the snapshot extractor outputs are plain files within the folder.
 #### Learn More
 - https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Disk-Layout
 - https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#large-archives
 - https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#output-folder
 - https://github.com/ArchiveBox/ArchiveBox/wiki/Publishing-Your-Archive
 - https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives
 <h4>Learn More</h4>
 <ul>
 <li>https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Disk-Layout</li>
 <li>https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#large-archives</li>
 <li>https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#output-folder</li>
 <li>https://github.com/ArchiveBox/ArchiveBox/wiki/Publishing-Your-Archive</li>
 <li>https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives</li>
 </ul>
 </details>
 <br/>
@ -683,12 +689,10 @@ You can export the main index to browse it statically as plain HTML files in a f
 <br/>
 <details>
-<summary><i>Expand to learn how to export your ArchiveBox collection...</i></summary>
+<summary><i>Expand to learn how to export your ArchiveBox collection...</i></summary><br/>
 <br/>
-> *NOTE: These exports are not paginated, exporting many URLs or the entire archive at once may be slow.*
+> *NOTE: These exports are not paginated, exporting many URLs or the entire archive at once may be slow. Use the filtering CLI flags on the `archivebox list` command to export specific Snapshots or ranges.*
 > *Use the filtering CLI flags on the `archivebox list` command to export specific Snapshots or ranges.*
 ```bash
 # archivebox list --help
@ -715,7 +719,7 @@ The paths in the static exports are relative, make sure to keep them next to you
 ---
-<div align="center">
+<div align="center" style="text-align: center">
 <img src="https://docs.monadical.com/uploads/upload_b6900afc422ae699bfefa2dcda3306f3.png" width="100%" alt="security graphic"/>
 </div>
@ -942,7 +946,7 @@ If using Docker or NFS/SMB/FUSE for the `data/archive/` folder, you may need to
 <br/>
-<div align="center">
+<div align="center" style="text-align: center">
 <img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/ca85432e-a2df-40c6-968f-51a1ef99b24e" width="100%" alt="paisley graphic">
 </div>
@ -962,7 +966,7 @@ Vast treasure troves of knowledge are lost every day on the internet to link rot
 Whether it's to resist censorship by saving articles before they get taken down or edited, or just to save a collection of early 2010s flash games you love to play, having the tools to archive internet content enables you to save the stuff you care most about before it disappears.
-<div align="center">
+<div align="center" style="text-align: center">
 <img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/71e36bc5-1c94-44e2-92b6-405fa898c734" width="40%"/><br/>
 <sup><i>Image from <a href="https://perma.cc/">Perma.cc</a>...</i><br/></sup>
 </div>
@ -980,30 +984,29 @@ ArchiveBox archives the sites in **several different formats** beyond what publi
 ## Comparison to Other Projects
-<img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/4cac62a9-e8fb-425b-85a3-ca644aa6dd42" width="5%" align="right" alt="comparison"/> 
+<img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/4cac62a9-e8fb-425b-85a3-ca644aa6dd42" width="5%" align="right" alt="comparison" style="float: right"/> 
-> [!TIP]
+> **Check out our [community wiki](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community) for a list of web archiving tools and orgs.**
 > **Check out our [community page](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community) for an index of web archiving initiatives and projects.**
 A variety of open and closed-source archiving projects exist, but few provide a nice UI and CLI to manage a large, high-fidelity archive collection over time.
-ArchiveBox tries to be a robust, set-and-forget archiving solution suitable for archiving RSS feeds, bookmarks, or your entire browsing history (beware, it may be too big to store), ~~including private/authenticated content that you wouldn't otherwise share with a centralized service~~ (this is not recommended due to JS replay security concerns).
+<br/>
 <details>
 <summary><i>Click to read more...</i></summary><br/>
-### Comparison With Centralized Public Archives
+ArchiveBox tries to be a robust, set-and-forget archiving solution suitable for archiving RSS feeds, bookmarks, or your entire browsing history (beware, it may be too big to store), including private/authenticated content that you wouldn't otherwise share with a centralized service.
 <h3>Comparison With Centralized Public Archives</h3>
 Not all content is suitable to be archived in a centralized collection, whether because it's private, copyrighted, too large, or too complex. ArchiveBox hopes to fill that gap.
 By having each user store their own content locally, we can save much larger portions of everyone's browsing history than a shared centralized service would be able to handle. The eventual goal is to work towards federated archiving where users can share portions of their collections with each other.
-### Comparison With Other Self-Hosted Archiving Options
+<h3>Comparison With Other Self-Hosted Archiving Options</h3>
 ArchiveBox differentiates itself from [similar self-hosted projects](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community#Web-Archiving-Projects) by providing both a comprehensive CLI interface for managing your archive, a Web UI that can be used either independently or together with the CLI, and a simple on-disk data format that can be used without either.
 <details>
 <summary><i>Click to see the <b>⭐️ officially recommended</b> alternatives to ArchiveBox...</i></summary>
 <br/>
 *If you want better fidelity for very complex interactive pages with heavy JS/streams/API requests, check out [ArchiveWeb.page](https://archiveweb.page) and [ReplayWeb.page](https://replayweb.page).*
@ -1019,17 +1022,23 @@ ArchiveBox is neither the highest fidelity nor the simplest tool available for s
 <br/>
-<div align="center">
+<div align="center" style="text-align: center">
 <br/>
 <img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/04808ac2-3133-44fd-8703-3387e06dc851" width="100%" alt="dependencies graphic">
 </div>
 ## Internet Archiving Ecosystem
-<img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/78d8a725-97f4-47f5-b983-1f62843ddc51" width="14%" align="right"/>
+<img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/78d8a725-97f4-47f5-b983-1f62843ddc51" width="14%" align="right" style="float: right"/>
-Whether you want to learn which organizations are the big players in the web archiving space, want to find a specific open-source tool for your web archiving need, or just want to see where archivists hang out online, our Community Wiki page serves as an index of the broader web archiving community. Check it out to learn about some of the coolest web archiving projects and communities on the web!
+Our Community Wiki page serves as an index of the broader web archiving community.
 <ul>
    <li>See where archivists hang out online</li>
    <li>Explore other open-source tools for your web archiving needs</li>
    <li>Learn which organizations are the big players in the web archiving space</li>
 </ul>
 <details>
 <summary><i>Explore our index of web archiving software, blogs, and communities around the world...</i></summary>
@ -1062,13 +1071,13 @@ Whether you want to learn which organizations are the big players in the web arc
 ---
-<div align="center">
+<div align="center" style="text-align: center">
 <img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/897f7a88-1265-4aab-b80c-b1640afaad1f" width="100%" alt="documentation graphic">
 </div>
 # Documentation
-<img src="https://read-the-docs-guidelines.readthedocs-hosted.com/_images/logo-dark.png" width="13%" align="right"/>
+<img src="https://read-the-docs-guidelines.readthedocs-hosted.com/_images/logo-dark.png" width="13%" align="right" style="float: right"/>
 We use the [GitHub wiki system](https://github.com/ArchiveBox/ArchiveBox/wiki) and [Read the Docs](https://archivebox.readthedocs.io/en/latest/) (WIP) for documentation.
@ -1113,7 +1122,7 @@ You can also access the docs locally by looking in the [`ArchiveBox/docs/`](http
 ---
-<div align="center">
+<div align="center" style="text-align: center">
 <img src="https://github.com/ArchiveBox/ArchiveBox/assets/511499/e895e79f-5c7d-429b-ad8a-7df2cc183ca3" width="100%" alt="development">
 </div>
@ -1285,7 +1294,7 @@ https://stackoverflow.com/questions/1074212/how-can-i-see-the-raw-sql-queries-dj
 ArchiveBox [`extractors`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/extractors/media.py) are external binaries or Python/Node scripts that ArchiveBox runs to archive content on a page.
-Extractors take the URL of a page to archive, write their output to the filesystem `archive/<timestamp>/<extractorname>/...`, and return an [`ArchiveResult`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/core/models.py#:~:text=return%20qs-,class%20ArchiveResult,-(models.Model)%3A) entry which is saved to the database (visible on the `Log` page in the UI).
+Extractors take the URL of a page to archive, write their output to the filesystem `archive/TIMESTAMP/EXTRACTOR/...`, and return an [`ArchiveResult`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/core/models.py#:~:text=return%20qs-,class%20ArchiveResult,-(models.Model)%3A) entry which is saved to the database (visible on the `Log` page in the UI).
 *Check out how we added **[`archivebox/extractors/singlefile.py`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/extractors/singlefile.py)** as an example of the process: [Issue #399](https://github.com/ArchiveBox/ArchiveBox/issues/399) + [PR #403](https://github.com/ArchiveBox/ArchiveBox/pull/403).*
@ -1297,7 +1306,7 @@ Extractors take the URL of a page to archive, write their output to the filesyst
 1. [Open an issue](https://github.com/ArchiveBox/ArchiveBox/issues/new?assignees=&labels=changes%3A+behavior%2Cstatus%3A+idea+phase&template=feature_request.md&title=Feature+Request%3A+...) with your proposed implementation (please link to the pages of any new external dependencies you plan on using)
 2. Ensure any dependencies needed are easily installable via a package manager like `apt`, `brew`, `pip3`, `npm`
   (Ideally, prefer to use external programs available via `pip3` or `npm`, however we do support using any binary installable via package manager that exposes a CLI/Python API and writes output to stdout or the filesystem.)
-3. Create a new file in [`archivebox/extractors/<extractorname>.py`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/extractors) (copy an existing extractor like [`singlefile.py`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/extractors/singlefile.py) as a template)
+3. Create a new file in [`archivebox/extractors/EXTRACTOR.py`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/extractors) (copy an existing extractor like [`singlefile.py`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/extractors/singlefile.py) as a template)
 4. Add config settings to enable/disable any new dependencies and the extractor as a whole, e.g. `USE_DEPENDENCYNAME`, `SAVE_EXTRACTORNAME`, `EXTRACTORNAME_SOMEOTHEROPTION` in [`archivebox/config.py`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/config.py)
 5. Add a preview section to [`archivebox/templates/core/snapshot.html`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/templates/core/snapshot.html) to view the output, and a column to [`archivebox/templates/core/index_row.html`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/templates/core/index_row.html) with an icon for your extractor
 6. Add an integration test for your extractor in [`tests/test_extractors.py`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/tests/test_extractors.py)
@ -1364,7 +1373,7 @@ Extractors take the URL of a page to archive, write their output to the filesyst
 ---
-<div align="center">
+<div align="center" style="text-align: center">
 <br/><br/>
 <img src="https://raw.githubusercontent.com/Monadical-SAS/redux-time/HEAD/examples/static/jeremy.jpg" height="40px"/>
 <br/>
--- a/archivebox/core/admin.py
+++ b/archivebox/core/admin.py
@ -6,6 +6,7 @@ from contextlib import redirect_stdout
 from datetime import datetime, timezone
 from django.contrib import admin
 from django.db.models import Count
 from django.urls import path
 from django.utils.html import format_html
 from django.utils.safestring import mark_safe
@ -117,7 +118,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
    def get_queryset(self, request):
        self.request = request
-        return super().get_queryset(request).prefetch_related('tags')
+        return super().get_queryset(request).prefetch_related('tags').annotate(archiveresult_count=Count('archiveresult'))
    def tag_list(self, obj):
        return ', '.join(obj.tags.values_list('name', flat=True))
@ -199,7 +200,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
    def files(self, obj):
        return snapshot_icons(obj)
-    files.admin_order_field = 'updated'
+    files.admin_order_field = 'archiveresult_count'
    files.short_description = 'Files Saved'
    def size(self, obj):
@ -216,7 +217,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
            size_txt,
        )
-    size.admin_order_field = 'archiveresult__count'
+    size.admin_order_field = 'archiveresult_count'
    def url_str(self, obj):
        return format_html(
--- a/archivebox/extractors/wget.py
+++ b/archivebox/extractors/wget.py
@ -202,4 +202,9 @@ def wget_output_path(link: Link) -> Optional[str]:
    if search_dir.is_dir():
        return domain(link.url).replace(":", "+")
    # fallback to just the domain dir without port
    search_dir = Path(link.link_dir) / domain(link.url).split(":", 1)[0]
    if search_dir.is_dir():
        return domain(link.url).split(":", 1)[0]
    return None
--- a/archivebox/index/schema.py
+++ b/archivebox/index/schema.py
@ -379,11 +379,15 @@ class Link:
        output_paths = (
            domain(self.url),
            'output.html',
            'output.pdf',
            'screenshot.png',
-            'output.html',
+            'singlefile.html',
            'readability/content.html',
            'mercury/content.html',
            'htmltotext.txt',
            'media',
-            'singlefile.html'
+            'git',
        )
        return any(
--- a/archivebox/package.json
+++ b/archivebox/package.json
@ -1,6 +1,6 @@
 {
  "name": "archivebox",
-  "version": "0.7.2",
+  "version": "0.7.3",
  "description": "ArchiveBox: The self-hosted internet archive",
  "author": "Nick Sweeting <archivebox-npm@sweeting.me>",
  "repository": "github:ArchiveBox/ArchiveBox",
--- a/archivebox/templates/core/navigation.html
+++ b/archivebox/templates/core/navigation.html
@ -5,7 +5,7 @@
    <a href="{% url 'Home' %}">Snapshots</a> |
    <a href="/admin/core/tag/">Tags</a> |
    <a href="/admin/core/archiveresult/?o=-1">Log</a> &nbsp; &nbsp;
-    <a href="{% url 'Docs' %}">Docs</a> | 
+    <a href="{% url 'Docs' %}" target="_blank" rel="noopener noreferrer">Docs</a> | 
    <a href="{% url 'public-index' %}">Public</a> | 
    <a href="/admin/">Admin</a>
     &nbsp; &nbsp;
--- a/archivebox/templates/core/snapshot.html
+++ b/archivebox/templates/core/snapshot.html
--- a/archivebox/util.py
+++ b/archivebox/util.py
@ -221,6 +221,8 @@ def get_headers(url: str, timeout: int=None) -> str:
 def chrome_args(**options) -> List[str]:
    """helper to build up a chrome shell command with arguments"""
    # Chrome CLI flag documentation: https://peter.sh/experiments/chromium-command-line-switches/
    from .config import CHROME_OPTIONS, CHROME_VERSION
    options = {**CHROME_OPTIONS, **options}
@ -248,14 +250,19 @@ def chrome_args(**options) -> List[str]:
            "--disable-software-rasterizer",
            "--run-all-compositor-stages-before-draw",
            "--hide-scrollbars",
            "--window-size=1440,2000",
            "--autoplay-policy=no-user-gesture-required",
            "--no-first-run",
            "--use-fake-ui-for-media-stream",
            "--use-fake-device-for-media-stream",
            "--disable-sync",
            # "--password-store=basic",
        )
    # disable automatic updating when running headless, as there's no user to see the upgrade prompts
    cmd_args += ("--simulate-outdated-no-au='Tue, 31 Dec 2099 23:59:59 GMT'",)
    # set window size for screenshot/pdf/etc. rendering
    cmd_args += ('--window-size={}'.format(options['RESOLUTION']),)
    if not options['CHECK_SSL_VALIDITY']:
        cmd_args += ('--disable-web-security', '--ignore-certificate-errors')
@ -263,9 +270,6 @@ def chrome_args(**options) -> List[str]:
    if options['CHROME_USER_AGENT']:
        cmd_args += ('--user-agent={}'.format(options['CHROME_USER_AGENT']),)
    if options['RESOLUTION']:
        cmd_args += ('--window-size={}'.format(options['RESOLUTION']),)
    if options['CHROME_TIMEOUT']:
       cmd_args += ('--timeout={}'.format(options['CHROME_TIMEOUT'] * 1000),)
--- a/bin/docker_entrypoint.sh
+++ b/bin/docker_entrypoint.sh
@ -91,12 +91,16 @@ if ! chown $PUID:$PGID "$DATA_DIR"/* > /dev/null 2>&1; then
 fi
-# also chown BROWSERS_DIR because otherwise 'archivebox setup' wont be able to install chrome at runtime
+# also chown BROWSERS_DIR because otherwise 'archivebox setup' won't be able to 'playwright install chromium' at runtime
 export PLAYWRIGHT_BROWSERS_PATH="${PLAYWRIGHT_BROWSERS_PATH:-/browsers}"
 mkdir -p "$PLAYWRIGHT_BROWSERS_PATH/permissions_test_safe_to_delete"
 chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"
 chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"/*
 rm -Rf "$PLAYWRIGHT_BROWSERS_PATH/permissions_test_safe_to_delete"
 chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"
 if [[ -d "$PLAYWRIGHT_BROWSERS_PATH/.links" ]]; then
    chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"/*
    chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"/.*
    chown -h $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"/.links/*
 fi
 # (this check is written in blood in 2023, QEMU silently breaks things in ways that are not obvious)
@ -107,7 +111,7 @@ if [[ "$IN_QEMU" == "True" ]]; then
    echo -e "    See here for more info: https://github.com/microsoft/playwright/issues/17395#issuecomment-1250830493\n" > /dev/stderr
 fi
-# check disk space free on / and /data, warn on <500Mb free, error on <100Mb free
+# check disk space free on /, /data, and /data/archive, warn on <500Mb free, error on <100Mb free
 export ROOT_USAGE="$(df --output=pcent,avail / | tail -n 1 | xargs)"
 export ROOT_USED_PCT="${ROOT_USAGE%%%*}"
 export ROOT_AVAIL_KB="$(echo "$ROOT_USAGE" | awk '{print $2}')"
@ -124,23 +128,48 @@ elif [[ "$ROOT_USED_PCT" -ge 99 ]] || [[ "$ROOT_AVAIL_KB" -lt 500000 ]]; then
    df -kh / > /dev/stderr
 fi
-export DATA_USAGE="$(df --output=pcent,avail /data | tail -n 1 | xargs)"
+export DATA_USAGE="$(df --output=pcent,avail "$DATA_DIR" | tail -n 1 | xargs)"
 export DATA_USED_PCT="${DATA_USAGE%%%*}"
 export DATA_AVAIL_KB="$(echo "$DATA_USAGE" | awk '{print $2}')"
 # Free-space check for the data volume, then (only if it is healthy) for data/archive.
 # *_AVAIL_KB values come from the `avail` column of df: <100000 (~100Mb) is reported as
 # "completely out of space", >=99% used or <500000 (~500Mb) as "running out of space".
 # Each volume is compared against its OWN available space, not the root filesystem's.
 if [[ "$DATA_AVAIL_KB" -lt 100000 ]]; then
-    echo -e "\n[!] Warning: Docker data volume is completely out of space! (${DATA_USED_PCT}% used on /data)" > /dev/stderr
+    echo -e "\n[!] Warning: Docker data volume is completely out of space! (${DATA_USED_PCT}% used on $DATA_DIR)" > /dev/stderr
    echo -e "    you need to free up at least 100Mb on the drive holding your data directory" > /dev/stderr
    echo -e "    \$ ncdu -x data\n" > /dev/stderr
-    df -kh /data > /dev/stderr
+    df -kh "$DATA_DIR" > /dev/stderr
    sleep 5
 elif [[ "$DATA_USED_PCT" -ge 99 ]] || [[ "$DATA_AVAIL_KB" -lt 500000 ]]; then
-    echo -e "\n[!] Warning: Docker data volume is running out of space! (${DATA_USED_PCT}% used on /data)" > /dev/stderr
+    echo -e "\n[!] Warning: Docker data volume is running out of space! (${DATA_USED_PCT}% used on $DATA_DIR)" > /dev/stderr
    echo -e "    you may need to free up space on the drive holding your data directory soon" > /dev/stderr
    echo -e "    \$ ncdu -x data\n" > /dev/stderr
-    df -kh /data > /dev/stderr
+    df -kh "$DATA_DIR" > /dev/stderr
 else
    # data/ has space available, but check data/archive separately, because it might be on a network mount or external drive
    if [[ -d "$DATA_DIR/archive" ]]; then
        export ARCHIVE_USAGE="$(df --output=pcent,avail "$DATA_DIR/archive" | tail -n 1 | xargs)"
        export ARCHIVE_USED_PCT="${ARCHIVE_USAGE%%%*}"
        export ARCHIVE_AVAIL_KB="$(echo "$ARCHIVE_USAGE" | awk '{print $2}')"
        if [[ "$ARCHIVE_AVAIL_KB" -lt 100000 ]]; then
            echo -e "\n[!] Warning: data/archive folder is completely out of space! (${ARCHIVE_USED_PCT}% used on $DATA_DIR/archive)" > /dev/stderr
            echo -e "    you need to free up at least 100Mb on the drive holding your data/archive directory" > /dev/stderr
            echo -e "    \$ ncdu -x data/archive\n" > /dev/stderr
            df -kh "$DATA_DIR/archive" > /dev/stderr
            sleep 5
        elif [[ "$ARCHIVE_USED_PCT" -ge 99 ]] || [[ "$ARCHIVE_AVAIL_KB" -lt 500000 ]]; then
            echo -e "\n[!] Warning: data/archive folder is running out of space! (${ARCHIVE_USED_PCT}% used on $DATA_DIR/archive)" > /dev/stderr
            echo -e "    you may need to free up space on the drive holding your data/archive directory soon" > /dev/stderr
            echo -e "    \$ ncdu -x data/archive\n" > /dev/stderr
            df -kh "$DATA_DIR/archive" > /dev/stderr
        fi
    fi
 fi
 # set DBUS_SYSTEM_BUS_ADDRESS & DBUS_SESSION_BUS_ADDRESS
 # (dbus is not actually needed, it makes chrome log fewer warnings but isn't worth making our docker images bigger)
 # service dbus start >/dev/null 2>&1 &
 # export $(dbus-launch --close-stderr)
 export ARCHIVEBOX_BIN_PATH="$(which archivebox)"
 # Drop permissions to run commands as the archivebox user
--- a/bin/setup.sh
+++ b/bin/setup.sh
@ -26,24 +26,24 @@ if (which docker-compose > /dev/null && docker pull archivebox/archivebox:latest
    if [ -f "./index.sqlite3" ]; then
        mv ~/archivebox/* ~/archivebox/data/
    fi
-    curl -O 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/docker-compose.yml'
+    curl -O 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/main/docker-compose.yml'
-    docker-compose run --rm archivebox init --setup
+    docker compose run --rm archivebox init --setup
    echo
-    echo "[+] Starting ArchiveBox server using: docker-compose up -d..."
+    echo "[+] Starting ArchiveBox server using: docker compose up -d..."
-    docker-compose up -d
+    docker compose up -d
    sleep 7
    open http://127.0.0.1:8000 || true
    echo
    echo "[√] Server started on http://0.0.0.0:8000 and data directory initialized in ~/archivebox/data. Usage:"
    echo "    cd ~/archivebox"
-    echo "    docker-compose ps"
+    echo "    docker compose ps"
-    echo "    docker-compose down"
+    echo "    docker compose down"
-    echo "    docker-compose pull"
+    echo "    docker compose pull"
-    echo "    docker-compose up"
+    echo "    docker compose up"
-    echo "    docker-compose run archivebox manage createsuperuser"
+    echo "    docker compose run archivebox manage createsuperuser"
-    echo "    docker-compose run archivebox add 'https://example.com'"
+    echo "    docker compose run archivebox add 'https://example.com'"
-    echo "    docker-compose run archivebox list"
+    echo "    docker compose run archivebox list"
-    echo "    docker-compose run archivebox help"
+    echo "    docker compose run archivebox help"
    exit 0
 elif (which docker > /dev/null && docker pull archivebox/archivebox:latest); then
    echo "[+] Initializing an ArchiveBox data folder at ~/archivebox using Docker..."
@ -189,12 +189,12 @@ which open > /dev/null && open http://127.0.0.1:8000 || true
 echo
 echo "[√] Server started on http://0.0.0.0:8000 and data directory initialized in ~/archivebox. Usage:"
-echo "    cd ~/archivebox"
+echo "    cd ~/archivebox                                    # see your data dir"
-echo "    ps aux | grep archivebox"
+echo "    ps aux | grep archivebox                           # see server process pid"
-echo "    pkill -f archivebox"
+echo "    pkill -f archivebox                                # stop the server"
-echo "    python3 -m pip install --upgrade archivebox"
+echo "    archivebox server --quick-init 0.0.0.0:8000        # start server process"
-echo "    archivebox server --quick-init 0.0.0.0:8000"
+echo "    pip install --upgrade archivebox; archivebox init  # update versions"
-echo "    archivebox manage createsuperuser"
+echo "    archivebox manage createsuperuser                  # add an admin user+pass"
-echo "    archivebox add 'https://example.com'"
+# the trailing "# ..." hint is part of the printed usage text, so it must stay inside the quotes
+echo "    archivebox add 'https://example.com'               # archive a new URL"
-echo "    archivebox list"
+echo "    archivebox list                                    # see URLs archived"
-echo "    archivebox help"
+echo "    archivebox help                                    # see more help & examples"
--- a/etc/sonic.cfg
+++ b/etc/sonic.cfg
@ -6,7 +6,7 @@
 [server]
-log_level = "warn"
+log_level = "debug"
 [channel]
--- a/package-lock.json
+++ b/package-lock.json
@ -1,12 +1,12 @@
 {
  "name": "archivebox",
-  "version": "0.7.2",
+  "version": "0.7.3",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "archivebox",
-      "version": "0.7.2",
+      "version": "0.7.3",
      "license": "MIT",
      "dependencies": {
        "@postlight/parser": "^2.2.3",
--- a/package.json
+++ b/package.json
@ -1,6 +1,6 @@
 {
  "name": "archivebox",
-  "version": "0.7.2",
+  "version": "0.7.3",
  "description": "ArchiveBox: The self-hosted internet archive",
  "author": "Nick Sweeting <archivebox-npm@sweeting.me>",
  "repository": "github:ArchiveBox/ArchiveBox",
--- a/pdm.lock
+++ b/pdm.lock
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,11 +1,16 @@
 [project]
 name = "archivebox"
-version = "0.7.2"
+version = "0.7.3"
 description = "Self-hosted internet archiving solution."
 authors = [
    {name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"},
 ]
 license = {text = "MIT"}
 readme = "README.md"
 package-dir = "archivebox"
 requires-python = ">=3.9,<3.12"
 dependencies = [
    # pdm update [--unconstrained] 
    "croniter>=0.3.34",
    "dateparser>=1.0.0",
    "django-extensions>=3.0.3",
@ -18,9 +23,6 @@ dependencies = [
    "yt-dlp>=2023.10.13",
    # "playwright>=1.39.0; platform_machine != 'armv7l'",
 ]
 requires-python = ">=3.9,<3.12"
 readme = "README.md"
 license = {text = "MIT"}
 classifiers = [
    "Development Status :: 4 - Beta",
    "Environment :: Console",
@ -54,26 +56,45 @@ classifiers = [
    "Typing :: Typed",
 ]
-# pdm lock -G:all
+[project.optional-dependencies]
-# pdm install -G:all
+# pdm update [--group=':all'] [--unconstrained] 
 sonic = [
    # echo "deb [signed-by=/usr/share/keyrings/valeriansaliou_sonic.gpg] https://packagecloud.io/valeriansaliou/sonic/debian/ bookworm main" > /etc/apt/sources.list.d/valeriansaliou_sonic.list
    # curl -fsSL https://packagecloud.io/valeriansaliou/sonic/gpgkey | gpg --dearmor -o /usr/share/keyrings/valeriansaliou_sonic.gpg
    "sonic-client>=0.0.5",
 ]
 ldap = [
    # apt install libldap2-dev libsasl2-dev
    "setuptools>=69.0.3",
    "python-ldap>=3.4.3",
    "django-auth-ldap>=4.1.0",
 ]
 # playwright = [
 #     platform_machine isn't respected by pdm export -o requirements.txt, this breaks arm/v7
 #     "playwright>=1.39.0; platform_machine != 'armv7l'",
 # ]
 # pdm install -G:all --dev
 # pdm update --dev [--unconstrained] 
 [tool.pdm.dev-dependencies]
 dev = [
-    # build
+    # building
    "setuptools>=69.0.3",
    "wheel",
    "pdm",
    "homebrew-pypi-poet>=0.10.0",
-    # docs
+    # documentation
    "recommonmark",
    "sphinx",
    "sphinx-rtd-theme",
-    # debug
+    # debugging
    "django-debug-toolbar",
    "djdt_flamegraph",
    "ipdb",
-    # test
+    # testing
    "pytest",
-    # lint
+    # linting
    "flake8",
    "mypy",
    "django-stubs",
@ -84,22 +105,6 @@ lint = "./bin/lint.sh"
 test = "./bin/test.sh"
 # all = {composite = ["lint mypackage/", "test -v tests/"]}
 [project.optional-dependencies]
 sonic = [
    # echo "deb [signed-by=/usr/share/keyrings/valeriansaliou_sonic.gpg] https://packagecloud.io/valeriansaliou/sonic/debian/ bookworm main" > /etc/apt/sources.list.d/valeriansaliou_sonic.list
    # curl -fsSL https://packagecloud.io/valeriansaliou/sonic/gpgkey | gpg --dearmor -o /usr/share/keyrings/valeriansaliou_sonic.gpg
    "sonic-client>=0.0.5",
 ]
 ldap = [
    # apt install libldap2-dev libsasl2-dev
    "python-ldap>=3.4.3",
    "django-auth-ldap>=4.1.0",
 ]
 # playwright = [
 #     platform_machine isn't respected by pdm export -o requirements.txt, this breaks arm/v7
 #     "playwright>=1.39.0; platform_machine != 'armv7l'",
 # ]
 [project.scripts]
 archivebox = "archivebox.cli:main"
--- a/requirements.txt
+++ b/requirements.txt
@ -31,7 +31,7 @@ pure-eval==0.2.2
 pyasn1==0.5.1
 pyasn1-modules==0.3.0
 pycparser==2.21; implementation_name != "cpython"
-pycryptodomex==3.19.1
+pycryptodomex==3.20.0
 pygments==2.17.2
 python-crontab==3.0.0
 python-dateutil==2.8.2
@ -49,6 +49,6 @@ tzdata==2023.4; platform_system == "Windows"
 tzlocal==5.2
 urllib3==2.1.0
 w3lib==2.1.2
-wcwidth==0.2.12
+wcwidth==0.2.13
 websockets==12.0
 yt-dlp==2023.12.30