diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index 220707b9..086e3d7b 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -1,8 +1,8 @@
---
name: 🐞 Bug report
about: Create a report to help us improve
-title: 'Bugfix: ...'
-labels: 'changes: bugfixes'
+title: 'Bug: ...'
+labels: 'bug'
assignees: ''
---
diff --git a/.gitmodules b/.gitmodules
index 0993934a..196c9a92 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -23,3 +23,6 @@
[submodule "archivebox/vendor/django-taggit"]
path = archivebox/vendor/django-taggit
url = https://github.com/jazzband/django-taggit
+[submodule "archivebox/vendor/python-atomicwrites"]
+ path = archivebox/vendor/python-atomicwrites
+ url = https://github.com/untitaker/python-atomicwrites
diff --git a/Dockerfile b/Dockerfile
index 8cf2da30..b11d3382 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -50,13 +50,6 @@ RUN apt-get update -qq \
fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
&& rm -rf /var/lib/apt/lists/*
-# Install apt development dependencies
-# RUN apt-get install -qq \
-# && apt-get install -qq -y --no-install-recommends \
-# python3 python3-dev python3-pip python3-venv python3-all \
-# dh-python debhelper devscripts dput software-properties-common \
-# python3-distutils python3-setuptools python3-wheel python3-stdeb
-
# Install Node environment
RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add - \
&& echo 'deb https://deb.nodesource.com/node_15.x buster main' >> /etc/apt/sources.list \
@@ -79,17 +72,26 @@ WORKDIR "$CODE_DIR"
ENV PATH="${PATH}:$VENV_PATH/bin"
RUN python -m venv --clear --symlinks "$VENV_PATH" \
&& pip install --upgrade --quiet pip setuptools
-ADD ./pip_dist/archivebox.egg-info/requires.txt "$CODE_DIR/pip_dist/archivebox.egg-info/requires.txt"
+ADD "./setup.py" "$CODE_DIR/"
+ADD "./README.md" "./package.json" "$CODE_DIR/archivebox/"
RUN apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
build-essential python-dev python3-dev \
- # && pip install --upgrade pip \
- && grep -B 1000 -E '^$' "$CODE_DIR/pip_dist/archivebox.egg-info/requires.txt" | pip install --quiet -r /dev/stdin \
- && pip install --quiet "sonic-client==0.0.5" \
+ && python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.install_requires + result.extras_require["sonic"]))' > /tmp/requirements.txt \
+ && pip install --quiet -r /tmp/requirements.txt \
&& apt-get purge -y build-essential python-dev python3-dev \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
+# Install apt development dependencies
+# RUN apt-get install -qq \
+# && apt-get install -qq -y --no-install-recommends \
+# python3 python3-dev python3-pip python3-venv python3-all \
+# dh-python debhelper devscripts dput software-properties-common \
+# python3-distutils python3-setuptools python3-wheel python3-stdeb
+# RUN python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.extras_require["dev"]))' > /tmp/dev_requirements.txt \
+ # && pip install --quiet -r /tmp/dev_requirements.txt
+
# Install ArchiveBox Python package and its dependencies
WORKDIR "$CODE_DIR"
ADD . "$CODE_DIR"
@@ -115,5 +117,8 @@ RUN /app/bin/docker_entrypoint.sh archivebox version
VOLUME "$DATA_DIR"
EXPOSE 8000
+HEALTHCHECK --interval=30s --timeout=20s --retries=15 \
+ CMD curl --silent 'http://localhost:8000/admin/login/' || exit 1
+
ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"]
-CMD ["archivebox", "server", "0.0.0.0:8000"]
+CMD ["archivebox", "server", "--quick-init", "0.0.0.0:8000"]
diff --git a/README.md b/README.md
index 76b51be3..d6c3d8ff 100644
--- a/README.md
+++ b/README.md
@@ -93,7 +93,7 @@ archivebox help
### Quickstart
-**🖥 Supported OSs:** Linux/BSD, macOS, Windows **🎮 CPU Architectures:** x86, amd64, arm7, arm8 (raspi >=3)
+**🖥 Supported OSs:** Linux/BSD, macOS, Windows (w/ Docker) **🎮 CPU Architectures:** x86, amd64, arm7, arm8 (raspi >=3)
**📦 Distributions:** `docker`/`apt`/`brew`/`pip3`/`npm` (in order of completeness)
*(click to expand your preferred **► `distribution`** below for full setup instructions)*
@@ -103,22 +103,29 @@ archivebox help
First make sure you have Docker installed: https://docs.docker.com/get-docker/
+Download the [`docker-compose.yml`](https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/docker-compose.yml) file.
-# create a new empty directory and initalize your collection (can be anywhere)
-mkdir ~/archivebox && cd ~/archivebox
curl -O 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/docker-compose.yml'
-docker-compose run archivebox init
-docker-compose run archivebox --version
+
-# start the webserver and open the UI (optional)
+Start the server.
+
+docker-compose run archivebox server --quick-init
docker-compose run archivebox manage createsuperuser
-docker-compose up -d
-open 'http://127.0.0.1:8000'
+
+Open [`http://127.0.0.1:8000`](http://127.0.0.1:8000).
+
+
# you can also add links and manage your archive via the CLI:
docker-compose run archivebox add 'https://example.com'
docker-compose run archivebox status
docker-compose run archivebox help # to see more options
+
+# when passing stdin/stdout via the cli, use the -T flag
+echo 'https://example.com' | docker-compose run -T archivebox add
+docker-compose run -T archivebox list --html --with-headers > index.html
This is the recommended way to run ArchiveBox because it includes all the extractors like:
@@ -127,7 +134,7 @@ chrome, wget, youtube-dl, git, etc., full-text search w/ sonic, and many other g
-Get ArchiveBox with docker on any platform
+Get ArchiveBox with docker on macOS/Linux/Windows
First make sure you have Docker installed: https://docs.docker.com/get-docker/
@@ -145,21 +152,30 @@ open http://127.0.0.1:8000
docker run -v $PWD:/data -it archivebox/archivebox add 'https://example.com'
docker run -v $PWD:/data -it archivebox/archivebox status
docker run -v $PWD:/data -it archivebox/archivebox help # to see more options
+
+# when passing stdin/stdout via the cli, use only -i (not -it)
+echo 'https://example.com' | docker run -v $PWD:/data -i archivebox/archivebox add
+docker run -v $PWD:/data -i archivebox/archivebox list --html --with-headers > index.html
-Get ArchiveBox with apt on Ubuntu >=20.04
+Get ArchiveBox with apt on Ubuntu/Debian
-First make sure you're on Ubuntu >= 20.04, or scroll down for older/non-Ubuntu instructions.
+This method should work on all Ubuntu/Debian-based systems, including x86, amd64, arm7, and arm8 CPUs (e.g. Raspberry Pi >=3).
+
+If you're on Ubuntu >= 20.04, add the `apt` repository with `add-apt-repository`:
+(on other Ubuntu/Debian-based systems follow the ♰ instructions below)
# add the repo to your sources and install the archivebox package using apt
sudo apt install software-properties-common
sudo add-apt-repository -u ppa:archivebox/archivebox
sudo apt install archivebox
+
+
# create a new empty directory and initialize your collection (can be anywhere)
mkdir ~/archivebox && cd ~/archivebox
npm install --prefix . 'git+https://github.com/ArchiveBox/ArchiveBox.git'
@@ -178,21 +194,25 @@ archivebox list --json --with-headers > index.json
archivebox help # to see more options
-For other Debian-based systems or older Ubuntu systems you can add these sources to `/etc/apt/sources.list`:
+♰ On other Ubuntu/Debian-based systems add these sources directly to /etc/apt/sources.list:
-deb http://ppa.launchpad.net/archivebox/archivebox/ubuntu focal main
-deb-src http://ppa.launchpad.net/archivebox/archivebox/ubuntu focal main
+echo "deb http://ppa.launchpad.net/archivebox/archivebox/ubuntu focal main" > /etc/apt/sources.list.d/archivebox.list
+echo "deb-src http://ppa.launchpad.net/archivebox/archivebox/ubuntu focal main" >> /etc/apt/sources.list.d/archivebox.list
+sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C258F79DCC02E369
+sudo apt update
+sudo apt install archivebox
+sudo snap install chromium
+archivebox --version
+# then scroll back up and continue the initialization instructions above
-Then run `apt update; apt install archivebox; archivebox --version`.
-
(you may need to install some other dependencies manually however)
-Get ArchiveBox with brew on macOS >=10.13
+Get ArchiveBox with brew on macOS
First make sure you have Homebrew installed: https://brew.sh/#install
@@ -252,13 +272,12 @@ archivebox help # to see more options
No matter which install method you choose, they all roughly follow this 3-step process and all provide the same CLI, Web UI, and on-disk data format.
-
-
-1. Install ArchiveBox: `apt/brew/pip3 install archivebox`
-2. Start a collection: `archivebox init`
-3. Start archiving: `archivebox add 'https://example.com'`
-
-
+
+- Install ArchiveBox: apt/brew/pip3 install archivebox
+- Start a collection: archivebox init
+- Start archiving: archivebox add 'https://example.com'
+- View the archive: archivebox server or archivebox list ..., ls ./archive/*/index.html
+
@@ -307,8 +326,13 @@ archivebox add < ~/Downloads/firefox_bookmarks_export.html
archivebox add < any_text_with_urls_in_it.txt
archivebox add --depth=1 'https://example.com/some/downloads.html'
archivebox add --depth=1 'https://news.ycombinator.com#2020-12-12'
-```
+# (if using docker add -i when passing via stdin)
+echo 'https://example.com' | docker run -v $PWD:/data -i archivebox/archivebox add
+
+# (if using docker-compose add -T when passing via stdin)
+echo 'https://example.com' | docker-compose run -T archivebox add
+```
-
TXT, RSS, XML, JSON, CSV, SQL, HTML, Markdown, or [any other text-based format...](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Import-a-list-of-URLs-from-a-text-file)
-
[Browser history](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive) or [browser bookmarks](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive) (see instructions for: [Chrome](https://support.google.com/chrome/answer/96816?hl=en), [Firefox](https://support.mozilla.org/en-US/kb/export-firefox-bookmarks-to-backup-or-transfer), [Safari](http://i.imgur.com/AtcvUZA.png), [IE](https://support.microsoft.com/en-us/help/211089/how-to-import-and-export-the-internet-explorer-favorites-folder-to-a-32-bit-version-of-windows), [Opera](http://help.opera.com/Windows/12.10/en/importexport.html), [and more...](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive))
@@ -328,6 +352,8 @@ The on-disk layout is optimized to be easy to browse by hand and durable long-te
# to browse your index statically without running the archivebox server, run:
archivebox list --html --with-headers > index.html
archivebox list --json --with-headers > index.json
+# if running these commands with docker-compose, add -T:
+# docker-compose run -T archivebox list ...
# then open the static index in a browser
open index.html
@@ -338,13 +364,13 @@ ls ./archive/
/
- **Index:** `index.html` & `index.json` HTML and JSON index files containing metadata and details
- **Title**, **Favicon**, **Headers** Response headers, site favicon, and parsed site title
+- **SingleFile:** `singlefile.html` HTML snapshot rendered with headless Chrome using SingleFile
- **Wget Clone:** `example.com/page-name.html` wget clone of the site with `warc/.gz`
- Chrome Headless
- - **SingleFile:** `singlefile.html` HTML snapshot rendered with headless Chrome using SingleFile
- **PDF:** `output.pdf` Printed PDF of site using headless chrome
- **Screenshot:** `screenshot.png` 1440x900 screenshot of site using headless chrome
- **DOM Dump:** `output.html` DOM Dump of the HTML after rendering using headless chrome
- - **Readability:** `article.html/json` Article text extraction using Readability
+- **Article Text:** `article.html/json` Article text extraction using Readability & Mercury
- **Archive.org Permalink:** `archive.org.txt` A link to the saved site on archive.org
- **Audio & Video:** `media/` all audio/video files + playlists, including subtitles & metadata with youtube-dl
- **Source Code:** `git/` clone of any repository found on github, bitbucket, or gitlab links
@@ -534,7 +560,8 @@ Whether you want to learn which organizations are the big players in the web arc
_A collection of the most active internet archiving communities and initiatives._
- Check out the ArchiveBox [Roadmap](https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap) and [Changelog](https://github.com/ArchiveBox/ArchiveBox/wiki/Changelog)
- Learn why archiving the internet is important by reading the "[On the Importance of Web Archiving](https://parameters.ssrc.org/2018/09/on-the-importance-of-web-archiving/)" blog post.
-- Or reach out to me for questions and comments via [@ArchiveBoxApp](https://twitter.com/ArchiveBoxApp) or [@theSquashSH](https://twitter.com/thesquashSH) on Twitter.
+- Reach out to me for questions and comments via [@ArchiveBoxApp](https://twitter.com/ArchiveBoxApp) or [@theSquashSH](https://twitter.com/thesquashSH) on Twitter
+- Hire us to develop an internet archiving solution for you [@MonadicalSAS](https://twitter.com/MonadicalSAS) [Monadical.com](https://monadical.com)
@@ -719,7 +746,10 @@ archivebox manage dbshell
-This project is maintained mostly in my spare time with the help from generous contributors and Monadical.com.
+
+This project is maintained mostly in my spare time with help from generous contributors and Monadical (✨ hire them for dev work!).
+
+
diff --git a/archivebox/cli/__init__.py b/archivebox/cli/__init__.py
index f9a55efd..890065a4 100644
--- a/archivebox/cli/__init__.py
+++ b/archivebox/cli/__init__.py
@@ -63,7 +63,11 @@ def run_subcommand(subcommand: str,
if subcommand not in meta_cmds:
from ..config import setup_django
- setup_django(in_memory_db=subcommand in fake_db, check_db=subcommand in archive_cmds)
+
+ cmd_requires_db = subcommand in archive_cmds
+ init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
+
+ setup_django(in_memory_db=subcommand in fake_db, check_db=cmd_requires_db and not init_pending)
module = import_module('.archivebox_{}'.format(subcommand), __package__)
module.main(args=subcommand_args, stdin=stdin, pwd=pwd) # type: ignore
diff --git a/archivebox/cli/archivebox_add.py b/archivebox/cli/archivebox_add.py
index 41c7554d..a96888b0 100644
--- a/archivebox/cli/archivebox_add.py
+++ b/archivebox/cli/archivebox_add.py
@@ -22,6 +22,12 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
add_help=True,
formatter_class=SmartFormatter,
)
+ parser.add_argument(
+ '--tag', '-t',
+ type=str,
+ default='',
+ help="Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3",
+ )
parser.add_argument(
'--update-all', #'-n',
action='store_true',
@@ -75,7 +81,11 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
)
command = parser.parse_args(args or ())
urls = command.urls
- stdin_urls = accept_stdin(stdin)
+
+ stdin_urls = ''
+ if not urls:
+ stdin_urls = accept_stdin(stdin)
+
if (stdin_urls and urls) or (not stdin and not urls):
stderr(
'[X] You must pass URLs/paths to add via stdin or CLI arguments.\n',
@@ -85,6 +95,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
add(
urls=stdin_urls or urls,
depth=command.depth,
+ tag=command.tag,
update_all=command.update_all,
index_only=command.index_only,
overwrite=command.overwrite,
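A note on the pattern introduced here: stdin is only consulted when no positional URLs were given, so piped input and CLI arguments stay mutually exclusive and the command never blocks waiting on an unpiped stdin. A rough sketch of the guard, with `accept_stdin()`'s behavior assumed (the real helper lives in `logging_util.py`):

```python
# Rough sketch of the stdin-vs-arguments precedence used above; accept_stdin's
# exact behavior is an assumption (return piped text, or '' when stdin is a TTY).
import sys

def accept_stdin(stdin=None):
    stdin = stdin or sys.stdin
    if stdin and not stdin.isatty():   # something was actually piped in
        return stdin.read().strip()
    return ''

def resolve_urls(urls):
    stdin_urls = ''
    if not urls:                       # never touch stdin if args were passed
        stdin_urls = accept_stdin()
    if stdin_urls and urls:
        raise SystemExit('[X] Pass URLs via stdin OR as arguments, not both')
    return stdin_urls or urls
```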
diff --git a/archivebox/cli/archivebox_config.py b/archivebox/cli/archivebox_config.py
index f81286c6..25621972 100644
--- a/archivebox/cli/archivebox_config.py
+++ b/archivebox/cli/archivebox_config.py
@@ -45,7 +45,10 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
help='KEY or KEY=VALUE formatted config values to get or set',
)
command = parser.parse_args(args or ())
- config_options_str = accept_stdin(stdin)
+
+ config_options_str = ''
+ if not command.config_options:
+ config_options_str = accept_stdin(stdin)
config(
config_options_str=config_options_str,
diff --git a/archivebox/cli/archivebox_init.py b/archivebox/cli/archivebox_init.py
index 6255ef26..5753269c 100755
--- a/archivebox/cli/archivebox_init.py
+++ b/archivebox/cli/archivebox_init.py
@@ -27,11 +27,17 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
action='store_true',
help='Ignore unrecognized files in current directory and initialize anyway',
)
+ parser.add_argument(
+ '--quick', '-q',
+ action='store_true',
+ help='Run any updates or migrations without rechecking all snapshot dirs',
+ )
command = parser.parse_args(args or ())
reject_stdin(__command__, stdin)
init(
force=command.force,
+ quick=command.quick,
out_dir=pwd or OUTPUT_DIR,
)
diff --git a/archivebox/cli/archivebox_list.py b/archivebox/cli/archivebox_list.py
index 3838cf60..5477bfc8 100644
--- a/archivebox/cli/archivebox_list.py
+++ b/archivebox/cli/archivebox_list.py
@@ -12,6 +12,7 @@ from ..main import list_all
from ..util import docstring
from ..config import OUTPUT_DIR
from ..index import (
+ LINK_FILTERS,
get_indexed_folders,
get_archived_folders,
get_unarchived_folders,
@@ -23,7 +24,7 @@ from ..index import (
get_corrupted_folders,
get_unrecognized_folders,
)
-from ..logging_util import SmartFormatter, accept_stdin, stderr
+from ..logging_util import SmartFormatter, reject_stdin, stderr
@docstring(list_all.__doc__)
@@ -44,7 +45,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
group.add_argument(
'--json', #'-j',
action='store_true',
- help="Print the output in JSON format with all columns included.",
+ help="Print the output in JSON format with all columns included",
)
group.add_argument(
'--html',
@@ -59,19 +60,19 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
parser.add_argument(
'--sort', #'-s',
type=str,
- help="List the links sorted using the given key, e.g. timestamp or updated.",
+ help="List the links sorted using the given key, e.g. timestamp or updated",
default=None,
)
parser.add_argument(
'--before', #'-b',
type=float,
- help="List only links bookmarked before the given timestamp.",
+ help="List only links bookmarked before (less than) the given timestamp",
default=None,
)
parser.add_argument(
'--after', #'-a',
type=float,
- help="List only links bookmarked after the given timestamp.",
+ help="List only links bookmarked after (greater than or equal to) the given timestamp",
default=None,
)
parser.add_argument(
@@ -96,9 +97,9 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
)
)
parser.add_argument(
- '--filter-type',
+ '--filter-type', '-t',
type=str,
- choices=('exact', 'substring', 'domain', 'regex', 'tag', 'search'),
+ choices=(*LINK_FILTERS.keys(), 'search'),
default='exact',
help='Type of pattern matching to use when filtering URLs',
)
@@ -107,20 +108,19 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
nargs='*',
type=str,
default=None,
- help='List only URLs matching these filter patterns.'
+ help='List only URLs matching these filter patterns'
)
command = parser.parse_args(args or ())
- filter_patterns_str = accept_stdin(stdin)
+    reject_stdin(__command__, stdin)
if command.with_headers and not (command.json or command.html or command.csv):
stderr(
- '[X] --with-headers can only be used with --json, --html or --csv options.\n',
+ '[X] --with-headers can only be used with --json, --html or --csv options\n',
color='red',
)
raise SystemExit(2)
matching_folders = list_all(
- filter_patterns_str=filter_patterns_str,
filter_patterns=command.filter_patterns,
filter_type=command.filter_type,
status=command.status,
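`LINK_FILTERS` itself is not shown in this diff; deriving the `--filter-type` choices from it keeps the CLI in sync with the index code instead of repeating a hardcoded tuple in every subcommand. A hypothetical sketch of its shape (the real mapping lives in `archivebox/index/__init__.py` and may differ in detail):

```python
# Hypothetical sketch of the LINK_FILTERS mapping referenced above: each
# --filter-type name maps to a callable that builds a Django Q object.
# Requires a configured Django environment; the exact patterns are assumptions.
from django.db.models import Q

LINK_FILTERS = {
    'exact':     lambda pattern: Q(url=pattern),
    'substring': lambda pattern: Q(url__icontains=pattern),
    'regex':     lambda pattern: Q(url__iregex=pattern),
    'domain':    lambda pattern: (Q(url__istartswith=f'http://{pattern}')
                                  | Q(url__istartswith=f'https://{pattern}')),
    'tag':       lambda pattern: Q(tags__name=pattern),
}

# which is why the argparse choices become: (*LINK_FILTERS.keys(), 'search')
```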
diff --git a/archivebox/cli/archivebox_oneshot.py b/archivebox/cli/archivebox_oneshot.py
index af68bac2..411cce8b 100644
--- a/archivebox/cli/archivebox_oneshot.py
+++ b/archivebox/cli/archivebox_oneshot.py
@@ -50,8 +50,11 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
help= "Path to save the single archive folder to, e.g. ./example.com_archive"
)
command = parser.parse_args(args or ())
+ stdin_url = None
url = command.url
- stdin_url = accept_stdin(stdin)
+ if not url:
+ stdin_url = accept_stdin(stdin)
+
if (stdin_url and url) or (not stdin and not url):
stderr(
'[X] You must pass a URL/path to add via stdin or CLI arguments.\n',
diff --git a/archivebox/cli/archivebox_remove.py b/archivebox/cli/archivebox_remove.py
index cb073e95..dadf2654 100644
--- a/archivebox/cli/archivebox_remove.py
+++ b/archivebox/cli/archivebox_remove.py
@@ -61,7 +61,10 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
help='URLs matching this filter pattern will be removed from the index.'
)
command = parser.parse_args(args or ())
- filter_str = accept_stdin(stdin)
+
+ filter_str = None
+ if not command.filter_patterns:
+ filter_str = accept_stdin(stdin)
remove(
filter_str=filter_str,
diff --git a/archivebox/cli/archivebox_server.py b/archivebox/cli/archivebox_server.py
index a4d96dc9..4cc050dd 100644
--- a/archivebox/cli/archivebox_server.py
+++ b/archivebox/cli/archivebox_server.py
@@ -38,10 +38,20 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
action='store_true',
help='Enable DEBUG=True mode with more verbose errors',
)
+ parser.add_argument(
+ '--nothreading',
+ action='store_true',
+ help='Force runserver to run in single-threaded mode',
+ )
parser.add_argument(
'--init',
action='store_true',
- help='Run archivebox init before starting the server',
+ help='Run a full archivebox init/upgrade before starting the server',
+ )
+ parser.add_argument(
+ '--quick-init', '-i',
+ action='store_true',
+ help='Run quick archivebox init/upgrade before starting the server',
)
parser.add_argument(
'--createsuperuser',
@@ -52,10 +62,11 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
reject_stdin(__command__, stdin)
server(
- runserver_args=command.runserver_args,
+ runserver_args=command.runserver_args + (['--nothreading'] if command.nothreading else []),
reload=command.reload,
debug=command.debug,
init=command.init,
+ quick_init=command.quick_init,
createsuperuser=command.createsuperuser,
out_dir=pwd or OUTPUT_DIR,
)
diff --git a/archivebox/cli/archivebox_update.py b/archivebox/cli/archivebox_update.py
index 6748096e..500d4c07 100644
--- a/archivebox/cli/archivebox_update.py
+++ b/archivebox/cli/archivebox_update.py
@@ -12,6 +12,7 @@ from ..main import update
from ..util import docstring
from ..config import OUTPUT_DIR
from ..index import (
+ LINK_FILTERS,
get_indexed_folders,
get_archived_folders,
get_unarchived_folders,
@@ -89,9 +90,9 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
)
)
parser.add_argument(
- '--filter-type',
+ '--filter-type', '-t',
type=str,
- choices=('exact', 'substring', 'domain', 'regex', 'tag', 'search'),
+ choices=(*LINK_FILTERS.keys(), 'search'),
default='exact',
help='Type of pattern matching to use when filtering URLs',
)
@@ -110,7 +111,10 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
default=""
)
command = parser.parse_args(args or ())
- filter_patterns_str = accept_stdin(stdin)
+
+ filter_patterns_str = None
+ if not command.filter_patterns:
+ filter_patterns_str = accept_stdin(stdin)
update(
resume=command.resume,
diff --git a/archivebox/cli/tests.py b/archivebox/cli/tests.py
new file mode 100644
index 00000000..04c54df8
--- /dev/null
+++ b/archivebox/cli/tests.py
@@ -0,0 +1,227 @@
+#!/usr/bin/env python3
+
+__package__ = 'archivebox.cli'
+
+
+import os
+import sys
+import shutil
+import unittest
+from pathlib import Path
+
+from contextlib import contextmanager
+
+TEST_CONFIG = {
+ 'USE_COLOR': 'False',
+ 'SHOW_PROGRESS': 'False',
+
+ 'OUTPUT_DIR': 'data.tests',
+
+ 'SAVE_ARCHIVE_DOT_ORG': 'False',
+ 'SAVE_TITLE': 'False',
+
+ 'USE_CURL': 'False',
+ 'USE_WGET': 'False',
+ 'USE_GIT': 'False',
+ 'USE_CHROME': 'False',
+ 'USE_YOUTUBEDL': 'False',
+}
+
+OUTPUT_DIR = 'data.tests'
+os.environ.update(TEST_CONFIG)
+
+from ..main import init
+from ..index import load_main_index
+from ..config import (
+ SQL_INDEX_FILENAME,
+ JSON_INDEX_FILENAME,
+ HTML_INDEX_FILENAME,
+)
+
+from . import (
+ archivebox_init,
+ archivebox_add,
+ archivebox_remove,
+)
+
+HIDE_CLI_OUTPUT = True
+
+test_urls = '''
+https://example1.com/what/is/happening.html?what=1#how-about-this=1
+https://example2.com/what/is/happening/?what=1#how-about-this=1
+HTtpS://example3.com/what/is/happening/?what=1#how-about-this=1f
+https://example4.com/what/is/happening.html
+https://example5.com/
+https://example6.com
+
+http://example7.com
+[https://example8.com/what/is/this.php?what=1]
+[and http://example9.com?what=1&other=3#and-thing=2]
+https://example10.com#and-thing=2 "
+abcdef
+sdflkf[what](https://subb.example12.com/who/what.php?whoami=1#whatami=2)?am=hi
+example13.bada
+and example14.badb
+htt://example15.badc
+'''
+
+stdout = sys.stdout
+stderr = sys.stderr
+
+
+@contextmanager
+def output_hidden(show_failing=True):
+ if not HIDE_CLI_OUTPUT:
+ yield
+ return
+
+ sys.stdout = open('stdout.txt', 'w+', encoding='utf-8')
+ sys.stderr = open('stderr.txt', 'w+', encoding='utf-8')
+ try:
+ yield
+ sys.stdout.close()
+ sys.stderr.close()
+ sys.stdout = stdout
+ sys.stderr = stderr
+ except Exception:
+ sys.stdout.close()
+ sys.stderr.close()
+ sys.stdout = stdout
+ sys.stderr = stderr
+ if show_failing:
+ with open('stdout.txt', 'r', encoding='utf-8') as f:
+ print(f.read())
+ with open('stderr.txt', 'r', encoding='utf-8') as f:
+ print(f.read())
+ raise
+ finally:
+ os.remove('stdout.txt')
+ os.remove('stderr.txt')
+
+
+class TestInit(unittest.TestCase):
+ def setUp(self):
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+ def tearDown(self):
+ shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
+
+ def test_basic_init(self):
+ with output_hidden():
+ archivebox_init.main([])
+
+ assert (Path(OUTPUT_DIR) / SQL_INDEX_FILENAME).exists()
+ assert (Path(OUTPUT_DIR) / JSON_INDEX_FILENAME).exists()
+ assert (Path(OUTPUT_DIR) / HTML_INDEX_FILENAME).exists()
+ assert len(load_main_index(out_dir=OUTPUT_DIR)) == 0
+
+ def test_conflicting_init(self):
+ with open(Path(OUTPUT_DIR) / 'test_conflict.txt', 'w+', encoding='utf-8') as f:
+ f.write('test')
+
+ try:
+ with output_hidden(show_failing=False):
+ archivebox_init.main([])
+ assert False, 'Init should have exited with an exception'
+ except SystemExit:
+ pass
+
+ assert not (Path(OUTPUT_DIR) / SQL_INDEX_FILENAME).exists()
+ assert not (Path(OUTPUT_DIR) / JSON_INDEX_FILENAME).exists()
+ assert not (Path(OUTPUT_DIR) / HTML_INDEX_FILENAME).exists()
+ try:
+ load_main_index(out_dir=OUTPUT_DIR)
+ assert False, 'load_main_index should raise an exception when no index is present'
+ except Exception:
+ pass
+
+ def test_no_dirty_state(self):
+ with output_hidden():
+ init()
+ shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
+ with output_hidden():
+ init()
+
+
+class TestAdd(unittest.TestCase):
+ def setUp(self):
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
+ with output_hidden():
+ init()
+
+ def tearDown(self):
+ shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
+
+ def test_add_arg_url(self):
+ with output_hidden():
+ archivebox_add.main(['https://getpocket.com/users/nikisweeting/feed/all'])
+
+ all_links = load_main_index(out_dir=OUTPUT_DIR)
+ assert len(all_links) == 30
+
+ def test_add_arg_file(self):
+ test_file = Path(OUTPUT_DIR) / 'test.txt'
+ with open(test_file, 'w+', encoding='utf-8') as f:
+ f.write(test_urls)
+
+ with output_hidden():
+ archivebox_add.main([str(test_file)])
+
+ all_links = load_main_index(out_dir=OUTPUT_DIR)
+ assert len(all_links) == 12
+ os.remove(test_file)
+
+ def test_add_stdin_url(self):
+ with output_hidden():
+ archivebox_add.main([], stdin=test_urls)
+
+ all_links = load_main_index(out_dir=OUTPUT_DIR)
+ assert len(all_links) == 12
+
+
+class TestRemove(unittest.TestCase):
+ def setUp(self):
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
+ with output_hidden():
+ init()
+ archivebox_add.main([], stdin=test_urls)
+
+ # def tearDown(self):
+ # shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
+
+
+ def test_remove_exact(self):
+ with output_hidden():
+ archivebox_remove.main(['--yes', '--delete', 'https://example5.com/'])
+
+ all_links = load_main_index(out_dir=OUTPUT_DIR)
+ assert len(all_links) == 11
+
+ def test_remove_regex(self):
+ with output_hidden():
+ archivebox_remove.main(['--yes', '--delete', '--filter-type=regex', r'http(s)?:\/\/(.+\.)?(example\d\.com)'])
+
+ all_links = load_main_index(out_dir=OUTPUT_DIR)
+ assert len(all_links) == 4
+
+ def test_remove_domain(self):
+ with output_hidden():
+ archivebox_remove.main(['--yes', '--delete', '--filter-type=domain', 'example5.com', 'example6.com'])
+
+ all_links = load_main_index(out_dir=OUTPUT_DIR)
+ assert len(all_links) == 10
+
+ def test_remove_none(self):
+ try:
+ with output_hidden(show_failing=False):
+ archivebox_remove.main(['--yes', '--delete', 'https://doesntexist.com'])
+ assert False, 'Should raise if no URLs match'
+ except Exception:
+ pass
+
+
+if __name__ == '__main__':
+ if '--verbose' in sys.argv or '-v' in sys.argv:
+ HIDE_CLI_OUTPUT = False
+
+ unittest.main()
diff --git a/archivebox/config.py b/archivebox/config.py
index 3d48344f..1c284ae7 100644
--- a/archivebox/config.py
+++ b/archivebox/config.py
@@ -29,10 +29,12 @@ import json
import getpass
import platform
import shutil
+import sqlite3
import django
from hashlib import md5
from pathlib import Path
+from datetime import datetime
from typing import Optional, Type, Tuple, Dict, Union, List
from subprocess import run, PIPE, DEVNULL
from configparser import ConfigParser
@@ -77,6 +79,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'PUBLIC_SNAPSHOTS': {'type': bool, 'default': True},
'PUBLIC_ADD_VIEW': {'type': bool, 'default': False},
'FOOTER_INFO': {'type': str, 'default': 'Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.'},
+ 'SNAPSHOTS_PER_PAGE': {'type': int, 'default': 40},
},
'ARCHIVE_METHOD_TOGGLES': {
@@ -99,8 +102,9 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'ARCHIVE_METHOD_OPTIONS': {
'RESOLUTION': {'type': str, 'default': '1440,2000', 'aliases': ('SCREENSHOT_RESOLUTION',)},
- 'GIT_DOMAINS': {'type': str, 'default': 'github.com,bitbucket.org,gitlab.com'},
+ 'GIT_DOMAINS': {'type': str, 'default': 'github.com,bitbucket.org,gitlab.com,gist.github.com'},
'CHECK_SSL_VALIDITY': {'type': bool, 'default': True},
+ 'MEDIA_MAX_SIZE': {'type': str, 'default': '750m'},
'CURL_USER_AGENT': {'type': str, 'default': 'ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) curl/{CURL_VERSION}'},
'WGET_USER_AGENT': {'type': str, 'default': 'ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) wget/{WGET_VERSION}'},
@@ -111,7 +115,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'CHROME_HEADLESS': {'type': bool, 'default': True},
'CHROME_SANDBOX': {'type': bool, 'default': lambda c: not c['IN_DOCKER']},
- 'YOUTUBEDL_ARGS': {'type': list, 'default': ['--write-description',
+ 'YOUTUBEDL_ARGS': {'type': list, 'default': lambda c: ['--write-description',
'--write-info-json',
'--write-annotations',
'--write-thumbnail',
@@ -122,7 +126,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'--ignore-errors',
'--geo-bypass',
'--add-metadata',
- '--max-filesize=750m',
+ '--max-filesize={}'.format(c['MEDIA_MAX_SIZE']),
]},
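This change works because config defaults that are callables receive the partially-resolved config dict, letting one option (`MEDIA_MAX_SIZE`) feed into another (`YOUTUBEDL_ARGS`). A simplified sketch of that resolution, assuming options are resolved in schema order (the real loader in `config.py` is more involved):

```python
# Simplified sketch of callable config defaults, as used by YOUTUBEDL_ARGS above.
CONFIG_SCHEMA = {
    'MEDIA_MAX_SIZE': {'default': '750m'},
    'YOUTUBEDL_ARGS': {'default': lambda c: [
        '--ignore-errors',
        '--max-filesize={}'.format(c['MEDIA_MAX_SIZE']),
    ]},
}

def load_config(schema):
    config = {}
    for key, opts in schema.items():   # assumes schema order respects dependencies
        default = opts['default']
        config[key] = default(config) if callable(default) else default
    return config

print(load_config(CONFIG_SCHEMA)['YOUTUBEDL_ARGS'])
# ['--ignore-errors', '--max-filesize=750m']
```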
@@ -287,7 +291,6 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0]},
'VERSION': {'default': lambda c: json.loads((Path(c['PACKAGE_DIR']) / 'package.json').read_text().strip())['version']},
- 'GIT_SHA': {'default': lambda c: c['VERSION'].split('+')[-1] or 'unknown'},
'PYTHON_BINARY': {'default': lambda c: sys.executable},
'PYTHON_ENCODING': {'default': lambda c: sys.stdout.encoding.upper()},
@@ -459,7 +462,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
config_file.optionxform = str
config_file.read(config_path)
- with open(config_path, 'r') as old:
+ with open(config_path, 'r', encoding='utf-8') as old:
atomic_write(f'{config_path}.bak', old.read())
find_section = lambda key: [name for name, opts in CONFIG_SCHEMA.items() if key in opts][0]
@@ -480,14 +483,14 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
if (not existing_secret_key) or ('not a valid secret' in existing_secret_key):
from django.utils.crypto import get_random_string
- chars = 'abcdefghijklmnopqrstuvwxyz0123456789-_+!.'
+ chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
random_secret_key = get_random_string(50, chars)
if 'SERVER_CONFIG' in config_file:
config_file['SERVER_CONFIG']['SECRET_KEY'] = random_secret_key
else:
config_file['SERVER_CONFIG'] = {'SECRET_KEY': random_secret_key}
- with open(config_path, 'w+') as new:
+ with open(config_path, 'w+', encoding='utf-8') as new:
config_file.write(new)
try:
@@ -499,7 +502,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
}
except:
# something went horribly wrong, revert to the previous version
- with open(f'{config_path}.bak', 'r') as old:
+ with open(f'{config_path}.bak', 'r', encoding='utf-8') as old:
atomic_write(config_path, old.read())
if Path(f'{config_path}.bak').exists():
@@ -1062,23 +1065,72 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
try:
import django
+ from django.core.management import call_command
+
sys.path.append(str(config['PACKAGE_DIR']))
os.environ.setdefault('OUTPUT_DIR', str(output_dir))
assert (config['PACKAGE_DIR'] / 'core' / 'settings.py').exists(), 'settings.py was not found at archivebox/core/settings.py'
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
+ # Check to make sure JSON extension is available in our Sqlite3 instance
+ try:
+ cursor = sqlite3.connect(':memory:').cursor()
+ cursor.execute('SELECT JSON(\'{"a": "b"}\')')
+ except sqlite3.OperationalError as exc:
+ stderr(f'[X] Your SQLite3 version is missing the required JSON1 extension: {exc}', color='red')
+ hint([
+ 'Upgrade your Python version or install the extension manually:',
+ 'https://code.djangoproject.com/wiki/JSON1Extension'
+ ])
+
if in_memory_db:
- # Put the db in memory and run migrations in case any command requires it
- from django.core.management import call_command
+ # some commands (e.g. oneshot) don't store a long-lived sqlite3 db file on disk.
+ # in those cases we create a temporary in-memory db and run the migrations
+ # immediately to get a usable in-memory database at startup
os.environ.setdefault("ARCHIVEBOX_DATABASE_NAME", ":memory:")
django.setup()
call_command("migrate", interactive=False, verbosity=0)
else:
+ # Otherwise use default sqlite3 file-based database and initialize django
+ # without running migrations automatically (user runs them manually by calling init)
django.setup()
+
+
+ from django.conf import settings
+
+ # log startup message to the error log
+ with open(settings.ERROR_LOG, "a+", encoding='utf-8') as f:
+ command = ' '.join(sys.argv)
+ ts = datetime.now().strftime('%Y-%m-%d__%H:%M:%S')
+ f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
+
if check_db:
+ # Enable WAL mode in sqlite3
+ from django.db import connection
+ with connection.cursor() as cursor:
+ cursor.execute("PRAGMA journal_mode")
+ current_mode = cursor.fetchone()[0]
+ if current_mode != 'wal':
+ cursor.execute("PRAGMA journal_mode=wal;")
+
+ # Create cache table in DB if needed
+ try:
+ from django.core.cache import cache
+ cache.get('test', None)
+ except django.db.utils.OperationalError:
+ call_command("createcachetable", verbosity=0)
+
+
+ # if archivebox gets imported multiple times, we have to close
+ # the sqlite3 connection whenever we re-init from scratch to avoid
+ # multiple threads sharing the same connection by accident
+ from django.db import connections
+ for conn in connections.all():
+ conn.close_if_unusable_or_obsolete()
+
sql_index_path = Path(output_dir) / SQL_INDEX_FILENAME
assert sql_index_path.exists(), (
f'No database file {SQL_INDEX_FILENAME} found in: {config["OUTPUT_DIR"]} (Are you in an ArchiveBox collection directory?)')
+
except KeyboardInterrupt:
raise SystemExit(2)
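The JSON1 probe added above is cheap because it only opens a throwaway in-memory database, so it can run on every startup. The same check works standalone:

```python
# Standalone version of the JSON1-extension probe used in setup_django() above;
# safe to run anywhere since it only touches an in-memory database.
import sqlite3

def sqlite_supports_json1() -> bool:
    try:
        cursor = sqlite3.connect(':memory:').cursor()
        cursor.execute('SELECT JSON(\'{"a": "b"}\')')
        return True
    except sqlite3.OperationalError:
        return False

print(sqlite_supports_json1())  # False means: upgrade Python / install JSON1
```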
diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py
index bacc53c0..91feb07b 100644
--- a/archivebox/core/admin.py
+++ b/archivebox/core/admin.py
@@ -1,6 +1,7 @@
__package__ = 'archivebox.core'
from io import StringIO
+from pathlib import Path
from contextlib import redirect_stdout
from django.contrib import admin
@@ -13,15 +14,15 @@ from django import forms
from ..util import htmldecode, urldecode, ansi_to_html
-from core.models import Snapshot, Tag
-from core.forms import AddLinkForm, TagField
+from core.models import Snapshot, ArchiveResult, Tag
+from core.forms import AddLinkForm
from core.mixins import SearchResultsAdminMixin
from index.html import snapshot_icons
from logging_util import printable_filesize
from main import add, remove
-from config import OUTPUT_DIR
+from config import OUTPUT_DIR, SNAPSHOTS_PER_PAGE
from extractors import archive_links
# Admin URLs
@@ -36,77 +37,34 @@ from extractors import archive_links
# TODO: https://stackoverflow.com/questions/40760880/add-custom-button-to-django-admin-panel
-def update_snapshots(modeladmin, request, queryset):
- archive_links([
- snapshot.as_link()
- for snapshot in queryset
- ], out_dir=OUTPUT_DIR)
-update_snapshots.short_description = "Archive"
-def update_titles(modeladmin, request, queryset):
- archive_links([
- snapshot.as_link()
- for snapshot in queryset
- ], overwrite=True, methods=('title','favicon'), out_dir=OUTPUT_DIR)
-update_titles.short_description = "Pull title"
+class ArchiveResultInline(admin.TabularInline):
+ model = ArchiveResult
-def overwrite_snapshots(modeladmin, request, queryset):
- archive_links([
- snapshot.as_link()
- for snapshot in queryset
- ], overwrite=True, out_dir=OUTPUT_DIR)
-overwrite_snapshots.short_description = "Re-archive (overwrite)"
+class TagInline(admin.TabularInline):
+ model = Snapshot.tags.through
-def verify_snapshots(modeladmin, request, queryset):
- for snapshot in queryset:
- print(snapshot.timestamp, snapshot.url, snapshot.is_archived, snapshot.archive_size, len(snapshot.history))
-
-verify_snapshots.short_description = "Check"
-
-def delete_snapshots(modeladmin, request, queryset):
- remove(snapshots=queryset, yes=True, delete=True, out_dir=OUTPUT_DIR)
-
-delete_snapshots.short_description = "Delete"
+from django.contrib.admin.helpers import ActionForm
-class SnapshotAdminForm(forms.ModelForm):
- tags = TagField(required=False)
-
- class Meta:
- model = Snapshot
- fields = "__all__"
-
- def save(self, commit=True):
- # Based on: https://stackoverflow.com/a/49933068/3509554
-
- # Get the unsave instance
- instance = forms.ModelForm.save(self, False)
- tags = self.cleaned_data.pop("tags")
-
- #update save_m2m
- def new_save_m2m():
- instance.save_tags(tags)
-
- # Do we need to save all changes now?
- self.save_m2m = new_save_m2m
- if commit:
- instance.save()
-
- return instance
+class SnapshotActionForm(ActionForm):
+ tag = forms.ModelChoiceField(queryset=Tag.objects.all(), required=False)
class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
list_display = ('added', 'title_str', 'url_str', 'files', 'size')
sort_fields = ('title_str', 'url_str', 'added')
- readonly_fields = ('id', 'url', 'timestamp', 'num_outputs', 'is_archived', 'url_hash', 'added', 'updated')
+ readonly_fields = ('uuid', 'num_outputs', 'is_archived', 'url_hash', 'added', 'updated')
search_fields = ['url__icontains', 'timestamp', 'title', 'tags__name']
- fields = (*readonly_fields, 'title', 'tags')
+ fields = ('timestamp', 'url', 'title', 'tags', *readonly_fields)
list_filter = ('added', 'updated', 'tags')
ordering = ['-added']
- actions = [delete_snapshots, overwrite_snapshots, update_snapshots, update_titles, verify_snapshots]
- actions_template = 'admin/actions_as_select.html'
- form = SnapshotAdminForm
- list_per_page = 40
+ actions = ['delete_snapshots', 'overwrite_snapshots', 'update_snapshots', 'update_titles', 'verify_snapshots', 'add_tag', 'remove_tag']
+ autocomplete_fields = ['tags']
+ inlines = [ArchiveResultInline]
+ list_per_page = SNAPSHOTS_PER_PAGE
+
+ action_form = SnapshotActionForm
def get_urls(self):
urls = super().get_urls()
@@ -116,21 +74,46 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
return custom_urls + urls
def get_queryset(self, request):
+ self.request = request
return super().get_queryset(request).prefetch_related('tags')
def tag_list(self, obj):
return ', '.join(obj.tags.values_list('name', flat=True))
- def id_str(self, obj):
+ # TODO: figure out a different way to do this, you can't nest forms so this doesn't work
+ # def action(self, obj):
+ # # csrfmiddlewaretoken: Wa8UcQ4fD3FJibzxqHN3IYrrjLo4VguWynmbzzcPYoebfVUnDovon7GEMYFRgsh0
+ # # action: update_snapshots
+ # # select_across: 0
+ # # _selected_action: 76d29b26-2a88-439e-877c-a7cca1b72bb3
+ # return format_html(
+ # '''
+ #
+ # ''',
+ # csrf.get_token(self.request),
+ # obj.id,
+ # )
+
+ def uuid(self, obj):
return format_html(
-            '<code style="font-size: 10px">{}</code><br/>',
-            obj.url_hash[:8],
+            '<code style="font-size: 10px">{}</code><br/>'
+            '<a href="/archive/{}/index.html">View index ➡️</a> '
+            '<a href="/admin/core/snapshot/{}/change/">View actions ⚙️</a>',
+ obj.id,
+ obj.timestamp,
+ obj.id,
)
def title_str(self, obj):
canon = obj.as_link().canonical_outputs()
tags = ''.join(
- format_html('{} ', tag.id, tag)
+ format_html('{} ', tag.id, tag)
for tag in obj.tags.all()
if str(tag).strip()
)
@@ -152,7 +135,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
return snapshot_icons(obj)
def size(self, obj):
- archive_size = obj.archive_size
+ archive_size = (Path(obj.link_dir) / 'index.html').exists() and obj.archive_size
if archive_size:
size_txt = printable_filesize(archive_size)
if archive_size > 52428800:
@@ -190,28 +173,135 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
rendered_response = self.changelist_view(request)
# Restore values
- self.change_list_template = saved_change_list_template
+ self.change_list_template = saved_change_list_template
self.list_per_page = saved_list_per_page
self.list_max_show_all = saved_list_max_show_all
return rendered_response
+
+
+ def update_snapshots(self, request, queryset):
+ archive_links([
+ snapshot.as_link()
+ for snapshot in queryset
+ ], out_dir=OUTPUT_DIR)
+ update_snapshots.short_description = "Archive"
+
+ def update_titles(self, request, queryset):
+ archive_links([
+ snapshot.as_link()
+ for snapshot in queryset
+ ], overwrite=True, methods=('title','favicon'), out_dir=OUTPUT_DIR)
+ update_titles.short_description = "Pull title"
+
+ def overwrite_snapshots(self, request, queryset):
+ archive_links([
+ snapshot.as_link()
+ for snapshot in queryset
+ ], overwrite=True, out_dir=OUTPUT_DIR)
+ overwrite_snapshots.short_description = "Re-archive (overwrite)"
+
+ def verify_snapshots(self, request, queryset):
+ for snapshot in queryset:
+ print(snapshot.timestamp, snapshot.url, snapshot.is_archived, snapshot.archive_size, len(snapshot.history))
+
+ verify_snapshots.short_description = "Check"
+
+ def delete_snapshots(self, request, queryset):
+ remove(snapshots=queryset, yes=True, delete=True, out_dir=OUTPUT_DIR)
+
+ delete_snapshots.short_description = "Delete"
+
+ def add_tag(self, request, queryset):
+ tag = request.POST['tag']
+ for obj in queryset:
+ obj.tags.add(tag)
+
+ add_tag.short_description = "Add tag"
+
+ def remove_tag(self, request, queryset):
+ tag = request.POST['tag']
+ for obj in queryset:
+ obj.tags.remove(tag)
+
+ remove_tag.short_description = "Remove tag"
+
- id_str.short_description = 'ID'
title_str.short_description = 'Title'
url_str.short_description = 'Original URL'
- id_str.admin_order_field = 'id'
title_str.admin_order_field = 'title'
url_str.admin_order_field = 'url'
+
+
class TagAdmin(admin.ModelAdmin):
- list_display = ('slug', 'name', 'id')
+ list_display = ('slug', 'name', 'num_snapshots', 'snapshots', 'id')
sort_fields = ('id', 'name', 'slug')
- readonly_fields = ('id',)
+ readonly_fields = ('id', 'num_snapshots', 'snapshots')
search_fields = ('id', 'name', 'slug')
fields = (*readonly_fields, 'name', 'slug')
+ actions = ['delete_selected']
+ ordering = ['-id']
+ def num_snapshots(self, obj):
+ return format_html(
+ '<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>',
+ obj.id,
+ obj.snapshot_set.count(),
+ )
+
+ def snapshots(self, obj):
+ total_count = obj.snapshot_set.count()
+ return mark_safe('<br/>'.join(
+ format_html(
+ '{} <a href="/admin/core/snapshot/{}/change/"><b>[{}]</b></a> {}',
+ snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...',
+ snap.id,
+ snap.timestamp,
+ snap.url,
+ )
+ for snap in obj.snapshot_set.order_by('-updated')[:10]
+ ) + (f'<br/>and {total_count-10} more...' if obj.snapshot_set.count() > 10 else ''))
+
+
+class ArchiveResultAdmin(admin.ModelAdmin):
+ list_display = ('id', 'start_ts', 'extractor', 'snapshot_str', 'cmd_str', 'status', 'output_str')
+ sort_fields = ('start_ts', 'extractor', 'status')
+ readonly_fields = ('id', 'uuid', 'snapshot_str')
+ search_fields = ('id', 'uuid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
+ fields = (*readonly_fields, 'snapshot', 'snapshot__tags', 'extractor', 'status', 'start_ts', 'end_ts', 'pwd', 'cmd', 'cmd_version', 'output')
+ autocomplete_fields = ['snapshot']
+
+ list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
+ ordering = ['-start_ts']
+ list_per_page = SNAPSHOTS_PER_PAGE
+
+ def snapshot_str(self, obj):
+ return format_html(
+ '<a href="/archive/{}/index.html"><b><code>[{}]</code></b></a><br/>'
+ '<small>{}</small>',
+ obj.snapshot.timestamp,
+ obj.snapshot.timestamp,
+ obj.snapshot.url[:128],
+ )
+
+ def cmd_str(self, obj):
+ return format_html(
+ '<pre>{}</pre>',
+ ' '.join(obj.cmd) if isinstance(obj.cmd, list) else str(obj.cmd),
+ )
+
+ def output_str(self, obj):
+ return format_html(
+ '<a href="/archive/{}/{}">↗️</a> <pre>{}</pre>',
+ obj.snapshot.timestamp,
+ obj.output if (obj.status == 'succeeded') and obj.extractor not in ('title', 'archive_org') else 'index.html',
+ obj.output,
+ )
+
+ snapshot_str.short_description = 'snapshot'
class ArchiveBoxAdmin(admin.AdminSite):
site_header = 'ArchiveBox'
@@ -266,4 +356,5 @@ admin.site = ArchiveBoxAdmin()
admin.site.register(get_user_model())
admin.site.register(Snapshot, SnapshotAdmin)
admin.site.register(Tag, TagAdmin)
+admin.site.register(ArchiveResult, ArchiveResultAdmin)
admin.site.disable_action('delete_selected')
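The refactor above replaces module-level action functions with methods referenced by name in `actions`, and uses a custom `ActionForm` so the changelist's action dropdown carries an extra `tag` input. Condensed to its essentials (a sketch assuming a configured Django project, not the full admin above):

```python
# Condensed sketch of the admin-action pattern used above: method-based actions
# named as strings, plus an ActionForm that adds a field next to the dropdown.
from django import forms
from django.contrib import admin
from django.contrib.admin.helpers import ActionForm

from core.models import Snapshot, Tag  # ArchiveBox's own models


class SnapshotActionForm(ActionForm):
    # rendered beside the action <select> on the changelist page
    tag = forms.ModelChoiceField(queryset=Tag.objects.all(), required=False)


class SnapshotAdmin(admin.ModelAdmin):
    action_form = SnapshotActionForm
    actions = ['add_tag']                  # strings resolve to methods below

    def add_tag(self, request, queryset):
        tag = request.POST['tag']          # pk submitted by the extra form field
        for obj in queryset:
            obj.tags.add(tag)              # .add() accepts pks as well as objects
    add_tag.short_description = 'Add tag'
```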
diff --git a/archivebox/core/forms.py b/archivebox/core/forms.py
index ed584c68..e3e904df 100644
--- a/archivebox/core/forms.py
+++ b/archivebox/core/forms.py
@@ -20,7 +20,8 @@ ARCHIVE_METHODS = [
class AddLinkForm(forms.Form):
url = forms.RegexField(label="URLs (one per line)", regex=URL_REGEX, min_length='6', strip=True, widget=forms.Textarea, required=True)
- depth = forms.ChoiceField(label="Archive depth", choices=CHOICES, widget=forms.RadioSelect, initial='0')
+ tag = forms.CharField(label="Tags (comma separated tag1,tag2,tag3)", strip=True, required=False)
+ depth = forms.ChoiceField(label="Archive depth", choices=CHOICES, initial='0', widget=forms.RadioSelect(attrs={"class": "depth-selection"}))
archive_methods = forms.MultipleChoiceField(
label="Archive methods (select at least 1, otherwise all will be used by default)",
required=False,
diff --git a/archivebox/core/migrations/0009_auto_20210216_1038.py b/archivebox/core/migrations/0009_auto_20210216_1038.py
new file mode 100644
index 00000000..2817fe54
--- /dev/null
+++ b/archivebox/core/migrations/0009_auto_20210216_1038.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.1.3 on 2021-02-16 10:38
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('core', '0008_auto_20210105_1421'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='snapshot',
+ name='updated',
+ field=models.DateTimeField(auto_now=True, db_index=True, null=True),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0010_auto_20210216_1055.py b/archivebox/core/migrations/0010_auto_20210216_1055.py
new file mode 100644
index 00000000..0af61a39
--- /dev/null
+++ b/archivebox/core/migrations/0010_auto_20210216_1055.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.1.3 on 2021-02-16 10:55
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('core', '0009_auto_20210216_1038'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='start_ts',
+ field=models.DateTimeField(db_index=True),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0011_auto_20210216_1331.py b/archivebox/core/migrations/0011_auto_20210216_1331.py
new file mode 100644
index 00000000..d2226674
--- /dev/null
+++ b/archivebox/core/migrations/0011_auto_20210216_1331.py
@@ -0,0 +1,24 @@
+# Generated by Django 3.1.3 on 2021-02-16 13:31
+
+from django.db import migrations, models
+import uuid
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('core', '0010_auto_20210216_1055'),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name='archiveresult',
+ name='uuid',
+ field=models.UUIDField(default=uuid.uuid4, editable=False),
+ ),
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='extractor',
+ field=models.CharField(choices=[('title', 'title'), ('favicon', 'favicon'), ('headers', 'headers'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('wget', 'wget'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('archive_org', 'archive_org')], max_length=32),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0012_auto_20210216_1425.py b/archivebox/core/migrations/0012_auto_20210216_1425.py
new file mode 100644
index 00000000..310058ac
--- /dev/null
+++ b/archivebox/core/migrations/0012_auto_20210216_1425.py
@@ -0,0 +1,23 @@
+# Generated by Django 3.1.3 on 2021-02-16 14:25
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('core', '0011_auto_20210216_1331'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='cmd_version',
+ field=models.CharField(blank=True, default=None, max_length=128, null=True),
+ ),
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='output',
+ field=models.CharField(max_length=1024),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0013_auto_20210218_0729.py b/archivebox/core/migrations/0013_auto_20210218_0729.py
new file mode 100644
index 00000000..d3fe3b4f
--- /dev/null
+++ b/archivebox/core/migrations/0013_auto_20210218_0729.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.1.3 on 2021-02-18 07:29
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('core', '0012_auto_20210216_1425'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='snapshot',
+ name='title',
+ field=models.CharField(blank=True, db_index=True, max_length=256, null=True),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0014_auto_20210218_0729.py b/archivebox/core/migrations/0014_auto_20210218_0729.py
new file mode 100644
index 00000000..db81934f
--- /dev/null
+++ b/archivebox/core/migrations/0014_auto_20210218_0729.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.1.3 on 2021-02-18 07:29
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('core', '0013_auto_20210218_0729'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='snapshot',
+ name='title',
+ field=models.CharField(blank=True, db_index=True, max_length=1024, null=True),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0015_auto_20210218_0730.py b/archivebox/core/migrations/0015_auto_20210218_0730.py
new file mode 100644
index 00000000..b782a217
--- /dev/null
+++ b/archivebox/core/migrations/0015_auto_20210218_0730.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.1.3 on 2021-02-18 07:30
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('core', '0014_auto_20210218_0729'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='snapshot',
+ name='title',
+ field=models.CharField(blank=True, db_index=True, max_length=512, null=True),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0016_auto_20210218_1204.py b/archivebox/core/migrations/0016_auto_20210218_1204.py
new file mode 100644
index 00000000..4637feab
--- /dev/null
+++ b/archivebox/core/migrations/0016_auto_20210218_1204.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.1.3 on 2021-02-18 12:04
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('core', '0015_auto_20210218_0730'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='snapshot',
+ name='tags',
+ field=models.ManyToManyField(blank=True, to='core.Tag'),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0017_auto_20210219_0211.py b/archivebox/core/migrations/0017_auto_20210219_0211.py
new file mode 100644
index 00000000..221a250b
--- /dev/null
+++ b/archivebox/core/migrations/0017_auto_20210219_0211.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.1.3 on 2021-02-19 02:11
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('core', '0016_auto_20210218_1204'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='tag',
+ name='slug',
+ field=models.SlugField(blank=True, max_length=100, unique=True, verbose_name='slug'),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0018_auto_20210327_0952.py b/archivebox/core/migrations/0018_auto_20210327_0952.py
new file mode 100644
index 00000000..d0f3dde1
--- /dev/null
+++ b/archivebox/core/migrations/0018_auto_20210327_0952.py
@@ -0,0 +1,23 @@
+# Generated by Django 3.1.3 on 2021-03-27 09:52
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('core', '0017_auto_20210219_0211'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='tag',
+ name='name',
+ field=models.CharField(max_length=100, unique=True),
+ ),
+ migrations.AlterField(
+ model_name='tag',
+ name='slug',
+ field=models.SlugField(blank=True, max_length=100, unique=True),
+ ),
+ ]
diff --git a/archivebox/core/models.py b/archivebox/core/models.py
index 13d75b66..e7741920 100644
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -2,12 +2,15 @@ __package__ = 'archivebox.core'
import uuid
-from django.db import models, transaction
+from django.db import models
from django.utils.functional import cached_property
from django.utils.text import slugify
+from django.core.cache import cache
from django.db.models import Case, When, Value, IntegerField
-from ..util import parse_date
+from ..config import ARCHIVE_DIR, ARCHIVE_DIR_NAME
+from ..system import get_dir_size
+from ..util import parse_date, base_url, hashurl
from ..index.schema import Link
from ..extractors import get_default_archive_methods, ARCHIVE_METHODS_INDEXING_PRECEDENCE
@@ -29,8 +32,11 @@ class Tag(models.Model):
"""
Based on django-taggit model
"""
- name = models.CharField(verbose_name="name", unique=True, blank=False, max_length=100)
- slug = models.SlugField(verbose_name="slug", unique=True, max_length=100)
+ name = models.CharField(unique=True, blank=False, max_length=100)
+
+ # slug is autoset on save from name, never set it manually
+ slug = models.SlugField(unique=True, blank=True, max_length=100)
+
class Meta:
verbose_name = "Tag"
@@ -49,20 +55,21 @@ class Tag(models.Model):
if self._state.adding and not self.slug:
self.slug = self.slugify(self.name)
- with transaction.atomic():
- slugs = set(
- type(self)
- ._default_manager.filter(slug__startswith=self.slug)
- .values_list("slug", flat=True)
- )
+ # if the name is different but the slug conflicts with another tag's slug, append a counter
+ # with transaction.atomic():
+ slugs = set(
+ type(self)
+ ._default_manager.filter(slug__startswith=self.slug)
+ .values_list("slug", flat=True)
+ )
- i = None
- while True:
- slug = self.slugify(self.name, i)
- if slug not in slugs:
- self.slug = slug
- return super().save(*args, **kwargs)
- i = 1 if i is None else i+1
+ i = None
+ while True:
+ slug = self.slugify(self.name, i)
+ if slug not in slugs:
+ self.slug = slug
+ return super().save(*args, **kwargs)
+ i = 1 if i is None else i+1
else:
return super().save(*args, **kwargs)
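In plain terms, the loop kept above appends an incrementing counter to the slug until it no longer collides with an existing one. Roughly, with a stand-in `slugify()` (the real method delegates to `django.utils.text.slugify`):

```python
# Rough illustration of the slug de-duplication loop in Tag.save() above;
# the separator and slugify() implementation here are stand-ins.
def slugify(name, i=None):
    slug = name.lower().strip().replace(' ', '-')
    return f'{slug}_{i}' if i is not None else slug

def unique_slug(name, existing_slugs):
    i = None
    while True:
        slug = slugify(name, i)
        if slug not in existing_slugs:
            return slug
        i = 1 if i is None else i + 1

print(unique_slug('Some Tag', {'some-tag', 'some-tag_1'}))  # -> 'some-tag_2'
```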
@@ -73,11 +80,11 @@ class Snapshot(models.Model):
url = models.URLField(unique=True)
timestamp = models.CharField(max_length=32, unique=True, db_index=True)
- title = models.CharField(max_length=128, null=True, blank=True, db_index=True)
+ title = models.CharField(max_length=512, null=True, blank=True, db_index=True)
added = models.DateTimeField(auto_now_add=True, db_index=True)
- updated = models.DateTimeField(null=True, blank=True, db_index=True)
- tags = models.ManyToManyField(Tag)
+ updated = models.DateTimeField(auto_now=True, blank=True, null=True, db_index=True)
+ tags = models.ManyToManyField(Tag, blank=True)
keys = ('url', 'timestamp', 'title', 'tags', 'updated')
@@ -109,13 +116,24 @@ class Snapshot(models.Model):
from ..index import load_link_details
return load_link_details(self.as_link())
- def tags_str(self) -> str:
- return ','.join(self.tags.order_by('name').values_list('name', flat=True))
+ def tags_str(self, nocache=True) -> str:
+ cache_key = f'{self.id}-{(self.updated or self.added).timestamp()}-tags'
+ calc_tags_str = lambda: ','.join(self.tags.order_by('name').values_list('name', flat=True))
+ if nocache:
+ tags_str = calc_tags_str()
+ cache.set(cache_key, tags_str)
+ return tags_str
+ return cache.get_or_set(cache_key, calc_tags_str)
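+ # e.g. a (hypothetical) cached entry: '1a2b3c4d...-1616838751.0-tags' -> 'django,python'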
@cached_property
def bookmarked(self):
return parse_date(self.timestamp)
+ @cached_property
+ def bookmarked_date(self):
+ # TODO: remove this
+ return self.bookmarked
+
@cached_property
def is_archived(self):
return self.as_link().is_archived
@@ -126,23 +144,31 @@ class Snapshot(models.Model):
@cached_property
def url_hash(self):
- return self.as_link().url_hash
+ return hashurl(self.url)
@cached_property
def base_url(self):
- return self.as_link().base_url
+ return base_url(self.url)
@cached_property
def link_dir(self):
- return self.as_link().link_dir
+ return str(ARCHIVE_DIR / self.timestamp)
@cached_property
def archive_path(self):
- return self.as_link().archive_path
+ return '{}/{}'.format(ARCHIVE_DIR_NAME, self.timestamp)
@cached_property
def archive_size(self):
- return self.as_link().archive_size
+ cache_key = f'{str(self.id)[:12]}-{(self.updated or self.added).timestamp()}-size'
+
+ def calc_dir_size():
+ try:
+ return get_dir_size(self.link_dir)[0]
+ except Exception:
+ return 0
+
+ return cache.get_or_set(cache_key, calc_dir_size)
@cached_property
def history(self):
@@ -151,17 +177,40 @@ class Snapshot(models.Model):
@cached_property
def latest_title(self):
- if ('title' in self.history
- and self.history['title']
- and (self.history['title'][-1].status == 'succeeded')
- and self.history['title'][-1].output.strip()):
- return self.history['title'][-1].output.strip()
+ if self.title:
+ return self.title # whoopdedoo that was easy
+
+ try:
+ # take longest successful title from ArchiveResult db history
+ return sorted(
+ self.archiveresult_set\
+ .filter(extractor='title', status='succeeded', output__isnull=False)\
+ .values_list('output', flat=True),
+ key=lambda r: len(r),
+ )[-1]
+ except IndexError:
+ pass
+
+ try:
+ # take longest successful title from Link json index file history
+ return sorted(
+ (
+ result.output.strip()
+ for result in self.history['title']
+ if result.status == 'succeeded' and result.output.strip()
+ ),
+ key=lambda r: len(r),
+ )[-1]
+ except (KeyError, IndexError):
+ pass
+
return None
def save_tags(self, tags=()):
tags_id = []
for tag in tags:
- tags_id.append(Tag.objects.get_or_create(name=tag)[0].id)
+ if tag.strip():
+ tags_id.append(Tag.objects.get_or_create(name=tag)[0].id)
self.tags.clear()
self.tags.add(*tags_id)
@@ -178,15 +227,18 @@ class ArchiveResultManager(models.Manager):
class ArchiveResult(models.Model):
+ id = models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')
+ uuid = models.UUIDField(default=uuid.uuid4, editable=False)
+
snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
+ extractor = models.CharField(choices=EXTRACTORS, max_length=32)
cmd = JSONField()
pwd = models.CharField(max_length=256)
- cmd_version = models.CharField(max_length=32, default=None, null=True, blank=True)
- output = models.CharField(max_length=512)
- start_ts = models.DateTimeField()
+ cmd_version = models.CharField(max_length=128, default=None, null=True, blank=True)
+ output = models.CharField(max_length=1024)
+ start_ts = models.DateTimeField(db_index=True)
end_ts = models.DateTimeField()
status = models.CharField(max_length=16, choices=STATUS_CHOICES)
- extractor = models.CharField(choices=EXTRACTORS, max_length=32)
objects = ArchiveResultManager()
diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py
index e73c93d9..6a795702 100644
--- a/archivebox/core/settings.py
+++ b/archivebox/core/settings.py
@@ -2,6 +2,9 @@ __package__ = 'archivebox.core'
import os
import sys
+import re
+import logging
+import tempfile
from pathlib import Path
from django.utils.crypto import get_random_string
@@ -14,6 +17,7 @@ from ..config import (
TEMPLATES_DIR_NAME,
SQL_INDEX_FILENAME,
OUTPUT_DIR,
+ LOGS_DIR,
)
@@ -62,6 +66,40 @@ AUTHENTICATION_BACKENDS = [
'django.contrib.auth.backends.ModelBackend',
]
+# only enable the debug toolbar when in DEBUG mode with --nothreading (it doesn't work in multithreaded mode)
+DEBUG_TOOLBAR = DEBUG and ('--nothreading' in sys.argv) and ('--reload' not in sys.argv)
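+# e.g. running the dev server with something like `archivebox manage runserver --nothreading` while DEBUG=True would enable it (assumed invocation)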
+if DEBUG_TOOLBAR:
+ try:
+ import debug_toolbar # noqa
+ DEBUG_TOOLBAR = True
+ except ImportError:
+ DEBUG_TOOLBAR = False
+
+if DEBUG_TOOLBAR:
+ INSTALLED_APPS = [*INSTALLED_APPS, 'debug_toolbar']
+ INTERNAL_IPS = ['0.0.0.0', '127.0.0.1', '*']
+ DEBUG_TOOLBAR_CONFIG = {
+ "SHOW_TOOLBAR_CALLBACK": lambda request: True,
+ "RENDER_PANELS": True,
+ }
+ DEBUG_TOOLBAR_PANELS = [
+ 'debug_toolbar.panels.history.HistoryPanel',
+ 'debug_toolbar.panels.versions.VersionsPanel',
+ 'debug_toolbar.panels.timer.TimerPanel',
+ 'debug_toolbar.panels.settings.SettingsPanel',
+ 'debug_toolbar.panels.headers.HeadersPanel',
+ 'debug_toolbar.panels.request.RequestPanel',
+ 'debug_toolbar.panels.sql.SQLPanel',
+ 'debug_toolbar.panels.staticfiles.StaticFilesPanel',
+ # 'debug_toolbar.panels.templates.TemplatesPanel',
+ 'debug_toolbar.panels.cache.CachePanel',
+ 'debug_toolbar.panels.signals.SignalsPanel',
+ 'debug_toolbar.panels.logging.LoggingPanel',
+ 'debug_toolbar.panels.redirects.RedirectsPanel',
+ 'debug_toolbar.panels.profiling.ProfilingPanel',
+ 'djdt_flamegraph.FlamegraphPanel',
+ ]
+ MIDDLEWARE = [*MIDDLEWARE, 'debug_toolbar.middleware.DebugToolbarMiddleware']
################################################################################
### Staticfile and Template Settings
@@ -107,6 +145,22 @@ DATABASES = {
'default': {
'ENGINE': 'django.db.backends.sqlite3',
'NAME': DATABASE_NAME,
+ 'OPTIONS': {
+ 'timeout': 60,
+ 'check_same_thread': False,
+ },
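+ # the longer timeout and check_same_thread=False help avoid 'database is locked'
+ # errors when multiple threads share the same sqlite3 database file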
+ # DB setup is sometimes modified at runtime by setup_django() in config.py
+ }
+}
+
+CACHE_BACKEND = 'django.core.cache.backends.locmem.LocMemCache'
+# CACHE_BACKEND = 'django.core.cache.backends.db.DatabaseCache'
+# CACHE_BACKEND = 'django.core.cache.backends.dummy.DummyCache'
+
+CACHES = {
+ 'default': {
+ 'BACKEND': CACHE_BACKEND,
+ 'LOCATION': 'django_cache_default',
}
}
@@ -117,7 +171,7 @@ EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
### Security Settings
################################################################################
-SECRET_KEY = SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789-_+!.')
+SECRET_KEY = SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789_')
ALLOWED_HOSTS = ALLOWED_HOSTS.split(',')
@@ -131,6 +185,8 @@ SESSION_COOKIE_AGE = 1209600 # 2 weeks
SESSION_EXPIRE_AT_BROWSER_CLOSE = False
SESSION_SAVE_EVERY_REQUEST = True
+SESSION_ENGINE = "django.contrib.sessions.backends.db"
+
AUTH_PASSWORD_VALIDATORS = [
{'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator'},
{'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator'},
@@ -163,3 +219,73 @@ USE_TZ = False
DATETIME_FORMAT = 'Y-m-d g:iA'
SHORT_DATETIME_FORMAT = 'Y-m-d h:iA'
+
+
+################################################################################
+### Logging Settings
+################################################################################
+
+IGNORABLE_404_URLS = [
+ re.compile(r'apple-touch-icon.*\.png$'),
+ re.compile(r'favicon\.ico$'),
+ re.compile(r'robots\.txt$'),
+ re.compile(r'.*\.(css|js)\.map$'),
+]
+
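+# e.g. the filter below drops lines like '"GET /static/admin/css/base.css HTTP/1.1" 200 16378'
+# and '"GET /favicon.ico HTTP/1.1" 404 1234' while keeping all other request log lines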
+class NoisyRequestsFilter(logging.Filter):
+ def filter(self, record):
+ logline = record.getMessage()
+
+ # ignore harmless 404s for the patterns in IGNORABLE_404_URLS
+ for ignorable_url_pattern in IGNORABLE_404_URLS:
+ ignorable_log_pattern = re.compile(f'^"GET /.*/?{ignorable_url_pattern.pattern[:-1]} HTTP/.*" (200|30.|404) .+$', re.I | re.M)
+ if ignorable_log_pattern.match(logline):
+ return 0
+
+ # ignore staticfile requests that 200 or 30*
+ ignorable_200_log_pattern = re.compile(r'"GET /static/.* HTTP/.*" (200|30.) .+', re.I | re.M)
+ if ignorable_200_log_pattern.match(logline):
+ return 0
+
+ return 1
+
+if LOGS_DIR.exists():
+ ERROR_LOG = (LOGS_DIR / 'errors.log')
+else:
+ # meh, too many edge cases here around creating the log dir w/ correct permissions
+ # can't be bothered, just trash the log and let them figure it out via stdout/stderr
+ ERROR_LOG = tempfile.NamedTemporaryFile().name
+
+LOGGING = {
+ 'version': 1,
+ 'disable_existing_loggers': False,
+ 'handlers': {
+ 'console': {
+ 'class': 'logging.StreamHandler',
+ },
+ 'logfile': {
+ 'level': 'ERROR',
+ 'class': 'logging.handlers.RotatingFileHandler',
+ 'filename': ERROR_LOG,
+ 'maxBytes': 1024 * 1024 * 25, # 25 MB
+ 'backupCount': 10,
+ },
+ },
+ 'filters': {
+ 'noisyrequestsfilter': {
+ '()': NoisyRequestsFilter,
+ }
+ },
+ 'loggers': {
+ 'django': {
+ 'handlers': ['console', 'logfile'],
+ 'level': 'INFO',
+ 'filters': ['noisyrequestsfilter'],
+ },
+ 'django.server': {
+ 'handlers': ['console', 'logfile'],
+ 'level': 'INFO',
+ 'filters': ['noisyrequestsfilter'],
+ }
+ },
+}
diff --git a/archivebox/core/urls.py b/archivebox/core/urls.py
index 182e4dca..87a302b8 100644
--- a/archivebox/core/urls.py
+++ b/archivebox/core/urls.py
@@ -2,6 +2,7 @@ from django.contrib import admin
from django.urls import path, include
from django.views import static
+from django.contrib.staticfiles.urls import staticfiles_urlpatterns
from django.conf import settings
from django.views.generic.base import RedirectView
@@ -13,8 +14,8 @@ from core.views import HomepageView, SnapshotView, PublicIndexView, AddView
urlpatterns = [
path('public/', PublicIndexView.as_view(), name='public-index'),
- path('robots.txt', static.serve, {'document_root': settings.OUTPUT_DIR, 'path': 'robots.txt'}),
- path('favicon.ico', static.serve, {'document_root': settings.OUTPUT_DIR, 'path': 'favicon.ico'}),
+ path('robots.txt', static.serve, {'document_root': settings.STATICFILES_DIRS[0], 'path': 'robots.txt'}),
+ path('favicon.ico', static.serve, {'document_root': settings.STATICFILES_DIRS[0], 'path': 'favicon.ico'}),
path('docs/', RedirectView.as_view(url='https://github.com/ArchiveBox/ArchiveBox/wiki'), name='Docs'),
@@ -35,35 +36,43 @@ urlpatterns = [
path('index.json', static.serve, {'document_root': settings.OUTPUT_DIR, 'path': 'index.json'}),
path('', HomepageView.as_view(), name='Home'),
]
+urlpatterns += staticfiles_urlpatterns()
- # # Proposed UI URLs spec
- # path('', HomepageView)
- # path('/add', AddView)
- # path('/public', PublicIndexView)
- # path('/snapshot/:slug', SnapshotView)
-
- # path('/admin', admin.site.urls)
- # path('/accounts', django.contrib.auth.urls)
+if settings.DEBUG_TOOLBAR:
+ import debug_toolbar
+ urlpatterns += [
+ path('__debug__/', include(debug_toolbar.urls)),
+ ]
- # # Prposed REST API spec
- # # :slugs can be uuid, short_uuid, or any of the unique index_fields
- # path('api/v1/'),
- # path('api/v1/core/' [GET])
- # path('api/v1/core/snapshot/', [GET, POST, PUT]),
- # path('api/v1/core/snapshot/:slug', [GET, PATCH, DELETE]),
- # path('api/v1/core/archiveresult', [GET, POST, PUT]),
- # path('api/v1/core/archiveresult/:slug', [GET, PATCH, DELETE]),
- # path('api/v1/core/tag/', [GET, POST, PUT]),
- # path('api/v1/core/tag/:slug', [GET, PATCH, DELETE]),
- # path('api/v1/cli/', [GET])
- # path('api/v1/cli/{add,list,config,...}', [POST]), # pass query as kwargs directly to `run_subcommand` and return stdout, stderr, exitcode
+# # Proposed FUTURE URLs spec
+# path('', HomepageView)
+# path('/add', AddView)
+# path('/public', PublicIndexView)
+# path('/snapshot/:slug', SnapshotView)
- # path('api/v1/extractors/', [GET])
- # path('api/v1/extractors/:extractor/', [GET]),
- # path('api/v1/extractors/:extractor/:func', [GET, POST]), # pass query as args directly to chosen function
+# path('/admin', admin.site.urls)
+# path('/accounts', django.contrib.auth.urls)
- # future, just an idea:
- # path('api/v1/scheduler/', [GET])
- # path('api/v1/scheduler/task/', [GET, POST, PUT]),
- # path('api/v1/scheduler/task/:slug', [GET, PATCH, DELETE]),
+# # Proposed REST API spec
+# # :slugs can be uuid, short_uuid, or any of the unique index_fields
+# path('api/v1/'),
+# path('api/v1/core/' [GET])
+# path('api/v1/core/snapshot/', [GET, POST, PUT]),
+# path('api/v1/core/snapshot/:slug', [GET, PATCH, DELETE]),
+# path('api/v1/core/archiveresult', [GET, POST, PUT]),
+# path('api/v1/core/archiveresult/:slug', [GET, PATCH, DELETE]),
+# path('api/v1/core/tag/', [GET, POST, PUT]),
+# path('api/v1/core/tag/:slug', [GET, PATCH, DELETE]),
+
+# path('api/v1/cli/', [GET])
+# path('api/v1/cli/{add,list,config,...}', [POST]), # pass query as kwargs directly to `run_subcommand` and return stdout, stderr, exitcode
+
+# path('api/v1/extractors/', [GET])
+# path('api/v1/extractors/:extractor/', [GET]),
+# path('api/v1/extractors/:extractor/:func', [GET, POST]), # pass query as args directly to chosen function
+
+# future, just an idea:
+# path('api/v1/scheduler/', [GET])
+# path('api/v1/scheduler/task/', [GET, POST, PUT]),
+# path('api/v1/scheduler/task/:slug', [GET, PATCH, DELETE]),
diff --git a/archivebox/core/views.py b/archivebox/core/views.py
index 0e19fad6..36794a8d 100644
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -4,8 +4,8 @@ from io import StringIO
from contextlib import redirect_stdout
from django.shortcuts import render, redirect
-
-from django.http import HttpResponse
+from django.http import HttpResponse, Http404
+from django.utils.html import format_html, mark_safe
from django.views import View, static
from django.views.generic.list import ListView
from django.views.generic import FormView
@@ -22,6 +22,7 @@ from ..config import (
PUBLIC_ADD_VIEW,
VERSION,
FOOTER_INFO,
+ SNAPSHOTS_PER_PAGE,
)
from main import add
from ..util import base_url, ansi_to_html
@@ -43,10 +44,6 @@ class SnapshotView(View):
# render static html index from filesystem archive//index.html
def get(self, request, path):
- # missing trailing slash -> redirect to index
- if '/' not in path:
- return redirect(f'{path}/index.html')
-
if not request.user.is_authenticated and not PUBLIC_SNAPSHOTS:
return redirect(f'/admin/login/?next={request.path}')
@@ -55,46 +52,163 @@ class SnapshotView(View):
except (IndexError, ValueError):
slug, archivefile = path.split('/', 1)[0], 'index.html'
- all_pages = list(Snapshot.objects.all())
-
# slug is a timestamp
- by_ts = {page.timestamp: page for page in all_pages}
- try:
- # print('SERVING STATICFILE', by_ts[slug].link_dir, request.path, path)
- response = static.serve(request, archivefile, document_root=by_ts[slug].link_dir, show_indexes=True)
- response["Link"] = f'<{by_ts[slug].url}>; rel="canonical"'
- return response
- except KeyError:
- pass
+ if slug.replace('.','').isdigit():
- # slug is a hash
- by_hash = {page.url_hash: page for page in all_pages}
- try:
- timestamp = by_hash[slug].timestamp
- return redirect(f'/archive/{timestamp}/{archivefile}')
- except KeyError:
- pass
+ # missing trailing slash -> redirect to index
+ if '/' not in path:
+ return redirect(f'{path}/index.html')
+ try:
+ try:
+ snapshot = Snapshot.objects.get(Q(timestamp=slug) | Q(id__startswith=slug))
+ response = static.serve(request, archivefile, document_root=snapshot.link_dir, show_indexes=True)
+ response["Link"] = f'<{snapshot.url}>; rel="canonical"'
+ return response
+ except Snapshot.DoesNotExist:
+ if Snapshot.objects.filter(timestamp__startswith=slug).exists():
+ raise Snapshot.MultipleObjectsReturned
+ else:
+ raise
+ except Snapshot.DoesNotExist:
+ # Snapshot does not exist
+ return HttpResponse(
+ format_html(
+ (
+ '<center><br/><br/><br/>'
+ 'No Snapshot directories match the given timestamp or UUID: <code>{}</code><br/><br/>'
+ 'You can <a href="/add/?url={}" target="_top">add a new Snapshot</a>, or return to the <a href="/" target="_top">Main Index</a>'
+ '</center>'
+ ),
+ slug,
+ path,
+ ),
+ content_type="text/html",
+ status=404,
+ )
+ except Snapshot.MultipleObjectsReturned:
+ snapshot_hrefs = mark_safe('<br/>').join(
+ format_html(
+ '{} <code>{}</code> <a href="/archive/{}/index.html"><b><code>{}</code></b></a> {}',
+ snap.added.strftime('%Y-%m-%d %H:%M:%S'),
+ snap.timestamp,
+ snap.timestamp,
+ snap.url,
+ snap.title or '',
+ )
+ for snap in Snapshot.objects.filter(timestamp__startswith=slug).only('url', 'timestamp', 'title', 'added').order_by('-added')
+ )
+ return HttpResponse(
+ format_html(
+ (
+ '<center><br/>'
+ 'Multiple Snapshots match the given timestamp/UUID <code>{}</code><br/><pre>'
+ ),
+ slug,
+ ) + snapshot_hrefs + format_html(
+ (
+ '</pre><br/>'
+ 'Choose a Snapshot to proceed or go back to the <a href="/" target="_top">Main Index</a>'
+ '</center>'
+ )
+ ),
+ content_type="text/html",
+ status=404,
+ )
+ except Http404:
+ # Snapshot dir exists but file within does not e.g. 124235.324234/screenshot.png
+ return HttpResponse(
+ format_html(
+ (
+ '<center><br/><br/><br/>'
+ f'Snapshot <a href="/archive/{snapshot.timestamp}/index.html"><b><code>[{snapshot.timestamp}]</code></b></a><br/>'
+ f'exists in DB, but resource <b><code>{snapshot.timestamp}/'
+ '{}'
+ f'</code></b><br/>'
+ 'does not exist in snapshot dir yet.<br/><br/>'
+ 'Maybe this resource type is not available for this Snapshot,<br/>'
+ 'or the archiving process has not completed yet?<br/>'
+ f'<pre><code># run this cmd to finish archiving this Snapshot<br/>archivebox update -t timestamp {snapshot.timestamp}</code></pre>'
+ '<div style="text-align: left; width: 100%; max-width: 400px">'
+ '<br/><i><b>Next steps:</b></i><br/>'
+ f'- list all the <a href="/archive/{snapshot.timestamp}/">Snapshot files <code>.*</code></a><br/>'
+ f'- view the <a href="/archive/{snapshot.timestamp}/index.html">Snapshot <code>./index.html</code></a><br/>'
+ f'- go to the <a href="/admin/core/snapshot/{snapshot.id}/change/">Snapshot admin</a> to edit<br/>'
+ f'- go to the <a href="/admin/core/snapshot/?id__startswith={snapshot.id}">Snapshot actions</a> to re-archive<br/>'
+ '- or return to <a href="/" target="_top">the main index...</a>'
+ '</div>'
+ '</center>'
+ ),
+ archivefile,
+ ),
+ content_type="text/html",
+ status=404,
+ )
# slug is a URL
- by_url = {page.base_url: page for page in all_pages}
try:
- # TODO: add multiple snapshot support by showing index of all snapshots
- # for given url instead of redirecting to timestamp index
- timestamp = by_url[base_url(path)].timestamp
- return redirect(f'/archive/{timestamp}/index.html')
- except KeyError:
- pass
-
- return HttpResponse(
- 'No archived link matches the given timestamp or hash.',
- content_type="text/plain",
- status=404,
- )
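+ # e.g. for path 'example.com/page' this tries the exact URL first, then the exact base_url, then base_url as a prefix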
+ try:
+ # try exact match on full url first
+ snapshot = Snapshot.objects.get(
+ Q(url='http://' + path) | Q(url='https://' + path) | Q(id__startswith=path)
+ )
+ except Snapshot.DoesNotExist:
+ # fall back to match on exact base_url
+ try:
+ snapshot = Snapshot.objects.get(
+ Q(url='http://' + base_url(path)) | Q(url='https://' + base_url(path))
+ )
+ except Snapshot.DoesNotExist:
+ # fall back to matching base_url as prefix
+ snapshot = Snapshot.objects.get(
+ Q(url__startswith='http://' + base_url(path)) | Q(url__startswith='https://' + base_url(path))
+ )
+ return redirect(f'/archive/{snapshot.timestamp}/index.html')
+ except Snapshot.DoesNotExist:
+ return HttpResponse(
+ format_html(
+ (
+ '<center><br/><br/><br/>'
+ 'No Snapshots match the given url: <code>{}</code><br/><br/>'
+ 'Return to the <a href="/" target="_top">Main Index</a>, or:<br/><br/>'
+ '+ <a href="/add/?url={}" target="_top">Add a new Snapshot for <code>{}</code></a><br/><br/>'
+ '</center>'
+ ),
+ base_url(path),
+ path if '://' in path else f'https://{path}',
+ path,
+ ),
+ content_type="text/html",
+ status=404,
+ )
+ except Snapshot.MultipleObjectsReturned:
+ snapshot_hrefs = mark_safe('<br/>').join(
+ format_html(
+ '{} <code>{}</code> <a href="/archive/{}/index.html"><b><code>{}</code></b></a> {}',
+ snap.added.strftime('%Y-%m-%d %H:%M:%S'),
+ snap.timestamp,
+ snap.timestamp,
+ snap.url,
+ snap.title or '',
+ )
+ for snap in Snapshot.objects.filter(
+ Q(url__startswith='http://' + base_url(path)) | Q(url__startswith='https://' + base_url(path))
+ ).only('url', 'timestamp', 'title', 'added').order_by('-added')
+ )
+ return HttpResponse(
+ format_html(
+ (
+ '<center><br/>'
+ 'Multiple Snapshots match the given URL <code>{}</code><br/><pre>'
+ ),
+ base_url(path),
+ ) + snapshot_hrefs + format_html(
+ (
+ '</pre><br/>'
+ 'Choose a Snapshot to proceed or go back to the <a href="/" target="_top">Main Index</a>'
+ '</center>'
+ )
+ ),
+ content_type="text/html",
+ status=404,
+ )
+
class PublicIndexView(ListView):
template_name = 'public_index.html'
model = Snapshot
- paginate_by = 100
+ paginate_by = SNAPSHOTS_PER_PAGE
ordering = ['title']
def get_context_data(self, **kwargs):
@@ -105,12 +219,14 @@ class PublicIndexView(ListView):
}
def get_queryset(self, **kwargs):
- qs = super().get_queryset(**kwargs)
+ qs = super().get_queryset(**kwargs)
query = self.request.GET.get('q')
if query:
qs = qs.filter(Q(title__icontains=query) | Q(url__icontains=query) | Q(timestamp__icontains=query) | Q(tags__name__icontains=query))
+
for snapshot in qs:
- snapshot.icons = snapshot_icons(snapshot)
+ # lazy-load the icons, otherwise they would be rendered for the entire index at once
+ # (bind snapshot as a default arg so each lambda captures its own snapshot, not the loop variable)
+ snapshot.icons = lambda snapshot=snapshot: snapshot_icons(snapshot)
return qs
def get(self, *args, **kwargs):
@@ -130,9 +246,9 @@ class AddView(UserPassesTestMixin, FormView):
if self.request.method == 'GET':
url = self.request.GET.get('url', None)
if url:
- return {'url': url}
- else:
- return super().get_initial()
+ return {'url': url if '://' in url else f'https://{url}'}
+
+ return super().get_initial()
def test_func(self):
return PUBLIC_ADD_VIEW or self.request.user.is_authenticated
@@ -145,15 +261,18 @@ class AddView(UserPassesTestMixin, FormView):
'absolute_add_path': self.request.build_absolute_uri(self.request.path),
'VERSION': VERSION,
'FOOTER_INFO': FOOTER_INFO,
+ 'stdout': '',
}
def form_valid(self, form):
url = form.cleaned_data["url"]
print(f'[+] Adding URL: {url}')
+ tag = form.cleaned_data["tag"]
depth = 0 if form.cleaned_data["depth"] == "0" else 1
extractors = ','.join(form.cleaned_data["archive_methods"])
input_kwargs = {
"urls": url,
+ "tag": tag,
"depth": depth,
"update_all": False,
"out_dir": OUTPUT_DIR,
diff --git a/archivebox/core/wsgi.py b/archivebox/core/wsgi.py
index f933afae..94993b92 100644
--- a/archivebox/core/wsgi.py
+++ b/archivebox/core/wsgi.py
@@ -7,10 +7,10 @@ For more information on this file, see
https://docs.djangoproject.com/en/2.1/howto/deployment/wsgi/
"""
-import os
+
+from archivebox.config import setup_django
+setup_django(in_memory_db=False, check_db=True)
from django.core.wsgi import get_wsgi_application
-os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings')
-
application = get_wsgi_application()
diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py
index 15968097..09b56c66 100644
--- a/archivebox/extractors/__init__.py
+++ b/archivebox/extractors/__init__.py
@@ -44,16 +44,16 @@ def get_default_archive_methods():
return [
('title', should_save_title, save_title),
('favicon', should_save_favicon, save_favicon),
- ('wget', should_save_wget, save_wget),
+ ('headers', should_save_headers, save_headers),
('singlefile', should_save_singlefile, save_singlefile),
('pdf', should_save_pdf, save_pdf),
('screenshot', should_save_screenshot, save_screenshot),
('dom', should_save_dom, save_dom),
- ('readability', should_save_readability, save_readability), #keep readability below wget and singlefile, as it depends on them
+ ('wget', should_save_wget, save_wget),
+ ('readability', should_save_readability, save_readability), # keep readability below wget and singlefile, as it depends on them
('mercury', should_save_mercury, save_mercury),
('git', should_save_git, save_git),
('media', should_save_media, save_media),
- ('headers', should_save_headers, save_headers),
('archive_org', should_save_archive_dot_org, save_archive_dot_org),
]
@@ -115,6 +115,13 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
ArchiveResult.objects.create(snapshot=snapshot, extractor=method_name, cmd=result.cmd, cmd_version=result.cmd_version,
output=result.output, pwd=result.pwd, start_ts=result.start_ts, end_ts=result.end_ts, status=result.status)
+
+ # bump the updated time on the main Snapshot here, this is critical
+ # to be able to cache summaries of the ArchiveResults for a given
+ # snapshot without having to load all the results from the DB each time.
+ # (we use {Snapshot.id}-{Snapshot.updated} as the cache key and assume
+ # ArchiveResults are unchanged as long as the updated timestamp is unchanged)
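+ # e.g. a cached key like '1a2b3c4d-1616838751.0-tags' goes stale automatically once save() bumps Snapshot.updated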
+ snapshot.save()
else:
# print('{black} X {}{reset}'.format(method_name, **ANSI))
stats['skipped'] += 1
diff --git a/archivebox/extractors/archive_org.py b/archivebox/extractors/archive_org.py
index 1f382190..a0883113 100644
--- a/archivebox/extractors/archive_org.py
+++ b/archivebox/extractors/archive_org.py
@@ -31,7 +31,7 @@ def should_save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, overwr
out_dir = out_dir or Path(link.link_dir)
if not overwrite and (out_dir / 'archive.org.txt').exists():
- # if open(path, 'r').read().strip() != 'None':
+ # if open(path, 'r', encoding='utf-8').read().strip() != 'None':
return False
return SAVE_ARCHIVE_DOT_ORG
diff --git a/archivebox/extractors/mercury.py b/archivebox/extractors/mercury.py
index d9e32c0a..e7d20362 100644
--- a/archivebox/extractors/mercury.py
+++ b/archivebox/extractors/mercury.py
@@ -54,11 +54,13 @@ def save_mercury(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
out_dir = Path(out_dir or link.link_dir)
output_folder = out_dir.absolute() / "mercury"
- output = str(output_folder)
+ output = "mercury"
status = 'succeeded'
timer = TimedProgress(timeout, prefix=' ')
try:
+ output_folder.mkdir(exist_ok=True)
+
# Get plain text version of article
cmd = [
DEPENDENCIES['MERCURY_BINARY']['path'],
@@ -71,6 +73,11 @@ def save_mercury(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
except json.JSONDecodeError:
raise ShellError(cmd, result)
+ if article_text.get('failed'):
+ raise ArchiveError('Mercury was not able to get article text from the URL')
+
+ atomic_write(str(output_folder / "content.txt"), article_text["content"])
+
# Get HTML version of article
cmd = [
DEPENDENCIES['MERCURY_BINARY']['path'],
@@ -82,9 +89,10 @@ def save_mercury(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
except json.JSONDecodeError:
raise ShellError(cmd, result)
- output_folder.mkdir(exist_ok=True)
+ if article_json.get('failed'):
+ raise ArchiveError('Mercury was not able to get article HTML from the URL')
+
atomic_write(str(output_folder / "content.html"), article_json.pop("content"))
- atomic_write(str(output_folder / "content.txt"), article_text["content"])
atomic_write(str(output_folder / "article.json"), article_json)
# Check for common failure cases
diff --git a/archivebox/extractors/readability.py b/archivebox/extractors/readability.py
index 6e48cd9a..d7c1e303 100644
--- a/archivebox/extractors/readability.py
+++ b/archivebox/extractors/readability.py
@@ -35,7 +35,7 @@ def get_html(link: Link, path: Path) -> str:
document = None
for source in sources:
try:
- with open(abs_path / source, "r") as f:
+ with open(abs_path / source, "r", encoding="utf-8") as f:
document = f.read()
break
except (FileNotFoundError, TypeError):
@@ -63,7 +63,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
out_dir = Path(out_dir or link.link_dir)
output_folder = out_dir.absolute() / "readability"
- output = str(output_folder)
+ output = "readability"
# Readability Docs: https://github.com/mozilla/readability
@@ -81,13 +81,20 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
temp_doc.write(document.encode("utf-8"))
temp_doc.close()
+ if not document or len(document) < 10:
+ raise ArchiveError('Readability could not find HTML to parse for article text')
+
cmd = [
DEPENDENCIES['READABILITY_BINARY']['path'],
- temp_doc.name
+ temp_doc.name,
]
result = run(cmd, cwd=out_dir, timeout=timeout)
- result_json = json.loads(result.stdout)
+ try:
+ result_json = json.loads(result.stdout)
+ except json.JSONDecodeError:
+ raise ArchiveError('Readability was not able to archive the page', result.stdout + result.stderr)
+
output_folder.mkdir(exist_ok=True)
readability_content = result_json.pop("textContent")
atomic_write(str(output_folder / "content.html"), result_json.pop("content"))
@@ -112,6 +119,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
except (Exception, OSError) as err:
status = 'failed'
output = err
+ cmd = [cmd[0], './{singlefile,dom}.html']
finally:
timer.end()
@@ -121,6 +129,6 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
cmd_version=READABILITY_VERSION,
output=output,
status=status,
- index_texts= [readability_content] if readability_content else [],
+ index_texts=[readability_content] if readability_content else [],
**timer.stats,
)
diff --git a/archivebox/index/__init__.py b/archivebox/index/__init__.py
index 04ab0a8d..d3d1bedc 100644
--- a/archivebox/index/__init__.py
+++ b/archivebox/index/__init__.py
@@ -356,6 +356,7 @@ LINK_FILTERS = {
'regex': lambda pattern: Q(url__iregex=pattern),
'domain': lambda pattern: Q(url__istartswith=f"http://{pattern}") | Q(url__istartswith=f"https://{pattern}") | Q(url__istartswith=f"ftp://{pattern}"),
'tag': lambda pattern: Q(tags__name=pattern),
+ 'timestamp': lambda pattern: Q(timestamp=pattern),
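+ # e.g. `archivebox list --filter-type=timestamp 1616838751.0` matches a single snapshot by its exact timestamp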
}
@enforce_types
diff --git a/archivebox/index/html.py b/archivebox/index/html.py
index ebfe7d78..c4f66f55 100644
--- a/archivebox/index/html.py
+++ b/archivebox/index/html.py
@@ -1,11 +1,12 @@
__package__ = 'archivebox.index'
-from datetime import datetime
-from typing import List, Optional, Iterator, Mapping
from pathlib import Path
+from datetime import datetime
+from collections import defaultdict
+from typing import List, Optional, Iterator, Mapping
from django.utils.html import format_html, mark_safe
-from collections import defaultdict
+from django.core.cache import cache
from .schema import Link
from ..system import atomic_write
@@ -20,7 +21,6 @@ from ..util import (
from ..config import (
OUTPUT_DIR,
VERSION,
- GIT_SHA,
FOOTER_INFO,
HTML_INDEX_FILENAME,
SAVE_ARCHIVE_DOT_ORG,
@@ -60,7 +60,7 @@ def main_index_template(links: List[Link], template: str=MAIN_INDEX_TEMPLATE) ->
return render_django_template(template, {
'version': VERSION,
- 'git_sha': GIT_SHA,
+ 'git_sha': VERSION, # not used anymore, but kept for backwards compatibility
'num_links': str(len(links)),
'date_updated': datetime.now().strftime('%Y-%m-%d'),
'time_updated': datetime.now().strftime('%Y-%m-%d %H:%M'),
@@ -116,71 +116,78 @@ def render_django_template(template: str, context: Mapping[str, str]) -> str:
def snapshot_icons(snapshot) -> str:
- from core.models import EXTRACTORS
+ cache_key = f'{str(snapshot.id)[:12]}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
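+ # e.g. (hypothetical) '1a2b3c4d5e6f-1616838751.0-snapshot-icons', rebuilt whenever the snapshot is re-saved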
+
+ def calc_snapshot_icons():
+ from core.models import EXTRACTORS
+ # start = datetime.now()
- # start = datetime.now()
+ archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False)
+ link = snapshot.as_link()
+ path = link.archive_path
+ canon = link.canonical_outputs()
+ output = ""
+ output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{}</a> &nbsp;'
+ icons = {
+ "singlefile": "❶",
+ "wget": "🆆",
+ "dom": "🅷",
+ "pdf": "📄",
+ "screenshot": "💻",
+ "media": "📼",
+ "git": "🅶",
+ "archive_org": "🏛",
+ "readability": "🆁",
+ "mercury": "🅼",
+ "warc": "📦"
+ }
+ exclude = ["favicon", "title", "headers", "archive_org"]
+ # Missing specific entry for WARC
- archive_results = snapshot.archiveresult_set.filter(status="succeeded")
- link = snapshot.as_link()
- path = link.archive_path
- canon = link.canonical_outputs()
- output = ""
- output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{}</a> &nbsp;'
- icons = {
- "singlefile": "❶",
- "wget": "🆆",
- "dom": "🅷",
- "pdf": "📄",
- "screenshot": "💻",
- "media": "📼",
- "git": "🅶",
- "archive_org": "🏛",
- "readability": "🆁",
- "mercury": "🅼",
- "warc": "📦"
- }
- exclude = ["favicon", "title", "headers", "archive_org"]
- # Missing specific entry for WARC
+ extractor_outputs = defaultdict(lambda: None)
+ for extractor, _ in EXTRACTORS:
+ for result in archive_results:
+ if result.extractor == extractor and result:
+ extractor_outputs[extractor] = result
- extractor_outputs = defaultdict(lambda: None)
- for extractor, _ in EXTRACTORS:
- for result in archive_results:
- if result.extractor == extractor and result:
- extractor_outputs[extractor] = result
+ for extractor, _ in EXTRACTORS:
+ if extractor not in exclude:
+ existing = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
+ # Check filesystem to see if anything is actually present (too slow, needs optimization/caching)
+ # if existing:
+ # existing = (Path(path) / existing)
+ # if existing.is_file():
+ # existing = True
+ # elif existing.is_dir():
+ # existing = any(existing.glob('*.*'))
+ output += format_html(output_template, path, canon[f"{extractor}_path"], str(bool(existing)),
+ extractor, icons.get(extractor, "?"))
+ if extractor == "wget":
+ # warc isn't technically its own extractor, so we have to add it after wget
+
+ # get from db (faster but less truthful)
+ exists = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
+ # get from filesystem (slower but more accurate)
+ # exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
+ output += format_html(output_template, path, canon["warc_path"], str(bool(exists)), "warc", icons.get("warc", "?"))
- for extractor, _ in EXTRACTORS:
- if extractor not in exclude:
- existing = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
- # Check filesystsem to see if anything is actually present (too slow, needs optimization/caching)
- # if existing:
- # existing = (Path(path) / existing)
- # if existing.is_file():
- # existing = True
- # elif existing.is_dir():
- # existing = any(existing.glob('*.*'))
- output += format_html(output_template, path, canon[f"{extractor}_path"], str(bool(existing)),
- extractor, icons.get(extractor, "?"))
- if extractor == "wget":
- # warc isn't technically it's own extractor, so we have to add it after wget
-
- # get from db (faster but less thurthful)
- exists = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
- # get from filesystem (slower but more accurate)
- # exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
- output += format_html(output_template, 'warc/', canon["warc_path"], str(bool(exists)), "warc", icons.get("warc", "?"))
+ if extractor == "archive_org":
+ # The check for archive_org is different, so it has to be handled separately
- if extractor == "archive_org":
- # The check for archive_org is different, so it has to be handled separately
+ # get from db (faster)
+ exists = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
+ # get from filesystem (slower)
+ # target_path = Path(path) / "archive.org.txt"
+ # exists = target_path.exists()
+ output += '<a href="{}" class="exists-{}" title="{}">{}</a> '.format(canon["archive_org_path"], str(exists),
+ "archive_org", icons.get("archive_org", "?"))
- # get from db (faster)
- exists = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
- # get from filesystem (slower)
- # target_path = Path(path) / "archive.org.txt"
- # exists = target_path.exists()
- output += '<a href="{}" class="exists-{}" title="{}">{}</a> '.format(canon["archive_org_path"], str(exists),
- "archive_org", icons.get("archive_org", "?"))
+ result = format_html('<span class="files-icons" style="font-size: 1.1em; opacity: 0.8">{}</span>', mark_safe(output))
+ # end = datetime.now()
+ # print(((end - start).total_seconds()*1000) // 1, 'ms')
+ return result
- result = format_html('<span class="files-icons" style="font-size: 1.1em; opacity: 0.8">{}</span>', mark_safe(output))
- # end = datetime.now()
- # print(((end - start).total_seconds()*1000) // 1, 'ms')
- return result
+ return cache.get_or_set(cache_key, calc_snapshot_icons)
+ # return calc_snapshot_icons()
+
+
diff --git a/archivebox/index/json.py b/archivebox/index/json.py
index f24b969f..441e6854 100644
--- a/archivebox/index/json.py
+++ b/archivebox/index/json.py
@@ -15,7 +15,6 @@ from ..config import (
VERSION,
OUTPUT_DIR,
FOOTER_INFO,
- GIT_SHA,
DEPENDENCIES,
JSON_INDEX_FILENAME,
ARCHIVE_DIR_NAME,
@@ -30,7 +29,7 @@ MAIN_INDEX_HEADER = {
'meta': {
'project': 'ArchiveBox',
'version': VERSION,
- 'git_sha': GIT_SHA,
+ 'git_sha': VERSION, # not used anymore, but kept for backwards compatibility
'website': 'https://ArchiveBox.io',
'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
'source': 'https://github.com/ArchiveBox/ArchiveBox',
diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py
index 1ca4e801..00831e19 100644
--- a/archivebox/index/schema.py
+++ b/archivebox/index/schema.py
@@ -16,6 +16,7 @@ from typing import List, Dict, Any, Optional, Union
from dataclasses import dataclass, asdict, field, fields
+from django.utils.functional import cached_property
from ..system import get_dir_size
@@ -133,7 +134,6 @@ class Link:
updated: Optional[datetime] = None
schema: str = 'Link'
-
def __str__(self) -> str:
return f'[{self.timestamp}] {self.url} "{self.title}"'
@@ -190,6 +190,7 @@ class Link:
}
if extended:
info.update({
+ 'snapshot_id': self.snapshot_id,
'link_dir': self.link_dir,
'archive_path': self.archive_path,
@@ -201,6 +202,9 @@ class Link:
'basename': self.basename,
'extension': self.extension,
'is_static': self.is_static,
+
+ 'tags_str': self.tags, # only used to render static index in index/html.py, remove if no longer needed there
+ 'icons': None, # only used to render static index in index/html.py, remove if no longer needed there
'bookmarked_date': self.bookmarked_date,
'updated_date': self.updated_date,
@@ -255,6 +259,11 @@ class Link:
return to_csv(self, cols=cols or self.field_names(), separator=separator, ljust=ljust)
+ @cached_property
+ def snapshot_id(self):
+ from core.models import Snapshot
+ return str(Snapshot.objects.only('id').get(url=self.url).id)
+
@classmethod
def field_names(cls):
return [f.name for f in fields(cls)]
diff --git a/archivebox/index/sql.py b/archivebox/index/sql.py
index 1e99f67c..2fcabd61 100644
--- a/archivebox/index/sql.py
+++ b/archivebox/index/sql.py
@@ -7,7 +7,7 @@ from django.db.models import QuerySet
from django.db import transaction
from .schema import Link
-from ..util import enforce_types
+from ..util import enforce_types, parse_date
from ..config import OUTPUT_DIR
@@ -23,13 +23,15 @@ def parse_sql_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
)
@enforce_types
-def remove_from_sql_main_index(snapshots: QuerySet, out_dir: Path=OUTPUT_DIR) -> None:
- with transaction.atomic():
- snapshots.delete()
+def remove_from_sql_main_index(snapshots: QuerySet, atomic: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
+ if atomic:
+ with transaction.atomic():
+ return snapshots.delete()
+ return snapshots.delete()
@enforce_types
def write_link_to_sql_index(link: Link):
- from core.models import Snapshot
+ from core.models import Snapshot, ArchiveResult
info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys}
tags = info.pop("tags")
if tags is None:
@@ -41,36 +43,74 @@ def write_link_to_sql_index(link: Link):
while Snapshot.objects.filter(timestamp=info["timestamp"]).exists():
info["timestamp"] = str(float(info["timestamp"]) + 1.0)
- snapshot, _ = Snapshot.objects.update_or_create(url=link.url, defaults=info)
+ snapshot, _ = Snapshot.objects.update_or_create(url=link.url, defaults=info)
snapshot.save_tags(tags)
+
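+ # each history entry is either a raw dict loaded from a JSON index file or an ArchiveResult
+ # dataclass, e.g. {'cmd': [...], 'start_ts': '2021-03-27 09:52:31', 'status': 'succeeded', ...}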
+ for extractor, entries in link.history.items():
+ for entry in entries:
+ if isinstance(entry, dict):
+ result, _ = ArchiveResult.objects.get_or_create(
+ snapshot_id=snapshot.id,
+ extractor=extractor,
+ start_ts=parse_date(entry['start_ts']),
+ defaults={
+ 'end_ts': parse_date(entry['end_ts']),
+ 'cmd': entry['cmd'],
+ 'output': entry['output'],
+ 'cmd_version': entry.get('cmd_version') or 'unknown',
+ 'pwd': entry['pwd'],
+ 'status': entry['status'],
+ }
+ )
+ else:
+ result, _ = ArchiveResult.objects.update_or_create(
+ snapshot_id=snapshot.id,
+ extractor=extractor,
+ start_ts=parse_date(entry.start_ts),
+ defaults={
+ 'end_ts': parse_date(entry.end_ts),
+ 'cmd': entry.cmd,
+ 'output': entry.output,
+ 'cmd_version': entry.cmd_version or 'unknown',
+ 'pwd': entry.pwd,
+ 'status': entry.status,
+ }
+ )
+
return snapshot
@enforce_types
def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None:
- with transaction.atomic():
- for link in links:
- write_link_to_sql_index(link)
+ for link in links:
+ # with transaction.atomic():
+ # write_link_to_sql_index(link)
+ write_link_to_sql_index(link)
@enforce_types
def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None:
from core.models import Snapshot
- with transaction.atomic():
- try:
- snap = Snapshot.objects.get(url=link.url)
- except Snapshot.DoesNotExist:
- snap = write_link_to_sql_index(link)
- snap.title = link.title
+ # with transaction.atomic():
+ # try:
+ # snap = Snapshot.objects.get(url=link.url)
+ # except Snapshot.DoesNotExist:
+ # snap = write_link_to_sql_index(link)
+ # snap.title = link.title
+ try:
+ snap = Snapshot.objects.get(url=link.url)
+ except Snapshot.DoesNotExist:
+ snap = write_link_to_sql_index(link)
+ snap.title = link.title
- tag_set = (
- set(tag.strip() for tag in (link.tags or '').split(','))
- )
- tag_list = list(tag_set) or []
+ tag_set = (
+ set(tag.strip() for tag in (link.tags or '').split(','))
+ )
+ tag_list = list(tag_set) or []
- snap.save()
- snap.save_tags(tag_list)
+ snap.save()
+ snap.save_tags(tag_list)
diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py
index f2b86735..492ae55e 100644
--- a/archivebox/logging_util.py
+++ b/archivebox/logging_util.py
@@ -3,6 +3,7 @@ __package__ = 'archivebox'
import re
import os
import sys
+import stat
import time
import argparse
from math import log
@@ -11,18 +12,21 @@ from pathlib import Path
from datetime import datetime
from dataclasses import dataclass
-from typing import Optional, List, Dict, Union, IO, TYPE_CHECKING
+from typing import Any, Optional, List, Dict, Union, IO, TYPE_CHECKING
if TYPE_CHECKING:
from .index.schema import Link, ArchiveResult
+from .system import get_dir_size
from .util import enforce_types
from .config import (
ConfigDict,
OUTPUT_DIR,
PYTHON_ENCODING,
+ VERSION,
ANSI,
IS_TTY,
+ IN_DOCKER,
TERM_WIDTH,
SHOW_PROGRESS,
SOURCES_DIR_NAME,
@@ -50,6 +54,37 @@ class RuntimeStats:
_LAST_RUN_STATS = RuntimeStats()
+def debug_dict_summary(obj: Dict[Any, Any]) -> None:
+ stderr(' '.join(f'{key}={str(val).ljust(6)}' for key, val in obj.items()))
+
+
+def get_fd_info(fd) -> Dict[str, Any]:
+ NAME = fd.name[1:-1]
+ FILENO = fd.fileno()
+ MODE = os.fstat(FILENO).st_mode
+ IS_TTY = hasattr(fd, 'isatty') and fd.isatty()
+ IS_PIPE = stat.S_ISFIFO(MODE)
+ IS_FILE = stat.S_ISREG(MODE)
+ IS_TERMINAL = not (IS_PIPE or IS_FILE)
+ IS_LINE_BUFFERED = fd.line_buffering
+ IS_READABLE = fd.readable()
+ return {
+ 'NAME': NAME, 'FILENO': FILENO, 'MODE': MODE,
+ 'IS_TTY': IS_TTY, 'IS_PIPE': IS_PIPE, 'IS_FILE': IS_FILE,
+ 'IS_TERMINAL': IS_TERMINAL, 'IS_LINE_BUFFERED': IS_LINE_BUFFERED,
+ 'IS_READABLE': IS_READABLE,
+ }
+
+
+# # Log debug information about stdin, stdout, and stderr
+# sys.stdout.write('[>&1] this is python stdout\n')
+# sys.stderr.write('[>&2] this is python stderr\n')
+
+# debug_dict_summary(get_fd_info(sys.stdin))
+# debug_dict_summary(get_fd_info(sys.stdout))
+# debug_dict_summary(get_fd_info(sys.stderr))
+
+
class SmartFormatter(argparse.HelpFormatter):
"""Patched formatter that prints newlines in argparse help strings"""
@@ -62,22 +97,40 @@ class SmartFormatter(argparse.HelpFormatter):
def reject_stdin(caller: str, stdin: Optional[IO]=sys.stdin) -> None:
"""Tell the user they passed stdin to a command that doesn't accept it"""
- if stdin and not stdin.isatty():
- stdin_raw_text = stdin.read().strip()
+ if not stdin:
+ return None
+
+ if IN_DOCKER:
+ # when TTY is disabled in docker we can't tell if stdin is being piped in or not
+ # if we try to read stdin when it's not piped we will hang indefinitely waiting for it
+ return None
+
+ if not stdin.isatty():
+ # stderr('READING STDIN TO REJECT...')
+ stdin_raw_text = stdin.read()
if stdin_raw_text:
+ # stderr('GOT STDIN!', len(stdin_raw_text))
stderr(f'[X] The "{caller}" command does not accept stdin.', color='red')
stderr(f' Run archivebox "{caller} --help" to see usage and examples.')
stderr()
raise SystemExit(1)
+ return None
def accept_stdin(stdin: Optional[IO]=sys.stdin) -> Optional[str]:
"""accept any standard input and return it as a string or None"""
+
if not stdin:
return None
- elif stdin and not stdin.isatty():
- stdin_str = stdin.read().strip()
- return stdin_str or None
+
+ if not stdin.isatty():
+ # stderr('READING STDIN TO ACCEPT...')
+ stdin_str = stdin.read()
+
+ if stdin_str:
+ # stderr('GOT STDIN...', len(stdin_str))
+ return stdin_str
+
return None
@@ -174,7 +227,6 @@ def progress_bar(seconds: int, prefix: str='') -> None:
def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional[str], pwd: str):
- from .config import VERSION, ANSI
cmd = ' '.join(('archivebox', subcommand, *subcommand_args))
stderr('{black}[i] [{now}] ArchiveBox v{VERSION}: {cmd}{reset}'.format(
now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
@@ -233,11 +285,11 @@ def log_indexing_process_finished():
def log_indexing_started(out_path: str):
if IS_TTY:
- sys.stdout.write(f' > {out_path}')
+ sys.stdout.write(f' > ./{Path(out_path).relative_to(OUTPUT_DIR)}')
def log_indexing_finished(out_path: str):
- print(f'\r √ {out_path}')
+ print(f'\r √ ./{Path(out_path).relative_to(OUTPUT_DIR)}')
### Archiving Stage
@@ -272,8 +324,6 @@ def log_archiving_paused(num_links: int, idx: int, timestamp: str):
total=num_links,
))
print()
- print(' {lightred}Hint:{reset} To view your archive index, run:'.format(**ANSI))
- print(' archivebox server # then visit http://127.0.0.1:8000')
print(' Continue archiving where you left off by running:')
print(' archivebox update --resume={}'.format(timestamp))
@@ -331,6 +381,9 @@ def log_link_archiving_finished(link: "Link", link_dir: str, is_new: bool, stats
else:
_LAST_RUN_STATS.succeeded += 1
+ size = get_dir_size(link_dir)
+ print(' {black}{} files ({}){reset}'.format(size[2], printable_filesize(size[0]), **ANSI))
+
def log_archive_method_started(method: str):
print(' > {}'.format(method))
diff --git a/archivebox/main.py b/archivebox/main.py
index c1751528..5c697c55 100644
--- a/archivebox/main.py
+++ b/archivebox/main.py
@@ -67,6 +67,7 @@ from .config import (
ConfigDict,
ANSI,
IS_TTY,
+ DEBUG,
IN_DOCKER,
USER,
ARCHIVEBOX_BINARY,
@@ -76,6 +77,7 @@ from .config import (
ARCHIVE_DIR,
LOGS_DIR,
CONFIG_FILE,
+ CONFIG_FILENAME,
ARCHIVE_DIR_NAME,
SOURCES_DIR_NAME,
LOGS_DIR_NAME,
@@ -84,6 +86,7 @@ from .config import (
SQL_INDEX_FILENAME,
ROBOTS_TXT_FILENAME,
FAVICON_FILENAME,
+ SEARCH_BACKEND_ENGINE,
check_dependencies,
check_data_folder,
write_config_file,
@@ -125,14 +128,19 @@ ALLOWED_IN_OUTPUT_DIR = {
'node_modules',
'package-lock.json',
'static',
+ 'sonic',
ARCHIVE_DIR_NAME,
SOURCES_DIR_NAME,
LOGS_DIR_NAME,
SQL_INDEX_FILENAME,
+ f'{SQL_INDEX_FILENAME}-wal',
+ f'{SQL_INDEX_FILENAME}-shm',
JSON_INDEX_FILENAME,
HTML_INDEX_FILENAME,
ROBOTS_TXT_FILENAME,
FAVICON_FILENAME,
+ CONFIG_FILENAME,
+ f'{CONFIG_FILENAME}.bak',
}
@enforce_types
@@ -214,9 +222,23 @@ def version(quiet: bool=False,
if quiet:
print(VERSION)
else:
+ # ArchiveBox v0.5.6
+ # Cpython Linux Linux-4.19.121-linuxkit-x86_64-with-glibc2.28 x86_64 (in Docker) (in TTY)
print('ArchiveBox v{}'.format(VERSION))
p = platform.uname()
- print(sys.implementation.name.title(), p.system, platform.platform(), p.machine, '(in Docker)' if IN_DOCKER else '(not in Docker)')
+ print(
+ sys.implementation.name.title(),
+ p.system,
+ platform.platform(),
+ p.machine,
+ )
+ print(
+ f'IN_DOCKER={IN_DOCKER}',
+ f'DEBUG={DEBUG}',
+ f'IS_TTY={IS_TTY}',
+ f'TZ={os.environ.get("TZ", "UTC")}',
+ f'SEARCH_BACKEND_ENGINE={SEARCH_BACKEND_ENGINE}',
+ )
print()
print('{white}[i] Dependency versions:{reset}'.format(**ANSI))
@@ -261,7 +283,7 @@ def run(subcommand: str,
@enforce_types
-def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
+def init(force: bool=False, quick: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
"""Initialize a new ArchiveBox collection in the current directory"""
from core.models import Snapshot
@@ -276,13 +298,12 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
existing_index = (Path(out_dir) / SQL_INDEX_FILENAME).exists()
if is_empty and not existing_index:
- print('{green}[+] Initializing a new ArchiveBox collection in this folder...{reset}'.format(**ANSI))
- print(f' {out_dir}')
- print('{green}------------------------------------------------------------------{reset}'.format(**ANSI))
+ print('{green}[+] Initializing a new ArchiveBox v{} collection...{reset}'.format(VERSION, **ANSI))
+ print('{green}----------------------------------------------------------------------{reset}'.format(**ANSI))
elif existing_index:
- print('{green}[*] Updating existing ArchiveBox collection in this folder...{reset}'.format(**ANSI))
- print(f' {out_dir}')
- print('{green}------------------------------------------------------------------{reset}'.format(**ANSI))
+ # TODO: properly detect and print the existing version in current index as well
+ print('{green}[^] Verifying and updating existing ArchiveBox collection to v{}...{reset}'.format(VERSION, **ANSI))
+ print('{green}----------------------------------------------------------------------{reset}'.format(**ANSI))
else:
if force:
stderr('[!] This folder appears to already have files in it, but no index.sqlite3 is present.', color='lightyellow')
@@ -303,30 +324,25 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
else:
print('\n{green}[+] Building archive folder structure...{reset}'.format(**ANSI))
+ print(f' + ./{ARCHIVE_DIR.relative_to(OUTPUT_DIR)}, ./{SOURCES_DIR.relative_to(OUTPUT_DIR)}, ./{LOGS_DIR.relative_to(OUTPUT_DIR)}...')
Path(SOURCES_DIR).mkdir(exist_ok=True)
- print(f' √ {SOURCES_DIR}')
-
Path(ARCHIVE_DIR).mkdir(exist_ok=True)
- print(f' √ {ARCHIVE_DIR}')
-
Path(LOGS_DIR).mkdir(exist_ok=True)
- print(f' √ {LOGS_DIR}')
-
+ print(f' + ./{CONFIG_FILE.relative_to(OUTPUT_DIR)}...')
write_config_file({}, out_dir=out_dir)
- print(f' √ {CONFIG_FILE}')
+
if (Path(out_dir) / SQL_INDEX_FILENAME).exists():
- print('\n{green}[*] Verifying main SQL index and running migrations...{reset}'.format(**ANSI))
+ print('\n{green}[*] Verifying main SQL index and running any migrations needed...{reset}'.format(**ANSI))
else:
- print('\n{green}[+] Building main SQL index and running migrations...{reset}'.format(**ANSI))
+ print('\n{green}[+] Building main SQL index and running initial migrations...{reset}'.format(**ANSI))
DATABASE_FILE = Path(out_dir) / SQL_INDEX_FILENAME
- print(f' √ {DATABASE_FILE}')
- print()
for migration_line in apply_migrations(out_dir):
print(f' {migration_line}')
-
assert DATABASE_FILE.exists()
+ print()
+ print(f' √ ./{DATABASE_FILE.relative_to(OUTPUT_DIR)}')
# from django.contrib.auth.models import User
# if IS_TTY and not User.objects.filter(is_superuser=True).exists():
@@ -334,7 +350,7 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
# call_command("createsuperuser", interactive=True)
print()
- print('{green}[*] Collecting links from any existing indexes and archive folders...{reset}'.format(**ANSI))
+ print('{green}[*] Checking links from indexes and archive folders (safe to Ctrl+C)...{reset}'.format(**ANSI))
all_links = Snapshot.objects.none()
pending_links: Dict[str, Link] = {}
@@ -343,63 +359,77 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
all_links = load_main_index(out_dir=out_dir, warn=False)
print(' √ Loaded {} links from existing main index.'.format(all_links.count()))
- # Links in data folders that dont match their timestamp
- fixed, cant_fix = fix_invalid_folder_locations(out_dir=out_dir)
- if fixed:
- print(' {lightyellow}√ Fixed {} data directory locations that didn\'t match their link timestamps.{reset}'.format(len(fixed), **ANSI))
- if cant_fix:
- print(' {lightyellow}! Could not fix {} data directory locations due to conflicts with existing folders.{reset}'.format(len(cant_fix), **ANSI))
+ if quick:
+ print(' > Skipping full snapshot directory check (quick mode)')
+ else:
+ try:
+ # Links in data folders that don't match their timestamp
+ fixed, cant_fix = fix_invalid_folder_locations(out_dir=out_dir)
+ if fixed:
+ print(' {lightyellow}√ Fixed {} data directory locations that didn\'t match their link timestamps.{reset}'.format(len(fixed), **ANSI))
+ if cant_fix:
+ print(' {lightyellow}! Could not fix {} data directory locations due to conflicts with existing folders.{reset}'.format(len(cant_fix), **ANSI))
- # Links in JSON index but not in main index
- orphaned_json_links = {
- link.url: link
- for link in parse_json_main_index(out_dir)
- if not all_links.filter(url=link.url).exists()
- }
- if orphaned_json_links:
- pending_links.update(orphaned_json_links)
- print(' {lightyellow}√ Added {} orphaned links from existing JSON index...{reset}'.format(len(orphaned_json_links), **ANSI))
+ # Links in JSON index but not in main index
+ orphaned_json_links = {
+ link.url: link
+ for link in parse_json_main_index(out_dir)
+ if not all_links.filter(url=link.url).exists()
+ }
+ if orphaned_json_links:
+ pending_links.update(orphaned_json_links)
+ print(' {lightyellow}√ Added {} orphaned links from existing JSON index...{reset}'.format(len(orphaned_json_links), **ANSI))
- # Links in data dir indexes but not in main index
- orphaned_data_dir_links = {
- link.url: link
- for link in parse_json_links_details(out_dir)
- if not all_links.filter(url=link.url).exists()
- }
- if orphaned_data_dir_links:
- pending_links.update(orphaned_data_dir_links)
- print(' {lightyellow}√ Added {} orphaned links from existing archive directories.{reset}'.format(len(orphaned_data_dir_links), **ANSI))
+ # Links in data dir indexes but not in main index
+ orphaned_data_dir_links = {
+ link.url: link
+ for link in parse_json_links_details(out_dir)
+ if not all_links.filter(url=link.url).exists()
+ }
+ if orphaned_data_dir_links:
+ pending_links.update(orphaned_data_dir_links)
+ print(' {lightyellow}√ Added {} orphaned links from existing archive directories.{reset}'.format(len(orphaned_data_dir_links), **ANSI))
- # Links in invalid/duplicate data dirs
- invalid_folders = {
- folder: link
- for folder, link in get_invalid_folders(all_links, out_dir=out_dir).items()
- }
- if invalid_folders:
- print(' {lightyellow}! Skipped adding {} invalid link data directories.{reset}'.format(len(invalid_folders), **ANSI))
- print(' X ' + '\n X '.join(f'{folder} {link}' for folder, link in invalid_folders.items()))
- print()
- print(' {lightred}Hint:{reset} For more information about the link data directories that were skipped, run:'.format(**ANSI))
- print(' archivebox status')
- print(' archivebox list --status=invalid')
+ # Links in invalid/duplicate data dirs
+ invalid_folders = {
+ folder: link
+ for folder, link in get_invalid_folders(all_links, out_dir=out_dir).items()
+ }
+ if invalid_folders:
+ print(' {lightyellow}! Skipped adding {} invalid link data directories.{reset}'.format(len(invalid_folders), **ANSI))
+ print(' X ' + '\n X '.join(f'./{Path(folder).relative_to(OUTPUT_DIR)} {link}' for folder, link in invalid_folders.items()))
+ print()
+ print(' {lightred}Hint:{reset} For more information about the link data directories that were skipped, run:'.format(**ANSI))
+ print(' archivebox status')
+ print(' archivebox list --status=invalid')
+ except (KeyboardInterrupt, SystemExit):
+ stderr()
+ stderr('[x] Stopped checking archive directories due to Ctrl-C/SIGTERM', color='red')
+ stderr(' Your archive data is safe, but you should re-run `archivebox init` to finish the process later.')
+ stderr()
+ stderr(' {lightred}Hint:{reset} In the future you can run a quick init without checking dirs like so:'.format(**ANSI))
+ stderr(' archivebox init --quick')
+ raise SystemExit(1)
+
+ write_main_index(list(pending_links.values()), out_dir=out_dir)
- write_main_index(list(pending_links.values()), out_dir=out_dir)
-
- print('\n{green}------------------------------------------------------------------{reset}'.format(**ANSI))
+ print('\n{green}----------------------------------------------------------------------{reset}'.format(**ANSI))
if existing_index:
print('{green}[√] Done. Verified and updated the existing ArchiveBox collection.{reset}'.format(**ANSI))
else:
- print('{green}[√] Done. A new ArchiveBox collection was initialized ({} links).{reset}'.format(len(all_links), **ANSI))
- print()
- print(' {lightred}Hint:{reset} To view your archive index, run:'.format(**ANSI))
- print(' archivebox server # then visit http://127.0.0.1:8000')
- print()
- print(' To add new links, you can run:')
- print(" archivebox add ~/some/path/or/url/to/list_of_links.txt")
- print()
- print(' For more usage and examples, run:')
- print(' archivebox help')
+ print('{green}[√] Done. A new ArchiveBox collection was initialized ({} links).{reset}'.format(len(all_links) + len(pending_links), **ANSI))
+
+ if Snapshot.objects.count() < 25: # hide the hints for experienced users
+ print()
+ print(' {lightred}Hint:{reset} To view your archive index, run:'.format(**ANSI))
+ print(' archivebox server # then visit http://127.0.0.1:8000')
+ print()
+ print(' To add new links, you can run:')
+ print(" archivebox add ~/some/path/or/url/to/list_of_links.txt")
+ print()
+ print(' For more usage and examples, run:')
+ print(' archivebox help')
json_index = Path(out_dir) / JSON_INDEX_FILENAME
html_index = Path(out_dir) / HTML_INDEX_FILENAME
@@ -531,6 +561,7 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
@enforce_types
def add(urls: Union[str, List[str]],
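+ # tag: a comma-separated string of tag names (e.g. 'tag1,tag2') applied to every imported link below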
+ tag: str='',
depth: int=0,
update_all: bool=not ONLY_NEW,
index_only: bool=False,
@@ -540,6 +571,8 @@ def add(urls: Union[str, List[str]],
out_dir: Path=OUTPUT_DIR) -> List[Link]:
"""Add a new URL or list of URLs to your archive"""
+ from core.models import Tag
+
assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
extractors = extractors.split(",") if extractors else []
@@ -572,26 +605,48 @@ def add(urls: Union[str, List[str]],
new_links_depth += parse_links_from_source(downloaded_file, root_url=new_link.url)
imported_links = list({link.url: link for link in (new_links + new_links_depth)}.values())
+
new_links = dedupe_links(all_links, imported_links)
write_main_index(links=new_links, out_dir=out_dir)
all_links = load_main_index(out_dir=out_dir)
if index_only:
- return all_links
- # Run the archive methods for each link
- archive_kwargs = {
- "out_dir": out_dir,
- }
- if extractors:
- archive_kwargs["methods"] = extractors
- if update_all:
- archive_links(all_links, overwrite=overwrite, **archive_kwargs)
- elif overwrite:
- archive_links(imported_links, overwrite=True, **archive_kwargs)
- elif new_links:
- archive_links(new_links, overwrite=False, **archive_kwargs)
+ # mock-archive all the links using the fake index_only extractor method, just to update their index state
+ if overwrite:
+ archive_links(imported_links, overwrite=overwrite, methods=['index_only'], out_dir=out_dir)
+ else:
+ archive_links(new_links, overwrite=False, methods=['index_only'], out_dir=out_dir)
+ else:
+ # fully run the archive extractor methods for each link
+ archive_kwargs = {
+ "out_dir": out_dir,
+ }
+ if extractors:
+ archive_kwargs["methods"] = extractors
+
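+ # precedence: update_all re-archives everything already in the index, overwrite re-archives just this import batch, otherwise only never-before-seen links get fetched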
+ if update_all:
+ archive_links(all_links, overwrite=overwrite, **archive_kwargs)
+ elif overwrite:
+ archive_links(imported_links, overwrite=True, **archive_kwargs)
+ elif new_links:
+ archive_links(new_links, overwrite=False, **archive_kwargs)
+
+
+ # add any tags to imported links
+ tags = [
+ Tag.objects.get_or_create(name=name.strip())[0]
+ for name in tag.split(',')
+ if name.strip()
+ ]
+ if tags:
+ for link in imported_links:
+ snapshot = link.as_snapshot()
+ snapshot.tags.add(*tags)
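+ # nocache=True recomputes the snapshot's cached tags string so the new tags show up in the index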
+ snapshot.tags_str(nocache=True)
+ snapshot.save()
+ # print(f' √ Tagged {len(imported_links)} Snapshots with {len(tags)} tags {tags_str}')
return all_links
@@ -811,11 +866,15 @@ def list_links(snapshots: Optional[QuerySet]=None,
all_snapshots = load_main_index(out_dir=out_dir)
if after is not None:
- all_snapshots = all_snapshots.filter(timestamp__lt=after)
+ all_snapshots = all_snapshots.filter(timestamp__gte=after)
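+ # gte/lt match the flag semantics: --after is inclusive, --before is exclusive (the old comparisons were inverted)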
if before is not None:
- all_snapshots = all_snapshots.filter(timestamp__gt=before)
+ all_snapshots = all_snapshots.filter(timestamp__lt=before)
if filter_patterns:
all_snapshots = snapshot_filter(all_snapshots, filter_patterns, filter_type)
+
+ if not all_snapshots:
+ stderr('[!] No Snapshots matched your filters:', filter_patterns, f'({filter_type})', color='lightyellow')
+
return all_snapshots
@enforce_types
@@ -1061,6 +1120,7 @@ def server(runserver_args: Optional[List[str]]=None,
reload: bool=False,
debug: bool=False,
init: bool=False,
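+ # quick_init skips the slow archive-dir verification pass (equivalent to `archivebox init --quick`)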
+ quick_init: bool=False,
createsuperuser: bool=False,
out_dir: Path=OUTPUT_DIR) -> None:
"""Run the ArchiveBox HTTP server"""
@@ -1069,9 +1129,14 @@ def server(runserver_args: Optional[List[str]]=None,
if init:
run_subcommand('init', stdin=None, pwd=out_dir)
+ print()
+ elif quick_init:
+ run_subcommand('init', subcommand_args=['--quick'], stdin=None, pwd=out_dir)
+ print()
if createsuperuser:
run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
+ print()
# setup config for django runserver
from . import config
@@ -1083,12 +1148,9 @@ def server(runserver_args: Optional[List[str]]=None,
from django.core.management import call_command
from django.contrib.auth.models import User
- admin_user = User.objects.filter(is_superuser=True).order_by('date_joined').only('username').last()
-
print('{green}[+] Starting ArchiveBox webserver...{reset}'.format(**ANSI))
- if admin_user:
- hint('The admin username is{lightblue} {}{reset}\n'.format(admin_user.username, **ANSI))
- else:
+ print(' > Logging errors to ./logs/errors.log')
+ if not User.objects.filter(is_superuser=True).exists():
print('{lightyellow}[!] No admin users exist yet, you will not be able to edit links in the UI.{reset}'.format(**ANSI))
print()
print(' To create an admin user, run:')
@@ -1106,7 +1168,6 @@ def server(runserver_args: Optional[List[str]]=None,
config.SHOW_PROGRESS = False
config.DEBUG = config.DEBUG or debug
-
call_command("runserver", *runserver_args)
diff --git a/archivebox/parsers/__init__.py b/archivebox/parsers/__init__.py
index 441c08ac..4af2c5ac 100644
--- a/archivebox/parsers/__init__.py
+++ b/archivebox/parsers/__init__.py
@@ -68,7 +68,6 @@ def parse_links_memory(urls: List[str], root_url: Optional[str]=None):
"""
parse a list of URLS without touching the filesystem
"""
- check_url_parsing_invariants()
timer = TimedProgress(TIMEOUT * 4)
#urls = list(map(lambda x: x + "\n", urls))
@@ -89,8 +88,6 @@ def parse_links(source_file: str, root_url: Optional[str]=None) -> Tuple[List[Li
RSS feed, bookmarks export, or text file
"""
- check_url_parsing_invariants()
-
timer = TimedProgress(TIMEOUT * 4)
with open(source_file, 'r', encoding='utf-8') as file:
links, parser = run_parser_functions(file, timer, root_url=root_url)
@@ -173,31 +170,48 @@ def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{ba
return source_path
-def check_url_parsing_invariants() -> None:
- """Check that plain text regex URL parsing works as expected"""
-
- # this is last-line-of-defense to make sure the URL_REGEX isn't
- # misbehaving, as the consequences could be disastrous and lead to many
- # incorrect/badly parsed links being added to the archive
-
- test_urls = '''
- https://example1.com/what/is/happening.html?what=1#how-about-this=1
- https://example2.com/what/is/happening/?what=1#how-about-this=1
- HTtpS://example3.com/what/is/happening/?what=1#how-about-this=1f
- https://example4.com/what/is/happening.html
- https://example5.com/
- https://example6.com
-
- http://example7.com
- [https://example8.com/what/is/this.php?what=1]
- [and http://example9.com?what=1&other=3#and-thing=2]
- https://example10.com#and-thing=2 "
- abcdef
- sdflkf[what](https://example12.com/who/what.php?whoami=1#whatami=2)?am=hi
- example13.bada
- and example14.badb
- htt://example15.badc
- '''
- # print('\n'.join(re.findall(URL_REGEX, test_urls)))
- assert len(re.findall(URL_REGEX, test_urls)) == 12
-
+# Check that plain text regex URL parsing works as expected
+# this is a last line of defense to make sure the URL_REGEX isn't
+# misbehaving due to some OS-level or environment-level quirk (e.g. a bad regex lib)
+# the consequences of bad URL parsing could be disastrous and lead to many
+# incorrect/badly parsed links being added to the archive, so this is worth the cost of checking
+_test_url_strs = {
+ 'example.com': 0,
+ '/example.com': 0,
+ '//example.com': 0,
+ ':/example.com': 0,
+ '://example.com': 0,
+ 'htt://example8.com': 0,
+ '/htt://example.com': 0,
+ 'https://example': 1,
+ 'https://localhost/2345': 1,
+ 'https://localhost:1234/123': 1,
+ '://': 0,
+ 'https://': 0,
+ 'http://': 0,
+ 'ftp://': 0,
+ 'ftp://example.com': 0,
+ 'https://example.com': 1,
+ 'https://example.com/': 1,
+ 'https://a.example.com': 1,
+ 'https://a.example.com/': 1,
+ 'https://a.example.com/what/is/happening.html': 1,
+ 'https://a.example.com/what/ís/happening.html': 1,
+ 'https://a.example.com/what/is/happening.html?what=1&2%20b#höw-about-this=1a': 1,
+ 'https://a.example.com/what/is/happéning/?what=1&2%20b#how-aboüt-this=1a': 1,
+ 'HTtpS://a.example.com/what/is/happening/?what=1&2%20b#how-about-this=1af&2f%20b': 1,
+ 'https://example.com/?what=1#how-about-this=1&2%20baf': 1,
+ 'https://example.com?what=1#how-about-this=1&2%20baf': 1,
+ 'http://example7.com': 1,
+ '[https://example8.com/what/is/this.php?what=1]': 1,
+ '[and http://example9.com?what=1&other=3#and-thing=2]': 1,
+ 'https://example10.com#and-thing=2 "': 1,
+ 'abcdef': 0,
+ 'sdflkf[what](https://example12.com/who/what.php?whoami=1#whatami=2)?am=hi': 1,
+ 'http://examplehttp://15.badc': 2,
+ 'https://a.example.com/one.html?url=http://example.com/inside/of/another?=http://': 2,
+ '[https://a.example.com/one.html?url=http://example.com/inside/of/another?=](http://a.example.com)': 3,
+}
+for url_str, num_urls in _test_url_strs.items():
+ assert len(re.findall(URL_REGEX, url_str)) == num_urls, (
+ f'{url_str} does not contain {num_urls} urls')
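+# NOTE: these asserts run once at module import, so a misbehaving URL_REGEX fails loudly
+# at startup instead of silently adding badly parsed links to the archive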
diff --git a/archivebox/search/utils.py b/archivebox/search/utils.py
index e6d15455..82d1880e 100644
--- a/archivebox/search/utils.py
+++ b/archivebox/search/utils.py
@@ -16,7 +16,7 @@ def get_file_result_content(res, extra_path, use_pwd=False):
if extra_path:
fpath = f'{fpath}/{extra_path}'
- with open(fpath, 'r') as file:
+ with open(fpath, 'r', encoding='utf-8') as file:
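+ # read back with the same utf-8 encoding that system.atomic_write enforces on all text writes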
data = file.read()
if data:
return [data]
diff --git a/archivebox/system.py b/archivebox/system.py
index 2191c70a..3c43eeaf 100644
--- a/archivebox/system.py
+++ b/archivebox/system.py
@@ -10,7 +10,7 @@ from typing import Optional, Union, Set, Tuple
from subprocess import run as subprocess_run
from crontab import CronTab
-from atomicwrites import atomic_write as lib_atomic_write
+from .vendor.atomicwrites import atomic_write as lib_atomic_write
from .util import enforce_types, ExtendedEncoder
from .config import OUTPUT_PERMISSIONS
@@ -37,10 +37,11 @@ def atomic_write(path: Union[Path, str], contents: Union[dict, str, bytes], over
"""Safe atomic write to filesystem by writing to temp file + atomic rename"""
mode = 'wb+' if isinstance(contents, bytes) else 'w'
+ encoding = None if isinstance(contents, bytes) else 'utf-8' # enforce utf-8 on all text writes
# print('\n> Atomic Write:', mode, path, len(contents), f'overwrite={overwrite}')
try:
- with lib_atomic_write(path, mode=mode, overwrite=overwrite) as f:
+ with lib_atomic_write(path, mode=mode, overwrite=overwrite, encoding=encoding) as f:
if isinstance(contents, dict):
dump(contents, f, indent=4, sort_keys=True, cls=ExtendedEncoder)
elif isinstance(contents, (bytes, str)):
diff --git a/archivebox/templates/admin/actions_as_select.html b/archivebox/templates/admin/actions_as_select.html
index 86a77190..e69de29b 100644
--- a/archivebox/templates/admin/actions_as_select.html
+++ b/archivebox/templates/admin/actions_as_select.html
@@ -1 +0,0 @@
-actions_as_select
diff --git a/archivebox/templates/admin/base.html b/archivebox/templates/admin/base.html
index d8ad8d00..a3d21ba9 100644
--- a/archivebox/templates/admin/base.html
+++ b/archivebox/templates/admin/base.html
@@ -20,7 +20,7 @@
-