Merge branch 'dev' into feat/reverse-proxy-auth

Nick Sweeting committed 2023-01-09 18:20:45 -08:00 (via GitHub)
commit 2538b170c7
36 changed files with 625 additions and 292 deletions


@@ -23,11 +23,12 @@ jobs:
       cd brew_dist/
       brew install --build-bottle ./archivebox.rb
       # brew bottle archivebox
+      archivebox version
     - name: Add some links to test
       run: |
         mkdir data && cd data
-        archivebox init
+        archivebox init --setup
         archivebox add 'https://example.com'
         archivebox version
         archivebox status

.gitignore

@@ -24,3 +24,6 @@ data1/
 data2/
 data3/
 output/
+
+# vim
+*.sw?


@@ -1,13 +1,22 @@
 # This is the Dockerfile for ArchiveBox, it bundles the following dependencies:
-# python3, ArchiveBox, curl, wget, git, chromium, youtube-dl, single-file
+# python3, ArchiveBox, curl, wget, git, chromium, youtube-dl, yt-dlp, single-file
 # Usage:
+#     git submodule update --init --recursive
+#     git pull --recurse-submodules
 #     docker build . -t archivebox --no-cache
 #     docker run -v "$PWD/data":/data archivebox init
 #     docker run -v "$PWD/data":/data archivebox add 'https://example.com'
 #     docker run -v "$PWD/data":/data -it archivebox manage createsuperuser
 #     docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
+# Multi-arch build:
+#     docker buildx create --use
+#     docker buildx build . --platform=linux/amd64,linux/arm64,linux/arm/v7 --push -t archivebox/archivebox:latest -t archivebox/archivebox:dev
+#
+# Read more about [developing
+# Archivebox](https://github.com/ArchiveBox/ArchiveBox#archivebox-development).

-FROM python:3.9-slim-buster
+FROM python:3.10-slim-bullseye

 LABEL name="archivebox" \
     maintainer="Nick Sweeting <archivebox-docker@sweeting.me>" \
@@ -48,11 +57,12 @@ RUN apt-get update -qq \
     && apt-get install -qq -y --no-install-recommends \
         wget curl chromium git ffmpeg youtube-dl ripgrep \
         fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
+    && ln -s /usr/bin/chromium /usr/bin/chromium-browser \
     && rm -rf /var/lib/apt/lists/*

 # Install Node environment
 RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add - \
-    && echo 'deb https://deb.nodesource.com/node_15.x buster main' >> /etc/apt/sources.list \
+    && echo 'deb https://deb.nodesource.com/node_17.x buster main' >> /etc/apt/sources.list \
     && apt-get update -qq \
     && apt-get install -qq -y --no-install-recommends \
         nodejs \
@@ -80,7 +90,8 @@ RUN apt-get update -qq \
         build-essential python-dev python3-dev \
     && echo 'empty placeholder for setup.py to use' > "$CODE_DIR/archivebox/README.md" \
     && python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.install_requires + result.extras_require["sonic"]))' > /tmp/requirements.txt \
-    && pip install --quiet -r /tmp/requirements.txt \
+    && pip install -r /tmp/requirements.txt \
+    && pip install --upgrade youtube-dl yt-dlp \
     && apt-get purge -y build-essential python-dev python3-dev \
     && apt-get autoremove -y \
     && rm -rf /var/lib/apt/lists/*
@@ -103,13 +114,14 @@ RUN pip install -e .
 WORKDIR "$DATA_DIR"
 ENV IN_DOCKER=True \
     CHROME_SANDBOX=False \
-    CHROME_BINARY="chromium" \
+    CHROME_BINARY="/usr/bin/chromium-browser" \
     USE_SINGLEFILE=True \
     SINGLEFILE_BINARY="$NODE_DIR/node_modules/.bin/single-file" \
     USE_READABILITY=True \
     READABILITY_BINARY="$NODE_DIR/node_modules/.bin/readability-extractor" \
     USE_MERCURY=True \
-    MERCURY_BINARY="$NODE_DIR/node_modules/.bin/mercury-parser"
+    MERCURY_BINARY="$NODE_DIR/node_modules/.bin/mercury-parser" \
+    YOUTUBEDL_BINARY="yt-dlp"

 # Print version for nice docker finish summary
 # RUN archivebox version
@@ -119,8 +131,9 @@ RUN /app/bin/docker_entrypoint.sh archivebox version
 VOLUME "$DATA_DIR"
 EXPOSE 8000

-HEALTHCHECK --interval=30s --timeout=20s --retries=15 \
-    CMD curl --silent 'http://localhost:8000/admin/login/' || exit 1
+# Optional:
+# HEALTHCHECK --interval=30s --timeout=20s --retries=15 \
+#     CMD curl --silent 'http://localhost:8000/admin/login/' || exit 1

 ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"]
 CMD ["archivebox", "server", "--quick-init", "0.0.0.0:8000"]


@@ -51,10 +51,13 @@ The goal is to sleep soundly knowing the part of the internet you care about wil
 <br/>

-**📦&nbsp; Get ArchiveBox with `docker-compose` / `docker` / `apt` / `brew` / `pip3` ([see Quickstart below](#quickstart)).**
+**📦&nbsp; Get ArchiveBox with Docker / `apt` / `brew` / `pip3` / etc. ([see Quickstart below](#quickstart)).**

 ```bash
-# Or use this auto setup script to install it for you (optional)
+# Follow the instructions for your package manager in the quickstart, e.g.:
+pip3 install archivebox
+
+# Or use the optional auto setup script to install it for you:
 curl -sSL 'https://get.archivebox.io' | sh
 ```
@@ -81,15 +84,15 @@ ls ./archive/*/index.json # or browse directly via the filesyste
 ## Key Features

-- [**Free & open source**](https://github.com/ArchiveBox/ArchiveBox/blob/master/LICENSE), doesn't require signing up for anything, stores all data locally
+- [**Free & open source**](https://github.com/ArchiveBox/ArchiveBox/blob/master/LICENSE), doesn't require signing up online, stores all data locally
 - [**Powerful, intuitive command line interface**](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#CLI-Usage) with [modular optional dependencies](#dependencies)
 - [**Comprehensive documentation**](https://github.com/ArchiveBox/ArchiveBox/wiki), [active development](https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap), and [rich community](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community)
-- [**Extracts a wide variety of content out-of-the-box**](https://github.com/ArchiveBox/ArchiveBox/issues/51): [media (youtube-dl), articles (readability), code (git), etc.](#output-formats)
+- [**Extracts a wide variety of content out-of-the-box**](https://github.com/ArchiveBox/ArchiveBox/issues/51): [media (youtube-dl or yt-dlp), articles (readability), code (git), etc.](#output-formats)
 - [**Supports scheduled/realtime importing**](https://github.com/ArchiveBox/ArchiveBox/wiki/Scheduled-Archiving) from [many types of sources](#input-formats)
 - [**Uses standard, durable, long-term formats**](#saves-lots-of-useful-stuff-for-each-imported-link) like HTML, JSON, PDF, PNG, and WARC
 - [**Usable as a oneshot CLI**](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#CLI-Usage), [**self-hosted web UI**](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#UI-Usage), [Python API](https://docs.archivebox.io/en/latest/modules.html) (BETA), [REST API](https://github.com/ArchiveBox/ArchiveBox/issues/496) (ALPHA), or [desktop app](https://github.com/ArchiveBox/electron-archivebox) (ALPHA)
-- [**Saves all pages to archive.org as well**](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#submit_archive_dot_org) by default for redundancy (can be [disabled](https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#stealth-mode) for local-only mode)
+- [**Saves all pages to archive.org as well**](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_archive_dot_org) by default for redundancy (can be [disabled](https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#stealth-mode) for local-only mode)
-- Planned: support for archiving [content requiring a login/paywall/cookies](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#chrome_user_data_dir) (working, but ill-advised until some pending fixes are released)
+- Advanced users: support for archiving [content requiring login/paywall/cookies](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#chrome_user_data_dir) (see wiki security caveats!)
 - Planned: support for running [JS during archiving](https://github.com/ArchiveBox/ArchiveBox/issues/51) to adblock, [autoscroll](https://github.com/ArchiveBox/ArchiveBox/issues/80), [modal-hide](https://github.com/ArchiveBox/ArchiveBox/issues/175), [thread-expand](https://github.com/ArchiveBox/ArchiveBox/issues/345)...

 <br/><br/>
@@ -165,14 +168,16 @@ See <a href="#%EF%B8%8F-cli-usage">below</a> for more usage examples using the C
 </ol>
 See <a href="#%EF%B8%8F-cli-usage">below</a> for more usage examples using the CLI, Web UI, or filesystem/SQL/Python to manage your archive.<br/>
-See <a href="https://github.com/ArchiveBox/ArchiveBox/blob/dev/bin/setup.sh"><code>setup.sh</code></a> for the source code of the auto-install script.
+See <a href="https://github.com/ArchiveBox/ArchiveBox/blob/dev/bin/setup.sh"><code>setup.sh</code></a> for the source code of the auto-install script.<br/>
+See <a href="https://docs.sweeting.me/s/against-curl-sh">"Against curl | sh as an install method"</a> blog post for my thoughts on the shortcomings of this install method.
 <br/><br/>
 </details>

 <br/>

-#### 🛠&nbsp; Manual Setup
+#### 🛠&nbsp; Package Manager Setup
+<a name="Manual-Setup"></a>

 <details>
 <summary><b><img src="https://user-images.githubusercontent.com/511499/117448075-49597580-af0c-11eb-91ba-f34fff10096b.png" alt="aptitude" height="28px" align="top"/> <code>apt</code></b> (Ubuntu/Debian)</summary>
 <br/>
@@ -272,7 +277,7 @@ See the <a href="https://github.com/ArchiveBox/pip-archivebox"><code>pip-archive
 <summary><img src="https://user-images.githubusercontent.com/511499/118077361-f0616580-b381-11eb-973c-ee894a3349fb.png" alt="Arch" height="28px" align="top"/> <code>pacman</code> / <img src="https://user-images.githubusercontent.com/511499/118077946-29e6a080-b383-11eb-94f0-d4871da08c3f.png" alt="FreeBSD" height="28px" align="top"/> <code>pkg</code> / <img src="https://user-images.githubusercontent.com/511499/118077861-002d7980-b383-11eb-86a7-5936fad9190f.png" alt="Nix" height="28px" align="top"/> <code>nix</code> (Arch/FreeBSD/NixOS/more)</summary>
 <br/>
 <ul>
-<li>Arch: <a href="https://aur.archlinux.org/packages/archivebox/"><code>pacman install archivebox</code></a> (contributed by <a href="https://github.com/imlonghao"><code>@imlonghao</code></a>)</li>
+<li>Arch: <a href="https://aur.archlinux.org/packages/archivebox/"><code>yay -S archivebox</code></a> (contributed by <a href="https://github.com/imlonghao"><code>@imlonghao</code></a>)</li>
 <li>FreeBSD: <a href="https://github.com/ArchiveBox/ArchiveBox#%EF%B8%8F-easy-setup"><code>curl -sSL 'https://get.archivebox.io' | sh</code></a> (uses <code>pkg</code> + <code>pip3</code> under-the-hood)</li>
 <li>Nix: <a href="https://github.com/NixOS/nixpkgs/blob/master/pkgs/applications/misc/archivebox/default.nix"><code>nix-env --install archivebox</code></a> (contributed by <a href="https://github.com/siraben"><code>@siraben</code></a>)</li>
 <li>More: <a href="https://github.com/ArchiveBox/ArchiveBox/issues/new"><i>contribute another distribution...!</i></a></li>
@@ -316,6 +321,7 @@ None of these hosting providers are officially endorsed:<br/>
 <sub><i>(most still require manual setup or manual periodic updating using the methods above)</i></sub>
 <br/><br/>
 <li><a href="https://www.stellarhosted.com/archivebox/"><img src="https://img.shields.io/badge/Semi_Managed_Hosting-StellarHosted.com-%23193f7e.svg?style=flat" height="22px"/></a> (USD $29-250/mo, <a href="https://www.stellarhosted.com/archivebox/#pricing">pricing</a>)</li>
+<li><a href="https://www.pikapods.com/pods?run=archivebox"><img src="https://img.shields.io/badge/Semi_Managed_Hosting-PikaPods.com-%2343a047.svg?style=flat" height="22px"/></a> (from USD $2.6/mo)</li>
 <li><a href="https://m.do.co/c/cbc4c0c17840">
 <img src="https://img.shields.io/badge/Unmanaged_VPS-DigitalOcean.com-%232f7cf7.svg?style=flat" height="22px"/>
 </a> (USD $5-50+/mo, <a href="https://m.do.co/c/cbc4c0c17840">🎗&nbsp; referral link</a>, <a href="https://www.digitalocean.com/community/tutorials/how-to-install-and-use-docker-compose-on-ubuntu-20-04">instructions</a>)</li>
@@ -341,7 +347,7 @@ For more discussion on managed and paid hosting options see here: <a href="https
 - Import URLs from some of the supported [Input Formats](#input-formats) or view the supported [Output Formats](#output-formats)...
 - Tweak your UI or archiving behavior [Configuration](#configuration) or read about some of the [Caveats](#caveats) and troubleshooting steps...
-- Read about the [Dependencies](#dependencies) used for archiving or the [Archive Layout](#archive-layout) on disk...
+- Read about the [Dependencies](#dependencies) used for archiving, the [Upgrading Process](https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives), or the [Archive Layout](#archive-layout) on disk...
 - Or check out our full [Documentation](#documentation) or [Community Wiki](#internet-archiving-ecosystem)...

 <br/>
@@ -362,12 +368,12 @@ archivebox help
 - `archivebox setup/init/config/status/manage` to administer your collection
 - `archivebox add/schedule/remove/update/list/shell/oneshot` to manage Snapshots in the archive
-- `archivebox schedule` to pull in fresh URLs in regularly from [boorkmarks/history/Pocket/Pinboard/RSS/etc.](#input-formats)
+- `archivebox schedule` to pull in fresh URLs in regularly from [bookmarks/history/Pocket/Pinboard/RSS/etc.](#input-formats)

 #### 🖥&nbsp; Web UI Usage

 ```bash
-archivebox manage createsuperuser
+archivebox manage createsuperuser   # set an admin password
 archivebox server 0.0.0.0:8000      # open http://127.0.0.1:8000 to view it

 # you can also configure whether or not login is required for most features
@@ -419,6 +425,7 @@ ArchiveBox supports many input formats for URLs, including Pocket & Pinboard exp
 - <img src="https://nicksweeting.com/images/rss.svg" height="22px"/> TXT, RSS, XML, JSON, CSV, SQL, HTML, Markdown, or [any other text-based format...](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Import-a-list-of-URLs-from-a-text-file)
 - <img src="https://nicksweeting.com/images/bookmarks.png" height="22px"/> [Browser history](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive) or [browser bookmarks](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive) (see instructions for: [Chrome](https://support.google.com/chrome/answer/96816?hl=en), [Firefox](https://support.mozilla.org/en-US/kb/export-firefox-bookmarks-to-backup-or-transfer), [Safari](http://i.imgur.com/AtcvUZA.png), [IE](https://support.microsoft.com/en-us/help/211089/how-to-import-and-export-the-internet-explorer-favorites-folder-to-a-32-bit-version-of-windows), [Opera](http://help.opera.com/Windows/12.10/en/importexport.html), [and more...](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive))
+- <img src="https://i.imgur.com/AQyHbu8.png" height="22px"/> Browser extension [`archivebox-exporter`](https://github.com/tjhorner/archivebox-exporter) (realtime archiving from Chrome/Chromium/Firefox)
 - <img src="https://getpocket.com/favicon.ico" height="22px"/> [Pocket](https://getpocket.com/export), [Pinboard](https://pinboard.in/export/), [Instapaper](https://www.instapaper.com/user/export), [Shaarli](https://shaarli.readthedocs.io/en/master/Usage/#importexport), [Delicious](https://www.groovypost.com/howto/howto/export-delicious-bookmarks-xml/), [Reddit Saved](https://github.com/csu/export-saved-reddit), [Wallabag](https://doc.wallabag.org/en/user/import/wallabagv2.html), [Unmark.it](http://help.unmark.it/import-export), [OneTab](https://www.addictivetips.com/web/onetab-save-close-all-chrome-tabs-to-restore-export-or-import/), [and more...](https://github.com/ArchiveBox/ArchiveBox/wiki/Quickstart#2-get-your-list-of-urls-to-archive)

 <img src="https://i.imgur.com/zM4z1aU.png" width="330px" align="right">
@@ -462,7 +469,7 @@ Inside each Snapshot folder, ArchiveBox save these different types of extractor
 - **DOM Dump:** `output.html` DOM Dump of the HTML after rendering using headless chrome
 - **Article Text:** `article.html/json` Article text extraction using Readability & Mercury
 - **Archive.org Permalink:** `archive.org.txt` A link to the saved site on archive.org
-- **Audio & Video:** `media/` all audio/video files + playlists, including subtitles & metadata with youtube-dl
+- **Audio & Video:** `media/` all audio/video files + playlists, including subtitles & metadata with youtube-dl (or yt-dlp)
 - **Source Code:** `git/` clone of any repository found on GitHub, Bitbucket, or GitLab links
 - _More coming soon! See the [Roadmap](https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap)..._
@@ -522,7 +529,7 @@ To achieve high fidelity archives in as many situations as possible, ArchiveBox
 - `node` & `npm` (for readability, mercury, and singlefile)
 - `wget` (for plain HTML, static files, and WARC saving)
 - `curl` (for fetching headers, favicon, and posting to Archive.org)
-- `youtube-dl` (for audio, video, and subtitles)
+- `youtube-dl` or `yt-dlp` (for audio, video, and subtitles)
 - `git` (for cloning git repos)
 - and more as we grow...
@@ -538,8 +545,9 @@ archivebox setup # auto install all the extractors and extras
 archivebox --version # see info and check validity of installed dependencies
 ```

-Installing directly on **Windows without Docker or WSL/WSL2/Cygwin is not officially supported**, but some advanced users have reported getting it working.
+Installing directly on **Windows without Docker or WSL/WSL2/Cygwin is not officially supported** (I cannot respond to Windows support tickets), but some advanced users have reported getting it working.
+
+For detailed information about upgrading ArchiveBox and its dependencies, see: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives

 <br/>
@@ -829,6 +837,7 @@ You can also access the docs locally by looking in the [`ArchiveBox/docs/`](http
 - [Chromium Install](https://github.com/ArchiveBox/ArchiveBox/wiki/Chromium-Install)
 - [Security Overview](https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview)
 - [Troubleshooting](https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting)
+- [Upgrading or Merging Archives](https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives)
 - [Python API](https://docs.archivebox.io/en/latest/modules.html) (alpha)
 - [REST API](https://github.com/ArchiveBox/ArchiveBox/issues/496) (alpha)
@@ -895,7 +904,9 @@ archivebox --version
 # if you edit e.g. ./archivebox/core/models.py on the docker host, runserver
 # inside the container will reload and pick up your changes
 docker build . -t archivebox
-docker run -it archivebox init --setup
+docker run -it \
+    -v $PWD/data:/data \
+    archivebox init --setup
 docker run -it -p 8000:8000 \
     -v $PWD/data:/data \
     -v $PWD/archivebox:/app/archivebox \
@@ -921,6 +932,8 @@ archivebox config --set DEBUG=True
 archivebox server --debug ...
 ```

+https://stackoverflow.com/questions/1074212/how-can-i-see-the-raw-sql-queries-django-is-running
+
 </details>

 #### Install and run a specific GitHub branch
@@ -976,6 +989,7 @@ archivebox shell
 archivebox manage dbshell
 ```
 (uses `pytest -s`)
+https://stackoverflow.com/questions/1074212/how-can-i-see-the-raw-sql-queries-django-is-running

 </details>
@@ -1067,7 +1081,7 @@ Extractors take the URL of a page to archive, write their output to the filesyst
 <img src="https://raw.githubusercontent.com/Monadical-SAS/redux-time/HEAD/examples/static/jeremy.jpg" height="40px"/>
 <br/>
 <i><sub>
-This project is maintained mostly in <a href="https://nicksweeting.com/blog#About">my spare time</a> with the help from generous contributors and <a href="https://monadical.com">Monadical</a> (✨ <a href="https://monadical.com">hire them</a> for dev work!).
+This project is maintained mostly in <a href="https://nicksweeting.com/blog#About">my spare time</a> with the help from generous <a href="https://github.com/ArchiveBox/ArchiveBox/graphs/contributors">contributors</a> and <a href="https://monadical.com">Monadical</a> (✨ <a href="https://monadical.com">hire them</a> for dev work!).
 </sub>
 </i>
 <br/><br/>


@@ -1 +1,3 @@
+production_url: https://archivebox.io
 theme: jekyll-theme-merlot
+# Github Pages static site settings for https://archivebox.io


@@ -30,11 +30,17 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         help="Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3",
     )
     parser.add_argument(
-        '--update-all', #'-n',
+        '--update', #'-u',
         action='store_true',
         default=not ONLY_NEW,  # when ONLY_NEW=True we skip updating old links
         help="Also retry previously skipped/failed links when adding new links",
     )
+    parser.add_argument(
+        '--update-all', #'-n',
+        action='store_true',
+        default=False,
+        help="Also update ALL links in index when finished adding new links",
+    )
     parser.add_argument(
         '--index-only', #'-o',
         action='store_true',
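
Note the split in semantics here: the renamed `--update` keeps the old default (`not ONLY_NEW`), while the re-added `--update-all` now defaults to False. A standalone sketch (not part of this commit, with `ONLY_NEW` hardcoded for illustration) of how the two flags parse:

```python
# Minimal reproduction of the two flags' defaults.
import argparse

ONLY_NEW = True  # when ONLY_NEW=True, adding skips updating old links by default

parser = argparse.ArgumentParser(prog='archivebox add')
parser.add_argument('--update', action='store_true', default=not ONLY_NEW,
                    help='Also retry previously skipped/failed links when adding new links')
parser.add_argument('--update-all', action='store_true', default=False,
                    help='Also update ALL links in index when finished adding new links')

args = parser.parse_args(['--update'])
print(args.update, args.update_all)  # True False
```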
@@ -104,6 +110,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         urls=stdin_urls or urls,
         depth=command.depth,
         tag=command.tag,
+        update=command.update,
         update_all=command.update_all,
         index_only=command.index_only,
         overwrite=command.overwrite,


@@ -51,6 +51,11 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         action='store_true',
         help='Re-archive any URLs that have been previously archived, overwriting existing Snapshots',
     )
+    parser.add_argument(
+        '--update',
+        action='store_true',
+        help='Re-pull any URLs that have been previously added, as needed to fill missing ArchiveResults',
+    )
     group.add_argument(
         '--clear', # '-c'
         action='store_true',
@@ -94,6 +99,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         every=command.every,
         depth=command.depth,
         overwrite=command.overwrite,
+        update=command.update,
         import_path=command.import_path,
         out_dir=pwd or OUTPUT_DIR,
     )


@@ -26,11 +26,12 @@ import io
 import re
 import sys
 import json
+import inspect
 import getpass
 import platform
 import shutil
+import sqlite3
 import django
-from sqlite3 import dbapi2 as sqlite3

 from hashlib import md5
 from pathlib import Path
@@ -48,6 +49,9 @@ from .config_stubs import (
     ConfigDefaultDict,
 )

+
+### Pre-Fetch Minimal System Config
+
 SYSTEM_USER = getpass.getuser() or os.getlogin()

 try:
@@ -65,6 +69,8 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
         'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']},
         'SHOW_PROGRESS': {'type': bool, 'default': lambda c: (c['IS_TTY'] and platform.system() != 'Darwin')},  # progress bars are buggy on mac, disable for now
         'IN_DOCKER': {'type': bool, 'default': False},
+        'PUID': {'type': int, 'default': os.getuid()},
+        'PGID': {'type': int, 'default': os.getgid()},
         # TODO: 'SHOW_HINTS': {'type: bool, 'default': True},
     },
@@ -79,6 +85,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
         'URL_BLACKLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'},  # to avoid downloading code assets as their own pages
         'URL_WHITELIST': {'type': str, 'default': None},
         'ENFORCE_ATOMIC_WRITES': {'type': bool, 'default': True},
+        'TAG_SEPARATOR_PATTERN': {'type': str, 'default': r'[,]'},
     },

     'SERVER_CONFIG': {
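
For illustration, a hypothetical helper (not in this commit) showing how a `TAG_SEPARATOR_PATTERN` like the default `r'[,]'` splits a raw `--tag` argument into clean tag names:

```python
import re

TAG_SEPARATOR_PATTERN = r'[,]'  # the default; could plausibly be widened, e.g. r'[,;]'

def split_tags(tag_str: str) -> list:
    # split on the configured separator pattern and drop empty/whitespace-only entries
    return [t.strip() for t in re.split(TAG_SEPARATOR_PATTERN, tag_str) if t.strip()]

print(split_tags('news, tech,archive '))  # ['news', 'tech', 'archive']
```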
@@ -93,9 +100,11 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
         'SNAPSHOTS_PER_PAGE': {'type': int, 'default': 40},
         'CUSTOM_TEMPLATES_DIR': {'type': str, 'default': None},
         'TIME_ZONE': {'type': str, 'default': 'UTC'},
+        'TIMEZONE': {'type': str, 'default': 'UTC'},
         'REVERSE_PROXY_USER_HEADER': {'type': str, 'default': 'Remote-User'},
         'REVERSE_PROXY_WHITELIST': {'type': str, 'default': ''},
         'LOGOUT_REDIRECT_URL': {'type': str, 'default': '/'},
+        'PREVIEW_ORIGINALS': {'type': bool, 'default': True},
     },

     'ARCHIVE_METHOD_TOGGLES': {
@@ -122,9 +131,9 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
         'CHECK_SSL_VALIDITY': {'type': bool, 'default': True},
         'MEDIA_MAX_SIZE': {'type': str, 'default': '750m'},
-        'CURL_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.61 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) curl/{CURL_VERSION}'},
-        'WGET_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.61 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) wget/{WGET_VERSION}'},
-        'CHROME_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.61 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)'},
+        'CURL_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/605.1.15 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) curl/{CURL_VERSION}'},
+        'WGET_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/605.1.15 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) wget/{WGET_VERSION}'},
+        'CHROME_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/605.1.15 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)'},

         'COOKIES_FILE': {'type': str, 'default': None},
         'CHROME_USER_DATA_DIR': {'type': str, 'default': None},
@@ -139,10 +148,18 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
             '--no-call-home',
             '--write-sub',
             '--all-subs',
-            '--write-auto-sub',
+            # There are too many of these and youtube
+            # throttles you with HTTP error 429
+            #'--write-auto-subs',
             '--convert-subs=srt',
             '--yes-playlist',
             '--continue',
+            # This flag doesn't exist in youtube-dl
+            # only in yt-dlp
+            '--no-abort-on-error',
+            # --ignore-errors must come AFTER
+            # --no-abort-on-error
+            # https://github.com/yt-dlp/yt-dlp/issues/4914
             '--ignore-errors',
             '--geo-bypass',
             '--add-metadata',
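
Since the comments above note that `--no-abort-on-error` only exists in yt-dlp and must precede `--ignore-errors`, a hypothetical order-preserving filter (not in this commit) for falling back to plain youtube-dl might look like:

```python
YTDLP_ONLY_ARGS = {'--no-abort-on-error'}  # per the comment above; not an exhaustive list

def args_for_binary(args: list, binary: str) -> list:
    # keep the original ordering intact: --no-abort-on-error stays before --ignore-errors
    if 'yt-dlp' in binary:
        return list(args)
    return [a for a in args if a not in YTDLP_ONLY_ARGS]

print(args_for_binary(['--no-abort-on-error', '--ignore-errors'], 'youtube-dl'))
# -> ['--ignore-errors']
```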
@@ -164,6 +181,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
             '--compressed'
         ]},
         'GIT_ARGS': {'type': list, 'default': ['--recursive']},
+        'SINGLEFILE_ARGS': {'type': list, 'default' : None}
     },

     'SEARCH_BACKEND_CONFIG' : {
@@ -197,7 +215,8 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
         'SINGLEFILE_BINARY': {'type': str, 'default': lambda c: bin_path('single-file')},
         'READABILITY_BINARY': {'type': str, 'default': lambda c: bin_path('readability-extractor')},
         'MERCURY_BINARY': {'type': str, 'default': lambda c: bin_path('mercury-parser')},
-        'YOUTUBEDL_BINARY': {'type': str, 'default': 'youtube-dl'},
+        #'YOUTUBEDL_BINARY': {'type': str, 'default': 'youtube-dl'},
+        'YOUTUBEDL_BINARY': {'type': str, 'default': 'yt-dlp'},
         'NODE_BINARY': {'type': str, 'default': 'node'},
         'RIPGREP_BINARY': {'type': str, 'default': 'rg'},
         'CHROME_BINARY': {'type': str, 'default': None},
@@ -321,6 +340,15 @@ ALLOWED_IN_OUTPUT_DIR = {
     'static_index.json',
 }

+def get_version(config):
+    return json.loads((Path(config['PACKAGE_DIR']) / 'package.json').read_text(encoding='utf-8').strip())['version']
+
+def get_commit_hash(config):
+    try:
+        return list((config['PACKAGE_DIR'] / '../.git/refs/heads/').glob('*'))[0].read_text().strip()
+    except Exception:
+        return None
+

 ############################## Derived Config ##################################
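
A usage sketch (not part of this commit) for the two helpers just above, with a hypothetical stand-in config dict; it assumes `get_version`/`get_commit_hash` are in scope and `PACKAGE_DIR` is a `Path`:

```python
from pathlib import Path

config = {'PACKAGE_DIR': Path('/app/archivebox')}  # hypothetical install location
print(get_version(config))      # e.g. '0.7.0', read from package.json
print(get_commit_hash(config))  # some branch head hash, or None outside a git checkout
```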
@@ -345,15 +373,21 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
     'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')},

     'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0] or bin_path('archivebox')},
-    'VERSION': {'default': lambda c: json.loads((Path(c['PACKAGE_DIR']) / 'package.json').read_text(encoding='utf-8').strip())['version']},
+    'VERSION': {'default': lambda c: get_version(c)},
+    'COMMIT_HASH': {'default': lambda c: get_commit_hash(c)},

     'PYTHON_BINARY': {'default': lambda c: sys.executable},
     'PYTHON_ENCODING': {'default': lambda c: sys.stdout.encoding.upper()},
     'PYTHON_VERSION': {'default': lambda c: '{}.{}.{}'.format(*sys.version_info[:3])},

-    'DJANGO_BINARY': {'default': lambda c: django.__file__.replace('__init__.py', 'bin/django-admin.py')},
+    'DJANGO_BINARY': {'default': lambda c: inspect.getfile(django)},
     'DJANGO_VERSION': {'default': lambda c: '{}.{}.{} {} ({})'.format(*django.VERSION)},

+    'SQLITE_BINARY': {'default': lambda c: inspect.getfile(sqlite3)},
+    'SQLITE_VERSION': {'default': lambda c: sqlite3.version},
+    #'SQLITE_JOURNAL_MODE': {'default': lambda c: 'wal'},  # set at runtime below, interesting but unused for now
+    #'SQLITE_OPTIONS': {'default': lambda c: ['JSON1']},   # set at runtime below
+
     'USE_CURL': {'default': lambda c: c['USE_CURL'] and (c['SAVE_FAVICON'] or c['SAVE_TITLE'] or c['SAVE_ARCHIVE_DOT_ORG'])},
     'CURL_VERSION': {'default': lambda c: bin_version(c['CURL_BINARY']) if c['USE_CURL'] else None},
     'CURL_USER_AGENT': {'default': lambda c: c['CURL_USER_AGENT'].format(**c)},
@@ -373,6 +407,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
     'USE_SINGLEFILE': {'default': lambda c: c['USE_SINGLEFILE'] and c['SAVE_SINGLEFILE']},
     'SINGLEFILE_VERSION': {'default': lambda c: bin_version(c['SINGLEFILE_BINARY']) if c['USE_SINGLEFILE'] else None},
+    'SINGLEFILE_ARGS': {'default': lambda c: c['SINGLEFILE_ARGS'] or []},

     'USE_READABILITY': {'default': lambda c: c['USE_READABILITY'] and c['SAVE_READABILITY']},
     'READABILITY_VERSION': {'default': lambda c: bin_version(c['READABILITY_BINARY']) if c['USE_READABILITY'] else None},
@@ -652,6 +687,8 @@ def bin_version(binary: Optional[str]) -> Optional[str]:
         return None

     try:
-        version_str = run([abspath, "--version"], stdout=PIPE).stdout.strip().decode()
+        version_str = run([abspath, "--version"], stdout=PIPE, env={'LANG': 'C'}).stdout.strip().decode()
+        if not version_str:
+            version_str = run([abspath, "--version"], stdout=PIPE).stdout.strip().decode()
         # take first 3 columns of first line of version info
         return ' '.join(version_str.split('\n')[0].strip().split()[:3])
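
A standalone sketch (not part of this commit) of the LANG=C-first probe with fallback used above:

```python
from subprocess import PIPE, run

def probe_version(binary: str) -> str:
    # force LANG=C first so the output is stable/parseable across locales
    out = run([binary, '--version'], stdout=PIPE, env={'LANG': 'C'}).stdout.strip().decode()
    if not out:
        # some tools print nothing under the stripped env, so retry with the inherited one
        out = run([binary, '--version'], stdout=PIPE).stdout.strip().decode()
    return ' '.join(out.split('\n')[0].strip().split()[:3])  # first 3 columns of first line

print(probe_version('git'))  # e.g. 'git version 2.39.5'
```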
@@ -795,6 +832,7 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
         'path': config['OUTPUT_DIR'].resolve(),
         'enabled': True,
         'is_valid': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).exists(),
+        'is_mount': os.path.ismount(config['OUTPUT_DIR'].resolve()),
     },
     'SOURCES_DIR': {
         'path': config['SOURCES_DIR'].resolve(),
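
A minimal sketch (not in this commit) of what the new `is_mount` check reports; inside Docker, a False here usually means the `-v` bind mount for the data dir is missing (the path below is hypothetical):

```python
import os
from pathlib import Path

output_dir = Path('/data').resolve()  # hypothetical Docker data dir
print({'path': str(output_dir), 'is_mount': os.path.ismount(output_dir)})
```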
@@ -810,6 +848,7 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
         'path': config['ARCHIVE_DIR'].resolve(),
         'enabled': True,
         'is_valid': config['ARCHIVE_DIR'].exists(),
+        'is_mount': os.path.ismount(config['ARCHIVE_DIR'].resolve()),
     },
     'CONFIG_FILE': {
         'path': config['CONFIG_FILE'].resolve(),
@@ -820,18 +859,12 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
         'path': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).resolve(),
         'enabled': True,
         'is_valid': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).exists(),
+        'is_mount': os.path.ismount((config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).resolve()),
     },
 }

 def get_dependency_info(config: ConfigDict) -> ConfigValue:
     return {
-        'ARCHIVEBOX_BINARY': {
-            'path': bin_path(config['ARCHIVEBOX_BINARY']),
-            'version': config['VERSION'],
-            'hash': bin_hash(config['ARCHIVEBOX_BINARY']),
-            'enabled': True,
-            'is_valid': True,
-        },
         'PYTHON_BINARY': {
             'path': bin_path(config['PYTHON_BINARY']),
             'version': config['PYTHON_VERSION'],
@@ -839,6 +872,13 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
             'enabled': True,
             'is_valid': bool(config['PYTHON_VERSION']),
         },
+        'SQLITE_BINARY': {
+            'path': bin_path(config['SQLITE_BINARY']),
+            'version': config['SQLITE_VERSION'],
+            'hash': bin_hash(config['SQLITE_BINARY']),
+            'enabled': True,
+            'is_valid': bool(config['SQLITE_VERSION']),
+        },
         'DJANGO_BINARY': {
             'path': bin_path(config['DJANGO_BINARY']),
             'version': config['DJANGO_VERSION'],
@@ -846,6 +886,14 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
             'enabled': True,
             'is_valid': bool(config['DJANGO_VERSION']),
         },
+        'ARCHIVEBOX_BINARY': {
+            'path': bin_path(config['ARCHIVEBOX_BINARY']),
+            'version': config['VERSION'],
+            'hash': bin_hash(config['ARCHIVEBOX_BINARY']),
+            'enabled': True,
+            'is_valid': True,
+        },
         'CURL_BINARY': {
             'path': bin_path(config['CURL_BINARY']),
             'version': config['CURL_VERSION'],
@@ -931,7 +979,7 @@ def get_chrome_info(config: ConfigDict) -> ConfigValue:
         'TIMEOUT': config['TIMEOUT'],
         'RESOLUTION': config['RESOLUTION'],
         'CHECK_SSL_VALIDITY': config['CHECK_SSL_VALIDITY'],
-        'CHROME_BINARY': config['CHROME_BINARY'],
+        'CHROME_BINARY': bin_path(config['CHROME_BINARY']),
         'CHROME_HEADLESS': config['CHROME_HEADLESS'],
         'CHROME_SANDBOX': config['CHROME_SANDBOX'],
         'CHROME_USER_AGENT': config['CHROME_USER_AGENT'],
@@ -972,13 +1020,22 @@ globals().update(CONFIG)

 # Set timezone to UTC and umask to OUTPUT_PERMISSIONS
-os.environ["TZ"] = 'UTC'
+assert TIMEZONE == 'UTC', 'The server timezone should always be set to UTC'  # we may allow this to change later
+os.environ["TZ"] = TIMEZONE
 os.umask(0o777 - int(DIR_OUTPUT_PERMISSIONS, base=8))  # noqa: F821

 # add ./node_modules/.bin to $PATH so we can use node scripts in extractors
 NODE_BIN_PATH = str((Path(CONFIG["OUTPUT_DIR"]).absolute() / 'node_modules' / '.bin'))
 sys.path.append(NODE_BIN_PATH)

+# OPTIONAL: also look around the host system for node modules to use
+# avoid enabling this unless absolutely needed,
+# having overlapping potential sources of libs is a big source of bugs/confusing to users
+# DEV_NODE_BIN_PATH = str((Path(CONFIG["PACKAGE_DIR"]).absolute() / '..' / 'node_modules' / '.bin'))
+# sys.path.append(DEV_NODE_BIN_PATH)
+# USER_NODE_BIN_PATH = str(Path('~/.node_modules/.bin').resolve())
+# sys.path.append(USER_NODE_BIN_PATH)
+
 # disable stderr "you really shouldnt disable ssl" warnings with library config
 if not CONFIG['CHECK_SSL_VALIDITY']:
     import urllib3
@@ -986,6 +1043,13 @@ if not CONFIG['CHECK_SSL_VALIDITY']:
     requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)
     urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

+# get SQLite database version, compile options, and runtime options
+# TODO: make this a less hacky proper assertion checker helper function in somewhere like setup_django
+#cursor = sqlite3.connect(':memory:').cursor()
+#DYNAMIC_CONFIG_SCHEMA['SQLITE_VERSION'] = lambda c: cursor.execute("SELECT sqlite_version();").fetchone()[0]
+#DYNAMIC_CONFIG_SCHEMA['SQLITE_JOURNAL_MODE'] = lambda c: cursor.execute('PRAGMA journal_mode;').fetchone()[0]
+#DYNAMIC_CONFIG_SCHEMA['SQLITE_OPTIONS'] = lambda c: [option[0] for option in cursor.execute('PRAGMA compile_options;').fetchall()]
+#cursor.close()
+

 ########################### Config Validity Checkers ###########################
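
A runnable version (not part of this commit) of the introspection sketched in the commented block above:

```python
import sqlite3

cursor = sqlite3.connect(':memory:').cursor()
print(cursor.execute('SELECT sqlite_version();').fetchone()[0])       # e.g. '3.39.4'
print(cursor.execute('PRAGMA journal_mode;').fetchone()[0])           # 'memory' for in-memory dbs
print([opt[0] for opt in cursor.execute('PRAGMA compile_options;')])  # compile-time flags
cursor.close()
```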
@@ -1082,6 +1146,7 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
         stderr('    https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_media')
         stderr()

+
 def check_data_folder(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG) -> None:
     output_dir = out_dir or config['OUTPUT_DIR']
     assert isinstance(output_dir, (str, Path))
@@ -1156,11 +1221,10 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
     # without running migrations automatically (user runs them manually by calling init)
     django.setup()

     from django.conf import settings

     # log startup message to the error log
-    with open(settings.ERROR_LOG, "a+", encoding='utf-8') as f:
+    with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
         command = ' '.join(sys.argv)
         ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
         f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
@@ -1170,10 +1234,17 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
         # Enable WAL mode in sqlite3
         from django.db import connection
         with connection.cursor() as cursor:
+
+            # Set Journal mode to WAL to allow for multiple writers
             current_mode = cursor.execute("PRAGMA journal_mode")
             if current_mode != 'wal':
                 cursor.execute("PRAGMA journal_mode=wal;")

+            # Set max blocking delay for concurrent writes and write sync mode
+            # https://litestream.io/tips/#busy-timeout
+            cursor.execute("PRAGMA busy_timeout = 5000;")
+            cursor.execute("PRAGMA synchronous = NORMAL;")
+
         # Create cache table in DB if needed
         try:
             from django.core.cache import cache
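
For reference, the same pragmas applied to a plain `sqlite3` connection outside Django (hypothetical database path; WAL mode persists on the database file once set):

```python
import sqlite3

conn = sqlite3.connect('/tmp/index.sqlite3')
print(conn.execute('PRAGMA journal_mode=wal;').fetchone()[0])  # 'wal'
conn.execute('PRAGMA busy_timeout = 5000;')   # wait up to 5s on a locked db instead of erroring
conn.execute('PRAGMA synchronous = NORMAL;')  # fewer fsyncs; considered safe under WAL
conn.close()
```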
@@ -1181,7 +1252,6 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
         except django.db.utils.OperationalError:
             call_command("createcachetable", verbosity=0)

-
     # if archivebox gets imported multiple times, we have to close
     # the sqlite3 whenever we init from scratch to avoid multiple threads
     # sharing the same connection by accident


@@ -98,6 +98,7 @@ class ConfigDict(BaseConfig, total=False):
     WGET_ARGS: List[str]
     CURL_ARGS: List[str]
     GIT_ARGS: List[str]
+    TAG_SEPARATOR_PATTERN: str

 ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]


@@ -0,0 +1,18 @@
+# Generated by Django 3.1.14 on 2022-09-14 09:34
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0020_auto_20210410_1031'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='archiveresult',
+            name='extractor',
+            field=models.CharField(choices=[('favicon', 'favicon'), ('headers', 'headers'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('wget', 'wget'), ('title', 'title'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('archive_org', 'archive_org')], max_length=32),
+        ),
+    ]


@@ -19,7 +19,7 @@ from ..config import (
     SQL_INDEX_FILENAME,
     OUTPUT_DIR,
     LOGS_DIR,
-    TIME_ZONE,
+    TIMEZONE,
 )

 IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
@@ -157,7 +157,7 @@ DATABASES = {
             'timeout': 60,
             'check_same_thread': False,
         },
-        'TIME_ZONE': 'UTC',
+        'TIME_ZONE': TIMEZONE,
         # DB setup is sometimes modified at runtime by setup_django() in config.py
     }
 }
@@ -227,7 +227,8 @@ USE_L10N = True
 USE_TZ = True
 DATETIME_FORMAT = 'Y-m-d g:iA'
 SHORT_DATETIME_FORMAT = 'Y-m-d h:iA'
-TIME_ZONE = TIME_ZONE  # noqa
+TIME_ZONE = TIMEZONE   # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent

 from django.conf.locale.en import formats as en_formats


@@ -6,7 +6,7 @@ from django.contrib.staticfiles.urls import staticfiles_urlpatterns
 from django.conf import settings
 from django.views.generic.base import RedirectView

-from core.views import HomepageView, SnapshotView, PublicIndexView, AddView
+from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView

 # print('DEBUG', settings.DEBUG)
@@ -32,6 +32,8 @@ urlpatterns = [
     path('accounts/', include('django.contrib.auth.urls')),
     path('admin/', admin.site.urls),

+    path('health/', HealthCheckView.as_view(), name='healthcheck'),
+
     path('index.html', RedirectView.as_view(url='/')),
     path('index.json', static.serve, {'document_root': settings.OUTPUT_DIR, 'path': 'index.json'}),
     path('', HomepageView.as_view(), name='Home'),

View file

@ -295,3 +295,18 @@ class AddView(UserPassesTestMixin, FormView):
"form": AddLinkForm() "form": AddLinkForm()
}) })
return render(template_name=self.template_name, request=self.request, context=context) return render(template_name=self.template_name, request=self.request, context=context)
class HealthCheckView(View):
"""
A Django view that renders plain text "OK" for service discovery tools
"""
def get(self, request):
"""
Handle a GET request
"""
return HttpResponse(
'OK',
content_type='text/plain',
status=200
)
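
For reference, a probe for this endpoint only has to check for the 200/"OK" response. A minimal sketch using only the standard library (the base URL is an assumption for a default local server):

    import urllib.request

    def archivebox_is_healthy(base_url='http://127.0.0.1:8000'):
        # GET /health/ and verify the plain-text "OK" body returned by HealthCheckView
        try:
            with urllib.request.urlopen(f'{base_url}/health/', timeout=5) as resp:
                return resp.status == 200 and resp.read().strip() == b'OK'
        except OSError:
            return False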

View file

@ -1,12 +1,14 @@
__package__ = 'archivebox.extractors' __package__ = 'archivebox.extractors'
import os import os
import sys
from pathlib import Path from pathlib import Path
from typing import Optional, List, Iterable, Union from typing import Optional, List, Iterable, Union
from datetime import datetime, timezone from datetime import datetime, timezone
from django.db.models import QuerySet from django.db.models import QuerySet
from ..core.settings import ERROR_LOG
from ..index.schema import Link from ..index.schema import Link
from ..index.sql import write_link_to_sql_index from ..index.sql import write_link_to_sql_index
from ..index import ( from ..index import (
@ -42,7 +44,6 @@ from .headers import should_save_headers, save_headers
def get_default_archive_methods(): def get_default_archive_methods():
return [ return [
('title', should_save_title, save_title),
('favicon', should_save_favicon, save_favicon), ('favicon', should_save_favicon, save_favicon),
('headers', should_save_headers, save_headers), ('headers', should_save_headers, save_headers),
('singlefile', should_save_singlefile, save_singlefile), ('singlefile', should_save_singlefile, save_singlefile),
@ -50,7 +51,8 @@ def get_default_archive_methods():
('screenshot', should_save_screenshot, save_screenshot), ('screenshot', should_save_screenshot, save_screenshot),
('dom', should_save_dom, save_dom), ('dom', should_save_dom, save_dom),
('wget', should_save_wget, save_wget), ('wget', should_save_wget, save_wget),
('readability', should_save_readability, save_readability), # keep readability below wget and singlefile, as it depends on them ('title', should_save_title, save_title), # keep title and readability below wget and singlefile, as they depend on them
('readability', should_save_readability, save_readability),
('mercury', should_save_mercury, save_mercury), ('mercury', should_save_mercury, save_mercury),
('git', should_save_git, save_git), ('git', should_save_git, save_git),
('media', should_save_media, save_media), ('media', should_save_media, save_media),
@ -127,10 +129,27 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
# print('{black} X {}{reset}'.format(method_name, **ANSI)) # print('{black} X {}{reset}'.format(method_name, **ANSI))
stats['skipped'] += 1 stats['skipped'] += 1
except Exception as e: except Exception as e:
# Disabled until https://github.com/ArchiveBox/ArchiveBox/issues/984
# and https://github.com/ArchiveBox/ArchiveBox/issues/1014
# are fixed.
"""
raise Exception('Exception in archive_methods.save_{}(Link(url={}))'.format( raise Exception('Exception in archive_methods.save_{}(Link(url={}))'.format(
method_name, method_name,
link.url, link.url,
)) from e )) from e
"""
# Instead, use the kludgy workaround from
# https://github.com/ArchiveBox/ArchiveBox/issues/984#issuecomment-1150541627
with open(ERROR_LOG, "a", encoding='utf-8') as f:
command = ' '.join(sys.argv)
ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
f.write(("\n" + 'Exception in archive_methods.save_{}(Link(url={})) command={}; ts={}'.format(
method_name,
link.url,
command,
ts
) + "\n"))
#f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
# print(' ', stats) # print(' ', stats)
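
The workaround trades a hard crash for an append-only error log. Stripped of the ArchiveBox internals, the same pattern looks like this (the ERROR_LOG path here is illustrative, not the real config value):

    import sys
    from datetime import datetime, timezone
    from pathlib import Path

    ERROR_LOG = Path('./logs/errors.log')   # hypothetical location for illustration

    def log_extractor_error(method_name: str, url: str) -> None:
        # mirror the kludge above: record the failure and keep archiving
        ERROR_LOG.parent.mkdir(parents=True, exist_ok=True)
        ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
        with open(ERROR_LOG, 'a', encoding='utf-8') as f:
            f.write(f'\nException in save_{method_name}(Link(url={url})) '
                    f'command={" ".join(sys.argv)}; ts={ts}\n')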
@ -182,7 +201,7 @@ def archive_links(all_links: Union[Iterable[Link], QuerySet], overwrite: bool=Fa
except KeyboardInterrupt: except KeyboardInterrupt:
log_archiving_paused(num_links, idx, link.timestamp) log_archiving_paused(num_links, idx, link.timestamp)
raise SystemExit(0) raise SystemExit(0)
except BaseException: # lgtm [py/catch-base-exception] except BaseException:
print() print()
raise raise

View file

@ -33,7 +33,7 @@ def should_save_media(link: Link, out_dir: Optional[Path]=None, overwrite: Optio
@enforce_types @enforce_types
def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIMEOUT) -> ArchiveResult: def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIMEOUT) -> ArchiveResult:
"""Download playlists or individual video, audio, and subtitles using youtube-dl""" """Download playlists or individual video, audio, and subtitles using youtube-dl or yt-dlp"""
out_dir = out_dir or Path(link.link_dir) out_dir = out_dir or Path(link.link_dir)
output: ArchiveOutput = 'media' output: ArchiveOutput = 'media'
@ -43,6 +43,7 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME
YOUTUBEDL_BINARY, YOUTUBEDL_BINARY,
*YOUTUBEDL_ARGS, *YOUTUBEDL_ARGS,
*([] if CHECK_SSL_VALIDITY else ['--no-check-certificate']), *([] if CHECK_SSL_VALIDITY else ['--no-check-certificate']),
# TODO: add --cookies-from-browser={CHROME_USER_DATA_DIR}
link.url, link.url,
] ]
status = 'succeeded' status = 'succeeded'
@ -60,7 +61,7 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME
pass pass
else: else:
hints = ( hints = (
'Got youtube-dl response code: {}.'.format(result.returncode), 'Got youtube-dl (or yt-dlp) response code: {}.'.format(result.returncode),
*result.stderr.decode().split('\n'), *result.stderr.decode().split('\n'),
) )
raise ArchiveError('Failed to save media', hints) raise ArchiveError('Failed to save media', hints)
@ -71,8 +72,18 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME
timer.end() timer.end()
# add video description and subtitles to full-text index # add video description and subtitles to full-text index
# Let's try a few different
index_texts = [ index_texts = [
text_file.read_text(encoding='utf-8').strip() # errors:
# * 'strict' to raise a ValueError exception if there is an
# encoding error. The default value of None has the same effect.
# * 'ignore' ignores errors. Note that ignoring encoding errors
# can lead to data loss.
# * 'xmlcharrefreplace' is only supported when writing to a
# file. Characters not supported by the encoding are replaced with
# the appropriate XML character reference &#nnn;.
# There are a few more options described in https://docs.python.org/3/library/functions.html#open
text_file.read_text(encoding='utf-8', errors='xmlcharrefreplace').strip()
for text_file in ( for text_file in (
*output_path.glob('*.description'), *output_path.glob('*.description'),
*output_path.glob('*.srt'), *output_path.glob('*.srt'),

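A quick illustration of those error handlers on the decode side. Note that 'xmlcharrefreplace' is an encode-only handler per the Python docs, so for reading, 'ignore', 'replace', or 'backslashreplace' are the applicable choices:

    raw = b'caf\xe9 description'                    # latin-1 bytes, invalid as UTF-8
    raw.decode('utf-8', errors='replace')           # 'caf\ufffd description' (U+FFFD marker)
    raw.decode('utf-8', errors='ignore')            # 'caf description' (silent data loss)
    raw.decode('utf-8', errors='backslashreplace')  # 'caf\\xe9 description' (escape preserved)
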
View file

@ -10,9 +10,7 @@ from ..index.schema import Link, ArchiveResult, ArchiveError
from ..system import run, atomic_write from ..system import run, atomic_write
from ..util import ( from ..util import (
enforce_types, enforce_types,
download_url,
is_static_file, is_static_file,
) )
from ..config import ( from ..config import (
TIMEOUT, TIMEOUT,
@ -22,28 +20,8 @@ from ..config import (
READABILITY_VERSION, READABILITY_VERSION,
) )
from ..logging_util import TimedProgress from ..logging_util import TimedProgress
from .title import get_html
@enforce_types
def get_html(link: Link, path: Path) -> str:
"""
Try to find wget, singlefile and then dom files.
If none is found, download the url again.
"""
canonical = link.canonical_outputs()
abs_path = path.absolute()
sources = [canonical["singlefile_path"], canonical["wget_path"], canonical["dom_path"]]
document = None
for source in sources:
try:
with open(abs_path / source, "r", encoding="utf-8") as f:
document = f.read()
break
except (FileNotFoundError, TypeError):
continue
if document is None:
return download_url(link.url)
else:
return document
@enforce_types @enforce_types
def should_save_readability(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool: def should_save_readability(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:

View file

@ -17,6 +17,7 @@ from ..config import (
SAVE_SINGLEFILE, SAVE_SINGLEFILE,
DEPENDENCIES, DEPENDENCIES,
SINGLEFILE_VERSION, SINGLEFILE_VERSION,
SINGLEFILE_ARGS,
CHROME_BINARY, CHROME_BINARY,
) )
from ..logging_util import TimedProgress from ..logging_util import TimedProgress
@ -45,10 +46,31 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
# SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli # SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli
browser_args = '--browser-args={}'.format(json.dumps(browser_args[1:])) browser_args = '--browser-args={}'.format(json.dumps(browser_args[1:]))
cmd = [ options = [
DEPENDENCIES['SINGLEFILE_BINARY']['path'], *SINGLEFILE_ARGS,
'--browser-executable-path={}'.format(CHROME_BINARY), '--browser-executable-path={}'.format(CHROME_BINARY),
browser_args, browser_args,
]
# Deduplicate options (single-file doesn't like when you use the same option two times)
#
# NOTE: Option names that come first clobber conflicting names that come later.
# SINGLEFILE_ARGS is the option that affects the single-file command with the most
# specificity, so the user sets it with the most intent; it should therefore take
# precedence, much like lexical scoping in programming languages.
seen_option_names = []
def test_seen(argument):
option_name = argument.split("=")[0]
if option_name in seen_option_names:
return False
else:
seen_option_names.append(option_name)
return True
deduped_options = list(filter(test_seen, options))
cmd = [
DEPENDENCIES['SINGLEFILE_BINARY']['path'],
*deduped_options,
link.url, link.url,
output, output,
] ]
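
A standalone sketch of that first-one-wins dedup, to make the precedence behavior concrete (the option strings are made up):

    def dedupe_options(options):
        # keep the first occurrence of each --name; later conflicting names are dropped
        seen = set()
        deduped = []
        for option in options:
            name = option.split('=')[0]
            if name not in seen:
                seen.add(name)
                deduped.append(option)
        return deduped

    dedupe_options(['--load-deferred-images=false', '--load-deferred-images=true'])
    # -> ['--load-deferred-images=false']   (the user-supplied SINGLEFILE_ARGS value wins)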

View file

@ -58,6 +58,27 @@ class TitleParser(HTMLParser):
if tag.lower() == "title": if tag.lower() == "title":
self.inside_title_tag = False self.inside_title_tag = False
@enforce_types
def get_html(link: Link, path: Path, timeout: int=TIMEOUT) -> str:
"""
Try to find singlefile, wget, and then dom files (in that order).
If none is found, download the url again.
"""
canonical = link.canonical_outputs()
abs_path = path.absolute()
sources = [canonical["singlefile_path"], canonical["wget_path"], canonical["dom_path"]]
document = None
for source in sources:
try:
with open(abs_path / source, "r", encoding="utf-8") as f:
document = f.read()
break
except (FileNotFoundError, TypeError):
continue
if document is None:
return download_url(link.url, timeout=timeout)
else:
return document
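
Stripped of the ArchiveBox types, get_html() above is a priority-ordered fallback chain: prefer HTML already archived on disk, and only re-download as a last resort. Roughly:

    def read_first_existing(paths, fallback):
        # try each candidate file in priority order, else call the fallback
        for path in paths:
            try:
                return path.read_text(encoding='utf-8')
            except (FileNotFoundError, TypeError):   # TypeError: candidate path was None
                continue
        return fallback()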
@enforce_types @enforce_types
def should_save_title(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool: def should_save_title(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
@ -90,7 +111,7 @@ def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -
status = 'succeeded' status = 'succeeded'
timer = TimedProgress(timeout, prefix=' ') timer = TimedProgress(timeout, prefix=' ')
try: try:
html = download_url(link.url, timeout=timeout) html = get_html(link, out_dir, timeout=timeout)
try: try:
# try using relatively strict html parser first # try using relatively strict html parser first
parser = TitleParser() parser = TitleParser()

View file

@ -24,6 +24,7 @@ from ..config import (
FOOTER_INFO, FOOTER_INFO,
HTML_INDEX_FILENAME, HTML_INDEX_FILENAME,
SAVE_ARCHIVE_DOT_ORG, SAVE_ARCHIVE_DOT_ORG,
PREVIEW_ORIGINALS,
) )
MAIN_INDEX_TEMPLATE = 'static_index.html' MAIN_INDEX_TEMPLATE = 'static_index.html'
@ -105,6 +106,7 @@ def link_details_template(link: Link) -> str:
'status_color': 'success' if link.is_archived else 'danger', 'status_color': 'success' if link.is_archived else 'danger',
'oldest_archive_date': ts_to_date_str(link.oldest_archive_date), 'oldest_archive_date': ts_to_date_str(link.oldest_archive_date),
'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG, 'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG,
'PREVIEW_ORIGINALS': PREVIEW_ORIGINALS,
}) })
@enforce_types @enforce_types

View file

@ -1,5 +1,7 @@
__package__ = 'archivebox.index' __package__ = 'archivebox.index'
import re
from io import StringIO from io import StringIO
from pathlib import Path from pathlib import Path
from typing import List, Tuple, Iterator from typing import List, Tuple, Iterator
@ -8,7 +10,10 @@ from django.db import transaction
from .schema import Link from .schema import Link
from ..util import enforce_types, parse_date from ..util import enforce_types, parse_date
from ..config import OUTPUT_DIR from ..config import (
OUTPUT_DIR,
TAG_SEPARATOR_PATTERN,
)
### Main Links Index ### Main Links Index
@ -33,9 +38,11 @@ def remove_from_sql_main_index(snapshots: QuerySet, atomic: bool=False, out_dir:
def write_link_to_sql_index(link: Link): def write_link_to_sql_index(link: Link):
from core.models import Snapshot, ArchiveResult from core.models import Snapshot, ArchiveResult
info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys} info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys}
tags = info.pop("tags")
if tags is None: tag_list = list(dict.fromkeys(
tags = [] tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')
))
info.pop('tags')
try: try:
info["timestamp"] = Snapshot.objects.get(url=link.url).timestamp info["timestamp"] = Snapshot.objects.get(url=link.url).timestamp
@ -44,7 +51,7 @@ def write_link_to_sql_index(link: Link):
info["timestamp"] = str(float(info["timestamp"]) + 1.0) info["timestamp"] = str(float(info["timestamp"]) + 1.0)
snapshot, _ = Snapshot.objects.update_or_create(url=link.url, defaults=info) snapshot, _ = Snapshot.objects.update_or_create(url=link.url, defaults=info)
snapshot.save_tags(tags) snapshot.save_tags(tag_list)
for extractor, entries in link.history.items(): for extractor, entries in link.history.items():
for entry in entries: for entry in entries:
@ -104,10 +111,9 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None:
snap = write_link_to_sql_index(link) snap = write_link_to_sql_index(link)
snap.title = link.title snap.title = link.title
tag_set = ( tag_list = list(dict.fromkeys(
set(tag.strip() for tag in (link.tags or '').split(',')) tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')
) ))
tag_list = list(tag_set) or []
snap.save() snap.save()
snap.save_tags(tag_list) snap.save_tags(tag_list)
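
The dict.fromkeys() trick deduplicates while preserving the order tags first appeared in, which a plain set() would not. A sketch with an assumed separator pattern (the real TAG_SEPARATOR_PATTERN default may differ):

    import re

    TAG_SEPARATOR_PATTERN = r'[,;]'   # assumption for illustration only

    def parse_tags(tags: str) -> list:
        # split on separators, strip whitespace, dedupe preserving first-seen order
        return list(dict.fromkeys(
            tag.strip()
            for tag in re.split(TAG_SEPARATOR_PATTERN, tags or '')
            if tag.strip()
        ))

    parse_tags('news, python; news , archive')
    # -> ['news', 'python', 'archive']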

View file

@ -432,7 +432,13 @@ def log_archive_method_finished(result: "ArchiveResult"):
# Prettify error output hints string and limit to five lines # Prettify error output hints string and limit to five lines
hints = getattr(result.output, 'hints', None) or () hints = getattr(result.output, 'hints', None) or ()
if hints: if hints:
hints = hints if isinstance(hints, (list, tuple)) else hints.split('\n') if isinstance(hints, (list, tuple, type(_ for _ in ()))):
hints = [hint.decode() if isinstance(hint, bytes) else hint for hint in hints]
else:
if isinstance(hints, bytes):
hints = hints.decode()
hints = hints.split('\n')
hints = ( hints = (
' {}{}{}'.format(ANSI['lightyellow'], line.strip(), ANSI['reset']) ' {}{}{}'.format(ANSI['lightyellow'], line.strip(), ANSI['reset'])
for line in hints[:5] if line.strip() for line in hints[:5] if line.strip()
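
Normalizing hints this way has to cope with several input shapes: str, bytes, and lists/tuples/generators of either. A compact equivalent sketch of the intended behavior:

    from types import GeneratorType

    def normalize_hints(hints):
        # accept str, bytes, or an iterable of either; always return a list of str lines
        if isinstance(hints, bytes):
            hints = hints.decode()
        if isinstance(hints, str):
            return hints.split('\n')
        if isinstance(hints, (list, tuple, GeneratorType)):
            return [h.decode() if isinstance(h, bytes) else h for h in hints]
        return []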
@ -566,7 +572,7 @@ def printable_config(config: ConfigDict, prefix: str='') -> str:
def printable_folder_status(name: str, folder: Dict) -> str: def printable_folder_status(name: str, folder: Dict) -> str:
if folder['enabled']: if folder['enabled']:
if folder['is_valid']: if folder['is_valid']:
color, symbol, note = 'green', '√', 'valid' color, symbol, note, num_files = 'green', '√', 'valid', ''
else: else:
color, symbol, note, num_files = 'red', 'X', 'invalid', '?' color, symbol, note, num_files = 'red', 'X', 'invalid', '?'
else: else:
@ -582,6 +588,10 @@ def printable_folder_status(name: str, folder: Dict) -> str:
else: else:
num_files = 'missing' num_files = 'missing'
if folder.get('is_mount'):
# add symbol @ next to filecount if path is a remote filesystem mount
num_files = f'{num_files} @' if num_files else '@'
path = str(folder['path']).replace(str(OUTPUT_DIR), '.') if folder['path'] else '' path = str(folder['path']).replace(str(OUTPUT_DIR), '.') if folder['path'] else ''
if path and ' ' in path: if path and ' ' in path:
path = f'"{path}"' path = f'"{path}"'
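
The is_mount flag itself can be derived with the standard library alone; one hedged way to compute it (the real config code may differ):

    import os
    from pathlib import Path

    def dir_is_mount(path: Path) -> bool:
        # True if the directory is a mount point; in Docker/NAS setups this
        # often indicates a remote filesystem backing the data dir
        return os.path.ismount(str(path))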

View file

@ -4,8 +4,9 @@ import os
import sys import sys
import shutil import shutil
import platform import platform
from django.utils import timezone
from pathlib import Path from pathlib import Path
from datetime import date from datetime import date, datetime
from typing import Dict, List, Optional, Iterable, IO, Union from typing import Dict, List, Optional, Iterable, IO, Union
from crontab import CronTab, CronSlices from crontab import CronTab, CronSlices
@ -70,7 +71,12 @@ from .config import (
IS_TTY, IS_TTY,
DEBUG, DEBUG,
IN_DOCKER, IN_DOCKER,
PUID,
PGID,
USER, USER,
TIMEZONE,
ENFORCE_ATOMIC_WRITES,
OUTPUT_PERMISSIONS,
PYTHON_BINARY, PYTHON_BINARY,
ARCHIVEBOX_BINARY, ARCHIVEBOX_BINARY,
ONLY_NEW, ONLY_NEW,
@ -90,6 +96,7 @@ from .config import (
check_data_folder, check_data_folder,
write_config_file, write_config_file,
VERSION, VERSION,
COMMIT_HASH,
CODE_LOCATIONS, CODE_LOCATIONS,
EXTERNAL_LOCATIONS, EXTERNAL_LOCATIONS,
DATA_LOCATIONS, DATA_LOCATIONS,
@ -204,25 +211,33 @@ def version(quiet: bool=False,
out_dir: Path=OUTPUT_DIR) -> None: out_dir: Path=OUTPUT_DIR) -> None:
"""Print the ArchiveBox version and dependency information""" """Print the ArchiveBox version and dependency information"""
if quiet:
print(VERSION) print(VERSION)
else:
# ArchiveBox v0.5.6 if not quiet:
# Cpython Linux Linux-4.19.121-linuxkit-x86_64-with-glibc2.28 x86_64 (in Docker) (in TTY) # 0.6.3
print('ArchiveBox v{}'.format(VERSION)) # ArchiveBox v0.6.3 Cpython Linux Linux-4.19.121-linuxkit-x86_64-with-glibc2.28 x86_64 (in Docker) (in TTY)
# DEBUG=False IN_DOCKER=True IS_TTY=True TZ=UTC FS_ATOMIC=True FS_REMOTE=False FS_PERMS=644 501:20 SEARCH_BACKEND=ripgrep
p = platform.uname() p = platform.uname()
print( print(
'ArchiveBox v{}'.format(VERSION),
*((COMMIT_HASH[:7],) if COMMIT_HASH else ()),
sys.implementation.name.title(), sys.implementation.name.title(),
p.system, p.system,
platform.platform(), platform.platform(),
p.machine, p.machine,
) )
OUTPUT_IS_REMOTE_FS = DATA_LOCATIONS['OUTPUT_DIR']['is_mount'] or DATA_LOCATIONS['ARCHIVE_DIR']['is_mount']
print( print(
f'IN_DOCKER={IN_DOCKER}',
f'DEBUG={DEBUG}', f'DEBUG={DEBUG}',
f'IN_DOCKER={IN_DOCKER}',
f'IS_TTY={IS_TTY}', f'IS_TTY={IS_TTY}',
f'TZ={os.environ.get("TZ", "UTC")}', f'TZ={TIMEZONE}',
f'SEARCH_BACKEND_ENGINE={SEARCH_BACKEND_ENGINE}', #f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually
f'FS_ATOMIC={ENFORCE_ATOMIC_WRITES}',
f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
f'FS_PERMS={OUTPUT_PERMISSIONS} {PUID}:{PGID}',
f'SEARCH_BACKEND={SEARCH_BACKEND_ENGINE}',
) )
print() print()
@ -230,6 +245,10 @@ def version(quiet: bool=False,
for name, dependency in DEPENDENCIES.items(): for name, dependency in DEPENDENCIES.items():
print(printable_dependency_version(name, dependency)) print(printable_dependency_version(name, dependency))
# add a newline between core dependencies and extractor dependencies for easier reading
if name == 'ARCHIVEBOX_BINARY':
print()
print() print()
print('{white}[i] Source-code locations:{reset}'.format(**ANSI)) print('{white}[i] Source-code locations:{reset}'.format(**ANSI))
for name, folder in CODE_LOCATIONS.items(): for name, folder in CODE_LOCATIONS.items():
@ -427,7 +446,7 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
print(' archivebox server # then visit http://127.0.0.1:8000') print(' archivebox server # then visit http://127.0.0.1:8000')
print() print()
print(' To add new links, you can run:') print(' To add new links, you can run:')
print(" archivebox add ~/some/path/or/url/to/list_of_links.txt") print(" archivebox add < ~/some/path/to/list_of_links.txt")
print() print()
print(' For more usage and examples, run:') print(' For more usage and examples, run:')
print(' archivebox help') print(' archivebox help')
@ -554,7 +573,8 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
def add(urls: Union[str, List[str]], def add(urls: Union[str, List[str]],
tag: str='', tag: str='',
depth: int=0, depth: int=0,
update_all: bool=not ONLY_NEW, update: bool=not ONLY_NEW,
update_all: bool=False,
index_only: bool=False, index_only: bool=False,
overwrite: bool=False, overwrite: bool=False,
# duplicate: bool=False, # TODO: reuse the logic from admin.py resnapshot to allow adding multiple snapshots by appending timestamp automatically # duplicate: bool=False, # TODO: reuse the logic from admin.py resnapshot to allow adding multiple snapshots by appending timestamp automatically
@ -587,6 +607,7 @@ def add(urls: Union[str, List[str]],
# save verbatim args to sources # save verbatim args to sources
write_ahead_log = save_text_as_source('\n'.join(urls), filename='{ts}-import.txt', out_dir=out_dir) write_ahead_log = save_text_as_source('\n'.join(urls), filename='{ts}-import.txt', out_dir=out_dir)
new_links += parse_links_from_source(write_ahead_log, root_url=None, parser=parser) new_links += parse_links_from_source(write_ahead_log, root_url=None, parser=parser)
# If we're going one level deeper, download each link and look for more links # If we're going one level deeper, download each link and look for more links
@ -594,8 +615,11 @@ def add(urls: Union[str, List[str]],
if new_links and depth == 1: if new_links and depth == 1:
log_crawl_started(new_links) log_crawl_started(new_links)
for new_link in new_links: for new_link in new_links:
try:
downloaded_file = save_file_as_source(new_link.url, filename=f'{new_link.timestamp}-crawl-{new_link.domain}.txt', out_dir=out_dir) downloaded_file = save_file_as_source(new_link.url, filename=f'{new_link.timestamp}-crawl-{new_link.domain}.txt', out_dir=out_dir)
new_links_depth += parse_links_from_source(downloaded_file, root_url=new_link.url) new_links_depth += parse_links_from_source(downloaded_file, root_url=new_link.url)
except Exception as err:
stderr(f'[!] Failed to get contents of URL {new_link.url}', err, color='red')
imported_links = list({link.url: link for link in (new_links + new_links_depth)}.values()) imported_links = list({link.url: link for link in (new_links + new_links_depth)}.values())
@ -618,11 +642,21 @@ def add(urls: Union[str, List[str]],
if extractors: if extractors:
archive_kwargs["methods"] = extractors archive_kwargs["methods"] = extractors
if update_all: stderr()
ts = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
if update:
stderr(f'[*] [{ts}] Archiving + updating {len(imported_links)}/{len(all_links)} URLs from added set...', color='green')
archive_links(imported_links, overwrite=overwrite, **archive_kwargs)
elif update_all:
stderr(f'[*] [{ts}] Archiving + updating {len(all_links)}/{len(all_links)} URLs from entire library...', color='green')
archive_links(all_links, overwrite=overwrite, **archive_kwargs) archive_links(all_links, overwrite=overwrite, **archive_kwargs)
elif overwrite: elif overwrite:
stderr(f'[*] [{ts}] Archiving + overwriting {len(imported_links)}/{len(all_links)} URLs from added set...', color='green')
archive_links(imported_links, overwrite=True, **archive_kwargs) archive_links(imported_links, overwrite=True, **archive_kwargs)
elif new_links: elif new_links:
stderr(f'[*] [{ts}] Archiving {len(new_links)}/{len(all_links)} URLs from added set...', color='green')
archive_links(new_links, overwrite=False, **archive_kwargs) archive_links(new_links, overwrite=False, **archive_kwargs)
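
With the flag split above, --update re-archives only the URLs in the current import while --update-all touches the whole library. Equivalent calls through the Python API (the URL is just an example):

    from archivebox.main import add

    add(['https://example.com'], update=True)       # re-archive just the added set
    add(['https://example.com'], update_all=True)   # re-archive the entire library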
@ -1113,6 +1147,7 @@ def schedule(add: bool=False,
every: Optional[str]=None, every: Optional[str]=None,
depth: int=0, depth: int=0,
overwrite: bool=False, overwrite: bool=False,
update: bool=not ONLY_NEW,
import_path: Optional[str]=None, import_path: Optional[str]=None,
out_dir: Path=OUTPUT_DIR): out_dir: Path=OUTPUT_DIR):
"""Set ArchiveBox to regularly import URLs at specific times using cron""" """Set ArchiveBox to regularly import URLs at specific times using cron"""
@ -1142,6 +1177,7 @@ def schedule(add: bool=False,
*([ *([
'add', 'add',
*(['--overwrite'] if overwrite else []), *(['--overwrite'] if overwrite else []),
*(['--update'] if update else []),
f'--depth={depth}', f'--depth={depth}',
f'"{import_path}"', f'"{import_path}"',
] if import_path else ['update']), ] if import_path else ['update']),

View file

@ -149,7 +149,17 @@ def run_parser_functions(to_parse: IO[str], timer, root_url: Optional[str]=None,
def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str: def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str:
ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0] ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0]
source_path = str(out_dir / SOURCES_DIR_NAME / filename.format(ts=ts)) source_path = str(out_dir / SOURCES_DIR_NAME / filename.format(ts=ts))
atomic_write(source_path, raw_text)
referenced_texts = ''
for entry in raw_text.split():
try:
if Path(entry).exists():
referenced_texts += Path(entry).read_text()
except Exception as err:
print(err)
atomic_write(source_path, raw_text + '\n' + referenced_texts)
log_source_saved(source_file=source_path) log_source_saved(source_file=source_path)
return source_path return source_path
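
So stdin tokens that happen to be paths to existing local files are now inlined into the saved source alongside the raw text. Reduced to a standalone sketch:

    from pathlib import Path

    def expand_file_references(raw_text: str) -> str:
        # append the contents of any whitespace-separated token that is an existing file
        referenced = ''
        for entry in raw_text.split():
            try:
                if Path(entry).exists():
                    referenced += Path(entry).read_text()
            except (OSError, ValueError):   # e.g. token is too long to be a valid path
                continue
        return raw_text + '\n' + referenced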
@ -176,7 +186,7 @@ def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{ba
ANSI['reset'], ANSI['reset'],
)) ))
print(' ', e) print(' ', e)
raise SystemExit(1) raise e
else: else:
# Source is a path to a local file on the filesystem # Source is a path to a local file on the filesystem

View file

@ -47,11 +47,11 @@ def get_pocket_articles(api: Pocket, since=None, page=0):
def link_from_article(article: dict, sources: list): def link_from_article(article: dict, sources: list):
url: str = article['resolved_url'] or article['given_url'] url: str = article.get('resolved_url') or article['given_url']
broken_protocol = _BROKEN_PROTOCOL_RE.match(url) broken_protocol = _BROKEN_PROTOCOL_RE.match(url)
if broken_protocol: if broken_protocol:
url = url.replace(f'{broken_protocol.group(1)}:/', f'{broken_protocol.group(1)}://') url = url.replace(f'{broken_protocol.group(1)}:/', f'{broken_protocol.group(1)}://')
title = article['resolved_title'] or article['given_title'] or url title = article.get('resolved_title') or article.get('given_title') or url
return Link( return Link(
url=url, url=url,

View file

@ -34,13 +34,19 @@ def parse_wallabag_atom_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]:
trailing_removed = entry.split('</entry>', 1)[0] trailing_removed = entry.split('</entry>', 1)[0]
leading_removed = trailing_removed.strip() leading_removed = trailing_removed.strip()
rows = leading_removed.split('\n') splits_fixed = leading_removed.replace('"\n href="', '" href="')
rows = splits_fixed.split('\n')
def get_row(key): def get_row(prefix):
return [r.strip() for r in rows if r.strip().startswith('<{}'.format(key))][0] return [
row.strip()
for row in rows
if row.strip().startswith('<{}'.format(prefix))
][0]
title = str_between(get_row('title'), '<title><![CDATA[', ']]></title>').strip() title = str_between(get_row('title'), '<title><![CDATA[', ']]></title>').strip()
url = str_between(get_row('link rel="via"'), '<link rel="via">', '</link>') url_inside_link = str_between(get_row('link rel="via"'), '<link rel="via">', '</link>')
url_inside_attr = str_between(get_row('link rel="via"'), 'href="', '"/>')
ts_str = str_between(get_row('published'), '<published>', '</published>') ts_str = str_between(get_row('published'), '<published>', '</published>')
time = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S%z") time = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S%z")
try: try:
@ -49,7 +55,7 @@ def parse_wallabag_atom_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]:
tags = None tags = None
yield Link( yield Link(
url=htmldecode(url), url=htmldecode(url_inside_attr or url_inside_link),
timestamp=str(time.timestamp()), timestamp=str(time.timestamp()),
title=htmldecode(title) or None, title=htmldecode(title) or None,
tags=tags or '', tags=tags or '',

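The fix covers both places a wallabag export may put the URL: in the body of <link rel="via">...</link> or in an href attribute. A sketch against sample rows (this str_between is a simplified stand-in for archivebox.util.str_between, returning '' when the delimiters are missing):

    def str_between(string, start, end):
        # simplified stand-in: '' if either delimiter is absent
        if start not in string or end not in string:
            return ''
        return string.split(start, 1)[1].split(end, 1)[0]

    row_attr = '<link rel="via" href="https://example.com/article"/>'
    row_body = '<link rel="via">https://example.com/article</link>'

    for row in (row_attr, row_body):
        url_inside_link = str_between(row, '<link rel="via">', '</link>')
        url_inside_attr = str_between(row, 'href="', '"/>')
        print(url_inside_attr or url_inside_link)
    # both rows print: https://example.com/article
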
View file

@ -197,7 +197,7 @@
// select the action button from the dropdown // select the action button from the dropdown
container.find('select[name=action]') container.find('select[name=action]')
.find('option:selected').removeAttr('selected').end() .find('[selected]').removeAttr('selected').end()
.find('[value=' + action_type + ']').attr('selected', 'selected').click() .find('[value=' + action_type + ']').attr('selected', 'selected').click()
// click submit & replace the archivebox logo with a spinner // click submit & replace the archivebox logo with a spinner

View file

@ -28,6 +28,14 @@
<a href="/add" id="submit">&nbsp; Add more URLs </a> <a href="/add" id="submit">&nbsp; Add more URLs </a>
</center> </center>
{% else %} {% else %}
<div id="in-progress" style="display: none;">
<center><h3>Adding URLs to index and running archive methods...</h3>
<br/>
<div class="loader"></div>
<br/>
Check the server log or the <a href="/admin/core/archiveresult/?o=-1">Log</a> page for progress...
</center>
</div>
<form id="add-form" method="POST" class="p-form">{% csrf_token %} <form id="add-form" method="POST" class="p-form">{% csrf_token %}
<h1>Add new URLs to your archive</h1> <h1>Add new URLs to your archive</h1>
<br/> <br/>
@ -48,10 +56,9 @@
{% endif %} {% endif %}
<script> <script>
document.getElementById('add-form').addEventListener('submit', function(event) { document.getElementById('add-form').addEventListener('submit', function(event) {
setTimeout(function() { document.getElementById('in-progress').style.display = 'block'
document.getElementById('add-form').innerHTML = '<center><h3>Adding URLs to index and running archive methods...<h3><br/><div class="loader"></div><br/>Check the server log or the <a href="/admin/core/archiveresult/?o=-1">Log</a> page for progress...</center>' document.getElementById('add-form').style.display = 'none'
document.getElementById('delay-warning').style.display = 'block' document.getElementById('delay-warning').style.display = 'block'
}, 200)
return true return true
}) })
</script> </script>

View file

@ -414,6 +414,7 @@
</div> </div>
</div> </div>
{% endif %} {% endif %}
{% if PREVIEW_ORIGINALS %}
<div class="col-lg-2"> <div class="col-lg-2">
<div class="card"> <div class="card">
<iframe class="card-img-top" src="{{url}}" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" scrolling="no" loading="lazy" referrerpolicy="no-referrer"></iframe> <iframe class="card-img-top" src="{{url}}" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" scrolling="no" loading="lazy" referrerpolicy="no-referrer"></iframe>
@ -427,6 +428,7 @@
</div> </div>
</div> </div>
</div> </div>
{% endif %}
<div class="col-lg-2"> <div class="col-lg-2">
<div class="card"> <div class="card">
<iframe class="card-img-top" src="{{headers_path}}" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" scrolling="no" loading="lazy"></iframe> <iframe class="card-img-top" src="{{headers_path}}" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" scrolling="no" loading="lazy"></iframe>

View file

@ -92,7 +92,7 @@ echo " You may be prompted for a sudo password in order to install the follow
echo "" echo ""
echo " - archivebox" echo " - archivebox"
echo " - python3, pip, nodejs, npm (languages used by ArchiveBox, and its extractor modules)" echo " - python3, pip, nodejs, npm (languages used by ArchiveBox, and its extractor modules)"
echo " - curl, wget, git, youtube-dl (used for extracting title, favicon, git, media, and more)" echo " - curl, wget, git, youtube-dl, yt-dlp (used for extracting title, favicon, git, media, and more)"
echo " - chromium (skips this if any Chrome/Chromium version is already installed)" echo " - chromium (skips this if any Chrome/Chromium version is already installed)"
echo "" echo ""
echo " If you'd rather install these manually as-needed, you can find detailed documentation here:" echo " If you'd rather install these manually as-needed, you can find detailed documentation here:"
@ -115,13 +115,13 @@ if which apt-get > /dev/null; then
fi fi
echo echo
echo "[+] Installing ArchiveBox system dependencies using apt..." echo "[+] Installing ArchiveBox system dependencies using apt..."
sudo apt-get install -y git python3 python3-pip python3-distutils wget curl youtube-dl ffmpeg git nodejs npm ripgrep sudo apt-get install -y git python3 python3-pip python3-distutils wget curl youtube-dl yt-dlp ffmpeg git nodejs npm ripgrep
sudo apt-get install -y libgtk2.0-0 libgtk-3-0 libnotify-dev libgconf-2-4 libnss3 libxss1 libasound2 libxtst6 xauth xvfb libgbm-dev || sudo apt-get install -y chromium || sudo apt-get install -y chromium-browser || true sudo apt-get install -y libgtk2.0-0 libgtk-3-0 libnotify-dev libgconf-2-4 libnss3 libxss1 libasound2 libxtst6 xauth xvfb libgbm-dev || sudo apt-get install -y chromium || sudo apt-get install -y chromium-browser || true
sudo apt-get install -y archivebox sudo apt-get install -y archivebox
sudo apt-get --only-upgrade install -y archivebox sudo apt-get --only-upgrade install -y archivebox
echo "" echo ""
echo "[+] Installing ArchiveBox python dependencies using pip..." echo "[+] Installing ArchiveBox python dependencies using pip3..."
sudo python3.7 -m pip install --upgrade --ignore-installed archivebox sudo python3 -m pip install --upgrade --ignore-installed archivebox
# On Mac: # On Mac:
elif which brew > /dev/null; then elif which brew > /dev/null; then
echo "[+] Installing ArchiveBox system dependencies using brew..." echo "[+] Installing ArchiveBox system dependencies using brew..."
@ -129,16 +129,16 @@ elif which brew > /dev/null; then
brew update brew update
brew install --fetch-HEAD -f archivebox brew install --fetch-HEAD -f archivebox
echo "" echo ""
echo "[+] Installing ArchiveBox python dependencies using pip..." echo "[+] Installing ArchiveBox python dependencies using pip3..."
python3 -m pip install --upgrade --ignore-installed archivebox python3 -m pip install --upgrade --ignore-installed archivebox
elif which pkg > /dev/null; then elif which pkg > /dev/null; then
echo "[+] Installing ArchiveBox system dependencies using pkg..." echo "[+] Installing ArchiveBox system dependencies using pkg and pip (python3.9)..."
sudo pkg install -y python37 py37-pip py37-sqlite3 node npm wget curl youtube_dl ffmpeg git ripgrep sudo pkg install -y python3 py39-pip py39-sqlite3 npm wget curl youtube_dl ffmpeg git ripgrep
sudo pkg install -y chromium sudo pkg install -y chromium
echo "" echo ""
echo "[+] Installing ArchiveBox python dependencies using pip..." echo "[+] Installing ArchiveBox python dependencies using pip..."
sudo python3.7 -m pip install --upgrade --ignore-installed archivebox # don't use sudo here so that pip installs in $HOME/.local instead of into /usr/local
alias python3=python3.7 python3 -m pip install --upgrade --ignore-installed archivebox
else else
echo "[!] Warning: Could not find aptitude/homebrew/pkg! May not be able to install all dependencies automatically." echo "[!] Warning: Could not find aptitude/homebrew/pkg! May not be able to install all dependencies automatically."
echo "" echo ""
@ -192,7 +192,7 @@ echo "[√] Server started on http://0.0.0.0:8000 and data directory initialized
echo " cd ~/archivebox" echo " cd ~/archivebox"
echo " ps aux | grep archivebox" echo " ps aux | grep archivebox"
echo " pkill -f archivebox" echo " pkill -f archivebox"
echo " pip3 install --upgrade archivebox" echo " python3 -m pip install --upgrade archivebox"
echo " archivebox server --quick-init 0.0.0.0:8000" echo " archivebox server --quick-init 0.0.0.0:8000"
echo " archivebox manage createsuperuser" echo " archivebox manage createsuperuser"
echo " archivebox add 'https://example.com'" echo " archivebox add 'https://example.com'"

@ -1 +1 @@
Subproject commit 95a1c1a0875841d076f06106bd4c2307504928c2 Subproject commit a4314719746de549f359c2fa975762fc73b62f94

View file

@ -8,7 +8,7 @@
# Documentation: # Documentation:
# https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose # https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose
version: '2.4' version: '2.4' # '3.9' or greater also works
services: services:
archivebox: archivebox:
@ -23,15 +23,21 @@ services:
# - SEARCH_BACKEND_ENGINE=sonic # uncomment these if you enable sonic below # - SEARCH_BACKEND_ENGINE=sonic # uncomment these if you enable sonic below
# - SEARCH_BACKEND_HOST_NAME=sonic # - SEARCH_BACKEND_HOST_NAME=sonic
# - SEARCH_BACKEND_PASSWORD=SecretPassword # - SEARCH_BACKEND_PASSWORD=SecretPassword
# dns: # uncomment to use pihole below for ad/tracker blocking during archiving
# - pihole
volumes: volumes:
- ./data:/data - ./data:/data
# - ./archivebox:/app/archivebox # for developers working on archivebox # - ./archivebox:/app/archivebox # for developers working on archivebox
# To run the Sonic full-text search backend, first download the config file to sonic.cfg
### Optional Addons: tweak these examples as needed for your specific use case
### Example: To run the Sonic full-text search backend, first download the config file to sonic.cfg
# curl -O https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/etc/sonic.cfg # curl -O https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/etc/sonic.cfg
# after starting, backfill any existing Snapshots into the index: docker-compose run archivebox update --index-only # after starting, backfill any existing Snapshots into the index: docker-compose run archivebox update --index-only
# sonic: # sonic:
# image: valeriansaliou/sonic:v1.3.0 # image: valeriansaliou/sonic:v1.3.1
# expose: # expose:
# - 1491 # - 1491
# environment: # environment:
@ -41,10 +47,23 @@ services:
# - ./data/sonic:/var/lib/sonic/store # - ./data/sonic:/var/lib/sonic/store
### Optional Addons: tweak these examples as needed for your specific use case ### Example: To run pihole in order to block ad/tracker requests during archiving,
# uncomment this block and set up pihole using its admin interface
# Example: Run scheduled imports in a docker instead of using cron on the # pihole:
# image: pihole/pihole:latest
# ports:
# - 80:80 # uncomment to access the admin HTTP interface on http://localhost:80
# environment:
# WEBPASSWORD: 'set a secure password here or it will be random'
# volumes:
# - ./data/pihole:/etc/pihole
# - ./data/dnsmasq:/etc/dnsmasq.d
### Example: Run scheduled imports in a docker instead of using cron on the
# host machine, add tasks and see more info with archivebox schedule --help # host machine, add tasks and see more info with archivebox schedule --help
# scheduler: # scheduler:
# image: archivebox/archivebox:latest # image: archivebox/archivebox:latest
# command: schedule --foreground --every=day --depth=1 'https://getpocket.com/users/USERNAME/feed/all' # command: schedule --foreground --every=day --depth=1 'https://getpocket.com/users/USERNAME/feed/all'
@ -54,7 +73,9 @@ services:
# volumes: # volumes:
# - ./data:/data # - ./data:/data
# Example: Put Nginx in front of the ArchiveBox server for SSL termination
### Example: Put Nginx in front of the ArchiveBox server for SSL termination
# nginx: # nginx:
# image: nginx:alpine # image: nginx:alpine
# ports: # ports:
@ -64,7 +85,9 @@ services:
# - ./etc/nginx/nginx.conf:/etc/nginx/nginx.conf # - ./etc/nginx/nginx.conf:/etc/nginx/nginx.conf
# - ./data:/var/www # - ./data:/var/www
# Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel
### Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel
# wireguard: # wireguard:
# image: linuxserver/wireguard # image: linuxserver/wireguard
# network_mode: 'service:archivebox' # network_mode: 'service:archivebox'
@ -78,14 +101,16 @@ services:
# - /lib/modules:/lib/modules # - /lib/modules:/lib/modules
# - ./wireguard.conf:/config/wg0.conf:ro # - ./wireguard.conf:/config/wg0.conf:ro
# Example: Run PYWB in parallel and auto-import WARCs from ArchiveBox
### Example: Run PYWB in parallel and auto-import WARCs from ArchiveBox
# pywb: # pywb:
# image: webrecorder/pywb:latest # image: webrecorder/pywb:latest
# entrypoint: /bin/sh 'wb-manager add default /archivebox/archive/*/warc/*.warc.gz; wayback --proxy;' # entrypoint: /bin/sh -c '(wb-manager init default || test $$? -eq 2) && wb-manager add default /archivebox/archive/*/warc/*.warc.gz; wayback;'
# environment: # environment:
# - INIT_COLLECTION=archivebox # - INIT_COLLECTION=archivebox
# ports: # ports:
# - 8080:8080 # - 8080:8080
# volumes: # volumes:
# ./data:/archivebox # - ./data:/archivebox
# ./data/wayback:/webarchive # - ./data/wayback:/webarchive

View file

@ -55,7 +55,7 @@
# CURL_BINARY = curl # CURL_BINARY = curl
# GIT_BINARY = git # GIT_BINARY = git
# WGET_BINARY = wget # WGET_BINARY = wget
# YOUTUBEDL_BINARY = youtube-dl # YOUTUBEDL_BINARY = yt-dlp
# CHROME_BINARY = chromium # CHROME_BINARY = chromium
# CHROME_USER_DATA_DIR="~/.config/google-chrome/Default" # CHROME_USER_DATA_DIR="~/.config/google-chrome/Default"

package-lock.json (generated, 282 changed lines)
View file

@ -5,11 +5,11 @@
"requires": true, "requires": true,
"dependencies": { "dependencies": {
"@babel/runtime-corejs2": { "@babel/runtime-corejs2": {
"version": "7.13.10", "version": "7.17.11",
"resolved": "https://registry.npmjs.org/@babel/runtime-corejs2/-/runtime-corejs2-7.13.10.tgz", "resolved": "https://registry.npmjs.org/@babel/runtime-corejs2/-/runtime-corejs2-7.17.11.tgz",
"integrity": "sha512-rZw5P1ZewO6XZTDxtXuAuAFUqfNXyM8HO/9WiaDd34Anka0uFTpo0RvBLeV775AEE/zKw3LQB+poZw/O9lrZBg==", "integrity": "sha512-pJe8Aerb88TGVi1Xe/AE36aRCPrg+h6ktZPGl6xaJvOfTLcMMuogQu3BYcxeXPTNHhSYbmsDVYBs8CfAxeFFTg==",
"requires": { "requires": {
"core-js": "^2.6.5", "core-js": "^2.6.12",
"regenerator-runtime": "^0.13.4" "regenerator-runtime": "^0.13.4"
} }
}, },
@ -28,9 +28,8 @@
} }
}, },
"@postlight/mercury-parser": { "@postlight/mercury-parser": {
"version": "2.2.0", "version": "git+https://github.com/postlight/mercury-parser.git#9cd9662bcbfea00b773fad691a4f6e53394ff543",
"resolved": "https://registry.npmjs.org/@postlight/mercury-parser/-/mercury-parser-2.2.0.tgz", "from": "git+https://github.com/postlight/mercury-parser.git",
"integrity": "sha512-nz6dIvCAaiv74o1vhhp0BRsUe+ysPbZG5mVNpJmgLoI/goOBqRMM3Yg8uXtnv++e7tzKFSXdls8b2/zKk1qL0Q==",
"requires": { "requires": {
"@babel/runtime-corejs2": "^7.2.0", "@babel/runtime-corejs2": "^7.2.0",
"@postlight/ci-failed-test-reporter": "^1.0", "@postlight/ci-failed-test-reporter": "^1.0",
@ -50,35 +49,7 @@
"url": "^0.11.0", "url": "^0.11.0",
"valid-url": "^1.0.9", "valid-url": "^1.0.9",
"wuzzy": "^0.1.4", "wuzzy": "^0.1.4",
"yargs-parser": "^13.0.0" "yargs-parser": "^15.0.1"
},
"dependencies": {
"http-headers": {
"version": "3.0.2",
"bundled": true,
"requires": {
"next-line": "^1.1.0"
}
},
"jquery": {
"version": "3.4.1",
"bundled": true
},
"moment": {
"version": "2.23.0",
"bundled": true
},
"moment-timezone": {
"version": "0.5.26",
"bundled": true,
"requires": {
"moment": ">= 2.9.0"
}
},
"next-line": {
"version": "1.1.0",
"bundled": true
}
} }
}, },
"@postman/form-data": { "@postman/form-data": {
@ -105,9 +76,9 @@
"integrity": "sha512-RbzJvlNzmRq5c3O09UipeuXno4tA1FE6ikOjxZK0tuxVv3412l64l5t1W5pj4+rJq9vpkm/kwiR07aZXnsKPxw==" "integrity": "sha512-RbzJvlNzmRq5c3O09UipeuXno4tA1FE6ikOjxZK0tuxVv3412l64l5t1W5pj4+rJq9vpkm/kwiR07aZXnsKPxw=="
}, },
"@types/node": { "@types/node": {
"version": "16.0.0", "version": "17.0.4",
"resolved": "https://registry.npmjs.org/@types/node/-/node-16.0.0.tgz", "resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.4.tgz",
"integrity": "sha512-TmCW5HoZ2o2/z2EYi109jLqIaPIi9y/lc2LmDCWzuCi35bcaQ+OtUh6nwBiFK7SOu25FAU5+YKdqFZUwtqGSdg==", "integrity": "sha512-6xwbrW4JJiJLgF+zNypN5wr2ykM9/jHcL7rQ8fZe2vuftggjzZeRSM4OwRc6Xk8qWjwJ99qVHo/JgOGmomWRog==",
"optional": true "optional": true
}, },
"@types/yauzl": { "@types/yauzl": {
@ -170,9 +141,9 @@
} }
}, },
"ansi-regex": { "ansi-regex": {
"version": "5.0.0", "version": "5.0.1",
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.0.tgz", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
"integrity": "sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==" "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ=="
}, },
"ansi-styles": { "ansi-styles": {
"version": "4.3.0", "version": "4.3.0",
@ -188,9 +159,9 @@
"integrity": "sha1-jCpe8kcv2ep0KwTHenUJO6J1fJM=" "integrity": "sha1-jCpe8kcv2ep0KwTHenUJO6J1fJM="
}, },
"asn1": { "asn1": {
"version": "0.2.4", "version": "0.2.6",
"resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.4.tgz", "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.6.tgz",
"integrity": "sha512-jxwzQpLQjSmWXgwaCZE9Nz+glAG01yF1QnWgbhGwHI5A6FRIEY6IVqtHhIepHqI7/kyEyQEagBC5mBEFlIYvdg==", "integrity": "sha512-ix/FxPn0MDjeyJ7i/yoHGFt/EX6LyNbxSEhPPXODPL+KB0VPk86UYfL0lMdy+KCnv+fmvIzySwaK5COwqVbWTQ==",
"requires": { "requires": {
"safer-buffer": "~2.1.0" "safer-buffer": "~2.1.0"
} }
@ -445,9 +416,9 @@
} }
}, },
"debug": { "debug": {
"version": "4.3.2", "version": "4.3.3",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.2.tgz", "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.3.tgz",
"integrity": "sha512-mOp8wKcvj7XxC78zLgw/ZA+6TSgkoE2C/ienthhRD298T7UNwAg9diBpLRxC0mOezLl4B0xV7M0cCO6P/O0Xhw==", "integrity": "sha512-/zxw5+vh1Tfv+4Qn7a5nsbcJKPaSvCDhojn6FEl9vupwK2VCSDtEiEtqr8DFtzYFOdz63LBkxec7DYuc2jon6Q==",
"requires": { "requires": {
"ms": "2.1.2" "ms": "2.1.2"
} }
@ -515,9 +486,9 @@
} }
}, },
"dompurify": { "dompurify": {
"version": "2.3.0", "version": "2.3.4",
"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-2.3.0.tgz", "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-2.3.4.tgz",
"integrity": "sha512-VV5C6Kr53YVHGOBKO/F86OYX6/iLTw2yVSI721gKetxpHCK/V5TaLEf9ODjRgl1KLSWRMY6cUhAbv/c+IUnwQw==" "integrity": "sha512-6BVcgOAVFXjI0JTjEvZy901Rghm+7fDQOrNIcxB4+gdhj6Kwp6T9VBhBY/AbagKHJocRkDYGd6wvI+p4/10xtQ=="
}, },
"domutils": { "domutils": {
"version": "1.5.1", "version": "1.5.1",
@ -702,9 +673,9 @@
} }
}, },
"glob": { "glob": {
"version": "7.1.7", "version": "7.2.0",
"resolved": "https://registry.npmjs.org/glob/-/glob-7.1.7.tgz", "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.0.tgz",
"integrity": "sha512-OvD9ENzPLbegENnYP5UUfJIirTg4+XwMWGaQfQTY0JenxNvvIKP3U3/tAQSPIu/lHxXYSZmpXlUHeqAIdKzBLQ==", "integrity": "sha512-lmLf6gtyrPq8tTjSmrO94wBeQbFR3HbLHbuyD69wuyQkImp2hWqMGB47OX65FBkPffO641IP9jWa1z4ivqG26Q==",
"requires": { "requires": {
"fs.realpath": "^1.0.0", "fs.realpath": "^1.0.0",
"inflight": "^1.0.4", "inflight": "^1.0.4",
@ -729,9 +700,9 @@
} }
}, },
"heap": { "heap": {
"version": "0.2.6", "version": "0.2.7",
"resolved": "https://registry.npmjs.org/heap/-/heap-0.2.6.tgz", "resolved": "https://registry.npmjs.org/heap/-/heap-0.2.7.tgz",
"integrity": "sha1-CH4fELBGky/IWU3Z5tN4r8nR5aw=" "integrity": "sha512-2bsegYkkHO+h/9MGbn6KWcE45cHZgPANo5LXF7EvWdT0yT2EguSVO1nDgU5c8+ZOPwp2vMNa7YFsJhVcDR9Sdg=="
}, },
"html-encoding-sniffer": { "html-encoding-sniffer": {
"version": "1.0.2", "version": "1.0.2",
@ -773,12 +744,12 @@
} }
}, },
"http-signature": { "http-signature": {
"version": "1.3.5", "version": "1.3.6",
"resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.3.5.tgz", "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.3.6.tgz",
"integrity": "sha512-NwoTQYSJoFt34jSBbwzDHDofoA61NGXzu6wXh95o1Ry62EnmKjXb/nR/RknLeZ3G/uGwrlKNY2z7uPt+Cdl7Tw==", "integrity": "sha512-3adrsD6zqo4GsTqtO7FyrejHNv+NgiIfAfv68+jVlFmSr9OGy7zrxONceFRLKvnnZA5jbxQBX1u9PpB6Wi32Gw==",
"requires": { "requires": {
"assert-plus": "^1.0.0", "assert-plus": "^1.0.0",
"jsprim": "^1.2.2", "jsprim": "^2.0.2",
"sshpk": "^1.14.1" "sshpk": "^1.14.1"
} }
}, },
@ -848,6 +819,11 @@
"resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz", "resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz",
"integrity": "sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo=" "integrity": "sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo="
}, },
"jquery": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/jquery/-/jquery-3.6.0.tgz",
"integrity": "sha512-JVzAR/AjBvVt2BmYhxRCSYysDsPcssdmTFnzyLEts9qNwmjmu4JTAMYubEfwVOSwpQ1I1sKKFcxhZCI2buerfw=="
},
"jsbn": { "jsbn": {
"version": "0.1.1", "version": "0.1.1",
"resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz",
@ -887,9 +863,9 @@
} }
}, },
"json-schema": { "json-schema": {
"version": "0.2.3", "version": "0.4.0",
"resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.2.3.tgz", "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.4.0.tgz",
"integrity": "sha1-tIDIkuWaLwWVTOcnvT8qTogvnhM=" "integrity": "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="
}, },
"json-schema-traverse": { "json-schema-traverse": {
"version": "0.4.1", "version": "0.4.1",
@ -902,20 +878,20 @@
"integrity": "sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus=" "integrity": "sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus="
}, },
"jsprim": { "jsprim": {
"version": "1.4.1", "version": "2.0.2",
"resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.1.tgz", "resolved": "https://registry.npmjs.org/jsprim/-/jsprim-2.0.2.tgz",
"integrity": "sha1-MT5mvB5cwG5Di8G3SZwuXFastqI=", "integrity": "sha512-gqXddjPqQ6G40VdnI6T6yObEC+pDNvyP95wdQhkWkg7crHH3km5qP1FsOXEkzEQwnz6gz5qGTn1c2Y52wP3OyQ==",
"requires": { "requires": {
"assert-plus": "1.0.0", "assert-plus": "1.0.0",
"extsprintf": "1.3.0", "extsprintf": "1.3.0",
"json-schema": "0.2.3", "json-schema": "0.4.0",
"verror": "1.10.0" "verror": "1.10.0"
} }
}, },
"jszip": { "jszip": {
"version": "3.6.0", "version": "3.7.1",
"resolved": "https://registry.npmjs.org/jszip/-/jszip-3.6.0.tgz", "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.7.1.tgz",
"integrity": "sha512-jgnQoG9LKnWO3mnVNBnfhkh0QknICd1FGSrXcgrl67zioyJ4wgx25o9ZqwNtrROSflGBCGYnJfjrIyRIby1OoQ==", "integrity": "sha512-ghL0tz1XG9ZEmRMcEN2vt7xabrDdqHHeykgARpmZ0BiIctWxM47Vt63ZO2dnp4QYt/xJVLLy5Zv1l/xRdh2byg==",
"requires": { "requires": {
"lie": "~3.3.0", "lie": "~3.3.0",
"pako": "~1.0.2", "pako": "~1.0.2",
@ -1078,11 +1054,24 @@
"resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz",
"integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==" "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A=="
}, },
"moment": {
"version": "2.29.3",
"resolved": "https://registry.npmjs.org/moment/-/moment-2.29.3.tgz",
"integrity": "sha512-c6YRvhEo//6T2Jz/vVtYzqBzwvPT95JBQ+smCytzf7c50oMZRsR/a4w88aD34I+/QVSfnoAnSBFPJHItlOMJVw=="
},
"moment-parseformat": { "moment-parseformat": {
"version": "3.0.0", "version": "3.0.0",
"resolved": "https://registry.npmjs.org/moment-parseformat/-/moment-parseformat-3.0.0.tgz", "resolved": "https://registry.npmjs.org/moment-parseformat/-/moment-parseformat-3.0.0.tgz",
"integrity": "sha512-dVgXe6b6DLnv4CHG7a1zUe5mSXaIZ3c6lSHm/EKeVeQI2/4pwe0VRde8OyoCE1Ro2lKT5P6uT9JElF7KDLV+jw==" "integrity": "sha512-dVgXe6b6DLnv4CHG7a1zUe5mSXaIZ3c6lSHm/EKeVeQI2/4pwe0VRde8OyoCE1Ro2lKT5P6uT9JElF7KDLV+jw=="
}, },
"moment-timezone": {
"version": "0.5.26",
"resolved": "https://registry.npmjs.org/moment-timezone/-/moment-timezone-0.5.26.tgz",
"integrity": "sha512-sFP4cgEKTCymBBKgoxZjYzlSovC20Y6J7y3nanDc5RoBIXKlZhoYwBoZGe3flwU6A372AcRwScH8KiwV6zjy1g==",
"requires": {
"moment": ">= 2.9.0"
}
},
"ms": { "ms": {
"version": "2.1.2", "version": "2.1.2",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
@ -1094,9 +1083,33 @@
"integrity": "sha1-/K5XhTBStqm66CCOQN19PC0wRgM=" "integrity": "sha1-/K5XhTBStqm66CCOQN19PC0wRgM="
}, },
"node-fetch": { "node-fetch": {
"version": "2.6.1", "version": "2.6.7",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.1.tgz", "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz",
"integrity": "sha512-V4aYg89jEoVRxRb2fJdAg8FHvI7cEyYdVAh94HH0UIK8oJxUfkjlDQN9RbMx+bEjP7+ggMiFRprSti032Oipxw==" "integrity": "sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ==",
"requires": {
"whatwg-url": "^5.0.0"
},
"dependencies": {
"tr46": {
"version": "0.0.3",
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
"integrity": "sha1-gYT9NH2snNwYWZLzpmIuFLnZq2o="
},
"webidl-conversions": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
"integrity": "sha1-JFNCdeKnvGvnvIZhHMFq4KVlSHE="
},
"whatwg-url": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
"integrity": "sha1-lmRU6HZUYuN2RNNib2dCzotwll0=",
"requires": {
"tr46": "~0.0.3",
"webidl-conversions": "^3.0.0"
}
}
}
}, },
"nth-check": { "nth-check": {
"version": "1.0.2", "version": "1.0.2",
@ -1207,9 +1220,9 @@
"integrity": "sha512-2qHaIQr2VLRFoxe2nASzsV6ef4yOOH+Fi9FBOVH6cqeSgUnoyySPZkxzLuzd+RYOQTRpROA0ztTMqxROKSb/nA==" "integrity": "sha512-2qHaIQr2VLRFoxe2nASzsV6ef4yOOH+Fi9FBOVH6cqeSgUnoyySPZkxzLuzd+RYOQTRpROA0ztTMqxROKSb/nA=="
}, },
"postman-request": { "postman-request": {
"version": "2.88.1-postman.29", "version": "2.88.1-postman.31",
"resolved": "https://registry.npmjs.org/postman-request/-/postman-request-2.88.1-postman.29.tgz", "resolved": "https://registry.npmjs.org/postman-request/-/postman-request-2.88.1-postman.31.tgz",
"integrity": "sha512-QuL3+AvGlmPLb1Qf0t/rM8M4U8LCYbADZBijUNToLl6l37i65KH8wY1gTLWLxlw2I6ugxUfX2Zyyk5/J5HFZIg==", "integrity": "sha512-OJbYqP7ItxQ84yHyuNpDywCZB0HYbpHJisMQ9lb1cSL3N5H3Td6a2+3l/a74UMd3u82BiGC5yQyYmdOIETP/nQ==",
"requires": { "requires": {
"@postman/form-data": "~3.1.1", "@postman/form-data": "~3.1.1",
"@postman/tunnel-agent": "^0.6.3", "@postman/tunnel-agent": "^0.6.3",
@ -1308,16 +1321,16 @@
} }
}, },
"ws": { "ws": {
"version": "7.5.2", "version": "7.5.6",
"resolved": "https://registry.npmjs.org/ws/-/ws-7.5.2.tgz", "resolved": "https://registry.npmjs.org/ws/-/ws-7.5.6.tgz",
"integrity": "sha512-lkF7AWRicoB9mAgjeKbGqVUekLnSNO4VjKVnuPHpQeOxZOErX6BPXwJk70nFslRCEEA8EVW7ZjKwXaP9N+1sKQ==" "integrity": "sha512-6GLgCqo2cy2A2rjCNFlxQS6ZljG/coZfZXclldI8FB/1G3CCI36Zd8xy2HrFVACi8tfk5XrgLQEk+P0Tnz9UcA=="
} }
} }
}, },
"qs": { "qs": {
"version": "6.5.2", "version": "6.5.3",
"resolved": "https://registry.npmjs.org/qs/-/qs-6.5.2.tgz", "resolved": "https://registry.npmjs.org/qs/-/qs-6.5.3.tgz",
"integrity": "sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA==" "integrity": "sha512-qxXIEh4pCGfHICj1mAJQ2/2XVZkjCDTcEgfoSQxc/fYivUZxTkk7L3bDBJSoNrEzXI17oUO5Dp07ktqE5KzczA=="
}, },
"querystring": { "querystring": {
"version": "0.2.0", "version": "0.2.0",
@ -1334,9 +1347,9 @@
}, },
"dependencies": { "dependencies": {
"acorn": { "acorn": {
"version": "8.4.1", "version": "8.6.0",
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.4.1.tgz", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.6.0.tgz",
"integrity": "sha512-asabaBSkEKosYKMITunzX177CXxQ4Q8BSSzMTKD+FefUhipQC70gfW5SiUDhYQ3vk8G+81HqQk7Fv9OXwwn9KA==" "integrity": "sha512-U1riIR+lBSNi3IbxtaHOIKdH8sLFv3NYfNv8sg7ZsNhcfl4HF2++BfqqrNAxoCLQW1iiylOj76ecnaUxz+z9yw=="
}, },
"acorn-globals": { "acorn-globals": {
"version": "6.0.0", "version": "6.0.0",
@ -1417,9 +1430,9 @@
} }
}, },
"estraverse": { "estraverse": {
"version": "5.2.0", "version": "5.3.0",
"resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz", "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
"integrity": "sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==" "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA=="
}, },
"form-data": { "form-data": {
"version": "3.0.1", "version": "3.0.1",
@ -1440,9 +1453,9 @@
} }
}, },
"jsdom": { "jsdom": {
"version": "16.6.0", "version": "16.7.0",
"resolved": "https://registry.npmjs.org/jsdom/-/jsdom-16.6.0.tgz", "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-16.7.0.tgz",
"integrity": "sha512-Ty1vmF4NHJkolaEmdjtxTfSfkdb8Ywarwf63f+F8/mDD1uLSSWDxDuMiZxiPhwunLrn9LOSVItWj4bLYsLN3Dg==", "integrity": "sha512-u9Smc2G1USStM+s/x1ru5Sxrl6mPYCbByG1U/hUmqaVsm4tbNyS7CicOSRyuGQYZhTu0h84qkZZQ/I+dzizSVw==",
"requires": { "requires": {
"abab": "^2.0.5", "abab": "^2.0.5",
"acorn": "^8.2.4", "acorn": "^8.2.4",
@ -1469,7 +1482,7 @@
"whatwg-encoding": "^1.0.5", "whatwg-encoding": "^1.0.5",
"whatwg-mimetype": "^2.3.0", "whatwg-mimetype": "^2.3.0",
"whatwg-url": "^8.5.0", "whatwg-url": "^8.5.0",
"ws": "^7.4.5", "ws": "^7.4.6",
"xml-name-validator": "^3.0.0" "xml-name-validator": "^3.0.0"
} }
}, },
@ -1512,9 +1525,9 @@
} }
}, },
"ws": { "ws": {
"version": "7.5.2", "version": "7.5.6",
"resolved": "https://registry.npmjs.org/ws/-/ws-7.5.2.tgz", "resolved": "https://registry.npmjs.org/ws/-/ws-7.5.6.tgz",
"integrity": "sha512-lkF7AWRicoB9mAgjeKbGqVUekLnSNO4VjKVnuPHpQeOxZOErX6BPXwJk70nFslRCEEA8EVW7ZjKwXaP9N+1sKQ==" "integrity": "sha512-6GLgCqo2cy2A2rjCNFlxQS6ZljG/coZfZXclldI8FB/1G3CCI36Zd8xy2HrFVACi8tfk5XrgLQEk+P0Tnz9UcA=="
} }
} }
}, },
@ -1529,9 +1542,9 @@
} }
}, },
"regenerator-runtime": { "regenerator-runtime": {
"version": "0.13.7", "version": "0.13.9",
"resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.7.tgz", "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.9.tgz",
"integrity": "sha512-a54FxoJDIr27pgf7IgeQGxmqUNYrcV338lf/6gH456HZ/PhX+5BcwHXG9ajESmwe6WRO0tAzRUrRmNONWgkrew==" "integrity": "sha512-p3VT+cOEgxFsRRA9X4lkI1E+k2/CtnKtU4gcxyaCUreilL/vqI6CdZ3wxVUx3UOUg+gnUOQQcRI7BmSI656MYA=="
}, },
"request": { "request": {
"version": "2.88.2", "version": "2.88.2",
@ -1569,6 +1582,17 @@
"jsprim": "^1.2.2", "jsprim": "^1.2.2",
"sshpk": "^1.7.0" "sshpk": "^1.7.0"
} }
},
"jsprim": {
"version": "1.4.2",
"resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.2.tgz",
"integrity": "sha512-P2bSOMAc/ciLz6DzgjVlGJP9+BrJWu5UDGK70C2iweC5QBIeFf0ZXRvGjEj2uYgrY2MkAAhsSWHDWlFtEroZWw==",
"requires": {
"assert-plus": "1.0.0",
"extsprintf": "1.3.0",
"json-schema": "0.4.0",
"verror": "1.10.0"
}
} }
} }
}, },
@ -1683,9 +1707,9 @@
}, },
"dependencies": { "dependencies": {
"acorn": { "acorn": {
"version": "8.4.1", "version": "8.6.0",
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.4.1.tgz", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.6.0.tgz",
"integrity": "sha512-asabaBSkEKosYKMITunzX177CXxQ4Q8BSSzMTKD+FefUhipQC70gfW5SiUDhYQ3vk8G+81HqQk7Fv9OXwwn9KA==" "integrity": "sha512-U1riIR+lBSNi3IbxtaHOIKdH8sLFv3NYfNv8sg7ZsNhcfl4HF2++BfqqrNAxoCLQW1iiylOj76ecnaUxz+z9yw=="
}, },
"acorn-globals": { "acorn-globals": {
"version": "6.0.0", "version": "6.0.0",
@ -1766,9 +1790,9 @@
} }
}, },
"estraverse": { "estraverse": {
"version": "5.2.0", "version": "5.3.0",
"resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz", "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
"integrity": "sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==" "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA=="
}, },
"form-data": { "form-data": {
"version": "3.0.1", "version": "3.0.1",
@ -1797,9 +1821,9 @@
} }
}, },
"jsdom": { "jsdom": {
"version": "16.6.0", "version": "16.7.0",
"resolved": "https://registry.npmjs.org/jsdom/-/jsdom-16.6.0.tgz", "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-16.7.0.tgz",
"integrity": "sha512-Ty1vmF4NHJkolaEmdjtxTfSfkdb8Ywarwf63f+F8/mDD1uLSSWDxDuMiZxiPhwunLrn9LOSVItWj4bLYsLN3Dg==", "integrity": "sha512-u9Smc2G1USStM+s/x1ru5Sxrl6mPYCbByG1U/hUmqaVsm4tbNyS7CicOSRyuGQYZhTu0h84qkZZQ/I+dzizSVw==",
"requires": { "requires": {
"abab": "^2.0.5", "abab": "^2.0.5",
"acorn": "^8.2.4", "acorn": "^8.2.4",
@ -1826,7 +1850,7 @@
"whatwg-encoding": "^1.0.5", "whatwg-encoding": "^1.0.5",
"whatwg-mimetype": "^2.3.0", "whatwg-mimetype": "^2.3.0",
"whatwg-url": "^8.5.0", "whatwg-url": "^8.5.0",
"ws": "^7.4.5", "ws": "^7.4.6",
"xml-name-validator": "^3.0.0" "xml-name-validator": "^3.0.0"
} }
}, },
@ -1869,9 +1893,9 @@
} }
}, },
"ws": { "ws": {
"version": "7.5.2", "version": "7.5.6",
"resolved": "https://registry.npmjs.org/ws/-/ws-7.5.2.tgz", "resolved": "https://registry.npmjs.org/ws/-/ws-7.5.6.tgz",
"integrity": "sha512-lkF7AWRicoB9mAgjeKbGqVUekLnSNO4VjKVnuPHpQeOxZOErX6BPXwJk70nFslRCEEA8EVW7ZjKwXaP9N+1sKQ==" "integrity": "sha512-6GLgCqo2cy2A2rjCNFlxQS6ZljG/coZfZXclldI8FB/1G3CCI36Zd8xy2HrFVACi8tfk5XrgLQEk+P0Tnz9UcA=="
} }
} }
}, },
@ -1882,9 +1906,9 @@
"optional": true "optional": true
}, },
"sshpk": { "sshpk": {
"version": "1.16.1", "version": "1.17.0",
"resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.16.1.tgz", "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.17.0.tgz",
"integrity": "sha512-HXXqVUq7+pcKeLqqZj6mHFUMvXtOJt1uoUx09pFW6011inTMxqI8BA8PM95myrIyyKwdnzjdFjLiE6KBPVtJIg==", "integrity": "sha512-/9HIEs1ZXGhSPE8X6Ccm7Nam1z8KcoCqPdI7ecm1N33EzAetWahvQWVqLZtaZQ+IDKX4IyA2o0gBzqIMkAagHQ==",
"requires": { "requires": {
"asn1": "~0.2.3", "asn1": "~0.2.3",
"assert-plus": "^1.0.0", "assert-plus": "^1.0.0",
@ -1916,13 +1940,13 @@
"integrity": "sha1-PYRT5ydKLkShQrPchEnftk2a3jo=" "integrity": "sha1-PYRT5ydKLkShQrPchEnftk2a3jo="
}, },
"string-width": { "string-width": {
"version": "4.2.2", "version": "4.2.3",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.2.tgz", "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
"integrity": "sha512-XBJbT3N4JhVumXE0eoLU9DCjcaF92KLNqTmFCnG1pf8duUxFGwtP6AD6nkjw9a3IdiRtL3E2w3JDiE/xi3vOeA==", "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
"requires": { "requires": {
"emoji-regex": "^8.0.0", "emoji-regex": "^8.0.0",
"is-fullwidth-code-point": "^3.0.0", "is-fullwidth-code-point": "^3.0.0",
"strip-ansi": "^6.0.0" "strip-ansi": "^6.0.1"
} }
}, },
"string_decoder": { "string_decoder": {
@ -1934,11 +1958,11 @@
} }
}, },
"strip-ansi": { "strip-ansi": {
"version": "6.0.0", "version": "6.0.1",
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.0.tgz", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
"integrity": "sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==", "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
"requires": { "requires": {
"ansi-regex": "^5.0.0" "ansi-regex": "^5.0.1"
} }
}, },
"strong-data-uri": { "strong-data-uri": {
@ -2187,9 +2211,9 @@
} }
}, },
"wuzzy": { "wuzzy": {
"version": "0.1.6", "version": "0.1.8",
"resolved": "https://registry.npmjs.org/wuzzy/-/wuzzy-0.1.6.tgz", "resolved": "https://registry.npmjs.org/wuzzy/-/wuzzy-0.1.8.tgz",
"integrity": "sha512-x1lDcj0VvzJ1ygDpd9LWMnQVei6gEkUbCcZUG8TPnXhlPbaQWQa32ab/6xbm/samxJ2T3Y2+P3xHeeQIAcEvqQ==", "integrity": "sha512-FUzKQepFSTnANsDYwxpIzGJ/dIJaqxuMre6tzzbvWwFAiUHPsI1nVQVCLK4Xqr67KO7oYAK0kaCcI/+WYj/7JA==",
"requires": { "requires": {
"lodash": "^4.17.15" "lodash": "^4.17.15"
} }
@ -2231,9 +2255,9 @@
} }
}, },
"yargs-parser": { "yargs-parser": {
"version": "13.1.2", "version": "15.0.3",
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-13.1.2.tgz", "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-15.0.3.tgz",
"integrity": "sha512-3lbsNRf/j+A4QuSZfDRA7HRSfWrzO0YjqTJd5kjAq37Zep1CEgaYmrH9Q3GwPiB9cHyd1Y1UwggGhJGoxipbzg==", "integrity": "sha512-/MVEVjTXy/cGAjdtQf8dW3V9b97bPN7rNn8ETj6BmAQL7ibC7O1Q9SPJbGjgh3SlwoBNXMzj/ZGIj8mBgl12YA==",
"requires": { "requires": {
"camelcase": "^5.0.0", "camelcase": "^5.0.0",
"decamelize": "^1.2.0" "decamelize": "^1.2.0"

View file

@@ -6,7 +6,7 @@
   "repository": "github:ArchiveBox/ArchiveBox",
   "license": "MIT",
   "dependencies": {
-    "@postlight/mercury-parser": "^2.2.0",
+    "@postlight/mercury-parser": "git+https://github.com/postlight/mercury-parser.git",
     "readability-extractor": "git+https://github.com/ArchiveBox/readability-extractor.git",
     "single-file": "git+https://github.com/gildas-lormeau/SingleFile.git"
   }
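Note: the package.json change above swaps the pinned npm release of @postlight/mercury-parser for a build straight from the upstream Git repository, matching how the other two extractors are already sourced. As an illustrative sketch only (not code from this repo), here is how such a Node-based extractor might be driven from Python; the "mercury-parser" executable name is an assumption based on the CLI that npm package is expected to install:

# Illustrative wrapper, not code from this repo: shell out to the
# mercury-parser CLI installed by the npm dependency above and decode
# the JSON document it prints to stdout.
import json
import subprocess

def extract_article(url: str) -> dict:
    result = subprocess.run(
        ["mercury-parser", url],
        capture_output=True,
        text=True,
        check=True,  # raises CalledProcessError on a non-zero exit
    )
    return json.loads(result.stdout)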

View file

@@ -42,6 +42,7 @@ INSTALL_REQUIRES = [
     "django-extensions>=3.0.3",
     "dateparser>=1.0.0",
     "youtube-dl>=2021.04.17",
+    "yt-dlp>=2021.4.11",
     "python-crontab>=2.5.1",
     "croniter>=0.3.34",
     "w3lib>=1.22.0",

View file

@@ -5,7 +5,7 @@ Package3: archivebox
 Suite: focal
 Suite3: focal
 Build-Depends: debhelper, dh-python, python3-all, python3-pip, python3-setuptools, python3-wheel, python3-stdeb
-Depends3: nodejs, wget, curl, git, ffmpeg, youtube-dl, python3-all, python3-pip, python3-setuptools, python3-croniter, python3-crontab, python3-dateparser, python3-django, python3-django-extensions, python3-django-jsonfield, python3-mypy-extensions, python3-requests, python3-w3lib, ripgrep
+Depends3: nodejs, wget, curl, git, ffmpeg, youtube-dl, yt-dlp, python3-all, python3-pip, python3-setuptools, python3-croniter, python3-crontab, python3-dateparser, python3-django, python3-django-extensions, python3-django-jsonfield, python3-mypy-extensions, python3-requests, python3-w3lib, ripgrep
 X-Python3-Version: >= 3.7
 XS-Python-Version: >= 3.7
 Setup-Env-Vars: DEB_BUILD_OPTIONS=nocheck
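The same yt-dlp requirement is mirrored here in the Debian packaging metadata, keeping the .deb runtime dependencies in sync with setup.py. A quick sanity check of that file, sketched with Python's standard configparser (illustrative only; it assumes stdeb.cfg keeps its keys in ordinary INI sections, as the hunk above suggests):

# Illustrative check, not part of the repo: confirm stdeb.cfg lists yt-dlp
# among the Debian runtime dependencies declared in Depends3.
import configparser

config = configparser.ConfigParser()
config.read("stdeb.cfg")
for section in config.sections():
    depends = config.get(section, "Depends3", fallback="")
    assert "yt-dlp" in depends, f"yt-dlp missing from Depends3 in [{section}]"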