From a6ea05820b094f3ced65ce4aeef836c6db455725 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 4 Jan 2024 19:59:34 -0800 Subject: [PATCH 001/166] empty commit to bump version difference --- bin/build_brew.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/build_brew.sh b/bin/build_brew.sh index 170ba5a6..d73d92c4 100755 --- a/bin/build_brew.sh +++ b/bin/build_brew.sh @@ -10,7 +10,7 @@ set -o nounset set -o pipefail IFS=$'\n' -REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )" +REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" > /dev/null 2>&1 && cd .. && pwd )" CURRENT_PLAFORM="$(uname)" @@ -45,7 +45,7 @@ pip3 uninstall yt-dlp || true # brew untap archivebox/archivebox || true echo -echo "[+] Installing and building hombrew bottle from https://Github.com/ArchiveBox/homebrew-archivebox#main" +echo "[+] Installing and building hombrew bottle from https://github.com/ArchiveBox/homebrew-archivebox#main" brew tap archivebox/archivebox brew install --build-bottle archivebox brew bottle archivebox From bc2bfc1cc7e2086ac5f09d348bbebefc422d328f Mon Sep 17 00:00:00 2001 From: gnattu Date: Fri, 5 Jan 2024 12:57:01 +0800 Subject: [PATCH 002/166] fix: handle archive folder permission more graceful On mounted network shares, chmod might be hard or impossible to do without modifying the server config. Ignore the archive folder because we already tested if that folder is writeable and printed warnings to the user. --- bin/docker_entrypoint.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bin/docker_entrypoint.sh b/bin/docker_entrypoint.sh index b288b817..db962b8b 100755 --- a/bin/docker_entrypoint.sh +++ b/bin/docker_entrypoint.sh @@ -64,9 +64,9 @@ if [[ -d "$DATA_DIR/archive" ]]; then # echo "[√] Permissions are correct" else # the only time this fails is if the host filesystem doesn't allow us to write as root (e.g. 
some NFS mapall/maproot problems, connection issues, drive dissapeared, etc.) - echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data dir (currently owned by $(stat -c '%u' "$DATA_DIR"):$(stat -c '%g' "$DATA_DIR")." >&2 + echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data/archive dir (currently owned by $(stat -c '%u' "$DATA_DIR/archive"):$(stat -c '%g' "$DATA_DIR/archive")." >&2 echo -e " Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:" > /dev/stderr - echo -e " \$ chown -R $PUID:$PGID ./data\n" > /dev/stderr + echo -e " \$ chown -R $PUID:$PGID ./data/archive\n" > /dev/stderr echo -e " Configure the PUID & PGID environment variables to change the desired owner:" > /dev/stderr echo -e " https://docs.linuxserver.io/general/understanding-puid-and-pgid\n" > /dev/stderr echo -e " Hint: some NFS/SMB/FUSE/etc. filesystems force-remap/ignore all permissions," > /dev/stderr @@ -82,7 +82,8 @@ fi # force set the ownership of the data dir contents to the archivebox user and group # this is needed because Docker Desktop often does not map user permissions from the host properly chown $PUID:$PGID "$DATA_DIR" -chown $PUID:$PGID "$DATA_DIR"/* +find "$DATA_DIR" -type d -not -path "$DATA_DIR/archive*" -exec chown $PUID:$PGID {} \; +find "$DATA_DIR" -type f -not -path "$DATA_DIR/archive/*" -exec chown $PUID:$PGID {} \; # also chown BROWSERS_DIR because otherwise 'archivebox setup' wont be able to install chrome at runtime export PLAYWRIGHT_BROWSERS_PATH="${PLAYWRIGHT_BROWSERS_PATH:-/browsers}" From e085b1d13be87d7dc979181c580afeba59110d73 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Fri, 5 Jan 2024 16:19:47 -0800 Subject: [PATCH 003/166] try to chown everything in ./data and only fallback to avoiding ./data/archive if it fails initially --- bin/docker_entrypoint.sh | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/bin/docker_entrypoint.sh 
b/bin/docker_entrypoint.sh index db962b8b..8c6e804d 100755 --- a/bin/docker_entrypoint.sh +++ b/bin/docker_entrypoint.sh @@ -35,7 +35,7 @@ export DEFAULT_PGID=911 if [[ "$PUID" == "0" ]]; then echo -e "\n[X] Error: Got PUID=$PUID and PGID=$PGID but ArchiveBox is not allowed to be run as root, please change or unset PUID & PGID and try again." > /dev/stderr echo -e " Hint: some NFS/SMB/FUSE/etc. filesystems force-remap/ignore all permissions," > /dev/stderr - echo -e " leave PUID/PGID unset, or use values the filesystem prefers (defaults to $DEFAULT_PUID:$DEFAULT_PGID)" > /dev/stderr + echo -e " leave PUID/PGID unset, disable root_squash, or use values the drive prefers (default is $DEFAULT_PUID:$DEFAULT_PGID)" > /dev/stderr echo -e " https://linux.die.net/man/8/mount.cifs#:~:text=does%20not%20provide%20unix%20ownership" > /dev/stderr exit 3 fi @@ -46,6 +46,7 @@ export DETECTED_PGID="$(stat -c '%g' "$DATA_DIR/logs/errors.log" 2>/dev/null || # If data directory exists but is owned by root, use defaults instead of root because root is not allowed [[ "$DETECTED_PUID" == "0" ]] && export DETECTED_PUID="$DEFAULT_PUID" +# (GUID / DETECTED_GUID is allowed to be 0 though) # Set archivebox user and group ids to desired PUID/PGID usermod -o -u "${PUID:-$DETECTED_PUID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1 @@ -64,26 +65,31 @@ if [[ -d "$DATA_DIR/archive" ]]; then # echo "[√] Permissions are correct" else # the only time this fails is if the host filesystem doesn't allow us to write as root (e.g. some NFS mapall/maproot problems, connection issues, drive dissapeared, etc.) - echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data/archive dir (currently owned by $(stat -c '%u' "$DATA_DIR/archive"):$(stat -c '%g' "$DATA_DIR/archive")." >&2 + echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data/archive dir (currently owned by $(stat -c '%u' "$DATA_DIR/archive"):$(stat -c '%g' "$DATA_DIR/archive")." 
> /dev/stderr echo -e " Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:" > /dev/stderr - echo -e " \$ chown -R $PUID:$PGID ./data/archive\n" > /dev/stderr + echo -e " \$ chown -R $PUID:$PGID ./data\n" > /dev/stderr echo -e " Configure the PUID & PGID environment variables to change the desired owner:" > /dev/stderr echo -e " https://docs.linuxserver.io/general/understanding-puid-and-pgid\n" > /dev/stderr echo -e " Hint: some NFS/SMB/FUSE/etc. filesystems force-remap/ignore all permissions," > /dev/stderr - echo -e " leave PUID/PGID unset, or use values the filesystem prefers (defaults to $DEFAULT_PUID:$DEFAULT_PGID)" > /dev/stderr + echo -e " leave PUID/PGID unset, disable root_squash, or use values the drive prefers (default is $DEFAULT_PUID:$DEFAULT_PGID)" > /dev/stderr echo -e " https://linux.die.net/man/8/mount.cifs#:~:text=does%20not%20provide%20unix%20ownership" > /dev/stderr exit 3 fi else - # create data directory + # create data directory (and logs, since its the first dir ArchiveBox needs to write to) mkdir -p "$DATA_DIR/logs" fi # force set the ownership of the data dir contents to the archivebox user and group # this is needed because Docker Desktop often does not map user permissions from the host properly chown $PUID:$PGID "$DATA_DIR" -find "$DATA_DIR" -type d -not -path "$DATA_DIR/archive*" -exec chown $PUID:$PGID {} \; -find "$DATA_DIR" -type f -not -path "$DATA_DIR/archive/*" -exec chown $PUID:$PGID {} \; +if ! 
chown $PUID:$PGID "$DATA_DIR"/* > /dev/null 2>&1; then + # users may store the ./data/archive folder on a network mount that prevents chmod/chown + # fallback to chowning everything else in ./data and leaving ./data/archive alone + find "$DATA_DIR" -type d -not -path "$DATA_DIR/archive*" -exec chown $PUID:$PGID {} \; > /dev/null 2>&1 + find "$DATA_DIR" -type f -not -path "$DATA_DIR/archive/*" -exec chown $PUID:$PGID {} \; > /dev/null 2>&1 +fi + # also chown BROWSERS_DIR because otherwise 'archivebox setup' wont be able to install chrome at runtime export PLAYWRIGHT_BROWSERS_PATH="${PLAYWRIGHT_BROWSERS_PATH:-/browsers}" From 84e026d86232134e2325441694ea028acc771018 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Fri, 5 Jan 2024 16:57:15 -0800 Subject: [PATCH 004/166] minor readme improvements --- README.md | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 91b56c22..fa47f9fd 100644 --- a/README.md +++ b/README.md @@ -531,7 +531,7 @@ It does everything out-of-the-box by default, but you can disable or tweak [indi -ArchiveBox can be configured via environment variables, by using the `archivebox config` CLI, or by editing the `ArchiveBox.conf` config file directly. +ArchiveBox can be configured via environment variables, by using the `archivebox config` CLI, or by editing `./ArchiveBox.conf` directly. ```bash archivebox config # view the entire config @@ -573,14 +573,15 @@ CURL_USER_AGENT="Mozilla/5.0 ..." ## Dependencies -To achieve high-fidelity archives in as many situations as possible, ArchiveBox depends on a variety of high-quality 3rd-party tools and libraries that specialize in extracting different types of content. +To achieve high-fidelity archives in as many situations as possible, ArchiveBox depends on a variety of 3rd-party tools that specialize in extracting different types of content.
Expand to learn more about ArchiveBox's dependencies...
-For better security, easier updating, and to avoid polluting your host system with extra dependencies, **it is strongly recommended to use the official [Docker image](https://github.com/ArchiveBox/ArchiveBox/wiki/Docker)** with everything pre-installed for the best experience. +> [!TIP] +> For better security, easier updating, and to avoid polluting your host system with extra dependencies, **it is strongly recommended to use the [⭐️ official Docker image](https://github.com/ArchiveBox/ArchiveBox/wiki/Docker)** with everything pre-installed for the best experience. These optional dependencies used for archiving sites include: @@ -591,8 +592,11 @@ These optional dependencies used for archiving sites include: - `node` & `npm` (for readability, mercury, and singlefile) - `wget` (for plain HTML, static files, and WARC saving) - `curl` (for fetching headers, favicon, and posting to Archive.org) -- `youtube-dl` or `yt-dlp` (for audio, video, and subtitles) +- `yt-dlp` or `youtube-dl` (for audio, video, and subtitles) - `git` (for cloning git repos) +- `singlefile` (for saving into a self-contained html file) +- `postlight/parser` (for discussion threads, forums, and articles) +- `readability` (for articles and long text content) - and more as we grow... You don't need to install every dependency to use ArchiveBox. ArchiveBox will automatically disable extractors that rely on dependencies that aren't installed, based on what is configured and available in your `$PATH`. @@ -681,7 +685,7 @@ You can export the main index to browse it statically as plain HTML files in a f Expand to learn how to export your ArchiveBox collection...
-> **Note** +> [!NOTE] > These exports are not paginated, exporting many URLs or the entire archive at once may be slow. Use the filtering CLI flags on the `archivebox list` command to export specific Snapshots or ranges. ```bash @@ -744,6 +748,10 @@ archivebox config --set SAVE_FAVICON=False # disable favicon fetching ( archivebox config --set CHROME_BINARY=chromium # ensure it's using Chromium instead of Chrome ``` +> [!CAUTION] +> Assume anyone *viewing* your archives will be able to see any cookies, session tokens, or private URLs passed to ArchiveBox during archiving. +> Make sure to secure your ArchiveBox data and don't share snapshots with others without stripping out sensitive headers and content first. + #### Learn More - https://github.com/ArchiveBox/ArchiveBox/wiki/Publishing-Your-Archive @@ -777,7 +785,9 @@ https://127.0.0.1:8000/archive/* The admin UI is also served from the same origin as replayed JS, so malicious pages could also potentially use your ArchiveBox login cookies to perform admin actions (e.g. adding/removing links, running extractors, etc.). We are planning to fix this security shortcoming in a future version by using separate ports/origins to serve the Admin UI and archived content (see [Issue #239](https://github.com/ArchiveBox/ArchiveBox/issues/239)). -*Note: Only the `wget` & `dom` extractor methods execute archived JS when viewing snapshots, all other archive methods produce static output that does not execute JS on viewing. If you are worried about these issues ^ you should disable these extractors using `archivebox config --set SAVE_WGET=False SAVE_DOM=False`.* +> [!NOTE] +> Only the `wget` & `dom` extractor methods execute archived JS when viewing snapshots, all other archive methods produce static output that does not execute JS on viewing. 
+> If you are worried about these issues ^ you should disable these extractors using `archivebox config --set SAVE_WGET=False SAVE_DOM=False`.* #### Learn More From 710167e967661e82645779447a9580d442a9bd4a Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Fri, 5 Jan 2024 16:58:14 -0800 Subject: [PATCH 005/166] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fa47f9fd..1051411c 100644 --- a/README.md +++ b/README.md @@ -577,8 +577,8 @@ To achieve high-fidelity archives in as many situations as possible, ArchiveBox
-Expand to learn more about ArchiveBox's dependencies... -
+Expand to learn more about ArchiveBox's dependencies...
+ > [!TIP] > For better security, easier updating, and to avoid polluting your host system with extra dependencies, **it is strongly recommended to use the [⭐️ official Docker image](https://github.com/ArchiveBox/ArchiveBox/wiki/Docker)** with everything pre-installed for the best experience. From a232b45b61f3127dd6390262d2a0073f88ff8b21 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Fri, 5 Jan 2024 17:03:46 -0800 Subject: [PATCH 006/166] Update README.md --- README.md | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 1051411c..92a2c19e 100644 --- a/README.md +++ b/README.md @@ -579,9 +579,8 @@ To achieve high-fidelity archives in as many situations as possible, ArchiveBox
Expand to learn more about ArchiveBox's dependencies...
- -> [!TIP] -> For better security, easier updating, and to avoid polluting your host system with extra dependencies, **it is strongly recommended to use the [⭐️ official Docker image](https://github.com/ArchiveBox/ArchiveBox/wiki/Docker)** with everything pre-installed for the best experience. +> *TIP: For better security, easier updating, and to avoid polluting your host system with extra dependencies,* +> ***it is strongly recommended to use the [⭐️ official Docker image](https://github.com/ArchiveBox/ArchiveBox/wiki/Docker)** with everything pre-installed for the best experience.* These optional dependencies used for archiving sites include: @@ -685,8 +684,8 @@ You can export the main index to browse it statically as plain HTML files in a f Expand to learn how to export your ArchiveBox collection...
-> [!NOTE] -> These exports are not paginated, exporting many URLs or the entire archive at once may be slow. Use the filtering CLI flags on the `archivebox list` command to export specific Snapshots or ranges. +> *NOTE: These exports are not paginated, exporting many URLs or the entire archive at once may be slow.* +> *Use the filtering CLI flags on the `archivebox list` command to export specific Snapshots or ranges.* ```bash # archivebox list --help @@ -748,9 +747,8 @@ archivebox config --set SAVE_FAVICON=False # disable favicon fetching ( archivebox config --set CHROME_BINARY=chromium # ensure it's using Chromium instead of Chrome ``` -> [!CAUTION] -> Assume anyone *viewing* your archives will be able to see any cookies, session tokens, or private URLs passed to ArchiveBox during archiving. -> Make sure to secure your ArchiveBox data and don't share snapshots with others without stripping out sensitive headers and content first. +> *CAUTION: Assume anyone *viewing* your archives will be able to see any cookies, session tokens, or private URLs passed to ArchiveBox during archiving.* +> *Make sure to secure your ArchiveBox data and don't share snapshots with others without stripping out sensitive headers and content first.* #### Learn More @@ -785,9 +783,8 @@ https://127.0.0.1:8000/archive/* The admin UI is also served from the same origin as replayed JS, so malicious pages could also potentially use your ArchiveBox login cookies to perform admin actions (e.g. adding/removing links, running extractors, etc.). We are planning to fix this security shortcoming in a future version by using separate ports/origins to serve the Admin UI and archived content (see [Issue #239](https://github.com/ArchiveBox/ArchiveBox/issues/239)). -> [!NOTE] -> Only the `wget` & `dom` extractor methods execute archived JS when viewing snapshots, all other archive methods produce static output that does not execute JS on viewing. 
-> If you are worried about these issues ^ you should disable these extractors using `archivebox config --set SAVE_WGET=False SAVE_DOM=False`.* +> *NOTE: Only the `wget` & `dom` extractor methods execute archived JS when viewing snapshots, all other archive methods produce static output that does not execute JS on viewing.* +> *If you are worried about these issues ^ you should disable these extractors using `archivebox config --set SAVE_WGET=False SAVE_DOM=False`.* #### Learn More @@ -1007,10 +1004,14 @@ ArchiveBox is neither the highest fidelity nor the simplest tool available for s ## Internet Archiving Ecosystem + + + Whether you want to learn which organizations are the big players in the web archiving space, want to find a specific open-source tool for your web archiving need, or just want to see where archivists hang out online, our Community Wiki page serves as an index of the broader web archiving community. Check it out to learn about some of the coolest web archiving projects and communities on the web! - - +
+Explore our index of web archiving software, blogs, and communities around the world... +
- [Community Wiki](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community) - [The Master Lists](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community#the-master-lists) @@ -1025,6 +1026,7 @@ Whether you want to learn which organizations are the big players in the web arc - Learn why archiving the internet is important by reading the "[On the Importance of Web Archiving](https://items.ssrc.org/parameters/on-the-importance-of-web-archiving/)" blog post. - Reach out to me for questions and comments via [@ArchiveBoxApp](https://twitter.com/ArchiveBoxApp) or [@theSquashSH](https://twitter.com/thesquashSH) on Twitter +

From e43babb7acf9d43472617522bd6eac6912b3ba2f Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Fri, 5 Jan 2024 17:20:09 -0800 Subject: [PATCH 007/166] Update README.md --- README.md | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 92a2c19e..efc5744a 100644 --- a/README.md +++ b/README.md @@ -620,9 +620,9 @@ Installing directly on **Windows without Docker or WSL/WSL2/Cygwin is not offici - https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#installing
-
+ ## Archive Layout All of ArchiveBox's state (including the SQLite DB, archived assets, config, logs, etc.) is stored in a single folder called the "ArchiveBox Data Folder". @@ -633,6 +633,7 @@ Data folders can be created anywhere (`~/archivebox` or `$PWD/data` as seen in o Expand to learn more about the layout of Archivebox's data on-disk...
+ All `archivebox` CLI commands are designed to be run from inside an ArchiveBox data folder, starting with `archivebox init` to initialize a new collection inside an empty directory. ```bash @@ -671,10 +672,11 @@ Each snapshot subfolder `./archive//` includes a static `index.json` - https://github.com/ArchiveBox/ArchiveBox/wiki/Publishing-Your-Archive - https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives -
+

+ ## Static Archive Exporting You can export the main index to browse it statically as plain HTML files in a folder (without needing to run a server). @@ -684,6 +686,7 @@ You can export the main index to browse it statically as plain HTML files in a f Expand to learn how to export your ArchiveBox collection...
+ > *NOTE: These exports are not paginated, exporting many URLs or the entire archive at once may be slow.* > *Use the filtering CLI flags on the `archivebox list` command to export specific Snapshots or ranges.* @@ -707,15 +710,16 @@ The paths in the static exports are relative, make sure to keep them next to you -
+ ---
security graphic
+ ## Caveats ### Archiving Private Content @@ -758,6 +762,7 @@ archivebox config --set CHROME_BINARY=chromium # ensure it's using Chromium - https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#chrome_user_data_dir - https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#cookies_file +
@@ -766,6 +771,7 @@ archivebox config --set CHROME_BINARY=chromium # ensure it's using Chromium Be aware that malicious archived JS can access the contents of other pages in your archive when viewed. Because the Web UI serves all viewed snapshots from a single domain, they share a request context and **typical CSRF/CORS/XSS/CSP protections do not work to prevent cross-site request attacks**. See the [Security Overview](https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#stealth-mode) page and [Issue #239](https://github.com/ArchiveBox/ArchiveBox/issues/239) for more details. +
Click to expand... @@ -797,6 +803,7 @@ The admin UI is also served from the same origin as replayed JS, so malicious pa

+ ### Working Around Sites that Block Archiving For various reasons, many large sites (Reddit, Twitter, Cloudflare, etc.) actively block archiving or bots in general. There are a number of approaches to work around this. @@ -806,6 +813,7 @@ For various reasons, many large sites (Reddit, Twitter, Cloudflare, etc.) active Click to expand...
+ - Set [`CHROME_USER_AGENT`, `WGET_USER_AGENT`, `CURL_USER_AGENT`](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#curl_user_agent) to impersonate a real browser (instead of an ArchiveBox bot) - Set up a logged-in browser session for archiving using [`CHROME_DATA_DIR` & `COOKIES_FILE`](https://github.com/ArchiveBox/ArchiveBox/wiki/Chromium-Install#setting-up-a-chromium-user-profile) - Rewrite your URLs before archiving to swap in an alternative frontend thats more bot-friendly e.g. @@ -822,11 +830,13 @@ In the future we plan on adding support for running JS scripts during archiving ArchiveBox appends a hash with the current date `https://example.com#2020-10-24` to differentiate when a single URL is archived multiple times. +
Click to expand...
+ Because ArchiveBox uniquely identifies snapshots by URL, it must use a workaround to take multiple snapshots of the same URL (otherwise they would show up as a single Snapshot entry). It makes the URLs of repeated snapshots unique by adding a hash with the archive date at the end: ```bash @@ -848,6 +858,7 @@ Improved support for saving multiple snapshots of a single URL without this hash

+ ### Storage Requirements Because ArchiveBox is designed to ingest a large volume of URLs with multiple copies of each URL stored by different 3rd-party tools, it can be quite disk-space intensive. @@ -858,6 +869,7 @@ There also also some special requirements when using filesystems like NFS/SMB/FU Click to expand...
+ **ArchiveBox can use anywhere from ~1gb per 1000 articles, to ~50gb per 1000 articles**, mostly dependent on whether you're saving audio & video using `SAVE_MEDIA=True` and whether you lower `MEDIA_MAX_SIZE=750mb`. Disk usage can be reduced by using a compressed/deduplicated filesystem like ZFS/BTRFS, or by turning off extractors methods you don't need. You can also deduplicate content with a tool like [fdupes](https://github.com/adrianlopezroche/fdupes) or [rdfind](https://github.com/pauldreik/rdfind). **Don't store large collections on older filesystems like EXT3/FAT** as they may not be able to handle more than 50k directory entries in the `archive/` folder. **Try to keep the `index.sqlite3` file on local drive (not a network mount)** or SSD for maximum performance, however the `archive/` folder can be on a network mount or slower HDD. @@ -878,10 +890,13 @@ If using Docker or NFS/SMB/FUSE for the `data/archive/` folder, you may need to
+ --- +
+ ## Screenshots
@@ -922,23 +937,27 @@ If using Docker or NFS/SMB/FUSE for the `data/archive/` folder, you may need to

+ --- -
+
paisley graphic
+ # Background & Motivation ArchiveBox aims to enable more of the internet to be saved from deterioration by empowering people to self-host their own archives. The intent is for all the web content you care about to be viewable with common software in 50 - 100 years without needing to run ArchiveBox or other specialized software to replay it. +
Click to read more...
+ Vast treasure troves of knowledge are lost every day on the internet to link rot. As a society, we have an imperative to preserve some important parts of that treasure, just like we preserve our books, paintings, and music in physical libraries long after the originals go out of print or fade into obscurity. Whether it's to resist censorship by saving articles before they get taken down or edited, or just to save a collection of early 2010's flash games you love to play, having the tools to archive internet content enables to you save the stuff you care most about before it disappears. @@ -948,14 +967,17 @@ Whether it's to resist censorship by saving articles before they get taken down Image from Perma.cc...
+ The balance between the permanence and ephemeral nature of content on the internet is part of what makes it beautiful. I don't think everything should be preserved in an automated fashion--making all content permanent and never removable, but I do think people should be able to decide for themselves and effectively archive specific content that they care about. Because modern websites are complicated and often rely on dynamic content, ArchiveBox archives the sites in **several different formats** beyond what public archiving services like Archive.org/Archive.is save. Using multiple methods and the market-dominant browser to execute JS ensures we can save even the most complex, finicky websites in at least a few high-quality, long-term data formats. +

+ ## Comparison to Other Projects comparison From 5de45dbf30c277fd39c2c4388cdcae1f159efb6b Mon Sep 17 00:00:00 2001 From: Ben Muthalaly Date: Mon, 8 Jan 2024 22:55:30 -0600 Subject: [PATCH 008/166] Show upgrade notification in admin snapshot view --- archivebox/core/admin.py | 14 +++++++++++++- archivebox/core/urls.py | 8 -------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py index c4974c3a..30aacc90 100644 --- a/archivebox/core/admin.py +++ b/archivebox/core/admin.py @@ -23,8 +23,16 @@ from core.mixins import SearchResultsAdminMixin from index.html import snapshot_icons from logging_util import printable_filesize from main import add, remove -from config import OUTPUT_DIR, SNAPSHOTS_PER_PAGE from extractors import archive_links +from config import ( + OUTPUT_DIR, + SNAPSHOTS_PER_PAGE, + VERSION, + VERSIONS_AVAILABLE, + CAN_UPGRADE +) + +GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': VERSIONS_AVAILABLE, 'CAN_UPGRADE': CAN_UPGRADE} # Admin URLs # /admin/ @@ -96,6 +104,10 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin): action_form = SnapshotActionForm + def changelist_view(self, request, extra_context=None): + extra_context = extra_context or {} + return super().changelist_view(request, extra_context | GLOBAL_CONTEXT) + def get_urls(self): urls = super().get_urls() custom_urls = [ diff --git a/archivebox/core/urls.py b/archivebox/core/urls.py index f89273ff..1111ead4 100644 --- a/archivebox/core/urls.py +++ b/archivebox/core/urls.py @@ -8,11 +8,6 @@ from django.views.generic.base import RedirectView from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView -# GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306 -# from config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE -# GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': VERSIONS_AVAILABLE, 'CAN_UPGRADE': 
CAN_UPGRADE} - - # print('DEBUG', settings.DEBUG) urlpatterns = [ @@ -36,9 +31,6 @@ urlpatterns = [ path('accounts/', include('django.contrib.auth.urls')), path('admin/', admin.site.urls), - # do not add extra_context like this as not all admin views (e.g. ModelAdmin.autocomplete_view accept extra kwargs) - # path('admin/', admin.site.urls, {'extra_context': GLOBAL_CONTEXT}), - path('health/', HealthCheckView.as_view(), name='healthcheck'), path('error/', lambda _: 1/0), From 3d2c4c70d267e5bfb09f6ffb333e83a70a62587b Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 9 Jan 2024 20:38:38 -0800 Subject: [PATCH 009/166] Update README.md --- README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index efc5744a..e78c8598 100644 --- a/README.md +++ b/README.md @@ -630,8 +630,7 @@ Data folders can be created anywhere (`~/archivebox` or `$PWD/data` as seen in o
-Expand to learn more about the layout of Archivebox's data on-disk... -
+Expand to learn more about the layout of Archivebox's data on-disk...
All `archivebox` CLI commands are designed to be run from inside an ArchiveBox data folder, starting with `archivebox init` to initialize a new collection inside an empty directory. @@ -664,7 +663,7 @@ The on-disk layout is optimized to be easy to browse by hand and durable long-te Each snapshot subfolder `./archive//` includes a static `index.json` and `index.html` describing its contents, and the snapshot extractor outputs are plain files within the folder. -#### Learn More +

Learn More

- https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Disk-Layout - https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#large-archives @@ -683,8 +682,7 @@ You can export the main index to browse it statically as plain HTML files in a f
-Expand to learn how to export your ArchiveBox collection... -
+Expand to learn how to export your ArchiveBox collection...
> *NOTE: These exports are not paginated, exporting many URLs or the entire archive at once may be slow.* From 23a9c538c2d4317996e4efe72196d7c2bb2fde82 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 9 Jan 2024 20:46:22 -0800 Subject: [PATCH 010/166] Update README.md --- README.md | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index e78c8598..0ad793e9 100644 --- a/README.md +++ b/README.md @@ -633,20 +633,17 @@ Data folders can be created anywhere (`~/archivebox` or `$PWD/data` as seen in o Expand to learn more about the layout of Archivebox's data on-disk...
-All `archivebox` CLI commands are designed to be run from inside an ArchiveBox data folder, starting with `archivebox init` to initialize a new collection inside an empty directory. +All archivebox CLI commands are designed to be run from inside an ArchiveBox data folder, starting with archivebox init to initialize a new collection inside an empty directory. -```bash -mkdir ~/archivebox && cd ~/archivebox # just an example, can be anywhere -archivebox init -``` +
mkdir ~/archivebox && cd ~/archivebox   # just an example, can be anywhere
+archivebox init
-The on-disk layout is optimized to be easy to browse by hand and durable long-term. The main index is a standard `index.sqlite3` database in the root of the data folder (it can also be [exported as static JSON/HTML](https://github.com/ArchiveBox/ArchiveBox/wiki/Publishing-Your-Archive#2-export-and-host-it-as-static-html)), and the archive snapshots are organized by date-added timestamp in the `./archive/` subfolder. +The on-disk layout is optimized to be easy to browse by hand and durable long-term. The main index is a standard index.sqlite3 database in the root of the data folder (it can also be exported as static JSON/HTML), and the archive snapshots are organized by date-added timestamp in the ./archive/ subfolder. -```bash -/data/ +
/data/
     index.sqlite3
     ArchiveBox.conf
     archive/
@@ -659,18 +656,18 @@ The on-disk layout is optimized to be easy to browse by hand and durable long-te
             warc/1617687755.warc.gz
             git/somerepo.git
             ...
-```
+
-Each snapshot subfolder `./archive/<timestamp>/` includes a static `index.json` and `index.html` describing its contents, and the snapshot extractor outputs are plain files within the folder. +Each snapshot subfolder ./archive/<timestamp>/ includes a static index.json and index.html describing its contents, and the snapshot extractor outputs are plain files within the folder.

Learn More

- -- https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Disk-Layout -- https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#large-archives -- https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#output-folder -- https://github.com/ArchiveBox/ArchiveBox/wiki/Publishing-Your-Archive -- https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives - +
    +
  • https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Disk-Layout
  • +
  • https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#large-archives
  • +
  • https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#output-folder
  • +
  • https://github.com/ArchiveBox/ArchiveBox/wiki/Publishing-Your-Archive
  • +
  • https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives
  • +

From 4adb214812113665c2b7d96c4b43b289e35da8d5 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 9 Jan 2024 21:12:17 -0800 Subject: [PATCH 011/166] Update README.md --- README.md | 97 ++++++++++++++++++++++++++----------------------------- 1 file changed, 45 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 0ad793e9..1a401375 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,16 @@ -
+

ArchiveBox
Open-source self-hosted web archiving.


-▶️ Quickstart | -Demo | -GitHub | -Documentation | -Info & Motivation | -Community +▶️ Quickstart | Demo | GitHub | Documentation | Info & Motivation | Community
- - -   - - - +   -   - +   -     -       -         -     +     -     +     -     +     -     +     ## Internet Archiving Ecosystem @@ -1476,16 +1473,10 @@ Extractors take the URL of a page to archive, write their output to the filesyst -- Home: [ArchiveBox.io](https://archivebox.io) -- Demo: [Demo.ArchiveBox.io](https://demo.archivebox.io) -- Docs: [Docs.ArchiveBox.io](https://docs.archivebox.io) -- Releases: [Github.com/ArchiveBox/ArchiveBox/releases](https://github.com/ArchiveBox/ArchiveBox/releases) -- Wiki: [Github.com/ArchiveBox/ArchiveBox/wiki](https://github.com/ArchiveBox/ArchiveBox/wiki) -- Issues: [Github.com/ArchiveBox/ArchiveBox/issues](https://github.com/ArchiveBox/ArchiveBox/issues) -- Discussions: [Github.com/ArchiveBox/ArchiveBox/discussions](https://github.com/ArchiveBox/ArchiveBox/discussions) -- Community Chat: [Zulip Chat (preferred)](https://zulip.archivebox.io) or [Matrix Chat (old)](https://app.element.io/#/room/#archivebox:matrix.org) +- [ArchiveBox.io Homepage](https://archivebox.io) / [Source Code (Github)](https://github.com/ArchiveBox/ArchiveBox) / [Demo Server](https://demo.archivebox.io) +- [Documentation Wiki](https://github.com/ArchiveBox/ArchiveBox/wiki) / [API Reference Docs](https://docs.archivebox.io) / [Changelog](https://github.com/ArchiveBox/ArchiveBox/releases) +- [Bug Tracker](https://github.com/ArchiveBox/ArchiveBox/issues) / [Discussions](https://github.com/ArchiveBox/ArchiveBox/discussions) / [Community Chat Forum (Zulip)](https://zulip.archivebox.io) - Social Media: [Twitter](https://twitter.com/ArchiveBoxApp), [LinkedIn](https://www.linkedin.com/company/archivebox/), [YouTube](https://www.youtube.com/@ArchiveBoxApp), [Alternative.to](https://alternativeto.net/software/archivebox/about/), [Reddit](https://www.reddit.com/r/ArchiveBox/) -- Donations: [Github.com/ArchiveBox/ArchiveBox/wiki/Donations](https://github.com/ArchiveBox/ArchiveBox/wiki/Donations) --- @@ -1496,6 +1487,8 @@ Extractors take the URL of a page to 
archive, write their output to the filesyst     + +
ArchiveBox operates as a US 501(c)(3) nonprofit (sponsored by HCB), donations are tax-deductible.

From 2d26728c2ab074e66466b73a0b219d8f1de89ee4 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sun, 28 Jan 2024 04:27:40 -0800 Subject: [PATCH 120/166] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 60144ed3..86ec1d37 100644 --- a/README.md +++ b/README.md @@ -1164,7 +1164,7 @@ ArchiveBox is neither the highest fidelity nor the simplest tool available for s
-Our Community Wikia tries to be a comprehensive index of the broader web archiving community... +Our Community Wiki strives to be a comprehensive index of the broader web archiving community...
- [Community Wiki](https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community) From a4be98dd2bfba6b22724a59d9851df1f6c4be877 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sun, 28 Jan 2024 04:30:35 -0800 Subject: [PATCH 121/166] Update README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 86ec1d37..5ded344a 100644 --- a/README.md +++ b/README.md @@ -1485,10 +1485,10 @@ Extractors take the URL of a page to archive, write their output to the filesyst 🏛️ Contact us for professional support 💬


  -   - - - +   +   +   +
ArchiveBox operates as a US 501(c)(3) nonprofit (sponsored by HCB), donations are tax-deductible.

From a4bd4410775d46863a4c1a16e48e5acf5722e8a5 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Wed, 31 Jan 2024 01:59:43 -0800 Subject: [PATCH 122/166] Update config.py fix trim-filenames --- archivebox/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archivebox/config.py b/archivebox/config.py index 0dfc41dd..3186a6b0 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -154,7 +154,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = { 'CHROME_SANDBOX': {'type': bool, 'default': lambda c: not c['IN_DOCKER']}, 'YOUTUBEDL_ARGS': {'type': list, 'default': lambda c: [ '--restrict-filenames', - '--trim-filenames', + '--trim-filenames', '128', '--write-description', '--write-info-json', '--write-annotations', From eb62b4403619d89e352cb497521641c70286e1f4 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 8 Feb 2024 00:11:27 -0800 Subject: [PATCH 123/166] Update README on Docker Hub when docker is built --- .github/workflows/docker.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 75c7658c..5102aecb 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -81,6 +81,13 @@ jobs: - name: Image digest run: echo ${{ steps.docker_build.outputs.digest }} + + - name: Update README + uses: peter-evans/dockerhub-description@v4 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + repository: archivebox/archivebox # This ugly bit is necessary if you don't want your cache to grow forever # until it hits GitHub's limit of 5GB. 
From f5aaeb6de7a780808db8d79c20af3b53857d6414 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 8 Feb 2024 00:47:48 -0800 Subject: [PATCH 124/166] Update docker.yml --- .github/workflows/docker.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 5102aecb..871f0260 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -85,8 +85,8 @@ jobs: - name: Update README uses: peter-evans/dockerhub-description@v4 with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_PASSWORD }} + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} repository: archivebox/archivebox # This ugly bit is necessary if you don't want your cache to grow forever From 19aefc85e6c3801ac6c77246c1534fc9758739df Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 8 Feb 2024 18:58:12 -0800 Subject: [PATCH 125/166] fix get_system_user failing on uid 999 in k3s --- archivebox/config.py | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/archivebox/config.py b/archivebox/config.py index 3186a6b0..1edd2eeb 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -366,24 +366,32 @@ ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE ############################## Version Config ################################## -def get_system_user(): - SYSTEM_USER = getpass.getuser() or os.getlogin() +def get_system_user() -> str: + # some host OS's are unable to provide a username (k3s, Windows), making this complicated + # uid 999 is especially problematic and breaks many attempts + SYSTEM_USER = None + FALLBACK_USER_PLACHOLDER = f'user_{os.getuid()}' + + # Option 1 try: import pwd - return pwd.getpwuid(os.geteuid()).pw_name or SYSTEM_USER - except KeyError: - # Process' UID might not map to a user in cases such as running the Docker image - # (where 
`archivebox` is 999) as a different UID. - pass - except ModuleNotFoundError: - # pwd doesn't exist on windows - pass - except Exception: - # this should never happen, uncomment to debug - # raise + SYSTEM_USER = SYSTEM_USER or pwd.getpwuid(os.geteuid()).pw_name + except (ModuleNotFoundError, Exception): pass - return SYSTEM_USER + # Option 2 + try: + SYSTEM_USER = SYSTEM_USER or getpass.getuser() + except Exception: + pass + + # Option 3 + try: + SYSTEM_USER = SYSTEM_USER or os.getlogin() + except Exception: + pass + + return SYSTEM_USER or FALLBACK_USER_PLACHOLDER def get_version(config): try: From 91c46411990147fa9db4a0b35a3a195bad78673f Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 12 Feb 2024 21:26:34 -0800 Subject: [PATCH 126/166] skip dir size calculation when path is too long --- archivebox/system.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/archivebox/system.py b/archivebox/system.py index d80a2cb5..6e03846f 100644 --- a/archivebox/system.py +++ b/archivebox/system.py @@ -146,20 +146,24 @@ def get_dir_size(path: Union[str, Path], recursive: bool=True, pattern: Optional recursively and limiting to a given filter list """ num_bytes, num_dirs, num_files = 0, 0, 0 - for entry in os.scandir(path): - if (pattern is not None) and (pattern not in entry.path): - continue - if entry.is_dir(follow_symlinks=False): - if not recursive: + try: + for entry in os.scandir(path): + if (pattern is not None) and (pattern not in entry.path): continue - num_dirs += 1 - bytes_inside, dirs_inside, files_inside = get_dir_size(entry.path) - num_bytes += bytes_inside - num_dirs += dirs_inside - num_files += files_inside - else: - num_bytes += entry.stat(follow_symlinks=False).st_size - num_files += 1 + if entry.is_dir(follow_symlinks=False): + if not recursive: + continue + num_dirs += 1 + bytes_inside, dirs_inside, files_inside = get_dir_size(entry.path) + num_bytes += bytes_inside + num_dirs += dirs_inside + 
num_files += files_inside + else: + num_bytes += entry.stat(follow_symlinks=False).st_size + num_files += 1 + except OSError: + # e.g. FileNameTooLong or other error while trying to read dir + pass return num_bytes, num_dirs, num_files From 903c72fa8871f4bf17afa032064853edcdb30120 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 12 Feb 2024 21:28:52 -0800 Subject: [PATCH 127/166] fix typing errors --- archivebox/system.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/archivebox/system.py b/archivebox/system.py index 6e03846f..bced0bac 100644 --- a/archivebox/system.py +++ b/archivebox/system.py @@ -30,8 +30,7 @@ def run(cmd, *args, input=None, capture_output=True, timeout=None, check=False, if capture_output: if ('stdout' in kwargs) or ('stderr' in kwargs): - raise ValueError('stdout and stderr arguments may not be used ' - 'with capture_output.') + raise ValueError('stdout and stderr arguments may not be used with capture_output.') kwargs['stdout'] = PIPE kwargs['stderr'] = PIPE @@ -175,7 +174,7 @@ def dedupe_cron_jobs(cron: CronTab) -> CronTab: deduped: Set[Tuple[str, str]] = set() for job in list(cron): - unique_tuple = (str(job.slices), job.command) + unique_tuple = (str(job.slices), str(job.command)) if unique_tuple not in deduped: deduped.add(unique_tuple) cron.remove(job) From fa24136ff7049b4c5b924537f0b646d0bfc5518f Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sun, 18 Feb 2024 05:46:53 -0800 Subject: [PATCH 128/166] Update setup.sh --- bin/setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/setup.sh b/bin/setup.sh index 1b482c9e..3ca41a29 100755 --- a/bin/setup.sh +++ b/bin/setup.sh @@ -165,7 +165,7 @@ if ! (python3 --version && python3 -m pip --version && python3 -m django --versi exit 1 fi -if ! (python3 -m django --version && which -a archivebox); then +if ! 
(python3 -m django --version && python3 -m pip show archivebox && which -a archivebox); then echo "[X] Django and ArchiveBox were not found after installing!" echo " Check to see if a previous step failed." echo From 0899c298c0392dcfcece8a4876b3d25a186e4d61 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 22 Feb 2024 04:46:44 -0800 Subject: [PATCH 129/166] ignore data dirs --- .dockerignore | 1 + .gitignore | 1 + 2 files changed, 2 insertions(+) diff --git a/.dockerignore b/.dockerignore index 08408d22..27ad7a81 100644 --- a/.dockerignore +++ b/.dockerignore @@ -28,4 +28,5 @@ assets/ docker/ data/ +data*/ output/ diff --git a/.gitignore b/.gitignore index 22cad1c0..e789041c 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ data/ data1/ data2/ data3/ +data*/ output/ # vim From 3afd7b0cf0f9f1ac6bb2e41a3325c9338ee171c5 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 22 Feb 2024 04:47:01 -0800 Subject: [PATCH 130/166] add healthcheck to Dockerfile --- Dockerfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 454effe8..541c338a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -293,9 +293,8 @@ WORKDIR "$DATA_DIR" VOLUME "$DATA_DIR" EXPOSE 8000 -# Optional: -# HEALTHCHECK --interval=30s --timeout=20s --retries=15 \ -# CMD curl --silent 'http://localhost:8000/admin/login/' || exit 1 +HEALTHCHECK --interval=30s --timeout=20s --retries=15 \ + CMD curl --silent 'http://localhost:8000/health/' | grep -q 'OK' ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"] CMD ["archivebox", "server", "--quick-init", "0.0.0.0:8000"] From 2d32f05a628e423c2b5ced6b67f07e2f1d536e3d Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 22 Feb 2024 04:47:51 -0800 Subject: [PATCH 131/166] lower default sonic log level --- etc/sonic.cfg | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/etc/sonic.cfg b/etc/sonic.cfg index 10fbda53..0018c87c 100644 --- a/etc/sonic.cfg +++ b/etc/sonic.cfg @@ -6,7 
+6,8 @@ [server] -log_level = "debug" +# log_level = "debug" +log_level = "warn" [channel] From 6a4e568d1b9e18967278970039ae507144abdb54 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 22 Feb 2024 04:49:09 -0800 Subject: [PATCH 132/166] new archivebox update speed improvements --- archivebox/extractors/__init__.py | 9 ++++++++- archivebox/extractors/htmltotext.py | 5 +++-- archivebox/extractors/singlefile.py | 6 ++++-- archivebox/extractors/title.py | 2 +- archivebox/index/__init__.py | 6 +++--- archivebox/logging_util.py | 4 +++- archivebox/main.py | 11 +++++++++-- archivebox/util.py | 6 +++++- 8 files changed, 36 insertions(+), 13 deletions(-) diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py index 2515b8fd..0249897b 100644 --- a/archivebox/extractors/__init__.py +++ b/archivebox/extractors/__init__.py @@ -186,6 +186,13 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s ts ) + "\n" + str(e) + "\n")) #f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n") + + # print(f' ERROR: {method_name} {e.__class__.__name__}: {e} {getattr(e, "hints", "")}', ts, link.url, command) + raise Exception('Exception in archive_methods.save_{}(Link(url={}))'.format( + method_name, + link.url, + )) from e + # print(' ', stats) @@ -218,7 +225,7 @@ def archive_links(all_links: Union[Iterable[Link], QuerySet], overwrite: bool=Fa if type(all_links) is QuerySet: num_links: int = all_links.count() - get_link = lambda x: x.as_link() + get_link = lambda x: x.as_link_with_details() all_links = all_links.iterator() else: num_links: int = len(all_links) diff --git a/archivebox/extractors/htmltotext.py b/archivebox/extractors/htmltotext.py index 18722f13..0686f76e 100644 --- a/archivebox/extractors/htmltotext.py +++ b/archivebox/extractors/htmltotext.py @@ -121,9 +121,11 @@ def save_htmltotext(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO out_dir 
= Path(out_dir or link.link_dir) output = "htmltotext.txt" + cmd = ['(internal) archivebox.extractors.htmltotext', './{singlefile,dom}.html'] timer = TimedProgress(timeout, prefix=' ') extracted_text = None + status = 'failed' try: extractor = HTMLTextExtractor() document = get_html(link, out_dir) @@ -136,10 +138,9 @@ def save_htmltotext(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO extracted_text = str(extractor) atomic_write(str(out_dir / output), extracted_text) + status = 'succeeded' except (Exception, OSError) as err: - status = 'failed' output = err - cmd = ['(internal) archivebox.extractors.htmltotext', './{singlefile,dom}.html'] finally: timer.end() diff --git a/archivebox/extractors/singlefile.py b/archivebox/extractors/singlefile.py index e3860527..e50b3932 100644 --- a/archivebox/extractors/singlefile.py +++ b/archivebox/extractors/singlefile.py @@ -77,6 +77,7 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO status = 'succeeded' timer = TimedProgress(timeout, prefix=' ') + result = None try: result = run(cmd, cwd=str(out_dir), timeout=timeout) @@ -84,7 +85,7 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO # "Downloaded: 76 files, 4.0M in 1.6s (2.52 MB/s)" output_tail = [ line.strip() - for line in (result.stdout + result.stderr).decode().rsplit('\n', 3)[-3:] + for line in (result.stdout + result.stderr).decode().rsplit('\n', 5)[-5:] if line.strip() ] hints = ( @@ -94,12 +95,13 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO # Check for common failure cases if (result.returncode > 0) or not (out_dir / output).is_file(): - raise ArchiveError('SingleFile was not able to archive the page', hints) + raise ArchiveError(f'SingleFile was not able to archive the page (status={result.returncode})', hints) chmod_file(output, cwd=str(out_dir)) except (Exception, OSError) as err: status = 'failed' # TODO: Make this prettier. 
This is necessary to run the command (escape JSON internal quotes). cmd[2] = browser_args.replace('"', "\\\"") + err.hints = (result.stdout + result.stderr).decode().split('\n') output = err finally: timer.end() diff --git a/archivebox/extractors/title.py b/archivebox/extractors/title.py index 3505e03f..6b0e37f6 100644 --- a/archivebox/extractors/title.py +++ b/archivebox/extractors/title.py @@ -75,7 +75,7 @@ def get_html(link: Link, path: Path, timeout: int=TIMEOUT) -> str: with open(abs_path / source, "r", encoding="utf-8") as f: document = f.read() break - except (FileNotFoundError, TypeError): + except (FileNotFoundError, TypeError, UnicodeDecodeError): continue if document is None: return download_url(link.url, timeout=timeout) diff --git a/archivebox/index/__init__.py b/archivebox/index/__init__.py index b9d57aeb..9912b4c7 100644 --- a/archivebox/index/__init__.py +++ b/archivebox/index/__init__.py @@ -407,7 +407,7 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]: """indexed links without checking archive status or data directory validity""" - links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()] + links = (snapshot.as_link() for snapshot in snapshots.iterator()) return { link.link_dir: link for link in links @@ -415,7 +415,7 @@ def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Option def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]: """indexed links that are archived with a valid data directory""" - links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()] + links = (snapshot.as_link() for snapshot in snapshots.iterator()) return { link.link_dir: link for link in filter(is_archived, links) @@ -423,7 +423,7 @@ def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optio def get_unarchived_folders(snapshots, 
out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]: """indexed links that are unarchived with no data directory or an empty data directory""" - links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()] + links = (snapshot.as_link() for snapshot in snapshots.iterator()) return { link.link_dir: link for link in filter(is_unarchived, links) diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py index 3c688a3c..933214b9 100644 --- a/archivebox/logging_util.py +++ b/archivebox/logging_util.py @@ -432,12 +432,14 @@ def log_archive_method_finished(result: "ArchiveResult"): **ANSI, ), ] + + # import pudb; pudb.set_trace() # Prettify error output hints string and limit to five lines hints = getattr(result.output, 'hints', None) or () if hints: if isinstance(hints, (list, tuple, type(_ for _ in ()))): - hints = [hint.decode() for hint in hints if isinstance(hint, bytes)] + hints = [hint.decode() if isinstance(hint, bytes) else str(hint) for hint in hints] else: if isinstance(hints, bytes): hints = hints.decode() diff --git a/archivebox/main.py b/archivebox/main.py index 76b204b8..7389c032 100755 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -791,6 +791,8 @@ def update(resume: Optional[float]=None, out_dir: Path=OUTPUT_DIR) -> List[Link]: """Import any new links from subscriptions and retry any previously failed/skipped links""" + from core.models import ArchiveResult + check_data_folder(out_dir=out_dir) check_dependencies() new_links: List[Link] = [] # TODO: Remove input argument: only_new @@ -798,19 +800,23 @@ def update(resume: Optional[float]=None, extractors = extractors.split(",") if extractors else [] # Step 1: Filter for selected_links + print('[*] Finding matching Snapshots to update...') + print(f' - Filtering by {" ".join(filter_patterns)} ({filter_type}) {before=} {after=} {status=}...') matching_snapshots = list_links( filter_patterns=filter_patterns, filter_type=filter_type, before=before, after=after, ) - + print(f' 
- Checking {matching_snapshots.count()} snapshot folders for existing data with {status=}...') matching_folders = list_folders( links=matching_snapshots, status=status, out_dir=out_dir, ) - all_links = [link for link in matching_folders.values() if link] + all_links = (link for link in matching_folders.values() if link) + print(' - Sorting by most unfinished -> least unfinished + date archived...') + all_links = sorted(all_links, key=lambda link: (ArchiveResult.objects.filter(snapshot__url=link.url).count(), link.timestamp)) if index_only: for link in all_links: @@ -836,6 +842,7 @@ def update(resume: Optional[float]=None, if extractors: archive_kwargs["methods"] = extractors + archive_links(to_archive, overwrite=overwrite, **archive_kwargs) # Step 4: Re-write links index with updated titles, icons, and resources diff --git a/archivebox/util.py b/archivebox/util.py index d7df7f3c..5321081c 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -179,7 +179,11 @@ def download_url(url: str, timeout: int=None) -> str: if encoding is not None: response.encoding = encoding - return response.text + try: + return response.text + except UnicodeDecodeError: + # if response is non-test (e.g. 
image or other binary files), just return the filename instead + return url.rsplit('/', 1)[-1] @enforce_types def get_headers(url: str, timeout: int=None) -> str: From 8b9bc3dec8281026179c930684e4f939e5e3a210 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 22 Feb 2024 04:49:50 -0800 Subject: [PATCH 133/166] minor fixes --- archivebox/extractors/__init__.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py index 0249897b..cb1c6841 100644 --- a/archivebox/extractors/__init__.py +++ b/archivebox/extractors/__init__.py @@ -131,7 +131,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s link = load_link_details(link, out_dir=out_dir) write_link_details(link, out_dir=out_dir, skip_sql_index=False) - log_link_archiving_started(link, out_dir, is_new) + log_link_archiving_started(link, str(out_dir), is_new) link = link.overwrite(updated=datetime.now(timezone.utc)) stats = {'skipped': 0, 'succeeded': 0, 'failed': 0} start_ts = datetime.now(timezone.utc) @@ -165,16 +165,6 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s # print('{black} X {}{reset}'.format(method_name, **ANSI)) stats['skipped'] += 1 except Exception as e: - # Disabled until https://github.com/ArchiveBox/ArchiveBox/issues/984 - # and https://github.com/ArchiveBox/ArchiveBox/issues/1014 - # are fixed. 
- """ - raise Exception('Exception in archive_methods.save_{}(Link(url={}))'.format( - method_name, - link.url, - )) from e - """ - # Instead, use the kludgy workaround from # https://github.com/ArchiveBox/ArchiveBox/issues/984#issuecomment-1150541627 with open(ERROR_LOG, "a", encoding='utf-8') as f: command = ' '.join(sys.argv) From 4e69d2c9e14bbbc4597731fdc349f5461a726b54 Mon Sep 17 00:00:00 2001 From: Ben Muthalaly Date: Wed, 21 Feb 2024 15:13:06 -0600 Subject: [PATCH 134/166] Add `EXTRA_*_ARGS` for wget, curl, and singlefile --- archivebox/config.py | 8 +++++++- archivebox/extractors/archive_org.py | 13 ++++++++++--- archivebox/extractors/favicon.py | 18 ++++++++++++++---- archivebox/extractors/headers.py | 14 ++++++++++---- archivebox/extractors/singlefile.py | 25 +++++++++---------------- archivebox/extractors/title.py | 13 ++++++++++--- archivebox/extractors/wget.py | 15 +++++++++++---- archivebox/util.py | 17 +++++++++++++++++ 8 files changed, 88 insertions(+), 35 deletions(-) diff --git a/archivebox/config.py b/archivebox/config.py index 1edd2eeb..ebb939a4 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -187,12 +187,15 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = { '--no-parent', '-e', 'robots=off', ]}, + 'WGET_EXTRA_ARGS': {'type': list, 'default': None}, 'CURL_ARGS': {'type': list, 'default': ['--silent', '--location', '--compressed' ]}, + 'CURL_EXTRA_ARGS': {'type': list, 'default': None}, 'GIT_ARGS': {'type': list, 'default': ['--recursive']}, - 'SINGLEFILE_ARGS': {'type': list, 'default' : None}, + 'SINGLEFILE_ARGS': {'type': list, 'default': None}, + 'SINGLEFILE_EXTRA_ARGS': {'type': list, 'default': None}, 'FAVICON_PROVIDER': {'type': str, 'default': 'https://www.google.com/s2/favicons?domain={}'}, }, @@ -530,6 +533,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = { 'CURL_VERSION': {'default': lambda c: bin_version(c['CURL_BINARY']) if c['USE_CURL'] else None}, 'CURL_USER_AGENT': {'default': lambda c: 
c['CURL_USER_AGENT'].format(**c)}, 'CURL_ARGS': {'default': lambda c: c['CURL_ARGS'] or []}, + 'CURL_EXTRA_ARGS': {'default': lambda c: c['CURL_EXTRA_ARGS'] or []}, 'SAVE_FAVICON': {'default': lambda c: c['USE_CURL'] and c['SAVE_FAVICON']}, 'SAVE_ARCHIVE_DOT_ORG': {'default': lambda c: c['USE_CURL'] and c['SAVE_ARCHIVE_DOT_ORG']}, @@ -540,12 +544,14 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = { 'SAVE_WGET': {'default': lambda c: c['USE_WGET'] and c['SAVE_WGET']}, 'SAVE_WARC': {'default': lambda c: c['USE_WGET'] and c['SAVE_WARC']}, 'WGET_ARGS': {'default': lambda c: c['WGET_ARGS'] or []}, + 'WGET_EXTRA_ARGS': {'default': lambda c: c['WGET_EXTRA_ARGS'] or []}, 'RIPGREP_VERSION': {'default': lambda c: bin_version(c['RIPGREP_BINARY']) if c['USE_RIPGREP'] else None}, 'USE_SINGLEFILE': {'default': lambda c: c['USE_SINGLEFILE'] and c['SAVE_SINGLEFILE']}, 'SINGLEFILE_VERSION': {'default': lambda c: bin_version(c['SINGLEFILE_BINARY']) if c['USE_SINGLEFILE'] else None}, 'SINGLEFILE_ARGS': {'default': lambda c: c['SINGLEFILE_ARGS'] or []}, + 'SINGLEFILE_EXTRA_ARGS': {'default': lambda c: c['SINGLEFILE_EXTRA_ARGS'] or []}, 'USE_READABILITY': {'default': lambda c: c['USE_READABILITY'] and c['SAVE_READABILITY']}, 'READABILITY_VERSION': {'default': lambda c: bin_version(c['READABILITY_BINARY']) if c['USE_READABILITY'] else None}, diff --git a/archivebox/extractors/archive_org.py b/archivebox/extractors/archive_org.py index a0883113..93730f26 100644 --- a/archivebox/extractors/archive_org.py +++ b/archivebox/extractors/archive_org.py @@ -10,10 +10,12 @@ from ..system import run, chmod_file from ..util import ( enforce_types, is_static_file, + dedupe, ) from ..config import ( TIMEOUT, CURL_ARGS, + CURL_EXTRA_ARGS, CHECK_SSL_VALIDITY, SAVE_ARCHIVE_DOT_ORG, CURL_BINARY, @@ -44,13 +46,18 @@ def save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, timeout: int= output: ArchiveOutput = 'archive.org.txt' archive_org_url = None submit_url = 
'https://web.archive.org/save/{}'.format(link.url) - cmd = [ - CURL_BINARY, - *CURL_ARGS, + # earlier options take precedence + options = [ '--head', '--max-time', str(timeout), *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []), *([] if CHECK_SSL_VALIDITY else ['--insecure']), + *CURL_EXTRA_ARGS, + *CURL_ARGS, + ] + cmd = [ + CURL_BINARY, + *dedupe(*options), submit_url, ] status = 'succeeded' diff --git a/archivebox/extractors/favicon.py b/archivebox/extractors/favicon.py index 5baafc17..3b41f349 100644 --- a/archivebox/extractors/favicon.py +++ b/archivebox/extractors/favicon.py @@ -6,13 +6,18 @@ from typing import Optional from ..index.schema import Link, ArchiveResult, ArchiveOutput from ..system import chmod_file, run -from ..util import enforce_types, domain +from ..util import ( + enforce_types, + domain, + dedupe, +) from ..config import ( TIMEOUT, SAVE_FAVICON, FAVICON_PROVIDER, CURL_BINARY, CURL_ARGS, + CURL_EXTRA_ARGS, CURL_VERSION, CHECK_SSL_VALIDITY, CURL_USER_AGENT, @@ -34,13 +39,18 @@ def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) out_dir = out_dir or link.link_dir output: ArchiveOutput = 'favicon.ico' - cmd = [ - CURL_BINARY, - *CURL_ARGS, + # earlier options take precedence + options = [ '--max-time', str(timeout), '--output', str(output), *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []), *([] if CHECK_SSL_VALIDITY else ['--insecure']), + *CURL_EXTRA_ARGS, + *CURL_ARGS, + ] + cmd = [ + CURL_BINARY, + *dedupe(*options), FAVICON_PROVIDER.format(domain(link.url)), ] status = 'failed' diff --git a/archivebox/extractors/headers.py b/archivebox/extractors/headers.py index 91dcb8e3..3828de93 100644 --- a/archivebox/extractors/headers.py +++ b/archivebox/extractors/headers.py @@ -9,11 +9,13 @@ from ..system import atomic_write from ..util import ( enforce_types, get_headers, + dedupe, ) from ..config import ( TIMEOUT, CURL_BINARY, CURL_ARGS, + CURL_EXTRA_ARGS, 
CURL_USER_AGENT, CURL_VERSION, CHECK_SSL_VALIDITY, @@ -40,14 +42,18 @@ def save_headers(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT) status = 'succeeded' timer = TimedProgress(timeout, prefix=' ') - - cmd = [ - CURL_BINARY, - *CURL_ARGS, + # earlier options take precedence + options = [ '--head', '--max-time', str(timeout), *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []), *([] if CHECK_SSL_VALIDITY else ['--insecure']), + *CURL_EXTRA_ARGS, + *CURL_ARGS, + ] + cmd = [ + CURL_BINARY, + *dedupe(*options), link.url, ] try: diff --git a/archivebox/extractors/singlefile.py b/archivebox/extractors/singlefile.py index e50b3932..b2119119 100644 --- a/archivebox/extractors/singlefile.py +++ b/archivebox/extractors/singlefile.py @@ -11,6 +11,7 @@ from ..util import ( enforce_types, is_static_file, chrome_args, + dedupe, ) from ..config import ( TIMEOUT, @@ -18,6 +19,7 @@ from ..config import ( DEPENDENCIES, SINGLEFILE_VERSION, SINGLEFILE_ARGS, + SINGLEFILE_EXTRA_ARGS, CHROME_BINARY, ) from ..logging_util import TimedProgress @@ -46,11 +48,6 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO # SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli browser_args = '--browser-args={}'.format(json.dumps(browser_args[1:])) - options = [ - *SINGLEFILE_ARGS, - '--browser-executable-path={}'.format(CHROME_BINARY), - browser_args, - ] # Deduplicate options (single-file doesn't like when you use the same option two times) # @@ -58,19 +55,15 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO # My logic is SINGLEFILE_ARGS is the option that affects the singlefile command with most # specificity, therefore the user sets it with a lot intent, therefore it should take precedence # kind of like the ergonomic principle of lexical scope in programming languages. 
- seen_option_names = [] - def test_seen(argument): - option_name = argument.split("=")[0] - if option_name in seen_option_names: - return False - else: - seen_option_names.append(option_name) - return True - deduped_options = list(filter(test_seen, options)) - + options = [ + '--browser-executable-path={}'.format(CHROME_BINARY), + browser_args, + *SINGLEFILE_EXTRA_ARGS, + *SINGLEFILE_ARGS, + ] cmd = [ DEPENDENCIES['SINGLEFILE_BINARY']['path'], - *deduped_options, + *dedupe(*options), link.url, output, ] diff --git a/archivebox/extractors/title.py b/archivebox/extractors/title.py index 6b0e37f6..b2b65af2 100644 --- a/archivebox/extractors/title.py +++ b/archivebox/extractors/title.py @@ -10,6 +10,7 @@ from ..util import ( enforce_types, download_url, htmldecode, + dedupe, ) from ..config import ( TIMEOUT, @@ -17,6 +18,7 @@ from ..config import ( SAVE_TITLE, CURL_BINARY, CURL_ARGS, + CURL_EXTRA_ARGS, CURL_VERSION, CURL_USER_AGENT, ) @@ -102,12 +104,17 @@ def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) - from core.models import Snapshot output: ArchiveOutput = None - cmd = [ - CURL_BINARY, - *CURL_ARGS, + # earlier options take precedence + options = [ '--max-time', str(timeout), *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []), *([] if CHECK_SSL_VALIDITY else ['--insecure']), + *CURL_EXTRA_ARGS, + *CURL_ARGS, + ] + cmd = [ + CURL_BINARY, + *dedupe(*options), link.url, ] status = 'succeeded' diff --git a/archivebox/extractors/wget.py b/archivebox/extractors/wget.py index f3057271..d50409b6 100644 --- a/archivebox/extractors/wget.py +++ b/archivebox/extractors/wget.py @@ -15,9 +15,11 @@ from ..util import ( path, domain, urldecode, + dedupe, ) from ..config import ( WGET_ARGS, + WGET_EXTRA_ARGS, TIMEOUT, SAVE_WGET, SAVE_WARC, @@ -55,10 +57,8 @@ def save_wget(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> # WGET CLI Docs: https://www.gnu.org/software/wget/manual/wget.html output: 
ArchiveOutput = None - cmd = [ - WGET_BINARY, - # '--server-response', # print headers for better error parsing - *WGET_ARGS, + # earlier options take precedence + options = [ '--timeout={}'.format(timeout), *(['--restrict-file-names={}'.format(RESTRICT_FILE_NAMES)] if RESTRICT_FILE_NAMES else []), *(['--warc-file={}'.format(str(warc_path))] if SAVE_WARC else []), @@ -68,6 +68,13 @@ def save_wget(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> *(['--compression=auto'] if WGET_AUTO_COMPRESSION else []), *([] if SAVE_WARC else ['--timestamping']), *([] if CHECK_SSL_VALIDITY else ['--no-check-certificate', '--no-hsts']), + # '--server-response', # print headers for better error parsing + *WGET_EXTRA_ARGS, + *WGET_ARGS, + ] + cmd = [ + WGET_BINARY, + *dedupe(*options), link.url, ] diff --git a/archivebox/util.py b/archivebox/util.py index 5321081c..6b31c86e 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -317,6 +317,23 @@ def ansi_to_html(text): return COLOR_REGEX.sub(single_sub, text) +@enforce_types +def dedupe(*options: List[str]) -> List[str]: + """ + Deduplicates the given options. Options that come earlier in the list clobber + later conflicting options. 
+ """ + seen_option_names = [] + def test_seen(argument): + option_name = argument.split("=")[0] + if option_name in seen_option_names: + return False + else: + seen_option_names.append(option_name) + return True + return list(filter(test_seen, options)) + + class AttributeDict(dict): """Helper to allow accessing dict values via Example.key or Example['key']""" From ab8f395e0a4104dd01385be3d8fcea082a6987ee Mon Sep 17 00:00:00 2001 From: Ben Muthalaly Date: Fri, 23 Feb 2024 15:40:31 -0600 Subject: [PATCH 135/166] Add `YOUTUBEDL_EXTRA_ARGS` --- archivebox/config.py | 1 + archivebox/extractors/media.py | 12 +++++++++--- archivebox/extractors/wget.py | 2 +- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/archivebox/config.py b/archivebox/config.py index ebb939a4..00e3b9f0 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -176,6 +176,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = { '--add-metadata', '--format=(bv*+ba/b)[filesize<={}][filesize_approx<=?{}]/(bv*+ba/b)'.format(c['MEDIA_MAX_SIZE'], c['MEDIA_MAX_SIZE']), ]}, + 'YOUTUBEDL_EXTRA_ARGS': {'type': list, 'default': None}, 'WGET_ARGS': {'type': list, 'default': ['--no-verbose', diff --git a/archivebox/extractors/media.py b/archivebox/extractors/media.py index 7d73024f..862bb758 100644 --- a/archivebox/extractors/media.py +++ b/archivebox/extractors/media.py @@ -8,11 +8,13 @@ from ..system import run, chmod_file from ..util import ( enforce_types, is_static_file, + dedupe, ) from ..config import ( MEDIA_TIMEOUT, SAVE_MEDIA, YOUTUBEDL_ARGS, + YOUTUBEDL_EXTRA_ARGS, YOUTUBEDL_BINARY, YOUTUBEDL_VERSION, CHECK_SSL_VALIDITY @@ -39,11 +41,15 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME output: ArchiveOutput = 'media' output_path = out_dir / output output_path.mkdir(exist_ok=True) - cmd = [ - YOUTUBEDL_BINARY, - *YOUTUBEDL_ARGS, + options = [ *([] if CHECK_SSL_VALIDITY else ['--no-check-certificate']), # TODO: add 
--cookies-from-browser={CHROME_USER_DATA_DIR} + *YOUTUBEDL_EXTRA_ARGS, + *YOUTUBEDL_ARGS, + ] + cmd = [ + YOUTUBEDL_BINARY, + *dedupe(*options), link.url, ] status = 'succeeded' diff --git a/archivebox/extractors/wget.py b/archivebox/extractors/wget.py index d50409b6..5209cde9 100644 --- a/archivebox/extractors/wget.py +++ b/archivebox/extractors/wget.py @@ -69,7 +69,7 @@ def save_wget(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> *([] if SAVE_WARC else ['--timestamping']), *([] if CHECK_SSL_VALIDITY else ['--no-check-certificate', '--no-hsts']), # '--server-response', # print headers for better error parsing - *WGET_EXTRA_ARGS, + *WGET_EXTRA_ARGS, *WGET_ARGS, ] cmd = [ From 4d9c5a7b4b0bc0f490b6d8928878853fad363d16 Mon Sep 17 00:00:00 2001 From: Ben Muthalaly Date: Fri, 23 Feb 2024 18:40:03 -0600 Subject: [PATCH 136/166] Add `CHROME_EXTRA_ARGS` Also fix `YOUTUBEDL_EXTRA_ARGS`. --- archivebox/config.py | 4 ++++ archivebox/util.py | 12 +++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/archivebox/config.py b/archivebox/config.py index 00e3b9f0..f8e56036 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -152,6 +152,8 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = { 'CHROME_TIMEOUT': {'type': int, 'default': 0}, 'CHROME_HEADLESS': {'type': bool, 'default': True}, 'CHROME_SANDBOX': {'type': bool, 'default': lambda c: not c['IN_DOCKER']}, + 'CHROME_EXTRA_ARGS': {'type': list, 'default': None}, + 'YOUTUBEDL_ARGS': {'type': list, 'default': lambda c: [ '--restrict-filenames', '--trim-filenames', '128', @@ -568,6 +570,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = { 'YOUTUBEDL_VERSION': {'default': lambda c: bin_version(c['YOUTUBEDL_BINARY']) if c['USE_YOUTUBEDL'] else None}, 'SAVE_MEDIA': {'default': lambda c: c['USE_YOUTUBEDL'] and c['SAVE_MEDIA']}, 'YOUTUBEDL_ARGS': {'default': lambda c: c['YOUTUBEDL_ARGS'] or []}, + 'YOUTUBEDL_EXTRA_ARGS': {'default': lambda c: c['YOUTUBEDL_EXTRA_ARGS'] or []}, 
'CHROME_BINARY': {'default': lambda c: c['CHROME_BINARY'] or find_chrome_binary()}, 'USE_CHROME': {'default': lambda c: c['USE_CHROME'] and c['CHROME_BINARY'] and (c['SAVE_PDF'] or c['SAVE_SCREENSHOT'] or c['SAVE_DOM'] or c['SAVE_SINGLEFILE'])}, @@ -589,6 +592,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = { 'EXTERNAL_LOCATIONS': {'default': lambda c: get_external_locations(c)}, 'DATA_LOCATIONS': {'default': lambda c: get_data_locations(c)}, 'CHROME_OPTIONS': {'default': lambda c: get_chrome_info(c)}, + 'CHROME_EXTRA_ARGS': {'default': lambda c: c['CHROME_EXTRA_ARGS'] or []}, 'SAVE_ALLOWLIST_PTN': {'default': lambda c: c['SAVE_ALLOWLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_ALLOWLIST'].items()}}, 'SAVE_DENYLIST_PTN': {'default': lambda c: c['SAVE_DENYLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_DENYLIST'].items()}}, } diff --git a/archivebox/util.py b/archivebox/util.py index 6b31c86e..18ca08aa 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -227,7 +227,11 @@ def chrome_args(**options) -> List[str]: # Chrome CLI flag documentation: https://peter.sh/experiments/chromium-command-line-switches/ - from .config import CHROME_OPTIONS, CHROME_VERSION + from .config import ( + CHROME_OPTIONS, + CHROME_VERSION, + CHROME_EXTRA_ARGS, + ) options = {**CHROME_OPTIONS, **options} @@ -279,8 +283,10 @@ def chrome_args(**options) -> List[str]: if options['CHROME_USER_DATA_DIR']: cmd_args.append('--user-data-dir={}'.format(options['CHROME_USER_DATA_DIR'])) - - return cmd_args + + cmd_args += CHROME_EXTRA_ARGS + + return dedupe(*cmd_args) def chrome_cleanup(): """ From 22f9a289d399de5dda1de624ef92f93969f1473e Mon Sep 17 00:00:00 2001 From: jim winstead Date: Sun, 25 Feb 2024 12:34:51 -0800 Subject: [PATCH 137/166] Use feedparser for RSS parsing in generic_rss and pinboard_rss parsers The feedparser packages has 20 years of history and is very good at parsing RSS and Atom, so use that instead of ad-hoc regex 
and XML parsing. The medium_rss and shaarli_rss parsers weren't touched because they are probably unnecessary. (The special parse for pinboard is just needing because of how tags work.) Doesn't include tests because I haven't figured out how to run them in the docker development setup. Fixes #1171 --- archivebox/parsers/generic_rss.py | 44 ++++++++++++------------------ archivebox/parsers/pinboard_rss.py | 39 ++++++++++---------------- pyproject.toml | 1 + 3 files changed, 34 insertions(+), 50 deletions(-) diff --git a/archivebox/parsers/generic_rss.py b/archivebox/parsers/generic_rss.py index 4bd04967..005da688 100644 --- a/archivebox/parsers/generic_rss.py +++ b/archivebox/parsers/generic_rss.py @@ -2,13 +2,13 @@ __package__ = 'archivebox.parsers' from typing import IO, Iterable -from datetime import datetime +from time import mktime +from feedparser import parse as feedparser from ..index.schema import Link from ..util import ( htmldecode, - enforce_types, - str_between, + enforce_types ) @enforce_types @@ -16,35 +16,27 @@ def parse_generic_rss_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]: """Parse RSS XML-format files into links""" rss_file.seek(0) - items = rss_file.read().split('') - items = items[1:] if items else [] - for item in items: - # example item: - # - # <![CDATA[How JavaScript works: inside the V8 engine]]> - # Unread - # https://blog.sessionstack.com/how-javascript-works-inside - # https://blog.sessionstack.com/how-javascript-works-inside - # Mon, 21 Aug 2017 14:21:58 -0500 - # + feed = feedparser(rss_file.read()) + for item in feed.entries: + url = item.link + title = item.title + time = mktime(item.updated_parsed) - trailing_removed = item.split('', 1)[0] - leading_removed = trailing_removed.split('', 1)[-1].strip() - rows = leading_removed.split('\n') + try: + tags = ','.join(map(lambda tag: tag.term, item.tags)) + except AttributeError: + tags = '' - def get_row(key): - return [r for r in rows if 
r.strip().startswith('<{}>'.format(key))][0] - - url = str_between(get_row('link'), '', '') - ts_str = str_between(get_row('pubDate'), '', '') - time = datetime.strptime(ts_str, "%a, %d %b %Y %H:%M:%S %z") - title = str_between(get_row('title'), ' Iterable[Link]: """Parse Pinboard RSS feed files into links""" rss_file.seek(0) - root = ElementTree.parse(rss_file).getroot() - items = root.findall("{http://purl.org/rss/1.0/}item") - for item in items: - find = lambda p: item.find(p).text.strip() if item.find(p) is not None else None # type: ignore + feed = feedparser(rss_file.read()) + for item in feed.entries: + url = item.link + # title will start with "[priv] " if pin was marked private. useful? + title = item.title + time = mktime(item.updated_parsed) - url = find("{http://purl.org/rss/1.0/}link") - tags = find("{http://purl.org/dc/elements/1.1/}subject") - title = find("{http://purl.org/rss/1.0/}title") - ts_str = find("{http://purl.org/dc/elements/1.1/}date") + # all tags are in one entry.tags with spaces in it. annoying! + try: + tags = item.tags[0].term.replace(' ', ',') + except AttributeError: + tags = '' if url is None: # Yielding a Link with no URL will # crash on a URL validation assertion continue - # Pinboard includes a colon in its date stamp timezone offsets, which - # Python can't parse. 
Remove it: - if ts_str and ts_str[-3:-2] == ":": - ts_str = ts_str[:-3]+ts_str[-2:] - - if ts_str: - time = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S%z") - else: - time = datetime.now(timezone.utc) - yield Link( url=htmldecode(url), - timestamp=str(time.timestamp()), + timestamp=str(time), title=htmldecode(title) or None, tags=htmldecode(tags) or None, sources=[rss_file.name], diff --git a/pyproject.toml b/pyproject.toml index 0907858b..cb18a911 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "dateparser>=1.0.0", "django-extensions>=3.0.3", "django>=3.1.3,<3.2", + "feedparser>=6.0.11", "ipython>5.0.0", "mypy-extensions>=0.4.3", "python-crontab>=2.5.1", From 68326a60ee20e2a8831ae86e9867b352e0f74ca6 Mon Sep 17 00:00:00 2001 From: Ben Muthalaly Date: Tue, 27 Feb 2024 15:30:31 -0600 Subject: [PATCH 138/166] Add cookies file to http request in `download_url` --- archivebox/util.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/archivebox/util.py b/archivebox/util.py index 5321081c..2e1e4907 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -3,6 +3,7 @@ __package__ = 'archivebox' import re import requests import json as pyjson +import http.cookiejar from typing import List, Optional, Any from pathlib import Path @@ -164,13 +165,26 @@ def parse_date(date: Any) -> Optional[datetime]: @enforce_types def download_url(url: str, timeout: int=None) -> str: """Download the contents of a remote url and return the text""" - from .config import TIMEOUT, CHECK_SSL_VALIDITY, WGET_USER_AGENT + from .config import ( + TIMEOUT, + CHECK_SSL_VALIDITY, + WGET_USER_AGENT, + COOKIES_FILE, + ) timeout = timeout or TIMEOUT + + cookie_jar = http.cookiejar.MozillaCookieJar() + if COOKIES_FILE is not None: + cookie_jar.load(COOKIES_FILE, ignore_discard=True, ignore_expires=True) + else: + cookie_jar = None + response = requests.get( url, headers={'User-Agent': WGET_USER_AGENT}, verify=CHECK_SSL_VALIDITY, 
timeout=timeout, + cookies=cookie_jar, ) content_type = response.headers.get('Content-Type', '') From 178e676e0f27704b1ead99c554f8a65426bc9ca8 Mon Sep 17 00:00:00 2001 From: jim winstead Date: Tue, 27 Feb 2024 14:48:19 -0800 Subject: [PATCH 139/166] Fix JSON parser by not always mangling the input Rather than by assuming the JSON file we are parsing has junk at the beginning (which maybe only used to happen?), try parsing it as-is first, and then fall back to trying again after skipping the first line Fixes #1347 --- archivebox/parsers/generic_json.py | 19 ++++++-- tests/mock_server/templates/example.json | 1 + tests/mock_server/templates/example.json.bad | 2 + tests/test_add.py | 50 ++++++++++++++++++++ 4 files changed, 68 insertions(+), 4 deletions(-) create mode 100644 tests/mock_server/templates/example.json create mode 100644 tests/mock_server/templates/example.json.bad diff --git a/archivebox/parsers/generic_json.py b/archivebox/parsers/generic_json.py index daebb7c4..d8df70c3 100644 --- a/archivebox/parsers/generic_json.py +++ b/archivebox/parsers/generic_json.py @@ -18,9 +18,16 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]: json_file.seek(0) - # sometimes the first line is a comment or filepath, so we get everything after the first { - json_file_json_str = '{' + json_file.read().split('{', 1)[-1] - links = json.loads(json_file_json_str) + try: + links = json.load(json_file) + except json.decoder.JSONDecodeError: + # sometimes the first line is a comment or other junk, so try without + json_file.seek(0) + first_line = json_file.readline() + #print(' > Trying JSON parser without first line: "', first_line.strip(), '"', sep= '') + links = json.load(json_file) + # we may fail again, which means we really don't know what to do + json_date = lambda s: datetime.strptime(s, '%Y-%m-%dT%H:%M:%S%z') for link in links: @@ -59,11 +66,15 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]: elif 
link.get('name'): title = link['name'].strip() + tags = '' + if link.get('tags'): + tags = link.get('tags').replace(' ',',') + yield Link( url=htmldecode(url), timestamp=ts_str, title=htmldecode(title) or None, - tags=htmldecode(link.get('tags')) or '', + tags=htmldecode(tags), sources=[json_file.name], ) diff --git a/tests/mock_server/templates/example.json b/tests/mock_server/templates/example.json new file mode 100644 index 00000000..512febe5 --- /dev/null +++ b/tests/mock_server/templates/example.json @@ -0,0 +1 @@ +[{"href":"http://127.0.0.1:8080/static/example.com.html","description":"Example","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"Tag1 Tag2","trap":"http://www.example.com/should-not-exist"}] diff --git a/tests/mock_server/templates/example.json.bad b/tests/mock_server/templates/example.json.bad new file mode 100644 index 00000000..88d77757 --- /dev/null +++ b/tests/mock_server/templates/example.json.bad @@ -0,0 +1,2 @@ +this line would cause problems but --parser=json will actually skip it +[{"href":"http://127.0.0.1:8080/static/example.com.html","description":"Example","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"Tag1 Tag2","trap":"http://www.example.com/should-not-exist"}] diff --git a/tests/test_add.py b/tests/test_add.py index 331178fe..062de11e 100644 --- a/tests/test_add.py +++ b/tests/test_add.py @@ -91,3 +91,53 @@ def test_extract_input_uses_only_passed_extractors(tmp_path, process): assert (archived_item_path / "warc").exists() assert not (archived_item_path / "singlefile.html").exists() + +def test_json(tmp_path, process, disable_extractors_dict): + with open('../../mock_server/templates/example.json', 'r', encoding='utf-8') as f: + arg_process = subprocess.run( + ["archivebox", "add", "--index-only", 
"--parser=json"], + stdin=f, + capture_output=True, + env=disable_extractors_dict, + ) + + conn = sqlite3.connect("index.sqlite3") + c = conn.cursor() + urls = c.execute("SELECT url from core_snapshot").fetchall() + tags = c.execute("SELECT name from core_tag").fetchall() + conn.commit() + conn.close() + + urls = list(map(lambda x: x[0], urls)) + assert "http://127.0.0.1:8080/static/example.com.html" in urls + # if the following URL appears, we must have fallen back to another parser + assert not "http://www.example.com/should-not-exist" in urls + + tags = list(map(lambda x: x[0], tags)) + assert "Tag1" in tags + assert "Tag2" in tags + +def test_json_with_leading_garbage(tmp_path, process, disable_extractors_dict): + with open('../../mock_server/templates/example.json.bad', 'r', encoding='utf-8') as f: + arg_process = subprocess.run( + ["archivebox", "add", "--index-only", "--parser=json"], + stdin=f, + capture_output=True, + env=disable_extractors_dict, + ) + + conn = sqlite3.connect("index.sqlite3") + c = conn.cursor() + urls = c.execute("SELECT url from core_snapshot").fetchall() + tags = c.execute("SELECT name from core_tag").fetchall() + conn.commit() + conn.close() + + urls = list(map(lambda x: x[0], urls)) + assert "http://127.0.0.1:8080/static/example.com.html" in urls + # if the following URL appears, we must have fallen back to another parser + assert not "http://www.example.com/should-not-exist" in urls + + tags = list(map(lambda x: x[0], tags)) + assert "Tag1" in tags + assert "Tag2" in tags From ccabda4c7d17f064feb413e9268b7d0c4f02029f Mon Sep 17 00:00:00 2001 From: jim winstead Date: Wed, 28 Feb 2024 17:38:49 -0800 Subject: [PATCH 140/166] Handle list of tags in JSON, and be more clever about comma vs. 
space --- archivebox/parsers/generic_json.py | 11 ++++++++--- tests/mock_server/templates/example.json | 7 ++++++- tests/test_add.py | 7 +++++++ 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/archivebox/parsers/generic_json.py b/archivebox/parsers/generic_json.py index d8df70c3..9d12a4ef 100644 --- a/archivebox/parsers/generic_json.py +++ b/archivebox/parsers/generic_json.py @@ -66,9 +66,14 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]: elif link.get('name'): title = link['name'].strip() - tags = '' - if link.get('tags'): - tags = link.get('tags').replace(' ',',') + # if we have a list, join it with commas + tags = link.get('tags') + if type(tags) == list: + tags = ','.join(tags) + elif type(tags) == str: + # if there's no comma, assume it was space-separated + if ',' not in tags: + tags = tags.replace(' ', ',') yield Link( url=htmldecode(url), diff --git a/tests/mock_server/templates/example.json b/tests/mock_server/templates/example.json index 512febe5..6ee15597 100644 --- a/tests/mock_server/templates/example.json +++ b/tests/mock_server/templates/example.json @@ -1 +1,6 @@ -[{"href":"http://127.0.0.1:8080/static/example.com.html","description":"Example","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"Tag1 Tag2","trap":"http://www.example.com/should-not-exist"}] +[ +{"href":"http://127.0.0.1:8080/static/example.com.html","description":"Example","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"Tag1 Tag2","trap":"http://www.example.com/should-not-exist"}, +{"href":"http://127.0.0.1:8080/static/iana.org.html","description":"Example 
2","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:43Z","shared":"no","toread":"no","tags":"Tag3,Tag4 with Space"}, +{"href":"http://127.0.0.1:8080/static/shift_jis.html","description":"Example 2","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:44Z","shared":"no","toread":"no","tags":["Tag5","Tag6 with Space"]}, +{"href":"http://127.0.0.1:8080/static/title_og_with_html","description":"Example 2","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:45Z","shared":"no","toread":"no"} +] diff --git a/tests/test_add.py b/tests/test_add.py index 062de11e..dd1307bb 100644 --- a/tests/test_add.py +++ b/tests/test_add.py @@ -110,12 +110,19 @@ def test_json(tmp_path, process, disable_extractors_dict): urls = list(map(lambda x: x[0], urls)) assert "http://127.0.0.1:8080/static/example.com.html" in urls + assert "http://127.0.0.1:8080/static/iana.org.html" in urls + assert "http://127.0.0.1:8080/static/shift_jis.html" in urls + assert "http://127.0.0.1:8080/static/title_og_with_html" in urls # if the following URL appears, we must have fallen back to another parser assert not "http://www.example.com/should-not-exist" in urls tags = list(map(lambda x: x[0], tags)) assert "Tag1" in tags assert "Tag2" in tags + assert "Tag3" in tags + assert "Tag4 with Space" in tags + assert "Tag5" in tags + assert "Tag6 with Space" in tags def test_json_with_leading_garbage(tmp_path, process, disable_extractors_dict): with open('../../mock_server/templates/example.json.bad', 'r', encoding='utf-8') as f: From fe11e1c2f47487b419497bac38aafbd433ed689a Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Wed, 28 Feb 2024 18:19:44 -0800 Subject: [PATCH 141/166] check if COOKIE_FILE is file --- archivebox/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/archivebox/util.py b/archivebox/util.py index 2e1e4907..9b570ec9 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -174,7 +174,7 @@ def download_url(url: str, timeout: int=None) -> str: timeout = timeout or TIMEOUT cookie_jar = http.cookiejar.MozillaCookieJar() - if COOKIES_FILE is not None: + if COOKIES_FILE and Path(COOKIES_FILE).is_file(): cookie_jar.load(COOKIES_FILE, ignore_discard=True, ignore_expires=True) else: cookie_jar = None From 89ab18c772b482a92ee8c3c9b4a7e93b80593d93 Mon Sep 17 00:00:00 2001 From: jim winstead Date: Thu, 29 Feb 2024 18:15:06 -0800 Subject: [PATCH 142/166] Add generic_jsonl parser Resolves #1369 --- archivebox/parsers/__init__.py | 2 + archivebox/parsers/generic_json.py | 110 +++++++++--------- archivebox/parsers/generic_jsonl.py | 34 ++++++ .../templates/example-single.jsonl | 1 + tests/mock_server/templates/example.jsonl | 4 + tests/test_add.py | 70 +++++++++++ 6 files changed, 168 insertions(+), 53 deletions(-) create mode 100644 archivebox/parsers/generic_jsonl.py create mode 100644 tests/mock_server/templates/example-single.jsonl create mode 100644 tests/mock_server/templates/example.jsonl diff --git a/archivebox/parsers/__init__.py b/archivebox/parsers/__init__.py index c6f2f382..0cd39d8a 100644 --- a/archivebox/parsers/__init__.py +++ b/archivebox/parsers/__init__.py @@ -44,6 +44,7 @@ from . import medium_rss from . import netscape_html from . import generic_rss from . import generic_json +from . import generic_jsonl from . import generic_html from . import generic_txt from . 
import url_list @@ -63,6 +64,7 @@ PARSERS = { netscape_html.KEY: (netscape_html.NAME, netscape_html.PARSER), generic_rss.KEY: (generic_rss.NAME, generic_rss.PARSER), generic_json.KEY: (generic_json.NAME, generic_json.PARSER), + generic_jsonl.KEY: (generic_jsonl.NAME, generic_jsonl.PARSER), generic_html.KEY: (generic_html.NAME, generic_html.PARSER), # Catchall fallback parser diff --git a/archivebox/parsers/generic_json.py b/archivebox/parsers/generic_json.py index 9d12a4ef..8b64f55e 100644 --- a/archivebox/parsers/generic_json.py +++ b/archivebox/parsers/generic_json.py @@ -11,6 +11,60 @@ from ..util import ( enforce_types, ) +# This gets used by generic_jsonl, too +def jsonObjectToLink(link: str, source: str): + json_date = lambda s: datetime.strptime(s, '%Y-%m-%dT%H:%M:%S%z') + + # example line + # {"href":"http:\/\/www.reddit.com\/r\/example","description":"title here","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android"}] + # Parse URL + url = link.get('href') or link.get('url') or link.get('URL') + if not url: + raise Exception('JSON must contain URL in each entry [{"url": "http://...", ...}, ...]') + + # Parse the timestamp + ts_str = str(datetime.now(timezone.utc).timestamp()) + if link.get('timestamp'): + # chrome/ff histories use a very precise timestamp + ts_str = str(link['timestamp'] / 10000000) + elif link.get('time'): + ts_str = str(json_date(link['time'].split(',', 1)[0]).timestamp()) + elif link.get('created_at'): + ts_str = str(json_date(link['created_at']).timestamp()) + elif link.get('created'): + ts_str = str(json_date(link['created']).timestamp()) + elif link.get('date'): + ts_str = str(json_date(link['date']).timestamp()) + elif link.get('bookmarked'): + ts_str = str(json_date(link['bookmarked']).timestamp()) + elif link.get('saved'): + ts_str = str(json_date(link['saved']).timestamp()) + + # Parse the title + title = 
None + if link.get('title'): + title = link['title'].strip() + elif link.get('description'): + title = link['description'].replace(' — Readability', '').strip() + elif link.get('name'): + title = link['name'].strip() + + # if we have a list, join it with commas + tags = link.get('tags') + if type(tags) == list: + tags = ','.join(tags) + elif type(tags) == str: + # if there's no comma, assume it was space-separated + if ',' not in tags: + tags = tags.replace(' ', ',') + + return Link( + url=htmldecode(url), + timestamp=ts_str, + title=htmldecode(title) or None, + tags=htmldecode(tags), + sources=[source], + ) @enforce_types def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]: @@ -20,6 +74,8 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]: try: links = json.load(json_file) + if type(links) != list: + raise Exception('JSON parser expects list of objects, maybe this is JSONL?') except json.decoder.JSONDecodeError: # sometimes the first line is a comment or other junk, so try without json_file.seek(0) @@ -28,61 +84,9 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]: links = json.load(json_file) # we may fail again, which means we really don't know what to do - json_date = lambda s: datetime.strptime(s, '%Y-%m-%dT%H:%M:%S%z') - for link in links: - # example line - # {"href":"http:\/\/www.reddit.com\/r\/example","description":"title here","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android"}] if link: - # Parse URL - url = link.get('href') or link.get('url') or link.get('URL') - if not url: - raise Exception('JSON must contain URL in each entry [{"url": "http://...", ...}, ...]') - - # Parse the timestamp - ts_str = str(datetime.now(timezone.utc).timestamp()) - if link.get('timestamp'): - # chrome/ff histories use a very precise timestamp - ts_str = 
str(link['timestamp'] / 10000000) - elif link.get('time'): - ts_str = str(json_date(link['time'].split(',', 1)[0]).timestamp()) - elif link.get('created_at'): - ts_str = str(json_date(link['created_at']).timestamp()) - elif link.get('created'): - ts_str = str(json_date(link['created']).timestamp()) - elif link.get('date'): - ts_str = str(json_date(link['date']).timestamp()) - elif link.get('bookmarked'): - ts_str = str(json_date(link['bookmarked']).timestamp()) - elif link.get('saved'): - ts_str = str(json_date(link['saved']).timestamp()) - - # Parse the title - title = None - if link.get('title'): - title = link['title'].strip() - elif link.get('description'): - title = link['description'].replace(' — Readability', '').strip() - elif link.get('name'): - title = link['name'].strip() - - # if we have a list, join it with commas - tags = link.get('tags') - if type(tags) == list: - tags = ','.join(tags) - elif type(tags) == str: - # if there's no comma, assume it was space-separated - if ',' not in tags: - tags = tags.replace(' ', ',') - - yield Link( - url=htmldecode(url), - timestamp=ts_str, - title=htmldecode(title) or None, - tags=htmldecode(tags), - sources=[json_file.name], - ) - + yield jsonObjectToLink(link,json_file.name) KEY = 'json' NAME = 'Generic JSON' diff --git a/archivebox/parsers/generic_jsonl.py b/archivebox/parsers/generic_jsonl.py new file mode 100644 index 00000000..8ee94b28 --- /dev/null +++ b/archivebox/parsers/generic_jsonl.py @@ -0,0 +1,34 @@ +__package__ = 'archivebox.parsers' + +import json + +from typing import IO, Iterable +from datetime import datetime, timezone + +from ..index.schema import Link +from ..util import ( + htmldecode, + enforce_types, +) + +from .generic_json import jsonObjectToLink + +def parse_line(line: str): + if line.strip() != "": + return json.loads(line) + +@enforce_types +def parse_generic_jsonl_export(json_file: IO[str], **_kwargs) -> Iterable[Link]: + """Parse JSONL format bookmarks export files""" + + 
json_file.seek(0) + + links = [ parse_line(line) for line in json_file ] + + for link in links: + if link: + yield jsonObjectToLink(link,json_file.name) + +KEY = 'jsonl' +NAME = 'Generic JSONL' +PARSER = parse_generic_jsonl_export diff --git a/tests/mock_server/templates/example-single.jsonl b/tests/mock_server/templates/example-single.jsonl new file mode 100644 index 00000000..492c906d --- /dev/null +++ b/tests/mock_server/templates/example-single.jsonl @@ -0,0 +1 @@ +{"href":"http://127.0.0.1:8080/static/example.com.html","description":"Example","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"Tag1 Tag2","trap":"http://www.example.com/should-not-exist"} diff --git a/tests/mock_server/templates/example.jsonl b/tests/mock_server/templates/example.jsonl new file mode 100644 index 00000000..de0b3b5c --- /dev/null +++ b/tests/mock_server/templates/example.jsonl @@ -0,0 +1,4 @@ +{"href":"http://127.0.0.1:8080/static/example.com.html","description":"Example","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"Tag1 Tag2","trap":"http://www.example.com/should-not-exist"} +{"href":"http://127.0.0.1:8080/static/iana.org.html","description":"Example 2","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:43Z","shared":"no","toread":"no","tags":"Tag3,Tag4 with Space"} +{"href":"http://127.0.0.1:8080/static/shift_jis.html","description":"Example 2","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:44Z","shared":"no","toread":"no","tags":["Tag5","Tag6 with Space"]} +{"href":"http://127.0.0.1:8080/static/title_og_with_html","description":"Example 
2","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:45Z","shared":"no","toread":"no"} diff --git a/tests/test_add.py b/tests/test_add.py index dd1307bb..baeac4e9 100644 --- a/tests/test_add.py +++ b/tests/test_add.py @@ -148,3 +148,73 @@ def test_json_with_leading_garbage(tmp_path, process, disable_extractors_dict): tags = list(map(lambda x: x[0], tags)) assert "Tag1" in tags assert "Tag2" in tags + +def test_jsonl(tmp_path, process, disable_extractors_dict): + with open('../../mock_server/templates/example.jsonl', 'r', encoding='utf-8') as f: + arg_process = subprocess.run( + ["archivebox", "add", "--index-only", "--parser=jsonl"], + stdin=f, + capture_output=True, + env=disable_extractors_dict, + ) + + conn = sqlite3.connect("index.sqlite3") + c = conn.cursor() + urls = c.execute("SELECT url from core_snapshot").fetchall() + tags = c.execute("SELECT name from core_tag").fetchall() + conn.commit() + conn.close() + + urls = list(map(lambda x: x[0], urls)) + assert "http://127.0.0.1:8080/static/example.com.html" in urls + assert "http://127.0.0.1:8080/static/iana.org.html" in urls + assert "http://127.0.0.1:8080/static/shift_jis.html" in urls + assert "http://127.0.0.1:8080/static/title_og_with_html" in urls + # if the following URL appears, we must have fallen back to another parser + assert not "http://www.example.com/should-not-exist" in urls + + tags = list(map(lambda x: x[0], tags)) + assert "Tag1" in tags + assert "Tag2" in tags + assert "Tag3" in tags + assert "Tag4 with Space" in tags + assert "Tag5" in tags + assert "Tag6 with Space" in tags + +def test_jsonl_single(tmp_path, process, disable_extractors_dict): + with open('../../mock_server/templates/example-single.jsonl', 'r', encoding='utf-8') as f: + arg_process = subprocess.run( + ["archivebox", "add", "--index-only", "--parser=jsonl"], + stdin=f, + capture_output=True, + env=disable_extractors_dict, + ) + + conn = 
sqlite3.connect("index.sqlite3") + c = conn.cursor() + urls = c.execute("SELECT url from core_snapshot").fetchall() + tags = c.execute("SELECT name from core_tag").fetchall() + conn.commit() + conn.close() + + urls = list(map(lambda x: x[0], urls)) + assert "http://127.0.0.1:8080/static/example.com.html" in urls + # if the following URL appears, we must have fallen back to another parser + assert not "http://www.example.com/should-not-exist" in urls + + tags = list(map(lambda x: x[0], tags)) + assert "Tag1" in tags + assert "Tag2" in tags + +# make sure that JSON parser rejects a single line of JSONL which is valid +# JSON but not our expected format +def test_json_single(tmp_path, process, disable_extractors_dict): + with open('../../mock_server/templates/example-single.jsonl', 'r', encoding='utf-8') as f: + arg_process = subprocess.run( + ["archivebox", "add", "--index-only", "--parser=json"], + stdin=f, + capture_output=True, + env=disable_extractors_dict, + ) + + assert 'expects list of objects' in arg_process.stderr.decode("utf-8") From 1f828d94410eded4e23ee8778a2d6151a4c89c8c Mon Sep 17 00:00:00 2001 From: jim winstead Date: Fri, 1 Mar 2024 11:22:28 -0800 Subject: [PATCH 143/166] Add tests for generic_rss and pinboard_rss parsers --- tests/mock_server/templates/example.atom | 24 +++++++++ tests/mock_server/templates/example.rss | 32 +++++++++++ tests/test_add.py | 68 ++++++++++++++++++++++++ 3 files changed, 124 insertions(+) create mode 100644 tests/mock_server/templates/example.atom create mode 100644 tests/mock_server/templates/example.rss diff --git a/tests/mock_server/templates/example.atom b/tests/mock_server/templates/example.atom new file mode 100644 index 00000000..9d71abb1 --- /dev/null +++ b/tests/mock_server/templates/example.atom @@ -0,0 +1,24 @@ + + + http://www.example.com/ + Example of an Atom feed + + + + Jim Winstead + + 2024-02-26T03:18:26Z + + Example + + tag:example.com,2024-02-25:3319 + 2024-02-26T03:18:26Z + 2024-02-25T19:18:25-08:00 + 
+ + This is some <b>content</b> + + diff --git a/tests/mock_server/templates/example.rss b/tests/mock_server/templates/example.rss new file mode 100644 index 00000000..d47a5a38 --- /dev/null +++ b/tests/mock_server/templates/example.rss @@ -0,0 +1,32 @@ + + + + Sample Feed + http://example.org/ + For documentation only + en-us + Nobody (nobody@example.org) + Public domain + 2024-02-26T17:28:12-08:00 + + + + + First! + http://127.0.0.1:8080/static/example.com.html + just-an@example.org + + This has a description. + + Tag1 Tag2 + 2024-02-26T17:28:12-08:00 + description.]]> + + + + diff --git a/tests/test_add.py b/tests/test_add.py index 331178fe..9675f361 100644 --- a/tests/test_add.py +++ b/tests/test_add.py @@ -91,3 +91,71 @@ def test_extract_input_uses_only_passed_extractors(tmp_path, process): assert (archived_item_path / "warc").exists() assert not (archived_item_path / "singlefile.html").exists() + +def test_generic_rss(tmp_path, process, disable_extractors_dict): + with open('../../mock_server/templates/example.rss', 'r', encoding='utf-8') as f: + arg_process = subprocess.run( + ["archivebox", "add", "--index-only", "--parser=rss"], + stdin=f, + capture_output=True, + env=disable_extractors_dict, + ) + + conn = sqlite3.connect("index.sqlite3") + c = conn.cursor() + urls = c.execute("SELECT url from core_snapshot").fetchall() + tags = c.execute("SELECT name from core_tag").fetchall() + conn.commit() + conn.close() + + urls = list(map(lambda x: x[0], urls)) + assert "http://127.0.0.1:8080/static/example.com.html" in urls + # if the following URL appears, we must have fallen back to another parser + assert not "http://purl.org/dc/elements/1.1/" in urls + + tags = list(map(lambda x: x[0], tags)) + assert "Tag1 Tag2" in tags + +def test_pinboard_rss(tmp_path, process, disable_extractors_dict): + with open('../../mock_server/templates/example.rss', 'r', encoding='utf-8') as f: + arg_process = subprocess.run( + ["archivebox", "add", "--index-only", 
"--parser=pinboard_rss"], + stdin=f, + capture_output=True, + env=disable_extractors_dict, + ) + + conn = sqlite3.connect("index.sqlite3") + c = conn.cursor() + tags = c.execute("SELECT name from core_tag").fetchall() + conn.commit() + conn.close() + + tags = list(map(lambda x: x[0], tags)) + assert "Tag1" in tags + assert "Tag2" in tags + +def test_atom(tmp_path, process, disable_extractors_dict): + with open('../../mock_server/templates/example.atom', 'r', encoding='utf-8') as f: + arg_process = subprocess.run( + ["archivebox", "add", "--index-only", "--parser=rss"], + stdin=f, + capture_output=True, + env=disable_extractors_dict, + ) + + conn = sqlite3.connect("index.sqlite3") + c = conn.cursor() + urls = c.execute("SELECT url from core_snapshot").fetchall() + tags = c.execute("SELECT name from core_tag").fetchall() + conn.commit() + conn.close() + + urls = list(map(lambda x: x[0], urls)) + assert "http://127.0.0.1:8080/static/example.com.html" in urls + # if the following URL appears, we must have fallen back to another parser + assert not "http://www.w3.org/2005/Atom" in urls + + tags = list(map(lambda x: x[0], tags)) + assert "Tag1" in tags + assert "Tag2" in tags From 9f462a87a8f021b5497dd75208b044dbe1c4ce40 Mon Sep 17 00:00:00 2001 From: jim winstead Date: Sun, 25 Feb 2024 12:34:51 -0800 Subject: [PATCH 144/166] Use feedparser for RSS parsing in generic_rss and pinboard_rss parsers The feedparser packages has 20 years of history and is very good at parsing RSS and Atom, so use that instead of ad-hoc regex and XML parsing. The medium_rss and shaarli_rss parsers weren't touched because they are probably unnecessary. (The special parse for pinboard is just needing because of how tags work.) Doesn't include tests because I haven't figured out how to run them in the docker development setup. 
Fixes #1171 --- archivebox/parsers/generic_rss.py | 44 ++++++++++++------------------ archivebox/parsers/pinboard_rss.py | 39 ++++++++++---------------- pyproject.toml | 1 + 3 files changed, 34 insertions(+), 50 deletions(-) diff --git a/archivebox/parsers/generic_rss.py b/archivebox/parsers/generic_rss.py index 4bd04967..005da688 100644 --- a/archivebox/parsers/generic_rss.py +++ b/archivebox/parsers/generic_rss.py @@ -2,13 +2,13 @@ __package__ = 'archivebox.parsers' from typing import IO, Iterable -from datetime import datetime +from time import mktime +from feedparser import parse as feedparser from ..index.schema import Link from ..util import ( htmldecode, - enforce_types, - str_between, + enforce_types ) @enforce_types @@ -16,35 +16,27 @@ def parse_generic_rss_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]: """Parse RSS XML-format files into links""" rss_file.seek(0) - items = rss_file.read().split('') - items = items[1:] if items else [] - for item in items: - # example item: - # - # <![CDATA[How JavaScript works: inside the V8 engine]]> - # Unread - # https://blog.sessionstack.com/how-javascript-works-inside - # https://blog.sessionstack.com/how-javascript-works-inside - # Mon, 21 Aug 2017 14:21:58 -0500 - # + feed = feedparser(rss_file.read()) + for item in feed.entries: + url = item.link + title = item.title + time = mktime(item.updated_parsed) - trailing_removed = item.split('', 1)[0] - leading_removed = trailing_removed.split('', 1)[-1].strip() - rows = leading_removed.split('\n') + try: + tags = ','.join(map(lambda tag: tag.term, item.tags)) + except AttributeError: + tags = '' - def get_row(key): - return [r for r in rows if r.strip().startswith('<{}>'.format(key))][0] - - url = str_between(get_row('link'), '', '') - ts_str = str_between(get_row('pubDate'), '', '') - time = datetime.strptime(ts_str, "%a, %d %b %Y %H:%M:%S %z") - title = str_between(get_row('title'), ' Iterable[Link]: """Parse Pinboard RSS feed files into links""" 
rss_file.seek(0) - root = ElementTree.parse(rss_file).getroot() - items = root.findall("{http://purl.org/rss/1.0/}item") - for item in items: - find = lambda p: item.find(p).text.strip() if item.find(p) is not None else None # type: ignore + feed = feedparser(rss_file.read()) + for item in feed.entries: + url = item.link + # title will start with "[priv] " if pin was marked private. useful? + title = item.title + time = mktime(item.updated_parsed) - url = find("{http://purl.org/rss/1.0/}link") - tags = find("{http://purl.org/dc/elements/1.1/}subject") - title = find("{http://purl.org/rss/1.0/}title") - ts_str = find("{http://purl.org/dc/elements/1.1/}date") + # all tags are in one entry.tags with spaces in it. annoying! + try: + tags = item.tags[0].term.replace(' ', ',') + except AttributeError: + tags = '' if url is None: # Yielding a Link with no URL will # crash on a URL validation assertion continue - # Pinboard includes a colon in its date stamp timezone offsets, which - # Python can't parse. 
Remove it: - if ts_str and ts_str[-3:-2] == ":": - ts_str = ts_str[:-3]+ts_str[-2:] - - if ts_str: - time = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S%z") - else: - time = datetime.now(timezone.utc) - yield Link( url=htmldecode(url), - timestamp=str(time.timestamp()), + timestamp=str(time), title=htmldecode(title) or None, tags=htmldecode(tags) or None, sources=[rss_file.name], diff --git a/pyproject.toml b/pyproject.toml index 0907858b..cb18a911 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "dateparser>=1.0.0", "django-extensions>=3.0.3", "django>=3.1.3,<3.2", + "feedparser>=6.0.11", "ipython>5.0.0", "mypy-extensions>=0.4.3", "python-crontab>=2.5.1", From e7119adb0b1ff4b950bd61f88a69f8cf9f8ed145 Mon Sep 17 00:00:00 2001 From: jim winstead Date: Fri, 1 Mar 2024 11:27:59 -0800 Subject: [PATCH 145/166] Add tests for generic_rss and pinboard_rss parsers --- tests/mock_server/templates/example.atom | 24 ++++++++++++ tests/mock_server/templates/example.rss | 32 ++++++++++++++++ tests/test_add.py | 49 ++++++++++++++++++++++++ 3 files changed, 105 insertions(+) create mode 100644 tests/mock_server/templates/example.atom create mode 100644 tests/mock_server/templates/example.rss diff --git a/tests/mock_server/templates/example.atom b/tests/mock_server/templates/example.atom new file mode 100644 index 00000000..9d71abb1 --- /dev/null +++ b/tests/mock_server/templates/example.atom @@ -0,0 +1,24 @@ + + + http://www.example.com/ + Example of an Atom feed + + + + Jim Winstead + + 2024-02-26T03:18:26Z + + Example + + tag:example.com,2024-02-25:3319 + 2024-02-26T03:18:26Z + 2024-02-25T19:18:25-08:00 + + + This is some <b>content</b> + + diff --git a/tests/mock_server/templates/example.rss b/tests/mock_server/templates/example.rss new file mode 100644 index 00000000..d47a5a38 --- /dev/null +++ b/tests/mock_server/templates/example.rss @@ -0,0 +1,32 @@ + + + + Sample Feed + http://example.org/ + For documentation only + en-us + Nobody 
(nobody@example.org) + Public domain + 2024-02-26T17:28:12-08:00 + + + + + First! + http://127.0.0.1:8080/static/example.com.html + just-an@example.org + + This has a description. + + Tag1 Tag2 + 2024-02-26T17:28:12-08:00 + description.]]> + + + + diff --git a/tests/test_add.py b/tests/test_add.py index dd1307bb..37f666c9 100644 --- a/tests/test_add.py +++ b/tests/test_add.py @@ -148,3 +148,52 @@ def test_json_with_leading_garbage(tmp_path, process, disable_extractors_dict): tags = list(map(lambda x: x[0], tags)) assert "Tag1" in tags assert "Tag2" in tags + +def test_generic_rss(tmp_path, process, disable_extractors_dict): + with open('../../mock_server/templates/example.rss', 'r', encoding='utf-8') as f: + arg_process = subprocess.run( + ["archivebox", "add", "--index-only", "--parser=rss"], + assert not "http://purl.org/dc/elements/1.1/" in urls + + tags = list(map(lambda x: x[0], tags)) + assert "Tag1 Tag2" in tags + +def test_pinboard_rss(tmp_path, process, disable_extractors_dict): + with open('../../mock_server/templates/example.rss', 'r', encoding='utf-8') as f: + arg_process = subprocess.run( + ["archivebox", "add", "--index-only", "--parser=pinboard_rss"], + stdin=f, + capture_output=True, + env=disable_extractors_dict, + ) + + conn = sqlite3.connect("index.sqlite3") + c = conn.cursor() + tags = c.execute("SELECT name from core_tag").fetchall() + conn.commit() + conn.close() + + tags = list(map(lambda x: x[0], tags)) + assert "Tag1" in tags + assert "Tag2" in tags + +def test_atom(tmp_path, process, disable_extractors_dict): + with open('../../mock_server/templates/example.atom', 'r', encoding='utf-8') as f: + arg_process = subprocess.run( + ["archivebox", "add", "--index-only", "--parser=rss"], + stdin=f, + capture_output=True, + env=disable_extractors_dict, + ) + + conn = sqlite3.connect("index.sqlite3") + c = conn.cursor() + urls = c.execute("SELECT url from core_snapshot").fetchall() + tags = c.execute("SELECT name from core_tag").fetchall() + 
conn.commit() + conn.close() + + urls = list(map(lambda x: x[0], urls)) + assert "http://127.0.0.1:8080/static/example.com.html" in urls + # if the following URL appears, we must have fallen back to another parser + assert not "http://www.w3.org/2005/Atom" in urls From 741ff5f1a864ab6833ed98da5ff7a3be79cbee52 Mon Sep 17 00:00:00 2001 From: jim winstead Date: Fri, 1 Mar 2024 12:43:53 -0800 Subject: [PATCH 146/166] Make it a little easier to run specific tests Changes ./bin/test.sh to pass command line options to pytest, and default to only running tests in the tests/ directory instead of everywhere excluding a few directories which is more error-prone. Also keeps the mock_server used in testing quiet so access log entries don't appear on stdout. --- bin/test.sh | 2 +- pyproject.toml | 2 ++ tests/mock_server/server.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/test.sh b/bin/test.sh index f9ea3575..515806bb 100755 --- a/bin/test.sh +++ b/bin/test.sh @@ -14,4 +14,4 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. 
&& pwd )" source "$DIR/.venv/bin/activate" -pytest -s --basetemp=tests/out --ignore=archivebox/vendor --ignore=deb_dist --ignore=pip_dist --ignore=brew_dist +pytest -s --basetemp=tests/out "$@" diff --git a/pyproject.toml b/pyproject.toml index 0907858b..f5f7dc4b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -107,6 +107,8 @@ lint = "./bin/lint.sh" test = "./bin/test.sh" # all = {composite = ["lint mypackage/", "test -v tests/"]} +[tool.pytest.ini_options] +testpaths = [ "tests" ] [project.scripts] archivebox = "archivebox.cli:main" diff --git a/tests/mock_server/server.py b/tests/mock_server/server.py index 4283574f..39abd80c 100644 --- a/tests/mock_server/server.py +++ b/tests/mock_server/server.py @@ -50,4 +50,4 @@ def redirect_to_static(filename): def start(): - run(host='localhost', port=8080) \ No newline at end of file + run(host='localhost', port=8080, quiet=True) From d74ddd42ae104004e656929036c55f972a9d63d4 Mon Sep 17 00:00:00 2001 From: Ben Muthalaly Date: Fri, 1 Mar 2024 14:50:32 -0600 Subject: [PATCH 147/166] Flip dedupe precedence order --- archivebox/extractors/archive_org.py | 6 +++--- archivebox/extractors/favicon.py | 6 +++--- archivebox/extractors/headers.py | 6 +++--- archivebox/extractors/media.py | 5 +++-- archivebox/extractors/singlefile.py | 14 ++++---------- archivebox/extractors/title.py | 6 +++--- archivebox/extractors/wget.py | 6 +++--- archivebox/util.py | 24 +++++++++++------------- 8 files changed, 33 insertions(+), 40 deletions(-) diff --git a/archivebox/extractors/archive_org.py b/archivebox/extractors/archive_org.py index 93730f26..0d45534a 100644 --- a/archivebox/extractors/archive_org.py +++ b/archivebox/extractors/archive_org.py @@ -46,14 +46,14 @@ def save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, timeout: int= output: ArchiveOutput = 'archive.org.txt' archive_org_url = None submit_url = 'https://web.archive.org/save/{}'.format(link.url) - # earlier options take precedence + # later options take precedence 
options = [ + *CURL_ARGS, + *CURL_EXTRA_ARGS, '--head', '--max-time', str(timeout), *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []), *([] if CHECK_SSL_VALIDITY else ['--insecure']), - *CURL_EXTRA_ARGS, - *CURL_ARGS, ] cmd = [ CURL_BINARY, diff --git a/archivebox/extractors/favicon.py b/archivebox/extractors/favicon.py index 3b41f349..fffa3d16 100644 --- a/archivebox/extractors/favicon.py +++ b/archivebox/extractors/favicon.py @@ -39,14 +39,14 @@ def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) out_dir = out_dir or link.link_dir output: ArchiveOutput = 'favicon.ico' - # earlier options take precedence + # later options take precedence options = [ + *CURL_ARGS, + *CURL_EXTRA_ARGS, '--max-time', str(timeout), '--output', str(output), *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []), *([] if CHECK_SSL_VALIDITY else ['--insecure']), - *CURL_EXTRA_ARGS, - *CURL_ARGS, ] cmd = [ CURL_BINARY, diff --git a/archivebox/extractors/headers.py b/archivebox/extractors/headers.py index 3828de93..9be14331 100644 --- a/archivebox/extractors/headers.py +++ b/archivebox/extractors/headers.py @@ -42,14 +42,14 @@ def save_headers(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT) status = 'succeeded' timer = TimedProgress(timeout, prefix=' ') - # earlier options take precedence + # later options take precedence options = [ + *CURL_ARGS, + *CURL_EXTRA_ARGS, '--head', '--max-time', str(timeout), *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []), *([] if CHECK_SSL_VALIDITY else ['--insecure']), - *CURL_EXTRA_ARGS, - *CURL_ARGS, ] cmd = [ CURL_BINARY, diff --git a/archivebox/extractors/media.py b/archivebox/extractors/media.py index 862bb758..a6d4e81f 100644 --- a/archivebox/extractors/media.py +++ b/archivebox/extractors/media.py @@ -41,11 +41,12 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME output: ArchiveOutput = 'media' 
output_path = out_dir / output output_path.mkdir(exist_ok=True) + # later options take precedence options = [ + *YOUTUBEDL_ARGS, + *YOUTUBEDL_EXTRA_ARGS, *([] if CHECK_SSL_VALIDITY else ['--no-check-certificate']), # TODO: add --cookies-from-browser={CHROME_USER_DATA_DIR} - *YOUTUBEDL_EXTRA_ARGS, - *YOUTUBEDL_ARGS, ] cmd = [ YOUTUBEDL_BINARY, diff --git a/archivebox/extractors/singlefile.py b/archivebox/extractors/singlefile.py index b2119119..5021a6cc 100644 --- a/archivebox/extractors/singlefile.py +++ b/archivebox/extractors/singlefile.py @@ -48,18 +48,12 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO # SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli browser_args = '--browser-args={}'.format(json.dumps(browser_args[1:])) - - # Deduplicate options (single-file doesn't like when you use the same option two times) - # - # NOTE: Options names that come first clobber conflicting names that come later - # My logic is SINGLEFILE_ARGS is the option that affects the singlefile command with most - # specificity, therefore the user sets it with a lot intent, therefore it should take precedence - # kind of like the ergonomic principle of lexical scope in programming languages. 
+ # later options take precedence options = [ - '--browser-executable-path={}'.format(CHROME_BINARY), - browser_args, - *SINGLEFILE_EXTRA_ARGS, *SINGLEFILE_ARGS, + *SINGLEFILE_EXTRA_ARGS, + browser_args, + '--browser-executable-path={}'.format(CHROME_BINARY), ] cmd = [ DEPENDENCIES['SINGLEFILE_BINARY']['path'], diff --git a/archivebox/extractors/title.py b/archivebox/extractors/title.py index b2b65af2..4f34ca81 100644 --- a/archivebox/extractors/title.py +++ b/archivebox/extractors/title.py @@ -104,13 +104,13 @@ def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) - from core.models import Snapshot output: ArchiveOutput = None - # earlier options take precedence + # later options take precedence options = [ + *CURL_ARGS, + *CURL_EXTRA_ARGS, '--max-time', str(timeout), *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []), *([] if CHECK_SSL_VALIDITY else ['--insecure']), - *CURL_EXTRA_ARGS, - *CURL_ARGS, ] cmd = [ CURL_BINARY, diff --git a/archivebox/extractors/wget.py b/archivebox/extractors/wget.py index 5209cde9..885e31f5 100644 --- a/archivebox/extractors/wget.py +++ b/archivebox/extractors/wget.py @@ -57,8 +57,10 @@ def save_wget(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> # WGET CLI Docs: https://www.gnu.org/software/wget/manual/wget.html output: ArchiveOutput = None - # earlier options take precedence + # later options take precedence options = [ + *WGET_ARGS, + *WGET_EXTRA_ARGS, '--timeout={}'.format(timeout), *(['--restrict-file-names={}'.format(RESTRICT_FILE_NAMES)] if RESTRICT_FILE_NAMES else []), *(['--warc-file={}'.format(str(warc_path))] if SAVE_WARC else []), @@ -69,8 +71,6 @@ def save_wget(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> *([] if SAVE_WARC else ['--timestamping']), *([] if CHECK_SSL_VALIDITY else ['--no-check-certificate', '--no-hsts']), # '--server-response', # print headers for better error parsing - *WGET_EXTRA_ARGS, - *WGET_ARGS, ] cmd = [ 
WGET_BINARY, diff --git a/archivebox/util.py b/archivebox/util.py index 18ca08aa..10ceebd4 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -240,6 +240,8 @@ def chrome_args(**options) -> List[str]: cmd_args = [options['CHROME_BINARY']] + cmd_args += CHROME_EXTRA_ARGS + if options['CHROME_HEADLESS']: chrome_major_version = int(re.search(r'\s(\d+)\.\d', CHROME_VERSION)[1]) if chrome_major_version >= 111: @@ -284,7 +286,6 @@ def chrome_args(**options) -> List[str]: if options['CHROME_USER_DATA_DIR']: cmd_args.append('--user-data-dir={}'.format(options['CHROME_USER_DATA_DIR'])) - cmd_args += CHROME_EXTRA_ARGS return dedupe(*cmd_args) @@ -324,20 +325,17 @@ def ansi_to_html(text): @enforce_types -def dedupe(*options: List[str]) -> List[str]: +def dedupe(*options: str) -> List[str]: """ - Deduplicates the given options. Options that come earlier in the list clobber - later conflicting options. + Deduplicates the given options. Options that come later clobber earlier + conflicting options. 
""" - seen_option_names = [] - def test_seen(argument): - option_name = argument.split("=")[0] - if option_name in seen_option_names: - return False - else: - seen_option_names.append(option_name) - return True - return list(filter(test_seen, options)) + deduped = {} + + for option in options: + deduped[option.split('=')[0]] = option + + return list(deduped.values()) class AttributeDict(dict): From 4686da91e6b11661c0e57397fe86886416d965d5 Mon Sep 17 00:00:00 2001 From: Ben Muthalaly Date: Tue, 5 Mar 2024 01:48:35 -0600 Subject: [PATCH 148/166] Fix cookies being set incorrectly --- archivebox/util.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/archivebox/util.py b/archivebox/util.py index 2e1e4907..461141c3 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -166,25 +166,25 @@ def parse_date(date: Any) -> Optional[datetime]: def download_url(url: str, timeout: int=None) -> str: """Download the contents of a remote url and return the text""" from .config import ( - TIMEOUT, - CHECK_SSL_VALIDITY, - WGET_USER_AGENT, - COOKIES_FILE, + TIMEOUT, + CHECK_SSL_VALIDITY, + WGET_USER_AGENT, + COOKIES_FILE, ) timeout = timeout or TIMEOUT + session = requests.Session() - cookie_jar = http.cookiejar.MozillaCookieJar() - if COOKIES_FILE is not None: - cookie_jar.load(COOKIES_FILE, ignore_discard=True, ignore_expires=True) - else: - cookie_jar = None + if COOKIES_FILE and Path(COOKIES_FILE).is_file(): + cookie_jar = http.cookiejar.MozillaCookieJar(COOKIES_FILE) + cookie_jar.load(ignore_discard=True, ignore_expires=True) + for cookie in cookie_jar: + session.cookies.set(cookie.name, cookie.value, domain=cookie.domain, path=cookie.path) - response = requests.get( + response = session.get( url, headers={'User-Agent': WGET_USER_AGENT}, verify=CHECK_SSL_VALIDITY, timeout=timeout, - cookies=cookie_jar, ) content_type = response.headers.get('Content-Type', '') From d8cf09c21e2d6e3ece8a7e5c93d537596c3687d0 Mon Sep 17 00:00:00 2001 
From: Ben Muthalaly Date: Tue, 5 Mar 2024 21:13:45 -0600 Subject: [PATCH 149/166] Remove unnecessary variable length args for dedupe --- archivebox/extractors/archive_org.py | 2 +- archivebox/extractors/favicon.py | 2 +- archivebox/extractors/headers.py | 2 +- archivebox/extractors/media.py | 2 +- archivebox/extractors/singlefile.py | 2 +- archivebox/extractors/title.py | 2 +- archivebox/extractors/wget.py | 2 +- archivebox/util.py | 4 ++-- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/archivebox/extractors/archive_org.py b/archivebox/extractors/archive_org.py index 0d45534a..245315f1 100644 --- a/archivebox/extractors/archive_org.py +++ b/archivebox/extractors/archive_org.py @@ -57,7 +57,7 @@ def save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, timeout: int= ] cmd = [ CURL_BINARY, - *dedupe(*options), + *dedupe(options), submit_url, ] status = 'succeeded' diff --git a/archivebox/extractors/favicon.py b/archivebox/extractors/favicon.py index fffa3d16..f793f8df 100644 --- a/archivebox/extractors/favicon.py +++ b/archivebox/extractors/favicon.py @@ -50,7 +50,7 @@ def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ] cmd = [ CURL_BINARY, - *dedupe(*options), + *dedupe(options), FAVICON_PROVIDER.format(domain(link.url)), ] status = 'failed' diff --git a/archivebox/extractors/headers.py b/archivebox/extractors/headers.py index 9be14331..975787ad 100644 --- a/archivebox/extractors/headers.py +++ b/archivebox/extractors/headers.py @@ -53,7 +53,7 @@ def save_headers(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT) ] cmd = [ CURL_BINARY, - *dedupe(*options), + *dedupe(options), link.url, ] try: diff --git a/archivebox/extractors/media.py b/archivebox/extractors/media.py index a6d4e81f..ad4c9c4b 100644 --- a/archivebox/extractors/media.py +++ b/archivebox/extractors/media.py @@ -50,7 +50,7 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME ] cmd = [ YOUTUBEDL_BINARY, - 
*dedupe(*options), + *dedupe(options), link.url, ] status = 'succeeded' diff --git a/archivebox/extractors/singlefile.py b/archivebox/extractors/singlefile.py index 5021a6cc..553c9f8d 100644 --- a/archivebox/extractors/singlefile.py +++ b/archivebox/extractors/singlefile.py @@ -57,7 +57,7 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO ] cmd = [ DEPENDENCIES['SINGLEFILE_BINARY']['path'], - *dedupe(*options), + *dedupe(options), link.url, output, ] diff --git a/archivebox/extractors/title.py b/archivebox/extractors/title.py index 4f34ca81..5decc52c 100644 --- a/archivebox/extractors/title.py +++ b/archivebox/extractors/title.py @@ -114,7 +114,7 @@ def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) - ] cmd = [ CURL_BINARY, - *dedupe(*options), + *dedupe(options), link.url, ] status = 'succeeded' diff --git a/archivebox/extractors/wget.py b/archivebox/extractors/wget.py index 885e31f5..07471e29 100644 --- a/archivebox/extractors/wget.py +++ b/archivebox/extractors/wget.py @@ -74,7 +74,7 @@ def save_wget(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ] cmd = [ WGET_BINARY, - *dedupe(*options), + *dedupe(options), link.url, ] diff --git a/archivebox/util.py b/archivebox/util.py index 10ceebd4..e1707049 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -287,7 +287,7 @@ def chrome_args(**options) -> List[str]: cmd_args.append('--user-data-dir={}'.format(options['CHROME_USER_DATA_DIR'])) - return dedupe(*cmd_args) + return dedupe(cmd_args) def chrome_cleanup(): """ @@ -325,7 +325,7 @@ def ansi_to_html(text): @enforce_types -def dedupe(*options: str) -> List[str]: +def dedupe(options: List[str]) -> List[str]: """ Deduplicates the given options. Options that come later clobber earlier conflicting options. 
From f4deb97f59abffae4faa5f93a5108c9f28cb09f3 Mon Sep 17 00:00:00 2001 From: Ben Muthalaly Date: Tue, 5 Mar 2024 21:15:38 -0600 Subject: [PATCH 150/166] Add `ARGS` and `EXTRA_ARGS` for Mercury extractor --- archivebox/config.py | 4 ++++ archivebox/extractors/mercury.py | 14 ++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/archivebox/config.py b/archivebox/config.py index f8e56036..64b07931 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -199,6 +199,8 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = { 'GIT_ARGS': {'type': list, 'default': ['--recursive']}, 'SINGLEFILE_ARGS': {'type': list, 'default': None}, 'SINGLEFILE_EXTRA_ARGS': {'type': list, 'default': None}, + 'MERCURY_ARGS': {'type': list, 'default': ['--format=text']}, + 'MERCURY_EXTRA_ARGS': {'type': list, 'default': None}, 'FAVICON_PROVIDER': {'type': str, 'default': 'https://www.google.com/s2/favicons?domain={}'}, }, @@ -561,6 +563,8 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = { 'USE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['SAVE_MERCURY']}, 'MERCURY_VERSION': {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None}, # mercury doesnt expose version info until this is merged https://github.com/postlight/parser/pull/750 + 'MERCURY_ARGS': {'default': lambda c: c['MERCURY_ARGS'] or []}, + 'MERCURY_EXTRA_ARGS': {'default': lambda c: c['MERCURY_EXTRA_ARGS'] or []}, 'USE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']}, 'GIT_VERSION': {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None}, diff --git a/archivebox/extractors/mercury.py b/archivebox/extractors/mercury.py index e7d20362..a0f38434 100644 --- a/archivebox/extractors/mercury.py +++ b/archivebox/extractors/mercury.py @@ -11,13 +11,15 @@ from ..system import run, atomic_write from ..util import ( enforce_types, is_static_file, - + dedupe, ) from ..config import ( TIMEOUT, SAVE_MERCURY, DEPENDENCIES, MERCURY_VERSION, + 
MERCURY_ARGS, + MERCURY_EXTRA_ARGS, ) from ..logging_util import TimedProgress @@ -60,12 +62,16 @@ def save_mercury(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) timer = TimedProgress(timeout, prefix=' ') try: output_folder.mkdir(exist_ok=True) - - # Get plain text version of article + # later options take precedence + options = [ + *MERCURY_ARGS, + *MERCURY_EXTRA_ARGS, + ] + # By default, get plain text version of article cmd = [ DEPENDENCIES['MERCURY_BINARY']['path'], link.url, - "--format=text" + *dedupe(options) ] result = run(cmd, cwd=out_dir, timeout=timeout) try: From 3512dc7e606e67b126100dc8bb2d56874c9025c5 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 14 Mar 2024 00:58:45 -0700 Subject: [PATCH 151/166] Disable searching for existing chrome user profiles by default --- archivebox/config.py | 53 ++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/archivebox/config.py b/archivebox/config.py index 1edd2eeb..fad2db53 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -500,7 +500,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = { 'LOGS_DIR': {'default': lambda c: c['OUTPUT_DIR'] / LOGS_DIR_NAME}, 'CONFIG_FILE': {'default': lambda c: Path(c['CONFIG_FILE']).resolve() if c['CONFIG_FILE'] else c['OUTPUT_DIR'] / CONFIG_FILENAME}, 'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and Path(c['COOKIES_FILE']).resolve()}, - 'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None)}, # None means unset, so we autodetect it with find_chrome_Data_dir(), but emptystring '' means user manually set it to '', and we should store it as None + 'CHROME_USER_DATA_DIR': {'default': lambda c: Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None}, 'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and 
re.compile(c['URL_DENYLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)}, 'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)}, 'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')}, # exec is always needed to list directories @@ -910,27 +910,36 @@ def find_chrome_binary() -> Optional[str]: def find_chrome_data_dir() -> Optional[str]: """find any installed chrome user data directories in the default locations""" - # Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev - # make sure data dir finding precedence order always matches binary finding order - default_profile_paths = ( - '~/.config/chromium', - '~/Library/Application Support/Chromium', - '~/AppData/Local/Chromium/User Data', - '~/.config/chrome', - '~/.config/google-chrome', - '~/Library/Application Support/Google/Chrome', - '~/AppData/Local/Google/Chrome/User Data', - '~/.config/google-chrome-stable', - '~/.config/google-chrome-beta', - '~/Library/Application Support/Google/Chrome Canary', - '~/AppData/Local/Google/Chrome SxS/User Data', - '~/.config/google-chrome-unstable', - '~/.config/google-chrome-dev', - ) - for path in default_profile_paths: - full_path = Path(path).resolve() - if full_path.exists(): - return full_path + # deprecated because this is DANGEROUS, do not re-implement/uncomment this behavior. + + # Going forward we want to discourage people from using their main chrome profile for archiving. + # Session tokens, personal data, and cookies are often returned in server responses, + # when they get archived, they are essentially burned as anyone who can view the archive + # can use that data to masquerade as the logged-in user that did the archiving. + # For this reason users should always create dedicated burner profiles for archiving and not use + # their daily driver main accounts. 
+ + # # Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev + # # make sure data dir finding precedence order always matches binary finding order + # default_profile_paths = ( + # '~/.config/chromium', + # '~/Library/Application Support/Chromium', + # '~/AppData/Local/Chromium/User Data', + # '~/.config/chrome', + # '~/.config/google-chrome', + # '~/Library/Application Support/Google/Chrome', + # '~/AppData/Local/Google/Chrome/User Data', + # '~/.config/google-chrome-stable', + # '~/.config/google-chrome-beta', + # '~/Library/Application Support/Google/Chrome Canary', + # '~/AppData/Local/Google/Chrome SxS/User Data', + # '~/.config/google-chrome-unstable', + # '~/.config/google-chrome-dev', + # ) + # for path in default_profile_paths: + # full_path = Path(path).resolve() + # if full_path.exists(): + # return full_path return None def wget_supports_compression(config): From 5478d13d5254a2443a3a32645a6bb3118bfa7b8a Mon Sep 17 00:00:00 2001 From: jim winstead Date: Thu, 29 Feb 2024 18:15:06 -0800 Subject: [PATCH 152/166] Add generic_jsonl parser Resolves #1369 --- archivebox/parsers/__init__.py | 2 + archivebox/parsers/generic_json.py | 110 +++++++++--------- archivebox/parsers/generic_jsonl.py | 34 ++++++ .../templates/example-single.jsonl | 1 + tests/mock_server/templates/example.jsonl | 4 + tests/test_add.py | 70 +++++++++++ 6 files changed, 168 insertions(+), 53 deletions(-) create mode 100644 archivebox/parsers/generic_jsonl.py create mode 100644 tests/mock_server/templates/example-single.jsonl create mode 100644 tests/mock_server/templates/example.jsonl diff --git a/archivebox/parsers/__init__.py b/archivebox/parsers/__init__.py index c6f2f382..0cd39d8a 100644 --- a/archivebox/parsers/__init__.py +++ b/archivebox/parsers/__init__.py @@ -44,6 +44,7 @@ from . import medium_rss from . import netscape_html from . import generic_rss from . import generic_json +from . import generic_jsonl from . import generic_html from . import generic_txt from . 
import url_list @@ -63,6 +64,7 @@ PARSERS = { netscape_html.KEY: (netscape_html.NAME, netscape_html.PARSER), generic_rss.KEY: (generic_rss.NAME, generic_rss.PARSER), generic_json.KEY: (generic_json.NAME, generic_json.PARSER), + generic_jsonl.KEY: (generic_jsonl.NAME, generic_jsonl.PARSER), generic_html.KEY: (generic_html.NAME, generic_html.PARSER), # Catchall fallback parser diff --git a/archivebox/parsers/generic_json.py b/archivebox/parsers/generic_json.py index 9d12a4ef..8b64f55e 100644 --- a/archivebox/parsers/generic_json.py +++ b/archivebox/parsers/generic_json.py @@ -11,6 +11,60 @@ from ..util import ( enforce_types, ) +# This gets used by generic_jsonl, too +def jsonObjectToLink(link: str, source: str): + json_date = lambda s: datetime.strptime(s, '%Y-%m-%dT%H:%M:%S%z') + + # example line + # {"href":"http:\/\/www.reddit.com\/r\/example","description":"title here","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android"}] + # Parse URL + url = link.get('href') or link.get('url') or link.get('URL') + if not url: + raise Exception('JSON must contain URL in each entry [{"url": "http://...", ...}, ...]') + + # Parse the timestamp + ts_str = str(datetime.now(timezone.utc).timestamp()) + if link.get('timestamp'): + # chrome/ff histories use a very precise timestamp + ts_str = str(link['timestamp'] / 10000000) + elif link.get('time'): + ts_str = str(json_date(link['time'].split(',', 1)[0]).timestamp()) + elif link.get('created_at'): + ts_str = str(json_date(link['created_at']).timestamp()) + elif link.get('created'): + ts_str = str(json_date(link['created']).timestamp()) + elif link.get('date'): + ts_str = str(json_date(link['date']).timestamp()) + elif link.get('bookmarked'): + ts_str = str(json_date(link['bookmarked']).timestamp()) + elif link.get('saved'): + ts_str = str(json_date(link['saved']).timestamp()) + + # Parse the title + title = 
None + if link.get('title'): + title = link['title'].strip() + elif link.get('description'): + title = link['description'].replace(' — Readability', '').strip() + elif link.get('name'): + title = link['name'].strip() + + # if we have a list, join it with commas + tags = link.get('tags') + if type(tags) == list: + tags = ','.join(tags) + elif type(tags) == str: + # if there's no comma, assume it was space-separated + if ',' not in tags: + tags = tags.replace(' ', ',') + + return Link( + url=htmldecode(url), + timestamp=ts_str, + title=htmldecode(title) or None, + tags=htmldecode(tags), + sources=[source], + ) @enforce_types def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]: @@ -20,6 +74,8 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]: try: links = json.load(json_file) + if type(links) != list: + raise Exception('JSON parser expects list of objects, maybe this is JSONL?') except json.decoder.JSONDecodeError: # sometimes the first line is a comment or other junk, so try without json_file.seek(0) @@ -28,61 +84,9 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]: links = json.load(json_file) # we may fail again, which means we really don't know what to do - json_date = lambda s: datetime.strptime(s, '%Y-%m-%dT%H:%M:%S%z') - for link in links: - # example line - # {"href":"http:\/\/www.reddit.com\/r\/example","description":"title here","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android"}] if link: - # Parse URL - url = link.get('href') or link.get('url') or link.get('URL') - if not url: - raise Exception('JSON must contain URL in each entry [{"url": "http://...", ...}, ...]') - - # Parse the timestamp - ts_str = str(datetime.now(timezone.utc).timestamp()) - if link.get('timestamp'): - # chrome/ff histories use a very precise timestamp - ts_str = 
str(link['timestamp'] / 10000000) - elif link.get('time'): - ts_str = str(json_date(link['time'].split(',', 1)[0]).timestamp()) - elif link.get('created_at'): - ts_str = str(json_date(link['created_at']).timestamp()) - elif link.get('created'): - ts_str = str(json_date(link['created']).timestamp()) - elif link.get('date'): - ts_str = str(json_date(link['date']).timestamp()) - elif link.get('bookmarked'): - ts_str = str(json_date(link['bookmarked']).timestamp()) - elif link.get('saved'): - ts_str = str(json_date(link['saved']).timestamp()) - - # Parse the title - title = None - if link.get('title'): - title = link['title'].strip() - elif link.get('description'): - title = link['description'].replace(' — Readability', '').strip() - elif link.get('name'): - title = link['name'].strip() - - # if we have a list, join it with commas - tags = link.get('tags') - if type(tags) == list: - tags = ','.join(tags) - elif type(tags) == str: - # if there's no comma, assume it was space-separated - if ',' not in tags: - tags = tags.replace(' ', ',') - - yield Link( - url=htmldecode(url), - timestamp=ts_str, - title=htmldecode(title) or None, - tags=htmldecode(tags), - sources=[json_file.name], - ) - + yield jsonObjectToLink(link,json_file.name) KEY = 'json' NAME = 'Generic JSON' diff --git a/archivebox/parsers/generic_jsonl.py b/archivebox/parsers/generic_jsonl.py new file mode 100644 index 00000000..8ee94b28 --- /dev/null +++ b/archivebox/parsers/generic_jsonl.py @@ -0,0 +1,34 @@ +__package__ = 'archivebox.parsers' + +import json + +from typing import IO, Iterable +from datetime import datetime, timezone + +from ..index.schema import Link +from ..util import ( + htmldecode, + enforce_types, +) + +from .generic_json import jsonObjectToLink + +def parse_line(line: str): + if line.strip() != "": + return json.loads(line) + +@enforce_types +def parse_generic_jsonl_export(json_file: IO[str], **_kwargs) -> Iterable[Link]: + """Parse JSONL format bookmarks export files""" + + 
json_file.seek(0) + + links = [ parse_line(line) for line in json_file ] + + for link in links: + if link: + yield jsonObjectToLink(link,json_file.name) + +KEY = 'jsonl' +NAME = 'Generic JSONL' +PARSER = parse_generic_jsonl_export diff --git a/tests/mock_server/templates/example-single.jsonl b/tests/mock_server/templates/example-single.jsonl new file mode 100644 index 00000000..492c906d --- /dev/null +++ b/tests/mock_server/templates/example-single.jsonl @@ -0,0 +1 @@ +{"href":"http://127.0.0.1:8080/static/example.com.html","description":"Example","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"Tag1 Tag2","trap":"http://www.example.com/should-not-exist"} diff --git a/tests/mock_server/templates/example.jsonl b/tests/mock_server/templates/example.jsonl new file mode 100644 index 00000000..de0b3b5c --- /dev/null +++ b/tests/mock_server/templates/example.jsonl @@ -0,0 +1,4 @@ +{"href":"http://127.0.0.1:8080/static/example.com.html","description":"Example","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"Tag1 Tag2","trap":"http://www.example.com/should-not-exist"} +{"href":"http://127.0.0.1:8080/static/iana.org.html","description":"Example 2","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:43Z","shared":"no","toread":"no","tags":"Tag3,Tag4 with Space"} +{"href":"http://127.0.0.1:8080/static/shift_jis.html","description":"Example 2","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:44Z","shared":"no","toread":"no","tags":["Tag5","Tag6 with Space"]} +{"href":"http://127.0.0.1:8080/static/title_og_with_html","description":"Example 
2","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:45Z","shared":"no","toread":"no"} diff --git a/tests/test_add.py b/tests/test_add.py index 972db2e8..c899b320 100644 --- a/tests/test_add.py +++ b/tests/test_add.py @@ -216,3 +216,73 @@ def test_atom(tmp_path, process, disable_extractors_dict): tags = list(map(lambda x: x[0], tags)) assert "Tag1" in tags assert "Tag2" in tags + +def test_jsonl(tmp_path, process, disable_extractors_dict): + with open('../../mock_server/templates/example.jsonl', 'r', encoding='utf-8') as f: + arg_process = subprocess.run( + ["archivebox", "add", "--index-only", "--parser=jsonl"], + stdin=f, + capture_output=True, + env=disable_extractors_dict, + ) + + conn = sqlite3.connect("index.sqlite3") + c = conn.cursor() + urls = c.execute("SELECT url from core_snapshot").fetchall() + tags = c.execute("SELECT name from core_tag").fetchall() + conn.commit() + conn.close() + + urls = list(map(lambda x: x[0], urls)) + assert "http://127.0.0.1:8080/static/example.com.html" in urls + assert "http://127.0.0.1:8080/static/iana.org.html" in urls + assert "http://127.0.0.1:8080/static/shift_jis.html" in urls + assert "http://127.0.0.1:8080/static/title_og_with_html" in urls + # if the following URL appears, we must have fallen back to another parser + assert not "http://www.example.com/should-not-exist" in urls + + tags = list(map(lambda x: x[0], tags)) + assert "Tag1" in tags + assert "Tag2" in tags + assert "Tag3" in tags + assert "Tag4 with Space" in tags + assert "Tag5" in tags + assert "Tag6 with Space" in tags + +def test_jsonl_single(tmp_path, process, disable_extractors_dict): + with open('../../mock_server/templates/example-single.jsonl', 'r', encoding='utf-8') as f: + arg_process = subprocess.run( + ["archivebox", "add", "--index-only", "--parser=jsonl"], + stdin=f, + capture_output=True, + env=disable_extractors_dict, + ) + + conn = sqlite3.connect("index.sqlite3") + c 
= conn.cursor() + urls = c.execute("SELECT url from core_snapshot").fetchall() + tags = c.execute("SELECT name from core_tag").fetchall() + conn.commit() + conn.close() + + urls = list(map(lambda x: x[0], urls)) + assert "http://127.0.0.1:8080/static/example.com.html" in urls + # if the following URL appears, we must have fallen back to another parser + assert not "http://www.example.com/should-not-exist" in urls + + tags = list(map(lambda x: x[0], tags)) + assert "Tag1" in tags + assert "Tag2" in tags + +# make sure that JSON parser rejects a single line of JSONL which is valid +# JSON but not our expected format +def test_json_single(tmp_path, process, disable_extractors_dict): + with open('../../mock_server/templates/example-single.jsonl', 'r', encoding='utf-8') as f: + arg_process = subprocess.run( + ["archivebox", "add", "--index-only", "--parser=json"], + stdin=f, + capture_output=True, + env=disable_extractors_dict, + ) + + assert 'expects list of objects' in arg_process.stderr.decode("utf-8") From 1fc5d7c5c8aa9075ee05d7f7a7e2c8dc1d23fcd0 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 18 Mar 2024 14:39:09 -0700 Subject: [PATCH 153/166] add USER_AGENT config option to set all USER_AGENTs at once --- archivebox/config.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/archivebox/config.py b/archivebox/config.py index abc83f79..74e7ee58 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -142,9 +142,10 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = { 'CHECK_SSL_VALIDITY': {'type': bool, 'default': True}, 'MEDIA_MAX_SIZE': {'type': str, 'default': '750m'}, - 'CURL_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) curl/{CURL_VERSION}'}, - 'WGET_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) 
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) wget/{WGET_VERSION}'}, - 'CHROME_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)'}, + 'USER_AGENT': {'type': str, 'default': None}, + 'CURL_USER_AGENT': {'type': str, 'default': lambda c: c['USER_AGENT'] or 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) curl/{CURL_VERSION}'}, + 'WGET_USER_AGENT': {'type': str, 'default': lambda c: c['USER_AGENT'] or 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) wget/{WGET_VERSION}'}, + 'CHROME_USER_AGENT': {'type': str, 'default': lambda c: c['USER_AGENT'] or 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)'}, 'COOKIES_FILE': {'type': str, 'default': None}, 'CHROME_USER_DATA_DIR': {'type': str, 'default': None}, From c5bb99dce1cfc9f5f873f3d6a63bc1a92295690f Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 18 Mar 2024 14:40:40 -0700 Subject: [PATCH 154/166] explicitly use Default profile inside user data dir --- archivebox/util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/archivebox/util.py b/archivebox/util.py index 3647d538..3814c23f 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -299,10 +299,11 @@ def chrome_args(**options) -> List[str]: if options['CHROME_USER_DATA_DIR']: cmd_args.append('--user-data-dir={}'.format(options['CHROME_USER_DATA_DIR'])) - + 
cmd_args.append('--profile-directory=Default') return dedupe(cmd_args) + def chrome_cleanup(): """ Cleans up any state or runtime files that chrome leaves behind when killed by From c0b5dbcecb3bc5c9ea6690d79ac43e60335202b7 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 18 Mar 2024 14:41:39 -0700 Subject: [PATCH 155/166] create new data/personas dir to hold cookies and chrome profiles --- archivebox/config.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/archivebox/config.py b/archivebox/config.py index 74e7ee58..8b2f3a7e 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -281,6 +281,7 @@ TEMPLATES_DIR_NAME = 'templates' ARCHIVE_DIR_NAME = 'archive' SOURCES_DIR_NAME = 'sources' LOGS_DIR_NAME = 'logs' +PERSONAS_DIR_NAME = 'personas' SQL_INDEX_FILENAME = 'index.sqlite3' JSON_INDEX_FILENAME = 'index.json' HTML_INDEX_FILENAME = 'index.html' @@ -357,6 +358,7 @@ ALLOWED_IN_OUTPUT_DIR = { ARCHIVE_DIR_NAME, SOURCES_DIR_NAME, LOGS_DIR_NAME, + PERSONAS_DIR_NAME, SQL_INDEX_FILENAME, f'{SQL_INDEX_FILENAME}-wal', f'{SQL_INDEX_FILENAME}-shm', @@ -507,6 +509,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = { 'ARCHIVE_DIR': {'default': lambda c: c['OUTPUT_DIR'] / ARCHIVE_DIR_NAME}, 'SOURCES_DIR': {'default': lambda c: c['OUTPUT_DIR'] / SOURCES_DIR_NAME}, 'LOGS_DIR': {'default': lambda c: c['OUTPUT_DIR'] / LOGS_DIR_NAME}, + 'PERSONAS_DIR': {'default': lambda c: c['OUTPUT_DIR'] / PERSONAS_DIR_NAME}, 'CONFIG_FILE': {'default': lambda c: Path(c['CONFIG_FILE']).resolve() if c['CONFIG_FILE'] else c['OUTPUT_DIR'] / CONFIG_FILENAME}, 'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and Path(c['COOKIES_FILE']).resolve()}, 'CHROME_USER_DATA_DIR': {'default': lambda c: Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None}, @@ -1026,6 +1029,11 @@ def get_data_locations(config: ConfigDict) -> ConfigValue: 'enabled': True, 'is_valid': config['LOGS_DIR'].exists(), }, + 'PERSONAS': { + 'path': config['PERSONAS'].resolve(), + 
'enabled': True, + 'is_valid': config['PERSONAS'].exists(), + }, 'ARCHIVE_DIR': { 'path': config['ARCHIVE_DIR'].resolve(), 'enabled': True, @@ -1373,6 +1381,8 @@ def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CO (Path(output_dir) / SOURCES_DIR_NAME).mkdir(exist_ok=True) (Path(output_dir) / LOGS_DIR_NAME).mkdir(exist_ok=True) + (Path(output_dir) / PERSONAS_DIR_NAME).mkdir(exist_ok=True) + (Path(output_dir) / PERSONAS_DIR_NAME / 'Default').mkdir(exist_ok=True) From 8007e97c3f93dc763c95737e1452af95ba73ff5c Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 18 Mar 2024 14:41:57 -0700 Subject: [PATCH 156/166] point archivebox to novnc display container by default --- Dockerfile | 9 ++++++++- docker-compose.yml | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 541c338a..82647329 100644 --- a/Dockerfile +++ b/Dockerfile @@ -266,7 +266,14 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T # Setup ArchiveBox runtime config WORKDIR "$DATA_DIR" -ENV IN_DOCKER=True +ENV IN_DOCKER=True \ + DISPLAY=novnc:0.0 \ + CUSTOM_TEMPLATES_DIR=/data/templates \ + CHROME_USER_DATA_DIR=/data/personas/Default/chromium \ + GOOGLE_API_KEY=no \ + GOOGLE_DEFAULT_CLIENT_ID=no \ + GOOGLE_DEFAULT_CLIENT_SECRET=no \ + ALLOWED_HOSTS=* ## No need to set explicitly, these values will be autodetected by archivebox in docker: # CHROME_SANDBOX=False \ # WGET_BINARY="wget" \ diff --git a/docker-compose.yml b/docker-compose.yml index d8342216..ea3d3ab7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -124,6 +124,21 @@ services: # - ./data:/var/www + ### Example: Watch the ArchiveBox browser in realtime as it archives things, + # or remote control it to set up logins and credentials for sites you want to archive. 
+ # https://github.com/ArchiveBox/ArchiveBox/wiki/Chromium-Install#setting-up-a-chromium-user-profile + + novnc: + image: theasp/novnc:latest + environment: + - DISPLAY_WIDTH=1920 + - DISPLAY_HEIGHT=1080 + - RUN_XTERM=no + ports: + # to view/control ArchiveBox's browser, visit: http://localhost:8080/vnc.html + - "8080:8080" + + ### Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel # wireguard: From 8b1b01e508bf5827fd8d98a9cd1cdaf028d09a15 Mon Sep 17 00:00:00 2001 From: jim winstead Date: Mon, 25 Mar 2024 17:46:01 -0700 Subject: [PATCH 157/166] Update to Django 4.2.x, now in LTS until April 2026 --- archivebox/core/__init__.py | 1 - archivebox/core/admin.py | 177 ++++++++++++++++++++---------------- archivebox/core/apps.py | 2 - archivebox/core/settings.py | 4 - archivebox/core/urls.py | 4 +- pyproject.toml | 4 +- 6 files changed, 105 insertions(+), 87 deletions(-) diff --git a/archivebox/core/__init__.py b/archivebox/core/__init__.py index 9cd0ce16..ac3ec769 100644 --- a/archivebox/core/__init__.py +++ b/archivebox/core/__init__.py @@ -1,3 +1,2 @@ __package__ = 'archivebox.core' -default_app_config = 'archivebox.core.apps.CoreConfig' diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py index 65baa52b..172a8caf 100644 --- a/archivebox/core/admin.py +++ b/archivebox/core/admin.py @@ -48,6 +48,60 @@ GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': VERSIONS_AVAILABLE, # TODO: https://stackoverflow.com/questions/40760880/add-custom-button-to-django-admin-panel +class ArchiveBoxAdmin(admin.AdminSite): + site_header = 'ArchiveBox' + index_title = 'Links' + site_title = 'Index' + namespace = 'admin' + + def get_urls(self): + return [ + path('core/snapshot/add/', self.add_view, name='Add'), + ] + super().get_urls() + + def add_view(self, request): + if not request.user.is_authenticated: + return redirect(f'/admin/login/?next={request.path}') + + request.current_app = self.name + context = { + **self.each_context(request), + 
'title': 'Add URLs', + } + + if request.method == 'GET': + context['form'] = AddLinkForm() + + elif request.method == 'POST': + form = AddLinkForm(request.POST) + if form.is_valid(): + url = form.cleaned_data["url"] + print(f'[+] Adding URL: {url}') + depth = 0 if form.cleaned_data["depth"] == "0" else 1 + input_kwargs = { + "urls": url, + "depth": depth, + "update_all": False, + "out_dir": OUTPUT_DIR, + } + add_stdout = StringIO() + with redirect_stdout(add_stdout): + add(**input_kwargs) + print(add_stdout.getvalue()) + + context.update({ + "stdout": ansi_to_html(add_stdout.getvalue().strip()), + "form": AddLinkForm() + }) + else: + context["form"] = form + + return render(template_name='add.html', request=request, context=context) + +archivebox_admin = ArchiveBoxAdmin() +archivebox_admin.register(get_user_model()) +archivebox_admin.disable_action('delete_selected') + class ArchiveResultInline(admin.TabularInline): model = ArchiveResult @@ -57,11 +111,11 @@ class TagInline(admin.TabularInline): from django.contrib.admin.helpers import ActionForm from django.contrib.admin.widgets import AutocompleteSelectMultiple -# WIP: broken by Django 3.1.2 -> 4.0 migration class AutocompleteTags: model = Tag search_fields = ['name'] name = 'tags' + remote_field = TagInline class AutocompleteTagsAdminStub: name = 'admin' @@ -71,7 +125,6 @@ class SnapshotActionForm(ActionForm): tags = forms.ModelMultipleChoiceField( queryset=Tag.objects.all(), required=False, - # WIP: broken by Django 3.1.2 -> 4.0 migration widget=AutocompleteSelectMultiple( AutocompleteTags(), AutocompleteTagsAdminStub(), @@ -90,6 +143,7 @@ class SnapshotActionForm(ActionForm): # ) +@admin.register(Snapshot, site=archivebox_admin) class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin): list_display = ('added', 'title_str', 'files', 'size', 'url_str') sort_fields = ('title_str', 'url_str', 'added', 'files') @@ -176,6 +230,10 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin): obj.id, ) + 
@admin.display( + description='Title', + ordering='title', + ) def title_str(self, obj): canon = obj.as_link().canonical_outputs() tags = ''.join( @@ -197,12 +255,17 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin): urldecode(htmldecode(obj.latest_title or obj.title or ''))[:128] or 'Pending...' ) + mark_safe(f' {tags}') + @admin.display( + description='Files Saved', + ordering='archiveresult_count', + ) def files(self, obj): return snapshot_icons(obj) - files.admin_order_field = 'archiveresult_count' - files.short_description = 'Files Saved' + @admin.display( + ordering='archiveresult_count' + ) def size(self, obj): archive_size = (Path(obj.link_dir) / 'index.html').exists() and obj.archive_size if archive_size: @@ -217,8 +280,11 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin): size_txt, ) - size.admin_order_field = 'archiveresult_count' + @admin.display( + description='Original URL', + ordering='url', + ) def url_str(self, obj): return format_html( '{}', @@ -255,65 +321,76 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin): # print('[*] Got request', request.method, request.POST) # return super().changelist_view(request, extra_context=None) + @admin.action( + description="Pull" + ) def update_snapshots(self, request, queryset): archive_links([ snapshot.as_link() for snapshot in queryset ], out_dir=OUTPUT_DIR) - update_snapshots.short_description = "Pull" + @admin.action( + description="⬇️ Title" + ) def update_titles(self, request, queryset): archive_links([ snapshot.as_link() for snapshot in queryset ], overwrite=True, methods=('title','favicon'), out_dir=OUTPUT_DIR) - update_titles.short_description = "⬇️ Title" + @admin.action( + description="Re-Snapshot" + ) def resnapshot_snapshot(self, request, queryset): for snapshot in queryset: timestamp = datetime.now(timezone.utc).isoformat('T', 'seconds') new_url = snapshot.url.split('#')[0] + f'#{timestamp}' add(new_url, tag=snapshot.tags_str()) - 
resnapshot_snapshot.short_description = "Re-Snapshot" + @admin.action( + description="Reset" + ) def overwrite_snapshots(self, request, queryset): archive_links([ snapshot.as_link() for snapshot in queryset ], overwrite=True, out_dir=OUTPUT_DIR) - overwrite_snapshots.short_description = "Reset" + @admin.action( + description="Delete" + ) def delete_snapshots(self, request, queryset): remove(snapshots=queryset, yes=True, delete=True, out_dir=OUTPUT_DIR) - delete_snapshots.short_description = "Delete" + @admin.action( + description="+" + ) def add_tags(self, request, queryset): tags = request.POST.getlist('tags') print('[+] Adding tags', tags, 'to Snapshots', queryset) for obj in queryset: obj.tags.add(*tags) - add_tags.short_description = "+" + @admin.action( + description="–" + ) def remove_tags(self, request, queryset): tags = request.POST.getlist('tags') print('[-] Removing tags', tags, 'to Snapshots', queryset) for obj in queryset: obj.tags.remove(*tags) - remove_tags.short_description = "–" - title_str.short_description = 'Title' - url_str.short_description = 'Original URL' - - title_str.admin_order_field = 'title' - url_str.admin_order_field = 'url' + +@admin.register(Tag, site=archivebox_admin) class TagAdmin(admin.ModelAdmin): list_display = ('slug', 'name', 'num_snapshots', 'snapshots', 'id') sort_fields = ('id', 'name', 'slug') @@ -344,6 +421,7 @@ class TagAdmin(admin.ModelAdmin): ) + (f'
and {total_count-10} more...' if obj.snapshot_set.count() > 10 else '')) +@admin.register(ArchiveResult, site=archivebox_admin) class ArchiveResultAdmin(admin.ModelAdmin): list_display = ('id', 'start_ts', 'extractor', 'snapshot_str', 'tags_str', 'cmd_str', 'status', 'output_str') sort_fields = ('start_ts', 'extractor', 'status') @@ -356,6 +434,9 @@ class ArchiveResultAdmin(admin.ModelAdmin): ordering = ['-start_ts'] list_per_page = SNAPSHOTS_PER_PAGE + @admin.display( + description='snapshot' + ) def snapshot_str(self, obj): return format_html( '[{}]
' @@ -365,6 +446,9 @@ class ArchiveResultAdmin(admin.ModelAdmin): obj.snapshot.url[:128], ) + @admin.display( + description='tags' + ) def tags_str(self, obj): return obj.snapshot.tags_str() @@ -381,62 +465,3 @@ class ArchiveResultAdmin(admin.ModelAdmin): obj.output if (obj.status == 'succeeded') and obj.extractor not in ('title', 'archive_org') else 'index.html', obj.output, ) - - tags_str.short_description = 'tags' - snapshot_str.short_description = 'snapshot' - -class ArchiveBoxAdmin(admin.AdminSite): - site_header = 'ArchiveBox' - index_title = 'Links' - site_title = 'Index' - - def get_urls(self): - return [ - path('core/snapshot/add/', self.add_view, name='Add'), - ] + super().get_urls() - - def add_view(self, request): - if not request.user.is_authenticated: - return redirect(f'/admin/login/?next={request.path}') - - request.current_app = self.name - context = { - **self.each_context(request), - 'title': 'Add URLs', - } - - if request.method == 'GET': - context['form'] = AddLinkForm() - - elif request.method == 'POST': - form = AddLinkForm(request.POST) - if form.is_valid(): - url = form.cleaned_data["url"] - print(f'[+] Adding URL: {url}') - depth = 0 if form.cleaned_data["depth"] == "0" else 1 - input_kwargs = { - "urls": url, - "depth": depth, - "update_all": False, - "out_dir": OUTPUT_DIR, - } - add_stdout = StringIO() - with redirect_stdout(add_stdout): - add(**input_kwargs) - print(add_stdout.getvalue()) - - context.update({ - "stdout": ansi_to_html(add_stdout.getvalue().strip()), - "form": AddLinkForm() - }) - else: - context["form"] = form - - return render(template_name='add.html', request=request, context=context) - -admin.site = ArchiveBoxAdmin() -admin.site.register(get_user_model()) -admin.site.register(Snapshot, SnapshotAdmin) -admin.site.register(Tag, TagAdmin) -admin.site.register(ArchiveResult, ArchiveResultAdmin) -admin.site.disable_action('delete_selected') diff --git a/archivebox/core/apps.py b/archivebox/core/apps.py index 
32088de4..f3e35dbd 100644 --- a/archivebox/core/apps.py +++ b/archivebox/core/apps.py @@ -3,8 +3,6 @@ from django.apps import AppConfig class CoreConfig(AppConfig): name = 'core' - # WIP: broken by Django 3.1.2 -> 4.0 migration - default_auto_field = 'django.db.models.UUIDField' def ready(self): from .auth import register_signals diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index 06e798ab..9b80c336 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -269,9 +269,6 @@ AUTH_PASSWORD_VALIDATORS = [ {'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'}, ] -# WIP: broken by Django 3.1.2 -> 4.0 migration -DEFAULT_AUTO_FIELD = 'django.db.models.UUIDField' - ################################################################################ ### Shell Settings ################################################################################ @@ -290,7 +287,6 @@ if IS_SHELL: LANGUAGE_CODE = 'en-us' USE_I18N = True -USE_L10N = True USE_TZ = True DATETIME_FORMAT = 'Y-m-d g:iA' SHORT_DATETIME_FORMAT = 'Y-m-d h:iA' diff --git a/archivebox/core/urls.py b/archivebox/core/urls.py index 1111ead4..ce38af32 100644 --- a/archivebox/core/urls.py +++ b/archivebox/core/urls.py @@ -1,4 +1,4 @@ -from django.contrib import admin +from .admin import archivebox_admin from django.urls import path, include from django.views import static @@ -29,7 +29,7 @@ urlpatterns = [ path('accounts/', include('django.contrib.auth.urls')), - path('admin/', admin.site.urls), + path('admin/', archivebox_admin.urls), path('health/', HealthCheckView.as_view(), name='healthcheck'), path('error/', lambda _: 1/0), diff --git a/pyproject.toml b/pyproject.toml index eedea90c..969b6318 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,8 +13,8 @@ dependencies = [ # pdm update [--unconstrained] "croniter>=0.3.34", "dateparser>=1.0.0", - "django-extensions>=3.0.3", - "django>=3.1.3,<3.2", + "django-extensions>=3.2.3", + "django>=4.2.0,<5.0", 
"feedparser>=6.0.11", "ipython>5.0.0", "mypy-extensions>=0.4.3", From a4453b6f8745cbe7c21eceeb3cce05eb4fb71111 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 26 Mar 2024 14:19:25 -0700 Subject: [PATCH 158/166] fix PERSONAS PERSONAS_DIR typo --- archivebox/config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/archivebox/config.py b/archivebox/config.py index 8b2f3a7e..a08d73e6 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -1029,10 +1029,10 @@ def get_data_locations(config: ConfigDict) -> ConfigValue: 'enabled': True, 'is_valid': config['LOGS_DIR'].exists(), }, - 'PERSONAS': { - 'path': config['PERSONAS'].resolve(), + 'PERSONAS_DIR': { + 'path': config['PERSONAS_DIR'].resolve(), 'enabled': True, - 'is_valid': config['PERSONAS'].exists(), + 'is_valid': config['PERSONAS_DIR'].exists(), }, 'ARCHIVE_DIR': { 'path': config['ARCHIVE_DIR'].resolve(), From ac73fb51297a49f3f6087796472832f9563c0cbe Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 26 Mar 2024 15:22:40 -0700 Subject: [PATCH 159/166] merge fixes --- Dockerfile | 13 +++++---- README.md | 2 +- archivebox/index/__init__.py | 2 +- docker-compose.yml | 54 +++++++++++++++++------------------- package.json | 2 +- pyproject.toml | 8 +++--- 6 files changed, 40 insertions(+), 41 deletions(-) diff --git a/Dockerfile b/Dockerfile index 82647329..fbb56a78 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,7 +10,7 @@ # docker run -v "$PWD/data":/data -p 8000:8000 archivebox server # Multi-arch build: # docker buildx create --use -# docker buildx build . --platform=linux/amd64,linux/arm64,linux/arm/v7 --push -t archivebox/archivebox:latest -t archivebox/archivebox:dev +# docker buildx build . --platform=linux/amd64,linux/arm64--push -t archivebox/archivebox:latest -t archivebox/archivebox:dev # # Read more about [developing Archivebox](https://github.com/ArchiveBox/ArchiveBox#archivebox-development). 
@@ -194,10 +194,12 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T && playwright install --with-deps chromium \ && export CHROME_BINARY="$(python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')"; \ else \ - # fall back to installing Chromium via apt-get on platforms not supported by playwright (e.g. risc, ARMv7, etc.) - apt-get install -qq -y -t bookworm-backports --no-install-recommends \ - chromium \ - && export CHROME_BINARY="$(which chromium)"; \ + # fall back to installing Chromium via apt-get on platforms not supported by playwright (e.g. risc, ARMv7, etc.) + # apt-get install -qq -y -t bookworm-backports --no-install-recommends \ + # chromium \ + # && export CHROME_BINARY="$(which chromium)"; \ + echo 'armv7 no longer supported in versions after v0.7.3' \ + exit 1; \ fi \ && rm -rf /var/lib/apt/lists/* \ && ln -s "$CHROME_BINARY" /usr/bin/chromium-browser \ @@ -275,7 +277,6 @@ ENV IN_DOCKER=True \ GOOGLE_DEFAULT_CLIENT_SECRET=no \ ALLOWED_HOSTS=* ## No need to set explicitly, these values will be autodetected by archivebox in docker: - # CHROME_SANDBOX=False \ # WGET_BINARY="wget" \ # YOUTUBEDL_BINARY="yt-dlp" \ # CHROME_BINARY="/usr/bin/chromium-browser" \ diff --git a/README.md b/README.md index 27a84956..4d1bcf0d 100644 --- a/README.md +++ b/README.md @@ -1076,7 +1076,7 @@ Because ArchiveBox is designed to ingest a large volume of URLs with multiple co
  • Don't store large collections on older filesystems like EXT3/FAT as they may not be able to handle more than 50k directory entries in the data/archive/ folder.
  • Try to keep the data/index.sqlite3 file on local drive (not a network mount) or SSD for maximum performance, however the data/archive/ folder can be on a network mount or slower HDD.
  • -
  • If using Docker or NFS/SMB/FUSE for the `data/archive/` folder, you may need to set PUID & PGID and disable root_squash on your fileshare server. +
  • If using Docker or NFS/SMB/FUSE for the data/archive/ folder, you may need to set PUID & PGID and disable root_squash on your fileshare server.
  • diff --git a/archivebox/index/__init__.py b/archivebox/index/__init__.py index 9912b4c7..fb3688f3 100644 --- a/archivebox/index/__init__.py +++ b/archivebox/index/__init__.py @@ -250,7 +250,7 @@ def load_main_index(out_dir: Path=OUTPUT_DIR, warn: bool=True) -> List[Link]: """parse and load existing index with any new links from import_path merged in""" from core.models import Snapshot try: - return Snapshot.objects.all() + return Snapshot.objects.all().only('id') except (KeyboardInterrupt, SystemExit): raise SystemExit(0) diff --git a/docker-compose.yml b/docker-compose.yml index ea3d3ab7..a8293705 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,32 +8,26 @@ # Documentation: # https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose -version: '3.9' services: archivebox: - #image: ${DOCKER_IMAGE:-archivebox/archivebox:dev} - image: archivebox/archivebox:dev - command: server --quick-init 0.0.0.0:8000 + image: archivebox/archivebox ports: - 8000:8000 volumes: - ./data:/data - # - ./etc/crontabs:/var/spool/cron/crontabs # uncomment this and archivebox_scheduler below to set up automatic recurring archive jobs - # - ./archivebox:/app/archivebox # uncomment this to mount the ArchiveBox source code at runtime (for developers working on archivebox) - # build: . 
# uncomment this to build the image from source code at buildtime (for developers working on archivebox) environment: - ALLOWED_HOSTS=* # restrict this to only accept incoming traffic via specific domain name - # - PUBLIC_INDEX=True # set to False to prevent anonymous users from viewing snapshot list - # - PUBLIC_SNAPSHOTS=True # set to False to prevent anonymous users from viewing snapshot content - # - PUBLIC_ADD_VIEW=False # set to True to allow anonymous users to submit new URLs to archive # - ADMIN_USERNAME=admin # create an admin user on first run with the given user/pass combo # - ADMIN_PASSWORD=SomeSecretPassword # - PUID=911 # set to your host user's UID & GID if you encounter permissions issues # - PGID=911 - # - SEARCH_BACKEND_ENGINE=sonic # uncomment these and sonic container below for better full-text search - # - SEARCH_BACKEND_HOST_NAME=sonic - # - SEARCH_BACKEND_PASSWORD=SomeSecretPassword + # - PUBLIC_INDEX=True # set to False to prevent anonymous users from viewing snapshot list + # - PUBLIC_SNAPSHOTS=True # set to False to prevent anonymous users from viewing snapshot content + # - PUBLIC_ADD_VIEW=False # set to True to allow anonymous users to submit new URLs to archive + - SEARCH_BACKEND_ENGINE=sonic # uncomment these and sonic container below for better full-text search + - SEARCH_BACKEND_HOST_NAME=sonic + - SEARCH_BACKEND_PASSWORD=SomeSecretPassword # - MEDIA_MAX_SIZE=750m # increase this filesize limit to allow archiving larger audio/video files # - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out # - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs) @@ -42,7 +36,7 @@ services: # add further configuration options from archivebox/config.py as needed (to apply them only to this container) # or set using `docker compose run archivebox config --set SOME_KEY=someval` (to persist config across all containers) - # For ad-blocking during archiving, 
uncomment this section and pihole service section below + # For ad-blocking during archiving, uncomment this section and pihole service section below # networks: # - dns # dns: @@ -51,22 +45,26 @@ services: ######## Optional Addons: tweak examples below as needed for your specific use case ######## - ### Example: To run the Sonic full-text search backend, first download the config file to sonic.cfg - # $ curl -O https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/etc/sonic.cfg - # After starting, backfill any existing Snapshots into the full-text index: + ### Runs the Sonic full-text search backend, config file is auto-downloaded into sonic.cfg: + # After starting, backfill any existing Snapshots into the full-text index: # $ docker-compose run archivebox update --index-only - # sonic: - # image: valeriansaliou/sonic:latest - # expose: - # - 1491 - # environment: - # - SEARCH_BACKEND_PASSWORD=SomeSecretPassword - # volumes: - # - ./sonic.cfg:/etc/sonic.cfg:ro - # - ./data/sonic:/var/lib/sonic/store - - + sonic: + image: valeriansaliou/sonic + build: + dockerfile_inline: | + FROM quay.io/curl/curl:latest AS setup + RUN curl -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/main/etc/sonic.cfg' > /tmp/sonic.cfg + FROM valeriansaliou/sonic:latest + COPY --from=setup /tmp/sonic.cfg /etc/sonic.cfg + expose: + - 1491 + environment: + - SEARCH_BACKEND_PASSWORD=SomeSecretPassword + volumes: + - ./etc/sonic.cfg:/etc/sonic.cfg + - ./data/sonic:/var/lib/sonic/store + ### Example: To run pihole in order to block ad/tracker requests during archiving, # uncomment this block and set up pihole using its admin interface diff --git a/package.json b/package.json index 1377ef99..3c42a8b9 100644 --- a/package.json +++ b/package.json @@ -8,6 +8,6 @@ "dependencies": { "@postlight/parser": "^2.2.3", "readability-extractor": "github:ArchiveBox/readability-extractor", - "single-file-cli": "^1.1.46" + "single-file-cli": "^1.1.54" } } diff --git a/pyproject.toml 
b/pyproject.toml index 969b6318..98a1a055 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,15 +15,16 @@ dependencies = [ "dateparser>=1.0.0", "django-extensions>=3.2.3", "django>=4.2.0,<5.0", + "setuptools>=69.0.3", "feedparser>=6.0.11", "ipython>5.0.0", "mypy-extensions>=0.4.3", "python-crontab>=2.5.1", "requests>=2.24.0", "w3lib>=1.22.0", - "yt-dlp>=2023.10.13", + "yt-dlp>=2024.3.10", # dont add playwright becuase packages without sdists cause trouble on many build systems that refuse to install wheel-only packages - # "playwright>=1.39.0; platform_machine != 'armv7l'", + "playwright>=1.39.0; platform_machine != 'armv7l'", ] classifiers = [ @@ -64,11 +65,11 @@ classifiers = [ sonic = [ # echo "deb [signed-by=/usr/share/keyrings/valeriansaliou_sonic.gpg] https://packagecloud.io/valeriansaliou/sonic/debian/ bookworm main" > /etc/apt/sources.list.d/valeriansaliou_sonic.list # curl -fsSL https://packagecloud.io/valeriansaliou/sonic/gpgkey | gpg --dearmor -o /usr/share/keyrings/valeriansaliou_sonic.gpg + # apt install sonic "sonic-client>=0.0.5", ] ldap = [ # apt install libldap2-dev libsasl2-dev python3-ldap - "setuptools>=69.0.3", "python-ldap>=3.4.3", "django-auth-ldap>=4.1.0", ] @@ -83,7 +84,6 @@ ldap = [ [tool.pdm.dev-dependencies] dev = [ # building - "setuptools>=69.0.3", "wheel", "pdm", "homebrew-pypi-poet>=0.10.0", From e48159b8a0011d934facc38cb71ae6e738980da9 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 26 Mar 2024 15:23:43 -0700 Subject: [PATCH 160/166] cleanup docker-compose by storing crontabs in data dir --- archivebox/config.py | 1 + bin/docker_entrypoint.sh | 11 ++++ docker-compose.yml | 119 +++++++++++++++++++-------------------- 3 files changed, 71 insertions(+), 60 deletions(-) diff --git a/archivebox/config.py b/archivebox/config.py index a08d73e6..1a75229c 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -355,6 +355,7 @@ ALLOWED_IN_OUTPUT_DIR = { 'static', 'sonic', 'search.sqlite3', + 'crontabs', ARCHIVE_DIR_NAME, 
SOURCES_DIR_NAME, LOGS_DIR_NAME, diff --git a/bin/docker_entrypoint.sh b/bin/docker_entrypoint.sh index 74e7a3a9..4996b3d6 100755 --- a/bin/docker_entrypoint.sh +++ b/bin/docker_entrypoint.sh @@ -163,6 +163,17 @@ else fi fi +# symlink etc crontabs into place +mkdir -p "$DATA_DIR/crontabs" +if ! test -L /var/spool/cron/crontabs; then + # copy files from old location into new data dir location + for file in $(ls /var/spool/cron/crontabs); do + cp /var/spool/cron/crontabs/"$file" "$DATA_DIR/crontabs" + done + # replace old system path with symlink to data dir location + rm -Rf /var/spool/cron/crontabs + ln -s "$DATA_DIR/crontabs" /var/spool/cron/crontabs +fi # set DBUS_SYSTEM_BUS_ADDRESS & DBUS_SESSION_BUS_ADDRESS # (dbus is not actually needed, it makes chrome log fewer warnings but isn't worth making our docker images bigger) diff --git a/docker-compose.yml b/docker-compose.yml index a8293705..bfcb4f1e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,23 +11,23 @@ services: archivebox: - image: archivebox/archivebox + image: archivebox/archivebox:latest ports: - 8000:8000 volumes: - ./data:/data environment: - - ALLOWED_HOSTS=* # restrict this to only accept incoming traffic via specific domain name # - ADMIN_USERNAME=admin # create an admin user on first run with the given user/pass combo # - ADMIN_PASSWORD=SomeSecretPassword - # - PUID=911 # set to your host user's UID & GID if you encounter permissions issues - # - PGID=911 - # - PUBLIC_INDEX=True # set to False to prevent anonymous users from viewing snapshot list - # - PUBLIC_SNAPSHOTS=True # set to False to prevent anonymous users from viewing snapshot content - # - PUBLIC_ADD_VIEW=False # set to True to allow anonymous users to submit new URLs to archive - - SEARCH_BACKEND_ENGINE=sonic # uncomment these and sonic container below for better full-text search + - ALLOWED_HOSTS=* # restrict this to only accept incoming traffic via specific domain name + - PUBLIC_INDEX=True # set to False to prevent 
anonymous users from viewing snapshot list + - PUBLIC_SNAPSHOTS=True # set to False to prevent anonymous users from viewing snapshot content + - PUBLIC_ADD_VIEW=False # set to True to allow anonymous users to submit new URLs to archive + - SEARCH_BACKEND_ENGINE=sonic # uncomment these and sonic container below for better full-text search - SEARCH_BACKEND_HOST_NAME=sonic - SEARCH_BACKEND_PASSWORD=SomeSecretPassword + # - PUID=911 # set to your host user's UID & GID if you encounter permissions issues + # - PGID=911 # - MEDIA_MAX_SIZE=750m # increase this filesize limit to allow archiving larger audio/video files # - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out # - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs) @@ -45,13 +45,35 @@ services: ######## Optional Addons: tweak examples below as needed for your specific use case ######## + ### Enable ability to run regularly scheduled archiving tasks by uncommenting this container + # $ docker compose run archivebox schedule --every=day --depth=1 'https://example.com/some/rss/feed.xml' + # then restart the scheduler container to apply the changes to the schedule + # $ docker compose restart archivebox_scheduler + + archivebox_scheduler: + image: archivebox/archivebox:latest + command: schedule --foreground + environment: + - TIMEOUT=120 # increase if you see timeouts often during archiving / on slow networks + - ONLY_NEW=True # set to False to retry previously failed URLs when re-adding instead of skipping them + # - PUID=502 # set to your host user's UID & GID if you encounter permissions issues + # - PGID=20 + volumes: + - ./data:/data + # cpus: 2 # uncomment / edit these values to limit container resource consumption + # mem_limit: 2048m + # shm_size: 1024m + + ### Runs the Sonic full-text search backend, config file is auto-downloaded into sonic.cfg: # After starting, backfill any existing Snapshots into the 
full-text index: # $ docker-compose run archivebox update --index-only sonic: - image: valeriansaliou/sonic + image: valeriansaliou/sonic:latest build: + # custom build just auto-downloads archivebox's default sonic.cfg as a convenience + # not needed if you have already have /etc/sonic.cfg dockerfile_inline: | FROM quay.io/curl/curl:latest AS setup RUN curl -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/main/etc/sonic.cfg' > /tmp/sonic.cfg @@ -65,6 +87,34 @@ services: - ./etc/sonic.cfg:/etc/sonic.cfg - ./data/sonic:/var/lib/sonic/store + + ### Example: Watch the ArchiveBox browser in realtime as it archives things, + # or remote control it to set up logins and credentials for sites you want to archive. + # https://github.com/ArchiveBox/ArchiveBox/wiki/Chromium-Install#setting-up-a-chromium-user-profile + + novnc: + image: theasp/novnc:latest + environment: + - DISPLAY_WIDTH=1920 + - DISPLAY_HEIGHT=1080 + - RUN_XTERM=no + ports: + # to view/control ArchiveBox's browser, visit: http://localhost:8080/vnc.html + - "8080:8080" + + + ### Example: Put Nginx in front of the ArchiveBox server for SSL termination + + # nginx: + # image: nginx:alpine + # ports: + # - 443:443 + # - 80:80 + # volumes: + # - ./etc/nginx.conf:/etc/nginx/nginx.conf + # - ./data:/var/www + + ### Example: To run pihole in order to block ad/tracker requests during archiving, # uncomment this block and set up pihole using its admin interface @@ -86,57 +136,6 @@ services: # - ./etc/dnsmasq:/etc/dnsmasq.d - ### Example: Enable ability to run regularly scheduled archiving tasks by uncommenting this container - # $ docker compose run archivebox schedule --every=day --depth=1 'https://example.com/some/rss/feed.xml' - # then restart the scheduler container to apply the changes to the schedule - # $ docker compose restart archivebox_scheduler - - # archivebox_scheduler: - # image: ${DOCKER_IMAGE:-archivebox/archivebox:dev} - # command: schedule --foreground - # environment: - # - 
MEDIA_MAX_SIZE=750m # increase this number to allow archiving larger audio/video files - # # - TIMEOUT=60 # increase if you see timeouts often during archiving / on slow networks - # # - ONLY_NEW=True # set to False to retry previously failed URLs when re-adding instead of skipping them - # # - CHECK_SSL_VALIDITY=True # set to False to allow saving URLs w/ broken SSL certs - # # - SAVE_ARCHIVE_DOT_ORG=True # set to False to disable submitting URLs to Archive.org when archiving - # # - PUID=502 # set to your host user's UID & GID if you encounter permissions issues - # # - PGID=20 - # volumes: - # - ./data:/data - # - ./etc/crontabs:/var/spool/cron/crontabs - # # cpus: 2 # uncomment / edit these values to limit container resource consumption - # # mem_limit: 2048m - # # shm_size: 1024m - - - ### Example: Put Nginx in front of the ArchiveBox server for SSL termination - - # nginx: - # image: nginx:alpine - # ports: - # - 443:443 - # - 80:80 - # volumes: - # - ./etc/nginx.conf:/etc/nginx/nginx.conf - # - ./data:/var/www - - - ### Example: Watch the ArchiveBox browser in realtime as it archives things, - # or remote control it to set up logins and credentials for sites you want to archive. 
- # https://github.com/ArchiveBox/ArchiveBox/wiki/Chromium-Install#setting-up-a-chromium-user-profile - - novnc: - image: theasp/novnc:latest - environment: - - DISPLAY_WIDTH=1920 - - DISPLAY_HEIGHT=1080 - - RUN_XTERM=no - ports: - # to view/control ArchiveBox's browser, visit: http://localhost:8080/vnc.html - - "8080:8080" - - ### Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel # wireguard: From 9d4cc361e6952909861af4cf3548f6560c94928c Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Wed, 27 Mar 2024 20:15:27 -0700 Subject: [PATCH 161/166] Update docker-compose.yml --- docker-compose.yml | 61 +++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index bfcb4f1e..6037695b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,14 +1,12 @@ # Usage: -# docker compose run archivebox init --setup # docker compose up -# echo "https://example.com" | docker compose run archivebox archivebox add -# docker compose run archivebox add --depth=1 https://example.com/some/feed.rss -# docker compose run archivebox config --set MEDIA_MAX_SIZE=750m +# echo 'https://example.com' | docker compose run -T archivebox add +# docker compose run archivebox add --depth=1 'https://news.ycombinator.com' +# docker compose run archivebox config --set SAVE_ARCHIVE_DOT_ORG=False # docker compose run archivebox help # Documentation: # https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose - services: archivebox: image: archivebox/archivebox:latest @@ -23,11 +21,11 @@ services: - PUBLIC_INDEX=True # set to False to prevent anonymous users from viewing snapshot list - PUBLIC_SNAPSHOTS=True # set to False to prevent anonymous users from viewing snapshot content - PUBLIC_ADD_VIEW=False # set to True to allow anonymous users to submit new URLs to archive - - SEARCH_BACKEND_ENGINE=sonic # uncomment these and sonic container below for better full-text search + - 
SEARCH_BACKEND_ENGINE=sonic # tells ArchiveBox to use sonic container below for fast full-text search - SEARCH_BACKEND_HOST_NAME=sonic - SEARCH_BACKEND_PASSWORD=SomeSecretPassword # - PUID=911 # set to your host user's UID & GID if you encounter permissions issues - # - PGID=911 + # - PGID=911 # UID/GIDs <500 may clash with existing users and are not recommended # - MEDIA_MAX_SIZE=750m # increase this filesize limit to allow archiving larger audio/video files # - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out # - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs) @@ -35,7 +33,6 @@ services: # ... # add further configuration options from archivebox/config.py as needed (to apply them only to this container) # or set using `docker compose run archivebox config --set SOME_KEY=someval` (to persist config across all containers) - # For ad-blocking during archiving, uncomment this section and pihole service section below # networks: # - dns @@ -45,51 +42,50 @@ services: ######## Optional Addons: tweak examples below as needed for your specific use case ######## - ### Enable ability to run regularly scheduled archiving tasks by uncommenting this container - # $ docker compose run archivebox schedule --every=day --depth=1 'https://example.com/some/rss/feed.xml' - # then restart the scheduler container to apply the changes to the schedule + ### This optional container runs any scheduled tasks in the background, add new tasks like so: + # $ docker compose run archivebox schedule --add --every=day --depth=1 'https://example.com/some/rss/feed.xml' + # then restart the scheduler container to apply any changes to the scheduled task list: # $ docker compose restart archivebox_scheduler archivebox_scheduler: image: archivebox/archivebox:latest - command: schedule --foreground + command: schedule --foreground --update --every=day environment: - - TIMEOUT=120 # increase if you see 
timeouts often during archiving / on slow networks - - ONLY_NEW=True # set to False to retry previously failed URLs when re-adding instead of skipping them + - TIMEOUT=120 # use a higher timeout than the main container to give slow tasks more time when retrying # - PUID=502 # set to your host user's UID & GID if you encounter permissions issues # - PGID=20 volumes: - ./data:/data - # cpus: 2 # uncomment / edit these values to limit container resource consumption + # cpus: 2 # uncomment / edit these values to limit scheduler container resource consumption # mem_limit: 2048m - # shm_size: 1024m + # restart: always - ### Runs the Sonic full-text search backend, config file is auto-downloaded into sonic.cfg: - # After starting, backfill any existing Snapshots into the full-text index: + ### This runs the optional Sonic full-text search backend (much faster than default rg backend). + # If Sonic is ever started after not running for a while, update its full-text index by running: # $ docker-compose run archivebox update --index-only sonic: image: valeriansaliou/sonic:latest build: # custom build just auto-downloads archivebox's default sonic.cfg as a convenience - # not needed if you have already have /etc/sonic.cfg + # not needed after first run / if you have already have ./etc/sonic.cfg present dockerfile_inline: | - FROM quay.io/curl/curl:latest AS setup + FROM quay.io/curl/curl:latest AS config_downloader RUN curl -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/main/etc/sonic.cfg' > /tmp/sonic.cfg FROM valeriansaliou/sonic:latest - COPY --from=setup /tmp/sonic.cfg /etc/sonic.cfg + COPY --from=config_downloader /tmp/sonic.cfg /etc/sonic.cfg expose: - 1491 environment: - SEARCH_BACKEND_PASSWORD=SomeSecretPassword volumes: - - ./etc/sonic.cfg:/etc/sonic.cfg + - ./sonic.cfg:/etc/sonic.cfg - ./data/sonic:/var/lib/sonic/store - ### Example: Watch the ArchiveBox browser in realtime as it archives things, - # or remote control it to set up logins and 
credentials for sites you want to archive. + ### This container runs xvfb+noVNC so you can watch the ArchiveBox browser as it archives things, + # or remote control it to set up a chrome profile w/ login credentials for sites you want to archive. # https://github.com/ArchiveBox/ArchiveBox/wiki/Chromium-Install#setting-up-a-chromium-user-profile novnc: @@ -99,11 +95,13 @@ services: - DISPLAY_HEIGHT=1080 - RUN_XTERM=no ports: - # to view/control ArchiveBox's browser, visit: http://localhost:8080/vnc.html - - "8080:8080" + # to view/control ArchiveBox's browser, visit: http://127.0.0.1:8080/vnc.html + # restricted to access from localhost by default because it has no authentication + - 127.0.0.1:8080:8080 - ### Example: Put Nginx in front of the ArchiveBox server for SSL termination + ### Example: Put Nginx in front of the ArchiveBox server for SSL termination and static file serving. + # You can also use any other ingress provider for SSL like Apache, Caddy, Traefik, Cloudflare Tunnels, etc. # nginx: # image: nginx:alpine @@ -121,7 +119,8 @@ services: # pihole: # image: pihole/pihole:latest # ports: - # - 127.0.0.1:8090:80 # uncomment to access the admin HTTP interface on http://localhost:8090 + # # access the admin HTTP interface on http://localhost:8090 + # - 127.0.0.1:8090:80 # environment: # - WEBPASSWORD=SET_THIS_TO_SOME_SECRET_PASSWORD_FOR_ADMIN_DASHBOARD # - DNSMASQ_LISTENING=all @@ -136,7 +135,8 @@ services: # - ./etc/dnsmasq:/etc/dnsmasq.d - ### Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel + ### Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel to avoid IP blocks. + # You can also use any other VPN that works at the docker IP level, e.g. Tailscale, OpenVPN, etc.
# wireguard: # image: linuxserver/wireguard:latest @@ -167,8 +167,7 @@ services: networks: - - # network needed for pihole container to offer :53 dns resolving on fixed ip for archivebox container + # network just used for pihole container to offer :53 dns resolving on fixed ip for archivebox container dns: ipam: driver: default From 9ad99d86c1464377321ee3076432c6cea5ca1b19 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 9 Apr 2024 18:38:29 -0700 Subject: [PATCH 162/166] Update docker-compose.yml to add rclone remote storage example --- docker-compose.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index 6037695b..5f0f6b1f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -173,3 +173,24 @@ networks: driver: default config: - subnet: 172.20.0.0/24 + + +# To use remote storage for your ./data/archive (e.g. Amazon S3, Backblaze B2, Google Drive, OneDrive, SFTP, etc.) +# Follow the steps here to set up the Docker RClone Plugin https://rclone.org/docker/ +# $ docker plugin install rclone/docker-volume-rclone:amd64 --grant-all-permissions --alias rclone +# $ nano /var/lib/docker-plugins/rclone/config/rclone.conf +# [examplegdrive] +# type = drive +# scope = drive +# drive_id = 1234567... +# root_folder_id = 0Abcd... 
+# token = {"access_token":...} + +# volumes: +# archive: +# driver: rclone +# driver_opts: +# remote: 'examplegdrive:archivebox' +# allow_other: 'true' +# vfs_cache_mode: full +# poll_interval: 0 From 11acc9ceea72583632b666d056614ad5d0cc4bd9 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Fri, 12 Apr 2024 14:16:55 -0700 Subject: [PATCH 163/166] Add Dockerfile labels needed for depandabot and Docker Extension marketplace --- Dockerfile | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index fbb56a78..fb6f302c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,10 +20,24 @@ FROM python:3.11-slim-bookworm LABEL name="archivebox" \ maintainer="Nick Sweeting " \ - description="All-in-one personal internet archiving container" \ + description="All-in-one self-hosted internet archiving solution" \ homepage="https://github.com/ArchiveBox/ArchiveBox" \ - documentation="https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker" - + documentation="https://github.com/ArchiveBox/ArchiveBox/wiki/Docker" \ + org.opencontainers.image.title="ArchiveBox" \ + org.opencontainers.image.vendor="ArchiveBox" \ + org.opencontainers.image.description="All-in-one self-hosted internet archiving solution" \ + org.opencontainers.image.source="https://github.com/ArchiveBox/ArchiveBox" \ + com.docker.image.source.entrypoint="Dockerfile" \ + # TODO: release ArchiveBox as a Docker Desktop extension (requires these labels): + # https://docs.docker.com/desktop/extensions-sdk/architecture/metadata/ + com.docker.desktop.extension.api.version=">= 1.4.7" \ + com.docker.desktop.extension.icon="https://archivebox.io/icon.png" \ + com.docker.extension.publisher-url="https://archivebox.io" \ + com.docker.extension.screenshots='[{"alt": "Screenshot of Admin UI", "url": "https://github.com/ArchiveBox/ArchiveBox/assets/511499/e8e0b6f8-8fdf-4b7f-8124-c10d8699bdb2"}]' \ + com.docker.extension.detailed-description='See here for detailed documentation: 
https://wiki.archivebox.io' \ + com.docker.extension.changelog='See here for release notes: https://github.com/ArchiveBox/ArchiveBox/releases' \ + com.docker.extension.categories='database,utility-tools' + ARG TARGETPLATFORM ARG TARGETOS ARG TARGETARCH From 756e159dfe772d03002c74ba4bd88109378fdc1e Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 23 Apr 2024 17:43:01 -0700 Subject: [PATCH 164/166] add new bin/lock_pkgs.sh to generate package lockfiles consistently --- .gitignore | 4 + archivebox/package-lock.json | 2371 ++++++++++++++++++++++++++++ archivebox/package.json | 4 +- archivebox/vendor/requirements.txt | 6 + bin/build_deb.sh | 14 + bin/build_dev.sh | 16 +- bin/build_docker.sh | 6 +- bin/build_pip.sh | 15 +- bin/lock_pkgs.sh | 100 ++ package-lock.json | 482 +++--- package.json | 2 +- pdm.lock | 543 ++++--- pyproject.toml | 110 +- requirements.txt | 60 +- 14 files changed, 3210 insertions(+), 523 deletions(-) create mode 100644 archivebox/package-lock.json create mode 100644 archivebox/vendor/requirements.txt create mode 100755 bin/lock_pkgs.sh diff --git a/.gitignore b/.gitignore index e789041c..27d833f0 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,10 @@ venv/ .docker-venv/ node_modules/ +# Ignore dev lockfiles (should always be built fresh) +requirements-dev.txt +pdm.dev.lock + # Packaging artifacts .pdm-python .pdm-build diff --git a/archivebox/package-lock.json b/archivebox/package-lock.json new file mode 100644 index 00000000..4496a88b --- /dev/null +++ b/archivebox/package-lock.json @@ -0,0 +1,2371 @@ +{ + "name": "archivebox", + "version": "0.8.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "archivebox", + "version": "0.8.0", + "license": "MIT", + "dependencies": { + "@postlight/parser": "^2.2.3", + "readability-extractor": "github:ArchiveBox/readability-extractor", + "single-file-cli": "^1.1.54" + } + }, + "node_modules/@asamuzakjp/dom-selector": { + "version": "2.0.2", + "resolved": 
"https://registry.npmjs.org/@asamuzakjp/dom-selector/-/dom-selector-2.0.2.tgz", + "integrity": "sha512-x1KXOatwofR6ZAYzXRBL5wrdV0vwNxlTCK9NCuLqAzQYARqGcvFwiJA6A1ERuh+dgeA4Dxm3JBYictIes+SqUQ==", + "dependencies": { + "bidi-js": "^1.0.3", + "css-tree": "^2.3.1", + "is-potential-custom-element-name": "^1.0.1" + } + }, + "node_modules/@babel/runtime-corejs2": { + "version": "7.24.4", + "resolved": "https://registry.npmjs.org/@babel/runtime-corejs2/-/runtime-corejs2-7.24.4.tgz", + "integrity": "sha512-ZCKqyUKt/Coimg+3Kafu43yNetgYnTXzNbEGAgxc81J5sI0qFNbQ613w7PNny+SmijAmGVroL0GDvx5rG/JI5Q==", + "dependencies": { + "core-js": "^2.6.12", + "regenerator-runtime": "^0.14.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@mozilla/readability": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/@mozilla/readability/-/readability-0.5.0.tgz", + "integrity": "sha512-Z+CZ3QaosfFaTqvhQsIktyGrjFjSC0Fa4EMph4mqKnWhmyoGICsV/8QK+8HpXut6zV7zwfWwqDmEjtk1Qf6EgQ==", + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@postlight/ci-failed-test-reporter": { + "version": "1.0.26", + "resolved": "https://registry.npmjs.org/@postlight/ci-failed-test-reporter/-/ci-failed-test-reporter-1.0.26.tgz", + "integrity": "sha512-xfXzxyOiKhco7Gx2OLTe9b66b0dFJw0elg94KGHoQXf5F8JqqFvdo35J8wayGOor64CSMvn+4Bjlu2NKV+yTGA==", + "dependencies": { + "dotenv": "^6.2.0", + "node-fetch": "^2.3.0" + }, + "bin": { + "ciftr": "cli.js" + } + }, + "node_modules/@postlight/parser": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/@postlight/parser/-/parser-2.2.3.tgz", + "integrity": "sha512-4/syRvqJARgLN4yH8qtl634WO0+KINjkijU/SmhCJqqh8/aOfv5uQf+SquFpA+JwsAsbGzYQkIxSum29riOreg==", + "bundleDependencies": [ + "jquery", + "moment-timezone", + "browser-request" + ], + "dependencies": { + "@babel/runtime-corejs2": "^7.2.0", + "@postlight/ci-failed-test-reporter": "^1.0", + "browser-request": "*", + "cheerio": "^0.22.0", + "difflib": "github:postlight/difflib.js", + 
"ellipsize": "0.1.0", + "iconv-lite": "0.5.0", + "jquery": "*", + "moment": "^2.23.0", + "moment-parseformat": "3.0.0", + "moment-timezone": "*", + "postman-request": "^2.88.1-postman.31", + "string-direction": "^0.1.2", + "turndown": "^7.1.1", + "valid-url": "^1.0.9", + "wuzzy": "^0.1.4", + "yargs-parser": "^15.0.1" + }, + "bin": { + "mercury-parser": "cli.js", + "postlight-parser": "cli.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/@postlight/parser/node_modules/browser-request": { + "version": "0.3.2", + "engines": [ + "node" + ], + "inBundle": true, + "dependencies": { + "http-headers": "^3.0.1" + } + }, + "node_modules/@postlight/parser/node_modules/http-headers": { + "version": "3.0.2", + "inBundle": true, + "license": "MIT", + "dependencies": { + "next-line": "^1.1.0" + } + }, + "node_modules/@postlight/parser/node_modules/jquery": { + "version": "3.6.0", + "inBundle": true, + "license": "MIT" + }, + "node_modules/@postlight/parser/node_modules/moment": { + "version": "2.29.4", + "inBundle": true, + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/@postlight/parser/node_modules/moment-timezone": { + "version": "0.5.37", + "inBundle": true, + "license": "MIT", + "dependencies": { + "moment": ">= 2.9.0" + }, + "engines": { + "node": "*" + } + }, + "node_modules/@postlight/parser/node_modules/next-line": { + "version": "1.1.0", + "inBundle": true, + "license": "MIT" + }, + "node_modules/@postman/form-data": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/@postman/form-data/-/form-data-3.1.1.tgz", + "integrity": "sha512-vjh8Q2a8S6UCm/KKs31XFJqEEgmbjBmpPNVV2eVav6905wyFAwaUOBGA1NPBI4ERH9MMZc6w0umFgM6WbEPMdg==", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/@postman/tough-cookie": { + "version": "4.1.3-postman.1", + "resolved": 
"https://registry.npmjs.org/@postman/tough-cookie/-/tough-cookie-4.1.3-postman.1.tgz", + "integrity": "sha512-txpgUqZOnWYnUHZpHjkfb0IwVH4qJmyq77pPnJLlfhMtdCLMFTEeQHlzQiK906aaNCe4NEB5fGJHo9uzGbFMeA==", + "dependencies": { + "psl": "^1.1.33", + "punycode": "^2.1.1", + "universalify": "^0.2.0", + "url-parse": "^1.5.3" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/@postman/tunnel-agent": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/@postman/tunnel-agent/-/tunnel-agent-0.6.3.tgz", + "integrity": "sha512-k57fzmAZ2PJGxfOA4SGR05ejorHbVAa/84Hxh/2nAztjNXc4ZjOm9NUIk6/Z6LCrBvJZqjRZbN8e/nROVUPVdg==", + "dependencies": { + "safe-buffer": "^5.0.1" + }, + "engines": { + "node": "*" + } + }, + "node_modules/@puppeteer/browsers": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.0.0.tgz", + "integrity": "sha512-3PS82/5+tnpEaUWonjAFFvlf35QHF15xqyGd34GBa5oP5EPVfFXRsbSxIGYf1M+vZlqBZ3oxT1kRg9OYhtt8ng==", + "dependencies": { + "debug": "4.3.4", + "extract-zip": "2.0.1", + "progress": "2.0.3", + "proxy-agent": "6.3.1", + "tar-fs": "3.0.4", + "unbzip2-stream": "1.4.3", + "yargs": "17.7.2" + }, + "bin": { + "browsers": "lib/cjs/main-cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@tootallnate/quickjs-emscripten": { + "version": "0.23.0", + "resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz", + "integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==" + }, + "node_modules/@types/node": { + "version": "20.12.7", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz", + "integrity": "sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg==", + "optional": true, + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/@types/yauzl": { + "version": "2.10.3", + "resolved": 
"https://registry.npmjs.org/@types/yauzl/-/yauzl-2.10.3.tgz", + "integrity": "sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==", + "optional": true, + "dependencies": { + "@types/node": "*" + } + }, + "node_modules/agent-base": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.1.tgz", + "integrity": "sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==", + "dependencies": { + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/asn1": { + "version": "0.2.6", + "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.6.tgz", + "integrity": "sha512-ix/FxPn0MDjeyJ7i/yoHGFt/EX6LyNbxSEhPPXODPL+KB0VPk86UYfL0lMdy+KCnv+fmvIzySwaK5COwqVbWTQ==", + 
"dependencies": { + "safer-buffer": "~2.1.0" + } + }, + "node_modules/assert-plus": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-1.0.0.tgz", + "integrity": "sha512-NfJ4UzBCcQGLDlQq7nHxH+tv3kyZ0hHQqF5BO6J7tNJeP5do1llPr8dZ8zHonfhAu0PHAdMkSo+8o0wxg9lZWw==", + "engines": { + "node": ">=0.8" + } + }, + "node_modules/ast-types": { + "version": "0.13.4", + "resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.13.4.tgz", + "integrity": "sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w==", + "dependencies": { + "tslib": "^2.0.1" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + }, + "node_modules/aws-sign2": { + "version": "0.7.0", + "resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.7.0.tgz", + "integrity": "sha512-08kcGqnYf/YmjoRhfxyu+CLxBjUtHLXLXX/vUfx9l2LYzG3c1m61nrpyFUZI6zeS+Li/wWMMidD9KgrqtGq3mA==", + "engines": { + "node": "*" + } + }, + "node_modules/aws4": { + "version": "1.12.0", + "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.12.0.tgz", + "integrity": "sha512-NmWvPnx0F1SfrQbYwOi7OeaNGokp9XhzNioJ/CSBs8Qa4vxug81mhJEAVZwxXuBmYB5KDRfMq/F3RR0BIU7sWg==" + }, + "node_modules/b4a": { + "version": "1.6.6", + "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.6.tgz", + "integrity": "sha512-5Tk1HLk6b6ctmjIkAcU/Ujv/1WqiDl0F0JdRCR80VsOcUlHcu7pWeWRlOqQLHfDEsVx9YH/aif5AG4ehoCtTmg==" + }, + "node_modules/bare-events": { + "version": "2.2.2", + "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.2.2.tgz", + "integrity": "sha512-h7z00dWdG0PYOQEvChhOSWvOfkIKsdZGkWr083FgN/HyoQuebSew/cgirYqh9SCuy/hRvxc5Vy6Fw8xAmYHLkQ==", + "optional": true + }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": 
"https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, + "node_modules/basic-ftp": { + "version": "5.0.5", + "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.0.5.tgz", + "integrity": "sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg==", + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/bcrypt-pbkdf": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz", + "integrity": "sha512-qeFIXtP4MSoi6NLqO12WfqARWWuCKi2Rn/9hJLEmtB5yTNr9DqFWkJRCf2qShWzPeAMRnOgCrq0sg/KLv5ES9w==", + "dependencies": { + "tweetnacl": "^0.14.3" + } + }, + "node_modules/bidi-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/bidi-js/-/bidi-js-1.0.3.tgz", + "integrity": "sha512-RKshQI1R3YQ+n9YJz2QQ147P66ELpa1FQEg20Dk8oW9t2KgLbpDLLp9aGZ7y8WHSshDknG0bknqGw5/tyCs5tw==", + "dependencies": { + "require-from-string": "^2.0.2" + } + }, + "node_modules/bluebird": { + "version": "2.11.0", + "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-2.11.0.tgz", + "integrity": "sha512-UfFSr22dmHPQqPP9XWHRhq+gWnHCYguQGkXQlbyPtW5qTnhFWA8/iXg765tH0cAjy7l/zPJ1aBTO0g5XgA7kvQ==" + }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==" + }, + "node_modules/brotli": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/brotli/-/brotli-1.3.3.tgz", + "integrity": 
"sha512-oTKjJdShmDuGW94SyyaoQvAjf30dZaHnjJ8uAF+u2/vGJkJbJPJAT1gDiOJP5v1Zb6f9KEyW/1HpuaWIXtGHPg==", + "dependencies": { + "base64-js": "^1.1.2" + } + }, + "node_modules/buffer": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", + "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.1.13" + } + }, + "node_modules/buffer-crc32": { + "version": "0.2.13", + "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", + "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==", + "engines": { + "node": "*" + } + }, + "node_modules/camelcase": { + "version": "5.3.1", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.3.1.tgz", + "integrity": "sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==", + "engines": { + "node": ">=6" + } + }, + "node_modules/caseless": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz", + "integrity": "sha512-4tYFyifaFfGacoiObjJegolkwSU4xQNGbVgUiNYVUxbQ2x2lUsFvY4hVgVzGiIe6WLOPqycWXA40l+PWsxthUw==" + }, + "node_modules/cheerio": { + "version": "0.22.0", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-0.22.0.tgz", + "integrity": "sha512-8/MzidM6G/TgRelkzDG13y3Y9LxBjCb+8yOEZ9+wwq5gVF2w2pV0wmHvjfT0RvuxGyR7UEuK36r+yYMbT4uKgA==", + "dependencies": { + "css-select": "~1.2.0", + "dom-serializer": "~0.1.0", + "entities": "~1.1.1", + "htmlparser2": "^3.9.1", + "lodash.assignin": "^4.0.9", + "lodash.bind": "^4.1.4", + "lodash.defaults": "^4.0.1", + "lodash.filter": "^4.4.0", 
+ "lodash.flatten": "^4.2.0", + "lodash.foreach": "^4.3.0", + "lodash.map": "^4.4.0", + "lodash.merge": "^4.4.0", + "lodash.pick": "^4.2.1", + "lodash.reduce": "^4.4.0", + "lodash.reject": "^4.4.0", + "lodash.some": "^4.4.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/chromium-bidi": { + "version": "0.5.8", + "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.5.8.tgz", + "integrity": "sha512-blqh+1cEQbHBKmok3rVJkBlBxt9beKBgOsxbFgs7UJcoVbbeZ+K7+6liAsjgpc8l1Xd55cQUy14fXZdGSb4zIw==", + "dependencies": { + "mitt": "3.0.1", + "urlpattern-polyfill": "10.0.0" + }, + "peerDependencies": { + "devtools-protocol": "*" + } + }, + "node_modules/cliui": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", + "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", + "dependencies": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.1", + "wrap-ansi": "^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + 
"node_modules/core-js": { + "version": "2.6.12", + "resolved": "https://registry.npmjs.org/core-js/-/core-js-2.6.12.tgz", + "integrity": "sha512-Kb2wC0fvsWfQrgk8HU5lW6U/Lcs8+9aaYcy4ZFc6DDlo4nZ7n70dEgE5rtR0oG6ufKDUnrwfWL1mXR5ljDatrQ==", + "deprecated": "core-js@<3.23.3 is no longer maintained and not recommended for usage due to the number of issues. Because of the V8 engine whims, feature detection in old core-js versions could cause a slowdown up to 100x even if nothing is polyfilled. Some versions have web compatibility issues. Please, upgrade your dependencies to the actual version of core-js.", + "hasInstallScript": true + }, + "node_modules/core-util-is": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", + "integrity": "sha512-3lqz5YjWTYnW6dlDa5TLaTCcShfar1e40rmcJVwCBJC6mWlFuj0eCHIElmG1g5kyuJ/GD+8Wn4FFCcz4gJPfaQ==" + }, + "node_modules/cross-fetch": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-4.0.0.tgz", + "integrity": "sha512-e4a5N8lVvuLgAWgnCrLr2PP0YyDOTHa9H/Rj54dirp61qXnNq46m82bRhNqIA5VccJtWBvPTFRV3TtvHUKPB1g==", + "dependencies": { + "node-fetch": "^2.6.12" + } + }, + "node_modules/css-select": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-1.2.0.tgz", + "integrity": "sha512-dUQOBoqdR7QwV90WysXPLXG5LO7nhYBgiWVfxF80DKPF8zx1t/pUd2FYy73emg3zrjtM6dzmYgbHKfV2rxiHQA==", + "dependencies": { + "boolbase": "~1.0.0", + "css-what": "2.1", + "domutils": "1.5.1", + "nth-check": "~1.0.1" + } + }, + "node_modules/css-tree": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/css-tree/-/css-tree-2.3.1.tgz", + "integrity": "sha512-6Fv1DV/TYw//QF5IzQdqsNDjx/wc8TrMBZsqjL9eW01tWb7R7k/mq+/VXfJCl7SoD5emsJop9cOByJZfs8hYIw==", + "dependencies": { + "mdn-data": "2.0.30", + "source-map-js": "^1.0.1" + }, + "engines": { + "node": "^10 || ^12.20.0 || ^14.13.0 || >=15.0.0" + } + }, + "node_modules/css-what": { + "version": 
"2.1.3", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.3.tgz", + "integrity": "sha512-a+EPoD+uZiNfh+5fxw2nO9QwFa6nJe2Or35fGY6Ipw1R3R4AGz1d1TEZrCegvw2YTmZ0jXirGYlzxxpYSHwpEg==", + "engines": { + "node": "*" + } + }, + "node_modules/cssstyle": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-4.0.1.tgz", + "integrity": "sha512-8ZYiJ3A/3OkDd093CBT/0UKDWry7ak4BdPTFP2+QEP7cmhouyq/Up709ASSj2cK02BbZiMgk7kYjZNS4QP5qrQ==", + "dependencies": { + "rrweb-cssom": "^0.6.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/dashdash": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz", + "integrity": "sha512-jRFi8UDGo6j+odZiEpjazZaWqEal3w/basFjQHQEwVtZJGDpxbH1MeYluwCS8Xq5wmLJooDlMgvVarmWfGM44g==", + "dependencies": { + "assert-plus": "^1.0.0" + }, + "engines": { + "node": ">=0.10" + } + }, + "node_modules/data-uri-to-buffer": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz", + "integrity": "sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==", + "engines": { + "node": ">= 14" + } + }, + "node_modules/data-urls": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-5.0.0.tgz", + "integrity": "sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==", + "dependencies": { + "whatwg-mimetype": "^4.0.0", + "whatwg-url": "^14.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/data-urls/node_modules/tr46": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-5.0.0.tgz", + "integrity": "sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==", + "dependencies": { + "punycode": "^2.3.1" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/data-urls/node_modules/whatwg-url": { + "version": "14.0.0", + 
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-14.0.0.tgz", + "integrity": "sha512-1lfMEm2IEr7RIV+f4lUNPOqfFL+pO+Xw3fJSqmjX9AbXcXcYOkCe1P6+9VBZB6n94af16NfZf+sSk0JCBZC9aw==", + "dependencies": { + "tr46": "^5.0.0", + "webidl-conversions": "^7.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/debug": { + "version": "4.3.4", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", + "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "dependencies": { + "ms": "2.1.2" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/decamelize": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz", + "integrity": "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/decimal.js": { + "version": "10.4.3", + "resolved": "https://registry.npmjs.org/decimal.js/-/decimal.js-10.4.3.tgz", + "integrity": "sha512-VBBaLc1MgL5XpzgIP7ny5Z6Nx3UrRkIViUkPUdtl9aya5amy3De1gsUUSB1g3+3sExYNjCAsAznmukyxCb1GRA==" + }, + "node_modules/degenerator": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz", + "integrity": "sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==", + "dependencies": { + "ast-types": "^0.13.4", + "escodegen": "^2.1.0", + "esprima": "^4.0.1" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/devtools-protocol": { + "version": "0.0.1232444", + 
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1232444.tgz", + "integrity": "sha512-pM27vqEfxSxRkTMnF+XCmxSEb6duO5R+t8A9DEEJgy4Wz2RVanje2mmj99B6A3zv2r/qGfYlOvYznUhuokizmg==" + }, + "node_modules/difflib": { + "version": "0.2.6", + "resolved": "git+ssh://git@github.com/postlight/difflib.js.git#32e8e38c7fcd935241b9baab71bb432fd9b166ed", + "dependencies": { + "heap": ">= 0.2.0" + } + }, + "node_modules/dom-serializer": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-0.1.1.tgz", + "integrity": "sha512-l0IU0pPzLWSHBcieZbpOKgkIn3ts3vAh7ZuFyXNwJxJXk/c4Gwj9xaTJwIDVQCXawWD0qb3IzMGH5rglQaO0XA==", + "dependencies": { + "domelementtype": "^1.3.0", + "entities": "^1.1.1" + } + }, + "node_modules/domelementtype": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-1.3.1.tgz", + "integrity": "sha512-BSKB+TSpMpFI/HOxCNr1O8aMOTZ8hT3pM3GQ0w/mWRmkhEDSFJkkyzz4XQsBV44BChwGkrDfMyjVD0eA2aFV3w==" + }, + "node_modules/domhandler": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-2.4.2.tgz", + "integrity": "sha512-JiK04h0Ht5u/80fdLMCEmV4zkNh2BcoMFBmZ/91WtYZ8qVXSKjiw7fXMgFPnHcSZgOo3XdinHvmnDUeMf5R4wA==", + "dependencies": { + "domelementtype": "1" + } + }, + "node_modules/domino": { + "version": "2.1.6", + "resolved": "https://registry.npmjs.org/domino/-/domino-2.1.6.tgz", + "integrity": "sha512-3VdM/SXBZX2omc9JF9nOPCtDaYQ67BGp5CoLpIQlO2KCAPETs8TcDHacF26jXadGbvUteZzRTeos2fhID5+ucQ==" + }, + "node_modules/dompurify": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.1.0.tgz", + "integrity": "sha512-yoU4rhgPKCo+p5UrWWWNKiIq+ToGqmVVhk0PmMYBK4kRsR3/qhemNFL8f6CFmBd4gMwm3F4T7HBoydP5uY07fA==" + }, + "node_modules/domutils": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-1.5.1.tgz", + "integrity": 
"sha512-gSu5Oi/I+3wDENBsOWBiRK1eoGxcywYSqg3rR960/+EfY0CF4EX1VPkgHOZ3WiS/Jg2DtliF6BhWcHlfpYUcGw==", + "dependencies": { + "dom-serializer": "0", + "domelementtype": "1" + } + }, + "node_modules/dotenv": { + "version": "6.2.0", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-6.2.0.tgz", + "integrity": "sha512-HygQCKUBSFl8wKQZBSemMywRWcEDNidvNbjGVyZu3nbZ8qq9ubiPoGLMdRDpfSrpkkm9BXYFkpKxxFX38o/76w==", + "engines": { + "node": ">=6" + } + }, + "node_modules/ecc-jsbn": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz", + "integrity": "sha512-eh9O+hwRHNbG4BLTjEl3nw044CkGm5X6LoaCf7LPp7UU8Qrt47JYNi6nPX8xjW97TKGKm1ouctg0QSpZe9qrnw==", + "dependencies": { + "jsbn": "~0.1.0", + "safer-buffer": "^2.1.0" + } + }, + "node_modules/ecc-jsbn/node_modules/jsbn": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", + "integrity": "sha512-UVU9dibq2JcFWxQPA6KCqj5O42VOmAY3zQUfEKxU0KpTGXwNoCjkX1e13eHNvw/xPynt6pU0rZ1htjWTNTSXsg==" + }, + "node_modules/ellipsize": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/ellipsize/-/ellipsize-0.1.0.tgz", + "integrity": "sha512-5gxbEjcb/Z2n6TTmXZx9wVi3N/DOzE7RXY3Xg9dakDuhX/izwumB9rGjeWUV6dTA0D0+juvo+JonZgNR9sgA5A==" + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" + }, + "node_modules/end-of-stream": { + "version": "1.4.4", + "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz", + "integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==", + "dependencies": { + "once": "^1.4.0" + } + }, + "node_modules/entities": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/entities/-/entities-1.1.2.tgz", + "integrity": 
"sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w==" + }, + "node_modules/escalade": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.2.tgz", + "integrity": "sha512-ErCHMCae19vR8vQGe50xIsVomy19rg6gFu3+r3jkEO46suLMWBksvVyoGgQV+jOfl84ZSOSlmv6Gxa89PmTGmA==", + "engines": { + "node": ">=6" + } + }, + "node_modules/escodegen": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-2.1.0.tgz", + "integrity": "sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==", + "dependencies": { + "esprima": "^4.0.1", + "estraverse": "^5.2.0", + "esutils": "^2.0.2" + }, + "bin": { + "escodegen": "bin/escodegen.js", + "esgenerate": "bin/esgenerate.js" + }, + "engines": { + "node": ">=6.0" + }, + "optionalDependencies": { + "source-map": "~0.6.1" + } + }, + "node_modules/esprima": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", + "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", + "bin": { + "esparse": "bin/esparse.js", + "esvalidate": "bin/esvalidate.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/estraverse": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", + "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", + "engines": { + "node": ">=4.0" + } + }, + "node_modules/esutils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": 
"sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==" + }, + "node_modules/extract-zip": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz", + "integrity": "sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==", + "dependencies": { + "debug": "^4.1.1", + "get-stream": "^5.1.0", + "yauzl": "^2.10.0" + }, + "bin": { + "extract-zip": "cli.js" + }, + "engines": { + "node": ">= 10.17.0" + }, + "optionalDependencies": { + "@types/yauzl": "^2.9.1" + } + }, + "node_modules/extsprintf": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz", + "integrity": "sha512-11Ndz7Nv+mvAC1j0ktTa7fAb0vLyGGX+rMHNBYQviQDGU0Hw7lhctJANqbPhu9nV9/izT/IntTgZ7Im/9LJs9g==", + "engines": [ + "node >=0.6.0" + ] + }, + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==" + }, + "node_modules/fast-fifo": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz", + "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==" + }, + "node_modules/fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==" + }, + "node_modules/fd-slicer": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz", + "integrity": "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==", + "dependencies": { + "pend": "~1.2.0" + } + }, + 
"node_modules/file-url": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/file-url/-/file-url-3.0.0.tgz", + "integrity": "sha512-g872QGsHexznxkIAdK8UiZRe7SkE6kvylShU4Nsj8NvfvZag7S0QuQ4IgvPDkk75HxgjIVDwycFTDAgIiO4nDA==", + "engines": { + "node": ">=8" + } + }, + "node_modules/forever-agent": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz", + "integrity": "sha512-j0KLYPhm6zeac4lz3oJ3o65qvgQCcPubiyotZrXqEaG4hNagNYO8qdlUrX5vwqv9ohqeT/Z3j6+yW067yWWdUw==", + "engines": { + "node": "*" + } + }, + "node_modules/form-data": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", + "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/fs-extra": { + "version": "11.2.0", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.2.0.tgz", + "integrity": "sha512-PmDi3uwK5nFuXh7XDTlVnS17xJS7vW36is2+w3xcv8SVxiB4NyATf4ctkVY5bkSjX0Y4nbvZCq1/EjtEyr9ktw==", + "dependencies": { + "graceful-fs": "^4.2.0", + "jsonfile": "^6.0.1", + "universalify": "^2.0.0" + }, + "engines": { + "node": ">=14.14" + } + }, + "node_modules/fs-extra/node_modules/universalify": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", + "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", + "engines": { + "node": ">= 10.0.0" + } + }, + "node_modules/get-caller-file": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "engines": { + "node": "6.* || 8.* || >= 10.*" + } + }, + 
"node_modules/get-stream": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz", + "integrity": "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==", + "dependencies": { + "pump": "^3.0.0" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/get-uri": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.3.tgz", + "integrity": "sha512-BzUrJBS9EcUb4cFol8r4W3v1cPsSyajLSthNkz5BxbpDcHN5tIrM10E2eNvfnvBn3DaT3DUgx0OpsBKkaOpanw==", + "dependencies": { + "basic-ftp": "^5.0.2", + "data-uri-to-buffer": "^6.0.2", + "debug": "^4.3.4", + "fs-extra": "^11.2.0" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/getpass": { + "version": "0.1.7", + "resolved": "https://registry.npmjs.org/getpass/-/getpass-0.1.7.tgz", + "integrity": "sha512-0fzj9JxOLfJ+XGLhR8ze3unN0KZCgZwiSSDz168VERjK8Wl8kVSdcu2kspd4s4wtAa1y/qrVRiAA0WclVsu0ng==", + "dependencies": { + "assert-plus": "^1.0.0" + } + }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", + "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==" + }, + "node_modules/har-schema": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/har-schema/-/har-schema-2.0.0.tgz", + "integrity": "sha512-Oqluz6zhGX8cyRaTQlFMPw80bSJVG2x/cFb8ZPhUILGgHka9SsokCCOQgpveePerqidZOrT14ipqfJb7ILcW5Q==", + "engines": { + "node": ">=4" + } + }, + "node_modules/har-validator": { + "version": "5.1.5", + "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.1.5.tgz", + "integrity": "sha512-nmT2T0lljbxdQZfspsno9hgrG3Uir6Ks5afism62poxqBM6sDnMEuPmzTq8XN0OEwqKLLdh1jQI3qyE66Nzb3w==", + "deprecated": "this library is no longer supported", + "dependencies": { + "ajv": "^6.12.3", + 
"har-schema": "^2.0.0" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/heap": { + "version": "0.2.7", + "resolved": "https://registry.npmjs.org/heap/-/heap-0.2.7.tgz", + "integrity": "sha512-2bsegYkkHO+h/9MGbn6KWcE45cHZgPANo5LXF7EvWdT0yT2EguSVO1nDgU5c8+ZOPwp2vMNa7YFsJhVcDR9Sdg==" + }, + "node_modules/html-encoding-sniffer": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-4.0.0.tgz", + "integrity": "sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==", + "dependencies": { + "whatwg-encoding": "^3.1.1" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/htmlparser2": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.10.1.tgz", + "integrity": "sha512-IgieNijUMbkDovyoKObU1DUhm1iwNYE/fuifEoEHfd1oZKZDaONBSkal7Y01shxsM49R4XaMdGez3WnF9UfiCQ==", + "dependencies": { + "domelementtype": "^1.3.1", + "domhandler": "^2.3.0", + "domutils": "^1.5.1", + "entities": "^1.1.1", + "inherits": "^2.0.1", + "readable-stream": "^3.1.1" + } + }, + "node_modules/http-proxy-agent": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", + "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", + "dependencies": { + "agent-base": "^7.1.0", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/http-signature": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.3.6.tgz", + "integrity": "sha512-3adrsD6zqo4GsTqtO7FyrejHNv+NgiIfAfv68+jVlFmSr9OGy7zrxONceFRLKvnnZA5jbxQBX1u9PpB6Wi32Gw==", + "dependencies": { + "assert-plus": "^1.0.0", + "jsprim": "^2.0.2", + "sshpk": "^1.14.1" + }, + "engines": { + "node": ">=0.10" + } + }, + "node_modules/https-proxy-agent": { + "version": "7.0.4", + "resolved": 
"https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.4.tgz", + "integrity": "sha512-wlwpilI7YdjSkWaQ/7omYBMTliDcmCN8OLihO6I9B86g06lMyAoqgoDpV0XqoaPOKj+0DIdAvnsWfyAAhmimcg==", + "dependencies": { + "agent-base": "^7.0.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/iconv-lite": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.5.0.tgz", + "integrity": "sha512-NnEhI9hIEKHOzJ4f697DMz9IQEXr/MMJ5w64vN2/4Ai+wRnvV7SBrL0KLoRlwaKVghOc7LQ5YkPLuX146b6Ydw==", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/ieee754": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", + "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, + "node_modules/immediate": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz", + "integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==" + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + }, + "node_modules/ip-address": { + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-9.0.5.tgz", + "integrity": "sha512-zHtQzGojZXTwZTHQqra+ETKd4Sn3vgi7uBmlPoXVWZqYvuKmtI0l/VZTjqGmJY9x88GGOaZ9+G9ES8hC4T4X8g==", + "dependencies": { + "jsbn": "1.1.0", + "sprintf-js": "^1.1.3" + }, + "engines": { + "node": ">= 12" + } + }, + 
"node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "engines": { + "node": ">=8" + } + }, + "node_modules/is-potential-custom-element-name": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz", + "integrity": "sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==" + }, + "node_modules/is-typedarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-typedarray/-/is-typedarray-1.0.0.tgz", + "integrity": "sha512-cyA56iCMHAh5CdzjJIa4aohJyeO1YbwLi3Jc35MmRU6poroFjIGZzUzupGiRPOjgHg9TLu43xbpwXk523fMxKA==" + }, + "node_modules/isarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", + "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==" + }, + "node_modules/isstream": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz", + "integrity": "sha512-Yljz7ffyPbrLpLngrMtZ7NduUgVvi6wG9RJ9IUcyCd59YQ911PBJphODUcbOVbqYfxe1wuYf/LJ8PauMRwsM/g==" + }, + "node_modules/jsbn": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-1.1.0.tgz", + "integrity": "sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==" + }, + "node_modules/jsdom": { + "version": "23.2.0", + "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-23.2.0.tgz", + "integrity": "sha512-L88oL7D/8ufIES+Zjz7v0aes+oBMh2Xnh3ygWvL0OaICOomKEPKuPnIfBJekiXr+BHbbMjrWn/xqrDQuxFTeyA==", + "dependencies": { + "@asamuzakjp/dom-selector": "^2.0.1", + "cssstyle": "^4.0.1", + "data-urls": "^5.0.0", + "decimal.js": "^10.4.3", + "form-data": 
"^4.0.0", + "html-encoding-sniffer": "^4.0.0", + "http-proxy-agent": "^7.0.0", + "https-proxy-agent": "^7.0.2", + "is-potential-custom-element-name": "^1.0.1", + "parse5": "^7.1.2", + "rrweb-cssom": "^0.6.0", + "saxes": "^6.0.0", + "symbol-tree": "^3.2.4", + "tough-cookie": "^4.1.3", + "w3c-xmlserializer": "^5.0.0", + "webidl-conversions": "^7.0.0", + "whatwg-encoding": "^3.1.1", + "whatwg-mimetype": "^4.0.0", + "whatwg-url": "^14.0.0", + "ws": "^8.16.0", + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "canvas": "^2.11.2" + }, + "peerDependenciesMeta": { + "canvas": { + "optional": true + } + } + }, + "node_modules/jsdom/node_modules/tr46": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-5.0.0.tgz", + "integrity": "sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==", + "dependencies": { + "punycode": "^2.3.1" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/jsdom/node_modules/whatwg-url": { + "version": "14.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-14.0.0.tgz", + "integrity": "sha512-1lfMEm2IEr7RIV+f4lUNPOqfFL+pO+Xw3fJSqmjX9AbXcXcYOkCe1P6+9VBZB6n94af16NfZf+sSk0JCBZC9aw==", + "dependencies": { + "tr46": "^5.0.0", + "webidl-conversions": "^7.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/json-schema": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.4.0.tgz", + "integrity": "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==" + }, + "node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==" + }, + "node_modules/json-stringify-safe": { + "version": "5.0.1", + "resolved": 
"https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz", + "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==" + }, + "node_modules/jsonfile": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.1.0.tgz", + "integrity": "sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==", + "dependencies": { + "universalify": "^2.0.0" + }, + "optionalDependencies": { + "graceful-fs": "^4.1.6" + } + }, + "node_modules/jsonfile/node_modules/universalify": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", + "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", + "engines": { + "node": ">= 10.0.0" + } + }, + "node_modules/jsprim": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/jsprim/-/jsprim-2.0.2.tgz", + "integrity": "sha512-gqXddjPqQ6G40VdnI6T6yObEC+pDNvyP95wdQhkWkg7crHH3km5qP1FsOXEkzEQwnz6gz5qGTn1c2Y52wP3OyQ==", + "engines": [ + "node >=0.6.0" + ], + "dependencies": { + "assert-plus": "1.0.0", + "extsprintf": "1.3.0", + "json-schema": "0.4.0", + "verror": "1.10.0" + } + }, + "node_modules/jszip": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz", + "integrity": "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==", + "dependencies": { + "lie": "~3.3.0", + "pako": "~1.0.2", + "readable-stream": "~2.3.6", + "setimmediate": "^1.0.5" + } + }, + "node_modules/jszip/node_modules/readable-stream": { + "version": "2.3.8", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz", + "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", + "dependencies": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": 
"~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "node_modules/jszip/node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" + }, + "node_modules/jszip/node_modules/string_decoder": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "dependencies": { + "safe-buffer": "~5.1.0" + } + }, + "node_modules/lie": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz", + "integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==", + "dependencies": { + "immediate": "~3.0.5" + } + }, + "node_modules/lodash": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" + }, + "node_modules/lodash.assignin": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/lodash.assignin/-/lodash.assignin-4.2.0.tgz", + "integrity": "sha512-yX/rx6d/UTVh7sSVWVSIMjfnz95evAgDFdb1ZozC35I9mSFCkmzptOzevxjgbQUsc78NR44LVHWjsoMQXy9FDg==" + }, + "node_modules/lodash.bind": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/lodash.bind/-/lodash.bind-4.2.1.tgz", + "integrity": "sha512-lxdsn7xxlCymgLYo1gGvVrfHmkjDiyqVv62FAeF2i5ta72BipE1SLxw8hPEPLhD4/247Ijw07UQH7Hq/chT5LA==" + }, + "node_modules/lodash.defaults": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz", + "integrity": 
"sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ==" + }, + "node_modules/lodash.filter": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/lodash.filter/-/lodash.filter-4.6.0.tgz", + "integrity": "sha512-pXYUy7PR8BCLwX5mgJ/aNtyOvuJTdZAo9EQFUvMIYugqmJxnrYaANvTbgndOzHSCSR0wnlBBfRXJL5SbWxo3FQ==" + }, + "node_modules/lodash.flatten": { + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/lodash.flatten/-/lodash.flatten-4.4.0.tgz", + "integrity": "sha512-C5N2Z3DgnnKr0LOpv/hKCgKdb7ZZwafIrsesve6lmzvZIRZRGaZ/l6Q8+2W7NaT+ZwO3fFlSCzCzrDCFdJfZ4g==" + }, + "node_modules/lodash.foreach": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.foreach/-/lodash.foreach-4.5.0.tgz", + "integrity": "sha512-aEXTF4d+m05rVOAUG3z4vZZ4xVexLKZGF0lIxuHZ1Hplpk/3B6Z1+/ICICYRLm7c41Z2xiejbkCkJoTlypoXhQ==" + }, + "node_modules/lodash.map": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/lodash.map/-/lodash.map-4.6.0.tgz", + "integrity": "sha512-worNHGKLDetmcEYDvh2stPCrrQRkP20E4l0iIS7F8EvzMqBBi7ltvFN5m1HvTf1P7Jk1txKhvFcmYsCr8O2F1Q==" + }, + "node_modules/lodash.merge": { + "version": "4.6.2", + "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", + "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==" + }, + "node_modules/lodash.pick": { + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/lodash.pick/-/lodash.pick-4.4.0.tgz", + "integrity": "sha512-hXt6Ul/5yWjfklSGvLQl8vM//l3FtyHZeuelpzK6mm99pNvN9yTDruNZPEJZD1oWrqo+izBmB7oUfWgcCX7s4Q==" + }, + "node_modules/lodash.reduce": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/lodash.reduce/-/lodash.reduce-4.6.0.tgz", + "integrity": "sha512-6raRe2vxCYBhpBu+B+TtNGUzah+hQjVdu3E17wfusjyrXBka2nBS8OH/gjVZ5PvHOhWmIZTYri09Z6n/QfnNMw==" + }, + "node_modules/lodash.reject": { + "version": "4.6.0", + "resolved": 
"https://registry.npmjs.org/lodash.reject/-/lodash.reject-4.6.0.tgz", + "integrity": "sha512-qkTuvgEzYdyhiJBx42YPzPo71R1aEr0z79kAv7Ixg8wPFEjgRgJdUsGMG3Hf3OYSF/kHI79XhNlt+5Ar6OzwxQ==" + }, + "node_modules/lodash.some": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/lodash.some/-/lodash.some-4.6.0.tgz", + "integrity": "sha512-j7MJE+TuT51q9ggt4fSgVqro163BEFjAt3u97IqU+JA2DkWl80nFTrowzLpZ/BnpN7rrl0JA/593NAdd8p/scQ==" + }, + "node_modules/lru-cache": { + "version": "7.18.3", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz", + "integrity": "sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==", + "engines": { + "node": ">=12" + } + }, + "node_modules/mdn-data": { + "version": "2.0.30", + "resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.0.30.tgz", + "integrity": "sha512-GaqWWShW4kv/G9IEucWScBx9G1/vsFZZJUO+tD26M8J8z3Kw5RDQjaoZe03YAClgeS/SWPOcb4nkFBTEi5DUEA==" + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mitt": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz", + "integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==" + }, + "node_modules/mkdirp-classic": { + "version": "0.5.3", + "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", + "integrity": 
"sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==" + }, + "node_modules/moment-parseformat": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/moment-parseformat/-/moment-parseformat-3.0.0.tgz", + "integrity": "sha512-dVgXe6b6DLnv4CHG7a1zUe5mSXaIZ3c6lSHm/EKeVeQI2/4pwe0VRde8OyoCE1Ro2lKT5P6uT9JElF7KDLV+jw==" + }, + "node_modules/ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" + }, + "node_modules/netmask": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/netmask/-/netmask-2.0.2.tgz", + "integrity": "sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==", + "engines": { + "node": ">= 0.4.0" + } + }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/nth-check": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-1.0.2.tgz", + "integrity": "sha512-WeBOdju8SnzPN5vTUJYxYUxLeXpCaVP5i5e0LF8fg7WORF2Wd7wFX/pk0tYZk7s8T+J7VLy0Da6J1+wCT0AtHg==", + "dependencies": { + "boolbase": "~1.0.0" + } + }, + "node_modules/nwsapi": { + "version": "2.2.9", + "resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.2.9.tgz", + "integrity": "sha512-2f3F0SEEer8bBu0dsNCFF50N0cTThV1nWFYcEYFZttdW0lDAoybv9cQoK7X7/68Z89S7FoRrVjP1LPX4XRf9vg==" + }, + "node_modules/oauth-sign": { + "version": "0.9.0", + "resolved": 
"https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz", + "integrity": "sha512-fexhUFFPTGV8ybAtSIGbV6gOkSv8UtRbDBnAyLQw4QPKkgNlsH2ByPGtMUqdWkos6YCRmAqViwgZrJc/mRDzZQ==", + "engines": { + "node": "*" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/pac-proxy-agent": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.0.1.tgz", + "integrity": "sha512-ASV8yU4LLKBAjqIPMbrgtaKIvxQri/yh2OpI+S6hVa9JRkUI3Y3NPFbfngDtY7oFtSMD3w31Xns89mDa3Feo5A==", + "dependencies": { + "@tootallnate/quickjs-emscripten": "^0.23.0", + "agent-base": "^7.0.2", + "debug": "^4.3.4", + "get-uri": "^6.0.1", + "http-proxy-agent": "^7.0.0", + "https-proxy-agent": "^7.0.2", + "pac-resolver": "^7.0.0", + "socks-proxy-agent": "^8.0.2" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/pac-resolver": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/pac-resolver/-/pac-resolver-7.0.1.tgz", + "integrity": "sha512-5NPgf87AT2STgwa2ntRMr45jTKrYBGkVU36yT0ig/n/GMAa3oPqhZfIQ2kMEimReg0+t9kZViDVZ83qfVUlckg==", + "dependencies": { + "degenerator": "^5.0.0", + "netmask": "^2.0.2" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/pako": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", + "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==" + }, + "node_modules/parse5": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz", + "integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==", + "dependencies": { + "entities": "^4.4.0" + }, + "funding": { + "url": 
"https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5/node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/pend": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz", + "integrity": "sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==" + }, + "node_modules/performance-now": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz", + "integrity": "sha512-7EAHlyLHI56VEIdK57uwHdHKIaAGbnXPiw0yWbarQZOKaKpvUIgW0jWRVLiatnM+XXlSwsanIBH/hzGMJulMow==" + }, + "node_modules/postman-request": { + "version": "2.88.1-postman.33", + "resolved": "https://registry.npmjs.org/postman-request/-/postman-request-2.88.1-postman.33.tgz", + "integrity": "sha512-uL9sCML4gPH6Z4hreDWbeinKU0p0Ke261nU7OvII95NU22HN6Dk7T/SaVPaj6T4TsQqGKIFw6/woLZnH7ugFNA==", + "dependencies": { + "@postman/form-data": "~3.1.1", + "@postman/tough-cookie": "~4.1.3-postman.1", + "@postman/tunnel-agent": "^0.6.3", + "aws-sign2": "~0.7.0", + "aws4": "^1.12.0", + "brotli": "^1.3.3", + "caseless": "~0.12.0", + "combined-stream": "~1.0.6", + "extend": "~3.0.2", + "forever-agent": "~0.6.1", + "har-validator": "~5.1.3", + "http-signature": "~1.3.1", + "is-typedarray": "~1.0.0", + "isstream": "~0.1.2", + "json-stringify-safe": "~5.0.1", + "mime-types": "^2.1.35", + "oauth-sign": "~0.9.0", + "performance-now": "^2.1.0", + "qs": "~6.5.3", + "safe-buffer": "^5.1.2", + "stream-length": "^1.0.2", + "uuid": "^8.3.2" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/process-nextick-args": { + "version": "2.0.1", + "resolved": 
"https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", + "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==" + }, + "node_modules/progress": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", + "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/proxy-agent": { + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/proxy-agent/-/proxy-agent-6.3.1.tgz", + "integrity": "sha512-Rb5RVBy1iyqOtNl15Cw/llpeLH8bsb37gM1FUfKQ+Wck6xHlbAhWGUFiTRHtkjqGTA5pSHz6+0hrPW/oECihPQ==", + "dependencies": { + "agent-base": "^7.0.2", + "debug": "^4.3.4", + "http-proxy-agent": "^7.0.0", + "https-proxy-agent": "^7.0.2", + "lru-cache": "^7.14.1", + "pac-proxy-agent": "^7.0.1", + "proxy-from-env": "^1.1.0", + "socks-proxy-agent": "^8.0.2" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, + "node_modules/psl": { + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/psl/-/psl-1.9.0.tgz", + "integrity": "sha512-E/ZsdU4HLs/68gYzgGTkMicWTLPdAftJLfJFlLUAAKZGkStNU72sZjT66SnMDVOfOWY/YAoiD7Jxa9iHvngcag==" + }, + "node_modules/pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dependencies": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, + "node_modules/punycode": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", + "integrity": 
"sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", + "engines": { + "node": ">=6" + } + }, + "node_modules/puppeteer-core": { + "version": "22.0.0", + "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-22.0.0.tgz", + "integrity": "sha512-S3s91rLde0A86PWVeNY82h+P0fdS7CTiNWAicCVH/bIspRP4nS2PnO5j+VTFqCah0ZJizGzpVPAmxVYbLxTc9w==", + "dependencies": { + "@puppeteer/browsers": "2.0.0", + "chromium-bidi": "0.5.8", + "cross-fetch": "4.0.0", + "debug": "4.3.4", + "devtools-protocol": "0.0.1232444", + "ws": "8.16.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/qs": { + "version": "6.5.3", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.5.3.tgz", + "integrity": "sha512-qxXIEh4pCGfHICj1mAJQ2/2XVZkjCDTcEgfoSQxc/fYivUZxTkk7L3bDBJSoNrEzXI17oUO5Dp07ktqE5KzczA==", + "engines": { + "node": ">=0.6" + } + }, + "node_modules/querystringify": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/querystringify/-/querystringify-2.2.0.tgz", + "integrity": "sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ==" + }, + "node_modules/queue-tick": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/queue-tick/-/queue-tick-1.0.1.tgz", + "integrity": "sha512-kJt5qhMxoszgU/62PLP1CJytzd2NKetjSRnyuj31fDd3Rlcz3fzlFdFLD1SItunPwyqEOkca6GbV612BWfaBag==" + }, + "node_modules/readability-extractor": { + "version": "0.0.11", + "resolved": "git+ssh://git@github.com/ArchiveBox/readability-extractor.git#057f2046f9535cfc6df7b8d551aaad32a9e6226c", + "dependencies": { + "@mozilla/readability": "^0.5.0", + "dompurify": "^3.0.6", + "jsdom": "^23.0.1" + }, + "bin": { + "readability-extractor": "readability-extractor" + } + }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": 
"sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/regenerator-runtime": { + "version": "0.14.1", + "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.14.1.tgz", + "integrity": "sha512-dYnhHh0nJoMfnkZs6GmmhFknAGRrLznOu5nc9ML+EJxGvrx6H7teuevqVqCuPcPK//3eDrrjQhehXVx9cnkGdw==" + }, + "node_modules/require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/requires-port": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz", + "integrity": "sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ==" + }, + "node_modules/rrweb-cssom": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/rrweb-cssom/-/rrweb-cssom-0.6.0.tgz", + "integrity": "sha512-APM0Gt1KoXBz0iIkkdB/kfvGOwC4UuJFeG/c+yV7wSc7q96cG/kJ0HiYCnzivD9SB53cLV1MlHFNfOuPaadYSw==" + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + 
}, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" + }, + "node_modules/saxes": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz", + "integrity": "sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==", + "dependencies": { + "xmlchars": "^2.2.0" + }, + "engines": { + "node": ">=v12.22.7" + } + }, + "node_modules/selenium-webdriver": { + "version": "4.17.0", + "resolved": "https://registry.npmjs.org/selenium-webdriver/-/selenium-webdriver-4.17.0.tgz", + "integrity": "sha512-e2E+2XBlGepzwgFbyQfSwo9Cbj6G5fFfs9MzAS00nC99EewmcS2rwn2MwtgfP7I5p1e7DYv4HQJXtWedsu6DvA==", + "dependencies": { + "jszip": "^3.10.1", + "tmp": "^0.2.1", + "ws": ">=8.14.2" + }, + "engines": { + "node": ">= 14.20.0" + } + }, + "node_modules/setimmediate": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", + "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==" + }, + "node_modules/single-file-cli": { + "version": "1.1.54", + "resolved": "https://registry.npmjs.org/single-file-cli/-/single-file-cli-1.1.54.tgz", + "integrity": "sha512-wnVPg7BklhswwFVrtuFXbmluI4piHxg2dC0xATxYTeXAld6PnRPlnp7ufallRKArjFBZdP2u+ihMkOIp7A38XA==", + "dependencies": { + "file-url": "3.0.0", + "iconv-lite": "0.6.3", + "jsdom": "24.0.0", + "puppeteer-core": "22.0.0", + "selenium-webdriver": "4.17.0", + "single-file-core": "1.3.24", + "strong-data-uri": "1.0.6", + "yargs": "17.7.2" + }, + "bin": { + "single-file": "single-file" + } + }, + 
"node_modules/single-file-cli/node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/single-file-cli/node_modules/jsdom": { + "version": "24.0.0", + "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-24.0.0.tgz", + "integrity": "sha512-UDS2NayCvmXSXVP6mpTj+73JnNQadZlr9N68189xib2tx5Mls7swlTNao26IoHv46BZJFvXygyRtyXd1feAk1A==", + "dependencies": { + "cssstyle": "^4.0.1", + "data-urls": "^5.0.0", + "decimal.js": "^10.4.3", + "form-data": "^4.0.0", + "html-encoding-sniffer": "^4.0.0", + "http-proxy-agent": "^7.0.0", + "https-proxy-agent": "^7.0.2", + "is-potential-custom-element-name": "^1.0.1", + "nwsapi": "^2.2.7", + "parse5": "^7.1.2", + "rrweb-cssom": "^0.6.0", + "saxes": "^6.0.0", + "symbol-tree": "^3.2.4", + "tough-cookie": "^4.1.3", + "w3c-xmlserializer": "^5.0.0", + "webidl-conversions": "^7.0.0", + "whatwg-encoding": "^3.1.1", + "whatwg-mimetype": "^4.0.0", + "whatwg-url": "^14.0.0", + "ws": "^8.16.0", + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "canvas": "^2.11.2" + }, + "peerDependenciesMeta": { + "canvas": { + "optional": true + } + } + }, + "node_modules/single-file-cli/node_modules/tr46": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-5.0.0.tgz", + "integrity": "sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==", + "dependencies": { + "punycode": "^2.3.1" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/single-file-cli/node_modules/whatwg-url": { + "version": "14.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-14.0.0.tgz", + "integrity": 
"sha512-1lfMEm2IEr7RIV+f4lUNPOqfFL+pO+Xw3fJSqmjX9AbXcXcYOkCe1P6+9VBZB6n94af16NfZf+sSk0JCBZC9aw==", + "dependencies": { + "tr46": "^5.0.0", + "webidl-conversions": "^7.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/single-file-core": { + "version": "1.3.24", + "resolved": "https://registry.npmjs.org/single-file-core/-/single-file-core-1.3.24.tgz", + "integrity": "sha512-1B256mKBbNV8jXAV+hRyEv0aMa7tn0C0Ci+zx7Ya4ZXZB3b9/1MgKsB/fxVwDiL28WJSU0pxzh8ftIYubCNn9w==" + }, + "node_modules/smart-buffer": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz", + "integrity": "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==", + "engines": { + "node": ">= 6.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/socks": { + "version": "2.8.3", + "resolved": "https://registry.npmjs.org/socks/-/socks-2.8.3.tgz", + "integrity": "sha512-l5x7VUUWbjVFbafGLxPWkYsHIhEvmF85tbIeFZWc8ZPtoMyybuEhL7Jye/ooC4/d48FgOjSJXgsF/AJPYCW8Zw==", + "dependencies": { + "ip-address": "^9.0.5", + "smart-buffer": "^4.2.0" + }, + "engines": { + "node": ">= 10.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/socks-proxy-agent": { + "version": "8.0.3", + "resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.3.tgz", + "integrity": "sha512-VNegTZKhuGq5vSD6XNKlbqWhyt/40CgoEw8XxD6dhnm8Jq9IEa3nIa4HwnM8XOqU0CdB0BwWVXusqiFXfHB3+A==", + "dependencies": { + "agent-base": "^7.1.1", + "debug": "^4.3.4", + "socks": "^2.7.1" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "optional": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/source-map-js": { + "version": "1.2.0", + "resolved": 
"https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.0.tgz", + "integrity": "sha512-itJW8lvSA0TXEphiRoawsCksnlf8SyvmFzIhltqAHluXd88pkCd+cXJVHTDwdCr0IzwptSm035IHQktUu1QUMg==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/sprintf-js": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz", + "integrity": "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==" + }, + "node_modules/sshpk": { + "version": "1.18.0", + "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.18.0.tgz", + "integrity": "sha512-2p2KJZTSqQ/I3+HX42EpYOa2l3f8Erv8MWKsy2I9uf4wA7yFIkXRffYdsx86y6z4vHtV8u7g+pPlr8/4ouAxsQ==", + "dependencies": { + "asn1": "~0.2.3", + "assert-plus": "^1.0.0", + "bcrypt-pbkdf": "^1.0.0", + "dashdash": "^1.12.0", + "ecc-jsbn": "~0.1.1", + "getpass": "^0.1.1", + "jsbn": "~0.1.0", + "safer-buffer": "^2.0.2", + "tweetnacl": "~0.14.0" + }, + "bin": { + "sshpk-conv": "bin/sshpk-conv", + "sshpk-sign": "bin/sshpk-sign", + "sshpk-verify": "bin/sshpk-verify" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/sshpk/node_modules/jsbn": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", + "integrity": "sha512-UVU9dibq2JcFWxQPA6KCqj5O42VOmAY3zQUfEKxU0KpTGXwNoCjkX1e13eHNvw/xPynt6pU0rZ1htjWTNTSXsg==" + }, + "node_modules/stream-length": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/stream-length/-/stream-length-1.0.2.tgz", + "integrity": "sha512-aI+qKFiwoDV4rsXiS7WRoCt+v2RX1nUj17+KJC5r2gfh5xoSJIfP6Y3Do/HtvesFcTSWthIuJ3l1cvKQY/+nZg==", + "dependencies": { + "bluebird": "^2.6.2" + } + }, + "node_modules/streamx": { + "version": "2.16.1", + "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.16.1.tgz", + "integrity": "sha512-m9QYj6WygWyWa3H1YY69amr4nVgy61xfjys7xO7kviL5rfIEc2naf+ewFiOA+aEJD7y0JO3h2GoiUv4TDwEGzQ==", + "dependencies": { + "fast-fifo": "^1.1.0", + "queue-tick": "^1.0.1" + }, + 
"optionalDependencies": { + "bare-events": "^2.2.0" + } + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, + "node_modules/string-direction": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/string-direction/-/string-direction-0.1.2.tgz", + "integrity": "sha512-NJHQRg6GlOEMLA6jEAlSy21KaXvJDNoAid/v6fBAJbqdvOEIiPpCrIPTHnl4636wUF/IGyktX5A9eddmETb1Cw==" + }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strong-data-uri": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/strong-data-uri/-/strong-data-uri-1.0.6.tgz", + "integrity": "sha512-zhzBZev0uhT2IrFUerenXhfaE0vFUYwAZsnG0gIKGpfM/Gi6jOUQ3cmcvyTsXeDLIPiTubHESeO7EbD6FoPmzw==", + "dependencies": { + "truncate": "^2.0.1" + }, + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/symbol-tree": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/symbol-tree/-/symbol-tree-3.2.4.tgz", + "integrity": "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==" + }, + 
"node_modules/tar-fs": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.0.4.tgz", + "integrity": "sha512-5AFQU8b9qLfZCX9zp2duONhPmZv0hGYiBPJsyUdqMjzq/mqVpy/rEUSeHk1+YitmxugaptgBh5oDGU3VsAJq4w==", + "dependencies": { + "mkdirp-classic": "^0.5.2", + "pump": "^3.0.0", + "tar-stream": "^3.1.5" + } + }, + "node_modules/tar-stream": { + "version": "3.1.7", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz", + "integrity": "sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==", + "dependencies": { + "b4a": "^1.6.4", + "fast-fifo": "^1.2.0", + "streamx": "^2.15.0" + } + }, + "node_modules/through": { + "version": "2.3.8", + "resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz", + "integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==" + }, + "node_modules/tmp": { + "version": "0.2.3", + "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.3.tgz", + "integrity": "sha512-nZD7m9iCPC5g0pYmcaxogYKggSfLsdxl8of3Q/oIbqCqLLIO9IAF0GWjX1z9NZRHPiXv8Wex4yDCaZsgEw0Y8w==", + "engines": { + "node": ">=14.14" + } + }, + "node_modules/tough-cookie": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-4.1.3.tgz", + "integrity": "sha512-aX/y5pVRkfRnfmuX+OdbSdXvPe6ieKX/G2s7e98f4poJHnqH3281gDPm/metm6E/WRamfx7WC4HUqkWHfQHprw==", + "dependencies": { + "psl": "^1.1.33", + "punycode": "^2.1.1", + "universalify": "^0.2.0", + "url-parse": "^1.5.3" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" + }, + "node_modules/truncate": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/truncate/-/truncate-2.1.0.tgz", + "integrity": 
"sha512-em3E3SUDONOjTBcZ36DTm3RvDded3IRU9rX32oHwwXNt3rJD5MVaFlJTQvs8tJoHRoeYP36OuQ1eL/Q7bNEWIQ==", + "engines": { + "node": "*" + } + }, + "node_modules/tslib": { + "version": "2.6.2", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz", + "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==" + }, + "node_modules/turndown": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/turndown/-/turndown-7.1.3.tgz", + "integrity": "sha512-Z3/iJ6IWh8VBiACWQJaA5ulPQE5E1QwvBHj00uGzdQxdRnd8fh1DPqNOJqzQDu6DkOstORrtXzf/9adB+vMtEA==", + "dependencies": { + "domino": "^2.1.6" + } + }, + "node_modules/tweetnacl": { + "version": "0.14.5", + "resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz", + "integrity": "sha512-KXXFFdAbFXY4geFIwoyNK+f5Z1b7swfXABfL7HXCmoIWMKU3dmS26672A4EeQtDzLKy7SXmfBu51JolvEKwtGA==" + }, + "node_modules/unbzip2-stream": { + "version": "1.4.3", + "resolved": "https://registry.npmjs.org/unbzip2-stream/-/unbzip2-stream-1.4.3.tgz", + "integrity": "sha512-mlExGW4w71ebDJviH16lQLtZS32VKqsSfk80GCfUlwT/4/hNRFsoscrF/c++9xinkMzECL1uL9DDwXqFWkruPg==", + "dependencies": { + "buffer": "^5.2.1", + "through": "^2.3.8" + } + }, + "node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "optional": true + }, + "node_modules/universalify": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.2.0.tgz", + "integrity": "sha512-CJ1QgKmNg3CwvAv/kOFmtnEN05f0D/cn9QntgNOQlQF9dgvVTHj3t+8JPdjqawCHk7V/KA+fbUqzZ9XWhcqPUg==", + "engines": { + "node": ">= 4.0.0" + } + }, + "node_modules/uri-js": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", + "integrity": 
"sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "dependencies": { + "punycode": "^2.1.0" + } + }, + "node_modules/url-parse": { + "version": "1.5.10", + "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.10.tgz", + "integrity": "sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ==", + "dependencies": { + "querystringify": "^2.1.1", + "requires-port": "^1.0.0" + } + }, + "node_modules/urlpattern-polyfill": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz", + "integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg==" + }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==" + }, + "node_modules/uuid": { + "version": "8.3.2", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", + "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==", + "bin": { + "uuid": "dist/bin/uuid" + } + }, + "node_modules/valid-url": { + "version": "1.0.9", + "resolved": "https://registry.npmjs.org/valid-url/-/valid-url-1.0.9.tgz", + "integrity": "sha512-QQDsV8OnSf5Uc30CKSwG9lnhMPe6exHtTXLRYX8uMwKENy640pU+2BgBL0LRbDh/eYRahNCS7aewCx0wf3NYVA==" + }, + "node_modules/verror": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/verror/-/verror-1.10.0.tgz", + "integrity": "sha512-ZZKSmDAEFOijERBLkmYfJ+vmk3w+7hOLYDNkRCuRuMJGEmqYNCNLyBBFwWKVMhfwaEF3WOd0Zlw86U/WC/+nYw==", + "engines": [ + "node >=0.6.0" + ], + "dependencies": { + "assert-plus": "^1.0.0", + "core-util-is": "1.0.2", + "extsprintf": "^1.2.0" + } + }, + "node_modules/w3c-xmlserializer": { + "version": "5.0.0", + "resolved": 
"https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-5.0.0.tgz", + "integrity": "sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==", + "dependencies": { + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/webidl-conversions": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-7.0.0.tgz", + "integrity": "sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==", + "engines": { + "node": ">=12" + } + }, + "node_modules/whatwg-encoding": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", + "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", + "dependencies": { + "iconv-lite": "0.6.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-encoding/node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/whatwg-mimetype": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", + "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, + 
"node_modules/whatwg-url/node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" + }, + "node_modules/wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" + }, + "node_modules/ws": { + "version": "8.16.0", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.16.0.tgz", + "integrity": "sha512-HS0c//TP7Ina87TfiPUz1rQzMhHrl/SG2guqRcTOIUYD2q8uhUdNHZYJUaQ8aTGPzCh+c6oawMKW35nFl1dxyQ==", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, + "node_modules/wuzzy": { + "version": "0.1.8", + "resolved": "https://registry.npmjs.org/wuzzy/-/wuzzy-0.1.8.tgz", + "integrity": "sha512-FUzKQepFSTnANsDYwxpIzGJ/dIJaqxuMre6tzzbvWwFAiUHPsI1nVQVCLK4Xqr67KO7oYAK0kaCcI/+WYj/7JA==", + "dependencies": { + "lodash": "^4.17.15" + } + }, + "node_modules/xml-name-validator": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-5.0.0.tgz", + "integrity": 
"sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==", + "engines": { + "node": ">=18" + } + }, + "node_modules/xmlchars": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz", + "integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==" + }, + "node_modules/y18n": { + "version": "5.0.8", + "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "engines": { + "node": ">=10" + } + }, + "node_modules/yargs": { + "version": "17.7.2", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", + "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", + "dependencies": { + "cliui": "^8.0.1", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.3", + "y18n": "^5.0.5", + "yargs-parser": "^21.1.1" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/yargs-parser": { + "version": "15.0.3", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-15.0.3.tgz", + "integrity": "sha512-/MVEVjTXy/cGAjdtQf8dW3V9b97bPN7rNn8ETj6BmAQL7ibC7O1Q9SPJbGjgh3SlwoBNXMzj/ZGIj8mBgl12YA==", + "dependencies": { + "camelcase": "^5.0.0", + "decamelize": "^1.2.0" + } + }, + "node_modules/yargs/node_modules/yargs-parser": { + "version": "21.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", + "engines": { + "node": ">=12" + } + }, + "node_modules/yauzl": { + "version": "2.10.0", + "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz", + "integrity": 
"sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==", + "dependencies": { + "buffer-crc32": "~0.2.3", + "fd-slicer": "~1.1.0" + } + } + } +} diff --git a/archivebox/package.json b/archivebox/package.json index 1377ef99..7682c546 100644 --- a/archivebox/package.json +++ b/archivebox/package.json @@ -1,6 +1,6 @@ { "name": "archivebox", - "version": "0.7.3", + "version": "0.8.0", "description": "ArchiveBox: The self-hosted internet archive", "author": "Nick Sweeting ", "repository": "github:ArchiveBox/ArchiveBox", @@ -8,6 +8,6 @@ "dependencies": { "@postlight/parser": "^2.2.3", "readability-extractor": "github:ArchiveBox/readability-extractor", - "single-file-cli": "^1.1.46" + "single-file-cli": "^1.1.54" } } diff --git a/archivebox/vendor/requirements.txt b/archivebox/vendor/requirements.txt new file mode 100644 index 00000000..3d4872b0 --- /dev/null +++ b/archivebox/vendor/requirements.txt @@ -0,0 +1,6 @@ +# this folder contains vendored versions of these packages + +atomicwrites==1.4.0 +pocket==0.3.7 +django-taggit==1.3.0 +base32-crockford==0.3.0 diff --git a/bin/build_deb.sh b/bin/build_deb.sh index 8c5c7fcf..4061e3ae 100755 --- a/bin/build_deb.sh +++ b/bin/build_deb.sh @@ -31,6 +31,20 @@ else echo "[!] Warning: No virtualenv presesnt in $REPO_DIR.venv" fi + +# Build python package lists +# https://pdm-project.org/latest/usage/lockfile/ +echo "[+] Generating requirements.txt and pdm.lock from pyproject.toml..." 
+pdm lock --group=':all' --production --lockfile pdm.lock --strategy="cross_platform" +pdm sync --group=':all' --production --lockfile pdm.lock --clean || pdm sync --group=':all' --production --lockfile pdm.lock --clean +pdm export --group=':all' --production --lockfile pdm.lock --without-hashes -o requirements.txt + +pdm lock --group=':all' --dev --lockfile pdm.dev.lock --strategy="cross_platform" +pdm sync --group=':all' --dev --lockfile pdm.dev.lock --clean || pdm sync --group=':all' --dev --lockfile pdm.dev.lock --clean +pdm export --group=':all' --dev --lockfile pdm.dev.lock --without-hashes -o requirements-dev.txt + + + # cleanup build artifacts rm -Rf build deb_dist dist archivebox-*.tar.gz diff --git a/bin/build_dev.sh b/bin/build_dev.sh index b5acda44..4b685ab1 100755 --- a/bin/build_dev.sh +++ b/bin/build_dev.sh @@ -21,6 +21,20 @@ VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")" SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')" REQUIRED_PLATFORMS="${2:-"linux/arm64,linux/amd64,linux/arm/v7"}" + +# Build python package lists +# https://pdm-project.org/latest/usage/lockfile/ +echo "[+] Generating requirements.txt and pdm.lock from pyproject.toml..." +pdm lock --group=':all' --production --lockfile pdm.lock --strategy="cross_platform" +pdm sync --group=':all' --production --lockfile pdm.lock --clean || pdm sync --group=':all' --production --lockfile pdm.lock --clean +pdm export --group=':all' --production --lockfile pdm.lock --without-hashes -o requirements.txt + +pdm lock --group=':all' --dev --lockfile pdm.dev.lock --strategy="cross_platform" +pdm sync --group=':all' --dev --lockfile pdm.dev.lock --clean || pdm sync --group=':all' --dev --lockfile pdm.dev.lock --clean +pdm export --group=':all' --dev --lockfile pdm.dev.lock --without-hashes -o requirements-dev.txt + + + echo "[+] Building Docker image: tag=$TAG_NAME version=$SHORT_VERSION arch=$REQUIRED_PLATFORMS" @@ -32,4 +46,4 @@ docker build . 
--no-cache -t archivebox-dev --load # -t archivebox \ # -t archivebox:$TAG_NAME \ # -t archivebox:$VERSION \ -# -t archivebox:$SHORT_VERSION \ No newline at end of file +# -t archivebox:$SHORT_VERSION diff --git a/bin/build_docker.sh b/bin/build_docker.sh index 0ed2a799..5c89804c 100755 --- a/bin/build_docker.sh +++ b/bin/build_docker.sh @@ -71,10 +71,8 @@ docker buildx use xbuilder 2>&1 >/dev/null || create_builder check_platforms || (recreate_builder && check_platforms) || exit 1 -# Build python package lists -echo "[+] Generating requirements.txt and pdm.lock from pyproject.toml..." -pdm lock --group=':all' --strategy="cross_platform" --production -pdm export --group=':all' --production --without-hashes -o requirements.txt +# Make sure pyproject.toml, pdm{.dev}.lock, requirements{-dev}.txt, package{-lock}.json are all up-to-date +bash ./bin/lock_pkgs.sh echo "[+] Building archivebox:$VERSION docker image..." diff --git a/bin/build_pip.sh b/bin/build_pip.sh index c3cbd51b..395ff11d 100755 --- a/bin/build_pip.sh +++ b/bin/build_pip.sh @@ -20,20 +20,13 @@ else fi cd "$REPO_DIR" -echo "[*] Cleaning up build dirs" -cd "$REPO_DIR" -rm -Rf build dist +# Generate pdm.lock, requirements.txt, and package-lock.json +bash ./bin/lock_pkgs.sh echo "[+] Building sdist, bdist_wheel, and egg_info" -rm -f archivebox/package.json -cp package.json archivebox/package.json - -pdm self update -pdm install +rm -Rf build dist pdm build -pdm export --without-hashes -o ./pip_dist/requirements.txt - cp dist/* ./pip_dist/ echo -echo "[√] Finished. Don't forget to commit the new sdist and wheel files in ./pip_dist/" \ No newline at end of file +echo "[√] Finished. 
Don't forget to commit the new sdist and wheel files in ./pip_dist/" diff --git a/bin/lock_pkgs.sh b/bin/lock_pkgs.sh new file mode 100755 index 00000000..91c53089 --- /dev/null +++ b/bin/lock_pkgs.sh @@ -0,0 +1,100 @@ +#!/usr/bin/env bash + +### Bash Environment Setup +# http://redsymbol.net/articles/unofficial-bash-strict-mode/ +# https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html +# set -o xtrace +set -o errexit +set -o errtrace +set -o nounset +set -o pipefail +IFS=$'\n' + +REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )" + +cd "$REPO_DIR" + +py_version="$(grep 'version = ' pyproject.toml | awk '{print $3}' | jq -r)" +js_version="$(jq -r '.version' package.json)" + +if [[ "$py_version" != "$js_version" ]]; then + echo "[❌] Version in pyproject.toml ($py_version) does not match version in package.json ($js_version)!" + exit 1 +fi + +echo "[🔒] Locking all ArchiveBox dependencies (pip, npm)" +echo +echo "pyproject.toml: archivebox $py_version" +echo "package.json: archivebox $js_version" +echo +echo + +echo "[*] Cleaning up old lockfiles and build files" +rm -Rf build dist +rm -f pdm.lock +rm -f pdm.dev.lock +rm -f requirements.txt +rm -f requirements-dev.txt +rm -f package-lock.json +rm -f archivebox/package.json +rm -f archivebox/package-lock.json +rm -Rf ./.venv +rm -Rf ./node_modules +rm -Rf ./archivebox/node_modules + +echo +echo + +echo "[+] Generating dev & prod requirements.txt & pdm.lock from pyproject.toml..." 
+pip install --upgrade pip setuptools +pdm self update +pdm venv create 3.12 +echo +echo "pyproject.toml: archivebox $(grep 'version = ' pyproject.toml | awk '{print $3}' | jq -r)" +echo "$(which python): $(python --version | head -n 1)" +echo "$(which pdm): $(pdm --version | head -n 1)" +pdm info --env +pdm info + +echo +# https://pdm-project.org/latest/usage/lockfile/ +# prod +pdm lock --group=':all' --production --lockfile pdm.lock --strategy="cross_platform" +pdm sync --group=':all' --production --lockfile pdm.lock --clean +pdm export --group=':all' --production --lockfile pdm.lock --without-hashes -o requirements.txt +cp ./pdm.lock ./pip_dist/ +cp ./requirements.txt ./pip_dist/ +# dev +pdm lock --group=':all' --dev --lockfile pdm.dev.lock --strategy="cross_platform" +pdm sync --group=':all' --dev --lockfile pdm.dev.lock --clean +pdm export --group=':all' --dev --lockfile pdm.dev.lock --without-hashes -o requirements-dev.txt +cp ./pdm.dev.lock ./pip_dist/ +cp ./requirements-dev.txt ./pip_dist/ + +echo +echo "[+]] Generating package-lock.json from package.json..." +npm install -g npm +echo +echo "package.json: archivebox $(jq -r '.version' package.json)" +echo +echo "$(which node): $(node --version | head -n 1)" +echo "$(which npm): $(npm --version | head -n 1)" + +echo +npm install --package-lock-only +cp package.json archivebox/package.json +cp package-lock.json archivebox/package-lock.json + +echo +echo "[√] Finished. 
Don't forget to commit the new lockfiles:" +echo +ls "pyproject.toml" | cat +ls "pdm.lock" | cat +ls "pdm.dev.lock" | cat +ls "requirements.txt" | cat +ls "requirements-dev.txt" | cat +echo +ls "package.json" | cat +ls "package-lock.json" | cat +ls "archivebox/package.json" | cat +ls "archivebox/package-lock.json" | cat diff --git a/package-lock.json b/package-lock.json index 4c0f3df2..4496a88b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,23 +1,33 @@ { "name": "archivebox", - "version": "0.7.3", + "version": "0.8.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "archivebox", - "version": "0.7.3", + "version": "0.8.0", "license": "MIT", "dependencies": { "@postlight/parser": "^2.2.3", "readability-extractor": "github:ArchiveBox/readability-extractor", - "single-file-cli": "^1.1.46" + "single-file-cli": "^1.1.54" + } + }, + "node_modules/@asamuzakjp/dom-selector": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/@asamuzakjp/dom-selector/-/dom-selector-2.0.2.tgz", + "integrity": "sha512-x1KXOatwofR6ZAYzXRBL5wrdV0vwNxlTCK9NCuLqAzQYARqGcvFwiJA6A1ERuh+dgeA4Dxm3JBYictIes+SqUQ==", + "dependencies": { + "bidi-js": "^1.0.3", + "css-tree": "^2.3.1", + "is-potential-custom-element-name": "^1.0.1" } }, "node_modules/@babel/runtime-corejs2": { - "version": "7.23.7", - "resolved": "https://registry.npmjs.org/@babel/runtime-corejs2/-/runtime-corejs2-7.23.7.tgz", - "integrity": "sha512-JmMk2t1zGDNkvsY2MsLLksocjY+ufGzSk8UlcNcxzfrzAPu4nMx0HRFakzIg2bhcqQq6xBI2nUaW/sHoaYIHdQ==", + "version": "7.24.4", + "resolved": "https://registry.npmjs.org/@babel/runtime-corejs2/-/runtime-corejs2-7.24.4.tgz", + "integrity": "sha512-ZCKqyUKt/Coimg+3Kafu43yNetgYnTXzNbEGAgxc81J5sI0qFNbQ613w7PNny+SmijAmGVroL0GDvx5rG/JI5Q==", "dependencies": { "core-js": "^2.6.12", "regenerator-runtime": "^0.14.0" @@ -168,9 +178,9 @@ } }, "node_modules/@puppeteer/browsers": { - "version": "1.8.0", - "resolved": 
"https://registry.npmjs.org/@puppeteer/browsers/-/browsers-1.8.0.tgz", - "integrity": "sha512-TkRHIV6k2D8OlUe8RtG+5jgOF/H98Myx0M6AOafC8DdNVOFiBSFa5cpRDtpm8LXOa9sVwe0+e6Q3FC56X/DZfg==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.0.0.tgz", + "integrity": "sha512-3PS82/5+tnpEaUWonjAFFvlf35QHF15xqyGd34GBa5oP5EPVfFXRsbSxIGYf1M+vZlqBZ3oxT1kRg9OYhtt8ng==", "dependencies": { "debug": "4.3.4", "extract-zip": "2.0.1", @@ -184,7 +194,7 @@ "browsers": "lib/cjs/main-cli.js" }, "engines": { - "node": ">=16.3.0" + "node": ">=18" } }, "node_modules/@tootallnate/quickjs-emscripten": { @@ -193,9 +203,9 @@ "integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==" }, "node_modules/@types/node": { - "version": "20.10.6", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.10.6.tgz", - "integrity": "sha512-Vac8H+NlRNNlAmDfGUP7b5h/KA+AtWIzuXy0E6OyP8f1tCLYAtPvKRRDJjAPqhpCb0t6U2j7/xqAuLEebW2kiw==", + "version": "20.12.7", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz", + "integrity": "sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg==", "optional": true, "dependencies": { "undici-types": "~5.26.4" @@ -211,9 +221,9 @@ } }, "node_modules/agent-base": { - "version": "7.1.0", - "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.0.tgz", - "integrity": "sha512-o/zjMZRhJxny7OyEF+Op8X+efiELC7k7yOjMzgfzVqOzXqkBkWI79YoTdOtsuWd5BWhAGAuOY/Xa6xpiaWXiNg==", + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.1.tgz", + "integrity": "sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==", "dependencies": { "debug": "^4.3.4" }, @@ -304,14 +314,15 @@ "integrity": "sha512-NmWvPnx0F1SfrQbYwOi7OeaNGokp9XhzNioJ/CSBs8Qa4vxug81mhJEAVZwxXuBmYB5KDRfMq/F3RR0BIU7sWg==" }, "node_modules/b4a": { - "version": "1.6.4", - "resolved": 
"https://registry.npmjs.org/b4a/-/b4a-1.6.4.tgz", - "integrity": "sha512-fpWrvyVHEKyeEvbKZTVOeZF3VSKKWtJxFIxX/jaVPf+cLbGUSitjb49pHLqPV2BUNNZ0LcoeEGfE/YCpyDYHIw==" + "version": "1.6.6", + "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.6.tgz", + "integrity": "sha512-5Tk1HLk6b6ctmjIkAcU/Ujv/1WqiDl0F0JdRCR80VsOcUlHcu7pWeWRlOqQLHfDEsVx9YH/aif5AG4ehoCtTmg==" }, - "node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" + "node_modules/bare-events": { + "version": "2.2.2", + "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.2.2.tgz", + "integrity": "sha512-h7z00dWdG0PYOQEvChhOSWvOfkIKsdZGkWr083FgN/HyoQuebSew/cgirYqh9SCuy/hRvxc5Vy6Fw8xAmYHLkQ==", + "optional": true }, "node_modules/base64-js": { "version": "1.5.1", @@ -333,9 +344,9 @@ ] }, "node_modules/basic-ftp": { - "version": "5.0.4", - "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.0.4.tgz", - "integrity": "sha512-8PzkB0arJFV4jJWSGOYR+OEic6aeKMu/osRhBULN6RY0ykby6LKhbmuQ5ublvaas5BOwboah5D87nrHyuh8PPA==", + "version": "5.0.5", + "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.0.5.tgz", + "integrity": "sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg==", "engines": { "node": ">=10.0.0" } @@ -348,6 +359,14 @@ "tweetnacl": "^0.14.3" } }, + "node_modules/bidi-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/bidi-js/-/bidi-js-1.0.3.tgz", + "integrity": "sha512-RKshQI1R3YQ+n9YJz2QQ147P66ELpa1FQEg20Dk8oW9t2KgLbpDLLp9aGZ7y8WHSshDknG0bknqGw5/tyCs5tw==", + "dependencies": { + "require-from-string": "^2.0.2" + } + }, "node_modules/bluebird": { "version": "2.11.0", "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-2.11.0.tgz", @@ -358,15 +377,6 @@ "resolved": 
"https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==" }, - "node_modules/brace-expansion": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", - "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", - "dependencies": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" - } - }, "node_modules/brotli": { "version": "1.3.3", "resolved": "https://registry.npmjs.org/brotli/-/brotli-1.3.3.tgz", @@ -446,12 +456,12 @@ } }, "node_modules/chromium-bidi": { - "version": "0.4.33", - "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.4.33.tgz", - "integrity": "sha512-IxoFM5WGQOIAd95qrSXzJUv4eXIrh+RvU3rwwqIiwYuvfE7U/Llj4fejbsJnjJMUYCuGtVQsY2gv7oGl4aTNSQ==", + "version": "0.5.8", + "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.5.8.tgz", + "integrity": "sha512-blqh+1cEQbHBKmok3rVJkBlBxt9beKBgOsxbFgs7UJcoVbbeZ+K7+6liAsjgpc8l1Xd55cQUy14fXZdGSb4zIw==", "dependencies": { "mitt": "3.0.1", - "urlpattern-polyfill": "9.0.0" + "urlpattern-polyfill": "10.0.0" }, "peerDependencies": { "devtools-protocol": "*" @@ -497,11 +507,6 @@ "node": ">= 0.8" } }, - "node_modules/concat-map": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", - "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==" - }, "node_modules/core-js": { "version": "2.6.12", "resolved": "https://registry.npmjs.org/core-js/-/core-js-2.6.12.tgz", @@ -533,6 +538,18 @@ "nth-check": "~1.0.1" } }, + "node_modules/css-tree": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/css-tree/-/css-tree-2.3.1.tgz", + "integrity": "sha512-6Fv1DV/TYw//QF5IzQdqsNDjx/wc8TrMBZsqjL9eW01tWb7R7k/mq+/VXfJCl7SoD5emsJop9cOByJZfs8hYIw==", + "dependencies": 
{ + "mdn-data": "2.0.30", + "source-map-js": "^1.0.1" + }, + "engines": { + "node": "^10 || ^12.20.0 || ^14.13.0 || >=15.0.0" + } + }, "node_modules/css-what": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.3.tgz", @@ -542,14 +559,14 @@ } }, "node_modules/cssstyle": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-3.0.0.tgz", - "integrity": "sha512-N4u2ABATi3Qplzf0hWbVCdjenim8F3ojEXpBDF5hBpjzW182MjNGLqfmQ0SkSPeQ+V86ZXgeH8aXj6kayd4jgg==", + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-4.0.1.tgz", + "integrity": "sha512-8ZYiJ3A/3OkDd093CBT/0UKDWry7ak4BdPTFP2+QEP7cmhouyq/Up709ASSj2cK02BbZiMgk7kYjZNS4QP5qrQ==", "dependencies": { "rrweb-cssom": "^0.6.0" }, "engines": { - "node": ">=14" + "node": ">=18" } }, "node_modules/dashdash": { @@ -564,9 +581,9 @@ } }, "node_modules/data-uri-to-buffer": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.1.tgz", - "integrity": "sha512-MZd3VlchQkp8rdend6vrx7MmVDJzSNTBvghvKjirLkD+WTChA3KUf0jkE68Q4UyctNqI11zZO9/x2Yx+ub5Cvg==", + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz", + "integrity": "sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==", "engines": { "node": ">= 14" } @@ -657,9 +674,9 @@ } }, "node_modules/devtools-protocol": { - "version": "0.0.1203626", - "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1203626.tgz", - "integrity": "sha512-nEzHZteIUZfGCZtTiS1fRpC8UZmsfD1SiyPvaUNvS13dvKf666OAm8YTi0+Ca3n1nLEyu49Cy4+dPWpaHFJk9g==" + "version": "0.0.1232444", + "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1232444.tgz", + "integrity": "sha512-pM27vqEfxSxRkTMnF+XCmxSEb6duO5R+t8A9DEEJgy4Wz2RVanje2mmj99B6A3zv2r/qGfYlOvYznUhuokizmg==" }, "node_modules/difflib": { "version": "0.2.6", @@ 
-696,9 +713,9 @@ "integrity": "sha512-3VdM/SXBZX2omc9JF9nOPCtDaYQ67BGp5CoLpIQlO2KCAPETs8TcDHacF26jXadGbvUteZzRTeos2fhID5+ucQ==" }, "node_modules/dompurify": { - "version": "3.0.7", - "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.0.7.tgz", - "integrity": "sha512-BViYTZoqP3ak/ULKOc101y+CtHDUvBsVgSxIF1ku0HmK6BRf+C03MC+tArMvOPtVtZp83DDh5puywKDu4sbVjQ==" + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.1.0.tgz", + "integrity": "sha512-yoU4rhgPKCo+p5UrWWWNKiIq+ToGqmVVhk0PmMYBK4kRsR3/qhemNFL8f6CFmBd4gMwm3F4T7HBoydP5uY07fA==" }, "node_modules/domutils": { "version": "1.5.1", @@ -726,6 +743,11 @@ "safer-buffer": "^2.1.0" } }, + "node_modules/ecc-jsbn/node_modules/jsbn": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", + "integrity": "sha512-UVU9dibq2JcFWxQPA6KCqj5O42VOmAY3zQUfEKxU0KpTGXwNoCjkX1e13eHNvw/xPynt6pU0rZ1htjWTNTSXsg==" + }, "node_modules/ellipsize": { "version": "0.1.0", "resolved": "https://registry.npmjs.org/ellipsize/-/ellipsize-0.1.0.tgz", @@ -750,9 +772,9 @@ "integrity": "sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w==" }, "node_modules/escalade": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", - "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.2.tgz", + "integrity": "sha512-ErCHMCae19vR8vQGe50xIsVomy19rg6gFu3+r3jkEO46suLMWBksvVyoGgQV+jOfl84ZSOSlmv6Gxa89PmTGmA==", "engines": { "node": ">=6" } @@ -890,31 +912,26 @@ } }, "node_modules/fs-extra": { - "version": "8.1.0", - "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-8.1.0.tgz", - "integrity": "sha512-yhlQgA6mnOJUKOsRUFsgJdQCvkKhcz8tlZG5HBQfReYZy46OwLcY+Zia0mtdHsOo9y/hP+CxMN0TU9QxoOtG4g==", + "version": "11.2.0", + "resolved": 
"https://registry.npmjs.org/fs-extra/-/fs-extra-11.2.0.tgz", + "integrity": "sha512-PmDi3uwK5nFuXh7XDTlVnS17xJS7vW36is2+w3xcv8SVxiB4NyATf4ctkVY5bkSjX0Y4nbvZCq1/EjtEyr9ktw==", "dependencies": { "graceful-fs": "^4.2.0", - "jsonfile": "^4.0.0", - "universalify": "^0.1.0" + "jsonfile": "^6.0.1", + "universalify": "^2.0.0" }, "engines": { - "node": ">=6 <7 || >=8" + "node": ">=14.14" } }, "node_modules/fs-extra/node_modules/universalify": { - "version": "0.1.2", - "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.2.tgz", - "integrity": "sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", + "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", "engines": { - "node": ">= 4.0.0" + "node": ">= 10.0.0" } }, - "node_modules/fs.realpath": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==" - }, "node_modules/get-caller-file": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", @@ -938,14 +955,14 @@ } }, "node_modules/get-uri": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.2.tgz", - "integrity": "sha512-5KLucCJobh8vBY1K07EFV4+cPZH3mrV9YeAruUseCQKHB58SGjjT2l9/eA9LD082IiuMjSlFJEcdJ27TXvbZNw==", + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.3.tgz", + "integrity": "sha512-BzUrJBS9EcUb4cFol8r4W3v1cPsSyajLSthNkz5BxbpDcHN5tIrM10E2eNvfnvBn3DaT3DUgx0OpsBKkaOpanw==", "dependencies": { "basic-ftp": "^5.0.2", - "data-uri-to-buffer": "^6.0.0", + "data-uri-to-buffer": "^6.0.2", "debug": "^4.3.4", - "fs-extra": "^8.1.0" + "fs-extra": "^11.2.0" }, "engines": { "node": 
">= 14" @@ -959,25 +976,6 @@ "assert-plus": "^1.0.0" } }, - "node_modules/glob": { - "version": "7.2.3", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", - "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", - "dependencies": { - "fs.realpath": "^1.0.0", - "inflight": "^1.0.4", - "inherits": "2", - "minimatch": "^3.1.1", - "once": "^1.3.0", - "path-is-absolute": "^1.0.0" - }, - "engines": { - "node": "*" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/graceful-fs": { "version": "4.2.11", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", @@ -1034,9 +1032,9 @@ } }, "node_modules/http-proxy-agent": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.0.tgz", - "integrity": "sha512-+ZT+iBxVUQ1asugqnD6oWoRiS25AkjNfG085dKJGtGxkdwLQrMKU5wJr2bOOFAXzKcTuqq+7fZlTMgG3SRfIYQ==", + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", + "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", "dependencies": { "agent-base": "^7.1.0", "debug": "^4.3.4" @@ -1059,9 +1057,9 @@ } }, "node_modules/https-proxy-agent": { - "version": "7.0.2", - "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.2.tgz", - "integrity": "sha512-NmLNjm6ucYwtcUmL7JQC1ZQ57LmHP4lT15FQ8D61nak1rO6DH+fz5qNK2Ap5UN4ZapYICE3/0KodcLYSPsPbaA==", + "version": "7.0.4", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.4.tgz", + "integrity": "sha512-wlwpilI7YdjSkWaQ/7omYBMTliDcmCN8OLihO6I9B86g06lMyAoqgoDpV0XqoaPOKj+0DIdAvnsWfyAAhmimcg==", "dependencies": { "agent-base": "^7.0.2", "debug": "4" @@ -1105,24 +1103,22 @@ "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz", "integrity": 
"sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==" }, - "node_modules/inflight": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", - "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", - "dependencies": { - "once": "^1.3.0", - "wrappy": "1" - } - }, "node_modules/inherits": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" }, - "node_modules/ip": { - "version": "1.1.8", - "resolved": "https://registry.npmjs.org/ip/-/ip-1.1.8.tgz", - "integrity": "sha512-PuExPYUiu6qMBQb4l06ecm6T6ujzhmh+MeJcW9wa89PoAz5pvd4zPgN5WJV104mb6S2T1AwNIAaB70JNrLQWhg==" + "node_modules/ip-address": { + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-9.0.5.tgz", + "integrity": "sha512-zHtQzGojZXTwZTHQqra+ETKd4Sn3vgi7uBmlPoXVWZqYvuKmtI0l/VZTjqGmJY9x88GGOaZ9+G9ES8hC4T4X8g==", + "dependencies": { + "jsbn": "1.1.0", + "sprintf-js": "^1.1.3" + }, + "engines": { + "node": ">= 12" + } }, "node_modules/is-fullwidth-code-point": { "version": "3.0.0", @@ -1153,16 +1149,17 @@ "integrity": "sha512-Yljz7ffyPbrLpLngrMtZ7NduUgVvi6wG9RJ9IUcyCd59YQ911PBJphODUcbOVbqYfxe1wuYf/LJ8PauMRwsM/g==" }, "node_modules/jsbn": { - "version": "0.1.1", - "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", - "integrity": "sha512-UVU9dibq2JcFWxQPA6KCqj5O42VOmAY3zQUfEKxU0KpTGXwNoCjkX1e13eHNvw/xPynt6pU0rZ1htjWTNTSXsg==" + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-1.1.0.tgz", + "integrity": "sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==" }, "node_modules/jsdom": { - "version": "23.0.1", - "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-23.0.1.tgz", - "integrity": 
"sha512-2i27vgvlUsGEBO9+/kJQRbtqtm+191b5zAZrU/UezVmnC2dlDAFLgDYJvAEi94T4kjsRKkezEtLQTgsNEsW2lQ==", + "version": "23.2.0", + "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-23.2.0.tgz", + "integrity": "sha512-L88oL7D/8ufIES+Zjz7v0aes+oBMh2Xnh3ygWvL0OaICOomKEPKuPnIfBJekiXr+BHbbMjrWn/xqrDQuxFTeyA==", "dependencies": { - "cssstyle": "^3.0.0", + "@asamuzakjp/dom-selector": "^2.0.1", + "cssstyle": "^4.0.1", "data-urls": "^5.0.0", "decimal.js": "^10.4.3", "form-data": "^4.0.0", @@ -1170,7 +1167,6 @@ "http-proxy-agent": "^7.0.0", "https-proxy-agent": "^7.0.2", "is-potential-custom-element-name": "^1.0.1", - "nwsapi": "^2.2.7", "parse5": "^7.1.2", "rrweb-cssom": "^0.6.0", "saxes": "^6.0.0", @@ -1181,7 +1177,7 @@ "whatwg-encoding": "^3.1.1", "whatwg-mimetype": "^4.0.0", "whatwg-url": "^14.0.0", - "ws": "^8.14.2", + "ws": "^8.16.0", "xml-name-validator": "^5.0.0" }, "engines": { @@ -1235,13 +1231,24 @@ "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==" }, "node_modules/jsonfile": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-4.0.0.tgz", - "integrity": "sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg==", + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.1.0.tgz", + "integrity": "sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==", + "dependencies": { + "universalify": "^2.0.0" + }, "optionalDependencies": { "graceful-fs": "^4.1.6" } }, + "node_modules/jsonfile/node_modules/universalify": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", + "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", + "engines": { + "node": ">= 10.0.0" + } + }, "node_modules/jsprim": { "version": "2.0.2", "resolved": 
"https://registry.npmjs.org/jsprim/-/jsprim-2.0.2.tgz", @@ -1375,6 +1382,11 @@ "node": ">=12" } }, + "node_modules/mdn-data": { + "version": "2.0.30", + "resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.0.30.tgz", + "integrity": "sha512-GaqWWShW4kv/G9IEucWScBx9G1/vsFZZJUO+tD26M8J8z3Kw5RDQjaoZe03YAClgeS/SWPOcb4nkFBTEi5DUEA==" + }, "node_modules/mime-db": { "version": "1.52.0", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", @@ -1394,17 +1406,6 @@ "node": ">= 0.6" } }, - "node_modules/minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", - "dependencies": { - "brace-expansion": "^1.1.7" - }, - "engines": { - "node": "*" - } - }, "node_modules/mitt": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz", @@ -1461,9 +1462,9 @@ } }, "node_modules/nwsapi": { - "version": "2.2.7", - "resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.2.7.tgz", - "integrity": "sha512-ub5E4+FBPKwAZx0UwIQOjYWGHTEq5sPqHQNRN8Z9e4A7u3Tj1weLJsL59yH9vmvqEtBHaOmT6cYQKIZOxp35FQ==" + "version": "2.2.9", + "resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.2.9.tgz", + "integrity": "sha512-2f3F0SEEer8bBu0dsNCFF50N0cTThV1nWFYcEYFZttdW0lDAoybv9cQoK7X7/68Z89S7FoRrVjP1LPX4XRf9vg==" }, "node_modules/oauth-sign": { "version": "0.9.0", @@ -1500,12 +1501,11 @@ } }, "node_modules/pac-resolver": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/pac-resolver/-/pac-resolver-7.0.0.tgz", - "integrity": "sha512-Fd9lT9vJbHYRACT8OhCbZBbxr6KRSawSovFpy8nDGshaK99S/EBhVIHp9+crhxrsZOuvLpgL1n23iyPg6Rl2hg==", + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/pac-resolver/-/pac-resolver-7.0.1.tgz", + "integrity": "sha512-5NPgf87AT2STgwa2ntRMr45jTKrYBGkVU36yT0ig/n/GMAa3oPqhZfIQ2kMEimReg0+t9kZViDVZ83qfVUlckg==", "dependencies": { "degenerator": 
"^5.0.0", - "ip": "^1.1.8", "netmask": "^2.0.2" }, "engines": { @@ -1539,14 +1539,6 @@ "url": "https://github.com/fb55/entities?sponsor=1" } }, - "node_modules/path-is-absolute": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", - "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/pend": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz", @@ -1648,39 +1640,19 @@ } }, "node_modules/puppeteer-core": { - "version": "21.5.2", - "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-21.5.2.tgz", - "integrity": "sha512-v4T0cWnujSKs+iEfmb8ccd7u4/x8oblEyKqplqKnJ582Kw8PewYAWvkH4qUWhitN3O2q9RF7dzkvjyK5HbzjLA==", + "version": "22.0.0", + "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-22.0.0.tgz", + "integrity": "sha512-S3s91rLde0A86PWVeNY82h+P0fdS7CTiNWAicCVH/bIspRP4nS2PnO5j+VTFqCah0ZJizGzpVPAmxVYbLxTc9w==", "dependencies": { - "@puppeteer/browsers": "1.8.0", - "chromium-bidi": "0.4.33", + "@puppeteer/browsers": "2.0.0", + "chromium-bidi": "0.5.8", "cross-fetch": "4.0.0", "debug": "4.3.4", - "devtools-protocol": "0.0.1203626", - "ws": "8.14.2" + "devtools-protocol": "0.0.1232444", + "ws": "8.16.0" }, "engines": { - "node": ">=16.13.2" - } - }, - "node_modules/puppeteer-core/node_modules/ws": { - "version": "8.14.2", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.14.2.tgz", - "integrity": "sha512-wEBG1ftX4jcglPxgFCMJmZ2PLtSbJ2Peg6TmpJFTbe9GZYOQCDPdMYu/Tm0/bGZkw8paZnJY45J4K2PZrLYq8g==", - "engines": { - "node": ">=10.0.0" - }, - "peerDependencies": { - "bufferutil": "^4.0.1", - "utf-8-validate": ">=5.0.2" - }, - "peerDependenciesMeta": { - "bufferutil": { - "optional": true - }, - "utf-8-validate": { - "optional": true - } + "node": ">=18" } }, "node_modules/qs": { @@ -1703,8 +1675,7 @@ }, 
"node_modules/readability-extractor": { "version": "0.0.11", - "resolved": "git+ssh://git@github.com/ArchiveBox/readability-extractor.git#2fb4689a65c6433036453dcbee7a268840604eb9", - "license": "MIT", + "resolved": "git+ssh://git@github.com/ArchiveBox/readability-extractor.git#057f2046f9535cfc6df7b8d551aaad32a9e6226c", "dependencies": { "@mozilla/readability": "^0.5.0", "dompurify": "^3.0.6", @@ -1740,25 +1711,19 @@ "node": ">=0.10.0" } }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/requires-port": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz", "integrity": "sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ==" }, - "node_modules/rimraf": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", - "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==", - "dependencies": { - "glob": "^7.1.3" - }, - "bin": { - "rimraf": "bin.js" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/rrweb-cssom": { "version": "0.6.0", "resolved": "https://registry.npmjs.org/rrweb-cssom/-/rrweb-cssom-0.6.0.tgz", @@ -1800,9 +1765,9 @@ } }, "node_modules/selenium-webdriver": { - "version": "4.15.0", - "resolved": "https://registry.npmjs.org/selenium-webdriver/-/selenium-webdriver-4.15.0.tgz", - "integrity": "sha512-BNG1bq+KWiBGHcJ/wULi0eKY0yaDqFIbEmtbsYJmfaEghdCkXBsx1akgOorhNwjBipOr0uwpvNXqT6/nzl+zjg==", + "version": "4.17.0", + "resolved": "https://registry.npmjs.org/selenium-webdriver/-/selenium-webdriver-4.17.0.tgz", + "integrity": 
"sha512-e2E+2XBlGepzwgFbyQfSwo9Cbj6G5fFfs9MzAS00nC99EewmcS2rwn2MwtgfP7I5p1e7DYv4HQJXtWedsu6DvA==", "dependencies": { "jszip": "^3.10.1", "tmp": "^0.2.1", @@ -1818,16 +1783,16 @@ "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==" }, "node_modules/single-file-cli": { - "version": "1.1.46", - "resolved": "https://registry.npmjs.org/single-file-cli/-/single-file-cli-1.1.46.tgz", - "integrity": "sha512-+vFj0a5Y4ESqpMwH0T6738pg8ZA9KVhhl6OlIOsicamGNU9DnMa+q9dL1S2KnLWHoauKjU0BThhR/YKUleJSxw==", + "version": "1.1.54", + "resolved": "https://registry.npmjs.org/single-file-cli/-/single-file-cli-1.1.54.tgz", + "integrity": "sha512-wnVPg7BklhswwFVrtuFXbmluI4piHxg2dC0xATxYTeXAld6PnRPlnp7ufallRKArjFBZdP2u+ihMkOIp7A38XA==", "dependencies": { "file-url": "3.0.0", "iconv-lite": "0.6.3", - "jsdom": "23.0.0", - "puppeteer-core": "21.5.2", - "selenium-webdriver": "4.15.0", - "single-file-core": "1.3.15", + "jsdom": "24.0.0", + "puppeteer-core": "22.0.0", + "selenium-webdriver": "4.17.0", + "single-file-core": "1.3.24", "strong-data-uri": "1.0.6", "yargs": "17.7.2" }, @@ -1847,11 +1812,11 @@ } }, "node_modules/single-file-cli/node_modules/jsdom": { - "version": "23.0.0", - "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-23.0.0.tgz", - "integrity": "sha512-cbL/UCtohJguhFC7c2/hgW6BeZCNvP7URQGnx9tSJRYKCdnfbfWOrtuLTMfiB2VxKsx5wPHVsh/J0aBy9lIIhQ==", + "version": "24.0.0", + "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-24.0.0.tgz", + "integrity": "sha512-UDS2NayCvmXSXVP6mpTj+73JnNQadZlr9N68189xib2tx5Mls7swlTNao26IoHv46BZJFvXygyRtyXd1feAk1A==", "dependencies": { - "cssstyle": "^3.0.0", + "cssstyle": "^4.0.1", "data-urls": "^5.0.0", "decimal.js": "^10.4.3", "form-data": "^4.0.0", @@ -1870,14 +1835,14 @@ "whatwg-encoding": "^3.1.1", "whatwg-mimetype": "^4.0.0", "whatwg-url": "^14.0.0", - "ws": "^8.14.2", + "ws": "^8.16.0", "xml-name-validator": "^5.0.0" }, "engines": { "node": ">=18" }, "peerDependencies": { - "canvas": 
"^3.0.0" + "canvas": "^2.11.2" }, "peerDependenciesMeta": { "canvas": { @@ -1909,9 +1874,9 @@ } }, "node_modules/single-file-core": { - "version": "1.3.15", - "resolved": "https://registry.npmjs.org/single-file-core/-/single-file-core-1.3.15.tgz", - "integrity": "sha512-/YNpHBwASWNxmSmZXz0xRolmXf0+PGAbwpVrwn6A8tYeuAdezxxde5RYTTQ7V4Zv68+H4JMhE2DwCRV0sVUGNA==" + "version": "1.3.24", + "resolved": "https://registry.npmjs.org/single-file-core/-/single-file-core-1.3.24.tgz", + "integrity": "sha512-1B256mKBbNV8jXAV+hRyEv0aMa7tn0C0Ci+zx7Ya4ZXZB3b9/1MgKsB/fxVwDiL28WJSU0pxzh8ftIYubCNn9w==" }, "node_modules/smart-buffer": { "version": "4.2.0", @@ -1923,24 +1888,24 @@ } }, "node_modules/socks": { - "version": "2.7.1", - "resolved": "https://registry.npmjs.org/socks/-/socks-2.7.1.tgz", - "integrity": "sha512-7maUZy1N7uo6+WVEX6psASxtNlKaNVMlGQKkG/63nEDdLOWNbiUMoLK7X4uYoLhQstau72mLgfEWcXcwsaHbYQ==", + "version": "2.8.3", + "resolved": "https://registry.npmjs.org/socks/-/socks-2.8.3.tgz", + "integrity": "sha512-l5x7VUUWbjVFbafGLxPWkYsHIhEvmF85tbIeFZWc8ZPtoMyybuEhL7Jye/ooC4/d48FgOjSJXgsF/AJPYCW8Zw==", "dependencies": { - "ip": "^2.0.0", + "ip-address": "^9.0.5", "smart-buffer": "^4.2.0" }, "engines": { - "node": ">= 10.13.0", + "node": ">= 10.0.0", "npm": ">= 3.0.0" } }, "node_modules/socks-proxy-agent": { - "version": "8.0.2", - "resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.2.tgz", - "integrity": "sha512-8zuqoLv1aP/66PHF5TqwJ7Czm3Yv32urJQHrVyhD7mmA6d61Zv8cIXQYPTWwmg6qlupnPvs/QKDmfa4P/qct2g==", + "version": "8.0.3", + "resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.3.tgz", + "integrity": "sha512-VNegTZKhuGq5vSD6XNKlbqWhyt/40CgoEw8XxD6dhnm8Jq9IEa3nIa4HwnM8XOqU0CdB0BwWVXusqiFXfHB3+A==", "dependencies": { - "agent-base": "^7.0.2", + "agent-base": "^7.1.1", "debug": "^4.3.4", "socks": "^2.7.1" }, @@ -1948,11 +1913,6 @@ "node": ">= 14" } }, - "node_modules/socks/node_modules/ip": { - "version": "2.0.0", - "resolved": 
"https://registry.npmjs.org/ip/-/ip-2.0.0.tgz", - "integrity": "sha512-WKa+XuLG1A1R0UWhl2+1XQSi+fZWMsYKffMZTTYsiZaUD8k2yDAj5atimTUD2TZkyCkNEeYE5NhFZmupOGtjYQ==" - }, "node_modules/source-map": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", @@ -1962,6 +1922,19 @@ "node": ">=0.10.0" } }, + "node_modules/source-map-js": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.0.tgz", + "integrity": "sha512-itJW8lvSA0TXEphiRoawsCksnlf8SyvmFzIhltqAHluXd88pkCd+cXJVHTDwdCr0IzwptSm035IHQktUu1QUMg==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/sprintf-js": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz", + "integrity": "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==" + }, "node_modules/sshpk": { "version": "1.18.0", "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.18.0.tgz", @@ -1986,6 +1959,11 @@ "node": ">=0.10.0" } }, + "node_modules/sshpk/node_modules/jsbn": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", + "integrity": "sha512-UVU9dibq2JcFWxQPA6KCqj5O42VOmAY3zQUfEKxU0KpTGXwNoCjkX1e13eHNvw/xPynt6pU0rZ1htjWTNTSXsg==" + }, "node_modules/stream-length": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/stream-length/-/stream-length-1.0.2.tgz", @@ -1995,12 +1973,15 @@ } }, "node_modules/streamx": { - "version": "2.15.6", - "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.15.6.tgz", - "integrity": "sha512-q+vQL4AAz+FdfT137VF69Cc/APqUbxy+MDOImRrMvchJpigHj9GksgDU2LYbO9rx7RX6osWgxJB2WxhYv4SZAw==", + "version": "2.16.1", + "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.16.1.tgz", + "integrity": "sha512-m9QYj6WygWyWa3H1YY69amr4nVgy61xfjys7xO7kviL5rfIEc2naf+ewFiOA+aEJD7y0JO3h2GoiUv4TDwEGzQ==", "dependencies": { "fast-fifo": "^1.1.0", "queue-tick": "^1.0.1" + }, + "optionalDependencies": 
{ + "bare-events": "^2.2.0" } }, "node_modules/string_decoder": { @@ -2067,9 +2048,9 @@ } }, "node_modules/tar-stream": { - "version": "3.1.6", - "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.6.tgz", - "integrity": "sha512-B/UyjYwPpMBv+PaFSWAmtYjwdrlEaZQEhMIBFNC5oEG8lpiW8XjcSdmEaClj28ArfKScKHs2nshz3k2le6crsg==", + "version": "3.1.7", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz", + "integrity": "sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==", "dependencies": { "b4a": "^1.6.4", "fast-fifo": "^1.2.0", @@ -2082,14 +2063,11 @@ "integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==" }, "node_modules/tmp": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.1.tgz", - "integrity": "sha512-76SUhtfqR2Ijn+xllcI5P1oyannHNHByD80W1q447gU3mp9G9PSpGdWmjUOHRDPiHYacIk66W7ubDTuPF3BEtQ==", - "dependencies": { - "rimraf": "^3.0.0" - }, + "version": "0.2.3", + "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.3.tgz", + "integrity": "sha512-nZD7m9iCPC5g0pYmcaxogYKggSfLsdxl8of3Q/oIbqCqLLIO9IAF0GWjX1z9NZRHPiXv8Wex4yDCaZsgEw0Y8w==", "engines": { - "node": ">=8.17.0" + "node": ">=14.14" } }, "node_modules/tough-cookie": { @@ -2125,9 +2103,9 @@ "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==" }, "node_modules/turndown": { - "version": "7.1.2", - "resolved": "https://registry.npmjs.org/turndown/-/turndown-7.1.2.tgz", - "integrity": "sha512-ntI9R7fcUKjqBP6QU8rBK2Ehyt8LAzt3UBT9JR9tgo6GtuKvyUzpayWmeMKJw1DPdXzktvtIT8m2mVXz+bL/Qg==", + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/turndown/-/turndown-7.1.3.tgz", + "integrity": "sha512-Z3/iJ6IWh8VBiACWQJaA5ulPQE5E1QwvBHj00uGzdQxdRnd8fh1DPqNOJqzQDu6DkOstORrtXzf/9adB+vMtEA==", "dependencies": { "domino": "^2.1.6" } @@ -2178,9 +2156,9 @@ } }, "node_modules/urlpattern-polyfill": { - 
"version": "9.0.0", - "resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-9.0.0.tgz", - "integrity": "sha512-WHN8KDQblxd32odxeIgo83rdVDE2bvdkb86it7bMhYZwWKJz0+O0RK/eZiHYnM+zgt/U7hAHOlCQGfjjvSkw2g==" + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz", + "integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg==" }, "node_modules/util-deprecate": { "version": "1.0.2", diff --git a/package.json b/package.json index 3c42a8b9..7682c546 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "archivebox", - "version": "0.7.3", + "version": "0.8.0", "description": "ArchiveBox: The self-hosted internet archive", "author": "Nick Sweeting ", "repository": "github:ArchiveBox/ArchiveBox", diff --git a/pdm.lock b/pdm.lock index 3ff96734..4332ebfb 100644 --- a/pdm.lock +++ b/pdm.lock @@ -3,27 +3,29 @@ [metadata] groups = ["default", "ldap", "sonic"] -strategy = ["cross_platform"] +strategy = ["cross_platform", "inherit_metadata"] lock_version = "4.4.1" -content_hash = "sha256:4ba1c25daa30a36c5b3ffdb563d5024c2ab15042758f4fbc3f375dedb35d1bdf" +content_hash = "sha256:a2483b801ba2cb7748849f80e9030d949728ea3686eb023dc333b5a99f610874" [[package]] name = "asgiref" -version = "3.7.2" -requires_python = ">=3.7" +version = "3.8.1" +requires_python = ">=3.8" summary = "ASGI specs, helper code, and adapters" +groups = ["default", "ldap"] dependencies = [ "typing-extensions>=4; python_version < \"3.11\"", ] files = [ - {file = "asgiref-3.7.2-py3-none-any.whl", hash = "sha256:89b2ef2247e3b562a16eef663bc0e2e703ec6468e2fa8a5cd61cd449786d4f6e"}, - {file = "asgiref-3.7.2.tar.gz", hash = "sha256:9e0ce3aa93a819ba5b45120216b23878cf6e8525eb3848653452b4192b92afed"}, + {file = "asgiref-3.8.1-py3-none-any.whl", hash = "sha256:3e1e3ecc849832fe52ccf2cb6686b7a55f82bb1d6aee72a58826471390335e47"}, + {file = "asgiref-3.8.1.tar.gz", hash = 
"sha256:c343bd80a0bec947a9860adb4c432ffa7db769836c64238fc34bdc3fec84d590"}, ] [[package]] name = "asttokens" version = "2.4.1" summary = "Annotate AST trees with source code positions" +groups = ["default"] dependencies = [ "six>=1.12.0", ] @@ -36,6 +38,8 @@ files = [ name = "brotli" version = "1.1.0" summary = "Python bindings for the Brotli compression library" +groups = ["default"] +marker = "implementation_name == \"cpython\"" files = [ {file = "Brotli-1.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e1140c64812cb9b06c922e77f1c26a75ec5e3f0fb2bf92cc8c58720dec276752"}, {file = "Brotli-1.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c8fd5270e906eef71d4a8d19b7c6a43760c6abcfcc10c9101d14eb2357418de9"}, @@ -61,18 +65,18 @@ files = [ {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a1fd8a29719ccce974d523580987b7f8229aeace506952fa9ce1d53a033873c8"}, {file = "Brotli-1.1.0-cp311-cp311-win32.whl", hash = "sha256:39da8adedf6942d76dc3e46653e52df937a3c4d6d18fdc94a7c29d263b1f5b50"}, {file = "Brotli-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:aac0411d20e345dc0920bdec5548e438e999ff68d77564d5e9463a7ca9d3e7b1"}, - {file = "Brotli-1.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5fb2ce4b8045c78ebbc7b8f3c15062e435d47e7393cc57c25115cfd49883747a"}, - {file = "Brotli-1.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7905193081db9bfa73b1219140b3d315831cbff0d8941f22da695832f0dd188f"}, - {file = "Brotli-1.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a77def80806c421b4b0af06f45d65a136e7ac0bdca3c09d9e2ea4e515367c7e9"}, - {file = "Brotli-1.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dadd1314583ec0bf2d1379f7008ad627cd6336625d6679cf2f8e67081b83acf"}, - {file = "Brotli-1.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:901032ff242d479a0efa956d853d16875d42157f98951c0230f69e69f9c09bac"}, - {file = 
"Brotli-1.1.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:22fc2a8549ffe699bfba2256ab2ed0421a7b8fadff114a3d201794e45a9ff578"}, - {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ae15b066e5ad21366600ebec29a7ccbc86812ed267e4b28e860b8ca16a2bc474"}, - {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:949f3b7c29912693cee0afcf09acd6ebc04c57af949d9bf77d6101ebb61e388c"}, - {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:89f4988c7203739d48c6f806f1e87a1d96e0806d44f0fba61dba81392c9e474d"}, - {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:de6551e370ef19f8de1807d0a9aa2cdfdce2e85ce88b122fe9f6b2b076837e59"}, - {file = "Brotli-1.1.0-cp39-cp39-win32.whl", hash = "sha256:f0d8a7a6b5983c2496e364b969f0e526647a06b075d034f3297dc66f3b360c64"}, - {file = "Brotli-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:cdad5b9014d83ca68c25d2e9444e28e967ef16e80f6b436918c700c117a85467"}, + {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:316cc9b17edf613ac76b1f1f305d2a748f1b976b033b049a6ecdfd5612c70409"}, + {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:caf9ee9a5775f3111642d33b86237b05808dafcd6268faa492250e9b78046eb2"}, + {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70051525001750221daa10907c77830bc889cb6d865cc0b813d9db7fefc21451"}, + {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7f4bf76817c14aa98cc6697ac02f3972cb8c3da93e9ef16b9c66573a68014f91"}, + {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0c5516f0aed654134a2fc936325cc2e642f8a0e096d075209672eb321cff408"}, + {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c3020404e0b5eefd7c9485ccf8393cfb75ec38ce75586e046573c9dc29967a0"}, + {file = 
"Brotli-1.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4ed11165dd45ce798d99a136808a794a748d5dc38511303239d4e2363c0695dc"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4093c631e96fdd49e0377a9c167bfd75b6d0bad2ace734c6eb20b348bc3ea180"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e4c4629ddad63006efa0ef968c8e4751c5868ff0b1c5c40f76524e894c50248"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:861bf317735688269936f755fa136a99d1ed526883859f86e41a5d43c61d8966"}, + {file = "Brotli-1.1.0-cp312-cp312-win32.whl", hash = "sha256:5f4d5ea15c9382135076d2fb28dde923352fe02951e66935a9efaac8f10e81b0"}, + {file = "Brotli-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:906bc3a79de8c4ae5b86d3d75a8b77e44404b0f4261714306e3ad248d8ab0951"}, {file = "Brotli-1.1.0.tar.gz", hash = "sha256:81de08ac11bcb85841e440c13611c00b67d3bf82698314928d0b676362546724"}, ] @@ -81,6 +85,8 @@ name = "brotlicffi" version = "1.1.0.0" requires_python = ">=3.7" summary = "Python CFFI bindings to the Brotli library" +groups = ["default"] +marker = "implementation_name != \"cpython\"" dependencies = [ "cffi>=1.0.0", ] @@ -116,12 +122,13 @@ files = [ [[package]] name = "certifi" -version = "2023.11.17" +version = "2024.2.2" requires_python = ">=3.6" summary = "Python package for providing Mozilla's CA Bundle." 
+groups = ["default"] files = [ - {file = "certifi-2023.11.17-py3-none-any.whl", hash = "sha256:e036ab49d5b79556f99cfc2d9320b34cfbe5be05c5871b51de9329f0603b0474"}, - {file = "certifi-2023.11.17.tar.gz", hash = "sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1"}, + {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"}, + {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, ] [[package]] @@ -129,6 +136,8 @@ name = "cffi" version = "1.16.0" requires_python = ">=3.8" summary = "Foreign Function Interface for Python calling C code." +groups = ["default"] +marker = "implementation_name != \"cpython\"" dependencies = [ "pycparser", ] @@ -155,17 +164,16 @@ files = [ {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"}, {file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"}, {file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"}, - {file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"}, - {file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"}, - {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"}, - {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"}, - {file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"}, - {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"}, + {file = 
"cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"}, + {file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"}, + {file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"}, + {file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"}, {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, ] @@ -174,6 +182,7 @@ name = "charset-normalizer" version = "3.3.2" requires_python = ">=3.7.0" summary = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +groups = ["default"] files = [ {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, @@ -206,21 +215,21 @@ files = [ {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, - 
{file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, - 
{file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = 
"sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, ] @@ -229,6 +238,8 @@ name = "colorama" version = "0.4.6" requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" summary = "Cross-platform colored terminal text." 
+groups = ["default"] +marker = "sys_platform == \"win32\"" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, @@ -236,16 +247,17 @@ files = [ [[package]] name = "croniter" -version = "2.0.1" -requires_python = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +version = "2.0.5" +requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.6" summary = "croniter provides iteration for datetime object with cron like format" +groups = ["default"] dependencies = [ "python-dateutil", "pytz>2021.1", ] files = [ - {file = "croniter-2.0.1-py2.py3-none-any.whl", hash = "sha256:4cb064ce2d8f695b3b078be36ff50115cf8ac306c10a7e8653ee2a5b534673d7"}, - {file = "croniter-2.0.1.tar.gz", hash = "sha256:d199b2ec3ea5e82988d1f72022433c5f9302b3b3ea9e6bfd6a1518f6ea5e700a"}, + {file = "croniter-2.0.5-py2.py3-none-any.whl", hash = "sha256:fdbb44920944045cc323db54599b321325141d82d14fa7453bc0699826bbe9ed"}, + {file = "croniter-2.0.5.tar.gz", hash = "sha256:f1f8ca0af64212fbe99b1bee125ee5a1b53a9c1b433968d8bca8817b79d237f3"}, ] [[package]] @@ -253,6 +265,7 @@ name = "dateparser" version = "1.2.0" requires_python = ">=3.7" summary = "Date parsing library designed to parse dates from HTML pages" +groups = ["default"] dependencies = [ "python-dateutil", "pytz", @@ -269,6 +282,7 @@ name = "decorator" version = "5.1.1" requires_python = ">=3.5" summary = "Decorators for Humans" +groups = ["default"] files = [ {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, @@ -276,54 +290,59 @@ files = [ [[package]] name = "django" -version = "3.1.14" -requires_python = ">=3.6" -summary = "A high-level Python Web framework that 
encourages rapid development and clean, pragmatic design." +version = "4.2.11" +requires_python = ">=3.8" +summary = "A high-level Python web framework that encourages rapid development and clean, pragmatic design." +groups = ["default", "ldap"] dependencies = [ - "asgiref<4,>=3.2.10", - "pytz", - "sqlparse>=0.2.2", + "asgiref<4,>=3.6.0", + "sqlparse>=0.3.1", + "tzdata; sys_platform == \"win32\"", ] files = [ - {file = "Django-3.1.14-py3-none-any.whl", hash = "sha256:0fabc786489af16ad87a8c170ba9d42bfd23f7b699bd5ef05675864e8d012859"}, - {file = "Django-3.1.14.tar.gz", hash = "sha256:72a4a5a136a214c39cf016ccdd6b69e2aa08c7479c66d93f3a9b5e4bb9d8a347"}, + {file = "Django-4.2.11-py3-none-any.whl", hash = "sha256:ddc24a0a8280a0430baa37aff11f28574720af05888c62b7cfe71d219f4599d3"}, + {file = "Django-4.2.11.tar.gz", hash = "sha256:6e6ff3db2d8dd0c986b4eec8554c8e4f919b5c1ff62a5b4390c17aff2ed6e5c4"}, ] [[package]] name = "django-auth-ldap" -version = "4.1.0" -requires_python = ">=3.7" -summary = "Django LDAP authentication backend." 
+version = "4.8.0" +requires_python = ">=3.8" +summary = "Django LDAP authentication backend" +groups = ["ldap"] dependencies = [ - "Django>=2.2", + "Django>=3.2", "python-ldap>=3.1", ] files = [ - {file = "django-auth-ldap-4.1.0.tar.gz", hash = "sha256:77f749d3b17807ce8eb56a9c9c8e5746ff316567f81d5ba613495d9c7495a949"}, - {file = "django_auth_ldap-4.1.0-py3-none-any.whl", hash = "sha256:68870e7921e84b1a9867e268a9c8a3e573e8a0d95ea08bcf31be178f5826ff36"}, + {file = "django-auth-ldap-4.8.0.tar.gz", hash = "sha256:604250938ddc9fda619f247c7a59b0b2f06e53a7d3f46a156f28aa30dd71a738"}, + {file = "django_auth_ldap-4.8.0-py3-none-any.whl", hash = "sha256:4b4b944f3c28bce362f33fb6e8db68429ed8fd8f12f0c0c4b1a4344a7ef225ce"}, ] [[package]] name = "django-extensions" -version = "3.1.5" +version = "3.2.3" requires_python = ">=3.6" summary = "Extensions for Django" +groups = ["default"] dependencies = [ - "Django>=2.2", + "Django>=3.2", ] files = [ - {file = "django-extensions-3.1.5.tar.gz", hash = "sha256:28e1e1bf49f0e00307ba574d645b0af3564c981a6dfc87209d48cb98f77d0b1a"}, - {file = "django_extensions-3.1.5-py3-none-any.whl", hash = "sha256:9238b9e016bb0009d621e05cf56ea8ce5cce9b32e91ad2026996a7377ca28069"}, + {file = "django-extensions-3.2.3.tar.gz", hash = "sha256:44d27919d04e23b3f40231c4ab7af4e61ce832ef46d610cc650d53e68328410a"}, + {file = "django_extensions-3.2.3-py3-none-any.whl", hash = "sha256:9600b7562f79a92cbf1fde6403c04fee314608fefbb595502e34383ae8203401"}, ] [[package]] name = "exceptiongroup" -version = "1.2.0" +version = "1.2.1" requires_python = ">=3.7" summary = "Backport of PEP 654 (exception groups)" +groups = ["default"] +marker = "python_version < \"3.11\"" files = [ - {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"}, - {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"}, + {file = 
"exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, + {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, ] [[package]] @@ -331,42 +350,97 @@ name = "executing" version = "2.0.1" requires_python = ">=3.5" summary = "Get the currently executing AST node of a frame, and other information" +groups = ["default"] files = [ {file = "executing-2.0.1-py2.py3-none-any.whl", hash = "sha256:eac49ca94516ccc753f9fb5ce82603156e590b27525a8bc32cce8ae302eb61bc"}, {file = "executing-2.0.1.tar.gz", hash = "sha256:35afe2ce3affba8ee97f2d69927fa823b08b472b7b994e36a52a964b93d16147"}, ] +[[package]] +name = "feedparser" +version = "6.0.11" +requires_python = ">=3.6" +summary = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds" +groups = ["default"] +dependencies = [ + "sgmllib3k", +] +files = [ + {file = "feedparser-6.0.11-py3-none-any.whl", hash = "sha256:0be7ee7b395572b19ebeb1d6aafb0028dee11169f1c934e0ed67d54992f4ad45"}, + {file = "feedparser-6.0.11.tar.gz", hash = "sha256:c9d0407b64c6f2a065d0ebb292c2b35c01050cc0dc33757461aaabdc4c4184d5"}, +] + +[[package]] +name = "greenlet" +version = "3.0.3" +requires_python = ">=3.7" +summary = "Lightweight in-process concurrent programming" +groups = ["default"] +marker = "platform_machine != \"armv7l\"" +files = [ + {file = "greenlet-3.0.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9da2bd29ed9e4f15955dd1595ad7bc9320308a3b766ef7f837e23ad4b4aac31a"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d353cadd6083fdb056bb46ed07e4340b0869c305c8ca54ef9da3421acbdf6881"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dca1e2f3ca00b84a396bc1bce13dd21f680f035314d2379c4160c98153b2059b"}, + {file = 
"greenlet-3.0.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3ed7fb269f15dc662787f4119ec300ad0702fa1b19d2135a37c2c4de6fadfd4a"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd4f49ae60e10adbc94b45c0b5e6a179acc1736cf7a90160b404076ee283cf83"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:73a411ef564e0e097dbe7e866bb2dda0f027e072b04da387282b02c308807405"}, + {file = "greenlet-3.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7f362975f2d179f9e26928c5b517524e89dd48530a0202570d55ad6ca5d8a56f"}, + {file = "greenlet-3.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:649dde7de1a5eceb258f9cb00bdf50e978c9db1b996964cd80703614c86495eb"}, + {file = "greenlet-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:68834da854554926fbedd38c76e60c4a2e3198c6fbed520b106a8986445caaf9"}, + {file = "greenlet-3.0.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:b1b5667cced97081bf57b8fa1d6bfca67814b0afd38208d52538316e9422fc61"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52f59dd9c96ad2fc0d5724107444f76eb20aaccb675bf825df6435acb7703559"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:afaff6cf5200befd5cec055b07d1c0a5a06c040fe5ad148abcd11ba6ab9b114e"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe754d231288e1e64323cfad462fcee8f0288654c10bdf4f603a39ed923bef33"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2797aa5aedac23af156bbb5a6aa2cd3427ada2972c828244eb7d1b9255846379"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7f009caad047246ed379e1c4dbcb8b020f0a390667ea74d2387be2998f58a22"}, + {file = 
"greenlet-3.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c5e1536de2aad7bf62e27baf79225d0d64360d4168cf2e6becb91baf1ed074f3"}, + {file = "greenlet-3.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:894393ce10ceac937e56ec00bb71c4c2f8209ad516e96033e4b3b1de270e200d"}, + {file = "greenlet-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:1ea188d4f49089fc6fb283845ab18a2518d279c7cd9da1065d7a84e991748728"}, + {file = "greenlet-3.0.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:70fb482fdf2c707765ab5f0b6655e9cfcf3780d8d87355a063547b41177599be"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4d1ac74f5c0c0524e4a24335350edad7e5f03b9532da7ea4d3c54d527784f2e"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:149e94a2dd82d19838fe4b2259f1b6b9957d5ba1b25640d2380bea9c5df37676"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15d79dd26056573940fcb8c7413d84118086f2ec1a8acdfa854631084393efcc"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b7db1ebff4ba09aaaeae6aa491daeb226c8150fc20e836ad00041bcb11230"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fcd2469d6a2cf298f198f0487e0a5b1a47a42ca0fa4dfd1b6862c999f018ebbf"}, + {file = "greenlet-3.0.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1f672519db1796ca0d8753f9e78ec02355e862d0998193038c7073045899f305"}, + {file = "greenlet-3.0.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2516a9957eed41dd8f1ec0c604f1cdc86758b587d964668b5b196a9db5bfcde6"}, + {file = "greenlet-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:bba5387a6975598857d86de9eac14210a49d554a77eb8261cc68b7d082f78ce2"}, + {file = "greenlet-3.0.3.tar.gz", hash = "sha256:43374442353259554ce33599da8b692d5aa96f8976d567d4badf263371fbe491"}, +] + [[package]] 
name = "idna" -version = "3.6" +version = "3.7" requires_python = ">=3.5" summary = "Internationalized Domain Names in Applications (IDNA)" +groups = ["default"] files = [ - {file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"}, - {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, + {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, + {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, ] [[package]] name = "ipython" -version = "8.18.1" -requires_python = ">=3.9" +version = "8.23.0" +requires_python = ">=3.10" summary = "IPython: Productive Interactive Computing" +groups = ["default"] dependencies = [ "colorama; sys_platform == \"win32\"", "decorator", "exceptiongroup; python_version < \"3.11\"", "jedi>=0.16", "matplotlib-inline", - "pexpect>4.3; sys_platform != \"win32\"", + "pexpect>4.3; sys_platform != \"win32\" and sys_platform != \"emscripten\"", "prompt-toolkit<3.1.0,>=3.0.41", "pygments>=2.4.0", "stack-data", - "traitlets>=5", - "typing-extensions; python_version < \"3.10\"", + "traitlets>=5.13.0", + "typing-extensions; python_version < \"3.12\"", ] files = [ - {file = "ipython-8.18.1-py3-none-any.whl", hash = "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397"}, - {file = "ipython-8.18.1.tar.gz", hash = "sha256:ca6f079bb33457c66e233e4580ebfc4128855b4cf6370dddd73842a9563e8a27"}, + {file = "ipython-8.23.0-py3-none-any.whl", hash = "sha256:07232af52a5ba146dc3372c7bf52a0f890a23edf38d77caef8d53f9cdc2584c1"}, + {file = "ipython-8.23.0.tar.gz", hash = "sha256:7468edaf4f6de3e1b912e57f66c241e6fd3c7099f2ec2136e239e142e800274d"}, ] [[package]] @@ -374,6 +448,7 @@ name = "jedi" version = "0.19.1" requires_python = ">=3.6" summary = "An autocompletion tool for Python that can be used for text editors." 
+groups = ["default"] dependencies = [ "parso<0.9.0,>=0.8.3", ] @@ -384,15 +459,16 @@ files = [ [[package]] name = "matplotlib-inline" -version = "0.1.6" -requires_python = ">=3.5" +version = "0.1.7" +requires_python = ">=3.8" summary = "Inline Matplotlib backend for Jupyter" +groups = ["default"] dependencies = [ "traitlets", ] files = [ - {file = "matplotlib-inline-0.1.6.tar.gz", hash = "sha256:f887e5f10ba98e8d2b150ddcf4702c1e5f8b3a20005eb0f74bfdbd360ee6f304"}, - {file = "matplotlib_inline-0.1.6-py3-none-any.whl", hash = "sha256:f1f41aab5328aa5aaea9b16d083b128102f8712542f819fe7e6a420ff581b311"}, + {file = "matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca"}, + {file = "matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90"}, ] [[package]] @@ -400,6 +476,7 @@ name = "mutagen" version = "1.47.0" requires_python = ">=3.7" summary = "read and write audio tags for many formats" +groups = ["default"] files = [ {file = "mutagen-1.47.0-py3-none-any.whl", hash = "sha256:edd96f50c5907a9539d8e5bba7245f62c9f520aef333d13392a79a4f70aca719"}, {file = "mutagen-1.47.0.tar.gz", hash = "sha256:719fadef0a978c31b4cf3c956261b3c58b6948b32023078a2117b1de09f0fc99"}, @@ -410,6 +487,7 @@ name = "mypy-extensions" version = "1.0.0" requires_python = ">=3.5" summary = "Type system extensions for programs checked with the mypy type checker." 
+groups = ["default"] files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, @@ -417,18 +495,21 @@ files = [ [[package]] name = "parso" -version = "0.8.3" +version = "0.8.4" requires_python = ">=3.6" summary = "A Python Parser" +groups = ["default"] files = [ - {file = "parso-0.8.3-py2.py3-none-any.whl", hash = "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75"}, - {file = "parso-0.8.3.tar.gz", hash = "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0"}, + {file = "parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"}, + {file = "parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d"}, ] [[package]] name = "pexpect" version = "4.9.0" summary = "Pexpect allows easy control of interactive console applications." 
+groups = ["default"] +marker = "sys_platform != \"win32\" and sys_platform != \"emscripten\"" dependencies = [ "ptyprocess>=0.5", ] @@ -437,11 +518,33 @@ files = [ {file = "pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f"}, ] +[[package]] +name = "playwright" +version = "1.43.0" +requires_python = ">=3.8" +summary = "A high-level API to automate web browsers" +groups = ["default"] +marker = "platform_machine != \"armv7l\"" +dependencies = [ + "greenlet==3.0.3", + "pyee==11.1.0", +] +files = [ + {file = "playwright-1.43.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:b03b12bd4da9c2cfb78dff820deac8b52892fe3c2f89a4d95d6f08c59e41deb9"}, + {file = "playwright-1.43.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e9ec21b141727392f630761c7f4dec46d80c98243614257cc501b64ff636d337"}, + {file = "playwright-1.43.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:e05a8d8fb2040c630429cca07e843c8fa33059717837c8f50c01b7d1fc651ce1"}, + {file = "playwright-1.43.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:50d9a5c07c76456945a2296d63f78fdf6eb11aed3e8d39bb5ccbda760a8d6d41"}, + {file = "playwright-1.43.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87191272c40b4c282cf2c9449ca3acaf705f38ac6e2372270f1617ce16b661b8"}, + {file = "playwright-1.43.0-py3-none-win32.whl", hash = "sha256:bd8b818904b17e2914be23e7bc2a340b203f57fe81678520b10f908485b056ea"}, + {file = "playwright-1.43.0-py3-none-win_amd64.whl", hash = "sha256:9b7bd707eeeaebee47f656b2de90aa9bd85e9ca2c6af7a08efd73896299e4d50"}, +] + [[package]] name = "prompt-toolkit" version = "3.0.43" requires_python = ">=3.7.0" summary = "Library for building powerful interactive command lines in Python" +groups = ["default"] dependencies = [ "wcwidth", ] @@ -454,6 +557,8 @@ files = [ name = "ptyprocess" version = "0.7.0" summary = "Run a subprocess in a pseudo terminal" +groups = ["default"] +marker = "sys_platform != \"win32\" and sys_platform != 
\"emscripten\"" files = [ {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, @@ -463,6 +568,7 @@ files = [ name = "pure-eval" version = "0.2.2" summary = "Safely evaluate AST nodes without side effects" +groups = ["default"] files = [ {file = "pure_eval-0.2.2-py3-none-any.whl", hash = "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350"}, {file = "pure_eval-0.2.2.tar.gz", hash = "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3"}, @@ -470,35 +576,39 @@ files = [ [[package]] name = "pyasn1" -version = "0.5.1" -requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +version = "0.6.0" +requires_python = ">=3.8" summary = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" +groups = ["ldap"] files = [ - {file = "pyasn1-0.5.1-py2.py3-none-any.whl", hash = "sha256:4439847c58d40b1d0a573d07e3856e95333f1976294494c325775aeca506eb58"}, - {file = "pyasn1-0.5.1.tar.gz", hash = "sha256:6d391a96e59b23130a5cfa74d6fd7f388dbbe26cc8f1edf39fdddf08d9d6676c"}, + {file = "pyasn1-0.6.0-py2.py3-none-any.whl", hash = "sha256:cca4bb0f2df5504f02f6f8a775b6e416ff9b0b3b16f7ee80b5a3153d9b804473"}, + {file = "pyasn1-0.6.0.tar.gz", hash = "sha256:3a35ab2c4b5ef98e17dfdec8ab074046fbda76e281c5a706ccd82328cfc8f64c"}, ] [[package]] name = "pyasn1-modules" -version = "0.3.0" -requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +version = "0.4.0" +requires_python = ">=3.8" summary = "A collection of ASN.1-based protocols modules" +groups = ["ldap"] dependencies = [ - "pyasn1<0.6.0,>=0.4.6", + "pyasn1<0.7.0,>=0.4.6", ] files = [ - {file = "pyasn1_modules-0.3.0-py2.py3-none-any.whl", hash = "sha256:d3ccd6ed470d9ffbc716be08bd90efbd44d0734bc9303818f7336070984a162d"}, - {file = "pyasn1_modules-0.3.0.tar.gz", 
hash = "sha256:5bd01446b736eb9d31512a30d46c1ac3395d676c6f3cafa4c03eb54b9925631c"}, + {file = "pyasn1_modules-0.4.0-py3-none-any.whl", hash = "sha256:be04f15b66c206eed667e0bb5ab27e2b1855ea54a842e5037738099e8ca4ae0b"}, + {file = "pyasn1_modules-0.4.0.tar.gz", hash = "sha256:831dbcea1b177b28c9baddf4c6d1013c24c3accd14a1873fffaa6a2e905f17b6"}, ] [[package]] name = "pycparser" -version = "2.21" -requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +version = "2.22" +requires_python = ">=3.8" summary = "C parser in Python" +groups = ["default"] +marker = "implementation_name != \"cpython\"" files = [ - {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, - {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, + {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, + {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, ] [[package]] @@ -506,6 +616,7 @@ name = "pycryptodomex" version = "3.20.0" requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" summary = "Cryptographic library for Python" +groups = ["default"] files = [ {file = "pycryptodomex-3.20.0-cp35-abi3-macosx_10_9_universal2.whl", hash = "sha256:59af01efb011b0e8b686ba7758d59cf4a8263f9ad35911bfe3f416cee4f5c08c"}, {file = "pycryptodomex-3.20.0-cp35-abi3-macosx_10_9_x86_64.whl", hash = "sha256:82ee7696ed8eb9a82c7037f32ba9b7c59e51dda6f105b39f043b6ef293989cb3"}, @@ -530,11 +641,27 @@ files = [ {file = "pycryptodomex-3.20.0.tar.gz", hash = "sha256:7a710b79baddd65b806402e14766c721aee8fb83381769c27920f26476276c1e"}, ] +[[package]] +name = "pyee" +version = "11.1.0" +requires_python = ">=3.8" +summary = "A rough port of Node.js's EventEmitter to Python with a few tricks of its own" +groups = ["default"] +marker = 
"platform_machine != \"armv7l\"" +dependencies = [ + "typing-extensions", +] +files = [ + {file = "pyee-11.1.0-py3-none-any.whl", hash = "sha256:5d346a7d0f861a4b2e6c47960295bd895f816725b27d656181947346be98d7c1"}, + {file = "pyee-11.1.0.tar.gz", hash = "sha256:b53af98f6990c810edd9b56b87791021a8f54fd13db4edd1142438d44ba2263f"}, +] + [[package]] name = "pygments" version = "2.17.2" requires_python = ">=3.7" summary = "Pygments is a syntax highlighting package written in Python." +groups = ["default"] files = [ {file = "pygments-2.17.2-py3-none-any.whl", hash = "sha256:b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c"}, {file = "pygments-2.17.2.tar.gz", hash = "sha256:da46cec9fd2de5be3a8a784f434e4c4ab670b4ff54d605c4c2717e9d49c4c367"}, @@ -544,6 +671,7 @@ files = [ name = "python-crontab" version = "3.0.0" summary = "Python Crontab API" +groups = ["default"] dependencies = [ "python-dateutil", ] @@ -554,15 +682,16 @@ files = [ [[package]] name = "python-dateutil" -version = "2.8.2" +version = "2.9.0.post0" requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" summary = "Extensions to the standard Python datetime module" +groups = ["default"] dependencies = [ "six>=1.5", ] files = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, ] [[package]] @@ -570,6 +699,7 @@ name = "python-ldap" version = "3.4.4" requires_python = ">=3.6" summary = "Python modules for implementing LDAP clients" +groups = ["ldap"] dependencies = [ "pyasn1-modules>=0.1.5", "pyasn1>=0.3.7", @@ -580,67 
+710,68 @@ files = [ [[package]] name = "pytz" -version = "2023.3.post1" +version = "2024.1" summary = "World timezone definitions, modern and historical" +groups = ["default"] files = [ - {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, - {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, + {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, + {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, ] [[package]] name = "regex" -version = "2023.12.25" +version = "2024.4.16" requires_python = ">=3.7" summary = "Alternative regular expression module, to replace re." +groups = ["default"] files = [ - {file = "regex-2023.12.25-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0694219a1d54336fd0445ea382d49d36882415c0134ee1e8332afd1529f0baa5"}, - {file = "regex-2023.12.25-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b014333bd0217ad3d54c143de9d4b9a3ca1c5a29a6d0d554952ea071cff0f1f8"}, - {file = "regex-2023.12.25-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d865984b3f71f6d0af64d0d88f5733521698f6c16f445bb09ce746c92c97c586"}, - {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e0eabac536b4cc7f57a5f3d095bfa557860ab912f25965e08fe1545e2ed8b4c"}, - {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c25a8ad70e716f96e13a637802813f65d8a6760ef48672aa3502f4c24ea8b400"}, - {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9b6d73353f777630626f403b0652055ebfe8ff142a44ec2cf18ae470395766e"}, - {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a9cc99d6946d750eb75827cb53c4371b8b0fe89c733a94b1573c9dd16ea6c9e4"}, - {file = "regex-2023.12.25-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88d1f7bef20c721359d8675f7d9f8e414ec5003d8f642fdfd8087777ff7f94b5"}, - {file = "regex-2023.12.25-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cb3fe77aec8f1995611f966d0c656fdce398317f850d0e6e7aebdfe61f40e1cd"}, - {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7aa47c2e9ea33a4a2a05f40fcd3ea36d73853a2aae7b4feab6fc85f8bf2c9704"}, - {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:df26481f0c7a3f8739fecb3e81bc9da3fcfae34d6c094563b9d4670b047312e1"}, - {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c40281f7d70baf6e0db0c2f7472b31609f5bc2748fe7275ea65a0b4601d9b392"}, - {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:d94a1db462d5690ebf6ae86d11c5e420042b9898af5dcf278bd97d6bda065423"}, - {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ba1b30765a55acf15dce3f364e4928b80858fa8f979ad41f862358939bdd1f2f"}, - {file = "regex-2023.12.25-cp310-cp310-win32.whl", hash = "sha256:150c39f5b964e4d7dba46a7962a088fbc91f06e606f023ce57bb347a3b2d4630"}, - {file = "regex-2023.12.25-cp310-cp310-win_amd64.whl", hash = "sha256:09da66917262d9481c719599116c7dc0c321ffcec4b1f510c4f8a066f8768105"}, - {file = "regex-2023.12.25-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1b9d811f72210fa9306aeb88385b8f8bcef0dfbf3873410413c00aa94c56c2b6"}, - {file = "regex-2023.12.25-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d902a43085a308cef32c0d3aea962524b725403fd9373dea18110904003bac97"}, - {file = "regex-2023.12.25-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d166eafc19f4718df38887b2bbe1467a4f74a9830e8605089ea7a30dd4da8887"}, - {file = 
"regex-2023.12.25-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7ad32824b7f02bb3c9f80306d405a1d9b7bb89362d68b3c5a9be53836caebdb"}, - {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:636ba0a77de609d6510235b7f0e77ec494d2657108f777e8765efc060094c98c"}, - {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fda75704357805eb953a3ee15a2b240694a9a514548cd49b3c5124b4e2ad01b"}, - {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f72cbae7f6b01591f90814250e636065850c5926751af02bb48da94dfced7baa"}, - {file = "regex-2023.12.25-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db2a0b1857f18b11e3b0e54ddfefc96af46b0896fb678c85f63fb8c37518b3e7"}, - {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:7502534e55c7c36c0978c91ba6f61703faf7ce733715ca48f499d3dbbd7657e0"}, - {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e8c7e08bb566de4faaf11984af13f6bcf6a08f327b13631d41d62592681d24fe"}, - {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:283fc8eed679758de38fe493b7d7d84a198b558942b03f017b1f94dda8efae80"}, - {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:f44dd4d68697559d007462b0a3a1d9acd61d97072b71f6d1968daef26bc744bd"}, - {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:67d3ccfc590e5e7197750fcb3a2915b416a53e2de847a728cfa60141054123d4"}, - {file = "regex-2023.12.25-cp311-cp311-win32.whl", hash = "sha256:68191f80a9bad283432385961d9efe09d783bcd36ed35a60fb1ff3f1ec2efe87"}, - {file = "regex-2023.12.25-cp311-cp311-win_amd64.whl", hash = "sha256:7d2af3f6b8419661a0c421584cfe8aaec1c0e435ce7e47ee2a97e344b98f794f"}, - {file = "regex-2023.12.25-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:f7bc09bc9c29ebead055bcba136a67378f03d66bf359e87d0f7c759d6d4ffa31"}, - {file = "regex-2023.12.25-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e14b73607d6231f3cc4622809c196b540a6a44e903bcfad940779c80dffa7be7"}, - {file = "regex-2023.12.25-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9eda5f7a50141291beda3edd00abc2d4a5b16c29c92daf8d5bd76934150f3edc"}, - {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc6bb9aa69aacf0f6032c307da718f61a40cf970849e471254e0e91c56ffca95"}, - {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:298dc6354d414bc921581be85695d18912bea163a8b23cac9a2562bbcd5088b1"}, - {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2f4e475a80ecbd15896a976aa0b386c5525d0ed34d5c600b6d3ebac0a67c7ddf"}, - {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:531ac6cf22b53e0696f8e1d56ce2396311254eb806111ddd3922c9d937151dae"}, - {file = "regex-2023.12.25-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22f3470f7524b6da61e2020672df2f3063676aff444db1daa283c2ea4ed259d6"}, - {file = "regex-2023.12.25-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:89723d2112697feaa320c9d351e5f5e7b841e83f8b143dba8e2d2b5f04e10923"}, - {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0ecf44ddf9171cd7566ef1768047f6e66975788258b1c6c6ca78098b95cf9a3d"}, - {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:905466ad1702ed4acfd67a902af50b8db1feeb9781436372261808df7a2a7bca"}, - {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:4558410b7a5607a645e9804a3e9dd509af12fb72b9825b13791a37cd417d73a5"}, - {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_s390x.whl", hash = 
"sha256:7e316026cc1095f2a3e8cc012822c99f413b702eaa2ca5408a513609488cb62f"}, - {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3b1de218d5375cd6ac4b5493e0b9f3df2be331e86520f23382f216c137913d20"}, - {file = "regex-2023.12.25-cp39-cp39-win32.whl", hash = "sha256:11a963f8e25ab5c61348d090bf1b07f1953929c13bd2309a0662e9ff680763c9"}, - {file = "regex-2023.12.25-cp39-cp39-win_amd64.whl", hash = "sha256:e693e233ac92ba83a87024e1d32b5f9ab15ca55ddd916d878146f4e3406b5c91"}, - {file = "regex-2023.12.25.tar.gz", hash = "sha256:29171aa128da69afdf4bde412d5bedc335f2ca8fcfe4489038577d05f16181e5"}, + {file = "regex-2024.4.16-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fb83cc090eac63c006871fd24db5e30a1f282faa46328572661c0a24a2323a08"}, + {file = "regex-2024.4.16-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8c91e1763696c0eb66340c4df98623c2d4e77d0746b8f8f2bee2c6883fd1fe18"}, + {file = "regex-2024.4.16-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:10188fe732dec829c7acca7422cdd1bf57d853c7199d5a9e96bb4d40db239c73"}, + {file = "regex-2024.4.16-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:956b58d692f235cfbf5b4f3abd6d99bf102f161ccfe20d2fd0904f51c72c4c66"}, + {file = "regex-2024.4.16-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a70b51f55fd954d1f194271695821dd62054d949efd6368d8be64edd37f55c86"}, + {file = "regex-2024.4.16-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c02fcd2bf45162280613d2e4a1ca3ac558ff921ae4e308ecb307650d3a6ee51"}, + {file = "regex-2024.4.16-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4ed75ea6892a56896d78f11006161eea52c45a14994794bcfa1654430984b22"}, + {file = "regex-2024.4.16-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd727ad276bb91928879f3aa6396c9a1d34e5e180dce40578421a691eeb77f47"}, + {file = 
"regex-2024.4.16-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7cbc5d9e8a1781e7be17da67b92580d6ce4dcef5819c1b1b89f49d9678cc278c"}, + {file = "regex-2024.4.16-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:78fddb22b9ef810b63ef341c9fcf6455232d97cfe03938cbc29e2672c436670e"}, + {file = "regex-2024.4.16-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:445ca8d3c5a01309633a0c9db57150312a181146315693273e35d936472df912"}, + {file = "regex-2024.4.16-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:95399831a206211d6bc40224af1c635cb8790ddd5c7493e0bd03b85711076a53"}, + {file = "regex-2024.4.16-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:7731728b6568fc286d86745f27f07266de49603a6fdc4d19c87e8c247be452af"}, + {file = "regex-2024.4.16-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4facc913e10bdba42ec0aee76d029aedda628161a7ce4116b16680a0413f658a"}, + {file = "regex-2024.4.16-cp310-cp310-win32.whl", hash = "sha256:911742856ce98d879acbea33fcc03c1d8dc1106234c5e7d068932c945db209c0"}, + {file = "regex-2024.4.16-cp310-cp310-win_amd64.whl", hash = "sha256:e0a2df336d1135a0b3a67f3bbf78a75f69562c1199ed9935372b82215cddd6e2"}, + {file = "regex-2024.4.16-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1210365faba7c2150451eb78ec5687871c796b0f1fa701bfd2a4a25420482d26"}, + {file = "regex-2024.4.16-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9ab40412f8cd6f615bfedea40c8bf0407d41bf83b96f6fc9ff34976d6b7037fd"}, + {file = "regex-2024.4.16-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fd80d1280d473500d8086d104962a82d77bfbf2b118053824b7be28cd5a79ea5"}, + {file = "regex-2024.4.16-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bb966fdd9217e53abf824f437a5a2d643a38d4fd5fd0ca711b9da683d452969"}, + {file = "regex-2024.4.16-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:20b7a68444f536365af42a75ccecb7ab41a896a04acf58432db9e206f4e525d6"}, + {file = "regex-2024.4.16-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b74586dd0b039c62416034f811d7ee62810174bb70dffcca6439f5236249eb09"}, + {file = "regex-2024.4.16-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c8290b44d8b0af4e77048646c10c6e3aa583c1ca67f3b5ffb6e06cf0c6f0f89"}, + {file = "regex-2024.4.16-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2d80a6749724b37853ece57988b39c4e79d2b5fe2869a86e8aeae3bbeef9eb0"}, + {file = "regex-2024.4.16-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3a1018e97aeb24e4f939afcd88211ace472ba566efc5bdf53fd8fd7f41fa7170"}, + {file = "regex-2024.4.16-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:8d015604ee6204e76569d2f44e5a210728fa917115bef0d102f4107e622b08d5"}, + {file = "regex-2024.4.16-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:3d5ac5234fb5053850d79dd8eb1015cb0d7d9ed951fa37aa9e6249a19aa4f336"}, + {file = "regex-2024.4.16-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:0a38d151e2cdd66d16dab550c22f9521ba79761423b87c01dae0a6e9add79c0d"}, + {file = "regex-2024.4.16-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:159dc4e59a159cb8e4e8f8961eb1fa5d58f93cb1acd1701d8aff38d45e1a84a6"}, + {file = "regex-2024.4.16-cp311-cp311-win32.whl", hash = "sha256:ba2336d6548dee3117520545cfe44dc28a250aa091f8281d28804aa8d707d93d"}, + {file = "regex-2024.4.16-cp311-cp311-win_amd64.whl", hash = "sha256:8f83b6fd3dc3ba94d2b22717f9c8b8512354fd95221ac661784df2769ea9bba9"}, + {file = "regex-2024.4.16-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:80b696e8972b81edf0af2a259e1b2a4a661f818fae22e5fa4fa1a995fb4a40fd"}, + {file = "regex-2024.4.16-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d61ae114d2a2311f61d90c2ef1358518e8f05eafda76eaf9c772a077e0b465ec"}, + {file = "regex-2024.4.16-cp312-cp312-macosx_11_0_arm64.whl", hash 
= "sha256:8ba6745440b9a27336443b0c285d705ce73adb9ec90e2f2004c64d95ab5a7598"}, + {file = "regex-2024.4.16-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6295004b2dd37b0835ea5c14a33e00e8cfa3c4add4d587b77287825f3418d310"}, + {file = "regex-2024.4.16-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4aba818dcc7263852aabb172ec27b71d2abca02a593b95fa79351b2774eb1d2b"}, + {file = "regex-2024.4.16-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d0800631e565c47520aaa04ae38b96abc5196fe8b4aa9bd864445bd2b5848a7a"}, + {file = "regex-2024.4.16-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08dea89f859c3df48a440dbdcd7b7155bc675f2fa2ec8c521d02dc69e877db70"}, + {file = "regex-2024.4.16-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eeaa0b5328b785abc344acc6241cffde50dc394a0644a968add75fcefe15b9d4"}, + {file = "regex-2024.4.16-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4e819a806420bc010489f4e741b3036071aba209f2e0989d4750b08b12a9343f"}, + {file = "regex-2024.4.16-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:c2d0e7cbb6341e830adcbfa2479fdeebbfbb328f11edd6b5675674e7a1e37730"}, + {file = "regex-2024.4.16-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:91797b98f5e34b6a49f54be33f72e2fb658018ae532be2f79f7c63b4ae225145"}, + {file = "regex-2024.4.16-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:d2da13568eff02b30fd54fccd1e042a70fe920d816616fda4bf54ec705668d81"}, + {file = "regex-2024.4.16-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:370c68dc5570b394cbaadff50e64d705f64debed30573e5c313c360689b6aadc"}, + {file = "regex-2024.4.16-cp312-cp312-win32.whl", hash = "sha256:904c883cf10a975b02ab3478bce652f0f5346a2c28d0a8521d97bb23c323cc8b"}, + {file = "regex-2024.4.16-cp312-cp312-win_amd64.whl", hash = "sha256:785c071c982dce54d44ea0b79cd6dfafddeccdd98cfa5f7b86ef69b381b457d9"}, + {file = 
"regex-2024.4.16.tar.gz", hash = "sha256:fa454d26f2e87ad661c4f0c5a5fe4cf6aab1e307d1b94f16ffdfcb089ba685c0"}, ] [[package]] @@ -648,6 +779,7 @@ name = "requests" version = "2.31.0" requires_python = ">=3.7" summary = "Python HTTP for Humans." +groups = ["default"] dependencies = [ "certifi>=2017.4.17", "charset-normalizer<4,>=2", @@ -659,11 +791,32 @@ files = [ {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, ] +[[package]] +name = "setuptools" +version = "69.5.1" +requires_python = ">=3.8" +summary = "Easily download, build, install, upgrade, and uninstall Python packages" +groups = ["default"] +files = [ + {file = "setuptools-69.5.1-py3-none-any.whl", hash = "sha256:c636ac361bc47580504644275c9ad802c50415c7522212252c033bd15f301f32"}, + {file = "setuptools-69.5.1.tar.gz", hash = "sha256:6c1fccdac05a97e598fb0ae3bbed5904ccb317337a51139dcd51453611bbb987"}, +] + +[[package]] +name = "sgmllib3k" +version = "1.0.0" +summary = "Py3k port of sgmllib." 
+groups = ["default"] +files = [ + {file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"}, +] + [[package]] name = "six" version = "1.16.0" requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" summary = "Python 2 and 3 compatibility utilities" +groups = ["default"] files = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, @@ -673,6 +826,7 @@ files = [ name = "sonic-client" version = "1.0.0" summary = "python client for sonic search backend" +groups = ["sonic"] files = [ {file = "sonic-client-1.0.0.tar.gz", hash = "sha256:fe324c7354670488ed84847f6a6727d3cb5fb3675cb9b61396dcf5720e5aca66"}, {file = "sonic_client-1.0.0-py3-none-any.whl", hash = "sha256:291bf292861e97a2dd765ff0c8754ea9631383680d31a63ec3da6f5aa5f4beda"}, @@ -680,18 +834,20 @@ files = [ [[package]] name = "sqlparse" -version = "0.4.4" -requires_python = ">=3.5" +version = "0.5.0" +requires_python = ">=3.8" summary = "A non-validating SQL parser." 
+groups = ["default", "ldap"] files = [ - {file = "sqlparse-0.4.4-py3-none-any.whl", hash = "sha256:5430a4fe2ac7d0f93e66f1efc6e1338a41884b7ddf2a350cedd20ccc4d9d28f3"}, - {file = "sqlparse-0.4.4.tar.gz", hash = "sha256:d446183e84b8349fa3061f0fe7f06ca94ba65b426946ffebe6e3e8295332420c"}, + {file = "sqlparse-0.5.0-py3-none-any.whl", hash = "sha256:c204494cd97479d0e39f28c93d46c0b2d5959c7b9ab904762ea6c7af211c8663"}, + {file = "sqlparse-0.5.0.tar.gz", hash = "sha256:714d0a4932c059d16189f58ef5411ec2287a4360f17cdd0edd2d09d4c5087c93"}, ] [[package]] name = "stack-data" version = "0.6.3" summary = "Extract data from python stack frames and tracebacks for informative displays" +groups = ["default"] dependencies = [ "asttokens>=2.1.0", "executing>=1.2.0", @@ -704,32 +860,37 @@ files = [ [[package]] name = "traitlets" -version = "5.14.1" +version = "5.14.3" requires_python = ">=3.8" summary = "Traitlets Python configuration system" +groups = ["default"] files = [ - {file = "traitlets-5.14.1-py3-none-any.whl", hash = "sha256:2e5a030e6eff91737c643231bfcf04a65b0132078dad75e4936700b213652e74"}, - {file = "traitlets-5.14.1.tar.gz", hash = "sha256:8585105b371a04b8316a43d5ce29c098575c2e477850b62b848b964f1444527e"}, + {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"}, + {file = "traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7"}, ] [[package]] name = "typing-extensions" -version = "4.9.0" +version = "4.11.0" requires_python = ">=3.8" summary = "Backported and Experimental Type Hints for Python 3.8+" +groups = ["default", "ldap"] +marker = "python_version < \"3.12\" or platform_machine != \"armv7l\"" files = [ - {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"}, - {file = "typing_extensions-4.9.0.tar.gz", hash = 
"sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, + {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"}, + {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, ] [[package]] name = "tzdata" -version = "2023.4" +version = "2024.1" requires_python = ">=2" summary = "Provider of IANA time zone data" +groups = ["default", "ldap"] +marker = "sys_platform == \"win32\" or platform_system == \"Windows\"" files = [ - {file = "tzdata-2023.4-py2.py3-none-any.whl", hash = "sha256:aa3ace4329eeacda5b7beb7ea08ece826c28d761cda36e747cfbf97996d39bf3"}, - {file = "tzdata-2023.4.tar.gz", hash = "sha256:dd54c94f294765522c77399649b4fefd95522479a664a0cec87f41bebc6148c9"}, + {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, + {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, ] [[package]] @@ -737,6 +898,7 @@ name = "tzlocal" version = "5.2" requires_python = ">=3.8" summary = "tzinfo object for the local timezone" +groups = ["default"] dependencies = [ "tzdata; platform_system == \"Windows\"", ] @@ -747,12 +909,13 @@ files = [ [[package]] name = "urllib3" -version = "2.1.0" +version = "2.2.1" requires_python = ">=3.8" summary = "HTTP library with thread-safe connection pooling, file post, and more." 
+groups = ["default"] files = [ - {file = "urllib3-2.1.0-py3-none-any.whl", hash = "sha256:55901e917a5896a349ff771be919f8bd99aff50b79fe58fec595eb37bbc56bb3"}, - {file = "urllib3-2.1.0.tar.gz", hash = "sha256:df7aa8afb0148fa78488e7899b2c59b5f4ffcfa82e6c54ccb9dd37c1d7b52d54"}, + {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, + {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, ] [[package]] @@ -760,6 +923,7 @@ name = "w3lib" version = "2.1.2" requires_python = ">=3.7" summary = "Library of web-related functions" +groups = ["default"] files = [ {file = "w3lib-2.1.2-py3-none-any.whl", hash = "sha256:c4432926e739caa8e3f49f5de783f336df563d9490416aebd5d39fb896d264e7"}, {file = "w3lib-2.1.2.tar.gz", hash = "sha256:ed5b74e997eea2abe3c1321f916e344144ee8e9072a6f33463ee8e57f858a4b1"}, @@ -769,6 +933,7 @@ files = [ name = "wcwidth" version = "0.2.13" summary = "Measures the displayed width of unicode strings in a terminal" +groups = ["default"] files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, @@ -779,6 +944,7 @@ name = "websockets" version = "12.0" requires_python = ">=3.8" summary = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +groups = ["default"] files = [ {file = "websockets-12.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d554236b2a2006e0ce16315c16eaa0d628dab009c33b63ea03f41c6107958374"}, {file = "websockets-12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2d225bb6886591b1746b17c0573e29804619c8f755b5598d875bb4235ea639be"}, @@ -802,17 +968,17 @@ files = [ {file = "websockets-12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:dff6cdf35e31d1315790149fee351f9e52978130cef6c87c4b6c9b3baf78bc53"}, {file = "websockets-12.0-cp311-cp311-win32.whl", hash = "sha256:3e3aa8c468af01d70332a382350ee95f6986db479ce7af14d5e81ec52aa2b402"}, {file = "websockets-12.0-cp311-cp311-win_amd64.whl", hash = "sha256:25eb766c8ad27da0f79420b2af4b85d29914ba0edf69f547cc4f06ca6f1d403b"}, - {file = "websockets-12.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ab3d732ad50a4fbd04a4490ef08acd0517b6ae6b77eb967251f4c263011a990d"}, - {file = "websockets-12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1d9697f3337a89691e3bd8dc56dea45a6f6d975f92e7d5f773bc715c15dde28"}, - {file = "websockets-12.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1df2fbd2c8a98d38a66f5238484405b8d1d16f929bb7a33ed73e4801222a6f53"}, - {file = "websockets-12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23509452b3bc38e3a057382c2e941d5ac2e01e251acce7adc74011d7d8de434c"}, - {file = "websockets-12.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e5fc14ec6ea568200ea4ef46545073da81900a2b67b3e666f04adf53ad452ec"}, - {file = "websockets-12.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46e71dbbd12850224243f5d2aeec90f0aaa0f2dde5aeeb8fc8df21e04d99eff9"}, - {file = "websockets-12.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b81f90dcc6c85a9b7f29873beb56c94c85d6f0dac2ea8b60d995bd18bf3e2aae"}, - {file = "websockets-12.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a02413bc474feda2849c59ed2dfb2cddb4cd3d2f03a2fedec51d6e959d9b608b"}, - {file = "websockets-12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bbe6013f9f791944ed31ca08b077e26249309639313fff132bfbf3ba105673b9"}, - {file = "websockets-12.0-cp39-cp39-win32.whl", hash = "sha256:cbe83a6bbdf207ff0541de01e11904827540aa069293696dd528a6640bd6a5f6"}, - {file = "websockets-12.0-cp39-cp39-win_amd64.whl", hash = 
"sha256:fc4e7fa5414512b481a2483775a8e8be7803a35b30ca805afa4998a84f9fd9e8"}, + {file = "websockets-12.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0e6e2711d5a8e6e482cacb927a49a3d432345dfe7dea8ace7b5790df5932e4df"}, + {file = "websockets-12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:dbcf72a37f0b3316e993e13ecf32f10c0e1259c28ffd0a85cee26e8549595fbc"}, + {file = "websockets-12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12743ab88ab2af1d17dd4acb4645677cb7063ef4db93abffbf164218a5d54c6b"}, + {file = "websockets-12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b645f491f3c48d3f8a00d1fce07445fab7347fec54a3e65f0725d730d5b99cb"}, + {file = "websockets-12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9893d1aa45a7f8b3bc4510f6ccf8db8c3b62120917af15e3de247f0780294b92"}, + {file = "websockets-12.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f38a7b376117ef7aff996e737583172bdf535932c9ca021746573bce40165ed"}, + {file = "websockets-12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f764ba54e33daf20e167915edc443b6f88956f37fb606449b4a5b10ba42235a5"}, + {file = "websockets-12.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:1e4b3f8ea6a9cfa8be8484c9221ec0257508e3a1ec43c36acdefb2a9c3b00aa2"}, + {file = "websockets-12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9fdf06fd06c32205a07e47328ab49c40fc1407cdec801d698a7c41167ea45113"}, + {file = "websockets-12.0-cp312-cp312-win32.whl", hash = "sha256:baa386875b70cbd81798fa9f71be689c1bf484f65fd6fb08d051a0ee4e79924d"}, + {file = "websockets-12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ae0a5da8f35a5be197f328d4727dbcfafa53d1824fac3d96cdd3a642fe09394f"}, {file = "websockets-12.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:248d8e2446e13c1d4326e0a6a4e9629cb13a11195051a73acf414812700badbd"}, {file = 
"websockets-12.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44069528d45a933997a6fef143030d8ca8042f0dfaad753e2906398290e2870"}, {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c4e37d36f0d19f0a4413d3e18c0d03d0c268ada2061868c1e6f5ab1a6d575077"}, @@ -834,9 +1000,10 @@ files = [ [[package]] name = "yt-dlp" -version = "2023.12.30" +version = "2024.4.9" requires_python = ">=3.8" -summary = "A youtube-dl fork with additional features and patches" +summary = "A feature-rich command-line audio/video downloader" +groups = ["default"] dependencies = [ "brotli; implementation_name == \"cpython\"", "brotlicffi; implementation_name != \"cpython\"", @@ -848,6 +1015,6 @@ dependencies = [ "websockets>=12.0", ] files = [ - {file = "yt-dlp-2023.12.30.tar.gz", hash = "sha256:a11862e57721b0a0f0883dfeb5a4d79ba213a2d4c45e1880e9fd70f8e6570c38"}, - {file = "yt_dlp-2023.12.30-py2.py3-none-any.whl", hash = "sha256:c00d9a71d64472ad441bcaa1ec0c3797d6e60c9f934f270096a96fe51657e7b3"}, + {file = "yt_dlp-2024.4.9-py3-none-any.whl", hash = "sha256:d6ff6798bd114cc48763564fcb2f296464ec1604f731e69b07a8814c89b170a2"}, + {file = "yt_dlp-2024.4.9.tar.gz", hash = "sha256:7ee90572b4d313b582b99c89e4eccf779b57ff54edc331873c6b3fba77faa8b0"}, ] diff --git a/pyproject.toml b/pyproject.toml index 98a1a055..248a11f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,32 +1,48 @@ [project] name = "archivebox" -version = "0.7.3" +version = "0.8.0" +package-dir = "archivebox" +requires-python = ">=3.10,<3.13" +platform = "py3-none-any" description = "Self-hosted internet archiving solution." 
-authors = [ - {name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}, -] +authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}] license = {text = "MIT"} readme = "README.md" -package-dir = "archivebox" -requires-python = ">=3.10,<3.12" + +# pdm install +# pdm update --unconstrained dependencies = [ - # pdm update [--unconstrained] - "croniter>=0.3.34", - "dateparser>=1.0.0", - "django-extensions>=3.2.3", + # Base Framework and Language Dependencies + "setuptools>=69.5.1", "django>=4.2.0,<5.0", - "setuptools>=69.0.3", + "django-extensions>=3.2.3", + "mypy-extensions>=1.0.0", + + # Python Helper Libraries + "requests>=2.31.0", + "dateparser>=1.0.0", "feedparser>=6.0.11", - "ipython>5.0.0", - "mypy-extensions>=0.4.3", - "python-crontab>=2.5.1", - "requests>=2.24.0", "w3lib>=1.22.0", - "yt-dlp>=2024.3.10", - # dont add playwright becuase packages without sdists cause trouble on many build systems that refuse to install wheel-only packages - "playwright>=1.39.0; platform_machine != 'armv7l'", + + # Feature-Specific Dependencies + "python-crontab>=2.5.1", # for: archivebox schedule + "croniter>=0.3.34", # for: archivebox schedule + "ipython>5.0.0", # for: archivebox shell + + # Extractor Dependencies + "yt-dlp>=2024.4.9", # for: media + "playwright>=1.43.0; platform_machine != 'armv7l'", # WARNING: playwright doesn't have any sdist, causes trouble on build systems that refuse to install wheel-only packages + + # TODO: add more extractors + # - gallery-dl + # - scihubdl + # - See Github issues for more... 
] +homepage = "https://github.com/ArchiveBox/ArchiveBox" +repository = "https://github.com/ArchiveBox/ArchiveBox" +documentation = "https://github.com/ArchiveBox/ArchiveBox/wiki" +keywords = ["internet archiving", "web archiving", "digipres", "warc", "preservation", "backups", "archiving", "web", "bookmarks", "puppeteer", "browser", "download"] classifiers = [ "Development Status :: 4 - Beta", "Environment :: Console", @@ -59,50 +75,64 @@ classifiers = [ "Topic :: Utilities", "Typing :: Typed", ] +# dynamic = ["version"] # TODO: programatticaly fetch version from package.json at build time +# pdm lock --group=':all' +# pdm install -G:all +# pdm update --group=':all' --unconstrained [project.optional-dependencies] -# pdm update [--group=':all'] [--unconstrained] sonic = [ # echo "deb [signed-by=/usr/share/keyrings/valeriansaliou_sonic.gpg] https://packagecloud.io/valeriansaliou/sonic/debian/ bookworm main" > /etc/apt/sources.list.d/valeriansaliou_sonic.list # curl -fsSL https://packagecloud.io/valeriansaliou/sonic/gpgkey | gpg --dearmor -o /usr/share/keyrings/valeriansaliou_sonic.gpg # apt install sonic - "sonic-client>=0.0.5", + "sonic-client>=1.0.0", ] ldap = [ # apt install libldap2-dev libsasl2-dev python3-ldap "python-ldap>=3.4.3", "django-auth-ldap>=4.1.0", ] -# playwright = [ -# platform_machine isnt respected by pdm export -o requirements.txt, this breaks arm/v7 -# "playwright>=1.39.0; platform_machine != 'armv7l'", -# ] +# pdm lock --group=':all' --dev # pdm install -G:all --dev -# pdm update --dev [--unconstrained] +# pdm update --dev --unconstrained [tool.pdm.dev-dependencies] -dev = [ - # building +build = [ + "setuptools>=69.5.1", + "pip", "wheel", "pdm", - "homebrew-pypi-poet>=0.10.0", - # documentation + "homebrew-pypi-poet>=0.10.0", # for: generating archivebox.rb brewfile list of python packages +] +docs = [ "recommonmark", "sphinx", "sphinx-rtd-theme", - # debugging +] +debug = [ "django-debug-toolbar", "djdt_flamegraph", "ipdb", - # testing +] 
+test = [ + "pdm[pytest]", "pytest", - # linting +] +lint = [ "flake8", "mypy", "django-stubs", ] +[build-system] +requires = ["pdm-backend"] +build-backend = "pdm.backend" + +[project.scripts] +archivebox = "archivebox.cli:main" + + [tool.pdm.scripts] lint = "./bin/lint.sh" test = "./bin/test.sh" @@ -111,12 +141,18 @@ test = "./bin/test.sh" [tool.pytest.ini_options] testpaths = [ "tests" ] -[project.scripts] -archivebox = "archivebox.cli:main" +[tool.mypy] +mypy_path = "archivebox" +namespace_packages = true +explicit_package_bases = true +# follow_imports = "silent" +# ignore_missing_imports = true +# disallow_incomplete_defs = true +# disallow_untyped_defs = true +# disallow_untyped_decorators = true +# exclude = "pdm/(pep582/|models/in_process/.+\\.py)" +plugins = ["mypy_django_plugin.main"] -[build-system] -requires = ["pdm-backend"] -build-backend = "pdm.backend" [project.urls] diff --git a/requirements.txt b/requirements.txt index a7e46acc..ba31ebdd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,54 +1,60 @@ # This file is @generated by PDM. # Please do not edit it manually. 
-asgiref==3.7.2 +asgiref==3.8.1 asttokens==2.4.1 brotli==1.1.0; implementation_name == "cpython" brotlicffi==1.1.0.0; implementation_name != "cpython" -certifi==2023.11.17 +certifi==2024.2.2 cffi==1.16.0; implementation_name != "cpython" charset-normalizer==3.3.2 colorama==0.4.6; sys_platform == "win32" -croniter==2.0.1 +croniter==2.0.5 dateparser==1.2.0 decorator==5.1.1 -django==3.1.14 -django-auth-ldap==4.1.0 -django-extensions==3.1.5 -exceptiongroup==1.2.0; python_version < "3.11" +django==4.2.11 +django-auth-ldap==4.8.0 +django-extensions==3.2.3 +exceptiongroup==1.2.1; python_version < "3.11" executing==2.0.1 -idna==3.6 -ipython==8.18.1 +feedparser==6.0.11 +greenlet==3.0.3; platform_machine != "armv7l" +idna==3.7 +ipython==8.23.0 jedi==0.19.1 -matplotlib-inline==0.1.6 +matplotlib-inline==0.1.7 mutagen==1.47.0 mypy-extensions==1.0.0 -parso==0.8.3 -pexpect==4.9.0; sys_platform != "win32" +parso==0.8.4 +pexpect==4.9.0; sys_platform != "win32" and sys_platform != "emscripten" +playwright==1.43.0; platform_machine != "armv7l" prompt-toolkit==3.0.43 -ptyprocess==0.7.0; sys_platform != "win32" +ptyprocess==0.7.0; sys_platform != "win32" and sys_platform != "emscripten" pure-eval==0.2.2 -pyasn1==0.5.1 -pyasn1-modules==0.3.0 -pycparser==2.21; implementation_name != "cpython" -pycryptodomex==3.19.1 +pyasn1==0.6.0 +pyasn1-modules==0.4.0 +pycparser==2.22; implementation_name != "cpython" +pycryptodomex==3.20.0 +pyee==11.1.0; platform_machine != "armv7l" pygments==2.17.2 python-crontab==3.0.0 -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 python-ldap==3.4.4 -pytz==2023.3.post1 -regex==2023.12.25 +pytz==2024.1 +regex==2024.4.16 requests==2.31.0 +setuptools==69.5.1 +sgmllib3k==1.0.0 six==1.16.0 sonic-client==1.0.0 -sqlparse==0.4.4 +sqlparse==0.5.0 stack-data==0.6.3 -traitlets==5.14.1 -typing-extensions==4.9.0; python_version < "3.11" -tzdata==2023.4; platform_system == "Windows" +traitlets==5.14.3 +typing-extensions==4.11.0; python_version < "3.12" or platform_machine 
!= "armv7l" +tzdata==2024.1; sys_platform == "win32" or platform_system == "Windows" tzlocal==5.2 -urllib3==2.1.0 +urllib3==2.2.1 w3lib==2.1.2 -wcwidth==0.2.12 +wcwidth==0.2.13 websockets==12.0 -yt-dlp==2023.12.30 +yt-dlp==2024.4.9 From 63fc317229e2f52076fcf71240c69270c12c0e3e Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 23 Apr 2024 17:45:18 -0700 Subject: [PATCH 165/166] minor pylint fixes in logging_util --- archivebox/logging_util.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py index 933214b9..a7ff95b7 100644 --- a/archivebox/logging_util.py +++ b/archivebox/logging_util.py @@ -494,12 +494,12 @@ def log_removal_started(links: List["Link"], yes: bool, delete: bool): if delete: file_counts = [link.num_outputs for link in links if Path(link.link_dir).exists()] print( - f' {len(links)} Links will be de-listed from the main index, and their archived content folders will be deleted from disk.\n' + f' {len(links)} Links will be de-listed from the main index, and their archived content folders will be deleted from disk.\n' + f' ({len(file_counts)} data folders with {sum(file_counts)} archived files will be deleted!)' ) else: print( - ' Matching links will be de-listed from the main index, but their archived content folders will remain in place on disk.\n' + ' Matching links will be de-listed from the main index, but their archived content folders will remain in place on disk.\n' + ' (Pass --delete if you also want to permanently delete the data folders)' ) @@ -638,17 +638,15 @@ def printable_folder_status(name: str, folder: Dict) -> str: @enforce_types def printable_dependency_version(name: str, dependency: Dict) -> str: - version = None + color, symbol, note, version = 'red', 'X', 'invalid', '?' 
+ if dependency['enabled']: if dependency['is_valid']: - color, symbol, note, version = 'green', '√', 'valid', '' + color, symbol, note = 'green', '√', 'valid' parsed_version_num = re.search(r'[\d\.]+', dependency['version']) if parsed_version_num: version = f'v{parsed_version_num[0]}' - - if not version: - color, symbol, note, version = 'red', 'X', 'invalid', '?' else: color, symbol, note, version = 'lightyellow', '-', 'disabled', '-' From a1a877f47f0da54ccb0f4f40f6598b156f37b746 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 23 Apr 2024 17:48:53 -0700 Subject: [PATCH 166/166] bump pip_dist submodule --- pip_dist | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pip_dist b/pip_dist index 5323fc77..1380be7e 160000 --- a/pip_dist +++ b/pip_dist @@ -1 +1 @@ -Subproject commit 5323fc773d33ef3f219c35c946f3b353b1251d37 +Subproject commit 1380be7e4ef156d85957dfef8c6d154ef9880578