Mirror of https://github.com/ArchiveBox/ArchiveBox.git (synced 2025-05-15 07:34:27 -04:00)

Merge branch 'dev' into issue1316
Commit ef856e8051
50 changed files with 1469 additions and 1694 deletions
@@ -17,6 +17,11 @@ venv/
.venv-old/
.docker-venv/
node_modules/
chrome/
chromeprofile/

pdm.dev.lock
pdm.lock

docs/
build/
.github/FUNDING.yml (vendored, 5 lines changed)
@@ -1,3 +1,2 @@
github: pirate
patreon: theSquashSH
custom: ["https://hcb.hackclub.com/donations/start/archivebox", "https://paypal.me/NicholasSweeting"]
github: ["ArchiveBox", "pirate"]
custom: ["https://donate.archivebox.io", "https://paypal.me/NicholasSweeting"]
.github/workflows/docker.yml (vendored, 32 lines changed)
@@ -11,7 +11,7 @@ on:

env:
  DOCKER_IMAGE: archivebox-ci

jobs:
  buildx:
    runs-on: ubuntu-latest

@@ -24,21 +24,21 @@ jobs:
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@v3
        with:
          version: latest
          install: true
          platforms: linux/amd64,linux/arm64,linux/arm/v7
          platforms: linux/amd64,linux/arm64

      - name: Builder instance name
        run: echo ${{ steps.buildx.outputs.name }}

      - name: Available platforms
        run: echo ${{ steps.buildx.outputs.platforms }}

      - name: Cache Docker layers
        uses: actions/cache@v3
        with:

@@ -51,21 +51,27 @@
        uses: docker/login-action@v3
        if: github.event_name != 'pull_request'
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      - name: Collect Docker tags
        # https://github.com/docker/metadata-action
        id: docker_meta
        uses: docker/metadata-action@v5
        with:
          images: archivebox/archivebox,nikisweeting/archivebox
          tags: |
            # :stable
            type=ref,event=branch
            # :0.7.3
            type=semver,pattern={{version}}
            # :0.7
            type=semver,pattern={{major}}.{{minor}}
            # :sha-463ea54
            type=sha
            type=raw,value=latest,enable={{is_default_branch}}

            # :latest
            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'stable') }}

      - name: Build and push
        id: docker_build
        uses: docker/build-push-action@v5

@@ -77,7 +83,7 @@
          tags: ${{ steps.docker_meta.outputs.tags }}
          cache-from: type=local,src=/tmp/.buildx-cache
          cache-to: type=local,dest=/tmp/.buildx-cache-new
          platforms: linux/amd64,linux/arm64,linux/arm/v7
          platforms: linux/amd64,linux/arm64

      - name: Image digest
        run: echo ${{ steps.docker_build.outputs.digest }}

@@ -88,7 +94,7 @@
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}
          repository: archivebox/archivebox

      # This ugly bit is necessary if you don't want your cache to grow forever
      # until it hits GitHub's limit of 5GB.
      # Temp fix
.gitignore (vendored, 6 lines changed)
@@ -13,8 +13,9 @@ venv/
node_modules/

# Ignore dev lockfiles (should always be built fresh)
requirements-dev.txt
pdm.lock
pdm.dev.lock
requirements-dev.txt

# Packaging artifacts
.pdm-python

@@ -26,9 +27,6 @@ dist/

# Data folders
data/
data1/
data2/
data3/
data*/
output/
Dockerfile (91 lines changed)
@@ -37,7 +37,7 @@ LABEL name="archivebox" \
    com.docker.extension.detailed-description='See here for detailed documentation: https://wiki.archivebox.io' \
    com.docker.extension.changelog='See here for release notes: https://github.com/ArchiveBox/ArchiveBox/releases' \
    com.docker.extension.categories='database,utility-tools'

ARG TARGETPLATFORM
ARG TARGETOS
ARG TARGETARCH

@@ -87,7 +87,9 @@ COPY --chown=root:root --chmod=755 package.json "$CODE_DIR/"
RUN grep '"version": ' "${CODE_DIR}/package.json" | awk -F'"' '{print $4}' > /VERSION.txt

# Force apt to leave downloaded binaries in /var/cache/apt (massively speeds up Docker builds)
RUN echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache \
RUN echo 'Binary::apt::APT::Keep-Downloaded-Packages "1";' > /etc/apt/apt.conf.d/99keep-cache \
    && echo 'APT::Install-Recommends "0";' > /etc/apt/apt.conf.d/99no-intall-recommends \
    && echo 'APT::Install-Suggests "0";' > /etc/apt/apt.conf.d/99no-intall-suggests \
    && rm -f /etc/apt/apt.conf.d/docker-clean

# Print debug info about build and save it to disk, for human eyes only, not used by anything else

@@ -120,10 +122,10 @@ RUN echo "[*] Setting up $ARCHIVEBOX_USER user uid=${DEFAULT_PUID}..." \
# Install system apt dependencies (adding backports to access more recent apt updates)
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT \
    echo "[+] Installing APT base system dependencies for $TARGETPLATFORM..." \
    && echo 'deb https://deb.debian.org/debian bookworm-backports main contrib non-free' >> /etc/apt/sources.list.d/backports.list \
    && echo 'deb https://deb.debian.org/debian bookworm-backports main contrib non-free' > /etc/apt/sources.list.d/backports.list \
    && mkdir -p /etc/apt/keyrings \
    && apt-get update -qq \
    && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
    && apt-get install -qq -y -t bookworm-backports \
        # 1. packaging dependencies
        apt-transport-https ca-certificates apt-utils gnupg2 curl wget \
        # 2. docker and init system dependencies

@@ -134,27 +136,13 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T

######### Language Environments ####################################

# Install Node environment
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \
    echo "[+] Installing Node $NODE_VERSION environment in $NODE_MODULES..." \
    && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" >> /etc/apt/sources.list.d/nodejs.list \
    && curl -fsSL "https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key" | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
    && apt-get update -qq \
    && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
        nodejs libatomic1 python3-minimal \
    && rm -rf /var/lib/apt/lists/* \
    # Update NPM to latest version
    && npm i -g npm --cache /root/.npm \
    # Save version info
    && ( \
        which node && node --version \
        && which npm && npm --version \
        && echo -e '\n\n' \
    ) | tee -a /VERSION.txt

# Install Python environment
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
    echo "[+] Setting up Python $PYTHON_VERSION runtime..." \
    # && apt-get update -qq \
    # && apt-get install -qq -y -t bookworm-backports --no-upgrade \
    #     python${PYTHON_VERSION} python${PYTHON_VERSION}-minimal python3-pip \
    # && rm -rf /var/lib/apt/lists/* \
    # tell PDM to allow using global system python site packages
    # && rm /usr/lib/python3*/EXTERNALLY-MANAGED \
    # create global virtual environment GLOBAL_VENV to use (better than using pip install --global)

@@ -171,13 +159,34 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
        && echo -e '\n\n' \
    ) | tee -a /VERSION.txt


# Install Node environment
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \
    echo "[+] Installing Node $NODE_VERSION environment in $NODE_MODULES..." \
    && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" >> /etc/apt/sources.list.d/nodejs.list \
    && curl -fsSL "https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key" | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
    && apt-get update -qq \
    && apt-get install -qq -y -t bookworm-backports --no-upgrade libatomic1 \
    && apt-get install -y -t bookworm-backports --no-upgrade \
        nodejs \
    && rm -rf /var/lib/apt/lists/* \
    # Update NPM to latest version
    && npm i -g npm --cache /root/.npm \
    # Save version info
    && ( \
        which node && node --version \
        && which npm && npm --version \
        && echo -e '\n\n' \
    ) | tee -a /VERSION.txt


######### Extractor Dependencies ##################################

# Install apt dependencies
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
    echo "[+] Installing APT extractor dependencies globally using apt..." \
    && apt-get update -qq \
    && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
    && apt-get install -qq -y -t bookworm-backports \
        curl wget git yt-dlp ffmpeg ripgrep \
        # Packages we have also needed in the past:
        # youtube-dl wget2 aria2 python3-pyxattr rtmpdump libfribidi-bin mpv \

@@ -196,25 +205,21 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/ms-playwright,sharing=locked,id=browsers-$TARGETARCH$TARGETVARIANT \
    echo "[+] Installing Browser binary dependencies to $PLAYWRIGHT_BROWSERS_PATH..." \
    && apt-get update -qq \
    && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
    && apt-get install -qq -y -t bookworm-backports \
        fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-khmeros fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
        at-spi2-common fonts-liberation fonts-noto-color-emoji fonts-tlwg-loma-otf fonts-unifont libatk-bridge2.0-0 libatk1.0-0 libatspi2.0-0 libavahi-client3 \
        libavahi-common-data libavahi-common3 libcups2 libfontenc1 libice6 libnspr4 libnss3 libsm6 libunwind8 \
        libxaw7 libxcomposite1 libxdamage1 libxfont2 \
        libxkbfile1 libxmu6 libxpm4 libxt6 x11-xkb-utils xfonts-encodings \
        # xfonts-scalable xfonts-utils xserver-common xvfb \
        # chrome can run without dbus/upower technically, it complains about missing dbus but should run ok anyway
        # libxss1 dbus dbus-x11 upower \
    # && service dbus start \
    && if [[ "$TARGETPLATFORM" == *amd64* || "$TARGETPLATFORM" == *arm64* ]]; then \
        # install Chromium using playwright
        pip install playwright \
        && cp -r /root/.cache/ms-playwright "$PLAYWRIGHT_BROWSERS_PATH" \
        && playwright install --with-deps chromium \
        && export CHROME_BINARY="$(python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')"; \
    else \
        # fall back to installing Chromium via apt-get on platforms not supported by playwright (e.g. risc, ARMv7, etc.)
        # apt-get install -qq -y -t bookworm-backports --no-install-recommends \
        #     chromium \
        # && export CHROME_BINARY="$(which chromium)"; \
        echo 'armv7 no longer supported in versions after v0.7.3' \
        exit 1; \
    fi \
    # install Chromium using playwright
    && pip install playwright \
    && cp -r /root/.cache/ms-playwright "$PLAYWRIGHT_BROWSERS_PATH" \
    && playwright install chromium \
    && export CHROME_BINARY="$(python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')" \
    && rm -rf /var/lib/apt/lists/* \
    && ln -s "$CHROME_BINARY" /usr/bin/chromium-browser \
    && mkdir -p "/home/${ARCHIVEBOX_USER}/.config/chromium/Crash Reports/pending/" \

@@ -247,8 +252,8 @@ COPY --chown=root:root --chmod=755 "./pyproject.toml" "requirements.txt" "$CODE_
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
    echo "[+] Installing PIP ArchiveBox dependencies from requirements.txt for ${TARGETPLATFORM}..." \
    && apt-get update -qq \
    && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
        build-essential \
    && apt-get install -qq -y -t bookworm-backports \
        # build-essential \
        libssl-dev libldap2-dev libsasl2-dev \
        python3-ldap python3-msgpack python3-mutagen python3-regex python3-pycryptodome procps \
    # && ln -s "$GLOBAL_VENV" "$APP_VENV" \

@@ -258,8 +263,8 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
    # && pdm export -o requirements.txt --without-hashes \
    # && source $GLOBAL_VENV/bin/activate \
    && pip install -r requirements.txt \
    && apt-get purge -y \
        build-essential \
    # && apt-get purge -y \
    #     build-essential \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*

@@ -269,7 +274,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
    echo "[*] Installing PIP ArchiveBox package from $CODE_DIR..." \
    # && apt-get update -qq \
    # install C compiler to build deps on platforms that dont have 32-bit wheels available on pypi
    # && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
    # && apt-get install -qq -y -t bookworm-backports \
    #     build-essential \
    # INSTALL ARCHIVEBOX python package globally from CODE_DIR, with all optional dependencies
    && pip install -e "$CODE_DIR"[sonic,ldap] \
@@ -407,7 +407,7 @@ See <a href="#%EF%B8%8F-cli-usage">below</a> for usage examples using the CLI, W
> *Warning: These are contributed by external volunteers and may lag behind the official `pip` channel.*

<ul>
<li>TrueNAS: <a href="https://truecharts.org/charts/incubator/archivebox/">Official ArchiveBox TrueChart</a> / <a href="https://dev.to/finloop/setting-up-archivebox-on-truenas-scale-1788">Custom App Guide</a></li>
<li>TrueNAS: <a href="https://truecharts.org/charts/stable/archivebox/">Official ArchiveBox TrueChart</a> / <a href="https://dev.to/finloop/setting-up-archivebox-on-truenas-scale-1788">Custom App Guide</a></li>
<li><a href="https://unraid.net/community/apps?q=archivebox#r">UnRaid</a></li>
<li><a href="https://github.com/YunoHost-Apps/archivebox_ynh">Yunohost</a></li>
<li><a href="https://www.cloudron.io/store/io.archivebox.cloudronapp.html">Cloudron</a></li>

@@ -445,6 +445,9 @@ Other providers of paid ArchiveBox hosting (not officially endorsed):<br/>
<li><a href="https://fly.io/">
<img src="https://img.shields.io/badge/Unmanaged_App-Fly.io-%239a2de6.svg?style=flat" height="22px"/>
</a> (USD $10-50+/mo, <a href="https://fly.io/docs/hands-on/start/">instructions</a>)</li>
<li><a href="https://railway.app/template/2Vvhmy">
<img src="https://img.shields.io/badge/Unmanaged_App-Railway-%23A11BE6.svg?style=flat" height="22px"/>
</a> (USD $0-5+/mo)</li>
<li><a href="https://aws.amazon.com/marketplace/pp/Linnovate-Open-Source-Innovation-Support-For-Archi/B08RVW6MJ2"><img src="https://img.shields.io/badge/Unmanaged_VPS-AWS-%23ee8135.svg?style=flat" height="22px"/></a> (USD $60-200+/mo)</li>
<li><a href="https://azuremarketplace.microsoft.com/en-us/marketplace/apps/meanio.archivebox?ocid=gtmrewards_whatsnewblog_archivebox_vol118"><img src="https://img.shields.io/badge/Unmanaged_VPS-Azure-%237cb300.svg?style=flat" height="22px"/></a> (USD $60-200+/mo)</li>
<br/>
@@ -1 +1,7 @@
__package__ = 'archivebox'


# monkey patch django timezone to add back utc (it was removed in Django 5.0)
import datetime
from django.utils import timezone
timezone.utc = datetime.timezone.utc
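Usage note (not part of the commit): a minimal sketch of why this monkeypatch matters, assuming the archivebox package (and therefore the patch above) has already been imported. Django 5.0 removed the `timezone.utc` alias in favor of `datetime.timezone.utc`, so restoring it keeps older call sites working unchanged.

    # hypothetical legacy code that still references django.utils.timezone.utc
    import datetime
    from django.utils import timezone   # archivebox/__init__.py has already re-added .utc

    aware_dt = datetime.datetime(2024, 4, 25, tzinfo=timezone.utc)
    assert timezone.utc is datetime.timezone.utc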
@@ -0,0 +1 @@
__package__ = 'archivebox.api'
@@ -1,3 +1,5 @@
__package__ = 'archivebox.api'

from django.apps import AppConfig
@@ -1,184 +0,0 @@
# archivebox_api.py
from typing import List, Optional
from enum import Enum
from pydantic import BaseModel
from ninja import Router
from main import (
    add,
    remove,
    update,
    list_all,
    ONLY_NEW,
)  # Assuming these functions are defined in main.py


# Schemas

class StatusChoices(str, Enum):
    indexed = 'indexed'
    archived = 'archived'
    unarchived = 'unarchived'
    present = 'present'
    valid = 'valid'
    invalid = 'invalid'
    duplicate = 'duplicate'
    orphaned = 'orphaned'
    corrupted = 'corrupted'
    unrecognized = 'unrecognized'


class AddURLSchema(BaseModel):
    urls: List[str]
    tag: str = ""
    depth: int = 0
    update: bool = not ONLY_NEW  # Default to the opposite of ONLY_NEW
    update_all: bool = False
    index_only: bool = False
    overwrite: bool = False
    init: bool = False
    extractors: str = ""
    parser: str = "auto"


class RemoveURLSchema(BaseModel):
    yes: bool = False
    delete: bool = False
    before: Optional[float] = None
    after: Optional[float] = None
    filter_type: str = "exact"
    filter_patterns: Optional[List[str]] = None


class UpdateSchema(BaseModel):
    resume: Optional[float] = None
    only_new: Optional[bool] = None
    index_only: Optional[bool] = False
    overwrite: Optional[bool] = False
    before: Optional[float] = None
    after: Optional[float] = None
    status: Optional[StatusChoices] = None
    filter_type: Optional[str] = 'exact'
    filter_patterns: Optional[List[str]] = None
    extractors: Optional[str] = ""


class ListAllSchema(BaseModel):
    filter_patterns: Optional[List[str]] = None
    filter_type: str = 'exact'
    status: Optional[StatusChoices] = None
    after: Optional[float] = None
    before: Optional[float] = None
    sort: Optional[str] = None
    csv: Optional[str] = None
    json: bool = False
    html: bool = False
    with_headers: bool = False


# API Router
router = Router()


@router.post("/add", response={200: dict})
def api_add(request, payload: AddURLSchema):
    try:
        result = add(
            urls=payload.urls,
            tag=payload.tag,
            depth=payload.depth,
            update=payload.update,
            update_all=payload.update_all,
            index_only=payload.index_only,
            overwrite=payload.overwrite,
            init=payload.init,
            extractors=payload.extractors,
            parser=payload.parser,
        )
        # Currently the add function returns a list of ALL items in the DB, ideally only return new items
        return {
            "status": "success",
            "message": "URLs added successfully.",
            "result": str(result),
        }
    except Exception as e:
        # Handle exceptions raised by the add function or during processing
        return {"status": "error", "message": str(e)}


@router.post("/remove", response={200: dict})
def api_remove(request, payload: RemoveURLSchema):
    try:
        result = remove(
            yes=payload.yes,
            delete=payload.delete,
            before=payload.before,
            after=payload.after,
            filter_type=payload.filter_type,
            filter_patterns=payload.filter_patterns,
        )
        return {
            "status": "success",
            "message": "URLs removed successfully.",
            "result": result,
        }
    except Exception as e:
        # Handle exceptions raised by the remove function or during processing
        return {"status": "error", "message": str(e)}


@router.post("/update", response={200: dict})
def api_update(request, payload: UpdateSchema):
    try:
        result = update(
            resume=payload.resume,
            only_new=payload.only_new,
            index_only=payload.index_only,
            overwrite=payload.overwrite,
            before=payload.before,
            after=payload.after,
            status=payload.status,
            filter_type=payload.filter_type,
            filter_patterns=payload.filter_patterns,
            extractors=payload.extractors,
        )
        return {
            "status": "success",
            "message": "Archive updated successfully.",
            "result": result,
        }
    except Exception as e:
        # Handle exceptions raised by the update function or during processing
        return {"status": "error", "message": str(e)}


@router.post("/list_all", response={200: dict})
def api_list_all(request, payload: ListAllSchema):
    try:
        result = list_all(
            filter_patterns=payload.filter_patterns,
            filter_type=payload.filter_type,
            status=payload.status,
            after=payload.after,
            before=payload.before,
            sort=payload.sort,
            csv=payload.csv,
            json=payload.json,
            html=payload.html,
            with_headers=payload.with_headers,
        )
        # TODO: This is kind of bad, make the format a choice field
        if payload.json:
            return {"status": "success", "format": "json", "data": result}
        elif payload.html:
            return {"status": "success", "format": "html", "data": result}
        elif payload.csv:
            return {"status": "success", "format": "csv", "data": result}
        else:
            return {
                "status": "success",
                "message": "List generated successfully.",
                "data": result,
            }
    except Exception as e:
        # Handle exceptions raised by the list_all function or during processing
        return {"status": "error", "message": str(e)}
@@ -1,48 +1,107 @@
__package__ = 'archivebox.api'

from typing import Optional

from django.http import HttpRequest
from django.contrib.auth import login
from django.contrib.auth import authenticate
from ninja import Form, Router, Schema
from ninja.security import HttpBearer
from django.contrib.auth.models import AbstractBaseUser

from api.models import Token

router = Router()
from ninja.security import HttpBearer, APIKeyQuery, APIKeyHeader, HttpBasicAuth, django_auth_superuser


class GlobalAuth(HttpBearer):
    def authenticate(self, request, token):
def auth_using_token(token, request: Optional[HttpRequest]=None) -> Optional[AbstractBaseUser]:
    """Given an API token string, check if a corresponding non-expired APIToken exists, and return its user"""
    from api.models import APIToken    # lazy import model to avoid loading it at urls.py import time

    user = None

    submitted_empty_form = token in ('string', '', None)
    if submitted_empty_form:
        user = request.user    # see if user is authed via django session and use that as the default
    else:
        try:
            return Token.objects.get(token=token).user
        except Token.DoesNotExist:
            token = APIToken.objects.get(token=token)
            if token.is_valid():
                user = token.user
        except APIToken.DoesNotExist:
            pass

    if not user:
        print('[❌] Failed to authenticate API user using API Key:', request)

class AuthSchema(Schema):
    email: str
    password: str
    return None


@router.post("/authenticate", auth=None)    # overriding global auth
def get_token(request, auth_data: AuthSchema):
    user = authenticate(username=auth_data.email, password=auth_data.password)
    if user:
        # Assuming a user can have multiple tokens and you want to create a new one every time
        new_token = Token.objects.create(user=user)
        return {"token": new_token.token, "expires": new_token.expiry_as_iso8601}
def auth_using_password(username, password, request: Optional[HttpRequest]=None) -> Optional[AbstractBaseUser]:
    """Given a username and password, check if they are valid and return the corresponding user"""
    user = None

    submitted_empty_form = (username, password) in (('string', 'string'), ('', ''), (None, None))
    if submitted_empty_form:
        user = request.user    # see if user is authed via django session and use that as the default
    else:
        return {"error": "Invalid credentials"}
        user = authenticate(
            username=username,
            password=password,
        )

    if not user:
        print('[❌] Failed to authenticate API user using API Key:', request)

    return user


class TokenValidationSchema(Schema):
    token: str
### Base Auth Types

class APITokenAuthCheck:
    """The base class for authentication methods that use an api.models.APIToken"""
    def authenticate(self, request: HttpRequest, key: Optional[str]=None) -> Optional[AbstractBaseUser]:
        user = auth_using_token(
            token=key,
            request=request,
        )
        if user is not None:
            login(request, user, backend='django.contrib.auth.backends.ModelBackend')
        return user

class UserPassAuthCheck:
    """The base class for authentication methods that use a username & password"""
    def authenticate(self, request: HttpRequest, username: Optional[str]=None, password: Optional[str]=None) -> Optional[AbstractBaseUser]:
        user = auth_using_password(
            username=username,
            password=password,
            request=request,
        )
        if user is not None:
            login(request, user, backend='django.contrib.auth.backends.ModelBackend')
        return user


@router.post("/validate_token", auth=None)    # No authentication required for this endpoint
def validate_token(request, token_data: TokenValidationSchema):
    try:
        # Attempt to authenticate using the provided token
        user = GlobalAuth().authenticate(request, token_data.token)
        if user:
            return {"status": "valid"}
        else:
            return {"status": "invalid"}
    except Token.DoesNotExist:
        return {"status": "invalid"}
### Django-Ninja-Provided Auth Methods

class UsernameAndPasswordAuth(UserPassAuthCheck, HttpBasicAuth):
    """Allow authenticating by passing username & password via HTTP Basic Authentication (not recommended)"""
    pass

class QueryParamTokenAuth(APITokenAuthCheck, APIKeyQuery):
    """Allow authenticating by passing api_key=xyz as a GET/POST query parameter"""
    param_name = "api_key"

class HeaderTokenAuth(APITokenAuthCheck, APIKeyHeader):
    """Allow authenticating by passing X-API-Key=xyz as a request header"""
    param_name = "X-API-Key"

class BearerTokenAuth(APITokenAuthCheck, HttpBearer):
    """Allow authenticating by passing Bearer=xyz as a request header"""
    pass


### Enabled Auth Methods

API_AUTH_METHODS = [
    QueryParamTokenAuth(),
    HeaderTokenAuth(),
    BearerTokenAuth(),
    django_auth_superuser,
    UsernameAndPasswordAuth(),
]
@@ -1,9 +1,10 @@
# Generated by Django 3.1.14 on 2024-04-09 18:52
# Generated by Django 4.2.11 on 2024-04-25 04:19

import api.models
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import uuid


class Migration(migrations.Migration):

@@ -16,13 +17,13 @@ class Migration(migrations.Migration):

    operations = [
        migrations.CreateModel(
            name='Token',
            name='APIToken',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('token', models.CharField(default=auth.models.hex_uuid, max_length=32, unique=True)),
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
                ('token', models.CharField(default=api.models.generate_secret_token, max_length=32, unique=True)),
                ('created', models.DateTimeField(auto_now_add=True)),
                ('expiry', models.DateTimeField(blank=True, null=True)),
                ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='tokens', to=settings.AUTH_USER_MODEL)),
                ('expires', models.DateTimeField(blank=True, null=True)),
                ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
            ],
        ),
    ]
archivebox/api/migrations/0002_alter_apitoken_options.py (new file, 17 lines)
@@ -0,0 +1,17 @@
# Generated by Django 5.0.4 on 2024-04-26 05:28

from django.db import migrations


class Migration(migrations.Migration):

    dependencies = [
        ('api', '0001_initial'),
    ]

    operations = [
        migrations.AlterModelOptions(
            name='apitoken',
            options={'verbose_name': 'API Key', 'verbose_name_plural': 'API Keys'},
        ),
    ]
@@ -1,30 +1,63 @@
__package__ = 'archivebox.api'

import uuid
import secrets
from datetime import timedelta

from django.conf import settings
from django.db import models
from django.utils import timezone
from django.utils.translation import gettext_lazy as _

def hex_uuid():
    return uuid.uuid4().hex
from django_stubs_ext.db.models import TypedModelMeta


class Token(models.Model):
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL, on_delete=models.CASCADE, related_name="tokens"
    )
    token = models.CharField(max_length=32, default=hex_uuid, unique=True)
def generate_secret_token() -> str:
    # returns cryptographically secure string with len() == 32
    return secrets.token_hex(16)


class APIToken(models.Model):
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)

    user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
    token = models.CharField(max_length=32, default=generate_secret_token, unique=True)

    created = models.DateTimeField(auto_now_add=True)
    expiry = models.DateTimeField(null=True, blank=True)
    expires = models.DateTimeField(null=True, blank=True)

    class Meta(TypedModelMeta):
        verbose_name = "API Key"
        verbose_name_plural = "API Keys"

    def __str__(self) -> str:
        return self.token

    def __repr__(self) -> str:
        return f'<APIToken user={self.user.username} token=************{self.token[-4:]}>'

    def __json__(self) -> dict:
        return {
            "TYPE": "APIToken",
            "id": str(self.id),
            "user_id": str(self.user.id),
            "user_username": self.user.username,
            "token": self.token,
            "created": self.created.isoformat(),
            "expires": self.expires_as_iso8601,
        }

    @property
    def expiry_as_iso8601(self):
    def expires_as_iso8601(self):
        """Returns the expiry date of the token in ISO 8601 format or a date 100 years in the future if none."""
        expiry_date = (
            self.expiry if self.expiry else timezone.now() + timedelta(days=365 * 100)
        )
        expiry_date = self.expires or (timezone.now() + timedelta(days=365 * 100))

        return expiry_date.isoformat()

    def __str__(self):
        return self.token
    def is_valid(self, for_date=None):
        for_date = for_date or timezone.now()

        if self.expires and self.expires < for_date:
            return False

        return True
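Usage note (not part of the commit): a small sketch of how `generate_secret_token`, `expires`, and `is_valid()` fit together, assuming it is run inside a Django context for this project (e.g. `archivebox shell`) with at least one user already created.

    from datetime import timedelta
    from django.utils import timezone
    from django.contrib.auth import get_user_model
    from api.models import APIToken

    user = get_user_model().objects.first()      # assumes a user already exists

    # the token string is filled in automatically by generate_secret_token (32 hex chars)
    token = APIToken.objects.create(user=user)
    assert len(token.token) == 32 and token.is_valid()

    # tokens whose `expires` date has passed stop validating
    token.expires = timezone.now() - timedelta(days=1)
    token.save()
    assert not token.is_valid()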
@@ -1,27 +1,30 @@
__package__ = 'archivebox.api'

from django.test import TestCase
from ninja.testing import TestClient
from archivebox.api.archive import router as archive_router

class ArchiveBoxAPITestCase(TestCase):
from .routes_cli import router

class ArchiveBoxCLIAPITestCase(TestCase):
    def setUp(self):
        self.client = TestClient(archive_router)
        self.client = TestClient(router)

    def test_add_endpoint(self):
        response = self.client.post("/add", json={"urls": ["http://example.com"], "tag": "test"})
        response = self.client.post("/add", json={"urls": ["http://example.com"], "tag": "testTag1,testTag2"})
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.json()["status"], "success")
        self.assertTrue(response.json()["success"])

    def test_remove_endpoint(self):
        response = self.client.post("/remove", json={"filter_patterns": ["http://example.com"]})
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.json()["status"], "success")
        self.assertTrue(response.json()["success"])

    def test_update_endpoint(self):
        response = self.client.post("/update", json={})
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.json()["status"], "success")
        self.assertTrue(response.json()["success"])

    def test_list_all_endpoint(self):
        response = self.client.post("/list_all", json={})
        self.assertEqual(response.status_code, 200)
        self.assertTrue("success" in response.json()["status"])
        self.assertTrue(response.json()["success"])
archivebox/api/urls.py (new file, 17 lines)
@@ -0,0 +1,17 @@
__package__ = 'archivebox.api'

from django.urls import path
from django.views.generic.base import RedirectView

from .v1_api import urls as v1_api_urls

urlpatterns = [
    path("", RedirectView.as_view(url='/api/v1')),

    path("v1/", v1_api_urls),
    path("v1", RedirectView.as_view(url='/api/v1/docs')),

    # ... v2 can be added here ...
    # path("v2/", v2_api_urls),
    # path("v2", RedirectView.as_view(url='/api/v2/docs')),
]
archivebox/api/v1_api.py (new file, 111 lines)
@@ -0,0 +1,111 @@
__package__ = 'archivebox.api'


from io import StringIO
from traceback import format_exception
from contextlib import redirect_stdout, redirect_stderr

from django.http import HttpRequest, HttpResponse
from django.core.exceptions import ObjectDoesNotExist, EmptyResultSet, PermissionDenied

from ninja import NinjaAPI, Swagger

# TODO: explore adding https://eadwincode.github.io/django-ninja-extra/

from api.auth import API_AUTH_METHODS
from ..config import VERSION, COMMIT_HASH


COMMIT_HASH = COMMIT_HASH or 'unknown'

html_description=f'''
<h3>Welcome to your ArchiveBox server's REST API <code>[v1 ALPHA]</code> homepage!</h3>
<br/>
<i><b>WARNING: This API is still in an early development stage and may change!</b></i>
<br/>
<ul>
<li>⬅️ Manage your server: <a href="/admin/api/"><b>Setup API Keys</b></a>, <a href="/admin/">Go to your Server Admin UI</a>, <a href="/">Go to your Snapshots list</a>
<li>💬 Ask questions and get help here: <a href="https://zulip.archivebox.io">ArchiveBox Chat Forum</a></li>
<li>🐞 Report API bugs here: <a href="https://github.com/ArchiveBox/ArchiveBox/issues">Github Issues</a></li>
<li>📚 ArchiveBox Documentation: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki">Github Wiki</a></li>
<li>📜 See the API source code: <a href="https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/api"><code>archivebox/api/</code></a></li>
</ul>
<small>Served by ArchiveBox v{VERSION} (<a href="https://github.com/ArchiveBox/ArchiveBox/commit/{COMMIT_HASH}"><code>{COMMIT_HASH[:8]}</code></a>), API powered by <a href="https://django-ninja.dev/"><code>django-ninja</code></a>.</small>
'''


def register_urls(api: NinjaAPI) -> NinjaAPI:
    api.add_router('/auth/', 'api.v1_auth.router')
    api.add_router('/core/', 'api.v1_core.router')
    api.add_router('/cli/', 'api.v1_cli.router')
    return api


class NinjaAPIWithIOCapture(NinjaAPI):
    def create_temporal_response(self, request: HttpRequest) -> HttpResponse:
        stdout, stderr = StringIO(), StringIO()

        with redirect_stderr(stderr):
            with redirect_stdout(stdout):
                request.stdout = stdout
                request.stderr = stderr

                response = super().create_temporal_response(request)

        print('RESPONDING NOW', response)

        return response


api = NinjaAPIWithIOCapture(
    title='ArchiveBox API',
    description=html_description,
    version='1.0.0',
    csrf=False,
    auth=API_AUTH_METHODS,
    urls_namespace="api",
    docs=Swagger(settings={"persistAuthorization": True}),
    # docs_decorator=login_required,
    # renderer=ORJSONRenderer(),
)
api = register_urls(api)
urls = api.urls


@api.exception_handler(Exception)
def generic_exception_handler(request, err):
    status = 503
    if isinstance(err, (ObjectDoesNotExist, EmptyResultSet, PermissionDenied)):
        status = 404

    print(''.join(format_exception(err)))

    return api.create_response(
        request,
        {
            "succeeded": False,
            "message": f'{err.__class__.__name__}: {err}',
            "errors": [
                ''.join(format_exception(err)),
                # or send simpler parent-only traceback:
                # *([str(err.__context__)] if getattr(err, '__context__', None) else []),
            ],
        },
        status=status,
    )


# import orjson
# from ninja.renderers import BaseRenderer
# class ORJSONRenderer(BaseRenderer):
#     media_type = "application/json"
#     def render(self, request, data, *, response_status):
#         return {
#             "success": True,
#             "errors": [],
#             "result": data,
#             "stdout": ansi_to_html(stdout.getvalue().strip()),
#             "stderr": ansi_to_html(stderr.getvalue().strip()),
#         }
#         return orjson.dumps(data)
archivebox/api/v1_auth.py (new file, 52 lines)
@@ -0,0 +1,52 @@
__package__ = 'archivebox.api'

from typing import Optional

from ninja import Router, Schema

from api.models import APIToken
from api.auth import auth_using_token, auth_using_password


router = Router(tags=['Authentication'])


class PasswordAuthSchema(Schema):
    """Schema for a /get_api_token request"""
    username: Optional[str] = None
    password: Optional[str] = None


@router.post("/get_api_token", auth=None, summary='Generate an API token for a given username & password (or currently logged-in user)')    # auth=None because they are not authed yet
def get_api_token(request, auth_data: PasswordAuthSchema):
    user = auth_using_password(
        username=auth_data.username,
        password=auth_data.password,
        request=request,
    )

    if user:
        # TODO: support multiple tokens in the future, for now we just have one per user
        api_token, created = APIToken.objects.get_or_create(user=user)

        return api_token.__json__()

    return {"success": False, "errors": ["Invalid credentials"]}


class TokenAuthSchema(Schema):
    """Schema for a /check_api_token request"""
    token: str


@router.post("/check_api_token", auth=None, summary='Validate an API token to make sure its valid and non-expired')    # auth=None because they are not authed yet
def check_api_token(request, token_data: TokenAuthSchema):
    user = auth_using_token(
        token=token_data.token,
        request=request,
    )
    if user:
        return {"success": True, "user_id": str(user.id)}

    return {"success": False, "user_id": None}
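Usage note (not part of the commit): a hedged client-side sketch, assuming the API is mounted at /api/v1/ as in archivebox/api/urls.py above and a dev server is running on http://127.0.0.1:8000 with the given credentials. It obtains a token from /auth/get_api_token and then verifies it with /auth/check_api_token.

    import requests

    BASE = "http://127.0.0.1:8000/api/v1"   # assumed local dev server

    # exchange username/password for an APIToken (endpoint defined in v1_auth.py above)
    resp = requests.post(f"{BASE}/auth/get_api_token",
                         json={"username": "admin", "password": "hunter2"})
    token = resp.json()["token"]

    # confirm the token is valid and non-expired
    check = requests.post(f"{BASE}/auth/check_api_token", json={"token": token})
    print(check.json())   # e.g. {"success": true, "user_id": "..."}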
archivebox/api/v1_cli.py (new file, 234 lines)
@@ -0,0 +1,234 @@
__package__ = 'archivebox.api'

from typing import List, Dict, Any, Optional
from enum import Enum

from ninja import Router, Schema

from ..main import (
    add,
    remove,
    update,
    list_all,
    schedule,
)
from ..util import ansi_to_html
from ..config import ONLY_NEW


# router for API that exposes archivebox cli subcommands as REST endpoints
router = Router(tags=['ArchiveBox CLI Sub-Commands'])


# Schemas

JSONType = List[Any] | Dict[str, Any] | bool | int | str | None

class CLICommandResponseSchema(Schema):
    success: bool
    errors: List[str]
    result: JSONType
    stdout: str
    stderr: str

class FilterTypeChoices(str, Enum):
    exact = 'exact'
    substring = 'substring'
    regex = 'regex'
    domain = 'domain'
    tag = 'tag'
    timestamp = 'timestamp'

class StatusChoices(str, Enum):
    indexed = 'indexed'
    archived = 'archived'
    unarchived = 'unarchived'
    present = 'present'
    valid = 'valid'
    invalid = 'invalid'
    duplicate = 'duplicate'
    orphaned = 'orphaned'
    corrupted = 'corrupted'
    unrecognized = 'unrecognized'


class AddCommandSchema(Schema):
    urls: List[str]
    tag: str = ""
    depth: int = 0
    update: bool = not ONLY_NEW  # Default to the opposite of ONLY_NEW
    update_all: bool = False
    index_only: bool = False
    overwrite: bool = False
    init: bool = False
    extractors: str = ""
    parser: str = "auto"

class UpdateCommandSchema(Schema):
    resume: Optional[float] = 0
    only_new: bool = ONLY_NEW
    index_only: bool = False
    overwrite: bool = False
    after: Optional[float] = 0
    before: Optional[float] = 999999999999999
    status: Optional[StatusChoices] = StatusChoices.unarchived
    filter_type: Optional[str] = FilterTypeChoices.substring
    filter_patterns: Optional[List[str]] = ['https://example.com']
    extractors: Optional[str] = ""

class ScheduleCommandSchema(Schema):
    import_path: Optional[str] = None
    add: bool = False
    every: Optional[str] = None
    tag: str = ''
    depth: int = 0
    overwrite: bool = False
    update: bool = not ONLY_NEW
    clear: bool = False

class ListCommandSchema(Schema):
    filter_patterns: Optional[List[str]] = ['https://example.com']
    filter_type: str = FilterTypeChoices.substring
    status: Optional[StatusChoices] = StatusChoices.indexed
    after: Optional[float] = 0
    before: Optional[float] = 999999999999999
    sort: str = 'added'
    as_json: bool = True
    as_html: bool = False
    as_csv: str | bool = 'timestamp,url'
    with_headers: bool = False

class RemoveCommandSchema(Schema):
    delete: bool = True
    after: Optional[float] = 0
    before: Optional[float] = 999999999999999
    filter_type: str = FilterTypeChoices.exact
    filter_patterns: Optional[List[str]] = ['https://example.com']



@router.post("/add", response=CLICommandResponseSchema, summary='archivebox add [args] [urls]')
def cli_add(request, args: AddCommandSchema):
    result = add(
        urls=args.urls,
        tag=args.tag,
        depth=args.depth,
        update=args.update,
        update_all=args.update_all,
        index_only=args.index_only,
        overwrite=args.overwrite,
        init=args.init,
        extractors=args.extractors,
        parser=args.parser,
    )

    return {
        "success": True,
        "errors": [],
        "result": result,
        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
    }


@router.post("/update", response=CLICommandResponseSchema, summary='archivebox update [args] [filter_patterns]')
def cli_update(request, args: UpdateCommandSchema):
    result = update(
        resume=args.resume,
        only_new=args.only_new,
        index_only=args.index_only,
        overwrite=args.overwrite,
        before=args.before,
        after=args.after,
        status=args.status,
        filter_type=args.filter_type,
        filter_patterns=args.filter_patterns,
        extractors=args.extractors,
    )
    return {
        "success": True,
        "errors": [],
        "result": result,
        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
    }


@router.post("/schedule", response=CLICommandResponseSchema, summary='archivebox schedule [args] [import_path]')
def cli_schedule(request, args: ScheduleCommandSchema):
    result = schedule(
        import_path=args.import_path,
        add=args.add,
        show=args.show,
        clear=args.clear,
        every=args.every,
        tag=args.tag,
        depth=args.depth,
        overwrite=args.overwrite,
        update=args.update,
    )

    return {
        "success": True,
        "errors": [],
        "result": result,
        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
    }


@router.post("/list", response=CLICommandResponseSchema, summary='archivebox list [args] [filter_patterns]')
def cli_list(request, args: ListCommandSchema):
    result = list_all(
        filter_patterns=args.filter_patterns,
        filter_type=args.filter_type,
        status=args.status,
        after=args.after,
        before=args.before,
        sort=args.sort,
        csv=args.as_csv,
        json=args.as_json,
        html=args.as_html,
        with_headers=args.with_headers,
    )

    result_format = 'txt'
    if args.as_json:
        result_format = "json"
    elif args.as_html:
        result_format = "html"
    elif args.as_csv:
        result_format = "csv"

    return {
        "success": True,
        "errors": [],
        "result": result,
        "result_format": result_format,
        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
    }


@router.post("/remove", response=CLICommandResponseSchema, summary='archivebox remove [args] [filter_patterns]')
def cli_remove(request, args: RemoveCommandSchema):
    result = remove(
        yes=True,   # no way to interactively ask for confirmation via API, so we force yes
        delete=args.delete,
        before=args.before,
        after=args.after,
        filter_type=args.filter_type,
        filter_patterns=args.filter_patterns,
    )
    return {
        "success": True,
        "errors": [],
        "result": result,
        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
    }
archivebox/api/v1_core.py
Normal file
210
archivebox/api/v1_core.py
Normal file
|
@ -0,0 +1,210 @@
|
|||
__package__ = 'archivebox.api'
|
||||
|
||||
from uuid import UUID
|
||||
from typing import List, Optional
|
||||
from datetime import datetime
|
||||
|
||||
from django.shortcuts import get_object_or_404
|
||||
|
||||
from ninja import Router, Schema, FilterSchema, Field, Query
|
||||
from ninja.pagination import paginate
|
||||
|
||||
from core.models import Snapshot, ArchiveResult, Tag
|
||||
|
||||
|
||||
router = Router(tags=['Core Models'])
|
||||
|
||||
|
||||
|
||||
|
||||
### ArchiveResult #########################################################################
|
||||
|
||||
class ArchiveResultSchema(Schema):
|
||||
id: UUID
|
||||
|
||||
snapshot_id: UUID
|
||||
snapshot_url: str
|
||||
snapshot_tags: str
|
||||
|
||||
extractor: str
|
||||
cmd: List[str]
|
||||
pwd: str
|
||||
cmd_version: str
|
||||
output: str
|
||||
status: str
|
||||
|
||||
created: datetime
|
||||
|
||||
@staticmethod
|
||||
def resolve_id(obj):
|
||||
return obj.uuid
|
||||
|
||||
@staticmethod
|
||||
def resolve_created(obj):
|
||||
return obj.start_ts
|
||||
|
||||
@staticmethod
|
||||
def resolve_snapshot_url(obj):
|
||||
return obj.snapshot.url
|
||||
|
||||
@staticmethod
|
||||
def resolve_snapshot_tags(obj):
|
||||
return obj.snapshot.tags_str()
|
||||
|
||||
|
||||
class ArchiveResultFilterSchema(FilterSchema):
|
||||
id: Optional[UUID] = Field(None, q='uuid')
|
||||
|
||||
search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains'])
|
||||
snapshot_id: Optional[UUID] = Field(None, q='snapshot_id')
|
||||
snapshot_url: Optional[str] = Field(None, q='snapshot__url')
|
||||
snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name')
|
||||
|
||||
status: Optional[str] = Field(None, q='status')
|
||||
output: Optional[str] = Field(None, q='output__icontains')
|
||||
extractor: Optional[str] = Field(None, q='extractor__icontains')
|
||||
cmd: Optional[str] = Field(None, q='cmd__0__icontains')
|
||||
pwd: Optional[str] = Field(None, q='pwd__icontains')
|
||||
cmd_version: Optional[str] = Field(None, q='cmd_version')
|
||||
|
||||
created: Optional[datetime] = Field(None, q='updated')
|
||||
created__gte: Optional[datetime] = Field(None, q='updated__gte')
|
||||
created__lt: Optional[datetime] = Field(None, q='updated__lt')
|
||||
|
||||
|
||||
@router.get("/archiveresults", response=List[ArchiveResultSchema])
|
||||
@paginate
|
||||
def list_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)):
|
||||
qs = ArchiveResult.objects.all()
|
||||
results = filters.filter(qs)
|
||||
return results
|
||||
|
||||
|
||||
@router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
|
||||
def get_archiveresult(request, archiveresult_id: str):
|
||||
archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
|
||||
return archiveresult
|
||||
|
||||
|
||||
# @router.post("/archiveresult", response=ArchiveResultSchema)
|
||||
# def create_archiveresult(request, payload: ArchiveResultSchema):
|
||||
# archiveresult = ArchiveResult.objects.create(**payload.dict())
|
||||
# return archiveresult
|
||||
#
|
||||
# @router.put("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
|
||||
# def update_archiveresult(request, archiveresult_id: str, payload: ArchiveResultSchema):
|
||||
# archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
|
||||
#
|
||||
# for attr, value in payload.dict().items():
|
||||
# setattr(archiveresult, attr, value)
|
||||
# archiveresult.save()
|
||||
#
|
||||
# return archiveresult
|
||||
#
|
||||
# @router.delete("/archiveresult/{archiveresult_id}")
|
||||
# def delete_archiveresult(request, archiveresult_id: str):
|
||||
# archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
|
||||
# archiveresult.delete()
|
||||
# return {"success": True}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
### Snapshot #########################################################################
|
||||
|
||||
|
||||
class SnapshotSchema(Schema):
|
||||
id: UUID
|
||||
|
||||
url: str
|
||||
tags: str
|
||||
title: Optional[str]
|
||||
timestamp: str
|
||||
bookmarked: datetime
|
||||
added: datetime
|
||||
updated: datetime
|
||||
archive_path: str
|
||||
|
||||
archiveresults: List[ArchiveResultSchema]
|
||||
|
||||
# @staticmethod
|
||||
# def resolve_id(obj):
|
||||
# return str(obj.id)
|
||||
|
||||
@staticmethod
|
||||
def resolve_tags(obj):
|
||||
return obj.tags_str()
|
||||
|
||||
@staticmethod
|
||||
def resolve_archiveresults(obj, context):
|
||||
if context['request'].with_archiveresults:
|
||||
return obj.archiveresult_set.all().distinct()
|
||||
return ArchiveResult.objects.none()
|
||||
|
||||
|
||||
class SnapshotFilterSchema(FilterSchema):
|
||||
id: Optional[UUID] = Field(None, q='id')
|
||||
|
||||
search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains'])
|
||||
url: Optional[str] = Field(None, q='url')
|
||||
tag: Optional[str] = Field(None, q='tags__name')
|
||||
title: Optional[str] = Field(None, q='title__icontains')
|
||||
|
||||
timestamp: Optional[str] = Field(None, q='timestamp__startswith')
|
||||
|
||||
added: Optional[datetime] = Field(None, q='added')
|
||||
added__gte: Optional[datetime] = Field(None, q='added__gte')
|
||||
added__lt: Optional[datetime] = Field(None, q='added__lt')
|
||||
|
||||
|
||||
@router.get("/snapshots", response=List[SnapshotSchema])
|
||||
@paginate
|
||||
def list_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=True):
|
||||
request.with_archiveresults = with_archiveresults
|
||||
|
||||
qs = Snapshot.objects.all()
|
||||
results = filters.filter(qs)
|
||||
return results
|
||||
|
||||
@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema)
|
||||
def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
|
||||
request.with_archiveresults = with_archiveresults
|
||||
snapshot = get_object_or_404(Snapshot, id=snapshot_id)
|
||||
return snapshot
|
||||
|
||||
|
||||
# @router.post("/snapshot", response=SnapshotSchema)
|
||||
# def create_snapshot(request, payload: SnapshotSchema):
|
||||
# snapshot = Snapshot.objects.create(**payload.dict())
|
||||
# return snapshot
|
||||
#
|
||||
# @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema)
|
||||
# def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema):
|
||||
# snapshot = get_object_or_404(Snapshot, id=snapshot_id)
|
||||
#
|
||||
# for attr, value in payload.dict().items():
|
||||
# setattr(snapshot, attr, value)
|
||||
# snapshot.save()
|
||||
#
|
||||
# return snapshot
|
||||
#
|
||||
# @router.delete("/snapshot/{snapshot_id}")
|
||||
# def delete_snapshot(request, snapshot_id: str):
|
||||
# snapshot = get_object_or_404(Snapshot, id=snapshot_id)
|
||||
# snapshot.delete()
|
||||
# return {"success": True}
|
||||
|
||||
|
||||
|
||||
### Tag #########################################################################
|
||||
|
||||
|
||||
class TagSchema(Schema):
|
||||
name: str
|
||||
slug: str
|
||||
|
||||
|
||||
@router.get("/tags", response=List[TagSchema])
|
||||
def list_tags(request):
|
||||
return Tag.objects.all()
|
|
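Usage note (not part of the commit): a hedged read-only sketch for the core-model endpoints, under the same assumptions as above (local server, API at /api/v1/, valid token). It uses the api_key query-parameter auth method (QueryParamTokenAuth in api/auth.py) and assumes django-ninja's default paginator wraps list responses in an "items" key.

    import requests

    BASE = "http://127.0.0.1:8000/api/v1"   # assumed local dev server
    params = {
        "api_key": "<your 32-char token>",   # QueryParamTokenAuth from api/auth.py
        "search": "example.com",             # SnapshotFilterSchema.search above
        "with_archiveresults": "false",
    }
    snapshots = requests.get(f"{BASE}/core/snapshots", params=params).json()

    # "items" key assumed from django-ninja's default pagination
    first_id = snapshots["items"][0]["id"]
    detail = requests.get(f"{BASE}/core/snapshot/{first_id}",
                          params={"api_key": "<your 32-char token>"}).json()
    print(detail["url"], detail["timestamp"])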
@@ -112,7 +112,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
        'LDAP_FIRSTNAME_ATTR': {'type': str, 'default': None},
        'LDAP_LASTNAME_ATTR': {'type': str, 'default': None},
        'LDAP_EMAIL_ATTR': {'type': str, 'default': None},
        'LDAP_CREATE_SUPERUSER': {'type': bool, 'default': False},
        'LDAP_CREATE_SUPERUSER': {'type': bool, 'default': False},
    },

    'ARCHIVE_METHOD_TOGGLES': {

@@ -265,7 +265,7 @@ CONFIG_ALIASES = {
    for key, default in section.items()
    for alias in default.get('aliases', ())
}
USER_CONFIG = {key for section in CONFIG_SCHEMA.values() for key in section.keys()}
USER_CONFIG = {key: section[key] for section in CONFIG_SCHEMA.values() for key in section.keys()}

def get_real_name(key: str) -> str:
    """get the current canonical name for a given deprecated config key"""

@@ -282,6 +282,7 @@ ARCHIVE_DIR_NAME = 'archive'
SOURCES_DIR_NAME = 'sources'
LOGS_DIR_NAME = 'logs'
PERSONAS_DIR_NAME = 'personas'
CRONTABS_DIR_NAME = 'crontabs'
SQL_INDEX_FILENAME = 'index.sqlite3'
JSON_INDEX_FILENAME = 'index.json'
HTML_INDEX_FILENAME = 'index.html'

@@ -355,7 +356,7 @@ ALLOWED_IN_OUTPUT_DIR = {
    'static',
    'sonic',
    'search.sqlite3',
    'crontabs',
    CRONTABS_DIR_NAME,
    ARCHIVE_DIR_NAME,
    SOURCES_DIR_NAME,
    LOGS_DIR_NAME,

@@ -598,7 +599,6 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {

    'DEPENDENCIES': {'default': lambda c: get_dependency_info(c)},
    'CODE_LOCATIONS': {'default': lambda c: get_code_locations(c)},
    'EXTERNAL_LOCATIONS': {'default': lambda c: get_external_locations(c)},
    'DATA_LOCATIONS': {'default': lambda c: get_data_locations(c)},
    'CHROME_OPTIONS': {'default': lambda c: get_chrome_info(c)},
    'CHROME_EXTRA_ARGS': {'default': lambda c: c['CHROME_EXTRA_ARGS'] or []},

@@ -985,11 +985,6 @@ def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict:
            'enabled': True,
            'is_valid': (config['TEMPLATES_DIR'] / 'static').exists(),
        },
        'CUSTOM_TEMPLATES_DIR': {
            'path': config['CUSTOM_TEMPLATES_DIR'] and Path(config['CUSTOM_TEMPLATES_DIR']).resolve(),
            'enabled': bool(config['CUSTOM_TEMPLATES_DIR']),
            'is_valid': config['CUSTOM_TEMPLATES_DIR'] and Path(config['CUSTOM_TEMPLATES_DIR']).exists(),
        },
        # 'NODE_MODULES_DIR': {
        #     'path': ,
        #     'enabled': ,

@@ -997,50 +992,25 @@ def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict:
        # },
    }

def get_external_locations(config: ConfigDict) -> ConfigValue:
    abspath = lambda path: None if path is None else Path(path).resolve()
    return {
        'CHROME_USER_DATA_DIR': {
            'path': abspath(config['CHROME_USER_DATA_DIR']),
            'enabled': config['USE_CHROME'] and config['CHROME_USER_DATA_DIR'],
            'is_valid': False if config['CHROME_USER_DATA_DIR'] is None else (Path(config['CHROME_USER_DATA_DIR']) / 'Default').exists(),
        },
        'COOKIES_FILE': {
            'path': abspath(config['COOKIES_FILE']),
            'enabled': config['USE_WGET'] and config['COOKIES_FILE'],
            'is_valid': False if config['COOKIES_FILE'] is None else Path(config['COOKIES_FILE']).exists(),
        },
    }

def get_data_locations(config: ConfigDict) -> ConfigValue:
    return {
        # OLD: migrating to personas
        # 'CHROME_USER_DATA_DIR': {
        #     'path': os.path.abspath(config['CHROME_USER_DATA_DIR']),
        #     'enabled': config['USE_CHROME'] and config['CHROME_USER_DATA_DIR'],
        #     'is_valid': False if config['CHROME_USER_DATA_DIR'] is None else (Path(config['CHROME_USER_DATA_DIR']) / 'Default').exists(),
        # },
        # 'COOKIES_FILE': {
        #     'path': os.path.abspath(config['COOKIES_FILE']),
        #     'enabled': config['USE_WGET'] and config['COOKIES_FILE'],
        #     'is_valid': False if config['COOKIES_FILE'] is None else Path(config['COOKIES_FILE']).exists(),
        # },
        'OUTPUT_DIR': {
            'path': config['OUTPUT_DIR'].resolve(),
            'enabled': True,
            'is_valid': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).exists(),
            'is_mount': os.path.ismount(config['OUTPUT_DIR'].resolve()),
        },
        'SOURCES_DIR': {
            'path': config['SOURCES_DIR'].resolve(),
            'enabled': True,
            'is_valid': config['SOURCES_DIR'].exists(),
        },
        'LOGS_DIR': {
            'path': config['LOGS_DIR'].resolve(),
            'enabled': True,
            'is_valid': config['LOGS_DIR'].exists(),
        },
        'PERSONAS_DIR': {
            'path': config['PERSONAS_DIR'].resolve(),
            'enabled': True,
            'is_valid': config['PERSONAS_DIR'].exists(),
        },
        'ARCHIVE_DIR': {
            'path': config['ARCHIVE_DIR'].resolve(),
            'enabled': True,
            'is_valid': config['ARCHIVE_DIR'].exists(),
            'is_mount': os.path.ismount(config['ARCHIVE_DIR'].resolve()),
        },
        'CONFIG_FILE': {
            'path': config['CONFIG_FILE'].resolve(),
            'enabled': True,

@@ -1052,6 +1022,38 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
            'is_valid': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).exists(),
            'is_mount': os.path.ismount((config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).resolve()),
        },
        'ARCHIVE_DIR': {
            'path': config['ARCHIVE_DIR'].resolve(),
            'enabled': True,
            'is_valid': config['ARCHIVE_DIR'].exists(),
            'is_mount': os.path.ismount(config['ARCHIVE_DIR'].resolve()),
        },
        'SOURCES_DIR': {
            'path': config['SOURCES_DIR'].resolve(),
            'enabled': True,
            'is_valid': config['SOURCES_DIR'].exists(),
        },
        'LOGS_DIR': {
            'path': config['LOGS_DIR'].resolve(),
            'enabled': True,
            'is_valid': config['LOGS_DIR'].exists(),
        },
        'CUSTOM_TEMPLATES_DIR': {
            'path': config['CUSTOM_TEMPLATES_DIR'] and Path(config['CUSTOM_TEMPLATES_DIR']).resolve(),
            'enabled': bool(config['CUSTOM_TEMPLATES_DIR']),
            'is_valid': config['CUSTOM_TEMPLATES_DIR'] and Path(config['CUSTOM_TEMPLATES_DIR']).exists(),
        },
        'PERSONAS_DIR': {
            'path': config['PERSONAS_DIR'].resolve(),
            'enabled': True,
            'is_valid': config['PERSONAS_DIR'].exists(),
        },
        # managed by bin/docker_entrypoint.sh and python-crontab:
        # 'CRONTABS_DIR': {
        #     'path': config['CRONTABS_DIR'].resolve(),
        #     'enabled': True,
        #     'is_valid': config['CRONTABS_DIR'].exists(),
        # },
    }

def get_dependency_info(config: ConfigDict) -> ConfigValue:

@@ -1366,6 +1368,7 @@ def check_data_folder(out_dir: Union[str, Path, None]=None, config: ConfigDict=C
        stderr('        archivebox init')
        raise SystemExit(2)


def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG):
    output_dir = out_dir or config['OUTPUT_DIR']
    from .index.sql import list_migrations
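# --- Illustrative sketch (editor's addition, not part of this diff) ---
# Each *_LOCATIONS helper above returns a dict of {NAME: {'path', 'enabled', 'is_valid', ...}}
# entries. A caller (e.g. the `archivebox version` output shown further down in this
# changeset) can consume it roughly like this:
from archivebox.config import CONFIG, get_data_locations

for name, info in get_data_locations(CONFIG).items():
    status = 'ok' if info.get('is_valid') else 'missing'
    print(f'{name:<24} {status:<8} {info["path"]}')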
@@ -14,12 +14,17 @@ from django.shortcuts import render, redirect
from django.contrib.auth import get_user_model
from django import forms

from signal_webhooks.apps import DjangoSignalWebhooksConfig
from signal_webhooks.admin import WebhookAdmin, WebhookModel

from ..util import htmldecode, urldecode, ansi_to_html

from core.models import Snapshot, ArchiveResult, Tag
from core.forms import AddLinkForm

from core.mixins import SearchResultsAdminMixin
from api.models import APIToken

from index.html import snapshot_icons
from logging_util import printable_filesize

@@ -98,10 +103,32 @@ class ArchiveBoxAdmin(admin.AdminSite):

        return render(template_name='add.html', request=request, context=context)


# monkey patch django-signals-webhooks to change how it shows up in Admin UI
DjangoSignalWebhooksConfig.verbose_name = 'API'
WebhookModel._meta.get_field('name').help_text = 'Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).'
WebhookModel._meta.get_field('signal').help_text = 'The type of event the webhook should fire for (e.g. Create, Update, Delete).'
WebhookModel._meta.get_field('ref').help_text = 'Dot import notation of the model the webhook should fire for (e.g. core.models.Snapshot or core.models.ArchiveResult).'
WebhookModel._meta.get_field('endpoint').help_text = 'External URL to POST the webhook notification to (e.g. https://someapp.example.com/webhook/some-webhook-receiver).'
WebhookModel._meta.app_label = 'api'


archivebox_admin = ArchiveBoxAdmin()
archivebox_admin.register(get_user_model())
archivebox_admin.register(APIToken)
archivebox_admin.register(WebhookModel, WebhookAdmin)
archivebox_admin.disable_action('delete_selected')


# patch admin with methods to add data views
from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls

archivebox_admin.get_app_list = get_app_list.__get__(archivebox_admin, ArchiveBoxAdmin)
archivebox_admin.admin_data_index_view = admin_data_index_view.__get__(archivebox_admin, ArchiveBoxAdmin)
archivebox_admin.get_admin_data_urls = get_admin_data_urls.__get__(archivebox_admin, ArchiveBoxAdmin)
archivebox_admin.get_urls = get_urls(archivebox_admin.get_urls).__get__(archivebox_admin, ArchiveBoxAdmin)


class ArchiveResultInline(admin.TabularInline):
    model = ArchiveResult


@@ -1,3 +1,5 @@
__package__ = 'archivebox.core'

from django.apps import AppConfig


@@ -5,6 +7,22 @@ class CoreConfig(AppConfig):
    name = 'core'

    def ready(self):
        # register our custom admin as the primary django admin
        from django.contrib import admin
        from django.contrib.admin import sites
        from core.admin import archivebox_admin

        admin.site = archivebox_admin
        sites.site = archivebox_admin


        # register signal handlers
        from .auth import register_signals

        register_signals()



# from django.contrib.admin.apps import AdminConfig
# class CoreAdminConfig(AdminConfig):
#     default_site = "core.admin.get_admin_site"
@@ -1,5 +1,6 @@
import os
from django.conf import settings
__package__ = 'archivebox.core'


from ..config import (
    LDAP
)

@@ -1,10 +1,8 @@
from django.conf import settings
from ..config import (
    LDAP_CREATE_SUPERUSER
)

def create_user(sender, user=None, ldap_user=None, **kwargs):

    if not user.id and LDAP_CREATE_SUPERUSER:
        user.is_superuser = True


@@ -18,6 +18,7 @@ from ..config import (
    CUSTOM_TEMPLATES_DIR,
    SQL_INDEX_FILENAME,
    OUTPUT_DIR,
    ARCHIVE_DIR,
    LOGS_DIR,
    TIMEZONE,

@@ -63,6 +64,9 @@ INSTALLED_APPS = [
    'core',
    'api',

    'admin_data_views',

    'signal_webhooks',
    'django_extensions',
]

@@ -173,6 +177,17 @@ if DEBUG_TOOLBAR:
    ]
    MIDDLEWARE = [*MIDDLEWARE, 'debug_toolbar.middleware.DebugToolbarMiddleware']


# https://github.com/bensi94/Django-Requests-Tracker (improved version of django-debug-toolbar)
# Must delete archivebox/templates/admin to use because it relies on some things we override
# visit /__requests_tracker__/ to access
DEBUG_REQUESTS_TRACKER = False
if DEBUG_REQUESTS_TRACKER:
    INSTALLED_APPS += ["requests_tracker"]
    MIDDLEWARE += ["requests_tracker.middleware.requests_tracker_middleware"]
    INTERNAL_IPS = ["127.0.0.1", "10.0.2.2", "0.0.0.0", "*"]


################################################################################
### Staticfile and Template Settings
################################################################################

@@ -242,6 +257,29 @@ CACHES = {
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'


STORAGES = {
    "default": {
        "BACKEND": "django.core.files.storage.FileSystemStorage",
    },
    "staticfiles": {
        "BACKEND": "django.contrib.staticfiles.storage.StaticFilesStorage",
    },
    "archive": {
        "BACKEND": "django.core.files.storage.FileSystemStorage",
        "OPTIONS": {
            "base_url": "/archive/",
            "location": ARCHIVE_DIR,
        },
    },
    # "personas": {
    #     "BACKEND": "django.core.files.storage.FileSystemStorage",
    #     "OPTIONS": {
    #         "base_url": "/personas/",
    #         "location": PERSONAS_DIR,
    #     },
    # },
}

################################################################################
### Security Settings
################################################################################

@@ -368,3 +406,32 @@ LOGGING = {
        }
    },
}


# Add default webhook configuration to the User model
SIGNAL_WEBHOOKS = {
    "HOOKS": {
        "django.contrib.auth.models.User": ...,
        "core.models.Snapshot": ...,
        "core.models.ArchiveResult": ...,
        "core.models.Tag": ...,
        "api.models.APIToken": ...,
    },
}


ADMIN_DATA_VIEWS = {
    "NAME": "configuration",
    "URLS": [
        {
            "route": "live/",
            "view": "core.views.live_config_list_view",
            "name": "live",
            "items": {
                "route": "<str:key>/",
                "view": "core.views.live_config_value_view",
                "name": "live_config_value",
            },
        },
    ],
}
@@ -1,4 +1,4 @@
from .admin import archivebox_admin
__package__ = 'archivebox.core'

from django.urls import path, include
from django.views import static

@@ -6,14 +6,9 @@ from django.contrib.staticfiles.urls import staticfiles_urlpatterns
from django.conf import settings
from django.views.generic.base import RedirectView

from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
from .admin import archivebox_admin
from .views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView

from ninja import NinjaAPI
from api.auth import GlobalAuth

api = NinjaAPI(auth=GlobalAuth())
api.add_router("/auth/", "api.auth.router")
api.add_router("/archive/", "api.archive.router")

# GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
# from config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE

@@ -43,10 +38,10 @@ urlpatterns = [
    path('accounts/', include('django.contrib.auth.urls')),
    path('admin/', archivebox_admin.urls),

    path("api/", api.urls),
    path("api/", include('api.urls')),

    path('health/', HealthCheckView.as_view(), name='healthcheck'),
    path('error/', lambda _: 1/0),
    path('error/', lambda *_: 1/0),

    # path('jet_api/', include('jet_django.urls')), Enable to use https://www.jetadmin.io/integrations/django

@@ -57,10 +52,10 @@ urlpatterns = [
urlpatterns += staticfiles_urlpatterns()

if settings.DEBUG_TOOLBAR:
    import debug_toolbar
    urlpatterns += [
        path('__debug__/', include(debug_toolbar.urls)),
    ]
    urlpatterns += [path('__debug__/', include("debug_toolbar.urls"))]

if settings.DEBUG_REQUESTS_TRACKER:
    urlpatterns += [path("__requests_tracker__/", include("requests_tracker.urls"))]


# # Proposed FUTURE URLs spec
@@ -1,10 +1,12 @@
__package__ = 'archivebox.core'

from typing import Callable

from io import StringIO
from contextlib import redirect_stdout

from django.shortcuts import render, redirect
from django.http import HttpResponse, Http404
from django.http import HttpRequest, HttpResponse, Http404
from django.utils.html import format_html, mark_safe
from django.views import View, static
from django.views.generic.list import ListView

@@ -14,6 +16,10 @@ from django.contrib.auth.mixins import UserPassesTestMixin
from django.views.decorators.csrf import csrf_exempt
from django.utils.decorators import method_decorator

from admin_data_views.typing import TableContext, ItemContext
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink


from core.models import Snapshot
from core.forms import AddLinkForm

@@ -26,6 +32,10 @@ from ..config import (
    COMMIT_HASH,
    FOOTER_INFO,
    SNAPSHOTS_PER_PAGE,
    CONFIG,
    CONFIG_SCHEMA,
    DYNAMIC_CONFIG_SCHEMA,
    USER_CONFIG,
)
from ..main import add
from ..util import base_url, ansi_to_html

@@ -124,9 +134,9 @@ class SnapshotView(View):
                '<center><br/><br/><br/>'
                f'Snapshot <a href="/archive/{snapshot.timestamp}/index.html" target="_top"><b><code>[{snapshot.timestamp}]</code></b></a> exists in DB, but resource <b><code>{snapshot.timestamp}/'
                '{}'
                f'</code></b> does not exist in <a href="/archive/{snapshot.timestamp}/" target="_top">snapshot dir</a> yet.<br/><br/>'
                'Maybe this resource type is not availabe for this Snapshot,<br/>or the archiving process has not completed yet?<br/>'
                f'<pre><code># run this cmd to finish archiving this Snapshot<br/>archivebox update -t timestamp {snapshot.timestamp}</code></pre><br/><br/>'
                f'</code></b> does not exist in the <a href="/archive/{snapshot.timestamp}/" target="_top">snapshot dir</a> yet.<br/><br/>'
                'It\'s possible that this resource type is not available for the Snapshot,<br/>or that the archiving process has not completed yet.<br/>'
                f'<pre><code># if interrupted, run this cmd to finish archiving this Snapshot<br/>archivebox update -t timestamp {snapshot.timestamp}</code></pre><br/><br/>'
                '<div class="text-align: left; width: 100%; max-width: 400px">'
                '<i><b>Next steps:</i></b><br/>'
                f'- list all the <a href="/archive/{snapshot.timestamp}/" target="_top">Snapshot files <code>.*</code></a><br/>'

@@ -312,3 +322,124 @@ class HealthCheckView(View):
            content_type='text/plain',
            status=200
        )


def find_config_section(key: str) -> str:
    matching_sections = [
        name for name, opts in CONFIG_SCHEMA.items() if key in opts
    ]
    section = matching_sections[0] if matching_sections else 'DYNAMIC'
    return section

def find_config_default(key: str) -> str:
    default_val = USER_CONFIG.get(key, {}).get('default', lambda: None)
    if isinstance(default_val, Callable):
        return None
    else:
        default_val = repr(default_val)
    return default_val

def find_config_type(key: str) -> str:
    if key in USER_CONFIG:
        return USER_CONFIG[key]['type'].__name__
    elif key in DYNAMIC_CONFIG_SCHEMA:
        return type(CONFIG[key]).__name__
    return 'str'

def key_is_safe(key: str) -> bool:
    for term in ('key', 'password', 'secret', 'token'):
        if term in key.lower():
            return False
    return True

@render_with_table_view
def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:

    assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'

    rows = {
        "Section": [],
        "Key": [],
        "Type": [],
        "Value": [],
        "Default": [],
        # "Documentation": [],
        "Aliases": [],
    }

    for section in CONFIG_SCHEMA.keys():
        for key in CONFIG_SCHEMA[section].keys():
            rows['Section'].append(section.replace('_', ' ').title().replace(' Config', ''))
            rows['Key'].append(ItemLink(key, key=key))
            rows['Type'].append(mark_safe(f'<code>{find_config_type(key)}</code>'))
            rows['Value'].append(mark_safe(f'<code>{CONFIG[key]}</code>') if key_is_safe(key) else '******** (redacted)')
            rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+%27{key}%27&type=code"><code style="text-decoration: underline">{find_config_default(key) or 'See here...'}</code></a>'))
            # rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
            rows['Aliases'].append(', '.join(CONFIG_SCHEMA[section][key].get('aliases', [])))

    section = 'DYNAMIC'
    for key in DYNAMIC_CONFIG_SCHEMA.keys():
        rows['Section'].append(section.replace('_', ' ').title().replace(' Config', ''))
        rows['Key'].append(ItemLink(key, key=key))
        rows['Type'].append(mark_safe(f'<code>{find_config_type(key)}</code>'))
        rows['Value'].append(mark_safe(f'<code>{CONFIG[key]}</code>') if key_is_safe(key) else '******** (redacted)')
        rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+%27{key}%27&type=code"><code style="text-decoration: underline">{find_config_default(key) or 'See here...'}</code></a>'))
        # rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
        rows['Aliases'].append(ItemLink(key, key=key) if key in USER_CONFIG else '')

    return TableContext(
        title="Computed Configuration Values",
        table=rows,
    )

@render_with_item_view
def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:

    assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'

    aliases = USER_CONFIG.get(key, {}).get("aliases", [])

    return ItemContext(
        slug=key,
        title=key,
        data=[
            {
                "name": mark_safe(f'data / ArchiveBox.conf [{find_config_section(key)}] <b><code style="color: lightgray">{key}</code></b>' if key in USER_CONFIG else f'[DYNAMIC CONFIG] <b><code style="color: lightgray">{key}</code></b> <small>(calculated at runtime)</small>'),
                "description": None,
                "fields": {
                    'Key': key,
                    'Type': find_config_type(key),
                    'Value': CONFIG[key] if key_is_safe(key) else '********',
                },
                "help_texts": {
                    'Key': mark_safe(f'''
                        <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">Documentation</a>
                        <span style="display: {"inline" if aliases else "none"}">
                            Aliases: {", ".join(aliases)}
                        </span>
                    '''),
                    'Type': mark_safe(f'''
                        <a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+%27{key}%27&type=code">
                            See full definition in <code>archivebox/config.py</code>...
                        </a>
                    '''),
                    'Value': mark_safe(f'''
                        {'<b style="color: red">Value is redacted for your security. (Passwords, secrets, API tokens, etc. cannot be viewed in the Web UI)</b><br/><br/>' if not key_is_safe(key) else ''}
                        Default: <a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+%27{key}%27&type=code">
                            <code>{find_config_default(key) or 'See 1here...'}</code>
                        </a>
                        <br/><br/>
                        <p style="display: {"block" if key in USER_CONFIG else "none"}">
                            <i>To change this value, edit <code>data/ArchiveBox.conf</code> or run:</i>
                            <br/><br/>
                            <code>archivebox config --set {key}="{
                                val.strip("'")
                                if (val := find_config_default(key)) else
                                (repr(CONFIG[key] if key_is_safe(key) else '********')).strip("'")
                            }"</code>
                        </p>
                    '''),
                },
            },
        ],
    )
@@ -4,6 +4,7 @@ WARNING: THIS FILE IS ALL LEGACY CODE TO BE REMOVED.

DO NOT ADD ANY NEW FEATURES TO THIS FILE, NEW CODE GOES HERE: core/models.py

These are the old types we used to use before ArchiveBox v0.4 (before we switched to Django).
"""

__package__ = 'archivebox.index'

@@ -494,12 +494,12 @@ def log_removal_started(links: List["Link"], yes: bool, delete: bool):
    if delete:
        file_counts = [link.num_outputs for link in links if Path(link.link_dir).exists()]
        print(
            f'    {len(links)} Links will be de-listed from the main index, and their archived content folders will be deleted from disk.\n' +
            f'    {len(links)} Links will be de-listed from the main index, and their archived content folders will be deleted from disk.\n'
            f'    ({len(file_counts)} data folders with {sum(file_counts)} archived files will be deleted!)'
        )
    else:
        print(
            '    Matching links will be de-listed from the main index, but their archived content folders will remain in place on disk.\n' +
            '    Matching links will be de-listed from the main index, but their archived content folders will remain in place on disk.\n'
            '    (Pass --delete if you also want to permanently delete the data folders)'
        )


@@ -104,7 +104,6 @@ from .config import (
    COMMIT_HASH,
    BUILD_TIME,
    CODE_LOCATIONS,
    EXTERNAL_LOCATIONS,
    DATA_LOCATIONS,
    DEPENDENCIES,
    CHROME_BINARY,

@@ -231,7 +230,7 @@ def version(quiet: bool=False,
    p = platform.uname()
    print(
        'ArchiveBox v{}'.format(get_version(CONFIG)),
        *((f'COMMIT_HASH={COMMIT_HASH[:7]}',) if COMMIT_HASH else ()),
        f'COMMIT_HASH={COMMIT_HASH[:7] if COMMIT_HASH else "unknown"}',
        f'BUILD_TIME={BUILD_TIME}',
    )
    print(

@@ -272,11 +271,6 @@ def version(quiet: bool=False,
        for name, path in CODE_LOCATIONS.items():
            print(printable_folder_status(name, path))

        print()
        print('{white}[i] Secrets locations:{reset}'.format(**ANSI))
        for name, path in EXTERNAL_LOCATIONS.items():
            print(printable_folder_status(name, path))

        print()
        if DATA_LOCATIONS['OUTPUT_DIR']['is_valid']:
            print('{white}[i] Data locations:{reset}'.format(**ANSI))

@@ -695,7 +689,7 @@ def add(urls: Union[str, List[str]],
    if CAN_UPGRADE:
        hint(f"There's a new version of ArchiveBox available! Your current version is {VERSION}. You can upgrade to {VERSIONS_AVAILABLE['recommended_version']['tag_name']} ({VERSIONS_AVAILABLE['recommended_version']['html_url']}). For more on how to upgrade: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\n")

    return all_links
    return new_links

@enforce_types
def remove(filter_str: Optional[str]=None,

@@ -1362,7 +1356,7 @@ def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
    if (args and "createsuperuser" in args) and (IN_DOCKER and not IS_TTY):
        stderr('[!] Warning: you need to pass -it to use interactive commands in docker', color='lightyellow')
        stderr('    docker run -it archivebox manage {}'.format(' '.join(args or ['...'])), color='lightyellow')
        stderr()
        stderr('')

    execute_from_command_line([f'{ARCHIVEBOX_BINARY} manage', *(args or ['help'])])
@@ -7,7 +7,7 @@ if __name__ == '__main__':
    # versions of ./manage.py commands whenever possible. When that's not possible
    # (e.g. makemigrations), you can comment out this check temporarily

    if not ('makemigrations' in sys.argv or 'migrate' in sys.argv):
    if not ('makemigrations' in sys.argv or 'migrate' in sys.argv or 'startapp' in sys.argv):
        print("[X] Don't run ./manage.py directly (unless you are a developer running makemigrations):")
        print()
        print('    Hint: Use these archivebox CLI commands instead of the ./manage.py equivalents:')

@@ -7,7 +7,6 @@ For examples of supported import formats see tests/.

__package__ = 'archivebox.parsers'

import re
from io import StringIO

from typing import IO, Tuple, List, Optional

@@ -28,7 +27,6 @@ from ..util import (
    htmldecode,
    download_url,
    enforce_types,
    URL_REGEX,
)
from ..index.schema import Link
from ..logging_util import TimedProgress, log_source_saved

@@ -202,54 +200,3 @@ def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{ba
    log_source_saved(source_file=source_path)

    return source_path


# Check that plain text regex URL parsing works as expected
# this is last-line-of-defense to make sure the URL_REGEX isn't
# misbehaving due to some OS-level or environment level quirks (e.g. bad regex lib)
# the consequences of bad URL parsing could be disastrous and lead to many
# incorrect/badly parsed links being added to the archive, so this is worth the cost of checking
_test_url_strs = {
    'example.com': 0,
    '/example.com': 0,
    '//example.com': 0,
    ':/example.com': 0,
    '://example.com': 0,
    'htt://example8.com': 0,
    '/htt://example.com': 0,
    'https://example': 1,
    'https://localhost/2345': 1,
    'https://localhost:1234/123': 1,
    '://': 0,
    'https://': 0,
    'http://': 0,
    'ftp://': 0,
    'ftp://example.com': 0,
    'https://example.com': 1,
    'https://example.com/': 1,
    'https://a.example.com': 1,
    'https://a.example.com/': 1,
    'https://a.example.com/what/is/happening.html': 1,
    'https://a.example.com/what/ís/happening.html': 1,
    'https://a.example.com/what/is/happening.html?what=1&2%20b#höw-about-this=1a': 1,
    'https://a.example.com/what/is/happéning/?what=1&2%20b#how-aboüt-this=1a': 1,
    'HTtpS://a.example.com/what/is/happening/?what=1&2%20b#how-about-this=1af&2f%20b': 1,
    'https://example.com/?what=1#how-about-this=1&2%20baf': 1,
    'https://example.com?what=1#how-about-this=1&2%20baf': 1,
    '<test>http://example7.com</test>': 1,
    'https://<test>': 0,
    'https://[test]': 0,
    'http://"test"': 0,
    'http://\'test\'': 0,
    '[https://example8.com/what/is/this.php?what=1]': 1,
    '[and http://example9.com?what=1&other=3#and-thing=2]': 1,
    '<what>https://example10.com#and-thing=2 "</about>': 1,
    'abc<this["https://example11.com/what/is#and-thing=2?whoami=23&where=1"]that>def': 1,
    'sdflkf[what](https://example12.com/who/what.php?whoami=1#whatami=2)?am=hi': 1,
    '<or>http://examplehttp://15.badc</that>': 2,
    'https://a.example.com/one.html?url=http://example.com/inside/of/another?=http://': 2,
    '[https://a.example.com/one.html?url=http://example.com/inside/of/another?=](http://a.example.com)': 3,
}
for url_str, num_urls in _test_url_strs.items():
    assert len(re.findall(URL_REGEX, url_str)) == num_urls, (
        f'{url_str} does not contain {num_urls} urls')
@@ -10,7 +10,7 @@ from ..index.schema import Link
from ..util import (
    htmldecode,
    enforce_types,
    URL_REGEX,
    find_all_urls,
)
from html.parser import HTMLParser
from urllib.parse import urljoin

@@ -40,10 +40,22 @@ def parse_generic_html_export(html_file: IO[str], root_url: Optional[str]=None,
        parser.feed(line)
        for url in parser.urls:
            if root_url:
                # resolve relative urls /home.html -> https://example.com/home.html
                url = urljoin(root_url, url)

            for archivable_url in re.findall(URL_REGEX, url):
                url_is_absolute = (url.lower().startswith('http://') or url.lower().startswith('https://'))
                # url = https://abc.com                              => True
                # url = /page.php?next=https://example.com           => False

                if not url_is_absolute:  # resolve it by joining it with root_url
                    relative_path = url

                    url = urljoin(root_url, relative_path)  # https://example.com/somepage.html + /home.html
                    # => https://example.com/home.html

                    # special case to handle bug around // handling, crucial for urls that contain sub-urls
                    # e.g. https://web.archive.org/web/https://example.com
                    if did_urljoin_misbehave(root_url, relative_path, url):
                        url = fix_urljoin_bug(url)

            for archivable_url in find_all_urls(url):
                yield Link(
                    url=htmldecode(archivable_url),
                    timestamp=str(datetime.now(timezone.utc).timestamp()),

@@ -56,3 +68,74 @@ def parse_generic_html_export(html_file: IO[str], root_url: Optional[str]=None,
KEY = 'html'
NAME = 'Generic HTML'
PARSER = parse_generic_html_export


#### WORKAROUND CODE FOR https://github.com/python/cpython/issues/96015 ####

def did_urljoin_misbehave(root_url: str, relative_path: str, final_url: str) -> bool:
    """
    Handle urljoin edge case bug where multiple slashes get turned into a single slash:
    - https://github.com/python/cpython/issues/96015
    - https://github.com/ArchiveBox/ArchiveBox/issues/1411

    This workaround only fixes the most common case of a sub-URL inside an outer URL, e.g.:
        https://web.archive.org/web/https://example.com/some/inner/url

    But there are other valid URLs containing // that are not fixed by this workaround, e.g.:
        https://example.com/drives/C//some/file
    """

    # if relative path is actually an absolute url, cut off its own scheme so we check the path component only
    relative_path = relative_path.lower()
    if relative_path.startswith('http://') or relative_path.startswith('https://'):
        relative_path = relative_path.split('://', 1)[-1]

    # TODO: properly fix all double // getting stripped by urljoin, not just ://
    original_path_had_suburl = '://' in relative_path
    original_root_had_suburl = '://' in root_url[8:]    # ignore first 8 chars because root always starts with https://
    final_joined_has_suburl = '://' in final_url[8:]    # ignore first 8 chars because final always starts with https://

    urljoin_broke_suburls = (
        (original_root_had_suburl or original_path_had_suburl)
        and not final_joined_has_suburl
    )
    return urljoin_broke_suburls


def fix_urljoin_bug(url: str, nesting_limit=5):
    """
    recursively replace broken suburls .../http:/... with .../http://...

    basically equivalent to this for 99.9% of cases:
        url = url.replace('/http:/', '/http://')
        url = url.replace('/https:/', '/https://')
    except this handles:
        other schemes besides http/https (e.g. https://example.com/link/git+ssh://github.com/example)
        other preceding separators besides / (e.g. https://example.com/login/?next=https://example.com/home)
        fixing multiple suburls recursively
    """
    input_url = url
    for _ in range(nesting_limit):
        url = re.sub(
            r'(?P<root>.+?)'                              # https://web.archive.org/web
            + r'(?P<separator>[-=/_&+%$#@!*\(\\])'        # /
            + r'(?P<subscheme>[a-zA-Z0-9+_-]{1,32}?):/'   # http:/
            + r'(?P<suburl>[^/\\]+)',                     # example.com
            r"\1\2\3://\4",
            input_url,
            re.IGNORECASE | re.UNICODE,
        )
        if url == input_url:
            break  # nothing left to replace, all suburls are fixed
        input_url = url

    return url


# sanity check to make sure workaround code works as expected and doesnt introduce *more* bugs
assert did_urljoin_misbehave('https://web.archive.org/web/https://example.com', 'abc.html', 'https://web.archive.org/web/https:/example.com/abc.html') == True
assert did_urljoin_misbehave('http://example.com', 'https://web.archive.org/web/http://example.com/abc.html', 'https://web.archive.org/web/http:/example.com/abc.html') == True
assert fix_urljoin_bug('https:/example.com') == 'https:/example.com'  # should not modify original url's scheme, only sub-urls
assert fix_urljoin_bug('https://web.archive.org/web/https:/example.com/abc.html') == 'https://web.archive.org/web/https://example.com/abc.html'
assert fix_urljoin_bug('http://example.com/link/git+ssh:/github.com/example?next=ftp:/example.com') == 'http://example.com/link/git+ssh://github.com/example?next=ftp://example.com'
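# --- Illustrative sketch (editor's addition, not part of this diff) ---
# Exercising the two workaround helpers above with the same web.archive.org-style
# sub-URL case that the sanity checks use, to show the intended before/after shape.
broken = 'https://web.archive.org/web/https:/example.com/abc.html'   # inner '//' collapsed to '/' by urljoin
if did_urljoin_misbehave('https://web.archive.org/web/https://example.com', 'abc.html', broken):
    print(fix_urljoin_bug(broken))
    # -> 'https://web.archive.org/web/https://example.com/abc.html'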
@@ -72,21 +72,13 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]:

    json_file.seek(0)

    try:
        links = json.load(json_file)
        if type(links) != list:
            raise Exception('JSON parser expects list of objects, maybe this is JSONL?')
    except json.decoder.JSONDecodeError:
        # sometimes the first line is a comment or other junk, so try without
        json_file.seek(0)
        first_line = json_file.readline()
        #print('      > Trying JSON parser without first line: "', first_line.strip(), '"', sep= '')
        links = json.load(json_file)
        # we may fail again, which means we really don't know what to do

    links = json.load(json_file)
    if type(links) != list:
        raise Exception('JSON parser expects list of objects, maybe this is JSONL?')

    for link in links:
        if link:
            yield jsonObjectToLink(link,json_file.name)
            yield jsonObjectToLink(link, json_file.name)

KEY = 'json'
NAME = 'Generic JSON'

@@ -3,11 +3,9 @@ __package__ = 'archivebox.parsers'
import json

from typing import IO, Iterable
from datetime import datetime, timezone

from ..index.schema import Link
from ..util import (
    htmldecode,
    enforce_types,
)


@@ -1,8 +1,6 @@
__package__ = 'archivebox.parsers'
__description__ = 'Plain Text'

import re

from typing import IO, Iterable
from datetime import datetime, timezone
from pathlib import Path

@@ -11,7 +9,7 @@ from ..index.schema import Link
from ..util import (
    htmldecode,
    enforce_types,
    URL_REGEX
    find_all_urls,
)


@@ -39,7 +37,7 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]:
            pass

        # otherwise look for anything that looks like a URL in the line
        for url in re.findall(URL_REGEX, line):
        for url in find_all_urls(line):
            yield Link(
                url=htmldecode(url),
                timestamp=str(datetime.now(timezone.utc).timestamp()),

@@ -48,17 +46,6 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]:
                sources=[text_file.name],
            )

        # look inside the URL for any sub-urls, e.g. for archive.org links
        # https://web.archive.org/web/20200531203453/https://www.reddit.com/r/socialism/comments/gu24ke/nypd_officers_claim_they_are_protecting_the_rule/fsfq0sw/
        # -> https://www.reddit.com/r/socialism/comments/gu24ke/nypd_officers_claim_they_are_protecting_the_rule/fsfq0sw/
        for sub_url in re.findall(URL_REGEX, line[1:]):
            yield Link(
                url=htmldecode(sub_url),
                timestamp=str(datetime.now(timezone.utc).timestamp()),
                title=None,
                tags=None,
                sources=[text_file.name],
            )

KEY = 'txt'
NAME = 'Generic TXT'
@@ -6,6 +6,7 @@
    <a href="/admin/core/tag/">Tags</a> |
    <a href="/admin/core/archiveresult/?o=-1">Log</a>
    <a href="{% url 'Docs' %}" target="_blank" rel="noopener noreferrer">Docs</a> |
    <a href="/api">API</a> |
    <a href="{% url 'public-index' %}">Public</a> |
    <a href="/admin/">Admin</a>

@@ -16,7 +17,7 @@
{% endblock %}
{% block userlinks %}
    {% if user.has_usable_password %}
        <a href="{% url 'admin:password_change' %}">Account</a> /
        <a href="{% url 'admin:password_change' %}" title="Change your account password">Account</a> /
    {% endif %}
    <a href="{% url 'admin:logout' %}">{% trans 'Log out' %}</a>
{% endblock %}
@@ -62,12 +62,12 @@ COLOR_REGEX = re.compile(r'\[(?P<arg_1>\d+)(;(?P<arg_2>\d+)(;(?P<arg_3>\d+))?)?m

# https://mathiasbynens.be/demo/url-regex
URL_REGEX = re.compile(
    r'(?=(' +
    r'http[s]?://' +             # start matching from allowed schemes
    r'(?:[a-zA-Z]|[0-9]' +       # followed by allowed alphanum characters
    r'|[-_$@.&+!*\(\),]' +       # or allowed symbols (keep hyphen first to match literal hyphen)
    r'|[^\u0000-\u007F])+' +     # or allowed unicode bytes
    r'[^\]\[<>"\'\s]+' +         # stop parsing at these symbols
    r'(?=('
    r'http[s]?://'               # start matching from allowed schemes
    r'(?:[a-zA-Z]|[0-9]'         # followed by allowed alphanum characters
    r'|[-_$@.&+!*\(\),]'         # or allowed symbols (keep hyphen first to match literal hyphen)
    r'|[^\u0000-\u007F])+'       # or allowed unicode bytes
    r'[^\]\[<>"\'\s]+'           # stop parsing at these symbols
    r'))',
    re.IGNORECASE | re.UNICODE,
)

@@ -90,6 +90,11 @@ def fix_url_from_markdown(url_str: str) -> str:
    helpful to fix URLs parsed from markdown e.g.
    input:  https://wikipedia.org/en/some_article_(Disambiguation).html?abc=def).somemoretext
    result: https://wikipedia.org/en/some_article_(Disambiguation).html?abc=def

    IMPORTANT ASSUMPTION: valid urls wont have unbalanced or incorrectly nested parentheses
    e.g. this will fail the user actually wants to ingest a url like 'https://example.com/some_wei)(rd_url'
    in that case it will return https://example.com/some_wei (truncated up to the first unbalanced paren)
    This assumption is true 99.9999% of the time, and for the rare edge case the user can use url_list parser.
    """
    trimmed_url = url_str

@@ -353,7 +358,8 @@ def chrome_cleanup():
    if IN_DOCKER and lexists("/home/archivebox/.config/chromium/SingletonLock"):
        remove_file("/home/archivebox/.config/chromium/SingletonLock")

def ansi_to_html(text):
@enforce_types
def ansi_to_html(text: str) -> str:
    """
    Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html
    """

@@ -439,11 +445,14 @@ class ExtendedEncoder(pyjson.JSONEncoder):


### URL PARSING TESTS / ASSERTIONS
# they run at runtime because I like having them inline in this file,
# I like the peace of mind knowing it's enforced at runtime across all OS's (in case the regex engine ever has any weird locale-specific quirks),
# and these assertions are basically instant, so not a big performance cost to do it on startup

assert fix_url_from_markdown('/a(b)c).x(y)z') == '/a(b)c'
# Check that plain text regex URL parsing works as expected
# this is last-line-of-defense to make sure the URL_REGEX isn't
# misbehaving due to some OS-level or environment level quirks (e.g. regex engine / cpython / locale differences)
# the consequences of bad URL parsing could be disastrous and lead to many
# incorrect/badly parsed links being added to the archive, so this is worth the cost of checking

assert fix_url_from_markdown('http://example.com/a(b)c).x(y)z') == 'http://example.com/a(b)c'
assert fix_url_from_markdown('https://wikipedia.org/en/some_article_(Disambiguation).html?abc=def).link(with)_trailingtext') == 'https://wikipedia.org/en/some_article_(Disambiguation).html?abc=def'

URL_REGEX_TESTS = [

@@ -482,3 +491,50 @@ URL_REGEX_TESTS = [
for urls_str, expected_url_matches in URL_REGEX_TESTS:
    url_matches = list(find_all_urls(urls_str))
    assert url_matches == expected_url_matches, 'FAILED URL_REGEX CHECK!'


# More test cases
_test_url_strs = {
    'example.com': 0,
    '/example.com': 0,
    '//example.com': 0,
    ':/example.com': 0,
    '://example.com': 0,
    'htt://example8.com': 0,
    '/htt://example.com': 0,
    'https://example': 1,
    'https://localhost/2345': 1,
    'https://localhost:1234/123': 1,
    '://': 0,
    'https://': 0,
    'http://': 0,
    'ftp://': 0,
    'ftp://example.com': 0,
    'https://example.com': 1,
    'https://example.com/': 1,
    'https://a.example.com': 1,
    'https://a.example.com/': 1,
    'https://a.example.com/what/is/happening.html': 1,
    'https://a.example.com/what/ís/happening.html': 1,
    'https://a.example.com/what/is/happening.html?what=1&2%20b#höw-about-this=1a': 1,
    'https://a.example.com/what/is/happéning/?what=1&2%20b#how-aboüt-this=1a': 1,
    'HTtpS://a.example.com/what/is/happening/?what=1&2%20b#how-about-this=1af&2f%20b': 1,
    'https://example.com/?what=1#how-about-this=1&2%20baf': 1,
    'https://example.com?what=1#how-about-this=1&2%20baf': 1,
    '<test>http://example7.com</test>': 1,
    'https://<test>': 0,
    'https://[test]': 0,
    'http://"test"': 0,
    'http://\'test\'': 0,
    '[https://example8.com/what/is/this.php?what=1]': 1,
    '[and http://example9.com?what=1&other=3#and-thing=2]': 1,
    '<what>https://example10.com#and-thing=2 "</about>': 1,
    'abc<this["https://example11.com/what/is#and-thing=2?whoami=23&where=1"]that>def': 1,
    'sdflkf[what](https://example12.com/who/what.php?whoami=1#whatami=2)?am=hi': 1,
    '<or>http://examplehttp://15.badc</that>': 2,
    'https://a.example.com/one.html?url=http://example.com/inside/of/another?=http://': 2,
    '[https://a.example.com/one.html?url=http://example.com/inside/of/another?=](http://a.example.com)': 3,
}
for url_str, num_urls in _test_url_strs.items():
    assert len(list(find_all_urls(url_str))) == num_urls, (
        f'{url_str} does not contain {num_urls} urls')
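# --- Illustrative sketch (editor's addition, not part of this diff) ---
# The lookahead group in URL_REGEX above is what lets overlapping/nested sub-URLs be
# captured, which the nested-URL test cases rely on. find_all_urls is assumed to be the
# helper defined elsewhere in util.py that wraps URL_REGEX.
line = 'https://web.archive.org/web/https://example.com/page'
for url in find_all_urls(line):
    print(url)
# expected, per the URL_REGEX_TESTS pattern above: both the outer web.archive.org URL
# and the inner https://example.com/page sub-URL are yielded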
@@ -18,7 +18,7 @@ which docker > /dev/null || exit 1
which jq > /dev/null || exit 1
# which pdm > /dev/null || exit 1

SUPPORTED_PLATFORMS="linux/amd64,linux/arm64,linux/arm/v7"
SUPPORTED_PLATFORMS="linux/amd64,linux/arm64"

TAG_NAME="${1:-$(git rev-parse --abbrev-ref HEAD)}"
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"

@@ -80,20 +80,20 @@ echo "[+] Building archivebox:$VERSION docker image..."
# docker build . --no-cache -t archivebox-dev \
# replace --load with --push to deploy
docker buildx build --platform "$SELECTED_PLATFORMS" --load . \
    -t archivebox/archivebox \
    # -t archivebox/archivebox \
    -t archivebox/archivebox:$TAG_NAME \
    -t archivebox/archivebox:$VERSION \
    -t archivebox/archivebox:$SHORT_VERSION \
    # -t archivebox/archivebox:$VERSION \
    # -t archivebox/archivebox:$SHORT_VERSION \
    -t archivebox/archivebox:$GIT_SHA \
    -t archivebox/archivebox:latest \
    -t nikisweeting/archivebox \
    # -t archivebox/archivebox:latest \
    # -t nikisweeting/archivebox \
    -t nikisweeting/archivebox:$TAG_NAME \
    -t nikisweeting/archivebox:$VERSION \
    -t nikisweeting/archivebox:$SHORT_VERSION \
    # -t nikisweeting/archivebox:$VERSION \
    # -t nikisweeting/archivebox:$SHORT_VERSION \
    -t nikisweeting/archivebox:$GIT_SHA \
    -t nikisweeting/archivebox:latest \
    # -t nikisweeting/archivebox:latest \
    -t ghcr.io/archivebox/archivebox/archivebox:$TAG_NAME \
    -t ghcr.io/archivebox/archivebox/archivebox:$VERSION \
    -t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION \
    # -t ghcr.io/archivebox/archivebox/archivebox:$VERSION \
    # -t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION \
    -t ghcr.io/archivebox/archivebox/archivebox:$GIT_SHA \
    -t ghcr.io/archivebox/archivebox/archivebox:latest
    # -t ghcr.io/archivebox/archivebox/archivebox:latest

@@ -18,6 +18,7 @@
# https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html
# set -o xtrace
# set -o nounset
shopt -s nullglob
set -o errexit
set -o errtrace
set -o pipefail

@@ -15,7 +15,7 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
source "$DIR/.venv/bin/activate"

echo "[*] Running flake8..."
cd archivebox
cd "$DIR/archivebox"
flake8 . && echo "√ No errors found."

echo

@@ -48,7 +48,7 @@ echo

echo "[+] Generating dev & prod requirements.txt & pdm.lock from pyproject.toml..."
pip install --upgrade pip setuptools
pdm self update
pdm self update >/dev/null 2>&1 || true
pdm venv create 3.12
echo
echo "pyproject.toml:    archivebox $(grep 'version = ' pyproject.toml | awk '{print $3}' | jq -r)"

@@ -73,7 +73,7 @@ cp ./pdm.dev.lock ./pip_dist/
cp ./requirements-dev.txt ./pip_dist/

echo
echo "[+]] Generating package-lock.json from package.json..."
echo "[+] Generating package-lock.json from package.json..."
npm install -g npm
echo
echo "package.json:      archivebox $(jq -r '.version' package.json)"
@@ -27,9 +27,9 @@ if (which docker-compose > /dev/null && docker pull archivebox/archivebox:latest
    if [ -f "./index.sqlite3" ]; then
        mv -i ~/archivebox/* ~/archivebox/data/
    fi
    curl -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/main/docker-compose.yml' > docker-compose.yml
    curl -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/stable/docker-compose.yml' > docker-compose.yml
    mkdir -p ./etc
    curl -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/main/etc/sonic.cfg' > ./etc/sonic.cfg
    curl -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/stable/etc/sonic.cfg' > ./etc/sonic.cfg
    docker compose run --rm archivebox init --setup
    echo
    echo "[+] Starting ArchiveBox server using: docker compose up -d..."

@@ -48,17 +48,17 @@ services:
    #     $ docker compose restart archivebox_scheduler

    archivebox_scheduler:
        image: archivebox/archivebox:latest
        command: schedule --foreground --update --every=day
        environment:
            - TIMEOUT=120    # use a higher timeout than the main container to give slow tasks more time when retrying
            # - PUID=502     # set to your host user's UID & GID if you encounter permissions issues
            # - PGID=20
        volumes:
            - ./data:/data
        # cpus: 2            # uncomment / edit these values to limit scheduler container resource consumption
        # mem_limit: 2048m
        # restart: always
        image: archivebox/archivebox:latest
        command: schedule --foreground --update --every=day
        environment:
            - TIMEOUT=120    # use a higher timeout than the main container to give slow tasks more time when retrying
            # - PUID=502     # set to your host user's UID & GID if you encounter permissions issues
            # - PGID=20
        volumes:
            - ./data:/data
        # cpus: 2            # uncomment / edit these values to limit scheduler container resource consumption
        # mem_limit: 2048m
        # restart: always


    ### This runs the optional Sonic full-text search backend (much faster than default rg backend).

@@ -72,7 +72,7 @@ services:
            # not needed after first run / if you have already have ./etc/sonic.cfg present
            dockerfile_inline: |
                FROM quay.io/curl/curl:latest AS config_downloader
                RUN curl -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/main/etc/sonic.cfg' > /tmp/sonic.cfg
                RUN curl -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/stable/etc/sonic.cfg' > /tmp/sonic.cfg
                FROM valeriansaliou/sonic:latest
                COPY --from=config_downloader /tmp/sonic.cfg /etc/sonic.cfg
        expose:

@@ -99,7 +99,7 @@ services:
            # restricted to access from localhost by default because it has no authentication
            - 127.0.0.1:8080:8080


    ### Example: Put Nginx in front of the ArchiveBox server for SSL termination and static file serving.
    # You can also any other ingress provider for SSL like Apache, Caddy, Traefik, Cloudflare Tunnels, etc.

@@ -173,7 +173,7 @@ services:

    ### Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel to avoid IP blocks.
    # You can also use any other VPN that works at the docker IP level, e.g. Tailscale, OpenVPN, etc.

    # wireguard:
    #   image: linuxserver/wireguard:latest
    #   network_mode: 'service:archivebox'
2	docs

@@ -1 +1 @@
Subproject commit a1b69c51ba9b249c0b2a6efd141dbb792fc36ad2
Subproject commit f23abba9773b67ad9f2fd04d6f2e8e056dfa6521

50	package-lock.json (generated)
@ -25,9 +25,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@babel/runtime-corejs2": {
|
||||
"version": "7.24.4",
|
||||
"resolved": "https://registry.npmjs.org/@babel/runtime-corejs2/-/runtime-corejs2-7.24.4.tgz",
|
||||
"integrity": "sha512-ZCKqyUKt/Coimg+3Kafu43yNetgYnTXzNbEGAgxc81J5sI0qFNbQ613w7PNny+SmijAmGVroL0GDvx5rG/JI5Q==",
|
||||
"version": "7.24.5",
|
||||
"resolved": "https://registry.npmjs.org/@babel/runtime-corejs2/-/runtime-corejs2-7.24.5.tgz",
|
||||
"integrity": "sha512-cC9jiO6s/IN+xwCHYy1AGrcFJ4bwgIwb8HX1KaoEpRsznLlO4x9eBP6AX7RIeMSWlQqEj2WHox637OS8cDq6Ew==",
|
||||
"dependencies": {
|
||||
"core-js": "^2.6.12",
|
||||
"regenerator-runtime": "^0.14.0"
|
||||
|
@ -203,9 +203,9 @@
|
|||
"integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA=="
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "20.12.7",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz",
|
||||
"integrity": "sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg==",
|
||||
"version": "20.12.8",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.8.tgz",
|
||||
"integrity": "sha512-NU0rJLJnshZWdE/097cdCBbyW1h4hEg0xpovcoAQYHl8dnEyp/NAOiE45pvc+Bd1Dt+2r94v2eGFpQJ4R7g+2w==",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~5.26.4"
|
||||
|
@ -713,9 +713,9 @@
|
|||
"integrity": "sha512-3VdM/SXBZX2omc9JF9nOPCtDaYQ67BGp5CoLpIQlO2KCAPETs8TcDHacF26jXadGbvUteZzRTeos2fhID5+ucQ=="
|
||||
},
|
||||
"node_modules/dompurify": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.1.0.tgz",
|
||||
"integrity": "sha512-yoU4rhgPKCo+p5UrWWWNKiIq+ToGqmVVhk0PmMYBK4kRsR3/qhemNFL8f6CFmBd4gMwm3F4T7HBoydP5uY07fA=="
|
||||
"version": "3.1.2",
|
||||
"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.1.2.tgz",
|
||||
"integrity": "sha512-hLGGBI1tw5N8qTELr3blKjAML/LY4ANxksbS612UiJyDfyf/2D092Pvm+S7pmeTGJRqvlJkFzBoHBQKgQlOQVg=="
|
||||
},
|
||||
"node_modules/domutils": {
|
||||
"version": "1.5.1",
|
||||
|
@ -1655,6 +1655,26 @@
|
|||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/puppeteer-core/node_modules/ws": {
|
||||
"version": "8.16.0",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.16.0.tgz",
|
||||
"integrity": "sha512-HS0c//TP7Ina87TfiPUz1rQzMhHrl/SG2guqRcTOIUYD2q8uhUdNHZYJUaQ8aTGPzCh+c6oawMKW35nFl1dxyQ==",
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bufferutil": "^4.0.1",
|
||||
"utf-8-validate": ">=5.0.2"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bufferutil": {
|
||||
"optional": true
|
||||
},
|
||||
"utf-8-validate": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/qs": {
|
||||
"version": "6.5.3",
|
||||
"resolved": "https://registry.npmjs.org/qs/-/qs-6.5.3.tgz",
|
||||
|
@ -2071,9 +2091,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/tough-cookie": {
|
||||
"version": "4.1.3",
|
||||
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-4.1.3.tgz",
|
||||
"integrity": "sha512-aX/y5pVRkfRnfmuX+OdbSdXvPe6ieKX/G2s7e98f4poJHnqH3281gDPm/metm6E/WRamfx7WC4HUqkWHfQHprw==",
|
||||
"version": "4.1.4",
|
||||
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-4.1.4.tgz",
|
||||
"integrity": "sha512-Loo5UUvLD9ScZ6jh8beX1T6sO1w2/MpCRpEP7V280GKMVUQ0Jzar2U3UJPsrdbziLEMMhu3Ujnq//rhiFuIeag==",
|
||||
"dependencies": {
|
||||
"psl": "^1.1.33",
|
||||
"punycode": "^2.1.1",
|
||||
|
@ -2276,9 +2296,9 @@
|
|||
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="
|
||||
},
|
||||
"node_modules/ws": {
|
||||
"version": "8.16.0",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.16.0.tgz",
|
||||
"integrity": "sha512-HS0c//TP7Ina87TfiPUz1rQzMhHrl/SG2guqRcTOIUYD2q8uhUdNHZYJUaQ8aTGPzCh+c6oawMKW35nFl1dxyQ==",
|
||||
"version": "8.17.0",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.17.0.tgz",
|
||||
"integrity": "sha512-uJq6108EgZMAl20KagGkzCKfMEjxmKvZHG7Tlq0Z6nOky7YF7aq4mOx6xK8TJ/i1LeK4Qus7INktacctDgY8Ow==",
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
},
|
||||
|
pyproject.toml

@@ -12,32 +12,31 @@ readme = "README.md"
# pdm install
# pdm update --unconstrained
dependencies = [
# Last Bumped: 2024-04-25
# Base Framework and Language Dependencies
"setuptools>=69.5.1",
"django>=4.2.0,<5.0",
"django>=5.0.4,<6.0",
"django-ninja>=1.1.0",
"django-extensions>=3.2.3",
"mypy-extensions>=1.0.0",

# Python Helper Libraries
"requests>=2.31.0",
"dateparser>=1.0.0",
"feedparser>=6.0.11",
"w3lib>=1.22.0",
"w3lib>=2.1.2",

# Feature-Specific Dependencies
"python-crontab>=2.5.1", # for: archivebox schedule
"croniter>=0.3.34", # for: archivebox schedule
"ipython>5.0.0", # for: archivebox shell
"python-crontab>=3.0.0", # for: archivebox schedule
"croniter>=2.0.5", # for: archivebox schedule
"ipython>=8.23.0", # for: archivebox shell

# Extractor Dependencies
"yt-dlp>=2024.4.9", # for: media
"playwright>=1.43.0; platform_machine != 'armv7l'", # WARNING: playwright doesn't have any sdist, causes trouble on build systems that refuse to install wheel-only packages
# "playwright>=1.43.0; platform_machine != 'armv7l'", # WARNING: playwright doesn't have any sdist, causes trouble on build systems that refuse to install wheel-only packages
# TODO: add more extractors
# - gallery-dl
# - scihubdl
# - See Github issues for more...
"django-signal-webhooks>=0.3.0",
"django-admin-data-views>=0.3.1",
]

homepage = "https://github.com/ArchiveBox/ArchiveBox"

@@ -59,9 +58,6 @@ classifiers = [
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",

@@ -100,10 +96,10 @@ ldap = [
# pdm update --dev --unconstrained
[tool.pdm.dev-dependencies]
build = [
# "pdm", # usually installed by apt/brew, dont double-install with pip
"setuptools>=69.5.1",
"pip",
"wheel",
"pdm",
"homebrew-pypi-poet>=0.10.0", # for: generating archivebox.rb brewfile list of python packages
]
docs = [

@@ -115,10 +111,11 @@ debug = [
"django-debug-toolbar",
"djdt_flamegraph",
"ipdb",
"requests-tracker>=0.3.3",
]
test = [
"pdm[pytest]",
"pytest",
"bottle",
]
lint = [
"flake8",

@@ -126,6 +123,12 @@ lint = [
"django-stubs",
]

[tool.pdm.scripts]
lint = "./bin/lint.sh"
test = "./bin/test.sh"
# all = {composite = ["lint mypackage/", "test -v tests/"]}


[build-system]
requires = ["pdm-backend"]
build-backend = "pdm.backend"

@@ -134,11 +137,6 @@ build-backend = "pdm.backend"
archivebox = "archivebox.cli:main"


[tool.pdm.scripts]
lint = "./bin/lint.sh"
test = "./bin/test.sh"
# all = {composite = ["lint mypackage/", "test -v tests/"]}

[tool.pytest.ini_options]
testpaths = [ "tests" ]


@@ -154,6 +152,8 @@ explicit_package_bases = true
# exclude = "pdm/(pep582/|models/in_process/.+\\.py)"
plugins = ["mypy_django_plugin.main"]

[tool.django-stubs]
django_settings_module = "core.settings"


[project.urls]
requirements.txt

@@ -2,54 +2,59 @@
# Please do not edit it manually.

annotated-types==0.6.0
anyio==4.3.0
asgiref==3.8.1
asttokens==2.4.1
brotli==1.1.0; implementation_name == "cpython"
brotlicffi==1.1.0.0; implementation_name != "cpython"
certifi==2024.2.2
cffi==1.16.0; implementation_name != "cpython"
cffi==1.16.0; platform_python_implementation != "PyPy" or implementation_name != "cpython"
charset-normalizer==3.3.2
colorama==0.4.6; sys_platform == "win32"
croniter==2.0.5
cryptography==42.0.7
dateparser==1.2.0
decorator==5.1.1
django==4.2.11
django==5.0.4
django-auth-ldap==4.8.0
django-extensions==3.2.3
django-ninja==1.1.0
django-settings-holder==0.1.2
django-signal-webhooks==0.3.0
exceptiongroup==1.2.1; python_version < "3.11"
executing==2.0.1
feedparser==6.0.11
greenlet==3.0.3; platform_machine != "armv7l"
h11==0.14.0
httpcore==1.0.5
httpx==0.27.0
idna==3.7
ipython==8.23.0
ipython==8.24.0
jedi==0.19.1
matplotlib-inline==0.1.7
mutagen==1.47.0
mypy-extensions==1.0.0
parso==0.8.4
pexpect==4.9.0; sys_platform != "win32" and sys_platform != "emscripten"
playwright==1.43.0; platform_machine != "armv7l"
prompt-toolkit==3.0.43
ptyprocess==0.7.0; sys_platform != "win32" and sys_platform != "emscripten"
pure-eval==0.2.2
pyasn1==0.6.0
pyasn1-modules==0.4.0
pycparser==2.22; implementation_name != "cpython"
pycparser==2.22; platform_python_implementation != "PyPy" or implementation_name != "cpython"
pycryptodomex==3.20.0
pydantic==2.7.1
pydantic-core==2.18.2
pyee==11.1.0; platform_machine != "armv7l"
pygments==2.17.2
pygments==2.18.0
python-crontab==3.0.0
python-dateutil==2.9.0.post0
python-ldap==3.4.4
pytz==2024.1
regex==2024.4.16
regex==2024.4.28
requests==2.31.0
setuptools==69.5.1
sgmllib3k==1.0.0
six==1.16.0
sniffio==1.3.1
sonic-client==1.0.0
sqlparse==0.5.0
stack-data==0.6.3