diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 25aa5316..9ff04c51 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -75,7 +75,7 @@ jobs:
tags: ${{ steps.docker_meta.outputs.tags }}
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
- platforms: linux/amd64,linux/386,linux/arm64,linux/arm/v7
+ platforms: linux/amd64,linux/arm64,linux/arm/v7
- name: Image digest
run: echo ${{ steps.docker_build.outputs.digest }}
diff --git a/Dockerfile b/Dockerfile
index 507ee6ac..8cf2da30 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -63,6 +63,7 @@ RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add -
&& apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
nodejs \
+ # && npm install -g npm \
&& rm -rf /var/lib/apt/lists/*
# Install Node dependencies
@@ -82,6 +83,7 @@ ADD ./pip_dist/archivebox.egg-info/requires.txt "$CODE_DIR/pip_dist/archivebox.e
RUN apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
build-essential python-dev python3-dev \
+ # && pip install --upgrade pip \
&& grep -B 1000 -E '^$' "$CODE_DIR/pip_dist/archivebox.egg-info/requires.txt" | pip install --quiet -r /dev/stdin \
&& pip install --quiet "sonic-client==0.0.5" \
&& apt-get purge -y build-essential python-dev python3-dev \
diff --git a/MANIFEST.in b/MANIFEST.in
index c9ae1535..f33f160f 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,3 +2,5 @@ graft archivebox
global-exclude .DS_Store
global-exclude __pycache__
global-exclude *.pyc
+
+prune tests/
diff --git a/README.md b/README.md
index a83922a3..76b51be3 100644
--- a/README.md
+++ b/README.md
@@ -82,8 +82,9 @@ archivebox help
. . . . . . . . . . . . . . . . . . . . . . . . . . . .
+
-
+

@@ -266,10 +267,7 @@ No matter which install method you choose, they all roughly follow this 3-step p
-

-

-

-
+
. . . . . . . . . . . . . . . . . . . . . . . . . . . .
DEMO: https://archivebox.zervice.io
@@ -327,7 +325,15 @@ All of ArchiveBox's state (including the index, snapshot data, and config file)
The on-disk layout is optimized to be easy to browse by hand and durable long-term. The main index is a standard sqlite3 database (it can also be exported as static JSON/HTML), and the archive snapshots are organized by date-added timestamp in the `archive/` subfolder. Each snapshot subfolder includes a static JSON and HTML index describing its contents, and the snapshot extrator outputs are plain files within the folder (e.g. `media/example.mp4`, `git/somerepo.git`, `static/someimage.png`, etc.)
```bash
- ls ./archive/
/
+# to browse your index statically without running the archivebox server, run:
+archivebox list --html --with-headers > index.html
+archivebox list --json --with-headers > index.json
+
+# then open the static index in a browser
+open index.html
+
+# or browse the snapshots via filesystem directly
+ls ./archive/<timestamp>/
```
- **Index:** `index.html` & `index.json` HTML and JSON index files containing metadata and details
@@ -346,6 +352,12 @@ The on-disk layout is optimized to be easy to browse by hand and durable long-te
It does everything out-of-the-box by default, but you can disable or tweak [individual archive methods](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration) via environment variables or config file.
+```bash
+archivebox config --set SAVE_ARCHIVE_DOT_ORG=False
+archivebox config --set YOUTUBEDL_ARGS='--max-filesize=500m'
+archivebox config --help
+```
+
@@ -445,7 +457,7 @@ archivebox add 'https://example.com#2020-10-25'
-
+
|
diff --git a/archivebox/config.py b/archivebox/config.py
index 349817ec..3d48344f 100644
--- a/archivebox/config.py
+++ b/archivebox/config.py
@@ -1079,6 +1079,6 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
if check_db:
sql_index_path = Path(output_dir) / SQL_INDEX_FILENAME
assert sql_index_path.exists(), (
- f'No database file {SQL_INDEX_FILENAME} found in OUTPUT_DIR: {config["OUTPUT_DIR"]}')
+ f'No database file {SQL_INDEX_FILENAME} found in: {config["OUTPUT_DIR"]} (Are you in an ArchiveBox collection directory?)')
except KeyboardInterrupt:
raise SystemExit(2)
diff --git a/archivebox/core/migrations/0007_archiveresult.py b/archivebox/core/migrations/0007_archiveresult.py
index ec48d3ff..29b269f6 100644
--- a/archivebox/core/migrations/0007_archiveresult.py
+++ b/archivebox/core/migrations/0007_archiveresult.py
@@ -36,8 +36,25 @@ def forwards_func(apps, schema_editor):
for extractor in history:
for result in history[extractor]:
- ArchiveResult.objects.create(extractor=extractor, snapshot=snapshot, cmd=result["cmd"], cmd_version=result["cmd_version"] or 'unknown',
- start_ts=result["start_ts"], end_ts=result["end_ts"], status=result["status"], pwd=result["pwd"], output=result["output"])
+ try:
+ ArchiveResult.objects.create(
+ extractor=extractor,
+ snapshot=snapshot,
+ pwd=result["pwd"],
+ cmd=result.get("cmd") or [],
+ cmd_version=result.get("cmd_version") or 'unknown',
+ start_ts=result["start_ts"],
+ end_ts=result["end_ts"],
+ status=result["status"],
+ output=result.get("output") or 'null',
+ )
+ except Exception as e:
+ print(
+ ' ! Skipping import due to missing/invalid index.json:',
+ out_dir,
+ e,
+ '(open an issue with this index.json for help)',
+ )
def verify_json_index_integrity(snapshot):
diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py
index 7501da3a..1ca4e801 100644
--- a/archivebox/index/schema.py
+++ b/archivebox/index/schema.py
@@ -55,11 +55,11 @@ class ArchiveResult:
assert isinstance(self.end_ts, datetime)
assert isinstance(self.cmd, list)
assert all(isinstance(arg, str) and arg for arg in self.cmd)
- assert self.pwd is None or isinstance(self.pwd, str) and self.pwd
- assert self.cmd_version is None or isinstance(self.cmd_version, str) and self.cmd_version
+
+ # TODO: replace emptystrings in these three with None / remove them from the DB
+ assert self.pwd is None or isinstance(self.pwd, str)
+ assert self.cmd_version is None or isinstance(self.cmd_version, str)
assert self.output is None or isinstance(self.output, (str, Exception))
- if isinstance(self.output, str):
- assert self.output
@classmethod
def guess_ts(_cls, dict_info):
diff --git a/bin/build_deb.sh b/bin/build_deb.sh
index b9279369..8c5c7fcf 100755
--- a/bin/build_deb.sh
+++ b/bin/build_deb.sh
@@ -10,14 +10,6 @@ set -o nounset
set -o pipefail
IFS=$'\n'
-REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
-
-if [[ -f "$REPO_DIR/.venv/bin/activate" ]]; then
- source "$REPO_DIR/.venv/bin/activate"
-else
- echo "[!] Warning: No virtualenv presesnt in $REPO_DIR.venv"
-fi
-cd "$REPO_DIR"
CURRENT_PLAFORM="$(uname)"
REQUIRED_PLATFORM="Linux"
@@ -26,30 +18,27 @@ if [[ "$CURRENT_PLAFORM" != "$REQUIRED_PLATFORM" ]]; then
exit 0
fi
-VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
-DEBIAN_VERSION="1"
-PGP_KEY_ID="7D5695D3B618872647861D51C38137A7C1675988"
-# make sure you have this in ~/.dput.cf:
-# [archivebox-ppa]
-# fqdn: ppa.launchpad.net
-# method: ftp
-# incoming: ~archivebox/ubuntu/archivebox/
-# login: anonymous
-# allow_unsigned_uploads: 0
+REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
+VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
+DEBIAN_VERSION="${DEBIAN_VERSION:-1}"
+cd "$REPO_DIR"
+
+
+if [[ -f "$REPO_DIR/.venv/bin/activate" ]]; then  # use the repo-local virtualenv when one exists
+    source "$REPO_DIR/.venv/bin/activate"
+else
+    echo "[!] Warning: No virtualenv present in $REPO_DIR/.venv"  # non-fatal: fall back to the ambient python3
+fi
# cleanup build artifacts
rm -Rf build deb_dist dist archivebox-*.tar.gz
-# make sure the stdeb.cfg file is up-to-date with all the dependencies
# build source and binary packages
+# make sure the stdeb.cfg file is up-to-date with all the dependencies
python3 setup.py --command-packages=stdeb.command \
sdist_dsc --debian-version=$DEBIAN_VERSION \
bdist_deb
-# sign the build with your PGP key ID
-debsign -k "$PGP_KEY_ID" "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes"
-
-# push the build to launchpad ppa
-# dput archivebox "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes"
+# should output deb_dist/archivebox_0.5.4-1.{deb,changes,buildinfo,tar.gz}
diff --git a/bin/release_deb.sh b/bin/release_deb.sh
index dc1bff35..a470c4f3 100755
--- a/bin/release_deb.sh
+++ b/bin/release_deb.sh
@@ -10,11 +10,41 @@ set -o nounset
set -o pipefail
IFS=$'\n'
+
+CURRENT_PLAFORM="$(uname)"
+REQUIRED_PLATFORM="Linux"
+if [[ "$CURRENT_PLAFORM" != "$REQUIRED_PLATFORM" ]]; then
+ echo "[!] Skipping the Debian package build on $CURRENT_PLAFORM (it can only be run on $REQUIRED_PLATFORM)."
+ exit 0
+fi
+
+
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
-SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
+DEBIAN_VERSION="${DEBIAN_VERSION:-1}"
cd "$REPO_DIR"
+echo "[+] Loading PGP keys from env vars and filesystem..."
+# https://github.com/ArchiveBox/debian-archivebox/settings/secrets/actions
+PGP_KEY_ID="${PGP_KEY_ID:-BC2D21B0D84E16C437300B8652423FBED1586F45}"
+[[ "${PGP_PUBLIC_KEY:-}" ]] && echo "$PGP_PUBLIC_KEY" > /tmp/archivebox_gpg.key.pub
+[[ "${PGP_PRIVATE_KEY:-}" ]] && (umask 077; rm -f /tmp/archivebox_gpg.key; echo "$PGP_PRIVATE_KEY" > /tmp/archivebox_gpg.key)  # recreate with mode 0600 so other local users cannot read the secret key
+gpg --import /tmp/archivebox_gpg.key.pub || true  # best-effort: the key may already be in the keyring
+gpg --import --allow-secret-key-import /tmp/archivebox_gpg.key || true
+echo "$PGP_KEY_ID:6:" | gpg --import-ownertrust || true  # mark the signing key as ultimately trusted
+
+echo "[*] Signing build and changelog with PGP..."
+debsign --re-sign -k "$PGP_KEY_ID" "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes"
+
+# make sure you have this in ~/.dput.cf:
+# [archivebox-ppa]
+# fqdn: ppa.launchpad.net
+# method: ftp
+# incoming: ~archivebox/ubuntu/archivebox/
+# login: anonymous
+# allow_unsigned_uploads: 0
+
+
echo "[^] Uploading to launchpad.net"
-dput archivebox "deb_dist/archivebox_${VERSION}-1_source.changes"
+dput -f archivebox "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes"
diff --git a/bin/release_docker.sh b/bin/release_docker.sh
index 344a456d..80353808 100755
--- a/bin/release_docker.sh
+++ b/bin/release_docker.sh
@@ -19,6 +19,10 @@ cd "$REPO_DIR"
echo "[^] Uploading docker image"
# docker login --username=nikisweeting
# docker login docker.pkg.github.com --username=pirate
+# docker push accepts exactly one image reference per invocation, so push each tag separately
+docker push "archivebox/archivebox:$VERSION"
+docker push "archivebox/archivebox:$SHORT_VERSION"
+docker push archivebox/archivebox:latest
docker push docker.io/nikisweeting/archivebox
docker push docker.io/archivebox/archivebox
docker push docker.pkg.github.com/archivebox/archivebox/archivebox
diff --git a/package.json b/package.json
index 29d73765..02247f7d 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "archivebox",
- "version": "0.5.4",
+ "version": "0.5.6",
"description": "ArchiveBox: The self-hosted internet archive",
"author": "Nick Sweeting ",
"license": "MIT",
diff --git a/setup.py b/setup.py
index 692e5850..962db8d8 100755
--- a/setup.py
+++ b/setup.py
@@ -33,11 +33,10 @@ VERSION = json.loads((PACKAGE_DIR / "package.json").read_text().strip())['versio
# print('>', sys.executable, *sys.argv)
-class CustomTest(test):
+class DisabledTestCommand(test):
def run(self):
# setup.py test is deprecated, disable it here by force so stdeb doesnt run it
- #super().run()
- pass
+ print('Use the ./bin/test.sh script to run tests, not setup.py test.')
setuptools.setup(
@@ -129,6 +128,6 @@ setuptools.setup(
"Typing :: Typed",
],
cmdclass={
- "test": CustomTest,
+ "test": DisabledTestCommand,
},
)
diff --git a/stdeb.cfg b/stdeb.cfg
index a07147e2..cd191a42 100644
--- a/stdeb.cfg
+++ b/stdeb.cfg
@@ -7,3 +7,4 @@ Suite3: focal
Build-Depends: dh-python, python3-pip, python3-setuptools, python3-wheel, python3-stdeb
Depends3: nodejs, chromium-browser, wget, curl, git, ffmpeg, youtube-dl, python3-atomicwrites, python3-croniter, python3-crontab, python3-dateparser, python3-django, python3-django-extensions, python3-django-jsonfield, python3-mypy-extensions, python3-requests, python3-w3lib, ripgrep
XS-Python-Version: >= 3.7
+Setup-Env-Vars: DEB_BUILD_OPTIONS=nocheck