mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 06:34:25 -04:00
Merge branch 'dev' into version-banner
This commit is contained in:
commit
14f10a0461
20 changed files with 2055 additions and 1063 deletions
|
@ -16,6 +16,7 @@ venv/
|
||||||
.docker-venv/
|
.docker-venv/
|
||||||
node_modules/
|
node_modules/
|
||||||
|
|
||||||
|
docs/
|
||||||
build/
|
build/
|
||||||
dist/
|
dist/
|
||||||
brew_dist/
|
brew_dist/
|
||||||
|
|
19
.github/workflows/docker.yml
vendored
19
.github/workflows/docker.yml
vendored
|
@ -11,8 +11,7 @@ on:
|
||||||
|
|
||||||
env:
|
env:
|
||||||
DOCKER_IMAGE: archivebox-ci
|
DOCKER_IMAGE: archivebox-ci
|
||||||
|
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
buildx:
|
buildx:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
@ -60,13 +59,11 @@ jobs:
|
||||||
uses: docker/metadata-action@v5
|
uses: docker/metadata-action@v5
|
||||||
with:
|
with:
|
||||||
images: archivebox/archivebox,nikisweeting/archivebox
|
images: archivebox/archivebox,nikisweeting/archivebox
|
||||||
flavor: |
|
|
||||||
latest=auto
|
|
||||||
tags: |
|
tags: |
|
||||||
type=ref,event=branch
|
type=ref,event=branch
|
||||||
type=semver,pattern={{version}}
|
type=semver,pattern={{version}}
|
||||||
type=semver,pattern={{major}}.{{minor}}
|
type=semver,pattern={{major}}.{{minor}}
|
||||||
type=sha
|
type=raw,value=latest,enable={{is_default_branch}}
|
||||||
|
|
||||||
- name: Build and push
|
- name: Build and push
|
||||||
id: docker_build
|
id: docker_build
|
||||||
|
@ -78,8 +75,18 @@ jobs:
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
tags: ${{ steps.docker_meta.outputs.tags }}
|
tags: ${{ steps.docker_meta.outputs.tags }}
|
||||||
cache-from: type=local,src=/tmp/.buildx-cache
|
cache-from: type=local,src=/tmp/.buildx-cache
|
||||||
cache-to: type=local,dest=/tmp/.buildx-cache
|
cache-to: type=local,dest=/tmp/.buildx-cache-new
|
||||||
platforms: linux/amd64,linux/arm64,linux/arm/v7
|
platforms: linux/amd64,linux/arm64,linux/arm/v7
|
||||||
|
|
||||||
- name: Image digest
|
- name: Image digest
|
||||||
run: echo ${{ steps.docker_build.outputs.digest }}
|
run: echo ${{ steps.docker_build.outputs.digest }}
|
||||||
|
|
||||||
|
# This ugly bit is necessary if you don't want your cache to grow forever
|
||||||
|
# until it hits GitHub's limit of 5GB.
|
||||||
|
# Temp fix
|
||||||
|
# https://github.com/docker/build-push-action/issues/252
|
||||||
|
# https://github.com/moby/buildkit/issues/1896
|
||||||
|
- name: Move cache
|
||||||
|
run: |
|
||||||
|
rm -rf /tmp/.buildx-cache
|
||||||
|
mv /tmp/.buildx-cache-new /tmp/.buildx-cache
|
||||||
|
|
24
Dockerfile
24
Dockerfile
|
@ -73,7 +73,8 @@ COPY --chown=root:root --chmod=755 package.json "$CODE_DIR/"
|
||||||
RUN grep '"version": ' "${CODE_DIR}/package.json" | awk -F'"' '{print $4}' > /VERSION.txt
|
RUN grep '"version": ' "${CODE_DIR}/package.json" | awk -F'"' '{print $4}' > /VERSION.txt
|
||||||
|
|
||||||
# Force apt to leave downloaded binaries in /var/cache/apt (massively speeds up Docker builds)
|
# Force apt to leave downloaded binaries in /var/cache/apt (massively speeds up Docker builds)
|
||||||
RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
|
RUN echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache \
|
||||||
|
&& rm -f /etc/apt/apt.conf.d/docker-clean
|
||||||
|
|
||||||
# Print debug info about build and save it to disk, for human eyes only, not used by anything else
|
# Print debug info about build and save it to disk, for human eyes only, not used by anything else
|
||||||
RUN (echo "[i] Docker build for ArchiveBox $(cat /VERSION.txt) starting..." \
|
RUN (echo "[i] Docker build for ArchiveBox $(cat /VERSION.txt) starting..." \
|
||||||
|
@ -123,7 +124,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
|
||||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \
|
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \
|
||||||
echo "[+] Installing Node $NODE_VERSION environment in $NODE_MODULES..." \
|
echo "[+] Installing Node $NODE_VERSION environment in $NODE_MODULES..." \
|
||||||
&& echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" >> /etc/apt/sources.list.d/nodejs.list \
|
&& echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" >> /etc/apt/sources.list.d/nodejs.list \
|
||||||
&& curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
|
&& curl -fsSL "https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key" | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
|
||||||
&& apt-get update -qq \
|
&& apt-get update -qq \
|
||||||
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
|
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
|
||||||
nodejs libatomic1 python3-minimal \
|
nodejs libatomic1 python3-minimal \
|
||||||
|
@ -202,7 +203,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
|
||||||
&& chown -R $ARCHIVEBOX_USER "$PLAYWRIGHT_BROWSERS_PATH" \
|
&& chown -R $ARCHIVEBOX_USER "$PLAYWRIGHT_BROWSERS_PATH" \
|
||||||
# Save version info
|
# Save version info
|
||||||
&& ( \
|
&& ( \
|
||||||
which chromium-browser && /usr/bin/chromium-browser --version \
|
which chromium-browser && /usr/bin/chromium-browser --version || /usr/lib/chromium/chromium --version \
|
||||||
&& echo -e '\n\n' \
|
&& echo -e '\n\n' \
|
||||||
) | tee -a /VERSION.txt
|
) | tee -a /VERSION.txt
|
||||||
|
|
||||||
|
@ -246,15 +247,15 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
|
||||||
COPY --chown=root:root --chmod=755 "." "$CODE_DIR/"
|
COPY --chown=root:root --chmod=755 "." "$CODE_DIR/"
|
||||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
|
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
|
||||||
echo "[*] Installing PIP ArchiveBox package from $CODE_DIR..." \
|
echo "[*] Installing PIP ArchiveBox package from $CODE_DIR..." \
|
||||||
&& apt-get update -qq \
|
# && apt-get update -qq \
|
||||||
# install C compiler to build deps on platforms that dont have 32-bit wheels available on pypi
|
# install C compiler to build deps on platforms that dont have 32-bit wheels available on pypi
|
||||||
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
|
# && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
|
||||||
build-essential \
|
# build-essential \
|
||||||
# INSTALL ARCHIVEBOX python package globally from CODE_DIR, with all optional dependencies
|
# INSTALL ARCHIVEBOX python package globally from CODE_DIR, with all optional dependencies
|
||||||
&& pip install -e "$CODE_DIR"[sonic,ldap] \
|
&& pip install -e "$CODE_DIR"[sonic,ldap] \
|
||||||
# save docker image size and always remove compilers / build tools after building is complete
|
# save docker image size and always remove compilers / build tools after building is complete
|
||||||
&& apt-get purge -y build-essential \
|
# && apt-get purge -y build-essential \
|
||||||
&& apt-get autoremove -y \
|
# && apt-get autoremove -y \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
####################################################
|
####################################################
|
||||||
|
@ -276,11 +277,10 @@ ENV IN_DOCKER=True
|
||||||
|
|
||||||
# Print version for nice docker finish summary
|
# Print version for nice docker finish summary
|
||||||
RUN (echo -e "\n\n[√] Finished Docker build succesfully. Saving build summary in: /VERSION.txt" \
|
RUN (echo -e "\n\n[√] Finished Docker build succesfully. Saving build summary in: /VERSION.txt" \
|
||||||
&& echo -e "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) ($(uname -s) ${TARGETARCH} ${TARGETVARIANT})" \
|
&& echo -e "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) ($(uname -s) ${TARGETARCH} ${TARGETVARIANT})\n" \
|
||||||
&& echo -e "BUILD_END_TIME=$(date +"%Y-%m-%d %H:%M:%S %s") TZ=${TZ}\n\n" \
|
&& echo -e "BUILD_END_TIME=$(date +"%Y-%m-%d %H:%M:%S %s")\n\n" \
|
||||||
&& "$CODE_DIR/bin/docker_entrypoint.sh" \
|
|
||||||
archivebox version 2>&1 \
|
|
||||||
) | tee -a /VERSION.txt
|
) | tee -a /VERSION.txt
|
||||||
|
RUN "$CODE_DIR"/bin/docker_entrypoint.sh version 2>&1 | tee -a /VERSION.txt
|
||||||
|
|
||||||
####################################################
|
####################################################
|
||||||
|
|
||||||
|
|
|
@ -54,6 +54,7 @@ from .config_stubs import (
|
||||||
|
|
||||||
### Pre-Fetch Minimal System Config
|
### Pre-Fetch Minimal System Config
|
||||||
|
|
||||||
|
TIMEZONE = 'UTC'
|
||||||
SYSTEM_USER = getpass.getuser() or os.getlogin()
|
SYSTEM_USER = getpass.getuser() or os.getlogin()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -82,7 +83,6 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
|
||||||
'IN_QEMU': {'type': bool, 'default': False},
|
'IN_QEMU': {'type': bool, 'default': False},
|
||||||
'PUID': {'type': int, 'default': os.getuid()},
|
'PUID': {'type': int, 'default': os.getuid()},
|
||||||
'PGID': {'type': int, 'default': os.getgid()},
|
'PGID': {'type': int, 'default': os.getgid()},
|
||||||
# TODO: 'SHOW_HINTS': {'type: bool, 'default': True},
|
|
||||||
},
|
},
|
||||||
|
|
||||||
'GENERAL_CONFIG': {
|
'GENERAL_CONFIG': {
|
||||||
|
@ -377,7 +377,7 @@ ALLOWED_IN_OUTPUT_DIR = {
|
||||||
'static_index.json',
|
'static_index.json',
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_version(config):
|
def get_version(config) -> str:
|
||||||
try:
|
try:
|
||||||
return importlib.metadata.version(__package__ or 'archivebox')
|
return importlib.metadata.version(__package__ or 'archivebox')
|
||||||
except importlib.metadata.PackageNotFoundError:
|
except importlib.metadata.PackageNotFoundError:
|
||||||
|
@ -392,58 +392,76 @@ def get_version(config):
|
||||||
|
|
||||||
raise Exception('Failed to detect installed archivebox version!')
|
raise Exception('Failed to detect installed archivebox version!')
|
||||||
|
|
||||||
def get_commit_hash(config):
|
def get_commit_hash(config) -> Optional[str]:
|
||||||
|
try:
|
||||||
|
git_dir = config['PACKAGE_DIR'] / '../'
|
||||||
|
ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1]
|
||||||
|
commit_hash = git_dir.joinpath(ref).read_text().strip()
|
||||||
|
return commit_hash
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return list((config['PACKAGE_DIR'] / '../.git/refs/heads/').glob('*'))[0].read_text().strip()
|
return list((config['PACKAGE_DIR'] / '../.git/refs/heads/').glob('*'))[0].read_text().strip()
|
||||||
except Exception:
|
except Exception:
|
||||||
return None
|
pass
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
def get_version_releases(config):
|
def get_build_time(config) -> str:
|
||||||
|
if config['IN_DOCKER']:
|
||||||
|
docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0]
|
||||||
|
return docker_build_end_time
|
||||||
|
|
||||||
|
src_last_modified_unix_timestamp = (config['PACKAGE_DIR'] / 'config.py').stat().st_mtime
|
||||||
|
return datetime.fromtimestamp(src_last_modified_unix_timestamp).strftime('%Y-%m-%d %H:%M:%S %s')
|
||||||
|
|
||||||
|
def get_versions_available_on_github(config):
|
||||||
"""
|
"""
|
||||||
returns a dictionary containing the GitHub release data for
|
returns a dictionary containing the ArchiveBox GitHub release info for
|
||||||
the recommended upgrade version and the currently installed version
|
the recommended upgrade version and the currently installed version
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# we only want to perform the (relatively expensive) check for new versions
|
||||||
|
# when it's most relevant, e.g. when the user runs a long-running command
|
||||||
|
subcommand_run_by_user = sys.argv[3]
|
||||||
|
long_running_commands = ('add', 'schedule', 'update', 'status', 'server')
|
||||||
|
if subcommand_run_by_user not in long_running_commands:
|
||||||
|
return None
|
||||||
|
|
||||||
github_releases_api = "https://api.github.com/repos/ArchiveBox/ArchiveBox/releases"
|
github_releases_api = "https://api.github.com/repos/ArchiveBox/ArchiveBox/releases"
|
||||||
response = requests.get(github_releases_api)
|
response = requests.get(github_releases_api)
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
stderr('Failed to get release data from GitHub', color='lightyellow', config=config)
|
stderr(f'[!] Warning: GitHub API call to check for new ArchiveBox version failed! (status={response.status_code})', color='lightyellow', config=config)
|
||||||
return None
|
return None
|
||||||
|
all_releases = response.json()
|
||||||
|
|
||||||
releases = response.json()
|
installed_version = parse_version_string(config['VERSION'])
|
||||||
installed_version = config['VERSION']
|
|
||||||
installed_version_parts = parse_tag_name(installed_version)
|
|
||||||
|
|
||||||
# find current version or nearest older version (to link to)
|
# find current version or nearest older version (to link to)
|
||||||
current_version = None
|
current_version = None
|
||||||
for release in releases:
|
for idx, release in enumerate(all_releases):
|
||||||
release_parts = parse_tag_name(release["tag_name"])
|
release_version = parse_version_string(release["tag_name"])
|
||||||
if release_parts <= installed_version_parts :
|
if release_version <= installed_version:
|
||||||
current_version = release
|
current_version = release
|
||||||
break
|
break
|
||||||
|
|
||||||
current_version = current_version if current_version else releases[-1]
|
current_version = current_version or all_releases[-1]
|
||||||
|
|
||||||
|
# recommended version is whatever comes after current_version in the release list
|
||||||
|
# (perhaps too conservative to only recommend upgrading one version at a time, but it's safest)
|
||||||
|
try:
|
||||||
|
recommended_version = all_releases[idx+1]
|
||||||
|
except IndexError:
|
||||||
|
recommended_version = None
|
||||||
|
|
||||||
# find upgrade version
|
return {"recommended_version": recommended_version, "current_version": current_version}
|
||||||
upgrade_version = None
|
|
||||||
smallest_version_diff = parse_tag_name(releases[0]["tag_name"])[1]
|
|
||||||
for release in releases:
|
|
||||||
release_parts = parse_tag_name(release["tag_name"])
|
|
||||||
major_version_diff = release_parts[1] - installed_version_parts[1]
|
|
||||||
if major_version_diff < smallest_version_diff:
|
|
||||||
smallest_version_diff = major_version_diff
|
|
||||||
if smallest_version_diff < 1:
|
|
||||||
break
|
|
||||||
upgrade_version = release
|
|
||||||
|
|
||||||
upgrade_version = upgrade_version if upgrade_version else releases[0]
|
|
||||||
|
|
||||||
return {"upgrade_version": upgrade_version, "current_version": current_version}
|
|
||||||
|
|
||||||
def can_upgrade(config):
|
def can_upgrade(config):
|
||||||
if config['VERSION_RELEASES']:
|
if config['VERSIONS_AVAILABLE'] and config['VERSIONS_AVAILABLE']['recommended_version']:
|
||||||
upgrade_version = parse_tag_name(config['VERSION_RELEASES']['upgrade_version']['tag_name'])
|
recommended_version = parse_version_string(config['VERSIONS_AVAILABLE']['recommended_version']['tag_name'])
|
||||||
current_version = parse_tag_name(config['VERSION_RELEASES']['current_version']['tag_name'])
|
current_version = parse_version_string(config['VERSIONS_AVAILABLE']['current_version']['tag_name'])
|
||||||
return upgrade_version > current_version
|
return recommended_version > current_version
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
@ -473,11 +491,14 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
|
||||||
'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')},
|
'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')},
|
||||||
|
|
||||||
'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0] or bin_path('archivebox')},
|
'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0] or bin_path('archivebox')},
|
||||||
'VERSION': {'default': lambda c: get_version(c)},
|
|
||||||
'VERSION_RELEASES': {'default': lambda c: get_version_releases(c)},
|
'VERSION': {'default': lambda c: get_version(c).split('+', 1)[0]},
|
||||||
'CAN_UPGRADE': {'default': lambda c: can_upgrade(c)},
|
|
||||||
'COMMIT_HASH': {'default': lambda c: get_commit_hash(c)},
|
'COMMIT_HASH': {'default': lambda c: get_commit_hash(c)},
|
||||||
|
'BUILD_TIME': {'default': lambda c: get_build_time(c)},
|
||||||
|
|
||||||
|
'VERSIONS_AVAILABLE': {'default': lambda c: get_versions_available_on_github(c)},
|
||||||
|
'CAN_UPGRADE': {'default': lambda c: can_upgrade(c)},
|
||||||
|
|
||||||
'PYTHON_BINARY': {'default': lambda c: sys.executable},
|
'PYTHON_BINARY': {'default': lambda c: sys.executable},
|
||||||
'PYTHON_ENCODING': {'default': lambda c: sys.stdout.encoding.upper()},
|
'PYTHON_ENCODING': {'default': lambda c: sys.stdout.encoding.upper()},
|
||||||
'PYTHON_VERSION': {'default': lambda c: '{}.{}.{}'.format(*sys.version_info[:3])},
|
'PYTHON_VERSION': {'default': lambda c: '{}.{}.{}'.format(*sys.version_info[:3])},
|
||||||
|
@ -487,7 +508,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
|
||||||
|
|
||||||
'SQLITE_BINARY': {'default': lambda c: inspect.getfile(sqlite3)},
|
'SQLITE_BINARY': {'default': lambda c: inspect.getfile(sqlite3)},
|
||||||
'SQLITE_VERSION': {'default': lambda c: sqlite3.version},
|
'SQLITE_VERSION': {'default': lambda c: sqlite3.version},
|
||||||
#'SQLITE_JOURNAL_MODE': {'default': lambda c: 'wal'}, # set at runtime below, interesting but unused for now
|
#'SQLITE_JOURNAL_MODE': {'default': lambda c: 'wal'}, # set at runtime below, interesting if changed later but unused for now because it's always expected to be wal
|
||||||
#'SQLITE_OPTIONS': {'default': lambda c: ['JSON1']}, # set at runtime below
|
#'SQLITE_OPTIONS': {'default': lambda c: ['JSON1']}, # set at runtime below
|
||||||
|
|
||||||
'USE_CURL': {'default': lambda c: c['USE_CURL'] and (c['SAVE_FAVICON'] or c['SAVE_TITLE'] or c['SAVE_ARCHIVE_DOT_ORG'])},
|
'USE_CURL': {'default': lambda c: c['USE_CURL'] and (c['SAVE_FAVICON'] or c['SAVE_TITLE'] or c['SAVE_ARCHIVE_DOT_ORG'])},
|
||||||
|
@ -744,14 +765,11 @@ def load_config(defaults: ConfigDefaultDict,
|
||||||
|
|
||||||
return extended_config
|
return extended_config
|
||||||
|
|
||||||
# def write_config(config: ConfigDict):
|
|
||||||
|
|
||||||
# with open(os.path.join(config['OUTPUT_DIR'], CONFIG_FILENAME), 'w+') as f:
|
def parse_version_string(version: str) -> Tuple[int, int, int]:
|
||||||
|
"""parses a version tag string formatted like 'vx.x.x' into (major, minor, patch) ints"""
|
||||||
def parse_tag_name(v):
|
|
||||||
"""parses a version tag string formatted like 'vx.x.x'"""
|
|
||||||
base = v.split('+')[0].split('v')[-1] # remove 'v' prefix and '+editable' suffix
|
base = v.split('+')[0].split('v')[-1] # remove 'v' prefix and '+editable' suffix
|
||||||
return tuple(int(part) for part in base.split('.'))
|
return tuple(int(part) for part in base.split('.'))[:3]
|
||||||
|
|
||||||
|
|
||||||
# Logging Helpers
|
# Logging Helpers
|
||||||
|
@ -840,6 +858,7 @@ def find_chrome_binary() -> Optional[str]:
|
||||||
# Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev
|
# Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev
|
||||||
# make sure data dir finding precedence order always matches binary finding order
|
# make sure data dir finding precedence order always matches binary finding order
|
||||||
default_executable_paths = (
|
default_executable_paths = (
|
||||||
|
# '~/Library/Caches/ms-playwright/chromium-*/chrome-mac/Chromium.app/Contents/MacOS/Chromium',
|
||||||
'chromium-browser',
|
'chromium-browser',
|
||||||
'chromium',
|
'chromium',
|
||||||
'/Applications/Chromium.app/Contents/MacOS/Chromium',
|
'/Applications/Chromium.app/Contents/MacOS/Chromium',
|
||||||
|
@ -1166,14 +1185,25 @@ if not CONFIG['CHECK_SSL_VALIDITY']:
|
||||||
|
|
||||||
def check_system_config(config: ConfigDict=CONFIG) -> None:
|
def check_system_config(config: ConfigDict=CONFIG) -> None:
|
||||||
### Check system environment
|
### Check system environment
|
||||||
if config['USER'] == 'root':
|
if config['USER'] == 'root' or str(config['PUID']) == "0":
|
||||||
stderr('[!] ArchiveBox should never be run as root!', color='red')
|
stderr('[!] ArchiveBox should never be run as root!', color='red')
|
||||||
stderr(' For more information, see the security overview documentation:')
|
stderr(' For more information, see the security overview documentation:')
|
||||||
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
|
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
|
||||||
|
|
||||||
|
if config['IN_DOCKER']:
|
||||||
|
attempted_command = ' '.join(sys.argv[:3])
|
||||||
|
stderr('')
|
||||||
|
stderr(' {lightred}Hint{reset}: When using Docker, you must run commands with {green}docker run{reset} instead of {lightyellow}docker exec{reset}, e.g.:'.format(**config['ANSI']))
|
||||||
|
stderr(f' docker compose run archivebox {attempted_command}')
|
||||||
|
stderr(f' docker compose exec --user=archivebox archivebox {attempted_command}')
|
||||||
|
stderr(' or')
|
||||||
|
stderr(f' docker run -it -v ... -p ... archivebox/archivebox {attempted_command}')
|
||||||
|
stderr(f' docker exec -it --user=archivebox <container id> /bin/bash')
|
||||||
|
|
||||||
raise SystemExit(2)
|
raise SystemExit(2)
|
||||||
|
|
||||||
### Check Python environment
|
### Check Python environment
|
||||||
if sys.version_info[:3] < (3, 6, 0):
|
if sys.version_info[:3] < (3, 7, 0):
|
||||||
stderr(f'[X] Python version is not new enough: {config["PYTHON_VERSION"]} (>3.6 is required)', color='red')
|
stderr(f'[X] Python version is not new enough: {config["PYTHON_VERSION"]} (>3.6 is required)', color='red')
|
||||||
stderr(' See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
|
stderr(' See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
|
||||||
raise SystemExit(2)
|
raise SystemExit(2)
|
||||||
|
@ -1249,7 +1279,7 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
|
||||||
|
|
||||||
if config['USE_YOUTUBEDL'] and config['MEDIA_TIMEOUT'] < 20:
|
if config['USE_YOUTUBEDL'] and config['MEDIA_TIMEOUT'] < 20:
|
||||||
stderr(f'[!] Warning: MEDIA_TIMEOUT is set too low! (currently set to MEDIA_TIMEOUT={config["MEDIA_TIMEOUT"]} seconds)', color='red')
|
stderr(f'[!] Warning: MEDIA_TIMEOUT is set too low! (currently set to MEDIA_TIMEOUT={config["MEDIA_TIMEOUT"]} seconds)', color='red')
|
||||||
stderr(' Youtube-dl will fail to archive all media if set to less than ~20 seconds.')
|
stderr(' youtube-dl/yt-dlp will fail to archive any media if set to less than ~20 seconds.')
|
||||||
stderr(' (Setting it somewhere over 60 seconds is recommended)')
|
stderr(' (Setting it somewhere over 60 seconds is recommended)')
|
||||||
stderr()
|
stderr()
|
||||||
stderr(' If you want to disable media archiving entirely, set SAVE_MEDIA=False instead:')
|
stderr(' If you want to disable media archiving entirely, set SAVE_MEDIA=False instead:')
|
||||||
|
@ -1337,8 +1367,7 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
|
||||||
with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
|
with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
|
||||||
command = ' '.join(sys.argv)
|
command = ' '.join(sys.argv)
|
||||||
ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
|
ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
|
||||||
f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
|
f.write(f"\n> {command}; TS={ts} VERSION={config['VERSION']} IN_DOCKER={config['IN_DOCKER']} IS_TTY={config['IS_TTY']}\n")
|
||||||
|
|
||||||
|
|
||||||
if check_db:
|
if check_db:
|
||||||
# Enable WAL mode in sqlite3
|
# Enable WAL mode in sqlite3
|
||||||
|
|
|
@ -48,22 +48,25 @@ class TagInline(admin.TabularInline):
|
||||||
from django.contrib.admin.helpers import ActionForm
|
from django.contrib.admin.helpers import ActionForm
|
||||||
from django.contrib.admin.widgets import AutocompleteSelectMultiple
|
from django.contrib.admin.widgets import AutocompleteSelectMultiple
|
||||||
|
|
||||||
class AutocompleteTags:
|
# WIP: commented out because broken by Django 3.1.2 -> 4.0 migration
|
||||||
model = Tag
|
# class AutocompleteTags:
|
||||||
search_fields = ['name']
|
# model = Tag
|
||||||
|
# search_fields = ['name']
|
||||||
|
# name = 'tags'
|
||||||
|
|
||||||
class AutocompleteTagsAdminStub:
|
# class AutocompleteTagsAdminStub:
|
||||||
name = 'admin'
|
# name = 'admin'
|
||||||
|
|
||||||
|
|
||||||
class SnapshotActionForm(ActionForm):
|
class SnapshotActionForm(ActionForm):
|
||||||
tags = forms.ModelMultipleChoiceField(
|
tags = forms.ModelMultipleChoiceField(
|
||||||
queryset=Tag.objects.all(),
|
queryset=Tag.objects.all(),
|
||||||
required=False,
|
required=False,
|
||||||
widget=AutocompleteSelectMultiple(
|
# WIP: commented out because broken by Django 3.1.2 -> 4.0 migration
|
||||||
AutocompleteTags(),
|
# widget=AutocompleteSelectMultiple(
|
||||||
AutocompleteTagsAdminStub(),
|
# # AutocompleteTags(),
|
||||||
),
|
# # AutocompleteTagsAdminStub(),
|
||||||
|
# ),
|
||||||
)
|
)
|
||||||
|
|
||||||
# TODO: allow selecting actions for specific extractors? is this useful?
|
# TODO: allow selecting actions for specific extractors? is this useful?
|
||||||
|
|
|
@ -3,4 +3,4 @@ from django.apps import AppConfig
|
||||||
|
|
||||||
class CoreConfig(AppConfig):
|
class CoreConfig(AppConfig):
|
||||||
name = 'core'
|
name = 'core'
|
||||||
default_auto_field = 'django.db.models.UUIDField'
|
# default_auto_field = 'django.db.models.UUIDField'
|
||||||
|
|
|
@ -268,6 +268,8 @@ AUTH_PASSWORD_VALIDATORS = [
|
||||||
{'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'},
|
{'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# WIP: commented out because broken by Django 3.1.2 -> 4.0 migration
|
||||||
|
# DEFAULT_AUTO_FIELD = 'django.db.models.UUIDField'
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
### Shell Settings
|
### Shell Settings
|
||||||
|
|
|
@ -184,7 +184,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
|
||||||
link.url,
|
link.url,
|
||||||
command,
|
command,
|
||||||
ts
|
ts
|
||||||
) + "\n"))
|
) + "\n" + str(e) + "\n"))
|
||||||
#f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
|
#f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
|
||||||
|
|
||||||
# print(' ', stats)
|
# print(' ', stats)
|
||||||
|
|
|
@ -393,7 +393,11 @@ def log_link_archiving_finished(link: "Link", link_dir: str, is_new: bool, stats
|
||||||
else:
|
else:
|
||||||
_LAST_RUN_STATS.succeeded += 1
|
_LAST_RUN_STATS.succeeded += 1
|
||||||
|
|
||||||
size = get_dir_size(link_dir)
|
try:
|
||||||
|
size = get_dir_size(link_dir)
|
||||||
|
except FileNotFoundError:
|
||||||
|
size = (0, None, '0')
|
||||||
|
|
||||||
end_ts = datetime.now(timezone.utc)
|
end_ts = datetime.now(timezone.utc)
|
||||||
duration = str(end_ts - start_ts).split('.')[0]
|
duration = str(end_ts - start_ts).split('.')[0]
|
||||||
print(' {black}{} files ({}) in {}s {reset}'.format(size[2], printable_filesize(size[0]), duration, **ANSI))
|
print(' {black}{} files ({}) in {}s {reset}'.format(size[2], printable_filesize(size[0]), duration, **ANSI))
|
||||||
|
|
|
@ -93,6 +93,8 @@ from .config import (
|
||||||
SQL_INDEX_FILENAME,
|
SQL_INDEX_FILENAME,
|
||||||
ALLOWED_IN_OUTPUT_DIR,
|
ALLOWED_IN_OUTPUT_DIR,
|
||||||
SEARCH_BACKEND_ENGINE,
|
SEARCH_BACKEND_ENGINE,
|
||||||
|
LDAP,
|
||||||
|
get_version,
|
||||||
check_dependencies,
|
check_dependencies,
|
||||||
check_data_folder,
|
check_data_folder,
|
||||||
write_config_file,
|
write_config_file,
|
||||||
|
@ -100,6 +102,7 @@ from .config import (
|
||||||
VERSION_RELEASES,
|
VERSION_RELEASES,
|
||||||
CAN_UPGRADE,
|
CAN_UPGRADE,
|
||||||
COMMIT_HASH,
|
COMMIT_HASH,
|
||||||
|
BUILD_TIME,
|
||||||
CODE_LOCATIONS,
|
CODE_LOCATIONS,
|
||||||
EXTERNAL_LOCATIONS,
|
EXTERNAL_LOCATIONS,
|
||||||
DATA_LOCATIONS,
|
DATA_LOCATIONS,
|
||||||
|
@ -220,31 +223,39 @@ def version(quiet: bool=False,
|
||||||
|
|
||||||
if not quiet:
|
if not quiet:
|
||||||
# 0.7.1
|
# 0.7.1
|
||||||
# ArchiveBox v0.7.1 Cpython Linux Linux-4.19.121-linuxkit-x86_64-with-glibc2.28 x86_64 (in Docker) (in TTY)
|
# ArchiveBox v0.7.1+editable COMMIT_HASH=951bba5 BUILD_TIME=2023-12-17 16:46:05 1702860365
|
||||||
# DEBUG=False IN_DOCKER=True IN_QEMU=False IS_TTY=True TZ=UTC FS_ATOMIC=True FS_REMOTE=False FS_PERMS=644 FS_USER=501:20 SEARCH_BACKEND=ripgrep
|
# IN_DOCKER=False IN_QEMU=False ARCH=arm64 OS=Darwin PLATFORM=macOS-14.2-arm64-arm-64bit PYTHON=Cpython
|
||||||
|
# FS_ATOMIC=True FS_REMOTE=False FS_USER=501:20 FS_PERMS=644
|
||||||
|
# DEBUG=False IS_TTY=True TZ=UTC SEARCH_BACKEND=ripgrep LDAP=False
|
||||||
|
|
||||||
p = platform.uname()
|
p = platform.uname()
|
||||||
print(
|
print(
|
||||||
'ArchiveBox v{}'.format(VERSION),
|
'ArchiveBox v{}'.format(get_version(CONFIG)),
|
||||||
*((COMMIT_HASH[:7],) if COMMIT_HASH else ()),
|
*((f'COMMIT_HASH={COMMIT_HASH[:7]}',) if COMMIT_HASH else ()),
|
||||||
sys.implementation.name.title(),
|
f'BUILD_TIME={BUILD_TIME}',
|
||||||
p.system,
|
)
|
||||||
platform.platform(),
|
print(
|
||||||
p.machine,
|
f'IN_DOCKER={IN_DOCKER}',
|
||||||
|
f'IN_QEMU={IN_QEMU}',
|
||||||
|
f'ARCH={p.machine}',
|
||||||
|
f'OS={p.system}',
|
||||||
|
f'PLATFORM={platform.platform()}',
|
||||||
|
f'PYTHON={sys.implementation.name.title()}',
|
||||||
)
|
)
|
||||||
OUTPUT_IS_REMOTE_FS = DATA_LOCATIONS['OUTPUT_DIR']['is_mount'] or DATA_LOCATIONS['ARCHIVE_DIR']['is_mount']
|
OUTPUT_IS_REMOTE_FS = DATA_LOCATIONS['OUTPUT_DIR']['is_mount'] or DATA_LOCATIONS['ARCHIVE_DIR']['is_mount']
|
||||||
print(
|
print(
|
||||||
f'DEBUG={DEBUG}',
|
|
||||||
f'IN_DOCKER={IN_DOCKER}',
|
|
||||||
f'IN_QEMU={IN_QEMU}',
|
|
||||||
f'IS_TTY={IS_TTY}',
|
|
||||||
f'TZ={TIMEZONE}',
|
|
||||||
#f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually
|
|
||||||
f'FS_ATOMIC={ENFORCE_ATOMIC_WRITES}',
|
f'FS_ATOMIC={ENFORCE_ATOMIC_WRITES}',
|
||||||
f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
|
f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
|
||||||
f'FS_USER={PUID}:{PGID}',
|
f'FS_USER={PUID}:{PGID}',
|
||||||
f'FS_PERMS={OUTPUT_PERMISSIONS}',
|
f'FS_PERMS={OUTPUT_PERMISSIONS}',
|
||||||
|
)
|
||||||
|
print(
|
||||||
|
f'DEBUG={DEBUG}',
|
||||||
|
f'IS_TTY={IS_TTY}',
|
||||||
|
f'TZ={TIMEZONE}',
|
||||||
f'SEARCH_BACKEND={SEARCH_BACKEND_ENGINE}',
|
f'SEARCH_BACKEND={SEARCH_BACKEND_ENGINE}',
|
||||||
|
f'LDAP={LDAP}',
|
||||||
|
#f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually
|
||||||
)
|
)
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
@ -273,7 +284,7 @@ def version(quiet: bool=False,
|
||||||
print(printable_folder_status(name, path))
|
print(printable_folder_status(name, path))
|
||||||
else:
|
else:
|
||||||
print()
|
print()
|
||||||
print('{white}[i] Data locations:{reset}'.format(**ANSI))
|
print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**ANSI))
|
||||||
|
|
||||||
print()
|
print()
|
||||||
check_dependencies()
|
check_dependencies()
|
||||||
|
@ -1010,9 +1021,9 @@ def setup(out_dir: Path=OUTPUT_DIR) -> None:
|
||||||
|
|
||||||
stderr('\n Installing SINGLEFILE_BINARY, READABILITY_BINARY, MERCURY_BINARY automatically using npm...')
|
stderr('\n Installing SINGLEFILE_BINARY, READABILITY_BINARY, MERCURY_BINARY automatically using npm...')
|
||||||
if not NODE_VERSION:
|
if not NODE_VERSION:
|
||||||
stderr('[X] You must first install node using your system package manager', color='red')
|
stderr('[X] You must first install node & npm using your system package manager', color='red')
|
||||||
hint([
|
hint([
|
||||||
'curl -sL https://deb.nodesource.com/setup_15.x | sudo -E bash -',
|
'https://github.com/nodesource/distributions#table-of-contents',
|
||||||
'or to disable all node-based modules run: archivebox config --set USE_NODE=False',
|
'or to disable all node-based modules run: archivebox config --set USE_NODE=False',
|
||||||
])
|
])
|
||||||
raise SystemExit(1)
|
raise SystemExit(1)
|
||||||
|
|
1
archivebox/static
Symbolic link
1
archivebox/static
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
templates/static
|
|
@ -23,6 +23,7 @@ SUPPORTED_PLATFORMS="linux/amd64,linux/arm64,linux/arm/v7"
|
||||||
TAG_NAME="${1:-$(git rev-parse --abbrev-ref HEAD)}"
|
TAG_NAME="${1:-$(git rev-parse --abbrev-ref HEAD)}"
|
||||||
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
|
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
|
||||||
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
|
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
|
||||||
|
GIT_SHA=sha-"$(git rev-parse --short HEAD)"
|
||||||
SELECTED_PLATFORMS="${2:-$SUPPORTED_PLATFORMS}"
|
SELECTED_PLATFORMS="${2:-$SUPPORTED_PLATFORMS}"
|
||||||
|
|
||||||
echo "[+] Building Docker image: tag=$TAG_NAME version=$SHORT_VERSION arch=$SELECTED_PLATFORMS"
|
echo "[+] Building Docker image: tag=$TAG_NAME version=$SHORT_VERSION arch=$SELECTED_PLATFORMS"
|
||||||
|
@ -50,6 +51,7 @@ function create_builder() {
|
||||||
docker buildx use xbuilder && return 0
|
docker buildx use xbuilder && return 0
|
||||||
echo "[+] Creating new xbuilder for: $SELECTED_PLATFORMS"
|
echo "[+] Creating new xbuilder for: $SELECTED_PLATFORMS"
|
||||||
echo
|
echo
|
||||||
|
docker pull 'moby/buildkit:buildx-stable-1'
|
||||||
|
|
||||||
# Switch to buildx builder if already present / previously created
|
# Switch to buildx builder if already present / previously created
|
||||||
docker buildx create --name xbuilder --driver docker-container --bootstrap --use --platform "$SELECTED_PLATFORMS" || true
|
docker buildx create --name xbuilder --driver docker-container --bootstrap --use --platform "$SELECTED_PLATFORMS" || true
|
||||||
|
@ -74,6 +76,7 @@ echo "[+] Generating requirements.txt and pdm.lock from pyproject.toml..."
|
||||||
pdm lock --group=':all' --strategy="cross_platform" --production
|
pdm lock --group=':all' --strategy="cross_platform" --production
|
||||||
pdm export --group=':all' --production --without-hashes -o requirements.txt
|
pdm export --group=':all' --production --without-hashes -o requirements.txt
|
||||||
|
|
||||||
|
|
||||||
echo "[+] Building archivebox:$VERSION docker image..."
|
echo "[+] Building archivebox:$VERSION docker image..."
|
||||||
# docker builder prune
|
# docker builder prune
|
||||||
# docker build . --no-cache -t archivebox-dev \
|
# docker build . --no-cache -t archivebox-dev \
|
||||||
|
@ -83,12 +86,16 @@ docker buildx build --platform "$SELECTED_PLATFORMS" --load . \
|
||||||
-t archivebox/archivebox:$TAG_NAME \
|
-t archivebox/archivebox:$TAG_NAME \
|
||||||
-t archivebox/archivebox:$VERSION \
|
-t archivebox/archivebox:$VERSION \
|
||||||
-t archivebox/archivebox:$SHORT_VERSION \
|
-t archivebox/archivebox:$SHORT_VERSION \
|
||||||
|
-t archivebox/archivebox:$GIT_SHA \
|
||||||
-t archivebox/archivebox:latest \
|
-t archivebox/archivebox:latest \
|
||||||
-t nikisweeting/archivebox \
|
-t nikisweeting/archivebox \
|
||||||
-t nikisweeting/archivebox:$TAG_NAME \
|
-t nikisweeting/archivebox:$TAG_NAME \
|
||||||
-t nikisweeting/archivebox:$VERSION \
|
-t nikisweeting/archivebox:$VERSION \
|
||||||
-t nikisweeting/archivebox:$SHORT_VERSION \
|
-t nikisweeting/archivebox:$SHORT_VERSION \
|
||||||
|
-t nikisweeting/archivebox:$GIT_SHA \
|
||||||
-t nikisweeting/archivebox:latest \
|
-t nikisweeting/archivebox:latest \
|
||||||
-t ghcr.io/archivebox/archivebox/archivebox:$TAG_NAME \
|
-t ghcr.io/archivebox/archivebox/archivebox:$TAG_NAME \
|
||||||
-t ghcr.io/archivebox/archivebox/archivebox:$VERSION \
|
-t ghcr.io/archivebox/archivebox/archivebox:$VERSION \
|
||||||
-t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION
|
-t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION \
|
||||||
|
-t ghcr.io/archivebox/archivebox/archivebox:$GIT_SHA \
|
||||||
|
-t ghcr.io/archivebox/archivebox/archivebox:latest
|
||||||
|
|
|
@ -1,20 +1,55 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
|
# This Docker ENTRYPOINT script is called by `docker run archivebox ...` or `docker compose run archivebox ...`.
|
||||||
|
# It takes a CMD as $* shell arguments and runs it following these setup steps:
|
||||||
|
|
||||||
|
# - Set the archivebox user to use the correct PUID & PGID
|
||||||
|
# 1. highest precedence is for valid PUID and PGID env vars passed in explicitly
|
||||||
|
# 2. fall back to DETECTED_PUID of files found within existing data dir
|
||||||
|
# 3. fall back to DEFAULT_PUID if no data dir or it's owned by root
|
||||||
|
# - Create a new /data dir if necessary and set the correct ownership on it
|
||||||
|
# - Create a new /browsers dir if necessary and set the correct ownership on it
|
||||||
|
# - Check whether we're running inside QEMU emulation and show a warning if so.
|
||||||
|
# - Drop down to archivebox user permissions and execute passed CMD command.
|
||||||
|
|
||||||
|
# Bash Environment Setup
|
||||||
|
# http://redsymbol.net/articles/unofficial-bash-strict-mode/
|
||||||
|
# https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html
|
||||||
|
# set -o xtrace
|
||||||
|
# set -o nounset
|
||||||
|
set -o errexit
|
||||||
|
set -o errtrace
|
||||||
|
set -o pipefail
|
||||||
|
# IFS=$'\n'
|
||||||
|
|
||||||
|
# Load global invariants (set by Dockerfile during image build time, not intended to be customized by users at runtime)
|
||||||
export DATA_DIR="${DATA_DIR:-/data}"
|
export DATA_DIR="${DATA_DIR:-/data}"
|
||||||
export ARCHIVEBOX_USER="${ARCHIVEBOX_USER:-archivebox}"
|
export ARCHIVEBOX_USER="${ARCHIVEBOX_USER:-archivebox}"
|
||||||
|
|
||||||
# default PUID and PGID if data dir is empty and no PUID+PGID is set
|
# Global default PUID and PGID if data dir is empty and no intended PUID+PGID is set manually by user
|
||||||
export DEFAULT_PUID=911
|
export DEFAULT_PUID=911
|
||||||
export DEFAULT_PGID=911
|
export DEFAULT_PGID=911
|
||||||
|
|
||||||
# if data directory already exists, autodetect detect owner by looking at files within
|
# If user tries to set PUID and PGID to root values manually, catch and reject because root is not allowed
|
||||||
export DETECTED_UID="$(stat -c '%u' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PUID")"
|
if [[ "$PUID" == "0" ]] || [[ "$PGID" == "0" ]]; then
|
||||||
export DETECTED_GID="$(stat -c '%g' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PGID")"
|
echo -e "\n[X] Error: Got PUID=$PUID and PGID=$PGID but ArchiveBox is not allowed to be run as root, please change or unset PUID & PGID and try again." > /dev/stderr
|
||||||
|
echo -e " Hint: some NFS/SMB/FUSE/etc. filesystems force-remap all permissions, leave PUID/PGID blank" > /dev/stderr
|
||||||
|
echo -e " or set PUID/PGID to the same value as the user/group they remap to (e.g. $DEFAULT_PUID:$DEFAULT_PGID)." > /dev/stderr
|
||||||
|
echo -e " https://linux.die.net/man/8/mount.cifs#:~:text=does%20not%20provide%20unix%20ownership" > /dev/stderr
|
||||||
|
exit 3
|
||||||
|
fi
|
||||||
|
|
||||||
# Set the archivebox user to use the configured UID & GID
|
# If data directory already exists, autodetect owner by looking at files within
|
||||||
# prefers PUID and PGID env vars passsed in explicitly, falls back to autodetected defaults
|
export DETECTED_PUID="$(stat -c '%u' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PUID")"
|
||||||
usermod -o -u "${PUID:-$DETECTED_UID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
|
export DETECTED_PGID="$(stat -c '%g' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PGID")"
|
||||||
groupmod -o -g "${PGID:-$DETECTED_GID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
|
|
||||||
|
# If data directory exists but is owned by root, use defaults instead of root because root is not allowed
|
||||||
|
[[ "$DETECTED_PUID" == "0" ]] && export DETECTED_PUID="$DEFAULT_PUID"
|
||||||
|
[[ "$DETECTED_PGID" == "0" ]] && export DETECTED_PGID="$DEFAULT_PGID"
|
||||||
|
|
||||||
|
# Set archivebox user and group ids to desired PUID/PGID
|
||||||
|
usermod -o -u "${PUID:-$DETECTED_PUID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
|
||||||
|
groupmod -o -g "${PGID:-$DETECTED_PGID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
|
||||||
|
|
||||||
# re-set PUID and PGID to values reported by system instead of values we tried to set,
|
# re-set PUID and PGID to values reported by system instead of values we tried to set,
|
||||||
# in case wonky filesystems or Docker setups try to play UID/GID remapping tricks on us
|
# in case wonky filesystems or Docker setups try to play UID/GID remapping tricks on us
|
||||||
|
@ -29,12 +64,12 @@ if [[ -d "$DATA_DIR/archive" ]]; then
|
||||||
# echo "[√] Permissions are correct"
|
# echo "[√] Permissions are correct"
|
||||||
else
|
else
|
||||||
# the only time this fails is if the host filesystem doesn't allow us to write as root (e.g. some NFS mapall/maproot problems, connection issues, drive disappeared, etc.)
|
# the only time this fails is if the host filesystem doesn't allow us to write as root (e.g. some NFS mapall/maproot problems, connection issues, drive disappeared, etc.)
|
||||||
echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data dir." >&2
|
echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data dir (currently owned by $(stat -c '%u' "$DATA_DIR"):$(stat -c '%g' "$DATA_DIR")." >&2
|
||||||
echo -e " Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:"
|
echo -e " Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:" > /dev/stderr
|
||||||
echo -e " \$ chown -R $PUID:$PGID ./data\n" >&2
|
echo -e " \$ chown -R $PUID:$PGID ./data\n" > /dev/stderr
|
||||||
echo -e " Configure the PUID & PGID environment variables to change the desired owner:" >&2
|
echo -e " Configure the PUID & PGID environment variables to change the desired owner:" > /dev/stderr
|
||||||
echo -e " https://docs.linuxserver.io/general/understanding-puid-and-pgid\n" >&2
|
echo -e " https://docs.linuxserver.io/general/understanding-puid-and-pgid\n" > /dev/stderr
|
||||||
exit 1
|
exit 3
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
# create data directory
|
# create data directory
|
||||||
|
@ -46,29 +81,35 @@ fi
|
||||||
chown $PUID:$PGID "$DATA_DIR"
|
chown $PUID:$PGID "$DATA_DIR"
|
||||||
chown $PUID:$PGID "$DATA_DIR"/*
|
chown $PUID:$PGID "$DATA_DIR"/*
|
||||||
|
|
||||||
# also chown BROWSERS_DIR because otherwise 'archivebox setup' wont be able to install chrome
|
# also chown BROWSERS_DIR because otherwise 'archivebox setup' wont be able to install chrome at runtime
|
||||||
PLAYWRIGHT_BROWSERS_PATH="${PLAYWRIGHT_BROWSERS_PATH:-/browsers}"
|
PLAYWRIGHT_BROWSERS_PATH="${PLAYWRIGHT_BROWSERS_PATH:-/browsers}"
|
||||||
|
mkdir -p "$PLAYWRIGHT_BROWSERS_PATH/permissions_test_safe_to_delete"
|
||||||
chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"
|
chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"
|
||||||
chown $PUID:$PGID "${PLAYWRIGHT_BROWSERS_PATH}/*"
|
chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"/*
|
||||||
|
rm -Rf "$PLAYWRIGHT_BROWSERS_PATH/permissions_test_safe_to_delete"
|
||||||
|
|
||||||
|
|
||||||
# (this check is written in blood, QEMU silently breaks things in ways that are not obvious)
|
# (this check is written in blood, QEMU silently breaks things in ways that are not obvious)
|
||||||
export IN_QEMU="$(pmap 1 | grep qemu | wc -l | grep -E '^0$' >/dev/null && echo 'False' || echo 'True')"
|
export IN_QEMU="$(pmap 1 | grep qemu >/dev/null && echo 'True' || echo 'False')"
|
||||||
if [[ "$IN_QEMU" == 'True' ]]; then
|
if [[ "$IN_QEMU" == "True" ]]; then
|
||||||
echo -e "\n[!] Warning: Running $(uname -m) emulated container in QEMU, some things will break!" >&2
|
echo -e "\n[!] Warning: Running $(uname -m) docker image using QEMU emulation, some things will break!" > /dev/stderr
|
||||||
echo -e " chromium (screenshot, pdf, dom), singlefile, and any dependencies that rely on inotify will not run in QEMU." >&2
|
echo -e " chromium (screenshot, pdf, dom), singlefile, and any dependencies that rely on inotify will not run in QEMU." > /dev/stderr
|
||||||
echo -e " See here for more info: https://github.com/microsoft/playwright/issues/17395#issuecomment-1250830493\n" >&2
|
echo -e " See here for more info: https://github.com/microsoft/playwright/issues/17395#issuecomment-1250830493\n" > /dev/stderr
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
# Drop permissions to run commands as the archivebox user
|
# Drop permissions to run commands as the archivebox user
|
||||||
if [[ "$1" == /* || "$1" == "bash" || "$1" == "sh" || "$1" == "echo" || "$1" == "cat" || "$1" == "archivebox" ]]; then
|
if [[ "$1" == /* || "$1" == "bash" || "$1" == "sh" || "$1" == "echo" || "$1" == "cat" || "$1" == "archivebox" ]]; then
|
||||||
# handle "docker run archivebox /some/non-archivebox/command --with=some args" by passing args directly to bash -c
|
# handle "docker run archivebox /some/non-archivebox/command --with=some args" by passing args directly to bash -c
|
||||||
# e.g. "docker run archivebox /venv/bin/archivebox-alt init"
|
# e.g. "docker run archivebox archivebox init"
|
||||||
|
# "docker run archivebox /venv/bin/archivebox-alt init"
|
||||||
# "docker run archivebox /bin/bash -c '...'"
|
# "docker run archivebox /bin/bash -c '...'"
|
||||||
# "docker run archivebox echo test"
|
# "docker run archivebox cat /VERSION.txt"
|
||||||
exec gosu "$PUID" bash -c "$*"
|
exec gosu "$PUID" bash -c "$*"
|
||||||
else
|
else
|
||||||
# handle "docker run archivebox add some subcommand --with=args abc" by calling archivebox to run as args as CLI subcommand
|
# handle "docker run archivebox add some subcommand --with=args abc" by calling archivebox to run as args as CLI subcommand
|
||||||
# e.g. "docker run archivebox add --depth=1 https://example.com"
|
# e.g. "docker run archivebox help"
|
||||||
|
# "docker run archivebox add --depth=1 https://example.com"
|
||||||
# "docker run archivebox manage createsuperuser"
|
# "docker run archivebox manage createsuperuser"
|
||||||
# "docker run archivebox server 0.0.0.0:8000"
|
# "docker run archivebox server 0.0.0.0:8000"
|
||||||
exec gosu "$PUID" bash -c "archivebox $*"
|
exec gosu "$PUID" bash -c "archivebox $*"
|
||||||
|
|
|
@ -18,6 +18,7 @@ SUPPORTED_PLATFORMS="linux/amd64,linux/arm64,linux/arm/v7"
|
||||||
TAG_NAME="${1:-$(git rev-parse --abbrev-ref HEAD)}"
|
TAG_NAME="${1:-$(git rev-parse --abbrev-ref HEAD)}"
|
||||||
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
|
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
|
||||||
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
|
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
|
||||||
|
GIT_SHA=sha-"$(git rev-parse --short HEAD)"
|
||||||
SELECTED_PLATFORMS="${2:-$SUPPORTED_PLATFORMS}"
|
SELECTED_PLATFORMS="${2:-$SUPPORTED_PLATFORMS}"
|
||||||
|
|
||||||
|
|
||||||
|
@ -34,12 +35,16 @@ docker buildx build --platform "$SELECTED_PLATFORMS" --push . \
|
||||||
-t archivebox/archivebox:$TAG_NAME \
|
-t archivebox/archivebox:$TAG_NAME \
|
||||||
-t archivebox/archivebox:$VERSION \
|
-t archivebox/archivebox:$VERSION \
|
||||||
-t archivebox/archivebox:$SHORT_VERSION \
|
-t archivebox/archivebox:$SHORT_VERSION \
|
||||||
|
-t archivebox/archivebox:$GIT_SHA \
|
||||||
-t archivebox/archivebox:latest \
|
-t archivebox/archivebox:latest \
|
||||||
-t nikisweeting/archivebox \
|
-t nikisweeting/archivebox \
|
||||||
-t nikisweeting/archivebox:$TAG_NAME \
|
-t nikisweeting/archivebox:$TAG_NAME \
|
||||||
-t nikisweeting/archivebox:$VERSION \
|
-t nikisweeting/archivebox:$VERSION \
|
||||||
-t nikisweeting/archivebox:$SHORT_VERSION \
|
-t nikisweeting/archivebox:$SHORT_VERSION \
|
||||||
|
-t nikisweeting/archivebox:$GIT_SHA \
|
||||||
-t nikisweeting/archivebox:latest \
|
-t nikisweeting/archivebox:latest \
|
||||||
-t ghcr.io/archivebox/archivebox/archivebox:$TAG_NAME \
|
-t ghcr.io/archivebox/archivebox/archivebox:$TAG_NAME \
|
||||||
-t ghcr.io/archivebox/archivebox/archivebox:$VERSION \
|
-t ghcr.io/archivebox/archivebox/archivebox:$VERSION \
|
||||||
-t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION
|
-t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION \
|
||||||
|
-t ghcr.io/archivebox/archivebox/archivebox:$GIT_SHA
|
||||||
|
|
||||||
|
|
|
@ -21,7 +21,6 @@ services:
|
||||||
# - ./etc/crontabs:/var/spool/cron/crontabs # uncomment this and archivebox_scheduler below to set up automatic recurring archive jobs
|
# - ./etc/crontabs:/var/spool/cron/crontabs # uncomment this and archivebox_scheduler below to set up automatic recurring archive jobs
|
||||||
# - ./archivebox:/app/archivebox # uncomment this to mount the ArchiveBox source code at runtime (for developers working on archivebox)
|
# - ./archivebox:/app/archivebox # uncomment this to mount the ArchiveBox source code at runtime (for developers working on archivebox)
|
||||||
# build: . # uncomment this to build the image from source code at buildtime (for developers working on archivebox)
|
# build: . # uncomment this to build the image from source code at buildtime (for developers working on archivebox)
|
||||||
|
|
||||||
environment:
|
environment:
|
||||||
- ALLOWED_HOSTS=* # restrict this to only accept incoming traffic via specific domain name
|
- ALLOWED_HOSTS=* # restrict this to only accept incoming traffic via specific domain name
|
||||||
# - PUBLIC_INDEX=True # set to False to prevent anonymous users from viewing snapshot list
|
# - PUBLIC_INDEX=True # set to False to prevent anonymous users from viewing snapshot list
|
||||||
|
@ -161,4 +160,4 @@ networks:
|
||||||
ipam:
|
ipam:
|
||||||
driver: default
|
driver: default
|
||||||
config:
|
config:
|
||||||
- subnet: 172.20.0.0/24
|
- subnet: 172.20.0.0/24
|
||||||
|
|
8
etc/crontabs/archivebox
Normal file
8
etc/crontabs/archivebox
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
# DO NOT EDIT THIS FILE - edit the master and reinstall.
|
||||||
|
# (/tmp/tmpe3dawo9u installed on Tue Jun 13 23:21:48 2023)
|
||||||
|
# (Cron version -- $Id: crontab.c,v 2.13 1994/01/17 03:20:37 vixie Exp $)
|
||||||
|
|
||||||
|
@daily cd /data && /usr/local/bin/archivebox add --depth=0 "https://example.com/3" >> /data/logs/schedule.log 2>&1 # archivebox_schedule
|
||||||
|
@daily cd /data && /usr/local/bin/archivebox add --depth=0 "https://example.com/2" >> /data/logs/schedule.log 2>&1 # archivebox_schedule
|
||||||
|
@daily cd /data && /usr/local/bin/archivebox add --depth=0 "https://example.com" >> /data/logs/schedule.log 2>&1 # archivebox_schedule
|
||||||
|
@daily cd /data && /usr/local/bin/archivebox add --depth=0 "update" >> /data/logs/schedule.log 2>&1 # archivebox_schedule
|
1740
package-lock.json
generated
1740
package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
@ -39,6 +39,9 @@ classifiers = [
|
||||||
"Programming Language :: Python :: 3.7",
|
"Programming Language :: Python :: 3.7",
|
||||||
"Programming Language :: Python :: 3.8",
|
"Programming Language :: Python :: 3.8",
|
||||||
"Programming Language :: Python :: 3.9",
|
"Programming Language :: Python :: 3.9",
|
||||||
|
"Programming Language :: Python :: 3.10",
|
||||||
|
"Programming Language :: Python :: 3.11",
|
||||||
|
"Programming Language :: Python :: 3.12",
|
||||||
"Topic :: Internet :: WWW/HTTP",
|
"Topic :: Internet :: WWW/HTTP",
|
||||||
"Topic :: Internet :: WWW/HTTP :: Indexing/Search",
|
"Topic :: Internet :: WWW/HTTP :: Indexing/Search",
|
||||||
"Topic :: Internet :: WWW/HTTP :: WSGI :: Application",
|
"Topic :: Internet :: WWW/HTTP :: WSGI :: Application",
|
||||||
|
|
|
@ -1,35 +1,41 @@
|
||||||
# This file is @generated by PDM.
|
# This file is @generated by PDM.
|
||||||
# Please do not edit it manually.
|
# Please do not edit it manually.
|
||||||
|
|
||||||
appnope==0.1.3
|
|
||||||
asgiref==3.7.2
|
asgiref==3.7.2
|
||||||
asttokens==2.4.1
|
asttokens==2.4.1
|
||||||
brotli==1.1.0
|
brotli==1.1.0; implementation_name == "cpython"
|
||||||
certifi==2023.7.22
|
brotlicffi==1.1.0.0; implementation_name != "cpython"
|
||||||
|
certifi==2023.11.17
|
||||||
|
cffi==1.16.0; implementation_name != "cpython"
|
||||||
charset-normalizer==3.3.2
|
charset-normalizer==3.3.2
|
||||||
|
colorama==0.4.6; sys_platform == "win32"
|
||||||
croniter==2.0.1
|
croniter==2.0.1
|
||||||
dateparser==1.1.8
|
dateparser==1.2.0
|
||||||
decorator==5.1.1
|
decorator==5.1.1
|
||||||
django==3.1.14
|
django==3.1.14
|
||||||
|
django-auth-ldap==4.1.0
|
||||||
django-extensions==3.1.5
|
django-extensions==3.1.5
|
||||||
|
exceptiongroup==1.2.0; python_version < "3.11"
|
||||||
executing==2.0.1
|
executing==2.0.1
|
||||||
idna==3.4
|
idna==3.6
|
||||||
ipython==8.17.2
|
ipython==8.18.1
|
||||||
jedi==0.19.1
|
jedi==0.19.1
|
||||||
matplotlib-inline==0.1.6
|
matplotlib-inline==0.1.6
|
||||||
mutagen==1.47.0
|
mutagen==1.47.0
|
||||||
mypy-extensions==1.0.0
|
mypy-extensions==1.0.0
|
||||||
parso==0.8.3
|
parso==0.8.3
|
||||||
pexpect==4.8.0
|
pexpect==4.9.0; sys_platform != "win32"
|
||||||
prompt-toolkit==3.0.40
|
prompt-toolkit==3.0.43
|
||||||
ptyprocess==0.7.0
|
ptyprocess==0.7.0; sys_platform != "win32"
|
||||||
pure-eval==0.2.2
|
pure-eval==0.2.2
|
||||||
pyasn1==0.5.0
|
pyasn1==0.5.1
|
||||||
pyasn1-modules==0.3.0
|
pyasn1-modules==0.3.0
|
||||||
|
pycparser==2.21; implementation_name != "cpython"
|
||||||
pycryptodomex==3.19.0
|
pycryptodomex==3.19.0
|
||||||
pygments==2.16.1
|
pygments==2.17.2
|
||||||
python-crontab==3.0.0
|
python-crontab==3.0.0
|
||||||
python-dateutil==2.8.2
|
python-dateutil==2.8.2
|
||||||
|
python-ldap==3.4.4
|
||||||
pytz==2023.3.post1
|
pytz==2023.3.post1
|
||||||
regex==2023.10.3
|
regex==2023.10.3
|
||||||
requests==2.31.0
|
requests==2.31.0
|
||||||
|
@ -37,10 +43,12 @@ six==1.16.0
|
||||||
sonic-client==1.0.0
|
sonic-client==1.0.0
|
||||||
sqlparse==0.4.4
|
sqlparse==0.4.4
|
||||||
stack-data==0.6.3
|
stack-data==0.6.3
|
||||||
traitlets==5.13.0
|
traitlets==5.14.0
|
||||||
|
typing-extensions==4.9.0; python_version < "3.11"
|
||||||
|
tzdata==2023.3; platform_system == "Windows"
|
||||||
tzlocal==5.2
|
tzlocal==5.2
|
||||||
urllib3==2.1.0
|
urllib3==2.1.0
|
||||||
w3lib==2.1.2
|
w3lib==2.1.2
|
||||||
wcwidth==0.2.10
|
wcwidth==0.2.12
|
||||||
websockets==12.0
|
websockets==12.0
|
||||||
yt-dlp==2023.11.14
|
yt-dlp==2023.11.16
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue