mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-12 22:25:44 -04:00
feat: Add support for singlefile in docker
This commit is contained in:
parent
5b6eb5e4ad
commit
06d0e9de6c
3 changed files with 35 additions and 16 deletions
45
Dockerfile
45
Dockerfile
|
@ -10,8 +10,8 @@
|
||||||
FROM python:3.8-slim-buster
|
FROM python:3.8-slim-buster
|
||||||
|
|
||||||
LABEL name="archivebox" \
|
LABEL name="archivebox" \
|
||||||
maintainer="Nick Sweeting <archivebox-git@sweeting.me>" \
|
maintainer="Nick Sweeting <archivebox-git@sweeting.me>" \
|
||||||
description="All-in-one personal internet archiving container"
|
description="All-in-one personal internet archiving container"
|
||||||
|
|
||||||
ENV TZ=UTC \
|
ENV TZ=UTC \
|
||||||
LANGUAGE=en_US:en \
|
LANGUAGE=en_US:en \
|
||||||
|
@ -22,28 +22,41 @@ ENV TZ=UTC \
|
||||||
APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \
|
APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \
|
||||||
CODE_PATH=/app \
|
CODE_PATH=/app \
|
||||||
VENV_PATH=/venv \
|
VENV_PATH=/venv \
|
||||||
DATA_PATH=/data
|
DATA_PATH=/data \
|
||||||
|
EXTRA_PATH=/extra
|
||||||
|
|
||||||
# First install CLI utils and base deps, then Chrome + Fonts
|
# First install CLI utils and base deps, then Chrome + Fonts + nodejs
|
||||||
RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections \
|
RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections \
|
||||||
&& apt-get update -qq \
|
&& apt-get update -qq \
|
||||||
&& apt-get install -qq -y --no-install-recommends \
|
&& apt-get install -qq -y --no-install-recommends \
|
||||||
apt-transport-https ca-certificates apt-utils gnupg gosu gnupg2 libgconf-2-4 zlib1g-dev \
|
apt-transport-https ca-certificates apt-utils gnupg gosu gnupg2 libgconf-2-4 zlib1g-dev \
|
||||||
dumb-init jq git wget curl youtube-dl ffmpeg \
|
dumb-init jq git wget curl youtube-dl ffmpeg \
|
||||||
&& curl -sSL "https://dl.google.com/linux/linux_signing_key.pub" | apt-key add - \
|
&& curl -sSL "https://dl.google.com/linux/linux_signing_key.pub" | apt-key add - \
|
||||||
&& echo "deb https://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
|
&& echo "deb https://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
|
||||||
|
&& curl -sL https://deb.nodesource.com/setup_14.x | bash - \
|
||||||
&& apt-get update -qq \
|
&& apt-get update -qq \
|
||||||
&& apt-get install -qq -y --no-install-recommends \
|
&& apt-get install -qq -y --no-install-recommends \
|
||||||
google-chrome-stable \
|
google-chrome-stable \
|
||||||
fontconfig \
|
fontconfig \
|
||||||
fonts-ipafont-gothic \
|
fonts-ipafont-gothic \
|
||||||
fonts-wqy-zenhei \
|
fonts-wqy-zenhei \
|
||||||
fonts-thai-tlwg \
|
fonts-thai-tlwg \
|
||||||
fonts-kacst \
|
fonts-kacst \
|
||||||
fonts-symbola \
|
fonts-symbola \
|
||||||
fonts-noto \
|
fonts-noto \
|
||||||
fonts-freefont-ttf \
|
fonts-freefont-ttf \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
nodejs \
|
||||||
|
unzip \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Download SingleFile, install its CLI dependencies, and symlink the CLI into /bin so archivebox can find it
|
||||||
|
|
||||||
|
WORKDIR "$EXTRA_PATH"
|
||||||
|
RUN wget -qO - https://github.com/gildas-lormeau/SingleFile/archive/master.zip > SingleFile.zip \
|
||||||
|
&& unzip -q SingleFile.zip \
|
||||||
|
&& npm install --prefix SingleFile-master/cli --production > /dev/null 2>&1 \
|
||||||
|
&& chmod +x SingleFile-master/cli/single-file \
|
||||||
|
&& ln -s "$EXTRA_PATH/SingleFile-master/cli/single-file" "/bin/single-file"
|
||||||
|
|
||||||
# Run everything from here on out as non-privileged user
|
# Run everything from here on out as non-privileged user
|
||||||
RUN groupadd --system archivebox \
|
RUN groupadd --system archivebox \
|
||||||
|
|
|
@ -3,11 +3,13 @@ __package__ = 'archivebox.extractors'
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
import json
|
||||||
|
|
||||||
from ..index.schema import Link, ArchiveResult, ArchiveError
|
from ..index.schema import Link, ArchiveResult, ArchiveError
|
||||||
from ..system import run, chmod_file
|
from ..system import run, chmod_file
|
||||||
from ..util import (
|
from ..util import (
|
||||||
enforce_types,
|
enforce_types,
|
||||||
|
chrome_args
|
||||||
)
|
)
|
||||||
from ..config import (
|
from ..config import (
|
||||||
TIMEOUT,
|
TIMEOUT,
|
||||||
|
@ -34,10 +36,13 @@ def save_singlefile(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOU
|
||||||
out_dir = out_dir or link.link_dir
|
out_dir = out_dir or link.link_dir
|
||||||
output = str(Path(out_dir).absolute() / "singlefile.html")
|
output = str(Path(out_dir).absolute() / "singlefile.html")
|
||||||
|
|
||||||
|
browser_args = chrome_args(TIMEOUT=0)
|
||||||
|
|
||||||
# SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli
|
# SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli
|
||||||
cmd = [
|
cmd = [
|
||||||
SINGLEFILE_BINARY,
|
SINGLEFILE_BINARY,
|
||||||
'--browser-executable-path={}'.format(CHROME_BINARY),
|
'--browser-executable-path={}'.format(CHROME_BINARY),
|
||||||
|
'--browser-args="{}"'.format(json.dumps(browser_args[1:])),
|
||||||
link.url,
|
link.url,
|
||||||
output
|
output
|
||||||
]
|
]
|
||||||
|
|
|
@ -518,6 +518,7 @@ def printable_folder_status(name: str, folder: Dict) -> str:
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def printable_dependency_version(name: str, dependency: Dict) -> str:
|
def printable_dependency_version(name: str, dependency: Dict) -> str:
|
||||||
|
version = None
|
||||||
if dependency['enabled']:
|
if dependency['enabled']:
|
||||||
if dependency['is_valid']:
|
if dependency['is_valid']:
|
||||||
color, symbol, note, version = 'green', '√', 'valid', ''
|
color, symbol, note, version = 'green', '√', 'valid', ''
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue