mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-21 10:25:11 -04:00
major refactor + ability to handle http downloads
This commit is contained in:
parent
f31da350c5
commit
eb47155a12
12 changed files with 1325 additions and 659 deletions
147
config.py
147
config.py
|
@ -1,10 +1,8 @@
|
|||
import os
|
||||
import sys
|
||||
import time
|
||||
import shutil
|
||||
|
||||
from subprocess import run, PIPE, DEVNULL
|
||||
from multiprocessing import Process
|
||||
from subprocess import run, PIPE
|
||||
|
||||
# os.getenv('VARIABLE', 'DEFAULT') gets the value of environment
|
||||
# variable "VARIABLE" and if it is not set, sets it to 'DEFAULT'
|
||||
|
@ -12,8 +10,10 @@ from multiprocessing import Process
|
|||
# for boolean values, check to see if the string is 'true', and
|
||||
# if so, the python variable will be True
|
||||
|
||||
IS_TTY = sys.stdout.isatty()
|
||||
# *******************************************************************************
|
||||
# *** TO SET YOUR PREFERENCES, EDIT THE VALUES HERE, or use the 'env' command ***
|
||||
|
||||
IS_TTY = sys.stdout.isatty()
|
||||
USE_COLOR = os.getenv('USE_COLOR', str(IS_TTY) ).lower() == 'true'
|
||||
SHOW_PROGRESS = os.getenv('SHOW_PROGRESS', str(IS_TTY) ).lower() == 'true'
|
||||
FETCH_WGET = os.getenv('FETCH_WGET', 'True' ).lower() == 'true'
|
||||
|
@ -31,9 +31,12 @@ CHROME_BINARY = os.getenv('CHROME_BINARY', 'chromium-browser'
|
|||
WGET_BINARY = os.getenv('WGET_BINARY', 'wget' )
|
||||
WGET_USER_AGENT = os.getenv('WGET_USER_AGENT', None)
|
||||
TIMEOUT = int(os.getenv('TIMEOUT', '60'))
|
||||
LINK_INDEX_TEMPLATE = os.getenv('LINK_INDEX_TEMPLATE', 'templates/link_index.html')
|
||||
INDEX_TEMPLATE = os.getenv('INDEX_TEMPLATE', 'templates/index.html')
|
||||
INDEX_ROW_TEMPLATE = os.getenv('INDEX_ROW_TEMPLATE', 'templates/index_row.html')
|
||||
|
||||
# *******************************************************************************
|
||||
|
||||
TERM_WIDTH = shutil.get_terminal_size((100, 10)).columns
|
||||
ANSI = {
|
||||
'reset': '\033[00;00m',
|
||||
|
@ -50,6 +53,17 @@ if not USE_COLOR:
|
|||
# dont show colors if USE_COLOR is False
|
||||
ANSI = {k: '' for k in ANSI.keys()}
|
||||
|
||||
|
||||
ROOT_FOLDER = os.path.dirname(os.path.abspath(__file__))
|
||||
HTML_FOLDER = os.path.join(ARCHIVE_DIR, 'html')
|
||||
ARCHIVE_FOLDER = os.path.join(HTML_FOLDER, 'archive')
|
||||
try:
|
||||
GIT_SHA = run(["git", "rev-list", "-1", "HEAD", "./"], stdout=PIPE, cwd=ROOT_FOLDER).stdout.strip().decode()
|
||||
except Exception:
|
||||
GIT_SHA = None
|
||||
print('[!] Warning, you need git installed for some archiving features to save correct version numbers!')
|
||||
|
||||
|
||||
if sys.stdout.encoding.upper() != 'UTF-8':
|
||||
print('[X] Your system is running python3 scripts with a bad locale setting: {} (it should be UTF-8).'.format(sys.stdout.encoding))
|
||||
print(' To fix it, add the line "export PYTHONIOENCODING=utf8" to your ~/.bashrc file (without quotes)')
|
||||
|
@ -59,128 +73,3 @@ if sys.stdout.encoding.upper() != 'UTF-8':
|
|||
print('')
|
||||
print(' Alternatively, run this script with:')
|
||||
print(' env PYTHONIOENCODING=utf8 ./archive.py export.html')
|
||||
|
||||
### Util Functions
|
||||
|
||||
def check_dependencies():
|
||||
"""Check that all necessary dependencies are installed, and have valid versions"""
|
||||
|
||||
print('[*] Checking Dependencies:')
|
||||
|
||||
python_vers = float('{}.{}'.format(sys.version_info.major, sys.version_info.minor))
|
||||
if python_vers < 3.5:
|
||||
print('{}[X] Python version is not new enough: {} (>3.5 is required){}'.format(ANSI['red'], python_vers, ANSI['reset']))
|
||||
print(' See https://github.com/pirate/bookmark-archiver#troubleshooting for help upgrading your Python installation.')
|
||||
raise SystemExit(1)
|
||||
|
||||
if FETCH_PDF or FETCH_SCREENSHOT:
|
||||
if run(['which', CHROME_BINARY]).returncode:
|
||||
print('{}[X] Missing dependency: {}{}'.format(ANSI['red'], CHROME_BINARY, ANSI['reset']))
|
||||
print(' Run ./setup.sh, then confirm it was installed with: {} --version'.format(CHROME_BINARY))
|
||||
print(' See https://github.com/pirate/bookmark-archiver for help.')
|
||||
raise SystemExit(1)
|
||||
|
||||
# parse chrome --version e.g. Google Chrome 61.0.3114.0 canary / Chromium 59.0.3029.110 built on Ubuntu, running on Ubuntu 16.04
|
||||
try:
|
||||
result = run([CHROME_BINARY, '--version'], stdout=PIPE)
|
||||
version = result.stdout.decode('utf-8').replace('Google Chrome ', '').replace('Chromium ', '').split(' ', 1)[0].split('.', 1)[0] # TODO: regex might be better
|
||||
if int(version) < 59:
|
||||
print('{red}[X] Chrome version must be 59 or greater for headless PDF and screenshot saving{reset}'.format(**ANSI))
|
||||
print(' See https://github.com/pirate/bookmark-archiver for help.')
|
||||
raise SystemExit(1)
|
||||
except (TypeError, OSError):
|
||||
print('{red}[X] Failed to parse Chrome version, is it installed properly?{reset}'.format(**ANSI))
|
||||
print(' Run ./setup.sh, then confirm it was installed with: {} --version'.format(CHROME_BINARY))
|
||||
print(' See https://github.com/pirate/bookmark-archiver for help.')
|
||||
raise SystemExit(1)
|
||||
|
||||
if FETCH_WGET:
|
||||
if run(['which', 'wget']).returncode or run(['wget', '--version'], stdout=DEVNULL).returncode:
|
||||
print('{red}[X] Missing dependency: wget{reset}'.format(**ANSI))
|
||||
print(' Run ./setup.sh, then confirm it was installed with: {} --version'.format('wget'))
|
||||
print(' See https://github.com/pirate/bookmark-archiver for help.')
|
||||
raise SystemExit(1)
|
||||
|
||||
if FETCH_FAVICON or SUBMIT_ARCHIVE_DOT_ORG:
|
||||
if run(['which', 'curl']).returncode or run(['curl', '--version'], stdout=DEVNULL).returncode:
|
||||
print('{red}[X] Missing dependency: curl{reset}'.format(**ANSI))
|
||||
print(' Run ./setup.sh, then confirm it was installed with: {} --version'.format('curl'))
|
||||
print(' See https://github.com/pirate/bookmark-archiver for help.')
|
||||
raise SystemExit(1)
|
||||
|
||||
if FETCH_AUDIO or FETCH_VIDEO:
|
||||
if run(['which', 'youtube-dl']).returncode or run(['youtube-dl', '--version'], stdout=DEVNULL).returncode:
|
||||
print('{red}[X] Missing dependency: youtube-dl{reset}'.format(**ANSI))
|
||||
print(' Run ./setup.sh, then confirm it was installed with: {} --version'.format('youtube-dl'))
|
||||
print(' See https://github.com/pirate/bookmark-archiver for help.')
|
||||
raise SystemExit(1)
|
||||
|
||||
|
||||
def chmod_file(path, cwd='.', permissions=ARCHIVE_PERMISSIONS, timeout=30):
|
||||
"""chmod -R <permissions> <cwd>/<path>"""
|
||||
|
||||
if not os.path.exists(os.path.join(cwd, path)):
|
||||
raise Exception('Failed to chmod: {} does not exist (did the previous step fail?)'.format(path))
|
||||
|
||||
chmod_result = run(['chmod', '-R', permissions, path], cwd=cwd, stdout=DEVNULL, stderr=PIPE, timeout=timeout)
|
||||
if chmod_result.returncode == 1:
|
||||
print(' ', chmod_result.stderr.decode())
|
||||
raise Exception('Failed to chmod {}/{}'.format(cwd, path))
|
||||
|
||||
|
||||
def progress(seconds=TIMEOUT, prefix=''):
|
||||
"""Show a (subprocess-controlled) progress bar with a <seconds> timeout,
|
||||
returns end() function to instantly finish the progress
|
||||
"""
|
||||
|
||||
if not SHOW_PROGRESS:
|
||||
return lambda: None
|
||||
|
||||
chunk = '█' if sys.stdout.encoding == 'UTF-8' else '#'
|
||||
chunks = TERM_WIDTH - len(prefix) - 20 # number of progress chunks to show (aka max bar width)
|
||||
|
||||
def progress_bar(seconds=seconds, prefix=prefix):
|
||||
"""show timer in the form of progress bar, with percentage and seconds remaining"""
|
||||
try:
|
||||
for s in range(seconds * chunks):
|
||||
progress = s / chunks / seconds * 100
|
||||
bar_width = round(progress/(100/chunks))
|
||||
|
||||
# ████████████████████ 0.9% (1/60sec)
|
||||
sys.stdout.write('\r{0}{1}{2}{3} {4}% ({5}/{6}sec)'.format(
|
||||
prefix,
|
||||
ANSI['green'],
|
||||
(chunk * bar_width).ljust(chunks),
|
||||
ANSI['reset'],
|
||||
round(progress, 1),
|
||||
round(s/chunks),
|
||||
seconds,
|
||||
))
|
||||
sys.stdout.flush()
|
||||
time.sleep(1 / chunks)
|
||||
|
||||
# ██████████████████████████████████ 100.0% (60/60sec)
|
||||
sys.stdout.write('\r{0}{1}{2}{3} {4}% ({5}/{6}sec)\n'.format(
|
||||
prefix,
|
||||
ANSI['red'],
|
||||
chunk * chunks,
|
||||
ANSI['reset'],
|
||||
100.0,
|
||||
seconds,
|
||||
seconds,
|
||||
))
|
||||
sys.stdout.flush()
|
||||
except KeyboardInterrupt:
|
||||
print()
|
||||
pass
|
||||
|
||||
p = Process(target=progress_bar)
|
||||
p.start()
|
||||
|
||||
def end():
|
||||
"""immediately finish progress and clear the progressbar line"""
|
||||
p.terminate()
|
||||
sys.stdout.write('\r{}{}\r'.format((' ' * TERM_WIDTH), ANSI['reset'])) # clear whole terminal line
|
||||
sys.stdout.flush()
|
||||
|
||||
return end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue