# CLI entry point for `archivebox setup`.
#
# No docstring is written here on purpose: the decorator below is handed
# setup.__doc__, and the same text is used as the argparse description, so the
# help text comes from the underlying `setup` function.
#
#   args  -- argv-style list of CLI arguments (None is treated as "no arguments")
#   stdin -- input stream handed to reject_stdin together with the command name
#   pwd   -- data directory to operate on; OUTPUT_DIR is used when it is falsy
@docstring(setup.__doc__)
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
    cli_parser = argparse.ArgumentParser(
        formatter_class=SmartFormatter,
        add_help=True,
        description=setup.__doc__,
        prog=__command__,
    )
    # A `--force` flag ("Overwrite any existing packages that conflict with the
    # ones ArchiveBox is trying to install") is planned but not wired up yet,
    # so the parser currently only provides --help and rejects unknown options.
    argv = args or ()
    parsed_args = cli_parser.parse_args(argv)  # kept for the future --force flag
    reject_stdin(__command__, stdin)

    target_dir = pwd or OUTPUT_DIR
    setup(out_dir=target_dir)
# NOTE: the docstring of setup() is deliberately left as a single line: the
# `archivebox setup` CLI wrapper passes setup.__doc__ to argparse as the command
# description, so it is user-facing help text. Details live in comments instead.
#
# Steps performed (each one is skipped when its USE_* config flag is off):
#   1. youtube_dl  -- pip install/upgrade, then print its version
#   2. chromium    -- only when CHROME_VERSION is empty: pip install playwright,
#                     let playwright download chromium, then store the reported
#                     executable path via config(CHROME_BINARY=...)
#   3. npm extras  -- copy the package.json shipped in PACKAGE_DIR into the data
#                     dir, run `npm install` there, then remove the copy again
#
# Raises SystemExit(1) as soon as any step fails.
@enforce_types
def setup(out_dir: Path=OUTPUT_DIR) -> None:
    """Automatically install all ArchiveBox dependencies and extras"""

    check_data_folder(out_dir=out_dir)

    stderr('[+] Installing enabled ArchiveBox dependencies automatically...', color='green')

    # NOTE(review): whether run_shell raises on a non-zero exit status is not
    # visible from this function -- confirm, otherwise a failed pip/npm run
    # would still be reported as a success below.

    stderr('\n Installing YOUTUBEDL_BINARY automatically using pip...')
    if USE_YOUTUBEDL:
        try:
            run_shell([
                PYTHON_BINARY,
                '-m', 'pip',
                'install', '--upgrade', 'youtube_dl',
            ], capture_output=False, cwd=out_dir)
            run_shell([PYTHON_BINARY, '-m', 'youtube_dl', '--version'], capture_output=False, cwd=out_dir)
        # Ctrl-C is reported like any other failure, but a SystemExit raised by
        # a helper is no longer swallowed and re-reported with a second message.
        except (Exception, KeyboardInterrupt) as e:
            stderr(f'[X] Failed to install python packages: {e}', color='red')
            raise SystemExit(1)

    stderr('\n Installing CHROME_BINARY automatically using playwright...')
    if USE_CHROME:
        if CHROME_VERSION:
            print(f'{CHROME_VERSION} is already installed', CHROME_BINARY)
        else:
            try:
                run_shell([PYTHON_BINARY, '-m', 'pip', 'install', '--upgrade', 'playwright'], capture_output=False, cwd=out_dir)
                run_shell([PYTHON_BINARY, '-m', 'playwright', 'install', 'chromium'], capture_output=False, cwd=out_dir)
                proc = run_shell([PYTHON_BINARY, '-c', 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)'], capture_output=True, text=True, cwd=out_dir)
                new_chrome_binary = proc.stdout.strip()
                # Explicit check instead of `assert`, which is stripped under -O
                # and would let an empty path be written into the config.
                if not new_chrome_binary:
                    raise ValueError('CHROME_BINARY must contain a path')
                config(f'CHROME_BINARY={new_chrome_binary}', set=True, out_dir=out_dir)
            except (Exception, KeyboardInterrupt) as e:
                stderr(f'[X] Failed to install chromium using playwright: {e.__class__.__name__} {e}', color='red')
                raise SystemExit(1)

    stderr('\n Installing SINGLEFILE_BINARY, READABILITY_BINARY, MERCURY_BINARY automatically using npm...')
    if USE_NODE:
        # Checked outside the try block so this exit is not caught below and
        # followed by a misleading "Failed to install npm packages" message.
        if not NODE_VERSION:
            stderr('[X] You must first install node using your system package manager', color='red')
            hint('Or run: curl -sL https://deb.nodesource.com/setup_15.x | sudo -E bash -')
            raise SystemExit(1)

        data_dir = Path(out_dir)
        package_json = data_dir / 'package.json'
        try:
            # clear out old npm package locations
            # (npm names its lockfile package-lock.json, with a hyphen)
            for stale_path in (package_json, data_dir / 'package-lock.json'):
                if stale_path.exists():
                    os.remove(stale_path)

            shutil.copyfile(PACKAGE_DIR / 'package.json', package_json)
            try:
                run_shell([
                    'npm',
                    'install',
                    '--prefix', out_dir,
                    '--no-save',
                    '--no-audit',
                    '--no-fund',
                    '--loglevel', 'error',
                ], capture_output=False, cwd=out_dir)
            finally:
                # The copied manifest is only scaffolding for this install;
                # remove it even when npm fails so it never lingers in the data dir.
                os.remove(package_json)
        except (Exception, KeyboardInterrupt) as e:
            stderr(f'[X] Failed to install npm packages: {e}', color='red')
            hint(f'Try deleting {out_dir}/node_modules and running it again')
            raise SystemExit(1)

    stderr('\n[√] Installed ArchiveBox dependencies successfully.', color='green')
    hint('To see all the installed package versions run: archivebox --version')