From 7144e0bdceec53d34f192d62697831faadcfa8b5 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 18 Aug 2020 18:40:19 -0400 Subject: [PATCH] search for node dependencies in output dir first --- archivebox/config/__init__.py | 2 +- archivebox/extractors/readability.py | 4 ++-- archivebox/extractors/singlefile.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py index 079c073f..fd424c2b 100644 --- a/archivebox/config/__init__.py +++ b/archivebox/config/__init__.py @@ -780,7 +780,7 @@ globals().update(CONFIG) os.environ["TZ"] = 'UTC' # add ./node_modules/.bin to $PATH so we can use node scripts in extractors -NODE_BIN_PATH = str((Path(CONFIG["OUTPUT_DIR"]) / 'node_modules' / '.bin').resolve()) +NODE_BIN_PATH = str((Path(CONFIG["OUTPUT_DIR"]).absolute() / 'node_modules' / '.bin')) sys.path.append(NODE_BIN_PATH) diff --git a/archivebox/extractors/readability.py b/archivebox/extractors/readability.py index f181160d..219402b5 100644 --- a/archivebox/extractors/readability.py +++ b/archivebox/extractors/readability.py @@ -17,7 +17,7 @@ from ..util import ( from ..config import ( TIMEOUT, SAVE_READABILITY, - READABILITY_BINARY, + DEPENDENCIES, READABILITY_VERSION, ) from ..logging_util import TimedProgress @@ -73,7 +73,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO temp_doc.close() cmd = [ - READABILITY_BINARY, + DEPENDENCIES['READABILITY_BINARY']['path'], temp_doc.name ] diff --git a/archivebox/extractors/singlefile.py b/archivebox/extractors/singlefile.py index 87e7d5fd..702e44a0 100644 --- a/archivebox/extractors/singlefile.py +++ b/archivebox/extractors/singlefile.py @@ -15,7 +15,7 @@ from ..util import ( from ..config import ( TIMEOUT, SAVE_SINGLEFILE, - SINGLEFILE_BINARY, + DEPENDENCIES, SINGLEFILE_VERSION, CHROME_BINARY, ) @@ -43,7 +43,7 @@ def save_singlefile(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOU # SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli cmd = [ - SINGLEFILE_BINARY, + DEPENDENCIES['SINGLEFILE_BINARY']['path'], '--browser-executable-path={}'.format(CHROME_BINARY), '--browser-args="{}"'.format(json.dumps(browser_args[1:])), link.url,