diff --git a/archivebox/config.py b/archivebox/config.py index a3444f07..d3e34151 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -161,6 +161,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = { 'USE_CHROME': {'type': bool, 'default': True}, 'USE_NODE': {'type': bool, 'default': True}, 'USE_YOUTUBEDL': {'type': bool, 'default': True}, + 'USE_RIPGREP': {'type': bool, 'default': True}, 'CURL_BINARY': {'type': str, 'default': 'curl'}, 'GIT_BINARY': {'type': str, 'default': 'git'}, @@ -170,6 +171,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = { 'MERCURY_BINARY': {'type': str, 'default': 'mercury-parser'}, 'YOUTUBEDL_BINARY': {'type': str, 'default': 'youtube-dl'}, 'NODE_BINARY': {'type': str, 'default': 'node'}, + 'RIPGREP_BINARY': {'type': str, 'default': 'rg'}, 'CHROME_BINARY': {'type': str, 'default': None}, 'POCKET_CONSUMER_KEY': {'type': str, 'default': None}, @@ -312,6 +314,8 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = { 'SAVE_WARC': {'default': lambda c: c['USE_WGET'] and c['SAVE_WARC']}, 'WGET_ARGS': {'default': lambda c: c['WGET_ARGS'] or []}, + 'USE_RIPGREP': {'default': lambda c: c['USE_RIPGREP']}, + 'RIPGREP_VERSION': {'default': lambda c: bin_version(c['RIPGREP_BINARY']) if c['USE_RIPGREP'] else None}, 'USE_SINGLEFILE': {'default': lambda c: c['USE_SINGLEFILE'] and c['SAVE_SINGLEFILE']}, 'SINGLEFILE_VERSION': {'default': lambda c: bin_version(c['SINGLEFILE_BINARY']) if c['USE_SINGLEFILE'] else None}, @@ -827,6 +831,13 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue: 'enabled': config['USE_CHROME'], 'is_valid': bool(config['CHROME_VERSION']), }, + 'RIPGREP_BINARY': { + 'path': bin_path(config['RIPGREP_BINARY']), + 'version': config['RIPGREP_VERSION'], + 'hash': bin_hash(config['RIPGREP_BINARY']), + 'enabled': config['USE_RIPGREP'], + 'is_valid': bool(config['RIPGREP_VERSION']), + }, } def get_chrome_info(config: ConfigDict) -> ConfigValue: diff --git a/archivebox/search/backends/ripgrep.py b/archivebox/search/backends/ripgrep.py index e2e03c9b..b37eca20 100644 --- a/archivebox/search/backends/ripgrep.py +++ b/archivebox/search/backends/ripgrep.py @@ -2,7 +2,7 @@ import re from subprocess import run, PIPE, DEVNULL from typing import List, Generator -from archivebox.config import ARCHIVE_DIR +from archivebox.config import ARCHIVE_DIR, RIPGREP_BINARY from archivebox.util import enforce_types RG_IGNORE_EXTENSIONS = ('css','js','orig','svg') @@ -26,7 +26,7 @@ def flush(snapshot_ids: Generator[str, None, None]): @enforce_types def search(text: str) -> List[str]: - is_rg_installed = run(['which', 'rg'], stdout=DEVNULL, stderr=DEVNULL) + is_rg_installed = run(['which', RIPGREP_BINARY], stdout=DEVNULL, stderr=DEVNULL) if is_rg_installed.returncode: raise Exception("ripgrep binary not found, install ripgrep to use this search backend")