From c2c01af3adfd69c1984b5c6b2cdc1aa59b08c32b Mon Sep 17 00:00:00 2001 From: JDC Date: Thu, 19 Nov 2020 08:06:13 -0500 Subject: [PATCH] Add config for search backend --- archivebox/config.py | 14 +++++++++++++- archivebox/search/__init__.py | 11 ++++------- archivebox/search/backends/sonic.py | 11 ++++++----- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/archivebox/config.py b/archivebox/config.py index 47049342..0ca2d7d9 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -139,6 +139,18 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = { 'GIT_ARGS': {'type': list, 'default': ['--recursive']}, }, + 'SEARCH_BACKEND_CONFIG' : { + 'USE_INDEXING_BACKEND': {'type': bool, 'default': True}, + 'USE_SEARCHING_BACKEND': {'type': bool, 'default': True}, + 'SEARCH_BACKEND_ENGINE': {'type': str, 'default': 'sonic'}, + 'SEARCH_BACKEND_HOST_NAME': {'type': str, 'default': 'localhost'}, + 'SEARCH_BACKEND_PORT': {'type': int, 'default': 1491}, + 'SEARCH_BACKEND_PASSWORD': {'type': str, 'default': 'SecretPassword'}, + # SONIC + 'SONIC_BUCKET': {'type': str, 'default': 'archivebox'}, + 'SONIC_COLLECTION': {'type': str, 'default': 'snapshots'}, + }, + 'DEPENDENCY_CONFIG': { 'USE_CURL': {'type': bool, 'default': True}, 'USE_WGET': {'type': bool, 'default': True}, @@ -149,7 +161,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = { 'USE_CHROME': {'type': bool, 'default': True}, 'USE_NODE': {'type': bool, 'default': True}, 'USE_YOUTUBEDL': {'type': bool, 'default': True}, - + 'CURL_BINARY': {'type': str, 'default': 'curl'}, 'GIT_BINARY': {'type': str, 'default': 'git'}, 'WGET_BINARY': {'type': str, 'default': 'wget'}, diff --git a/archivebox/search/__init__.py b/archivebox/search/__init__.py index 6e604224..7db4af46 100644 --- a/archivebox/search/__init__.py +++ b/archivebox/search/__init__.py @@ -5,19 +5,16 @@ from importlib import import_module from archivebox.index.schema import Link from archivebox.util import enforce_types -from archivebox.config import setup_django, OUTPUT_DIR - +from archivebox.config import setup_django, OUTPUT_DIR, USE_INDEXING_BACKEND, USE_SEARCHING_BACKEND, SEARCH_BACKEND_ENGINE def indexing_enabled(): - return True - # return FULLTEXT_INDEXING_ENABLED + return USE_INDEXING_BACKEND def search_backend_enabled(): - return True - # return FULLTEXT_SEARCH_ENABLED + return USE_SEARCHING_BACKEND def get_backend(): - return 'search.backends.sonic' + return f'search.backends.{SEARCH_BACKEND_ENGINE}' def import_backend(): backend_string = get_backend() diff --git a/archivebox/search/backends/sonic.py b/archivebox/search/backends/sonic.py index 28725f27..e062f9e1 100644 --- a/archivebox/search/backends/sonic.py +++ b/archivebox/search/backends/sonic.py @@ -3,17 +3,18 @@ from typing import List from sonic import IngestClient, SearchClient from archivebox.util import enforce_types +from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD, SONIC_BUCKET, SONIC_COLLECTION + @enforce_types def index(snapshot_id: str, texts: List[str]): - # TODO add variables to localhost, port, password, bucket, collection - with IngestClient("localhost", 1491, "SecretPassword") as ingestcl: + with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl: for text in texts: - ingestcl.push("archivebox", "snapshots", snapshot_id, str(text)) + ingestcl.push(SONIC_BUCKET, SONIC_COLLECTION, snapshot_id, str(text)) @enforce_types def search(text: str) -> List: - with SearchClient("localhost", 1491, "SecretPassword") as querycl: - snap_ids = querycl.query("archivebox", "snapshots", text) + with SearchClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as querycl: + snap_ids = querycl.query(SONIC_BUCKET, SONIC_COLLECTION, text) return snap_ids \ No newline at end of file