mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 06:34:25 -04:00
Implement flush for search backend after remove command
This commit is contained in:
parent
c2c01af3ad
commit
47daa038eb
5 changed files with 21 additions and 7 deletions
|
@ -147,8 +147,8 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
|
||||||
'SEARCH_BACKEND_PORT': {'type': int, 'default': 1491},
|
'SEARCH_BACKEND_PORT': {'type': int, 'default': 1491},
|
||||||
'SEARCH_BACKEND_PASSWORD': {'type': str, 'default': 'SecretPassword'},
|
'SEARCH_BACKEND_PASSWORD': {'type': str, 'default': 'SecretPassword'},
|
||||||
# SONIC
|
# SONIC
|
||||||
'SONIC_BUCKET': {'type': str, 'default': 'archivebox'},
|
'SONIC_COLLECTION': {'type': str, 'default': 'archivebox'},
|
||||||
'SONIC_COLLECTION': {'type': str, 'default': 'snapshots'},
|
'SONIC_BUCKET': {'type': str, 'default': 'snapshots'},
|
||||||
},
|
},
|
||||||
|
|
||||||
'DEPENDENCY_CONFIG': {
|
'DEPENDENCY_CONFIG': {
|
||||||
|
|
|
@ -18,7 +18,7 @@ class SearchResultsAdminMixin(object):
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
messages.add_message(request, messages.WARNING, f'Error from the search backend, only showing results from default admin search fields - Error: {err}')
|
messages.add_message(request, messages.WARNING, f'Error from the search backend, only showing results from default admin search fields - Error: {err}')
|
||||||
else:
|
else:
|
||||||
qsearch = queryset.filter(id__in=snapshot_ids)
|
qsearch = queryset.filter(pk__in=snapshot_ids)
|
||||||
qs |= qsearch
|
qs |= qsearch
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
|
|
|
@ -115,6 +115,7 @@ from .logging_util import (
|
||||||
printable_dependency_version,
|
printable_dependency_version,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from .search import flush_search_index
|
||||||
|
|
||||||
ALLOWED_IN_OUTPUT_DIR = {
|
ALLOWED_IN_OUTPUT_DIR = {
|
||||||
'lost+found',
|
'lost+found',
|
||||||
|
@ -665,6 +666,7 @@ def remove(filter_str: Optional[str]=None,
|
||||||
to_remove = snapshots.count()
|
to_remove = snapshots.count()
|
||||||
|
|
||||||
remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir)
|
remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir)
|
||||||
|
flush_search_index(snapshot_ids=[str(pk) for pk in snapshots.values_list('pk',flat=True)])
|
||||||
all_snapshots = load_main_index(out_dir=out_dir)
|
all_snapshots = load_main_index(out_dir=out_dir)
|
||||||
log_removal_finished(all_snapshots.count(), to_remove)
|
log_removal_finished(all_snapshots.count(), to_remove)
|
||||||
|
|
||||||
|
|
|
@ -45,4 +45,11 @@ def query_search_index(text: str) -> List:
|
||||||
return backend.search(text)
|
return backend.search(text)
|
||||||
else:
|
else:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
@enforce_types
def flush_search_index(snapshot_ids: List[str]):
    """Flush the given snapshot ids from the configured search backend.

    Does nothing when indexing is disabled or when ``snapshot_ids`` is
    empty; otherwise the ids are handed to the ``flush`` function of the
    backend module returned by ``import_backend()``.
    """
    # Same short-circuit order as before: the config check runs first, and
    # the backend is only imported when there is something to flush.
    if indexing_enabled() and snapshot_ids:
        import_backend().flush(snapshot_ids)
|
||||||
|
|
|
@ -10,11 +10,16 @@ from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEA
|
||||||
def index(snapshot_id: str, texts: List[str]):
|
def index(snapshot_id: str, texts: List[str]):
|
||||||
with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
|
with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
|
||||||
for text in texts:
|
for text in texts:
|
||||||
ingestcl.push(SONIC_BUCKET, SONIC_COLLECTION, snapshot_id, str(text))
|
ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(text))
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def search(text: str) -> List:
|
def search(text: str) -> List:
|
||||||
with SearchClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as querycl:
|
with SearchClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as querycl:
|
||||||
snap_ids = querycl.query(SONIC_BUCKET, SONIC_COLLECTION, text)
|
snap_ids = querycl.query(SONIC_COLLECTION, SONIC_BUCKET, text)
|
||||||
return snap_ids
|
return snap_ids
|
||||||
|
|
||||||
|
@enforce_types
def flush(snapshot_ids: List[str]):
    """Flush all indexed text for the given snapshot ids from Sonic.

    Issues one ``flush_object`` call per id over a single ingest
    connection, addressing objects by ``SONIC_COLLECTION`` and
    ``SONIC_BUCKET``.

    Args:
        snapshot_ids: ids of the snapshots whose indexed objects should be
            dropped. Each id is passed through ``str()`` before being sent.
    """
    if not snapshot_ids:
        # Nothing to flush: do not open a backend connection for a no-op.
        return

    with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
        # `snapshot_id` rather than `id`, so the builtin is not shadowed.
        for snapshot_id in snapshot_ids:
            ingestcl.flush_object(SONIC_COLLECTION, SONIC_BUCKET, str(snapshot_id))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue