mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-06-07 09:54:57 -04:00
bail out on sonic indexing after 5 errors
This commit is contained in:
parent
89158d5dc7
commit
3093057675
1 changed files with 10 additions and 3 deletions
|
@ -7,10 +7,11 @@ from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEA
|
||||||
|
|
||||||
MAX_SONIC_TEXT_TOTAL_LENGTH = 100000000 # dont index more than 100 million characters per text
|
MAX_SONIC_TEXT_TOTAL_LENGTH = 100000000 # dont index more than 100 million characters per text
|
||||||
MAX_SONIC_TEXT_CHUNK_LENGTH = 2000 # dont index more than 2000 characters per chunk
|
MAX_SONIC_TEXT_CHUNK_LENGTH = 2000 # dont index more than 2000 characters per chunk
|
||||||
|
MAX_SONIC_ERRORS_BEFORE_ABORT = 5
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def index(snapshot_id: str, texts: List[str]):
|
def index(snapshot_id: str, texts: List[str]):
|
||||||
|
error_count = 0
|
||||||
with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
|
with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
|
||||||
for text in texts:
|
for text in texts:
|
||||||
chunks = (
|
chunks = (
|
||||||
|
@ -21,8 +22,14 @@ def index(snapshot_id: str, texts: List[str]):
|
||||||
MAX_SONIC_TEXT_CHUNK_LENGTH,
|
MAX_SONIC_TEXT_CHUNK_LENGTH,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
for chunk in chunks:
|
for chunk in chunks:
|
||||||
ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(chunk))
|
ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(chunk))
|
||||||
|
except Exception as err:
|
||||||
|
print(f'[!] Sonic search backend threw an error while indexing: {err.__class__.__name__} {err}')
|
||||||
|
error_count += 1
|
||||||
|
if error_count > MAX_SONIC_ERRORS_BEFORE_ABORT:
|
||||||
|
raise
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def search(text: str) -> List[str]:
|
def search(text: str) -> List[str]:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue