From 14df0cbb7c8e948c46a69a398af4ec2e502dc7dd Mon Sep 17 00:00:00 2001 From: jdcaballerov <743513+jdcaballerov@users.noreply.github.com> Date: Wed, 20 Jan 2021 14:51:46 -0500 Subject: [PATCH] Update sonic.py Sonic's buffer accepts 20,000 bytes, not 20,000 Unicode characters. Since the chunking here is done on Unicode characters, sending 20,000 characters can overflow Sonic's buffer. A UTF-8 encoded character can take up to 6 bytes (the historical maximum; RFC 3629 limits it to 4), so sending fewer than (20,000 / 6) characters, rounded down, should be OK. --- archivebox/search/backends/sonic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archivebox/search/backends/sonic.py b/archivebox/search/backends/sonic.py index f0beaddd..f3ef6628 100644 --- a/archivebox/search/backends/sonic.py +++ b/archivebox/search/backends/sonic.py @@ -5,7 +5,7 @@ from sonic import IngestClient, SearchClient from archivebox.util import enforce_types from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD, SONIC_BUCKET, SONIC_COLLECTION -MAX_SONIC_TEXT_LENGTH = 20000 +MAX_SONIC_TEXT_LENGTH = 2000 @enforce_types def index(snapshot_id: str, texts: List[str]):