From 14df0cbb7c8e948c46a69a398af4ec2e502dc7dd Mon Sep 17 00:00:00 2001
From: jdcaballerov <743513+jdcaballerov@users.noreply.github.com>
Date: Wed, 20 Jan 2021 14:51:46 -0500
Subject: [PATCH] Update sonic.py

Sonic's buffer accepts 20,000 bytes, not 20,000 Unicode characters. Because the chunking here counts Unicode characters rather than bytes, sending 20,000 characters can overflow Sonic's buffer whenever the text contains multi-byte characters.
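A single UTF-8 encoded character can take up to 6 bytes in the original encoding scheme (at most 4 under RFC 3629), so sending fewer than 20,000 / 6 ≈ 3,333 characters per chunk should be OK; this patch rounds down further and sets the limit to 2,000.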
---
 archivebox/search/backends/sonic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/archivebox/search/backends/sonic.py b/archivebox/search/backends/sonic.py
index f0beaddd..f3ef6628 100644
--- a/archivebox/search/backends/sonic.py
+++ b/archivebox/search/backends/sonic.py
@@ -5,7 +5,7 @@ from sonic import IngestClient, SearchClient
 from archivebox.util import enforce_types
 from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD, SONIC_BUCKET, SONIC_COLLECTION
 
-MAX_SONIC_TEXT_LENGTH = 20000
+MAX_SONIC_TEXT_LENGTH = 2000
 
 @enforce_types
 def index(snapshot_id: str, texts: List[str]):