diff --git a/Dockerfile b/Dockerfile
index 33d4a488..20a410e2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -46,7 +46,7 @@ RUN apt-get update -qq \
 # Install apt dependencies
 RUN apt-get update -qq \
     && apt-get install -qq -y --no-install-recommends \
-        wget curl chromium git ffmpeg youtube-dl \
+        wget curl chromium git ffmpeg youtube-dl ripgrep \
        fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
     && rm -rf /var/lib/apt/lists/*
 
diff --git a/archivebox.egg-info b/archivebox.egg-info
deleted file mode 120000
index 8ce20dd2..00000000
--- a/archivebox.egg-info
+++ /dev/null
@@ -1 +0,0 @@
-pip_dist/archivebox.egg-info
\ No newline at end of file
diff --git a/archivebox/cli/archivebox_list.py b/archivebox/cli/archivebox_list.py
index 140810a6..3838cf60 100644
--- a/archivebox/cli/archivebox_list.py
+++ b/archivebox/cli/archivebox_list.py
@@ -98,7 +98,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     parser.add_argument(
         '--filter-type',
         type=str,
-        choices=('exact', 'substring', 'domain', 'regex','tag'),
+        choices=('exact', 'substring', 'domain', 'regex', 'tag', 'search'),
         default='exact',
         help='Type of pattern matching to use when filtering URLs',
     )
diff --git a/archivebox/cli/archivebox_update.py b/archivebox/cli/archivebox_update.py
index aa8cae1b..6748096e 100644
--- a/archivebox/cli/archivebox_update.py
+++ b/archivebox/cli/archivebox_update.py
@@ -91,7 +91,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     parser.add_argument(
         '--filter-type',
         type=str,
-        choices=('exact', 'substring', 'domain', 'regex'),
+        choices=('exact', 'substring', 'domain', 'regex', 'tag', 'search'),
         default='exact',
         help='Type of pattern matching to use when filtering URLs',
     )
diff --git a/archivebox/config.py b/archivebox/config.py
index 47049342..846df0c9 100644
--- a/archivebox/config.py
+++ b/archivebox/config.py
@@ -139,6 +139,18 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
         'GIT_ARGS': {'type': list, 'default': ['--recursive']},
     },
 
+    'SEARCH_BACKEND_CONFIG' : {
+        'USE_INDEXING_BACKEND': {'type': bool, 'default': True},
+        'USE_SEARCHING_BACKEND': {'type': bool, 'default': True},
+        'SEARCH_BACKEND_ENGINE': {'type': str, 'default': 'ripgrep'},
+        'SEARCH_BACKEND_HOST_NAME': {'type': str, 'default': 'localhost'},
+        'SEARCH_BACKEND_PORT': {'type': int, 'default': 1491},
+        'SEARCH_BACKEND_PASSWORD': {'type': str, 'default': 'SecretPassword'},
+        # SONIC
+        'SONIC_COLLECTION': {'type': str, 'default': 'archivebox'},
+        'SONIC_BUCKET': {'type': str, 'default': 'snapshots'},
+    },
+
     'DEPENDENCY_CONFIG': {
         'USE_CURL': {'type': bool, 'default': True},
         'USE_WGET': {'type': bool, 'default': True},
@@ -149,7 +161,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
         'USE_CHROME': {'type': bool, 'default': True},
         'USE_NODE': {'type': bool, 'default': True},
         'USE_YOUTUBEDL': {'type': bool, 'default': True},
-        
+
         'CURL_BINARY': {'type': str, 'default': 'curl'},
         'GIT_BINARY': {'type': str, 'default': 'git'},
         'WGET_BINARY': {'type': str, 'default': 'wget'},
diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py
index 5d3db409..e078bdaf 100644
--- a/archivebox/core/admin.py
+++ b/archivebox/core/admin.py
@@ -14,6 +14,9 @@ from django import forms
 
 from core.models import Snapshot, Tag
 from core.forms import AddLinkForm, TagField
+from core.utils import get_icons
+from core.mixins import SearchResultsAdminMixin
+
 from index.html import snapshot_icons
 from util import htmldecode, urldecode, ansi_to_html
 from logging_util import printable_filesize
@@ -82,7 +85,7 @@ class SnapshotAdminForm(forms.ModelForm):
         return instance
 
 
-class SnapshotAdmin(admin.ModelAdmin):
+class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
     list_display = ('added', 'title_str', 'url_str', 'files', 'size')
     sort_fields = ('title_str', 'url_str', 'added')
     readonly_fields = ('id', 'url', 'timestamp', 'num_outputs', 'is_archived', 'url_hash', 'added', 'updated')
diff --git a/archivebox/core/mixins.py b/archivebox/core/mixins.py
new file mode 100644
index 00000000..d1203745
--- /dev/null
+++ b/archivebox/core/mixins.py
@@ -0,0 +1,23 @@
+from django.contrib import messages
+
+from archivebox.search import query_search_index
+
+class SearchResultsAdminMixin(object):
+    def get_search_results(self, request, queryset, search_term):
+        ''' Enhances the search queryset with results from the search backend.
+        '''
+        qs, use_distinct = \
+            super(SearchResultsAdminMixin, self).get_search_results(
+                request, queryset, search_term)
+
+        search_term = search_term.strip()
+        if not search_term:
+            return qs, use_distinct
+        try:
+            qsearch = query_search_index(search_term)
+        except Exception as err:
+            messages.add_message(request, messages.WARNING, f'Error from the search backend, only showing results from default admin search fields - Error: {err}')
+        else:
+            qs |= qsearch
+        finally:
+            return qs, use_distinct
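
For reference, this is the same pattern the admin.py hunk above applies to SnapshotAdmin: listing the mixin before admin.ModelAdmin makes the admin search box also consult the full-text backend. A minimal sketch, assuming a normal Django admin setup (the search_fields tuple here is illustrative; the real SnapshotAdmin's fields are not shown in this diff):

from django.contrib import admin

from core.mixins import SearchResultsAdminMixin
from core.models import Snapshot

class ExampleSnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
    # Default field-based search used by super().get_search_results(); illustrative values.
    search_fields = ('url', 'title', 'tags__name')
    # get_search_results() now ORs these field matches with the Snapshots returned by
    # query_search_index(); if the backend raises, it warns and falls back to field matches.
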
diff --git a/archivebox/core/models.py b/archivebox/core/models.py
index 5555c798..fe2d05ab 100644
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -5,10 +5,11 @@ import uuid
 from django.db import models, transaction
 from django.utils.functional import cached_property
 from django.utils.text import slugify
+from django.db.models import Case, When, Value, IntegerField
 
 from ..util import parse_date
 from ..index.schema import Link
-from ..extractors import get_default_archive_methods
+from ..extractors import get_default_archive_methods, ARCHIVE_METHODS_INDEXING_PRECEDENCE
 
 EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()]
 STATUS_CHOICES = [
@@ -91,7 +92,7 @@ class Snapshot(models.Model):
         return {
             key: getattr(self, key)
             if key != 'tags' else self.tags_str()
-            for key in args
+            for key in args
         }
 
     def as_link(self) -> Link:
@@ -100,7 +101,7 @@ class Snapshot(models.Model):
     def as_link_with_details(self) -> Link:
         from ..index import load_link_details
         return load_link_details(self.as_link())
-    
+
     def tags_str(self) -> str:
         return ','.join(self.tags.order_by('name').values_list('name', flat=True))
 
@@ -157,7 +158,15 @@ class Snapshot(models.Model):
             self.tags.clear()
             self.tags.add(*tags_id)
 
+class ArchiveResultManager(models.Manager):
+    def indexable(self, sorted: bool = True):
+        INDEXABLE_METHODS = [ r[0] for r in ARCHIVE_METHODS_INDEXING_PRECEDENCE ]
+        qs = self.get_queryset().filter(extractor__in=INDEXABLE_METHODS,status='succeeded')
+        if sorted:
+            precedence = [ When(extractor=method, then=Value(precedence)) for method, precedence in ARCHIVE_METHODS_INDEXING_PRECEDENCE ]
+            qs = qs.annotate(indexing_precedence=Case(*precedence, default=Value(1000),output_field=IntegerField())).order_by('indexing_precedence')
+        return qs
 
 class ArchiveResult(models.Model):
     snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
     cmd = models.JSONField()
@@ -169,5 +178,7 @@ class ArchiveResult(models.Model):
     status = models.CharField(max_length=16, choices=STATUS_CHOICES)
     extractor = models.CharField(choices=EXTRACTORS, max_length=32)
 
+    objects = ArchiveResultManager()
+
     def __str__(self):
         return self.extractor
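
The Case/When annotation in ArchiveResultManager.indexable() asks the ORM for a simple rank-by-precedence sort. A standalone sketch of the intended ordering in plain Python, mirroring ARCHIVE_METHODS_INDEXING_PRECEDENCE from extractors/__init__.py below (the default rank 1000 only matters for extractors outside the indexable set; the list of succeeded extractors is made up):

ARCHIVE_METHODS_INDEXING_PRECEDENCE = [('readability', 1), ('singlefile', 2), ('dom', 3), ('wget', 4)]
precedence = dict(ARCHIVE_METHODS_INDEXING_PRECEDENCE)

# Hypothetical extractors of the succeeded ArchiveResults for one Snapshot:
succeeded = ['wget', 'dom', 'readability']
ranked = sorted(succeeded, key=lambda extractor: precedence.get(extractor, 1000))
print(ranked)  # ['readability', 'dom', 'wget'] -> readability text gets indexed first
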
diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py
index ef5ef446..ceef3b51 100644
--- a/archivebox/extractors/__init__.py
+++ b/archivebox/extractors/__init__.py
@@ -23,6 +23,7 @@ from ..logging_util import (
     log_archive_method_started,
     log_archive_method_finished,
 )
+from ..search import write_search_index
 
 from .title import should_save_title, save_title
 from .favicon import should_save_favicon, save_favicon
@@ -38,6 +39,7 @@ from .media import should_save_media, save_media
 from .archive_org import should_save_archive_dot_org, save_archive_dot_org
 from .headers import should_save_headers, save_headers
 
+
 def get_default_archive_methods():
     return [
         ('title', should_save_title, save_title),
@@ -55,6 +57,8 @@ def get_default_archive_methods():
         ('archive_org', should_save_archive_dot_org, save_archive_dot_org),
     ]
 
+ARCHIVE_METHODS_INDEXING_PRECEDENCE = [('readability', 1), ('singlefile', 2), ('dom', 3), ('wget', 4)]
+
 @enforce_types
 def ignore_methods(to_ignore: List[str]):
     ARCHIVE_METHODS = get_default_archive_methods()
@@ -107,6 +111,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
 
                 link.history[method_name].append(result)
                 stats[result.status] += 1
+                write_search_index(link=link, texts=result.index_texts)
                 log_archive_method_finished(result)
                 if not skip_index:
                     ArchiveResult.objects.create(snapshot=snapshot, extractor=method_name, cmd=result.cmd, cmd_version=result.cmd_version,
diff --git a/archivebox/extractors/readability.py b/archivebox/extractors/readability.py
index bd45e9d5..9da620b4 100644
--- a/archivebox/extractors/readability.py
+++ b/archivebox/extractors/readability.py
@@ -71,6 +71,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
         CURL_BINARY,
         link.url
     ]
+    readability_content = None
     timer = TimedProgress(timeout, prefix=' ')
     try:
         document = get_html(link, out_dir)
@@ -86,8 +87,9 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
         result = run(cmd, cwd=out_dir, timeout=timeout)
         result_json = json.loads(result.stdout)
         output_folder.mkdir(exist_ok=True)
+        readability_content = result_json.pop("textContent")
         atomic_write(str(output_folder / "content.html"), result_json.pop("content"))
-        atomic_write(str(output_folder / "content.txt"), result_json.pop("textContent"))
+        atomic_write(str(output_folder / "content.txt"), readability_content)
         atomic_write(str(output_folder / "article.json"), result_json)
 
         # parse out number of files downloaded from last line of stderr:
@@ -117,5 +119,6 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
         cmd_version=READABILITY_VERSION,
         output=output,
         status=status,
-        **timer.stats,
+        index_texts= [readability_content] if readability_content else [],
+        **timer.stats,
     )
diff --git a/archivebox/index/__init__.py b/archivebox/index/__init__.py
index 3a066e18..bf1d0c6a 100644
--- a/archivebox/index/__init__.py
+++ b/archivebox/index/__init__.py
@@ -51,6 +51,8 @@ from .sql import (
     write_sql_link_details,
 )
 
+from ..search import search_backend_enabled, query_search_index
+
 ### Link filtering and checking
 
 @enforce_types
@@ -365,7 +367,7 @@ LINK_FILTERS = {
 }
 
 @enforce_types
-def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
+def q_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
     q_filter = Q()
     for pattern in filter_patterns:
         try:
@@ -380,6 +382,31 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type
             raise SystemExit(2)
     return snapshots.filter(q_filter)
 
+def search_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='search') -> QuerySet:
+    if not search_backend_enabled():
+        stderr()
+        stderr(
+                '[X] The search backend is not enabled, set config.USE_SEARCHING_BACKEND = True',
+                color='red',
+            )
+        raise SystemExit(2)
+
+    qsearch = get_empty_snapshot_queryset()
+    for pattern in filter_patterns:
+        try:
+            qsearch |= query_search_index(pattern)
+        except:
+            raise SystemExit(2)
+
+    return snapshots & qsearch
+
+@enforce_types
+def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
+    if filter_type != 'search':
+        return q_filter(snapshots, filter_patterns, filter_type)
+    else:
+        return search_filter(snapshots, filter_patterns, filter_type)
+
 
 def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """indexed links without checking archive status or data directory validity"""
diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py
index 90021e0b..bc3a25da 100644
--- a/archivebox/index/schema.py
+++ b/archivebox/index/schema.py
@@ -39,6 +39,7 @@ class ArchiveResult:
     status: str
     start_ts: datetime
     end_ts: datetime
+    index_texts: Union[List[str], None] = None
     schema: str = 'ArchiveResult'
 
     def __post_init__(self):
diff --git a/archivebox/main.py b/archivebox/main.py
index cbbd2218..bb24d124 100644
--- a/archivebox/main.py
+++ b/archivebox/main.py
@@ -115,6 +115,7 @@ from .logging_util import (
     printable_dependency_version,
 )
 
+from .search import flush_search_index, index_links
 
 ALLOWED_IN_OUTPUT_DIR = {
     'lost+found',
@@ -664,6 +665,7 @@ def remove(filter_str: Optional[str]=None,
 
     to_remove = snapshots.count()
 
+    flush_search_index(snapshots=snapshots)
     remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir)
     all_snapshots = load_main_index(out_dir=out_dir)
     log_removal_finished(all_snapshots.count(), to_remove)
@@ -709,6 +711,7 @@ def update(resume: Optional[float]=None,
     if index_only:
         for link in all_links:
             write_link_details(link, out_dir=out_dir, skip_sql_index=True)
+        index_links(all_links, out_dir=out_dir)
         return all_links
 
     # Step 2: Run the archive methods for each link
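
Together with the new --filter-type choices in the CLI hunks above, this is what enables e.g. `archivebox list --filter-type=search 'some text'`. A rough sketch of the same flow from a Python shell inside an ArchiveBox data directory, assuming the default OUTPUT_DIR and a configured, reachable search backend (names come from this diff and the existing index module):

from archivebox.config import setup_django, OUTPUT_DIR
from archivebox.index import load_main_index, snapshot_filter

setup_django(OUTPUT_DIR, check_db=True)
snapshots = load_main_index(out_dir=OUTPUT_DIR)            # full Snapshot queryset
# filter_type='search' routes through search_filter(), which intersects the queryset
# with the Snapshot ids returned by query_search_index() for each pattern:
matches = snapshot_filter(snapshots, ['some text'], filter_type='search')
print(matches.count())
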
diff --git a/archivebox/search/__init__.py b/archivebox/search/__init__.py
new file mode 100644
index 00000000..ebeebcd0
--- /dev/null
+++ b/archivebox/search/__init__.py
@@ -0,0 +1,110 @@
+from typing import List, Union
+from pathlib import Path
+from importlib import import_module
+
+from django.db.models import QuerySet
+
+from archivebox.index.schema import Link
+from archivebox.util import enforce_types
+from archivebox.config import setup_django,stderr, OUTPUT_DIR, USE_INDEXING_BACKEND, USE_SEARCHING_BACKEND, SEARCH_BACKEND_ENGINE
+
+from .utils import get_indexable_content, log_index_started
+
+def indexing_enabled():
+    return USE_INDEXING_BACKEND
+
+def search_backend_enabled():
+    return USE_SEARCHING_BACKEND
+
+def get_backend():
+    return f'search.backends.{SEARCH_BACKEND_ENGINE}'
+
+def import_backend():
+    backend_string = get_backend()
+    try:
+        backend = import_module(backend_string)
+    except Exception as err:
+        raise Exception("Could not load '%s' as a backend: %s" % (backend_string, err))
+    return backend
+
+@enforce_types
+def write_search_index(link: Link, texts: Union[List[str], None]=None, out_dir: Path=OUTPUT_DIR, skip_text_index: bool=False) -> None:
+    if not indexing_enabled():
+        return
+
+    if not skip_text_index and texts:
+        setup_django(out_dir, check_db=True)
+        from core.models import Snapshot
+
+        snap = Snapshot.objects.filter(url=link.url).first()
+        backend = import_backend()
+        if snap:
+            try:
+                backend.index(snapshot_id=str(snap.id), texts=texts)
+            except Exception as err:
+                stderr()
+                stderr(
+                    f'[X] The search backend threw an exception={err}:',
+                    color='red',
+                    )
+
+@enforce_types
+def query_search_index(query: str, out_dir: Path=OUTPUT_DIR) -> QuerySet:
+    setup_django(out_dir, check_db=True)
+    from core.models import Snapshot
+
+    if search_backend_enabled():
+        backend = import_backend()
+        try:
+            snapshot_ids = backend.search(query)
+        except Exception as err:
+            stderr()
+            stderr(
+                f'[X] The search backend threw an exception={err}:',
+                color='red',
+                )
+            raise
+        else:
+            # TODO preserve ordering from backend
+            qsearch = Snapshot.objects.filter(pk__in=snapshot_ids)
+            return qsearch
+
+    return Snapshot.objects.none()
+
+@enforce_types
+def flush_search_index(snapshots: QuerySet):
+    if not indexing_enabled() or not snapshots:
+        return
+    backend = import_backend()
+    snapshot_ids=(str(pk) for pk in snapshots.values_list('pk',flat=True))
+    try:
+        backend.flush(snapshot_ids)
+    except Exception as err:
+        stderr()
+        stderr(
+            f'[X] The search backend threw an exception={err}:',
+            color='red',
+            )
+
+@enforce_types
+def index_links(links: Union[List[Link],None], out_dir: Path=OUTPUT_DIR):
+    if not links:
+        return
+
+    setup_django(out_dir=out_dir, check_db=True)
+    from core.models import Snapshot, ArchiveResult
+
+    for link in links:
+        if snap := Snapshot.objects.filter(url=link.url).first():
+            results = ArchiveResult.objects.indexable().filter(snapshot=snap)
+            log_index_started(link.url)
+            try:
+                texts = get_indexable_content(results)
+            except Exception as err:
+                stderr()
+                stderr(
+                    f'[X] An Exception occurred reading the indexable content={err}:',
+                    color='red',
+                    )
+            else:
+                write_search_index(link, texts, out_dir=out_dir)
\ No newline at end of file
diff --git a/archivebox/search/backends/__init__.py b/archivebox/search/backends/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/search/backends/ripgrep.py b/archivebox/search/backends/ripgrep.py
new file mode 100644
index 00000000..07292e37
--- /dev/null
+++ b/archivebox/search/backends/ripgrep.py
@@ -0,0 +1,47 @@
+import re
+from subprocess import run, PIPE, DEVNULL
+from typing import List, Generator
+
+from archivebox.config import setup_django, ARCHIVE_DIR
+from archivebox.util import enforce_types
+
+RG_IGNORE_EXTENSIONS = ('css','js','orig','svg')
+
+RG_ADD_TYPE = '--type-add'
+RG_IGNORE_ARGUMENTS = f"ignore:*.{{{','.join(RG_IGNORE_EXTENSIONS)}}}"
+RG_DEFAULT_ARGUMENTS = "-ilTignore" # Case insensitive(i), matching files results(l)
+RG_REGEX_ARGUMENT = '-e'
+
+TIMESTAMP_REGEX = r'\/([\d]+\.[\d]+)\/'
+
+ts_regex = re.compile(TIMESTAMP_REGEX)
+
+@enforce_types
+def index(snapshot_id: str, texts: List[str]):
+    return
+
+@enforce_types
+def flush(snapshot_ids: Generator[str, None, None]):
+    return
+
+@enforce_types
+def search(text: str) -> List[str]:
+    is_rg_installed = run(['which', 'rg'], stdout=DEVNULL, stderr=DEVNULL)
+    if is_rg_installed.returncode:
+        raise Exception("ripgrep binary not found, install ripgrep to use this search backend")
+
+    setup_django(check_db=True)
+    from core.models import Snapshot
+
+    rg_cmd = ['rg', RG_ADD_TYPE, RG_IGNORE_ARGUMENTS, RG_DEFAULT_ARGUMENTS, RG_REGEX_ARGUMENT, text, str(ARCHIVE_DIR)]
+    rg = run(rg_cmd, stdout=PIPE, stderr=PIPE, timeout=60)
+    file_paths = [p.decode() for p in rg.stdout.splitlines()]
+    timestamps = set()
+    for path in file_paths:
+        if ts := ts_regex.findall(path):
+            timestamps.add(ts[0])
+
+    snap_ids = [str(id) for id in Snapshot.objects.filter(timestamp__in=timestamps).values_list('pk', flat=True)]
+
+    return snap_ids
+
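
The ripgrep backend maps file hits back to snapshots via the timestamp component of each path under ARCHIVE_DIR. A standalone illustration of what TIMESTAMP_REGEX extracts (the hit path below is made up, but follows the ./archive/<timestamp>/... layout):

import re

TIMESTAMP_REGEX = r'\/([\d]+\.[\d]+)\/'
ts_regex = re.compile(TIMESTAMP_REGEX)

hit = '/data/archive/1605471633.0/readability/content.txt'
timestamps = ts_regex.findall(hit)
print(timestamps)  # ['1605471633.0'] -> fed into Snapshot.objects.filter(timestamp__in=...)
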
diff --git a/archivebox/search/backends/sonic.py b/archivebox/search/backends/sonic.py
new file mode 100644
index 00000000..f0beaddd
--- /dev/null
+++ b/archivebox/search/backends/sonic.py
@@ -0,0 +1,28 @@
+from typing import List, Generator
+
+from sonic import IngestClient, SearchClient
+
+from archivebox.util import enforce_types
+from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD, SONIC_BUCKET, SONIC_COLLECTION
+
+MAX_SONIC_TEXT_LENGTH = 20000
+
+@enforce_types
+def index(snapshot_id: str, texts: List[str]):
+    with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
+        for text in texts:
+            chunks = [text[i:i+MAX_SONIC_TEXT_LENGTH] for i in range(0, len(text), MAX_SONIC_TEXT_LENGTH)]
+            for chunk in chunks:
+                ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(chunk))
+
+@enforce_types
+def search(text: str) -> List[str]:
+    with SearchClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as querycl:
+        snap_ids = querycl.query(SONIC_COLLECTION, SONIC_BUCKET, text)
+        return snap_ids
+
+@enforce_types
+def flush(snapshot_ids: Generator[str, None, None]):
+    with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
+        for id in snapshot_ids:
+            ingestcl.flush_object(SONIC_COLLECTION, SONIC_BUCKET, str(id))
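
index() slices each text into MAX_SONIC_TEXT_LENGTH pieces before pushing, presumably to stay under Sonic's per-push size limits. A standalone sketch of that slicing (the input length is made up):

MAX_SONIC_TEXT_LENGTH = 20000

text = 'x' * 45000   # stand-in for one extracted document
chunks = [text[i:i+MAX_SONIC_TEXT_LENGTH] for i in range(0, len(text), MAX_SONIC_TEXT_LENGTH)]
print([len(chunk) for chunk in chunks])   # [20000, 20000, 5000]
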
diff --git a/archivebox/search/utils.py b/archivebox/search/utils.py
new file mode 100644
index 00000000..55c97e75
--- /dev/null
+++ b/archivebox/search/utils.py
@@ -0,0 +1,44 @@
+from django.db.models import QuerySet
+
+from archivebox.util import enforce_types
+from archivebox.config import ANSI
+
+def log_index_started(url):
+    print('{green}[*] Indexing url: {} in the search index {reset}'.format(url, **ANSI))
+    print( )
+
+def get_file_result_content(res, extra_path, use_pwd=False):
+    if use_pwd:
+        fpath = f'{res.pwd}/{res.output}'
+    else:
+        fpath = f'{res.output}'
+
+    if extra_path:
+        fpath = f'{fpath}/{extra_path}'
+
+    with open(fpath, 'r') as file:
+        data = file.read()
+    if data:
+        return [data]
+    return []
+
+
+# This should be abstracted by a plugin interface for extractors
+@enforce_types
+def get_indexable_content(results: QuerySet):
+    if not results:
+        return []
+    # Only use the first method available
+    res, method = results.first(), results.first().extractor
+    if method not in ('readability', 'singlefile', 'dom', 'wget'):
+        return []
+    # This should come from a plugin interface
+
+    if method == 'readability':
+        return get_file_result_content(res, 'content.txt')
+    elif method == 'singlefile':
+        return get_file_result_content(res, '')
+    elif method == 'dom':
+        return get_file_result_content(res,'',use_pwd=True)
+    elif method == 'wget':
+        return get_file_result_content(res,'',use_pwd=True)
diff --git a/docker-compose.yml b/docker-compose.yml
index 5fe91026..c76f734a 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -23,6 +23,7 @@ services:
             - SHOW_PROGRESS=False
         volumes:
             - ./data:/data
+
 
     # Optional Addons: tweak these examples as needed for your specific use case
 
@@ -73,3 +74,14 @@ services:
     #     volumes:
     #         ./data:/archivebox
     #         ./data/wayback:/webarchive
+
+    # Example: Run sonic search backend
+    # sonic:
+    #    image: valeriansaliou/sonic:v1.3.0
+    #    ports:
+    #        - 1491:1491
+    #    environment:
+    #        - SEARCH_BACKEND_PASSWORD=SecretPassword
+    #    volumes:
+    #        - ./etc/sonic/config.cfg:/etc/sonic.cfg
+    #        - ./data:/var/lib/sonic/store/
\ No newline at end of file
diff --git a/etc/sonic/config.cfg b/etc/sonic/config.cfg
new file mode 100644
index 00000000..10fbda53
--- /dev/null
+++ b/etc/sonic/config.cfg
@@ -0,0 +1,66 @@
+# Sonic
+# Fast, lightweight and schema-less search backend
+# Configuration file
+# Example: https://github.com/valeriansaliou/sonic/blob/master/config.cfg
+
+
+[server]
+
+log_level = "debug"
+
+
+[channel]
+
+inet = "0.0.0.0:1491"
+tcp_timeout = 300
+
+auth_password = "${env.SEARCH_BACKEND_PASSWORD}"
+
+[channel.search]
+
+query_limit_default = 65535
+query_limit_maximum = 65535
+query_alternates_try = 10
+
+suggest_limit_default = 5
+suggest_limit_maximum = 20
+
+
+[store]
+
+[store.kv]
+
+path = "/var/lib/sonic/store/kv/"
+
+retain_word_objects = 100000
+
+[store.kv.pool]
+
+inactive_after = 1800
+
+[store.kv.database]
+
+flush_after = 900
+
+compress = true
+parallelism = 2
+max_files = 100
+max_compactions = 1
+max_flushes = 1
+write_buffer = 16384
+write_ahead_log = true
+
+[store.fst]
+
+path = "/var/lib/sonic/store/fst/"
+
+[store.fst.pool]
+
+inactive_after = 300
+
+[store.fst.graph]
+
+consolidate_after = 180
+
+max_size = 2048
+max_words = 250000
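
For a quick end-to-end check of the sonic path (assumptions: the commented-out sonic service from the docker-compose example above is enabled and reachable on localhost:1491 with the default password, and the `sonic` Python client imported by sonic.py is installed), one can push and query a document directly, mirroring the index()/search() calls in the backend:

from sonic import IngestClient, SearchClient

# Collection/bucket names match the SONIC_COLLECTION / SONIC_BUCKET defaults in config.py;
# the snapshot id and text below are made up.
with IngestClient('localhost', 1491, 'SecretPassword') as ingestcl:
    ingestcl.push('archivebox', 'snapshots', 'example-snapshot-id', 'some archived page text')

with SearchClient('localhost', 1491, 'SecretPassword') as querycl:
    print(querycl.query('archivebox', 'snapshots', 'archived'))  # -> ['example-snapshot-id']
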