more settings loading tweaks and improvements
This commit is contained in:
parent fbfd16e195
commit 97695bda5e

10 changed files with 350 additions and 260 deletions
archivebox/__init__.py:

@@ -1,4 +1,30 @@
 __package__ = 'archivebox'

 from .monkey_patches import *

+import os
+import importlib
+from pathlib import Path
+
+PACKAGE_DIR = Path(__file__).resolve().parent    # archivebox source code dir
+DATA_DIR = Path(os.curdir).resolve()             # archivebox user data dir
+
+
+def _detect_installed_version():
+    try:
+        return importlib.metadata.version(__package__ or 'archivebox')
+    except importlib.metadata.PackageNotFoundError:
+        try:
+            pyproject_config = (PACKAGE_DIR / 'pyproject.toml').read_text()
+            for line in pyproject_config.splitlines():    # iterate over lines, not characters
+                if line.startswith('version = '):
+                    return line.split(' = ', 1)[-1].strip('"')
+        except FileNotFoundError:
+            # building docs, pyproject.toml is not available
+            return 'dev'
+
+    raise Exception('Failed to detect installed archivebox version!')
+
+
+__version__ = _detect_installed_version()
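For context, a minimal standalone sketch of the two-stage version lookup added above, assuming a checkout where the package may or may not be pip-installed (the function name and paths here are illustrative, not part of the commit):

import importlib.metadata
from pathlib import Path

def detect_version(package: str, pyproject: Path) -> str:
    try:
        # 1) prefer the installed distribution's metadata
        return importlib.metadata.version(package)
    except importlib.metadata.PackageNotFoundError:
        # 2) fall back to scanning pyproject.toml for a `version = "..."` line
        try:
            for line in pyproject.read_text().splitlines():
                if line.startswith('version = '):
                    return line.split(' = ', 1)[-1].strip('"')
        except FileNotFoundError:
            return 'dev'    # e.g. building docs without a full source checkout
    raise Exception(f'Failed to detect {package} version!')

print(detect_version('archivebox', Path('pyproject.toml')))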
archivebox/core/settings.py:

@@ -6,6 +6,7 @@ import re
 import logging
 import inspect
 import tempfile
+import archivebox

 from typing import Dict
 from pathlib import Path

@@ -22,14 +23,16 @@ IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
 IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3]


-PACKAGE_DIR = Path(__file__).resolve().parent.parent
+PACKAGE_DIR = archivebox.PACKAGE_DIR
 assert PACKAGE_DIR == CONFIG.PACKAGE_DIR

-DATA_DIR = Path(os.curdir).resolve()
+DATA_DIR = archivebox.DATA_DIR
 assert DATA_DIR == CONFIG.OUTPUT_DIR
 ARCHIVE_DIR = DATA_DIR / 'archive'
 assert ARCHIVE_DIR == CONFIG.ARCHIVE_DIR

+VERSION = archivebox.__version__
+
 ################################################################################
 ### ArchiveBox Plugin Settings
 ################################################################################

@@ -164,11 +167,19 @@ STATIC_URL = '/static/'

 STATICFILES_DIRS = [
     *([str(CONFIG.CUSTOM_TEMPLATES_DIR / 'static')] if CONFIG.CUSTOM_TEMPLATES_DIR else []),
+    *[
+        str(plugin_dir / 'static')
+        for plugin_dir in PLUGIN_DIRS.values()
+    ],
     str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'static'),
 ]

 TEMPLATE_DIRS = [
     *([str(CONFIG.CUSTOM_TEMPLATES_DIR)] if CONFIG.CUSTOM_TEMPLATES_DIR else []),
+    *[
+        str(plugin_dir / 'templates')
+        for plugin_dir in PLUGIN_DIRS.values()
+    ],
     str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'core'),
     str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'admin'),
     str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME),

@@ -394,7 +405,7 @@ SHELL_PLUS_PRINT_SQL = False
 IPYTHON_ARGUMENTS = ['--no-confirm-exit', '--no-banner']
 IPYTHON_KERNEL_DISPLAY_NAME = 'ArchiveBox Django Shell'
 if IS_SHELL:
-    os.environ['PYTHONSTARTUP'] = str(PACKAGE_DIR / 'core' / 'welcome_message.py')
+    os.environ['PYTHONSTARTUP'] = str(PACKAGE_DIR / 'core' / 'shell_welcome_message.py')


 ################################################################################

@@ -411,7 +422,7 @@ TIME_ZONE = CONFIG.TIMEZONE    # django convention is TIME_ZONE, archivebox

 from django.conf.locale.en import formats as en_formats    # type: ignore

-en_formats.DATETIME_FORMAT = DATETIME_FORMAT
+en_formats.DATETIME_FORMAT = DATETIME_FORMAT    # monkey patch en_format default with our preferred format
 en_formats.SHORT_DATETIME_FORMAT = SHORT_DATETIME_FORMAT

@@ -419,193 +430,10 @@ en_formats.SHORT_DATETIME_FORMAT = SHORT_DATETIME_FORMAT
 ### Logging Settings
 ################################################################################

(The ~190 removed lines are reproduced nearly verbatim in the new settings_logging.py listed below, so they are abbreviated here.)

-IGNORABLE_URL_PATTERNS = [...]                                    # moved to settings_logging.py
-class NoisyRequestsFilter(logging.Filter): ...                    # moved to settings_logging.py
-class CustomOutboundWebhookLogFormatter(logging.Formatter): ...   # moved to settings_logging.py
-ERROR_LOG = ... / if CONFIG.LOGS_DIR.exists(): ...                # moved to settings_logging.py (now keyed off DATA_DIR/logs)
-LOG_LEVEL_DATABASE = 'DEBUG' if DEBUG else 'WARNING'              # moved to settings_logging.py
-LOG_LEVEL_REQUEST = 'DEBUG' if DEBUG else 'WARNING'               # moved to settings_logging.py
-LOGGING = { ... }                                                 # ~150-line dict config, moved to settings_logging.py as SETTINGS_LOGGING
+from .settings_logging import SETTINGS_LOGGING, LOGS_DIR, ERROR_LOG
+
+LOGGING = SETTINGS_LOGGING


 ################################################################################
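The STATICFILES_DIRS/TEMPLATE_DIRS hunks above let each plugin ship its own static/ and templates/ subdirectories, searched ahead of the package defaults. A toy sketch of the resulting search-path assembly (the PLUGIN_DIRS contents here are hypothetical, for illustration only):

from pathlib import Path

# hypothetical plugin registry, for illustration only
PLUGIN_DIRS = {
    'ldap':    Path('archivebox/plugins_auth/ldap'),
    'ripgrep': Path('archivebox/plugins_search/ripgrep'),
}

STATICFILES_DIRS = [
    # user CUSTOM_TEMPLATES_DIR overrides would be prepended here,
    *[str(plugin_dir / 'static') for plugin_dir in PLUGIN_DIRS.values()],
    'archivebox/templates/static',   # package defaults come last
]
print(STATICFILES_DIRS)
# ['archivebox/plugins_auth/ldap/static', 'archivebox/plugins_search/ripgrep/static', 'archivebox/templates/static']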
archivebox/core/settings_logging.py (new file, 198 lines):
@@ -0,0 +1,198 @@
import re
import tempfile
import logging

import pydantic
import django.template

import archivebox


IGNORABLE_URL_PATTERNS = [
    re.compile(r"/.*/?apple-touch-icon.*\.png"),
    re.compile(r"/.*/?favicon\.ico"),
    re.compile(r"/.*/?robots\.txt"),
    re.compile(r"/.*/?.*\.(css|js)\.map"),
    re.compile(r"/static/.*"),
    re.compile(r"/admin/jsi18n/"),
]


class NoisyRequestsFilter(logging.Filter):
    def filter(self, record) -> bool:
        logline = record.getMessage()
        # '"GET /api/v1/docs HTTP/1.1" 200 1023'
        # '"GET /static/admin/js/SelectFilter2.js HTTP/1.1" 200 15502'
        # '"GET /static/admin/js/SelectBox.js HTTP/1.1" 304 0'
        # '"GET /admin/jsi18n/ HTTP/1.1" 200 3352'
        # '"GET /admin/api/apitoken/0191bbf8-fd5e-0b8c-83a8-0f32f048a0af/change/ HTTP/1.1" 200 28778'

        # ignore harmless 404s for the patterns in IGNORABLE_URL_PATTERNS
        for pattern in IGNORABLE_URL_PATTERNS:
            ignorable_GET_request = re.compile(f'"GET {pattern.pattern} HTTP/.*" (2..|30.|404) .+$', re.I | re.M)
            if ignorable_GET_request.match(logline):
                return False

            ignorable_404_pattern = re.compile(f'Not Found: {pattern.pattern}', re.I | re.M)
            if ignorable_404_pattern.match(logline):
                return False

        return True


class CustomOutboundWebhookLogFormatter(logging.Formatter):
    def format(self, record):
        result = super().format(record)
        return result.replace('HTTP Request: ', 'OutboundWebhook: ')


ERROR_LOG = tempfile.NamedTemporaryFile().name

LOGS_DIR = archivebox.DATA_DIR / 'logs'

if LOGS_DIR.is_dir():
    ERROR_LOG = (LOGS_DIR / 'errors.log')
else:
    # historically too many edge cases here around creating log dir w/ correct permissions early on
    # if there's an issue on startup, we trash the log and let user figure it out via stdout/stderr
    # print(f'[!] WARNING: data/logs dir does not exist. Logging to temp file: {ERROR_LOG}')
    pass


LOG_LEVEL_DATABASE = 'WARNING'    # if DEBUG else 'WARNING'
LOG_LEVEL_REQUEST = 'WARNING'     # if DEBUG else 'WARNING'


SETTINGS_LOGGING = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "rich": {
            "datefmt": "[%Y-%m-%d %H:%M:%S]",
            # "format": "{asctime} {levelname} {module} {name} {message} {username}",
            "format": "%(name)s %(message)s",
        },
        "outbound_webhooks": {
            "()": CustomOutboundWebhookLogFormatter,
            "datefmt": "[%Y-%m-%d %H:%M:%S]",
        },
    },
    "filters": {
        "noisyrequestsfilter": {
            "()": NoisyRequestsFilter,
        },
        "require_debug_false": {
            "()": "django.utils.log.RequireDebugFalse",
        },
        "require_debug_true": {
            "()": "django.utils.log.RequireDebugTrue",
        },
    },
    "handlers": {
        # "console": {
        #     "level": "DEBUG",
        #     'formatter': 'simple',
        #     "class": "logging.StreamHandler",
        #     'filters': ['noisyrequestsfilter', 'add_extra_logging_attrs'],
        # },
        "default": {
            "class": "rich.logging.RichHandler",
            "formatter": "rich",
            "level": "DEBUG",
            "markup": False,
            "rich_tracebacks": True,
            "filters": ["noisyrequestsfilter"],
            "tracebacks_suppress": [
                django,
                pydantic,
            ],
        },
        "logfile": {
            "level": "INFO",
            "class": "logging.handlers.RotatingFileHandler",
            "filename": ERROR_LOG,
            "maxBytes": 1024 * 1024 * 25,  # 25 MB
            "backupCount": 10,
            "formatter": "rich",
            "filters": ["noisyrequestsfilter"],
        },
        "outbound_webhooks": {
            "class": "rich.logging.RichHandler",
            "markup": False,
            "rich_tracebacks": True,
            "formatter": "outbound_webhooks",
        },
        # "mail_admins": {
        #     "level": "ERROR",
        #     "filters": ["require_debug_false"],
        #     "class": "django.utils.log.AdminEmailHandler",
        # },
        "null": {
            "class": "logging.NullHandler",
        },
    },
    "root": {
        "handlers": ["default", "logfile"],
        "level": "INFO",
        "formatter": "rich",
    },
    "loggers": {
        "api": {
            "handlers": ["default", "logfile"],
            "level": "DEBUG",
        },
        "checks": {
            "handlers": ["default", "logfile"],
            "level": "DEBUG",
        },
        "core": {
            "handlers": ["default", "logfile"],
            "level": "DEBUG",
        },
        "plugins_extractor": {
            "handlers": ["default", "logfile"],
            "level": "DEBUG",
        },
        "httpx": {
            "handlers": ["outbound_webhooks"],
            "level": "INFO",
            "formatter": "outbound_webhooks",
            "propagate": False,
        },
        "django": {
            "handlers": ["default", "logfile"],
            "level": "INFO",
            "filters": ["noisyrequestsfilter"],
        },
        "django.utils.autoreload": {
            "propagate": False,
            "handlers": [],
            "level": "ERROR",
        },
        "django.channels.server": {
            # see archivebox.monkey_patches.ModifiedAccessLogGenerator for dedicated daphne server logging settings
            "propagate": False,
            "handlers": ["default", "logfile"],
            "level": "INFO",
            "filters": ["noisyrequestsfilter"],
        },
        "django.server": {  # logs all requests (2xx, 3xx, 4xx)
            "propagate": False,
            "handlers": ["default", "logfile"],
            "level": "INFO",
            "filters": ["noisyrequestsfilter"],
        },
        "django.request": {  # only logs 4xx and 5xx errors
            "propagate": False,
            "handlers": ["default", "logfile"],
            "level": "ERROR",
            "filters": ["noisyrequestsfilter"],
        },
        "django.db.backends": {
            "propagate": False,
            "handlers": ["default"],
            "level": LOG_LEVEL_DATABASE,
        },
    },
}
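As a quick sanity check on the filter logic in the new file, a standalone sketch (pattern list trimmed down, log lines made up for illustration):

import logging
import re

IGNORABLE_URL_PATTERNS = [re.compile(r"/static/.*"), re.compile(r"/.*/?favicon\.ico")]

class NoisyRequestsFilter(logging.Filter):
    def filter(self, record) -> bool:
        logline = record.getMessage()
        for pattern in IGNORABLE_URL_PATTERNS:
            # suppress successful/redirect/404 GETs for ignorable URLs
            if re.match(f'"GET {pattern.pattern} HTTP/.*" (2..|30.|404) .+$', logline, re.I | re.M):
                return False
            # suppress the matching "Not Found:" lines too
            if re.match(f'Not Found: {pattern.pattern}', logline, re.I | re.M):
                return False
        return True

f = NoisyRequestsFilter()
rec = lambda msg: logging.LogRecord('django.server', logging.INFO, __file__, 0, msg, None, None)
print(f.filter(rec('"GET /static/admin/js/SelectBox.js HTTP/1.1" 304 0')))   # False -> suppressed
print(f.filter(rec('"GET /api/v1/docs HTTP/1.1" 500 1023')))                 # True  -> kept (5xx is never ignorable)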
ArchiveBoxBaseConfig / BaseConfigSet (plugantic config base classes):

@@ -176,22 +176,43 @@ class ArchiveBoxBaseConfig(BaseSettings):
         """Populate any unset values using function provided as their default"""

         for key, field in self.model_fields.items():
-            config_so_far = benedict(self.model_dump(include=set(self.model_fields.keys()), warnings=False))
             value = getattr(self, key)

             if isinstance(value, Callable):
-                # if value is a function, execute it to get the actual value, passing existing config as a dict arg
+                # if value is a function, execute it to get the actual value, passing existing config as a dict arg if expected
                 if func_takes_args_or_kwargs(value):
+                    # assemble dict of existing field values to pass to default factory functions
+                    config_so_far = benedict(self.model_dump(include=set(self.model_fields.keys()), warnings=False))
                     computed_default = field.default(config_so_far)
                 else:
+                    # otherwise it's a pure function with no args, just call it
                     computed_default = field.default()

-                # check to make sure default factory return value matches type annotation
+                # coerce/check to make sure default factory return value matches type annotation
                 TypeAdapter(field.annotation).validate_python(computed_default)

                 # set generated default value as final validated value
                 setattr(self, key, computed_default)
         return self

+    def update_in_place(self, warn=True, **kwargs):
+        """
+        Update the config with new values. Use this sparingly! We should almost never be updating config at runtime.
+        Sets them in the environment so they propagate to spawned subprocesses / across future re-__init__()s and reload from environment
+
+        Example acceptable use case: user config says SEARCH_BACKEND_ENGINE=sonic but sonic_client pip library is not installed so we cannot use it.
+        SEARCH_BACKEND_CONFIG.update_in_place(SEARCH_BACKEND_ENGINE='ripgrep') can be used to reset it back to ripgrep so we can continue.
+        """
+        if warn:
+            print('[!] WARNING: Some of the provided user config values cannot be used, temporarily ignoring them:')
+        for key, value in kwargs.items():
+            os.environ[key] = str(value)
+            original_value = getattr(self, key)
+            if warn:
+                print(f'    {key}={original_value} -> {value}')
+        self.__init__()
+        return self
+

 class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook):    # type: ignore[type-arg]
     hook_type: ClassVar[HookType] = 'CONFIG'
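The fill_defaults change above only builds config_so_far when a default factory actually wants it. A standalone sketch of that callable-default pattern (plain dicts instead of pydantic, names hypothetical):

from typing import Any
import inspect

def fill_defaults(config: dict) -> dict:
    # resolve callable defaults in declaration order, feeding each one
    # the values computed so far (mirrors fill_defaults above)
    resolved: dict = {}
    for key, value in config.items():
        if callable(value):
            takes_args = bool(inspect.signature(value).parameters)
            value = value(resolved) if takes_args else value()
        resolved[key] = value
    return resolved

print(fill_defaults({
    'DATA_DIR': '/data',
    'LOGS_DIR': lambda c: c['DATA_DIR'] + '/logs',   # depends on an earlier field
    'TIMEOUT': lambda: 60,                           # pure function, no args
}))
# -> {'DATA_DIR': '/data', 'LOGS_DIR': '/data/logs', 'TIMEOUT': 60}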
LDAP auth plugin config (LdapConfig):

@@ -20,8 +20,9 @@ except ImportError:

 class LdapConfig(BaseConfigSet):
     """
-    LDAP Config gets imported by core/settings.py very early during startup, so it needs to be in a separate file from apps.py
-    so that it can be imported during settings.py initialization before the apps are loaded.
+    LDAP Config gets imported by core/settings.py very early during startup.
+    It needs to be in a separate file from apps.py so that it can be imported
+    during settings.py initialization before the apps are loaded.
     """
     section: ClassVar[ConfigSectionName] = 'LDAP_CONFIG'

@@ -41,20 +42,29 @@ class LdapConfig(BaseConfigSet):

     @model_validator(mode='after')
     def validate_ldap_config(self):
+        # Check that LDAP libraries are installed
         if self.LDAP_ENABLED and LDAP_LIB is None:
-            sys.stderr.write('[X] Error: Found LDAP=True config but LDAP packages not installed. You may need to run: pip install archivebox[ldap]\n\n')
+            sys.stderr.write('[X] Error: LDAP Authentication is enabled but LDAP libraries are not installed. You may need to run: pip install archivebox[ldap]\n')
             # dont hard exit here. in case the user is just running "archivebox version" or "archivebox help", we still want those to work despite broken ldap
             # sys.exit(1)
-            self.LDAP_ENABLED = False
+            self.update(LDAP_ENABLED=False)

-        if self.LDAP_ENABLED:
-            assert (
+        # Check that all required LDAP config options are set
+        all_config_is_set = (
             self.LDAP_SERVER_URI
             and self.LDAP_BIND_DN
             and self.LDAP_BIND_PASSWORD
             and self.LDAP_USER_BASE
             and self.LDAP_USER_FILTER
-            ), 'LDAP_* config options must all be set if LDAP_ENABLED=True'
+        )
+        if self.LDAP_ENABLED and not all_config_is_set:
+            missing_config_options = [
+                key for key, value in self.model_dump().items()
+                if value is None and key != 'LDAP_ENABLED'
+            ]
+            sys.stderr.write('[X] Error: LDAP_* config options must all be set if LDAP_ENABLED=True\n')
+            sys.stderr.write(f'    Missing: {", ".join(missing_config_options)}\n')
+            self.update(LDAP_ENABLED=False)
         return self

     @property
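The rewritten validator degrades gracefully instead of asserting: it warns on stderr and flips LDAP off so unrelated commands keep working. A toy pydantic sketch of the same pattern (hypothetical field names, not the real LdapConfig):

import sys
from typing import Optional
from pydantic import BaseModel, model_validator

class FeatureConfig(BaseModel):
    FEATURE_ENABLED: bool = False
    FEATURE_SERVER_URI: Optional[str] = None

    @model_validator(mode='after')
    def validate_feature(self):
        if self.FEATURE_ENABLED and not self.FEATURE_SERVER_URI:
            # dont hard exit: warn and disable the feature so unrelated commands still work
            sys.stderr.write('[X] FEATURE_SERVER_URI must be set if FEATURE_ENABLED=True\n')
            self.FEATURE_ENABLED = False
        return self

print(FeatureConfig(FEATURE_ENABLED=True).FEATURE_ENABLED)   # -> False after validation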
Ripgrep search plugin:

@@ -28,8 +28,21 @@ class RipgrepConfig(BaseConfigSet):

     RIPGREP_BINARY: str = Field(default='rg')

+    RIPGREP_IGNORE_EXTENSIONS: str = Field(default='css,js,orig,svg')
+    RIPGREP_ARGS_DEFAULT: List[str] = Field(default=lambda c: [
+        # https://github.com/BurntSushi/ripgrep/blob/master/GUIDE.md
+        f'--type-add=ignore:*.{{{c.RIPGREP_IGNORE_EXTENSIONS}}}',
+        '--type-not=ignore',
+        '--ignore-case',
+        '--files-with-matches',
+        '--regexp',
+    ])
+    RIPGREP_SEARCH_DIR: str = Field(default=lambda: str(settings.ARCHIVE_DIR))
+
 RIPGREP_CONFIG = RipgrepConfig()


 class RipgrepBinary(BaseBinary):
     name: BinName = RIPGREP_CONFIG.RIPGREP_BINARY
     binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]

@@ -41,17 +54,8 @@ class RipgrepBinary(BaseBinary):

 RIPGREP_BINARY = RipgrepBinary()

-RG_IGNORE_EXTENSIONS = ('css','js','orig','svg')
-
-RG_ADD_TYPE = '--type-add'
-RG_IGNORE_ARGUMENTS = f"ignore:*.{{{','.join(RG_IGNORE_EXTENSIONS)}}}"
-RG_DEFAULT_ARGUMENTS = "-ilTignore"    # Case insensitive(i), matching files results(l)
-RG_REGEX_ARGUMENT = '-e'
-
-TIMESTAMP_REGEX = r'\/([\d]+\.[\d]+)\/'
-ts_regex = re.compile(TIMESTAMP_REGEX)
+# regex to match archive/<ts>/... snapshot dir names
+TIMESTAMP_REGEX = re.compile(r'\/([\d]+\.[\d]+)\/')


 class RipgrepSearchBackend(BaseSearchBackend):
     name: str = 'ripgrep'

@@ -67,23 +71,22 @@ class RipgrepSearchBackend(BaseSearchBackend):

     @staticmethod
     def search(text: str) -> List[str]:
-        rg_bin = RIPGREP_BINARY.load()
-        if not rg_bin.version:
+        from core.models import Snapshot
+
+        ripgrep_binary = RIPGREP_BINARY.load()
+        if not ripgrep_binary.version:
             raise Exception("ripgrep binary not found, install ripgrep to use this search backend")

-        rg_cmd = [
-            rg_bin.abspath,
-            RG_ADD_TYPE,
-            RG_IGNORE_ARGUMENTS,
-            RG_DEFAULT_ARGUMENTS,
-            RG_REGEX_ARGUMENT,
+        cmd = [
+            ripgrep_binary.abspath,
+            *RIPGREP_CONFIG.RIPGREP_ARGS_DEFAULT,
             text,
-            str(settings.ARCHIVE_DIR)
+            RIPGREP_CONFIG.RIPGREP_SEARCH_DIR,
         ]
-        rg = run(rg_cmd, timeout=SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_TIMEOUT, capture_output=True, text=True)
+        proc = run(cmd, timeout=SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_TIMEOUT, capture_output=True, text=True)
         timestamps = set()
-        for path in rg.stdout.splitlines():
-            ts = ts_regex.findall(path)
+        for path in proc.stdout.splitlines():
+            ts = TIMESTAMP_REGEX.findall(path)
             if ts:
                 timestamps.add(ts[0])
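To make RIPGREP_ARGS_DEFAULT concrete, here is roughly the command line the new defaults expand to (the binary and archive paths are illustrative, not from the commit):

# what the new config-driven invocation expands to with default values:
ignore_exts = 'css,js,orig,svg'
cmd = [
    '/usr/bin/rg',                             # ripgrep_binary.abspath (illustrative path)
    f'--type-add=ignore:*.{{{ignore_exts}}}',  # define an "ignore" file type...
    '--type-not=ignore',                       # ...and exclude it from the search
    '--ignore-case',
    '--files-with-matches',                    # print matching file paths only
    '--regexp', 'some search text',
    '/data/archive',                           # RIPGREP_SEARCH_DIR (illustrative path)
]
print(' '.join(cmd))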
Sonic search plugin:

@@ -1,5 +1,6 @@
 __package__ = 'archivebox.plugins_search.sonic'

+import os
 import sys
 from typing import List, Dict, ClassVar, Generator, cast

@@ -39,15 +40,23 @@ class SonicConfig(BaseConfigSet):
     SONIC_COLLECTION: str = Field(default='archivebox')
     SONIC_BUCKET: str = Field(default='archivebox')

+    SONIC_MAX_CHUNK_LENGTH: int = Field(default=2000)
+    SONIC_MAX_TEXT_LENGTH: int = Field(default=100000000)
+    SONIC_MAX_RETRIES: int = Field(default=5)
+
     @model_validator(mode='after')
     def validate_sonic_port(self):
-        if SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE == 'sonic':
-            if SONIC_LIB is None:
-                sys.stderr.write('[!] Sonic search backend is enabled but not installed. Install Sonic to use the Sonic search backend.\n')
+        if SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE == 'sonic' and SONIC_LIB is None:
+            sys.stderr.write('[X] Error: Sonic search backend is enabled but sonic-client lib is not installed. You may need to run: pip install archivebox[sonic]\n')
+            # dont hard exit here. in case the user is just running "archivebox version" or "archivebox help", we still want those to work despite broken sonic
+            # sys.exit(1)
+            SEARCH_BACKEND_CONFIG.update_in_place(SEARCH_BACKEND_ENGINE='ripgrep')
         return self

 SONIC_CONFIG = SonicConfig()


 class SonicBinary(BaseBinary):
     name: BinName = SONIC_CONFIG.SONIC_BINARY
     binproviders_supported: List[InstanceOf[BinProvider]] = [brew, env]    # TODO: add cargo

@@ -57,6 +66,7 @@ class SonicBinary(BaseBinary):
         # cargo.name: {'packages': lambda: ['sonic-server']},    # TODO: add cargo
     }

+    # TODO: add version checking over protocol? for when sonic backend is on remote server and binary is not installed locally
     # def on_get_version(self):
     #     with sonic.IngestClient(SONIC_CONFIG.SONIC_HOST, str(SONIC_CONFIG.SONIC_PORT), SONIC_CONFIG.SONIC_PASSWORD) as ingestcl:
     #         return SemVer.parse(str(ingestcl.protocol))

@@ -64,11 +74,6 @@ class SonicBinary(BaseBinary):
 SONIC_BINARY = SonicBinary()

-
-MAX_SONIC_TEXT_TOTAL_LENGTH = 100000000    # dont index more than 100 million characters per text
-MAX_SONIC_TEXT_CHUNK_LENGTH = 2000         # dont index more than 2000 characters per chunk
-MAX_SONIC_ERRORS_BEFORE_ABORT = 5
-

 class SonicSearchBackend(BaseSearchBackend):
     name: str = 'sonic'

@@ -80,11 +85,11 @@ class SonicSearchBackend(BaseSearchBackend):
         with sonic.IngestClient(SONIC_CONFIG.SONIC_HOST, str(SONIC_CONFIG.SONIC_PORT), SONIC_CONFIG.SONIC_PASSWORD) as ingestcl:
             for text in texts:
                 chunks = (
-                    text[i:i+MAX_SONIC_TEXT_CHUNK_LENGTH]
+                    text[i:i+SONIC_CONFIG.SONIC_MAX_CHUNK_LENGTH]
                     for i in range(
                         0,
-                        min(len(text), MAX_SONIC_TEXT_TOTAL_LENGTH),
-                        MAX_SONIC_TEXT_CHUNK_LENGTH,
+                        min(len(text), SONIC_CONFIG.SONIC_MAX_TEXT_LENGTH),
+                        SONIC_CONFIG.SONIC_MAX_CHUNK_LENGTH,
                     )
                 )
                 try:

@@ -93,7 +98,7 @@ class SonicSearchBackend(BaseSearchBackend):
                 except Exception as err:
                     print(f'[!] Sonic search backend threw an error while indexing: {err.__class__.__name__} {err}')
                     error_count += 1
-                    if error_count > MAX_SONIC_ERRORS_BEFORE_ABORT:
+                    if error_count > SONIC_CONFIG.SONIC_MAX_RETRIES:
                         raise

     @staticmethod
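For reference, a standalone sketch of the chunking logic that now reads its limits from SONIC_CONFIG (constants inlined here, function name illustrative):

def iter_chunks(text: str, max_chunk: int = 2000, max_total: int = 100_000_000):
    # yield successive max_chunk-sized slices, capped at max_total characters overall
    for i in range(0, min(len(text), max_total), max_chunk):
        yield text[i:i + max_chunk]

print([len(c) for c in iter_chunks('x' * 4500)])   # -> [2000, 2000, 500]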
SQLite FTS search plugin:

@@ -1,8 +1,9 @@
 __package__ = 'archivebox.plugins_search.sqlite'

+import sys
 import sqlite3
 import codecs
-from typing import List, ClassVar, Generator, Callable
+from typing import List, ClassVar, Iterable, Callable

 from django.conf import settings
 from django.db import connection as database

@@ -17,7 +18,7 @@ from plugantic.base_hook import BaseHook
 from plugantic.base_searchbackend import BaseSearchBackend

 # Depends on Other Plugins:
-# from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG
+from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG



@@ -30,6 +31,7 @@ class SqliteftsConfig(BaseConfigSet):
     SQLITEFTS_TOKENIZERS: str = Field(default='porter unicode61 remove_diacritics 2', alias='FTS_TOKENIZERS')
     SQLITEFTS_MAX_LENGTH: int = Field(default=int(1e9), alias='FTS_SQLITE_MAX_LENGTH')

+    # Not really meant to be user-modified, just here as constants
     SQLITEFTS_DB: str = Field(default='search.sqlite3')
     SQLITEFTS_TABLE: str = Field(default='snapshot_fts')
     SQLITEFTS_ID_TABLE: str = Field(default='snapshot_id_fts')

@@ -37,8 +39,9 @@ class SqliteftsConfig(BaseConfigSet):

     @model_validator(mode='after')
     def validate_fts_separate_database(self):
-        if self.SQLITEFTS_SEPARATE_DATABASE:
-            assert self.SQLITEFTS_DB, "SQLITEFTS_DB must be set if SQLITEFTS_SEPARATE_DATABASE is True"
+        if SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE == 'sqlite' and self.SQLITEFTS_SEPARATE_DATABASE and not self.SQLITEFTS_DB:
+            sys.stderr.write('[X] Error: SQLITEFTS_DB must be set if SQLITEFTS_SEPARATE_DATABASE is True\n')
+            SEARCH_BACKEND_CONFIG.update_in_place(SEARCH_BACKEND_ENGINE='ripgrep')
         return self

     @property

@@ -84,8 +87,7 @@ def _escape_sqlite3(value: str, *, quote: str, errors='strict') -> str:

     nul_index = encodable.find("\x00")
     if nul_index >= 0:
-        error = UnicodeEncodeError("NUL-terminated utf-8", encodable,
-                                   nul_index, nul_index + 1, "NUL not allowed")
+        error = UnicodeEncodeError("NUL-terminated utf-8", encodable, nul_index, nul_index + 1, "NUL not allowed")
         error_handler = codecs.lookup_error(errors)
         replacement, _ = error_handler(error)
         assert isinstance(replacement, str), "handling a UnicodeEncodeError should return a str replacement"

@@ -224,7 +226,7 @@ class SqliteftsSearchBackend(BaseSearchBackend):
         return snap_ids

     @staticmethod
-    def flush(snapshot_ids: Generator[str, None, None]):
+    def flush(snapshot_ids: Iterable[str]):
         snapshot_ids = list(snapshot_ids)    # type: ignore[assignment]

         id_table = _escape_sqlite3_identifier(SQLITEFTS_CONFIG.SQLITEFTS_ID_TABLE)

@@ -243,7 +245,7 @@ SQLITEFTS_SEARCH_BACKEND = SqliteftsSearchBackend()

 class SqliteftsSearchPlugin(BasePlugin):
     app_label: str = 'sqlitefts'
-    verbose_name: str = 'Sqlitefts'
+    verbose_name: str = 'SQLite FTS5 Search'

     hooks: List[InstanceOf[BaseHook]] = [
         SQLITEFTS_CONFIG,
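Loosening flush() from Generator to Iterable matters because callers may pass lists or querysets, not just generators; a minimal illustration:

from typing import Iterable

def flush(snapshot_ids: Iterable[str]) -> list:
    # accepts any iterable: list, set, generator, queryset, ...
    return list(snapshot_ids)

print(flush(['124.1', '125.2']))          # a plain list: fine under Iterable, rejected under Generator
print(flush(ts for ts in ('124.1',)))     # a generator still works too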
SearchBackendConfig:

@@ -115,9 +115,6 @@ class SearchBackendConfig(BaseConfigSet):
     USE_SEARCHING_BACKEND: bool = Field(default=True)

     SEARCH_BACKEND_ENGINE: str = Field(default='ripgrep')
-    SEARCH_BACKEND_HOST_NAME: str = Field(default='localhost')
-    SEARCH_BACKEND_PORT: int = Field(default=1491)
-    SEARCH_BACKEND_PASSWORD: str = Field(default='SecretPassword')
     SEARCH_PROCESS_HTML: bool = Field(default=True)
     SEARCH_BACKEND_TIMEOUT: int = Field(default=10)