add django_huey, huey_monitor, and replace Threads with huey tasks

This commit is contained in:
Nick Sweeting 2024-09-10 00:04:39 -07:00
parent 4df90fbb40
commit 60154fba5f
No known key found for this signature in database
19 changed files with 850 additions and 92 deletions
archivebox/core

View file

@ -87,6 +87,7 @@ INSTALLED_APPS = [
'django_object_actions', # provides easy Django Admin action buttons on change views https://github.com/crccheck/django-object-actions
# Our ArchiveBox-provided apps
'queues', # handles starting and managing background workers and processes
'abid_utils', # handles ABID ID creation, handling, and models
'plugantic', # ArchiveBox plugin API definition + finding/registering/calling interface
'core', # core django model with Snapshot, ArchiveResult, etc.
@ -98,6 +99,9 @@ INSTALLED_APPS = [
# 3rd-party apps from PyPI that need to be loaded last
'admin_data_views', # handles rendering some convenient automatic read-only views of data in Django admin
'django_extensions', # provides Django Debug Toolbar (and other non-debug helpers)
'django_huey', # provides multi-queue support for django huey https://github.com/gaiacoop/django-huey
'bx_django_utils', # needed for huey_monitor https://github.com/boxine/bx_django_utils
'huey_monitor', # adds an admin UI for monitoring background huey tasks https://github.com/boxine/django-huey-monitor
]
@ -212,17 +216,28 @@ CACHE_DB_TABLE = 'django_cache'
DATABASE_FILE = Path(CONFIG.OUTPUT_DIR) / CONFIG.SQL_INDEX_FILENAME
DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", str(DATABASE_FILE))
QUEUE_DATABASE_NAME = DATABASE_NAME.replace('index.sqlite3', 'queue.sqlite3')
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.sqlite3',
'NAME': DATABASE_NAME,
'OPTIONS': {
'timeout': 60,
'check_same_thread': False,
"default": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": DATABASE_NAME,
"OPTIONS": {
"timeout": 60,
"check_same_thread": False,
},
'TIME_ZONE': CONFIG.TIMEZONE,
"TIME_ZONE": CONFIG.TIMEZONE,
# DB setup is sometimes modified at runtime by setup_django() in config.py
},
"queue": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": QUEUE_DATABASE_NAME,
"OPTIONS": {
"timeout": 60,
"check_same_thread": False,
},
"TIME_ZONE": CONFIG.TIMEZONE,
},
# 'cache': {
# 'ENGINE': 'django.db.backends.sqlite3',
# 'NAME': CACHE_DB_PATH,
@ -239,6 +254,64 @@ MIGRATION_MODULES = {'signal_webhooks': None}
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
HUEY = {
"huey_class": "huey.SqliteHuey",
"filename": QUEUE_DATABASE_NAME,
"name": "system_tasks",
"results": True,
"store_none": True,
"immediate": False,
"utc": True,
"consumer": {
"workers": 1,
"worker_type": "thread",
"initial_delay": 0.1, # Smallest polling interval, same as -d.
"backoff": 1.15, # Exponential backoff using this rate, -b.
"max_delay": 10.0, # Max possible polling interval, -m.
"scheduler_interval": 1, # Check schedule every second, -s.
"periodic": True, # Enable crontab feature.
"check_worker_health": True, # Enable worker health checks.
"health_check_interval": 1, # Check worker health every second.
},
}
# https://huey.readthedocs.io/en/latest/contrib.html#setting-things-up
# https://github.com/gaiacoop/django-huey
DJANGO_HUEY = {
"default": "system_tasks",
"queues": {
HUEY["name"]: HUEY.copy(),
# more registered here at plugin import-time by BaseQueue.register()
},
}
class HueyDBRouter:
"""A router to store all the Huey Monitor models in the queue.sqlite3 database."""
route_app_labels = {"huey_monitor", "django_huey", "djhuey"}
def db_for_read(self, model, **hints):
if model._meta.app_label in self.route_app_labels:
return "queue"
return 'default'
def db_for_write(self, model, **hints):
if model._meta.app_label in self.route_app_labels:
return "queue"
return 'default'
def allow_relation(self, obj1, obj2, **hints):
if obj1._meta.app_label in self.route_app_labels or obj2._meta.app_label in self.route_app_labels:
return obj1._meta.app_label == obj2._meta.app_label
return None
def allow_migrate(self, db, app_label, model_name=None, **hints):
if app_label in self.route_app_labels:
return db == "queue"
return db == "default"
DATABASE_ROUTERS = ['core.settings.HueyDBRouter']
CACHES = {
'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'},
# 'sqlite': {'BACKEND': 'django.core.cache.backends.db.DatabaseCache', 'LOCATION': 'cache'},