merge queues and actors apps into new workers app

This commit is contained in:
Nick Sweeting 2024-11-18 18:52:48 -08:00
parent e50f8cb3b6
commit e469c5a344
No known key found for this signature in database
37 changed files with 89 additions and 304 deletions

View file

@@ -82,11 +82,10 @@ ABX_ECOSYSTEM_PLUGINS = abx.get_pip_installed_plugins(group='abx')
# Load all built-in ArchiveBox plugins # Load all built-in ArchiveBox plugins
ARCHIVEBOX_BUILTIN_PLUGINS = { ARCHIVEBOX_BUILTIN_PLUGINS = {
'config': PACKAGE_DIR / 'config', 'config': PACKAGE_DIR / 'config',
'workers': PACKAGE_DIR / 'workers',
'core': PACKAGE_DIR / 'core', 'core': PACKAGE_DIR / 'core',
'crawls': PACKAGE_DIR / 'crawls', 'crawls': PACKAGE_DIR / 'crawls',
'queues': PACKAGE_DIR / 'queues',
'seeds': PACKAGE_DIR / 'seeds', 'seeds': PACKAGE_DIR / 'seeds',
'actors': PACKAGE_DIR / 'actors',
# 'search': PACKAGE_DIR / 'search', # 'search': PACKAGE_DIR / 'search',
# 'core': PACKAGE_DIR / 'core', # 'core': PACKAGE_DIR / 'core',
} }

View file

@@ -1,2 +0,0 @@
__package__ = 'archivebox.actors'
__order__ = 100

View file

@@ -1,3 +0,0 @@
from django.contrib import admin
# Register your models here.

View file

@@ -1,6 +0,0 @@
from django.apps import AppConfig
class ActorsConfig(AppConfig):
default_auto_field = "django.db.models.BigAutoField"
name = "actors"

View file

@@ -1,202 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Job Dashboard</title>
<style>
body {
font-family: Arial, sans-serif;
line-height: 1.6;
color: #333;
width: 100%;
margin: 0 auto;
padding: 20px;
}
@keyframes pulse {
0% { opacity: 1; }
48% { opacity: 0.2; }
52% { opacity: 1; }
100% { opacity: 1; }
}
h1 {
text-align: center;
}
h1 a {
animation: pulse 1s;
}
.dashboard {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
gap: 20px;
}
.card {
border: 1px solid #ddd;
border-radius: 8px;
padding: 15px;
background-color: #f9f9f9;
}
.card h2 {
margin-top: 0;
border-bottom: 2px solid #ddd;
padding-bottom: 10px;
font-family: monospace;
}
.scroll-area {
/*height: 800px;
overflow-y: scroll; */
height: auto;
border: 1px solid #ddd;
padding: 10px;
background-color: #fff;
}
.job-item {
border: 1px solid #eee;
border-radius: 4px;
padding: 10px;
margin-bottom: 10px;
}
.job-item:last-child {
margin-bottom: 0;
}
.badge {
display: inline-block;
padding: 3px 7px;
border-radius: 3px;
font-size: 12px;
font-weight: bold;
}
.badge-started {
background-color: #4CAF50;
color: white;
}
.badge-queued {
background-color: #2196F3;
color: white;
}
.badge-failed {
background-color: #f44336;
color: white;
}
.badge-succeeded {
background-color: #666;
color: white;
}
.badge-sealed {
background-color: #666;
color: white;
}
.date {
font-size: 16px;
color: #666;
float: right;
}
</style>
</head>
<body>
<h1>Job Dashboard <small><a href="?refresh=true" id="current-time">♻️ {{now}}}</a></small></h1>
<div id="dashboard" class="dashboard"></div>
<script>
function formatDate(dateString) {
const now = Date.now()
const date = new Date(dateString)
// return new Date(dateString).toLocaleString();
// return date.toISOString().split('T').at(-1).replace('Z', '');
const seconds_diff = Math.round((date - now) / 1000, 0)
if (seconds_diff < 0) {
return `${seconds_diff}s ago`;
} else {
return `${seconds_diff}s in the future`;
}
}
function createJobElement(job) {
const jobElement = document.createElement('div');
jobElement.className = 'job-item';
jobElement.innerHTML = `
<p><a href="/api/v1/core/any/${job.abid}?api_key={{api_token|default:'NONE PROVIDED BY VIEW'}}"><code>${job.abid}</code></a></p>
<p>
<span class="badge badge-${job.status}">${job.status}</span>
<span class="date">♻️ ${formatDate(job.retry_at)}</span>
</p>
<p style="font-size: 12px; color: #666;">${job.description}</p>
`;
return jobElement;
}
function updateDashboard(data) {
const currentTime = document.getElementById('current-time');
window.now = new Date();
currentTime.innerHTML = `♻️ ${window.now.toISOString().split('T').at(-1).replace('Z', '')}`;
const dashboard = document.getElementById('dashboard');
dashboard.innerHTML = '';
data.forEach(actor => {
const card = document.createElement('div');
card.className = 'card';
card.innerHTML = `
<h2>${actor.model}</h2>
<hr/>
Future
<div class="scroll-area" style="background-color: white;" id="future-${actor.model}"></div>
<hr/>
Pending
<div class="scroll-area" style="background-color: lightblue;" id="pending-${actor.model}"></div>
<hr/>
Stalled
<div class="scroll-area" style="background-color: lightcoral;" id="stalled-${actor.model}"></div>
<hr/>
Active
<div class="scroll-area" style="background-color: lightgreen;" id="active-${actor.model}"></div>
<hr/>
Past
<div class="scroll-area" style="background-color: lightgrey;" id="past-${actor.model}"></div>
`;
dashboard.appendChild(card);
const futureContainer = document.getElementById(`future-${actor.model}`);
actor.future.forEach(job => {
futureContainer.appendChild(createJobElement(job));
});
const pendingContainer = document.getElementById(`pending-${actor.model}`);
actor.pending.forEach(job => {
pendingContainer.appendChild(createJobElement(job));
});
const stalledContainer = document.getElementById(`stalled-${actor.model}`);
actor.stalled.forEach(job => {
stalledContainer.appendChild(createJobElement(job));
});
const activeContainer = document.getElementById(`active-${actor.model}`);
actor.active.forEach(job => {
activeContainer.appendChild(createJobElement(job));
});
const pastContainer = document.getElementById(`past-${actor.model}`);
actor.past.forEach(job => {
pastContainer.appendChild(createJobElement(job));
});
});
}
function fetchData() {
fetch('/api/v1/jobs/actors', {
headers: {
'Authorization': `Bearer {{api_token|default:'NONE PROVIDED BY VIEW'}}`
}
})
.then(response => response.json())
.then(data => updateDashboard(data))
.catch(error => console.error('Error fetching data:', error));
}
fetchData();
setInterval(fetchData, 750);
</script>
</body>
</html>

View file

@@ -41,7 +41,7 @@ def register_urls(api: NinjaAPI) -> NinjaAPI:
api.add_router('/core/', 'api.v1_core.router') api.add_router('/core/', 'api.v1_core.router')
api.add_router('/crawls/', 'api.v1_crawls.router') api.add_router('/crawls/', 'api.v1_crawls.router')
api.add_router('/cli/', 'api.v1_cli.router') api.add_router('/cli/', 'api.v1_cli.router')
api.add_router('/jobs/', 'api.v1_actors.router') api.add_router('/workers/', 'api.v1_workers.router')
return api return api

View file

@@ -31,7 +31,7 @@ class TaskSchema(Schema):
class ActorSchema(Schema): class ActorSchema(Schema):
# TYPE: str = 'actors.actor.ActorType' # TYPE: str = 'workers.actor.ActorType'
# name: str # name: str
#pid: int | None #pid: int | None
@@ -97,7 +97,7 @@ class ActorSchema(Schema):
class OrchestratorSchema(Schema): class OrchestratorSchema(Schema):
# TYPE: str = 'actors.orchestrator.Orchestrator' # TYPE: str = 'workers.orchestrator.Orchestrator'
#pid: int | None #pid: int | None
exit_on_idle: bool exit_on_idle: bool
@@ -114,7 +114,7 @@ class OrchestratorSchema(Schema):
def get_orchestrators(request): def get_orchestrators(request):
"""List all the task orchestrators (aka Orchestrators) that are currently running""" """List all the task orchestrators (aka Orchestrators) that are currently running"""
from actors.orchestrator import Orchestrator from workers.orchestrator import Orchestrator
orchestrator = Orchestrator() orchestrator = Orchestrator()
return [orchestrator] return [orchestrator]
@@ -124,6 +124,6 @@ def get_orchestrators(request):
def get_actors(request): def get_actors(request):
"""List all the task consumer workers (aka Actors) that are currently running""" """List all the task consumer workers (aka Actors) that are currently running"""
from actors.orchestrator import Orchestrator from workers.orchestrator import Orchestrator
orchestrator = Orchestrator() orchestrator = Orchestrator()
return orchestrator.actor_types.values() return orchestrator.actor_types.values()

View file

@@ -53,7 +53,7 @@ def add(urls: str | list[str],
from seeds.models import Seed from seeds.models import Seed
from crawls.models import Crawl from crawls.models import Crawl
from actors.orchestrator import Orchestrator from workers.orchestrator import Orchestrator
from abid_utils.models import get_or_create_system_user_pk from abid_utils.models import get_or_create_system_user_pk

View file

@@ -28,7 +28,7 @@ def update():
from archivebox.config.django import setup_django from archivebox.config.django import setup_django
setup_django() setup_django()
from actors.orchestrator import Orchestrator from workers.orchestrator import Orchestrator
orchestrator = Orchestrator(exit_on_idle=False) orchestrator = Orchestrator(exit_on_idle=False)
orchestrator.start() orchestrator.start()

View file

@@ -7,7 +7,6 @@ from typing import Any, List, Dict, cast
from benedict import benedict from benedict import benedict
from django.http import HttpRequest from django.http import HttpRequest
from django.conf import settings
from django.utils import timezone from django.utils import timezone
from django.utils.html import format_html, mark_safe from django.utils.html import format_html, mark_safe
@@ -304,7 +303,7 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
"Exit Status": [], "Exit Status": [],
} }
from queues.supervisor_util import get_existing_supervisord_process from workers.supervisor_util import get_existing_supervisord_process
supervisor = get_existing_supervisord_process() supervisor = get_existing_supervisord_process()
if supervisor is None: if supervisor is None:
@@ -374,8 +373,10 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext: def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
assert request.user.is_superuser, "Must be a superuser to view configuration settings." assert request.user.is_superuser, "Must be a superuser to view configuration settings."
from queues.supervisor_util import get_existing_supervisord_process, get_worker from workers.supervisor_util import get_existing_supervisord_process, get_worker, get_sock_file, CONFIG_FILE_NAME
from queues.settings import SUPERVISORD_CONFIG_FILE
SOCK_FILE = get_sock_file()
CONFIG_FILE = SOCK_FILE.parent / CONFIG_FILE_NAME
supervisor = get_existing_supervisord_process() supervisor = get_existing_supervisord_process()
if supervisor is None: if supervisor is None:
@@ -388,7 +389,7 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
all_config = cast(List[Dict[str, Any]], supervisor.getAllConfigInfo() or []) all_config = cast(List[Dict[str, Any]], supervisor.getAllConfigInfo() or [])
if key == 'supervisord': if key == 'supervisord':
relevant_config = SUPERVISORD_CONFIG_FILE.read_text() relevant_config = CONFIG_FILE.read_text()
relevant_logs = cast(str, supervisor.readLog(0, 10_000_000)) relevant_logs = cast(str, supervisor.readLog(0, 10_000_000))
start_ts = [line for line in relevant_logs.split("\n") if "RPC interface 'supervisor' initialized" in line][-1].split(",", 1)[0] start_ts = [line for line in relevant_logs.split("\n") if "RPC interface 'supervisor' initialized" in line][-1].split(",", 1)[0]
uptime = str(timezone.now() - parse_date(start_ts)).split(".")[0] uptime = str(timezone.now() - parse_date(start_ts)).split(".")[0]
@@ -476,8 +477,6 @@ def log_list_view(request: HttpRequest, **kwargs) -> TableContext:
def log_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext: def log_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
assert request.user.is_superuser, "Must be a superuser to view configuration settings." assert request.user.is_superuser, "Must be a superuser to view configuration settings."
from django.conf import settings
log_file = [logfile for logfile in CONSTANTS.LOGS_DIR.glob('*.log') if key in logfile.name][0] log_file = [logfile for logfile in CONSTANTS.LOGS_DIR.glob('*.log') if key in logfile.name][0]
log_text = log_file.read_text() log_text = log_file.read_text()

View file

@@ -25,7 +25,7 @@ from archivebox.extractors import archive_links
from archivebox.main import remove from archivebox.main import remove
from archivebox.abid_utils.admin import ABIDModelAdmin from archivebox.abid_utils.admin import ABIDModelAdmin
from archivebox.queues.tasks import bg_archive_links, bg_add from archivebox.workers.tasks import bg_archive_links, bg_add
from core.models import Tag from core.models import Tag
from core.admin_tags import TagInline from core.admin_tags import TagInline

View file

@@ -26,8 +26,8 @@ import abx
from archivebox.config import CONSTANTS from archivebox.config import CONSTANTS
from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithOutputDir from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithOutputDir
from actors.models import ModelWithStateMachine from workers.models import ModelWithStateMachine
from queues.tasks import bg_archive_snapshot from workers.tasks import bg_archive_snapshot
from crawls.models import Crawl from crawls.models import Crawl
# from machine.models import Machine, NetworkInterface # from machine.models import Machine, NetworkInterface

View file

@@ -64,8 +64,7 @@ INSTALLED_APPS = [
# 'abid_utils', # handles ABID ID creation, handling, and models # 'abid_utils', # handles ABID ID creation, handling, and models
'config', # ArchiveBox config settings (loaded as a plugin, don't need to add it here) 'config', # ArchiveBox config settings (loaded as a plugin, don't need to add it here)
'machine', # handles collecting and storing information about the host machine, network interfaces, installed binaries, etc. 'machine', # handles collecting and storing information about the host machine, network interfaces, installed binaries, etc.
'actors', # handles starting and managing background workers and processes (orchestrators and actors) 'workers', # handles starting and managing background workers and processes (orchestrators and actors)
'queues', # handles starting and managing background workers and processes (supervisord)
'seeds', # handles Seed model and URL source management 'seeds', # handles Seed model and URL source management
'crawls', # handles Crawl and CrawlSchedule models and management 'crawls', # handles Crawl and CrawlSchedule models and management
'personas', # handles Persona and session management 'personas', # handles Persona and session management

View file

@@ -8,7 +8,7 @@ from django.utils import timezone
from statemachine import State, StateMachine from statemachine import State, StateMachine
from actors.actor import ActorType from workers.actor import ActorType
from core.models import Snapshot, ArchiveResult from core.models import Snapshot, ArchiveResult

View file

@@ -10,7 +10,7 @@ from archivebox.misc.serve_static import serve_static
from core.admin_site import archivebox_admin from core.admin_site import archivebox_admin
from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
from actors.views import JobsDashboardView from workers.views import JobsDashboardView
# GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306 # GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
# from archivebox.config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE # from archivebox.config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE

View file

@@ -25,7 +25,7 @@ import archivebox
from core.models import Snapshot from core.models import Snapshot
from core.forms import AddLinkForm from core.forms import AddLinkForm
from queues.tasks import bg_add from workers.tasks import bg_add
from archivebox.config import CONSTANTS_CONFIG, DATA_DIR, VERSION from archivebox.config import CONSTANTS_CONFIG, DATA_DIR, VERSION
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG

View file

@@ -10,7 +10,7 @@ from django.conf import settings
from django.urls import reverse_lazy from django.urls import reverse_lazy
from django.utils import timezone from django.utils import timezone
from actors.models import ModelWithStateMachine from workers.models import ModelWithStateMachine
if TYPE_CHECKING: if TYPE_CHECKING:
from core.models import Snapshot, ArchiveResult from core.models import Snapshot, ArchiveResult

View file

@@ -6,7 +6,7 @@ from django.utils import timezone
from statemachine import State, StateMachine from statemachine import State, StateMachine
from actors.actor import ActorType from workers.actor import ActorType
from crawls.models import Crawl from crawls.models import Crawl

View file

@@ -675,8 +675,8 @@ def add(urls: Union[str, List[str]],
"""Add a new URL or list of URLs to your archive""" """Add a new URL or list of URLs to your archive"""
from core.models import Snapshot, Tag from core.models import Snapshot, Tag
# from queues.supervisor_util import start_cli_workers, tail_worker_logs # from workers.supervisor_util import start_cli_workers, tail_worker_logs
# from queues.tasks import bg_archive_link # from workers.tasks import bg_archive_link
assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)' assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
@@ -873,7 +873,7 @@ def update(resume: Optional[float]=None,
from core.models import ArchiveResult from core.models import ArchiveResult
from .search import index_links from .search import index_links
# from .queues.supervisor_util import start_cli_workers # from workers.supervisor_util import start_cli_workers
check_data_folder() check_data_folder()
@@ -1494,7 +1494,7 @@ def server(runserver_args: Optional[List[str]]=None,
runserver_args.append('--noreload') # '--insecure' runserver_args.append('--noreload') # '--insecure'
call_command("runserver", *runserver_args) call_command("runserver", *runserver_args)
else: else:
from queues.supervisor_util import start_server_workers from workers.supervisor_util import start_server_workers
print() print()
start_server_workers(host=host, port=port, daemonize=False) start_server_workers(host=host, port=port, daemonize=False)

View file

@@ -1,27 +1,27 @@
__package__ = 'abx_plugin_singlefile' # __package__ = 'abx_plugin_singlefile'
from typing import ClassVar # from typing import ClassVar
from django.db.models import QuerySet # from django.db.models import QuerySet
from django.utils.functional import classproperty # from django.utils.functional import classproperty
from actors.actor import ActorType # from workers.actor import ActorType
from .models import SinglefileResult # from .models import SinglefileResult
class SinglefileActor(ActorType[SinglefileResult]): # class SinglefileActor(ActorType[SinglefileResult]):
CLAIM_ORDER: ClassVar[str] = 'created_at DESC' # CLAIM_ORDER: ClassVar[str] = 'created_at DESC'
CLAIM_WHERE: ClassVar[str] = 'status = "queued" AND extractor = "favicon"' # CLAIM_WHERE: ClassVar[str] = 'status = "queued" AND extractor = "favicon"'
CLAIM_SET: ClassVar[str] = 'status = "started"' # CLAIM_SET: ClassVar[str] = 'status = "started"'
@classproperty # @classproperty
def QUERYSET(cls) -> QuerySet: # def QUERYSET(cls) -> QuerySet:
return SinglefileResult.objects.filter(status='queued') # return SinglefileResult.objects.filter(status='queued')
def tick(self, obj: SinglefileResult): # def tick(self, obj: SinglefileResult):
print(f'[grey53]{self}.tick({obj.abid or obj.id}, status={obj.status}) remaining:[/grey53]', self.get_queue().count()) # print(f'[grey53]{self}.tick({obj.abid or obj.id}, status={obj.status}) remaining:[/grey53]', self.get_queue().count())
updated = SinglefileResult.objects.filter(id=obj.id, status='started').update(status='success') == 1 # updated = SinglefileResult.objects.filter(id=obj.id, status='started').update(status='success') == 1
if not updated: # if not updated:
raise Exception(f'Failed to update {obj.abid or obj.id}, interrupted by another actor writing to the same object') # raise Exception(f'Failed to update {obj.abid or obj.id}, interrupted by another actor writing to the same object')
obj.refresh_from_db() # obj.refresh_from_db()
obj.save() # obj.save()

View file

@@ -13,11 +13,11 @@ class WgetExtractor(BaseExtractor):
name: ExtractorName = 'wget' name: ExtractorName = 'wget'
binary: BinName = WGET_BINARY.name binary: BinName = WGET_BINARY.name
def get_output_path(self, snapshot) -> Path | None: def get_output_path(self, snapshot) -> str:
wget_index_path = wget_output_path(snapshot.as_link()) # wget_index_path = wget_output_path(snapshot.as_link())
if wget_index_path: # if wget_index_path:
return Path(wget_index_path) # return Path(wget_index_path)
return None return 'wget'
WGET_EXTRACTOR = WgetExtractor() WGET_EXTRACTOR = WgetExtractor()

View file

@@ -65,16 +65,16 @@ class ConfigPluginSpec:
def get_SCOPE_CONFIG(extra=None, archiveresult=None, snapshot=None, crawl=None, user=None, request=None, collection=..., environment=..., machine=..., default=...) -> dict[ConfigKeyStr, Any]: def get_SCOPE_CONFIG(extra=None, archiveresult=None, snapshot=None, crawl=None, user=None, request=None, collection=..., environment=..., machine=..., default=...) -> dict[ConfigKeyStr, Any]:
"""Get the config as it applies to you right now, based on the current context""" """Get the config as it applies to you right now, based on the current context"""
return benedict({ return benedict({
**pm.hook.get_default_config(default=default), **pm.hook.get_default_config(default=default), # schema defaults defined in source code
**pm.hook.get_machine_config(machine=machine), **pm.hook.get_machine_config(machine=machine), # machine defaults set on the Machine model
**pm.hook.get_environment_config(environment=environment), **pm.hook.get_environment_config(environment=environment), # env config set for just this run on this machine
**pm.hook.get_collection_config(collection=collection), **pm.hook.get_collection_config(collection=collection), # collection defaults set in ArchiveBox.conf
**pm.hook.get_user_config(user=user), **pm.hook.get_user_config(user=user), # user config set on User model
**pm.hook.get_crawl_config(crawl=crawl), **pm.hook.get_request_config(request=request), # extra config derived from the current request
**pm.hook.get_snapshot_config(snapshot=snapshot), **pm.hook.get_crawl_config(crawl=crawl), # extra config set on the Crawl model
**pm.hook.get_archiveresult_config(archiveresult=archiveresult), **pm.hook.get_snapshot_config(snapshot=snapshot), # extra config set on the Snapshot model
**pm.hook.get_request_config(request=request), **pm.hook.get_archiveresult_config(archiveresult=archiveresult), # extra config set on the ArchiveResult model
**(extra or {}), **(extra or {}), # extra config passed in by the caller
}) })
@staticmethod @staticmethod

View file

@@ -1,8 +0,0 @@
__package__ = 'archivebox.queues'
import abx
@abx.hookimpl
def register_admin(admin_site):
from queues.admin import register_admin
register_admin(admin_site)

View file

@@ -0,0 +1,9 @@
__package__ = 'archivebox.workers'
__order__ = 100
import abx
@abx.hookimpl
def register_admin(admin_site):
from workers.admin import register_admin
register_admin(admin_site)

View file

@@ -1,4 +1,4 @@
__package__ = 'archivebox.actors' __package__ = 'archivebox.workers'
import os import os
import time import time

View file

@@ -1,4 +1,4 @@
__package__ = 'archivebox.queues' __package__ = 'archivebox.workers'
import abx import abx

View file

@@ -1,7 +1,7 @@
from django.apps import AppConfig from django.apps import AppConfig
class QueuesConfig(AppConfig): class WorkersConfig(AppConfig):
default_auto_field = 'django.db.models.BigAutoField' default_auto_field = 'django.db.models.BigAutoField'
name = 'queues' name = 'workers'

View file

@@ -2,7 +2,7 @@
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from actors.orchestrator import ArchivingOrchestrator from workers.orchestrator import ArchivingOrchestrator
class Command(BaseCommand): class Command(BaseCommand):

View file

@@ -60,7 +60,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
f'{cls.__name__}.{field.name} must have choices set to {cls.__name__}.StatusChoices.choices', f'{cls.__name__}.{field.name} must have choices set to {cls.__name__}.StatusChoices.choices',
hint=f'{cls.__name__}.{field.name}.choices = {getattr(field, "choices", None)!r}', hint=f'{cls.__name__}.{field.name}.choices = {getattr(field, "choices", None)!r}',
obj=cls, obj=cls,
id='actors.E011', id='workers.E011',
)) ))
if getattr(field, '_is_retry_at_field', False): if getattr(field, '_is_retry_at_field', False):
if cls.retry_at_field_name == field.name: if cls.retry_at_field_name == field.name:
@@ -73,14 +73,14 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
f'{cls.__name__}.state_field_name must be defined and point to a StatusField()', f'{cls.__name__}.state_field_name must be defined and point to a StatusField()',
hint=f'{cls.__name__}.state_field_name = {cls.state_field_name!r} but {cls.__name__}.{cls.state_field_name!r} was not found or does not refer to StatusField', hint=f'{cls.__name__}.state_field_name = {cls.state_field_name!r} but {cls.__name__}.{cls.state_field_name!r} was not found or does not refer to StatusField',
obj=cls, obj=cls,
id='actors.E012', id='workers.E012',
)) ))
if not found_retry_at_field: if not found_retry_at_field:
errors.append(checks.Error( errors.append(checks.Error(
f'{cls.__name__}.retry_at_field_name must be defined and point to a RetryAtField()', f'{cls.__name__}.retry_at_field_name must be defined and point to a RetryAtField()',
hint=f'{cls.__name__}.retry_at_field_name = {cls.retry_at_field_name!r} but {cls.__name__}.{cls.retry_at_field_name!r} was not found or does not refer to RetryAtField', hint=f'{cls.__name__}.retry_at_field_name = {cls.retry_at_field_name!r} but {cls.__name__}.{cls.retry_at_field_name!r} was not found or does not refer to RetryAtField',
obj=cls, obj=cls,
id='actors.E013', id='workers.E013',
)) ))
if not found_id_field: if not found_id_field:
@@ -88,7 +88,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
f'{cls.__name__} must have an id field that is a primary key', f'{cls.__name__} must have an id field that is a primary key',
hint=f'{cls.__name__}.id = {cls.id!r}', hint=f'{cls.__name__}.id = {cls.id!r}',
obj=cls, obj=cls,
id='actors.E014', id='workers.E014',
)) ))
if not isinstance(cls.state_machine_name, str): if not isinstance(cls.state_machine_name, str):
@@ -96,7 +96,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
f'{cls.__name__}.state_machine_name must be a dotted-import path to a StateMachine class', f'{cls.__name__}.state_machine_name must be a dotted-import path to a StateMachine class',
hint=f'{cls.__name__}.state_machine_name = {cls.state_machine_name!r}', hint=f'{cls.__name__}.state_machine_name = {cls.state_machine_name!r}',
obj=cls, obj=cls,
id='actors.E015', id='workers.E015',
)) ))
try: try:
@@ -106,7 +106,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
f'{cls.__name__}.state_machine_name must point to a valid StateMachine class, but got {type(err).__name__} {err} when trying to access {cls.__name__}.StateMachineClass', f'{cls.__name__}.state_machine_name must point to a valid StateMachine class, but got {type(err).__name__} {err} when trying to access {cls.__name__}.StateMachineClass',
hint=f'{cls.__name__}.state_machine_name = {cls.state_machine_name!r}', hint=f'{cls.__name__}.state_machine_name = {cls.state_machine_name!r}',
obj=cls, obj=cls,
id='actors.E016', id='workers.E016',
)) ))
if cls.INITIAL_STATE not in cls.StatusChoices.values: if cls.INITIAL_STATE not in cls.StatusChoices.values:
@@ -114,7 +114,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
f'{cls.__name__}.StateMachineClass.initial_state must be present within {cls.__name__}.StatusChoices', f'{cls.__name__}.StateMachineClass.initial_state must be present within {cls.__name__}.StatusChoices',
hint=f'{cls.__name__}.StateMachineClass.initial_state = {cls.StateMachineClass.initial_state!r}', hint=f'{cls.__name__}.StateMachineClass.initial_state = {cls.StateMachineClass.initial_state!r}',
obj=cls, obj=cls,
id='actors.E017', id='workers.E017',
)) ))
if cls.ACTIVE_STATE not in cls.StatusChoices.values: if cls.ACTIVE_STATE not in cls.StatusChoices.values:
@@ -122,7 +122,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
f'{cls.__name__}.active_state must be set to a valid State present within {cls.__name__}.StatusChoices', f'{cls.__name__}.active_state must be set to a valid State present within {cls.__name__}.StatusChoices',
hint=f'{cls.__name__}.active_state = {cls.active_state!r}', hint=f'{cls.__name__}.active_state = {cls.active_state!r}',
obj=cls, obj=cls,
id='actors.E018', id='workers.E018',
)) ))
@@ -132,7 +132,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
f'{cls.__name__}.StateMachineClass.final_states must all be present within {cls.__name__}.StatusChoices', f'{cls.__name__}.StateMachineClass.final_states must all be present within {cls.__name__}.StatusChoices',
hint=f'{cls.__name__}.StateMachineClass.final_states = {cls.StateMachineClass.final_states!r}', hint=f'{cls.__name__}.StateMachineClass.final_states = {cls.StateMachineClass.final_states!r}',
obj=cls, obj=cls,
id='actors.E019', id='workers.E019',
)) ))
break break
return errors return errors

View file

@@ -1,4 +1,4 @@
__package__ = 'archivebox.actors' __package__ = 'archivebox.workers'
import os import os
import time import time

View file

@@ -1,4 +1,4 @@
__package__ = 'archivebox.queues' __package__ = 'archivebox.workers'
import sys import sys
import time import time

View file

@@ -1,4 +1,4 @@
__package__ = 'archivebox.queues' __package__ = 'archivebox.workers'
from functools import wraps from functools import wraps
# from django.utils import timezone # from django.utils import timezone