Mirror of https://github.com/ArchiveBox/ArchiveBox.git (synced 2025-05-12 22:25:44 -04:00)

Commit e469c5a344 (parent e50f8cb3b6): merge queues and actors apps into new workers app
37 changed files with 89 additions and 304 deletions

@@ -82,11 +82,10 @@ ABX_ECOSYSTEM_PLUGINS = abx.get_pip_installed_plugins(group='abx')
 # Load all built-in ArchiveBox plugins
 ARCHIVEBOX_BUILTIN_PLUGINS = {
     'config': PACKAGE_DIR / 'config',
+    'workers': PACKAGE_DIR / 'workers',
     'core': PACKAGE_DIR / 'core',
     'crawls': PACKAGE_DIR / 'crawls',
-    'queues': PACKAGE_DIR / 'queues',
     'seeds': PACKAGE_DIR / 'seeds',
-    'actors': PACKAGE_DIR / 'actors',
     # 'search': PACKAGE_DIR / 'search',
     # 'core': PACKAGE_DIR / 'core',
 }

@@ -1,2 +0,0 @@
-__package__ = 'archivebox.actors'
-__order__ = 100

@@ -1,3 +0,0 @@
-from django.contrib import admin
-
-# Register your models here.

@@ -1,6 +0,0 @@
-from django.apps import AppConfig
-
-
-class ActorsConfig(AppConfig):
-    default_auto_field = "django.db.models.BigAutoField"
-    name = "actors"

@@ -1,202 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Job Dashboard</title>
-    <style>
-        body {
-            font-family: Arial, sans-serif;
-            line-height: 1.6;
-            color: #333;
-            width: 100%;
-            margin: 0 auto;
-            padding: 20px;
-        }
-        @keyframes pulse {
-            0% { opacity: 1; }
-            48% { opacity: 0.2; }
-            52% { opacity: 1; }
-            100% { opacity: 1; }
-        }
-        h1 {
-            text-align: center;
-        }
-        h1 a {
-            animation: pulse 1s;
-        }
-        .dashboard {
-            display: grid;
-            grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
-            gap: 20px;
-        }
-        .card {
-            border: 1px solid #ddd;
-            border-radius: 8px;
-            padding: 15px;
-            background-color: #f9f9f9;
-        }
-        .card h2 {
-            margin-top: 0;
-            border-bottom: 2px solid #ddd;
-            padding-bottom: 10px;
-            font-family: monospace;
-        }
-        .scroll-area {
-            /*height: 800px;
-            overflow-y: scroll; */
-            height: auto;
-            border: 1px solid #ddd;
-            padding: 10px;
-            background-color: #fff;
-        }
-        .job-item {
-            border: 1px solid #eee;
-            border-radius: 4px;
-            padding: 10px;
-            margin-bottom: 10px;
-        }
-        .job-item:last-child {
-            margin-bottom: 0;
-        }
-        .badge {
-            display: inline-block;
-            padding: 3px 7px;
-            border-radius: 3px;
-            font-size: 12px;
-            font-weight: bold;
-        }
-        .badge-started {
-            background-color: #4CAF50;
-            color: white;
-        }
-        .badge-queued {
-            background-color: #2196F3;
-            color: white;
-        }
-        .badge-failed {
-            background-color: #f44336;
-            color: white;
-        }
-        .badge-succeeded {
-            background-color: #666;
-            color: white;
-        }
-        .badge-sealed {
-            background-color: #666;
-            color: white;
-        }
-        .date {
-            font-size: 16px;
-            color: #666;
-            float: right;
-        }
-    </style>
-</head>
-<body>
-    <h1>Job Dashboard <small><a href="?refresh=true" id="current-time">♻️ {{now}}}</a></small></h1>
-    <div id="dashboard" class="dashboard"></div>
-
-    <script>
-        function formatDate(dateString) {
-            const now = Date.now()
-            const date = new Date(dateString)
-            // return new Date(dateString).toLocaleString();
-            // return date.toISOString().split('T').at(-1).replace('Z', '');
-            const seconds_diff = Math.round((date - now) / 1000, 0)
-            if (seconds_diff < 0) {
-                return `${seconds_diff}s ago`;
-            } else {
-                return `${seconds_diff}s in the future`;
-            }
-        }
-
-        function createJobElement(job) {
-            const jobElement = document.createElement('div');
-            jobElement.className = 'job-item';
-            jobElement.innerHTML = `
-                <p><a href="/api/v1/core/any/${job.abid}?api_key={{api_token|default:'NONE PROVIDED BY VIEW'}}"><code>${job.abid}</code></a></p>
-                <p>
-                    <span class="badge badge-${job.status}">${job.status}</span>
-                    <span class="date">♻️ ${formatDate(job.retry_at)}</span>
-                </p>
-                <p style="font-size: 12px; color: #666;">${job.description}</p>
-            `;
-            return jobElement;
-        }
-
-        function updateDashboard(data) {
-            const currentTime = document.getElementById('current-time');
-            window.now = new Date();
-            currentTime.innerHTML = `♻️ ${window.now.toISOString().split('T').at(-1).replace('Z', '')}`;
-
-            const dashboard = document.getElementById('dashboard');
-            dashboard.innerHTML = '';
-
-            data.forEach(actor => {
-                const card = document.createElement('div');
-                card.className = 'card';
-                card.innerHTML = `
-                    <h2>${actor.model}</h2>
-                    <hr/>
-                    Future
-                    <div class="scroll-area" style="background-color: white;" id="future-${actor.model}"></div>
-                    <hr/>
-                    Pending
-                    <div class="scroll-area" style="background-color: lightblue;" id="pending-${actor.model}"></div>
-                    <hr/>
-                    Stalled
-                    <div class="scroll-area" style="background-color: lightcoral;" id="stalled-${actor.model}"></div>
-                    <hr/>
-                    Active
-                    <div class="scroll-area" style="background-color: lightgreen;" id="active-${actor.model}"></div>
-                    <hr/>
-                    Past
-                    <div class="scroll-area" style="background-color: lightgrey;" id="past-${actor.model}"></div>
-                `;
-                dashboard.appendChild(card);
-
-                const futureContainer = document.getElementById(`future-${actor.model}`);
-                actor.future.forEach(job => {
-                    futureContainer.appendChild(createJobElement(job));
-                });
-
-                const pendingContainer = document.getElementById(`pending-${actor.model}`);
-                actor.pending.forEach(job => {
-                    pendingContainer.appendChild(createJobElement(job));
-                });
-
-                const stalledContainer = document.getElementById(`stalled-${actor.model}`);
-                actor.stalled.forEach(job => {
-                    stalledContainer.appendChild(createJobElement(job));
-                });
-
-                const activeContainer = document.getElementById(`active-${actor.model}`);
-                actor.active.forEach(job => {
-                    activeContainer.appendChild(createJobElement(job));
-                });
-
-                const pastContainer = document.getElementById(`past-${actor.model}`);
-                actor.past.forEach(job => {
-                    pastContainer.appendChild(createJobElement(job));
-                });
-            });
-        }
-
-        function fetchData() {
-            fetch('/api/v1/jobs/actors', {
-                headers: {
-                    'Authorization': `Bearer {{api_token|default:'NONE PROVIDED BY VIEW'}}`
-                }
-            })
-            .then(response => response.json())
-            .then(data => updateDashboard(data))
-            .catch(error => console.error('Error fetching data:', error));
-        }
-
-        fetchData();
-
-        setInterval(fetchData, 750);
-    </script>
-</body>
-</html>

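The template deleted above polled /api/v1/jobs/actors and rendered one card per actor type, each with Future/Pending/Stalled/Active/Past buckets of jobs. As a rough sketch of the payload shape it consumed (field names inferred only from the removed createJobElement()/updateDashboard() JavaScript, not from the actual API schema):

    # Illustrative only: TypedDicts describing the JSON the removed dashboard
    # template expected; inferred from the deleted JavaScript, not from the API code.
    from typing import TypedDict

    class Job(TypedDict):
        abid: str          # public id, linked to /api/v1/core/any/<abid> in the UI
        status: str        # e.g. 'queued', 'started', 'failed', 'succeeded', 'sealed'
        retry_at: str      # timestamp rendered as a relative time
        description: str

    class ActorCard(TypedDict):
        model: str         # model name used as the card title
        future: list[Job]
        pending: list[Job]
        stalled: list[Job]
        active: list[Job]
        past: list[Job]
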
@@ -41,7 +41,7 @@ def register_urls(api: NinjaAPI) -> NinjaAPI:
     api.add_router('/core/', 'api.v1_core.router')
     api.add_router('/crawls/', 'api.v1_crawls.router')
     api.add_router('/cli/', 'api.v1_cli.router')
-    api.add_router('/jobs/', 'api.v1_actors.router')
+    api.add_router('/workers/', 'api.v1_workers.router')
     return api


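With the router remounted from /jobs/ to /workers/, clients that used the old dashboard's /api/v1/jobs/actors URL need the new prefix. A minimal client sketch, assuming the path is simply the new router prefix plus the existing actors endpoint and that the same Bearer-token auth applies (both are assumptions based on this diff, not on the API docs):

    # Hypothetical call against the renamed router mount point.
    import json
    import urllib.request

    API_TOKEN = 'your-api-token'  # placeholder
    req = urllib.request.Request(
        'http://localhost:8000/api/v1/workers/actors',   # assumed path after the rename
        headers={'Authorization': f'Bearer {API_TOKEN}'},
    )
    with urllib.request.urlopen(req) as resp:
        for actor in json.loads(resp.read()):
            print(actor)
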
@@ -31,7 +31,7 @@ class TaskSchema(Schema):


 class ActorSchema(Schema):
-    # TYPE: str = 'actors.actor.ActorType'
+    # TYPE: str = 'workers.actor.ActorType'

     # name: str
     #pid: int | None

@@ -97,7 +97,7 @@ class ActorSchema(Schema):


 class OrchestratorSchema(Schema):
-    # TYPE: str = 'actors.orchestrator.Orchestrator'
+    # TYPE: str = 'workers.orchestrator.Orchestrator'

     #pid: int | None
     exit_on_idle: bool

@@ -114,7 +114,7 @@ class OrchestratorSchema(Schema):
 def get_orchestrators(request):
     """List all the task orchestrators (aka Orchestrators) that are currently running"""

-    from actors.orchestrator import Orchestrator
+    from workers.orchestrator import Orchestrator
     orchestrator = Orchestrator()

     return [orchestrator]

@@ -124,6 +124,6 @@ def get_orchestrators(request):
 def get_actors(request):
     """List all the task consumer workers (aka Actors) that are currently running"""

-    from actors.orchestrator import Orchestrator
+    from workers.orchestrator import Orchestrator
     orchestrator = Orchestrator()
     return orchestrator.actor_types.values()

@@ -53,7 +53,7 @@ def add(urls: str | list[str],

     from seeds.models import Seed
     from crawls.models import Crawl
-    from actors.orchestrator import Orchestrator
+    from workers.orchestrator import Orchestrator
     from abid_utils.models import get_or_create_system_user_pk


@@ -28,7 +28,7 @@ def update():
     from archivebox.config.django import setup_django
     setup_django()

-    from actors.orchestrator import Orchestrator
+    from workers.orchestrator import Orchestrator
     orchestrator = Orchestrator(exit_on_idle=False)
     orchestrator.start()


@@ -7,7 +7,6 @@ from typing import Any, List, Dict, cast
 from benedict import benedict

 from django.http import HttpRequest
-from django.conf import settings
 from django.utils import timezone
 from django.utils.html import format_html, mark_safe


@@ -304,7 +303,7 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
         "Exit Status": [],
     }

-    from queues.supervisor_util import get_existing_supervisord_process
+    from workers.supervisor_util import get_existing_supervisord_process

     supervisor = get_existing_supervisord_process()
     if supervisor is None:

@@ -374,8 +373,10 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
 def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
     assert request.user.is_superuser, "Must be a superuser to view configuration settings."

-    from queues.supervisor_util import get_existing_supervisord_process, get_worker
-    from queues.settings import SUPERVISORD_CONFIG_FILE
+    from workers.supervisor_util import get_existing_supervisord_process, get_worker, get_sock_file, CONFIG_FILE_NAME
+
+    SOCK_FILE = get_sock_file()
+    CONFIG_FILE = SOCK_FILE.parent / CONFIG_FILE_NAME

     supervisor = get_existing_supervisord_process()
     if supervisor is None:

@@ -388,7 +389,7 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
     all_config = cast(List[Dict[str, Any]], supervisor.getAllConfigInfo() or [])

     if key == 'supervisord':
-        relevant_config = SUPERVISORD_CONFIG_FILE.read_text()
+        relevant_config = CONFIG_FILE.read_text()
         relevant_logs = cast(str, supervisor.readLog(0, 10_000_000))
         start_ts = [line for line in relevant_logs.split("\n") if "RPC interface 'supervisor' initialized" in line][-1].split(",", 1)[0]
         uptime = str(timezone.now() - parse_date(start_ts)).split(".")[0]

@@ -476,8 +477,6 @@ def log_list_view(request: HttpRequest, **kwargs) -> TableContext:
 def log_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
     assert request.user.is_superuser, "Must be a superuser to view configuration settings."

-    from django.conf import settings
-
     log_file = [logfile for logfile in CONSTANTS.LOGS_DIR.glob('*.log') if key in logfile.name][0]

     log_text = log_file.read_text()

@@ -25,7 +25,7 @@ from archivebox.extractors import archive_links
 from archivebox.main import remove

 from archivebox.abid_utils.admin import ABIDModelAdmin
-from archivebox.queues.tasks import bg_archive_links, bg_add
+from archivebox.workers.tasks import bg_archive_links, bg_add

 from core.models import Tag
 from core.admin_tags import TagInline

@@ -26,8 +26,8 @@ import abx
 from archivebox.config import CONSTANTS

 from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithOutputDir
-from actors.models import ModelWithStateMachine
-from queues.tasks import bg_archive_snapshot
+from workers.models import ModelWithStateMachine
+from workers.tasks import bg_archive_snapshot
 from crawls.models import Crawl
 # from machine.models import Machine, NetworkInterface


@@ -61,11 +61,10 @@ INSTALLED_APPS = [
     'django_object_actions',  # provides easy Django Admin action buttons on change views https://github.com/crccheck/django-object-actions

     # Our ArchiveBox-provided apps
     # 'abid_utils',  # handles ABID ID creation, handling, and models
     'config',        # ArchiveBox config settings (loaded as a plugin, don't need to add it here)
     'machine',       # handles collecting and storing information about the host machine, network interfaces, installed binaries, etc.
-    'actors',        # handles starting and managing background workers and processes (orchestrators and actors)
-    'queues',        # handles starting and managing background workers and processes (supervisord)
+    'workers',       # handles starting and managing background workers and processes (orchestrators and actors)
     'seeds',         # handles Seed model and URL source management
     'crawls',        # handles Crawl and CrawlSchedule models and management
     'personas',      # handles Persona and session management

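Since the 'actors' and 'queues' app labels are folded into a single 'workers' app, anything that looked up either label needs updating. A quick sanity-check sketch using Django's standard app registry (generic Django API, shown only as an illustration):

    # Run inside a configured Django context (e.g. a manage.py / archivebox shell).
    from django.apps import apps

    workers_app = apps.get_app_config('workers')   # raises LookupError if not installed
    print(workers_app.name, workers_app.label)

    # The old app names should no longer be installed after this commit:
    assert not apps.is_installed('actors')
    assert not apps.is_installed('queues')
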
@@ -8,7 +8,7 @@ from django.utils import timezone

 from statemachine import State, StateMachine

-from actors.actor import ActorType
+from workers.actor import ActorType

 from core.models import Snapshot, ArchiveResult


@@ -10,7 +10,7 @@ from archivebox.misc.serve_static import serve_static
 from core.admin_site import archivebox_admin
 from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView

-from actors.views import JobsDashboardView
+from workers.views import JobsDashboardView

 # GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
 # from archivebox.config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE

@@ -25,7 +25,7 @@ import archivebox
 from core.models import Snapshot
 from core.forms import AddLinkForm

-from queues.tasks import bg_add
+from workers.tasks import bg_add

 from archivebox.config import CONSTANTS_CONFIG, DATA_DIR, VERSION
 from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG

@@ -10,7 +10,7 @@ from django.conf import settings
 from django.urls import reverse_lazy
 from django.utils import timezone

-from actors.models import ModelWithStateMachine
+from workers.models import ModelWithStateMachine

 if TYPE_CHECKING:
     from core.models import Snapshot, ArchiveResult

@@ -6,7 +6,7 @@ from django.utils import timezone

 from statemachine import State, StateMachine

-from actors.actor import ActorType
+from workers.actor import ActorType
 from crawls.models import Crawl


@@ -675,8 +675,8 @@ def add(urls: Union[str, List[str]],
     """Add a new URL or list of URLs to your archive"""

     from core.models import Snapshot, Tag
-    # from queues.supervisor_util import start_cli_workers, tail_worker_logs
-    # from queues.tasks import bg_archive_link
+    # from workers.supervisor_util import start_cli_workers, tail_worker_logs
+    # from workers.tasks import bg_archive_link


     assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'

@@ -873,7 +873,7 @@ def update(resume: Optional[float]=None,

     from core.models import ArchiveResult
     from .search import index_links
-    # from .queues.supervisor_util import start_cli_workers
+    # from workers.supervisor_util import start_cli_workers


     check_data_folder()

@@ -1494,7 +1494,7 @@ def server(runserver_args: Optional[List[str]]=None,
         runserver_args.append('--noreload')  # '--insecure'
         call_command("runserver", *runserver_args)
     else:
-        from queues.supervisor_util import start_server_workers
+        from workers.supervisor_util import start_server_workers

         print()
         start_server_workers(host=host, port=port, daemonize=False)

@@ -1,27 +1,27 @@
-__package__ = 'abx_plugin_singlefile'
+# __package__ = 'abx_plugin_singlefile'

-from typing import ClassVar
-from django.db.models import QuerySet
-from django.utils.functional import classproperty
+# from typing import ClassVar
+# from django.db.models import QuerySet
+# from django.utils.functional import classproperty

-from actors.actor import ActorType
+# from workers.actor import ActorType

-from .models import SinglefileResult
+# from .models import SinglefileResult


-class SinglefileActor(ActorType[SinglefileResult]):
-    CLAIM_ORDER: ClassVar[str] = 'created_at DESC'
-    CLAIM_WHERE: ClassVar[str] = 'status = "queued" AND extractor = "favicon"'
-    CLAIM_SET: ClassVar[str] = 'status = "started"'
+# class SinglefileActor(ActorType[SinglefileResult]):
+#     CLAIM_ORDER: ClassVar[str] = 'created_at DESC'
+#     CLAIM_WHERE: ClassVar[str] = 'status = "queued" AND extractor = "favicon"'
+#     CLAIM_SET: ClassVar[str] = 'status = "started"'

-    @classproperty
-    def QUERYSET(cls) -> QuerySet:
-        return SinglefileResult.objects.filter(status='queued')
+#     @classproperty
+#     def QUERYSET(cls) -> QuerySet:
+#         return SinglefileResult.objects.filter(status='queued')

-    def tick(self, obj: SinglefileResult):
-        print(f'[grey53]{self}.tick({obj.abid or obj.id}, status={obj.status}) remaining:[/grey53]', self.get_queue().count())
-        updated = SinglefileResult.objects.filter(id=obj.id, status='started').update(status='success') == 1
-        if not updated:
-            raise Exception(f'Failed to update {obj.abid or obj.id}, interrupted by another actor writing to the same object')
-        obj.refresh_from_db()
-        obj.save()
+#     def tick(self, obj: SinglefileResult):
+#         print(f'[grey53]{self}.tick({obj.abid or obj.id}, status={obj.status}) remaining:[/grey53]', self.get_queue().count())
+#         updated = SinglefileResult.objects.filter(id=obj.id, status='started').update(status='success') == 1
+#         if not updated:
+#             raise Exception(f'Failed to update {obj.abid or obj.id}, interrupted by another actor writing to the same object')
+#         obj.refresh_from_db()
+#         obj.save()

@@ -13,11 +13,11 @@ class WgetExtractor(BaseExtractor):
     name: ExtractorName = 'wget'
     binary: BinName = WGET_BINARY.name

-    def get_output_path(self, snapshot) -> Path | None:
-        wget_index_path = wget_output_path(snapshot.as_link())
-        if wget_index_path:
-            return Path(wget_index_path)
-        return None
+    def get_output_path(self, snapshot) -> str:
+        # wget_index_path = wget_output_path(snapshot.as_link())
+        # if wget_index_path:
+        #     return Path(wget_index_path)
+        return 'wget'

 WGET_EXTRACTOR = WgetExtractor()


@@ -65,16 +65,16 @@ class ConfigPluginSpec:
     def get_SCOPE_CONFIG(extra=None, archiveresult=None, snapshot=None, crawl=None, user=None, request=None, collection=..., environment=..., machine=..., default=...) -> dict[ConfigKeyStr, Any]:
         """Get the config as it applies to you right now, based on the current context"""
         return benedict({
-            **pm.hook.get_default_config(default=default),
-            **pm.hook.get_machine_config(machine=machine),
-            **pm.hook.get_environment_config(environment=environment),
-            **pm.hook.get_collection_config(collection=collection),
-            **pm.hook.get_user_config(user=user),
-            **pm.hook.get_crawl_config(crawl=crawl),
-            **pm.hook.get_snapshot_config(snapshot=snapshot),
-            **pm.hook.get_archiveresult_config(archiveresult=archiveresult),
-            **pm.hook.get_request_config(request=request),
-            **(extra or {}),
+            **pm.hook.get_default_config(default=default),                     # schema defaults defined in source code
+            **pm.hook.get_machine_config(machine=machine),                     # machine defaults set on the Machine model
+            **pm.hook.get_environment_config(environment=environment),         # env config set for just this run on this machine
+            **pm.hook.get_collection_config(collection=collection),            # collection defaults set in ArchiveBox.conf
+            **pm.hook.get_user_config(user=user),                              # user config set on User model
+            **pm.hook.get_request_config(request=request),                     # extra config derived from the current request
+            **pm.hook.get_crawl_config(crawl=crawl),                           # extra config set on the Crawl model
+            **pm.hook.get_snapshot_config(snapshot=snapshot),                  # extra config set on the Snapshot model
+            **pm.hook.get_archiveresult_config(archiveresult=archiveresult),   # extra config set on the ArchiveResult model
+            **(extra or {}),                                                   # extra config passed in by the caller
         })

     @staticmethod

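The comments added above spell out the precedence: in a dict literal, later ** expansions overwrite earlier keys, so the more specific request/crawl/snapshot/archiveresult scopes win over user, collection, environment, machine, and built-in defaults. A self-contained sketch of that merge behaviour, with plain dicts standing in for the pm.hook.get_*_config() results:

    # Later **expansions override earlier ones, which is what gives the
    # scoped configs their precedence in get_SCOPE_CONFIG().
    default_config    = {'TIMEOUT': 60, 'SAVE_WGET': True}   # schema defaults
    collection_config = {'TIMEOUT': 120}                     # ArchiveBox.conf
    crawl_config      = {'SAVE_WGET': False}                 # set on one Crawl

    merged = {
        **default_config,
        **collection_config,
        **crawl_config,
    }
    assert merged == {'TIMEOUT': 120, 'SAVE_WGET': False}
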
@@ -1,8 +0,0 @@
-__package__ = 'archivebox.queues'
-
-import abx
-
-@abx.hookimpl
-def register_admin(admin_site):
-    from queues.admin import register_admin
-    register_admin(admin_site)

archivebox/workers/__init__.py (new file, 9 lines added)
@@ -0,0 +1,9 @@
+__package__ = 'archivebox.workers'
+__order__ = 100
+
+import abx
+
+@abx.hookimpl
+def register_admin(admin_site):
+    from workers.admin import register_admin
+    register_admin(admin_site)

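This new workers/__init__.py re-exposes the same register_admin hook the deleted queues/__init__.py provided, just importing from workers.admin instead. The @abx.hookimpl / pm.hook.* pattern visible throughout this diff looks pluggy-style; purely as a generic illustration of that mechanism (plain pluggy, not ArchiveBox's actual abx wiring):

    # Generic pluggy example of the hookspec/hookimpl pattern; names here are
    # illustrative and do not correspond to ArchiveBox's real abx internals.
    import pluggy

    hookspec = pluggy.HookspecMarker('demo')
    hookimpl = pluggy.HookimplMarker('demo')

    class Spec:
        @hookspec
        def register_admin(self, admin_site): ...

    class Plugin:
        @hookimpl
        def register_admin(self, admin_site):
            admin_site.append('registered')

    pm = pluggy.PluginManager('demo')
    pm.add_hookspecs(Spec)
    pm.register(Plugin())

    site = []
    pm.hook.register_admin(admin_site=site)   # calls every plugin's hookimpl
    print(site)                               # ['registered']
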
@@ -1,4 +1,4 @@
-__package__ = 'archivebox.actors'
+__package__ = 'archivebox.workers'

 import os
 import time

@@ -1,4 +1,4 @@
-__package__ = 'archivebox.queues'
+__package__ = 'archivebox.workers'

 import abx


@@ -1,7 +1,7 @@
 from django.apps import AppConfig


-class QueuesConfig(AppConfig):
+class WorkersConfig(AppConfig):
     default_auto_field = 'django.db.models.BigAutoField'
-    name = 'queues'
+    name = 'workers'


@@ -2,7 +2,7 @@

 from django.core.management.base import BaseCommand

-from actors.orchestrator import ArchivingOrchestrator
+from workers.orchestrator import ArchivingOrchestrator


 class Command(BaseCommand):

@@ -60,7 +60,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
                     f'{cls.__name__}.{field.name} must have choices set to {cls.__name__}.StatusChoices.choices',
                     hint=f'{cls.__name__}.{field.name}.choices = {getattr(field, "choices", None)!r}',
                     obj=cls,
-                    id='actors.E011',
+                    id='workers.E011',
                 ))
             if getattr(field, '_is_retry_at_field', False):
                 if cls.retry_at_field_name == field.name:

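Note that the system-check id namespace changes from actors.* to workers.* here and in the E012–E019 checks below, so any SILENCED_SYSTEM_CHECKS entries pointing at the old ids need updating. For reference, a generic sketch of how a namespaced id like 'workers.E011' is attached to a check with Django's standard checks framework (illustrative only, not ArchiveBox's actual check):

    from django.core import checks

    @checks.register()
    def example_check(app_configs, **kwargs):
        errors = []
        problem_found = False   # placeholder condition
        if problem_found:
            errors.append(checks.Error(
                'Example problem description',
                hint='How to fix it',
                id='workers.E011',   # '<app label>.E<number>' convention
            ))
        return errors
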
@@ -73,14 +73,14 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
                 f'{cls.__name__}.state_field_name must be defined and point to a StatusField()',
                 hint=f'{cls.__name__}.state_field_name = {cls.state_field_name!r} but {cls.__name__}.{cls.state_field_name!r} was not found or does not refer to StatusField',
                 obj=cls,
-                id='actors.E012',
+                id='workers.E012',
             ))
         if not found_retry_at_field:
             errors.append(checks.Error(
                 f'{cls.__name__}.retry_at_field_name must be defined and point to a RetryAtField()',
                 hint=f'{cls.__name__}.retry_at_field_name = {cls.retry_at_field_name!r} but {cls.__name__}.{cls.retry_at_field_name!r} was not found or does not refer to RetryAtField',
                 obj=cls,
-                id='actors.E013',
+                id='workers.E013',
             ))

         if not found_id_field:

@@ -88,7 +88,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
                 f'{cls.__name__} must have an id field that is a primary key',
                 hint=f'{cls.__name__}.id = {cls.id!r}',
                 obj=cls,
-                id='actors.E014',
+                id='workers.E014',
             ))

         if not isinstance(cls.state_machine_name, str):

@@ -96,7 +96,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
                 f'{cls.__name__}.state_machine_name must be a dotted-import path to a StateMachine class',
                 hint=f'{cls.__name__}.state_machine_name = {cls.state_machine_name!r}',
                 obj=cls,
-                id='actors.E015',
+                id='workers.E015',
             ))

         try:

@@ -106,7 +106,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
                 f'{cls.__name__}.state_machine_name must point to a valid StateMachine class, but got {type(err).__name__} {err} when trying to access {cls.__name__}.StateMachineClass',
                 hint=f'{cls.__name__}.state_machine_name = {cls.state_machine_name!r}',
                 obj=cls,
-                id='actors.E016',
+                id='workers.E016',
             ))

         if cls.INITIAL_STATE not in cls.StatusChoices.values:

@@ -114,7 +114,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
                 f'{cls.__name__}.StateMachineClass.initial_state must be present within {cls.__name__}.StatusChoices',
                 hint=f'{cls.__name__}.StateMachineClass.initial_state = {cls.StateMachineClass.initial_state!r}',
                 obj=cls,
-                id='actors.E017',
+                id='workers.E017',
             ))

         if cls.ACTIVE_STATE not in cls.StatusChoices.values:

@@ -122,7 +122,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
                 f'{cls.__name__}.active_state must be set to a valid State present within {cls.__name__}.StatusChoices',
                 hint=f'{cls.__name__}.active_state = {cls.active_state!r}',
                 obj=cls,
-                id='actors.E018',
+                id='workers.E018',
             ))


@@ -132,7 +132,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
                     f'{cls.__name__}.StateMachineClass.final_states must all be present within {cls.__name__}.StatusChoices',
                     hint=f'{cls.__name__}.StateMachineClass.final_states = {cls.StateMachineClass.final_states!r}',
                     obj=cls,
-                    id='actors.E019',
+                    id='workers.E019',
                 ))
                 break
         return errors

@@ -1,4 +1,4 @@
-__package__ = 'archivebox.actors'
+__package__ = 'archivebox.workers'

 import os
 import time

@@ -1,4 +1,4 @@
-__package__ = 'archivebox.queues'
+__package__ = 'archivebox.workers'

 import sys
 import time

@@ -1,4 +1,4 @@
-__package__ = 'archivebox.queues'
+__package__ = 'archivebox.workers'

 from functools import wraps
 # from django.utils import timezone