mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-28 05:34:14 -04:00
API fixes and add actors endpoints
This commit is contained in:
parent
c8e186f21b
commit
8f8fbbb7a2
12 changed files with 229 additions and 52 deletions
|
@ -1,15 +1,14 @@
|
|||
__package__ = 'archivebox.api'
|
||||
|
||||
from typing import Any, Optional, cast
|
||||
from typing import Optional, cast
|
||||
from datetime import timedelta
|
||||
|
||||
from django.http import HttpRequest
|
||||
from django.utils import timezone
|
||||
from django.contrib.auth import login
|
||||
from django.contrib.auth import authenticate
|
||||
from django.contrib.auth.models import AbstractBaseUser
|
||||
|
||||
from ninja.security import HttpBearer, APIKeyQuery, APIKeyHeader, HttpBasicAuth, django_auth_superuser
|
||||
from ninja.security import HttpBearer, APIKeyQuery, APIKeyHeader, HttpBasicAuth
|
||||
from ninja.errors import HttpError
|
||||
|
||||
|
||||
|
|
117
archivebox/api/v1_actors.py
Normal file
117
archivebox/api/v1_actors.py
Normal file
|
@ -0,0 +1,117 @@
|
|||
__package__ = 'archivebox.api'
|
||||
|
||||
from uuid import UUID
|
||||
from typing import List, Any
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
from ninja import Router, Schema
|
||||
|
||||
from .auth import API_AUTH_METHODS
|
||||
|
||||
router = Router(tags=['Workers and Tasks'], auth=API_AUTH_METHODS)
|
||||
|
||||
|
||||
class TaskSchema(Schema):
    # API representation of a single task object, as listed in
    # ActorSchema.queue and ActorSchema.past.
    # Every field except `description` is declared without a resolver here;
    # `description` is computed by resolve_description() below.

    TYPE: str

    # identity
    id: UUID
    abid: str
    description: str

    # scheduling state
    status: str
    retry_at: datetime | None

    # bookkeeping
    created_at: datetime
    modified_at: datetime
    created_by_id: int

    @staticmethod
    def resolve_description(obj) -> str:
        """Return the string form of the task object as its description."""
        return str(obj)
|
||||
|
||||
|
||||
class ActorSchema(Schema):
    # API representation of one actor object exposed by the orchestrator.
    # Fields without a resolve_<name>() method below are declared only;
    # the rest are computed by the static resolvers.

    # TYPE: str = 'actors.actor.ActorType'

    # name: str
    #pid: int | None
    idle_count: int
    launch_kwargs: dict[str, Any]
    mode: str

    model: str
    statemachine: str
    STATE_FIELD_NAME: str
    # ACTIVE_STATE: str
    FINAL_STATES: list[str]
    EVENT_NAME: str
    CLAIM_ORDER: list[str]
    CLAIM_FROM_TOP_N: int
    CLAIM_ATOMIC: bool
    MAX_TICK_TIME: int
    MAX_CONCURRENT_ACTORS: int

    queue: list[TaskSchema]
    past: list[TaskSchema]

    @staticmethod
    def resolve_model(obj) -> str:
        """Return the class name of the actor's ``Model``."""
        return obj.Model.__name__

    @staticmethod
    def resolve_statemachine(obj) -> str:
        """Return the class name of the actor's ``StateMachineClass``."""
        return obj.StateMachineClass.__name__

    @staticmethod
    def resolve_name(obj) -> str:
        """Return the string form of the actor.

        NOTE(review): the matching ``name`` field is commented out above, so
        this resolver appears to be unused until that field is re-enabled.
        """
        return str(obj)

    # @staticmethod
    # def resolve_ACTIVE_STATE(obj) -> str:
    #     return str(obj.ACTIVE_STATE)

    @staticmethod
    def resolve_FINAL_STATES(obj) -> list[str]:
        """Return every entry of ``obj.FINAL_STATES`` converted to ``str``."""
        return [str(state) for state in obj.FINAL_STATES]

    @staticmethod
    def resolve_queue(obj) -> list[TaskSchema]:
        """Return the rows of ``obj.qs`` matching any of the pending, future,
        active or stalled filters, ordered by ``retry_at`` descending.
        """
        # list() evaluates the queryset directly; the previous comprehension
        # reused the name `obj` as its loop variable, shadowing the parameter.
        return list(
            obj.qs.filter(obj.pending_q | obj.future_q | obj.active_q | obj.stalled_q).order_by('-retry_at')
        )

    @staticmethod
    def resolve_past(obj) -> list[TaskSchema]:
        """Return the rows of ``obj.qs`` matching ``obj.final_q``, ordered by
        ``modified_at`` descending.
        """
        return list(obj.qs.filter(obj.final_q).order_by('-modified_at'))
|
||||
|
||||
|
||||
class OrchestratorSchema(Schema):
    # API representation of an orchestrator object and the actors it knows about.

    # TYPE: str = 'actors.orchestrator.Orchestrator'

    #pid: int | None
    exit_on_idle: bool
    mode: str

    actors: list[ActorSchema]

    @staticmethod
    def resolve_actors(obj) -> list[ActorSchema]:
        """Call each value of ``obj.actor_types`` with no arguments and
        return the resulting objects as a list.
        """
        return [actor_type() for actor_type in obj.actor_types.values()]
|
||||
|
||||
|
||||
@router.get("/orchestrators", response=List[OrchestratorSchema], url_name="get_orchestrators")
def get_orchestrators(request):
    """List all the task orchestrators (aka Orchestrators) that are currently running"""

    # Imported inside the handler rather than at module import time.
    from actors.orchestrator import Orchestrator

    # NOTE(review): this constructs a new Orchestrator() per request and
    # returns it as a single-item list; nothing here looks up orchestrators
    # that are already running -- confirm this matches the docstring's intent.
    return [Orchestrator()]
|
||||
|
||||
|
||||
@router.get("/actors", response=List[ActorSchema], url_name="get_actors")
def get_actors(request):
    """List all the task consumer workers (aka Actors) that are currently running"""

    # Imported inside the handler rather than at module import time.
    from actors.orchestrator import Orchestrator

    # NOTE(review): the values of `actor_types` are returned as-is (not called),
    # whereas OrchestratorSchema.resolve_actors calls each one first -- confirm
    # that ActorSchema's resolvers work on whatever these values are.
    return Orchestrator().actor_types.values()
|
|
@ -40,6 +40,7 @@ def register_urls(api: NinjaAPI) -> NinjaAPI:
|
|||
api.add_router('/auth/', 'api.v1_auth.router')
|
||||
api.add_router('/core/', 'api.v1_core.router')
|
||||
api.add_router('/cli/', 'api.v1_cli.router')
|
||||
api.add_router('/jobs/', 'api.v1_actors.router')
|
||||
return api
|
||||
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
__package__ = 'archivebox.api'
|
||||
|
||||
import json
|
||||
from typing import List, Dict, Any, Optional
|
||||
from enum import Enum
|
||||
|
||||
|
@ -30,6 +31,7 @@ class CLICommandResponseSchema(Schema):
|
|||
success: bool
|
||||
errors: List[str]
|
||||
result: JSONType
|
||||
result_format: str = 'str'
|
||||
stdout: str
|
||||
stderr: str
|
||||
|
||||
|
@ -97,7 +99,7 @@ class ListCommandSchema(Schema):
|
|||
sort: str = 'bookmarked_at'
|
||||
as_json: bool = True
|
||||
as_html: bool = False
|
||||
as_csv: str | bool = 'timestamp,url'
|
||||
as_csv: str | None = 'timestamp,url'
|
||||
with_headers: bool = False
|
||||
|
||||
class RemoveCommandSchema(Schema):
|
||||
|
@ -182,7 +184,7 @@ def cli_schedule(request, args: ScheduleCommandSchema):
|
|||
|
||||
|
||||
|
||||
@router.post("/list", response=CLICommandResponseSchema, summary='archivebox list [args] [filter_patterns]')
|
||||
@router.post("/list", response=CLICommandResponseSchema, summary='archivebox list [args] [filter_patterns] (use this endpoint with ?filter_type=search to search for snapshots)')
|
||||
def cli_list(request, args: ListCommandSchema):
|
||||
result = list_all(
|
||||
filter_patterns=args.filter_patterns,
|
||||
|
@ -200,6 +202,7 @@ def cli_list(request, args: ListCommandSchema):
|
|||
result_format = 'txt'
|
||||
if args.as_json:
|
||||
result_format = "json"
|
||||
result = json.loads(result)
|
||||
elif args.as_html:
|
||||
result_format = "html"
|
||||
elif args.as_csv:
|
||||
|
|
|
@ -8,6 +8,7 @@ from datetime import datetime
|
|||
from django.db.models import Q
|
||||
from django.core.exceptions import ValidationError
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.shortcuts import redirect
|
||||
|
||||
from ninja import Router, Schema, FilterSchema, Field, Query
|
||||
from ninja.pagination import paginate, PaginationBase
|
||||
|
@ -66,38 +67,36 @@ class MinimalArchiveResultSchema(Schema):
|
|||
id: UUID
|
||||
abid: str
|
||||
|
||||
modified_at: datetime
|
||||
created_at: datetime
|
||||
created_at: datetime | None
|
||||
modified_at: datetime | None
|
||||
created_by_id: str
|
||||
created_by_username: str
|
||||
|
||||
extractor: str
|
||||
cmd_version: Optional[str]
|
||||
cmd: List[str]
|
||||
pwd: str
|
||||
status: str
|
||||
output: str
|
||||
retry_at: datetime | None
|
||||
|
||||
extractor: str
|
||||
cmd_version: str | None
|
||||
cmd: list[str] | None
|
||||
pwd: str | None
|
||||
output: str | None
|
||||
|
||||
start_ts: Optional[datetime]
|
||||
end_ts: Optional[datetime]
|
||||
start_ts: datetime | None
|
||||
end_ts: datetime | None
|
||||
|
||||
@staticmethod
|
||||
def resolve_created_by_id(obj):
|
||||
return str(obj.created_by_id)
|
||||
|
||||
@staticmethod
|
||||
def resolve_created_by_username(obj):
|
||||
def resolve_created_by_username(obj) -> str:
|
||||
User = get_user_model()
|
||||
return User.objects.get(id=obj.created_by_id).username
|
||||
return User.objects.filter(pk=obj.created_by_id).values_list('username', flat=True)[0]
|
||||
|
||||
@staticmethod
|
||||
def resolve_abid(obj):
|
||||
return str(obj.ABID)
|
||||
|
||||
@staticmethod
|
||||
def resolve_created_at(obj):
|
||||
return obj.start_ts
|
||||
|
||||
@staticmethod
|
||||
def resolve_snapshot_timestamp(obj):
|
||||
return obj.snapshot.timestamp
|
||||
|
@ -203,6 +202,9 @@ class SnapshotSchema(Schema):
|
|||
created_by_username: str
|
||||
created_at: datetime
|
||||
modified_at: datetime
|
||||
|
||||
status: str
|
||||
retry_at: datetime | None
|
||||
|
||||
bookmarked_at: datetime
|
||||
downloaded_at: Optional[datetime]
|
||||
|
@ -421,6 +423,9 @@ class SeedSchema(Schema):
|
|||
User = get_user_model()
|
||||
return User.objects.get(id=obj.created_by_id).username
|
||||
|
||||
@router.get("/seeds", response=List[SeedSchema], url_name="get_seeds")
def get_seeds(request):
    # Returns the full, de-duplicated Seed queryset; `request` is not used to
    # narrow the results.
    every_seed = Seed.objects.all()
    return every_seed.distinct()
|
||||
|
||||
@router.get("/seed/{seed_id}", response=SeedSchema, url_name="get_seed")
|
||||
def get_seed(request, seed_id: str):
|
||||
|
@ -445,11 +450,12 @@ class CrawlSchema(Schema):
|
|||
created_at: datetime
|
||||
created_by_id: str
|
||||
created_by_username: str
|
||||
|
||||
status: str
|
||||
retry_at: datetime | None
|
||||
|
||||
seed: SeedSchema
|
||||
max_depth: int
|
||||
status: str
|
||||
retry_at: datetime
|
||||
|
||||
# snapshots: List[SnapshotSchema]
|
||||
|
||||
|
@ -469,9 +475,14 @@ class CrawlSchema(Schema):
|
|||
return Snapshot.objects.none()
|
||||
|
||||
|
||||
@router.get("/crawls", response=List[CrawlSchema], url_name="get_crawls")
def get_crawls(request):
    # Returns the full, de-duplicated Crawl queryset; `request` is not used to
    # narrow the results.
    every_crawl = Crawl.objects.all()
    return every_crawl.distinct()
|
||||
|
||||
@router.get("/crawl/{crawl_id}", response=CrawlSchema, url_name="get_crawl")
|
||||
def get_crawl(request, crawl_id: str, with_snapshots: bool=False, with_archiveresults: bool=False):
|
||||
"""Get a specific Crawl by id or abid."""
|
||||
|
||||
crawl = None
|
||||
request.with_snapshots = with_snapshots
|
||||
request.with_archiveresults = with_archiveresults
|
||||
|
@ -488,9 +499,10 @@ def get_crawl(request, crawl_id: str, with_snapshots: bool=False, with_archivere
|
|||
return crawl
|
||||
|
||||
|
||||
# [..., CrawlSchema]
|
||||
@router.get("/any/{abid}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema], url_name="get_any")
|
||||
@router.get("/any/{abid}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema, SeedSchema, CrawlSchema], url_name="get_any", summary="Get any object by its ABID or ID (e.g. snapshot, archiveresult, tag, seed, crawl, etc.)")
|
||||
def get_any(request, abid: str):
|
||||
"""Get any object by its ABID or ID (e.g. snapshot, archiveresult, tag, seed, crawl, etc.)."""
|
||||
|
||||
request.with_snapshots = False
|
||||
request.with_archiveresults = False
|
||||
|
||||
|
@ -516,12 +528,18 @@ def get_any(request, abid: str):
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
# try:
|
||||
# response = response or get_crawl(request, abid)
|
||||
# except Exception:
|
||||
# pass
|
||||
try:
|
||||
response = response or get_seed(request, abid)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
response = response or get_crawl(request, abid)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if response:
|
||||
app_label, model_name = response._meta.app_label, response._meta.model_name
|
||||
return redirect(f"/api/v1/{app_label}/{model_name}/{response.abid}?{request.META['QUERY_STRING']}")
|
||||
|
||||
if not response:
|
||||
raise HttpError(404, 'Object with given ABID not found')
|
||||
|
||||
return response
|
||||
raise HttpError(404, 'Object with given ABID not found')
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue