mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 14:44:29 -04:00
API fixes and add actors endpoints
This commit is contained in:
parent
c8e186f21b
commit
8f8fbbb7a2
12 changed files with 229 additions and 52 deletions
|
@ -13,6 +13,7 @@ from django.contrib import admin
|
||||||
from django.core.exceptions import ValidationError, NON_FIELD_ERRORS
|
from django.core.exceptions import ValidationError, NON_FIELD_ERRORS
|
||||||
from django.db import models
|
from django.db import models
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
from django.utils.functional import classproperty
|
||||||
from django.db.utils import OperationalError
|
from django.db.utils import OperationalError
|
||||||
from django.contrib.auth import get_user_model
|
from django.contrib.auth import get_user_model
|
||||||
from django.urls import reverse_lazy
|
from django.urls import reverse_lazy
|
||||||
|
@ -94,13 +95,19 @@ class ABIDModel(models.Model):
|
||||||
class Meta(TypedModelMeta):
|
class Meta(TypedModelMeta):
|
||||||
abstract = True
|
abstract = True
|
||||||
|
|
||||||
|
@classproperty
def TYPE(cls) -> str:
    """Return '<module>.<ClassName>' for this model class, e.g. 'core.models.Snapshot'."""
    module_path, class_name = cls.__module__, cls.__name__
    return f'{module_path}.{class_name}'
|
||||||
|
|
||||||
@admin.display(description='Summary')
def __str__(self) -> str:
    """Render the object as '[<abid>] <ClassName> <uri>'.

    While self.abid is still falsy, '<abid_prefix>NEW' is shown in its place.
    """
    abid_label = self.abid or (self.abid_prefix + "NEW")
    model_name = self.__class__.__name__
    # abid_uri_src is a string that gets evaluated as a Python expression with `self` in scope.
    # NOTE(review): presumably a trusted, class-declared expression -- confirm it can never carry user input.
    uri = eval(self.abid_uri_src)
    return f'[{abid_label}] {model_name} {uri}'
|
||||||
|
|
||||||
def __init__(self, *args: Any, **kwargs: Any) -> None:
|
def __init__(self, *args: Any, **kwargs: Any) -> None:
|
||||||
"""Overriden __init__ method ensures we have a stable creation timestamp that fields can use within initialization code pre-saving to DB."""
|
"""Overriden __init__ method ensures we have a stable creation timestamp that fields can use within initialization code pre-saving to DB."""
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs) # type: ignore
|
||||||
|
|
||||||
# pre-compute a stable timestamp of the obj init time (with abid.ts precision limit applied) for use when object is first created,
|
# pre-compute a stable timestamp of the obj init time (with abid.ts precision limit applied) for use when object is first created,
|
||||||
# some other fields depend on a timestamp at creation time, and it's nice to have one common timestamp they can all share.
|
# some other fields depend on a timestamp at creation time, and it's nice to have one common timestamp they can all share.
|
||||||
# Used as an alternative to auto_now_add=True + auto_now=True which can produce two different times & requires saving to DB to get the TS.
|
# Used as an alternative to auto_now_add=True + auto_now=True which can produce two different times & requires saving to DB to get the TS.
|
||||||
|
@ -165,6 +172,7 @@ class ABIDModel(models.Model):
|
||||||
def id_from_abid(cls, abid: str) -> str:
|
def id_from_abid(cls, abid: str) -> str:
|
||||||
return str(cls.objects.only('pk').get(abid=cls.abid_prefix + str(abid).split('_', 1)[-1]).pk)
|
return str(cls.objects.only('pk').get(abid=cls.abid_prefix + str(abid).split('_', 1)[-1]).pk)
|
||||||
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def ABID_SOURCES(self) -> Dict[str, str]:
|
def ABID_SOURCES(self) -> Dict[str, str]:
|
||||||
""""Get the dict of fresh ABID component values based on the live object's properties."""
|
""""Get the dict of fresh ABID component values based on the live object's properties."""
|
||||||
|
|
|
@ -60,9 +60,9 @@ class ActorType(Generic[ModelType]):
|
||||||
Model: Type[ModelType]
|
Model: Type[ModelType]
|
||||||
StateMachineClass: Type[StateMachine]
|
StateMachineClass: Type[StateMachine]
|
||||||
|
|
||||||
STATE_FIELD_NAME: ClassVar[str]
|
STATE_FIELD_NAME: ClassVar[str] = 'status'
|
||||||
ACTIVE_STATE: ClassVar[ObjectState]
|
ACTIVE_STATE: ClassVar[ObjectState] = 'started'
|
||||||
FINAL_STATES: ClassVar[ObjectStateList]
|
FINAL_STATES: ClassVar[ObjectStateList] # e.g. ['succeeded', 'failed', 'skipped'] or ['sealed']
|
||||||
EVENT_NAME: ClassVar[str] = 'tick' # the event name to trigger on the obj.sm: StateMachine (usually 'tick')
|
EVENT_NAME: ClassVar[str] = 'tick' # the event name to trigger on the obj.sm: StateMachine (usually 'tick')
|
||||||
|
|
||||||
CLAIM_ORDER: ClassVar[tuple[str, ...]] = ('retry_at',) # the .order(*args) to claim the queue objects in, use ('?',) for random order
|
CLAIM_ORDER: ClassVar[tuple[str, ...]] = ('retry_at',) # the .order(*args) to claim the queue objects in, use ('?',) for random order
|
||||||
|
@ -294,7 +294,7 @@ class ActorType(Generic[ModelType]):
|
||||||
|
|
||||||
@classproperty
def final_q(cls) -> Q:
    """Build the Q filter matching objects whose STATE_FIELD_NAME value is one of FINAL_STATES."""
    lookup = f'{cls.STATE_FIELD_NAME}__in'
    final_state_names = [cls._state_to_str(state) for state in cls.FINAL_STATES]
    return Q(**{lookup: final_state_names})
|
||||||
|
|
||||||
@classproperty
|
@classproperty
|
||||||
|
|
|
@ -1,15 +1,14 @@
|
||||||
__package__ = 'archivebox.api'
|
__package__ = 'archivebox.api'
|
||||||
|
|
||||||
from typing import Any, Optional, cast
|
from typing import Optional, cast
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
|
|
||||||
from django.http import HttpRequest
|
from django.http import HttpRequest
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
from django.contrib.auth import login
|
|
||||||
from django.contrib.auth import authenticate
|
from django.contrib.auth import authenticate
|
||||||
from django.contrib.auth.models import AbstractBaseUser
|
from django.contrib.auth.models import AbstractBaseUser
|
||||||
|
|
||||||
from ninja.security import HttpBearer, APIKeyQuery, APIKeyHeader, HttpBasicAuth, django_auth_superuser
|
from ninja.security import HttpBearer, APIKeyQuery, APIKeyHeader, HttpBasicAuth
|
||||||
from ninja.errors import HttpError
|
from ninja.errors import HttpError
|
||||||
|
|
||||||
|
|
||||||
|
|
117
archivebox/api/v1_actors.py
Normal file
117
archivebox/api/v1_actors.py
Normal file
|
@ -0,0 +1,117 @@
|
||||||
|
__package__ = 'archivebox.api'
|
||||||
|
|
||||||
|
from uuid import UUID
|
||||||
|
from typing import List, Any
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
from ninja import Router, Schema
|
||||||
|
|
||||||
|
from .auth import API_AUTH_METHODS
|
||||||
|
|
||||||
|
router = Router(tags=['Workers and Tasks'], auth=API_AUTH_METHODS)
|
||||||
|
|
||||||
|
|
||||||
|
class TaskSchema(Schema):
    # Response shape for a single queued/processed object.
    # (kept as comments, not a class docstring, so the generated API schema text is unchanged)
    TYPE: str

    # identity
    id: UUID
    abid: str
    description: str                # filled in by resolve_description() below

    # state fields
    status: str
    retry_at: datetime | None

    # bookkeeping
    created_at: datetime
    modified_at: datetime
    created_by_id: int

    @staticmethod
    def resolve_description(obj) -> str:
        """Use the object's str() representation as its description."""
        return str(obj)
|
||||||
|
|
||||||
|
|
||||||
|
class ActorSchema(Schema):
    # Response shape for one actor type: its settings, its state-machine
    # constants, and the tasks selected by its queue / final-state filters.
    # (kept as comments, not a class docstring, so the generated API schema text is unchanged)

    # TYPE: str = 'actors.actor.ActorType'

    # name: str
    #pid: int | None
    idle_count: int
    launch_kwargs: dict[str, Any]
    mode: str

    model: str                      # filled in by resolve_model()
    statemachine: str               # filled in by resolve_statemachine()
    STATE_FIELD_NAME: str
    # ACTIVE_STATE: str
    FINAL_STATES: list[str]         # filled in by resolve_FINAL_STATES()
    EVENT_NAME: str
    CLAIM_ORDER: list[str]
    CLAIM_FROM_TOP_N: int
    CLAIM_ATOMIC: bool
    MAX_TICK_TIME: int
    MAX_CONCURRENT_ACTORS: int

    queue: list[TaskSchema]         # filled in by resolve_queue()
    past: list[TaskSchema]          # filled in by resolve_past()

    @staticmethod
    def resolve_model(obj) -> str:
        """Return the class name of the actor's Model."""
        return obj.Model.__name__

    @staticmethod
    def resolve_statemachine(obj) -> str:
        """Return the class name of the actor's StateMachineClass."""
        return obj.StateMachineClass.__name__

    @staticmethod
    def resolve_name(obj) -> str:
        """Return str(obj).

        NOTE(review): the matching `name` field is commented out above, so this
        resolver currently has no declared field to populate.
        """
        return str(obj)

    # @staticmethod
    # def resolve_ACTIVE_STATE(obj) -> str:
    #     return str(obj.ACTIVE_STATE)

    @staticmethod
    def resolve_FINAL_STATES(obj) -> list[str]:
        """Return each entry of obj.FINAL_STATES converted with str()."""
        return [str(state) for state in obj.FINAL_STATES]

    @staticmethod
    def resolve_queue(obj) -> list[TaskSchema]:
        """Return the rows matching any of the pending/future/active/stalled filters, ordered by retry_at descending."""
        queued_q = obj.pending_q | obj.future_q | obj.active_q | obj.stalled_q
        # list() instead of a comprehension that rebinds `obj`: same rows, no shadowing of the parameter
        return list(obj.qs.filter(queued_q).order_by('-retry_at'))

    @staticmethod
    def resolve_past(obj) -> list[TaskSchema]:
        """Return the rows matching the final-state filter, ordered by modified_at descending."""
        return list(obj.qs.filter(obj.final_q).order_by('-modified_at'))
|
||||||
|
|
||||||
|
|
||||||
|
class OrchestratorSchema(Schema):
    # Response shape for an orchestrator and the actors built from its actor_types.
    # (kept as comments, not a class docstring, so the generated API schema text is unchanged)

    # TYPE: str = 'actors.orchestrator.Orchestrator'

    #pid: int | None
    exit_on_idle: bool
    mode: str

    actors: list[ActorSchema]       # filled in by resolve_actors()

    @staticmethod
    def resolve_actors(obj) -> list[ActorSchema]:
        """Call every value stored in obj.actor_types and return the results as a list."""
        actor_factories = obj.actor_types.values()
        return [make_actor() for make_actor in actor_factories]
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/orchestrators", response=List[OrchestratorSchema], url_name="get_orchestrators")
def get_orchestrators(request):
    """List all the task orchestrators (aka Orchestrators) that are currently running"""

    # imported inside the view rather than at module import time
    from actors.orchestrator import Orchestrator

    # NOTE(review): this constructs a new Orchestrator on every request rather than
    # looking one up -- confirm that matches the "currently running" wording above.
    return [Orchestrator()]
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/actors", response=List[ActorSchema], url_name="get_actors")
def get_actors(request):
    """List all the task consumer workers (aka Actors) that are currently running"""

    # imported inside the view rather than at module import time
    from actors.orchestrator import Orchestrator

    # NOTE(review): the values of actor_types are returned as-is here, whereas
    # OrchestratorSchema.resolve_actors calls each one first -- confirm which is intended.
    registered_actor_types = Orchestrator().actor_types
    return registered_actor_types.values()
|
|
@ -40,6 +40,7 @@ def register_urls(api: NinjaAPI) -> NinjaAPI:
|
||||||
api.add_router('/auth/', 'api.v1_auth.router')
|
api.add_router('/auth/', 'api.v1_auth.router')
|
||||||
api.add_router('/core/', 'api.v1_core.router')
|
api.add_router('/core/', 'api.v1_core.router')
|
||||||
api.add_router('/cli/', 'api.v1_cli.router')
|
api.add_router('/cli/', 'api.v1_cli.router')
|
||||||
|
api.add_router('/jobs/', 'api.v1_actors.router')
|
||||||
return api
|
return api
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
__package__ = 'archivebox.api'
|
__package__ = 'archivebox.api'
|
||||||
|
|
||||||
|
import json
|
||||||
from typing import List, Dict, Any, Optional
|
from typing import List, Dict, Any, Optional
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
|
@ -30,6 +31,7 @@ class CLICommandResponseSchema(Schema):
|
||||||
success: bool
|
success: bool
|
||||||
errors: List[str]
|
errors: List[str]
|
||||||
result: JSONType
|
result: JSONType
|
||||||
|
result_format: str = 'str'
|
||||||
stdout: str
|
stdout: str
|
||||||
stderr: str
|
stderr: str
|
||||||
|
|
||||||
|
@ -97,7 +99,7 @@ class ListCommandSchema(Schema):
|
||||||
sort: str = 'bookmarked_at'
|
sort: str = 'bookmarked_at'
|
||||||
as_json: bool = True
|
as_json: bool = True
|
||||||
as_html: bool = False
|
as_html: bool = False
|
||||||
as_csv: str | bool = 'timestamp,url'
|
as_csv: str | None = 'timestamp,url'
|
||||||
with_headers: bool = False
|
with_headers: bool = False
|
||||||
|
|
||||||
class RemoveCommandSchema(Schema):
|
class RemoveCommandSchema(Schema):
|
||||||
|
@ -182,7 +184,7 @@ def cli_schedule(request, args: ScheduleCommandSchema):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@router.post("/list", response=CLICommandResponseSchema, summary='archivebox list [args] [filter_patterns]')
|
@router.post("/list", response=CLICommandResponseSchema, summary='archivebox list [args] [filter_patterns] (use this endpoint with ?filter_type=search to search for snapshots)')
|
||||||
def cli_list(request, args: ListCommandSchema):
|
def cli_list(request, args: ListCommandSchema):
|
||||||
result = list_all(
|
result = list_all(
|
||||||
filter_patterns=args.filter_patterns,
|
filter_patterns=args.filter_patterns,
|
||||||
|
@ -200,6 +202,7 @@ def cli_list(request, args: ListCommandSchema):
|
||||||
result_format = 'txt'
|
result_format = 'txt'
|
||||||
if args.as_json:
|
if args.as_json:
|
||||||
result_format = "json"
|
result_format = "json"
|
||||||
|
result = json.loads(result)
|
||||||
elif args.as_html:
|
elif args.as_html:
|
||||||
result_format = "html"
|
result_format = "html"
|
||||||
elif args.as_csv:
|
elif args.as_csv:
|
||||||
|
|
|
@ -8,6 +8,7 @@ from datetime import datetime
|
||||||
from django.db.models import Q
|
from django.db.models import Q
|
||||||
from django.core.exceptions import ValidationError
|
from django.core.exceptions import ValidationError
|
||||||
from django.contrib.auth import get_user_model
|
from django.contrib.auth import get_user_model
|
||||||
|
from django.shortcuts import redirect
|
||||||
|
|
||||||
from ninja import Router, Schema, FilterSchema, Field, Query
|
from ninja import Router, Schema, FilterSchema, Field, Query
|
||||||
from ninja.pagination import paginate, PaginationBase
|
from ninja.pagination import paginate, PaginationBase
|
||||||
|
@ -66,38 +67,36 @@ class MinimalArchiveResultSchema(Schema):
|
||||||
id: UUID
|
id: UUID
|
||||||
abid: str
|
abid: str
|
||||||
|
|
||||||
modified_at: datetime
|
created_at: datetime | None
|
||||||
created_at: datetime
|
modified_at: datetime | None
|
||||||
created_by_id: str
|
created_by_id: str
|
||||||
created_by_username: str
|
created_by_username: str
|
||||||
|
|
||||||
extractor: str
|
|
||||||
cmd_version: Optional[str]
|
|
||||||
cmd: List[str]
|
|
||||||
pwd: str
|
|
||||||
status: str
|
status: str
|
||||||
output: str
|
retry_at: datetime | None
|
||||||
|
|
||||||
|
extractor: str
|
||||||
|
cmd_version: str | None
|
||||||
|
cmd: list[str] | None
|
||||||
|
pwd: str | None
|
||||||
|
output: str | None
|
||||||
|
|
||||||
start_ts: Optional[datetime]
|
start_ts: datetime | None
|
||||||
end_ts: Optional[datetime]
|
end_ts: datetime | None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def resolve_created_by_id(obj):
|
def resolve_created_by_id(obj):
|
||||||
return str(obj.created_by_id)
|
return str(obj.created_by_id)
|
||||||
|
|
||||||
@staticmethod
def resolve_created_by_username(obj) -> str:
    """Look up the username of the user whose pk equals obj.created_by_id.

    Only the username column is selected; indexing with [0] raises IndexError
    when no user row matches.
    """
    user_model = get_user_model()
    matching_usernames = user_model.objects.filter(pk=obj.created_by_id).values_list('username', flat=True)
    return matching_usernames[0]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def resolve_abid(obj):
|
def resolve_abid(obj):
|
||||||
return str(obj.ABID)
|
return str(obj.ABID)
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def resolve_created_at(obj):
|
|
||||||
return obj.start_ts
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def resolve_snapshot_timestamp(obj):
|
def resolve_snapshot_timestamp(obj):
|
||||||
return obj.snapshot.timestamp
|
return obj.snapshot.timestamp
|
||||||
|
@ -203,6 +202,9 @@ class SnapshotSchema(Schema):
|
||||||
created_by_username: str
|
created_by_username: str
|
||||||
created_at: datetime
|
created_at: datetime
|
||||||
modified_at: datetime
|
modified_at: datetime
|
||||||
|
|
||||||
|
status: str
|
||||||
|
retry_at: datetime | None
|
||||||
|
|
||||||
bookmarked_at: datetime
|
bookmarked_at: datetime
|
||||||
downloaded_at: Optional[datetime]
|
downloaded_at: Optional[datetime]
|
||||||
|
@ -421,6 +423,9 @@ class SeedSchema(Schema):
|
||||||
User = get_user_model()
|
User = get_user_model()
|
||||||
return User.objects.get(id=obj.created_by_id).username
|
return User.objects.get(id=obj.created_by_id).username
|
||||||
|
|
||||||
|
@router.get("/seeds", response=List[SeedSchema], url_name="get_seeds")
def get_seeds(request):
    # Unfiltered listing: every Seed row, with DISTINCT applied to the queryset.
    # (no docstring added on purpose, so the endpoint's generated description stays as-is)
    all_seeds = Seed.objects.all()
    return all_seeds.distinct()
|
||||||
|
|
||||||
@router.get("/seed/{seed_id}", response=SeedSchema, url_name="get_seed")
|
@router.get("/seed/{seed_id}", response=SeedSchema, url_name="get_seed")
|
||||||
def get_seed(request, seed_id: str):
|
def get_seed(request, seed_id: str):
|
||||||
|
@ -445,11 +450,12 @@ class CrawlSchema(Schema):
|
||||||
created_at: datetime
|
created_at: datetime
|
||||||
created_by_id: str
|
created_by_id: str
|
||||||
created_by_username: str
|
created_by_username: str
|
||||||
|
|
||||||
|
status: str
|
||||||
|
retry_at: datetime | None
|
||||||
|
|
||||||
seed: SeedSchema
|
seed: SeedSchema
|
||||||
max_depth: int
|
max_depth: int
|
||||||
status: str
|
|
||||||
retry_at: datetime
|
|
||||||
|
|
||||||
# snapshots: List[SnapshotSchema]
|
# snapshots: List[SnapshotSchema]
|
||||||
|
|
||||||
|
@ -469,9 +475,14 @@ class CrawlSchema(Schema):
|
||||||
return Snapshot.objects.none()
|
return Snapshot.objects.none()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/crawls", response=List[CrawlSchema], url_name="get_crawls")
def get_crawls(request):
    # Unfiltered listing: every Crawl row, with DISTINCT applied to the queryset.
    # (no docstring added on purpose, so the endpoint's generated description stays as-is)
    all_crawls = Crawl.objects.all()
    return all_crawls.distinct()
|
||||||
|
|
||||||
@router.get("/crawl/{crawl_id}", response=CrawlSchema, url_name="get_crawl")
|
@router.get("/crawl/{crawl_id}", response=CrawlSchema, url_name="get_crawl")
|
||||||
def get_crawl(request, crawl_id: str, with_snapshots: bool=False, with_archiveresults: bool=False):
|
def get_crawl(request, crawl_id: str, with_snapshots: bool=False, with_archiveresults: bool=False):
|
||||||
"""Get a specific Crawl by id or abid."""
|
"""Get a specific Crawl by id or abid."""
|
||||||
|
|
||||||
crawl = None
|
crawl = None
|
||||||
request.with_snapshots = with_snapshots
|
request.with_snapshots = with_snapshots
|
||||||
request.with_archiveresults = with_archiveresults
|
request.with_archiveresults = with_archiveresults
|
||||||
|
@ -488,9 +499,10 @@ def get_crawl(request, crawl_id: str, with_snapshots: bool=False, with_archivere
|
||||||
return crawl
|
return crawl
|
||||||
|
|
||||||
|
|
||||||
# [..., CrawlSchema]
|
@router.get("/any/{abid}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema, SeedSchema, CrawlSchema], url_name="get_any", summary="Get any object by its ABID or ID (e.g. snapshot, archiveresult, tag, seed, crawl, etc.)")
|
||||||
@router.get("/any/{abid}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema], url_name="get_any")
|
|
||||||
def get_any(request, abid: str):
|
def get_any(request, abid: str):
|
||||||
|
"""Get any object by its ABID or ID (e.g. snapshot, archiveresult, tag, seed, crawl, etc.)."""
|
||||||
|
|
||||||
request.with_snapshots = False
|
request.with_snapshots = False
|
||||||
request.with_archiveresults = False
|
request.with_archiveresults = False
|
||||||
|
|
||||||
|
@ -516,12 +528,18 @@ def get_any(request, abid: str):
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# try:
|
try:
|
||||||
# response = response or get_crawl(request, abid)
|
response = response or get_seed(request, abid)
|
||||||
# except Exception:
|
except Exception:
|
||||||
# pass
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = response or get_crawl(request, abid)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if response:
|
||||||
|
app_label, model_name = response._meta.app_label, response._meta.model_name
|
||||||
|
return redirect(f"/api/v1/{app_label}/{model_name}/{response.abid}?{request.META['QUERY_STRING']}")
|
||||||
|
|
||||||
if not response:
|
raise HttpError(404, 'Object with given ABID not found')
|
||||||
raise HttpError(404, 'Object with given ABID not found')
|
|
||||||
|
|
||||||
return response
|
|
||||||
|
|
|
@ -12,8 +12,12 @@ from archivebox.config.django import setup_django
|
||||||
setup_django(in_memory_db=False, check_db=True)
|
setup_django(in_memory_db=False, check_db=True)
|
||||||
|
|
||||||
|
|
||||||
|
# from channels.auth import AuthMiddlewareStack
|
||||||
|
# from channels.security.websocket import AllowedHostsOriginValidator
|
||||||
|
from channels.routing import ProtocolTypeRouter # , URLRouter
|
||||||
from django.core.asgi import get_asgi_application
|
from django.core.asgi import get_asgi_application
|
||||||
from channels.routing import ProtocolTypeRouter
|
|
||||||
|
# from core.routing import websocket_urlpatterns
|
||||||
|
|
||||||
|
|
||||||
django_asgi_app = get_asgi_application()
|
django_asgi_app = get_asgi_application()
|
||||||
|
@ -21,6 +25,9 @@ django_asgi_app = get_asgi_application()
|
||||||
# ASGI entrypoint: route connections by protocol type; only "http" is mapped for now.
application = ProtocolTypeRouter({
    "http": django_asgi_app,
    # only if we need websocket support later:
    # "websocket": AllowedHostsOriginValidator(
    #     AuthMiddlewareStack(URLRouter(websocket_urlpatterns))
    # ),
})
|
||||||
|
|
|
@ -326,6 +326,12 @@ STORAGES = {
|
||||||
# },
|
# },
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Channel layer config: a single "default" layer backed by channels' InMemoryChannelLayer.
# NOTE(review): presumably per-process only -- confirm that is acceptable for this deployment.
CHANNEL_LAYERS = {
    "default": {"BACKEND": "channels.layers.InMemoryChannelLayer"},
}
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
### Security Settings
|
### Security Settings
|
||||||
################################################################################
|
################################################################################
|
||||||
|
|
|
@ -150,8 +150,8 @@ class Crawl(ABIDModel, ModelWithHealthStats, ModelWithStateMachine):
|
||||||
parser = (self.seed and self.seed.extractor) or 'auto'
|
parser = (self.seed and self.seed.extractor) or 'auto'
|
||||||
created_at = self.created_at.strftime("%Y-%m-%d %H:%M") if self.created_at else '<no timestamp set>'
|
created_at = self.created_at.strftime("%Y-%m-%d %H:%M") if self.created_at else '<no timestamp set>'
|
||||||
if self.id and self.seed:
|
if self.id and self.seed:
|
||||||
return f'\\[{self.ABID}] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
|
return f'[{self.ABID}] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
|
||||||
return f'\\[{self.abid_prefix}****not*saved*yet****] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
|
return f'[{self.abid_prefix}****not*saved*yet****] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_seed(cls, seed: Seed, max_depth: int=0, persona: str='Default', tags_str: str='', config: dict|None=None, created_by: int|None=None):
|
def from_seed(cls, seed: Seed, max_depth: int=0, persona: str='Default', tags_str: str='', config: dict|None=None, created_by: int|None=None):
|
||||||
|
|
|
@ -20,7 +20,7 @@ from archivebox.misc.util import enforce_types
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def generate_json_index_from_links(links: List[Link], with_headers: bool):
|
def generate_json_index_from_links(links: List[Link], with_headers: bool=False):
|
||||||
MAIN_INDEX_HEADER = {
|
MAIN_INDEX_HEADER = {
|
||||||
'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.',
|
'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.',
|
||||||
'schema': 'archivebox.index.json',
|
'schema': 'archivebox.index.json',
|
||||||
|
@ -33,9 +33,9 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool):
|
||||||
'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
|
'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
|
||||||
'source': 'https://github.com/ArchiveBox/ArchiveBox',
|
'source': 'https://github.com/ArchiveBox/ArchiveBox',
|
||||||
'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues',
|
'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues',
|
||||||
'dependencies': dict(abx.pm.hook.get_BINARIES()),
|
'dependencies': abx.as_dict(abx.pm.hook.get_BINARIES()),
|
||||||
},
|
},
|
||||||
}
|
} if with_headers else {}
|
||||||
|
|
||||||
if with_headers:
|
if with_headers:
|
||||||
output = {
|
output = {
|
||||||
|
@ -137,13 +137,16 @@ class ExtendedEncoder(pyjson.JSONEncoder):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def default(self, obj):
|
def default(self, obj):
|
||||||
cls_name = obj.__class__.__name__
|
cls_name = type(obj).__name__
|
||||||
|
|
||||||
if hasattr(obj, '_asdict'):
|
if hasattr(obj, '_asdict'):
|
||||||
return obj._asdict()
|
return obj._asdict()
|
||||||
|
|
||||||
elif isinstance(obj, bytes):
|
elif isinstance(obj, bytes):
|
||||||
return obj.decode()
|
return obj.decode()
|
||||||
|
|
||||||
|
elif isinstance(obj, Path):
|
||||||
|
return str(obj)
|
||||||
|
|
||||||
elif isinstance(obj, datetime):
|
elif isinstance(obj, datetime):
|
||||||
return obj.isoformat()
|
return obj.isoformat()
|
||||||
|
@ -152,12 +155,27 @@ class ExtendedEncoder(pyjson.JSONEncoder):
|
||||||
return '{}: {}'.format(obj.__class__.__name__, obj)
|
return '{}: {}'.format(obj.__class__.__name__, obj)
|
||||||
|
|
||||||
elif cls_name in ('dict_items', 'dict_keys', 'dict_values'):
|
elif cls_name in ('dict_items', 'dict_keys', 'dict_values'):
|
||||||
return tuple(obj)
|
return list(obj)
|
||||||
|
|
||||||
|
try:
|
||||||
|
return dict(obj)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
return list(obj)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
return str(obj)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
return pyjson.JSONEncoder.default(self, obj)
|
return pyjson.JSONEncoder.default(self, obj)
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
def to_json(obj: Any, indent: Optional[int]=4, sort_keys: bool=True, cls=ExtendedEncoder, default=None) -> str:
    """Serialize obj to a JSON string.

    Args:
        obj: the value to serialize.
        indent: indentation width forwarded to the JSON encoder (None for compact output).
        sort_keys: whether dict keys are emitted in sorted order.
        cls: JSONEncoder subclass to use; ExtendedEncoder when omitted or falsy.
        default: optional fallback serializer forwarded to the JSON encoder.

    Returns:
        The JSON text.
    """
    # Honour the caller-supplied encoder class: previously ExtendedEncoder was
    # hard-coded here, so the `cls` argument was silently ignored.
    encoder_cls = cls or ExtendedEncoder
    return pyjson.dumps(obj, indent=indent, sort_keys=sort_keys, cls=encoder_cls, default=default)
|
||||||
|
|
||||||
|
|
|
@ -944,7 +944,7 @@ def list_all(filter_patterns_str: Optional[str]=None,
|
||||||
json: bool=False,
|
json: bool=False,
|
||||||
html: bool=False,
|
html: bool=False,
|
||||||
with_headers: bool=False,
|
with_headers: bool=False,
|
||||||
out_dir: Path=DATA_DIR) -> Iterable[Link]:
|
out_dir: Path=DATA_DIR):
|
||||||
"""List, filter, and export information about archive entries"""
|
"""List, filter, and export information about archive entries"""
|
||||||
|
|
||||||
check_data_folder()
|
check_data_folder()
|
||||||
|
@ -976,15 +976,15 @@ def list_all(filter_patterns_str: Optional[str]=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
if json:
|
if json:
|
||||||
output = generate_json_index_from_links(folders.values(), with_headers)
|
output = generate_json_index_from_links(folders.values(), with_headers=with_headers)
|
||||||
elif html:
|
elif html:
|
||||||
output = generate_index_from_links(folders.values(), with_headers)
|
output = generate_index_from_links(folders.values(), with_headers=with_headers)
|
||||||
elif csv:
|
elif csv:
|
||||||
output = links_to_csv(folders.values(), cols=csv.split(','), header=with_headers)
|
output = links_to_csv(folders.values(), cols=csv.split(','), header=with_headers)
|
||||||
else:
|
else:
|
||||||
output = printable_folders(folders, with_headers=with_headers)
|
output = printable_folders(folders, with_headers=with_headers)
|
||||||
print(output)
|
print(output)
|
||||||
return folders
|
return output
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue