add abx.archivebox extract hookspec

This commit is contained in:
Nick Sweeting 2024-10-01 21:44:19 -07:00
parent 81d16e96fd
commit 8498ca5c64
No known key found for this signature in database
3 changed files with 33 additions and 1 deletion

View file

@ -6,6 +6,7 @@ from huey.api import TaskWrapper
from pathlib import Path
from typing import Tuple, Literal, ClassVar, get_args
from pydantic import BaseModel, ConfigDict
from django.utils.functional import cached_property
import abx
@ -21,7 +22,7 @@ class BaseHook(BaseModel):
validate_defaults=True,
validate_assignment=False,
revalidate_instances="subclass-instances",
ignored_types=(TaskWrapper, ),
ignored_types=(TaskWrapper, cached_property),
)
hook_type: ClassVar[HookType] # e.g. = 'CONFIG'

View file

@ -1,5 +1,7 @@
__package__ = 'abx.archivebox'
from typing import Dict, Any
from .. import hookspec
@ -30,3 +32,8 @@ def get_QUEUES():
@hookspec
def get_SEARCHBACKENDS():
    """Hook specification for gathering search backends.

    The body of this spec contributes no backends of its own: it simply
    returns an empty dict.
    """
    no_backends = {}
    return no_backends
@hookspec
def extract(snapshot_id) -> Dict[str, Any]:
    """Hook specification for running extraction on a single snapshot.

    Args:
        snapshot_id: Identifier of the snapshot to extract.

    Returns:
        A dict keyed by string; the body of this spec itself returns an
        empty dict.
    """
    nothing_extracted = {}
    return nothing_extracted

View file

@ -2,6 +2,7 @@ __package__ = 'abx.archivebox'
from typing import Dict, Any
from django.utils import timezone
from benedict import benedict
from .. import pm
@ -106,3 +107,26 @@ def get_SEARCHBACKENDS() -> Dict[str, BaseSearchBackend]:
def register_all_hooks(settings):
    """Call the ``register`` hook on ``pm``, passing ``settings`` by keyword.

    The hook's result is discarded; this function returns ``None``.
    """
    register_hook = pm.hook.register
    register_hook(settings=settings)
def extract(url_or_snapshot_id):
    """Resolve the argument to a Snapshot and call the ``extract`` hook for it.

    The argument is classified by its text:

    * contains ``'://'`` -> treated as a URL. The Snapshot with that URL is
      looked up, and a new one is created and saved if none exists.
    * otherwise contains ``'-'`` -> treated as a Snapshot ``id``.
    * anything else -> treated as a Snapshot ``abid``.

    Args:
        url_or_snapshot_id: A string holding a URL, a Snapshot id, or a
            Snapshot abid.

    Returns:
        Whatever ``pm.hook.extract(snapshot_id=...)`` returns.

    Raises:
        Snapshot.DoesNotExist: if an id or abid lookup finds no match
            (raised by ``Snapshot.objects.get``; only the URL branch
            handles a missing Snapshot).
    """
    # Imported inside the function rather than at module top, as before.
    # NOTE(review): presumably to avoid loading models at import time - confirm.
    from core.models import Snapshot

    if '://' in url_or_snapshot_id:
        try:
            snapshot = Snapshot.objects.get(url=url_or_snapshot_id)
        except Snapshot.DoesNotExist:
            # Read the clock once so `timestamp` and `bookmarked_at`
            # describe the same instant.
            now = timezone.now()
            snapshot = Snapshot(
                url=url_or_snapshot_id,
                timestamp=str(now.timestamp()),
                bookmarked_at=now,
            )
            snapshot.save()
    elif '-' in url_or_snapshot_id:
        snapshot = Snapshot.objects.get(id=url_or_snapshot_id)
    else:
        snapshot = Snapshot.objects.get(abid=url_or_snapshot_id)

    return pm.hook.extract(snapshot_id=snapshot.id)