switch everywhere to use Snapshot.pk and ArchiveResult.pk instead of id

This commit is contained in:
Nick Sweeting 2024-05-13 05:12:12 -07:00
parent 9733b8d04c
commit 0420662174
No known key found for this signature in database
15 changed files with 175 additions and 104 deletions

View file

@ -48,6 +48,8 @@ class ABID(NamedTuple):
@classmethod @classmethod
def parse(cls, buffer: Union[str, UUID, ulid.ULID, TypeID, 'ABID'], prefix=DEFAULT_ABID_PREFIX) -> 'ABID': def parse(cls, buffer: Union[str, UUID, ulid.ULID, TypeID, 'ABID'], prefix=DEFAULT_ABID_PREFIX) -> 'ABID':
assert buffer, f'Attempted to create ABID from null value {buffer}'
buffer = str(buffer) buffer = str(buffer)
if '_' in buffer: if '_' in buffer:
prefix, suffix = buffer.split('_') prefix, suffix = buffer.split('_')
@ -55,7 +57,7 @@ class ABID(NamedTuple):
prefix, suffix = prefix.strip('_'), buffer prefix, suffix = prefix.strip('_'), buffer
assert len(prefix) == ABID_PREFIX_LEN - 1 # length without trailing _ assert len(prefix) == ABID_PREFIX_LEN - 1 # length without trailing _
assert len(suffix) == ABID_SUFFIX_LEN assert len(suffix) == ABID_SUFFIX_LEN, f'Suffix {suffix} from {buffer} was not {ABID_SUFFIX_LEN} chars long'
return cls( return cls(
prefix=abid_part_from_prefix(prefix), prefix=abid_part_from_prefix(prefix),
@ -118,6 +120,7 @@ def abid_part_from_uri(uri: str) -> str:
""" """
'E4A5CCD9' # takes first 8 characters of sha256(url) 'E4A5CCD9' # takes first 8 characters of sha256(url)
""" """
uri = str(uri)
return uri_hash(uri)[:ABID_URI_LEN] return uri_hash(uri)[:ABID_URI_LEN]
def abid_part_from_ts(ts: Optional[datetime]) -> str: def abid_part_from_ts(ts: Optional[datetime]) -> str:
@ -131,10 +134,11 @@ def abid_part_from_subtype(subtype: str) -> str:
Snapshots have 01 type, other objects have other subtypes like wget/media/etc. Snapshots have 01 type, other objects have other subtypes like wget/media/etc.
Also allows us to change the ulid spec later by putting special sigil values here. Also allows us to change the ulid spec later by putting special sigil values here.
""" """
subtype = str(subtype)
if len(subtype) == ABID_SUBTYPE_LEN: if len(subtype) == ABID_SUBTYPE_LEN:
return subtype return subtype
return hashlib.sha256(subtype.encode('utf-8')).hexdigest()[:ABID_SUBTYPE_LEN] return hashlib.sha256(subtype.encode('utf-8')).hexdigest()[:ABID_SUBTYPE_LEN].upper()
def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str: def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
""" """
@ -146,16 +150,15 @@ def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
elif isinstance(rand, UUID): elif isinstance(rand, UUID):
# if it's a uuid we take the last 6 characters of the ULID representation of it # if it's a uuid we take the last 6 characters of the ULID representation of it
return str(ulid.from_uuid(rand))[-ABID_RAND_LEN:] return str(ulid.from_uuid(rand))[-ABID_RAND_LEN:]
elif isinstance(rand, str):
# if it's a string we take the last 6 characters of it verbatim
return rand[-ABID_RAND_LEN:]
elif isinstance(rand, int): elif isinstance(rand, int):
# if it's a BigAutoInteger field we convert it from an int to a 0-padded string # if it's a BigAutoInteger field we convert it from an int to a 0-padded string
rand_str = str(rand)[-ABID_RAND_LEN:] rand_str = str(rand)[-ABID_RAND_LEN:]
padding_needed = ABID_RAND_LEN - len(rand_str) padding_needed = ABID_RAND_LEN - len(rand_str)
rand_str = ('0'*padding_needed) + rand_str rand_str = ('0'*padding_needed) + rand_str
return rand_str return rand_str
raise NotImplementedError('Random component of an ABID can only be computed from a str or UUID')
# otherwise treat it as a string, take the last 6 characters of it verbatim
return str(rand)[-ABID_RAND_LEN:].upper()
def abid_from_values(prefix, ts, uri, subtype, rand) -> ABID: def abid_from_values(prefix, ts, uri, subtype, rand) -> ABID:

View file

@ -28,14 +28,16 @@ from .abid import (
# Database Field for typeid/ulid style IDs with a prefix, e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ # Database Field for typeid/ulid style IDs with a prefix, e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ
ABIDField = partial( ABIDField = partial(
CharIDField, CharIDField,
default=ulid.new,
max_length=ABID_LEN, max_length=ABID_LEN,
help_text="ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)" help_text="ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)",
default=None,
null=True,
blank=True,
db_index=True,
unique=True,
) )
class ABIDModel(models.Model): class ABIDModel(models.Model):
abid_prefix: str = DEFAULT_ABID_PREFIX # e.g. 'tag_' abid_prefix: str = DEFAULT_ABID_PREFIX # e.g. 'tag_'
abid_ts_src = 'None' # e.g. 'self.created' abid_ts_src = 'None' # e.g. 'self.created'
@ -54,7 +56,8 @@ class ABIDModel(models.Model):
def save(self, *args: Any, **kwargs: Any) -> None: def save(self, *args: Any, **kwargs: Any) -> None:
if hasattr(self, 'abid'): if hasattr(self, 'abid'):
self.abid: ABID = self.abid or self.calculate_abid() # self.abid = ABID.parse(self.abid) if self.abid else self.calculate_abid()
self.abid = self.calculate_abid()
else: else:
print(f'[!] WARNING: {self.__class__.__name__}.abid is not a DB field so ABID will not be persisted!') print(f'[!] WARNING: {self.__class__.__name__}.abid is not a DB field so ABID will not be persisted!')
self.abid = self.calculate_abid() self.abid = self.calculate_abid()
@ -106,7 +109,7 @@ class ABIDModel(models.Model):
""" """
ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE') ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE')
""" """
return ABID.parse(self.abid) if self.abid else self.calculate_abid() return ABID.parse(self.abid) if getattr(self, 'abid', None) else self.calculate_abid()
@property @property
def ULID(self) -> ulid.ULID: def ULID(self) -> ulid.ULID:

View file

@ -12,7 +12,7 @@ from signal_webhooks.models import WebhookBase
from django_stubs_ext.db.models import TypedModelMeta from django_stubs_ext.db.models import TypedModelMeta
from abid_utils.models import ABIDModel from abid_utils.models import ABIDModel, ABIDField
def generate_secret_token() -> str: def generate_secret_token() -> str:
@ -21,7 +21,15 @@ def generate_secret_token() -> str:
class APIToken(ABIDModel): class APIToken(ABIDModel):
abid_prefix = 'apt'
abid_ts_src = 'self.created'
abid_uri_src = 'self.token'
abid_subtype_src = 'self.user_id'
abid_rand_src = 'self.id'
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True) id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
abid = ABIDField(prefix=abid_prefix)
user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE) user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
token = models.CharField(max_length=32, default=generate_secret_token, unique=True) token = models.CharField(max_length=32, default=generate_secret_token, unique=True)
@ -42,7 +50,8 @@ class APIToken(ABIDModel):
def __json__(self) -> dict: def __json__(self) -> dict:
return { return {
"TYPE": "APIToken", "TYPE": "APIToken",
"id": str(self.id), "uuid": str(self.id),
"abid": str(self.calculate_abid()),
"user_id": str(self.user.id), "user_id": str(self.user.id),
"user_username": self.user.username, "user_username": self.user.username,
"token": self.token, "token": self.token,
@ -77,9 +86,14 @@ class OutboundWebhook(ABIDModel, WebhookBase):
Model used in place of (extending) signals_webhooks.models.WebhookModel. Swapped using: Model used in place of (extending) signals_webhooks.models.WebhookModel. Swapped using:
settings.SIGNAL_WEBHOOKS_CUSTOM_MODEL = 'api.models.OutboundWebhook' settings.SIGNAL_WEBHOOKS_CUSTOM_MODEL = 'api.models.OutboundWebhook'
""" """
ID_PREFIX = 'whk' abid_prefix = 'whk'
abid_ts_src = 'self.created'
abid_uri_src = 'self.endpoint'
abid_subtype_src = 'self.ref'
abid_rand_src = 'self.id'
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True) uuid = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
abid = ABIDField(prefix=abid_prefix)
WebhookBase._meta.get_field('name').help_text = ( WebhookBase._meta.get_field('name').help_text = (
'Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).') 'Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).')
@ -92,3 +106,4 @@ class OutboundWebhook(ABIDModel, WebhookBase):
class Meta(WebhookBase.Meta): class Meta(WebhookBase.Meta):
verbose_name = 'API Outbound Webhook' verbose_name = 'API Outbound Webhook'

View file

@ -47,6 +47,6 @@ def check_api_token(request, token_data: TokenAuthSchema):
request=request, request=request,
) )
if user: if user:
return {"success": True, "user_id": str(user.id)} return {"success": True, "user_id": str(user.pk)}
return {"success": False, "user_id": None} return {"success": False, "user_id": None}

View file

@ -10,7 +10,7 @@ from ninja import Router, Schema, FilterSchema, Field, Query
from ninja.pagination import paginate from ninja.pagination import paginate
from core.models import Snapshot, ArchiveResult, Tag from core.models import Snapshot, ArchiveResult, Tag
from abid_utils.abid import ABID
router = Router(tags=['Core Models']) router = Router(tags=['Core Models'])
@ -20,9 +20,12 @@ router = Router(tags=['Core Models'])
### ArchiveResult ######################################################################### ### ArchiveResult #########################################################################
class ArchiveResultSchema(Schema): class ArchiveResultSchema(Schema):
id: UUID pk: str
uuid: UUID
abid: str
snapshot_abid: str
snapshot_id: UUID
snapshot_url: str snapshot_url: str
snapshot_tags: str snapshot_tags: str
@ -36,8 +39,16 @@ class ArchiveResultSchema(Schema):
created: datetime created: datetime
@staticmethod @staticmethod
def resolve_id(obj): def resolve_pk(obj):
return obj.uuid return str(obj.pk)
@staticmethod
def resolve_uuid(obj):
return str(obj.uuid)
@staticmethod
def resolve_abid(obj):
return str(obj.ABID)
@staticmethod @staticmethod
def resolve_created(obj): def resolve_created(obj):
@ -47,16 +58,21 @@ class ArchiveResultSchema(Schema):
def resolve_snapshot_url(obj): def resolve_snapshot_url(obj):
return obj.snapshot.url return obj.snapshot.url
@staticmethod
def resolve_snapshot_abid(obj):
return str(obj.snapshot.ABID)
@staticmethod @staticmethod
def resolve_snapshot_tags(obj): def resolve_snapshot_tags(obj):
return obj.snapshot.tags_str() return obj.snapshot.tags_str()
class ArchiveResultFilterSchema(FilterSchema): class ArchiveResultFilterSchema(FilterSchema):
id: Optional[UUID] = Field(None, q='uuid') uuid: Optional[UUID] = Field(None, q='uuid')
# abid: Optional[str] = Field(None, q='abid')
search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains']) search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains'])
snapshot_id: Optional[UUID] = Field(None, q='snapshot_id') snapshot_uuid: Optional[UUID] = Field(None, q='snapshot_uuid')
snapshot_url: Optional[str] = Field(None, q='snapshot__url') snapshot_url: Optional[str] = Field(None, q='snapshot__url')
snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name') snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name')
@ -115,7 +131,9 @@ def get_archiveresult(request, archiveresult_id: str):
class SnapshotSchema(Schema): class SnapshotSchema(Schema):
id: UUID pk: str
uuid: UUID
abid: str
url: str url: str
tags: str tags: str
@ -128,9 +146,17 @@ class SnapshotSchema(Schema):
archiveresults: List[ArchiveResultSchema] archiveresults: List[ArchiveResultSchema]
# @staticmethod @staticmethod
# def resolve_id(obj): def resolve_pk(obj):
# return str(obj.id) return str(obj.pk)
@staticmethod
def resolve_uuid(obj):
return str(obj.uuid)
@staticmethod
def resolve_abid(obj):
return str(obj.ABID)
@staticmethod @staticmethod
def resolve_tags(obj): def resolve_tags(obj):
@ -167,10 +193,10 @@ def list_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_arc
results = filters.filter(qs) results = filters.filter(qs)
return results return results
@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema) @router.get("/snapshot/{snapshot_uuid}", response=SnapshotSchema)
def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True): def get_snapshot(request, snapshot_uuid: str, with_archiveresults: bool=True):
request.with_archiveresults = with_archiveresults request.with_archiveresults = with_archiveresults
snapshot = get_object_or_404(Snapshot, id=snapshot_id) snapshot = get_object_or_404(Snapshot, uuid=snapshot_uuid)
return snapshot return snapshot
@ -179,9 +205,9 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
# snapshot = Snapshot.objects.create(**payload.dict()) # snapshot = Snapshot.objects.create(**payload.dict())
# return snapshot # return snapshot
# #
# @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema) # @router.put("/snapshot/{snapshot_uuid}", response=SnapshotSchema)
# def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema): # def update_snapshot(request, snapshot_uuid: str, payload: SnapshotSchema):
# snapshot = get_object_or_404(Snapshot, id=snapshot_id) # snapshot = get_object_or_404(Snapshot, uuid=snapshot_uuid)
# #
# for attr, value in payload.dict().items(): # for attr, value in payload.dict().items():
# setattr(snapshot, attr, value) # setattr(snapshot, attr, value)
@ -189,9 +215,9 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
# #
# return snapshot # return snapshot
# #
# @router.delete("/snapshot/{snapshot_id}") # @router.delete("/snapshot/{snapshot_uuid}")
# def delete_snapshot(request, snapshot_id: str): # def delete_snapshot(request, snapshot_uuid: str):
# snapshot = get_object_or_404(Snapshot, id=snapshot_id) # snapshot = get_object_or_404(Snapshot, uuid=snapshot_uuid)
# snapshot.delete() # snapshot.delete()
# return {"success": True} # return {"success": True}

View file

@ -164,7 +164,7 @@ class SnapshotActionForm(ActionForm):
class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin): class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
list_display = ('added', 'title_str', 'files', 'size', 'url_str') list_display = ('added', 'title_str', 'files', 'size', 'url_str')
sort_fields = ('title_str', 'url_str', 'added', 'files') sort_fields = ('title_str', 'url_str', 'added', 'files')
readonly_fields = ('info', 'bookmarked', 'added', 'updated') readonly_fields = ('info', 'pk', 'uuid', 'abid', 'calculate_abid', 'bookmarked', 'added', 'updated')
search_fields = ('id', 'url', 'timestamp', 'title', 'tags__name') search_fields = ('id', 'url', 'timestamp', 'title', 'tags__name')
fields = ('timestamp', 'url', 'title', 'tags', *readonly_fields) fields = ('timestamp', 'url', 'title', 'tags', *readonly_fields)
list_filter = ('added', 'updated', 'tags', 'archiveresult__status') list_filter = ('added', 'updated', 'tags', 'archiveresult__status')
@ -213,12 +213,14 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
# </form> # </form>
# ''', # ''',
# csrf.get_token(self.request), # csrf.get_token(self.request),
# obj.id, # obj.pk,
# ) # )
def info(self, obj): def info(self, obj):
return format_html( return format_html(
''' '''
PK: <code style="font-size: 10px; user-select: all">{}</code> &nbsp; &nbsp;
ABID: <code style="font-size: 10px; user-select: all">{}</code> &nbsp; &nbsp;
UUID: <code style="font-size: 10px; user-select: all">{}</code> &nbsp; &nbsp; UUID: <code style="font-size: 10px; user-select: all">{}</code> &nbsp; &nbsp;
Timestamp: <code style="font-size: 10px; user-select: all">{}</code> &nbsp; &nbsp; Timestamp: <code style="font-size: 10px; user-select: all">{}</code> &nbsp; &nbsp;
URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/> URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
@ -230,9 +232,11 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
Extension: {} &nbsp; &nbsp; Extension: {} &nbsp; &nbsp;
<br/><br/> <br/><br/>
<a href="/archive/{}">View Snapshot index </a> &nbsp; &nbsp; <a href="/archive/{}">View Snapshot index </a> &nbsp; &nbsp;
<a href="/admin/core/snapshot/?id__exact={}">View actions </a> <a href="/admin/core/snapshot/?uuid__exact={}">View actions </a>
''', ''',
obj.id, obj.pk,
obj.ABID,
obj.uuid,
obj.timestamp, obj.timestamp,
obj.url_hash, obj.url_hash,
'' if obj.is_archived else '', '' if obj.is_archived else '',
@ -244,7 +248,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
obj.headers and obj.headers.get('Content-Type') or '?', obj.headers and obj.headers.get('Content-Type') or '?',
obj.extension or '?', obj.extension or '?',
obj.timestamp, obj.timestamp,
obj.id, obj.uuid,
) )
@admin.display( @admin.display(
@ -411,38 +415,38 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
class TagAdmin(admin.ModelAdmin): class TagAdmin(admin.ModelAdmin):
list_display = ('slug', 'name', 'num_snapshots', 'snapshots', 'id') list_display = ('slug', 'name', 'num_snapshots', 'snapshots', 'id')
sort_fields = ('id', 'name', 'slug') sort_fields = ('id', 'name', 'slug')
readonly_fields = ('id', 'num_snapshots', 'snapshots') readonly_fields = ('id', 'pk', 'abid', 'calculate_abid', 'num_snapshots', 'snapshots')
search_fields = ('id', 'name', 'slug') search_fields = ('id', 'name', 'slug')
fields = (*readonly_fields, 'name', 'slug') fields = (*readonly_fields, 'name', 'slug')
actions = ['delete_selected'] actions = ['delete_selected']
ordering = ['-id'] ordering = ['-id']
def num_snapshots(self, obj): def num_snapshots(self, tag):
return format_html( return format_html(
'<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>', '<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>',
obj.id, tag.id,
obj.snapshot_set.count(), tag.snapshot_set.count(),
) )
def snapshots(self, obj): def snapshots(self, tag):
total_count = obj.snapshot_set.count() total_count = tag.snapshot_set.count()
return mark_safe('<br/>'.join( return mark_safe('<br/>'.join(
format_html( format_html(
'{} <code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a> {}</code>', '{} <code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a> {}</code>',
snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...', snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...',
snap.id, snap.pk,
snap.timestamp, snap.abid,
snap.url, snap.url,
) )
for snap in obj.snapshot_set.order_by('-updated')[:10] for snap in tag.snapshot_set.order_by('-updated')[:10]
) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={obj.id}">and {total_count-10} more...<a>' if obj.snapshot_set.count() > 10 else '')) ) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={tag.id}">and {total_count-10} more...<a>' if tag.snapshot_set.count() > 10 else ''))
@admin.register(ArchiveResult, site=archivebox_admin) @admin.register(ArchiveResult, site=archivebox_admin)
class ArchiveResultAdmin(admin.ModelAdmin): class ArchiveResultAdmin(admin.ModelAdmin):
list_display = ('id', 'start_ts', 'extractor', 'snapshot_str', 'tags_str', 'cmd_str', 'status', 'output_str') list_display = ('id', 'start_ts', 'extractor', 'snapshot_str', 'tags_str', 'cmd_str', 'status', 'output_str')
sort_fields = ('start_ts', 'extractor', 'status') sort_fields = ('start_ts', 'extractor', 'status')
readonly_fields = ('id', 'uuid', 'snapshot_str', 'tags_str') readonly_fields = ('id', 'ABID', 'snapshot_str', 'tags_str')
search_fields = ('id', 'uuid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp') search_fields = ('id', 'uuid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
fields = (*readonly_fields, 'snapshot', 'extractor', 'status', 'start_ts', 'end_ts', 'output', 'pwd', 'cmd', 'cmd_version') fields = (*readonly_fields, 'snapshot', 'extractor', 'status', 'start_ts', 'end_ts', 'output', 'pwd', 'cmd', 'cmd_version')
autocomplete_fields = ['snapshot'] autocomplete_fields = ['snapshot']
@ -454,31 +458,31 @@ class ArchiveResultAdmin(admin.ModelAdmin):
@admin.display( @admin.display(
description='snapshot' description='snapshot'
) )
def snapshot_str(self, obj): def snapshot_str(self, result):
return format_html( return format_html(
'<a href="/archive/{}/index.html"><b><code>[{}]</code></b></a><br/>' '<a href="/archive/{}/index.html"><b><code>[{}]</code></b></a><br/>'
'<small>{}</small>', '<small>{}</small>',
obj.snapshot.timestamp, result.snapshot.timestamp,
obj.snapshot.timestamp, result.snapshot.timestamp,
obj.snapshot.url[:128], result.snapshot.url[:128],
) )
@admin.display( @admin.display(
description='tags' description='tags'
) )
def tags_str(self, obj): def tags_str(self, result):
return obj.snapshot.tags_str() return result.snapshot.tags_str()
def cmd_str(self, obj): def cmd_str(self, result):
return format_html( return format_html(
'<pre>{}</pre>', '<pre>{}</pre>',
' '.join(obj.cmd) if isinstance(obj.cmd, list) else str(obj.cmd), ' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd),
) )
def output_str(self, obj): def output_str(self, result):
return format_html( return format_html(
'<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>', '<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
obj.snapshot.timestamp, result.snapshot.timestamp,
obj.output if (obj.status == 'succeeded') and obj.extractor not in ('title', 'archive_org') else 'index.html', result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html',
obj.output, result.output,
) )

View file

@ -6,6 +6,7 @@ from django_stubs_ext.db.models import TypedModelMeta
import json import json
import uuid
from uuid import uuid4 from uuid import uuid4
from pathlib import Path from pathlib import Path
@ -17,7 +18,7 @@ from django.urls import reverse
from django.db.models import Case, When, Value, IntegerField from django.db.models import Case, When, Value, IntegerField
from django.contrib.auth.models import User # noqa from django.contrib.auth.models import User # noqa
from abid_utils.models import ABIDModel from abid_utils.models import ABIDModel, ABIDField
from ..config import ARCHIVE_DIR, ARCHIVE_DIR_NAME from ..config import ARCHIVE_DIR, ARCHIVE_DIR_NAME
from ..system import get_dir_size from ..system import get_dir_size
@ -58,6 +59,8 @@ class Tag(ABIDModel):
abid_rand_src = 'self.id' abid_rand_src = 'self.id'
id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID') id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
abid = ABIDField(prefix=abid_prefix)
# no uuid on Tags
name = models.CharField(unique=True, blank=False, max_length=100) name = models.CharField(unique=True, blank=False, max_length=100)
@ -108,9 +111,9 @@ class Snapshot(ABIDModel):
abid_subtype_src = '"01"' abid_subtype_src = '"01"'
abid_rand_src = 'self.id' abid_rand_src = 'self.id'
id = models.UUIDField(primary_key=True, default=uuid4, editable=True) id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) # legacy pk
uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
# ulid = models.CharField(max_length=26, null=True, blank=True, db_index=True, unique=True) abid = ABIDField(prefix=abid_prefix)
url = models.URLField(unique=True, db_index=True) url = models.URLField(unique=True, db_index=True)
timestamp = models.CharField(max_length=32, unique=True, db_index=True) timestamp = models.CharField(max_length=32, unique=True, db_index=True)
@ -153,7 +156,7 @@ class Snapshot(ABIDModel):
return load_link_details(self.as_link()) return load_link_details(self.as_link())
def tags_str(self, nocache=True) -> str | None: def tags_str(self, nocache=True) -> str | None:
cache_key = f'{self.id}-{(self.updated or self.added).timestamp()}-tags' cache_key = f'{self.pk}-{(self.updated or self.added).timestamp()}-tags'
calc_tags_str = lambda: ','.join(self.tags.order_by('name').values_list('name', flat=True)) calc_tags_str = lambda: ','.join(self.tags.order_by('name').values_list('name', flat=True))
if nocache: if nocache:
tags_str = calc_tags_str() tags_str = calc_tags_str()
@ -200,7 +203,7 @@ class Snapshot(ABIDModel):
@cached_property @cached_property
def archive_size(self): def archive_size(self):
cache_key = f'{str(self.id)[:12]}-{(self.updated or self.added).timestamp()}-size' cache_key = f'{str(self.pk)[:12]}-{(self.updated or self.added).timestamp()}-size'
def calc_dir_size(): def calc_dir_size():
try: try:
@ -272,7 +275,7 @@ class Snapshot(ABIDModel):
tags_id = [] tags_id = []
for tag in tags: for tag in tags:
if tag.strip(): if tag.strip():
tags_id.append(Tag.objects.get_or_create(name=tag)[0].id) tags_id.append(Tag.objects.get_or_create(name=tag)[0].pk)
self.tags.clear() self.tags.clear()
self.tags.add(*tags_id) self.tags.add(*tags_id)
@ -322,9 +325,9 @@ class ArchiveResult(ABIDModel):
abid_rand_src = 'self.uuid' abid_rand_src = 'self.uuid'
EXTRACTOR_CHOICES = EXTRACTOR_CHOICES EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID') id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID') # legacy pk
uuid = models.UUIDField(default=uuid4, editable=True) uuid = models.UUIDField(default=uuid.uuid4, editable=False, unique=True) # legacy uuid
# ulid = models.CharField(max_length=26, null=True, blank=True, db_index=True, unique=True) abid = ABIDField(prefix=abid_prefix)
snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE) snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
extractor = models.CharField(choices=EXTRACTOR_CHOICES, max_length=32) extractor = models.CharField(choices=EXTRACTOR_CHOICES, max_length=32)

View file

@ -62,13 +62,13 @@ INSTALLED_APPS = [
'django.contrib.staticfiles', 'django.contrib.staticfiles',
'django.contrib.admin', 'django.contrib.admin',
'signal_webhooks',
'abid_utils', 'abid_utils',
'core', 'core',
'api', 'api',
'admin_data_views', 'admin_data_views',
'signal_webhooks',
'django_extensions', 'django_extensions',
] ]
@ -248,26 +248,27 @@ DATABASES = {
'TIME_ZONE': TIMEZONE, 'TIME_ZONE': TIMEZONE,
# DB setup is sometimes modified at runtime by setup_django() in config.py # DB setup is sometimes modified at runtime by setup_django() in config.py
}, },
'cache': { # 'cache': {
'ENGINE': 'django.db.backends.sqlite3', # 'ENGINE': 'django.db.backends.sqlite3',
'NAME': CACHE_DB_PATH, # 'NAME': CACHE_DB_PATH,
'OPTIONS': { # 'OPTIONS': {
'timeout': 60, # 'timeout': 60,
'check_same_thread': False, # 'check_same_thread': False,
}, # },
'TIME_ZONE': TIMEZONE, # 'TIME_ZONE': TIMEZONE,
}, # },
} }
MIGRATION_MODULES = {'signal_webhooks': None}
# as much as I'd love this to be a UUID or ULID field, it's not supported yet as of Django 5.0 # as much as I'd love this to be a UUID or ULID field, it's not supported yet as of Django 5.0
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
CACHES = { CACHES = {
'default': {'BACKEND': 'django.core.cache.backends.db.DatabaseCache', 'LOCATION': 'cache'}, 'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'},
'dummy': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}, # 'sqlite': {'BACKEND': 'django.core.cache.backends.db.DatabaseCache', 'LOCATION': 'cache'},
'locmem': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'}, # 'dummy': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'},
'filebased': {"BACKEND": "django.core.cache.backends.filebased.FileBasedCache", "LOCATION": CACHE_DIR / 'cache_filebased'}, # 'filebased': {"BACKEND": "django.core.cache.backends.filebased.FileBasedCache", "LOCATION": CACHE_DIR / 'cache_filebased'},
} }
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend' EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'

View file

@ -226,8 +226,8 @@ class SnapshotView(View):
'<i><b>Next steps:</i></b><br/>' '<i><b>Next steps:</i></b><br/>'
f'- list all the <a href="/archive/{snapshot.timestamp}/" target="_top">Snapshot files <code>.*</code></a><br/>' f'- list all the <a href="/archive/{snapshot.timestamp}/" target="_top">Snapshot files <code>.*</code></a><br/>'
f'- view the <a href="/archive/{snapshot.timestamp}/index.html" target="_top">Snapshot <code>./index.html</code></a><br/>' f'- view the <a href="/archive/{snapshot.timestamp}/index.html" target="_top">Snapshot <code>./index.html</code></a><br/>'
f'- go to the <a href="/admin/core/snapshot/{snapshot.id}/change/" target="_top">Snapshot admin</a> to edit<br/>' f'- go to the <a href="/admin/core/snapshot/{snapshot.pk}/change/" target="_top">Snapshot admin</a> to edit<br/>'
f'- go to the <a href="/admin/core/snapshot/?id__startswith={snapshot.id}" target="_top">Snapshot actions</a> to re-archive<br/>' f'- go to the <a href="/admin/core/snapshot/?uuid__startswith={snapshot.uuid}" target="_top">Snapshot actions</a> to re-archive<br/>'
'- or return to <a href="/" target="_top">the main index...</a></div>' '- or return to <a href="/" target="_top">the main index...</a></div>'
'</center>' '</center>'
), ),

View file

@ -160,7 +160,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
# bump the updated time on the main Snapshot here, this is critical # bump the updated time on the main Snapshot here, this is critical
# to be able to cache summaries of the ArchiveResults for a given # to be able to cache summaries of the ArchiveResults for a given
# snapshot without having to load all the results from the DB each time. # snapshot without having to load all the results from the DB each time.
# (we use {Snapshot.id}-{Snapshot.updated} as the cache key and assume # (we use {Snapshot.pk}-{Snapshot.updated} as the cache key and assume
# ArchiveResults are unchanged as long as the updated timestamp is unchanged) # ArchiveResults are unchanged as long as the updated timestamp is unchanged)
snapshot.save() snapshot.save()
else: else:

View file

@ -118,7 +118,7 @@ def render_django_template(template: str, context: Mapping[str, str]) -> str:
def snapshot_icons(snapshot) -> str: def snapshot_icons(snapshot) -> str:
cache_key = f'{snapshot.id}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons' cache_key = f'{snapshot.pk}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
def calc_snapshot_icons(): def calc_snapshot_icons():
from core.models import EXTRACTOR_CHOICES from core.models import EXTRACTOR_CHOICES

View file

@ -192,6 +192,9 @@ class Link:
if extended: if extended:
info.update({ info.update({
'snapshot_id': self.snapshot_id, 'snapshot_id': self.snapshot_id,
'snapshot_uuid': self.snapshot_uuid,
'snapshot_abid': self.snapshot_abid,
'link_dir': self.link_dir, 'link_dir': self.link_dir,
'archive_path': self.archive_path, 'archive_path': self.archive_path,
@ -261,9 +264,21 @@ class Link:
return to_csv(self, cols=cols or self.field_names(), separator=separator, ljust=ljust) return to_csv(self, cols=cols or self.field_names(), separator=separator, ljust=ljust)
@cached_property @cached_property
def snapshot_id(self): def snapshot(self):
from core.models import Snapshot from core.models import Snapshot
return str(Snapshot.objects.only('id').get(url=self.url).id) return Snapshot.objects.only('uuid').get(url=self.url)
@cached_property
def snapshot_id(self):
return str(self.snapshot.pk)
@cached_property
def snapshot_uuid(self):
return str(self.snapshot.uuid)
@cached_property
def snapshot_abid(self):
return str(self.snapshot.ABID)
@classmethod @classmethod
def field_names(cls): def field_names(cls):

View file

@ -45,7 +45,8 @@ def write_link_to_sql_index(link: Link):
info.pop('tags') info.pop('tags')
try: try:
info["timestamp"] = Snapshot.objects.get(url=link.url).timestamp snapshot = Snapshot.objects.get(url=link.url)
info["timestamp"] = snapshot.timestamp
except Snapshot.DoesNotExist: except Snapshot.DoesNotExist:
while Snapshot.objects.filter(timestamp=info["timestamp"]).exists(): while Snapshot.objects.filter(timestamp=info["timestamp"]).exists():
info["timestamp"] = str(float(info["timestamp"]) + 1.0) info["timestamp"] = str(float(info["timestamp"]) + 1.0)
@ -57,7 +58,7 @@ def write_link_to_sql_index(link: Link):
for entry in entries: for entry in entries:
if isinstance(entry, dict): if isinstance(entry, dict):
result, _ = ArchiveResult.objects.get_or_create( result, _ = ArchiveResult.objects.get_or_create(
snapshot_id=snapshot.id, snapshot_id=snapshot.pk,
extractor=extractor, extractor=extractor,
start_ts=parse_date(entry['start_ts']), start_ts=parse_date(entry['start_ts']),
defaults={ defaults={
@ -71,7 +72,7 @@ def write_link_to_sql_index(link: Link):
) )
else: else:
result, _ = ArchiveResult.objects.update_or_create( result, _ = ArchiveResult.objects.update_or_create(
snapshot_id=snapshot.id, snapshot_id=snapshot.pk,
extractor=extractor, extractor=extractor,
start_ts=parse_date(entry.start_ts), start_ts=parse_date(entry.start_ts),
defaults={ defaults={

View file

@ -39,7 +39,7 @@ def write_search_index(link: Link, texts: Union[List[str], None]=None, out_dir:
backend = import_backend() backend = import_backend()
if snap: if snap:
try: try:
backend.index(snapshot_id=str(snap.id), texts=texts) backend.index(snapshot_id=str(snap.pk), texts=texts)
except Exception as err: except Exception as err:
stderr() stderr()
stderr( stderr(
@ -54,7 +54,7 @@ def query_search_index(query: str, out_dir: Path=OUTPUT_DIR) -> QuerySet:
if search_backend_enabled(): if search_backend_enabled():
backend = import_backend() backend = import_backend()
try: try:
snapshot_ids = backend.search(query) snapshot_pks = backend.search(query)
except Exception as err: except Exception as err:
stderr() stderr()
stderr( stderr(
@ -64,7 +64,7 @@ def query_search_index(query: str, out_dir: Path=OUTPUT_DIR) -> QuerySet:
raise raise
else: else:
# TODO preserve ordering from backend # TODO preserve ordering from backend
qsearch = Snapshot.objects.filter(pk__in=snapshot_ids) qsearch = Snapshot.objects.filter(pk__in=snapshot_pks)
return qsearch return qsearch
return Snapshot.objects.none() return Snapshot.objects.none()
@ -74,9 +74,9 @@ def flush_search_index(snapshots: QuerySet):
if not indexing_enabled() or not snapshots: if not indexing_enabled() or not snapshots:
return return
backend = import_backend() backend = import_backend()
snapshot_ids=(str(pk) for pk in snapshots.values_list('pk',flat=True)) snapshot_pks = (str(pk) for pk in snapshots.values_list('pk', flat=True))
try: try:
backend.flush(snapshot_ids) backend.flush(snapshot_pks)
except Exception as err: except Exception as err:
stderr() stderr()
stderr( stderr(

View file

@ -147,7 +147,7 @@
{% for obj in results %} {% for obj in results %}
<div class="card"> <div class="card">
<div class="card-info"> <div class="card-info">
<a href="{% url 'admin:core_snapshot_change' obj.id %}"> <a href="{% url 'admin:core_snapshot_change' obj.pk %}">
<span class="timestamp">{{obj.added}}</span> <span class="timestamp">{{obj.added}}</span>
</a> </a>
<label> <label>