mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 14:44:29 -04:00
better base_models separation of concerns
This commit is contained in:
parent
51447b9d0a
commit
bab26d6a9b
1 changed files with 244 additions and 154 deletions
|
@ -2,16 +2,19 @@
|
||||||
This file provides the Django ABIDField and ABIDModel base model to inherit from.
|
This file provides the Django ABIDField and ABIDModel base model to inherit from.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import io
|
||||||
|
import csv
|
||||||
|
import json
|
||||||
|
from typing import Any, Dict, Union, List, Set, cast, ClassVar, Iterable
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, Union, List, Set, cast
|
|
||||||
from charidfield import CharIDField # type: ignore[import-untyped]
|
from charidfield import CharIDField # type: ignore[import-untyped]
|
||||||
|
|
||||||
from django.contrib import admin
|
from django.contrib import admin
|
||||||
|
from django.core import checks
|
||||||
from django.core.exceptions import ValidationError, NON_FIELD_ERRORS
|
from django.core.exceptions import ValidationError, NON_FIELD_ERRORS
|
||||||
from django.db import models
|
from django.db import models
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
@ -26,7 +29,9 @@ from django.conf import settings
|
||||||
|
|
||||||
from django_stubs_ext.db.models import TypedModelMeta
|
from django_stubs_ext.db.models import TypedModelMeta
|
||||||
|
|
||||||
|
from tags.models import KVTag, ModelWithKVTags
|
||||||
|
|
||||||
|
from archivebox import DATA_DIR
|
||||||
from archivebox.index.json import to_json
|
from archivebox.index.json import to_json
|
||||||
from archivebox.misc.hashing import get_dir_info
|
from archivebox.misc.hashing import get_dir_info
|
||||||
|
|
||||||
|
@ -45,6 +50,8 @@ from .abid import (
|
||||||
|
|
||||||
####################################################
|
####################################################
|
||||||
|
|
||||||
|
DEFAULT_ICON = '<img src="" alt="Icon"/>'
|
||||||
|
|
||||||
|
|
||||||
# Database Field for typeid/ulid style IDs with a prefix, e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ
|
# Database Field for typeid/ulid style IDs with a prefix, e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ
|
||||||
ABIDField = partial(
|
ABIDField = partial(
|
||||||
|
@ -81,90 +88,168 @@ class ABIDError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
# class LabelType:
|
|
||||||
# """
|
class ModelWithReadOnlyFields(models.Model):
    """
    Base class for models that have some read-only fields enforced by .save().

    Subclasses list attribute names in ``read_only_fields``. On every
    ``save()`` those attributes are compared against the row currently stored
    in the database, and the save is refused if any of them differ.
    """
    read_only_fields: ClassVar[tuple[str, ...]] = ()

    class Meta:
        abstract = True

    def _fresh_from_db(self):
        """Return a freshly loaded copy of this row, or None if it is not in the database."""
        if self.pk is None:
            # no primary key yet -> nothing can be stored under it, skip the query
            return None

        model = self.__class__
        try:
            # managers can only be reached through the model class, never through an instance
            return model._default_manager.get(pk=self.pk)
        except model.DoesNotExist:
            return None

    def diff_from_db(self, keys: Iterable[str]=()) -> dict[str, tuple[Any, Any]]:
        """
        Get a dictionary of the fields that have changed from the values in the database.

        Args:
            keys: attribute names to compare; when empty, every name returned
                by ``self._meta.get_fields()`` is compared.

        Returns:
            ``{name: (value_in_db, value_on_self)}`` for each name whose values
            differ. Empty when nothing differs or when no stored row exists.
        """
        keys = keys or [field.name for field in self._meta.get_fields()]
        if not keys:
            return {}

        in_db = self._fresh_from_db()
        if not in_db:
            return {}

        diff = {}
        for field in keys:
            new_value = getattr(self, field, None)
            existing_value = getattr(in_db, field, None)
            if new_value != existing_value:
                diff[field] = (existing_value, new_value)
        return diff

    def save(self, *args, **kwargs) -> None:
        """
        Save the object, refusing to change any field listed in ``read_only_fields``.

        Raises:
            AttributeError: if a read-only field differs from the stored value.
        """
        # Only diff when read-only fields are declared: an empty tuple would make
        # diff_from_db() fall back to *all* fields and reject every ordinary change.
        if self.read_only_fields:
            diff = self.diff_from_db(keys=self.read_only_fields)
            if diff:
                changed_key, (existing_value, new_value) = next(iter(diff.items()))
                raise AttributeError(f'{self}.{changed_key} is read-only and cannot be changed from {existing_value} -> {new_value}')
        super().save(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
# class Label(models.Model):
|
class ModelWithUUID(ModelWithReadOnlyFields, ModelWithKVTags):
    """
    Abstract base model with a UUID primary key and a creation timestamp,
    plus small helpers for building, updating and dumping instances.
    """

    read_only_fields = ('id', 'created_at')

    id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
    created_at = AutoDateTimeField(default=None, null=False, db_index=True)

    class Meta(TypedModelMeta):
        abstract = True

    # attribute names emitted by as_json() when no explicit keys are passed;
    # names the instance does not have are silently skipped
    default_json_keys: ClassVar[tuple[str, ...]] = (
        'TYPE',
        'id',
        'abid',
        'str',
        'modified_at',
        'created_at',
        'created_by_id',
        'status',
        'retry_at',
        'notes',
    )

    @classmethod
    def from_dict(cls, fields: dict[str, Any]) -> 'Self':
        """Build an unsaved instance from a dict, ignoring keys that are not attributes of the class"""
        # annotation is quoted so it is never evaluated at class-definition time
        init_kwargs = {k: v for k, v in fields.items() if hasattr(cls, k)}
        return cls(**init_kwargs)

    def update(self, **kwargs) -> None:
        """Update the object's properties from a dict, then save it"""
        for key, value in kwargs.items():
            setattr(self, key, value)
        self.save()

    def as_json(self, keys: Iterable[str]=()) -> dict:
        """
        Get the object's properties as a dict.

        Uses ``keys`` when given, otherwise ``default_json_keys``; names the
        object does not have are left out of the result.
        """
        missing = object()
        values = {}
        for key in (keys or self.default_json_keys):
            # single lookup per key so properties are not evaluated twice
            value = getattr(self, key, missing)
            if value is not missing:
                values[key] = value
        return benedict(values)

    @classproperty
    def TYPE(cls) -> str:
        """Get the full Python dotted-import path for this model, e.g. 'core.models.Snapshot'"""
        return f'{cls.__module__}.{cls.__name__}'

    @property
    def admin_change_url(self) -> str:
        """get the admin URL e.g. /admin/core/snapshot/abcd-1234-1234-asdfjkl23jsdf4/change/"""
        return f"/admin/{self._meta.app_label}/{self._meta.model_name}/{self.pk}/change/"
|
||||||
#
|
|
||||||
# class ModelWithLabels(models.Model):
|
|
||||||
# labels = GenericRelation(Label)
|
|
||||||
#
|
|
||||||
# def UUID(self) -> uuid4.UUID:
|
|
||||||
# return uuid4.UUID(self.labels.filter(reftype=Label.RefTypeChoices.UUID).first().id)
|
|
||||||
#
|
|
||||||
# def ABID(self) -> ABID:
|
|
||||||
# return ABID.parse(self.labels.filter(reftype=Label.RefTypeChoices.ABID).first().id)
|
|
||||||
|
|
||||||
|
|
||||||
class ABIDModel(models.Model):
|
class ModelWithSerializers(ModelWithUUID):
    """
    Adds simple CSV / JSONL / HTML renderings of an object on top of as_json().

    The HTML helpers are string-built placeholders (see the TODOs below);
    object data is HTML-escaped before being interpolated into markup.
    """

    def as_csv_row(self, keys: Iterable[str]=(), separator: str=',') -> str:
        """
        Get the object's properties as a csv string.

        Args:
            keys: attribute names to emit, defaults to the keys of as_json().
            separator: delimiter passed to the csv writer.

        Returns:
            One CSV record as written by csv.writer, including its line terminator.
        """
        keys = keys or self.as_json().keys()
        # use the real csv lib so values containing the separator or quotes are quoted properly
        buffer = io.StringIO()
        csv_writer = csv.writer(buffer, delimiter=separator)
        csv_writer.writerow(
            str(getattr(self, key, ''))
            for key in keys
        )
        return buffer.getvalue()

    def as_jsonl_row(self, keys: Iterable[str]=(), **json_kwargs) -> str:
        """
        Get the object's properties as a jsonl string.

        Args:
            keys: attribute names to emit, defaults to the keys of as_json().
            **json_kwargs: extra json.dumps options, these override the defaults.
        """
        keys = keys or self.as_json().keys()
        return json.dumps({
            key: getattr(self, key, '')
            for key in keys
            # default=str: values such as UUIDs and datetimes are not JSON serializable
            # on their own and would otherwise raise TypeError
        }, **{'sort_keys': True, 'indent': None, 'default': str, **json_kwargs})

    def as_html_icon(self) -> str:
        """Get a representation of this object as a simple html <img> tag or emoji"""
        # render snapshot_detail.html template with self as context and return html string
        return DEFAULT_ICON

    def as_html_row(self) -> str:
        """Get a representation of this object as a static html table <tr>...</tr> string"""
        from html import escape

        # render snapshot_detail.html template with self as context and return html string
        # TODO: replace with a real django template
        # the icon is already markup; the csv text is object data, so escape it
        return f'<tr><td>{self.as_html_icon()}</td><td>{escape(self.as_csv_row())}</td></tr>'

    def as_html_embed(self) -> str:
        """Get a representation of this object suitable for embedding inside a roughly 400x300px iframe"""
        # render snapshot_detail.html template with self as context and return html string
        # TODO: replace with a real django template
        return f'{self.as_html_row()}'

    def as_html_fullpage(self) -> str:
        """Get a static html page representation of this object"""
        from html import escape

        # TODO: replace with a real django template
        title = escape(str(self))              # str(self) is object data, not markup
        json_dump = escape(self.as_jsonl_row())
        return f'''
            <html>
                <head>
                    <title>{title}</title>
                </head>
                <body>
                    <header>
                        <h1>{title}</h1>
                        <pre>{json_dump}</pre>
                    </header>
                    <hr/>
                    <article>
                        {self.as_html_embed()}
                    </article>
                </body>
            </html>
        '''
|
||||||
|
|
||||||
|
|
||||||
|
class ABIDModel(ModelWithReadOnlyFields, ModelWithUUID):
|
||||||
"""
|
"""
|
||||||
Abstract Base Model for other models to depend on. Provides ArchiveBox ID (ABID) interface and other helper methods.
|
Abstract Base Model for other models to depend on. Provides ArchiveBox ID (ABID) interface and other helper methods.
|
||||||
"""
|
"""
|
||||||
|
@ -173,46 +258,24 @@ class ABIDModel(models.Model):
|
||||||
abid_uri_src = 'None' # e.g. 'self.uri' (MUST BE SET)
|
abid_uri_src = 'None' # e.g. 'self.uri' (MUST BE SET)
|
||||||
abid_subtype_src = 'self.__class__.__name__' # e.g. 'self.extractor'
|
abid_subtype_src = 'self.__class__.__name__' # e.g. 'self.extractor'
|
||||||
abid_rand_src = 'self.id' # e.g. 'self.uuid' or 'self.id'
|
abid_rand_src = 'self.id' # e.g. 'self.uuid' or 'self.id'
|
||||||
abid_salt: str = DEFAULT_ABID_URI_SALT # combined with self.uri to anonymize hashes on a per-install basis (default is shared globally with all users, means everyone will hash ABC to -> 123 the same around the world, makes it easy to share ABIDs across installs and see if they are for the same URI. Change this if you dont want your hashes to be guessable / in the same hash space as all other users)
|
|
||||||
abid_drift_allowed: bool = False # set to True to allow abid_field values to change after a fixed ABID has been issued (NOT RECOMMENDED: means values can drift out of sync from original ABID)
|
abid_drift_allowed: bool = False # set to True to allow abid_field values to change after a fixed ABID has been issued (NOT RECOMMENDED: means values can drift out of sync from original ABID)
|
||||||
|
abid_salt: str = DEFAULT_ABID_URI_SALT # combined with self.uri to anonymize hashes on a per-install basis (default is shared globally with all users, means everyone will hash ABC to -> 123 the same around the world, makes it easy to share ABIDs across installs and see if they are for the same URI. Change this if you dont want your hashes to be guessable / in the same hash space as all other users)
|
||||||
|
|
||||||
|
# **all abid_*_src fields listed above should be in read_only_fields!
|
||||||
|
read_only_fields = ('id', 'abid', 'created_at', 'created_by')
|
||||||
|
|
||||||
id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
|
id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
|
||||||
abid = ABIDField(prefix=abid_prefix)
|
abid = ABIDField(prefix=abid_prefix)
|
||||||
|
|
||||||
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, db_index=True)
|
|
||||||
created_at = AutoDateTimeField(default=None, null=False, db_index=True)
|
created_at = AutoDateTimeField(default=None, null=False, db_index=True)
|
||||||
|
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, db_index=True)
|
||||||
modified_at = models.DateTimeField(auto_now=True)
|
modified_at = models.DateTimeField(auto_now=True)
|
||||||
|
|
||||||
# labels = GenericRelation(Label)
|
|
||||||
|
|
||||||
# if ModelWithNotesMixin model:
|
|
||||||
# notes = models.TextField(blank=True, null=False, default='', help_text='Any extra notes this snapshot should have')
|
|
||||||
|
|
||||||
# if StateMachineMixin model:
|
|
||||||
# retry_at = models.DateTimeField(default=None, null=True, db_index=True)
|
|
||||||
# status = models.CharField(max_length=16, choices=StatusChoices.choices, default=StatusChoices.QUEUED)
|
|
||||||
#
|
|
||||||
# StatusChoices: ClassVar[Type[DefaultStatusChoices]] = DefaultStatusChoices
|
|
||||||
# state_machine_attr: ClassVar[str] = 'sm'
|
|
||||||
# state_machine_name: ClassVar[str] = 'core.statemachines.ArchiveResultMachine'
|
|
||||||
# retry_at_field_name: ClassVar[str] = 'retry_at'
|
|
||||||
# state_field_name: ClassVar[str] = 'status'
|
|
||||||
# active_state: ClassVar[str] = StatusChoices.STARTED
|
|
||||||
|
|
||||||
# if ModelWithHealthStats model:
|
|
||||||
# num_uses_failed = models.PositiveIntegerField(default=0)
|
|
||||||
# num_uses_succeeded = models.PositiveIntegerField(default=0)
|
|
||||||
|
|
||||||
_prefetched_objects_cache: Dict[str, Any]
|
_prefetched_objects_cache: Dict[str, Any]
|
||||||
|
|
||||||
class Meta(TypedModelMeta):
|
class Meta(TypedModelMeta):
|
||||||
abstract = True
|
abstract = True
|
||||||
|
|
||||||
@classproperty
|
|
||||||
def TYPE(cls) -> str:
|
|
||||||
"""Get the full Python dotted-import path for this model, e.g. 'core.models.Snapshot'"""
|
|
||||||
return f'{cls.__module__}.{cls.__name__}'
|
|
||||||
|
|
||||||
@admin.display(description='Summary')
|
@admin.display(description='Summary')
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return f'[{self.abid or (self.abid_prefix + "NEW")}] {self.__class__.__name__} {eval(self.abid_uri_src)}'
|
return f'[{self.abid or (self.abid_prefix + "NEW")}] {self.__class__.__name__} {eval(self.abid_uri_src)}'
|
||||||
|
@ -227,15 +290,29 @@ class ABIDModel(models.Model):
|
||||||
# (ordinarily fields cant depend on other fields until the obj is saved to db and recalled)
|
# (ordinarily fields cant depend on other fields until the obj is saved to db and recalled)
|
||||||
self._init_timestamp = ts_from_abid(abid_part_from_ts(timezone.now()))
|
self._init_timestamp = ts_from_abid(abid_part_from_ts(timezone.now()))
|
||||||
|
|
||||||
def check(self):
|
@classmethod
def check(cls, **kwargs):
    """
    Extend the model's system checks with the ABIDModel field requirements.

    Returns the errors from super().check() plus one checks.Error for each
    required attribute the class does not define.
    """
    errors = super().check(**kwargs)

    # Explicit tests rather than assert statements: asserts are stripped under
    # `python -O`, and a single try block would only report the first failure.
    required_fields = (
        ('id', 'an'),
        ('abid', 'an'),
        ('created_at', 'a'),
        ('modified_at', 'a'),
        ('created_by', 'a'),
    )
    for field_name, article in required_fields:
        if hasattr(cls, field_name):
            continue
        errors.append(checks.Error(
            f'{cls.__name__}: All ABIDModel subclasses must define {article} {field_name} field',
            # hint='...',
            obj=cls,
            id=f"{cls.__module__}.{cls.__name__}.E001",
        ))
    return errors
|
||||||
|
|
||||||
def clean(self, abid_drift_allowed: bool | None=None) -> None:
|
def clean(self, abid_drift_allowed: bool | None=None) -> None:
|
||||||
|
# TODO: ideally issuing new ABIDs should be farmed out to a separate service that makes sure they're all unique and monotonic
|
||||||
|
# but for now this works and is much faster, we just calculate ABID on first save, and warn if updating any fields would ever invalidate it
|
||||||
|
|
||||||
if self._state.adding:
|
if self._state.adding:
|
||||||
# only runs once when a new object is first saved to the DB
|
# only runs once when a new object is first saved to the DB
|
||||||
# sets self.id, self.pk, self.created_by, self.created_at, self.modified_at
|
# sets self.id, self.pk, self.created_by, self.created_at, self.modified_at
|
||||||
|
@ -362,6 +439,8 @@ class ABIDModel(models.Model):
|
||||||
def issue_new_abid(self, overwrite=False) -> ABID:
|
def issue_new_abid(self, overwrite=False) -> ABID:
|
||||||
"""
|
"""
|
||||||
Issue a new ABID based on the current object's properties, can only be called once on new objects (before they are saved to DB).
|
Issue a new ABID based on the current object's properties, can only be called once on new objects (before they are saved to DB).
|
||||||
|
TODO: eventually we should move this to a separate service that makes sure they're all unique and monotonic
|
||||||
|
perhaps it could be moved to a KVTag as well, and we could just use the KVTag service + Events to issue new ABIDs
|
||||||
"""
|
"""
|
||||||
if not overwrite:
|
if not overwrite:
|
||||||
assert self._state.adding, 'Can only issue new ABID when model._state.adding is True'
|
assert self._state.adding, 'Can only issue new ABID when model._state.adding is True'
|
||||||
|
@ -373,7 +452,7 @@ class ABIDModel(models.Model):
|
||||||
self.pk = self.id
|
self.pk = self.id
|
||||||
self.created_at = self.created_at or self._init_timestamp # cut off precision to match precision of TS component
|
self.created_at = self.created_at or self._init_timestamp # cut off precision to match precision of TS component
|
||||||
self.modified_at = self.modified_at or self.created_at
|
self.modified_at = self.modified_at or self.created_at
|
||||||
self.created_by_id = (hasattr(self, 'created_by_id') and self.created_by_id) or get_or_create_system_user_pk()
|
self.created_by_id = getattr(self, 'created_by_id', None) or get_or_create_system_user_pk()
|
||||||
|
|
||||||
# Compute fresh ABID values & hashes based on object's live properties
|
# Compute fresh ABID values & hashes based on object's live properties
|
||||||
abid_fresh_values = self.ABID_FRESH_VALUES
|
abid_fresh_values = self.ABID_FRESH_VALUES
|
||||||
|
@ -444,18 +523,24 @@ class ABIDModel(models.Model):
|
||||||
"""
|
"""
|
||||||
return f'/api/v1/docs#/{self._meta.app_label.title()}%20Models/api_v1_{self._meta.app_label}_get_{self._meta.db_table}'
|
return f'/api/v1/docs#/{self._meta.app_label.title()}%20Models/api_v1_{self._meta.app_label}_get_{self._meta.db_table}'
|
||||||
|
|
||||||
@property
|
|
||||||
def admin_change_url(self) -> str:
|
|
||||||
return f"/admin/{self._meta.app_label}/{self._meta.model_name}/{self.pk}/change/"
|
|
||||||
|
|
||||||
def get_absolute_url(self):
|
|
||||||
return self.api_docs_url
|
|
||||||
|
|
||||||
def update_for_workers(self, **update_kwargs) -> bool:
|
|
||||||
"""Immediately update the **kwargs on the object in DB, and reset the retry_at to now()"""
|
# class ModelWithStateMachine(models.Model):
|
||||||
updated = bool(self._meta.model.objects.filter(pk=self.pk).update(**{'retry_at': timezone.now(), **update_kwargs}))
|
# ... see workers/models.py ...
|
||||||
self.refresh_from_db()
|
# retry_at = models.DateTimeField(default=None, null=True, db_index=True)
|
||||||
return updated
|
# status = models.CharField(max_length=16, choices=StatusChoices.choices, default=StatusChoices.QUEUED)
|
||||||
|
|
||||||
|
|
||||||
|
class ModelWithNotes(models.Model):
    """
    Abstract mixin that contributes a single free-form ``notes`` text field
    to any model that inherits from it.
    """

    class Meta:
        abstract = True

    # label = models.CharField(max_length=63, blank=True, null=False, default='', help_text='A custom label for this object')
    notes = models.TextField(
        blank=True,
        null=False,
        default='',
        help_text='Any extra extra custom notes',
    )
|
||||||
|
|
||||||
|
|
||||||
class ModelWithHealthStats(models.Model):
|
class ModelWithHealthStats(models.Model):
|
||||||
|
@ -465,15 +550,15 @@ class ModelWithHealthStats(models.Model):
|
||||||
class Meta:
|
class Meta:
|
||||||
abstract = True
|
abstract = True
|
||||||
|
|
||||||
def record_health_failure(self) -> None:
|
def increment_num_uses_failed(self) -> None:
|
||||||
self.num_uses_failed += 1
|
self.num_uses_failed += 1
|
||||||
self.save()
|
self.save()
|
||||||
|
|
||||||
def record_health_success(self) -> None:
|
def increment_num_uses_succeeded(self) -> None:
|
||||||
self.num_uses_succeeded += 1
|
self.num_uses_succeeded += 1
|
||||||
self.save()
|
self.save()
|
||||||
|
|
||||||
def reset_health(self) -> None:
|
def reset_health_counts(self) -> None:
|
||||||
# move all the failures to successes when resetting so we dont lose track of the total count
|
# move all the failures to successes when resetting so we dont lose track of the total count
|
||||||
self.num_uses_succeeded = self.num_uses_failed + self.num_uses_succeeded
|
self.num_uses_succeeded = self.num_uses_failed + self.num_uses_succeeded
|
||||||
self.num_uses_failed = 0
|
self.num_uses_failed = 0
|
||||||
|
@ -486,7 +571,7 @@ class ModelWithHealthStats(models.Model):
|
||||||
return round(success_pct)
|
return round(success_pct)
|
||||||
|
|
||||||
|
|
||||||
class ModelWithConfig(ABIDModel):
|
class ModelWithConfig(models.Model):
|
||||||
"""
|
"""
|
||||||
Base Model that adds a config property to any ABIDModel.
|
Base Model that adds a config property to any ABIDModel.
|
||||||
This config is retrieved by abx.pm.hook.get_scope_config(...) later whenever this model is used.
|
This config is retrieved by abx.pm.hook.get_scope_config(...) later whenever this model is used.
|
||||||
|
@ -509,7 +594,7 @@ class ModelWithConfig(ABIDModel):
|
||||||
# }
|
# }
|
||||||
|
|
||||||
|
|
||||||
class ModelWithOutputDir(ABIDModel):
|
class ModelWithOutputDir(ModelsWithSerializers, ModelWithUUID, ABIDModel):
|
||||||
"""
|
"""
|
||||||
Base Model that adds an output_dir property to any ABIDModel.
|
Base Model that adds an output_dir property to any ABIDModel.
|
||||||
|
|
||||||
|
@ -556,37 +641,36 @@ class ModelWithOutputDir(ABIDModel):
|
||||||
@property
|
@property
|
||||||
def OUTPUT_DIR(self) -> Path:
|
def OUTPUT_DIR(self) -> Path:
|
||||||
"""Get absolute filesystem directory Path that holds that data for this object e.g. Path('/data/snapshots/snp_2342353k2jn3j32l4324')"""
|
"""Get absolute filesystem directory Path that holds that data for this object e.g. Path('/data/snapshots/snp_2342353k2jn3j32l4324')"""
|
||||||
from archivebox import DATA_DIR
|
|
||||||
return DATA_DIR / self.output_dir_str # e.g. /data/snapshots/snp_2342353k2jn3j32l4324
|
return DATA_DIR / self.output_dir_str # e.g. /data/snapshots/snp_2342353k2jn3j32l4324
|
||||||
|
|
||||||
def write_indexes(self):
|
def write_indexes(self):
|
||||||
"""Write the Snapshot json, html, and merkle indexes to its output dir"""
|
"""Write the Snapshot json, html, and merkle indexes to its output dir"""
|
||||||
print(f'{type(self).__name__}[{self.ABID}].write_indexes()')
|
print(f'{type(self).__name__}[{self.ABID}].write_indexes()')
|
||||||
self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
self.migrate_output_dir()
|
# self.migrate_output_dir()
|
||||||
self.save_merkle_index()
|
self.save_merkle_index()
|
||||||
self.save_html_index()
|
self.save_html_index()
|
||||||
self.save_symlinks_index()
|
self.save_symlinks_index()
|
||||||
|
|
||||||
def migrate_output_dir(self):
|
# def migrate_output_dir(self):
|
||||||
"""Move the output files to the new folder structure if needed"""
|
# """Move the output files to the new folder structure if needed"""
|
||||||
print(f'{type(self).__name__}[{self.ABID}].migrate_output_dir()')
|
# print(f'{type(self).__name__}[{self.ABID}].migrate_output_dir()')
|
||||||
self.migrate_from_0_7_2()
|
# self.migrate_from_0_7_2()
|
||||||
self.migrate_from_0_8_6()
|
# self.migrate_from_0_8_6()
|
||||||
# ... future migrations here
|
# # ... future migrations here
|
||||||
|
|
||||||
def migrate_from_0_7_2(self) -> None:
|
# def migrate_from_0_7_2(self) -> None:
|
||||||
"""Migrate output_dir generated by ArchiveBox <= 0.7.2 to current version"""
|
# """Migrate output_dir generated by ArchiveBox <= 0.7.2 to current version"""
|
||||||
print(f'{type(self).__name__}[{self.ABID}].migrate_from_0_7_2()')
|
# print(f'{type(self).__name__}[{self.ABID}].migrate_from_0_7_2()')
|
||||||
# move /data/archive/<timestamp> -> /data/archive/snapshots/<abid>
|
# # move /data/archive/<timestamp> -> /data/archive/snapshots/<abid>
|
||||||
# update self.output_path = /data/archive/snapshots/<abid>
|
# # update self.output_path = /data/archive/snapshots/<abid>
|
||||||
pass
|
# pass
|
||||||
|
|
||||||
def migrate_from_0_8_6(self) -> None:
|
# def migrate_from_0_8_6(self) -> None:
|
||||||
"""Migrate output_dir generated by ArchiveBox <= 0.8.6 to current version"""
|
# """Migrate output_dir generated by ArchiveBox <= 0.8.6 to current version"""
|
||||||
# ... future migration code here ...
|
# # ... future migration code here ...
|
||||||
print(f'{type(self).__name__}[{self.ABID}].migrate_from_0_8_6()')
|
# print(f'{type(self).__name__}[{self.ABID}].migrate_from_0_8_6()')
|
||||||
pass
|
# pass
|
||||||
|
|
||||||
def save_merkle_index(self, **kwargs) -> None:
|
def save_merkle_index(self, **kwargs) -> None:
|
||||||
"""Write the ./.index.merkle file to the output dir"""
|
"""Write the ./.index.merkle file to the output dir"""
|
||||||
|
@ -603,11 +687,13 @@ class ModelWithOutputDir(ABIDModel):
|
||||||
(self.OUTPUT_DIR / 'index.html').write_text(self.as_html())
|
(self.OUTPUT_DIR / 'index.html').write_text(self.as_html())
|
||||||
|
|
||||||
def save_json_index(self, **kwargs) -> None:
|
def save_json_index(self, **kwargs) -> None:
|
||||||
|
"""Save a JSON dump of the object to the output dir"""
|
||||||
print(f'{type(self).__name__}[{self.ABID}].save_json_index()')
|
print(f'{type(self).__name__}[{self.ABID}].save_json_index()')
|
||||||
# write self.as_json() to self.output_dir / 'index.json'
|
# write self.as_json() to self.output_dir / 'index.json'
|
||||||
(self.OUTPUT_DIR / 'index.json').write_text(to_json(self.as_json()))
|
(self.OUTPUT_DIR / 'index.json').write_text(to_json(self.as_json()))
|
||||||
|
|
||||||
def save_symlinks_index(self) -> None:
|
def save_symlinks_index(self) -> None:
|
||||||
|
"""Set up the symlink farm pointing to this object's data"""
|
||||||
print(f'{type(self).__name__}[{self.ABID}].save_symlinks_index()')
|
print(f'{type(self).__name__}[{self.ABID}].save_symlinks_index()')
|
||||||
# ln -s ../../../../self.output_dir data/index/snapshots_by_date/2024-01-01/example.com/<abid>
|
# ln -s ../../../../self.output_dir data/index/snapshots_by_date/2024-01-01/example.com/<abid>
|
||||||
# ln -s ../../../../self.output_dir data/index/snapshots_by_domain/example.com/2024-01-01/<abid>
|
# ln -s ../../../../self.output_dir data/index/snapshots_by_domain/example.com/2024-01-01/<abid>
|
||||||
|
@ -693,10 +779,12 @@ def find_model_from_abid(abid: ABID) -> type[models.Model] | None:
|
||||||
|
|
||||||
def find_obj_from_abid_rand(rand: Union[ABID, str], model=None) -> List[ABIDModel]:
|
def find_obj_from_abid_rand(rand: Union[ABID, str], model=None) -> List[ABIDModel]:
|
||||||
"""
|
"""
|
||||||
|
This is a huge hack and should only be used for debugging, never use this in real code / expose this to users.
|
||||||
|
|
||||||
Find an object corresponding to an ABID by exhaustively searching using its random suffix (slow).
|
Find an object corresponding to an ABID by exhaustively searching using its random suffix (slow).
|
||||||
e.g. 'obj_....................JYRPAQ' -> Snapshot('snp_01BJQMF54D093DXEAWZ6JYRPAQ')
|
e.g. 'obj_....................JYRPAQ' -> Snapshot('snp_01BJQMF54D093DXEAWZ6JYRPAQ')
|
||||||
Honestly should only be used for debugging, no reason to expose this ability to users.
|
|
||||||
"""
|
"""
|
||||||
|
raise Exception('THIS FUNCTION IS FOR DEBUGGING ONLY, comment this line out temporarily when you need to use it, but dont commit it!')
|
||||||
|
|
||||||
# convert str to ABID if necessary
|
# convert str to ABID if necessary
|
||||||
if isinstance(rand, ABID):
|
if isinstance(rand, ABID):
|
||||||
|
@ -783,3 +871,5 @@ def find_obj_from_abid(abid: ABID, model=None, fuzzy=False) -> Any:
|
||||||
|
|
||||||
raise model.DoesNotExist
|
raise model.DoesNotExist
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue