diff --git a/archivebox/abid_utils/admin.py b/archivebox/abid_utils/admin.py index 95e48641..3adf4b34 100644 --- a/archivebox/abid_utils/admin.py +++ b/archivebox/abid_utils/admin.py @@ -69,7 +69,6 @@ def get_abid_info(self, obj, request=None):     SUBTYPE:       {}           {}                           {} {}: {}
    RAND:             {}       {}                 {} {}: {}

-     .old_id:                {}
''', obj.api_url + (f'?api_key={get_or_create_api_token(request.user)}' if request and request.user else ''), obj.api_url, obj.api_docs_url, @@ -82,7 +81,6 @@ def get_abid_info(self, obj, request=None): highlight_diff(obj.ABID.uri, derived_uri), highlight_diff(str(obj.ABID.uuid)[14:26], str(fresh_abid.uuid)[14:26]), mark_safe(uri_diff), obj.abid_uri_src, str(obj.ABID_FRESH_VALUES['uri']), highlight_diff(obj.ABID.subtype, derived_subtype), highlight_diff(str(obj.ABID.uuid)[26:28], str(fresh_abid.uuid)[26:28]), mark_safe(subtype_diff), obj.abid_subtype_src, str(obj.ABID_FRESH_VALUES['subtype']), highlight_diff(obj.ABID.rand, derived_rand), highlight_diff(str(obj.ABID.uuid)[28:36], str(fresh_abid.uuid)[28:36]), mark_safe(rand_diff), obj.abid_rand_src, str(obj.ABID_FRESH_VALUES['rand'])[-7:], - highlight_diff(getattr(obj, 'old_id', ''), obj.pk), ) except Exception as e: return str(e) diff --git a/archivebox/abid_utils/models.py b/archivebox/abid_utils/models.py index 55aacd13..a860c69d 100644 --- a/archivebox/abid_utils/models.py +++ b/archivebox/abid_utils/models.py @@ -73,27 +73,30 @@ class ABIDModel(models.Model): """ Abstract Base Model for other models to depend on. Provides ArchiveBox ID (ABID) interface. """ - abid_prefix: str = DEFAULT_ABID_PREFIX # e.g. 'tag_' - abid_ts_src = 'None' # e.g. 'self.created' - abid_uri_src = 'None' # e.g. 'self.uri' - abid_subtype_src = 'None' # e.g. 'self.extractor' - abid_rand_src = 'None' # e.g. 'self.uuid' or 'self.id' + abid_prefix: str = DEFAULT_ABID_PREFIX # e.g. 'tag_' + abid_ts_src = 'self.created' # e.g. 'self.created' + abid_uri_src = 'None' # e.g. 'self.uri' + abid_subtype_src = 'self.__class__.__name__' # e.g. 'self.extractor' + abid_rand_src = 'self.id' # e.g. 'self.uuid' or 'self.id' abid_salt: str = DEFAULT_ABID_URI_SALT - # id = models.UUIDField(primary_key=True, default=uuid4, editable=True) - # uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True) - abid = ABIDField(prefix=abid_prefix) + # id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID') + # abid = ABIDField(prefix=abid_prefix) - created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk) - created = AutoDateTimeField(default=None, null=False, db_index=True) - modified = models.DateTimeField(auto_now=True) + # created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False) + # created = AutoDateTimeField(default=None, null=False, db_index=True) + # modified = models.DateTimeField(auto_now=True) class Meta(TypedModelMeta): abstract = True def save(self, *args: Any, **kwargs: Any) -> None: if self._state.adding: - self.issue_new_abid() + self.pk = self.id = self.id or uuid4() + self.created = ts_from_abid(abid_part_from_ts(timezone.now())) # cut off precision to match precision of TS component + self.modified = self.created + self.created_by = self.created_by or get_or_create_system_user_pk() + self.abid = str(self.issue_new_abid()) return super().save(*args, **kwargs) # assert str(self.id) == str(self.ABID.uuid), f'self.id {self.id} does not match self.ABID {self.ABID.uuid}' @@ -119,48 +122,28 @@ class ABIDModel(models.Model): def ABID_FRESH_HASHES(self) -> Dict[str, str]: return abid_hashes_from_values(**self.ABID_FRESH_VALUES) - - @property - def ABID_FRESH(self) -> ABID: - """ - Return a pure freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src). - """ + def issue_new_abid(self): + assert self.abid is None, f'Can only issue new ABID for new objects that dont already have one {self.abid}' + assert self._state.adding, 'Can only issue new ABID when model._state.adding is True' + assert eval(self.abid_uri_src), f'Can only issue new ABID if self.abid_uri_src is defined ({self.abid_uri_src}={eval(self.abid_uri_src)})' + + self.abid = None + self.pk = self.id = self.id or uuid4() + self.created = ts_from_abid(abid_part_from_ts(timezone.now())) # cut off precision to match precision of TS component abid_fresh_values = self.ABID_FRESH_VALUES assert all(abid_fresh_values.values()), f'All ABID_FRESH_VALUES must be set {abid_fresh_values}' abid_fresh_hashes = self.ABID_FRESH_HASHES assert all(abid_fresh_hashes.values()), f'All ABID_FRESH_HASHES must be able to be generated {abid_fresh_hashes}' - abid = ABID(**abid_fresh_hashes) + new_abid = ABID(**abid_fresh_hashes) - assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {abid_fresh_values["prefix"]}_ABID for {self.__class__.__name__}' - return abid - - - def issue_new_abid(self): - assert self.abid is None, f'Can only issue new ABID for new objects that dont already have one {self.abid}' - assert self._state.adding, 'Can only issue new ABID when model._state.adding is True' - assert eval(self.abid_uri_src), f'Can only issue new ABID if self.abid_uri_src is defined ({self.abid_uri_src}={eval(self.abid_uri_src)})' - - self.old_id = getattr(self, 'old_id', None) or self.id or uuid4() - self.abid = None - self.created = ts_from_abid(abid_part_from_ts(getattr(self, 'bookmarked', None) or timezone.now())) # cut off precision to match precision of TS component - self.added = getattr(self, 'added', None) or self.created - self.modified = self.created - - assert all(self.ABID_FRESH_VALUES.values()), f'Can only issue new ABID if all self.ABID_FRESH_VALUES are defined {self.ABID_FRESH_VALUES}' - - new_abid = self.ABID_FRESH + assert new_abid.ulid and new_abid.uuid and new_abid.typeid, f'Failed to calculate {abid_fresh_values["prefix"]}_ABID for {self.__class__.__name__}' # store stable ABID on local fields, overwrite them because we are adding a new entry and existing defaults havent touched db yet self.abid = str(new_abid) - self.id = new_abid.uuid - self.pk = new_abid.uuid - - assert self.ABID == new_abid - assert str(self.ABID.uuid) == str(self.id) == str(self.pk) == str(ABID.parse(self.abid).uuid) - - self._ready_to_save_as_new = True + assert str(self.ABID.uuid) == str(new_abid.uuid) + return new_abid @property @@ -169,27 +152,12 @@ class ABIDModel(models.Model): aka get_or_generate_abid -> ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE') """ - # otherwise DB is single source of truth, load ABID from existing db pk - abid: ABID | None = None - try: - abid = abid or ABID.parse(cast(str, self.abid)) - except Exception: - pass - - try: - abid = abid or ABID.parse(cast(str, self.id)) - except Exception: - pass - - try: - abid = abid or ABID.parse(cast(str, self.pk)) - except Exception: - pass - - abid = abid or self.ABID_FRESH - - return abid + if not self.abid: + pre_save_abid = self.issue_new_abid() + self.abid = str(pre_save_abid) + return pre_save_abid + return ABID.parse(cast(str, self.abid)) @property def ULID(self) -> ULID: diff --git a/archivebox/api/migrations/0008_alter_apitoken_created_alter_apitoken_created_by_and_more.py b/archivebox/api/migrations/0008_alter_apitoken_created_alter_apitoken_created_by_and_more.py new file mode 100644 index 00000000..4776e096 --- /dev/null +++ b/archivebox/api/migrations/0008_alter_apitoken_created_alter_apitoken_created_by_and_more.py @@ -0,0 +1,47 @@ +# Generated by Django 5.1 on 2024-09-04 23:32 + +import abid_utils.models +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0007_alter_apitoken_created_by'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.AlterField( + model_name='apitoken', + name='created', + field=abid_utils.models.AutoDateTimeField(db_index=True, default=None), + ), + migrations.AlterField( + model_name='apitoken', + name='created_by', + field=models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL), + ), + migrations.AlterField( + model_name='apitoken', + name='id', + field=models.UUIDField(default=None, editable=False, primary_key=True, serialize=False, unique=True, verbose_name='ID'), + ), + migrations.AlterField( + model_name='outboundwebhook', + name='created', + field=abid_utils.models.AutoDateTimeField(db_index=True, default=None), + ), + migrations.AlterField( + model_name='outboundwebhook', + name='created_by', + field=models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL), + ), + migrations.AlterField( + model_name='outboundwebhook', + name='id', + field=models.UUIDField(default=None, editable=False, primary_key=True, serialize=False, unique=True, verbose_name='ID'), + ), + ] diff --git a/archivebox/api/models.py b/archivebox/api/models.py index c6363379..fe5e0750 100644 --- a/archivebox/api/models.py +++ b/archivebox/api/models.py @@ -12,7 +12,7 @@ from signal_webhooks.models import WebhookBase from django_stubs_ext.db.models import TypedModelMeta -from abid_utils.models import ABIDModel, ABIDField, get_or_create_system_user_pk +from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField @@ -32,11 +32,12 @@ class APIToken(ABIDModel): abid_subtype_src = 'self.created_by_id' abid_rand_src = 'self.id' - id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) + id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID') abid = ABIDField(prefix=abid_prefix) - created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk) - created = models.DateTimeField(auto_now_add=True) + created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False) + created = AutoDateTimeField(default=None, null=False, db_index=True) + modified = models.DateTimeField(auto_now=True) token = models.CharField(max_length=32, default=generate_secret_token, unique=True) expires = models.DateTimeField(null=True, blank=True) @@ -99,11 +100,11 @@ class OutboundWebhook(ABIDModel, WebhookBase): abid_subtype_src = 'self.ref' abid_rand_src = 'self.id' - id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) + id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID') abid = ABIDField(prefix=abid_prefix) - created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk) - created = models.DateTimeField(auto_now_add=True) + created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False) + created = AutoDateTimeField(default=None, null=False, db_index=True) modified = models.DateTimeField(auto_now=True) # More fields here: WebhookBase... diff --git a/archivebox/api/v1_core.py b/archivebox/api/v1_core.py index fb933169..a103f354 100644 --- a/archivebox/api/v1_core.py +++ b/archivebox/api/v1_core.py @@ -64,7 +64,6 @@ class ArchiveResultSchema(Schema): TYPE: str = 'core.models.ArchiveResult' id: UUID - old_id: int abid: str modified: datetime @@ -127,9 +126,9 @@ class ArchiveResultSchema(Schema): class ArchiveResultFilterSchema(FilterSchema): - id: Optional[str] = Field(None, q=['id__startswith', 'abid__icontains', 'old_id__startswith', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith']) + id: Optional[str] = Field(None, q=['id__startswith', 'abid__icontains', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith']) - search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains', 'id__startswith', 'abid__icontains', 'old_id__startswith', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith']) + search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains', 'id__startswith', 'abid__icontains', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith']) snapshot_id: Optional[str] = Field(None, q=['snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith']) snapshot_url: Optional[str] = Field(None, q='snapshot__url__icontains') snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name__icontains') @@ -157,8 +156,8 @@ def get_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)) @router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema, url_name="get_archiveresult") def get_archiveresult(request, archiveresult_id: str): - """Get a specific ArchiveResult by pk, abid, or old_id.""" - return ArchiveResult.objects.get(Q(id__icontains=archiveresult_id) | Q(abid__icontains=archiveresult_id) | Q(old_id__icontains=archiveresult_id)) + """Get a specific ArchiveResult by id or abid.""" + return ArchiveResult.objects.get(Q(id__icontains=archiveresult_id) | Q(abid__icontains=archiveresult_id)) # @router.post("/archiveresult", response=ArchiveResultSchema) @@ -193,7 +192,6 @@ class SnapshotSchema(Schema): TYPE: str = 'core.models.Snapshot' id: UUID - old_id: UUID abid: str modified: datetime @@ -251,9 +249,7 @@ class SnapshotSchema(Schema): class SnapshotFilterSchema(FilterSchema): - id: Optional[str] = Field(None, q=['id__icontains', 'abid__icontains', 'old_id__icontains', 'timestamp__startswith']) - - old_id: Optional[str] = Field(None, q='old_id__icontains') + id: Optional[str] = Field(None, q=['id__icontains', 'abid__icontains', 'timestamp__startswith']) abid: Optional[str] = Field(None, q='abid__icontains') created_by_id: str = Field(None, q='created_by_id') @@ -266,7 +262,7 @@ class SnapshotFilterSchema(FilterSchema): modified__gte: datetime = Field(None, q='modified__gte') modified__lt: datetime = Field(None, q='modified__lt') - search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'id__icontains', 'abid__icontains', 'old_id__icontains', 'timestamp__startswith']) + search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'id__icontains', 'abid__icontains', 'timestamp__startswith']) url: Optional[str] = Field(None, q='url') tag: Optional[str] = Field(None, q='tags__name') title: Optional[str] = Field(None, q='title__icontains') @@ -293,12 +289,12 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True): request.with_archiveresults = with_archiveresults snapshot = None try: - snapshot = Snapshot.objects.get(Q(abid__startswith=snapshot_id) | Q(id__startswith=snapshot_id) | Q(old_id__startswith=snapshot_id) | Q(timestamp__startswith=snapshot_id)) + snapshot = Snapshot.objects.get(Q(abid__startswith=snapshot_id) | Q(id__startswith=snapshot_id) | Q(timestamp__startswith=snapshot_id)) except Snapshot.DoesNotExist: pass try: - snapshot = snapshot or Snapshot.objects.get(Q(abid__icontains=snapshot_id) | Q(id__icontains=snapshot_id) | Q(old_id__icontains=snapshot_id)) + snapshot = snapshot or Snapshot.objects.get(Q(abid__icontains=snapshot_id) | Q(id__icontains=snapshot_id)) except Snapshot.DoesNotExist: pass @@ -338,7 +334,6 @@ class TagSchema(Schema): TYPE: str = 'core.models.Tag' id: UUID - old_id: str abid: str modified: datetime @@ -351,10 +346,6 @@ class TagSchema(Schema): num_snapshots: int snapshots: List[SnapshotSchema] - @staticmethod - def resolve_old_id(obj): - return str(obj.old_id) - @staticmethod def resolve_created_by_id(obj): return str(obj.created_by_id) @@ -386,11 +377,6 @@ def get_tag(request, tag_id: str, with_snapshots: bool=True): request.with_snapshots = with_snapshots request.with_archiveresults = False tag = None - try: - tag = tag or Tag.objects.get(old_id__icontains=tag_id) - except (Tag.DoesNotExist, ValidationError, ValueError): - pass - try: tag = Tag.objects.get(abid__icontains=tag_id) except (Tag.DoesNotExist, ValidationError): diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py index 18696e56..fbc4494c 100644 --- a/archivebox/core/admin.py +++ b/archivebox/core/admin.py @@ -240,8 +240,8 @@ class ArchiveResultInline(admin.TabularInline): # fk_name = 'snapshot' extra = 0 sort_fields = ('end_ts', 'extractor', 'output', 'status', 'cmd_version') - readonly_fields = ('result_id', 'completed', 'extractor', 'command', 'version') - fields = ('id', 'start_ts', 'end_ts', *readonly_fields, 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'output') + readonly_fields = ('id', 'result_id', 'completed', 'command', 'version') + fields = ('start_ts', 'end_ts', *readonly_fields, 'extractor', 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'output') # exclude = ('id',) ordering = ('end_ts',) show_change_link = True @@ -273,7 +273,7 @@ class ArchiveResultInline(admin.TabularInline): snapshot = self.get_parent_object_from_request(request) # import ipdb; ipdb.set_trace() - formset.form.base_fields['id'].widget = formset.form.base_fields['id'].hidden_widget() + # formset.form.base_fields['id'].widget = formset.form.base_fields['id'].hidden_widget() # default values for new entries formset.form.base_fields['status'].initial = 'succeeded' @@ -351,13 +351,13 @@ class SnapshotActionForm(ActionForm): @admin.register(Snapshot, site=archivebox_admin) class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin): - list_display = ('added', 'title_str', 'files', 'size', 'url_str') - sort_fields = ('title_str', 'url_str', 'added') - readonly_fields = ('tags_str', 'timestamp', 'admin_actions', 'status_info', 'bookmarked', 'created', 'added', 'updated', 'modified', 'API', 'link_dir') - search_fields = ('id', 'url', 'abid', 'old_id', 'timestamp', 'title', 'tags__name') - list_filter = ('added', 'updated', 'archiveresult__status', 'created_by', 'tags__name') + list_display = ('created', 'title_str', 'files', 'size', 'url_str') + sort_fields = ('title_str', 'url_str', 'created') + readonly_fields = ('tags_str', 'timestamp', 'admin_actions', 'status_info', 'bookmarked', 'created', 'created', 'updated', 'modified', 'API', 'link_dir') + search_fields = ('id', 'url', 'abid', 'timestamp', 'title', 'tags__name') + list_filter = ('created', 'updated', 'archiveresult__status', 'created_by', 'tags__name') fields = ('url', 'created_by', 'title',*readonly_fields) - ordering = ['-added'] + ordering = ['-created'] actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots'] inlines = [TagInline, ArchiveResultInline] list_per_page = min(max(5, CONFIG.SNAPSHOTS_PER_PAGE), 5000) @@ -391,12 +391,6 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin): except (Snapshot.DoesNotExist, ValidationError): pass - - try: - snapshot = snapshot or Snapshot.objects.get(old_id=object_id) - except (Snapshot.DoesNotExist, Snapshot.MultipleObjectsReturned, ValidationError): - pass - if snapshot: object_id = str(snapshot.id) @@ -690,7 +684,7 @@ class ArchiveResultAdmin(ABIDModelAdmin): list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str') sort_fields = ('start_ts', 'extractor', 'status') readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created', 'modified', 'API', 'output_summary') - search_fields = ('id', 'old_id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp') + search_fields = ('id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp') fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', 'cmd', *readonly_fields) autocomplete_fields = ['snapshot'] diff --git a/archivebox/core/migrations/0070_alter_archiveresult_created_by_alter_snapshot_added_and_more.py b/archivebox/core/migrations/0070_alter_archiveresult_created_by_alter_snapshot_added_and_more.py new file mode 100644 index 00000000..40dfe6c2 --- /dev/null +++ b/archivebox/core/migrations/0070_alter_archiveresult_created_by_alter_snapshot_added_and_more.py @@ -0,0 +1,52 @@ +# Generated by Django 5.1 on 2024-09-04 09:00 + +import abid_utils.models +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0069_alter_archiveresult_created_alter_snapshot_added_and_more'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.AlterField( + model_name='archiveresult', + name='created_by', + field=models.ForeignKey(default=abid_utils.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='archiveresult_set', to=settings.AUTH_USER_MODEL), + ), + migrations.AlterField( + model_name='snapshot', + name='added', + field=abid_utils.models.AutoDateTimeField(db_index=True, default=None), + ), + migrations.AlterField( + model_name='snapshot', + name='created', + field=abid_utils.models.AutoDateTimeField(db_index=True, default=None), + ), + migrations.AlterField( + model_name='snapshot', + name='created_by', + field=models.ForeignKey(default=abid_utils.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to=settings.AUTH_USER_MODEL), + ), + migrations.AlterField( + model_name='snapshot', + name='id', + field=models.UUIDField(default=None, primary_key=True, serialize=False, unique=True), + ), + migrations.AlterField( + model_name='snapshot', + name='old_id', + field=models.UUIDField(default=None, editable=False, unique=True), + ), + migrations.AlterField( + model_name='tag', + name='created', + field=abid_utils.models.AutoDateTimeField(db_index=True, default=None), + ), + ] diff --git a/archivebox/core/migrations/0071_remove_archiveresult_old_id_remove_snapshot_old_id_and_more.py b/archivebox/core/migrations/0071_remove_archiveresult_old_id_remove_snapshot_old_id_and_more.py new file mode 100644 index 00000000..e19666ad --- /dev/null +++ b/archivebox/core/migrations/0071_remove_archiveresult_old_id_remove_snapshot_old_id_and_more.py @@ -0,0 +1,65 @@ +# Generated by Django 5.1 on 2024-09-04 23:23 + +import abid_utils.models +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0070_alter_archiveresult_created_by_alter_snapshot_added_and_more'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.RemoveField( + model_name='archiveresult', + name='old_id', + ), + migrations.RemoveField( + model_name='snapshot', + name='old_id', + ), + migrations.RemoveField( + model_name='tag', + name='old_id', + ), + + migrations.AlterField( + model_name='archiveresult', + name='created', + field=abid_utils.models.AutoDateTimeField(db_index=True, default=None), + ), + migrations.AlterField( + model_name='archiveresult', + name='created_by', + field=models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, related_name='archiveresult_set', to=settings.AUTH_USER_MODEL), + ), + migrations.AlterField( + model_name='archiveresult', + name='id', + field=models.UUIDField(default=None, editable=False, primary_key=True, serialize=False, unique=True, verbose_name='ID'), + ), + migrations.AlterField( + model_name='snapshot', + name='created_by', + field=models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to=settings.AUTH_USER_MODEL), + ), + migrations.AlterField( + model_name='snapshot', + name='id', + field=models.UUIDField(default=None, editable=False, primary_key=True, serialize=False, unique=True, verbose_name='ID'), + ), + migrations.AlterField( + model_name='tag', + name='created_by', + field=models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, related_name='tag_set', to=settings.AUTH_USER_MODEL), + ), + migrations.AlterField( + model_name='tag', + name='id', + field=models.UUIDField(default=None, editable=False, primary_key=True, serialize=False, unique=True, verbose_name='ID'), + ), + ] diff --git a/archivebox/core/models.py b/archivebox/core/models.py index a76a86c9..5abc8274 100644 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -54,13 +54,15 @@ class Tag(ABIDModel): abid_ts_src = 'self.created' abid_uri_src = 'self.slug' abid_subtype_src = '"03"' - abid_rand_src = 'self.old_id' + abid_rand_src = 'self.id' - old_id = models.BigIntegerField(unique=True, default=rand_int_id, serialize=False, verbose_name='Old ID') # legacy PK - - id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False, unique=True) + id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID') abid = ABIDField(prefix=abid_prefix) + created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, related_name='tag_set') + created = AutoDateTimeField(default=None, null=False, db_index=True) + modified = models.DateTimeField(auto_now=True) + name = models.CharField(unique=True, blank=False, max_length=100) slug = models.SlugField(unique=True, blank=False, max_length=100, editable=False) # slug is autoset on save from name, never set it manually @@ -74,10 +76,6 @@ class Tag(ABIDModel): def __str__(self): return self.name - # @property - # def old_id(self): - # return self.id - def slugify(self, tag, i=None): slug = slugify(tag) if i is not None: @@ -133,16 +131,15 @@ class SnapshotManager(models.Manager): class Snapshot(ABIDModel): abid_prefix = 'snp_' - abid_ts_src = 'self.added' + abid_ts_src = 'self.created' abid_uri_src = 'self.url' abid_subtype_src = '"01"' - abid_rand_src = 'self.old_id' + abid_rand_src = 'self.id' - old_id = models.UUIDField(default=None, null=False, editable=False, unique=True) # legacy pk - id = models.UUIDField(default=None, null=False, primary_key=True, editable=True, unique=True) + id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID') abid = ABIDField(prefix=abid_prefix) - created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk, related_name='snapshot_set') + created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, related_name='snapshot_set') created = AutoDateTimeField(default=None, null=False, db_index=True) modified = models.DateTimeField(auto_now=True) @@ -161,7 +158,6 @@ class Snapshot(ABIDModel): objects = SnapshotManager() - def __repr__(self) -> str: title = (self.title_stripped or '-')[:64] return f'[{self.timestamp}] {self.url[:64]} ({title})' @@ -414,7 +410,7 @@ class ArchiveResult(ABIDModel): abid_ts_src = 'self.snapshot.added' abid_uri_src = 'self.snapshot.url' abid_subtype_src = 'self.extractor' - abid_rand_src = 'self.old_id' + abid_rand_src = 'self.id' EXTRACTOR_CHOICES = ( ('htmltotext', 'htmltotext'), @@ -438,13 +434,11 @@ class ArchiveResult(ABIDModel): ("skipped", "skipped") ] - old_id = models.BigIntegerField(default=rand_int_id, serialize=False, verbose_name='Old ID') - - id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True, unique=True, verbose_name='ID') + id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID') abid = ABIDField(prefix=abid_prefix) - created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk, related_name='archiveresult_set') - created = AutoDateTimeField(default=timezone.now, db_index=True) + created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, related_name='archiveresult_set') + created = AutoDateTimeField(default=None, null=False, db_index=True) modified = models.DateTimeField(auto_now=True) snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE, to_field='id', db_column='snapshot_id') diff --git a/archivebox/core/views.py b/archivebox/core/views.py index c5c09b09..da09224c 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -285,17 +285,17 @@ class SnapshotView(View): # ulid = slug.split('_', 1)[-1] # try: # try: - # snapshot = snapshot or Snapshot.objects.get(Q(abid=ulid) | Q(id=ulid) | Q(old_id=ulid)) + # snapshot = snapshot or Snapshot.objects.get(Q(abid=ulid) | Q(id=ulid)) # except Snapshot.DoesNotExist: # pass # try: - # snapshot = Snapshot.objects.get(Q(abid__startswith=slug) | Q(abid__startswith=Snapshot.abid_prefix + slug) | Q(id__startswith=slug) | Q(old_id__startswith=slug)) + # snapshot = Snapshot.objects.get(Q(abid__startswith=slug) | Q(abid__startswith=Snapshot.abid_prefix + slug) | Q(id__startswith=slug)) # except (Snapshot.DoesNotExist, Snapshot.MultipleObjectsReturned): # pass # try: - # snapshot = snapshot or Snapshot.objects.get(Q(abid__icontains=snapshot_id) | Q(id__icontains=snapshot_id) | Q(old_id__icontains=snapshot_id)) + # snapshot = snapshot or Snapshot.objects.get(Q(abid__icontains=snapshot_id) | Q(id__icontains=snapshot_id)) # except Snapshot.DoesNotExist: # pass # return redirect(f'/archive/{snapshot.timestamp}/index.html') @@ -308,7 +308,7 @@ class SnapshotView(View): # try exact match on full url / ABID first snapshot = Snapshot.objects.get( Q(url='http://' + path) | Q(url='https://' + path) | Q(id__startswith=path) - | Q(abid__icontains=path) | Q(id__icontains=path) | Q(old_id__icontains=path) + | Q(abid__icontains=path) | Q(id__icontains=path) ) except Snapshot.DoesNotExist: # fall back to match on exact base_url @@ -352,7 +352,7 @@ class SnapshotView(View): ) for snap in Snapshot.objects.filter( Q(url__startswith='http://' + base_url(path)) | Q(url__startswith='https://' + base_url(path)) - | Q(abid__icontains=path) | Q(id__icontains=path) | Q(old_id__icontains=path) + | Q(abid__icontains=path) | Q(id__icontains=path) ).only('url', 'timestamp', 'title', 'added').order_by('-added') ) return HttpResponse( diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py index 0f0d5b83..b01b6ae5 100644 --- a/archivebox/index/schema.py +++ b/archivebox/index/schema.py @@ -192,7 +192,6 @@ class Link: if extended: info.update({ 'snapshot_id': self.snapshot_id, - 'snapshot_old_id': self.snapshot_old_id, 'snapshot_abid': self.snapshot_abid, 'link_dir': self.link_dir, @@ -266,16 +265,12 @@ class Link: @cached_property def snapshot(self): from core.models import Snapshot - return Snapshot.objects.only('id', 'old_id', 'abid').get(url=self.url) + return Snapshot.objects.only('id', 'abid').get(url=self.url) @cached_property def snapshot_id(self): return str(self.snapshot.pk) - @cached_property - def snapshot_old_id(self): - return str(self.snapshot.old_id) - @cached_property def snapshot_abid(self): return str(self.snapshot.ABID)