diff --git a/archivebox/core/migrations/0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more.py b/archivebox/core/migrations/0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more.py
index d981dca9..b7531233 100644
--- a/archivebox/core/migrations/0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more.py
+++ b/archivebox/core/migrations/0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more.py
@@ -22,7 +22,7 @@ class Migration(migrations.Migration):
             name='SnapshotTag',
             fields=[
                 ('id', models.AutoField(primary_key=True, serialize=False)),
-                ('snapshot', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to='core.snapshot')),
+                ('snapshot', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.snapshot')),
                 ('tag', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.tag')),
             ],
             options={
diff --git a/archivebox/core/migrations/0045_alter_snapshot_old_id.py b/archivebox/core/migrations/0045_alter_snapshot_old_id.py
new file mode 100644
index 00000000..7dc1a26a
--- /dev/null
+++ b/archivebox/core/migrations/0045_alter_snapshot_old_id.py
@@ -0,0 +1,19 @@
+# Generated by Django 5.0.6 on 2024-08-20 01:54
+
+import uuid
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='snapshot',
+            name='old_id',
+            field=models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False, unique=True),
+        ),
+    ]
diff --git a/archivebox/core/migrations/0046_alter_archiveresult_snapshot_alter_snapshot_id_and_more.py b/archivebox/core/migrations/0046_alter_archiveresult_snapshot_alter_snapshot_id_and_more.py
new file mode 100644
index 00000000..39216ec5
--- /dev/null
+++ b/archivebox/core/migrations/0046_alter_archiveresult_snapshot_alter_snapshot_id_and_more.py
@@ -0,0 +1,30 @@
+# Generated by Django 5.0.6 on 2024-08-20 01:55
+
+import django.db.models.deletion
+import uuid
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0045_alter_snapshot_old_id'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='archiveresult',
+            name='snapshot',
+            field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot', to_field='id'),
+        ),
+        migrations.AlterField(
+            model_name='snapshot',
+            name='id',
+            field=models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False, unique=True),
+        ),
+        migrations.AlterField(
+            model_name='snapshot',
+            name='old_id',
+            field=models.UUIDField(default=uuid.uuid4, editable=False, unique=True),
+        ),
+    ]
diff --git a/archivebox/core/migrations/0047_alter_snapshottag_unique_together_and_more.py b/archivebox/core/migrations/0047_alter_snapshottag_unique_together_and_more.py
new file mode 100644
index 00000000..b1c845f8
--- /dev/null
+++ b/archivebox/core/migrations/0047_alter_snapshottag_unique_together_and_more.py
@@ -0,0 +1,24 @@
+# Generated by Django 5.0.6 on 2024-08-20 02:16
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0046_alter_archiveresult_snapshot_alter_snapshot_id_and_more'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='archiveresult',
+            name='snapshot',
+            field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot', to_field='id'),
+        ),
+        migrations.AlterField(
+            model_name='snapshottag',
+            name='tag',
+            field=models.ForeignKey(db_column='tag_id', on_delete=django.db.models.deletion.CASCADE, to='core.tag'),
+        ),
+    ]
diff --git a/archivebox/core/migrations/0048_alter_archiveresult_snapshot_and_more.py b/archivebox/core/migrations/0048_alter_archiveresult_snapshot_and_more.py
new file mode 100644
index 00000000..81bc8a06
--- /dev/null
+++ b/archivebox/core/migrations/0048_alter_archiveresult_snapshot_and_more.py
@@ -0,0 +1,24 @@
+# Generated by Django 5.0.6 on 2024-08-20 02:17
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0047_alter_snapshottag_unique_together_and_more'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='archiveresult',
+            name='snapshot',
+            field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot'),
+        ),
+        migrations.AlterField(
+            model_name='snapshottag',
+            name='snapshot',
+            field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot', to_field='old_id'),
+        ),
+    ]
diff --git a/archivebox/core/migrations/0049_rename_snapshot_snapshottag_snapshot_old_and_more.py b/archivebox/core/migrations/0049_rename_snapshot_snapshottag_snapshot_old_and_more.py
new file mode 100644
index 00000000..aa0c5b39
--- /dev/null
+++ b/archivebox/core/migrations/0049_rename_snapshot_snapshottag_snapshot_old_and_more.py
@@ -0,0 +1,22 @@
+# Generated by Django 5.0.6 on 2024-08-20 02:26
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0048_alter_archiveresult_snapshot_and_more'),
+    ]
+
+    operations = [
+        migrations.RenameField(
+            model_name='snapshottag',
+            old_name='snapshot',
+            new_name='snapshot_old',
+        ),
+        migrations.AlterUniqueTogether(
+            name='snapshottag',
+            unique_together={('snapshot_old', 'tag')},
+        ),
+    ]
diff --git a/archivebox/core/migrations/0050_alter_snapshottag_snapshot_old.py b/archivebox/core/migrations/0050_alter_snapshottag_snapshot_old.py
new file mode 100644
index 00000000..4bff827c
--- /dev/null
+++ b/archivebox/core/migrations/0050_alter_snapshottag_snapshot_old.py
@@ -0,0 +1,19 @@
+# Generated by Django 5.0.6 on 2024-08-20 02:30
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0049_rename_snapshot_snapshottag_snapshot_old_and_more'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='snapshottag',
+            name='snapshot_old',
+            field=models.ForeignKey(db_column='snapshot_old_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot', to_field='old_id'),
+        ),
+    ]
diff --git a/archivebox/core/migrations/0051_snapshottag_snapshot_alter_snapshottag_snapshot_old.py b/archivebox/core/migrations/0051_snapshottag_snapshot_alter_snapshottag_snapshot_old.py
new file mode 100644
index 00000000..ddb7afbb
--- /dev/null
+++ b/archivebox/core/migrations/0051_snapshottag_snapshot_alter_snapshottag_snapshot_old.py
@@ -0,0 +1,56 @@
+# Generated by Django 5.0.6 on 2024-08-20 02:31
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+def update_snapshottag_ids(apps, schema_editor):
+    """Backfill SnapshotTag.snapshot_id from the legacy snapshot_old_id column.
+
+    Every SnapshotTag row still references its Snapshot through snapshot_old,
+    which targets Snapshot.old_id. A single correlated UPDATE copies the
+    matching Snapshot.id into the new snapshot column, replacing the previous
+    one-SELECT-plus-one-UPDATE-per-row loop.
+
+    Raises:
+        RuntimeError: if any row is left without a snapshot_id because no
+            Snapshot has a matching old_id.
+    """
+    Snapshot = apps.get_model("core", "Snapshot")
+    SnapshotTag = apps.get_model("core", "SnapshotTag")
+    num_total = SnapshotTag.objects.count()
+    print(f' Updating {num_total} SnapshotTag.snapshot_id values in place...')
+    matching_snapshot_id = (
+        Snapshot.objects
+        .filter(old_id=models.OuterRef('snapshot_old_id'))
+        .values('id')[:1]
+    )
+    SnapshotTag.objects.update(snapshot_id=models.Subquery(matching_snapshot_id))
+    # Explicit check instead of assert: asserts are stripped under `python -O`.
+    num_unmatched = SnapshotTag.objects.filter(snapshot__isnull=True).count()
+    if num_unmatched:
+        raise RuntimeError(
+            f'{num_unmatched}/{num_total} SnapshotTag rows have no Snapshot with a matching old_id'
+        )
+    print(f'Migrated {num_total}/{num_total} SnapshotTag objects...')
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0050_alter_snapshottag_snapshot_old'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='snapshottag',
+            name='snapshot',
+            field=models.ForeignKey(blank=True, db_column='snapshot_id', null=True, on_delete=django.db.models.deletion.CASCADE, to='core.snapshot'),
+        ),
+        migrations.AlterField(
+            model_name='snapshottag',
+            name='snapshot_old',
+            field=models.ForeignKey(db_column='snapshot_old_id', on_delete=django.db.models.deletion.CASCADE, related_name='snapshottag_old_set', to='core.snapshot', to_field='old_id'),
+        ),
+        migrations.RunPython(update_snapshottag_ids, reverse_code=migrations.RunPython.noop),
+    ]
diff --git a/archivebox/core/migrations/0052_alter_snapshottag_unique_together_and_more.py b/archivebox/core/migrations/0052_alter_snapshottag_unique_together_and_more.py
new file mode 100644
index 00000000..e11000bc
--- /dev/null
+++ b/archivebox/core/migrations/0052_alter_snapshottag_unique_together_and_more.py
@@ -0,0 +1,27 @@
+# Generated by Django 5.0.6 on 2024-08-20 02:37
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0051_snapshottag_snapshot_alter_snapshottag_snapshot_old'),
+    ]
+
+    operations = [
+        migrations.AlterUniqueTogether(
+            name='snapshottag',
+            unique_together=set(),
+        ),
+        migrations.AlterField(
+            model_name='snapshottag',
+            name='snapshot',
+            field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot'),
+        ),
+        migrations.AlterUniqueTogether(
+            name='snapshottag',
+            unique_together={('snapshot', 'tag')},
+        ),
+    ]
diff --git a/archivebox/core/migrations/0053_remove_snapshottag_snapshot_old.py b/archivebox/core/migrations/0053_remove_snapshottag_snapshot_old.py
new file mode 100644
index 00000000..cf50fc2c
--- /dev/null
+++ b/archivebox/core/migrations/0053_remove_snapshottag_snapshot_old.py
@@ -0,0 +1,17 @@
+# Generated by Django 5.0.6 on 2024-08-20 02:38
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0052_alter_snapshottag_unique_together_and_more'),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name='snapshottag',
+            name='snapshot_old',
+        ),
+    ]
diff --git a/archivebox/core/migrations/0054_alter_snapshot_timestamp.py b/archivebox/core/migrations/0054_alter_snapshot_timestamp.py
new file mode 100644
index 00000000..6febe7c3
--- /dev/null
+++ b/archivebox/core/migrations/0054_alter_snapshot_timestamp.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.0.6 on 2024-08-20 02:40
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0053_remove_snapshottag_snapshot_old'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='snapshot',
+            name='timestamp',
+            field=models.CharField(db_index=True, editable=False, max_length=32, unique=True),
+        ),
+    ]
diff --git a/archivebox/core/models.py b/archivebox/core/models.py
index 9ccd6145..61a62714 100644
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -121,8 +121,8 @@ class Tag(ABIDModel):
 class SnapshotTag(models.Model):
     id = models.AutoField(primary_key=True)
 
-    snapshot = models.OneToOneField('Snapshot', on_delete=models.CASCADE, to_field='old_id')
-    tag = models.ForeignKey(Tag, on_delete=models.CASCADE, to_field='id')
+    snapshot = models.ForeignKey('Snapshot', db_column='snapshot_id', on_delete=models.CASCADE, to_field='id')
+    tag = models.ForeignKey(Tag, db_column='tag_id', on_delete=models.CASCADE, to_field='id')
 
     class Meta:
         db_table = 'core_snapshot_tags'
@@ -135,12 +135,12 @@ class Snapshot(ABIDModel):
     abid_subtype_src = '"01"'
     abid_rand_src = 'self.old_id'
 
-    old_id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)  # legacy pk
-    id = models.UUIDField(default=uuid.uuid4, editable=True, unique=True)
+    old_id = models.UUIDField(default=uuid.uuid4, editable=False, unique=True)  # legacy pk
+    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True, unique=True)
     abid = ABIDField(prefix=abid_prefix)
 
     url = models.URLField(unique=True, db_index=True)
-    timestamp = models.CharField(max_length=32, unique=True, db_index=True)
+    timestamp = models.CharField(max_length=32, unique=True, db_index=True, editable=False)
 
     title = models.CharField(max_length=512, null=True, blank=True, db_index=True)
 
@@ -365,6 +365,7 @@ class ArchiveResult(ABIDModel):
     EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
 
     old_id = models.BigIntegerField(default=rand_int_id, serialize=False, verbose_name='Old ID')
+    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True, unique=True, verbose_name='ID')
     abid = ABIDField(prefix=abid_prefix)
 
 
@@ -392,6 +393,10 @@ class ArchiveResult(ABIDModel):
         super().save(*args, **kwargs)
         assert str(self.id) == str(self.abid.uuid)
 
+    @property
+    def uuid(self):
+        return self.id
+
     @cached_property
     def snapshot_dir(self):
         return Path(self.snapshot.link_dir)