mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-31 06:48:25 -04:00
add migrations to create and populate ABIDField and UUIDField values
Some checks are pending
Build Debian package / build (push) Waiting to run
Build Docker image / buildx (push) Waiting to run
Build Homebrew package / build (push) Waiting to run
Run linters / lint (push) Waiting to run
Build Pip package / build (push) Waiting to run
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Waiting to run
Run tests / docker_tests (push) Waiting to run
Some checks are pending
Build Debian package / build (push) Waiting to run
Build Docker image / buildx (push) Waiting to run
Build Homebrew package / build (push) Waiting to run
Run linters / lint (push) Waiting to run
Build Pip package / build (push) Waiting to run
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Waiting to run
Run tests / docker_tests (push) Waiting to run
This commit is contained in:
parent
0420662174
commit
206e7e74b3
4 changed files with 217 additions and 0 deletions
|
@ -0,0 +1,43 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-13 10:56
|
||||
|
||||
import charidfield.fields
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Schema migration: add ``abid`` / ``uuid`` columns and refresh ArchiveResult metadata.

    Operations, in order:
      1. Set the ``verbose_name`` of ``archiveresult`` to ``'Result'``.
      2. Add an ``abid`` CharIDField to ``archiveresult`` (prefix ``res_``).
      3. Add an ``abid`` CharIDField to ``snapshot`` (prefix ``snp_``).
      4. Add a ``uuid`` UUIDField to ``snapshot``.
      5. Add an ``abid`` CharIDField to ``tag`` (prefix ``tag_``).
      6. Redefine ``archiveresult.extractor`` with the listed ``choices``.

    Every added column is declared ``null=True, blank=True, unique=True``, so
    rows that exist before this migration start out with NULL in the new
    columns.
    """

    # Must be applied on top of the previous core migration.
    dependencies = [
        ('core', '0022_auto_20231023_2008'),
    ]

    operations = [
        # Metadata-only change: sets the model's verbose_name option.
        migrations.AlterModelOptions(
            name='archiveresult',
            options={'verbose_name': 'Result'},
        ),
        # NOTE(review): the help_text example shows an 'snp_' identifier even though
        # this field's prefix is 'res_' -- presumably copied from a shared definition;
        # confirm against the model before editing, since changing it here alone
        # would make Django detect a model/migration mismatch.
        migrations.AddField(
            model_name='archiveresult',
            name='abid',
            field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='res_', unique=True),
        ),
        migrations.AddField(
            model_name='snapshot',
            name='abid',
            field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='snp_', unique=True),
        ),
        # Nullable so that existing snapshot rows remain valid until a value is written.
        migrations.AddField(
            model_name='snapshot',
            name='uuid',
            field=models.UUIDField(blank=True, null=True, unique=True),
        ),
        # NOTE(review): same 'snp_' example in help_text as above, with prefix 'tag_'.
        migrations.AddField(
            model_name='tag',
            name='abid',
            field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='tag_', unique=True),
        ),
        # Redefines the extractor column with this fixed list of (value, label) choices.
        migrations.AlterField(
            model_name='archiveresult',
            name='extractor',
            field=models.CharField(choices=[('htmltotext', 'htmltotext'), ('git', 'git'), ('singlefile', 'singlefile'), ('media', 'media'), ('archive_org', 'archive_org'), ('readability', 'readability'), ('mercury', 'mercury'), ('favicon', 'favicon'), ('pdf', 'pdf'), ('headers', 'headers'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('title', 'title'), ('wget', 'wget')], max_length=32),
        ),
    ]
|
95
archivebox/core/migrations/0024_auto_20240513_1143.py
Normal file
95
archivebox/core/migrations/0024_auto_20240513_1143.py
Normal file
|
@ -0,0 +1,95 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-13 11:43
|
||||
|
||||
from django.db import migrations
|
||||
from datetime import datetime
|
||||
from abid_utils.abid import abid_from_values
|
||||
|
||||
|
||||
def calculate_abid(self):
    """
    Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).

    ``self`` is any object carrying these attributes:
      - ``abid_prefix``: the ABID prefix string (must be set and not ``'obj_'``).
      - ``abid_ts_src``, ``abid_uri_src``, ``abid_subtype_src``, ``abid_rand_src``:
        strings holding Python expressions that are evaluated with ``self`` in
        scope to obtain the corresponding ABID component.

    Any component whose expression evaluates to a falsy value is replaced by a
    placeholder and a warning is printed:
      - ts      -> the naive datetime 1970-01-01T00:00:00
      - uri     -> ``str(self)``
      - subtype -> the class name of ``self``
      - rand    -> ``self.uuid``, else ``self.id``, else ``self.pk``

    Returns:
        The object produced by ``abid_from_values(...)``.

    Raises:
        Exception: if ``abid_prefix`` is empty or equal to ``'obj_'``.
        AssertionError: if the resulting ABID lacks a truthy ``ulid``, ``uuid``
            or ``typeid``.
    """
    prefix = self.abid_prefix

    # NOTE: eval() runs each *_src string as a Python expression with `self`
    # (and the locals assigned so far) in scope. In this file the strings are
    # hard-coded by the generate_*_abids functions, never taken from user input.
    # Do not feed externally supplied strings through this function.
    ts = eval(self.abid_ts_src)
    uri = eval(self.abid_uri_src)
    subtype = eval(self.abid_subtype_src)
    rand = eval(self.abid_rand_src)

    model_name = self.__class__.__name__

    if (not prefix) or prefix == 'obj_':
        suggested_abid = model_name[:3].lower()
        raise Exception(f'{model_name}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})')

    if not ts:
        # Same naive value that datetime.utcfromtimestamp(0) returned, without
        # calling the API deprecated since Python 3.12.
        ts = datetime(1970, 1, 1)
        print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {model_name}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat())

    if not uri:
        uri = str(self)
        print(f'[!] WARNING: Generating ABID with uri=str(self) placeholder because {model_name}.abid_uri_src={self.abid_uri_src} is unset!', uri)

    if not subtype:
        subtype = model_name
        print(f'[!] WARNING: Generating ABID with subtype={subtype} placeholder because {model_name}.abid_subtype_src={self.abid_subtype_src} is unset!', subtype)

    if not rand:
        # The last lookup deliberately has no default: an object with none of
        # uuid/id/pk raises AttributeError instead of yielding an ABID.
        rand = getattr(self, 'uuid', None) or getattr(self, 'id', None) or getattr(self, 'pk')
        print(f'[!] WARNING: Generating ABID with rand=self.id placeholder because {model_name}.abid_rand_src={self.abid_rand_src} is unset!', rand)

    abid = abid_from_values(
        prefix=prefix,
        ts=ts,
        uri=uri,
        subtype=subtype,
        rand=rand,
    )

    # Explicit raise (rather than `assert`) so the check survives `python -O`.
    if not (abid.ulid and abid.uuid and abid.typeid):
        raise AssertionError(f'Failed to calculate {prefix}_ABID for {model_name}')
    return abid
|
||||
|
||||
|
||||
def copy_snapshot_uuids(apps, schema_editor):
    """Write each Snapshot's ``id`` into its ``uuid`` field, saving row by row.

    Args:
        apps: registry used to look up the ``core.Snapshot`` model for this
            migration.
        schema_editor: unused; present because RunPython passes it.
    """
    snapshot_model = apps.get_model("core", "Snapshot")
    every_snapshot = snapshot_model.objects.all()

    for row in every_snapshot:
        row.uuid = row.id
        # Restrict the write to the single column being filled in.
        row.save(update_fields=["uuid"])
|
||||
|
||||
def generate_snapshot_abids(apps, schema_editor):
    """Compute and store an ABID for every Snapshot row.

    Each instance is first given the ``abid_*`` attributes that
    ``calculate_abid`` reads, then the resulting value is saved to ``abid``.

    Args:
        apps: registry used to look up the ``core.Snapshot`` model for this
            migration.
        schema_editor: unused; present because RunPython passes it.
    """
    snapshot_model = apps.get_model("core", "Snapshot")

    # Attribute name -> value; the *_src values are expressions that
    # calculate_abid evaluates against the instance.
    abid_config = {
        'abid_prefix': 'snp_',
        'abid_ts_src': 'self.added',
        'abid_uri_src': 'self.url',
        'abid_subtype_src': '"01"',
        'abid_rand_src': 'self.uuid',
    }

    for row in snapshot_model.objects.all():
        for attr_name, attr_value in abid_config.items():
            setattr(row, attr_name, attr_value)

        row.abid = calculate_abid(row)
        row.save(update_fields=["abid"])
|
||||
|
||||
def generate_archiveresult_abids(apps, schema_editor):
    """Compute and store an ABID (and matching UUID) for every ArchiveResult row.

    The timestamp and URI components come from the result's parent Snapshot
    (``added`` and ``url``), the subtype from the result's ``extractor``, and
    the random component from the result's ``id``. After the ABID is
    calculated, the result's ``uuid`` is overwritten with the ABID's ``uuid``.

    Args:
        apps: registry used to look up the ``core`` models for this migration.
        schema_editor: unused; present because RunPython passes it.
    """
    result_model = apps.get_model("core", "ArchiveResult")
    snapshot_model = apps.get_model("core", "Snapshot")

    for row in result_model.objects.all():
        # Fetch the parent explicitly and copy the two values needed onto the
        # result itself, so the *_src expressions can reach them via `self`.
        parent = snapshot_model.objects.get(pk=row.snapshot_id)

        row.abid_prefix = 'res_'
        row.snapshot = parent
        row.snapshot_added = parent.added
        row.snapshot_url = parent.url

        row.abid_ts_src = 'self.snapshot_added'
        row.abid_uri_src = 'self.snapshot_url'
        row.abid_subtype_src = 'self.extractor'
        row.abid_rand_src = 'self.id'

        new_abid = calculate_abid(row)
        row.abid = new_abid
        row.uuid = row.abid.uuid
        row.save(update_fields=["abid", "uuid"])
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Data migration: populate Snapshot ``uuid`` values and Snapshot/ArchiveResult ABIDs.

    Runs three RunPython steps in order. Each step has a no-op reverse, so
    unapplying this migration leaves the written values in place.
    """

    # Requires the columns created by the preceding schema migration.
    dependencies = [
        ('core', '0023_alter_archiveresult_options_archiveresult_abid_and_more'),
    ]

    operations = [
        # Must run first: generate_snapshot_abids uses 'self.uuid' as its rand source.
        migrations.RunPython(copy_snapshot_uuids, reverse_code=migrations.RunPython.noop),
        migrations.RunPython(generate_snapshot_abids, reverse_code=migrations.RunPython.noop),
        # Reads each result's parent snapshot (added, url) and rewrites the result's uuid.
        migrations.RunPython(generate_archiveresult_abids, reverse_code=migrations.RunPython.noop),
    ]
|
19
archivebox/core/migrations/0025_alter_archiveresult_uuid.py
Normal file
19
archivebox/core/migrations/0025_alter_archiveresult_uuid.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-13 12:08
|
||||
|
||||
import uuid
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Schema migration: redefine ``archiveresult.uuid`` as a unique, non-editable UUIDField.

    The field gets ``default=uuid.uuid4``, ``editable=False`` and ``unique=True``.
    """

    # Ordered after the data migration that rewrites ArchiveResult.uuid values.
    # NOTE(review): presumably this ordering is what lets the unique constraint
    # be applied safely -- confirm that existing uuid values are distinct.
    dependencies = [
        ('core', '0024_auto_20240513_1143'),
    ]

    operations = [
        migrations.AlterField(
            model_name='archiveresult',
            name='uuid',
            field=models.UUIDField(default=uuid.uuid4, editable=False, unique=True),
        ),
    ]
|
Loading…
Add table
Add a link
Reference in a new issue