mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-14 15:14:31 -04:00
add ulid and typeid to Snapshot and ArchiveResult
This commit is contained in:
parent
0529099639
commit
33bc4622a0
3 changed files with 85 additions and 4 deletions
|
@ -2,10 +2,13 @@ __package__ = 'archivebox.core'
|
||||||
|
|
||||||
|
|
||||||
import uuid
|
import uuid
|
||||||
|
import ulid
|
||||||
import json
|
import json
|
||||||
|
import hashlib
|
||||||
|
from typeid import TypeID
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, List
|
from typing import Optional, List, NamedTuple
|
||||||
from importlib import import_module
|
from importlib import import_module
|
||||||
|
|
||||||
from django.db import models
|
from django.db import models
|
||||||
|
@ -37,6 +40,13 @@ except AttributeError:
|
||||||
JSONField = jsonfield.JSONField
|
JSONField = jsonfield.JSONField
|
||||||
|
|
||||||
|
|
||||||
|
class ULIDParts(NamedTuple):
    """The four string segments that are concatenated, in field order, to form a ULID string.

    Code in this file builds an instance from the ``ulid_from_*`` properties
    of a model and passes ``''.join(parts)`` to ``ulid.parse``.
    """

    # segment taken from a ULID built from a timestamp
    timestamp: str
    # segment taken from a ULID built from the URL hash
    url: str
    # short type discriminator segment
    subtype: str
    # trailing segment taken from a ULID built from a UUID
    randomness: str
|
||||||
|
|
||||||
|
|
||||||
class Tag(models.Model):
|
class Tag(models.Model):
|
||||||
"""
|
"""
|
||||||
Based on django-taggit model
|
Based on django-taggit model
|
||||||
|
@ -99,6 +109,38 @@ class Snapshot(models.Model):
|
||||||
|
|
||||||
keys = ('url', 'timestamp', 'title', 'tags', 'updated')
|
keys = ('url', 'timestamp', 'title', 'tags', 'updated')
|
||||||
|
|
||||||
|
@property
def ulid_from_timestamp(self):
    """Return the first 10 characters of a ULID built from ``self.added``.

    Only the leading slice of the encoded ULID is kept; the rest of the
    generated value is discarded.
    """
    encoded = str(ulid.from_timestamp(self.added))
    return encoded[:10]
|
||||||
|
|
||||||
|
@property
def ulid_from_urlhash(self):
    """Return characters 10 through 17 of a ULID built from ``self.url_hash``.

    NOTE(review): ``url_hash`` is passed to ``ulid.from_randomness`` as a
    string -- presumably the library decodes it as an encoded randomness
    value; confirm against the ulid-py documentation.
    """
    encoded = str(ulid.from_randomness(self.url_hash))
    return encoded[10:18]
|
||||||
|
|
||||||
|
@property
def ulid_from_type(self):
    """Return the fixed two-character type segment used for snapshots."""
    snapshot_subtype = '00'
    return snapshot_subtype
|
||||||
|
|
||||||
|
@property
def ulid_from_randomness(self):
    """Return everything from index 20 onward of a ULID built from ``self.id``.

    ``self.id`` is handed to ``ulid.from_uuid``, so it is presumably a UUID
    value -- verify against the model's field definition.
    """
    encoded = str(ulid.from_uuid(self.id))
    return encoded[20:]
|
||||||
|
|
||||||
|
@property
def ulid_tuple(self) -> ULIDParts:
    """Collect this snapshot's four ULID segments into a ``ULIDParts`` tuple."""
    return ULIDParts(
        timestamp=self.ulid_from_timestamp,
        url=self.ulid_from_urlhash,
        subtype=self.ulid_from_type,
        randomness=self.ulid_from_randomness,
    )
|
||||||
|
|
||||||
|
@property
def ulid(self):
    """Join the segments of ``self.ulid_tuple`` and parse the result with ``ulid.parse``."""
    encoded = ''.join(self.ulid_tuple)
    return ulid.parse(encoded)
|
||||||
|
|
||||||
|
@property
def uuid(self):
    """Return the ``uuid`` attribute of this snapshot's composed ``self.ulid``."""
    composed = self.ulid
    return composed.uuid
|
||||||
|
|
||||||
|
@property
def typeid(self):
    """Return a ``TypeID`` with prefix ``'snapshot'`` built from the UUID of ``self.ulid``."""
    suffix_uuid = self.ulid.uuid
    return TypeID.from_uuid(prefix='snapshot', suffix=suffix_uuid)
|
||||||
|
|
||||||
def __repr__(self) -> str:
    """Return ``[timestamp] url (title)`` with url and title cut to 64 characters.

    A falsy title is shown as ``-``.
    """
    short_url = self.url[:64]
    short_title = (self.title or '-')[:64]
    return f'[{self.timestamp}] {short_url} ({short_title})'
|
||||||
|
@ -163,7 +205,10 @@ class Snapshot(models.Model):
|
||||||
|
|
||||||
@cached_property
def url_hash(self):
    """Return the first 16 hex characters of the SHA-256 digest of ``self.url``.

    The URL is encoded as UTF-8 before hashing.
    """
    digest = hashlib.sha256(self.url.encode('utf-8')).hexdigest()
    return digest[:16]
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def base_url(self):
|
def base_url(self):
|
||||||
|
@ -271,7 +316,7 @@ class ArchiveResult(models.Model):
|
||||||
EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
|
EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
|
||||||
|
|
||||||
id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
|
id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
|
||||||
uuid = models.UUIDField(default=uuid.uuid4, editable=False)
|
uuid = models.UUIDField(default=uuid.uuid4, editable=True)
|
||||||
|
|
||||||
snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
|
snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
|
||||||
extractor = models.CharField(choices=EXTRACTOR_CHOICES, max_length=32)
|
extractor = models.CharField(choices=EXTRACTOR_CHOICES, max_length=32)
|
||||||
|
@ -292,6 +337,40 @@ class ArchiveResult(models.Model):
|
||||||
def snapshot_dir(self):
|
def snapshot_dir(self):
|
||||||
return Path(self.snapshot.link_dir)
|
return Path(self.snapshot.link_dir)
|
||||||
|
|
||||||
|
@property
def ulid_from_timestamp(self):
    """Return the timestamp ULID segment of the parent ``self.snapshot``."""
    parent = self.snapshot
    return parent.ulid_from_timestamp
|
||||||
|
|
||||||
|
@property
def ulid_from_urlhash(self):
    """Return the URL-hash ULID segment of the parent ``self.snapshot``."""
    parent = self.snapshot
    return parent.ulid_from_urlhash
|
||||||
|
|
||||||
|
@property
def ulid_from_snapshot(self):
    """Return the first 18 characters of the parent snapshot's ULID string."""
    snapshot_ulid = str(self.snapshot.ulid)
    return snapshot_ulid[:18]
|
||||||
|
|
||||||
|
@property
def ulid_from_type(self):
    """Return the first two hex characters of the SHA-256 digest of ``self.extractor``.

    The extractor name is encoded as UTF-8 before hashing.
    """
    extractor_bytes = self.extractor.encode('utf-8')
    digest = hashlib.sha256(extractor_bytes).hexdigest()
    return digest[:2]
|
||||||
|
|
||||||
|
@property
def ulid_from_randomness(self):
    """Return everything from index 20 onward of a ULID built from ``self.uuid``."""
    encoded = str(ulid.from_uuid(self.uuid))
    return encoded[20:]
|
||||||
|
|
||||||
|
@property
def ulid_tuple(self) -> ULIDParts:
    """Collect this result's four ULID segments into a ``ULIDParts`` tuple."""
    return ULIDParts(
        timestamp=self.ulid_from_timestamp,
        url=self.ulid_from_urlhash,
        subtype=self.ulid_from_type,
        randomness=self.ulid_from_randomness,
    )
|
||||||
|
|
||||||
|
@property
def ulid(self):
    """Join the segments of ``self.ulid_tuple`` and parse the result with ``ulid.parse``.

    The stored ``self.uuid`` is not modified here.
    """
    encoded = ''.join(self.ulid_tuple)
    parsed = ulid.parse(encoded)
    # TODO: migrate self.uuid to match this new uuid
    # self.uuid = parsed.uuid
    return parsed
|
||||||
|
|
||||||
|
@property
def typeid(self):
    """Return a ``TypeID`` with prefix ``'result'`` built from the UUID of ``self.ulid``."""
    suffix_uuid = self.ulid.uuid
    return TypeID.from_uuid(prefix='result', suffix=suffix_uuid)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def extractor_module(self):
|
def extractor_module(self):
|
||||||
|
|
|
@ -263,7 +263,7 @@ CACHES = {
|
||||||
'default': {'BACKEND': 'django.core.cache.backends.db.DatabaseCache', 'LOCATION': 'cache'},
|
'default': {'BACKEND': 'django.core.cache.backends.db.DatabaseCache', 'LOCATION': 'cache'},
|
||||||
'dummy': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'},
|
'dummy': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'},
|
||||||
'locmem': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'},
|
'locmem': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'},
|
||||||
# 'filebased': {"BACKEND": "django.core.cache.backends.filebased.FileBasedCache", "LOCATION": CACHE_DIR / 'cache_filebased'},
|
'filebased': {"BACKEND": "django.core.cache.backends.filebased.FileBasedCache", "LOCATION": CACHE_DIR / 'cache_filebased'},
|
||||||
}
|
}
|
||||||
|
|
||||||
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
|
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
|
||||||
|
|
|
@ -37,6 +37,8 @@ dependencies = [
|
||||||
# - See Github issues for more...
|
# - See Github issues for more...
|
||||||
"django-signal-webhooks>=0.3.0",
|
"django-signal-webhooks>=0.3.0",
|
||||||
"django-admin-data-views>=0.3.1",
|
"django-admin-data-views>=0.3.1",
|
||||||
|
"ulid-py>=1.1.0",
|
||||||
|
"typeid-python>=0.3.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
homepage = "https://github.com/ArchiveBox/ArchiveBox"
|
homepage = "https://github.com/ArchiveBox/ArchiveBox"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue