mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 06:34:25 -04:00
abid gradual improvements, some regrets
This commit is contained in:
parent
dd97f01bfc
commit
d060eaa499
4 changed files with 51 additions and 31 deletions
|
@ -1,4 +1,6 @@
|
||||||
from typing import NamedTuple, Any, Union, Optional
|
__package__ = 'archivebox.abid_utils'
|
||||||
|
|
||||||
|
from typing import NamedTuple, Any, Union, Optional, Dict
|
||||||
|
|
||||||
import ulid
|
import ulid
|
||||||
import uuid6
|
import uuid6
|
||||||
|
@ -9,6 +11,7 @@ from uuid import UUID
|
||||||
from typeid import TypeID # type: ignore[import-untyped]
|
from typeid import TypeID # type: ignore[import-untyped]
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
from ..util import enforce_types
|
||||||
|
|
||||||
|
|
||||||
ABID_PREFIX_LEN = 4
|
ABID_PREFIX_LEN = 4
|
||||||
|
@ -108,6 +111,7 @@ class ABID(NamedTuple):
|
||||||
####################################################
|
####################################################
|
||||||
|
|
||||||
|
|
||||||
|
@enforce_types
|
||||||
def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str:
|
def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str:
|
||||||
"""
|
"""
|
||||||
'E4A5CCD9AF4ED2A6E0954DF19FD274E9CDDB4853051F033FD518BFC90AA1AC25'
|
'E4A5CCD9AF4ED2A6E0954DF19FD274E9CDDB4853051F033FD518BFC90AA1AC25'
|
||||||
|
@ -130,17 +134,19 @@ def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str:
|
||||||
|
|
||||||
return hashlib.sha256(uri_bytes).hexdigest().upper()
|
return hashlib.sha256(uri_bytes).hexdigest().upper()
|
||||||
|
|
||||||
def abid_part_from_prefix(prefix: Optional[str]) -> str:
|
@enforce_types
|
||||||
|
def abid_part_from_prefix(prefix: str) -> str:
|
||||||
"""
|
"""
|
||||||
'snp_'
|
'snp_'
|
||||||
"""
|
"""
|
||||||
if prefix is None:
|
# if prefix is None:
|
||||||
return 'obj_'
|
# return 'obj_'
|
||||||
|
|
||||||
prefix = prefix.strip('_').lower()
|
prefix = prefix.strip('_').lower()
|
||||||
assert len(prefix) == 3
|
assert len(prefix) == 3
|
||||||
return prefix + '_'
|
return prefix + '_'
|
||||||
|
|
||||||
|
@enforce_types
|
||||||
def abid_part_from_uri(uri: str, salt: str=DEFAULT_ABID_URI_SALT) -> str:
|
def abid_part_from_uri(uri: str, salt: str=DEFAULT_ABID_URI_SALT) -> str:
|
||||||
"""
|
"""
|
||||||
'E4A5CCD9' # takes first 8 characters of sha256(url)
|
'E4A5CCD9' # takes first 8 characters of sha256(url)
|
||||||
|
@ -148,12 +154,14 @@ def abid_part_from_uri(uri: str, salt: str=DEFAULT_ABID_URI_SALT) -> str:
|
||||||
uri = str(uri)
|
uri = str(uri)
|
||||||
return uri_hash(uri, salt=salt)[:ABID_URI_LEN]
|
return uri_hash(uri, salt=salt)[:ABID_URI_LEN]
|
||||||
|
|
||||||
def abid_part_from_ts(ts: Optional[datetime]) -> str:
|
@enforce_types
|
||||||
|
def abid_part_from_ts(ts: datetime) -> str:
|
||||||
"""
|
"""
|
||||||
'01HX9FPYTR' # produces 10 character Timestamp section of ulid based on added date
|
'01HX9FPYTR' # produces 10 character Timestamp section of ulid based on added date
|
||||||
"""
|
"""
|
||||||
return str(ulid.from_timestamp(ts) if ts else ulid.new())[:ABID_TS_LEN]
|
return str(ulid.from_timestamp(ts))[:ABID_TS_LEN]
|
||||||
|
|
||||||
|
@enforce_types
|
||||||
def abid_part_from_subtype(subtype: str) -> str:
|
def abid_part_from_subtype(subtype: str) -> str:
|
||||||
"""
|
"""
|
||||||
Snapshots have 01 type, other objects have other subtypes like wget/media/etc.
|
Snapshots have 01 type, other objects have other subtypes like wget/media/etc.
|
||||||
|
@ -165,6 +173,7 @@ def abid_part_from_subtype(subtype: str) -> str:
|
||||||
|
|
||||||
return hashlib.sha256(subtype.encode('utf-8')).hexdigest()[:ABID_SUBTYPE_LEN].upper()
|
return hashlib.sha256(subtype.encode('utf-8')).hexdigest()[:ABID_SUBTYPE_LEN].upper()
|
||||||
|
|
||||||
|
@enforce_types
|
||||||
def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
|
def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
|
||||||
"""
|
"""
|
||||||
'ZYEBQE' # takes last 6 characters of randomness from existing legacy uuid db field
|
'ZYEBQE' # takes last 6 characters of randomness from existing legacy uuid db field
|
||||||
|
@ -186,17 +195,22 @@ def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
|
||||||
return str(rand)[-ABID_RAND_LEN:].upper()
|
return str(rand)[-ABID_RAND_LEN:].upper()
|
||||||
|
|
||||||
|
|
||||||
def abid_from_values(prefix, ts, uri, subtype, rand, salt=DEFAULT_ABID_URI_SALT) -> ABID:
|
@enforce_types
|
||||||
|
def abid_hashes_from_values(prefix: str, ts: datetime, uri: str, subtype: str, rand: Union[str, UUID, None, int], salt: str=DEFAULT_ABID_URI_SALT) -> Dict[str, str]:
|
||||||
|
return {
|
||||||
|
'prefix': abid_part_from_prefix(prefix),
|
||||||
|
'ts': abid_part_from_ts(ts),
|
||||||
|
'uri': abid_part_from_uri(uri, salt=salt),
|
||||||
|
'subtype': abid_part_from_subtype(subtype),
|
||||||
|
'rand': abid_part_from_rand(rand),
|
||||||
|
}
|
||||||
|
|
||||||
|
@enforce_types
|
||||||
|
def abid_from_values(prefix: str, ts: datetime, uri: str, subtype: str, rand: Union[str, UUID, None, int], salt: str=DEFAULT_ABID_URI_SALT) -> ABID:
|
||||||
"""
|
"""
|
||||||
Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
|
Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
abid = ABID(
|
abid = ABID(**abid_hashes_from_values(prefix, ts, uri, subtype, rand, salt=salt))
|
||||||
prefix=abid_part_from_prefix(prefix),
|
|
||||||
ts=abid_part_from_ts(ts),
|
|
||||||
uri=abid_part_from_uri(uri, salt=salt),
|
|
||||||
subtype=abid_part_from_subtype(subtype),
|
|
||||||
rand=abid_part_from_rand(rand),
|
|
||||||
)
|
|
||||||
assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for ts={ts} uri={uri} subtyp={subtype} rand={rand}'
|
assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for ts={ts} uri={uri} subtyp={subtype} rand={rand}'
|
||||||
return abid
|
return abid
|
||||||
|
|
|
@ -16,21 +16,20 @@ def highlight_diff(display_val, compare_val):
|
||||||
display_val = str(display_val)
|
display_val = str(display_val)
|
||||||
compare_val = str(compare_val)
|
compare_val = str(compare_val)
|
||||||
|
|
||||||
diff_chars = mark_safe('').join(
|
return mark_safe(''.join(
|
||||||
format_html('<span style="color: red;">{}</span>', display_val[i])
|
format_html('<span style="color: red;">{}</span>', display_val[i])
|
||||||
if display_val[i] != compare_val[i] else
|
if display_val[i] != compare_val[i] else
|
||||||
format_html('<span display="color: black">{}</span>', display_val[i])
|
format_html('<span display="color: black">{}</span>', display_val[i])
|
||||||
for i in range(len(display_val))
|
for i in range(len(display_val))
|
||||||
)
|
))
|
||||||
return diff_chars
|
|
||||||
|
|
||||||
def get_abid_info(self, obj, request=None):
|
def get_abid_info(self, obj, request=None):
|
||||||
try:
|
try:
|
||||||
abid_diff = f' != obj.ABID: {highlight_diff(obj.ABID, obj.abid)} ❌' if str(obj.ABID) != str(obj.abid) else ' == .ABID ✅'
|
abid_diff = f' != obj.ABID: {highlight_diff(obj.ABID, obj.abid)} ❌' if str(obj.ABID) != str(obj.abid) else ' == .ABID ✅'
|
||||||
|
|
||||||
fresh_abid = obj.generate_abid()
|
fresh_abid = obj.ABID_FRESH
|
||||||
fresh_abid_diff = f' != .fresh_abid: {highlight_diff(obj.ABID, fresh_abid)} ❌' if str(fresh_abid) != str(obj.ABID) else '✅'
|
fresh_abid_diff = f' != .fresh_abid: {highlight_diff(fresh_abid, obj.ABID)} ❌' if str(fresh_abid) != str(obj.ABID) else '✅'
|
||||||
fresh_uuid_diff = f' != .fresh_uuid: {highlight_diff(obj.ABID.uuid, fresh_abid.uuid)} ❌' if str(fresh_abid.uuid) != str(obj.ABID.uuid) else '✅'
|
fresh_uuid_diff = f' != .fresh_uuid: {highlight_diff(fresh_abid.uuid, obj.ABID.uuid)} ❌' if str(fresh_abid.uuid) != str(obj.ABID.uuid) else '✅'
|
||||||
|
|
||||||
id_fresh_abid_diff = f' != .fresh_abid ❌' if str(fresh_abid.uuid) != str(obj.id) else ' == .fresh_abid ✅'
|
id_fresh_abid_diff = f' != .fresh_abid ❌' if str(fresh_abid.uuid) != str(obj.id) else ' == .fresh_abid ✅'
|
||||||
id_abid_diff = f' != .abid.uuid: {highlight_diff(obj.ABID.uuid, obj.id)} ❌' if str(obj.id) != str(obj.ABID.uuid) else ' == .abid ✅'
|
id_abid_diff = f' != .abid.uuid: {highlight_diff(obj.ABID.uuid, obj.id)} ❌' if str(obj.id) != str(obj.ABID.uuid) else ' == .abid ✅'
|
||||||
|
@ -74,16 +73,16 @@ def get_abid_info(self, obj, request=None):
|
||||||
</div>
|
</div>
|
||||||
''',
|
''',
|
||||||
obj.api_url + (f'?api_key={get_or_create_api_token(request.user)}' if request and request.user else ''), obj.api_url, obj.api_docs_url,
|
obj.api_url + (f'?api_key={get_or_create_api_token(request.user)}' if request and request.user else ''), obj.api_url, obj.api_docs_url,
|
||||||
str(obj.abid), mark_safe(fresh_abid_diff),
|
highlight_diff(obj.abid, fresh_abid), mark_safe(fresh_abid_diff),
|
||||||
str(obj.ABID.uuid), mark_safe(fresh_uuid_diff),
|
highlight_diff(obj.ABID.uuid, fresh_abid.uuid), mark_safe(fresh_uuid_diff),
|
||||||
str(obj.id), mark_safe(id_pk_diff + id_abid_diff + id_fresh_abid_diff),
|
str(obj.id), mark_safe(id_pk_diff + id_abid_diff + id_fresh_abid_diff),
|
||||||
# str(fresh_abid.uuid), mark_safe(fresh_uuid_diff),
|
# str(fresh_abid.uuid), mark_safe(fresh_uuid_diff),
|
||||||
# str(fresh_abid), mark_safe(fresh_abid_diff),
|
# str(fresh_abid), mark_safe(fresh_abid_diff),
|
||||||
obj.ABID.ts, str(obj.ABID.uuid)[0:14], mark_safe(ts_diff), obj.abid_ts_src, source_ts_val and source_ts_val.isoformat(),
|
highlight_diff(obj.ABID.ts, derived_ts), highlight_diff(str(obj.ABID.uuid)[0:14], str(fresh_abid.uuid)[0:14]), mark_safe(ts_diff), obj.abid_ts_src, source_ts_val and source_ts_val.isoformat(),
|
||||||
obj.ABID.uri, str(obj.ABID.uuid)[14:26], mark_safe(uri_diff), obj.abid_uri_src, str(obj.abid_values['uri']),
|
highlight_diff(obj.ABID.uri, derived_uri), highlight_diff(str(obj.ABID.uuid)[14:26], str(fresh_abid.uuid)[14:26]), mark_safe(uri_diff), obj.abid_uri_src, str(obj.abid_values['uri']),
|
||||||
obj.ABID.subtype, str(obj.ABID.uuid)[26:28], mark_safe(subtype_diff), obj.abid_subtype_src, str(obj.abid_values['subtype']),
|
highlight_diff(obj.ABID.subtype, derived_subtype), highlight_diff(str(obj.ABID.uuid)[26:28], str(fresh_abid.uuid)[26:28]), mark_safe(subtype_diff), obj.abid_subtype_src, str(obj.abid_values['subtype']),
|
||||||
obj.ABID.rand, str(obj.ABID.uuid)[28:36], mark_safe(rand_diff), obj.abid_rand_src, str(obj.abid_values['rand'])[-7:],
|
highlight_diff(obj.ABID.rand, derived_rand), highlight_diff(str(obj.ABID.uuid)[28:36], str(fresh_abid.uuid)[28:36]), mark_safe(rand_diff), obj.abid_rand_src, str(obj.abid_values['rand'])[-7:],
|
||||||
str(getattr(obj, 'old_id', '')),
|
highlight_diff(getattr(obj, 'old_id', ''), obj.pk),
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return str(e)
|
return str(e)
|
||||||
|
|
|
@ -352,7 +352,7 @@ class SnapshotActionForm(ActionForm):
|
||||||
@admin.register(Snapshot, site=archivebox_admin)
|
@admin.register(Snapshot, site=archivebox_admin)
|
||||||
class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
|
class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
|
||||||
list_display = ('added', 'title_str', 'files', 'size', 'url_str')
|
list_display = ('added', 'title_str', 'files', 'size', 'url_str')
|
||||||
sort_fields = ('title_str', 'url_str', 'added', 'files')
|
sort_fields = ('title_str', 'url_str', 'added')
|
||||||
readonly_fields = ('tags_str', 'timestamp', 'admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'API', 'link_dir')
|
readonly_fields = ('tags_str', 'timestamp', 'admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'API', 'link_dir')
|
||||||
search_fields = ('id', 'url', 'abid', 'old_id', 'timestamp', 'title', 'tags__name')
|
search_fields = ('id', 'url', 'abid', 'old_id', 'timestamp', 'title', 'tags__name')
|
||||||
list_filter = ('added', 'updated', 'archiveresult__status', 'created_by', 'tags__name')
|
list_filter = ('added', 'updated', 'archiveresult__status', 'created_by', 'tags__name')
|
||||||
|
@ -510,6 +510,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
|
||||||
# ordering='archiveresult_count',
|
# ordering='archiveresult_count',
|
||||||
)
|
)
|
||||||
def files(self, obj):
|
def files(self, obj):
|
||||||
|
# return '-'
|
||||||
return snapshot_icons(obj)
|
return snapshot_icons(obj)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -118,7 +118,7 @@ def render_django_template(template: str, context: Mapping[str, str]) -> str:
|
||||||
|
|
||||||
|
|
||||||
def snapshot_icons(snapshot) -> str:
|
def snapshot_icons(snapshot) -> str:
|
||||||
cache_key = f'{snapshot.pk}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
|
cache_key = f'result_icons:{snapshot.pk}:{(snapshot.modified or snapshot.created or snapshot.added).timestamp()}'
|
||||||
|
|
||||||
def calc_snapshot_icons():
|
def calc_snapshot_icons():
|
||||||
from core.models import ArchiveResult
|
from core.models import ArchiveResult
|
||||||
|
@ -133,6 +133,7 @@ def snapshot_icons(snapshot) -> str:
|
||||||
else:
|
else:
|
||||||
archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False)
|
archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False)
|
||||||
|
|
||||||
|
# import ipdb; ipdb.set_trace()
|
||||||
link = snapshot.as_link()
|
link = snapshot.as_link()
|
||||||
path = link.archive_path
|
path = link.archive_path
|
||||||
canon = link.canonical_outputs()
|
canon = link.canonical_outputs()
|
||||||
|
@ -197,7 +198,12 @@ def snapshot_icons(snapshot) -> str:
|
||||||
# print(((end - start).total_seconds()*1000) // 1, 'ms')
|
# print(((end - start).total_seconds()*1000) // 1, 'ms')
|
||||||
return result
|
return result
|
||||||
|
|
||||||
return cache.get_or_set(cache_key, calc_snapshot_icons)
|
cache_result = cache.get(cache_key)
|
||||||
# return calc_snapshot_icons()
|
if cache_result:
|
||||||
|
return cache_result
|
||||||
|
|
||||||
|
fresh_result = calc_snapshot_icons()
|
||||||
|
cache.set(cache_key, fresh_result, timeout=60 * 60 * 24)
|
||||||
|
return fresh_result
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue