diff --git a/archivebox/abid_utils/abid.py b/archivebox/abid_utils/abid.py
index a0e71937..e294e6a5 100644
--- a/archivebox/abid_utils/abid.py
+++ b/archivebox/abid_utils/abid.py
@@ -1,4 +1,6 @@
-from typing import NamedTuple, Any, Union, Optional
+__package__ = 'archivebox.abid_utils'
+
+from typing import NamedTuple, Any, Union, Optional, Dict
import ulid
import uuid6
@@ -9,6 +11,7 @@ from uuid import UUID
from typeid import TypeID # type: ignore[import-untyped]
from datetime import datetime
+from ..util import enforce_types
ABID_PREFIX_LEN = 4
@@ -108,6 +111,7 @@ class ABID(NamedTuple):
####################################################
+@enforce_types
def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str:
"""
'E4A5CCD9AF4ED2A6E0954DF19FD274E9CDDB4853051F033FD518BFC90AA1AC25'
@@ -130,17 +134,19 @@ def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str:
return hashlib.sha256(uri_bytes).hexdigest().upper()
-def abid_part_from_prefix(prefix: Optional[str]) -> str:
+@enforce_types
+def abid_part_from_prefix(prefix: str) -> str:
"""
'snp_'
"""
- if prefix is None:
- return 'obj_'
+ # None is not accepted here: there is no 'obj_' fallback any more,
+ # so prefix must be a real string that is 3 characters long once '_' is stripped.
prefix = prefix.strip('_').lower()
assert len(prefix) == 3
return prefix + '_'
+@enforce_types
def abid_part_from_uri(uri: str, salt: str=DEFAULT_ABID_URI_SALT) -> str:
"""
'E4A5CCD9' # takes first 8 characters of sha256(url)
@@ -148,12 +154,14 @@ def abid_part_from_uri(uri: str, salt: str=DEFAULT_ABID_URI_SALT) -> str:
uri = str(uri)
return uri_hash(uri, salt=salt)[:ABID_URI_LEN]
-def abid_part_from_ts(ts: Optional[datetime]) -> str:
+@enforce_types
+def abid_part_from_ts(ts: datetime) -> str:
"""
'01HX9FPYTR' # produces 10 character Timestamp section of ulid based on added date
"""
- return str(ulid.from_timestamp(ts) if ts else ulid.new())[:ABID_TS_LEN]
+ return str(ulid.from_timestamp(ts))[:ABID_TS_LEN]  # ts is required; there is no ulid.new() fallback for a missing timestamp
+@enforce_types
def abid_part_from_subtype(subtype: str) -> str:
"""
Snapshots have 01 type, other objects have other subtypes like wget/media/etc.
@@ -165,6 +173,7 @@ def abid_part_from_subtype(subtype: str) -> str:
return hashlib.sha256(subtype.encode('utf-8')).hexdigest()[:ABID_SUBTYPE_LEN].upper()
+@enforce_types
def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
"""
'ZYEBQE' # takes last 6 characters of randomness from existing legacy uuid db field
@@ -186,17 +195,22 @@ def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
return str(rand)[-ABID_RAND_LEN:].upper()
-def abid_from_values(prefix, ts, uri, subtype, rand, salt=DEFAULT_ABID_URI_SALT) -> ABID:
+@enforce_types
+def abid_hashes_from_values(prefix: str, ts: datetime, uri: str, subtype: str, rand: Union[str, UUID, None, int], salt: str=DEFAULT_ABID_URI_SALT) -> Dict[str, str]:
+ return {  # one derived part per ABID field; abid_from_values() passes these keys to ABID(...) as keyword arguments
+ 'prefix': abid_part_from_prefix(prefix),
+ 'ts': abid_part_from_ts(ts),
+ 'uri': abid_part_from_uri(uri, salt=salt),  # salt is only used for the uri part
+ 'subtype': abid_part_from_subtype(subtype),
+ 'rand': abid_part_from_rand(rand),
+ }
+
+@enforce_types
+def abid_from_values(prefix: str, ts: datetime, uri: str, subtype: str, rand: Union[str, UUID, None, int], salt: str=DEFAULT_ABID_URI_SALT) -> ABID:
"""
Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
"""
- abid = ABID(
- prefix=abid_part_from_prefix(prefix),
- ts=abid_part_from_ts(ts),
- uri=abid_part_from_uri(uri, salt=salt),
- subtype=abid_part_from_subtype(subtype),
- rand=abid_part_from_rand(rand),
- )
+ abid = ABID(**abid_hashes_from_values(prefix, ts, uri, subtype, rand, salt=salt))  # same five parts as before, now computed by abid_hashes_from_values()
assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for ts={ts} uri={uri} subtyp={subtype} rand={rand}'
return abid
diff --git a/archivebox/abid_utils/admin.py b/archivebox/abid_utils/admin.py
index bd97b60d..46adf3f7 100644
--- a/archivebox/abid_utils/admin.py
+++ b/archivebox/abid_utils/admin.py
@@ -16,21 +16,20 @@ def highlight_diff(display_val, compare_val):
display_val = str(display_val)
compare_val = str(compare_val)
- diff_chars = mark_safe('').join(
+ return mark_safe(''.join(
format_html('{}', display_val[i])
if display_val[i] != compare_val[i] else
format_html('{}', display_val[i])
for i in range(len(display_val))
- )
- return diff_chars
+ ))
def get_abid_info(self, obj, request=None):
try:
abid_diff = f' != obj.ABID: {highlight_diff(obj.ABID, obj.abid)} ❌' if str(obj.ABID) != str(obj.abid) else ' == .ABID ✅'
- fresh_abid = obj.generate_abid()
- fresh_abid_diff = f' != .fresh_abid: {highlight_diff(obj.ABID, fresh_abid)} ❌' if str(fresh_abid) != str(obj.ABID) else '✅'
- fresh_uuid_diff = f' != .fresh_uuid: {highlight_diff(obj.ABID.uuid, fresh_abid.uuid)} ❌' if str(fresh_abid.uuid) != str(obj.ABID.uuid) else '✅'
+ fresh_abid = obj.ABID_FRESH
+ fresh_abid_diff = f' != .fresh_abid: {highlight_diff(fresh_abid, obj.ABID)} ❌' if str(fresh_abid) != str(obj.ABID) else '✅'
+ fresh_uuid_diff = f' != .fresh_uuid: {highlight_diff(fresh_abid.uuid, obj.ABID.uuid)} ❌' if str(fresh_abid.uuid) != str(obj.ABID.uuid) else '✅'
id_fresh_abid_diff = f' != .fresh_abid ❌' if str(fresh_abid.uuid) != str(obj.id) else ' == .fresh_abid ✅'
id_abid_diff = f' != .abid.uuid: {highlight_diff(obj.ABID.uuid, obj.id)} ❌' if str(obj.id) != str(obj.ABID.uuid) else ' == .abid ✅'
@@ -74,16 +73,16 @@ def get_abid_info(self, obj, request=None):
''',
obj.api_url + (f'?api_key={get_or_create_api_token(request.user)}' if request and request.user else ''), obj.api_url, obj.api_docs_url,
- str(obj.abid), mark_safe(fresh_abid_diff),
- str(obj.ABID.uuid), mark_safe(fresh_uuid_diff),
+ highlight_diff(obj.abid, fresh_abid), mark_safe(fresh_abid_diff),
+ highlight_diff(obj.ABID.uuid, fresh_abid.uuid), mark_safe(fresh_uuid_diff),
str(obj.id), mark_safe(id_pk_diff + id_abid_diff + id_fresh_abid_diff),
# str(fresh_abid.uuid), mark_safe(fresh_uuid_diff),
# str(fresh_abid), mark_safe(fresh_abid_diff),
- obj.ABID.ts, str(obj.ABID.uuid)[0:14], mark_safe(ts_diff), obj.abid_ts_src, source_ts_val and source_ts_val.isoformat(),
- obj.ABID.uri, str(obj.ABID.uuid)[14:26], mark_safe(uri_diff), obj.abid_uri_src, str(obj.abid_values['uri']),
- obj.ABID.subtype, str(obj.ABID.uuid)[26:28], mark_safe(subtype_diff), obj.abid_subtype_src, str(obj.abid_values['subtype']),
- obj.ABID.rand, str(obj.ABID.uuid)[28:36], mark_safe(rand_diff), obj.abid_rand_src, str(obj.abid_values['rand'])[-7:],
- str(getattr(obj, 'old_id', '')),
+ highlight_diff(obj.ABID.ts, derived_ts), highlight_diff(str(obj.ABID.uuid)[0:14], str(fresh_abid.uuid)[0:14]), mark_safe(ts_diff), obj.abid_ts_src, source_ts_val and source_ts_val.isoformat(),
+ highlight_diff(obj.ABID.uri, derived_uri), highlight_diff(str(obj.ABID.uuid)[14:26], str(fresh_abid.uuid)[14:26]), mark_safe(uri_diff), obj.abid_uri_src, str(obj.abid_values['uri']),
+ highlight_diff(obj.ABID.subtype, derived_subtype), highlight_diff(str(obj.ABID.uuid)[26:28], str(fresh_abid.uuid)[26:28]), mark_safe(subtype_diff), obj.abid_subtype_src, str(obj.abid_values['subtype']),
+ highlight_diff(obj.ABID.rand, derived_rand), highlight_diff(str(obj.ABID.uuid)[28:36], str(fresh_abid.uuid)[28:36]), mark_safe(rand_diff), obj.abid_rand_src, str(obj.abid_values['rand'])[-7:],
+ highlight_diff(getattr(obj, 'old_id', ''), obj.pk),
)
except Exception as e:
return str(e)
diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py
index 61323a84..832a9348 100644
--- a/archivebox/core/admin.py
+++ b/archivebox/core/admin.py
@@ -352,7 +352,7 @@ class SnapshotActionForm(ActionForm):
@admin.register(Snapshot, site=archivebox_admin)
class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
list_display = ('added', 'title_str', 'files', 'size', 'url_str')
- sort_fields = ('title_str', 'url_str', 'added', 'files')
+ sort_fields = ('title_str', 'url_str', 'added')
readonly_fields = ('tags_str', 'timestamp', 'admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'API', 'link_dir')
search_fields = ('id', 'url', 'abid', 'old_id', 'timestamp', 'title', 'tags__name')
list_filter = ('added', 'updated', 'archiveresult__status', 'created_by', 'tags__name')
@@ -510,6 +510,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
# ordering='archiveresult_count',
)
def files(self, obj):
+ # delegates to snapshot_icons(), which computes and caches the icon string for this snapshot
return snapshot_icons(obj)
diff --git a/archivebox/index/html.py b/archivebox/index/html.py
index 2e5d18bc..504385b2 100644
--- a/archivebox/index/html.py
+++ b/archivebox/index/html.py
@@ -118,7 +118,7 @@ def render_django_template(template: str, context: Mapping[str, str]) -> str:
def snapshot_icons(snapshot) -> str:
- cache_key = f'{snapshot.pk}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
+ cache_key = f'result_icons:{snapshot.pk}:{(snapshot.modified or snapshot.created or snapshot.added).timestamp()}'
def calc_snapshot_icons():
from core.models import ArchiveResult
@@ -133,6 +133,7 @@ def snapshot_icons(snapshot) -> str:
else:
archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False)
+ # import ipdb; ipdb.set_trace()
link = snapshot.as_link()
path = link.archive_path
canon = link.canonical_outputs()
@@ -197,7 +198,12 @@ def snapshot_icons(snapshot) -> str:
# print(((end - start).total_seconds()*1000) // 1, 'ms')
return result
- return cache.get_or_set(cache_key, calc_snapshot_icons)
- # return calc_snapshot_icons()
+ cache_result = cache.get(cache_key)
+ if cache_result is not None:  # cache.get() returns None on a miss; a falsy cached value is still a hit
+ return cache_result
+
+ fresh_result = calc_snapshot_icons()
+ cache.set(cache_key, fresh_result, timeout=60 * 60 * 24)  # keep the rendered icons for 24 hours
+ return fresh_result