fix ABID generation consistency when self._state.adding is True
Some checks are pending
Build Debian package / build (push) Waiting to run
Build Docker image / buildx (push) Waiting to run
Build Homebrew package / build (push) Waiting to run
CodeQL / Analyze (python) (push) Waiting to run
Build GitHub Pages website / build (push) Waiting to run
Build GitHub Pages website / deploy (push) Blocked by required conditions
Run linters / lint (push) Waiting to run
Build Pip package / build (push) Waiting to run
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Waiting to run
Run tests / docker_tests (push) Waiting to run

This commit is contained in:
Nick Sweeting 2024-08-20 05:56:19 -07:00
parent 9d2116ad9a
commit 4ae186dfca
No known key found for this signature in database
4 changed files with 91 additions and 33 deletions

View file

@ -1,7 +1,7 @@
""" """
This file provides the Django ABIDField and ABIDModel base model to inherit from. This file provides the Django ABIDField and ABIDModel base model to inherit from.
It implements the ArchiveBox ID (ABID) interfaces including abid_values, get_abid, .abid, .uuid, .id. It implements the ArchiveBox ID (ABID) interfaces including abid_values, generate_abid, .abid, .uuid, .id.
""" """
from typing import Any, Dict, Union, List, Set, NamedTuple, cast from typing import Any, Dict, Union, List, Set, NamedTuple, cast
@ -82,14 +82,17 @@ class ABIDModel(models.Model):
abstract = True abstract = True
def save(self, *args: Any, **kwargs: Any) -> None:
    """
    Save the row, first syncing self.id and self.abid with self.ABID.

    When the row is being added (self._state.adding) or the field is empty,
    self.id is set to self.ABID.uuid and self.abid to str(self.ABID) before
    delegating to the parent save().

    Raises:
        AssertionError: if, after saving, self.id or self.abid does not
            match the value derived from self.ABID.
    """
    # when first creating a row, self.ABID is the source of truth
    # overwrite default prefilled self.id & self.abid with generated self.ABID value
    if self._state.adding or not self.id:
        self.id = self.ABID.uuid
    if self._state.adding or not self.abid:
        self.abid = str(self.ABID)

    super().save(*args, **kwargs)

    # post-save sanity checks: the stored identifiers must agree with self.ABID
    assert str(self.id) == str(self.ABID.uuid), f'self.id {self.id} does not match self.ABID {self.ABID.uuid}'
    # report the values actually being compared (self.abid vs self.ABID), not the id/uuid pair
    assert str(self.abid) == str(self.ABID), f'self.abid {self.abid} does not match self.ABID {self.ABID}'
@property @property
def abid_values(self) -> Dict[str, Any]: def abid_values(self) -> Dict[str, Any]:
@ -101,7 +104,7 @@ class ABIDModel(models.Model):
'rand': eval(self.abid_rand_src), 'rand': eval(self.abid_rand_src),
} }
def get_abid(self) -> ABID: def generate_abid(self) -> ABID:
""" """
Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src). Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
""" """
@ -143,7 +146,30 @@ class ABIDModel(models.Model):
""" """
ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE') ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE')
""" """
return ABID.parse(self.abid) if getattr(self, 'abid', None) else self.get_abid() abid = None
try:
abid = abid or ABID.parse(self.pk)
except Exception:
pass
try:
abid = abid or ABID.parse(self.id)
except Exception:
pass
try:
abid = abid or ABID.parse(self.uuid)
except Exception:
pass
try:
abid = abid or ABID.parse(self.abid)
except Exception:
pass
abid = abid or self.generate_abid()
return abid
@property @property
def ULID(self) -> ULID: def ULID(self) -> ULID:
@ -276,7 +302,7 @@ def find_obj_from_abid_rand(rand: Union[ABID, str], model=None) -> List[ABIDMode
) )
for obj in qs: for obj in qs:
if obj.get_abid() == abid: if obj.generate_abid() == abid:
# found exact match, no need to keep iterating # found exact match, no need to keep iterating
return [obj] return [obj]
partial_matches.append(obj) partial_matches.append(obj)

View file

@ -55,11 +55,9 @@ class APIToken(ABIDModel):
def __json__(self) -> dict: def __json__(self) -> dict:
return { return {
"TYPE": "APIToken", "TYPE": "APIToken",
"uuid": str(self.id), "id": str(self.pk),
"ulid": str(self.ulid), "abid": str(self.ABID),
"abid": str(self.get_abid()), "created_by_id": str(self.created_by_id),
"user_id": str(self.user.id),
"user_username": self.user.username,
"token": self.token, "token": self.token,
"created": self.created.isoformat(), "created": self.created.isoformat(),
"expires": self.expires_as_iso8601, "expires": self.expires_as_iso8601,

View file

@ -1,6 +1,8 @@
__package__ = 'archivebox.core' __package__ = 'archivebox.core'
import os
import json import json
from io import StringIO from io import StringIO
from pathlib import Path from pathlib import Path
from contextlib import redirect_stdout from contextlib import redirect_stdout
@ -197,28 +199,29 @@ def get_abid_info(self, obj):
<a href="{}" style="font-size: 16px; font-family: monospace; user-select: all; border-radius: 8px; background-color: #ddf; padding: 3px 5px; border: 1px solid #aaa; margin-bottom: 8px; display: inline-block; vertical-align: top;">{}</a> &nbsp; &nbsp; <a href="{}" style="color: limegreen; font-size: 0.9em; vertical-align: 1px; font-family: monospace;">📖 API DOCS</a> <a href="{}" style="font-size: 16px; font-family: monospace; user-select: all; border-radius: 8px; background-color: #ddf; padding: 3px 5px; border: 1px solid #aaa; margin-bottom: 8px; display: inline-block; vertical-align: top;">{}</a> &nbsp; &nbsp; <a href="{}" style="color: limegreen; font-size: 0.9em; vertical-align: 1px; font-family: monospace;">📖 API DOCS</a>
<br/><hr/> <br/><hr/>
<div style="opacity: 0.8"> <div style="opacity: 0.8">
&nbsp; &nbsp; TS: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<code style="font-size: 10px; user-select: all"><b>{}</b></code> &nbsp; &nbsp; &nbsp;&nbsp; ({})<br/>
&nbsp; &nbsp; URI: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px; user-select: all"><b>{}</b></code> &nbsp;&nbsp; &nbsp; &nbsp; &nbsp;&nbsp; (<span style="display:inline-block; vertical-align: -4px; user-select: all; width: 230px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}</span>)<br/>
&nbsp; &nbsp; SUBTYPE: &nbsp; &nbsp; &nbsp; <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({}) &nbsp; &nbsp;
&nbsp; RAND: &nbsp; <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({}) &nbsp; &nbsp;
&nbsp; SALT: &nbsp; <code style="font-size: 10px; user-select: all"><b style="display:inline-block; user-select: all; width: 50px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}</b></code>
<br/><hr/>
&nbsp; &nbsp; <small style="opacity: 0.8">.abid: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px; user-select: all">{}</code></small><br/> &nbsp; &nbsp; <small style="opacity: 0.8">.abid: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px; user-select: all">{}</code></small><br/>
&nbsp; &nbsp; <small style="opacity: 0.8">.abid.uuid: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px; user-select: all">{}</code></small><br/> &nbsp; &nbsp; <small style="opacity: 0.8">.abid.uuid: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px; user-select: all">{}</code></small><br/>
&nbsp; &nbsp; <small style="opacity: 0.8">.id: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<code style="font-size: 10px; user-select: all">{}</code></small><br/> &nbsp; &nbsp; <small style="opacity: 0.8">.id: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<code style="font-size: 10px; user-select: all">{}</code></small><br/>
<hr/>
&nbsp; &nbsp; TS: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<code style="font-size: 10px;"><b style="user-select: all">{}</b> &nbsp; {}</code> &nbsp; &nbsp; &nbsp;&nbsp; {}: <code style="user-select: all">{}</code><br/>
&nbsp; &nbsp; URI: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px; "><b style="user-select: all">{}</b> &nbsp; &nbsp; {}</code> &nbsp;&nbsp; &nbsp; &nbsp; &nbsp;&nbsp; <span style="display:inline-block; vertical-align: -4px; width: 290px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}: <code style="user-select: all">{}</code></span>
&nbsp; SALT: &nbsp; <code style="font-size: 10px;"><b style="display:inline-block; user-select: all; width: 50px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}</b></code><br/>
&nbsp; &nbsp; SUBTYPE: &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b style="user-select: all">{}</b> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; {}</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; {}: <code style="user-select: all">{}</code><br/>
&nbsp; &nbsp; RAND: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b style="user-select: all">{}</b> &nbsp; &nbsp; &nbsp; {}</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; {}: <code style="user-select: all">{}</code>
<br/><hr/>
&nbsp; &nbsp; <small style="opacity: 0.5">.old_id: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<code style="font-size: 10px; user-select: all">{}</code></small><br/> &nbsp; &nbsp; <small style="opacity: 0.5">.old_id: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<code style="font-size: 10px; user-select: all">{}</code></small><br/>
</div> </div>
''', ''',
obj.api_url, obj.api_url, obj.api_docs_url, obj.api_url, obj.api_url, obj.api_docs_url,
obj.ABID.ts, obj.abid_values['ts'].isoformat() if isinstance(obj.abid_values['ts'], datetime) else obj.abid_values['ts'],
obj.ABID.uri, str(obj.abid_values['uri']),
obj.ABID.subtype, str(obj.abid_values['subtype']),
obj.ABID.rand, str(obj.abid_values['rand'])[-7:],
obj.ABID.uri_salt,
str(obj.abid), str(obj.abid),
str(obj.ABID.uuid), str(obj.ABID.uuid),
obj.id, str(obj.id),
getattr(obj, 'old_id', ''), obj.ABID.ts, str(obj.ABID.uuid)[0:14], obj.abid_ts_src, obj.abid_values['ts'].isoformat() if isinstance(obj.abid_values['ts'], datetime) else obj.abid_values['ts'],
obj.ABID.uri, str(obj.ABID.uuid)[14:26], obj.abid_uri_src, str(obj.abid_values['uri']),
obj.ABID.uri_salt,
obj.ABID.subtype, str(obj.ABID.uuid)[26:28], obj.abid_subtype_src, str(obj.abid_values['subtype']),
obj.ABID.rand, str(obj.ABID.uuid)[28:36], obj.abid_rand_src, str(obj.abid_values['rand'])[-7:],
str(getattr(obj, 'old_id', '')),
) )
@ -568,9 +571,9 @@ class TagAdmin(admin.ModelAdmin):
class ArchiveResultAdmin(admin.ModelAdmin): class ArchiveResultAdmin(admin.ModelAdmin):
list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str') list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
sort_fields = ('start_ts', 'extractor', 'status') sort_fields = ('start_ts', 'extractor', 'status')
readonly_fields = ('snapshot_info', 'tags_str', 'created', 'modified', 'API') readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created', 'modified', 'API', 'output_summary')
search_fields = ('id', 'old_id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp') search_fields = ('id', 'old_id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'cmd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', *readonly_fields) fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', 'cmd', *readonly_fields)
autocomplete_fields = ['snapshot'] autocomplete_fields = ['snapshot']
list_filter = ('status', 'extractor', 'start_ts', 'cmd_version') list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
@ -593,6 +596,7 @@ class ArchiveResultAdmin(admin.ModelAdmin):
try: try:
return get_abid_info(self, obj) return get_abid_info(self, obj)
except Exception as e: except Exception as e:
raise e
return str(e) return str(e)
@admin.display( @admin.display(
@ -614,3 +618,33 @@ class ArchiveResultAdmin(admin.ModelAdmin):
result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html', result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html',
result.output, result.output,
) )
def output_summary(self, result):
    """
    Render an HTML summary of a result's output for the admin page.

    The summary contains result.output, a link to the snapshot's index
    page, and a listing (at most two directory levels deep) of the files
    found under the output path. If the output path does not exist on
    disk, the snapshot directory is listed instead.

    Args:
        result: the object being displayed; its .pwd, .output and
            .snapshot.timestamp attributes are read.

    Returns:
        The accumulated HTML built from format_html() fragments.
    """
    snapshot_dir = Path(OUTPUT_DIR) / str(result.pwd).split('data/', 1)[-1]

    output_str = format_html(
        '<pre style="display: inline-block">{}</pre><br/>',
        result.output,
    )
    output_str += format_html(
        '<a href="/archive/{}/index.html#all">See result files ...</a><br/><pre><code>',
        str(result.snapshot.timestamp),
    )

    # an empty/None output would make the path join raise TypeError;
    # joining '' leaves snapshot_dir unchanged
    path_from_output_str = snapshot_dir / (result.output or '')
    output_str += format_html(
        '<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>',
        str(snapshot_dir),
        str(result.output),
    )

    if path_from_output_str.exists():
        root_dir = str(path_from_output_str)
    else:
        root_dir = str(snapshot_dir)

    for root, dirs, files in os.walk(root_dir):
        # strip only the leading root_dir prefix; str.replace() would also
        # remove later occurrences of the same substring and skew the depth
        depth = root[len(root_dir):].count(os.sep) + 1
        if depth >= 2:
            # nothing deeper than two levels is shown, so prune in place
            # to stop os.walk from descending any further
            dirs[:] = []
        else:
            # list directories in a stable order, like the files below
            dirs.sort()

        indent = ' ' * 4 * depth
        output_str += format_html('<b style="padding: 1px">{}{}/</b><br/>', indent, os.path.basename(root))

        indentation_str = ' ' * 4 * (depth + 1)
        for filename in sorted(files):
            is_hidden = filename.startswith('.')
            # int(not is_hidden) yields "0.2" for dotfiles and "1.2" otherwise
            output_str += format_html(
                '<span style="opacity: {}.2">{}{}</span><br/>',
                int(not is_hidden),
                indentation_str,
                filename.strip(),
            )

    return output_str + format_html('</code></pre>')

View file

@ -372,7 +372,7 @@ class ArchiveResult(ABIDModel):
abid_ts_src = 'self.snapshot.added' abid_ts_src = 'self.snapshot.added'
abid_uri_src = 'self.snapshot.url' abid_uri_src = 'self.snapshot.url'
abid_subtype_src = 'self.extractor' abid_subtype_src = 'self.extractor'
abid_rand_src = 'self.id' abid_rand_src = 'self.old_id'
EXTRACTOR_CHOICES = EXTRACTOR_CHOICES EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
old_id = models.BigIntegerField(default=rand_int_id, serialize=False, verbose_name='Old ID') old_id = models.BigIntegerField(default=rand_int_id, serialize=False, verbose_name='Old ID')