mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 06:34:25 -04:00
fix ABID generation consistency when self._state.adding is True
Some checks are pending
Build Debian package / build (push) Waiting to run
Build Docker image / buildx (push) Waiting to run
Build Homebrew package / build (push) Waiting to run
CodeQL / Analyze (python) (push) Waiting to run
Build GitHub Pages website / build (push) Waiting to run
Build GitHub Pages website / deploy (push) Blocked by required conditions
Run linters / lint (push) Waiting to run
Build Pip package / build (push) Waiting to run
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Waiting to run
Run tests / docker_tests (push) Waiting to run
Some checks are pending
Build Debian package / build (push) Waiting to run
Build Docker image / buildx (push) Waiting to run
Build Homebrew package / build (push) Waiting to run
CodeQL / Analyze (python) (push) Waiting to run
Build GitHub Pages website / build (push) Waiting to run
Build GitHub Pages website / deploy (push) Blocked by required conditions
Run linters / lint (push) Waiting to run
Build Pip package / build (push) Waiting to run
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Waiting to run
Run tests / docker_tests (push) Waiting to run
This commit is contained in:
parent
9d2116ad9a
commit
4ae186dfca
4 changed files with 91 additions and 33 deletions
|
@ -1,7 +1,7 @@
|
||||||
"""
|
"""
|
||||||
This file provides the Django ABIDField and ABIDModel base model to inherit from.
|
This file provides the Django ABIDField and ABIDModel base model to inherit from.
|
||||||
|
|
||||||
It implements the ArchiveBox ID (ABID) interfaces including abid_values, get_abid, .abid, .uuid, .id.
|
It implements the ArchiveBox ID (ABID) interfaces including abid_values, generate_abid, .abid, .uuid, .id.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Any, Dict, Union, List, Set, NamedTuple, cast
|
from typing import Any, Dict, Union, List, Set, NamedTuple, cast
|
||||||
|
@ -82,14 +82,17 @@ class ABIDModel(models.Model):
|
||||||
abstract = True
|
abstract = True
|
||||||
|
|
||||||
def save(self, *args: Any, **kwargs: Any) -> None:
|
def save(self, *args: Any, **kwargs: Any) -> None:
|
||||||
if hasattr(self, 'abid'):
|
# when first creating a row, self.ABID is the source of truth
|
||||||
# self.abid = ABID.parse(self.abid) if self.abid else self.get_abid()
|
# overwrite default prefilled self.id & self.abid with generated self.ABID value
|
||||||
self.abid = self.get_abid()
|
if self._state.adding or not self.id:
|
||||||
else:
|
self.id = self.ABID.uuid
|
||||||
print(f'[!] WARNING: {self.__class__.__name__}.abid is not a DB field so ABID will not be persisted!')
|
if self._state.adding or not self.abid:
|
||||||
self.abid = self.get_abid()
|
self.abid = str(self.ABID)
|
||||||
|
|
||||||
super().save(*args, **kwargs)
|
super().save(*args, **kwargs)
|
||||||
|
assert str(self.id) == str(self.ABID.uuid), f'self.id {self.id} does not match self.ABID {self.ABID.uuid}'
|
||||||
|
assert str(self.abid) == str(self.ABID), f'self.abid {self.id} does not match self.ABID {self.ABID.uuid}'
|
||||||
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def abid_values(self) -> Dict[str, Any]:
|
def abid_values(self) -> Dict[str, Any]:
|
||||||
|
@ -101,7 +104,7 @@ class ABIDModel(models.Model):
|
||||||
'rand': eval(self.abid_rand_src),
|
'rand': eval(self.abid_rand_src),
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_abid(self) -> ABID:
|
def generate_abid(self) -> ABID:
|
||||||
"""
|
"""
|
||||||
Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
|
Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
|
||||||
"""
|
"""
|
||||||
|
@ -143,7 +146,30 @@ class ABIDModel(models.Model):
|
||||||
"""
|
"""
|
||||||
ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE')
|
ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE')
|
||||||
"""
|
"""
|
||||||
return ABID.parse(self.abid) if getattr(self, 'abid', None) else self.get_abid()
|
abid = None
|
||||||
|
try:
|
||||||
|
abid = abid or ABID.parse(self.pk)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
abid = abid or ABID.parse(self.id)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
abid = abid or ABID.parse(self.uuid)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
abid = abid or ABID.parse(self.abid)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
abid = abid or self.generate_abid()
|
||||||
|
|
||||||
|
return abid
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def ULID(self) -> ULID:
|
def ULID(self) -> ULID:
|
||||||
|
@ -276,7 +302,7 @@ def find_obj_from_abid_rand(rand: Union[ABID, str], model=None) -> List[ABIDMode
|
||||||
)
|
)
|
||||||
|
|
||||||
for obj in qs:
|
for obj in qs:
|
||||||
if obj.get_abid() == abid:
|
if obj.generate_abid() == abid:
|
||||||
# found exact match, no need to keep iterating
|
# found exact match, no need to keep iterating
|
||||||
return [obj]
|
return [obj]
|
||||||
partial_matches.append(obj)
|
partial_matches.append(obj)
|
||||||
|
|
|
@ -55,11 +55,9 @@ class APIToken(ABIDModel):
|
||||||
def __json__(self) -> dict:
|
def __json__(self) -> dict:
|
||||||
return {
|
return {
|
||||||
"TYPE": "APIToken",
|
"TYPE": "APIToken",
|
||||||
"uuid": str(self.id),
|
"id": str(self.pk),
|
||||||
"ulid": str(self.ulid),
|
"abid": str(self.ABID),
|
||||||
"abid": str(self.get_abid()),
|
"created_by_id": str(self.created_by_id),
|
||||||
"user_id": str(self.user.id),
|
|
||||||
"user_username": self.user.username,
|
|
||||||
"token": self.token,
|
"token": self.token,
|
||||||
"created": self.created.isoformat(),
|
"created": self.created.isoformat(),
|
||||||
"expires": self.expires_as_iso8601,
|
"expires": self.expires_as_iso8601,
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
__package__ = 'archivebox.core'
|
__package__ = 'archivebox.core'
|
||||||
|
|
||||||
|
import os
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from contextlib import redirect_stdout
|
from contextlib import redirect_stdout
|
||||||
|
@ -197,28 +199,29 @@ def get_abid_info(self, obj):
|
||||||
<a href="{}" style="font-size: 16px; font-family: monospace; user-select: all; border-radius: 8px; background-color: #ddf; padding: 3px 5px; border: 1px solid #aaa; margin-bottom: 8px; display: inline-block; vertical-align: top;">{}</a> <a href="{}" style="color: limegreen; font-size: 0.9em; vertical-align: 1px; font-family: monospace;">📖 API DOCS</a>
|
<a href="{}" style="font-size: 16px; font-family: monospace; user-select: all; border-radius: 8px; background-color: #ddf; padding: 3px 5px; border: 1px solid #aaa; margin-bottom: 8px; display: inline-block; vertical-align: top;">{}</a> <a href="{}" style="color: limegreen; font-size: 0.9em; vertical-align: 1px; font-family: monospace;">📖 API DOCS</a>
|
||||||
<br/><hr/>
|
<br/><hr/>
|
||||||
<div style="opacity: 0.8">
|
<div style="opacity: 0.8">
|
||||||
TS: <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({})<br/>
|
|
||||||
URI: <code style="font-size: 10px; user-select: all"><b>{}</b></code> (<span style="display:inline-block; vertical-align: -4px; user-select: all; width: 230px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}</span>)<br/>
|
|
||||||
SUBTYPE: <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({})
|
|
||||||
RAND: <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({})
|
|
||||||
SALT: <code style="font-size: 10px; user-select: all"><b style="display:inline-block; user-select: all; width: 50px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}</b></code>
|
|
||||||
<br/><hr/>
|
|
||||||
<small style="opacity: 0.8">.abid: <code style="font-size: 10px; user-select: all">{}</code></small><br/>
|
<small style="opacity: 0.8">.abid: <code style="font-size: 10px; user-select: all">{}</code></small><br/>
|
||||||
<small style="opacity: 0.8">.abid.uuid: <code style="font-size: 10px; user-select: all">{}</code></small><br/>
|
<small style="opacity: 0.8">.abid.uuid: <code style="font-size: 10px; user-select: all">{}</code></small><br/>
|
||||||
<small style="opacity: 0.8">.id: <code style="font-size: 10px; user-select: all">{}</code></small><br/>
|
<small style="opacity: 0.8">.id: <code style="font-size: 10px; user-select: all">{}</code></small><br/>
|
||||||
|
<hr/>
|
||||||
|
TS: <code style="font-size: 10px;"><b style="user-select: all">{}</b> {}</code> {}: <code style="user-select: all">{}</code><br/>
|
||||||
|
URI: <code style="font-size: 10px; "><b style="user-select: all">{}</b> {}</code> <span style="display:inline-block; vertical-align: -4px; width: 290px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}: <code style="user-select: all">{}</code></span>
|
||||||
|
SALT: <code style="font-size: 10px;"><b style="display:inline-block; user-select: all; width: 50px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}</b></code><br/>
|
||||||
|
SUBTYPE: <code style="font-size: 10px;"><b style="user-select: all">{}</b> {}</code> {}: <code style="user-select: all">{}</code><br/>
|
||||||
|
RAND: <code style="font-size: 10px;"><b style="user-select: all">{}</b> {}</code> {}: <code style="user-select: all">{}</code>
|
||||||
|
<br/><hr/>
|
||||||
<small style="opacity: 0.5">.old_id: <code style="font-size: 10px; user-select: all">{}</code></small><br/>
|
<small style="opacity: 0.5">.old_id: <code style="font-size: 10px; user-select: all">{}</code></small><br/>
|
||||||
</div>
|
</div>
|
||||||
''',
|
''',
|
||||||
obj.api_url, obj.api_url, obj.api_docs_url,
|
obj.api_url, obj.api_url, obj.api_docs_url,
|
||||||
obj.ABID.ts, obj.abid_values['ts'].isoformat() if isinstance(obj.abid_values['ts'], datetime) else obj.abid_values['ts'],
|
|
||||||
obj.ABID.uri, str(obj.abid_values['uri']),
|
|
||||||
obj.ABID.subtype, str(obj.abid_values['subtype']),
|
|
||||||
obj.ABID.rand, str(obj.abid_values['rand'])[-7:],
|
|
||||||
obj.ABID.uri_salt,
|
|
||||||
str(obj.abid),
|
str(obj.abid),
|
||||||
str(obj.ABID.uuid),
|
str(obj.ABID.uuid),
|
||||||
obj.id,
|
str(obj.id),
|
||||||
getattr(obj, 'old_id', ''),
|
obj.ABID.ts, str(obj.ABID.uuid)[0:14], obj.abid_ts_src, obj.abid_values['ts'].isoformat() if isinstance(obj.abid_values['ts'], datetime) else obj.abid_values['ts'],
|
||||||
|
obj.ABID.uri, str(obj.ABID.uuid)[14:26], obj.abid_uri_src, str(obj.abid_values['uri']),
|
||||||
|
obj.ABID.uri_salt,
|
||||||
|
obj.ABID.subtype, str(obj.ABID.uuid)[26:28], obj.abid_subtype_src, str(obj.abid_values['subtype']),
|
||||||
|
obj.ABID.rand, str(obj.ABID.uuid)[28:36], obj.abid_rand_src, str(obj.abid_values['rand'])[-7:],
|
||||||
|
str(getattr(obj, 'old_id', '')),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -568,9 +571,9 @@ class TagAdmin(admin.ModelAdmin):
|
||||||
class ArchiveResultAdmin(admin.ModelAdmin):
|
class ArchiveResultAdmin(admin.ModelAdmin):
|
||||||
list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
|
list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
|
||||||
sort_fields = ('start_ts', 'extractor', 'status')
|
sort_fields = ('start_ts', 'extractor', 'status')
|
||||||
readonly_fields = ('snapshot_info', 'tags_str', 'created', 'modified', 'API')
|
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created', 'modified', 'API', 'output_summary')
|
||||||
search_fields = ('id', 'old_id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
|
search_fields = ('id', 'old_id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
|
||||||
fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'cmd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', *readonly_fields)
|
fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', 'cmd', *readonly_fields)
|
||||||
autocomplete_fields = ['snapshot']
|
autocomplete_fields = ['snapshot']
|
||||||
|
|
||||||
list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
|
list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
|
||||||
|
@ -593,6 +596,7 @@ class ArchiveResultAdmin(admin.ModelAdmin):
|
||||||
try:
|
try:
|
||||||
return get_abid_info(self, obj)
|
return get_abid_info(self, obj)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
raise e
|
||||||
return str(e)
|
return str(e)
|
||||||
|
|
||||||
@admin.display(
|
@admin.display(
|
||||||
|
@ -614,3 +618,33 @@ class ArchiveResultAdmin(admin.ModelAdmin):
|
||||||
result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html',
|
result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html',
|
||||||
result.output,
|
result.output,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def output_summary(self, result):
|
||||||
|
snapshot_dir = Path(OUTPUT_DIR) / str(result.pwd).split('data/', 1)[-1]
|
||||||
|
output_str = format_html(
|
||||||
|
'<pre style="display: inline-block">{}</pre><br/>',
|
||||||
|
result.output,
|
||||||
|
)
|
||||||
|
output_str += format_html('<a href="/archive/{}/index.html#all">See result files ...</a><br/><pre><code>', str(result.snapshot.timestamp))
|
||||||
|
path_from_output_str = (snapshot_dir / result.output)
|
||||||
|
output_str += format_html('<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>', str(snapshot_dir), str(result.output))
|
||||||
|
if path_from_output_str.exists():
|
||||||
|
root_dir = str(path_from_output_str)
|
||||||
|
else:
|
||||||
|
root_dir = str(snapshot_dir)
|
||||||
|
|
||||||
|
|
||||||
|
# print(root_dir, str(list(os.walk(root_dir))))
|
||||||
|
|
||||||
|
for root, dirs, files in os.walk(root_dir):
|
||||||
|
depth = root.replace(root_dir, '').count(os.sep) + 1
|
||||||
|
if depth > 2:
|
||||||
|
continue
|
||||||
|
indent = ' ' * 4 * (depth)
|
||||||
|
output_str += format_html('<b style="padding: 1px">{}{}/</b><br/>', indent, os.path.basename(root))
|
||||||
|
indentation_str = ' ' * 4 * (depth + 1)
|
||||||
|
for filename in sorted(files):
|
||||||
|
is_hidden = filename.startswith('.')
|
||||||
|
output_str += format_html('<span style="opacity: {}.2">{}{}</span><br/>', int(not is_hidden), indentation_str, filename.strip())
|
||||||
|
|
||||||
|
return output_str + format_html('</code></pre>')
|
||||||
|
|
|
@ -372,7 +372,7 @@ class ArchiveResult(ABIDModel):
|
||||||
abid_ts_src = 'self.snapshot.added'
|
abid_ts_src = 'self.snapshot.added'
|
||||||
abid_uri_src = 'self.snapshot.url'
|
abid_uri_src = 'self.snapshot.url'
|
||||||
abid_subtype_src = 'self.extractor'
|
abid_subtype_src = 'self.extractor'
|
||||||
abid_rand_src = 'self.id'
|
abid_rand_src = 'self.old_id'
|
||||||
EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
|
EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
|
||||||
|
|
||||||
old_id = models.BigIntegerField(default=rand_int_id, serialize=False, verbose_name='Old ID')
|
old_id = models.BigIntegerField(default=rand_int_id, serialize=False, verbose_name='Old ID')
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue