diff --git a/.dockerignore b/.dockerignore
index b5c3c630..9f03a946 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -35,3 +35,5 @@ docker/
data/
data*/
output/
+index.sqlite3
+index.sqlite3-wal
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..afb03617
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+**/*.lock
+**/*-lock.json
diff --git a/.gitignore b/.gitignore
index 7e3fbe26..76fe607e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,7 +13,6 @@ venv/
node_modules/
# Ignore dev lockfiles (should always be built fresh)
-pdm.lock
pdm.dev.lock
requirements-dev.txt
@@ -30,6 +29,9 @@ data/
data*/
output/
index.sqlite3
+*.sqlite*
+data.*
# vim
*.sw?
+.vscode
diff --git a/README.md b/README.md
index 44e8796c..2ad7ed1f 100644
--- a/README.md
+++ b/README.md
@@ -154,7 +154,7 @@ ArchiveBox is free for everyone to self-host, but we also provide support, secur
> ***[Contact us](https://zulip.archivebox.io/#narrow/stream/167-enterprise/topic/welcome/near/1191102)** if your org wants help using ArchiveBox professionally.* (we are also seeking [grant funding](https://github.com/ArchiveBox/ArchiveBox/issues/1126#issuecomment-1487431394))
> We offer: setup & support, CAPTCHA/ratelimit unblocking, SSO, audit logging/chain-of-custody, and more
-> *ArchiveBox has 🏛️ 501(c)(3) [nonprofit status](https://hackclub.com/hcb/) and all our work supports open-source development.*
+> *ArchiveBox is a 🏛️ 501(c)(3) [nonprofit FSP](https://hackclub.com/hcb/) and all our work supports open-source development.*
@@ -291,7 +291,8 @@ See below for more usage examples using the C
@@ -407,10 +407,12 @@ See below for usage examples using the CLI, W
> *Warning: These are contributed by external volunteers and may lag behind the official `pip` channel.*
# Start the server on bare metal (pip/apt/brew/etc):
archivebox manage createsuperuser # create a new admin user via CLI
@@ -756,8 +758,8 @@ The configuration is documented here: **[Configuration Wiki](https://github.com/
# e.g. archivebox config --set TIMEOUT=120
# or docker compose run archivebox config --set TIMEOUT=120
-TIMEOUT=120 # default: 60 add more seconds on slower networks
-CHECK_SSL_VALIDITY=True # default: False True = allow saving URLs w/ bad SSL
+TIMEOUT=240 # default: 60 add more seconds on slower networks
+CHECK_SSL_VALIDITY=False # default: True False = allow saving URLs w/ bad SSL
SAVE_ARCHIVE_DOT_ORG=False # default: True False = disable Archive.org saving
MAX_MEDIA_SIZE=1500m # default: 750m raise/lower youtubedl output size
@@ -776,7 +778,7 @@ CURL_USER_AGENT="Mozilla/5.0 ..."
To achieve high-fidelity archives in as many situations as possible, ArchiveBox depends on a variety of 3rd-party libraries and tools that specialize in extracting different types of content.
-> Under-the-hood, ArchiveBox uses [Django](https://www.djangoproject.com/start/overview/) to power its [Web UI](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#ui-usage) and [SQlite](https://www.sqlite.org/locrsf.html) + the filesystem to provide [fast & durable metadata storage](https://www.sqlite.org/locrsf.html) w/ [determinisitc upgrades](https://stackoverflow.com/a/39976321/2156113).
+> Under-the-hood, ArchiveBox uses [Django](https://www.djangoproject.com/start/overview/) to power its [Web UI](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#ui-usage), [Django Ninja](https://django-ninja.dev/) for the REST API, and [SQLite](https://www.sqlite.org/locrsf.html) + the filesystem to provide [fast & durable metadata storage](https://www.sqlite.org/locrsf.html) w/ [deterministic upgrades](https://stackoverflow.com/a/39976321/2156113).
ArchiveBox bundles industry-standard tools like [Google Chrome](https://github.com/ArchiveBox/ArchiveBox/wiki/Chromium-Install), [`wget`, `yt-dlp`, `readability`, etc.](#dependencies) internally, and its operation can be [tuned, secured, and extended](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration) as-needed for many different applications.
@@ -785,7 +787,7 @@ ArchiveBox bundles industry-standard tools like [Google Chrome](https://github.c
Expand to learn more about ArchiveBox's internals & dependencies...
-TIP: For better security, easier updating, and to avoid polluting your host system with extra dependencies,it is strongly recommended to use the ⭐️ official Docker image with everything pre-installed for the best experience.
+TIP: For better security while running ArchiveBox, and to avoid polluting your host system with a bunch of sub-dependencies that you need to keep up-to-date, it is strongly recommended to use the ⭐️ official Docker image which provides everything in an easy container with simple one-liner upgrades.
These optional dependencies used for archiving sites include:
@@ -1608,7 +1610,7 @@ Extractors take the URL of a page to archive, write their output to the filesyst
-ArchiveBox operates as a US 501(c)(3) nonprofit (sponsored by HCB), direct donations are tax-deductible.
+ArchiveBox operates as a US 501(c)(3) nonprofit FSP (sponsored by HCB), direct donations are tax-deductible.
diff --git a/archivebox/abid_utils/abid.py b/archivebox/abid_utils/abid.py
index 48597813..3c90e83c 100644
--- a/archivebox/abid_utils/abid.py
+++ b/archivebox/abid_utils/abid.py
@@ -21,6 +21,11 @@ ABID_RAND_LEN = 6
DEFAULT_ABID_PREFIX = 'obj_'
+# allows people to keep their uris secret on a per-instance basis by changing the salt.
+# the default means everyone can share the same namespace for URI hashes,
+# meaning anyone who has a URI and wants to check if you have it can guess the ABID
+DEFAULT_ABID_URI_SALT = '687c2fff14e3a7780faa5a40c237b19b5b51b089'
+
class ABID(NamedTuple):
"""
@@ -31,6 +36,8 @@ class ABID(NamedTuple):
uri: str # e.g. E4A5CCD9
subtype: str # e.g. 01
rand: str # e.g. ZYEBQE
+
+ # salt: str = DEFAULT_ABID_URI_SALT
def __getattr__(self, attr: str) -> Any:
return getattr(self.ulid, attr)
@@ -67,6 +74,10 @@ class ABID(NamedTuple):
subtype=suffix[18:20].upper(),
rand=suffix[20:26].upper(),
)
+
+    @property
+    def uri_salt(self) -> str:
+        """Return the URI-hash salt, which is always the module-level DEFAULT_ABID_URI_SALT."""
+        return DEFAULT_ABID_URI_SALT
@property
def suffix(self):
@@ -97,7 +108,7 @@ class ABID(NamedTuple):
####################################################
-def uri_hash(uri: Union[str, bytes]) -> str:
+def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str:
"""
'E4A5CCD9AF4ED2A6E0954DF19FD274E9CDDB4853051F033FD518BFC90AA1AC25'
"""
@@ -115,7 +126,7 @@ def uri_hash(uri: Union[str, bytes]) -> str:
except AttributeError:
pass
- uri_bytes = uri_str.encode('utf-8')
+ uri_bytes = uri_str.encode('utf-8') + salt.encode('utf-8')
return hashlib.sha256(uri_bytes).hexdigest().upper()
@@ -130,12 +141,12 @@ def abid_part_from_prefix(prefix: Optional[str]) -> str:
assert len(prefix) == 3
return prefix + '_'
-def abid_part_from_uri(uri: str) -> str:
+def abid_part_from_uri(uri: str, salt: str=DEFAULT_ABID_URI_SALT) -> str:
"""
'E4A5CCD9' # takes first 8 characters of sha256(url)
"""
uri = str(uri)
- return uri_hash(uri)[:ABID_URI_LEN]
+ return uri_hash(uri, salt=salt)[:ABID_URI_LEN]
def abid_part_from_ts(ts: Optional[datetime]) -> str:
"""
@@ -175,7 +186,7 @@ def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
return str(rand)[-ABID_RAND_LEN:].upper()
-def abid_from_values(prefix, ts, uri, subtype, rand) -> ABID:
+def abid_from_values(prefix, ts, uri, subtype, rand, salt=DEFAULT_ABID_URI_SALT) -> ABID:
"""
Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
"""
@@ -183,7 +194,7 @@ def abid_from_values(prefix, ts, uri, subtype, rand) -> ABID:
abid = ABID(
prefix=abid_part_from_prefix(prefix),
ts=abid_part_from_ts(ts),
- uri=abid_part_from_uri(uri),
+ uri=abid_part_from_uri(uri, salt=salt),
subtype=abid_part_from_subtype(subtype),
rand=abid_part_from_rand(rand),
)
diff --git a/archivebox/abid_utils/models.py b/archivebox/abid_utils/models.py
index de8b3c87..9d0ab1d5 100644
--- a/archivebox/abid_utils/models.py
+++ b/archivebox/abid_utils/models.py
@@ -26,6 +26,7 @@ from .abid import (
ABID_RAND_LEN,
ABID_SUFFIX_LEN,
DEFAULT_ABID_PREFIX,
+ DEFAULT_ABID_URI_SALT,
abid_part_from_prefix,
abid_from_values
)
@@ -69,8 +70,8 @@ class ABIDModel(models.Model):
abid_subtype_src = 'None' # e.g. 'self.extractor'
abid_rand_src = 'None' # e.g. 'self.uuid' or 'self.id'
- id = models.UUIDField(primary_key=True, default=uuid4, editable=True)
- uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
+ # id = models.UUIDField(primary_key=True, default=uuid4, editable=True)
+ # uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
abid = ABIDField(prefix=abid_prefix)
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk)
@@ -132,6 +133,7 @@ class ABIDModel(models.Model):
uri=uri,
subtype=subtype,
rand=rand,
+ salt=DEFAULT_ABID_URI_SALT,
)
assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for {self.__class__.__name__}'
return abid
diff --git a/archivebox/api/models.py b/archivebox/api/models.py
index 177b275f..d8598002 100644
--- a/archivebox/api/models.py
+++ b/archivebox/api/models.py
@@ -56,6 +56,7 @@ class APIToken(ABIDModel):
return {
"TYPE": "APIToken",
"uuid": str(self.id),
+ "ulid": str(self.ulid),
"abid": str(self.get_abid()),
"user_id": str(self.user.id),
"user_username": self.user.username,
@@ -64,6 +65,10 @@ class APIToken(ABIDModel):
"expires": self.expires_as_iso8601,
}
+    @property
+    def ulid(self):
+        """Return the `ulid` attribute of the ABID produced by get_abid()."""
+        return self.get_abid().ulid
+
@property
def expires_as_iso8601(self):
"""Returns the expiry date of the token in ISO 8601 format or a date 100 years in the future if none."""
diff --git a/archivebox/api/v1_api.py b/archivebox/api/v1_api.py
index 4fa5d94b..546ef8a0 100644
--- a/archivebox/api/v1_api.py
+++ b/archivebox/api/v1_api.py
@@ -63,7 +63,7 @@ api = NinjaAPIWithIOCapture(
version='1.0.0',
csrf=False,
auth=API_AUTH_METHODS,
- urls_namespace="api",
+ urls_namespace="api-1",
docs=Swagger(settings={"persistAuthorization": True}),
# docs_decorator=login_required,
# renderer=ORJSONRenderer(),
diff --git a/archivebox/api/v1_core.py b/archivebox/api/v1_core.py
index 9046c361..0c701104 100644
--- a/archivebox/api/v1_core.py
+++ b/archivebox/api/v1_core.py
@@ -1,14 +1,17 @@
__package__ = 'archivebox.api'
+import math
from uuid import UUID
-from typing import List, Optional
+from typing import List, Optional, Union, Any
from datetime import datetime
from django.db.models import Q
from django.shortcuts import get_object_or_404
+from django.core.exceptions import ValidationError
+from django.contrib.auth import get_user_model
from ninja import Router, Schema, FilterSchema, Field, Query
-from ninja.pagination import paginate
+from ninja.pagination import paginate, PaginationBase
from core.models import Snapshot, ArchiveResult, Tag
from abid_utils.abid import ABID
@@ -17,23 +20,61 @@ router = Router(tags=['Core Models'])
+class CustomPagination(PaginationBase):
+    """Limit/offset paginator that also reports totals; limit is clamped to 1..500 and page numbers are 0-based."""
+    class Input(Schema):
+        limit: int = 200
+        offset: int = 0
+        page: int = 0
+
+    class Output(Schema):
+        total_items: int
+        total_pages: int
+        page: int
+        limit: int
+        offset: int
+        num_items: int
+        items: List[Any]
+
+    def paginate_queryset(self, queryset, pagination: Input, **params):
+        # clamp so limit=0 cannot divide by zero and negative values never reach the slice below
+        limit = max(1, min(pagination.limit, 500))
+        # an explicit offset takes precedence; otherwise derive it from the 0-based page number
+        offset = max(0, pagination.offset or (pagination.page * limit))
+        total = queryset.count()
+        total_pages = math.ceil(total / limit)
+        # floor division is the exact inverse of `page * limit` for every page number
+        current_page = offset // limit
+        items = queryset[offset : offset + limit]
+        return {
+            'total_items': total,
+            'total_pages': total_pages,
+            'page': current_page,
+            'limit': limit,
+            'offset': offset,
+            'num_items': len(items),
+            'items': items,
+        }
+
### ArchiveResult #########################################################################
class ArchiveResultSchema(Schema):
+ TYPE: str = 'core.models.ArchiveResult'
+
+ id: UUID
+ old_id: int
abid: str
- uuid: UUID
- pk: str
+
modified: datetime
created: datetime
created_by_id: str
+ created_by_username: str
snapshot_abid: str
+ snapshot_timestamp: str
snapshot_url: str
snapshot_tags: str
extractor: str
- cmd_version: str
+ cmd_version: Optional[str]
cmd: List[str]
pwd: str
status: str
@@ -42,6 +83,11 @@ class ArchiveResultSchema(Schema):
@staticmethod
def resolve_created_by_id(obj):
return str(obj.created_by_id)
+
+    @staticmethod
+    def resolve_created_by_username(obj):
+        """Return the username of the user whose id is obj.created_by_id."""
+        return get_user_model().objects.get(id=obj.created_by_id).username
@staticmethod
def resolve_pk(obj):
@@ -59,6 +105,10 @@ class ArchiveResultSchema(Schema):
def resolve_created(obj):
return obj.start_ts
+ @staticmethod
+ def resolve_snapshot_timestamp(obj):
+ return obj.snapshot.timestamp
+
@staticmethod
def resolve_snapshot_url(obj):
return obj.snapshot.url
@@ -73,11 +123,10 @@ class ArchiveResultSchema(Schema):
class ArchiveResultFilterSchema(FilterSchema):
- uuid: Optional[UUID] = Field(None, q='uuid')
- # abid: Optional[str] = Field(None, q='abid')
+ id: Optional[str] = Field(None, q=['id__startswith', 'abid__icontains', 'old_id__startswith', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])
- search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains'])
- snapshot_uuid: Optional[UUID] = Field(None, q='snapshot_uuid__icontains')
+ search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains', 'id__startswith', 'abid__icontains', 'old_id__startswith', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])
+ snapshot_id: Optional[str] = Field(None, q=['snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])
snapshot_url: Optional[str] = Field(None, q='snapshot__url__icontains')
snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name__icontains')
@@ -93,19 +142,19 @@ class ArchiveResultFilterSchema(FilterSchema):
created__lt: Optional[datetime] = Field(None, q='updated__lt')
-@router.get("/archiveresults", response=List[ArchiveResultSchema])
-@paginate
-def list_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)):
+@router.get("/archiveresults", response=List[ArchiveResultSchema], url_name="get_archiveresults")
+@paginate(CustomPagination)
+def get_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)):
     """List all ArchiveResult entries matching these filters."""
     qs = ArchiveResult.objects.all()
-    results = filters.filter(qs)
+    results = filters.filter(qs).distinct()
     return results
-@router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
+@router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema, url_name="get_archiveresult")
def get_archiveresult(request, archiveresult_id: str):
- """Get a specific ArchiveResult by abid, uuid, or pk."""
- return ArchiveResult.objects.get(Q(pk__icontains=archiveresult_id) | Q(abid__icontains=archiveresult_id) | Q(uuid__icontains=archiveresult_id))
+ """Get a specific ArchiveResult by pk, abid, or old_id."""
+ return ArchiveResult.objects.get(Q(id__icontains=archiveresult_id) | Q(abid__icontains=archiveresult_id) | Q(old_id__icontains=archiveresult_id))
# @router.post("/archiveresult", response=ArchiveResultSchema)
@@ -137,12 +186,16 @@ def get_archiveresult(request, archiveresult_id: str):
class SnapshotSchema(Schema):
+ TYPE: str = 'core.models.Snapshot'
+
+ id: UUID
+ old_id: UUID
abid: str
- uuid: UUID
- pk: str
+
modified: datetime
created: datetime
created_by_id: str
+ created_by_username: str
url: str
tags: str
@@ -160,6 +213,11 @@ class SnapshotSchema(Schema):
@staticmethod
def resolve_created_by_id(obj):
return str(obj.created_by_id)
+
+    @staticmethod
+    def resolve_created_by_username(obj):
+        """Return the username of the user whose id is obj.created_by_id."""
+        return get_user_model().objects.get(id=obj.created_by_id).username
@staticmethod
def resolve_pk(obj):
@@ -189,10 +247,14 @@ class SnapshotSchema(Schema):
class SnapshotFilterSchema(FilterSchema):
+ id: Optional[str] = Field(None, q=['id__icontains', 'abid__icontains', 'old_id__icontains', 'timestamp__startswith'])
+
+ old_id: Optional[str] = Field(None, q='old_id__icontains')
abid: Optional[str] = Field(None, q='abid__icontains')
- uuid: Optional[str] = Field(None, q='uuid__icontains')
- pk: Optional[str] = Field(None, q='pk__icontains')
- created_by_id: str = Field(None, q='created_by_id__icontains')
+
+ created_by_id: str = Field(None, q='created_by_id')
+ created_by_username: str = Field(None, q='created_by__username__icontains')
+
created__gte: datetime = Field(None, q='created__gte')
created__lt: datetime = Field(None, q='created__lt')
created: datetime = Field(None, q='created')
@@ -200,7 +262,7 @@ class SnapshotFilterSchema(FilterSchema):
modified__gte: datetime = Field(None, q='modified__gte')
modified__lt: datetime = Field(None, q='modified__lt')
- search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'abid__icontains', 'uuid__icontains'])
+ search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'id__icontains', 'abid__icontains', 'old_id__icontains', 'timestamp__startswith'])
url: Optional[str] = Field(None, q='url')
tag: Optional[str] = Field(None, q='tags__name')
title: Optional[str] = Field(None, q='title__icontains')
@@ -211,35 +273,33 @@ class SnapshotFilterSchema(FilterSchema):
-@router.get("/snapshots", response=List[SnapshotSchema])
-@paginate
-def list_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=True):
+@router.get("/snapshots", response=List[SnapshotSchema], url_name="get_snapshots")
+@paginate(CustomPagination)
+def get_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=False):
"""List all Snapshot entries matching these filters."""
request.with_archiveresults = with_archiveresults
qs = Snapshot.objects.all()
- results = filters.filter(qs)
+ results = filters.filter(qs).distinct()
return results
-@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema)
+@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema, url_name="get_snapshot")
def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
"""Get a specific Snapshot by abid, uuid, or pk."""
request.with_archiveresults = with_archiveresults
snapshot = None
try:
- snapshot = Snapshot.objects.get(Q(uuid__startswith=snapshot_id) | Q(abid__startswith=snapshot_id)| Q(pk__startswith=snapshot_id))
+ snapshot = Snapshot.objects.get(Q(abid__startswith=snapshot_id) | Q(id__startswith=snapshot_id) | Q(old_id__startswith=snapshot_id) | Q(timestamp__startswith=snapshot_id))
except Snapshot.DoesNotExist:
pass
try:
- snapshot = snapshot or Snapshot.objects.get()
+ snapshot = snapshot or Snapshot.objects.get(Q(abid__icontains=snapshot_id) | Q(id__icontains=snapshot_id) | Q(old_id__icontains=snapshot_id))
except Snapshot.DoesNotExist:
pass
- try:
- snapshot = snapshot or Snapshot.objects.get(Q(uuid__icontains=snapshot_id) | Q(abid__icontains=snapshot_id))
- except Snapshot.DoesNotExist:
- pass
+ if not snapshot:
+ raise Snapshot.DoesNotExist
return snapshot
@@ -271,21 +331,94 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
class TagSchema(Schema):
- abid: Optional[UUID] = Field(None, q='abid')
- uuid: Optional[UUID] = Field(None, q='uuid')
- pk: Optional[UUID] = Field(None, q='pk')
+ TYPE: str = 'core.models.Tag'
+
+ id: UUID
+ old_id: str
+ abid: str
+
modified: datetime
created: datetime
created_by_id: str
+ created_by_username: str
name: str
slug: str
+ num_snapshots: int
+ snapshots: List[SnapshotSchema]
+ @staticmethod
+ def resolve_old_id(obj):
+ return str(obj.old_id)
@staticmethod
def resolve_created_by_id(obj):
return str(obj.created_by_id)
+
+    @staticmethod
+    def resolve_created_by_username(obj):
+        """Return the username of the user whose id is obj.created_by_id."""
+        return get_user_model().objects.get(id=obj.created_by_id).username
+
+ @staticmethod
+ def resolve_num_snapshots(obj, context):
+ return obj.snapshot_set.all().distinct().count()
-@router.get("/tags", response=List[TagSchema])
-def list_tags(request):
- return Tag.objects.all()
+ @staticmethod
+ def resolve_snapshots(obj, context):
+ if context['request'].with_snapshots:
+ return obj.snapshot_set.all().distinct()
+ return Snapshot.objects.none()
+
+@router.get("/tags", response=List[TagSchema], url_name="get_tags")
+@paginate(CustomPagination)
+def get_tags(request):
+ request.with_snapshots = False
+ request.with_archiveresults = False
+ return Tag.objects.all().distinct()
+
+@router.get("/tag/{tag_id}", response=TagSchema, url_name="get_tag")
+def get_tag(request, tag_id: str, with_snapshots: bool=True):
+    """Get a specific Tag by old_id, abid, or id (first match wins); raises Tag.DoesNotExist if none match."""
+    request.with_snapshots = with_snapshots
+    request.with_archiveresults = False
+    tag = None
+    try:
+        tag = tag or Tag.objects.get(old_id__icontains=tag_id)
+    except (Tag.DoesNotExist, ValidationError, ValueError):
+        pass
+
+    try:
+        # `tag or` keeps an earlier old_id match instead of overwriting it with the abid lookup
+        tag = tag or Tag.objects.get(abid__icontains=tag_id)
+    except (Tag.DoesNotExist, ValidationError):
+        pass
+
+    try:
+        tag = tag or Tag.objects.get(id__icontains=tag_id)
+    except (Tag.DoesNotExist, ValidationError):
+        pass
+
+    # fail explicitly rather than handing None to the non-optional TagSchema response
+    if tag is None:
+        raise Tag.DoesNotExist
+    return tag
+
+
+
+@router.get("/any/{abid}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema], url_name="get_any")
+def get_any(request, abid: str):
+    """Try abid as a Snapshot, then an ArchiveResult, then a Tag; return the first match, or None if every lookup fails."""
+    # NOTE(review): get_snapshot() reassigns request.with_archiveresults and get_tag() reassigns
+    # request.with_snapshots from their own keyword defaults (both True), so the False values
+    # set here do not survive those calls -- confirm whether that is intended
+    request.with_snapshots = False
+    request.with_archiveresults = False
+
+    response = None
+    try:
+        response = response or get_snapshot(request, abid)
+    except Exception:
+        # any failure in this lookup just falls through to the next model type
+        pass
+
+    try:
+        response = response or get_archiveresult(request, abid)
+    except Exception:
+        pass
+
+    try:
+        response = response or get_tag(request, abid)
+    except Exception:
+        pass
+
+    return response
diff --git a/archivebox/config.py b/archivebox/config.py
index 0151c3c2..afa334c6 100644
--- a/archivebox/config.py
+++ b/archivebox/config.py
@@ -1036,6 +1036,11 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
'enabled': True,
'is_valid': config['SOURCES_DIR'].exists(),
},
+ 'PERSONAS_DIR': {
+ 'path': config['PERSONAS_DIR'].resolve(),
+ 'enabled': True,
+ 'is_valid': config['PERSONAS_DIR'].exists(),
+ },
'LOGS_DIR': {
'path': config['LOGS_DIR'].resolve(),
'enabled': True,
@@ -1051,11 +1056,6 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
'enabled': bool(config['CUSTOM_TEMPLATES_DIR']),
'is_valid': config['CUSTOM_TEMPLATES_DIR'] and Path(config['CUSTOM_TEMPLATES_DIR']).exists(),
},
- 'PERSONAS_DIR': {
- 'path': config['PERSONAS_DIR'].resolve(),
- 'enabled': True,
- 'is_valid': config['PERSONAS_DIR'].exists(),
- },
# managed by bin/docker_entrypoint.sh and python-crontab:
# 'CRONTABS_DIR': {
# 'path': config['CRONTABS_DIR'].resolve(),
diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py
index 4bcbc222..78b6bdf8 100644
--- a/archivebox/core/admin.py
+++ b/archivebox/core/admin.py
@@ -1,17 +1,19 @@
__package__ = 'archivebox.core'
+import json
from io import StringIO
from pathlib import Path
from contextlib import redirect_stdout
from datetime import datetime, timezone
from django.contrib import admin
-from django.db.models import Count
-from django.urls import path
+from django.db.models import Count, Q
+from django.urls import path, reverse
from django.utils.html import format_html
from django.utils.safestring import mark_safe
from django.shortcuts import render, redirect
from django.contrib.auth import get_user_model
+from django.core.exceptions import ValidationError
from django import forms
@@ -20,7 +22,7 @@ from signal_webhooks.admin import WebhookAdmin, get_webhook_model
from ..util import htmldecode, urldecode, ansi_to_html
-from core.models import Snapshot, ArchiveResult, Tag
+from core.models import Snapshot, ArchiveResult, Tag, SnapshotTag
from core.forms import AddLinkForm
from core.mixins import SearchResultsAdminMixin
@@ -124,31 +126,55 @@ archivebox_admin.get_urls = get_urls(archivebox_admin.get_urls).__get__(archiveb
class ArchiveResultInline(admin.TabularInline):
+ name = 'Archive Results Log'
model = ArchiveResult
+ # fk_name = 'snapshot'
+ extra = 1
+ readonly_fields = ('result_id', 'start_ts', 'end_ts', 'extractor', 'command', 'cmd_version')
+ fields = ('id', *readonly_fields, 'status', 'output')
+ show_change_link = True
+ # # classes = ['collapse']
+ # # list_display_links = ['abid']
+
+ def result_id(self, obj):
+ return format_html('[{}]
', reverse('admin:core_archiveresult_change', args=(obj.id,)), obj.abid)
+
+ def command(self, obj):
+ return format_html('{}
', " ".join(obj.cmd or []))
+
class TagInline(admin.TabularInline):
- model = Snapshot.tags.through
+ model = Tag.snapshot_set.through
+ # fk_name = 'snapshot'
+ fields = ('id', 'tag')
+ extra = 1
+ # min_num = 1
+ max_num = 1000
+ autocomplete_fields = (
+ 'tag',
+ )
from django.contrib.admin.helpers import ActionForm
-from django.contrib.admin.widgets import AutocompleteSelectMultiple
+from django.contrib.admin.widgets import FilteredSelectMultiple
-class AutocompleteTags:
- model = Tag
- search_fields = ['name']
- name = 'tags'
- remote_field = TagInline
+# class AutocompleteTags:
+# model = Tag
+# search_fields = ['name']
+# name = 'name'
+# # source_field = 'name'
+# remote_field = Tag._meta.get_field('name')
-class AutocompleteTagsAdminStub:
- name = 'admin'
+# class AutocompleteTagsAdminStub:
+# name = 'admin'
class SnapshotActionForm(ActionForm):
tags = forms.ModelMultipleChoiceField(
queryset=Tag.objects.all(),
required=False,
- widget=AutocompleteSelectMultiple(
- AutocompleteTags(),
- AutocompleteTagsAdminStub(),
+ widget=FilteredSelectMultiple(
+ 'core_tag__name',
+ False,
),
)
@@ -168,48 +194,92 @@ def get_abid_info(self, obj):
return format_html(
# URL Hash: {}
'''
- ABID: {}
- TS: {}
({})
- URI: {}
({})
- SUBTYPE: {}
({})
- RAND: {}
({})
- ABID AS UUID: {}
-
- .uuid: {}
- .id: {}
- .pk: {}
+ {} 📖 API DOCS
+
+
+ TS: {}
({})
+ URI: {}
( )
+ SUBTYPE: {}
({})
+ RAND: {}
({})
+ SALT: {}
+
+ .abid: {}
+ .abid.uuid: {}
+ .id: {}
+ .old_id: {}
+
''',
- obj.abid,
+ obj.api_url, obj.api_url, obj.api_docs_url,
obj.ABID.ts, obj.abid_values['ts'].isoformat() if isinstance(obj.abid_values['ts'], datetime) else obj.abid_values['ts'],
obj.ABID.uri, str(obj.abid_values['uri']),
obj.ABID.subtype, str(obj.abid_values['subtype']),
obj.ABID.rand, str(obj.abid_values['rand'])[-7:],
- obj.ABID.uuid,
- obj.uuid,
+ obj.ABID.uri_salt,
+ str(obj.abid),
+ str(obj.ABID.uuid),
obj.id,
- obj.pk,
+ getattr(obj, 'old_id', ''),
)
@admin.register(Snapshot, site=archivebox_admin)
class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
+ class Meta:
+ model = Snapshot
+
list_display = ('added', 'title_str', 'files', 'size', 'url_str')
+ # list_editable = ('title',)
sort_fields = ('title_str', 'url_str', 'added', 'files')
- readonly_fields = ('admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'identifiers')
- search_fields = ('id', 'url', 'abid', 'uuid', 'timestamp', 'title', 'tags__name')
- fields = ('url', 'timestamp', 'created_by', 'tags', 'title', *readonly_fields)
- list_filter = ('added', 'updated', 'tags', 'archiveresult__status', 'created_by')
+ readonly_fields = ('tags', 'timestamp', 'admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'API', 'link_dir')
+ search_fields = ('id', 'url', 'abid', 'old_id', 'timestamp', 'title', 'tags__name')
+ list_filter = ('added', 'updated', 'archiveresult__status', 'created_by', 'tags')
+ fields = ('url', 'created_by', 'title', *readonly_fields)
ordering = ['-added']
actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
autocomplete_fields = ['tags']
- inlines = [ArchiveResultInline]
+ inlines = [TagInline, ArchiveResultInline]
list_per_page = SNAPSHOTS_PER_PAGE
action_form = SnapshotActionForm
+ save_on_top = True
+
def changelist_view(self, request, extra_context=None):
extra_context = extra_context or {}
- return super().changelist_view(request, extra_context | GLOBAL_CONTEXT)
+ try:
+ return super().changelist_view(request, extra_context | GLOBAL_CONTEXT)
+ except Exception as e:
+ self.message_user(request, f'Error occurred while loading the page: {str(e)} {request.GET} {request.POST}')
+ return super().changelist_view(request, GLOBAL_CONTEXT)
+
+    def change_view(self, request, object_id, form_url="", extra_context=None):
+        """Render the change page, accepting a Snapshot id, abid, or old_id as object_id.
+
+        The lookups run in that order and the first match wins; when one matches,
+        object_id is replaced by that Snapshot's id before delegating to the parent
+        change_view. When none match, object_id is passed through unchanged.
+        """
+        snapshot = None
+
+        try:
+            snapshot = snapshot or Snapshot.objects.get(id=object_id)
+        except (Snapshot.DoesNotExist, Snapshot.MultipleObjectsReturned, ValidationError):
+            pass
+
+        try:
+            # drop everything up to the first '_' and re-add Snapshot.abid_prefix,
+            # so the value is looked up the same way with or without a prefix
+            snapshot = snapshot or Snapshot.objects.get(abid=Snapshot.abid_prefix + object_id.split('_', 1)[-1])
+        except (Snapshot.DoesNotExist, ValidationError):
+            pass
+
+
+        try:
+            snapshot = snapshot or Snapshot.objects.get(old_id=object_id)
+        except (Snapshot.DoesNotExist, Snapshot.MultipleObjectsReturned, ValidationError):
+            pass
+
+        if snapshot:
+            # hand the parent view the matched Snapshot's id
+            object_id = str(snapshot.id)
+
+        return super().change_view(
+            request,
+            object_id,
+            form_url,
+            extra_context=extra_context,
+        )
def get_urls(self):
urls = super().get_urls()
@@ -220,7 +290,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
def get_queryset(self, request):
self.request = request
- return super().get_queryset(request).prefetch_related('tags').annotate(archiveresult_count=Count('archiveresult'))
+ return super().get_queryset(request).prefetch_related('tags', 'archiveresult_set').annotate(archiveresult_count=Count('archiveresult'))
def tag_list(self, obj):
return ', '.join(obj.tags.values_list('name', flat=True))
@@ -281,8 +351,11 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
obj.extension or '-',
)
- def identifiers(self, obj):
- return get_abid_info(self, obj)
+ def API(self, obj):
+ try:
+ return get_abid_info(self, obj)
+ except Exception as e:
+ return str(e)
@admin.display(
description='Title',
@@ -442,20 +515,34 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
+# @admin.register(SnapshotTag, site=archivebox_admin)
+# class SnapshotTagAdmin(admin.ModelAdmin):
+# list_display = ('id', 'snapshot', 'tag')
+# sort_fields = ('id', 'snapshot', 'tag')
+# search_fields = ('id', 'snapshot_id', 'tag_id')
+# fields = ('snapshot', 'id')
+# actions = ['delete_selected']
+# ordering = ['-id']
+
+# def API(self, obj):
+# return get_abid_info(self, obj)
@admin.register(Tag, site=archivebox_admin)
class TagAdmin(admin.ModelAdmin):
- list_display = ('slug', 'name', 'num_snapshots', 'snapshots', 'abid')
- sort_fields = ('id', 'name', 'slug', 'abid')
- readonly_fields = ('created', 'modified', 'identifiers', 'num_snapshots', 'snapshots')
- search_fields = ('id', 'abid', 'uuid', 'name', 'slug')
- fields = ('name', 'slug', 'created_by', *readonly_fields, )
+ list_display = ('abid', 'name', 'created', 'created_by', 'num_snapshots', 'snapshots')
+ sort_fields = ('name', 'slug', 'abid', 'created_by', 'created')
+ readonly_fields = ('slug', 'abid', 'created', 'modified', 'API', 'num_snapshots', 'snapshots')
+ search_fields = ('abid', 'name', 'slug')
+ fields = ('name', 'created_by', *readonly_fields)
actions = ['delete_selected']
- ordering = ['-id']
+ ordering = ['-created']
- def identifiers(self, obj):
- return get_abid_info(self, obj)
+ def API(self, obj):
+ try:
+ return get_abid_info(self, obj)
+ except Exception as e:
+ return str(e)
def num_snapshots(self, tag):
return format_html(
@@ -468,11 +555,10 @@ class TagAdmin(admin.ModelAdmin):
total_count = tag.snapshot_set.count()
return mark_safe('
'.join(
format_html(
- '{} [{}] {}
',
- snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...',
+ '[{}]
{}',
snap.pk,
- snap.abid,
- snap.url,
+ snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...',
+ snap.url[:64],
)
for snap in tag.snapshot_set.order_by('-updated')[:10]
) + (f'
and {total_count-10} more...' if tag.snapshot_set.count() > 10 else ''))
@@ -482,9 +568,9 @@ class TagAdmin(admin.ModelAdmin):
class ArchiveResultAdmin(admin.ModelAdmin):
list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
sort_fields = ('start_ts', 'extractor', 'status')
- readonly_fields = ('snapshot_info', 'tags_str', 'created_by', 'created', 'modified', 'identifiers')
- search_fields = ('id', 'uuid', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
- fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'cmd', 'start_ts', 'end_ts', 'cmd_version', *readonly_fields)
+ readonly_fields = ('snapshot_info', 'tags_str', 'created', 'modified', 'API')
+ search_fields = ('id', 'old_id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
+ fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'cmd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', *readonly_fields)
autocomplete_fields = ['snapshot']
list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
@@ -503,8 +589,11 @@ class ArchiveResultAdmin(admin.ModelAdmin):
result.snapshot.url[:128],
)
- def identifiers(self, obj):
- return get_abid_info(self, obj)
+ def API(self, obj):
+ try:
+ return get_abid_info(self, obj)
+ except Exception as e:
+ return str(e)
@admin.display(
description='Snapshot Tags'
diff --git a/archivebox/core/migrations/0024_auto_20240513_1143.py b/archivebox/core/migrations/0024_auto_20240513_1143.py
index 31f1e773..f8cf645c 100644
--- a/archivebox/core/migrations/0024_auto_20240513_1143.py
+++ b/archivebox/core/migrations/0024_auto_20240513_1143.py
@@ -2,7 +2,7 @@
from django.db import migrations
from datetime import datetime
-from abid_utils.abid import abid_from_values
+from abid_utils.abid import abid_from_values, DEFAULT_ABID_URI_SALT
def calculate_abid(self):
@@ -41,18 +41,21 @@ def calculate_abid(self):
uri=uri,
subtype=subtype,
rand=rand,
+ salt=DEFAULT_ABID_URI_SALT,
)
assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for {self.__class__.__name__}'
return abid
def copy_snapshot_uuids(apps, schema_editor):
+ print(' Copying snapshot.id -> snapshot.uuid...')
Snapshot = apps.get_model("core", "Snapshot")
for snapshot in Snapshot.objects.all():
snapshot.uuid = snapshot.id
snapshot.save(update_fields=["uuid"])
def generate_snapshot_abids(apps, schema_editor):
+ print(' Generating snapshot.abid values...')
Snapshot = apps.get_model("core", "Snapshot")
for snapshot in Snapshot.objects.all():
snapshot.abid_prefix = 'snp_'
@@ -62,9 +65,11 @@ def generate_snapshot_abids(apps, schema_editor):
snapshot.abid_rand_src = 'self.uuid'
snapshot.abid = calculate_abid(snapshot)
- snapshot.save(update_fields=["abid"])
+ snapshot.uuid = snapshot.abid.uuid
+ snapshot.save(update_fields=["abid", "uuid"])
def generate_archiveresult_abids(apps, schema_editor):
+ print(' Generating ArchiveResult.abid values... (may take an hour or longer for large collections...)')
ArchiveResult = apps.get_model("core", "ArchiveResult")
Snapshot = apps.get_model("core", "Snapshot")
for result in ArchiveResult.objects.all():
diff --git a/archivebox/core/migrations/0027_update_snapshot_ids.py b/archivebox/core/migrations/0027_update_snapshot_ids.py
new file mode 100644
index 00000000..ad197c04
--- /dev/null
+++ b/archivebox/core/migrations/0027_update_snapshot_ids.py
@@ -0,0 +1,106 @@
+# Generated by Django 5.0.6 on 2024-08-18 02:48
+
+from django.db import migrations
+
+from django.db import migrations
+from datetime import datetime
+from abid_utils.abid import ABID, abid_from_values, DEFAULT_ABID_URI_SALT
+
+
+def calculate_abid(self):
+ """
+ Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
+
+ `self` is an object carrying `abid_prefix` plus the four `abid_*_src` attributes.
+ Each `*_src` value is a Python expression string that is eval()'d here, so it may
+ refer to `self`.
+
+ Raises Exception when `abid_prefix` is empty or 'obj_'. A falsy ts, uri, subtype
+ or rand is replaced by a placeholder value and a warning is printed.
+ """
+ prefix = self.abid_prefix
+ # NOTE(review): eval() executes whatever the *_src strings contain and resolves names
+ # against this function's scope; keep these strings hard-coded by the caller.
+ ts = eval(self.abid_ts_src)
+ uri = eval(self.abid_uri_src)
+ subtype = eval(self.abid_subtype_src)
+ rand = eval(self.abid_rand_src)
+
+ # the prefix check runs only after all four expressions have been evaluated
+ if (not prefix) or prefix == 'obj_':
+ suggested_abid = self.__class__.__name__[:3].lower()
+ raise Exception(f'{self.__class__.__name__}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})')
+
+ if not ts:
+ # NOTE(review): datetime.utcfromtimestamp() is deprecated since Python 3.12 -- confirm the supported versions
+ ts = datetime.utcfromtimestamp(0)
+ print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {self.__class__.__name__}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat())
+
+ if not uri:
+ uri = str(self)
+ print(f'[!] WARNING: Generating ABID with uri=str(self) placeholder because {self.__class__.__name__}.abid_uri_src={self.abid_uri_src} is unset!', uri)
+
+ if not subtype:
+ subtype = self.__class__.__name__
+ print(f'[!] WARNING: Generating ABID with subtype={subtype} placeholder because {self.__class__.__name__}.abid_subtype_src={self.abid_subtype_src} is unset!', subtype)
+
+ if not rand:
+ # first truthy of uuid / id, else pk (getattr without a default raises if pk is missing)
+ rand = getattr(self, 'uuid', None) or getattr(self, 'id', None) or getattr(self, 'pk')
+ print(f'[!] WARNING: Generating ABID with rand=self.id placeholder because {self.__class__.__name__}.abid_rand_src={self.abid_rand_src} is unset!', rand)
+
+ abid = abid_from_values(
+ prefix=prefix,
+ ts=ts,
+ uri=uri,
+ subtype=subtype,
+ rand=rand,
+ salt=DEFAULT_ABID_URI_SALT,
+ )
+ # the result must expose truthy ulid, uuid and typeid attributes
+ assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for {self.__class__.__name__}'
+ return abid
+
+def update_snapshot_ids(apps, schema_editor):
+    """
+    Recalculate every Snapshot's ABID and store the UUID derived from it in Snapshot.uuid.
+
+    Data-migration callable for migrations.RunPython: models are resolved through
+    `apps.get_model`, and `schema_editor` is unused. A progress line is printed
+    every 1000 rows.
+    """
+    Snapshot = apps.get_model("core", "Snapshot")
+    num_total = Snapshot.objects.all().count()
+    print(f' Updating {num_total} Snapshot.id, Snapshot.uuid values in place...')
+    # Load every column calculate_abid() reads through the *_src expressions below
+    # (added, url, uuid) in the main query. Left deferred, each one would be fetched
+    # with its own extra query per row on first access.
+    snapshots = Snapshot.objects.all().only('abid', 'added', 'url', 'uuid')
+    for idx, snapshot in enumerate(snapshots.iterator()):
+        assert snapshot.abid
+        # ABID source expressions, eval()'d by calculate_abid() with `self` bound to this row
+        snapshot.abid_prefix = 'snp_'
+        snapshot.abid_ts_src = 'self.added'
+        snapshot.abid_uri_src = 'self.url'
+        snapshot.abid_subtype_src = '"01"'
+        snapshot.abid_rand_src = 'self.uuid'
+
+        snapshot.abid = calculate_abid(snapshot)
+        snapshot.uuid = snapshot.abid.uuid
+        snapshot.save(update_fields=["abid", "uuid"])
+        # sanity check: the UUID just stored must match the one encoded in the ABID
+        assert str(ABID.parse(snapshot.abid).uuid) == str(snapshot.uuid)
+        if idx % 1000 == 0:
+            print(f'Migrated {idx}/{num_total} Snapshot objects...')
+
+def update_archiveresult_ids(apps, schema_editor):
+    """
+    Recalculate every ArchiveResult's ABID and store the UUID parsed from it in ArchiveResult.uuid.
+
+    Data-migration callable for migrations.RunPython: models are resolved through
+    `apps.get_model`, and `schema_editor` is unused. A progress line is printed
+    every 5000 rows.
+    """
+    Snapshot = apps.get_model("core", "Snapshot")
+    ArchiveResult = apps.get_model("core", "ArchiveResult")
+    num_total = ArchiveResult.objects.all().count()
+    print(f' Updating {num_total} ArchiveResult.id, ArchiveResult.uuid values in place... (may take an hour or longer for large collections...)')
+    # 'extractor' is read by calculate_abid() through abid_subtype_src, so load it with
+    # the row; a deferred field costs one extra query per row on first access.
+    results = ArchiveResult.objects.all().only('abid', 'snapshot_id', 'extractor')
+    for idx, result in enumerate(results.iterator()):
+        assert result.abid
+        result.abid_prefix = 'res_'
+        # NOTE(review): one Snapshot query per row; consider batching if this proves too slow
+        result.snapshot = Snapshot.objects.get(pk=result.snapshot_id)
+        # copy the parent's values onto the row so the eval()'d expressions can reach them via `self`
+        result.snapshot_added = result.snapshot.added
+        result.snapshot_url = result.snapshot.url
+        result.abid_ts_src = 'self.snapshot_added'
+        result.abid_uri_src = 'self.snapshot_url'
+        result.abid_subtype_src = 'self.extractor'
+        result.abid_rand_src = 'self.id'
+
+        result.abid = calculate_abid(result)
+        result.uuid = ABID.parse(result.abid).uuid
+        result.save(update_fields=["abid", "uuid"])
+        # sanity check: the UUID just stored must match the one encoded in the ABID
+        assert str(ABID.parse(result.abid).uuid) == str(result.uuid)
+        if idx % 5000 == 0:
+            print(f'Migrated {idx}/{num_total} ArchiveResult objects...')
+
+
+class Migration(migrations.Migration):
+ """Data migration: run update_snapshot_ids, then update_archiveresult_ids (reverse is a no-op)."""
+
+ dependencies = [
+ ('core', '0026_archiveresult_created_archiveresult_created_by_and_more'),
+ ]
+
+ operations = [
+ migrations.RunPython(update_snapshot_ids, reverse_code=migrations.RunPython.noop),
+ migrations.RunPython(update_archiveresult_ids, reverse_code=migrations.RunPython.noop),
+ ]
+
+
diff --git a/archivebox/core/migrations/0028_alter_archiveresult_uuid.py b/archivebox/core/migrations/0028_alter_archiveresult_uuid.py
new file mode 100644
index 00000000..9b10f044
--- /dev/null
+++ b/archivebox/core/migrations/0028_alter_archiveresult_uuid.py
@@ -0,0 +1,19 @@
+# Generated by Django 5.0.6 on 2024-08-18 04:28
+
+import uuid
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ """Redefine ArchiveResult.uuid as a UUIDField with a uuid4 default."""
+
+ dependencies = [
+ ('core', '0027_update_snapshot_ids'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='uuid',
+ field=models.UUIDField(default=uuid.uuid4),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0029_alter_archiveresult_id.py b/archivebox/core/migrations/0029_alter_archiveresult_id.py
new file mode 100644
index 00000000..7464a670
--- /dev/null
+++ b/archivebox/core/migrations/0029_alter_archiveresult_id.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.0.6 on 2024-08-18 04:28
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ """Redefine ArchiveResult.id as a BigIntegerField primary key."""
+
+ dependencies = [
+ ('core', '0028_alter_archiveresult_uuid'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='id',
+ field=models.BigIntegerField(primary_key=True, serialize=False, verbose_name='ID'),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0030_alter_archiveresult_uuid.py b/archivebox/core/migrations/0030_alter_archiveresult_uuid.py
new file mode 100644
index 00000000..3c1ad788
--- /dev/null
+++ b/archivebox/core/migrations/0030_alter_archiveresult_uuid.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.0.6 on 2024-08-18 05:00
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ """Redefine ArchiveResult.uuid as UUIDField(unique=True), without the default declared in 0028."""
+
+ dependencies = [
+ ('core', '0029_alter_archiveresult_id'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='uuid',
+ field=models.UUIDField(unique=True),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0031_alter_archiveresult_id_alter_archiveresult_uuid_and_more.py b/archivebox/core/migrations/0031_alter_archiveresult_id_alter_archiveresult_uuid_and_more.py
new file mode 100644
index 00000000..64fd6cbe
--- /dev/null
+++ b/archivebox/core/migrations/0031_alter_archiveresult_id_alter_archiveresult_uuid_and_more.py
@@ -0,0 +1,34 @@
+# Generated by Django 5.0.6 on 2024-08-18 05:09
+
+import uuid
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ """Redefine ArchiveResult.id and give the uuid fields of ArchiveResult, Snapshot and Tag a unique uuid4 default."""
+
+ dependencies = [
+ ('core', '0030_alter_archiveresult_uuid'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='id',
+ # NOTE(review): a uuid.uuid4 default on an IntegerField looks unintended; migration 0032
+ # redefines this field with default=core.models.rand_int_id -- confirm
+ field=models.IntegerField(default=uuid.uuid4, primary_key=True, serialize=False, verbose_name='ID'),
+ ),
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='uuid',
+ field=models.UUIDField(default=uuid.uuid4, unique=True),
+ ),
+ migrations.AlterField(
+ model_name='snapshot',
+ name='uuid',
+ field=models.UUIDField(default=uuid.uuid4, unique=True),
+ ),
+ # Tag.uuid is the only one of the three uuid fields declared nullable here
+ migrations.AlterField(
+ model_name='tag',
+ name='uuid',
+ field=models.UUIDField(default=uuid.uuid4, null=True, unique=True),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0032_alter_archiveresult_id.py b/archivebox/core/migrations/0032_alter_archiveresult_id.py
new file mode 100644
index 00000000..98299a31
--- /dev/null
+++ b/archivebox/core/migrations/0032_alter_archiveresult_id.py
@@ -0,0 +1,19 @@
+# Generated by Django 5.0.6 on 2024-08-18 05:20
+
+import core.models
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ """Redefine ArchiveResult.id as a BigIntegerField primary key whose default is core.models.rand_int_id."""
+
+ dependencies = [
+ ('core', '0031_alter_archiveresult_id_alter_archiveresult_uuid_and_more'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='id',
+ field=models.BigIntegerField(default=core.models.rand_int_id, primary_key=True, serialize=False, verbose_name='ID'),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0033_rename_id_archiveresult_old_id.py b/archivebox/core/migrations/0033_rename_id_archiveresult_old_id.py
new file mode 100644
index 00000000..ebced58e
--- /dev/null
+++ b/archivebox/core/migrations/0033_rename_id_archiveresult_old_id.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.0.6 on 2024-08-18 05:34
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ """Rename ArchiveResult.id to ArchiveResult.old_id."""
+
+ dependencies = [
+ ('core', '0032_alter_archiveresult_id'),
+ ]
+
+ operations = [
+ migrations.RenameField(
+ model_name='archiveresult',
+ old_name='id',
+ new_name='old_id',
+ ),
+ ]
diff --git a/archivebox/core/migrations/0034_alter_archiveresult_old_id_alter_archiveresult_uuid.py b/archivebox/core/migrations/0034_alter_archiveresult_old_id_alter_archiveresult_uuid.py
new file mode 100644
index 00000000..121a2154
--- /dev/null
+++ b/archivebox/core/migrations/0034_alter_archiveresult_old_id_alter_archiveresult_uuid.py
@@ -0,0 +1,41 @@
+# Generated by Django 5.0.6 on 2024-08-18 05:37
+
+import core.models
+import uuid
+from django.db import migrations, models
+
+from abid_utils.abid import ABID
+
+
+def update_archiveresult_ids(apps, schema_editor):
+    """
+    Set every ArchiveResult.uuid to the UUID encoded in that row's ABID.
+
+    Data-migration callable for migrations.RunPython: the model is resolved through
+    `apps.get_model`, and `schema_editor` is unused. A progress line is printed
+    every 2500 rows.
+    """
+    archive_results = apps.get_model("core", "ArchiveResult").objects
+    num_total = archive_results.all().count()
+    print(f' Updating {num_total} ArchiveResult.id, ArchiveResult.uuid values in place... (may take an hour or longer for large collections...)')
+    rows = archive_results.all().only('abid').iterator()
+    for position, row in enumerate(rows):
+        assert row.abid
+        row.uuid = ABID.parse(row.abid).uuid
+        row.save(update_fields=["uuid"])
+        # re-parse the ABID to confirm it agrees with the value just saved
+        assert str(ABID.parse(row.abid).uuid) == str(row.uuid)
+        if position % 2500 != 0:
+            continue
+        print(f'Migrated {position}/{num_total} ArchiveResult objects...')
+
+
+class Migration(migrations.Migration):
+ """Redefine ArchiveResult.old_id without primary_key, backfill uuid from abid, then make uuid the primary key."""
+
+ dependencies = [
+ ('core', '0033_rename_id_archiveresult_old_id'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='old_id',
+ field=models.BigIntegerField(default=core.models.rand_int_id, serialize=False, verbose_name='ID'),
+ ),
+ # the backfill runs before uuid is declared primary_key/unique in the next operation
+ migrations.RunPython(update_archiveresult_ids, reverse_code=migrations.RunPython.noop),
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='uuid',
+ field=models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False, unique=True),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0035_remove_archiveresult_uuid_archiveresult_id.py b/archivebox/core/migrations/0035_remove_archiveresult_uuid_archiveresult_id.py
new file mode 100644
index 00000000..26287e3c
--- /dev/null
+++ b/archivebox/core/migrations/0035_remove_archiveresult_uuid_archiveresult_id.py
@@ -0,0 +1,19 @@
+# Generated by Django 5.0.6 on 2024-08-18 05:49
+
+import uuid
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ """Rename ArchiveResult.uuid to ArchiveResult.id."""
+
+ dependencies = [
+ ('core', '0034_alter_archiveresult_old_id_alter_archiveresult_uuid'),
+ ]
+
+ operations = [
+ migrations.RenameField(
+ model_name='archiveresult',
+ old_name='uuid',
+ new_name='id',
+ ),
+ ]
diff --git a/archivebox/core/migrations/0036_alter_archiveresult_id_alter_archiveresult_old_id.py b/archivebox/core/migrations/0036_alter_archiveresult_id_alter_archiveresult_old_id.py
new file mode 100644
index 00000000..10b4f9c6
--- /dev/null
+++ b/archivebox/core/migrations/0036_alter_archiveresult_id_alter_archiveresult_old_id.py
@@ -0,0 +1,25 @@
+# Generated by Django 5.0.6 on 2024-08-18 05:59
+
+import core.models
+import uuid
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ """Set verbose_name 'ID' on the UUID primary key ArchiveResult.id and 'Old ID' on ArchiveResult.old_id."""
+
+ dependencies = [
+ ('core', '0035_remove_archiveresult_uuid_archiveresult_id'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='id',
+ field=models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False, unique=True, verbose_name='ID'),
+ ),
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='old_id',
+ field=models.BigIntegerField(default=core.models.rand_int_id, serialize=False, verbose_name='Old ID'),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0037_rename_id_snapshot_old_id.py b/archivebox/core/migrations/0037_rename_id_snapshot_old_id.py
new file mode 100644
index 00000000..7d901d96
--- /dev/null
+++ b/archivebox/core/migrations/0037_rename_id_snapshot_old_id.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.0.6 on 2024-08-18 06:08
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ """Rename Snapshot.id to Snapshot.old_id."""
+
+ dependencies = [
+ ('core', '0036_alter_archiveresult_id_alter_archiveresult_old_id'),
+ ]
+
+ operations = [
+ migrations.RenameField(
+ model_name='snapshot',
+ old_name='id',
+ new_name='old_id',
+ ),
+ ]
diff --git a/archivebox/core/migrations/0038_rename_uuid_snapshot_id.py b/archivebox/core/migrations/0038_rename_uuid_snapshot_id.py
new file mode 100644
index 00000000..d22a8fc4
--- /dev/null
+++ b/archivebox/core/migrations/0038_rename_uuid_snapshot_id.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.0.6 on 2024-08-18 06:09
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ """Rename Snapshot.uuid to Snapshot.id."""
+
+ dependencies = [
+ ('core', '0037_rename_id_snapshot_old_id'),
+ ]
+
+ operations = [
+ migrations.RenameField(
+ model_name='snapshot',
+ old_name='uuid',
+ new_name='id',
+ ),
+ ]
diff --git a/archivebox/core/migrations/0039_rename_snapshot_archiveresult_snapshot_old.py b/archivebox/core/migrations/0039_rename_snapshot_archiveresult_snapshot_old.py
new file mode 100644
index 00000000..7c2a4e29
--- /dev/null
+++ b/archivebox/core/migrations/0039_rename_snapshot_archiveresult_snapshot_old.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.0.6 on 2024-08-18 06:25
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ """Rename ArchiveResult.snapshot to ArchiveResult.snapshot_old."""
+
+ dependencies = [
+ ('core', '0038_rename_uuid_snapshot_id'),
+ ]
+
+ operations = [
+ migrations.RenameField(
+ model_name='archiveresult',
+ old_name='snapshot',
+ new_name='snapshot_old',
+ ),
+ ]
diff --git a/archivebox/core/migrations/0040_archiveresult_snapshot.py b/archivebox/core/migrations/0040_archiveresult_snapshot.py
new file mode 100644
index 00000000..8c09d079
--- /dev/null
+++ b/archivebox/core/migrations/0040_archiveresult_snapshot.py
@@ -0,0 +1,34 @@
+# Generated by Django 5.0.6 on 2024-08-18 06:46
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+def update_archiveresult_snapshot_ids(apps, schema_editor):
+    """
+    Point every ArchiveResult.snapshot_id at the Snapshot.id of its old parent.
+
+    Each row's `snapshot_old_id` is matched against Snapshot.old_id and the matching
+    Snapshot.id is saved as `snapshot_id`. Data-migration callable for
+    migrations.RunPython: models are resolved through `apps.get_model`, and
+    `schema_editor` is unused. A progress line is printed every 5000 rows.
+
+    Raises Snapshot.DoesNotExist when a row references an old_id no Snapshot has.
+    """
+    ArchiveResult = apps.get_model("core", "ArchiveResult")
+    Snapshot = apps.get_model("core", "Snapshot")
+    num_total = ArchiveResult.objects.all().count()
+    print(f' Updating {num_total} ArchiveResult.snapshot_id values in place... (may take an hour or longer for large collections...)')
+    # Fetch the old_id -> id mapping with one query instead of one Snapshot lookup per
+    # ArchiveResult row. Keys are strings so the match does not depend on the type the
+    # database backend returns for either column.
+    new_id_by_old_id = {
+        str(old_id): new_id
+        for old_id, new_id in Snapshot.objects.values_list('old_id', 'id').iterator()
+    }
+    for idx, result in enumerate(ArchiveResult.objects.all().only('snapshot_old_id').iterator(chunk_size=5000)):
+        assert result.snapshot_old_id
+        try:
+            new_snapshot_id = new_id_by_old_id[str(result.snapshot_old_id)]
+        except KeyError:
+            # same exception type a per-row Snapshot.objects.get() would raise
+            raise Snapshot.DoesNotExist(f'No Snapshot found with old_id={result.snapshot_old_id}') from None
+        result.snapshot_id = new_snapshot_id
+        result.save(update_fields=["snapshot_id"])
+        assert str(result.snapshot_id) == str(new_snapshot_id)
+        if idx % 5000 == 0:
+            print(f'Migrated {idx}/{num_total} ArchiveResult objects...')
+
+
+class Migration(migrations.Migration):
+ """Add a nullable ArchiveResult.snapshot FK targeting Snapshot.id, then backfill it (reverse is a no-op)."""
+
+ dependencies = [
+ ('core', '0039_rename_snapshot_archiveresult_snapshot_old'),
+ ]
+
+ operations = [
+ # added with null=True so existing rows are valid until the backfill below has run
+ migrations.AddField(
+ model_name='archiveresult',
+ name='snapshot',
+ field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='archiveresults', to='core.snapshot', to_field='id'),
+ ),
+ migrations.RunPython(update_archiveresult_snapshot_ids, reverse_code=migrations.RunPython.noop),
+ ]
diff --git a/archivebox/core/migrations/0041_alter_archiveresult_snapshot_and_more.py b/archivebox/core/migrations/0041_alter_archiveresult_snapshot_and_more.py
new file mode 100644
index 00000000..d4be8875
--- /dev/null
+++ b/archivebox/core/migrations/0041_alter_archiveresult_snapshot_and_more.py
@@ -0,0 +1,24 @@
+# Generated by Django 5.0.6 on 2024-08-18 06:50
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ """Redefine ArchiveResult.snapshot without null=True or related_name; set related_name 'archiveresults_old' on snapshot_old."""
+
+ dependencies = [
+ ('core', '0040_archiveresult_snapshot'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='snapshot',
+ field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.snapshot', to_field='id'),
+ ),
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='snapshot_old',
+ field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='archiveresults_old', to='core.snapshot'),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0042_remove_archiveresult_snapshot_old.py b/archivebox/core/migrations/0042_remove_archiveresult_snapshot_old.py
new file mode 100644
index 00000000..3fe9f316
--- /dev/null
+++ b/archivebox/core/migrations/0042_remove_archiveresult_snapshot_old.py
@@ -0,0 +1,17 @@
+# Generated by Django 5.0.6 on 2024-08-18 06:51
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ """Remove the ArchiveResult.snapshot_old field."""
+
+ dependencies = [
+ ('core', '0041_alter_archiveresult_snapshot_and_more'),
+ ]
+
+ operations = [
+ migrations.RemoveField(
+ model_name='archiveresult',
+ name='snapshot_old',
+ ),
+ ]
diff --git a/archivebox/core/migrations/0043_alter_archiveresult_snapshot_alter_snapshot_id_and_more.py b/archivebox/core/migrations/0043_alter_archiveresult_snapshot_alter_snapshot_id_and_more.py
new file mode 100644
index 00000000..c0acddb0
--- /dev/null
+++ b/archivebox/core/migrations/0043_alter_archiveresult_snapshot_alter_snapshot_id_and_more.py
@@ -0,0 +1,20 @@
+# Generated by Django 5.0.6 on 2024-08-18 06:52
+
+import django.db.models.deletion
+import uuid
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ """Redefine the ArchiveResult.snapshot FK with an explicit db_column='snapshot_id'."""
+
+ dependencies = [
+ ('core', '0042_remove_archiveresult_snapshot_old'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='snapshot',
+ field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot', to_field='id'),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more.py b/archivebox/core/migrations/0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more.py
new file mode 100644
index 00000000..b7531233
--- /dev/null
+++ b/archivebox/core/migrations/0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more.py
@@ -0,0 +1,40 @@
+# Generated by Django 5.0.6 on 2024-08-19 23:01
+
+import django.db.models.deletion
+import uuid
+from django.db import migrations, models
+
+
+
+class Migration(migrations.Migration):
+ """State-only migration: declare SnapshotTag on table 'core_snapshot_tags' and route Snapshot.tags through it."""
+
+ dependencies = [
+ ('core', '0043_alter_archiveresult_snapshot_alter_snapshot_id_and_more'),
+ ]
+
+ operations = [
+ # database_operations is empty, so only the migration state changes here
+ migrations.SeparateDatabaseAndState(
+ database_operations=[
+ # No-op, SnapshotTag model already exists in DB
+ ],
+ state_operations=[
+ migrations.CreateModel(
+ name='SnapshotTag',
+ fields=[
+ ('id', models.AutoField(primary_key=True, serialize=False)),
+ ('snapshot', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.snapshot')),
+ ('tag', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.tag')),
+ ],
+ options={
+ 'db_table': 'core_snapshot_tags',
+ 'unique_together': {('snapshot', 'tag')},
+ },
+ ),
+ migrations.AlterField(
+ model_name='snapshot',
+ name='tags',
+ field=models.ManyToManyField(blank=True, related_name='snapshot_set', through='core.SnapshotTag', to='core.tag'),
+ ),
+ ],
+ ),
+ ]
diff --git a/archivebox/core/migrations/0045_alter_snapshot_old_id.py b/archivebox/core/migrations/0045_alter_snapshot_old_id.py
new file mode 100644
index 00000000..7dc1a26a
--- /dev/null
+++ b/archivebox/core/migrations/0045_alter_snapshot_old_id.py
@@ -0,0 +1,19 @@
+# Generated by Django 5.0.6 on 2024-08-20 01:54
+
+import uuid
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ """Redefine Snapshot.old_id as a non-editable, unique UUIDField (uuid4 default) declared as the primary key."""
+
+ dependencies = [
+ ('core', '0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='snapshot',
+ name='old_id',
+ field=models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False, unique=True),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0046_alter_archiveresult_snapshot_alter_snapshot_id_and_more.py b/archivebox/core/migrations/0046_alter_archiveresult_snapshot_alter_snapshot_id_and_more.py
new file mode 100644
index 00000000..39216ec5
--- /dev/null
+++ b/archivebox/core/migrations/0046_alter_archiveresult_snapshot_alter_snapshot_id_and_more.py
@@ -0,0 +1,30 @@
+# Generated by Django 5.0.6 on 2024-08-20 01:55
+
+import django.db.models.deletion
+import uuid
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ """Restate the ArchiveResult.snapshot FK, declare Snapshot.id the primary key, and redefine Snapshot.old_id without primary_key."""
+
+ dependencies = [
+ ('core', '0045_alter_snapshot_old_id'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='snapshot',
+ field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot', to_field='id'),
+ ),
+ migrations.AlterField(
+ model_name='snapshot',
+ name='id',
+ field=models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False, unique=True),
+ ),
+ # old_id stays unique and non-editable, but is no longer declared primary_key
+ migrations.AlterField(
+ model_name='snapshot',
+ name='old_id',
+ field=models.UUIDField(default=uuid.uuid4, editable=False, unique=True),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0047_alter_snapshottag_unique_together_and_more.py b/archivebox/core/migrations/0047_alter_snapshottag_unique_together_and_more.py
new file mode 100644
index 00000000..b1c845f8
--- /dev/null
+++ b/archivebox/core/migrations/0047_alter_snapshottag_unique_together_and_more.py
@@ -0,0 +1,24 @@
+# Generated by Django 5.0.6 on 2024-08-20 02:16
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ """Restate the ArchiveResult.snapshot FK and give SnapshotTag.tag an explicit db_column='tag_id'."""
+
+ dependencies = [
+ ('core', '0046_alter_archiveresult_snapshot_alter_snapshot_id_and_more'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='snapshot',
+ field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot', to_field='id'),
+ ),
+ migrations.AlterField(
+ model_name='snapshottag',
+ name='tag',
+ field=models.ForeignKey(db_column='tag_id', on_delete=django.db.models.deletion.CASCADE, to='core.tag'),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0048_alter_archiveresult_snapshot_and_more.py b/archivebox/core/migrations/0048_alter_archiveresult_snapshot_and_more.py
new file mode 100644
index 00000000..81bc8a06
--- /dev/null
+++ b/archivebox/core/migrations/0048_alter_archiveresult_snapshot_and_more.py
@@ -0,0 +1,24 @@
+# Generated by Django 5.0.6 on 2024-08-20 02:17
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ """Drop to_field from the ArchiveResult.snapshot FK and point SnapshotTag.snapshot at Snapshot.old_id."""
+
+ dependencies = [
+ ('core', '0047_alter_snapshottag_unique_together_and_more'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='archiveresult',
+ name='snapshot',
+ field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot'),
+ ),
+ migrations.AlterField(
+ model_name='snapshottag',
+ name='snapshot',
+ field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot', to_field='old_id'),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0049_rename_snapshot_snapshottag_snapshot_old_and_more.py b/archivebox/core/migrations/0049_rename_snapshot_snapshottag_snapshot_old_and_more.py
new file mode 100644
index 00000000..aa0c5b39
--- /dev/null
+++ b/archivebox/core/migrations/0049_rename_snapshot_snapshottag_snapshot_old_and_more.py
@@ -0,0 +1,22 @@
+# Generated by Django 5.0.6 on 2024-08-20 02:26
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ """Rename SnapshotTag.snapshot to snapshot_old and set unique_together to (snapshot_old, tag)."""
+
+ dependencies = [
+ ('core', '0048_alter_archiveresult_snapshot_and_more'),
+ ]
+
+ operations = [
+ migrations.RenameField(
+ model_name='snapshottag',
+ old_name='snapshot',
+ new_name='snapshot_old',
+ ),
+ migrations.AlterUniqueTogether(
+ name='snapshottag',
+ unique_together={('snapshot_old', 'tag')},
+ ),
+ ]
diff --git a/archivebox/core/migrations/0050_alter_snapshottag_snapshot_old.py b/archivebox/core/migrations/0050_alter_snapshottag_snapshot_old.py
new file mode 100644
index 00000000..4bff827c
--- /dev/null
+++ b/archivebox/core/migrations/0050_alter_snapshottag_snapshot_old.py
@@ -0,0 +1,19 @@
+# Generated by Django 5.0.6 on 2024-08-20 02:30
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ """Redefine the SnapshotTag.snapshot_old FK with db_column='snapshot_old_id' (still targeting Snapshot.old_id)."""
+
+ dependencies = [
+ ('core', '0049_rename_snapshot_snapshottag_snapshot_old_and_more'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='snapshottag',
+ name='snapshot_old',
+ field=models.ForeignKey(db_column='snapshot_old_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot', to_field='old_id'),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0051_snapshottag_snapshot_alter_snapshottag_snapshot_old.py b/archivebox/core/migrations/0051_snapshottag_snapshot_alter_snapshottag_snapshot_old.py
new file mode 100644
index 00000000..ddb7afbb
--- /dev/null
+++ b/archivebox/core/migrations/0051_snapshottag_snapshot_alter_snapshottag_snapshot_old.py
@@ -0,0 +1,40 @@
+# Generated by Django 5.0.6 on 2024-08-20 02:31
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+def update_snapshottag_ids(apps, schema_editor):
+    """
+    Point every SnapshotTag.snapshot_id at the Snapshot.id of its old parent.
+
+    Each row's `snapshot_old_id` is matched against Snapshot.old_id and the matching
+    Snapshot.id is saved as `snapshot_id`. Data-migration callable for
+    migrations.RunPython: models are resolved through `apps.get_model`, and
+    `schema_editor` is unused. A progress line is printed every 100 rows.
+
+    Raises Snapshot.DoesNotExist when a row references an old_id no Snapshot has.
+    """
+    Snapshot = apps.get_model("core", "Snapshot")
+    SnapshotTag = apps.get_model("core", "SnapshotTag")
+    num_total = SnapshotTag.objects.all().count()
+    print(f' Updating {num_total} SnapshotTag.snapshot_id values in place... (may take an hour or longer for large collections...)')
+    # Fetch the old_id -> id mapping with one query instead of one Snapshot lookup per
+    # SnapshotTag row. Keys are strings so the match does not depend on the type the
+    # database backend returns for either column.
+    new_id_by_old_id = {
+        str(old_id): new_id
+        for old_id, new_id in Snapshot.objects.values_list('old_id', 'id').iterator()
+    }
+    for idx, snapshottag in enumerate(SnapshotTag.objects.all().only('snapshot_old_id').iterator()):
+        assert snapshottag.snapshot_old_id
+        try:
+            new_snapshot_id = new_id_by_old_id[str(snapshottag.snapshot_old_id)]
+        except KeyError:
+            # same exception type a per-row Snapshot.objects.get() would raise
+            raise Snapshot.DoesNotExist(f'No Snapshot found with old_id={snapshottag.snapshot_old_id}') from None
+        snapshottag.snapshot_id = new_snapshot_id
+        snapshottag.save(update_fields=["snapshot_id"])
+        assert str(snapshottag.snapshot_id) == str(new_snapshot_id)
+        if idx % 100 == 0:
+            print(f'Migrated {idx}/{num_total} SnapshotTag objects...')
+
+
+class Migration(migrations.Migration):
+ """Add a nullable SnapshotTag.snapshot FK, set a related_name on snapshot_old, then backfill snapshot (reverse is a no-op)."""
+
+ dependencies = [
+ ('core', '0050_alter_snapshottag_snapshot_old'),
+ ]
+
+ operations = [
+ # added with null=True so existing rows are valid until the backfill below has run
+ migrations.AddField(
+ model_name='snapshottag',
+ name='snapshot',
+ field=models.ForeignKey(blank=True, db_column='snapshot_id', null=True, on_delete=django.db.models.deletion.CASCADE, to='core.snapshot'),
+ ),
+ migrations.AlterField(
+ model_name='snapshottag',
+ name='snapshot_old',
+ field=models.ForeignKey(db_column='snapshot_old_id', on_delete=django.db.models.deletion.CASCADE, related_name='snapshottag_old_set', to='core.snapshot', to_field='old_id'),
+ ),
+ migrations.RunPython(update_snapshottag_ids, reverse_code=migrations.RunPython.noop),
+ ]
diff --git a/archivebox/core/migrations/0052_alter_snapshottag_unique_together_and_more.py b/archivebox/core/migrations/0052_alter_snapshottag_unique_together_and_more.py
new file mode 100644
index 00000000..e11000bc
--- /dev/null
+++ b/archivebox/core/migrations/0052_alter_snapshottag_unique_together_and_more.py
@@ -0,0 +1,27 @@
+# Generated by Django 5.0.6 on 2024-08-20 02:37
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ """Clear SnapshotTag.unique_together, redefine SnapshotTag.snapshot without null/blank, then set unique_together to (snapshot, tag)."""
+
+ dependencies = [
+ ('core', '0051_snapshottag_snapshot_alter_snapshottag_snapshot_old'),
+ ]
+
+ operations = [
+ # the constraint is emptied first, then re-declared on the new column after the field change
+ migrations.AlterUniqueTogether(
+ name='snapshottag',
+ unique_together=set(),
+ ),
+ migrations.AlterField(
+ model_name='snapshottag',
+ name='snapshot',
+ field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot'),
+ ),
+ migrations.AlterUniqueTogether(
+ name='snapshottag',
+ unique_together={('snapshot', 'tag')},
+ ),
+ ]
diff --git a/archivebox/core/migrations/0053_remove_snapshottag_snapshot_old.py b/archivebox/core/migrations/0053_remove_snapshottag_snapshot_old.py
new file mode 100644
index 00000000..cf50fc2c
--- /dev/null
+++ b/archivebox/core/migrations/0053_remove_snapshottag_snapshot_old.py
@@ -0,0 +1,17 @@
+# Generated by Django 5.0.6 on 2024-08-20 02:38
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ """Remove the SnapshotTag.snapshot_old field."""
+
+ dependencies = [
+ ('core', '0052_alter_snapshottag_unique_together_and_more'),
+ ]
+
+ operations = [
+ migrations.RemoveField(
+ model_name='snapshottag',
+ name='snapshot_old',
+ ),
+ ]
diff --git a/archivebox/core/migrations/0054_alter_snapshot_timestamp.py b/archivebox/core/migrations/0054_alter_snapshot_timestamp.py
new file mode 100644
index 00000000..6febe7c3
--- /dev/null
+++ b/archivebox/core/migrations/0054_alter_snapshot_timestamp.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.0.6 on 2024-08-20 02:40
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Re-declares Snapshot.timestamp as a unique, indexed CharField(max_length=32) with editable=False.
+ dependencies = [
+ ('core', '0053_remove_snapshottag_snapshot_old'),
+ ]
+ # NOTE(review): editable=False looks like the only new option here, so this is presumably a state-only change - verify with sqlmigrate.
+ operations = [
+ migrations.AlterField(
+ model_name='snapshot',
+ name='timestamp',
+ field=models.CharField(db_index=True, editable=False, max_length=32, unique=True),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0055_alter_tag_slug.py b/archivebox/core/migrations/0055_alter_tag_slug.py
new file mode 100644
index 00000000..741b1365
--- /dev/null
+++ b/archivebox/core/migrations/0055_alter_tag_slug.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.0.6 on 2024-08-20 03:24
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Re-declares Tag.slug as a unique SlugField(max_length=100) with editable=False.
+ dependencies = [
+ ('core', '0054_alter_snapshot_timestamp'),
+ ]
+ # No blank=True is passed, so the field falls back to Django's default of blank=False.
+ operations = [
+ migrations.AlterField(
+ model_name='tag',
+ name='slug',
+ field=models.SlugField(editable=False, max_length=100, unique=True),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0056_remove_tag_uuid.py b/archivebox/core/migrations/0056_remove_tag_uuid.py
new file mode 100644
index 00000000..9c01507e
--- /dev/null
+++ b/archivebox/core/migrations/0056_remove_tag_uuid.py
@@ -0,0 +1,17 @@
+# Generated by Django 5.0.6 on 2024-08-20 03:25
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ # Drops the Tag.uuid field; later migrations in this series introduce a UUID Tag.id instead.
+ dependencies = [
+ ('core', '0055_alter_tag_slug'),
+ ]
+ # Any values stored in Tag.uuid are discarded by this operation.
+ operations = [
+ migrations.RemoveField(
+ model_name='tag',
+ name='uuid',
+ ),
+ ]
diff --git a/archivebox/core/migrations/0057_rename_id_tag_old_id.py b/archivebox/core/migrations/0057_rename_id_tag_old_id.py
new file mode 100644
index 00000000..ebe20b01
--- /dev/null
+++ b/archivebox/core/migrations/0057_rename_id_tag_old_id.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.0.6 on 2024-08-20 03:29
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ # Renames Tag.id to Tag.old_id, freeing the name "id" for the UUID field added in 0059.
+ dependencies = [
+ ('core', '0056_remove_tag_uuid'),
+ ]
+ # Only the field name changes here; its type is altered separately in 0058.
+ operations = [
+ migrations.RenameField(
+ model_name='tag',
+ old_name='id',
+ new_name='old_id',
+ ),
+ ]
diff --git a/archivebox/core/migrations/0058_alter_tag_old_id.py b/archivebox/core/migrations/0058_alter_tag_old_id.py
new file mode 100644
index 00000000..4cc291c0
--- /dev/null
+++ b/archivebox/core/migrations/0058_alter_tag_old_id.py
@@ -0,0 +1,19 @@
+# Generated by Django 5.0.6 on 2024-08-20 03:30
+
+import core.models
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Turns Tag.old_id into a BigIntegerField that is still the primary key, defaulting to core.models.rand_int_id.
+ dependencies = [
+ ('core', '0057_rename_id_tag_old_id'),
+ ]
+ # The default references core.models.rand_int_id, so that callable must stay importable for this migration to load.
+ operations = [
+ migrations.AlterField(
+ model_name='tag',
+ name='old_id',
+ field=models.BigIntegerField(default=core.models.rand_int_id, primary_key=True, serialize=False, verbose_name='Old ID'),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0059_tag_id.py b/archivebox/core/migrations/0059_tag_id.py
new file mode 100644
index 00000000..004ac541
--- /dev/null
+++ b/archivebox/core/migrations/0059_tag_id.py
@@ -0,0 +1,104 @@
+# Generated by Django 5.0.6 on 2024-08-20 03:33
+
+from datetime import datetime
+
+from django.db import migrations, models
+from abid_utils.models import ABID, abid_from_values
+
+
+def calculate_abid(self):
+    """
+    Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
+
+    Works on any object exposing ``abid_prefix`` and the four ``abid_*_src``
+    attributes; here those are set per instance by ``update_tag_ids``. Each
+    ``abid_*_src`` value is a Python expression (e.g. ``'self.slug'``) that is
+    evaluated with ``self`` in scope. Falsy ts/uri/subtype/rand results are
+    replaced by placeholders and a warning is printed.
+
+    Raises:
+        Exception: if ``self.abid_prefix`` is empty or still ``'obj_'``.
+        AssertionError: if the derived ABID lacks a ulid, uuid or typeid.
+    """
+    prefix = self.abid_prefix
+    # eval() is tolerable only because every abid_*_src string is a hard-coded
+    # literal assigned in update_tag_ids(); never point these at external data.
+    ts = eval(self.abid_ts_src)
+    uri = eval(self.abid_uri_src)
+    subtype = eval(self.abid_subtype_src)
+    rand = eval(self.abid_rand_src)
+
+    if (not prefix) or prefix == 'obj_':
+        suggested_abid = self.__class__.__name__[:3].lower()
+        raise Exception(f'{self.__class__.__name__}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})')
+
+    if not ts:
+        # Epoch placeholder; relies on the module-level datetime import.
+        ts = datetime.utcfromtimestamp(0)
+        print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {self.__class__.__name__}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat())
+
+    if not uri:
+        uri = str(self)
+        print(f'[!] WARNING: Generating ABID with uri=str(self) placeholder because {self.__class__.__name__}.abid_uri_src={self.abid_uri_src} is unset!', uri)
+
+    if not subtype:
+        subtype = self.__class__.__name__
+        print(f'[!] WARNING: Generating ABID with subtype={subtype} placeholder because {self.__class__.__name__}.abid_subtype_src={self.abid_subtype_src} is unset!', subtype)
+
+    if not rand:
+        rand = getattr(self, 'uuid', None) or getattr(self, 'id', None) or getattr(self, 'pk')
+        print(f'[!] WARNING: Generating ABID with rand=self.id placeholder because {self.__class__.__name__}.abid_rand_src={self.abid_rand_src} is unset!', rand)
+
+    abid = abid_from_values(
+        prefix=prefix,
+        ts=ts,
+        uri=uri,
+        subtype=subtype,
+        rand=rand,
+    )
+    assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for {self.__class__.__name__}'
+    return abid
+
+
+def update_tag_ids(apps, schema_editor):
+    """
+    Backfill ``Tag.abid`` and the new UUID ``Tag.id`` for every existing tag.
+
+    The ABID is derived from each tag's created/slug/old_id values and ``id``
+    is set to that ABID's UUID.
+    """
+    Tag = apps.get_model("core", "Tag")
+    num_total = Tag.objects.all().count()
+    print(f' Updating {num_total} Tag.id, Tag.abid values in place...')
+    for idx, tag in enumerate(Tag.objects.all().iterator()):
+        assert tag.name
+        # The ABID config is attached per instance so calculate_abid() can read it.
+        tag.abid_prefix = 'tag_'
+        tag.abid_ts_src = 'self.created'
+        tag.abid_uri_src = 'self.slug'
+        tag.abid_subtype_src = '"03"'
+        tag.abid_rand_src = 'self.old_id'
+        tag.abid = calculate_abid(tag)
+        tag.id = tag.abid.uuid
+        tag.save(update_fields=["abid", "id"])
+        # Round-trip check: the stored ABID must decode back to the new id.
+        assert str(ABID.parse(tag.abid).uuid) == str(tag.id)
+        if idx % 10 == 0:
+            print(f'Migrated {idx}/{num_total} Tag objects...')
+
+
+class Migration(migrations.Migration):
+    """Add the nullable UUID column Tag.id and backfill it from each tag's ABID."""
+
+    dependencies = [
+        ('core', '0058_alter_tag_old_id'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='tag',
+            name='id',
+            field=models.UUIDField(blank=True, null=True),
+        ),
+        migrations.RunPython(update_tag_ids, reverse_code=migrations.RunPython.noop),
+    ]
diff --git a/archivebox/core/migrations/0060_alter_tag_id.py b/archivebox/core/migrations/0060_alter_tag_id.py
new file mode 100644
index 00000000..aeabefdc
--- /dev/null
+++ b/archivebox/core/migrations/0060_alter_tag_id.py
@@ -0,0 +1,19 @@
+# Generated by Django 5.0.6 on 2024-08-20 03:42
+
+import uuid
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Tightens Tag.id after the 0059 backfill: no longer null/blank, unique, editable=False, default uuid.uuid4.
+ dependencies = [
+ ('core', '0059_tag_id'),
+ ]
+ # Tag.id is not the primary key yet; that switch happens in 0066.
+ operations = [
+ migrations.AlterField(
+ model_name='tag',
+ name='id',
+ field=models.UUIDField(default=uuid.uuid4, editable=False, unique=True),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0061_rename_tag_snapshottag_old_tag_and_more.py b/archivebox/core/migrations/0061_rename_tag_snapshottag_old_tag_and_more.py
new file mode 100644
index 00000000..e29c8081
--- /dev/null
+++ b/archivebox/core/migrations/0061_rename_tag_snapshottag_old_tag_and_more.py
@@ -0,0 +1,22 @@
+# Generated by Django 5.0.6 on 2024-08-20 03:43
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ # Renames SnapshotTag.tag to old_tag, freeing the name "tag" for the UUID-based FK added in 0063.
+ dependencies = [
+ ('core', '0060_alter_tag_id'),
+ ]
+ # unique_together is re-declared so it refers to the renamed field.
+ operations = [
+ migrations.RenameField(
+ model_name='snapshottag',
+ old_name='tag',
+ new_name='old_tag',
+ ),
+ migrations.AlterUniqueTogether(
+ name='snapshottag',
+ unique_together={('snapshot', 'old_tag')},
+ ),
+ ]
diff --git a/archivebox/core/migrations/0062_alter_snapshottag_old_tag.py b/archivebox/core/migrations/0062_alter_snapshottag_old_tag.py
new file mode 100644
index 00000000..561d739c
--- /dev/null
+++ b/archivebox/core/migrations/0062_alter_snapshottag_old_tag.py
@@ -0,0 +1,19 @@
+# Generated by Django 5.0.6 on 2024-08-20 03:44
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Re-declares SnapshotTag.old_tag as a FK to core.tag with an explicit db_column of 'old_tag_id'.
+ dependencies = [
+ ('core', '0061_rename_tag_snapshottag_old_tag_and_more'),
+ ]
+ # No to_field is given, so the FK targets Tag's primary key (old_id at this point, per 0058).
+ operations = [
+ migrations.AlterField(
+ model_name='snapshottag',
+ name='old_tag',
+ field=models.ForeignKey(db_column='old_tag_id', on_delete=django.db.models.deletion.CASCADE, to='core.tag'),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0063_snapshottag_tag_alter_snapshottag_old_tag.py b/archivebox/core/migrations/0063_snapshottag_tag_alter_snapshottag_old_tag.py
new file mode 100644
index 00000000..6c574669
--- /dev/null
+++ b/archivebox/core/migrations/0063_snapshottag_tag_alter_snapshottag_old_tag.py
@@ -0,0 +1,57 @@
+# Generated by Django 5.0.6 on 2024-08-20 03:45
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+def update_snapshottag_ids(apps, schema_editor):
+    """
+    Backfill ``SnapshotTag.tag_id`` (the new UUID ``Tag.id``) from ``old_tag_id``.
+
+    Runs one bulk UPDATE per tag rather than a SELECT plus UPDATE per
+    SnapshotTag row, which keeps the query count proportional to the number
+    of tags.
+
+    Raises:
+        AssertionError: if a tag has no new id yet, or if any SnapshotTag row
+            is still without a tag afterwards.
+    """
+    Tag = apps.get_model("core", "Tag")
+    SnapshotTag = apps.get_model("core", "SnapshotTag")
+    num_total = SnapshotTag.objects.all().count()
+    print(f' Updating {num_total} SnapshotTag.tag_id values in place...')
+
+    # Load the old_id -> id mapping once instead of querying Tag for every row.
+    new_id_by_old_id = dict(Tag.objects.values_list('old_id', 'id'))
+    num_migrated = 0
+    for idx, (old_id, new_id) in enumerate(new_id_by_old_id.items()):
+        assert new_id, f'Tag with old_id={old_id} has no new id yet'
+        num_migrated += SnapshotTag.objects.filter(old_tag_id=old_id).update(tag_id=new_id)
+        if idx % 100 == 0:
+            print(f'Migrated {num_migrated}/{num_total} SnapshotTag objects...')
+
+    # Every row must now point at a tag; 0064 makes the column non-nullable.
+    num_missing = SnapshotTag.objects.filter(tag__isnull=True).count()
+    assert not num_missing, f'{num_missing} SnapshotTag rows were left without a tag_id'
+
+
+class Migration(migrations.Migration):
+    """Add the nullable SnapshotTag.tag FK (to Tag.id) and backfill it from old_tag."""
+
+    dependencies = [
+        ('core', '0062_alter_snapshottag_old_tag'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='snapshottag',
+            name='tag',
+            field=models.ForeignKey(blank=True, db_column='tag_id', null=True, on_delete=django.db.models.deletion.CASCADE, to='core.tag', to_field='id'),
+        ),
+        migrations.AlterField(
+            model_name='snapshottag',
+            name='old_tag',
+            field=models.ForeignKey(db_column='old_tag_id', on_delete=django.db.models.deletion.CASCADE, related_name='snapshottags_old', to='core.tag'),
+        ),
+        migrations.RunPython(update_snapshottag_ids, reverse_code=migrations.RunPython.noop),
+    ]
diff --git a/archivebox/core/migrations/0064_alter_snapshottag_unique_together_and_more.py b/archivebox/core/migrations/0064_alter_snapshottag_unique_together_and_more.py
new file mode 100644
index 00000000..911bf68b
--- /dev/null
+++ b/archivebox/core/migrations/0064_alter_snapshottag_unique_together_and_more.py
@@ -0,0 +1,27 @@
+# Generated by Django 5.0.6 on 2024-08-20 03:50
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Makes SnapshotTag.tag required now that 0063 has backfilled it (null=True/blank=True are dropped); it still targets Tag.id via to_field.
+ dependencies = [
+ ('core', '0063_snapshottag_tag_alter_snapshottag_old_tag'),
+ ]
+ # unique_together is cleared before the FK is altered, then moved from (snapshot, old_tag) to (snapshot, tag).
+ operations = [
+ migrations.AlterUniqueTogether(
+ name='snapshottag',
+ unique_together=set(),
+ ),
+ migrations.AlterField(
+ model_name='snapshottag',
+ name='tag',
+ field=models.ForeignKey(db_column='tag_id', on_delete=django.db.models.deletion.CASCADE, to='core.tag', to_field='id'),
+ ),
+ migrations.AlterUniqueTogether(
+ name='snapshottag',
+ unique_together={('snapshot', 'tag')},
+ ),
+ ]
diff --git a/archivebox/core/migrations/0065_remove_snapshottag_old_tag.py b/archivebox/core/migrations/0065_remove_snapshottag_old_tag.py
new file mode 100644
index 00000000..16b2eea0
--- /dev/null
+++ b/archivebox/core/migrations/0065_remove_snapshottag_old_tag.py
@@ -0,0 +1,17 @@
+# Generated by Django 5.0.6 on 2024-08-20 03:51
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ # Drops SnapshotTag.old_tag (column old_tag_id), the FK that pointed at the legacy integer Tag key.
+ dependencies = [
+ ('core', '0064_alter_snapshottag_unique_together_and_more'),
+ ]
+ # Runs after 0064, which already moved unique_together onto (snapshot, tag).
+ operations = [
+ migrations.RemoveField(
+ model_name='snapshottag',
+ name='old_tag',
+ ),
+ ]
diff --git a/archivebox/core/migrations/0066_alter_snapshottag_tag_alter_tag_id_alter_tag_old_id.py b/archivebox/core/migrations/0066_alter_snapshottag_tag_alter_tag_id_alter_tag_old_id.py
new file mode 100644
index 00000000..e6022eab
--- /dev/null
+++ b/archivebox/core/migrations/0066_alter_snapshottag_tag_alter_tag_id_alter_tag_old_id.py
@@ -0,0 +1,31 @@
+# Generated by Django 5.0.6 on 2024-08-20 03:52
+
+import core.models
+import django.db.models.deletion
+import uuid
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Switches Tag's primary key: the UUID Tag.id becomes primary_key=True and Tag.old_id is demoted to a unique BigIntegerField.
+ dependencies = [
+ ('core', '0065_remove_snapshottag_old_tag'),
+ ]
+ # SnapshotTag.tag is re-declared first, still with to_field='id', before the two Tag fields are altered.
+ operations = [
+ migrations.AlterField(
+ model_name='snapshottag',
+ name='tag',
+ field=models.ForeignKey(db_column='tag_id', on_delete=django.db.models.deletion.CASCADE, to='core.tag', to_field='id'),
+ ),
+ migrations.AlterField(
+ model_name='tag',
+ name='id',
+ field=models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False, unique=True),
+ ),
+ migrations.AlterField(
+ model_name='tag',
+ name='old_id',
+ field=models.BigIntegerField(default=core.models.rand_int_id, serialize=False, unique=True, verbose_name='Old ID'),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0067_alter_snapshottag_tag.py b/archivebox/core/migrations/0067_alter_snapshottag_tag.py
new file mode 100644
index 00000000..b1c9f6a5
--- /dev/null
+++ b/archivebox/core/migrations/0067_alter_snapshottag_tag.py
@@ -0,0 +1,19 @@
+# Generated by Django 5.0.6 on 2024-08-20 03:53
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Re-declares SnapshotTag.tag without the explicit to_field='id' that 0066 still carried.
+ dependencies = [
+ ('core', '0066_alter_snapshottag_tag_alter_tag_id_alter_tag_old_id'),
+ ]
+ # Tag.id is the primary key since 0066, so the FK's default target is the same column.
+ operations = [
+ migrations.AlterField(
+ model_name='snapshottag',
+ name='tag',
+ field=models.ForeignKey(db_column='tag_id', on_delete=django.db.models.deletion.CASCADE, to='core.tag'),
+ ),
+ ]
diff --git a/archivebox/core/migrations/0068_alter_archiveresult_options.py b/archivebox/core/migrations/0068_alter_archiveresult_options.py
new file mode 100644
index 00000000..d5606592
--- /dev/null
+++ b/archivebox/core/migrations/0068_alter_archiveresult_options.py
@@ -0,0 +1,17 @@
+# Generated by Django 5.0.6 on 2024-08-20 07:26
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ # Records the new display names for ArchiveResult: 'Archive Result' / 'Archive Results Log'.
+ dependencies = [
+ ('core', '0067_alter_snapshottag_tag'),
+ ]
+ # Only model options (verbose_name, verbose_name_plural) change here; no field is touched.
+ operations = [
+ migrations.AlterModelOptions(
+ name='archiveresult',
+ options={'verbose_name': 'Archive Result', 'verbose_name_plural': 'Archive Results Log'},
+ ),
+ ]
diff --git a/archivebox/core/models.py b/archivebox/core/models.py
index 1b896217..c2b6d4e6 100644
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -5,6 +5,7 @@ from typing import Optional, List, Dict
from django_stubs_ext.db.models import TypedModelMeta
import json
+import random
import uuid
from uuid import uuid4
@@ -14,9 +15,8 @@ from django.db import models
from django.utils.functional import cached_property
from django.utils.text import slugify
from django.core.cache import cache
-from django.urls import reverse
+from django.urls import reverse, reverse_lazy
from django.db.models import Case, When, Value, IntegerField
-from django.contrib.auth.models import User # noqa
from abid_utils.models import ABIDModel, ABIDField
@@ -35,6 +35,8 @@ STATUS_CHOICES = [
("skipped", "skipped")
]
+def rand_int_id():  # default factory for the legacy integer old_id fields (Tag.old_id, ArchiveResult.old_id)
+ return random.getrandbits(32)  # random non-negative int below 2**32; not guaranteed unique
# class BaseModel(models.Model):
@@ -48,24 +50,26 @@ STATUS_CHOICES = [
# abstract = True
+
+
class Tag(ABIDModel):
"""
Based on django-taggit model + ABID base.
"""
abid_prefix = 'tag_'
abid_ts_src = 'self.created' # TODO: add created/modified time
- abid_uri_src = 'self.name'
+ abid_uri_src = 'self.slug'
abid_subtype_src = '"03"'
- abid_rand_src = 'self.id'
+ abid_rand_src = 'self.old_id'
- # id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
- id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
- uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
+ old_id = models.BigIntegerField(unique=True, default=rand_int_id, serialize=False, verbose_name='Old ID') # legacy PK
+
+ id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False, unique=True)
abid = ABIDField(prefix=abid_prefix)
name = models.CharField(unique=True, blank=False, max_length=100)
- slug = models.SlugField(unique=True, blank=True, max_length=100)
+ slug = models.SlugField(unique=True, blank=False, max_length=100, editable=False)
# slug is autoset on save from name, never set it manually
@@ -76,6 +80,10 @@ class Tag(ABIDModel):
def __str__(self):
return self.name
+ # @property
+ # def old_id(self):
+ # return self.id
+
def slugify(self, tag, i=None):
slug = slugify(tag)
if i is not None:
@@ -103,38 +111,67 @@ class Tag(ABIDModel):
i = 1 if i is None else i+1
else:
return super().save(*args, **kwargs)
+
+ @property
+ def api_url(self) -> str:
+ # /api/v1/core/tag/{uulid}
+ return reverse_lazy('api-1:get_tag', args=[self.abid])
+ @property
+ def api_docs_url(self) -> str:
+ return f'/api/v1/docs#/Core%20Models/api_v1_core_get_tag'
+
+class SnapshotTag(models.Model):  # explicit through-model for Snapshot.tags (see through=SnapshotTag on that field)
+ id = models.AutoField(primary_key=True)
+ # Both FKs name their column and target field explicitly; deleting either side cascades to this row.
+ snapshot = models.ForeignKey('Snapshot', db_column='snapshot_id', on_delete=models.CASCADE, to_field='id')
+ tag = models.ForeignKey(Tag, db_column='tag_id', on_delete=models.CASCADE, to_field='id')
+ # NOTE(review): db_table is pinned to 'core_snapshot_tags', presumably the table of the former implicit M2M so existing rows are reused - confirm.
+ class Meta:
+ db_table = 'core_snapshot_tags'
+ unique_together = [('snapshot', 'tag')]
class Snapshot(ABIDModel):
abid_prefix = 'snp_'
abid_ts_src = 'self.added'
abid_uri_src = 'self.url'
abid_subtype_src = '"01"'
- abid_rand_src = 'self.id'
+ abid_rand_src = 'self.old_id'
- id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) # legacy pk
- uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
+ old_id = models.UUIDField(default=uuid.uuid4, editable=False, unique=True) # legacy pk
+ id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True, unique=True)
abid = ABIDField(prefix=abid_prefix)
url = models.URLField(unique=True, db_index=True)
- timestamp = models.CharField(max_length=32, unique=True, db_index=True)
+ timestamp = models.CharField(max_length=32, unique=True, db_index=True, editable=False)
title = models.CharField(max_length=512, null=True, blank=True, db_index=True)
+
+ tags = models.ManyToManyField(Tag, blank=True, through=SnapshotTag, related_name='snapshot_set', through_fields=('snapshot', 'tag'))
added = models.DateTimeField(auto_now_add=True, db_index=True)
updated = models.DateTimeField(auto_now=True, blank=True, null=True, db_index=True)
- tags = models.ManyToManyField(Tag, blank=True)
keys = ('url', 'timestamp', 'title', 'tags', 'updated')
+ @property
+ def uuid(self):
+ return self.id
def __repr__(self) -> str:
- title = self.title or '-'
- return f'[{self.timestamp}] {self.url[:64]} ({title[:64]})'
+ title = (self.title_stripped or '-')[:64]
+ return f'[{self.timestamp}] {self.url[:64]} ({title})'
def __str__(self) -> str:
- title = self.title or '-'
- return f'[{self.timestamp}] {self.url[:64]} ({title[:64]})'
+ title = (self.title_stripped or '-')[:64]
+ return f'[{self.timestamp}] {self.url[:64]} ({title})'
+
+ def save(self, *args, **kwargs):  # saves normally, then reports (never raises) an id / ABID mismatch
+ super().save(*args, **kwargs)  # write first; the check below cannot block the save
+ try:  # NOTE: asserts are skipped under `python -O`, in which case this check does nothing
+ assert str(self.id) == str(self.ABID.uuid) == str(self.uuid), f'Snapshot.id ({self.id}) does not match .ABID.uuid ({self.ABID.uuid})'
+ except AssertionError as e:  # catches any AssertionError raised inside the try, not only the one above
+ print(e)  # the mismatch is only printed; self.uuid is a property returning self.id, so the real comparison is id vs ABID.uuid
@classmethod
def from_json(cls, info: dict):
@@ -167,6 +204,19 @@ class Snapshot(ABIDModel):
def icons(self) -> str:
return snapshot_icons(self)
+
+ @property
+ def api_url(self) -> str:
+ # /api/v1/core/snapshot/{uulid}
+ return reverse_lazy('api-1:get_snapshot', args=[self.abid])
+
+ @property
+ def api_docs_url(self) -> str:
+ return f'/api/v1/docs#/Core%20Models/api_v1_core_get_snapshot'
+
+ @cached_property
+ def title_stripped(self) -> str:
+ return (self.title or '').replace("\n", " ").replace("\r", "")
@cached_property
def extension(self) -> str:
@@ -317,21 +367,21 @@ class ArchiveResultManager(models.Manager):
qs = qs.annotate(indexing_precedence=Case(*precedence, default=Value(1000),output_field=IntegerField())).order_by('indexing_precedence')
return qs
-
class ArchiveResult(ABIDModel):
abid_prefix = 'res_'
abid_ts_src = 'self.snapshot.added'
abid_uri_src = 'self.snapshot.url'
abid_subtype_src = 'self.extractor'
- abid_rand_src = 'self.uuid'
+ abid_rand_src = 'self.id'
EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
- # id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
- id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID') # legacy pk
- uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
+ old_id = models.BigIntegerField(default=rand_int_id, serialize=False, verbose_name='Old ID')
+
+ id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True, unique=True, verbose_name='ID')
abid = ABIDField(prefix=abid_prefix)
- snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
+ snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE, to_field='id', db_column='snapshot_id')
+
extractor = models.CharField(choices=EXTRACTOR_CHOICES, max_length=32)
cmd = models.JSONField()
pwd = models.CharField(max_length=256)
@@ -344,15 +394,36 @@ class ArchiveResult(ABIDModel):
objects = ArchiveResultManager()
class Meta(TypedModelMeta):
- verbose_name = 'Result'
+ verbose_name = 'Archive Result'
+ verbose_name_plural = 'Archive Results Log'
+
def __str__(self):
return self.extractor
+ def save(self, *args, **kwargs):  # saves normally, then reports (never raises) an id / ABID mismatch
+ super().save(*args, **kwargs)  # write first; the check below cannot block the save
+ try:  # NOTE: asserts are skipped under `python -O`, in which case this check does nothing
+ assert str(self.id) == str(self.ABID.uuid) == str(self.uuid), f'ArchiveResult.id ({self.id}) does not match .ABID.uuid ({self.ABID.uuid})'
+ except AssertionError as e:  # catches any AssertionError raised inside the try, not only the one above
+ print(e)  # the mismatch is only printed; self.uuid is a property returning self.id, so the real comparison is id vs ABID.uuid
+
+ @property
+ def uuid(self):
+ return self.id
+
@cached_property
def snapshot_dir(self):
return Path(self.snapshot.link_dir)
+ @property
+ def api_url(self) -> str:
+ # /api/v1/core/archiveresult/{uulid}
+ return reverse_lazy('api-1:get_archiveresult', args=[self.abid])
+
+ @property
+ def api_docs_url(self) -> str:
+ return f'/api/v1/docs#/Core%20Models/api_v1_core_get_archiveresult'
@property
def extractor_module(self):
diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py
index 870c5681..cac65ee6 100644
--- a/archivebox/core/settings.py
+++ b/archivebox/core/settings.py
@@ -83,7 +83,7 @@ INSTALLED_APPS = [
'django.contrib.staticfiles',
'django.contrib.admin',
'django_jsonform',
-
+
'signal_webhooks',
'abid_utils',
'plugantic',
@@ -120,6 +120,8 @@ MIDDLEWARE = [
### Authentication Settings
################################################################################
+# AUTH_USER_MODEL = 'auth.User' # cannot be easily changed unfortunately
+
AUTHENTICATION_BACKENDS = [
'django.contrib.auth.backends.RemoteUserBackend',
'django.contrib.auth.backends.ModelBackend',
@@ -463,6 +465,7 @@ SIGNAL_WEBHOOKS = {
},
}
+DATA_UPLOAD_MAX_NUMBER_FIELDS = None  # None disables Django's cap on GET/POST parameters per request; NOTE(review): this removes a DoS safeguard for every view - confirm that is intended
ADMIN_DATA_VIEWS = {
"NAME": "Environment",
diff --git a/archivebox/core/urls.py b/archivebox/core/urls.py
index 14b3d774..04382c99 100644
--- a/archivebox/core/urls.py
+++ b/archivebox/core/urls.py
@@ -38,7 +38,7 @@ urlpatterns = [
path('accounts/', include('django.contrib.auth.urls')),
path('admin/', archivebox_admin.urls),
- path("api/", include('api.urls')),
+ path("api/", include('api.urls'), name='api'),
path('health/', HealthCheckView.as_view(), name='healthcheck'),
path('error/', lambda *_: 1/0),
diff --git a/archivebox/core/views.py b/archivebox/core/views.py
index 3d7f2e23..128de658 100644
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -90,7 +90,7 @@ class SnapshotView(View):
archiveresults[result.extractor] = result_info
existing_files = {result['path'] for result in archiveresults.values()}
- min_size_threshold = 128 # bytes
+ min_size_threshold = 10_000 # bytes
allowed_extensions = {
'txt',
'html',
@@ -104,16 +104,19 @@ class SnapshotView(View):
'webm',
'mp4',
'mp3',
+ 'opus',
'pdf',
'md',
}
- # iterate through all the files in the snapshot dir and add the biggest ones to the result list
- for result_file in Path(snapshot.link_dir).glob('*/*/*'):
+
+ # iterate through all the files in the snapshot dir and add the biggest ones to the result list
+ snap_dir = Path(snapshot.link_dir)
+ for result_file in (*snap_dir.glob('*'), *snap_dir.glob('*/*')):
extension = result_file.suffix.lstrip('.').lower()
if result_file.is_dir() or result_file.name.startswith('.') or extension not in allowed_extensions:
continue
- if result_file.name in existing_files:
+ if result_file.name in existing_files or result_file.name == 'index.html':
continue
file_size = result_file.stat().st_size or 0
@@ -121,12 +124,12 @@ class SnapshotView(View):
if file_size > min_size_threshold:
archiveresults[result_file.name] = {
'name': result_file.stem,
- 'path': result_file.relative_to(snapshot.link_dir),
+ 'path': result_file.relative_to(snap_dir),
'ts': ts_to_date_str(result_file.stat().st_mtime or 0),
'size': file_size,
}
- preferred_types = ('singlefile', 'wget', 'screenshot', 'dom', 'media', 'pdf', 'readability', 'mercury')
+ preferred_types = ('singlefile', 'screenshot', 'wget', 'dom', 'media', 'pdf', 'readability', 'mercury')
all_types = preferred_types + tuple(result_type for result_type in archiveresults.keys() if result_type not in preferred_types)
best_result = {'path': 'None'}
@@ -140,7 +143,7 @@ class SnapshotView(View):
link_info = link._asdict(extended=True)
try:
- warc_path = 'warc/' + list(Path(snapshot.link_dir).glob('warc/*.warc.*'))[0].name
+ warc_path = 'warc/' + list(Path(snap_dir).glob('warc/*.warc.*'))[0].name
except IndexError:
warc_path = 'warc/'
@@ -160,7 +163,7 @@ class SnapshotView(View):
'warc_path': warc_path,
'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG,
'PREVIEW_ORIGINALS': PREVIEW_ORIGINALS,
- 'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name'])),
+ 'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name']) if r['name'] in all_types else -r['size']),
'best_result': best_result,
# 'tags_str': 'somealskejrewlkrjwer,werlmwrwlekrjewlkrjwer324m532l,4m32,23m324234',
}
@@ -178,6 +181,7 @@ class SnapshotView(View):
except (IndexError, ValueError):
slug, archivefile = path.split('/', 1)[0], 'index.html'
+
# slug is a timestamp
if slug.replace('.','').isdigit():
@@ -224,7 +228,7 @@ class SnapshotView(View):
snap.timestamp,
snap.timestamp,
snap.url,
- snap.title or '',
+ snap.title_stripped[:64] or '',
)
for snap in Snapshot.objects.filter(timestamp__startswith=slug).only('url', 'timestamp', 'title', 'added').order_by('-added')
)
@@ -275,12 +279,35 @@ class SnapshotView(View):
content_type="text/html",
status=404,
)
+
+ # # slug is an ID
+ # ulid = slug.split('_', 1)[-1]
+ # try:
+ # try:
+ # snapshot = snapshot or Snapshot.objects.get(Q(abid=ulid) | Q(id=ulid) | Q(old_id=ulid))
+ # except Snapshot.DoesNotExist:
+ # pass
+
+ # try:
+ # snapshot = Snapshot.objects.get(Q(abid__startswith=slug) | Q(abid__startswith=Snapshot.abid_prefix + slug) | Q(id__startswith=slug) | Q(old_id__startswith=slug))
+ # except (Snapshot.DoesNotExist, Snapshot.MultipleObjectsReturned):
+ # pass
+
+ # try:
+ # snapshot = snapshot or Snapshot.objects.get(Q(abid__icontains=snapshot_id) | Q(id__icontains=snapshot_id) | Q(old_id__icontains=snapshot_id))
+ # except Snapshot.DoesNotExist:
+ # pass
+ # return redirect(f'/archive/{snapshot.timestamp}/index.html')
+ # except Snapshot.DoesNotExist:
+ # pass
+
# slug is a URL
try:
try:
- # try exact match on full url first
+ # try exact match on full url / ABID first
snapshot = Snapshot.objects.get(
Q(url='http://' + path) | Q(url='https://' + path) | Q(id__startswith=path)
+ | Q(abid__icontains=path) | Q(id__icontains=path) | Q(old_id__icontains=path)
)
except Snapshot.DoesNotExist:
# fall back to match on exact base_url
@@ -314,15 +341,17 @@ class SnapshotView(View):
except Snapshot.MultipleObjectsReturned:
snapshot_hrefs = mark_safe('
').join(
format_html(
- '{} {}
{} {}',
+ '{} {}
{}
{} {}',
snap.added.strftime('%Y-%m-%d %H:%M:%S'),
+ snap.abid,
snap.timestamp,
snap.timestamp,
snap.url,
- snap.title or '',
+ snap.title_stripped[:64] or '',
)
for snap in Snapshot.objects.filter(
Q(url__startswith='http://' + base_url(path)) | Q(url__startswith='https://' + base_url(path))
+ | Q(abid__icontains=path) | Q(id__icontains=path) | Q(old_id__icontains=path)
).only('url', 'timestamp', 'title', 'added').order_by('-added')
)
return HttpResponse(
diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py
index c2644eb2..5dfe4630 100644
--- a/archivebox/index/schema.py
+++ b/archivebox/index/schema.py
@@ -266,7 +266,7 @@ class Link:
@cached_property
def snapshot(self):
from core.models import Snapshot
- return Snapshot.objects.only('uuid').get(url=self.url)
+ return Snapshot.objects.only('id').get(url=self.url)
@cached_property
def snapshot_id(self):
@@ -274,7 +274,7 @@ class Link:
@cached_property
def snapshot_uuid(self):
- return str(self.snapshot.uuid)
+ return str(self.snapshot.id)
@cached_property
def snapshot_abid(self):
diff --git a/archivebox/manage.py b/archivebox/manage.py
index 413a4cfb..6e8c578a 100755
--- a/archivebox/manage.py
+++ b/archivebox/manage.py
@@ -7,7 +7,7 @@ if __name__ == '__main__':
# versions of ./manage.py commands whenever possible. When that's not possible
# (e.g. makemigrations), you can comment out this check temporarily
- if not ('makemigrations' in sys.argv or 'migrate' in sys.argv or 'startapp' in sys.argv):
+ if not ('makemigrations' in sys.argv or 'migrate' in sys.argv or 'startapp' in sys.argv or 'squashmigrations' in sys.argv):
print("[X] Don't run ./manage.py directly (unless you are a developer running makemigrations):")
print()
print(' Hint: Use these archivebox CLI commands instead of the ./manage.py equivalents:')
diff --git a/archivebox/package-lock.json b/archivebox/package-lock.json
index f1f0bc14..99c50d56 100644
--- a/archivebox/package-lock.json
+++ b/archivebox/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "archivebox",
- "version": "0.8.1",
+ "version": "0.8.2",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "archivebox",
- "version": "0.8.1",
+ "version": "0.8.2",
"license": "MIT",
"dependencies": {
"@postlight/parser": "^2.2.3",
@@ -26,9 +26,9 @@
}
},
"node_modules/@babel/runtime-corejs2": {
- "version": "7.24.6",
- "resolved": "https://registry.npmjs.org/@babel/runtime-corejs2/-/runtime-corejs2-7.24.6.tgz",
- "integrity": "sha512-5UK2PnfpmiCftYGBeJ+SpFIMNaoMPU/eQt1P5ISx0TB7nGGzEMLT4/3PapNZEfGZh+nGxGOGj2t59prGFBhunQ==",
+ "version": "7.25.0",
+ "resolved": "https://registry.npmjs.org/@babel/runtime-corejs2/-/runtime-corejs2-7.25.0.tgz",
+ "integrity": "sha512-aoYVE3tm+vgAoezmXFWmVcp+NlSdsUqQMPL7c6zRxq8KDHCf570pamC7005Q/UkSlTuoL6oeE16zIw/9J3YFyw==",
"license": "MIT",
"dependencies": {
"core-js": "^2.6.12",
@@ -180,9 +180,9 @@
}
},
"node_modules/@postman/tunnel-agent": {
- "version": "0.6.3",
- "resolved": "https://registry.npmjs.org/@postman/tunnel-agent/-/tunnel-agent-0.6.3.tgz",
- "integrity": "sha512-k57fzmAZ2PJGxfOA4SGR05ejorHbVAa/84Hxh/2nAztjNXc4ZjOm9NUIk6/Z6LCrBvJZqjRZbN8e/nROVUPVdg==",
+ "version": "0.6.4",
+ "resolved": "https://registry.npmjs.org/@postman/tunnel-agent/-/tunnel-agent-0.6.4.tgz",
+ "integrity": "sha512-CJJlq8V7rNKhAw4sBfjixKpJW00SHqebqNUQKxMoepgeWZIbdPcD+rguRcivGhS4N12PymDcKgUgSD4rVC+RjQ==",
"license": "Apache-2.0",
"dependencies": {
"safe-buffer": "^5.0.1"
@@ -236,13 +236,13 @@
"license": "MIT"
},
"node_modules/@types/node": {
- "version": "20.14.0",
- "resolved": "https://registry.npmjs.org/@types/node/-/node-20.14.0.tgz",
- "integrity": "sha512-5cHBxFGJx6L4s56Bubp4fglrEpmyJypsqI6RgzMfBHWUJQGWAAi8cWcgetEbZXHYXo9C2Fa4EEds/uSyS4cxmA==",
+ "version": "22.4.1",
+ "resolved": "https://registry.npmjs.org/@types/node/-/node-22.4.1.tgz",
+ "integrity": "sha512-1tbpb9325+gPnKK0dMm+/LMriX0vKxf6RnB0SZUqfyVkQ4fMgUSySqhxE/y8Jvs4NyF1yHzTfG9KlnkIODxPKg==",
"license": "MIT",
"optional": true,
"dependencies": {
- "undici-types": "~5.26.4"
+ "undici-types": "~6.19.2"
}
},
"node_modules/@types/yauzl": {
@@ -353,9 +353,9 @@
}
},
"node_modules/aws4": {
- "version": "1.13.0",
- "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.13.0.tgz",
- "integrity": "sha512-3AungXC4I8kKsS9PuS4JH2nc+0bVY/mjgrephHTIi8fpEeGsTHBUJeosp0Wc1myYMElmD0B3Oc4XL/HVJ4PV2g==",
+ "version": "1.13.1",
+ "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.13.1.tgz",
+ "integrity": "sha512-u5w79Rd7SU4JaIlA/zFqG+gOiuq25q5VLyZ8E+ijJeILuTxVzZgp2CaGw/UTw6pXYN9XMO9yiqj/nEHmhTG5CA==",
"license": "MIT"
},
"node_modules/b4a": {
@@ -365,9 +365,9 @@
"license": "Apache-2.0"
},
"node_modules/bare-events": {
- "version": "2.3.1",
- "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.3.1.tgz",
- "integrity": "sha512-sJnSOTVESURZ61XgEleqmP255T6zTYwHPwE4r6SssIh0U9/uDvfpdoJYpVUerJJZH2fueO+CdT8ZT+OC/7aZDA==",
+ "version": "2.4.2",
+ "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.4.2.tgz",
+ "integrity": "sha512-qMKFd2qG/36aA4GwvKq8MxnPgCQAmBWmSyLWsJcbn8v03wvIPQ/hG1Ms8bPzndZxMDoHpxez5VOS+gC9Yi24/Q==",
"license": "Apache-2.0",
"optional": true
},
@@ -700,9 +700,9 @@
}
},
"node_modules/debug": {
- "version": "4.3.5",
- "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.5.tgz",
- "integrity": "sha512-pt0bNEmneDIvdL1Xsd9oDQ/wrQRkXDT4AUWlNZNPKvW5x/jyO9VFXkJUP07vQ2upmw5PlaITaPKc31jK13V+jg==",
+ "version": "4.3.6",
+ "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.6.tgz",
+ "integrity": "sha512-O/09Bd4Z1fBrU4VzkhFqVgpPzaGbw6Sm9FEkBT1A/YBXQFGuuSxa1dN2nxgxS34JmKXqYx8CZAwEVoJFImUXIg==",
"license": "MIT",
"dependencies": {
"ms": "2.1.2"
@@ -793,9 +793,9 @@
}
},
"node_modules/dompurify": {
- "version": "3.1.5",
- "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.1.5.tgz",
- "integrity": "sha512-lwG+n5h8QNpxtyrJW/gJWckL+1/DQiYMX8f7t8Z2AZTPw1esVrqjI63i7Zc2Gz0aKzLVMYC1V1PL/ky+aY/NgA==",
+ "version": "3.1.6",
+ "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.1.6.tgz",
+ "integrity": "sha512-cTOAhc36AalkjtBpfG6O8JimdTMWNXjiePT2xQH/ppBGi/4uIpmj8eKyIkMJErXWARyINV/sB38yf8JCLF5pbQ==",
"license": "(MPL-2.0 OR Apache-2.0)"
},
"node_modules/domutils": {
@@ -1174,9 +1174,9 @@
}
},
"node_modules/https-proxy-agent": {
- "version": "7.0.4",
- "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.4.tgz",
- "integrity": "sha512-wlwpilI7YdjSkWaQ/7omYBMTliDcmCN8OLihO6I9B86g06lMyAoqgoDpV0XqoaPOKj+0DIdAvnsWfyAAhmimcg==",
+ "version": "7.0.5",
+ "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.5.tgz",
+ "integrity": "sha512-1e4Wqeblerz+tMKPIq2EMGiiWW1dIjZOksyHWSUm1rmuvw/how9hBHZ38lAGj5ID4Ik6EdkOw7NmWPy6LAwalw==",
"license": "MIT",
"dependencies": {
"agent-base": "^7.0.2",
@@ -1629,9 +1629,9 @@
}
},
"node_modules/nwsapi": {
- "version": "2.2.10",
- "resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.2.10.tgz",
- "integrity": "sha512-QK0sRs7MKv0tKe1+5uZIQk/C8XGza4DAnztJG8iD+TpJIORARrCxczA738awHrZoHeTjSSoHqao2teO0dC/gFQ==",
+ "version": "2.2.12",
+ "resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.2.12.tgz",
+ "integrity": "sha512-qXDmcVlZV4XRtKFzddidpfVP4oMSGhga+xdMc25mv8kaLUHtgzCDhUxkrN8exkGdTlLNaXj7CV3GtON7zuGZ+w==",
"license": "MIT"
},
"node_modules/oauth-sign": {
@@ -1653,9 +1653,9 @@
}
},
"node_modules/pac-proxy-agent": {
- "version": "7.0.1",
- "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.0.1.tgz",
- "integrity": "sha512-ASV8yU4LLKBAjqIPMbrgtaKIvxQri/yh2OpI+S6hVa9JRkUI3Y3NPFbfngDtY7oFtSMD3w31Xns89mDa3Feo5A==",
+ "version": "7.0.2",
+ "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.0.2.tgz",
+ "integrity": "sha512-BFi3vZnO9X5Qt6NRz7ZOaPja3ic0PhlsmCRYLOpN11+mWBCR6XJDqW5RF3j8jm4WGGQZtBA+bTfxYzeKW73eHg==",
"license": "MIT",
"dependencies": {
"@tootallnate/quickjs-emscripten": "^0.23.0",
@@ -1663,9 +1663,9 @@
"debug": "^4.3.4",
"get-uri": "^6.0.1",
"http-proxy-agent": "^7.0.0",
- "https-proxy-agent": "^7.0.2",
- "pac-resolver": "^7.0.0",
- "socks-proxy-agent": "^8.0.2"
+ "https-proxy-agent": "^7.0.5",
+ "pac-resolver": "^7.0.1",
+ "socks-proxy-agent": "^8.0.4"
},
"engines": {
"node": ">= 14"
@@ -1727,14 +1727,14 @@
"license": "MIT"
},
"node_modules/postman-request": {
- "version": "2.88.1-postman.33",
- "resolved": "https://registry.npmjs.org/postman-request/-/postman-request-2.88.1-postman.33.tgz",
- "integrity": "sha512-uL9sCML4gPH6Z4hreDWbeinKU0p0Ke261nU7OvII95NU22HN6Dk7T/SaVPaj6T4TsQqGKIFw6/woLZnH7ugFNA==",
+ "version": "2.88.1-postman.39",
+ "resolved": "https://registry.npmjs.org/postman-request/-/postman-request-2.88.1-postman.39.tgz",
+ "integrity": "sha512-rsncxxDlbn1YpygXSgJqbJzIjGlHFcZjbYDzeBPTQHMDfLuSTzZz735JHV8i1+lOROuJ7MjNap4eaSD3UijHzQ==",
"license": "Apache-2.0",
"dependencies": {
"@postman/form-data": "~3.1.1",
"@postman/tough-cookie": "~4.1.3-postman.1",
- "@postman/tunnel-agent": "^0.6.3",
+ "@postman/tunnel-agent": "^0.6.4",
"aws-sign2": "~0.7.0",
"aws4": "^1.12.0",
"brotli": "^1.3.3",
@@ -1756,7 +1756,7 @@
"uuid": "^8.3.2"
},
"engines": {
- "node": ">= 6"
+ "node": ">= 16"
}
},
"node_modules/process-nextick-args": {
@@ -2148,14 +2148,14 @@
}
},
"node_modules/socks-proxy-agent": {
- "version": "8.0.3",
- "resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.3.tgz",
- "integrity": "sha512-VNegTZKhuGq5vSD6XNKlbqWhyt/40CgoEw8XxD6dhnm8Jq9IEa3nIa4HwnM8XOqU0CdB0BwWVXusqiFXfHB3+A==",
+ "version": "8.0.4",
+ "resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.4.tgz",
+ "integrity": "sha512-GNAq/eg8Udq2x0eNiFkr9gRg5bA7PXEWagQdeRX4cPSG+X/8V38v637gim9bjFptMk1QWsCTr0ttrJEiXbNnRw==",
"license": "MIT",
"dependencies": {
"agent-base": "^7.1.1",
"debug": "^4.3.4",
- "socks": "^2.7.1"
+ "socks": "^2.8.3"
},
"engines": {
"node": ">= 14"
@@ -2322,9 +2322,9 @@
}
},
"node_modules/text-decoder": {
- "version": "1.1.0",
- "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.1.0.tgz",
- "integrity": "sha512-TmLJNj6UgX8xcUZo4UDStGQtDiTzF7BzWlzn9g7UWrjkpHr5uJTK1ld16wZ3LXb2vb6jH8qU89dW5whuMdXYdw==",
+ "version": "1.1.1",
+ "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.1.1.tgz",
+ "integrity": "sha512-8zll7REEv4GDD3x4/0pW+ppIxSNs7H1J10IKFZsuOMscumCdM2a+toDGLPA3T+1+fLBql4zbt5z83GEQGGV5VA==",
"license": "Apache-2.0",
"dependencies": {
"b4a": "^1.6.4"
@@ -2376,9 +2376,9 @@
}
},
"node_modules/tslib": {
- "version": "2.6.2",
- "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz",
- "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==",
+ "version": "2.6.3",
+ "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.3.tgz",
+ "integrity": "sha512-xNvxJEOUiWPGhUuUdQgAJPKOOJfGnIyKySOc09XkKsgdUV/3E2zvwZYdejjmRgPCgcym1juLH3226yA7sEFJKQ==",
"license": "0BSD"
},
"node_modules/turndown": {
@@ -2407,9 +2407,9 @@
}
},
"node_modules/undici-types": {
- "version": "5.26.5",
- "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
- "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
+ "version": "6.19.8",
+ "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
+ "integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
"license": "MIT",
"optional": true
},
@@ -2575,9 +2575,9 @@
"license": "ISC"
},
"node_modules/ws": {
- "version": "8.17.0",
- "resolved": "https://registry.npmjs.org/ws/-/ws-8.17.0.tgz",
- "integrity": "sha512-uJq6108EgZMAl20KagGkzCKfMEjxmKvZHG7Tlq0Z6nOky7YF7aq4mOx6xK8TJ/i1LeK4Qus7INktacctDgY8Ow==",
+ "version": "8.18.0",
+ "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.0.tgz",
+ "integrity": "sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==",
"license": "MIT",
"engines": {
"node": ">=10.0.0"
diff --git a/archivebox/package.json b/archivebox/package.json
index a148316f..04b4b601 100644
--- a/archivebox/package.json
+++ b/archivebox/package.json
@@ -1,6 +1,6 @@
{
"name": "archivebox",
- "version": "0.8.1",
+ "version": "0.8.2",
"description": "ArchiveBox: The self-hosted internet archive",
"author": "Nick Sweeting ",
"repository": "github:ArchiveBox/ArchiveBox",
diff --git a/archivebox/templates/admin/base.html b/archivebox/templates/admin/base.html
index 897a26d5..00e2f205 100644
--- a/archivebox/templates/admin/base.html
+++ b/archivebox/templates/admin/base.html
@@ -45,6 +45,13 @@
{% endif %}
{% endblock %}
+
+
+
+