From 97b185987d664e6213339960a0286558782cb0d2 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 8 Feb 2024 05:54:34 -0800 Subject: [PATCH] add TODO to support archive.org-style urls --- archivebox/core/urls.py | 1 + archivebox/core/views.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/archivebox/core/urls.py b/archivebox/core/urls.py index 53d0bb45..da9cfb52 100644 --- a/archivebox/core/urls.py +++ b/archivebox/core/urls.py @@ -20,6 +20,7 @@ urlpatterns = [ path('archive/', RedirectView.as_view(url='/')), path('archive/', SnapshotView.as_view(), name='Snapshot'), + path('web/', SnapshotView.as_view()), # support archive.org-style URLs path('plugins/replaywebpage/', include('plugins.replaywebpage.urls')), # ... dynamic load these someday if there are more of them diff --git a/archivebox/core/views.py b/archivebox/core/views.py index 6cd146f4..bde8807d 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -56,12 +56,18 @@ class SnapshotView(View): slug, archivefile = path.split('/', 1)[0], 'index.html' # slug is a timestamp - if slug.replace('.','').isdigit(): + if slug.replace('.', '').isdigit(): # missing trailing slash -> redirect to index if '/' not in path: return redirect(f'{path}/index.html') + # TODO: add support for archive.org-style URLs where timestamp may be a human-readable date + # https://web.archivebox.io / web / 2022-01 / https://example.com + # https://web.archivebox.io / web / 20220505103616 / https://example.com + # https://web.archivebox.io / web / 2022-05-05__10:36:16 / https://example.com + # use archivebox.util.parse_date (supports unix timestamps, ISO date strings, and more) try: try: snapshot = Snapshot.objects.get(Q(timestamp=slug) | Q(id__startswith=slug))