add new live django template for snapshot detail page

This commit is contained in:
Nick Sweeting 2024-05-11 22:33:02 -07:00
parent 457c42bf84
commit 8841e8b181
No known key found for this signature in database
2 changed files with 630 additions and 2 deletions

View file

@ -3,6 +3,7 @@ __package__ = 'archivebox.core'
from typing import Callable
from io import StringIO
from pathlib import Path
from contextlib import redirect_stdout
from django.shortcuts import render, redirect
@ -36,10 +37,14 @@ from ..config import (
CONFIG_SCHEMA,
DYNAMIC_CONFIG_SCHEMA,
USER_CONFIG,
SAVE_ARCHIVE_DOT_ORG,
PREVIEW_ORIGINALS,
)
from ..logging_util import printable_filesize
from ..main import add
from ..util import base_url, ansi_to_html
from ..util import base_url, ansi_to_html, htmlencode, urldecode, urlencode, ts_to_date_str
from ..search import query_search_index
from ..extractors.wget import wget_output_path
class HomepageView(View):
@ -56,10 +61,85 @@ class HomepageView(View):
class SnapshotView(View):
# render static html index from filesystem archive/<timestamp>/index.html
@staticmethod
def render_live_index(request, snapshot):
TITLE_LOADING_MSG = 'Not yet archived...'
HIDDEN_RESULTS = ('favicon', 'headers', 'title', 'htmltotext', 'warc', 'archive_org')
archiveresults = {}
results = snapshot.archiveresult_set.all()
for result in results:
embed_path = result.embed_path()
abs_path = result.snapshot_dir / (embed_path or 'None')
if (result.status == 'succeeded'
and (result.extractor not in HIDDEN_RESULTS)
and embed_path
and abs_path.exists()):
if abs_path.is_dir() and not any(abs_path.glob('*.*')):
continue
result_info = {
'name': result.extractor,
'path': embed_path,
'ts': ts_to_date_str(result.end_ts),
}
archiveresults[result.extractor] = result_info
preferred_types = ('singlefile', 'wget', 'screenshot', 'dom', 'media', 'pdf', 'readability', 'mercury')
all_types = preferred_types + tuple(result_type for result_type in archiveresults.keys() if result_type not in preferred_types)
best_result = {'path': 'None'}
for result_type in preferred_types:
if result_type in archiveresults:
best_result = archiveresults[result_type]
break
link = snapshot.as_link()
link_info = link._asdict(extended=True)
try:
warc_path = 'warc/' + list(Path(snapshot.link_dir).glob('warc/*.warc.*'))[0].name
except IndexError:
warc_path = 'warc/'
context = {
**link_info,
**link_info['canonical'],
'title': htmlencode(
link.title
or (link.base_url if link.is_archived else TITLE_LOADING_MSG)
),
'url_str': htmlencode(urldecode(link.base_url)),
'archive_url': urlencode(
wget_output_path(link)
or (link.domain if link.is_archived else '')
) or 'about:blank',
'extension': link.extension or 'html',
'tags': link.tags or 'untagged',
'size': printable_filesize(link.archive_size) if link.archive_size else 'pending',
'status': 'archived' if link.is_archived else 'not yet archived',
'status_color': 'success' if link.is_archived else 'danger',
'oldest_archive_date': ts_to_date_str(link.oldest_archive_date),
'warc_path': warc_path,
'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG,
'PREVIEW_ORIGINALS': PREVIEW_ORIGINALS,
'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name'])),
'best_result': best_result,
# 'tags_str': 'somealskejrewlkrjwer,werlmwrwlekrjewlkrjwer324m532l,4m32,23m324234',
}
return render(template_name='core/snapshot_live.html', request=request, context=context)
def get(self, request, path):
if not request.user.is_authenticated and not PUBLIC_SNAPSHOTS:
return redirect(f'/admin/login/?next={request.path}')
snapshot = None
try:
slug, archivefile = path.split('/', 1)
except (IndexError, ValueError):
@ -75,7 +155,11 @@ class SnapshotView(View):
try:
try:
snapshot = Snapshot.objects.get(Q(timestamp=slug) | Q(id__startswith=slug))
response = static.serve(request, archivefile, document_root=snapshot.link_dir, show_indexes=True)
if archivefile == 'index.html':
# if they requested snapshot index, serve live rendered template instead of static html
response = self.render_live_index(request, snapshot)
else:
response = static.serve(request, archivefile, document_root=snapshot.link_dir, show_indexes=True)
response["Link"] = f'<{snapshot.url}>; rel="canonical"'
return response
except Snapshot.DoesNotExist: