mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-15 07:34:27 -04:00
add new live django template for snapshot detail page
This commit is contained in:
parent
457c42bf84
commit
8841e8b181
2 changed files with 630 additions and 2 deletions
|
@ -3,6 +3,7 @@ __package__ = 'archivebox.core'
|
|||
from typing import Callable
|
||||
|
||||
from io import StringIO
|
||||
from pathlib import Path
|
||||
from contextlib import redirect_stdout
|
||||
|
||||
from django.shortcuts import render, redirect
|
||||
|
@ -36,10 +37,14 @@ from ..config import (
|
|||
CONFIG_SCHEMA,
|
||||
DYNAMIC_CONFIG_SCHEMA,
|
||||
USER_CONFIG,
|
||||
SAVE_ARCHIVE_DOT_ORG,
|
||||
PREVIEW_ORIGINALS,
|
||||
)
|
||||
from ..logging_util import printable_filesize
|
||||
from ..main import add
|
||||
from ..util import base_url, ansi_to_html
|
||||
from ..util import base_url, ansi_to_html, htmlencode, urldecode, urlencode, ts_to_date_str
|
||||
from ..search import query_search_index
|
||||
from ..extractors.wget import wget_output_path
|
||||
|
||||
|
||||
class HomepageView(View):
|
||||
|
@ -56,10 +61,85 @@ class HomepageView(View):
|
|||
class SnapshotView(View):
|
||||
# render static html index from filesystem archive/<timestamp>/index.html
|
||||
|
||||
@staticmethod
def render_live_index(request, snapshot):
    """Render the live (template-driven) detail page for a snapshot.

    Collects the snapshot's displayable archive results, picks the most
    preferred one as the default preview, and renders the
    ``core/snapshot_live.html`` template with the link's metadata.

    Args:
        request: The incoming HTTP request, passed through to ``render``.
        snapshot: The snapshot to display. This method reads its
            ``archiveresult_set``, ``link_dir`` and ``as_link()``.

    Returns:
        The response produced by ``render`` for the live snapshot template.
    """
    TITLE_LOADING_MSG = 'Not yet archived...'
    # Extractors whose output is never listed as an embeddable result.
    HIDDEN_RESULTS = ('favicon', 'headers', 'title', 'htmltotext', 'warc', 'archive_org')

    archiveresults = {}
    for result in snapshot.archiveresult_set.all():
        embed_path = result.embed_path()

        # Only successful, non-hidden results that have something to embed.
        if (result.status != 'succeeded'
                or result.extractor in HIDDEN_RESULTS
                or not embed_path):
            continue

        abs_path = result.snapshot_dir / embed_path
        if not abs_path.exists():
            continue

        # Skip output directories that contain no "name.ext"-style entries.
        if abs_path.is_dir() and not any(abs_path.glob('*.*')):
            continue

        # Keyed by extractor, so a later result for the same extractor
        # replaces an earlier one.
        archiveresults[result.extractor] = {
            'name': result.extractor,
            'path': embed_path,
            'ts': ts_to_date_str(result.end_ts),
        }

    # Display order: preferred extractors first, then any others in the
    # order they were encountered.
    preferred_types = ('singlefile', 'wget', 'screenshot', 'dom', 'media', 'pdf', 'readability', 'mercury')
    all_types = preferred_types + tuple(
        result_type for result_type in archiveresults if result_type not in preferred_types
    )
    # Build the rank lookup once instead of calling tuple.index() per sort key.
    display_rank = {result_type: position for position, result_type in enumerate(all_types)}

    # The default preview is the first preferred type that has a usable
    # result; non-preferred types are never chosen as the best result.
    best_result = next(
        (archiveresults[result_type] for result_type in preferred_types if result_type in archiveresults),
        {'path': 'None'},
    )

    link = snapshot.as_link()
    link_info = link._asdict(extended=True)

    # Point at the first WARC file found, or at the warc/ directory if none.
    first_warc = next(Path(snapshot.link_dir).glob('warc/*.warc.*'), None)
    warc_path = 'warc/' if first_warc is None else 'warc/' + first_warc.name

    context = {
        **link_info,
        **link_info['canonical'],
        'title': htmlencode(
            link.title
            or (link.base_url if link.is_archived else TITLE_LOADING_MSG)
        ),
        'url_str': htmlencode(urldecode(link.base_url)),
        'archive_url': urlencode(
            wget_output_path(link)
            or (link.domain if link.is_archived else '')
        ) or 'about:blank',
        'extension': link.extension or 'html',
        'tags': link.tags or 'untagged',
        'size': printable_filesize(link.archive_size) if link.archive_size else 'pending',
        'status': 'archived' if link.is_archived else 'not yet archived',
        'status_color': 'success' if link.is_archived else 'danger',
        'oldest_archive_date': ts_to_date_str(link.oldest_archive_date),
        'warc_path': warc_path,
        'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG,
        'PREVIEW_ORIGINALS': PREVIEW_ORIGINALS,
        'archiveresults': sorted(archiveresults.values(), key=lambda r: display_rank[r['name']]),
        'best_result': best_result,
    }
    return render(template_name='core/snapshot_live.html', request=request, context=context)
|
||||
|
||||
|
||||
def get(self, request, path):
|
||||
if not request.user.is_authenticated and not PUBLIC_SNAPSHOTS:
|
||||
return redirect(f'/admin/login/?next={request.path}')
|
||||
|
||||
snapshot = None
|
||||
|
||||
try:
|
||||
slug, archivefile = path.split('/', 1)
|
||||
except (IndexError, ValueError):
|
||||
|
@ -75,7 +155,11 @@ class SnapshotView(View):
|
|||
try:
|
||||
try:
|
||||
snapshot = Snapshot.objects.get(Q(timestamp=slug) | Q(id__startswith=slug))
|
||||
response = static.serve(request, archivefile, document_root=snapshot.link_dir, show_indexes=True)
|
||||
if archivefile == 'index.html':
|
||||
# if they requested snapshot index, serve live rendered template instead of static html
|
||||
response = self.render_live_index(request, snapshot)
|
||||
else:
|
||||
response = static.serve(request, archivefile, document_root=snapshot.link_dir, show_indexes=True)
|
||||
response["Link"] = f'<{snapshot.url}>; rel="canonical"'
|
||||
return response
|
||||
except Snapshot.DoesNotExist:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue