diff --git a/archivebox/config.py b/archivebox/config.py index 8fcc5352..8d4a0695 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -103,7 +103,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = { 'PUBLIC_SNAPSHOTS': {'type': bool, 'default': True}, 'PUBLIC_ADD_VIEW': {'type': bool, 'default': False}, 'FOOTER_INFO': {'type': str, 'default': 'Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.'}, - 'SNAPSHOTS_PER_PAGE': {'type': int, 'default': 40}, + 'SNAPSHOTS_PER_PAGE': {'type': int, 'default': 100}, 'CUSTOM_TEMPLATES_DIR': {'type': str, 'default': None}, 'TIME_ZONE': {'type': str, 'default': 'UTC'}, 'TIMEZONE': {'type': str, 'default': 'UTC'}, diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py index b87f6874..d8b3854d 100644 --- a/archivebox/core/admin.py +++ b/archivebox/core/admin.py @@ -10,12 +10,15 @@ from datetime import datetime, timezone from typing import Dict, Any from django.contrib import admin -from django.db.models import Count, Q -from django.urls import path, reverse +from django.db.models import Count, Q, Prefetch +from django.urls import path, reverse, resolve +from django.utils import timezone +from django.utils.functional import cached_property from django.utils.html import format_html from django.utils.safestring import mark_safe from django.shortcuts import render, redirect from django.contrib.auth import get_user_model +from django.core.paginator import Paginator from django.core.exceptions import ValidationError from django.conf import settings from django import forms @@ -126,22 +129,99 @@ archivebox_admin.get_admin_data_urls = get_admin_data_urls.__get__(archivebox_ad archivebox_admin.get_urls = get_urls(archivebox_admin.get_urls).__get__(archivebox_admin, ArchiveBoxAdmin) +class AccelleratedPaginator(Paginator): + """ + Accellerated Pagniator ignores DISTINCT when counting total number of rows. + Speeds up SELECT Count(*) on Admin views by >20x. 
+ https://hakibenita.com/optimizing-the-django-admin-paginator + """ + + @cached_property + def count(self): + if self.object_list._has_filters(): + # fallback to normal count method on filtered queryset + return super().count + else: + # otherwise count total rows in a separate fast query + return self.object_list.model.objects.count() + + # Alternative approach for PostgreSQL: fallback count takes > 200ms + # from django.db import connection, transaction, OperationalError + # with transaction.atomic(), connection.cursor() as cursor: + # cursor.execute('SET LOCAL statement_timeout TO 200;') + # try: + # return super().count + # except OperationalError: + # return 9999999999999 + + class ArchiveResultInline(admin.TabularInline): name = 'Archive Results Log' model = ArchiveResult + parent_model = Snapshot # fk_name = 'snapshot' - extra = 1 - readonly_fields = ('result_id', 'start_ts', 'end_ts', 'extractor', 'command', 'cmd_version') - fields = ('id', *readonly_fields, 'status', 'output') + extra = 0 + sort_fields = ('end_ts', 'extractor', 'output', 'status', 'cmd_version') + readonly_fields = ('result_id', 'completed', 'extractor', 'command', 'version') + fields = ('id', 'start_ts', 'end_ts', *readonly_fields, 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'output') + # exclude = ('id',) + ordering = ('end_ts',) show_change_link = True # # classes = ['collapse'] # # list_display_links = ['abid'] + def get_parent_object_from_request(self, request): + resolved = resolve(request.path_info) + return self.parent_model.objects.get(pk=resolved.kwargs['object_id']) + + @admin.display( + description='Completed', + ordering='end_ts', + ) + def completed(self, obj): + return format_html('
{}
', obj.end_ts.strftime('%Y-%m-%d %H:%M:%S')) + def result_id(self, obj): - return format_html('[{}]
', reverse('admin:core_archiveresult_change', args=(obj.id,)), obj.abid)
+ return format_html('[{}]
', reverse('admin:core_archiveresult_change', args=(obj.id,)), obj.abid)
def command(self, obj):
    """Render ``obj.cmd`` joined with single spaces (an empty string when ``obj.cmd`` is falsy)."""
return format_html('{}
', " ".join(obj.cmd or []))
+
+ def version(self, obj):
+ """Render ``obj.cmd_version``, falling back to '-' when it is empty or unset."""
+ return format_html('{}
', obj.cmd_version or '-')
+
+    def get_formset(self, request, obj=None, **kwargs):
+        """
+        Build the inline formset and pre-fill defaults for manually added rows.
+
+        ``obj`` is the parent Snapshot being edited, or ``None`` when there is
+        no parent yet. When it is ``None`` the parent is looked up from the
+        request URL; if the URL carries no ``object_id`` (or the row does not
+        exist) the ``pwd`` default is left unset instead of raising.
+
+        For an existing parent, the bookkeeping fields (timestamps, cmd, pwd,
+        created_by, cmd_version) are rendered as hidden inputs.
+
+        Returns the formset class produced by the parent implementation, with
+        its form's ``base_fields`` adjusted as described above.
+        """
+        formset = super().get_formset(request, obj, **kwargs)
+        base_fields = formset.form.base_fields
+
+        # Prefer the parent object Django passes in; only fall back to the URL
+        # lookup, and tolerate URLs that do not identify a parent.
+        snapshot = obj
+        if snapshot is None:
+            try:
+                snapshot = self.get_parent_object_from_request(request)
+            except (KeyError, self.parent_model.DoesNotExist):
+                snapshot = None
+
+        base_fields['id'].widget = base_fields['id'].hidden_widget()
+
+        # default values for new entries
+        now = timezone.now()  # single timestamp so start_ts and end_ts agree
+        base_fields['status'].initial = 'succeeded'
+        base_fields['start_ts'].initial = now
+        base_fields['end_ts'].initial = now
+        base_fields['cmd_version'].initial = '-'
+        if snapshot is not None:
+            base_fields['pwd'].initial = str(snapshot.link_dir)
+        base_fields['created_by'].initial = request.user
+        # replace the model-derived field so cmd is edited/validated as JSON
+        base_fields['cmd'] = forms.JSONField(initial=['-'])
+        base_fields['output'].initial = 'Manually recorded cmd output...'
+
+        if obj is not None:
+            # hidden values for existing entries and new entries
+            hidden_names = ('start_ts', 'end_ts', 'cmd', 'pwd', 'created_by', 'cmd_version')
+            for field_name in hidden_names:
+                field = base_fields[field_name]
+                field.widget = field.hidden_widget()
+        return formset
+
+    def get_readonly_fields(self, request, obj=None):
+        """
+        Return the read-only field names for this inline.
+
+        Without a parent object (``obj is None``) nothing is read-only and an
+        empty list is returned; otherwise the class-level ``readonly_fields``
+        are used unchanged.
+        """
+        if obj is None:
+            return []
+        return self.readonly_fields
class TagInline(admin.TabularInline):
@@ -222,25 +302,22 @@ def get_abid_info(self, obj):
@admin.register(Snapshot, site=archivebox_admin)
class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
- class Meta:
- model = Snapshot
-
list_display = ('added', 'title_str', 'files', 'size', 'url_str')
- # list_editable = ('title',)
sort_fields = ('title_str', 'url_str', 'added', 'files')
- readonly_fields = ('tags', 'timestamp', 'admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'API', 'link_dir')
+ readonly_fields = ('tags_str', 'timestamp', 'admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'API', 'link_dir')
search_fields = ('id', 'url', 'abid', 'old_id', 'timestamp', 'title', 'tags__name')
- list_filter = ('added', 'updated', 'archiveresult__status', 'created_by', 'tags')
+ list_filter = ('added', 'updated', 'archiveresult__status', 'created_by', 'tags__name')
fields = ('url', 'created_by', 'title', *readonly_fields)
ordering = ['-added']
actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
- autocomplete_fields = ['tags']
inlines = [TagInline, ArchiveResultInline]
- list_per_page = CONFIG.SNAPSHOTS_PER_PAGE
+ list_per_page = min(max(5, CONFIG.SNAPSHOTS_PER_PAGE), 5000)
action_form = SnapshotActionForm
+ paginator = AccelleratedPaginator
save_on_top = True
+ show_full_result_count = False
def changelist_view(self, request, extra_context=None):
extra_context = extra_context or {}
@@ -286,12 +363,15 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
]
return custom_urls + urls
- def get_queryset(self, request):
- self.request = request
- return super().get_queryset(request).prefetch_related('tags', 'archiveresult_set').annotate(archiveresult_count=Count('archiveresult'))
+ # def get_queryset(self, request):
+ # # tags_qs = SnapshotTag.objects.all().select_related('tag')
+ # # prefetch = Prefetch('snapshottag_set', queryset=tags_qs)
+
+ # self.request = request
+ # return super().get_queryset(request).prefetch_related('archiveresult_set').distinct() # .annotate(archiveresult_count=Count('archiveresult'))
def tag_list(self, obj):
+ """Return the names of ``obj``'s tags joined with ', '."""
- return ', '.join(obj.tags.values_list('name', flat=True))
+ return ', '.join(tag.name for tag in obj.tags.all())
# TODO: figure out a different way to do this; you can't nest forms, so this doesn't work
# def action(self, obj):
@@ -360,21 +440,20 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
ordering='title',
)
def title_str(self, obj):
- canon = obj.as_link().canonical_outputs()
tags = ''.join(
- format_html('{} ', tag.id, tag)
+ format_html('{} ', tag.pk, tag.name)
for tag in obj.tags.all()
- if str(tag).strip()
+ if str(tag.name).strip()
)
return format_html(
''
- '