diff --git a/archivebox/api/admin.py b/archivebox/api/admin.py
new file mode 100644
index 00000000..49114936
--- /dev/null
+++ b/archivebox/api/admin.py
@@ -0,0 +1,33 @@
+# __package__ = 'archivebox.api'
+
+# import abx
+
+# from signal_webhooks.admin import WebhookAdmin
+# from signal_webhooks.utils import get_webhook_model
+
+# from abid_utils.admin import ABIDModelAdmin
+
+# from .models import APIToken
+
+
+# class APITokenAdmin(ABIDModelAdmin):
+#     list_display = ('created_at', 'abid', 'created_by', 'token_redacted', 'expires')
+#     sort_fields = ('abid', 'created_at', 'created_by', 'expires')
+#     readonly_fields = ('created_at', 'modified_at', 'abid_info')
+#     search_fields = ('id', 'abid', 'created_by__username', 'token')
+#     fields = ('created_by', 'token', 'expires', *readonly_fields)
+
+#     list_filter = ('created_by',)
+#     ordering = ['-created_at']
+#     list_per_page = 100
+
+# class CustomWebhookAdmin(WebhookAdmin, ABIDModelAdmin):
+#     list_display = ('created_at', 'created_by', 'abid', *WebhookAdmin.list_display)
+#     sort_fields = ('created_at', 'created_by', 'abid', 'referenced_model', 'endpoint', 'last_success', 'last_error')
+#     readonly_fields = ('created_at', 'modified_at', 'abid_info', *WebhookAdmin.readonly_fields)
+
+
+# @abx.hookimpl
+# def register_admin(admin_site):
+#     admin_site.register(APIToken, APITokenAdmin)
+#     admin_site.register(get_webhook_model(), CustomWebhookAdmin)
diff --git a/archivebox/api/v1_core.py b/archivebox/api/v1_core.py
index 9676b0d9..bcc957ee 100644
--- a/archivebox/api/v1_core.py
+++ b/archivebox/api/v1_core.py
@@ -15,7 +15,6 @@ from ninja.errors import HttpError
 
 from core.models import Snapshot, ArchiveResult, Tag
 from api.models import APIToken, OutboundWebhook
-from abid_utils.abid import ABID
 
 from .auth import API_AUTH_METHODS
 
@@ -396,11 +395,70 @@ def get_tag(request, tag_id: str, with_snapshots: bool=True):
 
+# class CrawlSchema(Schema):
+#     TYPE: str = 'core.models.Crawl'
+
+#     id: UUID
+#     abid: str
+
+#     modified_at: datetime
+#     created_at: datetime
+#     created_by_id: str
+#     created_by_username: str
+
+#     urls: str
+#     depth: int
+#     parser: str
+
+#     # snapshots: List[SnapshotSchema]
+
+#     @staticmethod
+#     def resolve_created_by_id(obj):
+#         return str(obj.created_by_id)
+
+#     @staticmethod
+#     def resolve_created_by_username(obj):
+#         User = get_user_model()
+#         return User.objects.get(id=obj.created_by_id).username
+
+#     @staticmethod
+#     def resolve_snapshots(obj, context):
+#         if context['request'].with_snapshots:
+#             return obj.snapshot_set.all().distinct()
+#         return Snapshot.objects.none()
+
+
+# @router.get("/crawl/{crawl_id}", response=CrawlSchema, url_name="get_crawl")
+# def get_crawl(request, crawl_id: str, with_snapshots: bool=False, with_archiveresults: bool=False):
+#     """Get a specific Crawl by id or abid."""
+#     crawl = None
+#     request.with_snapshots = with_snapshots
+#     request.with_archiveresults = with_archiveresults
+
+#     try:
+#         crawl = Crawl.objects.get(abid__icontains=crawl_id)
+#     except Exception:
+#         pass
+
+#     try:
+#         crawl = crawl or Crawl.objects.get(id__icontains=crawl_id)
+#     except Exception:
+#         pass
+#     return crawl
+
+
+# [..., CrawlSchema]
 @router.get("/any/{abid}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema], url_name="get_any")
 def get_any(request, abid: str):
     request.with_snapshots = False
     request.with_archiveresults = False
 
+    if abid.startswith(APIToken.abid_prefix):
+        raise HttpError(403, 'APIToken objects are not accessible via REST API')
+
+    if abid.startswith(OutboundWebhook.abid_prefix):
+        raise HttpError(403, 'OutboundWebhook objects are not accessible via REST API')
+
     response = None
     try:
         response = response or get_snapshot(request, abid)
     except Exception:
         pass
@@ -416,11 +474,13 @@ def get_any(request, abid: str):
         response = response or get_tag(request, abid)
     except Exception:
         pass
-
-    if abid.startswith(APIToken.abid_prefix):
-        raise HttpError(403, 'APIToken objects are not accessible via REST API')
-    if abid.startswith(OutboundWebhook.abid_prefix):
-        raise HttpError(403, 'OutboundWebhook objects are not accessible via REST API')
+    # try:
+    #     response = response or get_crawl(request, abid)
+    # except Exception:
+    #     pass
 
-    raise HttpError(404, 'Object with given ABID not found')
+    if not response:
+        raise HttpError(404, 'Object with given ABID not found')
+
+    return response
 
diff --git a/archivebox/core/admin_archiveresults.py b/archivebox/core/admin_archiveresults.py
new file mode 100644
index 00000000..e9645b03
--- /dev/null
+++ b/archivebox/core/admin_archiveresults.py
@@ -0,0 +1,198 @@
+__package__ = 'archivebox.core'
+
+import os
+from pathlib import Path
+
+from django.contrib import admin
+from django.utils.html import format_html, mark_safe
+from django.core.exceptions import ValidationError
+from django.urls import reverse, resolve
+from django.utils import timezone
+from django import forms
+
+from huey_monitor.admin import TaskModel
+
+import abx
+
+from archivebox.config import DATA_DIR
+from archivebox.config.common import SERVER_CONFIG
+from archivebox.misc.paginators import AccelleratedPaginator
+from archivebox.abid_utils.admin import ABIDModelAdmin
+
+from .models import ArchiveResult, Snapshot
+
+
+
+
+def result_url(result: TaskModel) -> str:
+    url = reverse("admin:huey_monitor_taskmodel_change", args=[str(result.id)])
+    return format_html('<a href="{url}" class="fade-in-progress-url">See progress...</a>'.format(url=url))
+
+
+
+class ArchiveResultInline(admin.TabularInline):
+    name = 'Archive Results Log'
+    model = ArchiveResult
+    parent_model = Snapshot
+    # fk_name = 'snapshot'
+    extra = 0
+    sort_fields = ('end_ts', 'extractor', 'output', 'status', 'cmd_version')
+    readonly_fields = ('id', 'result_id', 'completed', 'command', 'version')
+    fields = ('start_ts', 'end_ts', *readonly_fields, 'extractor', 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'output')
+    # exclude = ('id',)
+    ordering = ('end_ts',)
+    show_change_link = True
+    # # classes = ['collapse']
+    # # list_display_links = ['abid']
+
+    def get_parent_object_from_request(self, request):
+        resolved = resolve(request.path_info)
+        try:
+            return self.parent_model.objects.get(pk=resolved.kwargs['object_id'])
+        except (self.parent_model.DoesNotExist, ValidationError):
+            # fall back to looking up the parent Snapshot by ABID instead of pk
+            return self.parent_model.objects.get(pk=self.parent_model.id_from_abid(resolved.kwargs['object_id']))
+
+    @admin.display(
+        description='Completed',
+        ordering='end_ts',
+    )
+    def completed(self, obj):
+        return format_html('<p style="white-space: nowrap">{}</p>', obj.end_ts.strftime('%Y-%m-%d %H:%M:%S'))
+
+    def result_id(self, obj):
+        return format_html('<a href="{}"><code>[{}]</code></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), obj.abid)
+
+    def command(self, obj):
+        return format_html('<small><code>{}</code></small>', " ".join(obj.cmd or []))
+
+    def version(self, obj):
+        return format_html('<small><code>{}</code></small>', obj.cmd_version or '-')
+
+ def get_formset(self, request, obj=None, **kwargs):
+ formset = super().get_formset(request, obj, **kwargs)
+ snapshot = self.get_parent_object_from_request(request)
+
+ # import ipdb; ipdb.set_trace()
+ # formset.form.base_fields['id'].widget = formset.form.base_fields['id'].hidden_widget()
+
+ # default values for new entries
+ formset.form.base_fields['status'].initial = 'succeeded'
+ formset.form.base_fields['start_ts'].initial = timezone.now()
+ formset.form.base_fields['end_ts'].initial = timezone.now()
+ formset.form.base_fields['cmd_version'].initial = '-'
+ formset.form.base_fields['pwd'].initial = str(snapshot.link_dir)
+ formset.form.base_fields['created_by'].initial = request.user
+ formset.form.base_fields['cmd'] = forms.JSONField(initial=['-'])
+ formset.form.base_fields['output'].initial = 'Manually recorded cmd output...'
+
+ if obj is not None:
+ # hidden values for existing entries and new entries
+ formset.form.base_fields['start_ts'].widget = formset.form.base_fields['start_ts'].hidden_widget()
+ formset.form.base_fields['end_ts'].widget = formset.form.base_fields['end_ts'].hidden_widget()
+ formset.form.base_fields['cmd'].widget = formset.form.base_fields['cmd'].hidden_widget()
+ formset.form.base_fields['pwd'].widget = formset.form.base_fields['pwd'].hidden_widget()
+ formset.form.base_fields['created_by'].widget = formset.form.base_fields['created_by'].hidden_widget()
+ formset.form.base_fields['cmd_version'].widget = formset.form.base_fields['cmd_version'].hidden_widget()
+ return formset
+
+ def get_readonly_fields(self, request, obj=None):
+ if obj is not None:
+ return self.readonly_fields
+ else:
+ return []
+
+
+
+class ArchiveResultAdmin(ABIDModelAdmin):
+ list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
+ sort_fields = ('start_ts', 'extractor', 'status')
+ readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'abid_info', 'output_summary')
+ search_fields = ('id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
+ fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', 'cmd', *readonly_fields)
+ autocomplete_fields = ['snapshot']
+
+ list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
+ ordering = ['-start_ts']
+ list_per_page = SERVER_CONFIG.SNAPSHOTS_PER_PAGE
+
+ paginator = AccelleratedPaginator
+ save_on_top = True
+
+ actions = ['delete_selected']
+
+ class Meta:
+ verbose_name = 'Archive Result'
+ verbose_name_plural = 'Archive Results'
+
+ def change_view(self, request, object_id, form_url="", extra_context=None):
+ self.request = request
+ return super().change_view(request, object_id, form_url, extra_context)
+
+ @admin.display(
+ description='Snapshot Info'
+ )
+ def snapshot_info(self, result):
+ return format_html(
+            '<a href="/archive/{}/index.html"><b><code>[{}]</code></b> &nbsp; {} &nbsp; {}</a><br/>',
+            result.snapshot.timestamp,
+            result.snapshot.abid,
+            result.snapshot.bookmarked_at.strftime('%Y-%m-%d %H:%M'),
+            result.snapshot.url[:128],
+        )
+
+    @admin.display(
+        description='Tags'
+    )
+    def tags_str(self, result):
+        return result.snapshot.tags_str()
+
+    def cmd_str(self, result):
+        return format_html(
+            '<pre>{}</pre>',
+            ' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd),
+        )
+
+    def output_str(self, result):
+        return format_html(
+            '<a href="/archive/{}/{}" class="output-link">↗ī¸</a><pre>{}</pre>',
+            result.snapshot.timestamp,
+            result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html',
+            result.output,
+        )
+
+    def output_summary(self, result):
+        snapshot_dir = Path(DATA_DIR) / str(result.pwd).split('data/', 1)[-1]
+        output_str = format_html(
+            '<b><code>{}</code></b><br/><pre><code>',
+            str(result.snapshot.timestamp))
+        path_from_output_str = (snapshot_dir / result.output)
+        output_str += format_html('<i>{}</i><b>/</b><i>{}</i><br/><hr/>', str(snapshot_dir), str(result.output))
+        if os.access(path_from_output_str, os.R_OK):
+            root_dir = str(path_from_output_str)
+        else:
+            root_dir = str(snapshot_dir)
+
+        # print(root_dir, str(list(os.walk(root_dir))))
+
+        # walk the snapshot dir (max 2 levels deep) and list the files under each subdir
+        for root, dirs, files in os.walk(root_dir):
+            depth = root.replace(root_dir, '').count(os.sep) + 1
+            if depth > 2:
+                continue
+            indent = ' ' * 4 * (depth)
+            output_str += format_html('<b>{}{}/</b><br/>', indent, os.path.basename(root))
+            indentation_str = ' ' * 4 * (depth + 1)
+            for filename in sorted(files):
+                is_hidden = filename.startswith('.')
+                output_str += format_html('<span style="opacity: {}.2">{}{}</span><br/>', int(not is_hidden), indentation_str, filename.strip())
+
+        return output_str + format_html('</code></pre>')
+
+
+
+
+@abx.hookimpl
+def register_admin(admin_site):
+ admin_site.register(ArchiveResult, ArchiveResultAdmin)
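+
+# Note (illustrative sketch): abx is ArchiveBox's pluggy-based plugin system.
+# Functions decorated with @abx.hookimpl and named register_admin are collected
+# and invoked when the project's custom admin site is set up, roughly like:
+#
+#     abx.pm.hook.register_admin(admin_site=archivebox_admin_site)  # hypothetical call site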
diff --git a/archivebox/core/admin_snapshots.py b/archivebox/core/admin_snapshots.py
new file mode 100644
index 00000000..60d194f5
--- /dev/null
+++ b/archivebox/core/admin_snapshots.py
@@ -0,0 +1,368 @@
+
+__package__ = 'archivebox.core'
+
+import os
+from pathlib import Path
+
+from django.contrib import admin, messages
+from django.urls import path
+from django.utils.html import format_html, mark_safe
+from django.utils import timezone
+from django import forms
+from django.template import Template, RequestContext
+from django.contrib.admin.helpers import ActionForm
+from django.contrib.admin.widgets import FilteredSelectMultiple
+
+
+
+import abx
+
+from archivebox.config import DATA_DIR, VERSION
+from archivebox.config.common import SERVER_CONFIG
+from archivebox.misc.util import htmldecode, urldecode
+from archivebox.misc.paginators import AccelleratedPaginator
+from archivebox.abid_utils.admin import ABIDModelAdmin
+from archivebox.search.admin import SearchResultsAdminMixin
+
+from archivebox.logging_util import printable_filesize
+from archivebox.index.html import snapshot_icons
+from archivebox.extractors import archive_links
+from archivebox.main import remove
+
+from archivebox.queues.tasks import bg_archive_links, bg_add
+
+
+from .models import Tag, Snapshot
+from .admin_archiveresults import ArchiveResultInline, result_url
+from .admin_tags import TagInline
+
+
+GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': [], 'CAN_UPGRADE': False}
+
+
+
+class SnapshotActionForm(ActionForm):
+ tags = forms.ModelMultipleChoiceField(
+ label='Edit tags',
+ queryset=Tag.objects.all(),
+ required=False,
+ widget=FilteredSelectMultiple(
+ 'core_tag__name',
+ False,
+ ),
+ )
+
+ # TODO: allow selecting actions for specific extractors? is this useful?
+ # extractor = forms.ChoiceField(
+ # choices=ArchiveResult.EXTRACTOR_CHOICES,
+ # required=False,
+    #     widget=forms.MultipleChoiceField(attrs={'class': "form-control"})
+ # )
+
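+# Note (illustrative sketch): extra fields declared on an ActionForm are
+# rendered into the admin changelist's action <form>, so action handlers can
+# read the submitted values straight from the POST data, e.g.:
+#
+#     def add_tags(self, request, queryset):
+#         tag_ids = request.POST.getlist('tags')  # pk values chosen in the tags widget above
+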
+
+class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
+ list_display = ('created_at', 'title_str', 'files', 'size', 'url_str')
+ sort_fields = ('title_str', 'url_str', 'created_at')
+ readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'abid_info', 'link_dir')
+ search_fields = ('id', 'url', 'abid', 'timestamp', 'title', 'tags__name')
+ list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'created_by', 'tags__name')
+ fields = ('url', 'title', 'created_by', 'bookmarked_at', *readonly_fields)
+ ordering = ['-created_at']
+ actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
+ inlines = [TagInline, ArchiveResultInline]
+ list_per_page = min(max(5, SERVER_CONFIG.SNAPSHOTS_PER_PAGE), 5000)
+
+ action_form = SnapshotActionForm
+ paginator = AccelleratedPaginator
+
+ save_on_top = True
+ show_full_result_count = False
+
+ def changelist_view(self, request, extra_context=None):
+ self.request = request
+ extra_context = extra_context or {}
+ try:
+ return super().changelist_view(request, extra_context | GLOBAL_CONTEXT)
+ except Exception as e:
+ self.message_user(request, f'Error occurred while loading the page: {str(e)} {request.GET} {request.POST}')
+ return super().changelist_view(request, GLOBAL_CONTEXT)
+
+
+ def get_urls(self):
+ urls = super().get_urls()
+ custom_urls = [
+ path('grid/', self.admin_site.admin_view(self.grid_view), name='grid')
+ ]
+ return custom_urls + urls
+
+ # def get_queryset(self, request):
+ # # tags_qs = SnapshotTag.objects.all().select_related('tag')
+ # # prefetch = Prefetch('snapshottag_set', queryset=tags_qs)
+
+ # self.request = request
+ # return super().get_queryset(request).prefetch_related('archiveresult_set').distinct() # .annotate(archiveresult_count=Count('archiveresult'))
+
+    @admin.display(
+        description='Imported Timestamp'
+    )
+ def imported_timestamp(self, obj):
+ context = RequestContext(self.request, {
+ 'bookmarked_date': obj.bookmarked,
+ 'timestamp': obj.timestamp,
+ })
+
+        html = Template('<p style="white-space: nowrap">{{bookmarked_date}} (<code>{{timestamp}}</code>)</p>')
+ return mark_safe(html.render(context))
+
+ # pretty_time = obj.bookmarked.strftime('%Y-%m-%d %H:%M:%S')
+ # return f'{pretty_time} ({obj.timestamp})'
+
+    # TODO: figure out a different way to do this, you can't nest forms so this doesn't work
+ # def action(self, obj):
+ # # csrfmiddlewaretoken: Wa8UcQ4fD3FJibzxqHN3IYrrjLo4VguWynmbzzcPYoebfVUnDovon7GEMYFRgsh0
+ # # action: update_snapshots
+ # # select_across: 0
+ # # _selected_action: 76d29b26-2a88-439e-877c-a7cca1b72bb3
+ # return format_html(
+ # '''
+ #
+ # ''',
+ # csrf.get_token(self.request),
+ # obj.pk,
+ # )
+
+ def admin_actions(self, obj):
+ return format_html(
+            # URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
+            '<a href="{}"><code style="user-select: all;">{}</code></a>',
+ obj.url,
+ obj.url[:128],
+ )
+
+ def grid_view(self, request, extra_context=None):
+
+ # cl = self.get_changelist_instance(request)
+
+ # Save before monkey patching to restore for changelist list view
+ saved_change_list_template = self.change_list_template
+ saved_list_per_page = self.list_per_page
+ saved_list_max_show_all = self.list_max_show_all
+
+ # Monkey patch here plus core_tags.py
+ self.change_list_template = 'private_index_grid.html'
+ self.list_per_page = SERVER_CONFIG.SNAPSHOTS_PER_PAGE
+ self.list_max_show_all = self.list_per_page
+
+ # Call monkey patched view
+ rendered_response = self.changelist_view(request, extra_context=extra_context)
+
+ # Restore values
+ self.change_list_template = saved_change_list_template
+ self.list_per_page = saved_list_per_page
+ self.list_max_show_all = saved_list_max_show_all
+
+ return rendered_response
+
+ # for debugging, uncomment this to print all requests:
+ # def changelist_view(self, request, extra_context=None):
+ # print('[*] Got request', request.method, request.POST)
+ # return super().changelist_view(request, extra_context=None)
+
+ @admin.action(
+        description="ℹī¸ Get Title"
+ )
+ def update_titles(self, request, queryset):
+ links = [snapshot.as_link() for snapshot in queryset]
+ if len(links) < 3:
+            # run synchronously if there are only 1 or 2 links
+ archive_links(links, overwrite=True, methods=('title','favicon'), out_dir=DATA_DIR)
+ messages.success(request, f"Title and favicon have been fetched and saved for {len(links)} URLs.")
+ else:
+ # otherwise run in a background worker
+ result = bg_archive_links((links,), kwargs={"overwrite": True, "methods": ["title", "favicon"], "out_dir": DATA_DIR})
+ messages.success(
+ request,
+ mark_safe(f"Title and favicon are updating in the background for {len(links)} URLs. {result_url(result)}"),
+ )
+
+ @admin.action(
+        description="âŦ‡ī¸ Get Missing"
+ )
+ def update_snapshots(self, request, queryset):
+ links = [snapshot.as_link() for snapshot in queryset]
+
+ result = bg_archive_links((links,), kwargs={"overwrite": False, "out_dir": DATA_DIR})
+
+ messages.success(
+ request,
+ mark_safe(f"Re-trying any previously failed methods for {len(links)} URLs in the background. {result_url(result)}"),
+ )
+
+
+ @admin.action(
+        description="đŸ†• Archive Again"
+ )
+ def resnapshot_snapshot(self, request, queryset):
+ for snapshot in queryset:
+ timestamp = timezone.now().isoformat('T', 'seconds')
+ new_url = snapshot.url.split('#')[0] + f'#{timestamp}'
+
+ result = bg_add({'urls': new_url, 'tag': snapshot.tags_str()})
+
+ messages.success(
+ request,
+ mark_safe(f"Creating new fresh snapshots for {queryset.count()} URLs in the background. {result_url(result)}"),
+ )
+
+ @admin.action(
+        description="đŸ”„ Redo"
+ )
+ def overwrite_snapshots(self, request, queryset):
+ links = [snapshot.as_link() for snapshot in queryset]
+
+ result = bg_archive_links((links,), kwargs={"overwrite": True, "out_dir": DATA_DIR})
+
+ messages.success(
+ request,
+ mark_safe(f"Clearing all previous results and re-downloading {len(links)} URLs in the background. {result_url(result)}"),
+ )
+
+ @admin.action(
+        description="â˜ ī¸ Delete"
+ )
+ def delete_snapshots(self, request, queryset):
+ remove(snapshots=queryset, yes=True, delete=True, out_dir=DATA_DIR)
+ messages.success(
+ request,
+            mark_safe(f"Successfully deleted {queryset.count()} Snapshots. Don't forget to scrub URLs from import logs (data/sources) and error logs (data/logs) if needed."),
+ )
+
+
+ @admin.action(
+ description="+"
+ )
+ def add_tags(self, request, queryset):
+ tags = request.POST.getlist('tags')
+ print('[+] Adding tags', tags, 'to Snapshots', queryset)
+ for obj in queryset:
+ obj.tags.add(*tags)
+ messages.success(
+ request,
+ f"Added {len(tags)} tags to {queryset.count()} Snapshots.",
+ )
+
+
+ @admin.action(
+        description="➖"
+ )
+ def remove_tags(self, request, queryset):
+ tags = request.POST.getlist('tags')
+        print('[-] Removing tags', tags, 'from Snapshots', queryset)
+ for obj in queryset:
+ obj.tags.remove(*tags)
+ messages.success(
+ request,
+ f"Removed {len(tags)} tags from {queryset.count()} Snapshots.",
+ )
+
+
+
+@abx.hookimpl
+def register_admin(admin_site):
+ admin_site.register(Snapshot, SnapshotAdmin)
diff --git a/archivebox/core/admin_tags.py b/archivebox/core/admin_tags.py
new file mode 100644
index 00000000..8d2d28c8
--- /dev/null
+++ b/archivebox/core/admin_tags.py
@@ -0,0 +1,81 @@
+__package__ = 'archivebox.core'
+
+from django.contrib import admin
+from django.utils.html import format_html, mark_safe
+
+import abx
+
+from archivebox.abid_utils.admin import ABIDModelAdmin
+from archivebox.misc.paginators import AccelleratedPaginator
+
+from .models import Tag
+
+
+class TagInline(admin.TabularInline):
+ model = Tag.snapshot_set.through # type: ignore
+ # fk_name = 'snapshot'
+ fields = ('id', 'tag')
+ extra = 1
+ # min_num = 1
+ max_num = 1000
+ autocomplete_fields = (
+ 'tag',
+ )
+
+
+# class AutocompleteTags:
+# model = Tag
+# search_fields = ['name']
+# name = 'name'
+# # source_field = 'name'
+# remote_field = Tag._meta.get_field('name')
+
+# class AutocompleteTagsAdminStub:
+# name = 'admin'
+
+class TagAdmin(ABIDModelAdmin):
+ list_display = ('created_at', 'created_by', 'abid', 'name', 'num_snapshots', 'snapshots')
+ list_filter = ('created_at', 'created_by')
+ sort_fields = ('name', 'slug', 'abid', 'created_by', 'created_at')
+ readonly_fields = ('slug', 'abid', 'created_at', 'modified_at', 'abid_info', 'snapshots')
+ search_fields = ('abid', 'name', 'slug')
+ fields = ('name', 'created_by', *readonly_fields)
+ actions = ['delete_selected']
+ ordering = ['-created_at']
+
+ paginator = AccelleratedPaginator
+
+
+ def num_snapshots(self, tag):
+ return format_html(
+            '<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>',
+ tag.id,
+ tag.snapshot_set.count(),
+ )
+
+    def snapshots(self, tag):
+        total_count = tag.snapshot_set.count()
+        return mark_safe('<br/>'.join(
+            format_html(
+                '<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> {}',
+                snap.pk,
+                snap.downloaded_at.strftime('%Y-%m-%d %H:%M') if snap.downloaded_at else 'pending...',
+                snap.url[:64],
+            )
+            for snap in tag.snapshot_set.order_by('-downloaded_at')[:10]
+        ) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={tag.id}">and {total_count - 10} more...</a>' if total_count > 10 else ''))
+
+
+@abx.hookimpl
+def register_admin(admin_site):
+    admin_site.register(Tag, TagAdmin)
diff --git a/archivebox/core/admin_users.py b/archivebox/core/admin_users.py
new file mode 100644
--- /dev/null
+++ b/archivebox/core/admin_users.py
+__package__ = 'archivebox.core'
+
+from django.contrib import admin
+from django.contrib.auth import get_user_model
+from django.contrib.auth.admin import UserAdmin
+from django.utils.html import format_html, mark_safe
+
+import abx
+
+
+class CustomUserAdmin(UserAdmin):
+    readonly_fields = ('snapshot_set', 'archiveresult_set', 'tag_set', 'apitoken_set', 'outboundwebhook_set')
+
+    @admin.display(description='Snapshots')
+    def snapshot_set(self, obj):
+        total_count = obj.snapshot_set.count()
+        return mark_safe('<br/>'.join(
+            format_html(
+                '<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> {}',
+                snap.pk,
+                snap.abid,
+                snap.downloaded_at.strftime('%Y-%m-%d %H:%M') if snap.downloaded_at else 'pending...',
+                snap.url[:64],
+            )
+            for snap in obj.snapshot_set.order_by('-modified_at')[:10]
+        ) + f'<br/><a href="/admin/core/snapshot/?created_by__id__exact={obj.pk}">{total_count} total records...</a>')
+
+    @admin.display(description='Archive Result Logs')
+    def archiveresult_set(self, obj):
+        total_count = obj.archiveresult_set.count()
+        return mark_safe('<br/>'.join(
+            format_html(
+                '<code><a href="/admin/core/archiveresult/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> <b>📄 {}</b> {}',
+                result.pk,
+                result.abid,
+                result.snapshot.downloaded_at.strftime('%Y-%m-%d %H:%M') if result.snapshot.downloaded_at else 'pending...',
+                result.extractor,
+                result.snapshot.url[:64],
+            )
+            for result in obj.archiveresult_set.order_by('-modified_at')[:10]
+        ) + f'<br/><a href="/admin/core/archiveresult/?created_by__id__exact={obj.pk}">{total_count} total records...</a>')
+
+    @admin.display(description='Tags')
+    def tag_set(self, obj):
+        total_count = obj.tag_set.count()
+        return mark_safe(', '.join(
+            format_html(
+                '<code><a href="/admin/core/tag/{}/change"><b>{}</b></a></code>',
+                tag.pk,
+                tag.name,
+            )
+            for tag in obj.tag_set.order_by('-modified_at')[:10]
+        ) + f'<br/><a href="/admin/core/tag/?created_by__id__exact={obj.pk}">{total_count} total records...</a>')
+
+    @admin.display(description='API Tokens')
+    def apitoken_set(self, obj):
+        total_count = obj.apitoken_set.count()
+        return mark_safe('<br/>'.join(
+            format_html(
+                '<code><a href="/admin/api/apitoken/{}/change"><b>[{}]</b></a></code> {} (expires {})',
+                apitoken.pk,
+                apitoken.abid,
+                apitoken.token_redacted[:64],
+                apitoken.expires,
+            )
+            for apitoken in obj.apitoken_set.order_by('-modified_at')[:10]
+        ) + f'<br/><a href="/admin/api/apitoken/?created_by__id__exact={obj.pk}">{total_count} total records...</a>')
+
+    @admin.display(description='API Outbound Webhooks')
+    def outboundwebhook_set(self, obj):
+        total_count = obj.outboundwebhook_set.count()
+        return mark_safe('<br/>'.join(
+            format_html(
+                '<code><a href="/admin/api/outboundwebhook/{}/change"><b>[{}]</b></a></code> {} -> {}',
+                outboundwebhook.pk,
+                outboundwebhook.abid,
+                outboundwebhook.referenced_model,
+                outboundwebhook.endpoint,
+            )
+            for outboundwebhook in obj.outboundwebhook_set.order_by('-modified_at')[:10]
+        ) + f'<br/><a href="/admin/api/outboundwebhook/?created_by__id__exact={obj.pk}">{total_count} total records...</a>')
+
+
+@abx.hookimpl
+def register_admin(admin_site):
+    admin_site.register(get_user_model(), CustomUserAdmin)
diff --git a/archivebox/machine/admin.py b/archivebox/machine/admin.py
new file mode 100644
--- /dev/null
+++ b/archivebox/machine/admin.py
+# __package__ = 'archivebox.machine'
+
+# import abx
+
+# from django.contrib import admin
+# from django.utils.html import format_html
+
+# from archivebox.abid_utils.admin import ABIDModelAdmin
+
+# from .models import Machine, NetworkInterface, InstalledBinary
+
+
+# class MachineAdmin(ABIDModelAdmin):
+#     @admin.display(
+#         description='Public IP',
+#         ordering='networkinterface__ip_public',
+#     )
+#     def ips(self, machine):
+#         return format_html(
+#             '<a href="/admin/machine/networkinterface/?q={}"><b><code>{}</code></b></a>',
+#             machine.abid,
+#             ', '.join(machine.networkinterface_set.values_list('ip_public', flat=True)),
+#             )
+
+# class NetworkInterfaceAdmin(ABIDModelAdmin):
+# list_display = ('abid', 'created_at', 'machine_info', 'ip_public', 'dns_server', 'isp', 'country', 'region', 'city', 'iface', 'ip_local', 'mac_address', 'health')
+# sort_fields = ('abid', 'created_at', 'machine_info', 'ip_public', 'dns_server', 'isp', 'country', 'region', 'city', 'iface', 'ip_local', 'mac_address')
+# search_fields = ('abid', 'machine__abid', 'iface', 'ip_public', 'ip_local', 'mac_address', 'dns_server', 'hostname', 'isp', 'city', 'region', 'country')
+
+# readonly_fields = ('machine', 'created_at', 'modified_at', 'abid_info', 'mac_address', 'ip_public', 'ip_local', 'dns_server')
+# fields = (*readonly_fields, 'iface', 'hostname', 'isp', 'city', 'region', 'country', 'num_uses_succeeded', 'num_uses_failed')
+
+# list_filter = ('isp', 'country', 'region')
+# ordering = ['-created_at']
+# list_per_page = 100
+# actions = ["delete_selected"]
+
+# @admin.display(
+# description='Machine',
+# ordering='machine__abid',
+# )
+# def machine_info(self, iface):
+# return format_html(
+#             '<a href="/admin/machine/machine/{}/change/"><b><code>[{}]</code></b></a> &nbsp; {}',
+# iface.machine.id,
+# iface.machine.abid,
+# iface.machine.hostname,
+# )
+
+# class InstalledBinaryAdmin(ABIDModelAdmin):
+# list_display = ('abid', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256', 'health')
+# sort_fields = ('abid', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256')
+# search_fields = ('abid', 'machine__abid', 'name', 'binprovider', 'version', 'abspath', 'sha256')
+
+# readonly_fields = ('created_at', 'modified_at', 'abid_info')
+# fields = ('machine', 'name', 'binprovider', 'abspath', 'version', 'sha256', *readonly_fields, 'num_uses_succeeded', 'num_uses_failed')
+
+# list_filter = ('name', 'binprovider', 'machine_id')
+# ordering = ['-created_at']
+# list_per_page = 100
+# actions = ["delete_selected"]
+
+# @admin.display(
+# description='Machine',
+# ordering='machine__abid',
+# )
+# def machine_info(self, installed_binary):
+# return format_html(
+#             '<a href="/admin/machine/machine/{}/change/"><b><code>[{}]</code></b></a> &nbsp; {}',
+# installed_binary.machine.id,
+# installed_binary.machine.abid,
+# installed_binary.machine.hostname,
+# )
+
+
+
+# @abx.hookimpl
+# def register_admin(admin_site):
+# admin_site.register(Machine, MachineAdmin)
+# admin_site.register(NetworkInterface, NetworkInterfaceAdmin)
+# admin_site.register(InstalledBinary, InstalledBinaryAdmin)
diff --git a/archivebox/misc/paginators.py b/archivebox/misc/paginators.py
new file mode 100644
index 00000000..2e623a65
--- /dev/null
+++ b/archivebox/misc/paginators.py
@@ -0,0 +1,30 @@
+__package__ = 'archivebox.misc'
+
+from django.core.paginator import Paginator
+from django.utils.functional import cached_property
+
+
+class AccelleratedPaginator(Paginator):
+ """
+    AccelleratedPaginator ignores DISTINCT when counting the total number of rows.
+ Speeds up SELECT Count(*) on Admin views by >20x.
+ https://hakibenita.com/optimizing-the-django-admin-paginator
+ """
+
+ @cached_property
+ def count(self):
+ if self.object_list._has_filters(): # type: ignore
+ # fallback to normal count method on filtered queryset
+ return super().count
+ else:
+ # otherwise count total rows in a separate fast query
+ return self.object_list.model.objects.count()
+
+ # Alternative approach for PostgreSQL: fallback count takes > 200ms
+ # from django.db import connection, transaction, OperationalError
+ # with transaction.atomic(), connection.cursor() as cursor:
+ # cursor.execute('SET LOCAL statement_timeout TO 200;')
+ # try:
+ # return super().count
+ # except OperationalError:
+ # return 9999999999999
diff --git a/archivebox/queues/admin.py b/archivebox/queues/admin.py
new file mode 100644
index 00000000..aee5788b
--- /dev/null
+++ b/archivebox/queues/admin.py
@@ -0,0 +1,26 @@
+__package__ = 'archivebox.queues'
+
+import abx
+
+from django.contrib.auth import get_permission_codename
+
+from huey_monitor.apps import HueyMonitorConfig
+from huey_monitor.admin import TaskModel, TaskModelAdmin, SignalInfoModel, SignalInfoModelAdmin
+
+
+HueyMonitorConfig.verbose_name = 'Background Workers'
+
+
+class CustomTaskModelAdmin(TaskModelAdmin):
+ actions = ["delete_selected"]
+
+ def has_delete_permission(self, request, obj=None):
+        # builds the standard Django perm check, e.g. "huey_monitor.delete_taskmodel"
+        codename = get_permission_codename("delete", self.opts)
+ return request.user.has_perm("%s.%s" % (self.opts.app_label, codename))
+
+
+
+@abx.hookimpl
+def register_admin(admin_site):
+ admin_site.register(TaskModel, CustomTaskModelAdmin)
+ admin_site.register(SignalInfoModel, SignalInfoModelAdmin)
diff --git a/archivebox/search/admin.py b/archivebox/search/admin.py
new file mode 100644
index 00000000..42aadf6f
--- /dev/null
+++ b/archivebox/search/admin.py
@@ -0,0 +1,23 @@
+__package__ = 'archivebox.search'
+
+from django.contrib import messages
+
+from archivebox.search import query_search_index
+
+class SearchResultsAdminMixin:
+ def get_search_results(self, request, queryset, search_term: str):
+ """Enhances the search queryset with results from the search backend"""
+
+ qs, use_distinct = super().get_search_results(request, queryset, search_term)
+
+ search_term = search_term.strip()
+ if not search_term:
+ return qs.distinct(), use_distinct
+ try:
+ qsearch = query_search_index(search_term)
+ qs = qs | qsearch
+ except Exception as err:
+ print(f'[!] Error while using search backend: {err.__class__.__name__} {err}')
+ messages.add_message(request, messages.WARNING, f'Error from the search backend, only showing results from default admin search fields - Error: {err}')
+
+ return qs.distinct(), use_distinct
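+
+# Usage sketch (illustrative): the mixin must precede the ModelAdmin base class
+# so its get_search_results() override takes priority in the MRO:
+#
+#     class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
+#         search_fields = ('id', 'url', 'title')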