From 3aeca0e45040b1b47eeb13ce3275e8ed2b71548d Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 27 Jul 2020 23:26:45 -0400 Subject: [PATCH] fix pending titles and favicons, improve add page, custom admin --- archivebox/core/admin.py | 90 ++++- archivebox/core/forms.py | 12 +- .../migrations/0005_auto_20200728_0326.py | 28 ++ archivebox/core/models.py | 6 +- archivebox/core/settings.py | 9 +- archivebox/core/urls.py | 12 +- archivebox/core/views.py | 50 +-- archivebox/index/html.py | 4 +- archivebox/main.py | 11 +- archivebox/themes/admin/base.html | 8 +- archivebox/themes/default/add_links.html | 323 ++++++------------ archivebox/themes/default/static/admin.css | 126 +++++++ .../themes/{ => default}/static/archive.png | Bin .../{ => default}/static/bootstrap.min.css | 0 .../themes/{ => default}/static/external.png | Bin .../static/jquery.dataTables.min.css | 0 .../static/jquery.dataTables.min.js | 0 .../themes/{ => default}/static/jquery.min.js | 0 .../themes/{ => default}/static/sort_asc.png | Bin .../themes/{ => default}/static/sort_both.png | Bin .../themes/{ => default}/static/sort_desc.png | Bin .../themes/{ => default}/static/spinner.gif | Bin archivebox/themes/legacy/link_details.html | 24 +- 23 files changed, 387 insertions(+), 316 deletions(-) create mode 100644 archivebox/core/migrations/0005_auto_20200728_0326.py create mode 100644 archivebox/themes/default/static/admin.css rename archivebox/themes/{ => default}/static/archive.png (100%) rename archivebox/themes/{ => default}/static/bootstrap.min.css (100%) rename archivebox/themes/{ => default}/static/external.png (100%) rename archivebox/themes/{ => default}/static/jquery.dataTables.min.css (100%) rename archivebox/themes/{ => default}/static/jquery.dataTables.min.js (100%) rename archivebox/themes/{ => default}/static/jquery.min.js (100%) rename archivebox/themes/{ => default}/static/sort_asc.png (100%) rename archivebox/themes/{ => default}/static/sort_both.png (100%) rename archivebox/themes/{ => default}/static/sort_desc.png (100%) rename archivebox/themes/{ => default}/static/spinner.gif (100%) diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py index 814b2f5e..97ac7712 100644 --- a/archivebox/core/admin.py +++ b/archivebox/core/admin.py @@ -1,19 +1,31 @@ -from django.contrib import admin -from django.utils.html import format_html +__package__ = 'archivebox.core' + +from io import StringIO +from contextlib import redirect_stdout + +from django.contrib import admin +from django.urls import path +from django.utils.html import format_html +from django.shortcuts import render +from django.contrib.auth import get_user_model -from util import htmldecode, urldecode from core.models import Snapshot -from archivebox.logging_util import printable_filesize +from core.forms import AddLinkForm + +from ..util import htmldecode, urldecode, ansi_to_html +from ..logging_util import printable_filesize +from ..main import add +from ..config import OUTPUT_DIR # TODO: https://stackoverflow.com/questions/40760880/add-custom-button-to-django-admin-panel class SnapshotAdmin(admin.ModelAdmin): - list_display = ('title_str', 'url_str', 'tags', 'files', 'size', 'added', 'updated') + list_display = ('added', 'title_str', 'url_str', 'tags', 'files', 'size', 'updated') sort_fields = ('title_str', 'url_str', 'tags', 'added', 'updated') - readonly_fields = ('id', 'num_outputs', 'is_archived', 'url_hash', 'added', 'updated') + readonly_fields = ('id', 'url', 'timestamp', 'num_outputs', 'is_archived', 'url_hash', 'added', 'updated') search_fields = ('url', 'timestamp', 'title', 'tags') - fields = ('url', 'timestamp', 'title', 'tags', *readonly_fields) + fields = ('title', 'tags', *readonly_fields) list_filter = ('added', 'updated', 'tags') ordering = ['-added'] @@ -27,15 +39,16 @@ class SnapshotAdmin(admin.ModelAdmin): canon = obj.as_link().canonical_outputs() return format_html( '' - '' - '     ' + '' '' '' - '{}', + '{}' + '', obj.archive_path, obj.archive_path, canon['favicon_path'], obj.archive_path, canon['wget_path'] or '', - urldecode(htmldecode(obj.latest_title or obj.title or '-'))[:128], + 'fetched' if obj.latest_title or obj.title else 'pending', + urldecode(htmldecode(obj.latest_title or obj.title or ''))[:128] or 'Pending...', ) def files(self, obj): @@ -68,17 +81,68 @@ class SnapshotAdmin(admin.ModelAdmin): def url_str(self, obj): return format_html( - '{}', + '{}', obj.url, obj.url.split('://www.', 1)[-1].split('://', 1)[-1][:64], ) id_str.short_description = 'ID' title_str.short_description = 'Title' - url_str.short_description = 'URL' + url_str.short_description = 'Original URL' id_str.admin_order_field = 'id' title_str.admin_order_field = 'title' url_str.admin_order_field = 'url' + + +class ArchiveBoxAdmin(admin.AdminSite): + site_header = 'ArchiveBox' + index_title = 'Links' + site_title = 'Index' + + def get_urls(self): + return [ + path('core/snapshot/add/', self.add_view, name='add'), + ] + super().get_urls() + + def add_view(self, request): + request.current_app = self.name + context = { + **self.each_context(request), + 'title': 'Add URLs', + } + + if request.method == 'GET': + context['form'] = AddLinkForm() + + elif request.method == 'POST': + form = AddLinkForm(request.POST) + if form.is_valid(): + url = form.cleaned_data["url"] + print(f'[+] Adding URL: {url}') + depth = 0 if form.cleaned_data["depth"] == "0" else 1 + input_kwargs = { + "urls": url, + "depth": depth, + "update_all": False, + "out_dir": OUTPUT_DIR, + } + add_stdout = StringIO() + with redirect_stdout(add_stdout): + add(**input_kwargs) + print(add_stdout.getvalue()) + + context.update({ + "stdout": ansi_to_html(add_stdout.getvalue().strip()), + "form": AddLinkForm() + }) + else: + context["form"] = form + + return render(template_name='add_links.html', request=request, context=context) + + +admin.site = ArchiveBoxAdmin() +admin.site.register(get_user_model()) admin.site.register(Snapshot, SnapshotAdmin) diff --git a/archivebox/core/forms.py b/archivebox/core/forms.py index 8bf0cbd0..f641298a 100644 --- a/archivebox/core/forms.py +++ b/archivebox/core/forms.py @@ -1,10 +1,14 @@ +__package__ = 'archivebox.core' + from django import forms +from ..util import URL_REGEX + CHOICES = ( - ('0', 'depth=0 (archive just this url)'), - ('1', 'depth=1 (archive this url and all sites one link away)'), + ('0', 'depth = 0 (archive just these URLs)'), + ('1', 'depth = 1 (archive these URLs and all URLs one hop away)'), ) class AddLinkForm(forms.Form): - url = forms.URLField() - depth = forms.ChoiceField(choices=CHOICES, widget=forms.RadioSelect, initial='0') + url = forms.RegexField(label="URLs (one per line)", regex=URL_REGEX, min_length='6', strip=True, widget=forms.Textarea, required=True) + depth = forms.ChoiceField(label="Archive depth", choices=CHOICES, widget=forms.RadioSelect, initial='0') diff --git a/archivebox/core/migrations/0005_auto_20200728_0326.py b/archivebox/core/migrations/0005_auto_20200728_0326.py new file mode 100644 index 00000000..f367aeb1 --- /dev/null +++ b/archivebox/core/migrations/0005_auto_20200728_0326.py @@ -0,0 +1,28 @@ +# Generated by Django 3.0.7 on 2020-07-28 03:26 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0004_auto_20200713_1552'), + ] + + operations = [ + migrations.AlterField( + model_name='snapshot', + name='tags', + field=models.CharField(blank=True, db_index=True, max_length=256, null=True), + ), + migrations.AlterField( + model_name='snapshot', + name='title', + field=models.CharField(blank=True, db_index=True, max_length=128, null=True), + ), + migrations.AlterField( + model_name='snapshot', + name='updated', + field=models.DateTimeField(blank=True, db_index=True, null=True), + ), + ] diff --git a/archivebox/core/models.py b/archivebox/core/models.py index 7ac9427b..95638bc1 100644 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -15,11 +15,11 @@ class Snapshot(models.Model): url = models.URLField(unique=True) timestamp = models.CharField(max_length=32, unique=True, db_index=True) - title = models.CharField(max_length=128, null=True, default=None, db_index=True) - tags = models.CharField(max_length=256, null=True, default=None, db_index=True) + title = models.CharField(max_length=128, null=True, blank=True, db_index=True) + tags = models.CharField(max_length=256, null=True, blank=True, db_index=True) added = models.DateTimeField(auto_now_add=True, db_index=True) - updated = models.DateTimeField(null=True, default=None, db_index=True) + updated = models.DateTimeField(null=True, blank=True, db_index=True) # bookmarked = models.DateTimeField() keys = ('url', 'timestamp', 'title', 'tags', 'updated') diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index e9cc0dc8..a0da8b92 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -5,16 +5,16 @@ import sys from django.utils.crypto import get_random_string -from ..config import ( - OUTPUT_DIR, +from ..config import ( # noqa: F401 + DEBUG, SECRET_KEY, ALLOWED_HOSTS, PYTHON_DIR, ACTIVE_THEME, SQL_INDEX_FILENAME, + OUTPUT_DIR, ) - ALLOWED_HOSTS = ALLOWED_HOSTS.split(',') IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3] @@ -25,8 +25,8 @@ INSTALLED_APPS = [ 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.messages', - 'django.contrib.admin', 'django.contrib.staticfiles', + 'django.contrib.admin', 'core', @@ -121,5 +121,4 @@ STATIC_URL = '/static/' STATICFILES_DIRS = [ os.path.join(PYTHON_DIR, 'themes', ACTIVE_THEME, 'static'), os.path.join(PYTHON_DIR, 'themes', 'default', 'static'), - os.path.join(PYTHON_DIR, 'themes', 'static'), ] diff --git a/archivebox/core/urls.py b/archivebox/core/urls.py index 70ebaf63..0c1f8131 100644 --- a/archivebox/core/urls.py +++ b/archivebox/core/urls.py @@ -3,15 +3,12 @@ from django.contrib import admin from django.urls import path, include from django.views import static from django.conf import settings -from django.contrib.staticfiles.views import serve as serve_static from django.views.generic.base import RedirectView -from core.views import MainIndex, AddLinks, LinkDetails +from core.views import MainIndex, LinkDetails -admin.site.site_header = 'ArchiveBox' -admin.site.index_title = 'Links' -admin.site.site_title = 'Index' +# print('DEBUG', settings.DEBUG) urlpatterns = [ path('robots.txt', static.serve, {'document_root': settings.OUTPUT_DIR, 'path': 'robots.txt'}), @@ -19,14 +16,11 @@ urlpatterns = [ path('archive/', RedirectView.as_view(url='/')), path('archive/', LinkDetails.as_view(), name='LinkAssets'), - path('add/', AddLinks.as_view(), name='AddLinks'), - - path('static/', serve_static), + path('add/', RedirectView.as_view(url='/admin/core/snapshot/add/')), path('accounts/login/', RedirectView.as_view(url='/admin/login/')), path('accounts/logout/', RedirectView.as_view(url='/admin/logout/')), - path('admin/core/snapshot/add/', RedirectView.as_view(url='/add/')), path('accounts/', include('django.contrib.auth.urls')), path('admin/', admin.site.urls), diff --git a/archivebox/core/views.py b/archivebox/core/views.py index c411e98b..1eb8fc20 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -7,9 +7,6 @@ from django.views import View, static from core.models import Snapshot -from contextlib import redirect_stdout -from io import StringIO - from ..index import load_main_index, load_main_index_meta from ..config import ( OUTPUT_DIR, @@ -18,10 +15,7 @@ from ..config import ( PUBLIC_INDEX, PUBLIC_SNAPSHOTS, ) -from ..util import base_url, ansi_to_html -from .. main import add - -from .forms import AddLinkForm +from ..util import base_url class MainIndex(View): @@ -45,48 +39,6 @@ class MainIndex(View): return render(template_name=self.template, request=request, context=context) -class AddLinks(View): - template = 'add_links.html' - - def get(self, request): - if not request.user.is_authenticated and not PUBLIC_INDEX: - return redirect(f'/admin/login/?next={request.path}') - - context = { - "form": AddLinkForm() - } - - return render(template_name=self.template, request=request, context=context) - - def post(self, request): - if not request.user.is_authenticated and not PUBLIC_INDEX: - return redirect(f'/admin/login/?next={request.path}') - form = AddLinkForm(request.POST) - if form.is_valid(): - url = form.cleaned_data["url"] - print(f'[+] Adding URL: {url}') - depth = 0 if form.cleaned_data["depth"] == "0" else 0 - input_kwargs = { - "urls": url, - "depth": depth, - "update_all": False, - "out_dir": OUTPUT_DIR, - } - add_stdout = StringIO() - with redirect_stdout(add_stdout): - add(**input_kwargs) - print(add_stdout.getvalue()) - - context = { - "stdout": ansi_to_html(add_stdout.getvalue()), - "form": AddLinkForm() - } - else: - context = {"form": form} - - return render(template_name=self.template, request=request, context=context) - - class LinkDetails(View): def get(self, request, path): # missing trailing slash -> redirect to index diff --git a/archivebox/index/html.py b/archivebox/index/html.py index e21ae576..4c6ae8bb 100644 --- a/archivebox/index/html.py +++ b/archivebox/index/html.py @@ -136,8 +136,8 @@ def link_details_template(link: Link) -> str: 'url_str': htmlencode(urldecode(link.base_url)), 'archive_url': urlencode( wget_output_path(link) - or (link.domain if link.is_archived else 'about:blank') - ), + or (link.domain if link.is_archived else '') + ) or 'about:blank', 'extension': link.extension or 'html', 'tags': link.tags or 'untagged', 'status': 'archived' if link.is_archived else 'not yet archived', diff --git a/archivebox/main.py b/archivebox/main.py index 1cb34b30..141fe34c 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -83,6 +83,7 @@ from .config import ( EXTERNAL_LOCATIONS, DATA_LOCATIONS, DEPENDENCIES, + DEBUG, load_all_config, CONFIG, USER_CONFIG, @@ -987,13 +988,19 @@ def server(runserver_args: Optional[List[str]]=None, """Run the ArchiveBox HTTP server""" runserver_args = runserver_args or [] - check_data_folder(out_dir=out_dir) + + from . import config + config.SHOW_PROGRESS = False if debug: - os.environ['DEBUG'] = 'True' + # if --debug is passed, patch config.DEBUG to be True for this run + config.DEBUG = True else: + # force staticfiles to be served when DEBUG=False + # TODO: do this using nginx or another server instead of django? runserver_args.append('--insecure') + check_data_folder(out_dir=out_dir) setup_django(out_dir) from django.core.management import call_command from django.contrib.auth.models import User diff --git a/archivebox/themes/admin/base.html b/archivebox/themes/admin/base.html index 2160dfeb..2a67873e 100644 --- a/archivebox/themes/admin/base.html +++ b/archivebox/themes/admin/base.html @@ -2,7 +2,7 @@ {% get_current_language as LANGUAGE_CODE %}{% get_current_language_bidi as LANGUAGE_BIDI %} -{% block title %}{% endblock %} +{% block title %}{% endblock %} | ArchiveBox {% block extrastyle %}{% endblock %} {% if LANGUAGE_BIDI %}{% endif %} @@ -13,6 +13,7 @@ {% if LANGUAGE_BIDI %}{% endif %} {% endblock %} {% block blockbots %}{% endblock %} + {% load i18n %} @@ -26,13 +27,14 @@ -
-
- -
- - - -

Original

-

$domain

-
-
-
@@ -383,6 +371,18 @@
+
+
+ +
+ + + +

Original

+

$domain

+
+
+