mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-23 03:06:55 -04:00
Merge branch 'dev' into link-removal2
This commit is contained in:
commit
1fe95474c2
52 changed files with 896 additions and 550 deletions
|
@ -42,6 +42,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
|
|||
parser.add_argument(
|
||||
'--depth', # '-d',
|
||||
type=int,
|
||||
choices=[0, 1],
|
||||
default=0,
|
||||
help='Depth to archive to [0] or 1, see "add" command help for more info.',
|
||||
)
|
||||
|
|
|
@ -43,6 +43,11 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
|
|||
action='store_true',
|
||||
help='Run archivebox init before starting the server',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--createsuperuser',
|
||||
action='store_true',
|
||||
help='Run archivebox manage createsuperuser before starting the server',
|
||||
)
|
||||
command = parser.parse_args(args or ())
|
||||
reject_stdin(__command__, stdin)
|
||||
|
||||
|
@ -51,6 +56,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
|
|||
reload=command.reload,
|
||||
debug=command.debug,
|
||||
init=command.init,
|
||||
createsuperuser=command.createsuperuser,
|
||||
out_dir=pwd or OUTPUT_DIR,
|
||||
)
|
||||
|
||||
|
|
|
@ -116,16 +116,15 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
|
|||
'--write-annotations',
|
||||
'--write-thumbnail',
|
||||
'--no-call-home',
|
||||
'--user-agent',
|
||||
'--all-subs',
|
||||
'--extract-audio',
|
||||
'--keep-video',
|
||||
'--yes-playlist',
|
||||
'--continue',
|
||||
'--ignore-errors',
|
||||
'--geo-bypass',
|
||||
'--audio-format', 'mp3',
|
||||
'--audio-quality', '320K',
|
||||
'--embed-thumbnail',
|
||||
'--add-metadata']},
|
||||
'--add-metadata',
|
||||
'--max-filesize=750m',
|
||||
]},
|
||||
|
||||
|
||||
'WGET_ARGS': {'type': list, 'default': ['--no-verbose',
|
||||
'--adjust-extension',
|
||||
|
@ -775,7 +774,7 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
|
|||
'version': config['PYTHON_VERSION'],
|
||||
'hash': bin_hash(config['PYTHON_BINARY']),
|
||||
'enabled': True,
|
||||
'is_valid': bool(config['DJANGO_VERSION']),
|
||||
'is_valid': bool(config['PYTHON_VERSION']),
|
||||
},
|
||||
'DJANGO_BINARY': {
|
||||
'path': bin_path(config['DJANGO_BINARY']),
|
||||
|
@ -787,7 +786,7 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
|
|||
'CURL_BINARY': {
|
||||
'path': bin_path(config['CURL_BINARY']),
|
||||
'version': config['CURL_VERSION'],
|
||||
'hash': bin_hash(config['PYTHON_BINARY']),
|
||||
'hash': bin_hash(config['CURL_BINARY']),
|
||||
'enabled': config['USE_CURL'],
|
||||
'is_valid': bool(config['CURL_VERSION']),
|
||||
},
|
||||
|
@ -803,7 +802,7 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
|
|||
'version': config['NODE_VERSION'],
|
||||
'hash': bin_hash(config['NODE_BINARY']),
|
||||
'enabled': config['USE_NODE'],
|
||||
'is_valid': bool(config['SINGLEFILE_VERSION']),
|
||||
'is_valid': bool(config['NODE_VERSION']),
|
||||
},
|
||||
'SINGLEFILE_BINARY': {
|
||||
'path': bin_path(config['SINGLEFILE_BINARY']),
|
||||
|
|
|
@ -11,13 +11,14 @@ from django.shortcuts import render, redirect
|
|||
from django.contrib.auth import get_user_model
|
||||
from django import forms
|
||||
|
||||
from ..util import htmldecode, urldecode, ansi_to_html
|
||||
|
||||
from core.models import Snapshot, Tag
|
||||
from core.forms import AddLinkForm, TagField
|
||||
|
||||
from core.mixins import SearchResultsAdminMixin
|
||||
|
||||
from index.html import snapshot_icons
|
||||
from util import htmldecode, urldecode, ansi_to_html
|
||||
from logging_util import printable_filesize
|
||||
from main import add, remove
|
||||
from config import OUTPUT_DIR
|
||||
|
|
|
@ -22,10 +22,32 @@ class AddLinkForm(forms.Form):
|
|||
url = forms.RegexField(label="URLs (one per line)", regex=URL_REGEX, min_length='6', strip=True, widget=forms.Textarea, required=True)
|
||||
depth = forms.ChoiceField(label="Archive depth", choices=CHOICES, widget=forms.RadioSelect, initial='0')
|
||||
archive_methods = forms.MultipleChoiceField(
|
||||
label="Archive methods (select at least 1, otherwise all will be used by default)",
|
||||
required=False,
|
||||
widget=forms.SelectMultiple,
|
||||
choices=ARCHIVE_METHODS,
|
||||
)
|
||||
# TODO: hook these up to the view and put them
|
||||
# in a collapsible UI section labeled "Advanced"
|
||||
#
|
||||
# exclude_patterns = forms.CharField(
|
||||
# label="Exclude patterns",
|
||||
# min_length='1',
|
||||
# required=False,
|
||||
# initial=URL_BLACKLIST,
|
||||
# )
|
||||
# timeout = forms.IntegerField(
|
||||
# initial=TIMEOUT,
|
||||
# )
|
||||
# overwrite = forms.BooleanField(
|
||||
# label="Overwrite any existing Snapshots",
|
||||
# initial=False,
|
||||
# )
|
||||
# index_only = forms.BooleanField(
|
||||
# label="Add URLs to index without Snapshotting",
|
||||
# initial=False,
|
||||
# )
|
||||
|
||||
class TagWidgetMixin:
|
||||
def format_value(self, value):
|
||||
if value is not None and not isinstance(value, str):
|
||||
|
|
|
@ -36,7 +36,7 @@ def forwards_func(apps, schema_editor):
|
|||
|
||||
for extractor in history:
|
||||
for result in history[extractor]:
|
||||
ArchiveResult.objects.create(extractor=extractor, snapshot=snapshot, cmd=result["cmd"], cmd_version=result["cmd_version"],
|
||||
ArchiveResult.objects.create(extractor=extractor, snapshot=snapshot, cmd=result["cmd"], cmd_version=result["cmd_version"] or 'unknown',
|
||||
start_ts=result["start_ts"], end_ts=result["end_ts"], status=result["status"], pwd=result["pwd"], output=result["output"])
|
||||
|
||||
|
||||
|
|
|
@ -101,7 +101,7 @@ TEMPLATES = [
|
|||
################################################################################
|
||||
|
||||
DATABASE_FILE = Path(OUTPUT_DIR) / SQL_INDEX_FILENAME
|
||||
DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", DATABASE_FILE)
|
||||
DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", str(DATABASE_FILE))
|
||||
|
||||
DATABASES = {
|
||||
'default': {
|
||||
|
|
|
@ -9,6 +9,7 @@ from django.http import HttpResponse
|
|||
from django.views import View, static
|
||||
from django.views.generic.list import ListView
|
||||
from django.views.generic import FormView
|
||||
from django.db.models import Q
|
||||
from django.contrib.auth.mixins import UserPassesTestMixin
|
||||
|
||||
from core.models import Snapshot
|
||||
|
@ -108,7 +109,7 @@ class PublicArchiveView(ListView):
|
|||
qs = super().get_queryset(**kwargs)
|
||||
query = self.request.GET.get('q')
|
||||
if query:
|
||||
qs = qs.filter(title__icontains=query)
|
||||
qs = qs.filter(Q(title__icontains=query) | Q(url__icontains=query) | Q(timestamp__icontains=query) | Q(tags__name__icontains=query))
|
||||
for snapshot in qs:
|
||||
snapshot.icons = snapshot_icons(snapshot)
|
||||
return qs
|
||||
|
|
|
@ -96,7 +96,7 @@ def archive_snapshot(snapshot: Model, overwrite: bool=False, methods: Optional[I
|
|||
if method_name not in details["history"]:
|
||||
details["history"][method_name] = []
|
||||
|
||||
if should_run(snapshot, out_dir) or overwrite:
|
||||
if should_run(snapshot, out_dir, overwrite):
|
||||
log_archive_method_started(method_name)
|
||||
|
||||
result = method_function(snapshot=snapshot, out_dir=out_dir)
|
||||
|
|
|
@ -25,14 +25,17 @@ from ..config import (
|
|||
from ..logging_util import TimedProgress
|
||||
|
||||
|
||||
# output = '{domain}/'
|
||||
|
||||
|
||||
@enforce_types
|
||||
def should_save_archive_dot_org(snapshot: Model, out_dir: Optional[Path]=None) -> bool:
|
||||
def should_save_archive_dot_org(snapshot: Model, overwrite: Optional[bool]=False, out_dir: Optional[Path]=None) -> bool:
|
||||
out_dir = out_dir or Path(snapshot.snapshot_dir)
|
||||
if is_static_file(snapshot.url):
|
||||
return False
|
||||
|
||||
if (out_dir / "archive.org.txt").exists():
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / 'archive.org.txt').exists():
|
||||
# if open(path, 'r').read().strip() != 'None':
|
||||
return False
|
||||
|
||||
|
|
|
@ -20,18 +20,21 @@ from ..config import (
|
|||
from ..logging_util import TimedProgress
|
||||
|
||||
|
||||
# output = 'output.html'
|
||||
|
||||
|
||||
@enforce_types
|
||||
def should_save_dom(snapshot: Model, out_dir: Optional[Path]=None) -> bool:
|
||||
def should_save_dom(snapshot: Model, overwrite: Optional[bool]=False, out_dir: Optional[Path]=None) -> bool:
|
||||
out_dir = out_dir or Path(snapshot.snapshot_dir)
|
||||
if is_static_file(snapshot.url):
|
||||
return False
|
||||
|
||||
if (out_dir / 'output.html').exists():
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / 'output.html').exists():
|
||||
return False
|
||||
|
||||
return SAVE_DOM
|
||||
|
||||
|
||||
@enforce_types
|
||||
def save_dom(snapshot: Model, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
|
||||
"""print HTML of site to file using chrome --dump-html"""
|
||||
|
|
|
@ -21,14 +21,17 @@ from ..config import (
|
|||
from ..logging_util import TimedProgress
|
||||
|
||||
|
||||
# output = 'favicon.ico'
|
||||
|
||||
|
||||
@enforce_types
|
||||
def should_save_favicon(snapshot: Model, out_dir: Optional[str]=None) -> bool:
|
||||
def should_save_favicon(snapshot: Model, overwrite: Optional[bool]=False, out_dir: Optional[str]=None) -> bool:
|
||||
out_dir = out_dir or snapshot.snapshot_dir
|
||||
if (Path(out_dir) / 'favicon.ico').exists():
|
||||
if not overwrite and (Path(out_dir) / 'favicon.ico').exists():
|
||||
return False
|
||||
|
||||
return SAVE_FAVICON
|
||||
|
||||
|
||||
@enforce_types
|
||||
def save_favicon(snapshot: Model, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
|
||||
"""download site favicon from google's favicon api"""
|
||||
|
|
|
@ -28,14 +28,20 @@ from ..config import (
|
|||
from ..logging_util import TimedProgress
|
||||
|
||||
|
||||
# output = 'git/'
|
||||
# @contents = output.glob('*.*')
|
||||
# @exists = self.contents.exists()
|
||||
# @size => get_size(self.contents)
|
||||
# @num_files => len(self.contents)
|
||||
|
||||
@enforce_types
|
||||
def should_save_git(snapshot: Model, out_dir: Optional[Path]=None) -> bool:
|
||||
def should_save_git(snapshot: Model, overwrite: Optional[bool]=False, out_dir: Optional[Path]=None) -> bool:
|
||||
out_dir = out_dir or snapshot.snapshot_dir
|
||||
if is_static_file(snapshot.url):
|
||||
return False
|
||||
|
||||
if (out_dir / "git").exists():
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / 'git').exists():
|
||||
return False
|
||||
|
||||
is_clonable_url = (
|
||||
|
|
|
@ -23,12 +23,21 @@ from ..config import (
|
|||
)
|
||||
from ..logging_util import TimedProgress
|
||||
|
||||
|
||||
# output = 'headers.json'
|
||||
|
||||
@enforce_types
|
||||
def should_save_headers(snapshot: Model, out_dir: Optional[str]=None) -> bool:
|
||||
def should_save_headers(snapshot: Model, overwrite: Optional[bool]=False, out_dir: Optional[str]=None) -> bool:
|
||||
out_dir = out_dir or snapshot.snapshot_dir
|
||||
|
||||
if not SAVE_HEADERS:
|
||||
return False
|
||||
|
||||
if overwrite:
|
||||
return True
|
||||
|
||||
output = Path(out_dir or snapshot.snapshot_dir) / 'headers.json'
|
||||
return not output.exists() and SAVE_HEADERS
|
||||
return not output.exists()
|
||||
|
||||
|
||||
@enforce_types
|
||||
|
|
|
@ -22,14 +22,17 @@ from ..config import (
|
|||
from ..logging_util import TimedProgress
|
||||
|
||||
|
||||
# output = 'media/'
|
||||
|
||||
@enforce_types
|
||||
def should_save_media(snapshot: Model, out_dir: Optional[Path]=None) -> bool:
|
||||
def should_save_media(snapshot: Model, overwrite: Optional[bool]=False, out_dir: Optional[Path]=None) -> bool:
|
||||
out_dir = out_dir or snapshot.snapshot_dir
|
||||
|
||||
if is_static_file(snapshot.url):
|
||||
return False
|
||||
|
||||
if (out_dir / "media").exists():
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / 'media').exists():
|
||||
return False
|
||||
|
||||
return SAVE_MEDIA
|
||||
|
|
|
@ -39,13 +39,16 @@ def ShellError(cmd: List[str], result: CompletedProcess, lines: int=20) -> Archi
|
|||
|
||||
|
||||
@enforce_types
|
||||
def should_save_mercury(snapshot: Model, out_dir: Optional[str]=None) -> bool:
|
||||
def should_save_mercury(snapshot: Model, overwrite: Optional[bool]=False, out_dir: Optional[str]=None) -> bool:
|
||||
out_dir = out_dir or snapshot.snapshot_dir
|
||||
if is_static_file(snapshot.url):
|
||||
return False
|
||||
|
||||
output = Path(out_dir or snapshot.snapshot_dir) / 'mercury'
|
||||
return SAVE_MERCURY and MERCURY_VERSION and (not output.exists())
|
||||
if not overwrite and output.exists():
|
||||
return False
|
||||
|
||||
return SAVE_MERCURY and MERCURY_VERSION
|
||||
|
||||
|
||||
@enforce_types
|
||||
|
|
|
@ -19,14 +19,16 @@ from ..config import (
|
|||
)
|
||||
from ..logging_util import TimedProgress
|
||||
|
||||
# output = 'output.pdf'
|
||||
|
||||
@enforce_types
|
||||
def should_save_pdf(snapshot: Model, out_dir: Optional[Path]=None) -> bool:
|
||||
def should_save_pdf(snapshot: Model, verwrite: Optional[bool]=False, out_dir: Optional[Path]=None) -> bool:
|
||||
out_dir = out_dir or Path(snapshot.snapshot_dir)
|
||||
if is_static_file(snapshot.url):
|
||||
return False
|
||||
|
||||
if (out_dir / "output.pdf").exists():
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / 'output.pdf').exists():
|
||||
return False
|
||||
|
||||
return SAVE_PDF
|
||||
|
|
|
@ -25,6 +25,7 @@ from ..config import (
|
|||
)
|
||||
from ..logging_util import TimedProgress
|
||||
|
||||
|
||||
@enforce_types
|
||||
def get_html(snapshot: Model, path: Path) -> str:
|
||||
"""
|
||||
|
@ -47,14 +48,20 @@ def get_html(snapshot: Model, path: Path) -> str:
|
|||
else:
|
||||
return document
|
||||
|
||||
|
||||
# output = 'readability/'
|
||||
|
||||
@enforce_types
|
||||
def should_save_readability(snapshot: Model, out_dir: Optional[str]=None) -> bool:
|
||||
def should_save_readability(snapshot: Model, overwrite: Optional[bool]=False, out_dir: Optional[str]=None) -> bool:
|
||||
out_dir = out_dir or snapshot.link_dir
|
||||
if is_static_file(snapshot.url):
|
||||
return False
|
||||
|
||||
output = Path(out_dir or snapshot.snapshot_dir) / 'readability'
|
||||
return SAVE_READABILITY and READABILITY_VERSION and (not output.exists())
|
||||
if not overwrite and output.exists():
|
||||
return False
|
||||
|
||||
return SAVE_READABILITY and READABILITY_VERSION
|
||||
|
||||
|
||||
@enforce_types
|
||||
|
|
|
@ -20,14 +20,16 @@ from ..config import (
|
|||
from ..logging_util import TimedProgress
|
||||
|
||||
|
||||
# output = 'screenshot.png'
|
||||
|
||||
@enforce_types
|
||||
def should_save_screenshot(snapshot: Model, out_dir: Optional[Path]=None) -> bool:
|
||||
def should_save_screenshot(snapshot: Model, overwrite: Optional[bool]=False, out_dir: Optional[Path]=None) -> bool:
|
||||
out_dir = out_dir or Path(snapshot.snapshot_dir)
|
||||
if is_static_file(snapshot.url):
|
||||
return False
|
||||
|
||||
if (out_dir / "screenshot.png").exists():
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / 'screenshot.png').exists():
|
||||
return False
|
||||
|
||||
return SAVE_SCREENSHOT
|
||||
|
|
|
@ -25,13 +25,16 @@ from ..logging_util import TimedProgress
|
|||
|
||||
|
||||
@enforce_types
|
||||
def should_save_singlefile(snapshot: Model, out_dir: Optional[Path]=None) -> bool:
|
||||
def should_save_singlefile(snapshot: Model, overwrite: Optional[bool]=False, out_dir: Optional[Path]=None) -> bool:
|
||||
out_dir = out_dir or Path(snapshot.snapshot_dir)
|
||||
if is_static_file(snapshot.url):
|
||||
return False
|
||||
|
||||
output = out_dir / 'singlefile.html'
|
||||
return SAVE_SINGLEFILE and SINGLEFILE_VERSION and (not output.exists())
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / 'singlefile.html').exists():
|
||||
return False
|
||||
|
||||
return SAVE_SINGLEFILE
|
||||
|
||||
|
||||
@enforce_types
|
||||
|
|
|
@ -62,13 +62,15 @@ class TitleParser(HTMLParser):
|
|||
self.inside_title_tag = False
|
||||
|
||||
|
||||
@enforce_types
|
||||
def should_save_title(snapshot: Model, out_dir: Optional[str]=None) -> bool:
|
||||
# if link already has valid title, skip it
|
||||
if snapshot.title and not snapshot.title.lower().startswith('http'):
|
||||
return False
|
||||
# output = '{title}'
|
||||
|
||||
@enforce_types
|
||||
def should_save_title(snapshot: Model, overwrite: Optional[bool]=False, out_dir: Optional[str]=None) -> bool:
|
||||
if is_static_file(snapshot.url):
|
||||
False
|
||||
|
||||
# if snapshot already has valid title, skip it
|
||||
if not overwrite and snapshot.title and not snapshot.title.lower().startswith('http'):
|
||||
return False
|
||||
|
||||
return SAVE_TITLE
|
||||
|
|
|
@ -38,10 +38,10 @@ from ..logging_util import TimedProgress
|
|||
|
||||
|
||||
@enforce_types
|
||||
def should_save_wget(snapshot: Model, out_dir: Optional[Path]=None) -> bool:
|
||||
def should_save_wget(snapshot: Model, overwrite: Optional[bool]=False, out_dir: Optional[Path]=None) -> bool:
|
||||
output_path = wget_output_path(snapshot)
|
||||
out_dir = out_dir or Path(snapshot.snapshot_dir)
|
||||
if output_path and (out_dir / output_path).exists():
|
||||
if not overwrite output_path and (out_dir / output_path).exists():
|
||||
return False
|
||||
|
||||
return SAVE_WGET
|
||||
|
@ -68,7 +68,7 @@ def save_wget(snapshot: Model, out_dir: Optional[Path]=None, timeout: int=TIMEOU
|
|||
*(['--warc-file={}'.format(str(warc_path))] if SAVE_WARC else []),
|
||||
*(['--page-requisites'] if SAVE_WGET_REQUISITES else []),
|
||||
*(['--user-agent={}'.format(WGET_USER_AGENT)] if WGET_USER_AGENT else []),
|
||||
*(['--load-cookies', COOKIES_FILE] if COOKIES_FILE else []),
|
||||
*(['--load-cookies', str(COOKIES_FILE)] if COOKIES_FILE else []),
|
||||
*(['--compression=auto'] if WGET_AUTO_COMPRESSION else []),
|
||||
*([] if SAVE_WARC else ['--timestamping']),
|
||||
*([] if CHECK_SSL_VALIDITY else ['--no-check-certificate', '--no-hsts']),
|
||||
|
@ -177,11 +177,22 @@ def wget_output_path(snapshot: Model) -> Optional[str]:
|
|||
if html_files:
|
||||
return str(html_files[0].relative_to(snapshot.snapshot_dir))
|
||||
|
||||
# sometimes wget'd URLs have no ext and return non-html
|
||||
# e.g. /some/example/rss/all -> some RSS XML content)
|
||||
# /some/other/url.o4g -> some binary unrecognized ext)
|
||||
# test this with archivebox add --depth=1 https://getpocket.com/users/nikisweeting/feed/all
|
||||
last_part_of_url = urldecode(full_path.rsplit('/', 1)[-1])
|
||||
for file_present in search_dir.iterdir():
|
||||
if file_present == last_part_of_url:
|
||||
return str(search_dir / file_present)
|
||||
|
||||
# Move up one directory level
|
||||
search_dir = search_dir.parent
|
||||
|
||||
if search_dir == snapshot.snapshot_dir:
|
||||
break
|
||||
|
||||
|
||||
|
||||
search_dir = Path(snapshot.snapshot_dir) / domain(snapshot.url).replace(":", "+") / urldecode(full_path)
|
||||
if not search_dir.is_dir():
|
||||
|
|
|
@ -4,8 +4,8 @@ from datetime import datetime
|
|||
from typing import List, Optional, Iterator, Mapping
|
||||
from pathlib import Path
|
||||
|
||||
from django.utils.html import format_html
|
||||
from django.db.models import Model
|
||||
from django.utils.html import format_html, mark_safe
|
||||
from collections import defaultdict
|
||||
|
||||
from .schema import Link
|
||||
|
@ -119,7 +119,7 @@ def snapshot_icons(snapshot) -> str:
|
|||
path = snapshot.archive_path
|
||||
canon = snapshot.canonical_outputs()
|
||||
output = ""
|
||||
output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>'
|
||||
output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{}</a> '
|
||||
icons = {
|
||||
"singlefile": "❶",
|
||||
"wget": "🆆",
|
||||
|
@ -145,12 +145,12 @@ def snapshot_icons(snapshot) -> str:
|
|||
for extractor, _ in EXTRACTORS:
|
||||
if extractor not in exclude:
|
||||
exists = extractor_items[extractor] is not None
|
||||
output += output_template.format(path, canon[f"{extractor}_path"], str(exists),
|
||||
output += format_html(output_template, path, canon[f"{extractor}_path"], str(exists),
|
||||
extractor, icons.get(extractor, "?"))
|
||||
if extractor == "wget":
|
||||
# warc isn't technically it's own extractor, so we have to add it after wget
|
||||
exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
|
||||
output += output_template.format(exists[0] if exists else '#', canon["warc_path"], str(bool(exists)), "warc", icons.get("warc", "?"))
|
||||
output += format_html(output_template, exists[0] if exists else '#', canon["warc_path"], str(bool(exists)), "warc", icons.get("warc", "?"))
|
||||
|
||||
if extractor == "archive_org":
|
||||
# The check for archive_org is different, so it has to be handled separately
|
||||
|
@ -159,4 +159,4 @@ def snapshot_icons(snapshot) -> str:
|
|||
output += '<a href="{}" class="exists-{}" title="{}">{}</a> '.format(canon["archive_org_path"], str(exists),
|
||||
"archive_org", icons.get("archive_org", "?"))
|
||||
|
||||
return format_html(f'<span class="files-icons" style="font-size: 1.1em; opacity: 0.8">{output}<span>')
|
||||
return format_html('<span class="files-icons" style="font-size: 1.1em; opacity: 0.8; min-width: 240px; display: inline-block">{}<span>', mark_safe(output))
|
||||
|
|
|
@ -1070,6 +1070,7 @@ def server(runserver_args: Optional[List[str]]=None,
|
|||
reload: bool=False,
|
||||
debug: bool=False,
|
||||
init: bool=False,
|
||||
createsuperuser: bool=False,
|
||||
out_dir: Path=OUTPUT_DIR) -> None:
|
||||
"""Run the ArchiveBox HTTP server"""
|
||||
|
||||
|
@ -1078,6 +1079,9 @@ def server(runserver_args: Optional[List[str]]=None,
|
|||
if init:
|
||||
run_subcommand('init', stdin=None, pwd=out_dir)
|
||||
|
||||
if createsuperuser:
|
||||
run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
|
||||
|
||||
# setup config for django runserver
|
||||
from . import config
|
||||
config.SHOW_PROGRESS = False
|
||||
|
|
|
@ -5,7 +5,7 @@ from sonic import IngestClient, SearchClient
|
|||
from archivebox.util import enforce_types
|
||||
from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD, SONIC_BUCKET, SONIC_COLLECTION
|
||||
|
||||
MAX_SONIC_TEXT_LENGTH = 20000
|
||||
MAX_SONIC_TEXT_LENGTH = 2000
|
||||
|
||||
@enforce_types
|
||||
def index(snapshot_id: str, texts: List[str]):
|
||||
|
|
|
@ -68,4 +68,6 @@
|
|||
</div>
|
||||
{% endblock %}
|
||||
|
||||
{% block footer %}{% endblock %}
|
||||
|
||||
{% block sidebar %}{% endblock %}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
{% load admin_urls %}
|
||||
{% load static %}
|
||||
|
||||
<!DOCTYPE html>
|
||||
|
@ -7,222 +8,8 @@
|
|||
<title>Archived Sites</title>
|
||||
<meta charset="utf-8" name="viewport" content="width=device-width, initial-scale=1">
|
||||
|
||||
<style>
|
||||
:root {
|
||||
--bg-main: #efefef;
|
||||
--accent-1: #aa1e55;
|
||||
--accent-2: #ffebeb;
|
||||
--accent-3: #efefef;
|
||||
|
||||
--text-1: #1c1c1c;
|
||||
--text-2: #eaeaea;
|
||||
--text-main: #1a1a1a;
|
||||
--font-main: "Gill Sans", Helvetica, sans-serif;
|
||||
}
|
||||
|
||||
/* Dark Mode (WIP) */
|
||||
/*
|
||||
@media (prefers-color-scheme: dark) {
|
||||
:root {
|
||||
--accent-2: hsl(160, 100%, 96%);
|
||||
|
||||
--text-1: #eaeaea;
|
||||
--text-2: #1a1a1a;
|
||||
--bg-main: #101010;
|
||||
}
|
||||
|
||||
#table-bookmarks_wrapper,
|
||||
#table-bookmarks_wrapper img,
|
||||
tbody td:nth-child(3),
|
||||
tbody td:nth-child(3) span,
|
||||
footer {
|
||||
filter: invert(100%);
|
||||
}
|
||||
}*/
|
||||
|
||||
html,
|
||||
body {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
font-size: 18px;
|
||||
font-weight: 200;
|
||||
text-align: center;
|
||||
margin: 0px;
|
||||
padding: 0px;
|
||||
font-family: var(--font-main);
|
||||
}
|
||||
|
||||
.header-top small {
|
||||
font-weight: 200;
|
||||
color: var(--accent-3);
|
||||
}
|
||||
|
||||
.header-top {
|
||||
width: 100%;
|
||||
height: auto;
|
||||
min-height: 40px;
|
||||
margin: 0px;
|
||||
text-align: center;
|
||||
color: white;
|
||||
font-size: calc(11px + 0.84vw);
|
||||
font-weight: 200;
|
||||
padding: 4px 4px;
|
||||
border-bottom: 3px solid var(--accent-1);
|
||||
background-color: var(--accent-1);
|
||||
}
|
||||
|
||||
input[type=search] {
|
||||
width: 22vw;
|
||||
border-radius: 4px;
|
||||
border: 1px solid #aeaeae;
|
||||
padding: 3px 5px;
|
||||
}
|
||||
|
||||
.nav>div {
|
||||
min-height: 30px;
|
||||
}
|
||||
|
||||
.header-top a {
|
||||
text-decoration: none;
|
||||
color: rgba(0, 0, 0, 0.6);
|
||||
}
|
||||
|
||||
.header-top a:hover {
|
||||
text-decoration: none;
|
||||
color: rgba(0, 0, 0, 0.9);
|
||||
}
|
||||
|
||||
.header-top .col-lg-4 {
|
||||
text-align: center;
|
||||
padding-top: 4px;
|
||||
padding-bottom: 4px;
|
||||
}
|
||||
|
||||
.header-archivebox img {
|
||||
display: inline-block;
|
||||
margin-right: 3px;
|
||||
height: 30px;
|
||||
margin-left: 12px;
|
||||
margin-top: -4px;
|
||||
margin-bottom: 2px;
|
||||
}
|
||||
|
||||
.header-archivebox img:hover {
|
||||
opacity: 0.5;
|
||||
}
|
||||
|
||||
#table-bookmarks_length,
|
||||
#table-bookmarks_filter {
|
||||
padding-top: 12px;
|
||||
opacity: 0.8;
|
||||
padding-left: 24px;
|
||||
padding-right: 22px;
|
||||
margin-bottom: -16px;
|
||||
}
|
||||
|
||||
table {
|
||||
padding: 6px;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
table thead th {
|
||||
font-weight: 400;
|
||||
}
|
||||
|
||||
table tr {
|
||||
height: 35px;
|
||||
}
|
||||
|
||||
tbody tr:nth-child(odd) {
|
||||
background-color: var(--accent-2) !important;
|
||||
}
|
||||
|
||||
table tr td {
|
||||
white-space: nowrap;
|
||||
overflow: hidden;
|
||||
/*padding-bottom: 0.4em;*/
|
||||
/*padding-top: 0.4em;*/
|
||||
padding-left: 2px;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
table tr td a {
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
table tr td img,
|
||||
table tr td object {
|
||||
display: inline-block;
|
||||
margin: auto;
|
||||
height: 24px;
|
||||
width: 24px;
|
||||
padding: 0px;
|
||||
padding-right: 5px;
|
||||
vertical-align: middle;
|
||||
margin-left: 4px;
|
||||
}
|
||||
|
||||
#table-bookmarks {
|
||||
width: 100%;
|
||||
overflow-y: scroll;
|
||||
table-layout: fixed;
|
||||
}
|
||||
|
||||
.dataTables_wrapper {
|
||||
background-color: #fafafa;
|
||||
}
|
||||
|
||||
table tr a span[data-archived~=False] {
|
||||
opacity: 0.4;
|
||||
}
|
||||
|
||||
.files-spinner {
|
||||
height: 15px;
|
||||
width: auto;
|
||||
opacity: 0.5;
|
||||
vertical-align: -2px;
|
||||
}
|
||||
|
||||
.in-progress {
|
||||
display: none;
|
||||
}
|
||||
|
||||
tr td a.favicon img {
|
||||
padding-left: 6px;
|
||||
padding-right: 12px;
|
||||
vertical-align: -4px;
|
||||
}
|
||||
|
||||
tr td a.title {
|
||||
font-size: 1.4em;
|
||||
text-decoration: none;
|
||||
color: black;
|
||||
}
|
||||
|
||||
tr td a.title small {
|
||||
background-color: var(--accent-3);
|
||||
border-radius: 4px;
|
||||
float: right
|
||||
}
|
||||
|
||||
input[type=search]::-webkit-search-cancel-button {
|
||||
-webkit-appearance: searchfield-cancel-button;
|
||||
}
|
||||
|
||||
.title-col {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
.title-col a {
|
||||
color: black;
|
||||
}
|
||||
|
||||
.exists-False {
|
||||
opacity: 0.1;
|
||||
filter: grayscale(100%);
|
||||
pointer-events: none;
|
||||
}
|
||||
</style>
|
||||
<link rel="stylesheet" href="{% static 'admin/css/base.css' %}">
|
||||
<link rel="stylesheet" type="text/css" href="{% static 'admin.css' %}">
|
||||
<link rel="stylesheet" href="{% static 'bootstrap.min.css' %}">
|
||||
<link rel="stylesheet" href="{% static 'jquery.dataTables.min.css' %}" />
|
||||
{% block extra_head %}
|
||||
|
@ -247,38 +34,51 @@
|
|||
<base href="{% url 'Home' %}">
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<div class="header-top container-fluid">
|
||||
<div class="row nav">
|
||||
<div class="col-sm-2">
|
||||
<div id="container">
|
||||
<div id="header">
|
||||
<div id="branding">
|
||||
<h1 id="site-name">
|
||||
<a href="{% url 'public-index' %}" class="header-archivebox" title="Last updated: {{updated}}">
|
||||
<img src="{% static 'archive.png' %}" alt="Logo" />
|
||||
ArchiveBox: Index
|
||||
<img src="{% static 'archive.png' %}" alt="Logo" style="height: 30px"/>
|
||||
ArchiveBox
|
||||
</a>
|
||||
</div>
|
||||
<div class="col-sm-10" style="text-align: right">
|
||||
<a href="/add/">Add Links</a> |
|
||||
<a href="/admin/core/snapshot/">Admin</a> |
|
||||
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki">Docs</a>
|
||||
</div>
|
||||
</h1>
|
||||
</div>
|
||||
<div id="user-tools">
|
||||
<a href="/add/">➕ Add</a> /
|
||||
<a href="/">Snapshots</a> /
|
||||
<a href="/admin/">Admin</a> /
|
||||
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki">Docs</a>
|
||||
{% if user.is_authenticated %}
|
||||
|
||||
User
|
||||
<strong>{% firstof user.get_short_name user.get_username %}</strong>
|
||||
{% if user.has_usable_password %}
|
||||
<a href="{% url 'admin:password_change' %}">Change password</a> /
|
||||
{% endif %}
|
||||
<a href="{% url 'admin:logout' %}">Log out</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</header>
|
||||
{% block body %}
|
||||
{% endblock %}
|
||||
<br>
|
||||
<footer>
|
||||
<br />
|
||||
<center>
|
||||
<small>
|
||||
Archive created using <a href="https://github.com/ArchiveBox/ArchiveBox" title="Github">ArchiveBox</a> version
|
||||
<a href="https://github.com/ArchiveBox/ArchiveBox/releases" title="Releases">v{{VERSION}}</a>.
|
||||
<br/><br/>
|
||||
{{FOOTER_INFO}}
|
||||
</small>
|
||||
</center>
|
||||
<br />
|
||||
</footer>
|
||||
<div id="content" class="flex">
|
||||
{% block body %}
|
||||
{% endblock %}
|
||||
</div>
|
||||
{% block footer %}
|
||||
<footer>
|
||||
<br />
|
||||
<center>
|
||||
<small>
|
||||
Archive created using <a href="https://github.com/ArchiveBox/ArchiveBox" title="Github">ArchiveBox</a> version
|
||||
<a href="https://github.com/ArchiveBox/ArchiveBox/releases" title="Releases">v{{VERSION}}</a>.
|
||||
<br/><br/>
|
||||
{{FOOTER_INFO}}
|
||||
</small>
|
||||
</center>
|
||||
<br />
|
||||
</footer>
|
||||
{% endblock %}
|
||||
</div>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
|
|
|
@ -2,13 +2,21 @@
|
|||
{% load static %}
|
||||
|
||||
{% block body %}
|
||||
<br>
|
||||
<form action="{% url 'public-index' %}" method="get">
|
||||
<input name="q" type="text" placeholder="Search...">
|
||||
<button type="submit">Search</button>
|
||||
<button onclick="location.href='{% url 'public-index' %}'" type="button">
|
||||
Reload Index</button>
|
||||
</form>
|
||||
<div id="toolbar">
|
||||
<form id="changelist-search" action="{% url 'public-index' %}" method="get">
|
||||
<div>
|
||||
<label for="searchbar"><img src="/static/admin/img/search.svg" alt="Search"></label>
|
||||
<input type="text" size="40" name="q" value="" id="searchbar" autofocus placeholder="Title, URL, tags, timestamp, or content...".>
|
||||
<input type="submit" value="Search" style="height: 36px; padding-top: 6px; margin: 8px"/>
|
||||
<input type="button"
|
||||
value="♺"
|
||||
title="Refresh..."
|
||||
onclick="location.href='{% url 'public-index' %}'"
|
||||
style="background-color: rgba(121, 174, 200, 0.8); height: 30px; font-size: 0.8em; margin-top: 12px; padding-top: 6px; float:right">
|
||||
</input>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
<table id="table-bookmarks">
|
||||
<thead>
|
||||
<tr>
|
||||
|
|
|
@ -243,7 +243,7 @@
|
|||
<center>
|
||||
<small>
|
||||
Archive created using <a href="https://github.com/ArchiveBox/ArchiveBox" title="Github">ArchiveBox</a>
|
||||
version <a href="https://github.com/ArchiveBox/ArchiveBox/tree/v{{VERSION}}" title="Git commit">v{{VERSION}}</a> |
|
||||
version <a href="https://github.com/ArchiveBox/ArchiveBox/tree/v{{version}}" title="Git commit">v{{version}}</a> |
|
||||
Download index as <a href="index.json" title="JSON summary of archived links.">JSON</a>
|
||||
<br/><br/>
|
||||
{{FOOTER_INFO}}
|
||||
|
|
|
@ -1,12 +1,14 @@
|
|||
{% load static %}
|
||||
|
||||
<tr>
|
||||
{% comment %}
|
||||
<!-- from upstream, may need to merge these two -->
|
||||
<td title="{{snapshot.timestamp}}"> {% if snapshot.bookmarked_date %} {{ snapshot.bookmarked_date }} {% else %} {{ snapshot.added }} {% endif %} </td>
|
||||
<td class="title-col">
|
||||
{% if snapshot.is_archived %}
|
||||
<a href="archive/{{snapshot.timestamp}}/index.html"><img src="archive/{{snapshot.timestamp}}/favicon.ico" class="link-favicon" decoding="async"></a>
|
||||
<a href="archive/{{snapshot.timestamp}}/index.html"><img src="archive/{{snapshot.timestamp}}/favicon.ico" class="snapshot-favicon" decoding="async"></a>
|
||||
{% else %}
|
||||
<a href="archive/{{snapshot.timestamp}}/index.html"><img src="{% static 'spinner.gif' %}" class="link-favicon" decoding="async"></a>
|
||||
<a href="archive/{{snapshot.timestamp}}/index.html"><img src="{% static 'spinner.gif' %}" class="snapshot-favicon" decoding="async"></a>
|
||||
{% endif %}
|
||||
<a href="archive/{{snapshot.timestamp}}/{{snapshot.canonical_outputs.wget_path}}" title="{{snapshot.title}}">
|
||||
<span data-title-for="{{snapshot.url}}" data-archived="{{snapshot.is_archived}}">{{snapshot.title|default:'Loading...'}}</span>
|
||||
|
@ -19,4 +21,39 @@
|
|||
</a>
|
||||
</td>
|
||||
<td style="text-align:left"><a href="{{snapshot.url}}">{{snapshot.url}}</a></td>
|
||||
{% endcomment %}
|
||||
|
||||
<td title="{{snapshot.timestamp}}"> {% if snapshot.bookmarked_date %} {{ snapshot.bookmarked_date }} {% else %} {{ snapshot.added }} {% endif %} </td>
|
||||
<td class="title-col" style="opacity: {% if snapshot.title %}1{% else %}0.3{% endif %}">
|
||||
{% if snapshot.is_archived %}
|
||||
<a href="archive/{{snapshot.timestamp}}/index.html"><img src="archive/{{snapshot.timestamp}}/favicon.ico" class="snapshot-favicon" decoding="async"></a>
|
||||
{% else %}
|
||||
<a href="archive/{{snapshot.timestamp}}/index.html"><img src="{% static 'spinner.gif' %}" class="snapshot-favicon" decoding="async" style="height: 15px"></a>
|
||||
{% endif %}
|
||||
|
||||
<a href="archive/{{snapshot.timestamp}}/index.html" title="{{snapshot.title|default:'Not yet archived...'}}">
|
||||
<span data-title-for="{{snapshot.url}}" data-archived="{{snapshot.is_archived}}">{{snapshot.title|default:'Loading...'}}</span>
|
||||
{% if snapshot.tags_str %}
|
||||
<span class="tags" style="float: right; border-radius: 5px; background-color: #bfdfff; padding: 2px 5px; margin-left: 4px; margin-top: 1px;">
|
||||
{% if snapshot.tags_str != None %}
|
||||
{{snapshot.tags_str|default:''}}
|
||||
{% else %}
|
||||
{{ snapshot.tags|default:'' }}
|
||||
{% endif %}
|
||||
</span>
|
||||
{% endif %}
|
||||
</a>
|
||||
</td>
|
||||
<td>
|
||||
<span data-number-for="{{snapshot.url}}" title="Fetching any missing files...">
|
||||
{% if snapshot.icons %}
|
||||
{{snapshot.icons}} <small style="float:right; opacity: 0.5">{{snapshot.num_outputs}}</small>
|
||||
{% else %}
|
||||
<a href="archive/{{snapshot.timestamp}}/index.html">📄
|
||||
{{snapshot.num_outputs}} <img src="{% static 'spinner.gif' %}" class="files-spinner" decoding="async" style="height: 15px"/>
|
||||
</a>
|
||||
{% endif %}
|
||||
</span>
|
||||
</td>
|
||||
<td style="text-align:left"><a href="{{snapshot.url}}">{{snapshot.url}}</a></td>
|
||||
</tr>
|
||||
|
|
|
@ -1,3 +1,13 @@
|
|||
header {
|
||||
font-family: "Roboto","Lucida Grande","DejaVu Sans","Bitstream Vera Sans",Verdana,Arial,sans-serif;
|
||||
font-size: 13px;
|
||||
color: white;
|
||||
height: 30px;
|
||||
}
|
||||
.header-top {
|
||||
color: white;
|
||||
}
|
||||
|
||||
.dashboard #content {
|
||||
width: 100%;
|
||||
margin-right: 0px;
|
||||
|
@ -60,3 +70,21 @@ ul#id_depth {
|
|||
box-sizing: border-box;
|
||||
animation: spin 2s linear infinite;
|
||||
}
|
||||
|
||||
|
||||
textarea, select {
|
||||
border-radius: 4px;
|
||||
border: 2px solid #004882;
|
||||
box-shadow: 4px 4px 4px rgba(0,0,0,0.02);
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
select option:not(:checked) {
|
||||
border: 1px dashed rgba(10,200,20,0.12);
|
||||
}
|
||||
select option:checked {
|
||||
border: 1px solid green;
|
||||
background-color: green;
|
||||
color: green;
|
||||
}
|
||||
|
||||
|
|
|
@ -224,7 +224,7 @@ body.model-snapshot.change-list #content .object-tools {
|
|||
100% { transform: rotate(360deg); }
|
||||
}
|
||||
|
||||
.tags > a > .tag {
|
||||
.tag {
|
||||
float: right;
|
||||
border-radius: 5px;
|
||||
background-color: #bfdfff;
|
||||
|
@ -232,3 +232,8 @@ body.model-snapshot.change-list #content .object-tools {
|
|||
margin-left: 4px;
|
||||
margin-top: 1px;
|
||||
}
|
||||
|
||||
.exists-False {
|
||||
opacity: 0.1;
|
||||
filter: grayscale(100%);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue