mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-15 23:54:27 -04:00
fix: Partially restore server
command functionality (html still needs some refactoring)
This commit is contained in:
parent
e95f14d1d0
commit
15d88be229
3 changed files with 66 additions and 29 deletions
|
@ -3,6 +3,7 @@ __package__ = 'archivebox.core'
|
||||||
import uuid
|
import uuid
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, Optional, List
|
from typing import Dict, Optional, List
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
from django.db import models, transaction
|
from django.db import models, transaction
|
||||||
from django.utils.functional import cached_property
|
from django.utils.functional import cached_property
|
||||||
|
@ -12,6 +13,7 @@ from django.db.models import Case, When, Value, IntegerField
|
||||||
from ..util import parse_date
|
from ..util import parse_date
|
||||||
from ..index.schema import Link
|
from ..index.schema import Link
|
||||||
from ..config import CONFIG
|
from ..config import CONFIG
|
||||||
|
from ..system import get_dir_size
|
||||||
|
|
||||||
#EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()]
|
#EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()]
|
||||||
EXTRACTORS = [("title", "title"), ("wget", "wget")]
|
EXTRACTORS = [("title", "title"), ("wget", "wget")]
|
||||||
|
@ -133,8 +135,36 @@ class Snapshot(models.Model):
|
||||||
return parse_date(self.timestamp)
|
return parse_date(self.timestamp)
|
||||||
|
|
||||||
@cached_property
def is_archived(self) -> bool:
    """Report whether any known archive output exists on disk for this snapshot.

    Checks ``ARCHIVE_DIR / self.timestamp`` for a path named after
    ``domain(self.url)`` or for one of a fixed set of output names, and
    returns True as soon as one of them exists.
    """
    # Function-scope imports, matching the other properties on this model.
    from ..config import ARCHIVE_DIR
    from ..util import domain

    candidate_names = (
        domain(self.url),
        'output.pdf',
        'screenshot.png',
        'output.html',
        'media',
        'singlefile.html',
    )

    for name in candidate_names:
        if (Path(ARCHIVE_DIR) / self.timestamp / name).exists():
            return True
    return False
|
||||||
|
|
||||||
|
@cached_property
def archive_dates(self) -> List[datetime]:
    """Collect the ``start_ts`` of every entry in ``self.archiveresult_set``.

    The values are returned in whatever order ``archiveresult_set.all()``
    yields them; no sorting is applied here.
    """
    dates = []
    for archive_result in self.archiveresult_set.all():
        dates.append(archive_result.start_ts)
    return dates
|
||||||
|
|
||||||
|
@cached_property
def oldest_archive_date(self) -> Optional[datetime]:
    """Return the earliest ``start_ts`` among this snapshot's archive results.

    Returns:
        The smallest ``start_ts`` found in ``self.archiveresult_set``, or
        ``None`` when the snapshot has no archive results yet.
    """
    # Ascending order puts the earliest result first. The previous
    # "-start_ts" ordering sorted newest-first, so this property returned
    # the *newest* date despite its name.
    earliest = list(self.archiveresult_set.all().order_by("start_ts")[:1])
    if earliest:
        return earliest[0].start_ts
    return None
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def num_outputs(self):
|
def num_outputs(self):
|
||||||
|
@ -145,8 +175,9 @@ class Snapshot(models.Model):
|
||||||
return self.as_link().url_hash
|
return self.as_link().url_hash
|
||||||
|
|
||||||
@cached_property
def base_url(self) -> str:
    """Return ``self.url`` passed through the ``base_url`` helper from ``..util``."""
    # Aliased so the helper is not confused with this property of the same name.
    from ..util import base_url as url_to_base

    shortened = url_to_base(self.url)
    return shortened
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def snapshot_dir(self):
|
def snapshot_dir(self):
|
||||||
|
@ -155,11 +186,15 @@ class Snapshot(models.Model):
|
||||||
|
|
||||||
@cached_property
def archive_path(self):
    """Return this snapshot's path as ``<ARCHIVE_DIR_NAME>/<timestamp>``.

    The result is a plain string joined with a forward slash, not a ``Path``.
    """
    from ..config import ARCHIVE_DIR_NAME

    return f'{ARCHIVE_DIR_NAME}/{self.timestamp}'
|
||||||
|
|
||||||
@cached_property
def archive_size(self) -> float:
    """Return the first element of ``get_dir_size(self.archive_path)``.

    Any exception raised while measuring the directory is swallowed and
    reported as a size of 0.
    """
    try:
        dir_stats = get_dir_size(self.archive_path)
        return dir_stats[0]
    except Exception:
        # Best-effort: a failed measurement is reported as size 0.
        return 0
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def history(self):
|
def history(self):
|
||||||
|
@ -191,7 +226,10 @@ class Snapshot(models.Model):
|
||||||
# TODO: Define what details are, and return them accordingly
|
# TODO: Define what details are, and return them accordingly
|
||||||
return {"history": {}}
|
return {"history": {}}
|
||||||
|
|
||||||
|
@property
def extension(self) -> str:
    """Return ``self.url`` passed through the ``extension`` helper from ``..util``."""
    # Aliased so the helper is not confused with this property of the same name.
    from ..util import extension as url_extension

    return url_extension(self.url)
|
||||||
|
|
||||||
def canonical_outputs(self) -> Dict[str, Optional[str]]:
|
def canonical_outputs(self) -> Dict[str, Optional[str]]:
|
||||||
"""predict the expected output paths that should be present after archiving"""
|
"""predict the expected output paths that should be present after archiving"""
|
||||||
|
|
|
@ -61,7 +61,7 @@ class LinkDetails(View):
|
||||||
by_ts = {page.timestamp: page for page in all_pages}
|
by_ts = {page.timestamp: page for page in all_pages}
|
||||||
try:
|
try:
|
||||||
# print('SERVING STATICFILE', by_ts[slug].link_dir, request.path, path)
|
# print('SERVING STATICFILE', by_ts[slug].link_dir, request.path, path)
|
||||||
response = static.serve(request, archivefile, document_root=by_ts[slug].link_dir, show_indexes=True)
|
response = static.serve(request, archivefile, document_root=by_ts[slug].snapshot_dir, show_indexes=True)
|
||||||
response["Link"] = f'<{by_ts[slug].url}>; rel="canonical"'
|
response["Link"] = f'<{by_ts[slug].url}>; rel="canonical"'
|
||||||
return response
|
return response
|
||||||
except KeyError:
|
except KeyError:
|
||||||
|
|
|
@ -61,10 +61,10 @@ def main_index_template(snapshots: List[Model], template: str=MAIN_INDEX_TEMPLAT
|
||||||
return render_django_template(template, {
|
return render_django_template(template, {
|
||||||
'version': VERSION,
|
'version': VERSION,
|
||||||
'git_sha': GIT_SHA,
|
'git_sha': GIT_SHA,
|
||||||
'num_links': str(len(snapshots)),
|
'num_snapshots': str(len(snapshots)),
|
||||||
'date_updated': datetime.now().strftime('%Y-%m-%d'),
|
'date_updated': datetime.now().strftime('%Y-%m-%d'),
|
||||||
'time_updated': datetime.now().strftime('%Y-%m-%d %H:%M'),
|
'time_updated': datetime.now().strftime('%Y-%m-%d %H:%M'),
|
||||||
'links': [snapshot.as_json() for snapshot in snapshots],
|
'snapshots': snapshots,
|
||||||
'FOOTER_INFO': FOOTER_INFO,
|
'FOOTER_INFO': FOOTER_INFO,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -80,30 +80,30 @@ def write_html_snapshot_details(snapshot: Model, out_dir: Optional[str]=None) ->
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
def link_details_template(snapshot: Model) -> str:
    """Render LINK_DETAILS_TEMPLATE for a single snapshot.

    The template context is the snapshot's ``_asdict()`` fields merged with
    its ``canonical_outputs()``, then overridden by the display-oriented
    keys built below (title, urls, size, status, ...).

    Args:
        snapshot: the snapshot model instance to render.

    Returns:
        The rendered template as returned by ``render_django_template``.
    """

    from ..extractors.wget import wget_output_path

    # Build the field dict once. Previously a bare `snapshot._asdict()`
    # statement computed it, discarded it, and computed it again below.
    snapshot_info = snapshot._asdict()
    is_archived = snapshot.is_archived

    return render_django_template(LINK_DETAILS_TEMPLATE, {
        **snapshot_info,
        **snapshot.canonical_outputs(),
        'title': htmlencode(
            snapshot.title
            or (snapshot.base_url if is_archived else TITLE_LOADING_MSG)
        ),
        'url_str': htmlencode(urldecode(snapshot.base_url)),
        'archive_url': urlencode(
            wget_output_path(snapshot)
            or (snapshot.domain if is_archived else '')
        ) or 'about:blank',
        'extension': snapshot.extension or 'html',
        'tags': snapshot.tags.all() or 'untagged',  # TODO: Return a proper comma separated list. Leaving it like this for now to revisit when fixing tags
        'size': printable_filesize(snapshot.archive_size) if snapshot.archive_size else 'pending',
        'status': 'archived' if is_archived else 'not yet archived',
        'status_color': 'success' if is_archived else 'danger',
        'oldest_archive_date': ts_to_date(snapshot.oldest_archive_date),
    })
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
|
@ -118,9 +118,8 @@ def snapshot_icons(snapshot) -> str:
|
||||||
from core.models import EXTRACTORS
|
from core.models import EXTRACTORS
|
||||||
|
|
||||||
archive_results = snapshot.archiveresult_set.filter(status="succeeded")
|
archive_results = snapshot.archiveresult_set.filter(status="succeeded")
|
||||||
link = snapshot.as_link()
|
path = snapshot.archive_path
|
||||||
path = link.archive_path
|
canon = snapshot.canonical_outputs()
|
||||||
canon = link.canonical_outputs()
|
|
||||||
output = ""
|
output = ""
|
||||||
output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>'
|
output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>'
|
||||||
icons = {
|
icons = {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue