mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-12 22:25:44 -04:00
load ArchiveResults from orphaned links history during init
This commit is contained in:
parent
d0f8a5e710
commit
bdf1b102be
2 changed files with 38 additions and 3 deletions
|
@ -7,7 +7,7 @@ from django.db.models import QuerySet
|
|||
from django.db import transaction
|
||||
|
||||
from .schema import Link
|
||||
from ..util import enforce_types
|
||||
from ..util import enforce_types, parse_date
|
||||
from ..config import OUTPUT_DIR
|
||||
|
||||
|
||||
|
@ -29,7 +29,8 @@ def remove_from_sql_main_index(snapshots: QuerySet, out_dir: Path=OUTPUT_DIR) ->
|
|||
|
||||
@enforce_types
|
||||
def write_link_to_sql_index(link: Link):
|
||||
from core.models import Snapshot
|
||||
from core.models import Snapshot, ArchiveResult
|
||||
from index.schema import ArchiveResult as LegacyArchiveResult
|
||||
info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys}
|
||||
tags = info.pop("tags")
|
||||
if tags is None:
|
||||
|
@ -43,6 +44,40 @@ def write_link_to_sql_index(link: Link):
|
|||
|
||||
snapshot, _ = Snapshot.objects.update_or_create(url=link.url, defaults=info)
|
||||
snapshot.save_tags(tags)
|
||||
|
||||
for extractor, entries in link.history.items():
|
||||
for entry in entries:
|
||||
if isinstance(entry, dict):
|
||||
result, _ = ArchiveResult.objects.get_or_create(
|
||||
snapshot_id=snapshot.id,
|
||||
extractor=extractor,
|
||||
start_ts=parse_date(entry['start_ts']),
|
||||
defaults={
|
||||
'end_ts': parse_date(entry['end_ts']),
|
||||
'cmd': entry['cmd'],
|
||||
'output': entry['output'],
|
||||
'cmd_version': entry.get('cmd_version') or 'unknown',
|
||||
'pwd': entry['pwd'],
|
||||
'status': entry['status'],
|
||||
}
|
||||
)
|
||||
else:
|
||||
result, _ = ArchiveResult.objects.get_or_create(
|
||||
snapshot_id=snapshot.id,
|
||||
extractor=extractor,
|
||||
start_ts=parse_date(entry.start_ts),
|
||||
defaults={
|
||||
'end_ts': parse_date(entry.end_ts),
|
||||
'cmd': entry.cmd,
|
||||
'output': entry.output,
|
||||
'cmd_version': entry.cmd_version or 'unknown',
|
||||
'pwd': entry.pwd,
|
||||
'status': entry.status,
|
||||
}
|
||||
)
|
||||
|
||||
print(result)
|
||||
|
||||
return snapshot
|
||||
|
||||
|
||||
|
|
|
@ -411,7 +411,7 @@ def init(force: bool=False, quick: bool=False, out_dir: Path=OUTPUT_DIR) -> None
|
|||
if existing_index:
|
||||
print('{green}[√] Done. Verified and updated the existing ArchiveBox collection.{reset}'.format(**ANSI))
|
||||
else:
|
||||
print('{green}[√] Done. A new ArchiveBox collection was initialized ({} links).{reset}'.format(len(all_links), **ANSI))
|
||||
print('{green}[√] Done. A new ArchiveBox collection was initialized ({} links).{reset}'.format(len(all_links) + len(pending_links), **ANSI))
|
||||
|
||||
if Snapshot.objects.count() < 20: # hide the hints for experienced users
|
||||
print()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue