diff --git a/archivebox/index/sql.py b/archivebox/index/sql.py index 1e99f67c..c453df1c 100644 --- a/archivebox/index/sql.py +++ b/archivebox/index/sql.py @@ -7,7 +7,7 @@ from django.db.models import QuerySet from django.db import transaction from .schema import Link -from ..util import enforce_types +from ..util import enforce_types, parse_date from ..config import OUTPUT_DIR @@ -29,7 +29,8 @@ def remove_from_sql_main_index(snapshots: QuerySet, out_dir: Path=OUTPUT_DIR) -> @enforce_types def write_link_to_sql_index(link: Link): - from core.models import Snapshot + from core.models import Snapshot, ArchiveResult + from index.schema import ArchiveResult as LegacyArchiveResult info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys} tags = info.pop("tags") if tags is None: @@ -43,6 +44,40 @@ def write_link_to_sql_index(link: Link): snapshot, _ = Snapshot.objects.update_or_create(url=link.url, defaults=info) snapshot.save_tags(tags) + + for extractor, entries in link.history.items(): + for entry in entries: + if isinstance(entry, dict): + result, _ = ArchiveResult.objects.get_or_create( + snapshot_id=snapshot.id, + extractor=extractor, + start_ts=parse_date(entry['start_ts']), + defaults={ + 'end_ts': parse_date(entry['end_ts']), + 'cmd': entry['cmd'], + 'output': entry['output'], + 'cmd_version': entry.get('cmd_version') or 'unknown', + 'pwd': entry['pwd'], + 'status': entry['status'], + } + ) + else: + result, _ = ArchiveResult.objects.get_or_create( + snapshot_id=snapshot.id, + extractor=extractor, + start_ts=parse_date(entry.start_ts), + defaults={ + 'end_ts': parse_date(entry.end_ts), + 'cmd': entry.cmd, + 'output': entry.output, + 'cmd_version': entry.cmd_version or 'unknown', + 'pwd': entry.pwd, + 'status': entry.status, + } + ) + + print(result) + return snapshot diff --git a/archivebox/main.py b/archivebox/main.py index 4acfa81f..26129b6c 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -411,7 +411,7 @@ def init(force: bool=False, quick: bool=False, out_dir: Path=OUTPUT_DIR) -> None if existing_index: print('{green}[√] Done. Verified and updated the existing ArchiveBox collection.{reset}'.format(**ANSI)) else: - print('{green}[√] Done. A new ArchiveBox collection was initialized ({} links).{reset}'.format(len(all_links), **ANSI)) + print('{green}[√] Done. A new ArchiveBox collection was initialized ({} links).{reset}'.format(len(all_links) + len(pending_links), **ANSI)) if Snapshot.objects.count() < 20: # hide the hints for experienced users print()