mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 14:44:29 -04:00
don't dedupe snapshots in sqlite on every run
This commit is contained in:
parent
dfb83b4f27
commit
354a63ccd4
1 changed file with 1 addition and 16 deletions
|
@@ -26,23 +26,8 @@ def write_sql_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:
|
||||||
from core.models import Snapshot
|
from core.models import Snapshot
|
||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
|
|
||||||
all_urls = {link.url: link for link in links}
|
|
||||||
all_ts = {link.timestamp: link for link in links}
|
|
||||||
|
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
for snapshot in Snapshot.objects.all():
|
for link in links:
|
||||||
if snapshot.timestamp in all_ts:
|
|
||||||
info = {k: v for k, v in all_urls.pop(snapshot.url)._asdict().items() if k in Snapshot.keys}
|
|
||||||
snapshot.delete()
|
|
||||||
Snapshot.objects.create(**info)
|
|
||||||
elif snapshot.url in all_urls:
|
|
||||||
info = {k: v for k, v in all_urls.pop(snapshot.url)._asdict().items() if k in Snapshot.keys}
|
|
||||||
snapshot.delete()
|
|
||||||
Snapshot.objects.create(**info)
|
|
||||||
else:
|
|
||||||
snapshot.delete()
|
|
||||||
|
|
||||||
for url, link in all_urls.items():
|
|
||||||
info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys}
|
info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys}
|
||||||
Snapshot.objects.update_or_create(url=url, defaults=info)
|
Snapshot.objects.update_or_create(url=url, defaults=info)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue