mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-16 08:04:26 -04:00
refactor: Initial and dirty refactor to replace link with snapshot. Barely functional add command
This commit is contained in:
parent
8e2270e21b
commit
8c4ae73d65
13 changed files with 246 additions and 233 deletions
|
@ -3,8 +3,9 @@ __package__ = 'archivebox.index'
|
|||
from io import StringIO
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple, Iterator
|
||||
from django.db.models import QuerySet
|
||||
from django.db.models import QuerySet, Model
|
||||
from django.db import transaction
|
||||
from datetime import datetime
|
||||
|
||||
from .schema import Link
|
||||
from ..util import enforce_types
|
||||
|
@ -28,21 +29,20 @@ def remove_from_sql_main_index(snapshots: QuerySet, out_dir: Path=OUTPUT_DIR) ->
|
|||
snapshots.delete()
|
||||
|
||||
@enforce_types
|
||||
def write_link_to_sql_index(link: Link):
|
||||
def write_snapshot_to_index(snapshot: Model):
|
||||
from core.models import Snapshot
|
||||
info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys}
|
||||
tags = info.pop("tags")
|
||||
if tags is None:
|
||||
tags = []
|
||||
|
||||
try:
|
||||
info["timestamp"] = Snapshot.objects.get(url=link.url).timestamp
|
||||
timestamp = Snapshot.objects.get(url=snapshot.url).timestamp
|
||||
except Snapshot.DoesNotExist:
|
||||
while Snapshot.objects.filter(timestamp=info["timestamp"]).exists():
|
||||
info["timestamp"] = str(float(info["timestamp"]) + 1.0)
|
||||
timestamp = snapshot.timestamp
|
||||
if not timestamp:
|
||||
timestamp = str(datetime.now().timestamp())
|
||||
while Snapshot.objects.filter(timestamp=timestamp).exists():
|
||||
print("the timestamp is: ", timestamp)
|
||||
timestamp = str(float(timestamp) + 1.0)
|
||||
|
||||
snapshot, _ = Snapshot.objects.update_or_create(url=link.url, defaults=info)
|
||||
snapshot.save_tags(tags)
|
||||
snapshot.timestamp = timestamp
|
||||
snapshot.save()
|
||||
return snapshot
|
||||
|
||||
|
||||
|
@ -50,27 +50,29 @@ def write_link_to_sql_index(link: Link):
|
|||
def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None:
|
||||
with transaction.atomic():
|
||||
for link in links:
|
||||
write_link_to_sql_index(link)
|
||||
write_snapshot_to_index(link)
|
||||
|
||||
|
||||
@enforce_types
|
||||
def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None:
|
||||
def write_sql_snapshot_details(snapshot: Model, out_dir: Path=OUTPUT_DIR) -> None:
|
||||
from core.models import Snapshot
|
||||
|
||||
with transaction.atomic():
|
||||
try:
|
||||
snap = Snapshot.objects.get(url=link.url)
|
||||
snap = Snapshot.objects.get(url=snapshot.url)
|
||||
except Snapshot.DoesNotExist:
|
||||
snap = write_link_to_sql_index(link)
|
||||
snap.title = link.title
|
||||
snap = write_snapshot_to_sql_index(snapshot)
|
||||
snap.title = snapshot.title
|
||||
|
||||
tag_set = (
|
||||
set(tag.strip() for tag in (link.tags or '').split(','))
|
||||
)
|
||||
tag_list = list(tag_set) or []
|
||||
# TODO: If there are actual tags, this will break
|
||||
#tag_set = (
|
||||
# set(tag.strip() for tag in (snapshot.tags.all() or '').split(','))
|
||||
#)
|
||||
#tag_list = list(tag_set) or []
|
||||
|
||||
snap.save()
|
||||
snap.save_tags(tag_list)
|
||||
#snap.save_tags(tag_list)
|
||||
return snap
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue