mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-06-03 08:08:43 -04:00
feat: Update `update` command to work with querysets
This commit is contained in:
parent
dafa1dd63c
commit
f55153eab3
4 changed files with 84 additions and 56 deletions
|
@ -392,45 +392,50 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type
|
|||
return snapshots.filter(q_filter)
|
||||
|
||||
|
||||
def get_indexed_folders(links, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
def get_indexed_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
"""indexed links without checking archive status or data directory validity"""
|
||||
links = [snapshot.as_link() for snapshot in snapshots.iterator()]
|
||||
return {
|
||||
link.link_dir: link
|
||||
for link in links
|
||||
}
|
||||
|
||||
def get_archived_folders(links, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
def get_archived_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
"""indexed links that are archived with a valid data directory"""
|
||||
links = [snapshot.as_link() for snapshot in snapshots.iterator()]
|
||||
return {
|
||||
link.link_dir: link
|
||||
for link in filter(is_archived, links)
|
||||
}
|
||||
|
||||
def get_unarchived_folders(links, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
def get_unarchived_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
"""indexed links that are unarchived with no data directory or an empty data directory"""
|
||||
links = [snapshot.as_link() for snapshot in snapshots.iterator()]
|
||||
return {
|
||||
link.link_dir: link
|
||||
for link in filter(is_unarchived, links)
|
||||
}
|
||||
|
||||
def get_present_folders(links, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
def get_present_folders(_snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
"""dirs that actually exist in the archive/ folder"""
|
||||
|
||||
all_folders = {}
|
||||
|
||||
for entry in os.scandir(os.path.join(out_dir, ARCHIVE_DIR_NAME)):
|
||||
if entry.is_dir(follow_symlinks=True):
|
||||
for entry in (Path(out_dir) / ARCHIVE_DIR_NAME).iterdir():
|
||||
if entry.is_dir():
|
||||
link = None
|
||||
try:
|
||||
link = parse_json_link_details(entry.path)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
all_folders[entry.path] = link
|
||||
all_folders[entry.name] = link
|
||||
|
||||
return all_folders
|
||||
|
||||
def get_valid_folders(links, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
def get_valid_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
"""dirs with a valid index matched to the main index and archived content"""
|
||||
links = [snapshot.as_link() for snapshot in snapshots.iterator()]
|
||||
return {
|
||||
link.link_dir: link
|
||||
for link in filter(is_valid, links)
|
||||
|
|
|
@ -29,22 +29,28 @@ def remove_from_sql_main_index(snapshots: QuerySet, out_dir: str=OUTPUT_DIR) ->
|
|||
with transaction.atomic():
|
||||
snapshots.delete()
|
||||
|
||||
@enforce_types
def write_link_to_sql_index(link: Link):
    """Create or update the Snapshot row for ``link.url`` and return it.

    The fields written are the entries of ``link._asdict()`` whose keys
    appear in ``Snapshot.keys``.  If a Snapshot with this URL already
    exists, its stored timestamp is kept.  Otherwise the link's timestamp
    is incremented (as a float, by 1.0, then stored back as a string)
    until no existing Snapshot uses that timestamp.
    """
    from core.models import Snapshot

    snapshot_fields = {
        key: value
        for key, value in link._asdict().items()
        if key in Snapshot.keys
    }

    try:
        existing = Snapshot.objects.get(url=link.url)
    except Snapshot.DoesNotExist:
        # No row for this URL yet: pick a timestamp no other Snapshot has.
        while Snapshot.objects.filter(timestamp=snapshot_fields["timestamp"]).exists():
            bumped = float(snapshot_fields["timestamp"]) + 1.0
            snapshot_fields["timestamp"] = str(bumped)
    else:
        # Row already exists: preserve the timestamp it was saved with.
        snapshot_fields["timestamp"] = existing.timestamp

    # update_or_create returns (object, created); callers only get the object.
    snapshot, _created = Snapshot.objects.update_or_create(
        url=link.url,
        defaults=snapshot_fields,
    )
    return snapshot
|
||||
|
||||
|
||||
@enforce_types
|
||||
def write_sql_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:
|
||||
setup_django(out_dir, check_db=True)
|
||||
from core.models import Snapshot
|
||||
from django.db import transaction
|
||||
|
||||
with transaction.atomic():
|
||||
for link in links:
|
||||
info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys}
|
||||
try:
|
||||
info["timestamp"] = Snapshot.objects.get(url=link.url).timestamp
|
||||
except Snapshot.DoesNotExist:
|
||||
while Snapshot.objects.filter(timestamp=info["timestamp"]).exists():
|
||||
info["timestamp"] = str(float(info["timestamp"]) + 1.0)
|
||||
|
||||
Snapshot.objects.update_or_create(url=link.url, defaults=info)
|
||||
write_link_to_sql_index(link)
|
||||
|
||||
|
||||
@enforce_types
|
||||
def write_sql_link_details(link: Link, out_dir: str=OUTPUT_DIR) -> None:
|
||||
|
@ -53,7 +59,10 @@ def write_sql_link_details(link: Link, out_dir: str=OUTPUT_DIR) -> None:
|
|||
from django.db import transaction
|
||||
|
||||
with transaction.atomic():
|
||||
snap = Snapshot.objects.get(url=link.url)
|
||||
try:
|
||||
snap = Snapshot.objects.get(url=link.url)
|
||||
except Snapshot.DoesNotExist:
|
||||
snap = write_link_to_sql_index(link)
|
||||
snap.title = link.title
|
||||
snap.tags = link.tags
|
||||
snap.save()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue