Mirror of https://github.com/ArchiveBox/ArchiveBox.git (synced 2025-05-17 00:24:26 -04:00)

refactor: update command is functional

parent de3c82730c
commit d92083b928

4 changed files with 91 additions and 99 deletions
@@ -88,7 +88,7 @@ def archive_snapshot(snapshot: Model, overwrite: bool=False, methods: Optional[I
         details = {"history": {}}
         write_snapshot_details(snapshot, out_dir=out_dir, skip_sql_index=False)
     else:
-        details = list(load_snapshot_details(snapshot))
+        details = load_snapshot_details(snapshot)
 
     #log_link_archiving_started(link, out_dir, is_new)
     stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}
@@ -42,6 +42,7 @@ from .html import (
     write_html_snapshot_details,
 )
 from .json import (
+    load_json_snapshot_details,
     parse_json_snapshot_details,
     write_json_snapshot_details,
 )
@@ -318,9 +319,9 @@ def load_snapshot_details(snapshot: Model, out_dir: Optional[str]=None) -> Model
     """check for an existing link archive in the given directory,
     and load+merge it into the given link dict
     """
-    out_dir = out_dir or snapshot.snapshot_dir
+    out_dir = out_dir or Path(snapshot.snapshot_dir)
 
-    existing_snapshot = parse_json_snapshot_details(out_dir)
+    existing_snapshot = load_json_snapshot_details(out_dir)
     if existing_snapshot:
         return merge_snapshots(existing_snapshot, snapshot)
 
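The hunk above makes load_snapshot_details() coerce out_dir to a Path and read the on-disk details through load_json_snapshot_details() before merging them with the in-memory snapshot. A minimal standalone sketch of that load-then-merge pattern, assuming a plain dict in place of the Snapshot model and a simplified merge rule:

import json
from pathlib import Path

def load_details(snapshot_dir: Path, in_memory: dict) -> dict:
    """Prefer details already written to disk; fall back to the in-memory dict."""
    index_file = Path(snapshot_dir) / "index.json"
    if index_file.exists():
        try:
            on_disk = json.loads(index_file.read_text(encoding="utf-8"))
            # assumed merge rule: keep in-memory fields, take history from disk
            return {**in_memory, "history": on_disk.get("history", {})}
        except json.JSONDecodeError:
            pass
    return in_memory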
@@ -379,56 +380,41 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type
     return search_filter(snapshots, filter_patterns, filter_type)
 
 
-def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Model]]:
     """indexed links without checking archive status or data directory validity"""
-    links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
-    return {
-        link.link_dir: link
-        for link in links
-    }
+    return {snapshot.snapshot_dir: snapshot for snapshot in snapshots}
 
-def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Model]]:
     """indexed links that are archived with a valid data directory"""
-    links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
-    return {
-        link.link_dir: link
-        for link in filter(is_archived, links)
-    }
+    return {snapshot.snapshot_dir: snapshot for snapshot in filter(is_archived, snapshots)}
 
-def get_unarchived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_unarchived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Model]]:
     """indexed links that are unarchived with no data directory or an empty data directory"""
-    links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
-    return {
-        link.link_dir: link
-        for link in filter(is_unarchived, links)
-    }
+    return {snapshot.snapshot_dir: snapshot for snapshot in filter(is_unarchived, snapshots)}
 
-def get_present_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_present_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Model]]:
     """dirs that actually exist in the archive/ folder"""
+    from core.models import Snapshot
 
     all_folders = {}
 
     for entry in (out_dir / ARCHIVE_DIR_NAME).iterdir():
         if entry.is_dir():
-            link = None
+            snapshot = None
             try:
-                link = parse_json_link_details(entry.path)
+                snapshot = parse_json_snapshot_details(entry.path)
             except Exception:
                 pass
 
-            all_folders[entry.name] = link
+            all_folders[entry.name] = snapshot
 
     return all_folders
 
-def get_valid_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_valid_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Model]]:
     """dirs with a valid index matched to the main index and archived content"""
-    links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
-    return {
-        link.link_dir: link
-        for link in filter(is_valid, links)
-    }
+    return {snapshot.snapshot_dir: snapshot for snapshot in filter(is_valid, snapshots)}
 
-def get_invalid_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_invalid_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Model]]:
     """dirs that are invalid for any reason: corrupted/duplicate/orphaned/unrecognized"""
     duplicate = get_duplicate_folders(snapshots, out_dir=OUTPUT_DIR)
     orphaned = get_orphaned_folders(snapshots, out_dir=OUTPUT_DIR)
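The get_*_folders() helpers above now return {snapshot.snapshot_dir: snapshot} mappings built directly from the queryset, optionally filtered by a status predicate. The same pattern in isolation, with a small dataclass standing in for the Snapshot model (names here are illustrative, not ArchiveBox's):

from dataclasses import dataclass
from typing import Callable, Dict, Iterable, Optional

@dataclass
class Snap:                        # stand-in for core.models.Snapshot
    snapshot_dir: str
    is_archived: bool

def folders_by_status(snaps: Iterable[Snap],
                      predicate: Callable[[Snap], bool]) -> Dict[str, Optional[Snap]]:
    # same shape as get_archived_folders()/get_unarchived_folders(): dir path -> snapshot
    return {s.snapshot_dir: s for s in filter(predicate, snaps)}

snaps = [Snap("archive/1611111111", True), Snap("archive/1622222222", False)]
archived = folders_by_status(snaps, lambda s: s.is_archived)
unarchived = folders_by_status(snaps, lambda s: not s.is_archived)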
@@ -437,7 +423,7 @@ def get_invalid_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Option
     return {**duplicate, **orphaned, **corrupted, **unrecognized}
 
 
-def get_duplicate_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_duplicate_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Model]]:
     """dirs that conflict with other directories that have the same link URL or timestamp"""
     by_url = {}
     by_timestamp = {}
@@ -450,91 +436,92 @@ def get_duplicate_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Opti
     )
 
     for path in chain(snapshots.iterator(), data_folders):
-        link = None
+        snapshot = None
         if type(path) is not str:
-            path = path.as_link().link_dir
+            path = path.snapshot_dir
 
         try:
-            link = parse_json_link_details(path)
+            snapshot = parse_json_snapshot_details(path)
         except Exception:
             pass
 
-        if link:
-            # link folder has same timestamp as different link folder
-            by_timestamp[link.timestamp] = by_timestamp.get(link.timestamp, 0) + 1
-            if by_timestamp[link.timestamp] > 1:
-                duplicate_folders[path] = link
+        if snapshot:
+            # snapshot folder has same timestamp as different link folder
+            by_timestamp[snapshot.timestamp] = by_timestamp.get(snapshot.timestamp, 0) + 1
+            if by_timestamp[snapshot.timestamp] > 1:
+                duplicate_folders[path] = snapshot
 
             # link folder has same url as different link folder
-            by_url[link.url] = by_url.get(link.url, 0) + 1
-            if by_url[link.url] > 1:
-                duplicate_folders[path] = link
+            by_url[snapshot.url] = by_url.get(snapshot.url, 0) + 1
+            if by_url[snapshot.url] > 1:
+                duplicate_folders[path] = snapshot
     return duplicate_folders
 
-def get_orphaned_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_orphaned_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Model]]:
     """dirs that contain a valid index but aren't listed in the main index"""
     orphaned_folders = {}
 
     for entry in (Path(out_dir) / ARCHIVE_DIR_NAME).iterdir():
         if entry.is_dir():
-            link = None
+            snapshot = None
             try:
-                link = parse_json_link_details(str(entry))
+                snapshot = parse_json_snapshot_details(str(entry))
             except Exception:
                 pass
 
-            if link and not snapshots.filter(timestamp=entry.name).exists():
+            if snapshot and not snapshots.filter(timestamp=entry.name).exists():
                 # folder is a valid link data dir with index details, but it's not in the main index
-                orphaned_folders[str(entry)] = link
+                orphaned_folders[str(entry)] = snapshot
 
     return orphaned_folders
 
-def get_corrupted_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_corrupted_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Model]]:
     """dirs that don't contain a valid index and aren't listed in the main index"""
     corrupted = {}
     for snapshot in snapshots.iterator():
-        link = snapshot.as_link()
-        if is_corrupt(link):
-            corrupted[link.link_dir] = link
+        if is_corrupt(snapshot):
+            corrupted[snapshot.snapshot_dir] = snapshot
     return corrupted
 
-def get_unrecognized_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_unrecognized_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Model]]:
     """dirs that don't contain recognizable archive data and aren't listed in the main index"""
-    unrecognized_folders: Dict[str, Optional[Link]] = {}
+    unrecognized_folders: Dict[str, Optional[Model]] = {}
 
     for entry in (Path(out_dir) / ARCHIVE_DIR_NAME).iterdir():
         if entry.is_dir():
             index_exists = (entry / "index.json").exists()
-            link = None
+            snapshot = None
             try:
-                link = parse_json_link_details(str(entry))
+                snapshot = parse_json_snapshot_details(str(entry))
             except KeyError:
                 # Try to fix index
                 if index_exists:
-                    try:
-                        # Last attempt to repair the detail index
-                        link_guessed = parse_json_link_details(str(entry), guess=True)
-                        write_json_link_details(link_guessed, out_dir=str(entry))
-                        link = parse_json_link_details(str(entry))
-                    except Exception:
-                        pass
+                    pass
+                    # TODO: Implement the `guess` bit for snapshots
+                    # try:
+                    #     # Last attempt to repair the detail index
+                    #     link_guessed = parse_json_snapshot_details(str(entry), guess=True)
+                    #     write_json_snapshot_details(link_guessed, out_dir=str(entry))
+                    #     link = parse_json_link_details(str(entry))
+                    # except Exception:
+                    #     pass
 
-            if index_exists and link is None:
+            if index_exists and snapshot is None:
                 # index exists but it's corrupted or unparseable
-                unrecognized_folders[str(entry)] = link
+                unrecognized_folders[str(entry)] = snapshot
 
             elif not index_exists:
                 # link details index doesn't exist and the folder isn't in the main index
                 timestamp = entry.name
                 if not snapshots.filter(timestamp=timestamp).exists():
-                    unrecognized_folders[str(entry)] = link
+                    unrecognized_folders[str(entry)] = snapshot
 
     return unrecognized_folders
 
 
-def is_valid(link: Link) -> bool:
-    dir_exists = Path(link.link_dir).exists()
-    index_exists = (Path(link.link_dir) / "index.json").exists()
+def is_valid(snapshot: Model) -> bool:
+    dir_exists = Path(snapshot.snapshot_dir).exists()
+    index_exists = (Path(snapshot.snapshot_dir) / "index.json").exists()
     if not dir_exists:
         # unarchived links are not included in the valid list
         return False
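get_duplicate_folders() above flags a folder once a second folder turns up with the same timestamp or the same URL, by keeping simple occurrence counts. The counting idea reduced to a standalone sketch (the tuple layout and sample field names are assumptions for illustration):

from typing import Dict, List, Tuple

def find_duplicates(entries: List[Tuple[str, str, str]]) -> Dict[str, Tuple[str, str, str]]:
    """entries are (path, timestamp, url); returns folders whose timestamp or url repeats."""
    by_timestamp: Dict[str, int] = {}
    by_url: Dict[str, int] = {}
    duplicates: Dict[str, Tuple[str, str, str]] = {}
    for path, timestamp, url in entries:
        by_timestamp[timestamp] = by_timestamp.get(timestamp, 0) + 1
        if by_timestamp[timestamp] > 1:
            duplicates[path] = (path, timestamp, url)
        by_url[url] = by_url.get(url, 0) + 1
        if by_url[url] > 1:
            duplicates[path] = (path, timestamp, url)
    return duplicates

As in the code above, only the second and later occurrences get flagged; the first folder seen for a given timestamp or URL is left out of the result.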
@@ -542,29 +529,30 @@ def is_valid(link: Link) -> bool:
         return False
     if dir_exists and index_exists:
         try:
-            parsed_link = parse_json_link_details(link.link_dir, guess=True)
-            return link.url == parsed_link.url
+            # TODO: review if the `guess` was necessary here
+            parsed_snapshot = parse_json_snapshot_details(snapshot.snapshot_dir)
+            return snapshot.url == parsed_snapshot.url
         except Exception:
             pass
     return False
 
-def is_corrupt(link: Link) -> bool:
-    if not Path(link.link_dir).exists():
+def is_corrupt(snapshot: Model) -> bool:
+    if not Path(snapshot.snapshot_dir).exists():
         # unarchived links are not considered corrupt
         return False
 
-    if is_valid(link):
+    if is_valid(snapshot):
         return False
 
     return True
 
-def is_archived(link: Link) -> bool:
-    return is_valid(link) and link.is_archived
+def is_archived(snapshot: Model) -> bool:
+    return is_valid(snapshot) and snapshot.is_archived
 
-def is_unarchived(link: Link) -> bool:
-    if not Path(link.link_dir).exists():
+def is_unarchived(snapshot: Model) -> bool:
+    if not Path(snapshot.snapshot_dir).exists():
         return True
-    return not link.is_archived
+    return not snapshot.is_archived
 
 
 def fix_invalid_folder_locations(out_dir: Path=OUTPUT_DIR) -> Tuple[List[str], List[str]]:
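The predicates above fit together as: a snapshot is valid when its directory and index.json both exist and the stored URL matches; corrupt when the directory exists but the snapshot is not valid; archived and unarchived combine those checks with the is_archived flag. A condensed standalone sketch of those relationships, using a dataclass stand-in for the model and assuming the detail index stores a top-level "url" key:

import json
from dataclasses import dataclass
from pathlib import Path

@dataclass
class SnapshotLike:                # stand-in for the Django Snapshot model
    snapshot_dir: str
    url: str
    is_archived: bool

def is_valid(s: SnapshotLike) -> bool:
    d = Path(s.snapshot_dir)
    index_file = d / "index.json"
    if not d.exists() or not index_file.exists():
        return False
    try:
        return s.url == json.loads(index_file.read_text())["url"]
    except Exception:
        return False

def is_corrupt(s: SnapshotLike) -> bool:
    return Path(s.snapshot_dir).exists() and not is_valid(s)

def is_archived(s: SnapshotLike) -> bool:
    return is_valid(s) and s.is_archived

def is_unarchived(s: SnapshotLike) -> bool:
    return not Path(s.snapshot_dir).exists() or not s.is_archived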
@@ -574,22 +562,22 @@ def fix_invalid_folder_locations(out_dir: Path=OUTPUT_DIR) -> Tuple[List[str], L
         if entry.is_dir(follow_symlinks=True):
             if (Path(entry.path) / 'index.json').exists():
                 try:
-                    link = parse_json_link_details(entry.path)
+                    snapshot = parse_json_snapshot_details(entry.path)
                 except KeyError:
-                    link = None
-            if not link:
+                    snapshot = None
+            if not snapshot:
                 continue
 
             if not entry.path.endswith(f'/{link.timestamp}'):
-                dest = out_dir / ARCHIVE_DIR_NAME / link.timestamp
+                dest = out_dir / ARCHIVE_DIR_NAME / snapshot.timestamp
                 if dest.exists():
                     cant_fix.append(entry.path)
                 else:
                     shutil.move(entry.path, dest)
                     fixed.append(dest)
                     timestamp = entry.path.rsplit('/', 1)[-1]
-            assert link.link_dir == entry.path
-            assert link.timestamp == timestamp
-            write_json_link_details(link, out_dir=entry.path)
+            assert snapshot.snapshot_dir == entry.path
+            assert snapshot.timestamp == timestamp
+            write_json_snapshot_details(snapshot, out_dir=entry.path)
 
     return fixed, cant_fix
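fix_invalid_folder_locations() above walks archive/ and, when a data folder's path doesn't end in its own timestamp, moves it to archive/<timestamp>, or records it as unfixable if that destination is already taken. The move-or-skip decision in isolation (a sketch; the surrounding scan and index rewrite are omitted):

import shutil
from pathlib import Path
from typing import List

def relocate_misplaced(entry: Path, timestamp: str, archive_root: Path,
                       fixed: List[Path], cant_fix: List[str]) -> None:
    """Move a data folder to archive_root/<timestamp> unless something already lives there."""
    if entry.name == timestamp:
        return                        # already in the right place
    dest = archive_root / timestamp
    if dest.exists():
        cant_fix.append(str(entry))   # conflicting folder, leave for manual review
    else:
        shutil.move(str(entry), str(dest))
        fixed.append(dest)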
@@ -91,7 +91,7 @@ def write_json_snapshot_details(snapshot: Model, out_dir: Optional[str]=None) ->
 
 
 @enforce_types
-def load_snapshot_details(out_Dir: Path) -> Optional[Model]:
+def load_json_snapshot_details(out_dir: Path) -> Optional[Model]:
     """
     Loads the detail from the local json index
     """
@@ -99,7 +99,10 @@ def load_snapshot_details(out_Dir: Path) -> Optional[Model]:
     if existing_index.exists():
         with open(existing_index, 'r', encoding='utf-8') as f:
             try:
-                return pyjson.load(f)
+                output = pyjson.load(f)
+                if "history" not in output.keys():
+                    output["history"] = {}
+                return output
             except pyjson.JSONDecodeError:
                 pass
     return None
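load_json_snapshot_details() above now guarantees the returned dict always carries a "history" key, so callers like archive_snapshot() can append to it without checking first. A minimal standalone version of that loader (the index.json file name comes from the surrounding code; everything else is a sketch):

import json
from pathlib import Path
from typing import Optional

def load_json_details(out_dir: Path) -> Optional[dict]:
    index_file = Path(out_dir) / "index.json"
    if index_file.exists():
        try:
            output = json.loads(index_file.read_text(encoding="utf-8"))
            output.setdefault("history", {})   # same effect as the explicit key check above
            return output
        except json.JSONDecodeError:
            pass
    return None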
@@ -109,7 +112,7 @@ def load_snapshot_details(out_Dir: Path) -> Optional[Model]:
 def parse_json_snapshot_details(out_dir: Union[Path, str]) -> Iterator[dict]:
     """read through all the archive data folders and return the parsed links"""
 
-    for entry in os.scandir(Path(out_dir) / ARCHIVE_DIR_NAME):
+    for entry in os.scandir(Path(out_dir)):
         if entry.is_dir(follow_symlinks=True):
             if (Path(entry.path) / 'index.json').exists():
                 try:
@@ -9,7 +9,7 @@ from datetime import date
 
 from typing import Dict, List, Optional, Iterable, IO, Union
 from crontab import CronTab, CronSlices
-from django.db.models import QuerySet
+from django.db.models import QuerySet, Model
 
 from .cli import (
     list_subcommands,
@@ -689,15 +689,16 @@ def update(resume: Optional[float]=None,
            extractors: str="",
            out_dir: Path=OUTPUT_DIR) -> List[Link]:
     """Import any new links from subscriptions and retry any previously failed/skipped links"""
+    from core.models import Snapshot
 
     check_data_folder(out_dir=out_dir)
     check_dependencies()
-    new_links: List[Link] = [] # TODO: Remove input argument: only_new
+    new_links: List[Snapshot] = [] # TODO: Remove input argument: only_new
 
     extractors = extractors.split(",") if extractors else []
 
     # Step 1: Filter for selected_links
-    matching_snapshots = list_links(
+    matching_snapshots = list_snapshots(
         filter_patterns=filter_patterns,
         filter_type=filter_type,
         before=before,
@@ -705,15 +706,15 @@ def update(resume: Optional[float]=None,
     )
 
     matching_folders = list_folders(
-        links=matching_snapshots,
+        snapshots=matching_snapshots,
         status=status,
         out_dir=out_dir,
     )
     all_links = [link for link in matching_folders.values() if link]
 
     if index_only:
-        for link in all_links:
-            write_snapshot_details(link, out_dir=out_dir, skip_sql_index=True)
+        for snapshot in all_snapshots:
+            write_snapshot_details(snapshot, out_dir=out_dir, skip_sql_index=True)
         index_links(all_links, out_dir=out_dir)
         return all_links
 
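Between the two hunks above, update() now selects snapshots with list_snapshots(), buckets them by folder status with list_folders(), and either just rewrites the detail indexes (index_only) or hands the survivors to the archiving step. A rough sketch of that control flow with the collaborators passed in as callables, since the real functions live elsewhere in the codebase:

from typing import Callable, Dict, Iterable, List, Optional

def update_flow(snapshots: Iterable[dict],
                pick_folders: Callable[[Iterable[dict]], Dict[str, Optional[dict]]],
                write_details: Callable[[dict], None],
                archive: Callable[[List[dict]], List[dict]],
                index_only: bool = False) -> List[dict]:
    """Filter -> resolve folders by status -> index-only write or full re-archive."""
    folders = pick_folders(snapshots)
    selected = [snap for snap in folders.values() if snap]
    if index_only:
        for snap in selected:
            write_details(snap)
        return selected
    return archive(selected)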
@@ -797,7 +798,7 @@ def list_all(filter_patterns_str: Optional[str]=None,
 
 
 @enforce_types
-def list_links(snapshots: Optional[QuerySet]=None,
+def list_snapshots(snapshots: Optional[QuerySet]=None,
                filter_patterns: Optional[List[str]]=None,
                filter_type: str='exact',
                after: Optional[float]=None,
@@ -820,9 +821,9 @@ def list_links(snapshots: Optional[QuerySet]=None,
     return all_snapshots
 
 @enforce_types
-def list_folders(links: List[Link],
+def list_folders(snapshots: List[Model],
                  status: str,
-                 out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+                 out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Model]]:
 
     check_data_folder(out_dir=out_dir)
 
@@ -840,7 +841,7 @@ def list_folders(links: List[Link],
     }
 
     try:
-        return STATUS_FUNCTIONS[status](links, out_dir=out_dir)
+        return STATUS_FUNCTIONS[status](snapshots, out_dir=out_dir)
     except KeyError:
         raise ValueError('Status not recognized.')
 
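list_folders() above dispatches on the requested status through a dict of handler functions and turns an unknown status into a ValueError. The same dispatch pattern in isolation (handler bodies here are illustrative stand-ins for the get_*_folders helpers):

from typing import Callable, Dict, List

def list_folders_sketch(snapshots: List[dict], status: str) -> Dict[str, dict]:
    status_functions: Dict[str, Callable[[List[dict]], Dict[str, dict]]] = {
        'indexed': lambda snaps: {s['dir']: s for s in snaps},
        'archived': lambda snaps: {s['dir']: s for s in snaps if s.get('is_archived')},
        'unarchived': lambda snaps: {s['dir']: s for s in snaps if not s.get('is_archived')},
    }
    try:
        return status_functions[status](snapshots)
    except KeyError:
        raise ValueError('Status not recognized.')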