mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-16 16:14:28 -04:00
fix: Init and status commands are now able to navigate to the right archive folder
This commit is contained in:
parent
9fdcb9857e
commit
ea84607b47
3 changed files with 15 additions and 10 deletions
|
@ -464,7 +464,7 @@ def get_orphaned_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optio
|
||||||
if entry.is_dir():
|
if entry.is_dir():
|
||||||
snapshot = None
|
snapshot = None
|
||||||
try:
|
try:
|
||||||
snapshot = load_json_snapshot(str(entry))
|
snapshot = load_json_snapshot(entry)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
|
@ -113,7 +113,7 @@ def load_json_snapshot(out_dir: Path) -> Optional[Model]:
|
||||||
def parse_json_snapshot_details(out_dir: Union[Path, str]) -> Iterator[dict]:
|
def parse_json_snapshot_details(out_dir: Union[Path, str]) -> Iterator[dict]:
|
||||||
"""read through all the archive data folders and return the parsed snapshots"""
|
"""read through all the archive data folders and return the parsed snapshots"""
|
||||||
|
|
||||||
for entry in os.scandir(Path(out_dir)):
|
for entry in os.scandir(Path(out_dir) / ARCHIVE_DIR_NAME):
|
||||||
if entry.is_dir(follow_symlinks=True):
|
if entry.is_dir(follow_symlinks=True):
|
||||||
if (Path(entry.path) / 'index.json').exists():
|
if (Path(entry.path) / 'index.json').exists():
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -335,8 +335,8 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
|
||||||
print()
|
print()
|
||||||
print('{green}[*] Collecting links from any existing indexes and archive folders...{reset}'.format(**ANSI))
|
print('{green}[*] Collecting links from any existing indexes and archive folders...{reset}'.format(**ANSI))
|
||||||
|
|
||||||
all_links = Snapshot.objects.none()
|
all_snapshots = Snapshot.objects.none()
|
||||||
pending_snapshots: Dict[str, Link] = {}
|
pending_snapshots: Dict[str, Snapshot] = {}
|
||||||
|
|
||||||
if existing_index:
|
if existing_index:
|
||||||
all_snapshots = load_main_index(out_dir=out_dir, warn=False)
|
all_snapshots = load_main_index(out_dir=out_dir, warn=False)
|
||||||
|
@ -350,14 +350,14 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
|
||||||
print(' {lightyellow}! Could not fix {} data directory locations due to conflicts with existing folders.{reset}'.format(len(cant_fix), **ANSI))
|
print(' {lightyellow}! Could not fix {} data directory locations due to conflicts with existing folders.{reset}'.format(len(cant_fix), **ANSI))
|
||||||
|
|
||||||
# Links in JSON index but not in main index
|
# Links in JSON index but not in main index
|
||||||
orphaned_json_links = {
|
orphaned_json_snapshots = {
|
||||||
link.url: link
|
snapshot.url: snapshot
|
||||||
for link in parse_json_main_index(out_dir)
|
for snapshot in parse_json_main_index(out_dir)
|
||||||
if not all_links.filter(url=link.url).exists()
|
if not all_links.filter(url=link.url).exists()
|
||||||
}
|
}
|
||||||
if orphaned_json_links:
|
if orphaned_json_snapshots:
|
||||||
pending_links.update(orphaned_json_links)
|
pending_snapshots.update(orphaned_json_snapshots)
|
||||||
print(' {lightyellow}√ Added {} orphaned links from existing JSON index...{reset}'.format(len(orphaned_json_links), **ANSI))
|
print(' {lightyellow}√ Added {} orphaned snapshots from deprecated JSON index...{reset}'.format(len(orphaned_json_snapshots), **ANSI))
|
||||||
|
|
||||||
# Links in data dir indexes but not in main index
|
# Links in data dir indexes but not in main index
|
||||||
orphaned_data_dir_snapshots = {
|
orphaned_data_dir_snapshots = {
|
||||||
|
@ -369,6 +369,11 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
|
||||||
pending_snapshots.update(orphaned_data_dir_snapshots)
|
pending_snapshots.update(orphaned_data_dir_snapshots)
|
||||||
print(' {lightyellow}√ Added {} orphaned snapshots from existing archive directories.{reset}'.format(len(orphaned_data_dir_snapshots), **ANSI))
|
print(' {lightyellow}√ Added {} orphaned snapshots from existing archive directories.{reset}'.format(len(orphaned_data_dir_snapshots), **ANSI))
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: Should we remove orphaned folders from the invalid list? With init they are being imported, but the same links that were
|
||||||
|
# listed as just imported are listed as skipped because they are invalid. At the very least I think we should improve this message,
|
||||||
|
# because it makes this command a little more confusing.
|
||||||
|
|
||||||
# Links in invalid/duplicate data dirs
|
# Links in invalid/duplicate data dirs
|
||||||
invalid_folders = {
|
invalid_folders = {
|
||||||
folder: snapshot
|
folder: snapshot
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue