mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-25 20:24:25 -04:00
new methods for detecting valid/invalid data dirs on init
This commit is contained in:
parent
ae782a1a0c
commit
56d0b2c088
2 changed files with 355 additions and 54 deletions
|
@@ -7,11 +7,21 @@ __description__ = 'List all the URLs currently in the archive.'
|
|||
import sys
|
||||
import argparse
|
||||
|
||||
|
||||
from ..legacy.util import reject_stdin, to_json, to_csv
|
||||
from ..legacy.config import check_data_folder
|
||||
from ..legacy.main import list_archive_data
|
||||
|
||||
from ..legacy.util import SmartFormatter, reject_stdin, to_json, to_csv
|
||||
from ..legacy.config import check_data_folder, OUTPUT_DIR
|
||||
from ..legacy.main import (
|
||||
list_archive_data,
|
||||
get_indexed_folders,
|
||||
get_archived_folders,
|
||||
get_unarchived_folders,
|
||||
get_present_folders,
|
||||
get_valid_folders,
|
||||
get_invalid_folders,
|
||||
get_duplicate_folders,
|
||||
get_orphaned_folders,
|
||||
get_corrupted_folders,
|
||||
get_unrecognized_folders,
|
||||
)
|
||||
|
||||
def main(args=None):
|
||||
check_data_folder()
|
||||
|
@@ -22,6 +32,7 @@ def main(args=None):
|
|||
prog=__command__,
|
||||
description=__description__,
|
||||
add_help=True,
|
||||
formatter_class=SmartFormatter,
|
||||
)
|
||||
group = parser.add_mutually_exclusive_group()
|
||||
group.add_argument(
|
||||
|
@@ -44,15 +55,36 @@ def main(args=None):
|
|||
parser.add_argument(
|
||||
'--before', #'-b',
|
||||
type=float,
|
||||
help="List only URLs bookmarked before the given timestamp.",
|
||||
help="List only links bookmarked before the given timestamp.",
|
||||
default=None,
|
||||
)
|
||||
parser.add_argument(
|
||||
'--after', #'-a',
|
||||
type=float,
|
||||
help="List only URLs bookmarked after the given timestamp.",
|
||||
help="List only links bookmarked after the given timestamp.",
|
||||
default=None,
|
||||
)
|
||||
parser.add_argument(
|
||||
'--status',
|
||||
type=str,
|
||||
choices=('indexed', 'archived', 'unarchived', 'present', 'valid', 'invalid', 'duplicate', 'orphaned', 'corrupted', 'unrecognized'),
|
||||
default='indexed',
|
||||
help=(
|
||||
'List only links or data directories that have the given status\n'
|
||||
f' indexed {get_indexed_folders.__doc__} (the default)\n'
|
||||
f' archived {get_archived_folders.__doc__}\n'
|
||||
f' unarchived {get_unarchived_folders.__doc__}\n'
|
||||
'\n'
|
||||
f' present {get_present_folders.__doc__}\n'
|
||||
f' valid {get_valid_folders.__doc__}\n'
|
||||
f' invalid {get_invalid_folders.__doc__}\n'
|
||||
'\n'
|
||||
f' duplicate {get_duplicate_folders.__doc__}\n'
|
||||
f' orphaned {get_orphaned_folders.__doc__}\n'
|
||||
f' corrupted {get_corrupted_folders.__doc__}\n'
|
||||
f' unrecognized {get_unrecognized_folders.__doc__}\n'
|
||||
)
|
||||
)
|
||||
parser.add_argument(
|
||||
'--filter-type',
|
||||
type=str,
|
||||
|
@@ -76,17 +108,40 @@ def main(args=None):
|
|||
before=command.before,
|
||||
after=command.after,
|
||||
)
|
||||
|
||||
if command.sort:
|
||||
links = sorted(links, key=lambda link: getattr(link, command.sort))
|
||||
|
||||
if command.status == 'indexed':
|
||||
folders = get_indexed_folders(links, out_dir=OUTPUT_DIR)
|
||||
elif command.status == 'archived':
|
||||
folders = get_archived_folders(links, out_dir=OUTPUT_DIR)
|
||||
elif command.status == 'unarchived':
|
||||
folders = get_unarchived_folders(links, out_dir=OUTPUT_DIR)
|
||||
|
||||
elif command.status == 'present':
|
||||
folders = get_present_folders(links, out_dir=OUTPUT_DIR)
|
||||
elif command.status == 'valid':
|
||||
folders = get_valid_folders(links, out_dir=OUTPUT_DIR)
|
||||
elif command.status == 'invalid':
|
||||
folders = get_invalid_folders(links, out_dir=OUTPUT_DIR)
|
||||
|
||||
elif command.status == 'duplicate':
|
||||
folders = get_duplicate_folders(links, out_dir=OUTPUT_DIR)
|
||||
elif command.status == 'orphaned':
|
||||
folders = get_orphaned_folders(links, out_dir=OUTPUT_DIR)
|
||||
elif command.status == 'corrupted':
|
||||
folders = get_corrupted_folders(links, out_dir=OUTPUT_DIR)
|
||||
elif command.status == 'unrecognized':
|
||||
folders = get_unrecognized_folders(links, out_dir=OUTPUT_DIR)
|
||||
|
||||
if command.csv:
|
||||
print(to_csv(links, csv_cols=command.csv.split(','), header=True))
|
||||
print(to_csv(folders.values(), csv_cols=command.csv.split(','), header=True))
|
||||
elif command.json:
|
||||
print(to_json(list(links), indent=4, sort_keys=True))
|
||||
print(to_json(folders.values(), indent=4, sort_keys=True))
|
||||
else:
|
||||
print('\n'.join(link.url for link in links))
|
||||
|
||||
print('\n'.join(f'{folder} {link}' for folder, link in folders.items()))
|
||||
raise SystemExit(not folders)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue