From aab8f96520bebd5648826a9c33219e434708fef4 Mon Sep 17 00:00:00 2001 From: Cristian Date: Wed, 19 Aug 2020 12:32:25 -0500 Subject: [PATCH] feat: Add flag to list command to support index like output --- archivebox/cli/archivebox_list.py | 6 ++++++ archivebox/logging_util.py | 18 ++++++++++++++++-- archivebox/main.py | 5 +++-- tests/{test_args.py => test_add.py} | 0 tests/test_list.py | 18 ++++++++++++++++++ 5 files changed, 43 insertions(+), 4 deletions(-) rename tests/{test_args.py => test_add.py} (100%) create mode 100644 tests/test_list.py diff --git a/archivebox/cli/archivebox_list.py b/archivebox/cli/archivebox_list.py index 95c5cc4e..0f5d6a80 100644 --- a/archivebox/cli/archivebox_list.py +++ b/archivebox/cli/archivebox_list.py @@ -46,6 +46,11 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional action='store_true', help="Print the output in JSON format with all columns included.", ) + parser.add_argument( + '--index', + action='store_true', + help='Include the index additional structures' + ) parser.add_argument( '--sort', #'-s', type=str, @@ -112,6 +117,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional sort=command.sort, csv=command.csv, json=command.json, + index=command.index, out_dir=pwd or OUTPUT_DIR, ) raise SystemExit(not matching_folders) diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py index 63542122..6a9b4301 100644 --- a/archivebox/logging_util.py +++ b/archivebox/logging_util.py @@ -14,6 +14,8 @@ from typing import Optional, List, Dict, Union, IO, TYPE_CHECKING if TYPE_CHECKING: from .index.schema import Link, ArchiveResult +from .index.json import MAIN_INDEX_HEADER + from .util import enforce_types from .config import ( ConfigDict, @@ -460,10 +462,22 @@ def printable_filesize(num_bytes: Union[int, float]) -> str: @enforce_types def printable_folders(folders: Dict[str, Optional["Link"]], json: bool=False, - csv: Optional[str]=None) -> str: + csv: Optional[str]=None, + index: bool=False) -> str: + links = folders.values() if json: from .index.json import to_json - return to_json(folders.values(), indent=4, sort_keys=True) + if index: + output = { + **MAIN_INDEX_HEADER, + 'num_links': len(links), + 'updated': datetime.now(), + 'last_run_cmd': sys.argv, + 'links': links, + } + else: + output = links + return to_json(output, indent=4, sort_keys=True) elif csv: from .index.csv import links_to_csv diff --git a/archivebox/main.py b/archivebox/main.py index 933dac09..624e9cf6 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -151,7 +151,7 @@ def help(out_dir: str=OUTPUT_DIR) -> None: ) - if os.path.exists(os.path.join(out_dir, JSON_INDEX_FILENAME)): + if os.path.exists(os.path.join(out_dir, SQL_INDEX_FILENAME)): print('''{green}ArchiveBox v{}: The self-hosted internet archive.{reset} {lightred}Active data directory:{reset} @@ -730,6 +730,7 @@ def list_all(filter_patterns_str: Optional[str]=None, sort: Optional[str]=None, csv: Optional[str]=None, json: bool=False, + index: bool=False, out_dir: str=OUTPUT_DIR) -> Iterable[Link]: """List, filter, and export information about archive entries""" @@ -762,7 +763,7 @@ def list_all(filter_patterns_str: Optional[str]=None, out_dir=out_dir, ) - print(printable_folders(folders, json=json, csv=csv)) + print(printable_folders(folders, json=json, csv=csv, index=index)) return folders diff --git a/tests/test_args.py b/tests/test_add.py similarity index 100% rename from tests/test_args.py rename to tests/test_add.py diff --git a/tests/test_list.py b/tests/test_list.py new file mode 100644 index 00000000..d75e75ad --- /dev/null +++ b/tests/test_list.py @@ -0,0 +1,18 @@ +import json + +from .fixtures import * + +def test_list_json(process, disable_extractors_dict): + subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=0"], + capture_output=True, env=disable_extractors_dict) + list_process = subprocess.run(["archivebox", "list", "--json"], capture_output=True) + output_json = json.loads(list_process.stdout.decode("utf-8")) + assert output_json[0]["url"] == "http://127.0.0.1:8080/static/example.com.html" + + +def test_list_json_index(process, disable_extractors_dict): + subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=0"], + capture_output=True, env=disable_extractors_dict) + list_process = subprocess.run(["archivebox", "list", "--json", "--index"], capture_output=True) + output_json = json.loads(list_process.stdout.decode("utf-8")) + assert output_json["links"][0]["url"] == "http://127.0.0.1:8080/static/example.com.html"