feat: Add flag to list command to support index-like output

This commit is contained in:
Cristian 2020-08-19 12:32:25 -05:00 committed by Cristian Vargas
parent be57db1369
commit aab8f96520
5 changed files with 43 additions and 4 deletions

View file

@ -46,6 +46,11 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
action='store_true', action='store_true',
help="Print the output in JSON format with all columns included.", help="Print the output in JSON format with all columns included.",
) )
parser.add_argument(
'--index',
action='store_true',
help='Include the index additional structures'
)
parser.add_argument( parser.add_argument(
'--sort', #'-s', '--sort', #'-s',
type=str, type=str,
@ -112,6 +117,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
sort=command.sort, sort=command.sort,
csv=command.csv, csv=command.csv,
json=command.json, json=command.json,
index=command.index,
out_dir=pwd or OUTPUT_DIR, out_dir=pwd or OUTPUT_DIR,
) )
raise SystemExit(not matching_folders) raise SystemExit(not matching_folders)

View file

@ -14,6 +14,8 @@ from typing import Optional, List, Dict, Union, IO, TYPE_CHECKING
if TYPE_CHECKING: if TYPE_CHECKING:
from .index.schema import Link, ArchiveResult from .index.schema import Link, ArchiveResult
from .index.json import MAIN_INDEX_HEADER
from .util import enforce_types from .util import enforce_types
from .config import ( from .config import (
ConfigDict, ConfigDict,
@ -460,10 +462,22 @@ def printable_filesize(num_bytes: Union[int, float]) -> str:
@enforce_types @enforce_types
def printable_folders(folders: Dict[str, Optional["Link"]], def printable_folders(folders: Dict[str, Optional["Link"]],
json: bool=False, json: bool=False,
csv: Optional[str]=None) -> str: csv: Optional[str]=None,
index: bool=False) -> str:
links = folders.values()
if json: if json:
from .index.json import to_json from .index.json import to_json
return to_json(folders.values(), indent=4, sort_keys=True) if index:
output = {
**MAIN_INDEX_HEADER,
'num_links': len(links),
'updated': datetime.now(),
'last_run_cmd': sys.argv,
'links': links,
}
else:
output = links
return to_json(output, indent=4, sort_keys=True)
elif csv: elif csv:
from .index.csv import links_to_csv from .index.csv import links_to_csv

View file

@ -151,7 +151,7 @@ def help(out_dir: str=OUTPUT_DIR) -> None:
) )
if os.path.exists(os.path.join(out_dir, JSON_INDEX_FILENAME)): if os.path.exists(os.path.join(out_dir, SQL_INDEX_FILENAME)):
print('''{green}ArchiveBox v{}: The self-hosted internet archive.{reset} print('''{green}ArchiveBox v{}: The self-hosted internet archive.{reset}
{lightred}Active data directory:{reset} {lightred}Active data directory:{reset}
@ -730,6 +730,7 @@ def list_all(filter_patterns_str: Optional[str]=None,
sort: Optional[str]=None, sort: Optional[str]=None,
csv: Optional[str]=None, csv: Optional[str]=None,
json: bool=False, json: bool=False,
index: bool=False,
out_dir: str=OUTPUT_DIR) -> Iterable[Link]: out_dir: str=OUTPUT_DIR) -> Iterable[Link]:
"""List, filter, and export information about archive entries""" """List, filter, and export information about archive entries"""
@ -762,7 +763,7 @@ def list_all(filter_patterns_str: Optional[str]=None,
out_dir=out_dir, out_dir=out_dir,
) )
print(printable_folders(folders, json=json, csv=csv)) print(printable_folders(folders, json=json, csv=csv, index=index))
return folders return folders

18
tests/test_list.py Normal file
View file

@ -0,0 +1,18 @@
import json
from .fixtures import *
def test_list_json(process, disable_extractors_dict):
    """Check that `archivebox list --json` prints a JSON sequence led by the added URL."""
    added_url = "http://127.0.0.1:8080/static/example.com.html"

    # Add a single link first; the fixture dict is handed over as the child's environment.
    subprocess.run(
        ["archivebox", "add", added_url, "--depth=0"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    listing = subprocess.run(["archivebox", "list", "--json"], capture_output=True)
    parsed = json.loads(listing.stdout.decode("utf-8"))

    # Plain --json output is indexed directly, so the top level is a sequence of links.
    first_entry = parsed[0]
    assert first_entry["url"] == added_url
def test_list_json_index(process, disable_extractors_dict):
    """Check that `archivebox list --json --index` nests the added URL under "links"."""
    added_url = "http://127.0.0.1:8080/static/example.com.html"

    # Add a single link first; the fixture dict is handed over as the child's environment.
    subprocess.run(
        ["archivebox", "add", added_url, "--depth=0"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    listing = subprocess.run(
        ["archivebox", "list", "--json", "--index"],
        capture_output=True,
    )
    parsed = json.loads(listing.stdout.decode("utf-8"))

    # With --index the top level is a mapping and the entries live under the "links" key.
    link_entries = parsed["links"]
    assert link_entries[0]["url"] == added_url