From ea695b8bef54511d7b473a16aef3cbf42bafc269 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Wed, 27 Mar 2019 15:32:39 -0400 Subject: [PATCH] remove dataclass for ArchiveIndex in favor of plain dict to simplify schema file --- archivebox/index.py | 22 ++++++++++------------ archivebox/logs.py | 45 +++++++++++++++++++++++++++----------------- archivebox/schema.py | 35 ---------------------------------- 3 files changed, 38 insertions(+), 64 deletions(-) diff --git a/archivebox/index.py b/archivebox/index.py index 50cd000f..58b752b1 100644 --- a/archivebox/index.py +++ b/archivebox/index.py @@ -101,20 +101,18 @@ def write_json_links_index(out_dir: str, links: List[Link]) -> None: path = os.path.join(out_dir, 'index.json') - index_json = ArchiveIndex( - info='ArchiveBox Index', - source='https://github.com/pirate/ArchiveBox', - docs='https://github.com/pirate/ArchiveBox/wiki', - version=GIT_SHA, - num_links=len(links), - updated=datetime.now(), - links=links, - ) - - assert isinstance(index_json._asdict(), dict) + index_json = { + 'info': 'ArchiveBox Index', + 'source': 'https://github.com/pirate/ArchiveBox', + 'docs': 'https://github.com/pirate/ArchiveBox/wiki', + 'version': VERSION, + 'num_links': len(links), + 'updated': datetime.now(), + 'links': links, + } with open(path, 'w', encoding='utf-8') as f: - json.dump(index_json._asdict(), f, indent=4, cls=ExtendedEncoder) + json.dump(index_json, f, indent=4, cls=ExtendedEncoder) chmod_file(path) diff --git a/archivebox/logs.py b/archivebox/logs.py index 660e27cc..ccb9a10c 100644 --- a/archivebox/logs.py +++ b/archivebox/logs.py @@ -1,29 +1,40 @@ +import os import sys -from datetime import datetime +from datetime import datetime +from dataclasses import dataclass from typing import Optional -from schema import Link, ArchiveResult, RuntimeStats -from config import ANSI, REPO_DIR, OUTPUT_DIR + +from .schema import Link, ArchiveResult +from .config import ANSI, REPO_DIR, OUTPUT_DIR + + +@dataclass +class RuntimeStats: + """mutable stats counter for logging archiving timing info to CLI output""" + + skipped: int = 0 + succeeded: int = 0 + failed: int = 0 + + parse_start_ts: datetime = None + parse_end_ts: datetime = None + + index_start_ts: datetime = None + index_end_ts: datetime = None + + archiving_start_ts: datetime = None + archiving_end_ts: datetime = None # globals are bad, mmkay -_LAST_RUN_STATS = RuntimeStats( - skipped=0, - succeeded=0, - failed=0, +_LAST_RUN_STATS = RuntimeStats() - parse_start_ts=0, - parse_end_ts=0, - - index_start_ts=0, - index_end_ts=0, - - archiving_start_ts=0, - archiving_end_ts=0, -) def pretty_path(path: str) -> str: """convert paths like .../ArchiveBox/archivebox/../output/abc into output/abc""" - return path.replace(REPO_DIR + '/', '') + pwd = os.path.abspath('.') + # parent = os.path.abspath(os.path.join(pwd, os.path.pardir)) + return path.replace(pwd + '/', './') ### Parsing Stage diff --git a/archivebox/schema.py b/archivebox/schema.py index 5aa629d7..619ffd7c 100644 --- a/archivebox/schema.py +++ b/archivebox/schema.py @@ -268,38 +268,3 @@ class Link: 'dom_url': static_url, }) return canonical - - -@dataclass(frozen=True) -class ArchiveIndex: - info: str - version: str - source: str - docs: str - num_links: int - updated: str - links: List[Link] - schema: str = 'ArchiveIndex' - - def __post_init__(self): - assert self.schema == self.__class__.__name__ - - def _asdict(self): - return asdict(self) - -@dataclass -class RuntimeStats: - """mutable stats counter for logging archiving timing info to CLI output""" - - skipped: int - succeeded: int - failed: int - - parse_start_ts: datetime - parse_end_ts: datetime - - index_start_ts: datetime - index_end_ts: datetime - - archiving_start_ts: datetime - archiving_end_ts: datetime