Mirror of https://github.com/ArchiveBox/ArchiveBox.git (synced 2025-05-14 23:24:30 -04:00)
switch to strict type hints with NamedTuples instead of dicts
commit 76abc58135 (parent 0a44779b21)
8 changed files with 201 additions and 98 deletions
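Note: the new RuntimeStats type is imported from schema.py, one of the other changed files, which is not shown in this diff. The rewritten logging code both constructs _LAST_RUN_STATS with keyword arguments and mutates its fields in place (e.g. _LAST_RUN_STATS.failed += 1), which a plain typing.NamedTuple forbids, so a mutable container is implied. A minimal sketch of a compatible RuntimeStats, assuming a dataclass rather than the repo's actual definition:

    # Sketch only: schema.py is not part of the diff shown here, and the real
    # definition may differ. A dataclass is assumed (not a NamedTuple) because
    # the logging code below assigns to fields, which NamedTuples disallow.
    from dataclasses import dataclass
    from datetime import datetime
    from typing import Union

    @dataclass
    class RuntimeStats:
        # counters incremented once per archived link
        skipped: int = 0
        succeeded: int = 0
        failed: int = 0
        # stage timestamps: initialized to 0, replaced with datetimes at runtime
        parse_start_ts: Union[datetime, int] = 0
        parse_end_ts: Union[datetime, int] = 0
        index_start_ts: Union[datetime, int] = 0
        index_end_ts: Union[datetime, int] = 0
        archiving_start_ts: Union[datetime, int] = 0
        archiving_end_ts: Union[datetime, int] = 0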
@@ -1,43 +1,44 @@
 import sys
 
 from datetime import datetime
 
+from schema import Link, ArchiveResult, RuntimeStats
 from config import ANSI, REPO_DIR, OUTPUT_DIR
 
 
 # globals are bad, mmkay
-_LAST_RUN_STATS = {
-    'skipped': 0,
-    'succeeded': 0,
-    'failed': 0,
+_LAST_RUN_STATS = RuntimeStats(
+    skipped=0,
+    succeeded=0,
+    failed=0,
 
-    'parsing_start_ts': 0,
-    'parsing_end_ts': 0,
+    parse_start_ts=0,
+    parse_end_ts=0,
 
-    'indexing_start_ts': 0,
-    'indexing_end_ts': 0,
+    index_start_ts=0,
+    index_end_ts=0,
 
-    'archiving_start_ts': 0,
-    'archiving_end_ts': 0,
+    archiving_start_ts=0,
+    archiving_end_ts=0,
+)
 
-    'links': {},
-}
 
-def pretty_path(path):
+def pretty_path(path: str) -> str:
     """convert paths like .../ArchiveBox/archivebox/../output/abc into output/abc"""
     return path.replace(REPO_DIR + '/', '')
 
 
 ### Parsing Stage
 
-def log_parsing_started(source_file):
+def log_parsing_started(source_file: str):
     start_ts = datetime.now()
-    _LAST_RUN_STATS['parse_start_ts'] = start_ts
+    _LAST_RUN_STATS.parse_start_ts = start_ts
     print('{green}[*] [{}] Parsing new links from output/sources/{}...{reset}'.format(
         start_ts.strftime('%Y-%m-%d %H:%M:%S'),
         source_file.rsplit('/', 1)[-1],
         **ANSI,
     ))
 
-def log_parsing_finished(num_new_links, parser_name):
+def log_parsing_finished(num_new_links: int, parser_name: str):
     end_ts = datetime.now()
+    _LAST_RUN_STATS.parse_end_ts = end_ts
     print(' > Adding {} new links to index (parsed import as {})'.format(
         num_new_links,
         parser_name,
@@ -48,26 +49,26 @@ def log_parsing_finished(num_new_links, parser_name):
 
 def log_indexing_process_started():
     start_ts = datetime.now()
-    _LAST_RUN_STATS['index_start_ts'] = start_ts
+    _LAST_RUN_STATS.index_start_ts = start_ts
     print('{green}[*] [{}] Saving main index files...{reset}'.format(
         start_ts.strftime('%Y-%m-%d %H:%M:%S'),
         **ANSI,
     ))
 
-def log_indexing_started(out_dir, out_file):
+def log_indexing_started(out_dir: str, out_file: str):
     sys.stdout.write(' > {}/{}'.format(pretty_path(out_dir), out_file))
 
-def log_indexing_finished(out_dir, out_file):
+def log_indexing_finished(out_dir: str, out_file: str):
     end_ts = datetime.now()
-    _LAST_RUN_STATS['index_end_ts'] = end_ts
+    _LAST_RUN_STATS.index_end_ts = end_ts
     print('\r √ {}/{}'.format(pretty_path(out_dir), out_file))
 
 
 ### Archiving Stage
 
-def log_archiving_started(num_links, resume):
+def log_archiving_started(num_links: int, resume: float):
     start_ts = datetime.now()
-    _LAST_RUN_STATS['start_ts'] = start_ts
+    _LAST_RUN_STATS.archiving_start_ts = start_ts
     if resume:
         print('{green}[▶] [{}] Resuming archive updating for {} pages starting from {}...{reset}'.format(
             start_ts.strftime('%Y-%m-%d %H:%M:%S'),
@@ -82,9 +83,9 @@ def log_archiving_started(num_links, resume):
             **ANSI,
         ))
 
-def log_archiving_paused(num_links, idx, timestamp):
+def log_archiving_paused(num_links: int, idx: int, timestamp: str):
     end_ts = datetime.now()
-    _LAST_RUN_STATS['end_ts'] = end_ts
+    _LAST_RUN_STATS.archiving_end_ts = end_ts
     print()
     print('\n{lightyellow}[X] [{now}] Downloading paused on link {timestamp} ({idx}/{total}){reset}'.format(
         **ANSI,
@@ -100,10 +101,10 @@ def log_archiving_paused(num_links, idx, timestamp):
         timestamp,
     ))
 
-def log_archiving_finished(num_links):
+def log_archiving_finished(num_links: int):
     end_ts = datetime.now()
-    _LAST_RUN_STATS['end_ts'] = end_ts
-    seconds = end_ts.timestamp() - _LAST_RUN_STATS['start_ts'].timestamp()
+    _LAST_RUN_STATS.archiving_end_ts = end_ts
+    seconds = end_ts.timestamp() - _LAST_RUN_STATS.archiving_start_ts.timestamp()
     if seconds > 60:
         duration = '{0:.2f} min'.format(seconds / 60, 2)
     else:
@@ -116,13 +117,13 @@ def log_archiving_finished(num_links):
         duration,
         ANSI['reset'],
     ))
-    print(' - {} links skipped'.format(_LAST_RUN_STATS['skipped']))
-    print(' - {} links updated'.format(_LAST_RUN_STATS['succeeded']))
-    print(' - {} links had errors'.format(_LAST_RUN_STATS['failed']))
+    print(' - {} links skipped'.format(_LAST_RUN_STATS.skipped))
+    print(' - {} links updated'.format(_LAST_RUN_STATS.succeeded))
+    print(' - {} links had errors'.format(_LAST_RUN_STATS.failed))
     print(' To view your archive, open: {}/index.html'.format(OUTPUT_DIR.replace(REPO_DIR + '/', '')))
 
 
-def log_link_archiving_started(link_dir, link, is_new):
+def log_link_archiving_started(link_dir: str, link: Link, is_new: bool):
     # [*] [2019-03-22 13:46:45] "Log Structured Merge Trees - ben stopford"
     #     http://www.benstopford.com/2015/02/14/log-structured-merge-trees/
     #     > output/archive/1478739709
@@ -140,40 +141,34 @@ def log_link_archiving_started(link_dir, link, is_new):
         pretty_path(link_dir),
     ))
 
-def log_link_archiving_finished(link_dir, link, is_new, stats):
+def log_link_archiving_finished(link_dir: str, link: Link, is_new: bool, stats: dict):
     total = sum(stats.values())
 
     if stats['failed'] > 0 :
-        _LAST_RUN_STATS['failed'] += 1
+        _LAST_RUN_STATS.failed += 1
     elif stats['skipped'] == total:
-        _LAST_RUN_STATS['skipped'] += 1
+        _LAST_RUN_STATS.skipped += 1
     else:
-        _LAST_RUN_STATS['succeeded'] += 1
+        _LAST_RUN_STATS.succeeded += 1
 
 
-def log_archive_method_started(method):
+def log_archive_method_started(method: str):
     print(' > {}'.format(method))
 
-def log_archive_method_finished(result):
+
+def log_archive_method_finished(result: ArchiveResult):
     """quote the argument with whitespace in a command so the user can
     copy-paste the outputted string directly to run the cmd
     """
-    required_keys = ('cmd', 'pwd', 'output', 'status', 'start_ts', 'end_ts')
-    assert (
-        isinstance(result, dict)
-        and all(key in result for key in required_keys)
-        and ('output' in result)
-    ), 'Archive method did not return a valid result.'
-
     # Prettify CMD string and make it safe to copy-paste by quoting arguments
     quoted_cmd = ' '.join(
         '"{}"'.format(arg) if ' ' in arg else arg
-        for arg in result['cmd']
+        for arg in result.cmd
     )
 
-    if result['status'] == 'failed':
+    if result.status == 'failed':
         # Prettify error output hints string and limit to five lines
-        hints = getattr(result['output'], 'hints', None) or ()
+        hints = getattr(result.output, 'hints', None) or ()
         if hints:
             hints = hints if isinstance(hints, (list, tuple)) else hints.split('\n')
             hints = (
@@ -185,13 +180,13 @@ def log_archive_method_finished(result):
         output_lines = [
             '{}Failed:{} {}{}'.format(
                 ANSI['red'],
-                result['output'].__class__.__name__.replace('ArchiveError', ''),
-                result['output'],
+                result.output.__class__.__name__.replace('ArchiveError', ''),
+                result.output,
                 ANSI['reset']
             ),
             *hints,
             '{}Run to see full output:{}'.format(ANSI['lightred'], ANSI['reset']),
-            ' cd {};'.format(result['pwd']),
+            *((' cd {};'.format(result.pwd),) if result.pwd else ()),
             ' {}'.format(quoted_cmd),
         ]
         print('\n'.join(
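For reference, the assert removed from log_archive_method_finished validated the dict keys ('cmd', 'pwd', 'output', 'status', 'start_ts', 'end_ts') at runtime; with a typed result, that validation moves to the type checker. A minimal sketch of a compatible ArchiveResult, inferred only from the removed required_keys tuple and the attribute accesses in this diff (the actual definition in schema.py may differ):

    # Sketch only: field names come from the removed required_keys tuple and
    # the result.cmd / result.pwd / result.output / result.status accesses
    # above; the types are assumptions, not the repo's schema.py.
    from datetime import datetime
    from typing import Any, List, NamedTuple, Optional

    class ArchiveResult(NamedTuple):
        cmd: List[str]       # argv of the command the archive method ran
        pwd: Optional[str]   # working dir; may be empty, hence the `if result.pwd` guard
        output: Any          # output path on success, an ArchiveError on failure
        status: str          # e.g. 'succeeded', 'failed', 'skipped'
        start_ts: datetime
        end_ts: datetime

Unlike the run stats, a result is never mutated after creation, so an immutable NamedTuple fits here: field accesses like result.cmd can be checked statically, where the old dict lookups were only guarded by the runtime assert.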