working better removal ux

Nick Sweeting 2019-04-11 08:11:32 -04:00
parent 525f8beb55
commit 3fb10dbf35
4 changed files with 104 additions and 45 deletions

File 1 of 4: logs.py

@@ -3,7 +3,7 @@ import sys

 from datetime import datetime
 from dataclasses import dataclass
-from typing import Optional
+from typing import Optional, List

 from .schema import Link, ArchiveResult
 from .config import ANSI, OUTPUT_DIR
@@ -205,3 +205,58 @@ def log_archive_method_finished(result: ArchiveResult):
         if line
     ))
     print()
+
+
+def log_list_started(filter_patterns: List[str], filter_type: str):
+    print('{green}[*] Finding links in the archive index matching these {} patterns:{reset}'.format(
+        filter_type,
+        **ANSI,
+    ))
+    print('    {}'.format(' '.join(filter_patterns)))
+
+def log_list_finished(links):
+    from .util import to_csv
+    print()
+    print('---------------------------------------------------------------------------------------------------')
+    print(to_csv(links, csv_cols=['timestamp', 'is_archived', 'num_outputs', 'url'], header=True, ljust=16, separator=' | '))
+    print('---------------------------------------------------------------------------------------------------')
+    print()
+
+def log_removal_started(links: List[Link], yes: bool, delete: bool):
+    log_list_finished(links)
+    print('{lightyellow}[i] Found {} matching URLs to remove.{reset}'.format(len(links), **ANSI))
+    if delete:
+        file_counts = [link.num_outputs for link in links if os.path.exists(link.link_dir)]
+        print(
+            f'    {len(links)} Links will be de-listed from the main index, and their archived content folders will be deleted from disk.\n'
+            f'    ({len(file_counts)} data folders with {sum(file_counts)} archived files will be deleted!)'
+        )
+    else:
+        print(
+            f'    Matching links will be de-listed from the main index, but their archived content folders will remain in place on disk.\n'
+            f'    (Pass --delete if you also want to permanently delete the data folders)'
+        )
+
+    if not yes:
+        print()
+        print('{lightyellow}[?] Do you want to proceed with removing these {} links?{reset}'.format(len(links), **ANSI))
+        try:
+            assert input('    y/[n]: ').lower() == 'y'
+        except (KeyboardInterrupt, EOFError, AssertionError):
+            raise SystemExit(0)
+
+def log_removal_finished(all_links: int, to_keep: int):
+    if all_links == 0:
+        print()
+        print('{red}[X] No matching links found.{reset}'.format(**ANSI))
+    else:
+        num_removed = all_links - to_keep
+        print()
+        print('{red}[√] Removed {} out of {} links from the archive index.{reset}'.format(
+            num_removed,
+            all_links,
+            **ANSI,
+        ))
+        print('    Index now contains {} links.'.format(to_keep))
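The y/[n] prompt above leans on an assert inside try/except, so a plain Enter, any answer other than 'y', a Ctrl-C, or a closed stdin all abort with exit status 0 instead of dumping a traceback. A minimal standalone sketch of the same pattern; confirm_or_exit is a hypothetical name for illustration, not part of this commit:

    # Sketch of the confirmation pattern used by log_removal_started above.
    # Any answer other than exactly 'y' raises AssertionError; Ctrl-C raises
    # KeyboardInterrupt; a closed stdin raises EOFError. All three are caught
    # and converted into a clean exit with status 0.
    def confirm_or_exit(prompt: str='    y/[n]: ') -> None:
        try:
            assert input(prompt).lower() == 'y'
        except (KeyboardInterrupt, EOFError, AssertionError):
            raise SystemExit(0)

    if __name__ == '__main__':
        confirm_or_exit()
        print('user confirmed, proceeding...')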

File 2 of 4

@@ -4,7 +4,7 @@ import shutil

 from typing import List, Optional, Iterable

 from .schema import Link
-from .util import enforce_types, TimedProgress, to_csv
+from .util import enforce_types, TimedProgress
 from .index import (
     links_after_timestamp,
     load_links_index,
@@ -21,6 +21,10 @@ from .logs import (
     log_archiving_started,
     log_archiving_paused,
     log_archiving_finished,
+    log_removal_started,
+    log_removal_finished,
+    log_list_started,
+    log_list_finished,
 )
@@ -69,6 +73,7 @@ LINK_FILTERS = {
     'domain': lambda link, pattern: link.domain == pattern,
 }

+@enforce_types
 def link_matches_filter(link: Link, filter_patterns: List[str], filter_type: str='exact') -> bool:
     for pattern in filter_patterns:
         if LINK_FILTERS[filter_type](link, pattern):
@@ -99,12 +104,10 @@ def list_archive_data(filter_patterns: Optional[List[str]]=None, filter_type: str='exact',

 @enforce_types
 def remove_archive_links(filter_patterns: List[str], filter_type: str='exact',
                          after: Optional[float]=None, before: Optional[float]=None,
-                         yes: bool=False, delete: bool=False):
+                         yes: bool=False, delete: bool=False) -> List[Link]:

     check_dependencies()
-
-    print('[*] Finding links in the archive index matching these {} patterns:'.format(filter_type))
-    print('    {}'.format(' '.join(filter_patterns)))
+    log_list_started(filter_patterns, filter_type)
     timer = TimedProgress(360, prefix='      ')
     try:
         links = list(list_archive_data(
@@ -116,24 +119,14 @@ def remove_archive_links(filter_patterns: List[str], filter_type: str='exact',
     finally:
         timer.end()

     if not len(links):
-        print()
-        print('{red}[X] No matching links found.{reset}'.format(**ANSI))
+        log_removal_finished(0, 0)
         raise SystemExit(1)

-    print()
-    print('-------------------------------------------------------------------')
-    print(to_csv(links, csv_cols=['link_dir', 'url', 'is_archived', 'num_outputs']))
-    print('-------------------------------------------------------------------')
-    print()
-    if not yes:
-        resp = input('{lightyellow}[?] Are you sure you want to permanently remove these {} archived links? N/y: {reset}'.format(len(links), **ANSI))
-        if not resp.lower() == 'y':
-            raise SystemExit(0)
-
-    all_links, _ = load_links_index(out_dir=OUTPUT_DIR)
-    to_keep = []
-    for link in all_links:
-        should_remove = (
-            (after is not None and float(link.timestamp) < after)
+    log_removal_started(links, yes=yes, delete=delete)
+    timer = TimedProgress(360, prefix='      ')
+    try:
+        to_keep = []
+        all_links, _ = load_links_index(out_dir=OUTPUT_DIR)
+        for link in all_links:
+            should_remove = (
+                (after is not None and float(link.timestamp) < after)
@@ -144,9 +137,10 @@ def remove_archive_links(filter_patterns: List[str], filter_type: str='exact',
-            to_keep.append(link)
-        elif should_remove and delete:
-            shutil.rmtree(link.link_dir)
+                to_keep.append(link)
+            elif should_remove and delete:
+                shutil.rmtree(link.link_dir)
+    finally:
+        timer.end()

-    num_removed = len(all_links) - len(to_keep)
     write_links_index(links=to_keep, out_dir=OUTPUT_DIR, finished=True)
-    print()
-    print('{red}[√] Removed {} out of {} links from the archive index.{reset}'.format(num_removed, len(all_links), **ANSI))
-    print('    Index now contains {} links.'.format(len(to_keep)))
+    log_removal_finished(len(all_links), len(to_keep))
+
+    return to_keep
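The filtering this module runs on is a dict-of-predicates dispatch: LINK_FILTERS maps each filter_type to a lambda, and link_matches_filter looks the predicate up by key. A self-contained sketch of that pattern follows; the stand-in Link dataclass and the 'exact' predicate body are assumptions for illustration, since the diff only shows the 'domain' entry:

    from dataclasses import dataclass
    from typing import List

    # Stand-in for schema.Link, just enough fields for the demo:
    @dataclass
    class Link:
        url: str
        domain: str

    # Dict-of-predicates dispatch keyed by filter_type, as in the module
    # above; the 'exact' predicate here is an assumed implementation:
    LINK_FILTERS = {
        'exact': lambda link, pattern: link.url == pattern,
        'domain': lambda link, pattern: link.domain == pattern,
    }

    def link_matches_filter(link: Link, filter_patterns: List[str], filter_type: str='exact') -> bool:
        # any() short-circuits on the first matching pattern, equivalent to
        # the early-return for-loop in the diff above
        return any(LINK_FILTERS[filter_type](link, pattern) for pattern in filter_patterns)

    link = Link(url='https://example.com/page.html', domain='example.com')
    assert link_matches_filter(link, ['example.com'], filter_type='domain')
    assert not link_matches_filter(link, ['other.org'], filter_type='domain')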

File 3 of 4: schema.py

@@ -64,12 +64,12 @@ class ArchiveResult:

         return to_json(self, indent=indent, sort_keys=sort_keys)

-    def to_csv(self, cols=None):
+    def to_csv(self, cols=None, ljust: int=0, separator: str=','):
         from .util import to_json

         cols = cols or self.field_names()
-        return ','.join(
-            to_json(getattr(self, col), indent=False)
+        return separator.join(
+            to_json(getattr(self, col), indent=False).ljust(ljust)
             for col in cols
         )
@@ -187,11 +187,11 @@ class Link:

         return to_json(self, indent=indent, sort_keys=sort_keys)

-    def to_csv(self, csv_cols: List[str]):
+    def to_csv(self, csv_cols: List[str], ljust: int=0, separator: str=','):
         from .util import to_json

-        return ','.join(
-            to_json(getattr(self, col), indent=None)
+        return separator.join(
+            to_json(getattr(self, col), indent=None).ljust(ljust)
             for col in csv_cols
         )
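The effect of the new ljust/separator parameters: each cell is JSON-encoded and then padded to a fixed width, so passing separator=' | ' turns plain CSV into a column-aligned table. A standalone sketch of one row, with made-up values:

    import json

    # One row, built the way Link.to_csv now builds it: JSON-encode each
    # value, pad it to a fixed width, then join on the separator.
    def row(values, ljust: int=0, separator: str=',') -> str:
        return separator.join(json.dumps(v).ljust(ljust) for v in values)

    print(row(['1554990692', True, 3], ljust=16, separator=' | '))
    print(row(['1554990711', False, 0], ljust=16, separator=' | '))
    # Every cell is padded to 16 characters, so the ' | ' column breaks
    # line up vertically across the two printed rows.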

File 4 of 4: util.py

@@ -624,10 +624,20 @@ def to_json(obj: Any, file: IO=None, indent: Optional[int]=4, sort_keys: bool=True) -> str:

     return json.dumps(obj, indent=indent, sort_keys=sort_keys, cls=ExtendedEncoder)


-def to_csv(links: List[Link], csv_cols: Optional[List[str]]=None, header: bool=True) -> str:
+def to_csv(links: List[Link], csv_cols: Optional[List[str]]=None,
+           header: bool=True, ljust: int=0, separator: str=',') -> str:
     csv_cols = csv_cols or ['timestamp', 'is_archived', 'url']
-    header_str = '{}\n'.format(','.join(csv_cols)) if header else ''
-    return header_str + '\n'.join(link.to_csv(csv_cols=csv_cols) for link in links)
+
+    header_str = ''
+    if header:
+        header_str = separator.join(col.ljust(ljust) for col in csv_cols)
+
+    row_strs = (
+        link.to_csv(csv_cols=csv_cols, ljust=ljust, separator=separator)
+        for link in links
+    )
+
+    return '\n'.join((header_str, *row_strs))


 def atomic_write(contents: Union[dict, str], path: str) -> None:
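Putting the pieces together: the reworked to_csv builds the header row with the same padding and separator as the data rows, then joins everything on newlines. A self-contained sketch, where the stub Link only approximates schema.Link (the real one JSON-encodes each field via to_json):

    from typing import List, Optional

    # Stub with the same to_csv signature as schema.Link above; str() is a
    # simplification of the real JSON encoding.
    class Link:
        def __init__(self, timestamp: str, is_archived: bool, url: str):
            self.timestamp, self.is_archived, self.url = timestamp, is_archived, url

        def to_csv(self, csv_cols: List[str], ljust: int=0, separator: str=',') -> str:
            return separator.join(str(getattr(self, col)).ljust(ljust) for col in csv_cols)

    def to_csv(links: List[Link], csv_cols: Optional[List[str]]=None,
               header: bool=True, ljust: int=0, separator: str=',') -> str:
        csv_cols = csv_cols or ['timestamp', 'is_archived', 'url']
        header_str = ''
        if header:
            header_str = separator.join(col.ljust(ljust) for col in csv_cols)
        row_strs = (
            link.to_csv(csv_cols=csv_cols, ljust=ljust, separator=separator)
            for link in links
        )
        return '\n'.join((header_str, *row_strs))

    links = [Link('1554990692', True, 'https://example.com')]
    print(to_csv(links, ljust=16, separator=' | '))

One edge case worth noting: with header=False, header_str stays '' but is still the first element of the joined tuple, so the result begins with a blank line; callers wanting truly headerless output would need to strip it.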