Refactor should_save_extractor methods to accept overwrite parameter

This commit is contained in:
Dan Arnfield 2021-01-21 15:45:11 -06:00
parent 553c3ca219
commit 5420903102
14 changed files with 56 additions and 50 deletions

View file

@@ -20,16 +20,16 @@ from ..logging_util import TimedProgress
@enforce_types
def should_save_dom(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
    """Decide whether the DOM extractor should run for ``link``.

    Args:
        link: The link being archived; its ``url`` and ``link_dir`` are read.
        out_dir: Directory to check for existing output. Falls back to
            ``Path(link.link_dir)`` when not given.
        overwrite: When truthy, an existing ``output.html`` does not
            prevent the extractor from running again.

    Returns:
        ``False`` for static-file URLs, ``False`` when ``output.html``
        already exists and ``overwrite`` is falsy, otherwise the value of
        the ``SAVE_DOM`` setting.
    """
    if is_static_file(link.url):
        return False

    # Resolve the output directory only after the static-file short-circuit.
    out_dir = out_dir or Path(link.link_dir)
    if not overwrite and (out_dir / 'output.html').exists():
        return False

    return SAVE_DOM
@enforce_types
def save_dom(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
"""print HTML of site to file using chrome --dump-html"""