add overwrite flag to add command to force re-archiving

Nick Sweeting 2020-08-18 04:37:54 -04:00
parent da671532a4
commit b681a477ae
4 changed files with 35 additions and 18 deletions
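
Example usage of the new flag (assuming the standard archivebox CLI entrypoint; the URL is illustrative):

    archivebox add --overwrite 'https://example.com'

Without --overwrite, URLs already present in the index are skipped during archiving; with it, their snapshot files are regenerated from scratch.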


@@ -55,6 +55,12 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         type=int,
         help="Recursively archive all linked pages up to this many hops away"
     )
+    parser.add_argument(
+        "--overwrite",
+        default=False,
+        action="store_true",
+        help="Re-archive URLs from scratch, overwriting any existing files"
+    )
     command = parser.parse_args(args or ())
     urls = command.urls
     stdin_urls = accept_stdin(stdin)
@@ -69,6 +75,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         depth=command.depth,
         update_all=command.update_all,
         index_only=command.index_only,
+        overwrite=command.overwrite,
         out_dir=pwd or OUTPUT_DIR,
     )
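
For illustration, the same option exercised through the Python API that this flag feeds into (a minimal sketch; assumes an initialized archive as the working directory):

    from archivebox.main import add

    # Force re-archiving of an already-saved URL, overwriting its existing snapshot files.
    add('https://example.com', overwrite=True)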


@@ -522,6 +522,7 @@ def add(urls: Union[str, List[str]],
         depth: int=0,
         update_all: bool=not ONLY_NEW,
         index_only: bool=False,
+        overwrite: bool=False,
         out_dir: str=OUTPUT_DIR) -> List[Link]:
     """Add a new URL or list of URLs to your archive"""
@@ -551,18 +552,26 @@ def add(urls: Union[str, List[str]],
         for new_link in new_links:
             downloaded_file = save_file_as_source(new_link.url, filename='{ts}-crawl-{basename}.txt', out_dir=out_dir)
             new_links_depth += parse_links_from_source(downloaded_file)
-    all_links, new_links = dedupe_links(all_links, new_links + new_links_depth)
+    imported_links = new_links + new_links_depth
+    all_links, new_links = dedupe_links(all_links, imported_links)
     write_main_index(links=all_links, out_dir=out_dir, finished=not new_links)

     if index_only:
         return all_links

     # Run the archive methods for each link
-    to_archive = all_links if update_all else new_links
-    archive_links(to_archive, out_dir=out_dir)
+    if update_all:
+        archive_links(all_links, overwrite=overwrite, out_dir=out_dir)
+    elif overwrite:
+        archive_links(imported_links, overwrite=True, out_dir=out_dir)
+    elif new_links:
+        archive_links(new_links, overwrite=False, out_dir=out_dir)
+    else:
+        # nothing was updated, don't bother re-saving the index
+        return all_links

     # Step 4: Re-write links index with updated titles, icons, and resources
-    if to_archive:
-        all_links = load_main_index(out_dir=out_dir)
-        write_main_index(links=list(all_links), out_dir=out_dir, finished=True)
+    all_links = load_main_index(out_dir=out_dir)
+    write_main_index(links=list(all_links), out_dir=out_dir, finished=True)
     return all_links
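
The old to_archive ternary becomes a four-way dispatch. Paraphrased as a standalone helper for illustration (hypothetical names; the real logic stays inline in add()):

    from typing import List, Optional, Tuple

    def choose_archive_set(update_all: bool,
                           overwrite: bool,
                           all_links: List[str],
                           imported_links: List[str],
                           new_links: List[str]) -> Optional[Tuple[List[str], bool]]:
        """Return (links_to_archive, overwrite_flag), or None if there is nothing to do."""
        if update_all:
            return all_links, overwrite    # refresh the whole index, honoring the flag
        if overwrite:
            return imported_links, True    # re-archive everything in this import batch
        if new_links:
            return new_links, False        # default: archive only never-seen links
        return None                        # no work, so the final index rewrite is skipped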


@@ -16,6 +16,7 @@ from .util import enforce_types, ExtendedEncoder
 from .config import OUTPUT_PERMISSIONS

 def run(*args, input=None, capture_output=True, text=False, **kwargs):
"""Patched of subprocess.run to fix blocking io making timeout=innefective""" """Patched of subprocess.run to fix blocking io making timeout=innefective"""