From 31343c1367afc5bf0a40a492b0938bdd77716210 Mon Sep 17 00:00:00 2001 From: Cristian Date: Tue, 18 Aug 2020 12:24:43 -0500 Subject: [PATCH] feat: Update extractors and add command to use sql index as source of truth --- archivebox/extractors/__init__.py | 2 -- archivebox/index/__init__.py | 4 ++-- archivebox/main.py | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py index 23d5cfd0..988635b6 100644 --- a/archivebox/extractors/__init__.py +++ b/archivebox/extractors/__init__.py @@ -113,8 +113,6 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s pass write_link_details(link, out_dir=out_dir, skip_sql_index=skip_index) - if not skip_index: - patch_main_index(link) # # If any changes were made, update the main links index json and html # was_changed = stats['succeeded'] or stats['failed'] diff --git a/archivebox/index/__init__.py b/archivebox/index/__init__.py index 64829bd0..555882ab 100644 --- a/archivebox/index/__init__.py +++ b/archivebox/index/__init__.py @@ -236,7 +236,7 @@ def timed_index_update(out_path: str): @enforce_types -def write_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=False, write_static: bool=False) -> None: +def write_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=False) -> None: """create index.html file for a given list of links""" log_indexing_process_started(len(links)) @@ -246,7 +246,7 @@ def write_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool= write_sql_main_index(links, out_dir=out_dir) os.chmod(os.path.join(out_dir, SQL_INDEX_FILENAME), int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes - if write_static: + if finished: with timed_index_update(os.path.join(out_dir, JSON_INDEX_FILENAME)): write_json_main_index(links, out_dir=out_dir) diff --git a/archivebox/main.py b/archivebox/main.py index e5f2cb9d..c8ec98c4 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -368,7 +368,7 @@ def init(force: bool=False, out_dir: str=OUTPUT_DIR) -> None: print(' archivebox list --status=invalid') - write_main_index(list(all_links.values()), out_dir=out_dir, write_static=True) + write_main_index(list(all_links.values()), out_dir=out_dir) print('\n{green}------------------------------------------------------------------{reset}'.format(**ANSI)) if existing_index: