feat: Replace index.json with index.sql as the main index in init

This commit is contained in:
Cristian 2020-08-18 11:45:27 -05:00 committed by Cristian Vargas
parent a6b9b04297
commit 02f36b2096
2 changed files with 14 additions and 38 deletions

View file

@ -236,7 +236,7 @@ def timed_index_update(out_path: str):
@enforce_types
def write_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=False) -> None:
def write_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=False, write_static: bool=False) -> None:
"""create index.html file for a given list of links"""
log_indexing_process_started(len(links))
@ -246,11 +246,12 @@ def write_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=
write_sql_main_index(links, out_dir=out_dir)
os.chmod(os.path.join(out_dir, SQL_INDEX_FILENAME), int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes
with timed_index_update(os.path.join(out_dir, JSON_INDEX_FILENAME)):
write_json_main_index(links, out_dir=out_dir)
if write_static:
with timed_index_update(os.path.join(out_dir, JSON_INDEX_FILENAME)):
write_json_main_index(links, out_dir=out_dir)
with timed_index_update(os.path.join(out_dir, HTML_INDEX_FILENAME)):
write_html_main_index(links, out_dir=out_dir, finished=finished)
with timed_index_update(os.path.join(out_dir, HTML_INDEX_FILENAME)):
write_html_main_index(links, out_dir=out_dir, finished=finished)
except (KeyboardInterrupt, SystemExit):
stderr('[!] Warning: Still writing index to disk...', color='lightyellow')
stderr(' Run archivebox init to fix any inconsisntencies from an ungraceful exit.')
@ -268,26 +269,9 @@ def load_main_index(out_dir: str=OUTPUT_DIR, warn: bool=True) -> List[Link]:
all_links: List[Link] = []
try:
all_links = list(parse_json_main_index(out_dir))
links_from_sql = list(parse_sql_main_index(out_dir))
all_links = list(parse_sql_main_index(out_dir))
list(parse_sql_main_index(out_dir))
json_urls = set(l.url for l in all_links)
sql_urls = set(l.url for l in links_from_sql)
only_in_sql = sql_urls - json_urls
only_in_json = json_urls - sql_urls
if only_in_json:
stderr('{red}[!] Warning: SQL index does not match JSON index!{reset}'.format(**ANSI))
if only_in_json:
stderr(' > Only in JSON: {}...'.format(', '.join(list(only_in_json)[:5])))
if only_in_sql:
stderr(' > Only in SQL: {}...'.format(', '.join(list(only_in_sql)[:5])))
stderr(' To repair the index and re-import any orphaned links run:')
stderr(' archivebox init')
if only_in_sql:
# meh, this is harmless, it'll get overwritten on next run anyway
pass
except (KeyboardInterrupt, SystemExit):
raise SystemExit(0)