diff --git a/archivebox/cli/archivebox_init.py b/archivebox/cli/archivebox_init.py index 6255ef26..5753269c 100755 --- a/archivebox/cli/archivebox_init.py +++ b/archivebox/cli/archivebox_init.py @@ -27,11 +27,17 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional action='store_true', help='Ignore unrecognized files in current directory and initialize anyway', ) + parser.add_argument( + '--quick', '-q', + action='store_true', + help='Run any updates or migrations without rechecking all snapshot dirs', + ) command = parser.parse_args(args or ()) reject_stdin(__command__, stdin) init( force=command.force, + quick=command.quick, out_dir=pwd or OUTPUT_DIR, ) diff --git a/archivebox/cli/archivebox_server.py b/archivebox/cli/archivebox_server.py index a4d96dc9..a6ec987e 100644 --- a/archivebox/cli/archivebox_server.py +++ b/archivebox/cli/archivebox_server.py @@ -41,7 +41,12 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional parser.add_argument( '--init', action='store_true', - help='Run archivebox init before starting the server', + help='Run a full archivebox init/upgrade before starting the server', + ) + parser.add_argument( + '--quick-init', '-i', + action='store_true', + help='Run quick archivebox init/upgrade before starting the server', ) parser.add_argument( '--createsuperuser', @@ -56,6 +61,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional reload=command.reload, debug=command.debug, init=command.init, + quick_init=command.quick_init, createsuperuser=command.createsuperuser, out_dir=pwd or OUTPUT_DIR, ) diff --git a/archivebox/main.py b/archivebox/main.py index a1e58619..bad93706 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -263,7 +263,7 @@ def run(subcommand: str, @enforce_types -def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None: +def init(force: bool=False, quick: bool=False, out_dir: Path=OUTPUT_DIR) -> None: """Initialize a new 
ArchiveBox collection in the current directory""" from core.models import Snapshot @@ -345,48 +345,49 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None: all_links = load_main_index(out_dir=out_dir, warn=False) print(' √ Loaded {} links from existing main index.'.format(all_links.count())) - # Links in data folders that dont match their timestamp - fixed, cant_fix = fix_invalid_folder_locations(out_dir=out_dir) - if fixed: - print(' {lightyellow}√ Fixed {} data directory locations that didn\'t match their link timestamps.{reset}'.format(len(fixed), **ANSI)) - if cant_fix: - print(' {lightyellow}! Could not fix {} data directory locations due to conflicts with existing folders.{reset}'.format(len(cant_fix), **ANSI)) + if not quick: + # Links in data folders that dont match their timestamp + fixed, cant_fix = fix_invalid_folder_locations(out_dir=out_dir) + if fixed: + print(' {lightyellow}√ Fixed {} data directory locations that didn\'t match their link timestamps.{reset}'.format(len(fixed), **ANSI)) + if cant_fix: + print(' {lightyellow}! 
Could not fix {} data directory locations due to conflicts with existing folders.{reset}'.format(len(cant_fix), **ANSI)) - # Links in JSON index but not in main index - orphaned_json_links = { - link.url: link - for link in parse_json_main_index(out_dir) - if not all_links.filter(url=link.url).exists() - } - if orphaned_json_links: - pending_links.update(orphaned_json_links) - print(' {lightyellow}√ Added {} orphaned links from existing JSON index...{reset}'.format(len(orphaned_json_links), **ANSI)) + # Links in JSON index but not in main index + orphaned_json_links = { + link.url: link + for link in parse_json_main_index(out_dir) + if not all_links.filter(url=link.url).exists() + } + if orphaned_json_links: + pending_links.update(orphaned_json_links) + print(' {lightyellow}√ Added {} orphaned links from existing JSON index...{reset}'.format(len(orphaned_json_links), **ANSI)) - # Links in data dir indexes but not in main index - orphaned_data_dir_links = { - link.url: link - for link in parse_json_links_details(out_dir) - if not all_links.filter(url=link.url).exists() - } - if orphaned_data_dir_links: - pending_links.update(orphaned_data_dir_links) - print(' {lightyellow}√ Added {} orphaned links from existing archive directories.{reset}'.format(len(orphaned_data_dir_links), **ANSI)) + # Links in data dir indexes but not in main index + orphaned_data_dir_links = { + link.url: link + for link in parse_json_links_details(out_dir) + if not all_links.filter(url=link.url).exists() + } + if orphaned_data_dir_links: + pending_links.update(orphaned_data_dir_links) + print(' {lightyellow}√ Added {} orphaned links from existing archive directories.{reset}'.format(len(orphaned_data_dir_links), **ANSI)) - # Links in invalid/duplicate data dirs - invalid_folders = { - folder: link - for folder, link in get_invalid_folders(all_links, out_dir=out_dir).items() - } - if invalid_folders: - print(' {lightyellow}! 
Skipped adding {} invalid link data directories.{reset}'.format(len(invalid_folders), **ANSI)) - print(' X ' + '\n X '.join(f'{folder} {link}' for folder, link in invalid_folders.items())) - print() - print(' {lightred}Hint:{reset} For more information about the link data directories that were skipped, run:'.format(**ANSI)) - print(' archivebox status') - print(' archivebox list --status=invalid') + # Links in invalid/duplicate data dirs + invalid_folders = { + folder: link + for folder, link in get_invalid_folders(all_links, out_dir=out_dir).items() + } + if invalid_folders: + print(' {lightyellow}! Skipped adding {} invalid link data directories.{reset}'.format(len(invalid_folders), **ANSI)) + print(' X ' + '\n X '.join(f'{folder} {link}' for folder, link in invalid_folders.items())) + print() + print(' {lightred}Hint:{reset} For more information about the link data directories that were skipped, run:'.format(**ANSI)) + print(' archivebox status') + print(' archivebox list --status=invalid') - write_main_index(list(pending_links.values()), out_dir=out_dir) + write_main_index(list(pending_links.values()), out_dir=out_dir) print('\n{green}------------------------------------------------------------------{reset}'.format(**ANSI)) if existing_index: @@ -1063,14 +1064,16 @@ def server(runserver_args: Optional[List[str]]=None, reload: bool=False, debug: bool=False, init: bool=False, + quick_init: bool=False, createsuperuser: bool=False, out_dir: Path=OUTPUT_DIR) -> None: """Run the ArchiveBox HTTP server""" runserver_args = runserver_args or [] - if init: - run_subcommand('init', stdin=None, pwd=out_dir) + if init or quick_init: + # quick mode is a CLI flag of `archivebox init`, so forward it via subcommand_args (same mechanism as the createsuperuser call below) instead of as a keyword argument + run_subcommand('init', subcommand_args=['--quick'] if quick_init else [], stdin=None, pwd=out_dir) if createsuperuser: run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir) diff --git a/docker-compose.yml b/docker-compose.yml index 96b90a35..a8fd08a9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,7 +13,7 @@ services: archivebox: # build: 
. image: ${DOCKER_IMAGE:-archivebox/archivebox:latest} - command: server 0.0.0.0:8000 + command: server --quick-init 0.0.0.0:8000 stdin_open: true tty: true ports: