speed up startup time, add rich startup progressbar, split logging and checks into misc, fix search index import backend bug

This commit is contained in:
Nick Sweeting 2024-09-24 19:04:38 -07:00
parent 7ffb81f61b
commit 64c7100cf9
No known key found for this signature in database
22 changed files with 566 additions and 762 deletions

View file

@ -1,16 +1,20 @@
# Package name handed to import_module() so relative subcommand imports resolve.
__package__ = 'archivebox.cli'
# Name of the top-level command; also used as the argparse `prog`.
__command__ = 'archivebox'
import os
import sys
import argparse
import threading
from time import sleep
import archivebox
from typing import Optional, Dict, List, IO, Union, Iterable
from time import sleep
from collections.abc import Mapping
from typing import Optional, List, IO, Union, Iterable
from pathlib import Path
from ..config import OUTPUT_DIR, check_data_folder, check_migrations, stderr
from ..misc.checks import check_data_folder, check_migrations
from ..misc.logging import stderr
from importlib import import_module
@ -18,13 +22,46 @@ BUILTIN_LIST = list
# Directory containing this file.
CLI_DIR = Path(__file__).resolve().parent

# Subcommand groups. The commands listed here are sorted ahead of all others
# (in this order) for ease of use.

# Don't require a valid data folder at all.
meta_cmds = (
    'help',
    'version',
)

# Don't require an existing db to be present.
main_cmds = (
    'init',
    'config',
    'setup',
)

# Require an existing db to be present.
archive_cmds = (
    'add',
    'remove',
    'update',
    'list',
    'status',
)

# Run against a fake in-memory db.
fake_db = ('oneshot',)

display_first = meta_cmds + main_cmds + archive_cmds
# def list_subcommands() -> Dict[str, str]:
# """find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
# COMMANDS = []
# for filename in os.listdir(CLI_DIR):
# if is_cli_module(filename):
# subcommand = filename.replace('archivebox_', '').replace('.py', '')
# module = import_module('.archivebox_{}'.format(subcommand), __package__)
# assert is_valid_cli_module(module, subcommand)
# COMMANDS.append((subcommand, module.main.__doc__))
# globals()[subcommand] = module.main
# display_order = lambda cmd: (
# display_first.index(cmd[0])
# if cmd[0] in display_first else
# 100 + len(cmd[0])
# )
# return dict(sorted(COMMANDS, key=display_order))
# just define it statically, it's much faster:
# Maps each subcommand name to the name of the module that implements it.
# Every module follows the archivebox_<subcommand> naming scheme, so the table
# is derived from a fixed tuple of names (no directory scan, no imports).
# The tuple order is the mapping's iteration order.
SUBCOMMAND_MODULES = {
    name: f'archivebox_{name}'
    for name in (
        'help',
        'version',
        'init',
        'config',
        'setup',
        'add',
        'remove',
        'update',
        'list',
        'status',
        'schedule',
        'server',
        'shell',
        'manage',
        'oneshot',
    )
}
# every imported command module must define all of these attributes in order to be valid
required_attrs = ('__package__', '__command__', 'main')
@ -36,6 +73,38 @@ is_valid_cli_module = lambda module, subcommand: (
and module.__command__.split(' ')[-1] == subcommand
)
class LazySubcommands(Mapping):
    """Read-only mapping of subcommand name -> that subcommand's ``main`` callable.

    The set of names comes from SUBCOMMAND_MODULES. The module behind a name
    is only imported when that entry is actually looked up, so listing,
    counting, iterating or membership-testing names imports nothing.
    """

    def keys(self):
        """Return the known subcommand names (no imports are triggered)."""
        return SUBCOMMAND_MODULES.keys()

    def values(self):
        """Return every subcommand's ``main`` as a list; imports all modules."""
        return [self[key] for key in self.keys()]

    def items(self):
        """Return ``(name, main)`` pairs as a list; imports all modules."""
        return [(key, self[key]) for key in self.keys()]

    def __contains__(self, key):
        # Mapping's inherited __contains__ evaluates self[key], which would
        # import the subcommand module just to answer a membership test.
        # Checking the static table keeps `name in CLI_SUBCOMMANDS` lazy.
        return key in SUBCOMMAND_MODULES

    def __getitem__(self, key):
        """Import the module registered for *key* and return its ``main``.

        Raises:
            KeyError: *key* is not a name in SUBCOMMAND_MODULES.
            AssertionError: the imported module fails is_valid_cli_module().
        """
        module = import_module(f'.{SUBCOMMAND_MODULES[key]}', __package__)
        assert is_valid_cli_module(module, key)
        return module.main

    def __iter__(self):
        return iter(SUBCOMMAND_MODULES.keys())

    def __len__(self):
        return len(SUBCOMMAND_MODULES)


# Shared instance used to look up subcommands on demand.
CLI_SUBCOMMANDS = LazySubcommands()
# Subcommand groups. The commands listed here are sorted ahead of all others
# (in this order) for ease of use.

# Don't require a valid data folder at all.
meta_cmds = (
    'help',
    'version',
)

# Don't require an existing db to be present.
main_cmds = (
    'init',
    'config',
    'setup',
)

# Require an existing db to be present.
archive_cmds = (
    'add',
    'remove',
    'update',
    'list',
    'status',
)

# Run against a fake in-memory db.
fake_db = ('oneshot',)

display_first = meta_cmds + main_cmds + archive_cmds

# Threads we don't have to wait for before exiting.
IGNORED_BG_THREADS = (
    'MainThread',
    'ThreadPoolExecutor',
    'IPythonHistorySavingThread',
    'Scheduler',
)
@ -71,29 +140,9 @@ def wait_for_bg_threads_to_exit(thread_names: Iterable[str]=(), ignore_names: It
raise Exception(f'Background threads failed to exit after {tries}s: {threads_summary}')
def list_subcommands() -> Dict[str, str]:
    """Find and import all valid archivebox_<subcommand>.py files in CLI_DIR.

    Returns a dict of subcommand name -> docstring of that subcommand's
    ``main``. Commands named in ``display_first`` come first, in that order;
    all others are ranked by the length of their name.

    Side effect: each subcommand's ``main`` is also bound as a global of this
    module under the subcommand's name.
    """
    def _display_rank(entry):
        # entry is a (subcommand, docstring) pair; rank by the name only
        name = entry[0]
        if name in display_first:
            return display_first.index(name)
        return 100 + len(name)

    found = []
    for filename in os.listdir(CLI_DIR):
        if not is_cli_module(filename):
            continue
        subcommand = filename.replace('archivebox_', '').replace('.py', '')
        module = import_module('.archivebox_{}'.format(subcommand), __package__)
        assert is_valid_cli_module(module, subcommand)
        found.append((subcommand, module.main.__doc__))
        globals()[subcommand] = module.main

    return dict(sorted(found, key=_display_rank))
def run_subcommand(subcommand: str,
subcommand_args: List[str]=None,
subcommand_args: List[str] | None = None,
stdin: Optional[IO]=None,
pwd: Union[Path, str, None]=None) -> None:
"""Run a given ArchiveBox subcommand with the given list of args"""
@ -101,18 +150,18 @@ def run_subcommand(subcommand: str,
subcommand_args = subcommand_args or []
if subcommand not in meta_cmds:
from ..config import setup_django
from ..config import setup_django, CONFIG
cmd_requires_db = subcommand in archive_cmds
init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
if cmd_requires_db:
check_data_folder(pwd)
check_data_folder(CONFIG)
setup_django(in_memory_db=subcommand in fake_db, check_db=cmd_requires_db and not init_pending)
if cmd_requires_db:
check_migrations()
check_migrations(CONFIG)
module = import_module('.archivebox_{}'.format(subcommand), __package__)
module.main(args=subcommand_args, stdin=stdin, pwd=pwd) # type: ignore
@ -121,17 +170,28 @@ def run_subcommand(subcommand: str,
wait_for_bg_threads_to_exit(timeout=60)
SUBCOMMANDS = list_subcommands()
class NotProvided:
    """Sentinel type marking an argument that the caller did not pass.

    Instances are falsy and report a length of zero.
    """

    def __len__(self):
        return 0

    def __bool__(self):
        return False

    def __repr__(self):
        return '<not provided>'


# Annotation alias: the value is either None or the not-provided sentinel.
Omitted = Union[None, NotProvided]

# Shared sentinel instance; compare against it with `is`.
OMITTED = NotProvided()
def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided, pwd: Optional[str]=None) -> None:
args = sys.argv[1:] if args is NotProvided else args
stdin = sys.stdin if stdin is NotProvided else stdin
def main(args: List[str] | Omitted=OMITTED, stdin: IO | Omitted=OMITTED, pwd: str | None=None) -> None:
# print('STARTING CLI MAIN ENTRYPOINT')
args = sys.argv[1:] if args is OMITTED else args
stdin = sys.stdin if stdin is OMITTED else stdin
subcommands = list_subcommands()
parser = argparse.ArgumentParser(
prog=__command__,
description='ArchiveBox: The self-hosted internet archive',
@ -141,19 +201,19 @@ def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided,
group.add_argument(
'--help', '-h',
action='store_true',
help=subcommands['help'],
help=CLI_SUBCOMMANDS['help'].__doc__,
)
group.add_argument(
'--version',
action='store_true',
help=subcommands['version'],
help=CLI_SUBCOMMANDS['version'].__doc__,
)
group.add_argument(
"subcommand",
type=str,
help= "The name of the subcommand to run",
nargs='?',
choices=subcommands.keys(),
choices=CLI_SUBCOMMANDS.keys(),
default=None,
)
parser.add_argument(
@ -174,23 +234,13 @@ def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided,
log_cli_command(
subcommand=command.subcommand,
subcommand_args=command.subcommand_args,
stdin=stdin,
pwd=pwd or OUTPUT_DIR
stdin=stdin or None,
pwd=pwd or archivebox.DATA_DIR,
)
run_subcommand(
subcommand=command.subcommand,
subcommand_args=command.subcommand_args,
stdin=stdin,
pwd=pwd or OUTPUT_DIR,
stdin=stdin or None,
pwd=pwd or archivebox.DATA_DIR,
)
# Names exported by this module: the fixed entries below plus one entry per
# subcommand name found in SUBCOMMANDS.
__all__ = (
'SUBCOMMANDS',
'list_subcommands',
'run_subcommand',
*SUBCOMMANDS.keys(),
)