mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-17 16:44:26 -04:00
speed up startup time, add rich startup progressbar, split logging and checks into misc, fix search index import backend bug
This commit is contained in:
parent
7ffb81f61b
commit
64c7100cf9
22 changed files with 566 additions and 762 deletions
|
@ -1,16 +1,20 @@
|
|||
__package__ = 'archivebox.cli'
|
||||
__command__ = 'archivebox'
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import threading
|
||||
from time import sleep
|
||||
import archivebox
|
||||
|
||||
from typing import Optional, Dict, List, IO, Union, Iterable
|
||||
from time import sleep
|
||||
from collections.abc import Mapping
|
||||
|
||||
from typing import Optional, List, IO, Union, Iterable
|
||||
from pathlib import Path
|
||||
|
||||
from ..config import OUTPUT_DIR, check_data_folder, check_migrations, stderr
|
||||
|
||||
from ..misc.checks import check_data_folder, check_migrations
|
||||
from ..misc.logging import stderr
|
||||
|
||||
from importlib import import_module
|
||||
|
||||
|
@ -18,13 +22,46 @@ BUILTIN_LIST = list
|
|||
|
||||
CLI_DIR = Path(__file__).resolve().parent
|
||||
|
||||
# these common commands will appear sorted before any others for ease-of-use
|
||||
meta_cmds = ('help', 'version') # dont require valid data folder at all
|
||||
main_cmds = ('init', 'config', 'setup') # dont require existing db present
|
||||
archive_cmds = ('add', 'remove', 'update', 'list', 'status') # require existing db present
|
||||
fake_db = ("oneshot",) # use fake in-memory db
|
||||
|
||||
display_first = (*meta_cmds, *main_cmds, *archive_cmds)
|
||||
# def list_subcommands() -> Dict[str, str]:
|
||||
# """find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
|
||||
# COMMANDS = []
|
||||
# for filename in os.listdir(CLI_DIR):
|
||||
# if is_cli_module(filename):
|
||||
# subcommand = filename.replace('archivebox_', '').replace('.py', '')
|
||||
# module = import_module('.archivebox_{}'.format(subcommand), __package__)
|
||||
# assert is_valid_cli_module(module, subcommand)
|
||||
# COMMANDS.append((subcommand, module.main.__doc__))
|
||||
# globals()[subcommand] = module.main
|
||||
# display_order = lambda cmd: (
|
||||
# display_first.index(cmd[0])
|
||||
# if cmd[0] in display_first else
|
||||
# 100 + len(cmd[0])
|
||||
# )
|
||||
# return dict(sorted(COMMANDS, key=display_order))
|
||||
|
||||
# just define it statically, it's much faster:
|
||||
# Registry of subcommand name -> name of the module (inside this package)
# that implements it.  Every module is named archivebox_<subcommand>, so the
# mapping is derived from the ordered list of names below.
SUBCOMMAND_MODULES = {
    name: f'archivebox_{name}'
    for name in (
        # meta commands
        'help',
        'version',
        # main commands
        'init',
        'config',
        'setup',
        # archive commands
        'add',
        'remove',
        'update',
        'list',
        'status',
        # other commands
        'schedule',
        'server',
        'shell',
        'manage',
        # fake in-memory db command
        'oneshot',
    )
}
|
||||
|
||||
# every imported command module must have these properties in order to be valid
|
||||
required_attrs = ('__package__', '__command__', 'main')
|
||||
|
@ -36,6 +73,38 @@ is_valid_cli_module = lambda module, subcommand: (
|
|||
and module.__command__.split(' ')[-1] == subcommand
|
||||
)
|
||||
|
||||
class LazySubcommands(Mapping):
    """Read-only mapping of subcommand name -> that subcommand's ``main`` callable.

    The keys are the keys of SUBCOMMAND_MODULES.  The module behind a key is
    imported only when its value is looked up (``self[key]``, ``values()``,
    ``items()``); listing keys, iterating, taking ``len()`` and membership
    tests never import anything.
    """

    def keys(self):
        """Return the registered subcommand names without importing any module."""
        return SUBCOMMAND_MODULES.keys()

    def values(self):
        """Return a list of every subcommand's ``main`` (imports every module)."""
        return [self[key] for key in self.keys()]

    def items(self):
        """Return a list of ``(name, main)`` pairs (imports every module)."""
        return [(key, self[key]) for key in self.keys()]

    def __contains__(self, key):
        # Mapping's inherited __contains__ (also used by .get()) is implemented
        # as "try self[key]", which would import the subcommand module just to
        # answer a membership test.  Consult the static registry instead.
        return key in SUBCOMMAND_MODULES

    def __getitem__(self, key):
        """Import the module registered for ``key`` and return its ``main``.

        Raises KeyError if ``key`` is not a registered subcommand, and
        AssertionError if the imported module fails is_valid_cli_module().
        """
        module = import_module(f'.{SUBCOMMAND_MODULES[key]}', __package__)
        assert is_valid_cli_module(module, key)
        return module.main

    def __iter__(self):
        return iter(SUBCOMMAND_MODULES)

    def __len__(self):
        return len(SUBCOMMAND_MODULES)


CLI_SUBCOMMANDS = LazySubcommands()
|
||||
|
||||
|
||||
# Command groups. The common commands collected in display_first will appear
# sorted before any others for ease-of-use.
meta_cmds = ('help', 'version')                               # don't require a valid data folder at all
main_cmds = ('init', 'config', 'setup')                       # don't require an existing db to be present
archive_cmds = ('add', 'remove', 'update', 'list', 'status')  # require an existing db to be present
fake_db = ('oneshot',)                                        # use a fake in-memory db

display_first = meta_cmds + main_cmds + archive_cmds
|
||||
|
||||
|
||||
# threads we don't have to wait for before exiting
IGNORED_BG_THREADS = (
    'MainThread',
    'ThreadPoolExecutor',
    'IPythonHistorySavingThread',
    'Scheduler',
)
|
||||
|
||||
|
@ -71,29 +140,9 @@ def wait_for_bg_threads_to_exit(thread_names: Iterable[str]=(), ignore_names: It
|
|||
raise Exception(f'Background threads failed to exit after {tries}s: {threads_summary}')
|
||||
|
||||
|
||||
def list_subcommands() -> Dict[str, str]:
|
||||
"""find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
|
||||
|
||||
COMMANDS = []
|
||||
for filename in os.listdir(CLI_DIR):
|
||||
if is_cli_module(filename):
|
||||
subcommand = filename.replace('archivebox_', '').replace('.py', '')
|
||||
module = import_module('.archivebox_{}'.format(subcommand), __package__)
|
||||
assert is_valid_cli_module(module, subcommand)
|
||||
COMMANDS.append((subcommand, module.main.__doc__))
|
||||
globals()[subcommand] = module.main
|
||||
|
||||
display_order = lambda cmd: (
|
||||
display_first.index(cmd[0])
|
||||
if cmd[0] in display_first else
|
||||
100 + len(cmd[0])
|
||||
)
|
||||
|
||||
return dict(sorted(COMMANDS, key=display_order))
|
||||
|
||||
|
||||
def run_subcommand(subcommand: str,
|
||||
subcommand_args: List[str]=None,
|
||||
subcommand_args: List[str] | None = None,
|
||||
stdin: Optional[IO]=None,
|
||||
pwd: Union[Path, str, None]=None) -> None:
|
||||
"""Run a given ArchiveBox subcommand with the given list of args"""
|
||||
|
@ -101,18 +150,18 @@ def run_subcommand(subcommand: str,
|
|||
subcommand_args = subcommand_args or []
|
||||
|
||||
if subcommand not in meta_cmds:
|
||||
from ..config import setup_django
|
||||
from ..config import setup_django, CONFIG
|
||||
|
||||
cmd_requires_db = subcommand in archive_cmds
|
||||
init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
|
||||
|
||||
if cmd_requires_db:
|
||||
check_data_folder(pwd)
|
||||
check_data_folder(CONFIG)
|
||||
|
||||
setup_django(in_memory_db=subcommand in fake_db, check_db=cmd_requires_db and not init_pending)
|
||||
|
||||
if cmd_requires_db:
|
||||
check_migrations()
|
||||
check_migrations(CONFIG)
|
||||
|
||||
module = import_module('.archivebox_{}'.format(subcommand), __package__)
|
||||
module.main(args=subcommand_args, stdin=stdin, pwd=pwd) # type: ignore
|
||||
|
@ -121,17 +170,28 @@ def run_subcommand(subcommand: str,
|
|||
wait_for_bg_threads_to_exit(timeout=60)
|
||||
|
||||
|
||||
SUBCOMMANDS = list_subcommands()
|
||||
|
||||
|
||||
|
||||
class NotProvided:
    """Sentinel type for an argument that the caller did not pass.

    Instances are falsy and have length zero.
    """

    def __repr__(self):
        return '<not provided>'

    def __bool__(self):
        return False

    def __len__(self):
        return 0


# annotation for a value that is either None or the not-provided sentinel
Omitted = Union[None, NotProvided]

# the sentinel instance used as the default for omitted arguments
OMITTED = NotProvided()
|
||||
|
||||
|
||||
def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided, pwd: Optional[str]=None) -> None:
|
||||
args = sys.argv[1:] if args is NotProvided else args
|
||||
stdin = sys.stdin if stdin is NotProvided else stdin
|
||||
def main(args: List[str] | Omitted=OMITTED, stdin: IO | Omitted=OMITTED, pwd: str | None=None) -> None:
|
||||
# print('STARTING CLI MAIN ENTRYPOINT')
|
||||
|
||||
args = sys.argv[1:] if args is OMITTED else args
|
||||
stdin = sys.stdin if stdin is OMITTED else stdin
|
||||
|
||||
subcommands = list_subcommands()
|
||||
parser = argparse.ArgumentParser(
|
||||
prog=__command__,
|
||||
description='ArchiveBox: The self-hosted internet archive',
|
||||
|
@ -141,19 +201,19 @@ def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided,
|
|||
group.add_argument(
|
||||
'--help', '-h',
|
||||
action='store_true',
|
||||
help=subcommands['help'],
|
||||
help=CLI_SUBCOMMANDS['help'].__doc__,
|
||||
)
|
||||
group.add_argument(
|
||||
'--version',
|
||||
action='store_true',
|
||||
help=subcommands['version'],
|
||||
help=CLI_SUBCOMMANDS['version'].__doc__,
|
||||
)
|
||||
group.add_argument(
|
||||
"subcommand",
|
||||
type=str,
|
||||
help= "The name of the subcommand to run",
|
||||
nargs='?',
|
||||
choices=subcommands.keys(),
|
||||
choices=CLI_SUBCOMMANDS.keys(),
|
||||
default=None,
|
||||
)
|
||||
parser.add_argument(
|
||||
|
@ -174,23 +234,13 @@ def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided,
|
|||
log_cli_command(
|
||||
subcommand=command.subcommand,
|
||||
subcommand_args=command.subcommand_args,
|
||||
stdin=stdin,
|
||||
pwd=pwd or OUTPUT_DIR
|
||||
stdin=stdin or None,
|
||||
pwd=pwd or archivebox.DATA_DIR,
|
||||
)
|
||||
|
||||
run_subcommand(
|
||||
subcommand=command.subcommand,
|
||||
subcommand_args=command.subcommand_args,
|
||||
stdin=stdin,
|
||||
pwd=pwd or OUTPUT_DIR,
|
||||
stdin=stdin or None,
|
||||
pwd=pwd or archivebox.DATA_DIR,
|
||||
)
|
||||
|
||||
|
||||
__all__ = (
|
||||
'SUBCOMMANDS',
|
||||
'list_subcommands',
|
||||
'run_subcommand',
|
||||
*SUBCOMMANDS.keys(),
|
||||
)
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue