mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-15 07:34:27 -04:00
move main funcs into cli files and switch to using click for CLI
This commit is contained in:
parent
569081a9eb
commit
328eb98a38
35 changed files with 1885 additions and 2296 deletions
|
@ -1,264 +1,117 @@
|
|||
__package__ = 'archivebox.cli'
|
||||
__command__ = 'archivebox'
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import threading
|
||||
|
||||
from time import sleep
|
||||
from collections.abc import Mapping
|
||||
|
||||
from rich import print
|
||||
|
||||
from typing import Optional, List, IO, Union, Iterable
|
||||
from pathlib import Path
|
||||
|
||||
from importlib import import_module
|
||||
|
||||
BUILTIN_LIST = list
|
||||
import rich_click as click
|
||||
from rich import print
|
||||
|
||||
from archivebox.config.version import VERSION
|
||||
|
||||
CLI_DIR = Path(__file__).resolve().parent
|
||||
|
||||
# rewrite setup -> install for backwards compatibility
|
||||
if len(sys.argv) > 1 and sys.argv[1] == 'setup':
|
||||
from rich import print
|
||||
print(':warning: [bold red]DEPRECATED[/bold red] `archivebox setup` is deprecated, use `archivebox install` instead')
|
||||
sys.argv[1] = 'install'
|
||||
|
||||
if '--debug' in sys.argv:
|
||||
os.environ['DEBUG'] = 'True'
|
||||
sys.argv.remove('--debug')
|
||||
|
||||
|
||||
# def list_subcommands() -> Dict[str, str]:
|
||||
# """find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
|
||||
# COMMANDS = []
|
||||
# for filename in os.listdir(CLI_DIR):
|
||||
# if is_cli_module(filename):
|
||||
# subcommand = filename.replace('archivebox_', '').replace('.py', '')
|
||||
# module = import_module('.archivebox_{}'.format(subcommand), __package__)
|
||||
# assert is_valid_cli_module(module, subcommand)
|
||||
# COMMANDS.append((subcommand, module.main.__doc__))
|
||||
# globals()[subcommand] = module.main
|
||||
# display_order = lambda cmd: (
|
||||
# display_first.index(cmd[0])
|
||||
# if cmd[0] in display_first else
|
||||
# 100 + len(cmd[0])
|
||||
# )
|
||||
# return dict(sorted(COMMANDS, key=display_order))
|
||||
|
||||
# just define it statically, it's much faster:
|
||||
SUBCOMMAND_MODULES = {
|
||||
'help': 'archivebox_help',
|
||||
'version': 'archivebox_version' ,
|
||||
class ArchiveBoxGroup(click.Group):
|
||||
"""lazy loading click group for archivebox commands"""
|
||||
meta_commands = {
|
||||
'help': 'archivebox.cli.archivebox_help.main',
|
||||
'version': 'archivebox.cli.archivebox_version.main',
|
||||
}
|
||||
setup_commands = {
|
||||
'init': 'archivebox.cli.archivebox_init.main',
|
||||
'install': 'archivebox.cli.archivebox_install.main',
|
||||
}
|
||||
archive_commands = {
|
||||
'add': 'archivebox.cli.archivebox_add.main',
|
||||
'remove': 'archivebox.cli.archivebox_remove.main',
|
||||
'update': 'archivebox.cli.archivebox_update.main',
|
||||
'search': 'archivebox.cli.archivebox_search.main',
|
||||
'status': 'archivebox.cli.archivebox_status.main',
|
||||
'config': 'archivebox.cli.archivebox_config.main',
|
||||
'schedule': 'archivebox.cli.archivebox_schedule.main',
|
||||
'server': 'archivebox.cli.archivebox_server.main',
|
||||
'shell': 'archivebox.cli.archivebox_shell.main',
|
||||
'manage': 'archivebox.cli.archivebox_manage.main',
|
||||
}
|
||||
all_subcommands = {
|
||||
**meta_commands,
|
||||
**setup_commands,
|
||||
**archive_commands,
|
||||
}
|
||||
renamed_commands = {
|
||||
'setup': 'install',
|
||||
'list': 'search',
|
||||
'import': 'add',
|
||||
'archive': 'add',
|
||||
'export': 'search',
|
||||
}
|
||||
|
||||
'init': 'archivebox_init',
|
||||
'install': 'archivebox_install',
|
||||
##############################################
|
||||
'config': 'archivebox_config',
|
||||
'add': 'archivebox_add',
|
||||
'remove': 'archivebox_remove',
|
||||
'update': 'archivebox_update',
|
||||
'list': 'archivebox_list',
|
||||
'status': 'archivebox_status',
|
||||
|
||||
def get_command(self, ctx, cmd_name):
|
||||
# handle renamed commands
|
||||
if cmd_name in self.renamed_commands:
|
||||
new_name = self.renamed_commands[cmd_name]
|
||||
print(f' [violet]Hint:[/violet] `archivebox {cmd_name}` has been renamed to `archivebox {new_name}`')
|
||||
cmd_name = new_name
|
||||
ctx.invoked_subcommand = cmd_name
|
||||
|
||||
# handle lazy loading of commands
|
||||
if cmd_name in self.all_subcommands:
|
||||
return self._lazy_load(cmd_name)
|
||||
|
||||
# fall-back to using click's default command lookup
|
||||
return super().get_command(ctx, cmd_name)
|
||||
|
||||
@classmethod
|
||||
def _lazy_load(cls, cmd_name):
|
||||
import_path = cls.all_subcommands[cmd_name]
|
||||
modname, funcname = import_path.rsplit('.', 1)
|
||||
|
||||
# print(f'LAZY LOADING {import_path}')
|
||||
mod = import_module(modname)
|
||||
func = getattr(mod, funcname)
|
||||
|
||||
if not hasattr(func, '__doc__'):
|
||||
raise ValueError(f'lazy loading of {import_path} failed - no docstring found on method')
|
||||
|
||||
# if not isinstance(cmd, click.BaseCommand):
|
||||
# raise ValueError(f'lazy loading of {import_path} failed - not a click command')
|
||||
|
||||
return func
|
||||
|
||||
|
||||
@click.group(cls=ArchiveBoxGroup, invoke_without_command=True)
|
||||
@click.option('--help', '-h', is_flag=True, help='Show help')
|
||||
@click.version_option(version=VERSION, package_name='archivebox', message='%(version)s')
|
||||
@click.pass_context
|
||||
def cli(ctx, help=False):
|
||||
"""ArchiveBox: The self-hosted internet archive"""
|
||||
|
||||
'schedule': 'archivebox_schedule',
|
||||
'server': 'archivebox_server',
|
||||
'shell': 'archivebox_shell',
|
||||
'manage': 'archivebox_manage',
|
||||
|
||||
# 'oneshot': 'archivebox_oneshot',
|
||||
}
|
||||
|
||||
# every imported command module must have these properties in order to be valid
|
||||
required_attrs = ('__package__', '__command__', 'main')
|
||||
|
||||
# basic checks to make sure imported files are valid subcommands
|
||||
is_cli_module = lambda fname: fname.startswith('archivebox_') and fname.endswith('.py')
|
||||
is_valid_cli_module = lambda module, subcommand: (
|
||||
all(hasattr(module, attr) for attr in required_attrs)
|
||||
and module.__command__.split(' ')[-1] == subcommand
|
||||
)
|
||||
|
||||
class LazySubcommands(Mapping):
|
||||
def keys(self):
|
||||
return SUBCOMMAND_MODULES.keys()
|
||||
if help or ctx.invoked_subcommand is None:
|
||||
ctx.invoke(ctx.command.get_command(ctx, 'help'))
|
||||
|
||||
def values(self):
|
||||
return [self[key] for key in self.keys()]
|
||||
|
||||
def items(self):
|
||||
return [(key, self[key]) for key in self.keys()]
|
||||
|
||||
def __getitem__(self, key):
|
||||
module = import_module(f'.{SUBCOMMAND_MODULES[key]}', __package__)
|
||||
assert is_valid_cli_module(module, key)
|
||||
return module.main
|
||||
|
||||
def __iter__(self):
|
||||
return iter(SUBCOMMAND_MODULES.keys())
|
||||
|
||||
def __len__(self):
|
||||
return len(SUBCOMMAND_MODULES)
|
||||
if ctx.invoked_subcommand in ArchiveBoxGroup.archive_commands:
|
||||
# print('SETUP DJANGO AND CHECK DATA FOLDER')
|
||||
from archivebox.config.django import setup_django
|
||||
from archivebox.misc.checks import check_data_folder
|
||||
setup_django()
|
||||
check_data_folder()
|
||||
|
||||
CLI_SUBCOMMANDS = LazySubcommands()
|
||||
|
||||
|
||||
# these common commands will appear sorted before any others for ease-of-use
|
||||
meta_cmds = ('help', 'version') # dont require valid data folder at all
|
||||
setup_cmds = ('init', 'setup', 'install') # require valid data folder, but dont require DB present in it yet
|
||||
archive_cmds = ('add', 'remove', 'update', 'list', 'status', 'schedule', 'server', 'shell', 'manage') # require valid data folder + existing db present
|
||||
fake_db = ("oneshot",) # use fake in-memory db
|
||||
|
||||
display_first = (*meta_cmds, *setup_cmds, *archive_cmds)
|
||||
|
||||
|
||||
IGNORED_BG_THREADS = ('MainThread', 'ThreadPoolExecutor', 'IPythonHistorySavingThread', 'Scheduler') # threads we dont have to wait for before exiting
|
||||
|
||||
|
||||
def wait_for_bg_threads_to_exit(thread_names: Iterable[str]=(), ignore_names: Iterable[str]=IGNORED_BG_THREADS, timeout: int=60) -> int:
|
||||
"""
|
||||
Block until the specified threads exit. e.g. pass thread_names=('default_hook_handler',) to wait for webhooks.
|
||||
Useful for waiting for signal handlers, webhooks, etc. to finish running after a mgmt command completes.
|
||||
"""
|
||||
|
||||
wait_for_all: bool = thread_names == ()
|
||||
|
||||
thread_matches = lambda thread, ptns: any(ptn in repr(thread) for ptn in ptns)
|
||||
|
||||
should_wait = lambda thread: (
|
||||
not thread_matches(thread, ignore_names)
|
||||
and (wait_for_all or thread_matches(thread, thread_names)))
|
||||
|
||||
for tries in range(timeout):
|
||||
all_threads = [*threading.enumerate()]
|
||||
blocking_threads = [*filter(should_wait, all_threads)]
|
||||
threads_summary = ', '.join(repr(t) for t in blocking_threads)
|
||||
if blocking_threads:
|
||||
sleep(1)
|
||||
if tries == 5: # only show stderr message if we need to wait more than 5s
|
||||
print(
|
||||
f'[…] Waiting up to {timeout}s for background jobs (e.g. webhooks) to finish...',
|
||||
threads_summary,
|
||||
file=sys.stderr,
|
||||
)
|
||||
else:
|
||||
return tries
|
||||
|
||||
raise Exception(f'Background threads failed to exit after {tries}s: {threads_summary}')
|
||||
|
||||
|
||||
|
||||
def run_subcommand(subcommand: str,
|
||||
subcommand_args: List[str] | None = None,
|
||||
stdin: Optional[IO]=None,
|
||||
pwd: Union[Path, str, None]=None) -> None:
|
||||
"""Run a given ArchiveBox subcommand with the given list of args"""
|
||||
|
||||
subcommand_args = subcommand_args or []
|
||||
|
||||
from archivebox.misc.checks import check_migrations
|
||||
from archivebox.config.django import setup_django
|
||||
|
||||
# print('DATA_DIR is', DATA_DIR)
|
||||
# print('pwd is', os.getcwd())
|
||||
|
||||
cmd_requires_db = (subcommand in archive_cmds)
|
||||
init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
|
||||
|
||||
check_db = cmd_requires_db and not init_pending
|
||||
|
||||
setup_django(in_memory_db=subcommand in fake_db, check_db=check_db)
|
||||
|
||||
for ignore_pattern in ('help', '-h', '--help', 'version', '--version'):
|
||||
if ignore_pattern in sys.argv[:4]:
|
||||
cmd_requires_db = False
|
||||
break
|
||||
|
||||
if subcommand in archive_cmds:
|
||||
if cmd_requires_db:
|
||||
check_migrations()
|
||||
|
||||
module = import_module('.archivebox_{}'.format(subcommand), __package__)
|
||||
module.main(args=subcommand_args, stdin=stdin, pwd=pwd) # type: ignore
|
||||
|
||||
# wait for webhooks, signals, and other background jobs to finish before exit
|
||||
wait_for_bg_threads_to_exit(timeout=60)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class NotProvided:
|
||||
def __len__(self):
|
||||
return 0
|
||||
def __bool__(self):
|
||||
return False
|
||||
def __repr__(self):
|
||||
return '<not provided>'
|
||||
|
||||
Omitted = Union[None, NotProvided]
|
||||
|
||||
OMITTED = NotProvided()
|
||||
|
||||
|
||||
def main(args: List[str] | Omitted=OMITTED, stdin: IO | Omitted=OMITTED, pwd: str | None=None) -> None:
|
||||
# print('STARTING CLI MAIN ENTRYPOINT')
|
||||
|
||||
args = sys.argv[1:] if args is OMITTED else args
|
||||
stdin = sys.stdin if stdin is OMITTED else stdin
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
prog=__command__,
|
||||
description='ArchiveBox: The self-hosted internet archive',
|
||||
add_help=False,
|
||||
)
|
||||
group = parser.add_mutually_exclusive_group()
|
||||
group.add_argument(
|
||||
'--help', '-h',
|
||||
action='store_true',
|
||||
help=CLI_SUBCOMMANDS['help'].__doc__,
|
||||
)
|
||||
group.add_argument(
|
||||
'--version',
|
||||
action='store_true',
|
||||
help=CLI_SUBCOMMANDS['version'].__doc__,
|
||||
)
|
||||
group.add_argument(
|
||||
"subcommand",
|
||||
type=str,
|
||||
help= "The name of the subcommand to run",
|
||||
nargs='?',
|
||||
choices=CLI_SUBCOMMANDS.keys(),
|
||||
default=None,
|
||||
)
|
||||
parser.add_argument(
|
||||
"subcommand_args",
|
||||
help="Arguments for the subcommand",
|
||||
nargs=argparse.REMAINDER,
|
||||
)
|
||||
command = parser.parse_args(args or ())
|
||||
|
||||
if command.version:
|
||||
command.subcommand = 'version'
|
||||
elif command.help or command.subcommand is None:
|
||||
command.subcommand = 'help'
|
||||
|
||||
if command.subcommand not in ('version',):
|
||||
from archivebox.misc.logging_util import log_cli_command
|
||||
|
||||
log_cli_command(
|
||||
subcommand=command.subcommand,
|
||||
subcommand_args=command.subcommand_args,
|
||||
stdin=stdin or None,
|
||||
)
|
||||
def main(args=None, prog_name=None):
|
||||
# show `docker run archivebox xyz` in help messages if running in docker
|
||||
IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')
|
||||
prog_name = prog_name or ('docker compose run archivebox' if IN_DOCKER else 'archivebox')
|
||||
|
||||
try:
|
||||
run_subcommand(
|
||||
subcommand=command.subcommand,
|
||||
subcommand_args=command.subcommand_args,
|
||||
stdin=stdin or None,
|
||||
)
|
||||
cli(args=args, prog_name=prog_name)
|
||||
except KeyboardInterrupt:
|
||||
print('\n\n[red][X] Got CTRL+C. Exiting...[/red]')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue