mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 06:34:25 -04:00
speed up startup time, add rich startup progressbar, split logging and checks into misc, fix search index import backend bug
This commit is contained in:
parent
7ffb81f61b
commit
64c7100cf9
22 changed files with 566 additions and 762 deletions
|
@ -1,5 +1,7 @@
|
||||||
__package__ = 'archivebox'
|
__package__ = 'archivebox'
|
||||||
|
|
||||||
|
# print('INSTALLING MONKEY PATCHES')
|
||||||
|
|
||||||
from .monkey_patches import *
|
from .monkey_patches import *
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
@ -28,3 +30,5 @@ def _detect_installed_version():
|
||||||
|
|
||||||
|
|
||||||
__version__ = _detect_installed_version()
|
__version__ = _detect_installed_version()
|
||||||
|
|
||||||
|
# print('DONE INSTALLING MONKEY PATCHES')
|
||||||
|
|
|
@ -1,16 +1,20 @@
|
||||||
__package__ = 'archivebox.cli'
|
__package__ = 'archivebox.cli'
|
||||||
__command__ = 'archivebox'
|
__command__ = 'archivebox'
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
import sys
|
||||||
import argparse
|
import argparse
|
||||||
import threading
|
import threading
|
||||||
from time import sleep
|
import archivebox
|
||||||
|
|
||||||
from typing import Optional, Dict, List, IO, Union, Iterable
|
from time import sleep
|
||||||
|
from collections.abc import Mapping
|
||||||
|
|
||||||
|
from typing import Optional, List, IO, Union, Iterable
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from ..config import OUTPUT_DIR, check_data_folder, check_migrations, stderr
|
|
||||||
|
from ..misc.checks import check_data_folder, check_migrations
|
||||||
|
from ..misc.logging import stderr
|
||||||
|
|
||||||
from importlib import import_module
|
from importlib import import_module
|
||||||
|
|
||||||
|
@ -18,13 +22,46 @@ BUILTIN_LIST = list
|
||||||
|
|
||||||
CLI_DIR = Path(__file__).resolve().parent
|
CLI_DIR = Path(__file__).resolve().parent
|
||||||
|
|
||||||
# these common commands will appear sorted before any others for ease-of-use
|
|
||||||
meta_cmds = ('help', 'version') # dont require valid data folder at all
|
|
||||||
main_cmds = ('init', 'config', 'setup') # dont require existing db present
|
|
||||||
archive_cmds = ('add', 'remove', 'update', 'list', 'status') # require existing db present
|
|
||||||
fake_db = ("oneshot",) # use fake in-memory db
|
|
||||||
|
|
||||||
display_first = (*meta_cmds, *main_cmds, *archive_cmds)
|
# def list_subcommands() -> Dict[str, str]:
|
||||||
|
# """find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
|
||||||
|
# COMMANDS = []
|
||||||
|
# for filename in os.listdir(CLI_DIR):
|
||||||
|
# if is_cli_module(filename):
|
||||||
|
# subcommand = filename.replace('archivebox_', '').replace('.py', '')
|
||||||
|
# module = import_module('.archivebox_{}'.format(subcommand), __package__)
|
||||||
|
# assert is_valid_cli_module(module, subcommand)
|
||||||
|
# COMMANDS.append((subcommand, module.main.__doc__))
|
||||||
|
# globals()[subcommand] = module.main
|
||||||
|
# display_order = lambda cmd: (
|
||||||
|
# display_first.index(cmd[0])
|
||||||
|
# if cmd[0] in display_first else
|
||||||
|
# 100 + len(cmd[0])
|
||||||
|
# )
|
||||||
|
# return dict(sorted(COMMANDS, key=display_order))
|
||||||
|
|
||||||
|
# just define it statically, it's much faster:
|
||||||
|
SUBCOMMAND_MODULES = {
|
||||||
|
'help': 'archivebox_help',
|
||||||
|
'version': 'archivebox_version' ,
|
||||||
|
|
||||||
|
'init': 'archivebox_init',
|
||||||
|
'config': 'archivebox_config',
|
||||||
|
'setup': 'archivebox_setup',
|
||||||
|
|
||||||
|
'add': 'archivebox_add',
|
||||||
|
'remove': 'archivebox_remove',
|
||||||
|
'update': 'archivebox_update',
|
||||||
|
'list': 'archivebox_list',
|
||||||
|
'status': 'archivebox_status',
|
||||||
|
|
||||||
|
'schedule': 'archivebox_schedule',
|
||||||
|
'server': 'archivebox_server',
|
||||||
|
'shell': 'archivebox_shell',
|
||||||
|
'manage': 'archivebox_manage',
|
||||||
|
|
||||||
|
'oneshot': 'archivebox_oneshot',
|
||||||
|
}
|
||||||
|
|
||||||
# every imported command module must have these properties in order to be valid
|
# every imported command module must have these properties in order to be valid
|
||||||
required_attrs = ('__package__', '__command__', 'main')
|
required_attrs = ('__package__', '__command__', 'main')
|
||||||
|
@ -36,6 +73,38 @@ is_valid_cli_module = lambda module, subcommand: (
|
||||||
and module.__command__.split(' ')[-1] == subcommand
|
and module.__command__.split(' ')[-1] == subcommand
|
||||||
)
|
)
|
||||||
|
|
||||||
|
class LazySubcommands(Mapping):
|
||||||
|
def keys(self):
|
||||||
|
return SUBCOMMAND_MODULES.keys()
|
||||||
|
|
||||||
|
def values(self):
|
||||||
|
return [self[key] for key in self.keys()]
|
||||||
|
|
||||||
|
def items(self):
|
||||||
|
return [(key, self[key]) for key in self.keys()]
|
||||||
|
|
||||||
|
def __getitem__(self, key):
|
||||||
|
module = import_module(f'.{SUBCOMMAND_MODULES[key]}', __package__)
|
||||||
|
assert is_valid_cli_module(module, key)
|
||||||
|
return module.main
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
return iter(SUBCOMMAND_MODULES.keys())
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(SUBCOMMAND_MODULES)
|
||||||
|
|
||||||
|
CLI_SUBCOMMANDS = LazySubcommands()
|
||||||
|
|
||||||
|
|
||||||
|
# these common commands will appear sorted before any others for ease-of-use
|
||||||
|
meta_cmds = ('help', 'version') # dont require valid data folder at all
|
||||||
|
main_cmds = ('init', 'config', 'setup') # dont require existing db present
|
||||||
|
archive_cmds = ('add', 'remove', 'update', 'list', 'status') # require existing db present
|
||||||
|
fake_db = ("oneshot",) # use fake in-memory db
|
||||||
|
|
||||||
|
display_first = (*meta_cmds, *main_cmds, *archive_cmds)
|
||||||
|
|
||||||
|
|
||||||
IGNORED_BG_THREADS = ('MainThread', 'ThreadPoolExecutor', 'IPythonHistorySavingThread', 'Scheduler') # threads we dont have to wait for before exiting
|
IGNORED_BG_THREADS = ('MainThread', 'ThreadPoolExecutor', 'IPythonHistorySavingThread', 'Scheduler') # threads we dont have to wait for before exiting
|
||||||
|
|
||||||
|
@ -71,29 +140,9 @@ def wait_for_bg_threads_to_exit(thread_names: Iterable[str]=(), ignore_names: It
|
||||||
raise Exception(f'Background threads failed to exit after {tries}s: {threads_summary}')
|
raise Exception(f'Background threads failed to exit after {tries}s: {threads_summary}')
|
||||||
|
|
||||||
|
|
||||||
def list_subcommands() -> Dict[str, str]:
|
|
||||||
"""find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
|
|
||||||
|
|
||||||
COMMANDS = []
|
|
||||||
for filename in os.listdir(CLI_DIR):
|
|
||||||
if is_cli_module(filename):
|
|
||||||
subcommand = filename.replace('archivebox_', '').replace('.py', '')
|
|
||||||
module = import_module('.archivebox_{}'.format(subcommand), __package__)
|
|
||||||
assert is_valid_cli_module(module, subcommand)
|
|
||||||
COMMANDS.append((subcommand, module.main.__doc__))
|
|
||||||
globals()[subcommand] = module.main
|
|
||||||
|
|
||||||
display_order = lambda cmd: (
|
|
||||||
display_first.index(cmd[0])
|
|
||||||
if cmd[0] in display_first else
|
|
||||||
100 + len(cmd[0])
|
|
||||||
)
|
|
||||||
|
|
||||||
return dict(sorted(COMMANDS, key=display_order))
|
|
||||||
|
|
||||||
|
|
||||||
def run_subcommand(subcommand: str,
|
def run_subcommand(subcommand: str,
|
||||||
subcommand_args: List[str]=None,
|
subcommand_args: List[str] | None = None,
|
||||||
stdin: Optional[IO]=None,
|
stdin: Optional[IO]=None,
|
||||||
pwd: Union[Path, str, None]=None) -> None:
|
pwd: Union[Path, str, None]=None) -> None:
|
||||||
"""Run a given ArchiveBox subcommand with the given list of args"""
|
"""Run a given ArchiveBox subcommand with the given list of args"""
|
||||||
|
@ -101,18 +150,18 @@ def run_subcommand(subcommand: str,
|
||||||
subcommand_args = subcommand_args or []
|
subcommand_args = subcommand_args or []
|
||||||
|
|
||||||
if subcommand not in meta_cmds:
|
if subcommand not in meta_cmds:
|
||||||
from ..config import setup_django
|
from ..config import setup_django, CONFIG
|
||||||
|
|
||||||
cmd_requires_db = subcommand in archive_cmds
|
cmd_requires_db = subcommand in archive_cmds
|
||||||
init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
|
init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
|
||||||
|
|
||||||
if cmd_requires_db:
|
if cmd_requires_db:
|
||||||
check_data_folder(pwd)
|
check_data_folder(CONFIG)
|
||||||
|
|
||||||
setup_django(in_memory_db=subcommand in fake_db, check_db=cmd_requires_db and not init_pending)
|
setup_django(in_memory_db=subcommand in fake_db, check_db=cmd_requires_db and not init_pending)
|
||||||
|
|
||||||
if cmd_requires_db:
|
if cmd_requires_db:
|
||||||
check_migrations()
|
check_migrations(CONFIG)
|
||||||
|
|
||||||
module = import_module('.archivebox_{}'.format(subcommand), __package__)
|
module = import_module('.archivebox_{}'.format(subcommand), __package__)
|
||||||
module.main(args=subcommand_args, stdin=stdin, pwd=pwd) # type: ignore
|
module.main(args=subcommand_args, stdin=stdin, pwd=pwd) # type: ignore
|
||||||
|
@ -121,17 +170,28 @@ def run_subcommand(subcommand: str,
|
||||||
wait_for_bg_threads_to_exit(timeout=60)
|
wait_for_bg_threads_to_exit(timeout=60)
|
||||||
|
|
||||||
|
|
||||||
SUBCOMMANDS = list_subcommands()
|
|
||||||
|
|
||||||
|
|
||||||
class NotProvided:
|
class NotProvided:
|
||||||
pass
|
def __len__(self):
|
||||||
|
return 0
|
||||||
|
def __bool__(self):
|
||||||
|
return False
|
||||||
|
def __repr__(self):
|
||||||
|
return '<not provided>'
|
||||||
|
|
||||||
|
Omitted = Union[None, NotProvided]
|
||||||
|
|
||||||
|
OMITTED = NotProvided()
|
||||||
|
|
||||||
|
|
||||||
def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided, pwd: Optional[str]=None) -> None:
|
def main(args: List[str] | Omitted=OMITTED, stdin: IO | Omitted=OMITTED, pwd: str | None=None) -> None:
|
||||||
args = sys.argv[1:] if args is NotProvided else args
|
# print('STARTING CLI MAIN ENTRYPOINT')
|
||||||
stdin = sys.stdin if stdin is NotProvided else stdin
|
|
||||||
|
args = sys.argv[1:] if args is OMITTED else args
|
||||||
|
stdin = sys.stdin if stdin is OMITTED else stdin
|
||||||
|
|
||||||
subcommands = list_subcommands()
|
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
prog=__command__,
|
prog=__command__,
|
||||||
description='ArchiveBox: The self-hosted internet archive',
|
description='ArchiveBox: The self-hosted internet archive',
|
||||||
|
@ -141,19 +201,19 @@ def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided,
|
||||||
group.add_argument(
|
group.add_argument(
|
||||||
'--help', '-h',
|
'--help', '-h',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
help=subcommands['help'],
|
help=CLI_SUBCOMMANDS['help'].__doc__,
|
||||||
)
|
)
|
||||||
group.add_argument(
|
group.add_argument(
|
||||||
'--version',
|
'--version',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
help=subcommands['version'],
|
help=CLI_SUBCOMMANDS['version'].__doc__,
|
||||||
)
|
)
|
||||||
group.add_argument(
|
group.add_argument(
|
||||||
"subcommand",
|
"subcommand",
|
||||||
type=str,
|
type=str,
|
||||||
help= "The name of the subcommand to run",
|
help= "The name of the subcommand to run",
|
||||||
nargs='?',
|
nargs='?',
|
||||||
choices=subcommands.keys(),
|
choices=CLI_SUBCOMMANDS.keys(),
|
||||||
default=None,
|
default=None,
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
@ -174,23 +234,13 @@ def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided,
|
||||||
log_cli_command(
|
log_cli_command(
|
||||||
subcommand=command.subcommand,
|
subcommand=command.subcommand,
|
||||||
subcommand_args=command.subcommand_args,
|
subcommand_args=command.subcommand_args,
|
||||||
stdin=stdin,
|
stdin=stdin or None,
|
||||||
pwd=pwd or OUTPUT_DIR
|
pwd=pwd or archivebox.DATA_DIR,
|
||||||
)
|
)
|
||||||
|
|
||||||
run_subcommand(
|
run_subcommand(
|
||||||
subcommand=command.subcommand,
|
subcommand=command.subcommand,
|
||||||
subcommand_args=command.subcommand_args,
|
subcommand_args=command.subcommand_args,
|
||||||
stdin=stdin,
|
stdin=stdin or None,
|
||||||
pwd=pwd or OUTPUT_DIR,
|
pwd=pwd or archivebox.DATA_DIR,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
__all__ = (
|
|
||||||
'SUBCOMMANDS',
|
|
||||||
'list_subcommands',
|
|
||||||
'run_subcommand',
|
|
||||||
*SUBCOMMANDS.keys(),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -28,21 +28,19 @@ import sys
|
||||||
import json
|
import json
|
||||||
import inspect
|
import inspect
|
||||||
import getpass
|
import getpass
|
||||||
import platform
|
|
||||||
import shutil
|
import shutil
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from benedict import benedict
|
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Optional, Type, Tuple, Dict, Union, List
|
from typing import Optional, Type, Tuple, Dict
|
||||||
from subprocess import run, PIPE, DEVNULL, STDOUT, TimeoutExpired
|
from subprocess import run, PIPE, DEVNULL, STDOUT, TimeoutExpired
|
||||||
from configparser import ConfigParser
|
from configparser import ConfigParser
|
||||||
from collections import defaultdict
|
|
||||||
import importlib.metadata
|
import importlib.metadata
|
||||||
|
|
||||||
from pydantic_pkgr import SemVer
|
from pydantic_pkgr import SemVer
|
||||||
|
from rich.progress import Progress
|
||||||
|
|
||||||
import django
|
import django
|
||||||
from django.db.backends.sqlite3.base import Database as sqlite3
|
from django.db.backends.sqlite3.base import Database as sqlite3
|
||||||
|
@ -56,6 +54,17 @@ from .config_stubs import (
|
||||||
ConfigDefaultDict,
|
ConfigDefaultDict,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from .misc.logging import (
|
||||||
|
CONSOLE,
|
||||||
|
SHOW_PROGRESS,
|
||||||
|
DEFAULT_CLI_COLORS,
|
||||||
|
ANSI,
|
||||||
|
COLOR_DICT,
|
||||||
|
stderr,
|
||||||
|
hint,
|
||||||
|
)
|
||||||
|
from .misc.checks import check_system_config
|
||||||
|
|
||||||
# print('STARTING CONFIG LOADING')
|
# print('STARTING CONFIG LOADING')
|
||||||
|
|
||||||
# load fallback libraries from vendor dir
|
# load fallback libraries from vendor dir
|
||||||
|
@ -70,7 +79,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
|
||||||
'SHELL_CONFIG': {
|
'SHELL_CONFIG': {
|
||||||
'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()},
|
'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()},
|
||||||
'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']},
|
'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']},
|
||||||
'SHOW_PROGRESS': {'type': bool, 'default': lambda c: (c['IS_TTY'] and platform.system() != 'Darwin')}, # progress bars are buggy on mac, disable for now
|
'SHOW_PROGRESS': {'type': bool, 'default': lambda c: c['IS_TTY']}, # progress bars are buggy on mac, disable for now
|
||||||
'IN_DOCKER': {'type': bool, 'default': False},
|
'IN_DOCKER': {'type': bool, 'default': False},
|
||||||
'IN_QEMU': {'type': bool, 'default': False},
|
'IN_QEMU': {'type': bool, 'default': False},
|
||||||
'PUID': {'type': int, 'default': os.getuid()},
|
'PUID': {'type': int, 'default': os.getuid()},
|
||||||
|
@ -306,32 +315,7 @@ ROBOTS_TXT_FILENAME = 'robots.txt'
|
||||||
FAVICON_FILENAME = 'favicon.ico'
|
FAVICON_FILENAME = 'favicon.ico'
|
||||||
CONFIG_FILENAME = 'ArchiveBox.conf'
|
CONFIG_FILENAME = 'ArchiveBox.conf'
|
||||||
|
|
||||||
DEFAULT_CLI_COLORS = benedict(
|
|
||||||
{
|
|
||||||
"reset": "\033[00;00m",
|
|
||||||
"lightblue": "\033[01;30m",
|
|
||||||
"lightyellow": "\033[01;33m",
|
|
||||||
"lightred": "\033[01;35m",
|
|
||||||
"red": "\033[01;31m",
|
|
||||||
"green": "\033[01;32m",
|
|
||||||
"blue": "\033[01;34m",
|
|
||||||
"white": "\033[01;37m",
|
|
||||||
"black": "\033[01;30m",
|
|
||||||
}
|
|
||||||
)
|
|
||||||
ANSI = AttrDict({k: '' for k in DEFAULT_CLI_COLORS.keys()})
|
|
||||||
|
|
||||||
COLOR_DICT = defaultdict(lambda: [(0, 0, 0), (0, 0, 0)], {
|
|
||||||
'00': [(0, 0, 0), (0, 0, 0)],
|
|
||||||
'30': [(0, 0, 0), (0, 0, 0)],
|
|
||||||
'31': [(255, 0, 0), (128, 0, 0)],
|
|
||||||
'32': [(0, 200, 0), (0, 128, 0)],
|
|
||||||
'33': [(255, 255, 0), (128, 128, 0)],
|
|
||||||
'34': [(0, 0, 255), (0, 0, 128)],
|
|
||||||
'35': [(255, 0, 255), (128, 0, 128)],
|
|
||||||
'36': [(0, 255, 255), (0, 128, 128)],
|
|
||||||
'37': [(255, 255, 255), (255, 255, 255)],
|
|
||||||
})
|
|
||||||
|
|
||||||
STATICFILE_EXTENSIONS = {
|
STATICFILE_EXTENSIONS = {
|
||||||
# 99.999% of the time, URLs ending in these extensions are static files
|
# 99.999% of the time, URLs ending in these extensions are static files
|
||||||
|
@ -880,37 +864,6 @@ def parse_version_string(version: str) -> Tuple[int, int, int]:
|
||||||
return tuple(int(part) for part in base.split('.'))[:3]
|
return tuple(int(part) for part in base.split('.'))[:3]
|
||||||
|
|
||||||
|
|
||||||
# Logging Helpers
|
|
||||||
def stdout(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
|
|
||||||
ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
|
|
||||||
|
|
||||||
if color:
|
|
||||||
strs = [ansi[color], ' '.join(str(a) for a in args), ansi['reset'], '\n']
|
|
||||||
else:
|
|
||||||
strs = [' '.join(str(a) for a in args), '\n']
|
|
||||||
|
|
||||||
sys.stdout.write(prefix + ''.join(strs))
|
|
||||||
|
|
||||||
def stderr(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
|
|
||||||
ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
|
|
||||||
|
|
||||||
if color:
|
|
||||||
strs = [ansi[color], ' '.join(str(a) for a in args), ansi['reset'], '\n']
|
|
||||||
else:
|
|
||||||
strs = [' '.join(str(a) for a in args), '\n']
|
|
||||||
|
|
||||||
sys.stderr.write(prefix + ''.join(strs))
|
|
||||||
|
|
||||||
def hint(text: Union[Tuple[str, ...], List[str], str], prefix=' ', config: Optional[ConfigDict]=None) -> None:
|
|
||||||
ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
|
|
||||||
|
|
||||||
if isinstance(text, str):
|
|
||||||
stderr('{}{lightred}Hint:{reset} {}'.format(prefix, text, **ansi))
|
|
||||||
else:
|
|
||||||
stderr('{}{lightred}Hint:{reset} {}'.format(prefix, text[0], **ansi))
|
|
||||||
for line in text[1:]:
|
|
||||||
stderr('{} {}'.format(prefix, line))
|
|
||||||
|
|
||||||
|
|
||||||
# Dependency Metadata Helpers
|
# Dependency Metadata Helpers
|
||||||
def bin_version(binary: Optional[str], cmd: Optional[str]=None, timeout: int=3) -> Optional[str]:
|
def bin_version(binary: Optional[str], cmd: Optional[str]=None, timeout: int=3) -> Optional[str]:
|
||||||
|
@ -920,6 +873,10 @@ def bin_version(binary: Optional[str], cmd: Optional[str]=None, timeout: int=3)
|
||||||
if not binary or not abspath:
|
if not binary or not abspath:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
return '999.999.999'
|
||||||
|
|
||||||
|
# Now handled by new BinProvider plugin system, no longer needed:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
bin_env = os.environ | {'LANG': 'C'}
|
bin_env = os.environ | {'LANG': 'C'}
|
||||||
is_cmd_str = cmd and isinstance(cmd, str)
|
is_cmd_str = cmd and isinstance(cmd, str)
|
||||||
|
@ -960,6 +917,9 @@ def bin_path(binary: Optional[str]) -> Optional[str]:
|
||||||
return shutil.which(str(Path(binary).expanduser())) or shutil.which(str(binary)) or binary
|
return shutil.which(str(Path(binary).expanduser())) or shutil.which(str(binary)) or binary
|
||||||
|
|
||||||
def bin_hash(binary: Optional[str]) -> Optional[str]:
|
def bin_hash(binary: Optional[str]) -> Optional[str]:
|
||||||
|
return 'UNUSED'
|
||||||
|
# DEPRECATED: now handled by new BinProvider plugin system, no longer needed:
|
||||||
|
|
||||||
if binary is None:
|
if binary is None:
|
||||||
return None
|
return None
|
||||||
abs_path = bin_path(binary)
|
abs_path = bin_path(binary)
|
||||||
|
@ -1329,163 +1289,28 @@ if not CONFIG['CHECK_SSL_VALIDITY']:
|
||||||
|
|
||||||
########################### Config Validity Checkers ###########################
|
########################### Config Validity Checkers ###########################
|
||||||
|
|
||||||
|
INITIAL_STARTUP_PROGRESS = None
|
||||||
|
INITIAL_STARTUP_PROGRESS_TASK = 0
|
||||||
|
|
||||||
def check_system_config(config: ConfigDict=CONFIG) -> None:
|
def bump_startup_progress_bar():
|
||||||
### Check system environment
|
global INITIAL_STARTUP_PROGRESS
|
||||||
if config['USER'] == 'root' or str(config['PUID']) == "0":
|
global INITIAL_STARTUP_PROGRESS_TASK
|
||||||
stderr('[!] ArchiveBox should never be run as root!', color='red')
|
if INITIAL_STARTUP_PROGRESS:
|
||||||
stderr(' For more information, see the security overview documentation:')
|
INITIAL_STARTUP_PROGRESS.update(INITIAL_STARTUP_PROGRESS_TASK, advance=1) # type: ignore
|
||||||
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
|
|
||||||
|
|
||||||
if config['IN_DOCKER']:
|
|
||||||
attempted_command = ' '.join(sys.argv[:3])
|
|
||||||
stderr('')
|
|
||||||
stderr(' {lightred}Hint{reset}: When using Docker, you must run commands with {green}docker run{reset} instead of {lightyellow}docker exec{reset}, e.g.:'.format(**config['ANSI']))
|
|
||||||
stderr(f' docker compose run archivebox {attempted_command}')
|
|
||||||
stderr(f' docker run -it -v $PWD/data:/data archivebox/archivebox {attempted_command}')
|
|
||||||
stderr(' or:')
|
|
||||||
stderr(f' docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"')
|
|
||||||
stderr(f' docker exec -it --user=archivebox <container id> /bin/bash -c "archivebox {attempted_command}"')
|
|
||||||
|
|
||||||
raise SystemExit(2)
|
|
||||||
|
|
||||||
### Check Python environment
|
|
||||||
if sys.version_info[:3] < (3, 7, 0):
|
|
||||||
stderr(f'[X] Python version is not new enough: {config["PYTHON_VERSION"]} (>3.6 is required)', color='red')
|
|
||||||
stderr(' See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
|
|
||||||
raise SystemExit(2)
|
|
||||||
|
|
||||||
if int(CONFIG['DJANGO_VERSION'].split('.')[0]) < 3:
|
|
||||||
stderr(f'[X] Django version is not new enough: {config["DJANGO_VERSION"]} (>3.0 is required)', color='red')
|
|
||||||
stderr(' Upgrade django using pip or your system package manager: pip3 install --upgrade django')
|
|
||||||
raise SystemExit(2)
|
|
||||||
|
|
||||||
if config['PYTHON_ENCODING'] not in ('UTF-8', 'UTF8'):
|
|
||||||
stderr(f'[X] Your system is running python3 scripts with a bad locale setting: {config["PYTHON_ENCODING"]} (it should be UTF-8).', color='red')
|
|
||||||
stderr(' To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)')
|
|
||||||
stderr(' Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"')
|
|
||||||
stderr('')
|
|
||||||
stderr(' Confirm that it\'s fixed by opening a new shell and running:')
|
|
||||||
stderr(' python3 -c "import sys; print(sys.stdout.encoding)" # should output UTF-8')
|
|
||||||
raise SystemExit(2)
|
|
||||||
|
|
||||||
# stderr('[i] Using Chrome binary: {}'.format(shutil.which(CHROME_BINARY) or CHROME_BINARY))
|
|
||||||
# stderr('[i] Using Chrome data dir: {}'.format(os.path.abspath(CHROME_USER_DATA_DIR)))
|
|
||||||
if config['CHROME_USER_DATA_DIR'] is not None and Path(config['CHROME_USER_DATA_DIR']).exists():
|
|
||||||
if not (Path(config['CHROME_USER_DATA_DIR']) / 'Default').exists():
|
|
||||||
stderr('[X] Could not find profile "Default" in CHROME_USER_DATA_DIR.', color='red')
|
|
||||||
stderr(f' {config["CHROME_USER_DATA_DIR"]}')
|
|
||||||
stderr(' Make sure you set it to a Chrome user data directory containing a Default profile folder.')
|
|
||||||
stderr(' For more info see:')
|
|
||||||
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#CHROME_USER_DATA_DIR')
|
|
||||||
if '/Default' in str(config['CHROME_USER_DATA_DIR']):
|
|
||||||
stderr()
|
|
||||||
stderr(' Try removing /Default from the end e.g.:')
|
|
||||||
stderr(' CHROME_USER_DATA_DIR="{}"'.format(str(config['CHROME_USER_DATA_DIR']).split('/Default')[0]))
|
|
||||||
|
|
||||||
# hard error is too annoying here, instead just set it to nothing
|
|
||||||
# raise SystemExit(2)
|
|
||||||
config['CHROME_USER_DATA_DIR'] = None
|
|
||||||
else:
|
|
||||||
config['CHROME_USER_DATA_DIR'] = None
|
|
||||||
|
|
||||||
|
|
||||||
def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
|
|
||||||
invalid_dependencies = [
|
|
||||||
(name, info) for name, info in config['DEPENDENCIES'].items()
|
|
||||||
if info['enabled'] and not info['is_valid']
|
|
||||||
]
|
|
||||||
if invalid_dependencies and show_help:
|
|
||||||
stderr(f'[!] Warning: Missing {len(invalid_dependencies)} recommended dependencies', color='lightyellow')
|
|
||||||
for dependency, info in invalid_dependencies:
|
|
||||||
stderr(
|
|
||||||
' ! {}: {} ({})'.format(
|
|
||||||
dependency,
|
|
||||||
info['path'] or 'unable to find binary',
|
|
||||||
info['version'] or 'unable to detect version',
|
|
||||||
)
|
|
||||||
)
|
|
||||||
if dependency in ('YOUTUBEDL_BINARY', 'CHROME_BINARY', 'SINGLEFILE_BINARY', 'READABILITY_BINARY', 'MERCURY_BINARY'):
|
|
||||||
hint(('To install all packages automatically run: archivebox setup',
|
|
||||||
f'or to disable it and silence this warning: archivebox config --set SAVE_{dependency.rsplit("_", 1)[0]}=False',
|
|
||||||
''), prefix=' ')
|
|
||||||
stderr('')
|
|
||||||
|
|
||||||
if config['TIMEOUT'] < 5:
|
|
||||||
stderr(f'[!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={config["TIMEOUT"]} seconds)', color='red')
|
|
||||||
stderr(' You must allow *at least* 5 seconds for indexing and archive methods to run succesfully.')
|
|
||||||
stderr(' (Setting it to somewhere between 30 and 3000 seconds is recommended)')
|
|
||||||
stderr()
|
|
||||||
stderr(' If you want to make ArchiveBox run faster, disable specific archive methods instead:')
|
|
||||||
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
|
|
||||||
stderr()
|
|
||||||
|
|
||||||
elif config['USE_CHROME'] and config['TIMEOUT'] < 15:
|
|
||||||
stderr(f'[!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={config["TIMEOUT"]} seconds)', color='red')
|
|
||||||
stderr(' Chrome will fail to archive all sites if set to less than ~15 seconds.')
|
|
||||||
stderr(' (Setting it to somewhere between 30 and 300 seconds is recommended)')
|
|
||||||
stderr()
|
|
||||||
stderr(' If you want to make ArchiveBox run faster, disable specific archive methods instead:')
|
|
||||||
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
|
|
||||||
stderr()
|
|
||||||
|
|
||||||
if config['USE_YOUTUBEDL'] and config['MEDIA_TIMEOUT'] < 20:
|
|
||||||
stderr(f'[!] Warning: MEDIA_TIMEOUT is set too low! (currently set to MEDIA_TIMEOUT={config["MEDIA_TIMEOUT"]} seconds)', color='red')
|
|
||||||
stderr(' youtube-dl/yt-dlp will fail to archive any media if set to less than ~20 seconds.')
|
|
||||||
stderr(' (Setting it somewhere over 60 seconds is recommended)')
|
|
||||||
stderr()
|
|
||||||
stderr(' If you want to disable media archiving entirely, set SAVE_MEDIA=False instead:')
|
|
||||||
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_media')
|
|
||||||
stderr()
|
|
||||||
|
|
||||||
|
|
||||||
def check_data_folder(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG) -> None:
|
|
||||||
output_dir = out_dir or config['OUTPUT_DIR']
|
|
||||||
assert isinstance(output_dir, (str, Path))
|
|
||||||
|
|
||||||
archive_dir_exists = (Path(output_dir) / ARCHIVE_DIR_NAME).exists()
|
|
||||||
if not archive_dir_exists:
|
|
||||||
stderr('[X] No archivebox index found in the current directory.', color='red')
|
|
||||||
stderr(f' {output_dir}', color='lightyellow')
|
|
||||||
stderr()
|
|
||||||
stderr(' {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**config['ANSI']))
|
|
||||||
stderr(' cd path/to/your/archive/folder')
|
|
||||||
stderr(' archivebox [command]')
|
|
||||||
stderr()
|
|
||||||
stderr(' {lightred}Hint{reset}: To create a new archive collection or import existing data in this folder, run:'.format(**config['ANSI']))
|
|
||||||
stderr(' archivebox init')
|
|
||||||
raise SystemExit(2)
|
|
||||||
|
|
||||||
|
|
||||||
def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG):
|
|
||||||
output_dir = out_dir or config['OUTPUT_DIR']
|
|
||||||
from .index.sql import list_migrations
|
|
||||||
|
|
||||||
pending_migrations = [name for status, name in list_migrations() if not status]
|
|
||||||
|
|
||||||
if pending_migrations:
|
|
||||||
stderr('[X] This collection was created with an older version of ArchiveBox and must be upgraded first.', color='lightyellow')
|
|
||||||
stderr(f' {output_dir}')
|
|
||||||
stderr()
|
|
||||||
stderr(f' To upgrade it to the latest version and apply the {len(pending_migrations)} pending migrations, run:')
|
|
||||||
stderr(' archivebox init')
|
|
||||||
raise SystemExit(3)
|
|
||||||
|
|
||||||
(Path(output_dir) / SOURCES_DIR_NAME).mkdir(exist_ok=True)
|
|
||||||
(Path(output_dir) / LOGS_DIR_NAME).mkdir(exist_ok=True)
|
|
||||||
(Path(output_dir) / CACHE_DIR_NAME).mkdir(exist_ok=True)
|
|
||||||
(Path(output_dir) / LIB_DIR_NAME / 'bin').mkdir(exist_ok=True, parents=True)
|
|
||||||
(Path(output_dir) / PERSONAS_DIR_NAME / 'Default').mkdir(exist_ok=True, parents=True)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
|
def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
|
||||||
check_system_config()
|
global INITIAL_STARTUP_PROGRESS
|
||||||
|
global INITIAL_STARTUP_PROGRESS_TASK
|
||||||
|
|
||||||
|
with Progress(transient=True, expand=True, console=CONSOLE) as INITIAL_STARTUP_PROGRESS:
|
||||||
|
INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25)
|
||||||
|
check_system_config(config)
|
||||||
|
|
||||||
output_dir = out_dir or Path(config['OUTPUT_DIR'])
|
output_dir = out_dir or Path(config['OUTPUT_DIR'])
|
||||||
|
|
||||||
assert isinstance(output_dir, Path) and isinstance(config['PACKAGE_DIR'], Path)
|
assert isinstance(output_dir, Path) and isinstance(config['PACKAGE_DIR'], Path)
|
||||||
|
|
||||||
|
bump_startup_progress_bar()
|
||||||
try:
|
try:
|
||||||
from django.core.management import call_command
|
from django.core.management import call_command
|
||||||
|
|
||||||
|
@ -1505,18 +1330,24 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
|
||||||
'https://code.djangoproject.com/wiki/JSON1Extension'
|
'https://code.djangoproject.com/wiki/JSON1Extension'
|
||||||
])
|
])
|
||||||
|
|
||||||
|
bump_startup_progress_bar()
|
||||||
|
|
||||||
if in_memory_db:
|
if in_memory_db:
|
||||||
# some commands (e.g. oneshot) dont store a long-lived sqlite3 db file on disk.
|
# some commands (e.g. oneshot) dont store a long-lived sqlite3 db file on disk.
|
||||||
# in those cases we create a temporary in-memory db and run the migrations
|
# in those cases we create a temporary in-memory db and run the migrations
|
||||||
# immediately to get a usable in-memory-database at startup
|
# immediately to get a usable in-memory-database at startup
|
||||||
os.environ.setdefault("ARCHIVEBOX_DATABASE_NAME", ":memory:")
|
os.environ.setdefault("ARCHIVEBOX_DATABASE_NAME", ":memory:")
|
||||||
django.setup()
|
django.setup()
|
||||||
|
|
||||||
|
bump_startup_progress_bar()
|
||||||
call_command("migrate", interactive=False, verbosity=0)
|
call_command("migrate", interactive=False, verbosity=0)
|
||||||
else:
|
else:
|
||||||
# Otherwise use default sqlite3 file-based database and initialize django
|
# Otherwise use default sqlite3 file-based database and initialize django
|
||||||
# without running migrations automatically (user runs them manually by calling init)
|
# without running migrations automatically (user runs them manually by calling init)
|
||||||
django.setup()
|
django.setup()
|
||||||
|
|
||||||
|
bump_startup_progress_bar()
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
# log startup message to the error log
|
# log startup message to the error log
|
||||||
|
@ -1547,6 +1378,8 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
|
||||||
except django.db.utils.OperationalError:
|
except django.db.utils.OperationalError:
|
||||||
call_command("createcachetable", verbosity=0)
|
call_command("createcachetable", verbosity=0)
|
||||||
|
|
||||||
|
bump_startup_progress_bar()
|
||||||
|
|
||||||
# if archivebox gets imported multiple times, we have to close
|
# if archivebox gets imported multiple times, we have to close
|
||||||
# the sqlite3 whenever we init from scratch to avoid multiple threads
|
# the sqlite3 whenever we init from scratch to avoid multiple threads
|
||||||
# sharing the same connection by accident
|
# sharing the same connection by accident
|
||||||
|
@ -1558,6 +1391,7 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
|
||||||
assert sql_index_path.exists(), (
|
assert sql_index_path.exists(), (
|
||||||
f'No database file {SQL_INDEX_FILENAME} found in: {config["OUTPUT_DIR"]} (Are you in an ArchiveBox collection directory?)')
|
f'No database file {SQL_INDEX_FILENAME} found in: {config["OUTPUT_DIR"]} (Are you in an ArchiveBox collection directory?)')
|
||||||
|
|
||||||
|
bump_startup_progress_bar()
|
||||||
|
|
||||||
# https://docs.pydantic.dev/logfire/integrations/django/ Logfire Debugging
|
# https://docs.pydantic.dev/logfire/integrations/django/ Logfire Debugging
|
||||||
if settings.DEBUG_LOGFIRE:
|
if settings.DEBUG_LOGFIRE:
|
||||||
|
@ -1572,3 +1406,6 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
raise SystemExit(2)
|
raise SystemExit(2)
|
||||||
|
|
||||||
|
INITIAL_STARTUP_PROGRESS = None
|
||||||
|
INITIAL_STARTUP_PROGRESS_TASK = None
|
||||||
|
|
|
@ -170,6 +170,7 @@ STATICFILES_DIRS = [
|
||||||
*[
|
*[
|
||||||
str(plugin_dir / 'static')
|
str(plugin_dir / 'static')
|
||||||
for plugin_dir in PLUGIN_DIRS.values()
|
for plugin_dir in PLUGIN_DIRS.values()
|
||||||
|
if (plugin_dir / 'static').is_dir()
|
||||||
],
|
],
|
||||||
str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'static'),
|
str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'static'),
|
||||||
]
|
]
|
||||||
|
@ -179,6 +180,7 @@ TEMPLATE_DIRS = [
|
||||||
*[
|
*[
|
||||||
str(plugin_dir / 'templates')
|
str(plugin_dir / 'templates')
|
||||||
for plugin_dir in PLUGIN_DIRS.values()
|
for plugin_dir in PLUGIN_DIRS.values()
|
||||||
|
if (plugin_dir / 'templates').is_dir()
|
||||||
],
|
],
|
||||||
str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'core'),
|
str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'core'),
|
||||||
str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'admin'),
|
str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'admin'),
|
||||||
|
|
|
@ -141,18 +141,22 @@ SETTINGS_LOGGING = {
|
||||||
"api": {
|
"api": {
|
||||||
"handlers": ["default", "logfile"],
|
"handlers": ["default", "logfile"],
|
||||||
"level": "DEBUG",
|
"level": "DEBUG",
|
||||||
|
"propagate": False,
|
||||||
},
|
},
|
||||||
"checks": {
|
"checks": {
|
||||||
"handlers": ["default", "logfile"],
|
"handlers": ["default", "logfile"],
|
||||||
"level": "DEBUG",
|
"level": "DEBUG",
|
||||||
|
"propagate": False,
|
||||||
},
|
},
|
||||||
"core": {
|
"core": {
|
||||||
"handlers": ["default", "logfile"],
|
"handlers": ["default", "logfile"],
|
||||||
"level": "DEBUG",
|
"level": "DEBUG",
|
||||||
|
"propagate": False,
|
||||||
},
|
},
|
||||||
"plugins_extractor": {
|
"plugins_extractor": {
|
||||||
"handlers": ["default", "logfile"],
|
"handlers": ["default", "logfile"],
|
||||||
"level": "DEBUG",
|
"level": "DEBUG",
|
||||||
|
"propagate": False,
|
||||||
},
|
},
|
||||||
"httpx": {
|
"httpx": {
|
||||||
"handlers": ["outbound_webhooks"],
|
"handlers": ["outbound_webhooks"],
|
||||||
|
@ -164,6 +168,7 @@ SETTINGS_LOGGING = {
|
||||||
"handlers": ["default", "logfile"],
|
"handlers": ["default", "logfile"],
|
||||||
"level": "INFO",
|
"level": "INFO",
|
||||||
"filters": ["noisyrequestsfilter"],
|
"filters": ["noisyrequestsfilter"],
|
||||||
|
"propagate": False,
|
||||||
},
|
},
|
||||||
"django.utils.autoreload": {
|
"django.utils.autoreload": {
|
||||||
"propagate": False,
|
"propagate": False,
|
||||||
|
|
|
@ -230,7 +230,7 @@ def progress_bar(seconds: int, prefix: str='') -> None:
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
|
||||||
def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional[str], pwd: str):
|
def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional[str | IO], pwd: str):
|
||||||
cmd = ' '.join(('archivebox', subcommand, *subcommand_args))
|
cmd = ' '.join(('archivebox', subcommand, *subcommand_args))
|
||||||
stderr('{black}[i] [{now}] ArchiveBox v{VERSION}: {cmd}{reset}'.format(
|
stderr('{black}[i] [{now}] ArchiveBox v{VERSION}: {cmd}{reset}'.format(
|
||||||
now=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),
|
now=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),
|
||||||
|
@ -526,11 +526,11 @@ def log_removal_finished(all_links: int, to_remove: int):
|
||||||
|
|
||||||
|
|
||||||
def log_shell_welcome_msg():
|
def log_shell_welcome_msg():
|
||||||
from .cli import list_subcommands
|
from .cli import CLI_SUBCOMMANDS
|
||||||
|
|
||||||
print('{green}# ArchiveBox Imports{reset}'.format(**ANSI))
|
print('{green}# ArchiveBox Imports{reset}'.format(**ANSI))
|
||||||
print('{green}from core.models import Snapshot, ArchiveResult, Tag, User{reset}'.format(**ANSI))
|
print('{green}from core.models import Snapshot, ArchiveResult, Tag, User{reset}'.format(**ANSI))
|
||||||
print('{green}from cli import *\n {}{reset}'.format("\n ".join(list_subcommands().keys()), **ANSI))
|
print('{green}from cli import *\n {}{reset}'.format("\n ".join(CLI_SUBCOMMANDS.keys()), **ANSI))
|
||||||
print()
|
print()
|
||||||
print('[i] Welcome to the ArchiveBox Shell!')
|
print('[i] Welcome to the ArchiveBox Shell!')
|
||||||
print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage')
|
print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage')
|
||||||
|
|
|
@ -16,7 +16,7 @@ from django.db.models import QuerySet
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
|
||||||
from .cli import (
|
from .cli import (
|
||||||
list_subcommands,
|
CLI_SUBCOMMANDS,
|
||||||
run_subcommand,
|
run_subcommand,
|
||||||
display_first,
|
display_first,
|
||||||
meta_cmds,
|
meta_cmds,
|
||||||
|
@ -66,9 +66,9 @@ from .index.html import (
|
||||||
)
|
)
|
||||||
from .index.csv import links_to_csv
|
from .index.csv import links_to_csv
|
||||||
from .extractors import archive_links, archive_link, ignore_methods
|
from .extractors import archive_links, archive_link, ignore_methods
|
||||||
|
from .misc.logging import stderr, hint
|
||||||
|
from .misc.checks import check_data_folder, check_dependencies
|
||||||
from .config import (
|
from .config import (
|
||||||
stderr,
|
|
||||||
hint,
|
|
||||||
ConfigDict,
|
ConfigDict,
|
||||||
ANSI,
|
ANSI,
|
||||||
IS_TTY,
|
IS_TTY,
|
||||||
|
@ -98,8 +98,6 @@ from .config import (
|
||||||
SEARCH_BACKEND_ENGINE,
|
SEARCH_BACKEND_ENGINE,
|
||||||
LDAP,
|
LDAP,
|
||||||
get_version,
|
get_version,
|
||||||
check_dependencies,
|
|
||||||
check_data_folder,
|
|
||||||
write_config_file,
|
write_config_file,
|
||||||
VERSION,
|
VERSION,
|
||||||
VERSIONS_AVAILABLE,
|
VERSIONS_AVAILABLE,
|
||||||
|
@ -146,7 +144,7 @@ from .logging_util import (
|
||||||
def help(out_dir: Path=OUTPUT_DIR) -> None:
|
def help(out_dir: Path=OUTPUT_DIR) -> None:
|
||||||
"""Print the ArchiveBox help message and usage"""
|
"""Print the ArchiveBox help message and usage"""
|
||||||
|
|
||||||
all_subcommands = list_subcommands()
|
all_subcommands = CLI_SUBCOMMANDS
|
||||||
COMMANDS_HELP_TEXT = '\n '.join(
|
COMMANDS_HELP_TEXT = '\n '.join(
|
||||||
f'{cmd.ljust(20)} {summary}'
|
f'{cmd.ljust(20)} {summary}'
|
||||||
for cmd, summary in all_subcommands.items()
|
for cmd, summary in all_subcommands.items()
|
||||||
|
@ -281,7 +279,7 @@ def version(quiet: bool=False,
|
||||||
print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**ANSI))
|
print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**ANSI))
|
||||||
|
|
||||||
print()
|
print()
|
||||||
check_dependencies()
|
check_dependencies(CONFIG)
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
|
@ -469,7 +467,7 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
|
||||||
def status(out_dir: Path=OUTPUT_DIR) -> None:
|
def status(out_dir: Path=OUTPUT_DIR) -> None:
|
||||||
"""Print out some info and statistics about the archive collection"""
|
"""Print out some info and statistics about the archive collection"""
|
||||||
|
|
||||||
check_data_folder(out_dir=out_dir)
|
check_data_folder(CONFIG)
|
||||||
|
|
||||||
from core.models import Snapshot
|
from core.models import Snapshot
|
||||||
from django.contrib.auth import get_user_model
|
from django.contrib.auth import get_user_model
|
||||||
|
@ -609,8 +607,8 @@ def add(urls: Union[str, List[str]],
|
||||||
run_subcommand('init', stdin=None, pwd=out_dir)
|
run_subcommand('init', stdin=None, pwd=out_dir)
|
||||||
|
|
||||||
# Load list of links from the existing index
|
# Load list of links from the existing index
|
||||||
check_data_folder(out_dir=out_dir)
|
check_data_folder(CONFIG)
|
||||||
check_dependencies()
|
check_dependencies(CONFIG)
|
||||||
new_links: List[Link] = []
|
new_links: List[Link] = []
|
||||||
all_links = load_main_index(out_dir=out_dir)
|
all_links = load_main_index(out_dir=out_dir)
|
||||||
|
|
||||||
|
@ -705,7 +703,7 @@ def remove(filter_str: Optional[str]=None,
|
||||||
out_dir: Path=OUTPUT_DIR) -> List[Link]:
|
out_dir: Path=OUTPUT_DIR) -> List[Link]:
|
||||||
"""Remove the specified URLs from the archive"""
|
"""Remove the specified URLs from the archive"""
|
||||||
|
|
||||||
check_data_folder(out_dir=out_dir)
|
check_data_folder(CONFIG)
|
||||||
|
|
||||||
if snapshots is None:
|
if snapshots is None:
|
||||||
if filter_str and filter_patterns:
|
if filter_str and filter_patterns:
|
||||||
|
@ -792,8 +790,8 @@ def update(resume: Optional[float]=None,
|
||||||
from core.models import ArchiveResult
|
from core.models import ArchiveResult
|
||||||
from .search import index_links
|
from .search import index_links
|
||||||
|
|
||||||
check_data_folder(out_dir=out_dir)
|
check_data_folder(CONFIG)
|
||||||
check_dependencies()
|
check_dependencies(CONFIG)
|
||||||
new_links: List[Link] = [] # TODO: Remove input argument: only_new
|
new_links: List[Link] = [] # TODO: Remove input argument: only_new
|
||||||
|
|
||||||
extractors = extractors.split(",") if extractors else []
|
extractors = extractors.split(",") if extractors else []
|
||||||
|
@ -863,7 +861,7 @@ def list_all(filter_patterns_str: Optional[str]=None,
|
||||||
out_dir: Path=OUTPUT_DIR) -> Iterable[Link]:
|
out_dir: Path=OUTPUT_DIR) -> Iterable[Link]:
|
||||||
"""List, filter, and export information about archive entries"""
|
"""List, filter, and export information about archive entries"""
|
||||||
|
|
||||||
check_data_folder(out_dir=out_dir)
|
check_data_folder(CONFIG)
|
||||||
|
|
||||||
if filter_patterns and filter_patterns_str:
|
if filter_patterns and filter_patterns_str:
|
||||||
stderr(
|
stderr(
|
||||||
|
@ -911,7 +909,7 @@ def list_links(snapshots: Optional[QuerySet]=None,
|
||||||
before: Optional[float]=None,
|
before: Optional[float]=None,
|
||||||
out_dir: Path=OUTPUT_DIR) -> Iterable[Link]:
|
out_dir: Path=OUTPUT_DIR) -> Iterable[Link]:
|
||||||
|
|
||||||
check_data_folder(out_dir=out_dir)
|
check_data_folder(CONFIG)
|
||||||
|
|
||||||
if snapshots:
|
if snapshots:
|
||||||
all_snapshots = snapshots
|
all_snapshots = snapshots
|
||||||
|
@ -935,7 +933,7 @@ def list_folders(links: List[Link],
|
||||||
status: str,
|
status: str,
|
||||||
out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||||
|
|
||||||
check_data_folder(out_dir=out_dir)
|
check_data_folder(CONFIG)
|
||||||
|
|
||||||
STATUS_FUNCTIONS = {
|
STATUS_FUNCTIONS = {
|
||||||
"indexed": get_indexed_folders,
|
"indexed": get_indexed_folders,
|
||||||
|
@ -1080,7 +1078,7 @@ def config(config_options_str: Optional[str]=None,
|
||||||
out_dir: Path=OUTPUT_DIR) -> None:
|
out_dir: Path=OUTPUT_DIR) -> None:
|
||||||
"""Get and set your ArchiveBox project configuration values"""
|
"""Get and set your ArchiveBox project configuration values"""
|
||||||
|
|
||||||
check_data_folder(out_dir=out_dir)
|
check_data_folder(CONFIG)
|
||||||
|
|
||||||
if config_options and config_options_str:
|
if config_options and config_options_str:
|
||||||
stderr(
|
stderr(
|
||||||
|
@ -1183,7 +1181,7 @@ def schedule(add: bool=False,
|
||||||
out_dir: Path=OUTPUT_DIR):
|
out_dir: Path=OUTPUT_DIR):
|
||||||
"""Set ArchiveBox to regularly import URLs at specific times using cron"""
|
"""Set ArchiveBox to regularly import URLs at specific times using cron"""
|
||||||
|
|
||||||
check_data_folder(out_dir=out_dir)
|
check_data_folder(CONFIG)
|
||||||
|
|
||||||
Path(LOGS_DIR).mkdir(exist_ok=True)
|
Path(LOGS_DIR).mkdir(exist_ok=True)
|
||||||
|
|
||||||
|
@ -1324,7 +1322,7 @@ def server(runserver_args: Optional[List[str]]=None,
|
||||||
config.SHOW_PROGRESS = False
|
config.SHOW_PROGRESS = False
|
||||||
config.DEBUG = config.DEBUG or debug
|
config.DEBUG = config.DEBUG or debug
|
||||||
|
|
||||||
check_data_folder(out_dir=out_dir)
|
check_data_folder(CONFIG)
|
||||||
|
|
||||||
from django.core.management import call_command
|
from django.core.management import call_command
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
|
@ -1417,7 +1415,7 @@ def server(runserver_args: Optional[List[str]]=None,
|
||||||
def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
|
def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
|
||||||
"""Run an ArchiveBox Django management command"""
|
"""Run an ArchiveBox Django management command"""
|
||||||
|
|
||||||
check_data_folder(out_dir=out_dir)
|
check_data_folder(CONFIG)
|
||||||
from django.core.management import execute_from_command_line
|
from django.core.management import execute_from_command_line
|
||||||
|
|
||||||
if (args and "createsuperuser" in args) and (IN_DOCKER and not IS_TTY):
|
if (args and "createsuperuser" in args) and (IN_DOCKER and not IS_TTY):
|
||||||
|
@ -1432,7 +1430,7 @@ def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
|
||||||
def shell(out_dir: Path=OUTPUT_DIR) -> None:
|
def shell(out_dir: Path=OUTPUT_DIR) -> None:
|
||||||
"""Enter an interactive ArchiveBox Django shell"""
|
"""Enter an interactive ArchiveBox Django shell"""
|
||||||
|
|
||||||
check_data_folder(out_dir=out_dir)
|
check_data_folder(CONFIG)
|
||||||
|
|
||||||
from django.core.management import call_command
|
from django.core.management import call_command
|
||||||
call_command("shell_plus")
|
call_command("shell_plus")
|
||||||
|
|
|
@ -7,7 +7,7 @@ if __name__ == '__main__':
|
||||||
# versions of ./manage.py commands whenever possible. When that's not possible
|
# versions of ./manage.py commands whenever possible. When that's not possible
|
||||||
# (e.g. makemigrations), you can comment out this check temporarily
|
# (e.g. makemigrations), you can comment out this check temporarily
|
||||||
|
|
||||||
allowed_commands = ['makemigrations', 'migrate', 'startapp','squashmigrations', 'generate_stubs']
|
allowed_commands = ['makemigrations', 'migrate', 'startapp','squashmigrations', 'generate_stubs', 'test']
|
||||||
|
|
||||||
if not any(cmd in sys.argv for cmd in allowed_commands):
|
if not any(cmd in sys.argv for cmd in allowed_commands):
|
||||||
print("[X] Don't run ./manage.py directly (unless you are a developer running makemigrations):")
|
print("[X] Don't run ./manage.py directly (unless you are a developer running makemigrations):")
|
||||||
|
|
0
archivebox/misc/__init__.py
Normal file
0
archivebox/misc/__init__.py
Normal file
159
archivebox/misc/checks.py
Normal file
159
archivebox/misc/checks.py
Normal file
|
@ -0,0 +1,159 @@
|
||||||
|
__package__ = 'archivebox.misc'
|
||||||
|
|
||||||
|
# TODO: migrate all of these to new plugantic/base_check.py Check system
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from benedict import benedict
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from .logging import stderr, hint
|
||||||
|
|
||||||
|
|
||||||
|
def check_system_config(config: benedict) -> None:
    """Sanity-check the host environment before ArchiveBox starts.

    Verifies we are not running as root, that Python/Django are new enough,
    and that stdio uses UTF-8. Also validates CHROME_USER_DATA_DIR: when it is
    set but lacks a 'Default' profile (or does not exist), the key is reset to
    None in-place instead of hard-failing. Fatal problems print guidance to
    stderr and raise SystemExit(2).
    """
    ### Check system environment
    if config['USER'] == 'root' or str(config['PUID']) == "0":
        stderr('[!] ArchiveBox should never be run as root!', color='red')
        stderr(' For more information, see the security overview documentation:')
        stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')

        if config['IN_DOCKER']:
            # echo back whatever the user just typed inside ready-to-paste docker commands
            attempted_command = ' '.join(sys.argv[:3])
            stderr('')
            stderr(' {lightred}Hint{reset}: When using Docker, you must run commands with {green}docker run{reset} instead of {lightyellow}docker exec{reset}, e.g.:'.format(**config['ANSI']))
            stderr(f' docker compose run archivebox {attempted_command}')
            stderr(f' docker run -it -v $PWD/data:/data archivebox/archivebox {attempted_command}')
            stderr(' or:')
            stderr(f' docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"')
            stderr(f' docker exec -it --user=archivebox <container id> /bin/bash -c "archivebox {attempted_command}"')

        raise SystemExit(2)

    ### Check Python environment
    if sys.version_info[:3] < (3, 7, 0):
        stderr(f'[X] Python version is not new enough: {config["PYTHON_VERSION"]} (>3.6 is required)', color='red')
        stderr(' See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
        raise SystemExit(2)

    if int(config['DJANGO_VERSION'].split('.')[0]) < 3:
        stderr(f'[X] Django version is not new enough: {config["DJANGO_VERSION"]} (>3.0 is required)', color='red')
        stderr(' Upgrade django using pip or your system package manager: pip3 install --upgrade django')
        raise SystemExit(2)

    if config['PYTHON_ENCODING'] not in ('UTF-8', 'UTF8'):
        stderr(f'[X] Your system is running python3 scripts with a bad locale setting: {config["PYTHON_ENCODING"]} (it should be UTF-8).', color='red')
        stderr(' To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)')
        stderr(' Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"')
        stderr('')
        stderr(' Confirm that it\'s fixed by opening a new shell and running:')
        stderr(' python3 -c "import sys; print(sys.stdout.encoding)" # should output UTF-8')
        raise SystemExit(2)

    # stderr('[i] Using Chrome binary: {}'.format(shutil.which(CHROME_BINARY) or CHROME_BINARY))
    # stderr('[i] Using Chrome data dir: {}'.format(os.path.abspath(CHROME_USER_DATA_DIR)))
    chrome_data_dir = config['CHROME_USER_DATA_DIR']
    if chrome_data_dir is not None and Path(chrome_data_dir).exists():
        if not (Path(chrome_data_dir) / 'Default').exists():
            stderr('[X] Could not find profile "Default" in CHROME_USER_DATA_DIR.', color='red')
            stderr(f' {config["CHROME_USER_DATA_DIR"]}')
            stderr(' Make sure you set it to a Chrome user data directory containing a Default profile folder.')
            stderr(' For more info see:')
            stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#CHROME_USER_DATA_DIR')
            if '/Default' in str(chrome_data_dir):
                stderr()
                stderr(' Try removing /Default from the end e.g.:')
                stderr(' CHROME_USER_DATA_DIR="{}"'.format(str(config['CHROME_USER_DATA_DIR']).split('/Default')[0]))

            # hard error is too annoying here, instead just set it to nothing
            # raise SystemExit(2)
            config['CHROME_USER_DATA_DIR'] = None
    else:
        config['CHROME_USER_DATA_DIR'] = None
|
||||||
|
|
||||||
|
|
||||||
|
def check_dependencies(config: benedict, show_help: bool=True) -> None:
    """Warn (non-fatally) about unusable optional binaries and risky timeouts.

    Only writes warnings to stderr and never raises. `show_help` controls
    whether the per-dependency remediation hints are printed alongside the
    summary of missing binaries.
    """
    # dependencies that are enabled in config but whose binary failed validation
    missing = [
        (name, info) for name, info in config['DEPENDENCIES'].items()
        if info['enabled'] and not info['is_valid']
    ]

    if missing and show_help:
        stderr(f'[!] Warning: Missing {len(missing)} recommended dependencies', color='lightyellow')
        for dependency, info in missing:
            found_path = info['path'] or 'unable to find binary'
            found_version = info['version'] or 'unable to detect version'
            stderr(' ! {}: {} ({})'.format(dependency, found_path, found_version))
            if dependency in ('YOUTUBEDL_BINARY', 'CHROME_BINARY', 'SINGLEFILE_BINARY', 'READABILITY_BINARY', 'MERCURY_BINARY'):
                hint(('To install all packages automatically run: archivebox setup',
                      f'or to disable it and silence this warning: archivebox config --set SAVE_{dependency.rsplit("_", 1)[0]}=False',
                      ''), prefix=' ')
        stderr('')

    timeout = config['TIMEOUT']
    if timeout < 5:
        # below 5s even the internal indexing steps cannot complete reliably
        stderr(f'[!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={config["TIMEOUT"]} seconds)', color='red')
        stderr(' You must allow *at least* 5 seconds for indexing and archive methods to run succesfully.')
        stderr(' (Setting it to somewhere between 30 and 3000 seconds is recommended)')
        stderr()
        stderr(' If you want to make ArchiveBox run faster, disable specific archive methods instead:')
        stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
        stderr()
    elif config['USE_CHROME'] and timeout < 15:
        # Chrome-based extractors need noticeably more headroom than the floor
        stderr(f'[!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={config["TIMEOUT"]} seconds)', color='red')
        stderr(' Chrome will fail to archive all sites if set to less than ~15 seconds.')
        stderr(' (Setting it to somewhere between 30 and 300 seconds is recommended)')
        stderr()
        stderr(' If you want to make ArchiveBox run faster, disable specific archive methods instead:')
        stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
        stderr()

    if config['USE_YOUTUBEDL'] and config['MEDIA_TIMEOUT'] < 20:
        stderr(f'[!] Warning: MEDIA_TIMEOUT is set too low! (currently set to MEDIA_TIMEOUT={config["MEDIA_TIMEOUT"]} seconds)', color='red')
        stderr(' youtube-dl/yt-dlp will fail to archive any media if set to less than ~20 seconds.')
        stderr(' (Setting it somewhere over 60 seconds is recommended)')
        stderr()
        stderr(' If you want to disable media archiving entirely, set SAVE_MEDIA=False instead:')
        stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_media')
        stderr()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def check_data_folder(config: benedict) -> None:
    """Exit unless config['OUTPUT_DIR'] looks like an ArchiveBox collection.

    A collection is recognized purely by the presence of an ./archive
    subfolder. When it is missing, guidance is printed to stderr and
    SystemExit(2) is raised; otherwise the function returns silently.
    """
    output_dir = config['OUTPUT_DIR']

    # guard clause: a valid collection needs nothing further checked here
    if (Path(output_dir) / 'archive').exists():
        return

    stderr('[X] No archivebox index found in the current directory.', color='red')
    stderr(f' {output_dir}', color='lightyellow')
    stderr()
    stderr(' {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**config['ANSI']))
    stderr(' cd path/to/your/archive/folder')
    stderr(' archivebox [command]')
    stderr()
    stderr(' {lightred}Hint{reset}: To create a new archive collection or import existing data in this folder, run:'.format(**config['ANSI']))
    stderr(' archivebox init')
    raise SystemExit(2)
|
||||||
|
|
||||||
|
|
||||||
|
def check_migrations(config: benedict):
    """Abort with SystemExit(3) if the collection has unapplied migrations,
    then make sure the standard data subdirectories exist under OUTPUT_DIR.
    """
    output_dir = config['OUTPUT_DIR']

    # imported lazily: index.sql requires Django to already be set up
    from ..index.sql import list_migrations

    pending_migrations = [name for status, name in list_migrations() if not status]

    if pending_migrations:
        stderr('[X] This collection was created with an older version of ArchiveBox and must be upgraded first.', color='lightyellow')
        stderr(f' {output_dir}')
        stderr()
        stderr(f' To upgrade it to the latest version and apply the {len(pending_migrations)} pending migrations, run:')
        stderr(' archivebox init')
        raise SystemExit(3)

    # ensure the expected on-disk layout exists (all mkdirs are idempotent)
    for dir_name in (config['SOURCES_DIR_NAME'], config['LOGS_DIR_NAME'], config['CACHE_DIR_NAME']):
        (Path(output_dir) / dir_name).mkdir(exist_ok=True)
    (Path(output_dir) / config['LIB_DIR_NAME'] / 'bin').mkdir(exist_ok=True, parents=True)
    (Path(output_dir) / config['PERSONAS_DIR_NAME'] / 'Default').mkdir(exist_ok=True, parents=True)
|
30
archivebox/misc/debugging.py
Normal file
30
archivebox/misc/debugging.py
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
from functools import wraps
|
||||||
|
from time import time
|
||||||
|
|
||||||
|
def timed_function(func):
    """
    Very simple profiling decorator for debugging: prints the wall-clock
    duration of every call as ``[DEBUG][<ms>ms] module.func(...)``.

    Usage:
        @timed_function
        def my_func():
            ...

    More advanced alternatives:
    - viztracer ../.venv/bin/archivebox manage check   # https://viztracer.readthedocs.io/en/latest/filter.html
    - python -m cProfile -o archivebox.prof ../.venv/bin/archivebox manage check; snakeviz archivebox.prof
    - Django Debug Toolbar + django-debug-toolbar-flamegraph
      + Django Requests Tracker (requests-tracker)
    """
    @wraps(func)
    def wrap(*args, **kwargs):
        # report bound methods under their instance's module, plain funcs under their own
        if args and hasattr(args[0], '__module__'):
            owner_module = args[0].__module__
        else:
            owner_module = func.__module__

        started = time()
        result = func(*args, **kwargs)
        elapsed_ms = int((time() - started) * 1000)

        print(f'[DEBUG][{elapsed_ms}ms] {owner_module}.{func.__name__}(...)')
        return result
    return wrap
|
77
archivebox/misc/logging.py
Normal file
77
archivebox/misc/logging.py
Normal file
|
@ -0,0 +1,77 @@
|
||||||
|
__package__ = 'archivebox.misc'
|
||||||
|
|
||||||
|
# TODO: merge/dedupe this file with archivebox/logging_util.py
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from typing import Optional, Union, Tuple, List
|
||||||
|
from collections import defaultdict
|
||||||
|
from benedict import benedict
|
||||||
|
from rich.console import Console
|
||||||
|
|
||||||
|
from ..config_stubs import ConfigDict
|
||||||
|
|
||||||
|
# Whether to render interactive progress bars; an explicit env var wins,
# otherwise fall back to rich's own TTY detection below.
SHOW_PROGRESS = None
if os.environ.get('SHOW_PROGRESS', 'None') in ('True', '1', 'true', 'yes'):
    SHOW_PROGRESS = True

CONSOLE = Console(force_interactive=SHOW_PROGRESS)
SHOW_PROGRESS = CONSOLE.is_interactive if SHOW_PROGRESS is None else SHOW_PROGRESS

# ANSI escape palette used when color output is enabled.
DEFAULT_CLI_COLORS = benedict({
    "reset": "\033[00;00m",
    "lightblue": "\033[01;30m",
    "lightyellow": "\033[01;33m",
    "lightred": "\033[01;35m",
    "red": "\033[01;31m",
    "green": "\033[01;32m",
    "blue": "\033[01;34m",
    "white": "\033[01;37m",
    "black": "\033[01;30m",
})
# Same keys as above but empty strings: used when color output is disabled.
ANSI = benedict({k: '' for k in DEFAULT_CLI_COLORS.keys()})

# Map from ANSI color code -> [bright RGB, dim RGB]; unknown codes fall back to black.
COLOR_DICT = defaultdict(lambda: [(0, 0, 0), (0, 0, 0)], {
    '00': [(0, 0, 0), (0, 0, 0)],
    '30': [(0, 0, 0), (0, 0, 0)],
    '31': [(255, 0, 0), (128, 0, 0)],
    '32': [(0, 200, 0), (0, 128, 0)],
    '33': [(255, 255, 0), (128, 128, 0)],
    '34': [(0, 0, 255), (0, 0, 128)],
    '35': [(255, 0, 255), (128, 0, 128)],
    '36': [(0, 255, 255), (0, 128, 128)],
    '37': [(255, 255, 255), (255, 255, 255)],
})
|
||||||
|
|
||||||
|
# Logging Helpers
|
||||||
|
def stdout(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
    """Write *args space-joined (plus newline) to sys.stdout.

    When `color` is given and the supplied config enables USE_COLOR, the text
    is wrapped in the matching ANSI escape codes; otherwise the no-op ANSI
    palette is used so output stays plain.
    """
    palette = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
    text = ' '.join(str(a) for a in args)

    if color:
        output = palette[color] + text + palette['reset'] + '\n'
    else:
        output = text + '\n'

    sys.stdout.write(prefix + output)
|
||||||
|
|
||||||
|
def stderr(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
    """Write *args space-joined (plus newline) to sys.stderr.

    When `color` is given and the supplied config enables USE_COLOR, the text
    is wrapped in the matching ANSI escape codes; otherwise the no-op ANSI
    palette is used so output stays plain.
    """
    palette = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
    text = ' '.join(str(a) for a in args)

    if color:
        output = palette[color] + text + palette['reset'] + '\n'
    else:
        output = text + '\n'

    sys.stderr.write(prefix + output)
|
||||||
|
|
||||||
|
def hint(text: Union[Tuple[str, ...], List[str], str], prefix=' ', config: Optional[ConfigDict]=None) -> None:
    """Print an indented 'Hint:' note to stderr.

    Accepts either a single string or a sequence of lines; the first line is
    prefixed with a (optionally colored) 'Hint:' marker and any remaining
    lines are printed indented beneath it.
    """
    ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI

    # normalize to a list of lines so both input shapes share one code path
    lines = [text] if isinstance(text, str) else list(text)

    stderr('{}{lightred}Hint:{reset} {}'.format(prefix, lines[0], **ansi))
    for extra_line in lines[1:]:
        stderr('{} {}'.format(prefix, extra_line))
|
|
@ -10,7 +10,6 @@ import datetime
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
timezone.utc = datetime.timezone.utc
|
timezone.utc = datetime.timezone.utc
|
||||||
|
|
||||||
|
|
||||||
# monkey patch django-signals-webhooks to change how it shows up in Admin UI
|
# monkey patch django-signals-webhooks to change how it shows up in Admin UI
|
||||||
# from signal_webhooks.apps import DjangoSignalWebhooksConfig
|
# from signal_webhooks.apps import DjangoSignalWebhooksConfig
|
||||||
# DjangoSignalWebhooksConfig.verbose_name = 'API'
|
# DjangoSignalWebhooksConfig.verbose_name = 'API'
|
||||||
|
|
6
archivebox/package-lock.json
generated
6
archivebox/package-lock.json
generated
|
@ -371,9 +371,9 @@
|
||||||
"license": "Apache-2.0"
|
"license": "Apache-2.0"
|
||||||
},
|
},
|
||||||
"node_modules/bare-events": {
|
"node_modules/bare-events": {
|
||||||
"version": "2.4.2",
|
"version": "2.5.0",
|
||||||
"resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.4.2.tgz",
|
"resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.5.0.tgz",
|
||||||
"integrity": "sha512-qMKFd2qG/36aA4GwvKq8MxnPgCQAmBWmSyLWsJcbn8v03wvIPQ/hG1Ms8bPzndZxMDoHpxez5VOS+gC9Yi24/Q==",
|
"integrity": "sha512-/E8dDe9dsbLyh2qrZ64PEPadOQ0F4gbl1sUJOrmph7xOiIxfY8vwab/4bFLh4Y88/Hk/ujKcrQKc+ps0mv873A==",
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
"optional": true
|
"optional": true
|
||||||
},
|
},
|
||||||
|
|
|
@ -3,6 +3,7 @@ __package__ = "archivebox.plugantic"
|
||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
from typing_extensions import Self
|
from typing_extensions import Self
|
||||||
|
|
||||||
|
from benedict import benedict
|
||||||
from pydantic import Field, InstanceOf, validate_call
|
from pydantic import Field, InstanceOf, validate_call
|
||||||
from pydantic_pkgr import (
|
from pydantic_pkgr import (
|
||||||
Binary,
|
Binary,
|
||||||
|
@ -17,7 +18,6 @@ from pydantic_pkgr import (
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
from .base_hook import BaseHook, HookType
|
from .base_hook import BaseHook, HookType
|
||||||
from ..config_stubs import AttrDict
|
|
||||||
|
|
||||||
|
|
||||||
class BaseBinProvider(BaseHook, BinProvider):
|
class BaseBinProvider(BaseHook, BinProvider):
|
||||||
|
@ -38,7 +38,7 @@ class BaseBinProvider(BaseHook, BinProvider):
|
||||||
def register(self, settings, parent_plugin=None):
|
def register(self, settings, parent_plugin=None):
|
||||||
# self._plugin = parent_plugin # for debugging only, never rely on this!
|
# self._plugin = parent_plugin # for debugging only, never rely on this!
|
||||||
|
|
||||||
settings.BINPROVIDERS = getattr(settings, "BINPROVIDERS", None) or AttrDict({})
|
settings.BINPROVIDERS = getattr(settings, "BINPROVIDERS", None) or benedict({})
|
||||||
settings.BINPROVIDERS[self.id] = self
|
settings.BINPROVIDERS[self.id] = self
|
||||||
|
|
||||||
super().register(settings, parent_plugin=parent_plugin)
|
super().register(settings, parent_plugin=parent_plugin)
|
||||||
|
@ -58,7 +58,7 @@ class BaseBinary(BaseHook, Binary):
|
||||||
def register(self, settings, parent_plugin=None):
|
def register(self, settings, parent_plugin=None):
|
||||||
# self._plugin = parent_plugin # for debugging only, never rely on this!
|
# self._plugin = parent_plugin # for debugging only, never rely on this!
|
||||||
|
|
||||||
settings.BINARIES = getattr(settings, "BINARIES", None) or AttrDict({})
|
settings.BINARIES = getattr(settings, "BINARIES", None) or benedict({})
|
||||||
settings.BINARIES[self.id] = self
|
settings.BINARIES[self.id] = self
|
||||||
|
|
||||||
super().register(settings, parent_plugin=parent_plugin)
|
super().register(settings, parent_plugin=parent_plugin)
|
||||||
|
|
|
@ -28,7 +28,7 @@ class BaseCheck(BaseHook):
|
||||||
def register(self, settings, parent_plugin=None):
|
def register(self, settings, parent_plugin=None):
|
||||||
# self._plugin = parent_plugin # backref to parent is for debugging only, never rely on this!
|
# self._plugin = parent_plugin # backref to parent is for debugging only, never rely on this!
|
||||||
|
|
||||||
self.register_with_django_check_system() # (SIDE EFFECT)
|
self.register_with_django_check_system(settings) # (SIDE EFFECT)
|
||||||
|
|
||||||
# install hook into settings.CHECKS
|
# install hook into settings.CHECKS
|
||||||
settings.CHECKS = getattr(settings, "CHECKS", None) or AttrDict({})
|
settings.CHECKS = getattr(settings, "CHECKS", None) or AttrDict({})
|
||||||
|
@ -37,12 +37,9 @@ class BaseCheck(BaseHook):
|
||||||
# record installed hook in settings.HOOKS
|
# record installed hook in settings.HOOKS
|
||||||
super().register(settings, parent_plugin=parent_plugin)
|
super().register(settings, parent_plugin=parent_plugin)
|
||||||
|
|
||||||
def register_with_django_check_system(self):
|
def register_with_django_check_system(self, settings):
|
||||||
|
|
||||||
def run_check(app_configs, **kwargs) -> List[Warning]:
|
def run_check(app_configs, **kwargs) -> List[Warning]:
|
||||||
from django.conf import settings
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
return self.check(settings, logging.getLogger("checks"))
|
return self.check(settings, logging.getLogger("checks"))
|
||||||
|
|
||||||
run_check.__name__ = self.id
|
run_check.__name__ = self.id
|
||||||
|
|
|
@ -96,14 +96,13 @@ class BaseHook(BaseModel):
|
||||||
# e.g. /admin/environment/config/LdapConfig/
|
# e.g. /admin/environment/config/LdapConfig/
|
||||||
return f"/admin/environment/{self.hook_type.lower()}/{self.id}/"
|
return f"/admin/environment/{self.hook_type.lower()}/{self.id}/"
|
||||||
|
|
||||||
|
|
||||||
def register(self, settings, parent_plugin=None):
|
def register(self, settings, parent_plugin=None):
|
||||||
"""Load a record of an installed hook into global Django settings.HOOKS at runtime."""
|
"""Load a record of an installed hook into global Django settings.HOOKS at runtime."""
|
||||||
self._plugin = parent_plugin # for debugging only, never rely on this!
|
self._plugin = parent_plugin # for debugging only, never rely on this!
|
||||||
|
|
||||||
# assert json.dumps(self.model_json_schema(), indent=4), f"Hook {self.hook_module} has invalid JSON schema."
|
# assert json.dumps(self.model_json_schema(), indent=4), f"Hook {self.hook_module} has invalid JSON schema."
|
||||||
|
|
||||||
print(' -', self.hook_module, '.register()')
|
# print(' -', self.hook_module, '.register()')
|
||||||
|
|
||||||
# record installed hook in settings.HOOKS
|
# record installed hook in settings.HOOKS
|
||||||
settings.HOOKS[self.id] = self
|
settings.HOOKS[self.id] = self
|
||||||
|
@ -118,7 +117,7 @@ class BaseHook(BaseModel):
|
||||||
def ready(self, settings):
|
def ready(self, settings):
|
||||||
"""Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
|
"""Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
|
||||||
|
|
||||||
print(' -', self.hook_module, '.ready()')
|
# print(' -', self.hook_module, '.ready()')
|
||||||
|
|
||||||
assert self.id in settings.HOOKS, f"Tried to ready hook {self.hook_module} but it is not registered in settings.HOOKS."
|
assert self.id in settings.HOOKS, f"Tried to ready hook {self.hook_module} but it is not registered in settings.HOOKS."
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
__package__ = 'archivebox.plugantic'
|
__package__ = 'archivebox.plugantic'
|
||||||
|
|
||||||
import json
|
|
||||||
import inspect
|
import inspect
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
@ -18,10 +17,11 @@ from pydantic import (
|
||||||
computed_field,
|
computed_field,
|
||||||
validate_call,
|
validate_call,
|
||||||
)
|
)
|
||||||
|
from benedict import benedict
|
||||||
|
|
||||||
from .base_hook import BaseHook, HookType
|
from .base_hook import BaseHook, HookType
|
||||||
|
|
||||||
from ..config import AttrDict
|
from ..config import bump_startup_progress_bar
|
||||||
|
|
||||||
|
|
||||||
class BasePlugin(BaseModel):
|
class BasePlugin(BaseModel):
|
||||||
|
@ -90,7 +90,8 @@ class BasePlugin(BaseModel):
|
||||||
|
|
||||||
assert self.app_label and self.app_label and self.verbose_name, f'{self.__class__.__name__} is missing .name or .app_label or .verbose_name'
|
assert self.app_label and self.app_label and self.verbose_name, f'{self.__class__.__name__} is missing .name or .app_label or .verbose_name'
|
||||||
|
|
||||||
assert json.dumps(self.model_json_schema(), indent=4), f"Plugin {self.plugin_module} has invalid JSON schema."
|
# assert json.dumps(self.model_json_schema(), indent=4), f"Plugin {self.plugin_module} has invalid JSON schema."
|
||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@ -114,13 +115,13 @@ class BasePlugin(BaseModel):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def HOOKS_BY_ID(self) -> Dict[str, InstanceOf[BaseHook]]:
|
def HOOKS_BY_ID(self) -> Dict[str, InstanceOf[BaseHook]]:
|
||||||
return AttrDict({hook.id: hook for hook in self.hooks})
|
return benedict({hook.id: hook for hook in self.hooks})
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def HOOKS_BY_TYPE(self) -> Dict[HookType, Dict[str, InstanceOf[BaseHook]]]:
|
def HOOKS_BY_TYPE(self) -> Dict[HookType, Dict[str, InstanceOf[BaseHook]]]:
|
||||||
hooks = AttrDict({})
|
hooks = benedict({})
|
||||||
for hook in self.hooks:
|
for hook in self.hooks:
|
||||||
hooks[hook.hook_type] = hooks.get(hook.hook_type) or AttrDict({})
|
hooks[hook.hook_type] = hooks.get(hook.hook_type) or benedict({})
|
||||||
hooks[hook.hook_type][hook.id] = hook
|
hooks[hook.hook_type][hook.id] = hook
|
||||||
return hooks
|
return hooks
|
||||||
|
|
||||||
|
@ -131,10 +132,10 @@ class BasePlugin(BaseModel):
|
||||||
from django.conf import settings as django_settings
|
from django.conf import settings as django_settings
|
||||||
settings = django_settings
|
settings = django_settings
|
||||||
|
|
||||||
print()
|
# print()
|
||||||
print(self.plugin_module_full, '.register()')
|
# print(self.plugin_module_full, '.register()')
|
||||||
|
|
||||||
assert json.dumps(self.model_json_schema(), indent=4), f'Plugin {self.plugin_module} has invalid JSON schema.'
|
# assert json.dumps(self.model_json_schema(), indent=4), f'Plugin {self.plugin_module} has invalid JSON schema.'
|
||||||
|
|
||||||
assert self.id not in settings.PLUGINS, f'Tried to register plugin {self.plugin_module} but it conflicts with existing plugin of the same name ({self.app_label}).'
|
assert self.id not in settings.PLUGINS, f'Tried to register plugin {self.plugin_module} but it conflicts with existing plugin of the same name ({self.app_label}).'
|
||||||
|
|
||||||
|
@ -149,6 +150,7 @@ class BasePlugin(BaseModel):
|
||||||
|
|
||||||
settings.PLUGINS[self.id]._is_registered = True
|
settings.PLUGINS[self.id]._is_registered = True
|
||||||
# print('√ REGISTERED PLUGIN:', self.plugin_module)
|
# print('√ REGISTERED PLUGIN:', self.plugin_module)
|
||||||
|
bump_startup_progress_bar()
|
||||||
|
|
||||||
def ready(self, settings=None):
|
def ready(self, settings=None):
|
||||||
"""Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
|
"""Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
|
||||||
|
@ -157,8 +159,8 @@ class BasePlugin(BaseModel):
|
||||||
from django.conf import settings as django_settings
|
from django.conf import settings as django_settings
|
||||||
settings = django_settings
|
settings = django_settings
|
||||||
|
|
||||||
print()
|
# print()
|
||||||
print(self.plugin_module_full, '.ready()')
|
# print(self.plugin_module_full, '.ready()')
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
self.id in settings.PLUGINS and settings.PLUGINS[self.id]._is_registered
|
self.id in settings.PLUGINS and settings.PLUGINS[self.id]._is_registered
|
||||||
|
@ -171,6 +173,7 @@ class BasePlugin(BaseModel):
|
||||||
hook.ready(settings)
|
hook.ready(settings)
|
||||||
|
|
||||||
settings.PLUGINS[self.id]._is_ready = True
|
settings.PLUGINS[self.id]._is_ready = True
|
||||||
|
bump_startup_progress_bar()
|
||||||
|
|
||||||
# @validate_call
|
# @validate_call
|
||||||
# def install_binaries(self) -> Self:
|
# def install_binaries(self) -> Self:
|
||||||
|
|
|
@ -83,338 +83,3 @@ class JSONSchemaWithLambdas(GenerateJsonSchema):
|
||||||
# for computed_field properties render them like this instead:
|
# for computed_field properties render them like this instead:
|
||||||
# inspect.getsource(field.wrapped_property.fget).split('def ', 1)[-1].split('\n', 1)[-1].strip().strip('return '),
|
# inspect.getsource(field.wrapped_property.fget).split('def ', 1)[-1].split('\n', 1)[-1].strip().strip('return '),
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### Basic Assertions
|
|
||||||
|
|
||||||
# test_input = """
|
|
||||||
# [SERVER_CONFIG]
|
|
||||||
# IS_TTY=False
|
|
||||||
# USE_COLOR=False
|
|
||||||
# SHOW_PROGRESS=False
|
|
||||||
# IN_DOCKER=False
|
|
||||||
# IN_QEMU=False
|
|
||||||
# PUID=501
|
|
||||||
# PGID=20
|
|
||||||
# OUTPUT_DIR=/opt/archivebox/data
|
|
||||||
# CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf
|
|
||||||
# ONLY_NEW=True
|
|
||||||
# TIMEOUT=60
|
|
||||||
# MEDIA_TIMEOUT=3600
|
|
||||||
# OUTPUT_PERMISSIONS=644
|
|
||||||
# RESTRICT_FILE_NAMES=windows
|
|
||||||
# URL_DENYLIST=\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$
|
|
||||||
# URL_ALLOWLIST=None
|
|
||||||
# ADMIN_USERNAME=None
|
|
||||||
# ADMIN_PASSWORD=None
|
|
||||||
# ENFORCE_ATOMIC_WRITES=True
|
|
||||||
# TAG_SEPARATOR_PATTERN=[,]
|
|
||||||
# SECRET_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
|
||||||
# BIND_ADDR=127.0.0.1:8000
|
|
||||||
# ALLOWED_HOSTS=*
|
|
||||||
# DEBUG=False
|
|
||||||
# PUBLIC_INDEX=True
|
|
||||||
# PUBLIC_SNAPSHOTS=True
|
|
||||||
# PUBLIC_ADD_VIEW=False
|
|
||||||
# FOOTER_INFO=Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.
|
|
||||||
# SNAPSHOTS_PER_PAGE=40
|
|
||||||
# CUSTOM_TEMPLATES_DIR=None
|
|
||||||
# TIME_ZONE=UTC
|
|
||||||
# TIMEZONE=UTC
|
|
||||||
# REVERSE_PROXY_USER_HEADER=Remote-User
|
|
||||||
# REVERSE_PROXY_WHITELIST=
|
|
||||||
# LOGOUT_REDIRECT_URL=/
|
|
||||||
# PREVIEW_ORIGINALS=True
|
|
||||||
# LDAP=False
|
|
||||||
# LDAP_SERVER_URI=None
|
|
||||||
# LDAP_BIND_DN=None
|
|
||||||
# LDAP_BIND_PASSWORD=None
|
|
||||||
# LDAP_USER_BASE=None
|
|
||||||
# LDAP_USER_FILTER=None
|
|
||||||
# LDAP_USERNAME_ATTR=None
|
|
||||||
# LDAP_FIRSTNAME_ATTR=None
|
|
||||||
# LDAP_LASTNAME_ATTR=None
|
|
||||||
# LDAP_EMAIL_ATTR=None
|
|
||||||
# LDAP_CREATE_SUPERUSER=False
|
|
||||||
# SAVE_TITLE=True
|
|
||||||
# SAVE_FAVICON=True
|
|
||||||
# SAVE_WGET=True
|
|
||||||
# SAVE_WGET_REQUISITES=True
|
|
||||||
# SAVE_SINGLEFILE=True
|
|
||||||
# SAVE_READABILITY=True
|
|
||||||
# SAVE_MERCURY=True
|
|
||||||
# SAVE_HTMLTOTEXT=True
|
|
||||||
# SAVE_PDF=True
|
|
||||||
# SAVE_SCREENSHOT=True
|
|
||||||
# SAVE_DOM=True
|
|
||||||
# SAVE_HEADERS=True
|
|
||||||
# SAVE_WARC=True
|
|
||||||
# SAVE_GIT=True
|
|
||||||
# SAVE_MEDIA=True
|
|
||||||
# SAVE_ARCHIVE_DOT_ORG=True
|
|
||||||
# RESOLUTION=1440,2000
|
|
||||||
# GIT_DOMAINS=github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht
|
|
||||||
# CHECK_SSL_VALIDITY=True
|
|
||||||
# MEDIA_MAX_SIZE=750m
|
|
||||||
# USER_AGENT=None
|
|
||||||
# CURL_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)
|
|
||||||
# WGET_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5
|
|
||||||
# CHROME_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)
|
|
||||||
# COOKIES_FILE=None
|
|
||||||
# CHROME_USER_DATA_DIR=None
|
|
||||||
# CHROME_TIMEOUT=0
|
|
||||||
# CHROME_HEADLESS=True
|
|
||||||
# CHROME_SANDBOX=True
|
|
||||||
# CHROME_EXTRA_ARGS=[]
|
|
||||||
# YOUTUBEDL_ARGS=['--restrict-filenames', '--trim-filenames', '128', '--write-description', '--write-info-json', '--write-annotations', '--write-thumbnail', '--no-call-home', '--write-sub', '--write-auto-subs', '--convert-subs=srt', '--yes-playlist', '--continue', '--no-abort-on-error', '--ignore-errors', '--geo-bypass', '--add-metadata', '--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)']
|
|
||||||
# YOUTUBEDL_EXTRA_ARGS=[]
|
|
||||||
# WGET_ARGS=['--no-verbose', '--adjust-extension', '--convert-links', '--force-directories', '--backup-converted', '--span-hosts', '--no-parent', '-e', 'robots=off']
|
|
||||||
# WGET_EXTRA_ARGS=[]
|
|
||||||
# CURL_ARGS=['--silent', '--location', '--compressed']
|
|
||||||
# CURL_EXTRA_ARGS=[]
|
|
||||||
# GIT_ARGS=['--recursive']
|
|
||||||
# SINGLEFILE_ARGS=[]
|
|
||||||
# SINGLEFILE_EXTRA_ARGS=[]
|
|
||||||
# MERCURY_ARGS=['--format=text']
|
|
||||||
# MERCURY_EXTRA_ARGS=[]
|
|
||||||
# FAVICON_PROVIDER=https://www.google.com/s2/favicons?domain={}
|
|
||||||
# USE_INDEXING_BACKEND=True
|
|
||||||
# USE_SEARCHING_BACKEND=True
|
|
||||||
# SEARCH_BACKEND_ENGINE=ripgrep
|
|
||||||
# SEARCH_BACKEND_HOST_NAME=localhost
|
|
||||||
# SEARCH_BACKEND_PORT=1491
|
|
||||||
# SEARCH_BACKEND_PASSWORD=SecretPassword
|
|
||||||
# SEARCH_PROCESS_HTML=True
|
|
||||||
# SONIC_COLLECTION=archivebox
|
|
||||||
# SONIC_BUCKET=snapshots
|
|
||||||
# SEARCH_BACKEND_TIMEOUT=90
|
|
||||||
# FTS_SEPARATE_DATABASE=True
|
|
||||||
# FTS_TOKENIZERS=porter unicode61 remove_diacritics 2
|
|
||||||
# FTS_SQLITE_MAX_LENGTH=1000000000
|
|
||||||
# USE_CURL=True
|
|
||||||
# USE_WGET=True
|
|
||||||
# USE_SINGLEFILE=True
|
|
||||||
# USE_READABILITY=True
|
|
||||||
# USE_MERCURY=True
|
|
||||||
# USE_GIT=True
|
|
||||||
# USE_CHROME=True
|
|
||||||
# USE_NODE=True
|
|
||||||
# USE_YOUTUBEDL=True
|
|
||||||
# USE_RIPGREP=True
|
|
||||||
# CURL_BINARY=curl
|
|
||||||
# GIT_BINARY=git
|
|
||||||
# WGET_BINARY=wget
|
|
||||||
# SINGLEFILE_BINARY=single-file
|
|
||||||
# READABILITY_BINARY=readability-extractor
|
|
||||||
# MERCURY_BINARY=postlight-parser
|
|
||||||
# YOUTUBEDL_BINARY=yt-dlp
|
|
||||||
# NODE_BINARY=node
|
|
||||||
# RIPGREP_BINARY=rg
|
|
||||||
# CHROME_BINARY=chrome
|
|
||||||
# POCKET_CONSUMER_KEY=None
|
|
||||||
# USER=squash
|
|
||||||
# PACKAGE_DIR=/opt/archivebox/archivebox
|
|
||||||
# TEMPLATES_DIR=/opt/archivebox/archivebox/templates
|
|
||||||
# ARCHIVE_DIR=/opt/archivebox/data/archive
|
|
||||||
# SOURCES_DIR=/opt/archivebox/data/sources
|
|
||||||
# LOGS_DIR=/opt/archivebox/data/logs
|
|
||||||
# PERSONAS_DIR=/opt/archivebox/data/personas
|
|
||||||
# URL_DENYLIST_PTN=re.compile('\\.(css|js|otf|ttf|woff|woff2|gstatic\\.com|googleapis\\.com/css)(\\?.*)?$', re.IGNORECASE|re.MULTILINE)
|
|
||||||
# URL_ALLOWLIST_PTN=None
|
|
||||||
# DIR_OUTPUT_PERMISSIONS=755
|
|
||||||
# ARCHIVEBOX_BINARY=/opt/archivebox/.venv/bin/archivebox
|
|
||||||
# VERSION=0.8.0
|
|
||||||
# COMMIT_HASH=102e87578c6036bb0132dd1ebd17f8f05ffc880f
|
|
||||||
# BUILD_TIME=2024-05-15 03:28:05 1715768885
|
|
||||||
# VERSIONS_AVAILABLE=None
|
|
||||||
# CAN_UPGRADE=False
|
|
||||||
# PYTHON_BINARY=/opt/archivebox/.venv/bin/python3.10
|
|
||||||
# PYTHON_ENCODING=UTF-8
|
|
||||||
# PYTHON_VERSION=3.10.14
|
|
||||||
# DJANGO_BINARY=/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py
|
|
||||||
# DJANGO_VERSION=5.0.6 final (0)
|
|
||||||
# SQLITE_BINARY=/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py
|
|
||||||
# SQLITE_VERSION=2.6.0
|
|
||||||
# CURL_VERSION=curl 8.4.0 (x86_64-apple-darwin23.0)
|
|
||||||
# WGET_VERSION=GNU Wget 1.24.5
|
|
||||||
# WGET_AUTO_COMPRESSION=True
|
|
||||||
# RIPGREP_VERSION=ripgrep 14.1.0
|
|
||||||
# SINGLEFILE_VERSION=None
|
|
||||||
# READABILITY_VERSION=None
|
|
||||||
# MERCURY_VERSION=None
|
|
||||||
# GIT_VERSION=git version 2.44.0
|
|
||||||
# YOUTUBEDL_VERSION=2024.04.09
|
|
||||||
# CHROME_VERSION=Google Chrome 124.0.6367.207
|
|
||||||
# NODE_VERSION=v21.7.3
|
|
||||||
# """
|
|
||||||
|
|
||||||
|
|
||||||
# expected_output = TOML_HEADER + '''[SERVER_CONFIG]
|
|
||||||
# IS_TTY = false
|
|
||||||
# USE_COLOR = false
|
|
||||||
# SHOW_PROGRESS = false
|
|
||||||
# IN_DOCKER = false
|
|
||||||
# IN_QEMU = false
|
|
||||||
# PUID = 501
|
|
||||||
# PGID = 20
|
|
||||||
# OUTPUT_DIR = "/opt/archivebox/data"
|
|
||||||
# CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf"
|
|
||||||
# ONLY_NEW = true
|
|
||||||
# TIMEOUT = 60
|
|
||||||
# MEDIA_TIMEOUT = 3600
|
|
||||||
# OUTPUT_PERMISSIONS = 644
|
|
||||||
# RESTRICT_FILE_NAMES = "windows"
|
|
||||||
# URL_DENYLIST = "\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$"
|
|
||||||
# URL_ALLOWLIST = null
|
|
||||||
# ADMIN_USERNAME = null
|
|
||||||
# ADMIN_PASSWORD = null
|
|
||||||
# ENFORCE_ATOMIC_WRITES = true
|
|
||||||
# TAG_SEPARATOR_PATTERN = "[,]"
|
|
||||||
# SECRET_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
|
||||||
# BIND_ADDR = "127.0.0.1:8000"
|
|
||||||
# ALLOWED_HOSTS = "*"
|
|
||||||
# DEBUG = false
|
|
||||||
# PUBLIC_INDEX = true
|
|
||||||
# PUBLIC_SNAPSHOTS = true
|
|
||||||
# PUBLIC_ADD_VIEW = false
|
|
||||||
# FOOTER_INFO = "Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests."
|
|
||||||
# SNAPSHOTS_PER_PAGE = 40
|
|
||||||
# CUSTOM_TEMPLATES_DIR = null
|
|
||||||
# TIME_ZONE = "UTC"
|
|
||||||
# TIMEZONE = "UTC"
|
|
||||||
# REVERSE_PROXY_USER_HEADER = "Remote-User"
|
|
||||||
# REVERSE_PROXY_WHITELIST = ""
|
|
||||||
# LOGOUT_REDIRECT_URL = "/"
|
|
||||||
# PREVIEW_ORIGINALS = true
|
|
||||||
# LDAP = false
|
|
||||||
# LDAP_SERVER_URI = null
|
|
||||||
# LDAP_BIND_DN = null
|
|
||||||
# LDAP_BIND_PASSWORD = null
|
|
||||||
# LDAP_USER_BASE = null
|
|
||||||
# LDAP_USER_FILTER = null
|
|
||||||
# LDAP_USERNAME_ATTR = null
|
|
||||||
# LDAP_FIRSTNAME_ATTR = null
|
|
||||||
# LDAP_LASTNAME_ATTR = null
|
|
||||||
# LDAP_EMAIL_ATTR = null
|
|
||||||
# LDAP_CREATE_SUPERUSER = false
|
|
||||||
# SAVE_TITLE = true
|
|
||||||
# SAVE_FAVICON = true
|
|
||||||
# SAVE_WGET = true
|
|
||||||
# SAVE_WGET_REQUISITES = true
|
|
||||||
# SAVE_SINGLEFILE = true
|
|
||||||
# SAVE_READABILITY = true
|
|
||||||
# SAVE_MERCURY = true
|
|
||||||
# SAVE_HTMLTOTEXT = true
|
|
||||||
# SAVE_PDF = true
|
|
||||||
# SAVE_SCREENSHOT = true
|
|
||||||
# SAVE_DOM = true
|
|
||||||
# SAVE_HEADERS = true
|
|
||||||
# SAVE_WARC = true
|
|
||||||
# SAVE_GIT = true
|
|
||||||
# SAVE_MEDIA = true
|
|
||||||
# SAVE_ARCHIVE_DOT_ORG = true
|
|
||||||
# RESOLUTION = [1440, 2000]
|
|
||||||
# GIT_DOMAINS = "github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht"
|
|
||||||
# CHECK_SSL_VALIDITY = true
|
|
||||||
# MEDIA_MAX_SIZE = "750m"
|
|
||||||
# USER_AGENT = null
|
|
||||||
# CURL_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)"
|
|
||||||
# WGET_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5"
|
|
||||||
# CHROME_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)"
|
|
||||||
# COOKIES_FILE = null
|
|
||||||
# CHROME_USER_DATA_DIR = null
|
|
||||||
# CHROME_TIMEOUT = false
|
|
||||||
# CHROME_HEADLESS = true
|
|
||||||
# CHROME_SANDBOX = true
|
|
||||||
# CHROME_EXTRA_ARGS = []
|
|
||||||
# YOUTUBEDL_ARGS = ["--restrict-filenames", "--trim-filenames", "128", "--write-description", "--write-info-json", "--write-annotations", "--write-thumbnail", "--no-call-home", "--write-sub", "--write-auto-subs", "--convert-subs=srt", "--yes-playlist", "--continue", "--no-abort-on-error", "--ignore-errors", "--geo-bypass", "--add-metadata", "--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)"]
|
|
||||||
# YOUTUBEDL_EXTRA_ARGS = []
|
|
||||||
# WGET_ARGS = ["--no-verbose", "--adjust-extension", "--convert-links", "--force-directories", "--backup-converted", "--span-hosts", "--no-parent", "-e", "robots=off"]
|
|
||||||
# WGET_EXTRA_ARGS = []
|
|
||||||
# CURL_ARGS = ["--silent", "--location", "--compressed"]
|
|
||||||
# CURL_EXTRA_ARGS = []
|
|
||||||
# GIT_ARGS = ["--recursive"]
|
|
||||||
# SINGLEFILE_ARGS = []
|
|
||||||
# SINGLEFILE_EXTRA_ARGS = []
|
|
||||||
# MERCURY_ARGS = ["--format=text"]
|
|
||||||
# MERCURY_EXTRA_ARGS = []
|
|
||||||
# FAVICON_PROVIDER = "https://www.google.com/s2/favicons?domain={}"
|
|
||||||
# USE_INDEXING_BACKEND = true
|
|
||||||
# USE_SEARCHING_BACKEND = true
|
|
||||||
# SEARCH_BACKEND_ENGINE = "ripgrep"
|
|
||||||
# SEARCH_BACKEND_HOST_NAME = "localhost"
|
|
||||||
# SEARCH_BACKEND_PORT = 1491
|
|
||||||
# SEARCH_BACKEND_PASSWORD = "SecretPassword"
|
|
||||||
# SEARCH_PROCESS_HTML = true
|
|
||||||
# SONIC_COLLECTION = "archivebox"
|
|
||||||
# SONIC_BUCKET = "snapshots"
|
|
||||||
# SEARCH_BACKEND_TIMEOUT = 90
|
|
||||||
# FTS_SEPARATE_DATABASE = true
|
|
||||||
# FTS_TOKENIZERS = "porter unicode61 remove_diacritics 2"
|
|
||||||
# FTS_SQLITE_MAX_LENGTH = 1000000000
|
|
||||||
# USE_CURL = true
|
|
||||||
# USE_WGET = true
|
|
||||||
# USE_SINGLEFILE = true
|
|
||||||
# USE_READABILITY = true
|
|
||||||
# USE_MERCURY = true
|
|
||||||
# USE_GIT = true
|
|
||||||
# USE_CHROME = true
|
|
||||||
# USE_NODE = true
|
|
||||||
# USE_YOUTUBEDL = true
|
|
||||||
# USE_RIPGREP = true
|
|
||||||
# CURL_BINARY = "curl"
|
|
||||||
# GIT_BINARY = "git"
|
|
||||||
# WGET_BINARY = "wget"
|
|
||||||
# SINGLEFILE_BINARY = "single-file"
|
|
||||||
# READABILITY_BINARY = "readability-extractor"
|
|
||||||
# MERCURY_BINARY = "postlight-parser"
|
|
||||||
# YOUTUBEDL_BINARY = "yt-dlp"
|
|
||||||
# NODE_BINARY = "node"
|
|
||||||
# RIPGREP_BINARY = "rg"
|
|
||||||
# CHROME_BINARY = "chrome"
|
|
||||||
# POCKET_CONSUMER_KEY = null
|
|
||||||
# USER = "squash"
|
|
||||||
# PACKAGE_DIR = "/opt/archivebox/archivebox"
|
|
||||||
# TEMPLATES_DIR = "/opt/archivebox/archivebox/templates"
|
|
||||||
# ARCHIVE_DIR = "/opt/archivebox/data/archive"
|
|
||||||
# SOURCES_DIR = "/opt/archivebox/data/sources"
|
|
||||||
# LOGS_DIR = "/opt/archivebox/data/logs"
|
|
||||||
# PERSONAS_DIR = "/opt/archivebox/data/personas"
|
|
||||||
# URL_DENYLIST_PTN = "re.compile(\'\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$\', re.IGNORECASE|re.MULTILINE)"
|
|
||||||
# URL_ALLOWLIST_PTN = null
|
|
||||||
# DIR_OUTPUT_PERMISSIONS = 755
|
|
||||||
# ARCHIVEBOX_BINARY = "/opt/archivebox/.venv/bin/archivebox"
|
|
||||||
# VERSION = "0.8.0"
|
|
||||||
# COMMIT_HASH = "102e87578c6036bb0132dd1ebd17f8f05ffc880f"
|
|
||||||
# BUILD_TIME = "2024-05-15 03:28:05 1715768885"
|
|
||||||
# VERSIONS_AVAILABLE = null
|
|
||||||
# CAN_UPGRADE = false
|
|
||||||
# PYTHON_BINARY = "/opt/archivebox/.venv/bin/python3.10"
|
|
||||||
# PYTHON_ENCODING = "UTF-8"
|
|
||||||
# PYTHON_VERSION = "3.10.14"
|
|
||||||
# DJANGO_BINARY = "/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py"
|
|
||||||
# DJANGO_VERSION = "5.0.6 final (0)"
|
|
||||||
# SQLITE_BINARY = "/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py"
|
|
||||||
# SQLITE_VERSION = "2.6.0"
|
|
||||||
# CURL_VERSION = "curl 8.4.0 (x86_64-apple-darwin23.0)"
|
|
||||||
# WGET_VERSION = "GNU Wget 1.24.5"
|
|
||||||
# WGET_AUTO_COMPRESSION = true
|
|
||||||
# RIPGREP_VERSION = "ripgrep 14.1.0"
|
|
||||||
# SINGLEFILE_VERSION = null
|
|
||||||
# READABILITY_VERSION = null
|
|
||||||
# MERCURY_VERSION = null
|
|
||||||
# GIT_VERSION = "git version 2.44.0"
|
|
||||||
# YOUTUBEDL_VERSION = "2024.04.09"
|
|
||||||
# CHROME_VERSION = "Google Chrome 124.0.6367.207"
|
|
||||||
# NODE_VERSION = "v21.7.3"'''
|
|
||||||
|
|
||||||
|
|
||||||
# first_output = convert(test_input) # make sure ini -> toml parses correctly
|
|
||||||
# second_output = convert(first_output) # make sure toml -> toml parses/dumps consistently
|
|
||||||
# assert first_output == second_output == expected_output # make sure parsing is indempotent
|
|
||||||
|
|
||||||
# # DEBUGGING
|
|
||||||
# import sys
|
|
||||||
# import difflib
|
|
||||||
# sys.stdout.writelines(difflib.context_diff(first_output, second_output, fromfile='first', tofile='second'))
|
|
||||||
# print(repr(second_output))
|
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
__package__ = 'archivebox.plugins_extractor.chrome'
|
||||||
|
|
||||||
import platform
|
import platform
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Optional, Dict, ClassVar
|
from typing import List, Optional, Dict, ClassVar
|
||||||
|
@ -77,40 +79,16 @@ def create_macos_app_symlink(target: Path, shortcut: Path):
|
||||||
###################### Config ##########################
|
###################### Config ##########################
|
||||||
|
|
||||||
|
|
||||||
class ChromeDependencyConfigs(BaseConfigSet):
|
class ChromeConfig(BaseConfigSet):
|
||||||
section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
|
section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
|
||||||
|
|
||||||
CHROME_BINARY: str = Field(default='chrome')
|
CHROME_BINARY: str = Field(default='chrome')
|
||||||
CHROME_ARGS: Optional[List[str]] = Field(default=None)
|
CHROME_ARGS: List[str] | None = Field(default=None)
|
||||||
CHROME_EXTRA_ARGS: List[str] = []
|
CHROME_EXTRA_ARGS: List[str] = Field(default=[])
|
||||||
CHROME_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
|
CHROME_DEFAULT_ARGS: List[str] = Field(default=lambda: ['--timeout={TIMEOUT-10}'])
|
||||||
|
|
||||||
# def load(self) -> Self:
|
|
||||||
# # for each field in the model, load its value
|
|
||||||
# # load from each source in order of precedence (lowest to highest):
|
|
||||||
# # - schema default
|
|
||||||
# # - ArchiveBox.conf INI file
|
|
||||||
# # - environment variables
|
|
||||||
# # - command-line arguments
|
|
||||||
|
|
||||||
# LOADED_VALUES: Dict[str, Any] = {}
|
CHROME_CONFIG = ChromeConfig()
|
||||||
|
|
||||||
# for field_name, field in self.__fields__.items():
|
|
||||||
# def_value = field.default_factory() if field.default_factory else field.default
|
|
||||||
# ini_value = settings.INI_CONFIG.get_value(field_name)
|
|
||||||
# env_value = settings.ENV_CONFIG.get_value(field_name)
|
|
||||||
# cli_value = settings.CLI_CONFIG.get_value(field_name)
|
|
||||||
# run_value = settings.RUN_CONFIG.get_value(field_name)
|
|
||||||
# value = run_value or cli_value or env_value or ini_value or def_value
|
|
||||||
|
|
||||||
class ChromeConfigs(ChromeDependencyConfigs):
|
|
||||||
# section: ConfigSectionName = 'ALL_CONFIGS'
|
|
||||||
pass
|
|
||||||
|
|
||||||
DEFAULT_GLOBAL_CONFIG = {
|
|
||||||
}
|
|
||||||
|
|
||||||
CHROME_CONFIG = ChromeConfigs(**DEFAULT_GLOBAL_CONFIG)
|
|
||||||
|
|
||||||
|
|
||||||
class ChromeBinary(BaseBinary):
|
class ChromeBinary(BaseBinary):
|
||||||
|
@ -133,6 +111,7 @@ class ChromeBinary(BaseBinary):
|
||||||
def symlink_to_lib(binary, bin_dir=settings.CONFIG.BIN_DIR) -> None:
|
def symlink_to_lib(binary, bin_dir=settings.CONFIG.BIN_DIR) -> None:
|
||||||
if not (binary.abspath and binary.abspath.exists()):
|
if not (binary.abspath and binary.abspath.exists()):
|
||||||
return
|
return
|
||||||
|
|
||||||
bin_dir.mkdir(parents=True, exist_ok=True)
|
bin_dir.mkdir(parents=True, exist_ok=True)
|
||||||
symlink = bin_dir / binary.name
|
symlink = bin_dir / binary.name
|
||||||
|
|
||||||
|
@ -146,7 +125,6 @@ class ChromeBinary(BaseBinary):
|
||||||
|
|
||||||
CHROME_BINARY = ChromeBinary()
|
CHROME_BINARY = ChromeBinary()
|
||||||
|
|
||||||
PLUGIN_BINARIES = [CHROME_BINARY]
|
|
||||||
|
|
||||||
class ChromePlugin(BasePlugin):
|
class ChromePlugin(BasePlugin):
|
||||||
app_label: str = 'chrome'
|
app_label: str = 'chrome'
|
||||||
|
|
|
@ -150,6 +150,7 @@ class CheckUserIsNotRoot(BaseCheck):
|
||||||
logger.debug('[√] UID is not root')
|
logger.debug('[√] UID is not root')
|
||||||
return errors
|
return errors
|
||||||
|
|
||||||
|
|
||||||
class CheckPipEnvironment(BaseCheck):
|
class CheckPipEnvironment(BaseCheck):
|
||||||
label: str = "CheckPipEnvironment"
|
label: str = "CheckPipEnvironment"
|
||||||
tag: str = Tags.database
|
tag: str = Tags.database
|
||||||
|
|
|
@ -14,7 +14,7 @@ from .utils import get_indexable_content, log_index_started
|
||||||
|
|
||||||
|
|
||||||
def import_backend():
|
def import_backend():
|
||||||
for backend in settings.SEARCH_BACKENDS:
|
for backend in settings.SEARCH_BACKENDS.values():
|
||||||
if backend.name == settings.CONFIGS.SearchBackendConfig.SEARCH_BACKEND_ENGINE:
|
if backend.name == settings.CONFIGS.SearchBackendConfig.SEARCH_BACKEND_ENGINE:
|
||||||
return backend
|
return backend
|
||||||
raise Exception(f'Could not load {settings.CONFIGS.SearchBackendConfig.SEARCH_BACKEND_ENGINE} as search backend')
|
raise Exception(f'Could not load {settings.CONFIGS.SearchBackendConfig.SEARCH_BACKEND_ENGINE} as search backend')
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue