diff --git a/archivebox/__init__.py b/archivebox/__init__.py index b1afc052..bccb2314 100755 --- a/archivebox/__init__.py +++ b/archivebox/__init__.py @@ -1,52 +1,33 @@ __package__ = 'archivebox' - -# print('INSTALLING MONKEY PATCHES') -from .monkey_patches import * # noqa -# print('DONE INSTALLING MONKEY PATCHES') - - import os import sys -import importlib.metadata from pathlib import Path -PACKAGE_DIR = Path(__file__).resolve().parent # archivebox source code dir -DATA_DIR = Path(os.curdir).resolve() # archivebox user data dir -ARCHIVE_DIR = DATA_DIR / 'archive' +PACKAGE_DIR = Path(__file__).resolve().parent # archivebox source code dir +DATA_DIR = Path(os.curdir).resolve() # archivebox user data dir +ARCHIVE_DIR = DATA_DIR / 'archive' # archivebox snapshot data dir # make sure PACKAGE_DIR is in sys.path so we can import all subfolders # without necessarily waiting for django to load them thorugh INSTALLED_APPS if str(PACKAGE_DIR) not in sys.path: sys.path.append(str(PACKAGE_DIR)) -# load fallback libraries from vendor dir -from .vendor import load_vendored_libs -load_vendored_libs() +from .config.constants import CONSTANTS, VERSION, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa os.environ['OUTPUT_DIR'] = str(DATA_DIR) os.environ['DJANGO_SETTINGS_MODULE'] = 'core.settings' +# print('INSTALLING MONKEY PATCHES') +from .monkey_patches import * # noqa +# print('DONE INSTALLING MONKEY PATCHES') -def _detect_installed_version(): - try: - return importlib.metadata.version(__package__ or 'archivebox') - except importlib.metadata.PackageNotFoundError: - try: - pyproject_config = (PACKAGE_DIR / 'pyproject.toml').read_text() - for line in pyproject_config: - if line.startswith('version = '): - return line.split(' = ', 1)[-1].strip('"') - except FileNotFoundError: - # building docs, pyproject.toml is not available - return 'dev' - - raise Exception('Failed to detect installed archivebox version!') - -VERSION = _detect_installed_version() +# print('LOADING VENDOR LIBRARIES') 
+from .vendor import load_vendored_libs # noqa +load_vendored_libs() +# print('DONE LOADING VENDOR LIBRARIES') __version__ = VERSION - - -from .constants import CONSTANTS +__author__ = 'Nick Sweeting' +__license__ = 'MIT' diff --git a/archivebox/abx/archivebox/base_binary.py b/archivebox/abx/archivebox/base_binary.py index 786f41e5..d4fa6df0 100644 --- a/archivebox/abx/archivebox/base_binary.py +++ b/archivebox/abx/archivebox/base_binary.py @@ -15,7 +15,8 @@ from pydantic_pkgr import ( ) import abx -import archivebox + +from archivebox.config import CONSTANTS from .base_hook import BaseHook, HookType @@ -54,7 +55,7 @@ class BaseBinary(BaseHook, Binary): @staticmethod def symlink_to_lib(binary, bin_dir=None) -> None: - bin_dir = bin_dir or archivebox.CONSTANTS.LIB_BIN_DIR + bin_dir = bin_dir or CONSTANTS.LIB_BIN_DIR if not (binary.abspath and binary.abspath.exists()): return @@ -68,19 +69,19 @@ class BaseBinary(BaseHook, Binary): @validate_call def load(self, **kwargs) -> Self: binary = super().load(**kwargs) - self.symlink_to_lib(binary=binary, bin_dir=archivebox.CONSTANTS.LIB_BIN_DIR) + self.symlink_to_lib(binary=binary, bin_dir=CONSTANTS.LIB_BIN_DIR) return binary @validate_call def install(self, **kwargs) -> Self: binary = super().install(**kwargs) - self.symlink_to_lib(binary=binary, bin_dir=archivebox.CONSTANTS.LIB_BIN_DIR) + self.symlink_to_lib(binary=binary, bin_dir=CONSTANTS.LIB_BIN_DIR) return binary @validate_call def load_or_install(self, **kwargs) -> Self: binary = super().load_or_install(**kwargs) - self.symlink_to_lib(binary=binary, bin_dir=archivebox.CONSTANTS.LIB_BIN_DIR) + self.symlink_to_lib(binary=binary, bin_dir=CONSTANTS.LIB_BIN_DIR) return binary @property diff --git a/archivebox/abx/archivebox/base_plugin.py b/archivebox/abx/archivebox/base_plugin.py index ac17e9c9..d276b339 100644 --- a/archivebox/abx/archivebox/base_plugin.py +++ b/archivebox/abx/archivebox/base_plugin.py @@ -127,7 +127,7 @@ class BasePlugin(BaseModel): @abx.hookimpl def 
register(self, settings): - from archivebox.config import bump_startup_progress_bar + from archivebox.config.legacy import bump_startup_progress_bar self._is_registered = True bump_startup_progress_bar() @@ -139,7 +139,7 @@ class BasePlugin(BaseModel): def ready(self, settings=None): """Runs any runtime code needed when AppConfig.ready() is called (after all models are imported).""" - from archivebox.config import bump_startup_progress_bar + from archivebox.config.legacy import bump_startup_progress_bar assert self._is_registered, f"Tried to run {self.plugin_module}.ready() but it was never registered!" self._is_ready = True diff --git a/archivebox/abx/django/use.py b/archivebox/abx/django/use.py index c4960898..87d3f9bd 100644 --- a/archivebox/abx/django/use.py +++ b/archivebox/abx/django/use.py @@ -1,7 +1,7 @@ __package__ = 'abx.django' import itertools -from benedict import benedict +# from benedict import benedict from .. import pm diff --git a/archivebox/api/v1_api.py b/archivebox/api/v1_api.py index 0b33b8ef..b71ceb3d 100644 --- a/archivebox/api/v1_api.py +++ b/archivebox/api/v1_api.py @@ -12,8 +12,7 @@ from ninja import NinjaAPI, Swagger # TODO: explore adding https://eadwincode.github.io/django-ninja-extra/ -import archivebox -from plugins_sys.config.apps import SHELL_CONFIG +from archivebox.config import SHELL_CONFIG, VERSION from api.auth import API_AUTH_METHODS @@ -32,7 +31,7 @@ html_description=f'''
  • 📚 ArchiveBox Documentation: Github Wiki
  • 📜 See the API source code: archivebox/api/
  • -Served by ArchiveBox v{archivebox.VERSION} ({COMMIT_HASH[:8]}), API powered by django-ninja. +Served by ArchiveBox v{VERSION} ({COMMIT_HASH[:8]}), API powered by django-ninja. ''' diff --git a/archivebox/api/v1_cli.py b/archivebox/api/v1_cli.py index cb0cc561..392b1193 100644 --- a/archivebox/api/v1_cli.py +++ b/archivebox/api/v1_cli.py @@ -13,7 +13,7 @@ from ..main import ( schedule, ) from ..util import ansi_to_html -from ..config import ONLY_NEW +from ..config.legacy import ONLY_NEW from .auth import API_AUTH_METHODS diff --git a/archivebox/cli/__init__.py b/archivebox/cli/__init__.py index 6cf0c63d..e13a3b34 100644 --- a/archivebox/cli/__init__.py +++ b/archivebox/cli/__init__.py @@ -4,7 +4,6 @@ __command__ = 'archivebox' import sys import argparse import threading -import archivebox from time import sleep from collections.abc import Mapping @@ -12,6 +11,7 @@ from collections.abc import Mapping from typing import Optional, List, IO, Union, Iterable from pathlib import Path +from archivebox.config import DATA_DIR from ..misc.checks import check_data_folder, check_migrations from ..misc.logging import stderr @@ -149,7 +149,7 @@ def run_subcommand(subcommand: str, subcommand_args = subcommand_args or [] if subcommand not in meta_cmds: - from ..config import setup_django, CONFIG + from ..config.legacy import setup_django, CONFIG cmd_requires_db = subcommand in archive_cmds init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args @@ -234,12 +234,12 @@ def main(args: List[str] | Omitted=OMITTED, stdin: IO | Omitted=OMITTED, pwd: st subcommand=command.subcommand, subcommand_args=command.subcommand_args, stdin=stdin or None, - pwd=pwd or archivebox.DATA_DIR, + pwd=pwd or DATA_DIR, ) run_subcommand( subcommand=command.subcommand, subcommand_args=command.subcommand_args, stdin=stdin or None, - pwd=pwd or archivebox.DATA_DIR, + pwd=pwd or DATA_DIR, ) diff --git a/archivebox/cli/archivebox_add.py b/archivebox/cli/archivebox_add.py index 
ed05584c..e34bfc25 100644 --- a/archivebox/cli/archivebox_add.py +++ b/archivebox/cli/archivebox_add.py @@ -11,7 +11,7 @@ from typing import List, Optional, IO from ..main import add from ..util import docstring from ..parsers import PARSERS -from ..config import OUTPUT_DIR, ONLY_NEW +from ..config.legacy import OUTPUT_DIR, ONLY_NEW from ..logging_util import SmartFormatter, accept_stdin, stderr diff --git a/archivebox/cli/archivebox_config.py b/archivebox/cli/archivebox_config.py index 25621972..76f711ef 100644 --- a/archivebox/cli/archivebox_config.py +++ b/archivebox/cli/archivebox_config.py @@ -10,7 +10,7 @@ from typing import Optional, List, IO from ..main import config from ..util import docstring -from ..config import OUTPUT_DIR +from ..config.legacy import OUTPUT_DIR from ..logging_util import SmartFormatter, accept_stdin diff --git a/archivebox/cli/archivebox_help.py b/archivebox/cli/archivebox_help.py index 46f17cbc..56e1cb77 100755 --- a/archivebox/cli/archivebox_help.py +++ b/archivebox/cli/archivebox_help.py @@ -10,7 +10,7 @@ from typing import Optional, List, IO from ..main import help from ..util import docstring -from ..config import OUTPUT_DIR +from ..config.legacy import OUTPUT_DIR from ..logging_util import SmartFormatter, reject_stdin diff --git a/archivebox/cli/archivebox_init.py b/archivebox/cli/archivebox_init.py index 48b65b1f..e7a0430a 100755 --- a/archivebox/cli/archivebox_init.py +++ b/archivebox/cli/archivebox_init.py @@ -10,7 +10,7 @@ from typing import Optional, List, IO from ..main import init from ..util import docstring -from ..config import OUTPUT_DIR +from ..config.legacy import OUTPUT_DIR from ..logging_util import SmartFormatter, reject_stdin diff --git a/archivebox/cli/archivebox_list.py b/archivebox/cli/archivebox_list.py index 5477bfc8..f8afb524 100644 --- a/archivebox/cli/archivebox_list.py +++ b/archivebox/cli/archivebox_list.py @@ -10,7 +10,7 @@ from typing import Optional, List, IO from ..main import list_all from ..util 
import docstring -from ..config import OUTPUT_DIR +from ..config.legacy import OUTPUT_DIR from ..index import ( LINK_FILTERS, get_indexed_folders, diff --git a/archivebox/cli/archivebox_manage.py b/archivebox/cli/archivebox_manage.py index f05604e1..1e28cd35 100644 --- a/archivebox/cli/archivebox_manage.py +++ b/archivebox/cli/archivebox_manage.py @@ -9,7 +9,7 @@ from typing import Optional, List, IO from ..main import manage from ..util import docstring -from ..config import OUTPUT_DIR +from ..config.legacy import OUTPUT_DIR @docstring(manage.__doc__) diff --git a/archivebox/cli/archivebox_oneshot.py b/archivebox/cli/archivebox_oneshot.py index 411cce8b..12a176ad 100644 --- a/archivebox/cli/archivebox_oneshot.py +++ b/archivebox/cli/archivebox_oneshot.py @@ -11,7 +11,7 @@ from typing import List, Optional, IO from ..main import oneshot from ..util import docstring -from ..config import OUTPUT_DIR +from ..config.legacy import OUTPUT_DIR from ..logging_util import SmartFormatter, accept_stdin, stderr diff --git a/archivebox/cli/archivebox_remove.py b/archivebox/cli/archivebox_remove.py index dadf2654..ac45cd9d 100644 --- a/archivebox/cli/archivebox_remove.py +++ b/archivebox/cli/archivebox_remove.py @@ -10,7 +10,7 @@ from typing import Optional, List, IO from ..main import remove from ..util import docstring -from ..config import OUTPUT_DIR +from ..config.legacy import OUTPUT_DIR from ..logging_util import SmartFormatter, accept_stdin diff --git a/archivebox/cli/archivebox_schedule.py b/archivebox/cli/archivebox_schedule.py index f606979b..59c2884d 100644 --- a/archivebox/cli/archivebox_schedule.py +++ b/archivebox/cli/archivebox_schedule.py @@ -10,7 +10,7 @@ from typing import Optional, List, IO from ..main import schedule from ..util import docstring -from ..config import OUTPUT_DIR +from ..config.legacy import OUTPUT_DIR from ..logging_util import SmartFormatter, reject_stdin diff --git a/archivebox/cli/archivebox_server.py b/archivebox/cli/archivebox_server.py 
index 4cc050dd..a5007b91 100644 --- a/archivebox/cli/archivebox_server.py +++ b/archivebox/cli/archivebox_server.py @@ -10,7 +10,7 @@ from typing import Optional, List, IO from ..main import server from ..util import docstring -from ..config import OUTPUT_DIR, BIND_ADDR +from ..config.legacy import OUTPUT_DIR, BIND_ADDR from ..logging_util import SmartFormatter, reject_stdin @docstring(server.__doc__) diff --git a/archivebox/cli/archivebox_setup.py b/archivebox/cli/archivebox_setup.py index 02ce57c9..f5e102f1 100755 --- a/archivebox/cli/archivebox_setup.py +++ b/archivebox/cli/archivebox_setup.py @@ -10,7 +10,7 @@ from typing import Optional, List, IO from ..main import setup from ..util import docstring -from ..config import OUTPUT_DIR +from ..config.legacy import OUTPUT_DIR from ..logging_util import SmartFormatter, reject_stdin diff --git a/archivebox/cli/archivebox_shell.py b/archivebox/cli/archivebox_shell.py index bcd5fdd6..afb225a7 100644 --- a/archivebox/cli/archivebox_shell.py +++ b/archivebox/cli/archivebox_shell.py @@ -10,7 +10,7 @@ from typing import Optional, List, IO from ..main import shell from ..util import docstring -from ..config import OUTPUT_DIR +from ..config.legacy import OUTPUT_DIR from ..logging_util import SmartFormatter, reject_stdin diff --git a/archivebox/cli/archivebox_status.py b/archivebox/cli/archivebox_status.py index 2bef19c7..86ace191 100644 --- a/archivebox/cli/archivebox_status.py +++ b/archivebox/cli/archivebox_status.py @@ -10,7 +10,7 @@ from typing import Optional, List, IO from ..main import status from ..util import docstring -from ..config import OUTPUT_DIR +from ..config.legacy import OUTPUT_DIR from ..logging_util import SmartFormatter, reject_stdin diff --git a/archivebox/cli/archivebox_update.py b/archivebox/cli/archivebox_update.py index 500d4c07..6cb97401 100644 --- a/archivebox/cli/archivebox_update.py +++ b/archivebox/cli/archivebox_update.py @@ -10,7 +10,7 @@ from typing import List, Optional, IO from ..main 
import update from ..util import docstring -from ..config import OUTPUT_DIR +from ..config.legacy import OUTPUT_DIR from ..index import ( LINK_FILTERS, get_indexed_folders, diff --git a/archivebox/cli/archivebox_version.py b/archivebox/cli/archivebox_version.py index e7922f37..3131b1d4 100755 --- a/archivebox/cli/archivebox_version.py +++ b/archivebox/cli/archivebox_version.py @@ -10,7 +10,7 @@ from typing import Optional, List, IO from ..main import version from ..util import docstring -from ..config import OUTPUT_DIR +from ..config.legacy import OUTPUT_DIR from ..logging_util import SmartFormatter, reject_stdin diff --git a/archivebox/cli/tests.py b/archivebox/cli/tests.py index 04c54df8..cc9a8e52 100644 --- a/archivebox/cli/tests.py +++ b/archivebox/cli/tests.py @@ -32,7 +32,7 @@ os.environ.update(TEST_CONFIG) from ..main import init from ..index import load_main_index -from ..config import ( +from ..config.legacy import ( SQL_INDEX_FILENAME, JSON_INDEX_FILENAME, HTML_INDEX_FILENAME, diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py new file mode 100644 index 00000000..ce4a5ed1 --- /dev/null +++ b/archivebox/config/__init__.py @@ -0,0 +1,26 @@ +__package__ = 'archivebox.config' + +from .constants import CONSTANTS, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR, VERSION +from .defaults import ( + SHELL_CONFIG, + STORAGE_CONFIG, + GENERAL_CONFIG, + SERVER_CONFIG, + ARCHIVING_CONFIG, + SEARCH_BACKEND_CONFIG, +) + + +__all__ = [ + 'CONSTANTS', + 'PACKAGE_DIR', + 'DATA_DIR', + 'ARCHIVE_DIR', + 'VERSION', + 'SHELL_CONFIG', + 'STORAGE_CONFIG', + 'GENERAL_CONFIG', + 'SERVER_CONFIG', + 'ARCHIVING_CONFIG', + 'SEARCH_BACKEND_CONFIG', +] diff --git a/archivebox/config/apps.py b/archivebox/config/apps.py new file mode 100644 index 00000000..b5b32364 --- /dev/null +++ b/archivebox/config/apps.py @@ -0,0 +1,58 @@ +__package__ = 'archivebox.config' + +from typing import List +from pydantic import InstanceOf + +from abx.archivebox.base_plugin import BasePlugin +from 
abx.archivebox.base_hook import BaseHook + + +from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa +from .defaults import ( + ShellConfig, # noqa: F401 + StorageConfig, # noqa: F401 + GeneralConfig, # noqa: F401 + ServerConfig, # noqa: F401 + ArchivingConfig, # noqa: F401 + SearchBackendConfig, # noqa: F401 + SHELL_CONFIG, + STORAGE_CONFIG, + GENERAL_CONFIG, + SERVER_CONFIG, + ARCHIVING_CONFIG, + SEARCH_BACKEND_CONFIG, +) + +###################### Config ########################## + + +class ConfigPlugin(BasePlugin): + app_label: str = 'CONFIG' + verbose_name: str = 'Configuration' + + hooks: List[InstanceOf[BaseHook]] = [ + SHELL_CONFIG, + GENERAL_CONFIG, + STORAGE_CONFIG, + SERVER_CONFIG, + ARCHIVING_CONFIG, + SEARCH_BACKEND_CONFIG, + ] + + + +PLUGIN = ConfigPlugin() +DJANGO_APP = PLUGIN.AppConfig + + + +# # register django apps +# @abx.hookimpl +# def get_INSTALLED_APPS(): +# return [DJANGO_APP.name] + +# # register configs +# @abx.hookimpl +# def register_CONFIG(): +# return PLUGIN.HOOKS_BY_TYPE['CONFIG'].values() + diff --git a/archivebox/plugins_sys/config/check_for_update.py b/archivebox/config/check_for_update.py similarity index 100% rename from archivebox/plugins_sys/config/check_for_update.py rename to archivebox/config/check_for_update.py diff --git a/archivebox/config_stubs.py b/archivebox/config/config_stubs.py similarity index 100% rename from archivebox/config_stubs.py rename to archivebox/config/config_stubs.py diff --git a/archivebox/constants.py b/archivebox/config/constants.py similarity index 72% rename from archivebox/constants.py rename to archivebox/config/constants.py index e577a6f2..d49a3573 100644 --- a/archivebox/constants.py +++ b/archivebox/config/constants.py @@ -1,27 +1,46 @@ -__package__ = 'archivebox' +__package__ = 'archivebox.config' import os import re from typing import Dict from pathlib import Path +import importlib.metadata from benedict import benedict -import archivebox - -from 
.misc.logging import DEFAULT_CLI_COLORS +from ..misc.logging import DEFAULT_CLI_COLORS ###################### Config ########################## -VERSION = archivebox.VERSION -PACKAGE_DIR = archivebox.PACKAGE_DIR -DATA_DIR = archivebox.DATA_DIR -ARCHIVE_DIR = archivebox.ARCHIVE_DIR +PACKAGE_DIR = Path(__file__).resolve().parent.parent # archivebox source code dir +DATA_DIR = Path(os.curdir).resolve() # archivebox user data dir +ARCHIVE_DIR = DATA_DIR / 'archive' # archivebox snapshot data dir -PACKAGE_DIR_NAME: str = archivebox.PACKAGE_DIR.name + +def _detect_installed_version(): + """Autodetect the installed archivebox version by using pip package metadata or pyproject.toml file""" + try: + return importlib.metadata.version('archivebox') + except importlib.metadata.PackageNotFoundError: + try: + pyproject_config = (PACKAGE_DIR / 'pyproject.toml').read_text() + for line in pyproject_config.split('\n'): + if line.startswith('version = '): + return line.split(' = ', 1)[-1].strip('"') + except FileNotFoundError: + # building docs, pyproject.toml is not available + return 'dev' + + raise Exception('Failed to detect installed archivebox version!') + +VERSION = _detect_installed_version() +__version__ = VERSION + + +PACKAGE_DIR_NAME: str = PACKAGE_DIR.name TEMPLATES_DIR_NAME: str = 'templates' -TEMPLATES_DIR: Path = archivebox.PACKAGE_DIR / TEMPLATES_DIR_NAME +TEMPLATES_DIR: Path = PACKAGE_DIR / TEMPLATES_DIR_NAME STATIC_DIR: Path = TEMPLATES_DIR / 'static' USER_PLUGINS_DIR_NAME: str = 'user_plugins' CUSTOM_TEMPLATES_DIR_NAME: str = 'user_templates' @@ -35,16 +54,16 @@ LOGS_DIR_NAME: str = 'logs' LIB_DIR_NAME: str = 'lib' TMP_DIR_NAME: str = 'tmp' -OUTPUT_DIR: Path = archivebox.DATA_DIR -ARCHIVE_DIR: Path = archivebox.DATA_DIR / ARCHIVE_DIR_NAME -SOURCES_DIR: Path = archivebox.DATA_DIR / SOURCES_DIR_NAME -PERSONAS_DIR: Path = archivebox.DATA_DIR / PERSONAS_DIR_NAME -CACHE_DIR: Path = archivebox.DATA_DIR / CACHE_DIR_NAME -LOGS_DIR: Path = archivebox.DATA_DIR / 
LOGS_DIR_NAME -LIB_DIR: Path = archivebox.DATA_DIR / LIB_DIR_NAME -TMP_DIR: Path = archivebox.DATA_DIR / TMP_DIR_NAME -CUSTOM_TEMPLATES_DIR: Path = archivebox.DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME -USER_PLUGINS_DIR: Path = archivebox.DATA_DIR / USER_PLUGINS_DIR_NAME +OUTPUT_DIR: Path = DATA_DIR +ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME +SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME +PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME +CACHE_DIR: Path = DATA_DIR / CACHE_DIR_NAME +LOGS_DIR: Path = DATA_DIR / LOGS_DIR_NAME +LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME +TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME +CUSTOM_TEMPLATES_DIR: Path = DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME +USER_PLUGINS_DIR: Path = DATA_DIR / USER_PLUGINS_DIR_NAME LIB_PIP_DIR: Path = LIB_DIR / 'pip' LIB_NPM_DIR: Path = LIB_DIR / 'npm' @@ -55,9 +74,9 @@ BIN_DIR: Path = LIB_BIN_DIR CONFIG_FILENAME: str = 'ArchiveBox.conf' SQL_INDEX_FILENAME: str = 'index.sqlite3' -CONFIG_FILE: Path = archivebox.DATA_DIR / CONFIG_FILENAME -DATABASE_FILE: Path = archivebox.DATA_DIR / SQL_INDEX_FILENAME -QUEUE_DATABASE_FILE: Path = archivebox.DATA_DIR / SQL_INDEX_FILENAME.replace('index.', 'queue.') +CONFIG_FILE: Path = DATA_DIR / CONFIG_FILENAME +DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME +QUEUE_DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME.replace('index.', 'queue.') JSON_INDEX_FILENAME: str = 'index.json' HTML_INDEX_FILENAME: str = 'index.html' @@ -125,7 +144,7 @@ DATA_DIR_NAMES: frozenset[str] = frozenset(( CUSTOM_TEMPLATES_DIR_NAME, USER_PLUGINS_DIR_NAME, )) -DATA_DIRS: frozenset[Path] = frozenset(archivebox.DATA_DIR / dirname for dirname in DATA_DIR_NAMES) +DATA_DIRS: frozenset[Path] = frozenset(DATA_DIR / dirname for dirname in DATA_DIR_NAMES) DATA_FILE_NAMES: frozenset[str] = frozenset(( CONFIG_FILENAME, SQL_INDEX_FILENAME, @@ -160,9 +179,9 @@ ALLOWED_IN_OUTPUT_DIR: frozenset[str] = frozenset(( CODE_LOCATIONS = benedict({ 'PACKAGE_DIR': { - 'path': (archivebox.PACKAGE_DIR).resolve(), + 'path': 
(PACKAGE_DIR).resolve(), 'enabled': True, - 'is_valid': (archivebox.PACKAGE_DIR / '__main__.py').exists(), + 'is_valid': (PACKAGE_DIR / '__main__.py').exists(), }, 'LIB_DIR': { 'path': LIB_DIR.resolve(), @@ -188,10 +207,10 @@ CODE_LOCATIONS = benedict({ DATA_LOCATIONS = benedict({ "OUTPUT_DIR": { - "path": archivebox.DATA_DIR.resolve(), + "path": DATA_DIR.resolve(), "enabled": True, "is_valid": DATABASE_FILE.exists(), - "is_mount": os.path.ismount(archivebox.DATA_DIR.resolve()), + "is_mount": os.path.ismount(DATA_DIR.resolve()), }, "CONFIG_FILE": { "path": CONFIG_FILE.resolve(), diff --git a/archivebox/plugins_sys/config/apps.py b/archivebox/config/defaults.py similarity index 87% rename from archivebox/plugins_sys/config/apps.py rename to archivebox/config/defaults.py index 4a4ab297..1b7bc15a 100644 --- a/archivebox/plugins_sys/config/apps.py +++ b/archivebox/config/defaults.py @@ -1,24 +1,21 @@ -__package__ = 'plugins_sys.config' +__package__ = 'archivebox.config' import os import sys import shutil -from typing import List, ClassVar, Dict, Optional +from typing import ClassVar, Dict, Optional from datetime import datetime from pathlib import Path from rich import print -from pydantic import InstanceOf, Field, field_validator, model_validator, computed_field +from pydantic import Field, field_validator, model_validator, computed_field from django.utils.crypto import get_random_string -from abx.archivebox.base_plugin import BasePlugin from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName -from abx.archivebox.base_hook import BaseHook -import archivebox -from archivebox.constants import CONSTANTS, CONSTANTS_CONFIG # noqa +from .constants import CONSTANTS, PACKAGE_DIR ###################### Config ########################## @@ -26,7 +23,7 @@ from archivebox.constants import CONSTANTS, CONSTANTS_CONFIG # noqa class ShellConfig(BaseConfigSet): section: ClassVar[ConfigSectionName] = 'SHELL_CONFIG' - DEBUG: bool = Field(default=False) + DEBUG: bool = 
Field(default=lambda: '--debug' in sys.argv) IS_TTY: bool = Field(default=sys.stdout.isatty()) USE_COLOR: bool = Field(default=lambda c: c.IS_TTY) @@ -56,7 +53,7 @@ class ShellConfig(BaseConfigSet): @property def COMMIT_HASH(self) -> Optional[str]: try: - git_dir = archivebox.PACKAGE_DIR / '../.git' + git_dir = PACKAGE_DIR / '../.git' ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1] commit_hash = git_dir.joinpath(ref).read_text().strip() return commit_hash @@ -64,7 +61,7 @@ class ShellConfig(BaseConfigSet): pass try: - return list((archivebox.PACKAGE_DIR / '../.git/refs/heads/').glob('*'))[0].read_text().strip() + return list((PACKAGE_DIR / '../.git/refs/heads/').glob('*'))[0].read_text().strip() except Exception: pass @@ -77,7 +74,7 @@ class ShellConfig(BaseConfigSet): docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0] return docker_build_end_time - src_last_modified_unix_timestamp = (archivebox.PACKAGE_DIR / 'config.py').stat().st_mtime + src_last_modified_unix_timestamp = (PACKAGE_DIR / 'package.json').stat().st_mtime return datetime.fromtimestamp(src_last_modified_unix_timestamp).strftime('%Y-%m-%d %H:%M:%S %s') @@ -227,39 +224,3 @@ class SearchBackendConfig(BaseConfigSet): SEARCH_BACKEND_CONFIG = SearchBackendConfig() - -class ConfigPlugin(BasePlugin): - app_label: str = 'CONFIG' - verbose_name: str = 'Configuration' - - hooks: List[InstanceOf[BaseHook]] = [ - SHELL_CONFIG, - GENERAL_CONFIG, - STORAGE_CONFIG, - SERVER_CONFIG, - ARCHIVING_CONFIG, - SEARCH_BACKEND_CONFIG, - ] - - # def register(self, settings, parent_plugin=None): - # try: - # super().register(settings, parent_plugin=parent_plugin) - # except Exception as e: - # print(f'[red][X] Error registering config plugin: {e}[/red]', file=sys.stderr) - - -PLUGIN = ConfigPlugin() -DJANGO_APP = PLUGIN.AppConfig - - - -# # register django apps -# @abx.hookimpl -# def get_INSTALLED_APPS(): -# return [DJANGO_APP.name] - -# # register configs -# 
@abx.hookimpl -# def register_CONFIG(): -# return PLUGIN.HOOKS_BY_TYPE['CONFIG'].values() - diff --git a/archivebox/config.py b/archivebox/config/legacy.py similarity index 94% rename from archivebox/config.py rename to archivebox/config/legacy.py index f2c4ca1b..55424646 100644 --- a/archivebox/config.py +++ b/archivebox/config/legacy.py @@ -19,7 +19,7 @@ Documentation: """ -__package__ = 'archivebox' +__package__ = 'archivebox.config' import os import io @@ -38,31 +38,27 @@ from configparser import ConfigParser from rich.progress import Progress from rich.console import Console from benedict import benedict +from pydantic_pkgr import SemVer import django from django.db.backends.sqlite3.base import Database as sqlite3 -import archivebox -from archivebox.constants import CONSTANTS -from archivebox.constants import * - -from pydantic_pkgr import SemVer +from .constants import CONSTANTS, TIMEZONE, OUTPUT_DIR +from .constants import * from .config_stubs import ( ConfigValue, - ConfigDict, ConfigDefaultValue, ConfigDefaultDict, ) - -from .misc.logging import ( +from ..misc.logging import ( stderr, hint, # noqa ) -from .plugins_sys.config.apps import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG -from .plugins_auth.ldap.apps import LDAP_CONFIG -from .plugins_extractor.favicon.apps import FAVICON_CONFIG +from .defaults import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG +from ..plugins_auth.ldap.apps import LDAP_CONFIG +from ..plugins_extractor.favicon.apps import FAVICON_CONFIG ANSI = SHELL_CONFIG.ANSI LDAP = LDAP_CONFIG.LDAP_ENABLED @@ -218,7 +214,7 @@ def get_real_name(key: str) -> str: # These are derived/computed values calculated *after* all user-provided config values are ingested # they appear in `archivebox config` output and are intended to be read-only for the user DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = { - 'PACKAGE_DIR': {'default': lambda c: 
archivebox.PACKAGE_DIR.resolve()}, + 'PACKAGE_DIR': {'default': lambda c: CONSTANTS.PACKAGE_DIR.resolve()}, 'TEMPLATES_DIR': {'default': lambda c: c['PACKAGE_DIR'] / CONSTANTS.TEMPLATES_DIR_NAME}, 'CUSTOM_TEMPLATES_DIR': {'default': lambda c: c['CUSTOM_TEMPLATES_DIR'] and Path(c['CUSTOM_TEMPLATES_DIR'])}, @@ -259,8 +255,8 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = { # 'CODE_LOCATIONS': {'default': lambda c: get_code_locations(c)}, # 'DATA_LOCATIONS': {'default': lambda c: get_data_locations(c)}, - 'SAVE_ALLOWLIST_PTN': {'default': lambda c: c['SAVE_ALLOWLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_ALLOWLIST'].items()}}, - 'SAVE_DENYLIST_PTN': {'default': lambda c: c['SAVE_DENYLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_DENYLIST'].items()}}, + 'SAVE_ALLOWLIST_PTN': {'default': lambda c: c['SAVE_ALLOWLIST'] and {re.compile(k, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_ALLOWLIST'].items()}}, + 'SAVE_DENYLIST_PTN': {'default': lambda c: c['SAVE_DENYLIST'] and {re.compile(k, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_DENYLIST'].items()}}, } @@ -273,7 +269,7 @@ def load_config_val(key: str, default: ConfigDefaultValue=None, type: Optional[Type]=None, aliases: Optional[Tuple[str, ...]]=None, - config: Optional[ConfigDict]=None, + config: Optional[benedict]=None, env_vars: Optional[os._Environ]=None, config_file_vars: Optional[Dict[str, str]]=None) -> ConfigValue: """parse bool, int, and str key=value pairs from env""" @@ -334,16 +330,16 @@ def load_config_val(key: str, raise Exception('Config values can only be str, bool, int, or json') -def load_config_file(out_dir: str | None=archivebox.DATA_DIR) -> Optional[ConfigDict]: +def load_config_file(out_dir: str | None=CONSTANTS.DATA_DIR) -> Optional[benedict]: """load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf""" - config_path = archivebox.CONSTANTS.CONFIG_FILE + config_path = CONSTANTS.CONFIG_FILE if 
config_path.exists(): config_file = ConfigParser() config_file.optionxform = str config_file.read(config_path) # flatten into one namespace - config_file_vars = ConfigDict({ + config_file_vars = benedict({ key.upper(): val for section, options in config_file.items() for key, val in options.items() @@ -354,10 +350,10 @@ def load_config_file(out_dir: str | None=archivebox.DATA_DIR) -> Optional[Config return None -def write_config_file(config: Dict[str, str], out_dir: str | None=archivebox.DATA_DIR) -> ConfigDict: +def write_config_file(config: Dict[str, str], out_dir: str | None=CONSTANTS.DATA_DIR) -> benedict: """load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf""" - from .system import atomic_write + from ..system import atomic_write CONFIG_HEADER = ( """# This is the config file for your ArchiveBox collection. @@ -373,7 +369,7 @@ def write_config_file(config: Dict[str, str], out_dir: str | None=archivebox.DAT """) - config_path = archivebox.CONSTANTS.CONFIG_FILE + config_path = CONSTANTS.CONFIG_FILE if not config_path.exists(): atomic_write(config_path, CONFIG_HEADER) @@ -394,7 +390,7 @@ def write_config_file(config: Dict[str, str], out_dir: str | None=archivebox.DAT existing_config = dict(config_file[section]) else: existing_config = {} - config_file[section] = ConfigDict({**existing_config, key: val}) + config_file[section] = benedict({**existing_config, key: val}) # always make sure there's a SECRET_KEY defined for Django existing_secret_key = None @@ -426,15 +422,15 @@ def write_config_file(config: Dict[str, str], out_dir: str | None=archivebox.DAT if Path(f'{config_path}.bak').exists(): os.remove(f'{config_path}.bak') - return { + return benedict({ key.upper(): CONFIG.get(key.upper()) for key in config.keys() - } + }) def load_config(defaults: ConfigDefaultDict, - config: Optional[ConfigDict]=None, + config: Optional[benedict]=None, out_dir: Optional[str]=None, env_vars: Optional[os._Environ]=None, config_file_vars: Optional[Dict[str, 
str]]=None) -> benedict: @@ -442,7 +438,7 @@ def load_config(defaults: ConfigDefaultDict, env_vars = env_vars or os.environ config_file_vars = config_file_vars or load_config_file(out_dir=out_dir) - extended_config: ConfigDict = config.copy() if config else {} + extended_config = benedict(config.copy() if config else {}) for key, default in defaults.items(): try: # print('LOADING CONFIG KEY:', key, 'DEFAULT=', default) @@ -614,7 +610,7 @@ def wget_supports_compression(config): return False -def get_dependency_info(config: ConfigDict) -> ConfigValue: +def get_dependency_info(config: benedict) -> ConfigValue: return { # 'PYTHON_BINARY': { # 'path': bin_path(config['PYTHON_BINARY']), @@ -733,7 +729,7 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue: def load_all_config(): - CONFIG: ConfigDict = ConfigDict() + CONFIG = benedict() for section_name, section_config in CONFIG_SCHEMA.items(): # print('LOADING CONFIG SECTION:', section_name) CONFIG = load_config(section_config, CONFIG) @@ -742,7 +738,7 @@ def load_all_config(): return load_config(DYNAMIC_CONFIG_SCHEMA, CONFIG) # add all final config values in CONFIG to globals in this file -CONFIG: ConfigDict = load_all_config() +CONFIG: benedict = load_all_config() globals().update(CONFIG) # this lets us do: from .config import DEBUG, MEDIA_TIMEOUT, ... 
@@ -773,7 +769,7 @@ if not SHELL_CONFIG.SHOW_PROGRESS: # recreate rich console obj based on new config values CONSOLE = Console() -from .misc import logging +from ..misc import logging logging.CONSOLE = CONSOLE @@ -788,8 +784,8 @@ def bump_startup_progress_bar(): def setup_django_minimal(): - # sys.path.append(str(archivebox.PACKAGE_DIR)) - # os.environ.setdefault('OUTPUT_DIR', str(archivebox.DATA_DIR)) + # sys.path.append(str(CONSTANTS.PACKAGE_DIR)) + # os.environ.setdefault('OUTPUT_DIR', str(CONSTANTS.DATA_DIR)) # os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings') # django.setup() raise Exception('dont use this anymore') @@ -797,7 +793,7 @@ def setup_django_minimal(): DJANGO_SET_UP = False -def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None: +def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CONFIG, in_memory_db=False) -> None: global INITIAL_STARTUP_PROGRESS global INITIAL_STARTUP_PROGRESS_TASK global DJANGO_SET_UP @@ -808,9 +804,9 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C with Progress(transient=True, expand=True, console=CONSOLE) as INITIAL_STARTUP_PROGRESS: INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25) - output_dir = out_dir or archivebox.DATA_DIR + output_dir = out_dir or CONSTANTS.DATA_DIR - assert isinstance(output_dir, Path) and isinstance(archivebox.PACKAGE_DIR, Path) + assert isinstance(output_dir, Path) and isinstance(CONSTANTS.PACKAGE_DIR, Path) bump_startup_progress_bar() try: @@ -842,7 +838,7 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C with open(settings.ERROR_LOG, "a", encoding='utf-8') as f: command = ' '.join(sys.argv) ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S') - f.write(f"\n> {command}; TS={ts} VERSION={archivebox.VERSION} IN_DOCKER={SHELL_CONFIG.IN_DOCKER} 
IS_TTY={SHELL_CONFIG.IS_TTY}\n") + f.write(f"\n> {command}; TS={ts} VERSION={CONSTANTS.VERSION} IN_DOCKER={SHELL_CONFIG.IN_DOCKER} IS_TTY={SHELL_CONFIG.IS_TTY}\n") if check_db: # Create cache table in DB if needed @@ -861,9 +857,9 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C for conn in connections.all(): conn.close_if_unusable_or_obsolete() - sql_index_path = archivebox.CONSTANTS.DATABASE_FILE + sql_index_path = CONSTANTS.DATABASE_FILE assert sql_index_path.exists(), ( - f'No database file {sql_index_path} found in: {archivebox.DATA_DIR} (Are you in an ArchiveBox collection directory?)') + f'No database file {sql_index_path} found in: {CONSTANTS.DATA_DIR} (Are you in an ArchiveBox collection directory?)') bump_startup_progress_bar() @@ -876,7 +872,7 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C logfire.configure() logfire.instrument_django(is_sql_commentor_enabled=True) - logfire.info(f'Started ArchiveBox v{archivebox.VERSION}', argv=sys.argv) + logfire.info(f'Started ArchiveBox v{CONSTANTS.VERSION}', argv=sys.argv) except KeyboardInterrupt: raise SystemExit(2) diff --git a/archivebox/plugins_sys/config/views.py b/archivebox/config/views.py similarity index 98% rename from archivebox/plugins_sys/config/views.py rename to archivebox/config/views.py index c38a957e..0e5350ba 100644 --- a/archivebox/plugins_sys/config/views.py +++ b/archivebox/config/views.py @@ -13,8 +13,7 @@ from django.utils.html import format_html, mark_safe from admin_data_views.typing import TableContext, ItemContext from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink -import archivebox - +from archivebox.config import CONSTANTS from archivebox.util import parse_date @@ -381,7 +380,7 @@ def log_list_view(request: HttpRequest, **kwargs) -> TableContext: assert request.user.is_superuser, "Must be a superuser to view configuration settings." 
- log_files = archivebox.CONSTANTS.LOGS_DIR.glob("*.log") + log_files = CONSTANTS.LOGS_DIR.glob("*.log") log_files = sorted(log_files, key=os.path.getmtime)[::-1] rows = { @@ -419,7 +418,7 @@ def log_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext: from django.conf import settings - log_file = [logfile for logfile in archivebox.CONSTANTS.LOGS_DIR.glob('*.log') if key in logfile.name][0] + log_file = [logfile for logfile in CONSTANTS.LOGS_DIR.glob('*.log') if key in logfile.name][0] log_text = log_file.read_text() log_stat = log_file.stat() diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py index e81c569d..96f6863b 100644 --- a/archivebox/core/admin.py +++ b/archivebox/core/admin.py @@ -18,11 +18,10 @@ from django.template import Template, RequestContext from django.conf import settings from django import forms -import archivebox - from signal_webhooks.admin import WebhookAdmin from signal_webhooks.utils import get_webhook_model -# from abx.archivebox.admin import CustomPlugin + +from archivebox.config import VERSION from ..util import htmldecode, urldecode @@ -30,7 +29,7 @@ from core.models import Snapshot, ArchiveResult, Tag from core.mixins import SearchResultsAdminMixin from api.models import APIToken from abid_utils.admin import ABIDModelAdmin -from queues.tasks import bg_archive_links, bg_archive_link, bg_add +from queues.tasks import bg_archive_links, bg_add from index.html import snapshot_icons from logging_util import printable_filesize @@ -40,7 +39,7 @@ from extractors import archive_links CONFIG = settings.CONFIG -GLOBAL_CONTEXT = {'VERSION': archivebox.VERSION, 'VERSIONS_AVAILABLE': [], 'CAN_UPGRADE': False} +GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': [], 'CAN_UPGRADE': False} # Admin URLs # /admin/ diff --git a/archivebox/core/auth.py b/archivebox/core/auth.py index 048f029c..536e0778 100644 --- a/archivebox/core/auth.py +++ b/archivebox/core/auth.py @@ -1,7 +1,7 @@ __package__ = 'archivebox.core' 
-from ..config import ( +from ..config.legacy import ( LDAP ) diff --git a/archivebox/core/auth_ldap.py b/archivebox/core/auth_ldap.py index b5e2877e..1d0e8658 100644 --- a/archivebox/core/auth_ldap.py +++ b/archivebox/core/auth_ldap.py @@ -1,4 +1,4 @@ -from ..config import ( +from ..config.legacy import ( LDAP_CREATE_SUPERUSER ) diff --git a/archivebox/core/middleware.py b/archivebox/core/middleware.py index cf7ab991..4cd45e01 100644 --- a/archivebox/core/middleware.py +++ b/archivebox/core/middleware.py @@ -5,7 +5,7 @@ from django.utils import timezone from django.contrib.auth.middleware import RemoteUserMiddleware from django.core.exceptions import ImproperlyConfigured -from ..config import PUBLIC_SNAPSHOTS, REVERSE_PROXY_USER_HEADER, REVERSE_PROXY_WHITELIST +from ..config.legacy import PUBLIC_SNAPSHOTS, REVERSE_PROXY_USER_HEADER, REVERSE_PROXY_WHITELIST def detect_timezone(request, activate: bool=True): diff --git a/archivebox/core/migrations/0007_archiveresult.py b/archivebox/core/migrations/0007_archiveresult.py index 3da3b93c..d852af63 100644 --- a/archivebox/core/migrations/0007_archiveresult.py +++ b/archivebox/core/migrations/0007_archiveresult.py @@ -1,14 +1,18 @@ # Generated by Django 3.0.8 on 2020-11-04 12:25 +import os import json from pathlib import Path from django.db import migrations, models import django.db.models.deletion -from config import CONFIG from index.json import to_json +DATA_DIR = Path(os.curdir).resolve() # archivebox user data dir +ARCHIVE_DIR = DATA_DIR / 'archive' # archivebox snapshot data dir + + try: JSONField = models.JSONField except AttributeError: @@ -22,7 +26,7 @@ def forwards_func(apps, schema_editor): snapshots = Snapshot.objects.all() for snapshot in snapshots: - out_dir = Path(CONFIG['ARCHIVE_DIR']) / snapshot.timestamp + out_dir = ARCHIVE_DIR / snapshot.timestamp try: with open(out_dir / "index.json", "r") as f: @@ -57,7 +61,7 @@ def forwards_func(apps, schema_editor): def verify_json_index_integrity(snapshot): results 
= snapshot.archiveresult_set.all() - out_dir = Path(CONFIG['ARCHIVE_DIR']) / snapshot.timestamp + out_dir = ARCHIVE_DIR / snapshot.timestamp with open(out_dir / "index.json", "r") as f: index = json.load(f) diff --git a/archivebox/core/models.py b/archivebox/core/models.py index 0b70f17a..0630f625 100644 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -17,10 +17,9 @@ from django.db.models import Case, When, Value, IntegerField from django.contrib import admin from django.conf import settings -import archivebox +from archivebox.config import CONSTANTS from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField - from queues.tasks import bg_archive_snapshot from ..system import get_dir_size @@ -261,11 +260,11 @@ class Snapshot(ABIDModel): @cached_property def link_dir(self): - return str(archivebox.CONSTANTS.ARCHIVE_DIR / self.timestamp) + return str(CONSTANTS.ARCHIVE_DIR / self.timestamp) @cached_property def archive_path(self): - return '{}/{}'.format(archivebox.CONSTANTS.ARCHIVE_DIR_NAME, self.timestamp) + return '{}/{}'.format(CONSTANTS.ARCHIVE_DIR_NAME, self.timestamp) @cached_property def archive_size(self): @@ -375,17 +374,17 @@ class Snapshot(ABIDModel): # def get_storage_dir(self, create=True, symlink=True) -> Path: # date_str = self.bookmarked_at.strftime('%Y%m%d') # domain_str = domain(self.url) - # abs_storage_dir = Path(archivebox.CONSTANTS.ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid) + # abs_storage_dir = Path(CONSTANTS.ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid) # if create and not abs_storage_dir.is_dir(): # abs_storage_dir.mkdir(parents=True, exist_ok=True) # if symlink: # LINK_PATHS = [ - # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid), - # # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_id' / str(self.ulid), - # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_date' / date_str / 
domain_str / str(self.ulid), - # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_domain' / domain_str / date_str / str(self.ulid), + # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid), + # # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_id' / str(self.ulid), + # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_date' / date_str / domain_str / str(self.ulid), + # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_domain' / domain_str / date_str / str(self.ulid), # ] # for link_path in LINK_PATHS: # link_path.parent.mkdir(parents=True, exist_ok=True) @@ -524,18 +523,18 @@ class ArchiveResult(ABIDModel): # def get_storage_dir(self, create=True, symlink=True): # date_str = self.snapshot.bookmarked_at.strftime('%Y%m%d') # domain_str = domain(self.snapshot.url) - # abs_storage_dir = Path(archivebox.CONSTANTS.ARCHIVE_DIR) / 'results' / date_str / domain_str / self.extractor / str(self.ulid) + # abs_storage_dir = Path(CONSTANTS.ARCHIVE_DIR) / 'results' / date_str / domain_str / self.extractor / str(self.ulid) # if create and not abs_storage_dir.is_dir(): # abs_storage_dir.mkdir(parents=True, exist_ok=True) # if symlink: # LINK_PATHS = [ - # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid), - # # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_id' / str(self.ulid), - # # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_date' / date_str / domain_str / self.extractor / str(self.ulid), - # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_domain' / domain_str / date_str / self.extractor / str(self.ulid), - # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_type' / self.extractor / date_str / domain_str / str(self.ulid), + # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid), + # # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_id' / 
str(self.ulid), + # # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_date' / date_str / domain_str / self.extractor / str(self.ulid), + # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_domain' / domain_str / date_str / self.extractor / str(self.ulid), + # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_type' / self.extractor / date_str / domain_str / str(self.ulid), # ] # for link_path in LINK_PATHS: # link_path.parent.mkdir(parents=True, exist_ok=True) diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index cdab906c..c0e612c7 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -13,20 +13,15 @@ import abx.archivebox import abx.archivebox.use import abx.django.use -import archivebox -from archivebox.constants import CONSTANTS +from archivebox.config import VERSION, DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS # noqa -from ..config import CONFIG +from ..config.legacy import CONFIG IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3] IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3] -VERSION = archivebox.VERSION -PACKAGE_DIR = archivebox.PACKAGE_DIR -DATA_DIR = archivebox.DATA_DIR -ARCHIVE_DIR = archivebox.ARCHIVE_DIR ################################################################################ ### ArchiveBox Plugin Settings @@ -40,14 +35,14 @@ PLUGIN_HOOKSPECS = [ abx.register_hookspecs(PLUGIN_HOOKSPECS) BUILTIN_PLUGIN_DIRS = { - 'plugins_sys': archivebox.PACKAGE_DIR / 'plugins_sys', - 'plugins_pkg': archivebox.PACKAGE_DIR / 'plugins_pkg', - 'plugins_auth': archivebox.PACKAGE_DIR / 'plugins_auth', - 'plugins_search': archivebox.PACKAGE_DIR / 'plugins_search', - 'plugins_extractor': archivebox.PACKAGE_DIR / 'plugins_extractor', + 'archivebox': PACKAGE_DIR, + 'plugins_pkg': PACKAGE_DIR / 'plugins_pkg', + 'plugins_auth': PACKAGE_DIR / 'plugins_auth', + 'plugins_search': 
PACKAGE_DIR / 'plugins_search', + 'plugins_extractor': PACKAGE_DIR / 'plugins_extractor', } USER_PLUGIN_DIRS = { - 'user_plugins': archivebox.DATA_DIR / 'user_plugins', + 'user_plugins': DATA_DIR / 'user_plugins', } BUILTIN_PLUGINS = abx.get_plugins_in_dirs(BUILTIN_PLUGIN_DIRS) @@ -105,6 +100,7 @@ INSTALLED_APPS = [ 'django_object_actions', # provides easy Django Admin action buttons on change views https://github.com/crccheck/django-object-actions # Our ArchiveBox-provided apps + #'config', # ArchiveBox config settings 'queues', # handles starting and managing background workers and processes 'abid_utils', # handles ABID ID creation, handling, and models 'core', # core django model with Snapshot, ArchiveResult, etc. @@ -481,41 +477,41 @@ ADMIN_DATA_VIEWS = { }, { "route": "binaries/", - "view": "plugins_sys.config.views.binaries_list_view", + "view": "archivebox.config.views.binaries_list_view", "name": "Binaries", "items": { "route": "/", - "view": "plugins_sys.config.views.binary_detail_view", + "view": "archivebox.config.views.binary_detail_view", "name": "binary", }, }, { "route": "plugins/", - "view": "plugins_sys.config.views.plugins_list_view", + "view": "archivebox.config.views.plugins_list_view", "name": "Plugins", "items": { "route": "/", - "view": "plugins_sys.config.views.plugin_detail_view", + "view": "archivebox.config.views.plugin_detail_view", "name": "plugin", }, }, { "route": "workers/", - "view": "plugins_sys.config.views.worker_list_view", + "view": "archivebox.config.views.worker_list_view", "name": "Workers", "items": { "route": "/", - "view": "plugins_sys.config.views.worker_detail_view", + "view": "archivebox.config.views.worker_detail_view", "name": "worker", }, }, { "route": "logs/", - "view": "plugins_sys.config.views.log_list_view", + "view": "archivebox.config.views.log_list_view", "name": "Logs", "items": { "route": "/", - "view": "plugins_sys.config.views.log_detail_view", + "view": "archivebox.config.views.log_detail_view", "name": 
"log", }, }, diff --git a/archivebox/core/settings_logging.py b/archivebox/core/settings_logging.py index 28b2e0c3..afe101b2 100644 --- a/archivebox/core/settings_logging.py +++ b/archivebox/core/settings_logging.py @@ -7,7 +7,7 @@ import logging import pydantic import django.template -import archivebox +from archivebox.config import CONSTANTS from ..misc.logging import IS_TTY @@ -52,7 +52,7 @@ class CustomOutboundWebhookLogFormatter(logging.Formatter): ERROR_LOG = tempfile.NamedTemporaryFile().name -LOGS_DIR = archivebox.DATA_DIR / 'logs' +LOGS_DIR = CONSTANTS.LOGS_DIR if LOGS_DIR.is_dir(): ERROR_LOG = (LOGS_DIR / 'errors.log') diff --git a/archivebox/core/urls.py b/archivebox/core/urls.py index e1dba738..971b8ea2 100644 --- a/archivebox/core/urls.py +++ b/archivebox/core/urls.py @@ -10,7 +10,7 @@ from .views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthC from .serve_static import serve_static # GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306 -# from config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE +# from .config.legacy import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE # GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': VERSIONS_AVAILABLE, 'CAN_UPGRADE': CAN_UPGRADE} diff --git a/archivebox/core/views.py b/archivebox/core/views.py index 5d0c614a..5a7c7f4c 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -20,8 +20,6 @@ from django.utils.decorators import method_decorator from admin_data_views.typing import TableContext, ItemContext from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink -import archivebox -from archivebox.constants import CONSTANTS from core.models import Snapshot from core.forms import AddLinkForm @@ -29,10 +27,10 @@ from core.admin import result_url from queues.tasks import bg_add -from ..plugins_sys.config.apps import SHELL_CONFIG, SERVER_CONFIG +from archivebox.config import CONSTANTS, 
DATA_DIR, VERSION, SHELL_CONFIG, SERVER_CONFIG from ..plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG -from ..config import ( +from ..config.legacy import ( CONFIG_SCHEMA, DYNAMIC_CONFIG_SCHEMA, USER_CONFIG, @@ -381,7 +379,7 @@ class PublicIndexView(ListView): def get_context_data(self, **kwargs): return { **super().get_context_data(**kwargs), - 'VERSION': archivebox.VERSION, + 'VERSION': VERSION, 'COMMIT_HASH': SHELL_CONFIG.COMMIT_HASH, 'FOOTER_INFO': SERVER_CONFIG.FOOTER_INFO, } @@ -451,7 +449,7 @@ class AddView(UserPassesTestMixin, FormView): 'title': "Add URLs", # We can't just call request.build_absolute_uri in the template, because it would include query parameters 'absolute_add_path': self.request.build_absolute_uri(self.request.path), - 'VERSION': archivebox.VERSION, + 'VERSION': VERSION, 'FOOTER_INFO': SERVER_CONFIG.FOOTER_INFO, 'stdout': '', } @@ -469,7 +467,7 @@ class AddView(UserPassesTestMixin, FormView): "depth": depth, "parser": parser, "update_all": False, - "out_dir": archivebox.DATA_DIR, + "out_dir": DATA_DIR, "created_by_id": self.request.user.pk, } if extractors: diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py index 700aede7..443a1aed 100644 --- a/archivebox/extractors/__init__.py +++ b/archivebox/extractors/__init__.py @@ -10,7 +10,7 @@ from datetime import datetime, timezone from django.db.models import QuerySet -from ..config import ( +from ..config.legacy import ( SAVE_ALLOWLIST_PTN, SAVE_DENYLIST_PTN, ) diff --git a/archivebox/extractors/archive_org.py b/archivebox/extractors/archive_org.py index 5aa66fa7..ac73f721 100644 --- a/archivebox/extractors/archive_org.py +++ b/archivebox/extractors/archive_org.py @@ -12,7 +12,7 @@ from ..util import ( is_static_file, dedupe, ) -from ..config import ( +from ..config.legacy import ( TIMEOUT, CURL_ARGS, CURL_EXTRA_ARGS, @@ -24,6 +24,7 @@ from ..config import ( ) from ..logging_util import TimedProgress + def get_output_path(): return 
'archive.org.txt' diff --git a/archivebox/extractors/favicon.py b/archivebox/extractors/favicon.py index b9b5c3a7..791184f9 100644 --- a/archivebox/extractors/favicon.py +++ b/archivebox/extractors/favicon.py @@ -11,7 +11,7 @@ from ..util import ( domain, dedupe, ) -from ..config import CONFIG +from ..config.legacy import CONFIG from ..logging_util import TimedProgress diff --git a/archivebox/extractors/git.py b/archivebox/extractors/git.py index 3b8a4b9d..1e9decbb 100644 --- a/archivebox/extractors/git.py +++ b/archivebox/extractors/git.py @@ -14,7 +14,7 @@ from ..util import ( without_query, without_fragment, ) -from ..config import CONFIG +from ..config.legacy import CONFIG from ..logging_util import TimedProgress diff --git a/archivebox/extractors/headers.py b/archivebox/extractors/headers.py index 9fd48469..4bd2780b 100644 --- a/archivebox/extractors/headers.py +++ b/archivebox/extractors/headers.py @@ -11,7 +11,7 @@ from ..util import ( get_headers, dedupe, ) -from ..config import ( +from ..config.legacy import ( TIMEOUT, CURL_BINARY, CURL_ARGS, diff --git a/archivebox/extractors/htmltotext.py b/archivebox/extractors/htmltotext.py index 29591e69..276ed5b8 100644 --- a/archivebox/extractors/htmltotext.py +++ b/archivebox/extractors/htmltotext.py @@ -1,13 +1,12 @@ __package__ = 'archivebox.extractors' -import archivebox - from html.parser import HTMLParser import io from pathlib import Path from typing import Optional -from ..config import ( +from archivebox.config import VERSION +from ..config.legacy import ( SAVE_HTMLTOTEXT, TIMEOUT, ) @@ -154,7 +153,7 @@ def save_htmltotext(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO return ArchiveResult( cmd=cmd, pwd=str(out_dir), - cmd_version=archivebox.__version__, + cmd_version=VERSION, output=output, status=status, index_texts=[extracted_text] if extracted_text else [], diff --git a/archivebox/extractors/mercury.py b/archivebox/extractors/mercury.py index 71af1329..0aad67e6 100644 --- 
a/archivebox/extractors/mercury.py +++ b/archivebox/extractors/mercury.py @@ -13,7 +13,7 @@ from ..util import ( is_static_file, dedupe, ) -from ..config import ( +from ..config.legacy import ( TIMEOUT, SAVE_MERCURY, DEPENDENCIES, diff --git a/archivebox/extractors/title.py b/archivebox/extractors/title.py index a1cb769f..9779e042 100644 --- a/archivebox/extractors/title.py +++ b/archivebox/extractors/title.py @@ -12,7 +12,7 @@ from ..util import ( htmldecode, dedupe, ) -from ..config import ( +from ..config.legacy import ( TIMEOUT, CHECK_SSL_VALIDITY, SAVE_TITLE, diff --git a/archivebox/extractors/wget.py b/archivebox/extractors/wget.py index c4cb6d44..9cc30c6f 100644 --- a/archivebox/extractors/wget.py +++ b/archivebox/extractors/wget.py @@ -17,7 +17,7 @@ from ..util import ( urldecode, dedupe, ) -from ..config import ( +from ..config.legacy import ( WGET_ARGS, WGET_EXTRA_ARGS, TIMEOUT, diff --git a/archivebox/index/__init__.py b/archivebox/index/__init__.py index 9b9619e0..8219f1db 100644 --- a/archivebox/index/__init__.py +++ b/archivebox/index/__init__.py @@ -12,15 +12,14 @@ from urllib.parse import urlparse from django.db.models import QuerySet, Q -import archivebox - +from archivebox.config import DATA_DIR, CONSTANTS, SEARCH_BACKEND_CONFIG from ..util import ( scheme, enforce_types, ExtendedEncoder, ) from ..misc.logging import stderr -from ..config import ( +from ..config.legacy import ( TIMEOUT, URL_DENYLIST_PTN, URL_ALLOWLIST_PTN, @@ -223,28 +222,28 @@ def timed_index_update(out_path: Path): @enforce_types -def write_main_index(links: List[Link], out_dir: Path=archivebox.DATA_DIR, created_by_id: int | None=None) -> None: +def write_main_index(links: List[Link], out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> None: """Writes links to sqlite3 file for a given list of links""" log_indexing_process_started(len(links)) try: - with timed_index_update(archivebox.CONSTANTS.DATABASE_FILE): + with timed_index_update(CONSTANTS.DATABASE_FILE): 
write_sql_main_index(links, out_dir=out_dir, created_by_id=created_by_id) - os.chmod(archivebox.CONSTANTS.DATABASE_FILE, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes + os.chmod(CONSTANTS.DATABASE_FILE, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes except (KeyboardInterrupt, SystemExit): stderr('[!] Warning: Still writing index to disk...', color='lightyellow') stderr(' Run archivebox init to fix any inconsistencies from an ungraceful exit.') - with timed_index_update(archivebox.CONSTANTS.DATABASE_FILE): + with timed_index_update(CONSTANTS.DATABASE_FILE): write_sql_main_index(links, out_dir=out_dir, created_by_id=created_by_id) - os.chmod(archivebox.CONSTANTS.DATABASE_FILE, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes + os.chmod(CONSTANTS.DATABASE_FILE, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes raise SystemExit(0) log_indexing_process_finished() @enforce_types -def load_main_index(out_dir: Path=archivebox.DATA_DIR, warn: bool=True) -> List[Link]: +def load_main_index(out_dir: Path=DATA_DIR, warn: bool=True) -> List[Link]: """parse and load existing index with any new links from import_path merged in""" from core.models import Snapshot try: @@ -254,8 +253,8 @@ def load_main_index(out_dir: Path=archivebox.DATA_DIR, warn: bool=True) -> List[ raise SystemExit(0) @enforce_types -def load_main_index_meta(out_dir: Path=archivebox.DATA_DIR) -> Optional[dict]: - index_path = out_dir / archivebox.CONSTANTS.JSON_INDEX_FILENAME +def load_main_index_meta(out_dir: Path=DATA_DIR) -> Optional[dict]: + index_path = out_dir / CONSTANTS.JSON_INDEX_FILENAME if index_path.exists(): with open(index_path, 'r', encoding='utf-8') as f: meta_dict = pyjson.load(f) @@ -377,7 +376,6 @@ def q_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str=' return snapshots.filter(q_filter) def 
search_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='search') -> QuerySet: - from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG from ..search import query_search_index if not SEARCH_BACKEND_CONFIG.USE_SEARCHING_BACKEND: @@ -406,7 +404,7 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type return search_filter(snapshots, filter_patterns, filter_type) -def get_indexed_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]: +def get_indexed_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]: """indexed links without checking archive status or data directory validity""" links = (snapshot.as_link() for snapshot in snapshots.iterator(chunk_size=500)) return { @@ -414,7 +412,7 @@ def get_indexed_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[st for link in links } -def get_archived_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]: +def get_archived_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]: """indexed links that are archived with a valid data directory""" links = (snapshot.as_link() for snapshot in snapshots.iterator(chunk_size=500)) return { @@ -422,7 +420,7 @@ def get_archived_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[s for link in filter(is_archived, links) } -def get_unarchived_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]: +def get_unarchived_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]: """indexed links that are unarchived with no data directory or an empty data directory""" links = (snapshot.as_link() for snapshot in snapshots.iterator(chunk_size=500)) return { @@ -430,12 +428,12 @@ def get_unarchived_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict for link in filter(is_unarchived, links) } -def get_present_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, 
Optional[Link]]: +def get_present_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]: """dirs that actually exist in the archive/ folder""" all_folders = {} - for entry in (out_dir / archivebox.CONSTANTS.ARCHIVE_DIR_NAME).iterdir(): + for entry in (out_dir / CONSTANTS.ARCHIVE_DIR_NAME).iterdir(): if entry.is_dir(): link = None try: @@ -447,7 +445,7 @@ def get_present_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[st return all_folders -def get_valid_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]: +def get_valid_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]: """dirs with a valid index matched to the main index and archived content""" links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator(chunk_size=500)] return { @@ -455,7 +453,7 @@ def get_valid_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, for link in filter(is_valid, links) } -def get_invalid_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]: +def get_invalid_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]: """dirs that are invalid for any reason: corrupted/duplicate/orphaned/unrecognized""" duplicate = get_duplicate_folders(snapshots, out_dir=out_dir) orphaned = get_orphaned_folders(snapshots, out_dir=out_dir) @@ -464,7 +462,7 @@ def get_invalid_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[st return {**duplicate, **orphaned, **corrupted, **unrecognized} -def get_duplicate_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]: +def get_duplicate_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]: """dirs that conflict with other directories that have the same link URL or timestamp""" by_url = {} by_timestamp = {} @@ -472,7 +470,7 @@ def get_duplicate_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[ data_folders = ( str(entry) - for entry in 
archivebox.CONSTANTS.ARCHIVE_DIR.iterdir() + for entry in CONSTANTS.ARCHIVE_DIR.iterdir() if entry.is_dir() and not snapshots.filter(timestamp=entry.name).exists() ) @@ -498,11 +496,11 @@ def get_duplicate_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[ duplicate_folders[path] = link return duplicate_folders -def get_orphaned_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]: +def get_orphaned_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]: """dirs that contain a valid index but aren't listed in the main index""" orphaned_folders = {} - for entry in archivebox.CONSTANTS.ARCHIVE_DIR.iterdir(): + for entry in CONSTANTS.ARCHIVE_DIR.iterdir(): if entry.is_dir(): link = None try: @@ -516,7 +514,7 @@ def get_orphaned_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[s return orphaned_folders -def get_corrupted_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]: +def get_corrupted_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]: """dirs that don't contain a valid index and aren't listed in the main index""" corrupted = {} for snapshot in snapshots.iterator(chunk_size=500): @@ -525,11 +523,11 @@ def get_corrupted_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[ corrupted[link.link_dir] = link return corrupted -def get_unrecognized_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]: +def get_unrecognized_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]: """dirs that don't contain recognizable archive data and aren't listed in the main index""" unrecognized_folders: Dict[str, Optional[Link]] = {} - for entry in (Path(out_dir) / archivebox.CONSTANTS.ARCHIVE_DIR_NAME).iterdir(): + for entry in (Path(out_dir) / CONSTANTS.ARCHIVE_DIR_NAME).iterdir(): if entry.is_dir(): index_exists = (entry / "index.json").exists() link = None @@ -594,10 +592,10 @@ def is_unarchived(link: 
Link) -> bool: return not link.is_archived -def fix_invalid_folder_locations(out_dir: Path=archivebox.DATA_DIR) -> Tuple[List[str], List[str]]: +def fix_invalid_folder_locations(out_dir: Path=DATA_DIR) -> Tuple[List[str], List[str]]: fixed = [] cant_fix = [] - for entry in os.scandir(out_dir / archivebox.CONSTANTS.ARCHIVE_DIR_NAME): + for entry in os.scandir(out_dir / CONSTANTS.ARCHIVE_DIR_NAME): if entry.is_dir(follow_symlinks=True): if (Path(entry.path) / 'index.json').exists(): try: @@ -608,7 +606,7 @@ def fix_invalid_folder_locations(out_dir: Path=archivebox.DATA_DIR) -> Tuple[Lis continue if not entry.path.endswith(f'/{link.timestamp}'): - dest = out_dir /archivebox.CONSTANTS.ARCHIVE_DIR_NAME / link.timestamp + dest = out_dir /CONSTANTS.ARCHIVE_DIR_NAME / link.timestamp if dest.exists(): cant_fix.append(entry.path) else: diff --git a/archivebox/index/html.py b/archivebox/index/html.py index 747928c5..4b2c6485 100644 --- a/archivebox/index/html.py +++ b/archivebox/index/html.py @@ -1,6 +1,5 @@ __package__ = 'archivebox.index' -import archivebox from pathlib import Path from datetime import datetime, timezone from collections import defaultdict @@ -19,10 +18,11 @@ from ..util import ( htmlencode, urldecode, ) -from ..config import ( +from archivebox.config.legacy import ( SAVE_ARCHIVE_DOT_ORG, PREVIEW_ORIGINALS, ) +from archivebox.config import CONSTANTS, DATA_DIR, VERSION, SHELL_CONFIG, SERVER_CONFIG MAIN_INDEX_TEMPLATE = 'static_index.html' MINIMAL_INDEX_TEMPLATE = 'minimal_index.html' @@ -33,11 +33,9 @@ TITLE_LOADING_MSG = 'Not yet archived...' 
### Main Links Index @enforce_types -def parse_html_main_index(out_dir: Path=archivebox.DATA_DIR) -> Iterator[str]: +def parse_html_main_index(out_dir: Path=DATA_DIR) -> Iterator[str]: """parse an archive index html file and return the list of urls""" - from plugins_sys.config.constants import CONSTANTS - index_path = Path(out_dir) / CONSTANTS.HTML_INDEX_FILENAME if index_path.exists(): with open(index_path, 'r', encoding='utf-8') as f: @@ -58,11 +56,9 @@ def generate_index_from_links(links: List[Link], with_headers: bool): def main_index_template(links: List[Link], template: str=MAIN_INDEX_TEMPLATE) -> str: """render the template for the entire main index""" - from plugins_sys.config.apps import SHELL_CONFIG, SERVER_CONFIG - return render_django_template(template, { - 'version': archivebox.VERSION, - 'git_sha': SHELL_CONFIG.COMMIT_HASH or archivebox.VERSION, + 'version': VERSION, + 'git_sha': SHELL_CONFIG.COMMIT_HASH or VERSION, 'num_links': str(len(links)), 'date_updated': datetime.now(timezone.utc).strftime('%Y-%m-%d'), 'time_updated': datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M'), @@ -75,7 +71,6 @@ def main_index_template(links: List[Link], template: str=MAIN_INDEX_TEMPLATE) -> @enforce_types def write_html_link_details(link: Link, out_dir: Optional[str]=None) -> None: - from plugins_sys.config.constants import CONSTANTS out_dir = out_dir or link.link_dir rendered_html = link_details_template(link) diff --git a/archivebox/index/json.py b/archivebox/index/json.py index 06455053..acaa2a18 100644 --- a/archivebox/index/json.py +++ b/archivebox/index/json.py @@ -8,7 +8,7 @@ from pathlib import Path from datetime import datetime, timezone from typing import List, Optional, Iterator, Any, Union -import archivebox +from archivebox.config import VERSION, DATA_DIR, CONSTANTS, SERVER_CONFIG, SHELL_CONFIG from .schema import Link from ..system import atomic_write @@ -19,7 +19,6 @@ from ..util import enforce_types @enforce_types def 
generate_json_index_from_links(links: List[Link], with_headers: bool): from django.conf import settings - from plugins_sys.config.apps import SERVER_CONFIG MAIN_INDEX_HEADER = { 'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.', @@ -27,8 +26,8 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool): 'copyright_info': SERVER_CONFIG.FOOTER_INFO, 'meta': { 'project': 'ArchiveBox', - 'version': archivebox.VERSION, - 'git_sha': archivebox.VERSION, # not used anymore, but kept for backwards compatibility + 'version': VERSION, + 'git_sha': VERSION, # not used anymore, but kept for backwards compatibility 'website': 'https://ArchiveBox.io', 'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki', 'source': 'https://github.com/ArchiveBox/ArchiveBox', @@ -52,11 +51,9 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool): @enforce_types -def parse_json_main_index(out_dir: Path=archivebox.DATA_DIR) -> Iterator[Link]: +def parse_json_main_index(out_dir: Path=DATA_DIR) -> Iterator[Link]: """parse an archive index json file and return the list of links""" - from plugins_sys.config.constants import CONSTANTS - index_path = Path(out_dir) / CONSTANTS.JSON_INDEX_FILENAME if index_path.exists(): with open(index_path, 'r', encoding='utf-8') as f: @@ -68,7 +65,7 @@ def parse_json_main_index(out_dir: Path=archivebox.DATA_DIR) -> Iterator[Link]: print(" {lightyellow}! 
Found an index.json in the project root but couldn't load links from it: {} {}".format( err.__class__.__name__, err, - **ANSI, + **SHELL_CONFIG.ANSI, )) return () @@ -94,8 +91,6 @@ def parse_json_main_index(out_dir: Path=archivebox.DATA_DIR) -> Iterator[Link]: def write_json_link_details(link: Link, out_dir: Optional[str]=None) -> None: """write a json file with some info about the link""" - from plugins_sys.config.constants import CONSTANTS - out_dir = out_dir or link.link_dir path = Path(out_dir) / CONSTANTS.JSON_INDEX_FILENAME atomic_write(str(path), link._asdict(extended=True)) @@ -104,7 +99,6 @@ def write_json_link_details(link: Link, out_dir: Optional[str]=None) -> None: @enforce_types def parse_json_link_details(out_dir: Union[Path, str], guess: bool=False) -> Optional[Link]: """load the json link index from a given directory""" - from plugins_sys.config.constants import CONSTANTS existing_index = Path(out_dir) / CONSTANTS.JSON_INDEX_FILENAME if existing_index.exists(): @@ -121,7 +115,6 @@ def parse_json_link_details(out_dir: Union[Path, str], guess: bool=False) -> Opt def parse_json_links_details(out_dir: Union[Path, str]) -> Iterator[Link]: """read through all the archive data folders and return the parsed links""" - from plugins_sys.config.constants import CONSTANTS for entry in os.scandir(CONSTANTS.ARCHIVE_DIR): if entry.is_dir(follow_symlinks=True): diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py index 1c16c3bd..a6697c9f 100644 --- a/archivebox/index/schema.py +++ b/archivebox/index/schema.py @@ -17,7 +17,7 @@ from dataclasses import dataclass, asdict, field, fields from django.utils.functional import cached_property -from archivebox.constants import ARCHIVE_DIR, ARCHIVE_DIR_NAME +from archivebox.config.constants import ARCHIVE_DIR, ARCHIVE_DIR_NAME from plugins_extractor.favicon.apps import FAVICON_CONFIG @@ -160,7 +160,7 @@ class Link: return float(self.timestamp) > float(other.timestamp) def typecheck(self) -> None: - from 
..config import stderr, ANSI + from ..config.legacy import stderr, ANSI try: assert self.schema == self.__class__.__name__ assert isinstance(self.timestamp, str) and self.timestamp diff --git a/archivebox/index/sql.py b/archivebox/index/sql.py index 0071f60b..6ac7c3e7 100644 --- a/archivebox/index/sql.py +++ b/archivebox/index/sql.py @@ -10,7 +10,7 @@ from django.db import transaction from .schema import Link from ..util import enforce_types, parse_date -from ..config import ( +from ..config.legacy import ( OUTPUT_DIR, TAG_SEPARATOR_PATTERN, ) diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py index b4e4f975..baf7030f 100644 --- a/archivebox/logging_util.py +++ b/archivebox/logging_util.py @@ -4,10 +4,8 @@ import re import os import sys import stat -import shutil import time import argparse -import archivebox from math import log from multiprocessing import Process @@ -23,6 +21,7 @@ if TYPE_CHECKING: from rich import print from rich.panel import Panel +from archivebox.config import CONSTANTS, DATA_DIR, VERSION, SHELL_CONFIG from .system import get_dir_size from .util import enforce_types from .misc.logging import ANSI, stderr @@ -133,11 +132,8 @@ class TimedProgress: def __init__(self, seconds, prefix=''): - from plugins_sys.config.apps import SHELL_CONFIG - self.SHOW_PROGRESS = SHELL_CONFIG.SHOW_PROGRESS self.ANSI = SHELL_CONFIG.ANSI - self.TERM_WIDTH = lambda: shutil.get_terminal_size().columns # lambda so it live-updates when terminal is resized if self.SHOW_PROGRESS: self.p = Process(target=progress_bar, args=(seconds, prefix, self.ANSI)) @@ -169,7 +165,7 @@ class TimedProgress: # clear whole terminal line try: - sys.stdout.write('\r{}{}\r'.format((' ' * self.TERM_WIDTH()), self.ANSI['reset'])) + sys.stdout.write('\r{}{}\r'.format((' ' * SHELL_CONFIG.TERM_WIDTH), self.ANSI['reset'])) except (IOError, BrokenPipeError): # ignore when the parent proc has stopped listening to our stdout pass @@ -182,11 +178,11 @@ def progress_bar(seconds: int, 
prefix: str='', ANSI: Dict[str, str]=ANSI) -> Non """show timer in the form of progress bar, with percentage and seconds remaining""" output_buf = (sys.stdout or sys.__stdout__ or sys.stderr or sys.__stderr__) chunk = '█' if output_buf and output_buf.encoding.upper() == 'UTF-8' else '#' - last_width = TERM_WIDTH() + last_width = SHELL_CONFIG.TERM_WIDTH chunks = last_width - len(prefix) - 20 # number of progress chunks to show (aka max bar width) try: for s in range(seconds * chunks): - max_width = TERM_WIDTH() + max_width = SHELL_CONFIG.TERM_WIDTH if max_width < last_width: # when the terminal size is shrunk, we have to write a newline # otherwise the progress bar will keep wrapping incorrectly @@ -224,7 +220,7 @@ def progress_bar(seconds: int, prefix: str='', ANSI: Dict[str, str]=ANSI) -> Non sys.stdout.flush() # uncomment to have it disappear when it hits 100% instead of staying full red: # time.sleep(0.5) - # sys.stdout.write('\r{}{}\r'.format((' ' * TERM_WIDTH()), ANSI['reset'])) + # sys.stdout.write('\r{}{}\r'.format((' ' * SHELL_CONFIG.TERM_WIDTH), ANSI['reset'])) # sys.stdout.flush() except (KeyboardInterrupt, BrokenPipeError): print() @@ -234,7 +230,7 @@ def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional args = ' '.join(subcommand_args) version_msg = '[dark_magenta]\\[i] [{now}] ArchiveBox v{VERSION}: [/dark_magenta][green4]archivebox [green3]{subcommand}[green2] {args}[/green2]'.format( now=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'), - VERSION=archivebox.__version__, + VERSION=VERSION, subcommand=subcommand, args=args, ) @@ -256,7 +252,7 @@ def log_importing_started(urls: Union[str, List[str]], depth: int, index_only: b )) def log_source_saved(source_file: str): - from plugins_sys.config.constants import CONSTANTS print(' > Saved verbatim input to {}/{}'.format(CONSTANTS.SOURCES_DIR_NAME, source_file.rsplit('/', 1)[-1])) def log_parsing_finished(num_parsed: int, parser_name: str): @@ -289,14 +284,12 @@ def
log_indexing_process_finished(): def log_indexing_started(out_path: str): - from plugins_sys.config.apps import SHELL_CONFIG - if SHELL_CONFIG.IS_TTY: - sys.stdout.write(f' > ./{Path(out_path).relative_to(archivebox.DATA_DIR)}') + sys.stdout.write(f' > ./{Path(out_path).relative_to(DATA_DIR)}') def log_indexing_finished(out_path: str): - print(f'\r √ ./{Path(out_path).relative_to(archivebox.DATA_DIR)}') + print(f'\r √ ./{Path(out_path).relative_to(DATA_DIR)}') ### Archiving Stage @@ -532,7 +525,7 @@ def log_shell_welcome_msg(): ### Helpers @enforce_types -def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=archivebox.DATA_DIR) -> str: +def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=DATA_DIR) -> str: """convert paths like .../ArchiveBox/archivebox/../output/abc into output/abc""" pwd = str(Path(pwd)) # .resolve() path = str(path) diff --git a/archivebox/main.py b/archivebox/main.py index 2c4ce277..4ec2a93e 100755 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -4,7 +4,6 @@ import os import sys import shutil import platform -import archivebox from typing import Dict, List, Optional, Iterable, IO, Union from pathlib import Path @@ -15,6 +14,7 @@ from crontab import CronTab, CronSlices from django.db.models import QuerySet from django.utils import timezone +from archivebox.config import CONSTANTS, VERSION, DATA_DIR, ARCHIVE_DIR, SHELL_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SERVER_CONFIG, ARCHIVING_CONFIG from .cli import ( CLI_SUBCOMMANDS, run_subcommand, @@ -66,22 +66,9 @@ from .index.html import ( ) from .index.csv import links_to_csv from .extractors import archive_links, archive_link, ignore_methods -from .misc.logging import stderr, hint, ANSI +from .misc.logging import stderr, hint from .misc.checks import check_data_folder -from .config import ( - ConfigDict, - IS_TTY, - DEBUG, - IN_DOCKER, - IN_QEMU, - PUID, - PGID, - TIMEZONE, - ONLY_NEW, - JSON_INDEX_FILENAME, - HTML_INDEX_FILENAME, - SQL_INDEX_FILENAME, - LDAP, +from 
.config.legacy import ( write_config_file, DEPENDENCIES, load_all_config, @@ -104,15 +91,9 @@ from .logging_util import ( printable_dependency_version, ) -CONSTANTS = archivebox.CONSTANTS -VERSION = archivebox.VERSION -PACKAGE_DIR = archivebox.PACKAGE_DIR -OUTPUT_DIR = archivebox.DATA_DIR -ARCHIVE_DIR = archivebox.DATA_DIR / 'archive' - @enforce_types -def help(out_dir: Path=archivebox.DATA_DIR) -> None: +def help(out_dir: Path=DATA_DIR) -> None: """Print the ArchiveBox help message and usage""" all_subcommands = CLI_SUBCOMMANDS @@ -135,7 +116,7 @@ def help(out_dir: Path=archivebox.DATA_DIR) -> None: ) - if archivebox.CONSTANTS.DATABASE_FILE.exists(): + if CONSTANTS.DATABASE_FILE.exists(): print('''{green}ArchiveBox v{}: The self-hosted internet archive.{reset} {lightred}Active data directory:{reset} @@ -161,17 +142,17 @@ def help(out_dir: Path=archivebox.DATA_DIR) -> None: {lightred}Documentation:{reset} https://github.com/ArchiveBox/ArchiveBox/wiki -'''.format(VERSION, out_dir, COMMANDS_HELP_TEXT, **ANSI)) +'''.format(VERSION, out_dir, COMMANDS_HELP_TEXT, **SHELL_CONFIG.ANSI)) else: - print('{green}Welcome to ArchiveBox v{}!{reset}'.format(VERSION, **ANSI)) + print('{green}Welcome to ArchiveBox v{}!{reset}'.format(VERSION, **SHELL_CONFIG.ANSI)) print() - if IN_DOCKER: + if SHELL_CONFIG.IN_DOCKER: print('When using Docker, you need to mount a volume to use as your data dir:') print(' docker run -v /some/path:/data archivebox ...') print() print('To import an existing archive (from a previous version of ArchiveBox):') - print(' 1. cd into your data dir OUTPUT_DIR (usually ArchiveBox/output) and run:') + print(' 1. cd into your data dir DATA_DIR (usually ArchiveBox/output) and run:') print(' 2. 
archivebox init') print() print('To start a new archive:') @@ -184,10 +165,9 @@ def help(out_dir: Path=archivebox.DATA_DIR) -> None: @enforce_types def version(quiet: bool=False, - out_dir: Path=OUTPUT_DIR) -> None: + out_dir: Path=DATA_DIR) -> None: """Print the ArchiveBox version and dependency information""" - from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SHELL_CONFIG from plugins_auth.ldap.apps import LDAP_CONFIG from django.conf import settings @@ -202,19 +182,19 @@ def version(quiet: bool=False, p = platform.uname() print( - 'ArchiveBox v{}'.format(archivebox.__version__), + 'ArchiveBox v{}'.format(CONSTANTS.VERSION), f'COMMIT_HASH={SHELL_CONFIG.COMMIT_HASH[:7] if SHELL_CONFIG.COMMIT_HASH else "unknown"}', f'BUILD_TIME={SHELL_CONFIG.BUILD_TIME}', ) print( - f'IN_DOCKER={IN_DOCKER}', - f'IN_QEMU={IN_QEMU}', + f'IN_DOCKER={SHELL_CONFIG.IN_DOCKER}', + f'IN_QEMU={SHELL_CONFIG.IN_QEMU}', f'ARCH={p.machine}', f'OS={p.system}', f'PLATFORM={platform.platform()}', f'PYTHON={sys.implementation.name.title()}', ) - OUTPUT_IS_REMOTE_FS = CONSTANTS.DATA_LOCATIONS['OUTPUT_DIR']['is_mount'] or CONSTANTS.DATA_LOCATIONS['ARCHIVE_DIR']['is_mount'] + OUTPUT_IS_REMOTE_FS = CONSTANTS.DATA_LOCATIONS['DATA_DIR']['is_mount'] or CONSTANTS.DATA_LOCATIONS['ARCHIVE_DIR']['is_mount'] print( f'FS_ATOMIC={STORAGE_CONFIG.ENFORCE_ATOMIC_WRITES}', f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}', @@ -224,14 +204,14 @@ def version(quiet: bool=False, print( f'DEBUG={SHELL_CONFIG.DEBUG}', f'IS_TTY={SHELL_CONFIG.IS_TTY}', - f'TZ={TIMEZONE}', + f'TZ={CONSTANTS.TIMEZONE}', f'SEARCH_BACKEND={SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}', f'LDAP={LDAP_CONFIG.LDAP_ENABLED}', #f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually ) print() - print('{white}[i] Old dependency versions:{reset}'.format(**ANSI)) + print('{white}[i] Old dependency versions:{reset}'.format(**SHELL_CONFIG.ANSI)) for name, dependency in 
DEPENDENCIES.items(): print(printable_dependency_version(name, dependency)) @@ -240,7 +220,7 @@ def version(quiet: bool=False, print() print() - print('{white}[i] New dependency versions:{reset}'.format(**ANSI)) + print('{white}[i] New dependency versions:{reset}'.format(**SHELL_CONFIG.ANSI)) for name, binary in settings.BINARIES.items(): err = None try: @@ -252,18 +232,18 @@ def version(quiet: bool=False, print('', '√' if loaded_bin.is_valid else 'X', '', loaded_bin.name.ljust(21), str(loaded_bin.version).ljust(15), loaded_bin.abspath or str(err)) print() - print('{white}[i] Source-code locations:{reset}'.format(**ANSI)) + print('{white}[i] Source-code locations:{reset}'.format(**SHELL_CONFIG.ANSI)) for name, path in CONSTANTS.CODE_LOCATIONS.items(): print(printable_folder_status(name, path)) print() if CONSTANTS.DATABASE_FILE.exists() or CONSTANTS.ARCHIVE_DIR.exists() or CONSTANTS.CONFIG_FILE.exists(): - print('{white}[i] Data locations:{reset}'.format(**ANSI)) + print('{white}[i] Data locations:{reset}'.format(**SHELL_CONFIG.ANSI)) for name, path in CONSTANTS.DATA_LOCATIONS.items(): print(printable_folder_status(name, path)) else: print() - print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**ANSI)) + print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**SHELL_CONFIG.ANSI)) print() @@ -272,7 +252,7 @@ def version(quiet: bool=False, def run(subcommand: str, subcommand_args: Optional[List[str]], stdin: Optional[IO]=None, - out_dir: Path=OUTPUT_DIR) -> None: + out_dir: Path=DATA_DIR) -> None: """Run a given ArchiveBox subcommand with the given list of args""" run_subcommand( subcommand=subcommand, @@ -283,27 +263,27 @@ def run(subcommand: str, @enforce_types -def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=archivebox.DATA_DIR) -> None: +def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=DATA_DIR) -> None: """Initialize a new ArchiveBox collection in the 
current directory""" from core.models import Snapshot out_dir.mkdir(exist_ok=True) - is_empty = not len(set(os.listdir(out_dir)) - CONSTANTS.ALLOWED_IN_OUTPUT_DIR) + is_empty = not len(set(os.listdir(out_dir)) - CONSTANTS.ALLOWED_IN_DATA_DIR) - if (out_dir / archivebox.CONSTANTS.JSON_INDEX_FILENAME).exists(): + if (out_dir / CONSTANTS.JSON_INDEX_FILENAME).exists(): stderr("[!] This folder contains a JSON index. It is deprecated, and will no longer be kept up to date automatically.", color="lightyellow") stderr(" You can run `archivebox list --json --with-headers > static_index.json` to manually generate it.", color="lightyellow") - existing_index = archivebox.CONSTANTS.DATABASE_FILE.exists() + existing_index = CONSTANTS.DATABASE_FILE.exists() if is_empty and not existing_index: - print('{green}[+] Initializing a new ArchiveBox v{} collection...{reset}'.format(VERSION, **ANSI)) - print('{green}----------------------------------------------------------------------{reset}'.format(**ANSI)) + print('{green}[+] Initializing a new ArchiveBox v{} collection...{reset}'.format(VERSION, **SHELL_CONFIG.ANSI)) + print('{green}----------------------------------------------------------------------{reset}'.format(**SHELL_CONFIG.ANSI)) elif existing_index: # TODO: properly detect and print the existing version in current index as well - print('{green}[*] Verifying and updating existing ArchiveBox collection to v{}...{reset}'.format(VERSION, **ANSI)) - print('{green}----------------------------------------------------------------------{reset}'.format(**ANSI)) + print('{green}[*] Verifying and updating existing ArchiveBox collection to v{}...{reset}'.format(VERSION, **SHELL_CONFIG.ANSI)) + print('{green}----------------------------------------------------------------------{reset}'.format(**SHELL_CONFIG.ANSI)) else: if force: stderr('[!] 
This folder appears to already have files in it, but no index.sqlite3 is present.', color='lightyellow') @@ -315,41 +295,41 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path= " {lightred}Hint:{reset} To import an existing data folder make sure to cd into the folder first, \n" " then run and run 'archivebox init' to pick up where you left off.\n\n" " (Always make sure your data folder is backed up first before updating ArchiveBox)" - ).format(**ANSI) + ).format(**SHELL_CONFIG.ANSI) ) raise SystemExit(2) if existing_index: - print('\n{green}[*] Verifying archive folder structure...{reset}'.format(**ANSI)) + print('\n{green}[*] Verifying archive folder structure...{reset}'.format(**SHELL_CONFIG.ANSI)) else: - print('\n{green}[+] Building archive folder structure...{reset}'.format(**ANSI)) + print('\n{green}[+] Building archive folder structure...{reset}'.format(**SHELL_CONFIG.ANSI)) - print(f' + ./{CONSTANTS.ARCHIVE_DIR.relative_to(OUTPUT_DIR)}, ./{CONSTANTS.SOURCES_DIR.relative_to(OUTPUT_DIR)}, ./{CONSTANTS.LOGS_DIR.relative_to(OUTPUT_DIR)}...') + print(f' + ./{CONSTANTS.ARCHIVE_DIR.relative_to(DATA_DIR)}, ./{CONSTANTS.SOURCES_DIR.relative_to(DATA_DIR)}, ./{CONSTANTS.LOGS_DIR.relative_to(DATA_DIR)}...') Path(CONSTANTS.SOURCES_DIR).mkdir(exist_ok=True) Path(CONSTANTS.ARCHIVE_DIR).mkdir(exist_ok=True) Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True) - print(f' + ./{CONSTANTS.CONFIG_FILE.relative_to(OUTPUT_DIR)}...') + print(f' + ./{CONSTANTS.CONFIG_FILE.relative_to(DATA_DIR)}...') write_config_file({}, out_dir=out_dir) if CONSTANTS.DATABASE_FILE.exists(): - print('\n{green}[*] Verifying main SQL index and running any migrations needed...{reset}'.format(**ANSI)) + print('\n{green}[*] Verifying main SQL index and running any migrations needed...{reset}'.format(**SHELL_CONFIG.ANSI)) else: - print('\n{green}[+] Building main SQL index and running initial migrations...{reset}'.format(**ANSI)) + print('\n{green}[+] Building main SQL index and 
running initial migrations...{reset}'.format(**SHELL_CONFIG.ANSI)) for migration_line in apply_migrations(out_dir): print(f' {migration_line}') assert CONSTANTS.DATABASE_FILE.exists() print() - print(f' √ ./{CONSTANTS.DATABASE_FILE.relative_to(OUTPUT_DIR)}') + print(f' √ ./{CONSTANTS.DATABASE_FILE.relative_to(DATA_DIR)}') # from django.contrib.auth.models import User - # if IS_TTY and not User.objects.filter(is_superuser=True).exists(): - # print('{green}[+] Creating admin user account...{reset}'.format(**ANSI)) + # if SHELL_CONFIG.IS_TTY and not User.objects.filter(is_superuser=True).exists(): + # print('{green}[+] Creating admin user account...{reset}'.format(**SHELL_CONFIG.ANSI)) # call_command("createsuperuser", interactive=True) print() - print('{green}[*] Checking links from indexes and archive folders (safe to Ctrl+C)...{reset}'.format(**ANSI)) + print('{green}[*] Checking links from indexes and archive folders (safe to Ctrl+C)...{reset}'.format(**SHELL_CONFIG.ANSI)) all_links = Snapshot.objects.none() pending_links: Dict[str, Link] = {} @@ -365,9 +345,9 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path= # Links in data folders that dont match their timestamp fixed, cant_fix = fix_invalid_folder_locations(out_dir=out_dir) if fixed: - print(' {lightyellow}√ Fixed {} data directory locations that didn\'t match their link timestamps.{reset}'.format(len(fixed), **ANSI)) + print(' {lightyellow}√ Fixed {} data directory locations that didn\'t match their link timestamps.{reset}'.format(len(fixed), **SHELL_CONFIG.ANSI)) if cant_fix: - print(' {lightyellow}! Could not fix {} data directory locations due to conflicts with existing folders.{reset}'.format(len(cant_fix), **ANSI)) + print(' {lightyellow}! 
Could not fix {} data directory locations due to conflicts with existing folders.{reset}'.format(len(cant_fix), **SHELL_CONFIG.ANSI)) # Links in JSON index but not in main index orphaned_json_links = { @@ -377,7 +357,7 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path= } if orphaned_json_links: pending_links.update(orphaned_json_links) - print(' {lightyellow}√ Added {} orphaned links from existing JSON index...{reset}'.format(len(orphaned_json_links), **ANSI)) + print(' {lightyellow}√ Added {} orphaned links from existing JSON index...{reset}'.format(len(orphaned_json_links), **SHELL_CONFIG.ANSI)) # Links in data dir indexes but not in main index orphaned_data_dir_links = { @@ -387,7 +367,7 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path= } if orphaned_data_dir_links: pending_links.update(orphaned_data_dir_links) - print(' {lightyellow}√ Added {} orphaned links from existing archive directories.{reset}'.format(len(orphaned_data_dir_links), **ANSI)) + print(' {lightyellow}√ Added {} orphaned links from existing archive directories.{reset}'.format(len(orphaned_data_dir_links), **SHELL_CONFIG.ANSI)) # Links in invalid/duplicate data dirs invalid_folders = { @@ -395,10 +375,10 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path= for folder, link in get_invalid_folders(all_links, out_dir=out_dir).items() } if invalid_folders: - print(' {lightyellow}! Skipped adding {} invalid link data directories.{reset}'.format(len(invalid_folders), **ANSI)) - print(' X ' + '\n X '.join(f'./{Path(folder).relative_to(OUTPUT_DIR)} {link}' for folder, link in invalid_folders.items())) + print(' {lightyellow}! 
Skipped adding {} invalid link data directories.{reset}'.format(len(invalid_folders), **SHELL_CONFIG.ANSI)) + print(' X ' + '\n X '.join(f'./{Path(folder).relative_to(DATA_DIR)} {link}' for folder, link in invalid_folders.items())) print() - print(' {lightred}Hint:{reset} For more information about the link data directories that were skipped, run:'.format(**ANSI)) + print(' {lightred}Hint:{reset} For more information about the link data directories that were skipped, run:'.format(**SHELL_CONFIG.ANSI)) print(' archivebox status') print(' archivebox list --status=invalid') @@ -407,28 +387,27 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path= stderr('[x] Stopped checking archive directories due to Ctrl-C/SIGTERM', color='red') stderr(' Your archive data is safe, but you should re-run `archivebox init` to finish the process later.') stderr() - stderr(' {lightred}Hint:{reset} In the future you can run a quick init without checking dirs like so:'.format(**ANSI)) + stderr(' {lightred}Hint:{reset} In the future you can run a quick init without checking dirs like so:'.format(**SHELL_CONFIG.ANSI)) stderr(' archivebox init --quick') raise SystemExit(1) write_main_index(list(pending_links.values()), out_dir=out_dir) - print('\n{green}----------------------------------------------------------------------{reset}'.format(**ANSI)) + print('\n{green}----------------------------------------------------------------------{reset}'.format(**SHELL_CONFIG.ANSI)) from django.contrib.auth.models import User - from plugins_sys.config.apps import SERVER_CONFIG if (SERVER_CONFIG.ADMIN_USERNAME and SERVER_CONFIG.ADMIN_PASSWORD) and not User.objects.filter(username=SERVER_CONFIG.ADMIN_USERNAME).exists(): - print('{green}[+] Found ADMIN_USERNAME and ADMIN_PASSWORD configuration options, creating new admin user.{reset}'.format(**ANSI)) + print('{green}[+] Found ADMIN_USERNAME and ADMIN_PASSWORD configuration options, creating new admin 
user.{reset}'.format(**SHELL_CONFIG.ANSI)) User.objects.create_superuser(username=SERVER_CONFIG.ADMIN_USERNAME, password=SERVER_CONFIG.ADMIN_PASSWORD) if existing_index: - print('{green}[√] Done. Verified and updated the existing ArchiveBox collection.{reset}'.format(**ANSI)) + print('{green}[√] Done. Verified and updated the existing ArchiveBox collection.{reset}'.format(**SHELL_CONFIG.ANSI)) else: - print('{green}[√] Done. A new ArchiveBox collection was initialized ({} links).{reset}'.format(len(all_links) + len(pending_links), **ANSI)) + print('{green}[√] Done. A new ArchiveBox collection was initialized ({} links).{reset}'.format(len(all_links) + len(pending_links), **SHELL_CONFIG.ANSI)) - json_index = out_dir / JSON_INDEX_FILENAME - html_index = out_dir / HTML_INDEX_FILENAME + json_index = out_dir / CONSTANTS.JSON_INDEX_FILENAME + html_index = out_dir / CONSTANTS.HTML_INDEX_FILENAME index_name = f"{date.today()}_index_old" if json_index.exists(): json_index.rename(f"{index_name}.json") @@ -440,7 +419,7 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path= if Snapshot.objects.count() < 25: # hide the hints for experienced users print() - print(' {lightred}Hint:{reset} To view your archive index, run:'.format(**ANSI)) + print(' {lightred}Hint:{reset} To view your archive index, run:'.format(**SHELL_CONFIG.ANSI)) print(' archivebox server # then visit http://127.0.0.1:8000') print() print(' To add new links, you can run:') @@ -450,7 +429,7 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path= print(' archivebox help') @enforce_types -def status(out_dir: Path=OUTPUT_DIR) -> None: +def status(out_dir: Path=DATA_DIR) -> None: """Print out some info and statistics about the archive collection""" check_data_folder(CONFIG) @@ -459,8 +438,8 @@ def status(out_dir: Path=OUTPUT_DIR) -> None: from django.contrib.auth import get_user_model User = get_user_model() - print('{green}[*] Scanning archive main 
index...{reset}'.format(**ANSI)) - print(ANSI['lightyellow'], f' {out_dir}/*', ANSI['reset']) + print('{green}[*] Scanning archive main index...{reset}'.format(**SHELL_CONFIG.ANSI)) + print(SHELL_CONFIG.ANSI['lightyellow'], f' {out_dir}/*', SHELL_CONFIG.ANSI['reset']) num_bytes, num_dirs, num_files = get_dir_size(out_dir, recursive=False, pattern='index.') size = printable_filesize(num_bytes) print(f' Index size: {size} across {num_files} files') @@ -469,15 +448,15 @@ def status(out_dir: Path=OUTPUT_DIR) -> None: links = load_main_index(out_dir=out_dir) num_sql_links = links.count() num_link_details = sum(1 for link in parse_json_links_details(out_dir=out_dir)) - print(f' > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {SQL_INDEX_FILENAME})') + print(f' > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {CONSTANTS.SQL_INDEX_FILENAME})') print(f' > JSON Link Details: {num_link_details} links'.ljust(36), f'(found in {ARCHIVE_DIR.name}/*/index.json)') print() - print('{green}[*] Scanning archive data directories...{reset}'.format(**ANSI)) - print(ANSI['lightyellow'], f' {ARCHIVE_DIR}/*', ANSI['reset']) + print('{green}[*] Scanning archive data directories...{reset}'.format(**SHELL_CONFIG.ANSI)) + print(SHELL_CONFIG.ANSI['lightyellow'], f' {ARCHIVE_DIR}/*', SHELL_CONFIG.ANSI['reset']) num_bytes, num_dirs, num_files = get_dir_size(ARCHIVE_DIR) size = printable_filesize(num_bytes) print(f' Size: {size} across {num_files} files in {num_dirs} directories') - print(ANSI['black']) + print(SHELL_CONFIG.ANSI['black']) num_indexed = len(get_indexed_folders(links, out_dir=out_dir)) num_archived = len(get_archived_folders(links, out_dir=out_dir)) num_unarchived = len(get_unarchived_folders(links, out_dir=out_dir)) @@ -502,23 +481,23 @@ def status(out_dir: Path=OUTPUT_DIR) -> None: print(f' > corrupted: {len(corrupted)}'.ljust(36), f'({get_corrupted_folders.__doc__})') print(f' > unrecognized: {len(unrecognized)}'.ljust(36), 
f'({get_unrecognized_folders.__doc__})') - print(ANSI['reset']) + print(SHELL_CONFIG.ANSI['reset']) if num_indexed: - print(' {lightred}Hint:{reset} You can list link data directories by status like so:'.format(**ANSI)) + print(' {lightred}Hint:{reset} You can list link data directories by status like so:'.format(**SHELL_CONFIG.ANSI)) print(' archivebox list --status= (e.g. indexed, corrupted, archived, etc.)') if orphaned: - print(' {lightred}Hint:{reset} To automatically import orphaned data directories into the main index, run:'.format(**ANSI)) + print(' {lightred}Hint:{reset} To automatically import orphaned data directories into the main index, run:'.format(**SHELL_CONFIG.ANSI)) print(' archivebox init') if num_invalid: - print(' {lightred}Hint:{reset} You may need to manually remove or fix some invalid data directories, afterwards make sure to run:'.format(**ANSI)) + print(' {lightred}Hint:{reset} You may need to manually remove or fix some invalid data directories, afterwards make sure to run:'.format(**SHELL_CONFIG.ANSI)) print(' archivebox init') print() - print('{green}[*] Scanning recent archive changes and user logins:{reset}'.format(**ANSI)) - print(ANSI['lightyellow'], f' {CONSTANTS.LOGS_DIR}/*', ANSI['reset']) + print('{green}[*] Scanning recent archive changes and user logins:{reset}'.format(**SHELL_CONFIG.ANSI)) + print(SHELL_CONFIG.ANSI['lightyellow'], f' {CONSTANTS.LOGS_DIR}/*', SHELL_CONFIG.ANSI['reset']) users = get_admins().values_list('username', flat=True) print(f' UI users {len(users)}: {", ".join(users)}') last_login = User.objects.order_by('last_login').last() @@ -530,7 +509,7 @@ def status(out_dir: Path=OUTPUT_DIR) -> None: if not users: print() - print(' {lightred}Hint:{reset} You can create an admin user by running:'.format(**ANSI)) + print(' {lightred}Hint:{reset} You can create an admin user by running:'.format(**SHELL_CONFIG.ANSI)) print(' archivebox manage createsuperuser') print() @@ -538,19 +517,19 @@ def status(out_dir: 
Path=OUTPUT_DIR) -> None: if not snapshot.downloaded_at: continue print( - ANSI['black'], + SHELL_CONFIG.ANSI['black'], ( f' > {str(snapshot.downloaded_at)[:16]} ' f'[{snapshot.num_outputs} {("X", "√")[snapshot.is_archived]} {printable_filesize(snapshot.archive_size)}] ' f'"{snapshot.title}": {snapshot.url}' )[:SHELL_CONFIG.TERM_WIDTH], - ANSI['reset'], + SHELL_CONFIG.ANSI['reset'], ) - print(ANSI['black'], ' ...', ANSI['reset']) + print(SHELL_CONFIG.ANSI['black'], ' ...', SHELL_CONFIG.ANSI['reset']) @enforce_types -def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> List[Link]: +def oneshot(url: str, extractors: str="", out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> List[Link]: """ Create a single URL archive folder with an index.json and index.html, and all the archive method outputs. You can run this to archive single pages without needing to create a whole collection with archivebox init. @@ -571,7 +550,7 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR, created_by_i def add(urls: Union[str, List[str]], tag: str='', depth: int=0, - update: bool=not ONLY_NEW, + update: bool=not ARCHIVING_CONFIG.ONLY_NEW, update_all: bool=False, index_only: bool=False, overwrite: bool=False, @@ -580,7 +559,7 @@ def add(urls: Union[str, List[str]], extractors: str="", parser: str="auto", created_by_id: int | None=None, - out_dir: Path=OUTPUT_DIR) -> List[Link]: + out_dir: Path=DATA_DIR) -> List[Link]: """Add a new URL or list of URLs to your archive""" from core.models import Snapshot, Tag @@ -693,7 +672,7 @@ def remove(filter_str: Optional[str]=None, before: Optional[float]=None, yes: bool=False, delete: bool=False, - out_dir: Path=OUTPUT_DIR) -> List[Link]: + out_dir: Path=DATA_DIR) -> List[Link]: """Remove the specified URLs from the archive""" check_data_folder(CONFIG) @@ -767,7 +746,7 @@ def remove(filter_str: Optional[str]=None, @enforce_types def update(resume: Optional[float]=None, - 
only_new: bool=ONLY_NEW, + only_new: bool=ARCHIVING_CONFIG.ONLY_NEW, index_only: bool=False, overwrite: bool=False, filter_patterns_str: Optional[str]=None, @@ -777,7 +756,7 @@ def update(resume: Optional[float]=None, after: Optional[str]=None, before: Optional[str]=None, extractors: str="", - out_dir: Path=OUTPUT_DIR) -> List[Link]: + out_dir: Path=DATA_DIR) -> List[Link]: """Import any new links from subscriptions and retry any previously failed/skipped links""" from core.models import ArchiveResult @@ -853,7 +832,7 @@ def list_all(filter_patterns_str: Optional[str]=None, json: bool=False, html: bool=False, with_headers: bool=False, - out_dir: Path=OUTPUT_DIR) -> Iterable[Link]: + out_dir: Path=DATA_DIR) -> Iterable[Link]: """List, filter, and export information about archive entries""" check_data_folder(CONFIG) @@ -902,7 +881,7 @@ def list_links(snapshots: Optional[QuerySet]=None, filter_type: str='exact', after: Optional[float]=None, before: Optional[float]=None, - out_dir: Path=OUTPUT_DIR) -> Iterable[Link]: + out_dir: Path=DATA_DIR) -> Iterable[Link]: check_data_folder(CONFIG) @@ -926,7 +905,7 @@ def list_links(snapshots: Optional[QuerySet]=None, @enforce_types def list_folders(links: List[Link], status: str, - out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]: + out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]: check_data_folder(CONFIG) @@ -949,7 +928,7 @@ def list_folders(links: List[Link], raise ValueError('Status not recognized.') @enforce_types -def setup(out_dir: Path=OUTPUT_DIR) -> None: +def setup(out_dir: Path=DATA_DIR) -> None: """Automatically install all ArchiveBox dependencies and extras""" from rich import print @@ -996,7 +975,7 @@ def config(config_options_str: Optional[str]=None, get: bool=False, set: bool=False, reset: bool=False, - out_dir: Path=OUTPUT_DIR) -> None: + out_dir: Path=DATA_DIR) -> None: """Get and set your ArchiveBox project configuration values""" check_data_folder(CONFIG) @@ -1014,7 +993,7 @@ def 
config(config_options_str: Optional[str]=None, no_args = not (get or set or reset or config_options) - matching_config: ConfigDict = {} + matching_config = {} if get or no_args: if config_options: config_options = [get_real_name(key) for key in config_options] @@ -1054,11 +1033,11 @@ def config(config_options_str: Optional[str]=None, if new_config: before = CONFIG - matching_config = write_config_file(new_config, out_dir=OUTPUT_DIR) + matching_config = write_config_file(new_config, out_dir=DATA_DIR) after = load_all_config() print(printable_config(matching_config)) - side_effect_changes: ConfigDict = {} + side_effect_changes = {} for key, val in after.items(): if key in USER_CONFIG and (before[key] != after[key]) and (key not in matching_config): side_effect_changes[key] = after[key] @@ -1095,14 +1074,13 @@ def schedule(add: bool=False, tag: str='', depth: int=0, overwrite: bool=False, - update: bool=not ONLY_NEW, + update: bool=not ARCHIVING_CONFIG.ONLY_NEW, import_path: Optional[str]=None, - out_dir: Path=OUTPUT_DIR): + out_dir: Path=DATA_DIR): """Set ArchiveBox to regularly import URLs at specific times using cron""" check_data_folder(CONFIG) from plugins_pkg.pip.apps import ARCHIVEBOX_BINARY - from plugins_sys.config.apps import SHELL_CONFIG, CONSTANTS Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True) @@ -1222,7 +1200,7 @@ def server(runserver_args: Optional[List[str]]=None, init: bool=False, quick_init: bool=False, createsuperuser: bool=False, - out_dir: Path=OUTPUT_DIR) -> None: + out_dir: Path=DATA_DIR) -> None: """Run the ArchiveBox HTTP server""" runserver_args = runserver_args or [] @@ -1238,10 +1216,6 @@ def server(runserver_args: Optional[List[str]]=None, run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir) print() - # setup config for django runserver - from . 
import config - config.SHOW_PROGRESS = False - config.DEBUG = config.DEBUG or debug check_data_folder(CONFIG) @@ -1250,20 +1224,17 @@ def server(runserver_args: Optional[List[str]]=None, - print('{green}[+] Starting ArchiveBox webserver... {reset}'.format(**ANSI)) + print('{green}[+] Starting ArchiveBox webserver... {reset}'.format(**SHELL_CONFIG.ANSI)) print(' > Logging errors to ./logs/errors.log') if not User.objects.filter(is_superuser=True).exists(): - print('{lightyellow}[!] No admin users exist yet, you will not be able to edit links in the UI.{reset}'.format(**ANSI)) + print('{lightyellow}[!] No admin users exist yet, you will not be able to edit links in the UI.{reset}'.format(**SHELL_CONFIG.ANSI)) print() print(' To create an admin user, run:') print(' archivebox manage createsuperuser') print() - # toggle autoreloading when archivebox code changes - config.SHOW_PROGRESS = False - config.DEBUG = config.DEBUG or debug - if debug: + if SHELL_CONFIG.DEBUG: if not reload: runserver_args.append('--noreload') # '--insecure' call_command("runserver", *runserver_args) @@ -1295,13 +1266,13 @@ def server(runserver_args: Optional[List[str]]=None, @enforce_types -def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None: +def manage(args: Optional[List[str]]=None, out_dir: Path=DATA_DIR) -> None: """Run an ArchiveBox Django management command""" check_data_folder(CONFIG) from django.core.management import execute_from_command_line - if (args and "createsuperuser" in args) and (IN_DOCKER and not IS_TTY): + if (args and "createsuperuser" in args) and (SHELL_CONFIG.IN_DOCKER and not SHELL_CONFIG.IS_TTY): stderr('[!] 
Warning: you need to pass -it to use interactive commands in docker', color='lightyellow') stderr(' docker run -it archivebox manage {}'.format(' '.join(args or ['...'])), color='lightyellow') stderr('') @@ -1312,7 +1283,7 @@ def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None: @enforce_types -def shell(out_dir: Path=OUTPUT_DIR) -> None: +def shell(out_dir: Path=DATA_DIR) -> None: """Enter an interactive ArchiveBox Django shell""" check_data_folder(CONFIG) diff --git a/archivebox/misc/checks.py b/archivebox/misc/checks.py index c4d3db79..69e0c52c 100644 --- a/archivebox/misc/checks.py +++ b/archivebox/misc/checks.py @@ -2,45 +2,42 @@ __package__ = 'archivebox.misc' from benedict import benedict -import archivebox +from archivebox.config import DATA_DIR, ARCHIVE_DIR, CONSTANTS, SHELL_CONFIG -from .logging import stderr, ANSI +from .logging import stderr def check_data_folder(config: benedict) -> None: - output_dir = archivebox.DATA_DIR - archive_dir_exists = (archivebox.CONSTANTS.ARCHIVE_DIR).exists() + archive_dir_exists = ARCHIVE_DIR.exists() if not archive_dir_exists: stderr('[X] No archivebox index found in the current directory.', color='red') - stderr(f' {output_dir}', color='lightyellow') + stderr(f' {DATA_DIR}', color='lightyellow') stderr() - stderr(' {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**ANSI)) + stderr(' {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**SHELL_CONFIG.ANSI)) stderr(' cd path/to/your/archive/folder') stderr(' archivebox [command]') stderr() - stderr(' {lightred}Hint{reset}: To create a new archive collection or import existing data in this folder, run:'.format(**ANSI)) + stderr(' {lightred}Hint{reset}: To create a new archive collection or import existing data in this folder, run:'.format(**SHELL_CONFIG.ANSI)) stderr(' archivebox init') raise SystemExit(2) def check_migrations(config: benedict): - output_dir = archivebox.DATA_DIR - from 
..index.sql import list_migrations pending_migrations = [name for status, name in list_migrations() if not status] if pending_migrations: stderr('[X] This collection was created with an older version of ArchiveBox and must be upgraded first.', color='lightyellow') - stderr(f' {output_dir}') + stderr(f' {DATA_DIR}') stderr() stderr(f' To upgrade it to the latest version and apply the {len(pending_migrations)} pending migrations, run:') stderr(' archivebox init') raise SystemExit(3) - archivebox.CONSTANTS.SOURCES_DIR.mkdir(exist_ok=True) - archivebox.CONSTANTS.LOGS_DIR.mkdir(exist_ok=True) - archivebox.CONSTANTS.CACHE_DIR.mkdir(exist_ok=True) - (archivebox.CONSTANTS.LIB_DIR / 'bin').mkdir(exist_ok=True, parents=True) - (archivebox.CONSTANTS.PERSONAS_DIR / 'Default').mkdir(exist_ok=True, parents=True) + CONSTANTS.SOURCES_DIR.mkdir(exist_ok=True) + CONSTANTS.LOGS_DIR.mkdir(exist_ok=True) + CONSTANTS.CACHE_DIR.mkdir(exist_ok=True) + (CONSTANTS.LIB_DIR / 'bin').mkdir(exist_ok=True, parents=True) + (CONSTANTS.PERSONAS_DIR / 'Default').mkdir(exist_ok=True, parents=True) diff --git a/archivebox/parsers/__init__.py b/archivebox/parsers/__init__.py index 99cd690d..9464c53b 100644 --- a/archivebox/parsers/__init__.py +++ b/archivebox/parsers/__init__.py @@ -14,7 +14,7 @@ from datetime import datetime, timezone from pathlib import Path from ..system import atomic_write -from ..config import ( +from ..config.legacy import ( ANSI, OUTPUT_DIR, SOURCES_DIR_NAME, diff --git a/archivebox/parsers/pocket_api.py b/archivebox/parsers/pocket_api.py index 429f4a9d..10daf54e 100644 --- a/archivebox/parsers/pocket_api.py +++ b/archivebox/parsers/pocket_api.py @@ -2,24 +2,25 @@ __package__ = 'archivebox.parsers' import re -import archivebox from typing import IO, Iterable, Optional from configparser import ConfigParser from pocket import Pocket +from archivebox.config import CONSTANTS + from ..index.schema import Link from ..util import enforce_types from ..system import atomic_write -from 
..config import ( +from ..config.legacy import ( POCKET_CONSUMER_KEY, POCKET_ACCESS_TOKENS, ) COUNT_PER_PAGE = 500 -API_DB_PATH = archivebox.DATA_DIR / 'sources' / 'pocket_api.db' +API_DB_PATH = CONSTANTS.SOURCES_DIR / 'pocket_api.db' # search for broken protocols that sometimes come from the Pocket API _BROKEN_PROTOCOL_RE = re.compile('^(http[s]?)(:/(?!/))') diff --git a/archivebox/parsers/readwise_reader_api.py b/archivebox/parsers/readwise_reader_api.py index b676dfe8..c12bdc24 100644 --- a/archivebox/parsers/readwise_reader_api.py +++ b/archivebox/parsers/readwise_reader_api.py @@ -3,19 +3,20 @@ __package__ = "archivebox.parsers" import re import requests -import archivebox from datetime import datetime from typing import IO, Iterable, Optional from configparser import ConfigParser +from archivebox.config import CONSTANTS + from ..index.schema import Link from ..util import enforce_types from ..system import atomic_write -from ..config import READWISE_READER_TOKENS +from ..config.legacy import READWISE_READER_TOKENS -API_DB_PATH = archivebox.DATA_DIR / "sources" / "readwise_reader_api.db" +API_DB_PATH = CONSTANTS.SOURCES_DIR / "readwise_reader_api.db" class ReadwiseReaderAPI: diff --git a/archivebox/plugins_extractor/chrome/apps.py b/archivebox/plugins_extractor/chrome/apps.py index 8b08ae30..35a0f77b 100644 --- a/archivebox/plugins_extractor/chrome/apps.py +++ b/archivebox/plugins_extractor/chrome/apps.py @@ -5,8 +5,6 @@ import platform from pathlib import Path from typing import List, Optional, Dict, ClassVar -from django.conf import settings - # Depends on other PyPI/vendor packages: from rich import print from pydantic import InstanceOf, Field, model_validator @@ -18,8 +16,6 @@ from pydantic_pkgr import ( bin_abspath, ) -import archivebox - # Depends on other Django apps: from abx.archivebox.base_plugin import BasePlugin from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName @@ -29,7 +25,7 @@ from abx.archivebox.base_binary import 
BaseBinary, env from abx.archivebox.base_hook import BaseHook # Depends on Other Plugins: -from plugins_sys.config.apps import ARCHIVING_CONFIG, SHELL_CONFIG +from archivebox.config import CONSTANTS, ARCHIVING_CONFIG, SHELL_CONFIG from plugins_pkg.puppeteer.apps import PUPPETEER_BINPROVIDER from plugins_pkg.playwright.apps import PLAYWRIGHT_BINPROVIDER @@ -217,7 +213,7 @@ class ChromeBinary(BaseBinary): } @staticmethod - def symlink_to_lib(binary, bin_dir=archivebox.CONSTANTS.LIB_BIN_DIR) -> None: + def symlink_to_lib(binary, bin_dir=CONSTANTS.LIB_BIN_DIR) -> None: if not (binary.abspath and binary.abspath.exists()): return diff --git a/archivebox/plugins_extractor/readability/apps.py b/archivebox/plugins_extractor/readability/apps.py index 14b0a3a6..5af8de7a 100644 --- a/archivebox/plugins_extractor/readability/apps.py +++ b/archivebox/plugins_extractor/readability/apps.py @@ -18,7 +18,7 @@ from abx.archivebox.base_extractor import BaseExtractor from abx.archivebox.base_hook import BaseHook # Depends on Other Plugins: -from plugins_sys.config.apps import ARCHIVING_CONFIG +from archivebox.config import ARCHIVING_CONFIG from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER ###################### Config ########################## diff --git a/archivebox/plugins_extractor/singlefile/apps.py b/archivebox/plugins_extractor/singlefile/apps.py index e737e87a..cabfe67f 100644 --- a/archivebox/plugins_extractor/singlefile/apps.py +++ b/archivebox/plugins_extractor/singlefile/apps.py @@ -19,7 +19,7 @@ from abx.archivebox.base_queue import BaseQueue from abx.archivebox.base_hook import BaseHook # Depends on Other Plugins: -from plugins_sys.config.apps import ARCHIVING_CONFIG +from archivebox.config import ARCHIVING_CONFIG from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER ###################### Config ########################## diff --git a/archivebox/plugins_extractor/ytdlp/apps.py b/archivebox/plugins_extractor/ytdlp/apps.py index 
e6355103..fdab408f 100644 --- a/archivebox/plugins_extractor/ytdlp/apps.py +++ b/archivebox/plugins_extractor/ytdlp/apps.py @@ -12,7 +12,7 @@ from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName from abx.archivebox.base_binary import BaseBinary, env, apt, brew from abx.archivebox.base_hook import BaseHook -from plugins_sys.config.apps import ARCHIVING_CONFIG +from archivebox.config import ARCHIVING_CONFIG from plugins_pkg.pip.apps import pip ###################### Config ########################## diff --git a/archivebox/plugins_pkg/npm/apps.py b/archivebox/plugins_pkg/npm/apps.py index 31e92c4f..5923b9e6 100644 --- a/archivebox/plugins_pkg/npm/apps.py +++ b/archivebox/plugins_pkg/npm/apps.py @@ -1,16 +1,14 @@ -__package__ = 'archivebox.plugins_pkg.npm' - -import archivebox +__package__ = 'plugins_pkg.npm' from pathlib import Path from typing import List, Optional -from django.conf import settings - from pydantic import InstanceOf, model_validator from pydantic_pkgr import BinProvider, NpmProvider, BinName, PATHStr, BinProviderName +from archivebox.config import DATA_DIR, CONSTANTS + from abx.archivebox.base_plugin import BasePlugin from abx.archivebox.base_configset import BaseConfigSet from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env, apt, brew @@ -36,8 +34,8 @@ DEFAULT_GLOBAL_CONFIG = { NPM_CONFIG = NpmDependencyConfigs(**DEFAULT_GLOBAL_CONFIG) -OLD_NODE_BIN_PATH = archivebox.DATA_DIR / 'node_modules' / '.bin' -NEW_NODE_BIN_PATH = archivebox.CONSTANTS.LIB_NPM_DIR / 'node_modules' / '.bin' +OLD_NODE_BIN_PATH = DATA_DIR / 'node_modules' / '.bin' +NEW_NODE_BIN_PATH = CONSTANTS.LIB_NPM_DIR / 'node_modules' / '.bin' class SystemNpmProvider(NpmProvider, BaseBinProvider): name: BinProviderName = "sys_npm" @@ -48,7 +46,7 @@ class LibNpmProvider(NpmProvider, BaseBinProvider): name: BinProviderName = "lib_npm" PATH: PATHStr = str(OLD_NODE_BIN_PATH) - npm_prefix: Optional[Path] = archivebox.CONSTANTS.LIB_NPM_DIR + npm_prefix: 
Optional[Path] = CONSTANTS.LIB_NPM_DIR @model_validator(mode='after') def validate_path(self): diff --git a/archivebox/plugins_pkg/pip/apps.py b/archivebox/plugins_pkg/pip/apps.py index 78021c48..d7596aec 100644 --- a/archivebox/plugins_pkg/pip/apps.py +++ b/archivebox/plugins_pkg/pip/apps.py @@ -3,18 +3,19 @@ __package__ = 'archivebox.plugins_pkg.pip' import os import sys import inspect -import archivebox from pathlib import Path from typing import List, Dict, Optional, ClassVar from pydantic import InstanceOf, Field, model_validator -import abx import django from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type] from django.core.checks import Error, Tags - from pydantic_pkgr import BinProvider, PipProvider, BinName, BinProviderName, ProviderLookupDict, SemVer + +from archivebox.config import CONSTANTS, VERSION + +import abx from abx.archivebox.base_plugin import BasePlugin from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName from abx.archivebox.base_check import BaseCheck @@ -70,7 +71,7 @@ class LibPipBinProvider(PipProvider, BaseBinProvider): name: BinProviderName = "lib_pip" INSTALLER_BIN: BinName = "pip" - pip_venv: Optional[Path] = archivebox.CONSTANTS.LIB_PIP_DIR / 'venv' + pip_venv: Optional[Path] = CONSTANTS.LIB_PIP_DIR / 'venv' SYS_PIP_BINPROVIDER = SystemPipBinProvider() PIPX_PIP_BINPROVIDER = SystemPipxBinProvider() @@ -84,10 +85,10 @@ class ArchiveboxBinary(BaseBinary): binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] provider_overrides: Dict[BinProviderName, ProviderLookupDict] = { - VENV_PIP_BINPROVIDER.name: {'packages': lambda: [], 'version': lambda: archivebox.__version__}, - SYS_PIP_BINPROVIDER.name: {'packages': lambda: [], 'version': lambda: archivebox.__version__}, - apt.name: {'packages': lambda: [], 'version': lambda: archivebox.__version__}, - brew.name: {'packages': lambda: [], 'version': lambda: 
archivebox.__version__}, + VENV_PIP_BINPROVIDER.name: {'packages': lambda: [], 'version': lambda: VERSION}, + SYS_PIP_BINPROVIDER.name: {'packages': lambda: [], 'version': lambda: VERSION}, + apt.name: {'packages': lambda: [], 'version': lambda: VERSION}, + brew.name: {'packages': lambda: [], 'version': lambda: VERSION}, } ARCHIVEBOX_BINARY = ArchiveboxBinary() diff --git a/archivebox/plugins_pkg/playwright/apps.py b/archivebox/plugins_pkg/playwright/apps.py index 8c01c997..1cb5d765 100644 --- a/archivebox/plugins_pkg/playwright/apps.py +++ b/archivebox/plugins_pkg/playwright/apps.py @@ -2,8 +2,6 @@ import platform from pathlib import Path from typing import List, Optional, Dict, ClassVar -from django.conf import settings - # Depends on other PyPI/vendor packages: from pydantic import InstanceOf, computed_field, Field from pydantic_pkgr import ( @@ -19,7 +17,7 @@ from pydantic_pkgr import ( DEFAULT_ENV_PATH, ) -import archivebox +from archivebox.config import CONSTANTS # Depends on other Django apps: from abx.archivebox.base_plugin import BasePlugin @@ -47,7 +45,7 @@ class PlaywrightConfigs(BaseConfigSet): PLAYWRIGHT_CONFIG = PlaywrightConfigs() -LIB_DIR_BROWSERS = archivebox.CONSTANTS.LIB_BROWSERS_DIR +LIB_DIR_BROWSERS = CONSTANTS.LIB_BROWSERS_DIR @@ -65,7 +63,7 @@ class PlaywrightBinProvider(BaseBinProvider): name: BinProviderName = "playwright" INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name - PATH: PATHStr = f"{archivebox.CONSTANTS.LIB_BIN_DIR}:{DEFAULT_ENV_PATH}" + PATH: PATHStr = f"{CONSTANTS.LIB_BIN_DIR}:{DEFAULT_ENV_PATH}" puppeteer_browsers_dir: Optional[Path] = ( Path("~/Library/Caches/ms-playwright").expanduser() # macos playwright cache dir diff --git a/archivebox/plugins_pkg/puppeteer/apps.py b/archivebox/plugins_pkg/puppeteer/apps.py index f2d4adf0..8314fb5a 100644 --- a/archivebox/plugins_pkg/puppeteer/apps.py +++ b/archivebox/plugins_pkg/puppeteer/apps.py @@ -2,8 +2,6 @@ import platform from pathlib import Path from typing import List, Optional, 
Dict, ClassVar -from django.conf import settings - # Depends on other PyPI/vendor packages: from pydantic import InstanceOf, Field from pydantic_pkgr import ( @@ -16,7 +14,7 @@ from pydantic_pkgr import ( HostBinPath, ) -import archivebox +from archivebox.config import CONSTANTS # Depends on other Django apps: from abx.archivebox.base_plugin import BasePlugin @@ -45,7 +43,7 @@ class PuppeteerConfigs(BaseConfigSet): PUPPETEER_CONFIG = PuppeteerConfigs() -LIB_DIR_BROWSERS = archivebox.CONSTANTS.LIB_BROWSERS_DIR +LIB_DIR_BROWSERS = CONSTANTS.LIB_BROWSERS_DIR class PuppeteerBinary(BaseBinary): @@ -61,7 +59,7 @@ class PuppeteerBinProvider(BaseBinProvider): name: BinProviderName = "puppeteer" INSTALLER_BIN: BinName = "npx" - PATH: PATHStr = str(archivebox.CONSTANTS.LIB_BIN_DIR) + PATH: PATHStr = str(CONSTANTS.LIB_BIN_DIR) puppeteer_browsers_dir: Optional[Path] = LIB_DIR_BROWSERS puppeteer_install_args: List[str] = ["@puppeteer/browsers", "install", "--path", str(LIB_DIR_BROWSERS)] @@ -140,7 +138,7 @@ PUPPETEER_BINPROVIDER = PuppeteerBinProvider() # ALTERNATIVE INSTALL METHOD using Ansible: # install_playbook = self.plugin_dir / 'install_puppeteer.yml' -# chrome_bin = run_playbook(install_playbook, data_dir=archivebox.DATA_DIR, quiet=quiet).BINARIES.chrome +# chrome_bin = run_playbook(install_playbook, data_dir=DATA_DIR, quiet=quiet).BINARIES.chrome # return self.__class__.model_validate( # { # **self.model_dump(), diff --git a/archivebox/plugins_search/ripgrep/apps.py b/archivebox/plugins_search/ripgrep/apps.py index 0e597f8e..1d44d84b 100644 --- a/archivebox/plugins_search/ripgrep/apps.py +++ b/archivebox/plugins_search/ripgrep/apps.py @@ -6,8 +6,6 @@ from subprocess import run from typing import List, Dict, ClassVar, Iterable # from typing_extensions import Self -import archivebox - # Depends on other PyPI/vendor packages: from pydantic import InstanceOf, Field from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName @@ -20,7 +18,7 @@ from 
abx.archivebox.base_hook import BaseHook from abx.archivebox.base_searchbackend import BaseSearchBackend # Depends on Other Plugins: -from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG +from archivebox.config import CONSTANTS, SEARCH_BACKEND_CONFIG ###################### Config ########################## @@ -38,7 +36,7 @@ class RipgrepConfig(BaseConfigSet): '--files-with-matches', '--regexp', ]) - RIPGREP_SEARCH_DIR: Path = archivebox.CONSTANTS.ARCHIVE_DIR + RIPGREP_SEARCH_DIR: Path = CONSTANTS.ARCHIVE_DIR RIPGREP_CONFIG = RipgrepConfig() diff --git a/archivebox/plugins_search/sonic/apps.py b/archivebox/plugins_search/sonic/apps.py index 5bf37044..97f7b816 100644 --- a/archivebox/plugins_search/sonic/apps.py +++ b/archivebox/plugins_search/sonic/apps.py @@ -1,11 +1,8 @@ __package__ = 'archivebox.plugins_search.sonic' -import os import sys from typing import List, Dict, ClassVar, Generator, cast -from django.conf import settings - # Depends on other PyPI/vendor packages: from pydantic import InstanceOf, Field, model_validator from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName @@ -18,7 +15,7 @@ from abx.archivebox.base_hook import BaseHook from abx.archivebox.base_searchbackend import BaseSearchBackend # Depends on Other Plugins: -from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG +from archivebox.config import SEARCH_BACKEND_CONFIG SONIC_LIB = None try: diff --git a/archivebox/plugins_search/sqlite/apps.py b/archivebox/plugins_search/sqlite/apps.py index fe5949f6..28209b0f 100644 --- a/archivebox/plugins_search/sqlite/apps.py +++ b/archivebox/plugins_search/sqlite/apps.py @@ -17,7 +17,7 @@ from abx.archivebox.base_hook import BaseHook from abx.archivebox.base_searchbackend import BaseSearchBackend # Depends on Other Plugins: -from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG +from archivebox.config import SEARCH_BACKEND_CONFIG diff --git a/archivebox/plugins_sys/config/__init__.py 
b/archivebox/plugins_sys/config/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/archivebox/plugins_sys/config/constants.py b/archivebox/plugins_sys/config/constants.py deleted file mode 100644 index 7a5c63e0..00000000 --- a/archivebox/plugins_sys/config/constants.py +++ /dev/null @@ -1 +0,0 @@ -from archivebox.constants import * diff --git a/archivebox/queues/settings.py b/archivebox/queues/settings.py index 50a60ce2..0244e740 100644 --- a/archivebox/queues/settings.py +++ b/archivebox/queues/settings.py @@ -1,11 +1,10 @@ from pathlib import Path +from archivebox.config import DATA_DIR, CONSTANTS -import archivebox -OUTPUT_DIR = archivebox.DATA_DIR -LOGS_DIR = archivebox.CONSTANTS.LOGS_DIR - -TMP_DIR = archivebox.CONSTANTS.TMP_DIR +OUTPUT_DIR = DATA_DIR +LOGS_DIR = CONSTANTS.LOGS_DIR +TMP_DIR = CONSTANTS.TMP_DIR Path.mkdir(TMP_DIR, exist_ok=True) CONFIG_FILE = TMP_DIR / "supervisord.conf" diff --git a/archivebox/search/__init__.py b/archivebox/search/__init__.py index befbd675..29eccee5 100644 --- a/archivebox/search/__init__.py +++ b/archivebox/search/__init__.py @@ -6,9 +6,9 @@ from django.conf import settings from archivebox.index.schema import Link from archivebox.util import enforce_types -from archivebox.config import stderr +from archivebox.misc.logging import stderr -# from archivebox.plugins_sys.config.apps import settings.CONFIGS.SearchBackendConfig +# from archivebox.archivebox.config import settings.CONFIGS.SearchBackendConfig from .utils import get_indexable_content, log_index_started diff --git a/archivebox/search/utils.py b/archivebox/search/utils.py index 723c7fb5..55a1fa7a 100644 --- a/archivebox/search/utils.py +++ b/archivebox/search/utils.py @@ -1,7 +1,7 @@ from django.db.models import QuerySet from archivebox.util import enforce_types -from archivebox.config import ANSI +from archivebox.config.legacy import ANSI def log_index_started(url): print('{green}[*] Indexing url: {} in the search index {reset}'.format(url, 
**ANSI)) diff --git a/archivebox/system.py b/archivebox/system.py index cae487e5..4eaa94a0 100644 --- a/archivebox/system.py +++ b/archivebox/system.py @@ -15,7 +15,7 @@ from crontab import CronTab from atomicwrites import atomic_write as lib_atomic_write from .util import enforce_types, ExtendedEncoder -from .config import OUTPUT_PERMISSIONS, DIR_OUTPUT_PERMISSIONS, ENFORCE_ATOMIC_WRITES +from .config.legacy import OUTPUT_PERMISSIONS, DIR_OUTPUT_PERMISSIONS, ENFORCE_ATOMIC_WRITES def run(cmd, *args, input=None, capture_output=True, timeout=None, check=False, text=False, start_new_session=True, **kwargs): diff --git a/archivebox/util.py b/archivebox/util.py index b26333e0..8c30670e 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -25,8 +25,8 @@ except ImportError: detect_encoding = lambda rawdata: "utf-8" -from archivebox.constants import STATICFILE_EXTENSIONS -from plugins_sys.config.apps import ARCHIVING_CONFIG +from archivebox.config.constants import STATICFILE_EXTENSIONS +from archivebox.config import ARCHIVING_CONFIG from .misc.logging import COLOR_DICT