diff --git a/archivebox/__init__.py b/archivebox/__init__.py
index b1afc052..bccb2314 100755
--- a/archivebox/__init__.py
+++ b/archivebox/__init__.py
@@ -1,52 +1,33 @@
__package__ = 'archivebox'
-
-# print('INSTALLING MONKEY PATCHES')
-from .monkey_patches import * # noqa
-# print('DONE INSTALLING MONKEY PATCHES')
-
-
import os
import sys
-import importlib.metadata
from pathlib import Path
-PACKAGE_DIR = Path(__file__).resolve().parent # archivebox source code dir
-DATA_DIR = Path(os.curdir).resolve() # archivebox user data dir
-ARCHIVE_DIR = DATA_DIR / 'archive'
+PACKAGE_DIR = Path(__file__).resolve().parent # archivebox source code dir
+DATA_DIR = Path(os.curdir).resolve() # archivebox user data dir
+ARCHIVE_DIR = DATA_DIR / 'archive' # archivebox snapshot data dir
# make sure PACKAGE_DIR is in sys.path so we can import all subfolders
# without necessarily waiting for django to load them thorugh INSTALLED_APPS
if str(PACKAGE_DIR) not in sys.path:
sys.path.append(str(PACKAGE_DIR))
-# load fallback libraries from vendor dir
-from .vendor import load_vendored_libs
-load_vendored_libs()
+from .config.constants import CONSTANTS, VERSION, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
os.environ['OUTPUT_DIR'] = str(DATA_DIR)
os.environ['DJANGO_SETTINGS_MODULE'] = 'core.settings'
+# print('INSTALLING MONKEY PATCHES')
+from .monkey_patches import * # noqa
+# print('DONE INSTALLING MONKEY PATCHES')
-def _detect_installed_version():
- try:
- return importlib.metadata.version(__package__ or 'archivebox')
- except importlib.metadata.PackageNotFoundError:
- try:
- pyproject_config = (PACKAGE_DIR / 'pyproject.toml').read_text()
- for line in pyproject_config:
- if line.startswith('version = '):
- return line.split(' = ', 1)[-1].strip('"')
- except FileNotFoundError:
- # building docs, pyproject.toml is not available
- return 'dev'
-
- raise Exception('Failed to detect installed archivebox version!')
-
-VERSION = _detect_installed_version()
+# print('LOADING VENDOR LIBRARIES')
+from .vendor import load_vendored_libs # noqa
+load_vendored_libs()
+# print('DONE LOADING VENDOR LIBRARIES')
__version__ = VERSION
-
-
-from .constants import CONSTANTS
+__author__ = 'Nick Sweeting'
+__license__ = 'MIT'
diff --git a/archivebox/abx/archivebox/base_binary.py b/archivebox/abx/archivebox/base_binary.py
index 786f41e5..d4fa6df0 100644
--- a/archivebox/abx/archivebox/base_binary.py
+++ b/archivebox/abx/archivebox/base_binary.py
@@ -15,7 +15,8 @@ from pydantic_pkgr import (
)
import abx
-import archivebox
+
+from archivebox.config import CONSTANTS
from .base_hook import BaseHook, HookType
@@ -54,7 +55,7 @@ class BaseBinary(BaseHook, Binary):
@staticmethod
def symlink_to_lib(binary, bin_dir=None) -> None:
- bin_dir = bin_dir or archivebox.CONSTANTS.LIB_BIN_DIR
+ bin_dir = bin_dir or CONSTANTS.LIB_BIN_DIR
if not (binary.abspath and binary.abspath.exists()):
return
@@ -68,19 +69,19 @@ class BaseBinary(BaseHook, Binary):
@validate_call
def load(self, **kwargs) -> Self:
binary = super().load(**kwargs)
- self.symlink_to_lib(binary=binary, bin_dir=archivebox.CONSTANTS.LIB_BIN_DIR)
+ self.symlink_to_lib(binary=binary, bin_dir=CONSTANTS.LIB_BIN_DIR)
return binary
@validate_call
def install(self, **kwargs) -> Self:
binary = super().install(**kwargs)
- self.symlink_to_lib(binary=binary, bin_dir=archivebox.CONSTANTS.LIB_BIN_DIR)
+ self.symlink_to_lib(binary=binary, bin_dir=CONSTANTS.LIB_BIN_DIR)
return binary
@validate_call
def load_or_install(self, **kwargs) -> Self:
binary = super().load_or_install(**kwargs)
- self.symlink_to_lib(binary=binary, bin_dir=archivebox.CONSTANTS.LIB_BIN_DIR)
+ self.symlink_to_lib(binary=binary, bin_dir=CONSTANTS.LIB_BIN_DIR)
return binary
@property
diff --git a/archivebox/abx/archivebox/base_plugin.py b/archivebox/abx/archivebox/base_plugin.py
index ac17e9c9..d276b339 100644
--- a/archivebox/abx/archivebox/base_plugin.py
+++ b/archivebox/abx/archivebox/base_plugin.py
@@ -127,7 +127,7 @@ class BasePlugin(BaseModel):
@abx.hookimpl
def register(self, settings):
- from archivebox.config import bump_startup_progress_bar
+ from archivebox.config.legacy import bump_startup_progress_bar
self._is_registered = True
bump_startup_progress_bar()
@@ -139,7 +139,7 @@ class BasePlugin(BaseModel):
def ready(self, settings=None):
"""Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
- from archivebox.config import bump_startup_progress_bar
+ from archivebox.config.legacy import bump_startup_progress_bar
assert self._is_registered, f"Tried to run {self.plugin_module}.ready() but it was never registered!"
self._is_ready = True
diff --git a/archivebox/abx/django/use.py b/archivebox/abx/django/use.py
index c4960898..87d3f9bd 100644
--- a/archivebox/abx/django/use.py
+++ b/archivebox/abx/django/use.py
@@ -1,7 +1,7 @@
__package__ = 'abx.django'
import itertools
-from benedict import benedict
+# from benedict import benedict
from .. import pm
diff --git a/archivebox/api/v1_api.py b/archivebox/api/v1_api.py
index 0b33b8ef..b71ceb3d 100644
--- a/archivebox/api/v1_api.py
+++ b/archivebox/api/v1_api.py
@@ -12,8 +12,7 @@ from ninja import NinjaAPI, Swagger
# TODO: explore adding https://eadwincode.github.io/django-ninja-extra/
-import archivebox
-from plugins_sys.config.apps import SHELL_CONFIG
+from archivebox.config import SHELL_CONFIG, VERSION
from api.auth import API_AUTH_METHODS
@@ -32,7 +31,7 @@ html_description=f'''
📚 ArchiveBox Documentation: Github Wiki
📜 See the API source code: archivebox/api/
-Served by ArchiveBox v{archivebox.VERSION} ({COMMIT_HASH[:8]}
), API powered by django-ninja
.
+Served by ArchiveBox v{VERSION} ({COMMIT_HASH[:8]}
), API powered by django-ninja
.
'''
diff --git a/archivebox/api/v1_cli.py b/archivebox/api/v1_cli.py
index cb0cc561..392b1193 100644
--- a/archivebox/api/v1_cli.py
+++ b/archivebox/api/v1_cli.py
@@ -13,7 +13,7 @@ from ..main import (
schedule,
)
from ..util import ansi_to_html
-from ..config import ONLY_NEW
+from ..config.legacy import ONLY_NEW
from .auth import API_AUTH_METHODS
diff --git a/archivebox/cli/__init__.py b/archivebox/cli/__init__.py
index 6cf0c63d..e13a3b34 100644
--- a/archivebox/cli/__init__.py
+++ b/archivebox/cli/__init__.py
@@ -4,7 +4,6 @@ __command__ = 'archivebox'
import sys
import argparse
import threading
-import archivebox
from time import sleep
from collections.abc import Mapping
@@ -12,6 +11,7 @@ from collections.abc import Mapping
from typing import Optional, List, IO, Union, Iterable
from pathlib import Path
+from archivebox.config import DATA_DIR
from ..misc.checks import check_data_folder, check_migrations
from ..misc.logging import stderr
@@ -149,7 +149,7 @@ def run_subcommand(subcommand: str,
subcommand_args = subcommand_args or []
if subcommand not in meta_cmds:
- from ..config import setup_django, CONFIG
+ from ..config.legacy import setup_django, CONFIG
cmd_requires_db = subcommand in archive_cmds
init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
@@ -234,12 +234,12 @@ def main(args: List[str] | Omitted=OMITTED, stdin: IO | Omitted=OMITTED, pwd: st
subcommand=command.subcommand,
subcommand_args=command.subcommand_args,
stdin=stdin or None,
- pwd=pwd or archivebox.DATA_DIR,
+ pwd=pwd or DATA_DIR,
)
run_subcommand(
subcommand=command.subcommand,
subcommand_args=command.subcommand_args,
stdin=stdin or None,
- pwd=pwd or archivebox.DATA_DIR,
+ pwd=pwd or DATA_DIR,
)
diff --git a/archivebox/cli/archivebox_add.py b/archivebox/cli/archivebox_add.py
index ed05584c..e34bfc25 100644
--- a/archivebox/cli/archivebox_add.py
+++ b/archivebox/cli/archivebox_add.py
@@ -11,7 +11,7 @@ from typing import List, Optional, IO
from ..main import add
from ..util import docstring
from ..parsers import PARSERS
-from ..config import OUTPUT_DIR, ONLY_NEW
+from ..config.legacy import OUTPUT_DIR, ONLY_NEW
from ..logging_util import SmartFormatter, accept_stdin, stderr
diff --git a/archivebox/cli/archivebox_config.py b/archivebox/cli/archivebox_config.py
index 25621972..76f711ef 100644
--- a/archivebox/cli/archivebox_config.py
+++ b/archivebox/cli/archivebox_config.py
@@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import config
from ..util import docstring
-from ..config import OUTPUT_DIR
+from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, accept_stdin
diff --git a/archivebox/cli/archivebox_help.py b/archivebox/cli/archivebox_help.py
index 46f17cbc..56e1cb77 100755
--- a/archivebox/cli/archivebox_help.py
+++ b/archivebox/cli/archivebox_help.py
@@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import help
from ..util import docstring
-from ..config import OUTPUT_DIR
+from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, reject_stdin
diff --git a/archivebox/cli/archivebox_init.py b/archivebox/cli/archivebox_init.py
index 48b65b1f..e7a0430a 100755
--- a/archivebox/cli/archivebox_init.py
+++ b/archivebox/cli/archivebox_init.py
@@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import init
from ..util import docstring
-from ..config import OUTPUT_DIR
+from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, reject_stdin
diff --git a/archivebox/cli/archivebox_list.py b/archivebox/cli/archivebox_list.py
index 5477bfc8..f8afb524 100644
--- a/archivebox/cli/archivebox_list.py
+++ b/archivebox/cli/archivebox_list.py
@@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import list_all
from ..util import docstring
-from ..config import OUTPUT_DIR
+from ..config.legacy import OUTPUT_DIR
from ..index import (
LINK_FILTERS,
get_indexed_folders,
diff --git a/archivebox/cli/archivebox_manage.py b/archivebox/cli/archivebox_manage.py
index f05604e1..1e28cd35 100644
--- a/archivebox/cli/archivebox_manage.py
+++ b/archivebox/cli/archivebox_manage.py
@@ -9,7 +9,7 @@ from typing import Optional, List, IO
from ..main import manage
from ..util import docstring
-from ..config import OUTPUT_DIR
+from ..config.legacy import OUTPUT_DIR
@docstring(manage.__doc__)
diff --git a/archivebox/cli/archivebox_oneshot.py b/archivebox/cli/archivebox_oneshot.py
index 411cce8b..12a176ad 100644
--- a/archivebox/cli/archivebox_oneshot.py
+++ b/archivebox/cli/archivebox_oneshot.py
@@ -11,7 +11,7 @@ from typing import List, Optional, IO
from ..main import oneshot
from ..util import docstring
-from ..config import OUTPUT_DIR
+from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, accept_stdin, stderr
diff --git a/archivebox/cli/archivebox_remove.py b/archivebox/cli/archivebox_remove.py
index dadf2654..ac45cd9d 100644
--- a/archivebox/cli/archivebox_remove.py
+++ b/archivebox/cli/archivebox_remove.py
@@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import remove
from ..util import docstring
-from ..config import OUTPUT_DIR
+from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, accept_stdin
diff --git a/archivebox/cli/archivebox_schedule.py b/archivebox/cli/archivebox_schedule.py
index f606979b..59c2884d 100644
--- a/archivebox/cli/archivebox_schedule.py
+++ b/archivebox/cli/archivebox_schedule.py
@@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import schedule
from ..util import docstring
-from ..config import OUTPUT_DIR
+from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, reject_stdin
diff --git a/archivebox/cli/archivebox_server.py b/archivebox/cli/archivebox_server.py
index 4cc050dd..a5007b91 100644
--- a/archivebox/cli/archivebox_server.py
+++ b/archivebox/cli/archivebox_server.py
@@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import server
from ..util import docstring
-from ..config import OUTPUT_DIR, BIND_ADDR
+from ..config.legacy import OUTPUT_DIR, BIND_ADDR
from ..logging_util import SmartFormatter, reject_stdin
@docstring(server.__doc__)
diff --git a/archivebox/cli/archivebox_setup.py b/archivebox/cli/archivebox_setup.py
index 02ce57c9..f5e102f1 100755
--- a/archivebox/cli/archivebox_setup.py
+++ b/archivebox/cli/archivebox_setup.py
@@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import setup
from ..util import docstring
-from ..config import OUTPUT_DIR
+from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, reject_stdin
diff --git a/archivebox/cli/archivebox_shell.py b/archivebox/cli/archivebox_shell.py
index bcd5fdd6..afb225a7 100644
--- a/archivebox/cli/archivebox_shell.py
+++ b/archivebox/cli/archivebox_shell.py
@@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import shell
from ..util import docstring
-from ..config import OUTPUT_DIR
+from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, reject_stdin
diff --git a/archivebox/cli/archivebox_status.py b/archivebox/cli/archivebox_status.py
index 2bef19c7..86ace191 100644
--- a/archivebox/cli/archivebox_status.py
+++ b/archivebox/cli/archivebox_status.py
@@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import status
from ..util import docstring
-from ..config import OUTPUT_DIR
+from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, reject_stdin
diff --git a/archivebox/cli/archivebox_update.py b/archivebox/cli/archivebox_update.py
index 500d4c07..6cb97401 100644
--- a/archivebox/cli/archivebox_update.py
+++ b/archivebox/cli/archivebox_update.py
@@ -10,7 +10,7 @@ from typing import List, Optional, IO
from ..main import update
from ..util import docstring
-from ..config import OUTPUT_DIR
+from ..config.legacy import OUTPUT_DIR
from ..index import (
LINK_FILTERS,
get_indexed_folders,
diff --git a/archivebox/cli/archivebox_version.py b/archivebox/cli/archivebox_version.py
index e7922f37..3131b1d4 100755
--- a/archivebox/cli/archivebox_version.py
+++ b/archivebox/cli/archivebox_version.py
@@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import version
from ..util import docstring
-from ..config import OUTPUT_DIR
+from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, reject_stdin
diff --git a/archivebox/cli/tests.py b/archivebox/cli/tests.py
index 04c54df8..cc9a8e52 100644
--- a/archivebox/cli/tests.py
+++ b/archivebox/cli/tests.py
@@ -32,7 +32,7 @@ os.environ.update(TEST_CONFIG)
from ..main import init
from ..index import load_main_index
-from ..config import (
+from ..config.legacy import (
SQL_INDEX_FILENAME,
JSON_INDEX_FILENAME,
HTML_INDEX_FILENAME,
diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py
new file mode 100644
index 00000000..ce4a5ed1
--- /dev/null
+++ b/archivebox/config/__init__.py
@@ -0,0 +1,26 @@
+__package__ = 'archivebox.config'
+
+from .constants import CONSTANTS, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR, VERSION
+from .defaults import (
+ SHELL_CONFIG,
+ STORAGE_CONFIG,
+ GENERAL_CONFIG,
+ SERVER_CONFIG,
+ ARCHIVING_CONFIG,
+ SEARCH_BACKEND_CONFIG,
+)
+
+
+__all__ = [
+ 'CONSTANTS',
+ 'PACKAGE_DIR',
+ 'DATA_DIR',
+ 'ARCHIVE_DIR',
+ 'VERSION',
+ 'SHELL_CONFIG',
+ 'STORAGE_CONFIG',
+ 'GENERAL_CONFIG',
+ 'SERVER_CONFIG',
+ 'ARCHIVING_CONFIG',
+ 'SEARCH_BACKEND_CONFIG',
+]
diff --git a/archivebox/config/apps.py b/archivebox/config/apps.py
new file mode 100644
index 00000000..b5b32364
--- /dev/null
+++ b/archivebox/config/apps.py
@@ -0,0 +1,58 @@
+__package__ = 'archivebox.config'
+
+from typing import List
+from pydantic import InstanceOf
+
+from abx.archivebox.base_plugin import BasePlugin
+from abx.archivebox.base_hook import BaseHook
+
+
+from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
+from .defaults import (
+ ShellConfig, # noqa: F401
+ StorageConfig, # noqa: F401
+ GeneralConfig, # noqa: F401
+ ServerConfig, # noqa: F401
+ ArchivingConfig, # noqa: F401
+ SearchBackendConfig, # noqa: F401
+ SHELL_CONFIG,
+ STORAGE_CONFIG,
+ GENERAL_CONFIG,
+ SERVER_CONFIG,
+ ARCHIVING_CONFIG,
+ SEARCH_BACKEND_CONFIG,
+)
+
+###################### Config ##########################
+
+
+class ConfigPlugin(BasePlugin):
+ app_label: str = 'CONFIG'
+ verbose_name: str = 'Configuration'
+
+ hooks: List[InstanceOf[BaseHook]] = [
+ SHELL_CONFIG,
+ GENERAL_CONFIG,
+ STORAGE_CONFIG,
+ SERVER_CONFIG,
+ ARCHIVING_CONFIG,
+ SEARCH_BACKEND_CONFIG,
+ ]
+
+
+
+PLUGIN = ConfigPlugin()
+DJANGO_APP = PLUGIN.AppConfig
+
+
+
+# # register django apps
+# @abx.hookimpl
+# def get_INSTALLED_APPS():
+# return [DJANGO_APP.name]
+
+# # register configs
+# @abx.hookimpl
+# def register_CONFIG():
+# return PLUGIN.HOOKS_BY_TYPE['CONFIG'].values()
+
diff --git a/archivebox/plugins_sys/config/check_for_update.py b/archivebox/config/check_for_update.py
similarity index 100%
rename from archivebox/plugins_sys/config/check_for_update.py
rename to archivebox/config/check_for_update.py
diff --git a/archivebox/config_stubs.py b/archivebox/config/config_stubs.py
similarity index 100%
rename from archivebox/config_stubs.py
rename to archivebox/config/config_stubs.py
diff --git a/archivebox/constants.py b/archivebox/config/constants.py
similarity index 72%
rename from archivebox/constants.py
rename to archivebox/config/constants.py
index e577a6f2..d49a3573 100644
--- a/archivebox/constants.py
+++ b/archivebox/config/constants.py
@@ -1,27 +1,46 @@
-__package__ = 'archivebox'
+__package__ = 'archivebox.config'
import os
import re
from typing import Dict
from pathlib import Path
+import importlib.metadata
from benedict import benedict
-import archivebox
-
-from .misc.logging import DEFAULT_CLI_COLORS
+from ..misc.logging import DEFAULT_CLI_COLORS
###################### Config ##########################
-VERSION = archivebox.VERSION
-PACKAGE_DIR = archivebox.PACKAGE_DIR
-DATA_DIR = archivebox.DATA_DIR
-ARCHIVE_DIR = archivebox.ARCHIVE_DIR
+PACKAGE_DIR = Path(__file__).resolve().parent.parent # archivebox source code dir
+DATA_DIR = Path(os.curdir).resolve() # archivebox user data dir
+ARCHIVE_DIR = DATA_DIR / 'archive' # archivebox snapshot data dir
-PACKAGE_DIR_NAME: str = archivebox.PACKAGE_DIR.name
+
+def _detect_installed_version():
+ """Autodetect the installed archivebox version by using pip package metadata or pyproject.toml file"""
+ try:
+        return importlib.metadata.version('archivebox')
+ except importlib.metadata.PackageNotFoundError:
+ try:
+ pyproject_config = (PACKAGE_DIR / 'pyproject.toml').read_text()
+ for line in pyproject_config:
+ if line.startswith('version = '):
+ return line.split(' = ', 1)[-1].strip('"')
+ except FileNotFoundError:
+ # building docs, pyproject.toml is not available
+ return 'dev'
+
+ raise Exception('Failed to detect installed archivebox version!')
+
+VERSION = _detect_installed_version()
+__version__ = VERSION
+
+
+PACKAGE_DIR_NAME: str = PACKAGE_DIR.name
TEMPLATES_DIR_NAME: str = 'templates'
-TEMPLATES_DIR: Path = archivebox.PACKAGE_DIR / TEMPLATES_DIR_NAME
+TEMPLATES_DIR: Path = PACKAGE_DIR / TEMPLATES_DIR_NAME
STATIC_DIR: Path = TEMPLATES_DIR / 'static'
USER_PLUGINS_DIR_NAME: str = 'user_plugins'
CUSTOM_TEMPLATES_DIR_NAME: str = 'user_templates'
@@ -35,16 +54,16 @@ LOGS_DIR_NAME: str = 'logs'
LIB_DIR_NAME: str = 'lib'
TMP_DIR_NAME: str = 'tmp'
-OUTPUT_DIR: Path = archivebox.DATA_DIR
-ARCHIVE_DIR: Path = archivebox.DATA_DIR / ARCHIVE_DIR_NAME
-SOURCES_DIR: Path = archivebox.DATA_DIR / SOURCES_DIR_NAME
-PERSONAS_DIR: Path = archivebox.DATA_DIR / PERSONAS_DIR_NAME
-CACHE_DIR: Path = archivebox.DATA_DIR / CACHE_DIR_NAME
-LOGS_DIR: Path = archivebox.DATA_DIR / LOGS_DIR_NAME
-LIB_DIR: Path = archivebox.DATA_DIR / LIB_DIR_NAME
-TMP_DIR: Path = archivebox.DATA_DIR / TMP_DIR_NAME
-CUSTOM_TEMPLATES_DIR: Path = archivebox.DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME
-USER_PLUGINS_DIR: Path = archivebox.DATA_DIR / USER_PLUGINS_DIR_NAME
+OUTPUT_DIR: Path = DATA_DIR
+ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME
+SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME
+PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME
+CACHE_DIR: Path = DATA_DIR / CACHE_DIR_NAME
+LOGS_DIR: Path = DATA_DIR / LOGS_DIR_NAME
+LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME
+TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME
+CUSTOM_TEMPLATES_DIR: Path = DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME
+USER_PLUGINS_DIR: Path = DATA_DIR / USER_PLUGINS_DIR_NAME
LIB_PIP_DIR: Path = LIB_DIR / 'pip'
LIB_NPM_DIR: Path = LIB_DIR / 'npm'
@@ -55,9 +74,9 @@ BIN_DIR: Path = LIB_BIN_DIR
CONFIG_FILENAME: str = 'ArchiveBox.conf'
SQL_INDEX_FILENAME: str = 'index.sqlite3'
-CONFIG_FILE: Path = archivebox.DATA_DIR / CONFIG_FILENAME
-DATABASE_FILE: Path = archivebox.DATA_DIR / SQL_INDEX_FILENAME
-QUEUE_DATABASE_FILE: Path = archivebox.DATA_DIR / SQL_INDEX_FILENAME.replace('index.', 'queue.')
+CONFIG_FILE: Path = DATA_DIR / CONFIG_FILENAME
+DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME
+QUEUE_DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME.replace('index.', 'queue.')
JSON_INDEX_FILENAME: str = 'index.json'
HTML_INDEX_FILENAME: str = 'index.html'
@@ -125,7 +144,7 @@ DATA_DIR_NAMES: frozenset[str] = frozenset((
CUSTOM_TEMPLATES_DIR_NAME,
USER_PLUGINS_DIR_NAME,
))
-DATA_DIRS: frozenset[Path] = frozenset(archivebox.DATA_DIR / dirname for dirname in DATA_DIR_NAMES)
+DATA_DIRS: frozenset[Path] = frozenset(DATA_DIR / dirname for dirname in DATA_DIR_NAMES)
DATA_FILE_NAMES: frozenset[str] = frozenset((
CONFIG_FILENAME,
SQL_INDEX_FILENAME,
@@ -160,9 +179,9 @@ ALLOWED_IN_OUTPUT_DIR: frozenset[str] = frozenset((
CODE_LOCATIONS = benedict({
'PACKAGE_DIR': {
- 'path': (archivebox.PACKAGE_DIR).resolve(),
+ 'path': (PACKAGE_DIR).resolve(),
'enabled': True,
- 'is_valid': (archivebox.PACKAGE_DIR / '__main__.py').exists(),
+ 'is_valid': (PACKAGE_DIR / '__main__.py').exists(),
},
'LIB_DIR': {
'path': LIB_DIR.resolve(),
@@ -188,10 +207,10 @@ CODE_LOCATIONS = benedict({
DATA_LOCATIONS = benedict({
"OUTPUT_DIR": {
- "path": archivebox.DATA_DIR.resolve(),
+ "path": DATA_DIR.resolve(),
"enabled": True,
"is_valid": DATABASE_FILE.exists(),
- "is_mount": os.path.ismount(archivebox.DATA_DIR.resolve()),
+ "is_mount": os.path.ismount(DATA_DIR.resolve()),
},
"CONFIG_FILE": {
"path": CONFIG_FILE.resolve(),
diff --git a/archivebox/plugins_sys/config/apps.py b/archivebox/config/defaults.py
similarity index 87%
rename from archivebox/plugins_sys/config/apps.py
rename to archivebox/config/defaults.py
index 4a4ab297..1b7bc15a 100644
--- a/archivebox/plugins_sys/config/apps.py
+++ b/archivebox/config/defaults.py
@@ -1,24 +1,21 @@
-__package__ = 'plugins_sys.config'
+__package__ = 'archivebox.config'
import os
import sys
import shutil
-from typing import List, ClassVar, Dict, Optional
+from typing import ClassVar, Dict, Optional
from datetime import datetime
from pathlib import Path
from rich import print
-from pydantic import InstanceOf, Field, field_validator, model_validator, computed_field
+from pydantic import Field, field_validator, model_validator, computed_field
from django.utils.crypto import get_random_string
-from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
-from abx.archivebox.base_hook import BaseHook
-import archivebox
-from archivebox.constants import CONSTANTS, CONSTANTS_CONFIG # noqa
+from .constants import CONSTANTS, PACKAGE_DIR
###################### Config ##########################
@@ -26,7 +23,7 @@ from archivebox.constants import CONSTANTS, CONSTANTS_CONFIG # noqa
class ShellConfig(BaseConfigSet):
section: ClassVar[ConfigSectionName] = 'SHELL_CONFIG'
- DEBUG: bool = Field(default=False)
+ DEBUG: bool = Field(default=lambda: '--debug' in sys.argv)
IS_TTY: bool = Field(default=sys.stdout.isatty())
USE_COLOR: bool = Field(default=lambda c: c.IS_TTY)
@@ -56,7 +53,7 @@ class ShellConfig(BaseConfigSet):
@property
def COMMIT_HASH(self) -> Optional[str]:
try:
- git_dir = archivebox.PACKAGE_DIR / '../.git'
+ git_dir = PACKAGE_DIR / '../.git'
ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1]
commit_hash = git_dir.joinpath(ref).read_text().strip()
return commit_hash
@@ -64,7 +61,7 @@ class ShellConfig(BaseConfigSet):
pass
try:
- return list((archivebox.PACKAGE_DIR / '../.git/refs/heads/').glob('*'))[0].read_text().strip()
+ return list((PACKAGE_DIR / '../.git/refs/heads/').glob('*'))[0].read_text().strip()
except Exception:
pass
@@ -77,7 +74,7 @@ class ShellConfig(BaseConfigSet):
docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0]
return docker_build_end_time
- src_last_modified_unix_timestamp = (archivebox.PACKAGE_DIR / 'config.py').stat().st_mtime
+ src_last_modified_unix_timestamp = (PACKAGE_DIR / 'package.json').stat().st_mtime
return datetime.fromtimestamp(src_last_modified_unix_timestamp).strftime('%Y-%m-%d %H:%M:%S %s')
@@ -227,39 +224,3 @@ class SearchBackendConfig(BaseConfigSet):
SEARCH_BACKEND_CONFIG = SearchBackendConfig()
-
-class ConfigPlugin(BasePlugin):
- app_label: str = 'CONFIG'
- verbose_name: str = 'Configuration'
-
- hooks: List[InstanceOf[BaseHook]] = [
- SHELL_CONFIG,
- GENERAL_CONFIG,
- STORAGE_CONFIG,
- SERVER_CONFIG,
- ARCHIVING_CONFIG,
- SEARCH_BACKEND_CONFIG,
- ]
-
- # def register(self, settings, parent_plugin=None):
- # try:
- # super().register(settings, parent_plugin=parent_plugin)
- # except Exception as e:
- # print(f'[red][X] Error registering config plugin: {e}[/red]', file=sys.stderr)
-
-
-PLUGIN = ConfigPlugin()
-DJANGO_APP = PLUGIN.AppConfig
-
-
-
-# # register django apps
-# @abx.hookimpl
-# def get_INSTALLED_APPS():
-# return [DJANGO_APP.name]
-
-# # register configs
-# @abx.hookimpl
-# def register_CONFIG():
-# return PLUGIN.HOOKS_BY_TYPE['CONFIG'].values()
-
diff --git a/archivebox/config.py b/archivebox/config/legacy.py
similarity index 94%
rename from archivebox/config.py
rename to archivebox/config/legacy.py
index f2c4ca1b..55424646 100644
--- a/archivebox/config.py
+++ b/archivebox/config/legacy.py
@@ -19,7 +19,7 @@ Documentation:
"""
-__package__ = 'archivebox'
+__package__ = 'archivebox.config'
import os
import io
@@ -38,31 +38,27 @@ from configparser import ConfigParser
from rich.progress import Progress
from rich.console import Console
from benedict import benedict
+from pydantic_pkgr import SemVer
import django
from django.db.backends.sqlite3.base import Database as sqlite3
-import archivebox
-from archivebox.constants import CONSTANTS
-from archivebox.constants import *
-
-from pydantic_pkgr import SemVer
+from .constants import CONSTANTS, TIMEZONE, OUTPUT_DIR
+from .constants import *
from .config_stubs import (
ConfigValue,
- ConfigDict,
ConfigDefaultValue,
ConfigDefaultDict,
)
-
-from .misc.logging import (
+from ..misc.logging import (
stderr,
hint, # noqa
)
-from .plugins_sys.config.apps import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG
-from .plugins_auth.ldap.apps import LDAP_CONFIG
-from .plugins_extractor.favicon.apps import FAVICON_CONFIG
+from .defaults import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG
+from ..plugins_auth.ldap.apps import LDAP_CONFIG
+from ..plugins_extractor.favicon.apps import FAVICON_CONFIG
ANSI = SHELL_CONFIG.ANSI
LDAP = LDAP_CONFIG.LDAP_ENABLED
@@ -218,7 +214,7 @@ def get_real_name(key: str) -> str:
# These are derived/computed values calculated *after* all user-provided config values are ingested
# they appear in `archivebox config` output and are intended to be read-only for the user
DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
- 'PACKAGE_DIR': {'default': lambda c: archivebox.PACKAGE_DIR.resolve()},
+ 'PACKAGE_DIR': {'default': lambda c: CONSTANTS.PACKAGE_DIR.resolve()},
'TEMPLATES_DIR': {'default': lambda c: c['PACKAGE_DIR'] / CONSTANTS.TEMPLATES_DIR_NAME},
'CUSTOM_TEMPLATES_DIR': {'default': lambda c: c['CUSTOM_TEMPLATES_DIR'] and Path(c['CUSTOM_TEMPLATES_DIR'])},
@@ -259,8 +255,8 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
# 'CODE_LOCATIONS': {'default': lambda c: get_code_locations(c)},
# 'DATA_LOCATIONS': {'default': lambda c: get_data_locations(c)},
- 'SAVE_ALLOWLIST_PTN': {'default': lambda c: c['SAVE_ALLOWLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_ALLOWLIST'].items()}},
- 'SAVE_DENYLIST_PTN': {'default': lambda c: c['SAVE_DENYLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_DENYLIST'].items()}},
+ 'SAVE_ALLOWLIST_PTN': {'default': lambda c: c['SAVE_ALLOWLIST'] and {re.compile(k, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_ALLOWLIST'].items()}},
+ 'SAVE_DENYLIST_PTN': {'default': lambda c: c['SAVE_DENYLIST'] and {re.compile(k, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_DENYLIST'].items()}},
}
@@ -273,7 +269,7 @@ def load_config_val(key: str,
default: ConfigDefaultValue=None,
type: Optional[Type]=None,
aliases: Optional[Tuple[str, ...]]=None,
- config: Optional[ConfigDict]=None,
+ config: Optional[benedict]=None,
env_vars: Optional[os._Environ]=None,
config_file_vars: Optional[Dict[str, str]]=None) -> ConfigValue:
"""parse bool, int, and str key=value pairs from env"""
@@ -334,16 +330,16 @@ def load_config_val(key: str,
raise Exception('Config values can only be str, bool, int, or json')
-def load_config_file(out_dir: str | None=archivebox.DATA_DIR) -> Optional[ConfigDict]:
+def load_config_file(out_dir: str | None=CONSTANTS.DATA_DIR) -> Optional[benedict]:
"""load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
- config_path = archivebox.CONSTANTS.CONFIG_FILE
+ config_path = CONSTANTS.CONFIG_FILE
if config_path.exists():
config_file = ConfigParser()
config_file.optionxform = str
config_file.read(config_path)
# flatten into one namespace
- config_file_vars = ConfigDict({
+ config_file_vars = benedict({
key.upper(): val
for section, options in config_file.items()
for key, val in options.items()
@@ -354,10 +350,10 @@ def load_config_file(out_dir: str | None=archivebox.DATA_DIR) -> Optional[Config
return None
-def write_config_file(config: Dict[str, str], out_dir: str | None=archivebox.DATA_DIR) -> ConfigDict:
+def write_config_file(config: Dict[str, str], out_dir: str | None=CONSTANTS.DATA_DIR) -> benedict:
"""load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
- from .system import atomic_write
+ from ..system import atomic_write
CONFIG_HEADER = (
"""# This is the config file for your ArchiveBox collection.
@@ -373,7 +369,7 @@ def write_config_file(config: Dict[str, str], out_dir: str | None=archivebox.DAT
""")
- config_path = archivebox.CONSTANTS.CONFIG_FILE
+ config_path = CONSTANTS.CONFIG_FILE
if not config_path.exists():
atomic_write(config_path, CONFIG_HEADER)
@@ -394,7 +390,7 @@ def write_config_file(config: Dict[str, str], out_dir: str | None=archivebox.DAT
existing_config = dict(config_file[section])
else:
existing_config = {}
- config_file[section] = ConfigDict({**existing_config, key: val})
+ config_file[section] = benedict({**existing_config, key: val})
# always make sure there's a SECRET_KEY defined for Django
existing_secret_key = None
@@ -426,15 +422,15 @@ def write_config_file(config: Dict[str, str], out_dir: str | None=archivebox.DAT
if Path(f'{config_path}.bak').exists():
os.remove(f'{config_path}.bak')
- return {
+ return benedict({
key.upper(): CONFIG.get(key.upper())
for key in config.keys()
- }
+ })
def load_config(defaults: ConfigDefaultDict,
- config: Optional[ConfigDict]=None,
+ config: Optional[benedict]=None,
out_dir: Optional[str]=None,
env_vars: Optional[os._Environ]=None,
config_file_vars: Optional[Dict[str, str]]=None) -> benedict:
@@ -442,7 +438,7 @@ def load_config(defaults: ConfigDefaultDict,
env_vars = env_vars or os.environ
config_file_vars = config_file_vars or load_config_file(out_dir=out_dir)
- extended_config: ConfigDict = config.copy() if config else {}
+ extended_config = benedict(config.copy() if config else {})
for key, default in defaults.items():
try:
# print('LOADING CONFIG KEY:', key, 'DEFAULT=', default)
@@ -614,7 +610,7 @@ def wget_supports_compression(config):
return False
-def get_dependency_info(config: ConfigDict) -> ConfigValue:
+def get_dependency_info(config: benedict) -> ConfigValue:
return {
# 'PYTHON_BINARY': {
# 'path': bin_path(config['PYTHON_BINARY']),
@@ -733,7 +729,7 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
def load_all_config():
- CONFIG: ConfigDict = ConfigDict()
+ CONFIG = benedict()
for section_name, section_config in CONFIG_SCHEMA.items():
# print('LOADING CONFIG SECTION:', section_name)
CONFIG = load_config(section_config, CONFIG)
@@ -742,7 +738,7 @@ def load_all_config():
return load_config(DYNAMIC_CONFIG_SCHEMA, CONFIG)
# add all final config values in CONFIG to globals in this file
-CONFIG: ConfigDict = load_all_config()
+CONFIG: benedict = load_all_config()
globals().update(CONFIG)
# this lets us do: from .config import DEBUG, MEDIA_TIMEOUT, ...
@@ -773,7 +769,7 @@ if not SHELL_CONFIG.SHOW_PROGRESS:
# recreate rich console obj based on new config values
CONSOLE = Console()
-from .misc import logging
+from ..misc import logging
logging.CONSOLE = CONSOLE
@@ -788,8 +784,8 @@ def bump_startup_progress_bar():
def setup_django_minimal():
- # sys.path.append(str(archivebox.PACKAGE_DIR))
- # os.environ.setdefault('OUTPUT_DIR', str(archivebox.DATA_DIR))
+ # sys.path.append(str(CONSTANTS.PACKAGE_DIR))
+ # os.environ.setdefault('OUTPUT_DIR', str(CONSTANTS.DATA_DIR))
# os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
# django.setup()
raise Exception('dont use this anymore')
@@ -797,7 +793,7 @@ def setup_django_minimal():
DJANGO_SET_UP = False
-def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
+def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CONFIG, in_memory_db=False) -> None:
global INITIAL_STARTUP_PROGRESS
global INITIAL_STARTUP_PROGRESS_TASK
global DJANGO_SET_UP
@@ -808,9 +804,9 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C
with Progress(transient=True, expand=True, console=CONSOLE) as INITIAL_STARTUP_PROGRESS:
INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25)
- output_dir = out_dir or archivebox.DATA_DIR
+ output_dir = out_dir or CONSTANTS.DATA_DIR
- assert isinstance(output_dir, Path) and isinstance(archivebox.PACKAGE_DIR, Path)
+ assert isinstance(output_dir, Path) and isinstance(CONSTANTS.PACKAGE_DIR, Path)
bump_startup_progress_bar()
try:
@@ -842,7 +838,7 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C
with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
command = ' '.join(sys.argv)
ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
- f.write(f"\n> {command}; TS={ts} VERSION={archivebox.VERSION} IN_DOCKER={SHELL_CONFIG.IN_DOCKER} IS_TTY={SHELL_CONFIG.IS_TTY}\n")
+ f.write(f"\n> {command}; TS={ts} VERSION={CONSTANTS.VERSION} IN_DOCKER={SHELL_CONFIG.IN_DOCKER} IS_TTY={SHELL_CONFIG.IS_TTY}\n")
if check_db:
# Create cache table in DB if needed
@@ -861,9 +857,9 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C
for conn in connections.all():
conn.close_if_unusable_or_obsolete()
- sql_index_path = archivebox.CONSTANTS.DATABASE_FILE
+ sql_index_path = CONSTANTS.DATABASE_FILE
assert sql_index_path.exists(), (
- f'No database file {sql_index_path} found in: {archivebox.DATA_DIR} (Are you in an ArchiveBox collection directory?)')
+ f'No database file {sql_index_path} found in: {CONSTANTS.DATA_DIR} (Are you in an ArchiveBox collection directory?)')
bump_startup_progress_bar()
@@ -876,7 +872,7 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C
logfire.configure()
logfire.instrument_django(is_sql_commentor_enabled=True)
- logfire.info(f'Started ArchiveBox v{archivebox.VERSION}', argv=sys.argv)
+ logfire.info(f'Started ArchiveBox v{CONSTANTS.VERSION}', argv=sys.argv)
except KeyboardInterrupt:
raise SystemExit(2)
diff --git a/archivebox/plugins_sys/config/views.py b/archivebox/config/views.py
similarity index 98%
rename from archivebox/plugins_sys/config/views.py
rename to archivebox/config/views.py
index c38a957e..0e5350ba 100644
--- a/archivebox/plugins_sys/config/views.py
+++ b/archivebox/config/views.py
@@ -13,8 +13,7 @@ from django.utils.html import format_html, mark_safe
from admin_data_views.typing import TableContext, ItemContext
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
-import archivebox
-
+from archivebox.config import CONSTANTS
from archivebox.util import parse_date
@@ -381,7 +380,7 @@ def log_list_view(request: HttpRequest, **kwargs) -> TableContext:
assert request.user.is_superuser, "Must be a superuser to view configuration settings."
- log_files = archivebox.CONSTANTS.LOGS_DIR.glob("*.log")
+ log_files = CONSTANTS.LOGS_DIR.glob("*.log")
log_files = sorted(log_files, key=os.path.getmtime)[::-1]
rows = {
@@ -419,7 +418,7 @@ def log_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
from django.conf import settings
- log_file = [logfile for logfile in archivebox.CONSTANTS.LOGS_DIR.glob('*.log') if key in logfile.name][0]
+ log_file = [logfile for logfile in CONSTANTS.LOGS_DIR.glob('*.log') if key in logfile.name][0]
log_text = log_file.read_text()
log_stat = log_file.stat()
diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py
index e81c569d..96f6863b 100644
--- a/archivebox/core/admin.py
+++ b/archivebox/core/admin.py
@@ -18,11 +18,10 @@ from django.template import Template, RequestContext
from django.conf import settings
from django import forms
-import archivebox
-
from signal_webhooks.admin import WebhookAdmin
from signal_webhooks.utils import get_webhook_model
-# from abx.archivebox.admin import CustomPlugin
+
+from archivebox.config import VERSION
from ..util import htmldecode, urldecode
@@ -30,7 +29,7 @@ from core.models import Snapshot, ArchiveResult, Tag
from core.mixins import SearchResultsAdminMixin
from api.models import APIToken
from abid_utils.admin import ABIDModelAdmin
-from queues.tasks import bg_archive_links, bg_archive_link, bg_add
+from queues.tasks import bg_archive_links, bg_add
from index.html import snapshot_icons
from logging_util import printable_filesize
@@ -40,7 +39,7 @@ from extractors import archive_links
CONFIG = settings.CONFIG
-GLOBAL_CONTEXT = {'VERSION': archivebox.VERSION, 'VERSIONS_AVAILABLE': [], 'CAN_UPGRADE': False}
+GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': [], 'CAN_UPGRADE': False}
# Admin URLs
# /admin/
diff --git a/archivebox/core/auth.py b/archivebox/core/auth.py
index 048f029c..536e0778 100644
--- a/archivebox/core/auth.py
+++ b/archivebox/core/auth.py
@@ -1,7 +1,7 @@
__package__ = 'archivebox.core'
-from ..config import (
+from ..config.legacy import (
LDAP
)
diff --git a/archivebox/core/auth_ldap.py b/archivebox/core/auth_ldap.py
index b5e2877e..1d0e8658 100644
--- a/archivebox/core/auth_ldap.py
+++ b/archivebox/core/auth_ldap.py
@@ -1,4 +1,4 @@
-from ..config import (
+from ..config.legacy import (
LDAP_CREATE_SUPERUSER
)
diff --git a/archivebox/core/middleware.py b/archivebox/core/middleware.py
index cf7ab991..4cd45e01 100644
--- a/archivebox/core/middleware.py
+++ b/archivebox/core/middleware.py
@@ -5,7 +5,7 @@ from django.utils import timezone
from django.contrib.auth.middleware import RemoteUserMiddleware
from django.core.exceptions import ImproperlyConfigured
-from ..config import PUBLIC_SNAPSHOTS, REVERSE_PROXY_USER_HEADER, REVERSE_PROXY_WHITELIST
+from ..config.legacy import PUBLIC_SNAPSHOTS, REVERSE_PROXY_USER_HEADER, REVERSE_PROXY_WHITELIST
def detect_timezone(request, activate: bool=True):
diff --git a/archivebox/core/migrations/0007_archiveresult.py b/archivebox/core/migrations/0007_archiveresult.py
index 3da3b93c..d852af63 100644
--- a/archivebox/core/migrations/0007_archiveresult.py
+++ b/archivebox/core/migrations/0007_archiveresult.py
@@ -1,14 +1,18 @@
# Generated by Django 3.0.8 on 2020-11-04 12:25
+import os
import json
from pathlib import Path
from django.db import migrations, models
import django.db.models.deletion
-from config import CONFIG
from index.json import to_json
+DATA_DIR = Path(os.curdir).resolve() # archivebox user data dir
+ARCHIVE_DIR = DATA_DIR / 'archive' # archivebox snapshot data dir
+
+
try:
JSONField = models.JSONField
except AttributeError:
@@ -22,7 +26,7 @@ def forwards_func(apps, schema_editor):
snapshots = Snapshot.objects.all()
for snapshot in snapshots:
- out_dir = Path(CONFIG['ARCHIVE_DIR']) / snapshot.timestamp
+ out_dir = ARCHIVE_DIR / snapshot.timestamp
try:
with open(out_dir / "index.json", "r") as f:
@@ -57,7 +61,7 @@ def forwards_func(apps, schema_editor):
def verify_json_index_integrity(snapshot):
results = snapshot.archiveresult_set.all()
- out_dir = Path(CONFIG['ARCHIVE_DIR']) / snapshot.timestamp
+ out_dir = ARCHIVE_DIR / snapshot.timestamp
with open(out_dir / "index.json", "r") as f:
index = json.load(f)
diff --git a/archivebox/core/models.py b/archivebox/core/models.py
index 0b70f17a..0630f625 100644
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -17,10 +17,9 @@ from django.db.models import Case, When, Value, IntegerField
from django.contrib import admin
from django.conf import settings
-import archivebox
+from archivebox.config import CONSTANTS
from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
-
from queues.tasks import bg_archive_snapshot
from ..system import get_dir_size
@@ -261,11 +260,11 @@ class Snapshot(ABIDModel):
@cached_property
def link_dir(self):
- return str(archivebox.CONSTANTS.ARCHIVE_DIR / self.timestamp)
+ return str(CONSTANTS.ARCHIVE_DIR / self.timestamp)
@cached_property
def archive_path(self):
- return '{}/{}'.format(archivebox.CONSTANTS.ARCHIVE_DIR_NAME, self.timestamp)
+ return '{}/{}'.format(CONSTANTS.ARCHIVE_DIR_NAME, self.timestamp)
@cached_property
def archive_size(self):
@@ -375,17 +374,17 @@ class Snapshot(ABIDModel):
# def get_storage_dir(self, create=True, symlink=True) -> Path:
# date_str = self.bookmarked_at.strftime('%Y%m%d')
# domain_str = domain(self.url)
- # abs_storage_dir = Path(archivebox.CONSTANTS.ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid)
+ # abs_storage_dir = Path(CONSTANTS.ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid)
# if create and not abs_storage_dir.is_dir():
# abs_storage_dir.mkdir(parents=True, exist_ok=True)
# if symlink:
# LINK_PATHS = [
- # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
- # # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_id' / str(self.ulid),
- # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_date' / date_str / domain_str / str(self.ulid),
- # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_domain' / domain_str / date_str / str(self.ulid),
+ # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
+ # # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_id' / str(self.ulid),
+ # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_date' / date_str / domain_str / str(self.ulid),
+ # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_domain' / domain_str / date_str / str(self.ulid),
# ]
# for link_path in LINK_PATHS:
# link_path.parent.mkdir(parents=True, exist_ok=True)
@@ -524,18 +523,18 @@ class ArchiveResult(ABIDModel):
# def get_storage_dir(self, create=True, symlink=True):
# date_str = self.snapshot.bookmarked_at.strftime('%Y%m%d')
# domain_str = domain(self.snapshot.url)
- # abs_storage_dir = Path(archivebox.CONSTANTS.ARCHIVE_DIR) / 'results' / date_str / domain_str / self.extractor / str(self.ulid)
+ # abs_storage_dir = Path(CONSTANTS.ARCHIVE_DIR) / 'results' / date_str / domain_str / self.extractor / str(self.ulid)
# if create and not abs_storage_dir.is_dir():
# abs_storage_dir.mkdir(parents=True, exist_ok=True)
# if symlink:
# LINK_PATHS = [
- # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
- # # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_id' / str(self.ulid),
- # # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_date' / date_str / domain_str / self.extractor / str(self.ulid),
- # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_domain' / domain_str / date_str / self.extractor / str(self.ulid),
- # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_type' / self.extractor / date_str / domain_str / str(self.ulid),
+ # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
+ # # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_id' / str(self.ulid),
+ # # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_date' / date_str / domain_str / self.extractor / str(self.ulid),
+ # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_domain' / domain_str / date_str / self.extractor / str(self.ulid),
+ # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_type' / self.extractor / date_str / domain_str / str(self.ulid),
# ]
# for link_path in LINK_PATHS:
# link_path.parent.mkdir(parents=True, exist_ok=True)
diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py
index cdab906c..c0e612c7 100644
--- a/archivebox/core/settings.py
+++ b/archivebox/core/settings.py
@@ -13,20 +13,15 @@ import abx.archivebox
import abx.archivebox.use
import abx.django.use
-import archivebox
-from archivebox.constants import CONSTANTS
+from archivebox.config import VERSION, DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS # noqa
-from ..config import CONFIG
+from ..config.legacy import CONFIG
IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3]
-VERSION = archivebox.VERSION
-PACKAGE_DIR = archivebox.PACKAGE_DIR
-DATA_DIR = archivebox.DATA_DIR
-ARCHIVE_DIR = archivebox.ARCHIVE_DIR
################################################################################
### ArchiveBox Plugin Settings
@@ -40,14 +35,14 @@ PLUGIN_HOOKSPECS = [
abx.register_hookspecs(PLUGIN_HOOKSPECS)
BUILTIN_PLUGIN_DIRS = {
- 'plugins_sys': archivebox.PACKAGE_DIR / 'plugins_sys',
- 'plugins_pkg': archivebox.PACKAGE_DIR / 'plugins_pkg',
- 'plugins_auth': archivebox.PACKAGE_DIR / 'plugins_auth',
- 'plugins_search': archivebox.PACKAGE_DIR / 'plugins_search',
- 'plugins_extractor': archivebox.PACKAGE_DIR / 'plugins_extractor',
+ 'archivebox': PACKAGE_DIR,
+ 'plugins_pkg': PACKAGE_DIR / 'plugins_pkg',
+ 'plugins_auth': PACKAGE_DIR / 'plugins_auth',
+ 'plugins_search': PACKAGE_DIR / 'plugins_search',
+ 'plugins_extractor': PACKAGE_DIR / 'plugins_extractor',
}
USER_PLUGIN_DIRS = {
- 'user_plugins': archivebox.DATA_DIR / 'user_plugins',
+ 'user_plugins': DATA_DIR / 'user_plugins',
}
BUILTIN_PLUGINS = abx.get_plugins_in_dirs(BUILTIN_PLUGIN_DIRS)
@@ -105,6 +100,7 @@ INSTALLED_APPS = [
'django_object_actions', # provides easy Django Admin action buttons on change views https://github.com/crccheck/django-object-actions
# Our ArchiveBox-provided apps
+ #'config', # ArchiveBox config settings
'queues', # handles starting and managing background workers and processes
'abid_utils', # handles ABID ID creation, handling, and models
'core', # core django model with Snapshot, ArchiveResult, etc.
@@ -481,41 +477,41 @@ ADMIN_DATA_VIEWS = {
},
{
"route": "binaries/",
- "view": "plugins_sys.config.views.binaries_list_view",
+ "view": "archivebox.config.views.binaries_list_view",
"name": "Binaries",
"items": {
"route": "/",
- "view": "plugins_sys.config.views.binary_detail_view",
+ "view": "archivebox.config.views.binary_detail_view",
"name": "binary",
},
},
{
"route": "plugins/",
- "view": "plugins_sys.config.views.plugins_list_view",
+ "view": "archivebox.config.views.plugins_list_view",
"name": "Plugins",
"items": {
"route": "/",
- "view": "plugins_sys.config.views.plugin_detail_view",
+ "view": "archivebox.config.views.plugin_detail_view",
"name": "plugin",
},
},
{
"route": "workers/",
- "view": "plugins_sys.config.views.worker_list_view",
+ "view": "archivebox.config.views.worker_list_view",
"name": "Workers",
"items": {
"route": "/",
- "view": "plugins_sys.config.views.worker_detail_view",
+ "view": "archivebox.config.views.worker_detail_view",
"name": "worker",
},
},
{
"route": "logs/",
- "view": "plugins_sys.config.views.log_list_view",
+ "view": "archivebox.config.views.log_list_view",
"name": "Logs",
"items": {
"route": "/",
- "view": "plugins_sys.config.views.log_detail_view",
+ "view": "archivebox.config.views.log_detail_view",
"name": "log",
},
},
diff --git a/archivebox/core/settings_logging.py b/archivebox/core/settings_logging.py
index 28b2e0c3..afe101b2 100644
--- a/archivebox/core/settings_logging.py
+++ b/archivebox/core/settings_logging.py
@@ -7,7 +7,7 @@ import logging
import pydantic
import django.template
-import archivebox
+from archivebox.config import CONSTANTS
from ..misc.logging import IS_TTY
@@ -52,7 +52,7 @@ class CustomOutboundWebhookLogFormatter(logging.Formatter):
ERROR_LOG = tempfile.NamedTemporaryFile().name
-LOGS_DIR = archivebox.DATA_DIR / 'logs'
+LOGS_DIR = CONSTANTS.LOGS_DIR
if LOGS_DIR.is_dir():
ERROR_LOG = (LOGS_DIR / 'errors.log')
diff --git a/archivebox/core/urls.py b/archivebox/core/urls.py
index e1dba738..971b8ea2 100644
--- a/archivebox/core/urls.py
+++ b/archivebox/core/urls.py
@@ -10,7 +10,7 @@ from .views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthC
from .serve_static import serve_static
# GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
-# from config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE
+# from ..config.legacy import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE
# GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': VERSIONS_AVAILABLE, 'CAN_UPGRADE': CAN_UPGRADE}
diff --git a/archivebox/core/views.py b/archivebox/core/views.py
index 5d0c614a..5a7c7f4c 100644
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -20,8 +20,6 @@ from django.utils.decorators import method_decorator
from admin_data_views.typing import TableContext, ItemContext
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
-import archivebox
-from archivebox.constants import CONSTANTS
from core.models import Snapshot
from core.forms import AddLinkForm
@@ -29,10 +27,10 @@ from core.admin import result_url
from queues.tasks import bg_add
-from ..plugins_sys.config.apps import SHELL_CONFIG, SERVER_CONFIG
+from archivebox.config import CONSTANTS, DATA_DIR, VERSION, SHELL_CONFIG, SERVER_CONFIG
from ..plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
-from ..config import (
+from ..config.legacy import (
CONFIG_SCHEMA,
DYNAMIC_CONFIG_SCHEMA,
USER_CONFIG,
@@ -381,7 +379,7 @@ class PublicIndexView(ListView):
def get_context_data(self, **kwargs):
return {
**super().get_context_data(**kwargs),
- 'VERSION': archivebox.VERSION,
+ 'VERSION': VERSION,
'COMMIT_HASH': SHELL_CONFIG.COMMIT_HASH,
'FOOTER_INFO': SERVER_CONFIG.FOOTER_INFO,
}
@@ -451,7 +449,7 @@ class AddView(UserPassesTestMixin, FormView):
'title': "Add URLs",
# We can't just call request.build_absolute_uri in the template, because it would include query parameters
'absolute_add_path': self.request.build_absolute_uri(self.request.path),
- 'VERSION': archivebox.VERSION,
+ 'VERSION': VERSION,
'FOOTER_INFO': SERVER_CONFIG.FOOTER_INFO,
'stdout': '',
}
@@ -469,7 +467,7 @@ class AddView(UserPassesTestMixin, FormView):
"depth": depth,
"parser": parser,
"update_all": False,
- "out_dir": archivebox.DATA_DIR,
+ "out_dir": DATA_DIR,
"created_by_id": self.request.user.pk,
}
if extractors:
diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py
index 700aede7..443a1aed 100644
--- a/archivebox/extractors/__init__.py
+++ b/archivebox/extractors/__init__.py
@@ -10,7 +10,7 @@ from datetime import datetime, timezone
from django.db.models import QuerySet
-from ..config import (
+from ..config.legacy import (
SAVE_ALLOWLIST_PTN,
SAVE_DENYLIST_PTN,
)
diff --git a/archivebox/extractors/archive_org.py b/archivebox/extractors/archive_org.py
index 5aa66fa7..ac73f721 100644
--- a/archivebox/extractors/archive_org.py
+++ b/archivebox/extractors/archive_org.py
@@ -12,7 +12,7 @@ from ..util import (
is_static_file,
dedupe,
)
-from ..config import (
+from ..config.legacy import (
TIMEOUT,
CURL_ARGS,
CURL_EXTRA_ARGS,
@@ -24,6 +24,7 @@ from ..config import (
)
from ..logging_util import TimedProgress
+
def get_output_path():
return 'archive.org.txt'
diff --git a/archivebox/extractors/favicon.py b/archivebox/extractors/favicon.py
index b9b5c3a7..791184f9 100644
--- a/archivebox/extractors/favicon.py
+++ b/archivebox/extractors/favicon.py
@@ -11,7 +11,7 @@ from ..util import (
domain,
dedupe,
)
-from ..config import CONFIG
+from ..config.legacy import CONFIG
from ..logging_util import TimedProgress
diff --git a/archivebox/extractors/git.py b/archivebox/extractors/git.py
index 3b8a4b9d..1e9decbb 100644
--- a/archivebox/extractors/git.py
+++ b/archivebox/extractors/git.py
@@ -14,7 +14,7 @@ from ..util import (
without_query,
without_fragment,
)
-from ..config import CONFIG
+from ..config.legacy import CONFIG
from ..logging_util import TimedProgress
diff --git a/archivebox/extractors/headers.py b/archivebox/extractors/headers.py
index 9fd48469..4bd2780b 100644
--- a/archivebox/extractors/headers.py
+++ b/archivebox/extractors/headers.py
@@ -11,7 +11,7 @@ from ..util import (
get_headers,
dedupe,
)
-from ..config import (
+from ..config.legacy import (
TIMEOUT,
CURL_BINARY,
CURL_ARGS,
diff --git a/archivebox/extractors/htmltotext.py b/archivebox/extractors/htmltotext.py
index 29591e69..276ed5b8 100644
--- a/archivebox/extractors/htmltotext.py
+++ b/archivebox/extractors/htmltotext.py
@@ -1,13 +1,12 @@
__package__ = 'archivebox.extractors'
-import archivebox
-
from html.parser import HTMLParser
import io
from pathlib import Path
from typing import Optional
-from ..config import (
+from archivebox.config import VERSION
+from ..config.legacy import (
SAVE_HTMLTOTEXT,
TIMEOUT,
)
@@ -154,7 +153,7 @@ def save_htmltotext(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
return ArchiveResult(
cmd=cmd,
pwd=str(out_dir),
- cmd_version=archivebox.__version__,
+ cmd_version=VERSION,
output=output,
status=status,
index_texts=[extracted_text] if extracted_text else [],
diff --git a/archivebox/extractors/mercury.py b/archivebox/extractors/mercury.py
index 71af1329..0aad67e6 100644
--- a/archivebox/extractors/mercury.py
+++ b/archivebox/extractors/mercury.py
@@ -13,7 +13,7 @@ from ..util import (
is_static_file,
dedupe,
)
-from ..config import (
+from ..config.legacy import (
TIMEOUT,
SAVE_MERCURY,
DEPENDENCIES,
diff --git a/archivebox/extractors/title.py b/archivebox/extractors/title.py
index a1cb769f..9779e042 100644
--- a/archivebox/extractors/title.py
+++ b/archivebox/extractors/title.py
@@ -12,7 +12,7 @@ from ..util import (
htmldecode,
dedupe,
)
-from ..config import (
+from ..config.legacy import (
TIMEOUT,
CHECK_SSL_VALIDITY,
SAVE_TITLE,
diff --git a/archivebox/extractors/wget.py b/archivebox/extractors/wget.py
index c4cb6d44..9cc30c6f 100644
--- a/archivebox/extractors/wget.py
+++ b/archivebox/extractors/wget.py
@@ -17,7 +17,7 @@ from ..util import (
urldecode,
dedupe,
)
-from ..config import (
+from ..config.legacy import (
WGET_ARGS,
WGET_EXTRA_ARGS,
TIMEOUT,
diff --git a/archivebox/index/__init__.py b/archivebox/index/__init__.py
index 9b9619e0..8219f1db 100644
--- a/archivebox/index/__init__.py
+++ b/archivebox/index/__init__.py
@@ -12,15 +12,14 @@ from urllib.parse import urlparse
from django.db.models import QuerySet, Q
-import archivebox
-
+from archivebox.config import DATA_DIR, CONSTANTS, SEARCH_BACKEND_CONFIG
from ..util import (
scheme,
enforce_types,
ExtendedEncoder,
)
from ..misc.logging import stderr
-from ..config import (
+from ..config.legacy import (
TIMEOUT,
URL_DENYLIST_PTN,
URL_ALLOWLIST_PTN,
@@ -223,28 +222,28 @@ def timed_index_update(out_path: Path):
@enforce_types
-def write_main_index(links: List[Link], out_dir: Path=archivebox.DATA_DIR, created_by_id: int | None=None) -> None:
+def write_main_index(links: List[Link], out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> None:
"""Writes links to sqlite3 file for a given list of links"""
log_indexing_process_started(len(links))
try:
- with timed_index_update(archivebox.CONSTANTS.DATABASE_FILE):
+ with timed_index_update(CONSTANTS.DATABASE_FILE):
write_sql_main_index(links, out_dir=out_dir, created_by_id=created_by_id)
- os.chmod(archivebox.CONSTANTS.DATABASE_FILE, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes
+ os.chmod(CONSTANTS.DATABASE_FILE, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes
except (KeyboardInterrupt, SystemExit):
stderr('[!] Warning: Still writing index to disk...', color='lightyellow')
stderr(' Run archivebox init to fix any inconsistencies from an ungraceful exit.')
- with timed_index_update(archivebox.CONSTANTS.DATABASE_FILE):
+ with timed_index_update(CONSTANTS.DATABASE_FILE):
write_sql_main_index(links, out_dir=out_dir, created_by_id=created_by_id)
- os.chmod(archivebox.CONSTANTS.DATABASE_FILE, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes
+ os.chmod(CONSTANTS.DATABASE_FILE, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes
raise SystemExit(0)
log_indexing_process_finished()
@enforce_types
-def load_main_index(out_dir: Path=archivebox.DATA_DIR, warn: bool=True) -> List[Link]:
+def load_main_index(out_dir: Path=DATA_DIR, warn: bool=True) -> List[Link]:
"""parse and load existing index with any new links from import_path merged in"""
from core.models import Snapshot
try:
@@ -254,8 +253,8 @@ def load_main_index(out_dir: Path=archivebox.DATA_DIR, warn: bool=True) -> List[
raise SystemExit(0)
@enforce_types
-def load_main_index_meta(out_dir: Path=archivebox.DATA_DIR) -> Optional[dict]:
- index_path = out_dir / archivebox.CONSTANTS.JSON_INDEX_FILENAME
+def load_main_index_meta(out_dir: Path=DATA_DIR) -> Optional[dict]:
+ index_path = out_dir / CONSTANTS.JSON_INDEX_FILENAME
if index_path.exists():
with open(index_path, 'r', encoding='utf-8') as f:
meta_dict = pyjson.load(f)
@@ -377,7 +376,6 @@ def q_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='
return snapshots.filter(q_filter)
def search_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='search') -> QuerySet:
- from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG
from ..search import query_search_index
if not SEARCH_BACKEND_CONFIG.USE_SEARCHING_BACKEND:
@@ -406,7 +404,7 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type
return search_filter(snapshots, filter_patterns, filter_type)
-def get_indexed_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
+def get_indexed_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""indexed links without checking archive status or data directory validity"""
links = (snapshot.as_link() for snapshot in snapshots.iterator(chunk_size=500))
return {
@@ -414,7 +412,7 @@ def get_indexed_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[st
for link in links
}
-def get_archived_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
+def get_archived_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""indexed links that are archived with a valid data directory"""
links = (snapshot.as_link() for snapshot in snapshots.iterator(chunk_size=500))
return {
@@ -422,7 +420,7 @@ def get_archived_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[s
for link in filter(is_archived, links)
}
-def get_unarchived_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
+def get_unarchived_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""indexed links that are unarchived with no data directory or an empty data directory"""
links = (snapshot.as_link() for snapshot in snapshots.iterator(chunk_size=500))
return {
@@ -430,12 +428,12 @@ def get_unarchived_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict
for link in filter(is_unarchived, links)
}
-def get_present_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
+def get_present_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""dirs that actually exist in the archive/ folder"""
all_folders = {}
- for entry in (out_dir / archivebox.CONSTANTS.ARCHIVE_DIR_NAME).iterdir():
+ for entry in (out_dir / CONSTANTS.ARCHIVE_DIR_NAME).iterdir():
if entry.is_dir():
link = None
try:
@@ -447,7 +445,7 @@ def get_present_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[st
return all_folders
-def get_valid_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
+def get_valid_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""dirs with a valid index matched to the main index and archived content"""
links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator(chunk_size=500)]
return {
@@ -455,7 +453,7 @@ def get_valid_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str,
for link in filter(is_valid, links)
}
-def get_invalid_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
+def get_invalid_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""dirs that are invalid for any reason: corrupted/duplicate/orphaned/unrecognized"""
duplicate = get_duplicate_folders(snapshots, out_dir=out_dir)
orphaned = get_orphaned_folders(snapshots, out_dir=out_dir)
@@ -464,7 +462,7 @@ def get_invalid_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[st
return {**duplicate, **orphaned, **corrupted, **unrecognized}
-def get_duplicate_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
+def get_duplicate_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""dirs that conflict with other directories that have the same link URL or timestamp"""
by_url = {}
by_timestamp = {}
@@ -472,7 +470,7 @@ def get_duplicate_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[
data_folders = (
str(entry)
- for entry in archivebox.CONSTANTS.ARCHIVE_DIR.iterdir()
+ for entry in CONSTANTS.ARCHIVE_DIR.iterdir()
if entry.is_dir() and not snapshots.filter(timestamp=entry.name).exists()
)
@@ -498,11 +496,11 @@ def get_duplicate_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[
duplicate_folders[path] = link
return duplicate_folders
-def get_orphaned_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
+def get_orphaned_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""dirs that contain a valid index but aren't listed in the main index"""
orphaned_folders = {}
- for entry in archivebox.CONSTANTS.ARCHIVE_DIR.iterdir():
+ for entry in CONSTANTS.ARCHIVE_DIR.iterdir():
if entry.is_dir():
link = None
try:
@@ -516,7 +514,7 @@ def get_orphaned_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[s
return orphaned_folders
-def get_corrupted_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
+def get_corrupted_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""dirs that don't contain a valid index and aren't listed in the main index"""
corrupted = {}
for snapshot in snapshots.iterator(chunk_size=500):
@@ -525,11 +523,11 @@ def get_corrupted_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[
corrupted[link.link_dir] = link
return corrupted
-def get_unrecognized_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
+def get_unrecognized_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""dirs that don't contain recognizable archive data and aren't listed in the main index"""
unrecognized_folders: Dict[str, Optional[Link]] = {}
- for entry in (Path(out_dir) / archivebox.CONSTANTS.ARCHIVE_DIR_NAME).iterdir():
+ for entry in (Path(out_dir) / CONSTANTS.ARCHIVE_DIR_NAME).iterdir():
if entry.is_dir():
index_exists = (entry / "index.json").exists()
link = None
@@ -594,10 +592,10 @@ def is_unarchived(link: Link) -> bool:
return not link.is_archived
-def fix_invalid_folder_locations(out_dir: Path=archivebox.DATA_DIR) -> Tuple[List[str], List[str]]:
+def fix_invalid_folder_locations(out_dir: Path=DATA_DIR) -> Tuple[List[str], List[str]]:
fixed = []
cant_fix = []
- for entry in os.scandir(out_dir / archivebox.CONSTANTS.ARCHIVE_DIR_NAME):
+ for entry in os.scandir(out_dir / CONSTANTS.ARCHIVE_DIR_NAME):
if entry.is_dir(follow_symlinks=True):
if (Path(entry.path) / 'index.json').exists():
try:
@@ -608,7 +606,7 @@ def fix_invalid_folder_locations(out_dir: Path=archivebox.DATA_DIR) -> Tuple[Lis
continue
if not entry.path.endswith(f'/{link.timestamp}'):
- dest = out_dir /archivebox.CONSTANTS.ARCHIVE_DIR_NAME / link.timestamp
+            dest = out_dir / CONSTANTS.ARCHIVE_DIR_NAME / link.timestamp
if dest.exists():
cant_fix.append(entry.path)
else:
diff --git a/archivebox/index/html.py b/archivebox/index/html.py
index 747928c5..4b2c6485 100644
--- a/archivebox/index/html.py
+++ b/archivebox/index/html.py
@@ -1,6 +1,5 @@
__package__ = 'archivebox.index'
-import archivebox
from pathlib import Path
from datetime import datetime, timezone
from collections import defaultdict
@@ -19,10 +18,11 @@ from ..util import (
htmlencode,
urldecode,
)
-from ..config import (
+from archivebox.config.legacy import (
SAVE_ARCHIVE_DOT_ORG,
PREVIEW_ORIGINALS,
)
+from archivebox.config import CONSTANTS, DATA_DIR, VERSION, SHELL_CONFIG, SERVER_CONFIG
MAIN_INDEX_TEMPLATE = 'static_index.html'
MINIMAL_INDEX_TEMPLATE = 'minimal_index.html'
@@ -33,11 +33,9 @@ TITLE_LOADING_MSG = 'Not yet archived...'
### Main Links Index
@enforce_types
-def parse_html_main_index(out_dir: Path=archivebox.DATA_DIR) -> Iterator[str]:
+def parse_html_main_index(out_dir: Path=DATA_DIR) -> Iterator[str]:
"""parse an archive index html file and return the list of urls"""
- from plugins_sys.config.constants import CONSTANTS
-
index_path = Path(out_dir) / CONSTANTS.HTML_INDEX_FILENAME
if index_path.exists():
with open(index_path, 'r', encoding='utf-8') as f:
@@ -58,11 +56,9 @@ def generate_index_from_links(links: List[Link], with_headers: bool):
def main_index_template(links: List[Link], template: str=MAIN_INDEX_TEMPLATE) -> str:
"""render the template for the entire main index"""
- from plugins_sys.config.apps import SHELL_CONFIG, SERVER_CONFIG
-
return render_django_template(template, {
- 'version': archivebox.VERSION,
- 'git_sha': SHELL_CONFIG.COMMIT_HASH or archivebox.VERSION,
+ 'version': VERSION,
+ 'git_sha': SHELL_CONFIG.COMMIT_HASH or VERSION,
'num_links': str(len(links)),
'date_updated': datetime.now(timezone.utc).strftime('%Y-%m-%d'),
'time_updated': datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M'),
@@ -75,7 +71,6 @@ def main_index_template(links: List[Link], template: str=MAIN_INDEX_TEMPLATE) ->
@enforce_types
def write_html_link_details(link: Link, out_dir: Optional[str]=None) -> None:
- from plugins_sys.config.constants import CONSTANTS
out_dir = out_dir or link.link_dir
rendered_html = link_details_template(link)
diff --git a/archivebox/index/json.py b/archivebox/index/json.py
index 06455053..acaa2a18 100644
--- a/archivebox/index/json.py
+++ b/archivebox/index/json.py
@@ -8,7 +8,7 @@ from pathlib import Path
from datetime import datetime, timezone
from typing import List, Optional, Iterator, Any, Union
-import archivebox
+from archivebox.config import VERSION, DATA_DIR, CONSTANTS, SERVER_CONFIG, SHELL_CONFIG
from .schema import Link
from ..system import atomic_write
@@ -19,7 +19,6 @@ from ..util import enforce_types
@enforce_types
def generate_json_index_from_links(links: List[Link], with_headers: bool):
from django.conf import settings
- from plugins_sys.config.apps import SERVER_CONFIG
MAIN_INDEX_HEADER = {
'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.',
@@ -27,8 +26,8 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool):
'copyright_info': SERVER_CONFIG.FOOTER_INFO,
'meta': {
'project': 'ArchiveBox',
- 'version': archivebox.VERSION,
- 'git_sha': archivebox.VERSION, # not used anymore, but kept for backwards compatibility
+ 'version': VERSION,
+ 'git_sha': VERSION, # not used anymore, but kept for backwards compatibility
'website': 'https://ArchiveBox.io',
'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
'source': 'https://github.com/ArchiveBox/ArchiveBox',
@@ -52,11 +51,9 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool):
@enforce_types
-def parse_json_main_index(out_dir: Path=archivebox.DATA_DIR) -> Iterator[Link]:
+def parse_json_main_index(out_dir: Path=DATA_DIR) -> Iterator[Link]:
"""parse an archive index json file and return the list of links"""
- from plugins_sys.config.constants import CONSTANTS
-
index_path = Path(out_dir) / CONSTANTS.JSON_INDEX_FILENAME
if index_path.exists():
with open(index_path, 'r', encoding='utf-8') as f:
@@ -68,7 +65,7 @@ def parse_json_main_index(out_dir: Path=archivebox.DATA_DIR) -> Iterator[Link]:
print(" {lightyellow}! Found an index.json in the project root but couldn't load links from it: {} {}".format(
err.__class__.__name__,
err,
- **ANSI,
+ **SHELL_CONFIG.ANSI,
))
return ()
@@ -94,8 +91,6 @@ def parse_json_main_index(out_dir: Path=archivebox.DATA_DIR) -> Iterator[Link]:
def write_json_link_details(link: Link, out_dir: Optional[str]=None) -> None:
"""write a json file with some info about the link"""
- from plugins_sys.config.constants import CONSTANTS
-
out_dir = out_dir or link.link_dir
path = Path(out_dir) / CONSTANTS.JSON_INDEX_FILENAME
atomic_write(str(path), link._asdict(extended=True))
@@ -104,7 +99,6 @@ def write_json_link_details(link: Link, out_dir: Optional[str]=None) -> None:
@enforce_types
def parse_json_link_details(out_dir: Union[Path, str], guess: bool=False) -> Optional[Link]:
"""load the json link index from a given directory"""
- from plugins_sys.config.constants import CONSTANTS
existing_index = Path(out_dir) / CONSTANTS.JSON_INDEX_FILENAME
if existing_index.exists():
@@ -121,7 +115,6 @@ def parse_json_link_details(out_dir: Union[Path, str], guess: bool=False) -> Opt
def parse_json_links_details(out_dir: Union[Path, str]) -> Iterator[Link]:
"""read through all the archive data folders and return the parsed links"""
- from plugins_sys.config.constants import CONSTANTS
for entry in os.scandir(CONSTANTS.ARCHIVE_DIR):
if entry.is_dir(follow_symlinks=True):
diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py
index 1c16c3bd..a6697c9f 100644
--- a/archivebox/index/schema.py
+++ b/archivebox/index/schema.py
@@ -17,7 +17,7 @@ from dataclasses import dataclass, asdict, field, fields
from django.utils.functional import cached_property
-from archivebox.constants import ARCHIVE_DIR, ARCHIVE_DIR_NAME
+from archivebox.config.constants import ARCHIVE_DIR, ARCHIVE_DIR_NAME
from plugins_extractor.favicon.apps import FAVICON_CONFIG
@@ -160,7 +160,7 @@ class Link:
return float(self.timestamp) > float(other.timestamp)
def typecheck(self) -> None:
- from ..config import stderr, ANSI
+ from ..config.legacy import stderr, ANSI
try:
assert self.schema == self.__class__.__name__
assert isinstance(self.timestamp, str) and self.timestamp
diff --git a/archivebox/index/sql.py b/archivebox/index/sql.py
index 0071f60b..6ac7c3e7 100644
--- a/archivebox/index/sql.py
+++ b/archivebox/index/sql.py
@@ -10,7 +10,7 @@ from django.db import transaction
from .schema import Link
from ..util import enforce_types, parse_date
-from ..config import (
+from ..config.legacy import (
OUTPUT_DIR,
TAG_SEPARATOR_PATTERN,
)
diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py
index b4e4f975..baf7030f 100644
--- a/archivebox/logging_util.py
+++ b/archivebox/logging_util.py
@@ -4,10 +4,8 @@ import re
import os
import sys
import stat
-import shutil
import time
import argparse
-import archivebox
from math import log
from multiprocessing import Process
@@ -23,6 +21,7 @@ if TYPE_CHECKING:
from rich import print
from rich.panel import Panel
+from archivebox.config import CONSTANTS, DATA_DIR, VERSION, SHELL_CONFIG
from .system import get_dir_size
from .util import enforce_types
from .misc.logging import ANSI, stderr
@@ -133,11 +132,8 @@ class TimedProgress:
def __init__(self, seconds, prefix=''):
- from plugins_sys.config.apps import SHELL_CONFIG
-
self.SHOW_PROGRESS = SHELL_CONFIG.SHOW_PROGRESS
self.ANSI = SHELL_CONFIG.ANSI
- self.TERM_WIDTH = lambda: shutil.get_terminal_size().columns # lambda so it live-updates when terminal is resized
if self.SHOW_PROGRESS:
self.p = Process(target=progress_bar, args=(seconds, prefix, self.ANSI))
@@ -169,7 +165,7 @@ class TimedProgress:
# clear whole terminal line
try:
- sys.stdout.write('\r{}{}\r'.format((' ' * self.TERM_WIDTH()), self.ANSI['reset']))
+ sys.stdout.write('\r{}{}\r'.format((' ' * SHELL_CONFIG.TERM_WIDTH), self.ANSI['reset']))
except (IOError, BrokenPipeError):
# ignore when the parent proc has stopped listening to our stdout
pass
@@ -182,11 +178,11 @@ def progress_bar(seconds: int, prefix: str='', ANSI: Dict[str, str]=ANSI) -> Non
"""show timer in the form of progress bar, with percentage and seconds remaining"""
output_buf = (sys.stdout or sys.__stdout__ or sys.stderr or sys.__stderr__)
    chunk = '█' if output_buf and output_buf.encoding.upper() == 'UTF-8' else '#'
- last_width = TERM_WIDTH()
+ last_width = SHELL_CONFIG.TERM_WIDTH
chunks = last_width - len(prefix) - 20 # number of progress chunks to show (aka max bar width)
try:
for s in range(seconds * chunks):
- max_width = TERM_WIDTH()
+ max_width = SHELL_CONFIG.TERM_WIDTH
if max_width < last_width:
# when the terminal size is shrunk, we have to write a newline
# otherwise the progress bar will keep wrapping incorrectly
@@ -224,7 +220,7 @@ def progress_bar(seconds: int, prefix: str='', ANSI: Dict[str, str]=ANSI) -> Non
sys.stdout.flush()
# uncomment to have it disappear when it hits 100% instead of staying full red:
# time.sleep(0.5)
- # sys.stdout.write('\r{}{}\r'.format((' ' * TERM_WIDTH()), ANSI['reset']))
+ # sys.stdout.write('\r{}{}\r'.format((' ' * SHELL_CONFIG.TERM_WIDTH), ANSI['reset']))
# sys.stdout.flush()
except (KeyboardInterrupt, BrokenPipeError):
print()
@@ -234,7 +230,7 @@ def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional
args = ' '.join(subcommand_args)
version_msg = '[dark_magenta]\\[i] [{now}] ArchiveBox v{VERSION}: [/dark_magenta][green4]archivebox [green3]{subcommand}[green2] {args}[/green2]'.format(
now=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),
- VERSION=archivebox.__version__,
+ VERSION=VERSION,
subcommand=subcommand,
args=args,
)
@@ -256,7 +252,6 @@ def log_importing_started(urls: Union[str, List[str]], depth: int, index_only: b
))
def log_source_saved(source_file: str):
- from plugins_sys.config.constants import CONSTANTS
print(' > Saved verbatim input to {}/{}'.format(CONSTANTS.SOURCES_DIR_NAME, source_file.rsplit('/', 1)[-1]))
def log_parsing_finished(num_parsed: int, parser_name: str):
@@ -289,14 +284,12 @@ def log_indexing_process_finished():
def log_indexing_started(out_path: str):
- from plugins_sys.config.apps import SHELL_CONFIG
-
if SHELL_CONFIG.IS_TTY:
- sys.stdout.write(f' > ./{Path(out_path).relative_to(archivebox.DATA_DIR)}')
+ sys.stdout.write(f' > ./{Path(out_path).relative_to(DATA_DIR)}')
def log_indexing_finished(out_path: str):
- print(f'\r √ ./{Path(out_path).relative_to(archivebox.DATA_DIR)}')
+ print(f'\r √ ./{Path(out_path).relative_to(DATA_DIR)}')
### Archiving Stage
@@ -532,7 +525,7 @@ def log_shell_welcome_msg():
### Helpers
@enforce_types
-def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=archivebox.DATA_DIR) -> str:
+def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=DATA_DIR) -> str:
"""convert paths like .../ArchiveBox/archivebox/../output/abc into output/abc"""
pwd = str(Path(pwd)) # .resolve()
path = str(path)
diff --git a/archivebox/main.py b/archivebox/main.py
index 2c4ce277..4ec2a93e 100755
--- a/archivebox/main.py
+++ b/archivebox/main.py
@@ -4,7 +4,6 @@ import os
import sys
import shutil
import platform
-import archivebox
from typing import Dict, List, Optional, Iterable, IO, Union
from pathlib import Path
@@ -15,6 +14,7 @@ from crontab import CronTab, CronSlices
from django.db.models import QuerySet
from django.utils import timezone
+from archivebox.config import CONSTANTS, VERSION, DATA_DIR, ARCHIVE_DIR, SHELL_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SERVER_CONFIG, ARCHIVING_CONFIG
from .cli import (
CLI_SUBCOMMANDS,
run_subcommand,
@@ -66,22 +66,9 @@ from .index.html import (
)
from .index.csv import links_to_csv
from .extractors import archive_links, archive_link, ignore_methods
-from .misc.logging import stderr, hint, ANSI
+from .misc.logging import stderr, hint
from .misc.checks import check_data_folder
-from .config import (
- ConfigDict,
- IS_TTY,
- DEBUG,
- IN_DOCKER,
- IN_QEMU,
- PUID,
- PGID,
- TIMEZONE,
- ONLY_NEW,
- JSON_INDEX_FILENAME,
- HTML_INDEX_FILENAME,
- SQL_INDEX_FILENAME,
- LDAP,
+from .config.legacy import (
write_config_file,
DEPENDENCIES,
load_all_config,
@@ -104,15 +91,9 @@ from .logging_util import (
printable_dependency_version,
)
-CONSTANTS = archivebox.CONSTANTS
-VERSION = archivebox.VERSION
-PACKAGE_DIR = archivebox.PACKAGE_DIR
-OUTPUT_DIR = archivebox.DATA_DIR
-ARCHIVE_DIR = archivebox.DATA_DIR / 'archive'
-
@enforce_types
-def help(out_dir: Path=archivebox.DATA_DIR) -> None:
+def help(out_dir: Path=DATA_DIR) -> None:
"""Print the ArchiveBox help message and usage"""
all_subcommands = CLI_SUBCOMMANDS
@@ -135,7 +116,7 @@ def help(out_dir: Path=archivebox.DATA_DIR) -> None:
)
- if archivebox.CONSTANTS.DATABASE_FILE.exists():
+ if CONSTANTS.DATABASE_FILE.exists():
print('''{green}ArchiveBox v{}: The self-hosted internet archive.{reset}
{lightred}Active data directory:{reset}
@@ -161,17 +142,17 @@ def help(out_dir: Path=archivebox.DATA_DIR) -> None:
{lightred}Documentation:{reset}
https://github.com/ArchiveBox/ArchiveBox/wiki
-'''.format(VERSION, out_dir, COMMANDS_HELP_TEXT, **ANSI))
+'''.format(VERSION, out_dir, COMMANDS_HELP_TEXT, **SHELL_CONFIG.ANSI))
else:
- print('{green}Welcome to ArchiveBox v{}!{reset}'.format(VERSION, **ANSI))
+ print('{green}Welcome to ArchiveBox v{}!{reset}'.format(VERSION, **SHELL_CONFIG.ANSI))
print()
- if IN_DOCKER:
+ if SHELL_CONFIG.IN_DOCKER:
print('When using Docker, you need to mount a volume to use as your data dir:')
print(' docker run -v /some/path:/data archivebox ...')
print()
print('To import an existing archive (from a previous version of ArchiveBox):')
- print(' 1. cd into your data dir OUTPUT_DIR (usually ArchiveBox/output) and run:')
+ print(' 1. cd into your data dir DATA_DIR (usually ArchiveBox/output) and run:')
print(' 2. archivebox init')
print()
print('To start a new archive:')
@@ -184,10 +165,9 @@ def help(out_dir: Path=archivebox.DATA_DIR) -> None:
@enforce_types
def version(quiet: bool=False,
- out_dir: Path=OUTPUT_DIR) -> None:
+ out_dir: Path=DATA_DIR) -> None:
"""Print the ArchiveBox version and dependency information"""
- from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SHELL_CONFIG
from plugins_auth.ldap.apps import LDAP_CONFIG
from django.conf import settings
@@ -202,19 +182,19 @@ def version(quiet: bool=False,
p = platform.uname()
print(
- 'ArchiveBox v{}'.format(archivebox.__version__),
+ 'ArchiveBox v{}'.format(CONSTANTS.VERSION),
f'COMMIT_HASH={SHELL_CONFIG.COMMIT_HASH[:7] if SHELL_CONFIG.COMMIT_HASH else "unknown"}',
f'BUILD_TIME={SHELL_CONFIG.BUILD_TIME}',
)
print(
- f'IN_DOCKER={IN_DOCKER}',
- f'IN_QEMU={IN_QEMU}',
+ f'IN_DOCKER={SHELL_CONFIG.IN_DOCKER}',
+ f'IN_QEMU={SHELL_CONFIG.IN_QEMU}',
f'ARCH={p.machine}',
f'OS={p.system}',
f'PLATFORM={platform.platform()}',
f'PYTHON={sys.implementation.name.title()}',
)
- OUTPUT_IS_REMOTE_FS = CONSTANTS.DATA_LOCATIONS['OUTPUT_DIR']['is_mount'] or CONSTANTS.DATA_LOCATIONS['ARCHIVE_DIR']['is_mount']
+ OUTPUT_IS_REMOTE_FS = CONSTANTS.DATA_LOCATIONS['DATA_DIR']['is_mount'] or CONSTANTS.DATA_LOCATIONS['ARCHIVE_DIR']['is_mount']
print(
f'FS_ATOMIC={STORAGE_CONFIG.ENFORCE_ATOMIC_WRITES}',
f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
@@ -224,14 +204,14 @@ def version(quiet: bool=False,
print(
f'DEBUG={SHELL_CONFIG.DEBUG}',
f'IS_TTY={SHELL_CONFIG.IS_TTY}',
- f'TZ={TIMEZONE}',
+ f'TZ={CONSTANTS.TIMEZONE}',
f'SEARCH_BACKEND={SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}',
f'LDAP={LDAP_CONFIG.LDAP_ENABLED}',
#f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually
)
print()
- print('{white}[i] Old dependency versions:{reset}'.format(**ANSI))
+ print('{white}[i] Old dependency versions:{reset}'.format(**SHELL_CONFIG.ANSI))
for name, dependency in DEPENDENCIES.items():
print(printable_dependency_version(name, dependency))
@@ -240,7 +220,7 @@ def version(quiet: bool=False,
print()
print()
- print('{white}[i] New dependency versions:{reset}'.format(**ANSI))
+ print('{white}[i] New dependency versions:{reset}'.format(**SHELL_CONFIG.ANSI))
for name, binary in settings.BINARIES.items():
err = None
try:
@@ -252,18 +232,18 @@ def version(quiet: bool=False,
print('', '√' if loaded_bin.is_valid else 'X', '', loaded_bin.name.ljust(21), str(loaded_bin.version).ljust(15), loaded_bin.abspath or str(err))
print()
- print('{white}[i] Source-code locations:{reset}'.format(**ANSI))
+ print('{white}[i] Source-code locations:{reset}'.format(**SHELL_CONFIG.ANSI))
for name, path in CONSTANTS.CODE_LOCATIONS.items():
print(printable_folder_status(name, path))
print()
if CONSTANTS.DATABASE_FILE.exists() or CONSTANTS.ARCHIVE_DIR.exists() or CONSTANTS.CONFIG_FILE.exists():
- print('{white}[i] Data locations:{reset}'.format(**ANSI))
+ print('{white}[i] Data locations:{reset}'.format(**SHELL_CONFIG.ANSI))
for name, path in CONSTANTS.DATA_LOCATIONS.items():
print(printable_folder_status(name, path))
else:
print()
- print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**ANSI))
+ print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**SHELL_CONFIG.ANSI))
print()
@@ -272,7 +252,7 @@ def version(quiet: bool=False,
def run(subcommand: str,
subcommand_args: Optional[List[str]],
stdin: Optional[IO]=None,
- out_dir: Path=OUTPUT_DIR) -> None:
+ out_dir: Path=DATA_DIR) -> None:
"""Run a given ArchiveBox subcommand with the given list of args"""
run_subcommand(
subcommand=subcommand,
@@ -283,27 +263,27 @@ def run(subcommand: str,
@enforce_types
-def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=archivebox.DATA_DIR) -> None:
+def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=DATA_DIR) -> None:
"""Initialize a new ArchiveBox collection in the current directory"""
from core.models import Snapshot
out_dir.mkdir(exist_ok=True)
- is_empty = not len(set(os.listdir(out_dir)) - CONSTANTS.ALLOWED_IN_OUTPUT_DIR)
+ is_empty = not len(set(os.listdir(out_dir)) - CONSTANTS.ALLOWED_IN_DATA_DIR)
- if (out_dir / archivebox.CONSTANTS.JSON_INDEX_FILENAME).exists():
+ if (out_dir / CONSTANTS.JSON_INDEX_FILENAME).exists():
stderr("[!] This folder contains a JSON index. It is deprecated, and will no longer be kept up to date automatically.", color="lightyellow")
stderr(" You can run `archivebox list --json --with-headers > static_index.json` to manually generate it.", color="lightyellow")
- existing_index = archivebox.CONSTANTS.DATABASE_FILE.exists()
+ existing_index = CONSTANTS.DATABASE_FILE.exists()
if is_empty and not existing_index:
- print('{green}[+] Initializing a new ArchiveBox v{} collection...{reset}'.format(VERSION, **ANSI))
- print('{green}----------------------------------------------------------------------{reset}'.format(**ANSI))
+ print('{green}[+] Initializing a new ArchiveBox v{} collection...{reset}'.format(VERSION, **SHELL_CONFIG.ANSI))
+ print('{green}----------------------------------------------------------------------{reset}'.format(**SHELL_CONFIG.ANSI))
elif existing_index:
# TODO: properly detect and print the existing version in current index as well
- print('{green}[*] Verifying and updating existing ArchiveBox collection to v{}...{reset}'.format(VERSION, **ANSI))
- print('{green}----------------------------------------------------------------------{reset}'.format(**ANSI))
+ print('{green}[*] Verifying and updating existing ArchiveBox collection to v{}...{reset}'.format(VERSION, **SHELL_CONFIG.ANSI))
+ print('{green}----------------------------------------------------------------------{reset}'.format(**SHELL_CONFIG.ANSI))
else:
if force:
stderr('[!] This folder appears to already have files in it, but no index.sqlite3 is present.', color='lightyellow')
@@ -315,41 +295,41 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
" {lightred}Hint:{reset} To import an existing data folder make sure to cd into the folder first, \n"
" then run and run 'archivebox init' to pick up where you left off.\n\n"
" (Always make sure your data folder is backed up first before updating ArchiveBox)"
- ).format(**ANSI)
+ ).format(**SHELL_CONFIG.ANSI)
)
raise SystemExit(2)
if existing_index:
- print('\n{green}[*] Verifying archive folder structure...{reset}'.format(**ANSI))
+ print('\n{green}[*] Verifying archive folder structure...{reset}'.format(**SHELL_CONFIG.ANSI))
else:
- print('\n{green}[+] Building archive folder structure...{reset}'.format(**ANSI))
+ print('\n{green}[+] Building archive folder structure...{reset}'.format(**SHELL_CONFIG.ANSI))
- print(f' + ./{CONSTANTS.ARCHIVE_DIR.relative_to(OUTPUT_DIR)}, ./{CONSTANTS.SOURCES_DIR.relative_to(OUTPUT_DIR)}, ./{CONSTANTS.LOGS_DIR.relative_to(OUTPUT_DIR)}...')
+ print(f' + ./{CONSTANTS.ARCHIVE_DIR.relative_to(DATA_DIR)}, ./{CONSTANTS.SOURCES_DIR.relative_to(DATA_DIR)}, ./{CONSTANTS.LOGS_DIR.relative_to(DATA_DIR)}...')
Path(CONSTANTS.SOURCES_DIR).mkdir(exist_ok=True)
Path(CONSTANTS.ARCHIVE_DIR).mkdir(exist_ok=True)
Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
- print(f' + ./{CONSTANTS.CONFIG_FILE.relative_to(OUTPUT_DIR)}...')
+ print(f' + ./{CONSTANTS.CONFIG_FILE.relative_to(DATA_DIR)}...')
write_config_file({}, out_dir=out_dir)
if CONSTANTS.DATABASE_FILE.exists():
- print('\n{green}[*] Verifying main SQL index and running any migrations needed...{reset}'.format(**ANSI))
+ print('\n{green}[*] Verifying main SQL index and running any migrations needed...{reset}'.format(**SHELL_CONFIG.ANSI))
else:
- print('\n{green}[+] Building main SQL index and running initial migrations...{reset}'.format(**ANSI))
+ print('\n{green}[+] Building main SQL index and running initial migrations...{reset}'.format(**SHELL_CONFIG.ANSI))
for migration_line in apply_migrations(out_dir):
print(f' {migration_line}')
assert CONSTANTS.DATABASE_FILE.exists()
print()
- print(f' √ ./{CONSTANTS.DATABASE_FILE.relative_to(OUTPUT_DIR)}')
+ print(f' √ ./{CONSTANTS.DATABASE_FILE.relative_to(DATA_DIR)}')
# from django.contrib.auth.models import User
- # if IS_TTY and not User.objects.filter(is_superuser=True).exists():
- # print('{green}[+] Creating admin user account...{reset}'.format(**ANSI))
+ # if SHELL_CONFIG.IS_TTY and not User.objects.filter(is_superuser=True).exists():
+ # print('{green}[+] Creating admin user account...{reset}'.format(**SHELL_CONFIG.ANSI))
# call_command("createsuperuser", interactive=True)
print()
- print('{green}[*] Checking links from indexes and archive folders (safe to Ctrl+C)...{reset}'.format(**ANSI))
+ print('{green}[*] Checking links from indexes and archive folders (safe to Ctrl+C)...{reset}'.format(**SHELL_CONFIG.ANSI))
all_links = Snapshot.objects.none()
pending_links: Dict[str, Link] = {}
@@ -365,9 +345,9 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
# Links in data folders that dont match their timestamp
fixed, cant_fix = fix_invalid_folder_locations(out_dir=out_dir)
if fixed:
- print(' {lightyellow}√ Fixed {} data directory locations that didn\'t match their link timestamps.{reset}'.format(len(fixed), **ANSI))
+ print(' {lightyellow}√ Fixed {} data directory locations that didn\'t match their link timestamps.{reset}'.format(len(fixed), **SHELL_CONFIG.ANSI))
if cant_fix:
- print(' {lightyellow}! Could not fix {} data directory locations due to conflicts with existing folders.{reset}'.format(len(cant_fix), **ANSI))
+ print(' {lightyellow}! Could not fix {} data directory locations due to conflicts with existing folders.{reset}'.format(len(cant_fix), **SHELL_CONFIG.ANSI))
# Links in JSON index but not in main index
orphaned_json_links = {
@@ -377,7 +357,7 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
}
if orphaned_json_links:
pending_links.update(orphaned_json_links)
- print(' {lightyellow}√ Added {} orphaned links from existing JSON index...{reset}'.format(len(orphaned_json_links), **ANSI))
+ print(' {lightyellow}√ Added {} orphaned links from existing JSON index...{reset}'.format(len(orphaned_json_links), **SHELL_CONFIG.ANSI))
# Links in data dir indexes but not in main index
orphaned_data_dir_links = {
@@ -387,7 +367,7 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
}
if orphaned_data_dir_links:
pending_links.update(orphaned_data_dir_links)
- print(' {lightyellow}√ Added {} orphaned links from existing archive directories.{reset}'.format(len(orphaned_data_dir_links), **ANSI))
+ print(' {lightyellow}√ Added {} orphaned links from existing archive directories.{reset}'.format(len(orphaned_data_dir_links), **SHELL_CONFIG.ANSI))
# Links in invalid/duplicate data dirs
invalid_folders = {
@@ -395,10 +375,10 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
for folder, link in get_invalid_folders(all_links, out_dir=out_dir).items()
}
if invalid_folders:
- print(' {lightyellow}! Skipped adding {} invalid link data directories.{reset}'.format(len(invalid_folders), **ANSI))
- print(' X ' + '\n X '.join(f'./{Path(folder).relative_to(OUTPUT_DIR)} {link}' for folder, link in invalid_folders.items()))
+ print(' {lightyellow}! Skipped adding {} invalid link data directories.{reset}'.format(len(invalid_folders), **SHELL_CONFIG.ANSI))
+ print(' X ' + '\n X '.join(f'./{Path(folder).relative_to(DATA_DIR)} {link}' for folder, link in invalid_folders.items()))
print()
- print(' {lightred}Hint:{reset} For more information about the link data directories that were skipped, run:'.format(**ANSI))
+ print(' {lightred}Hint:{reset} For more information about the link data directories that were skipped, run:'.format(**SHELL_CONFIG.ANSI))
print(' archivebox status')
print(' archivebox list --status=invalid')
@@ -407,28 +387,27 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
stderr('[x] Stopped checking archive directories due to Ctrl-C/SIGTERM', color='red')
stderr(' Your archive data is safe, but you should re-run `archivebox init` to finish the process later.')
stderr()
- stderr(' {lightred}Hint:{reset} In the future you can run a quick init without checking dirs like so:'.format(**ANSI))
+ stderr(' {lightred}Hint:{reset} In the future you can run a quick init without checking dirs like so:'.format(**SHELL_CONFIG.ANSI))
stderr(' archivebox init --quick')
raise SystemExit(1)
write_main_index(list(pending_links.values()), out_dir=out_dir)
- print('\n{green}----------------------------------------------------------------------{reset}'.format(**ANSI))
+ print('\n{green}----------------------------------------------------------------------{reset}'.format(**SHELL_CONFIG.ANSI))
from django.contrib.auth.models import User
- from plugins_sys.config.apps import SERVER_CONFIG
if (SERVER_CONFIG.ADMIN_USERNAME and SERVER_CONFIG.ADMIN_PASSWORD) and not User.objects.filter(username=SERVER_CONFIG.ADMIN_USERNAME).exists():
- print('{green}[+] Found ADMIN_USERNAME and ADMIN_PASSWORD configuration options, creating new admin user.{reset}'.format(**ANSI))
+ print('{green}[+] Found ADMIN_USERNAME and ADMIN_PASSWORD configuration options, creating new admin user.{reset}'.format(**SHELL_CONFIG.ANSI))
User.objects.create_superuser(username=SERVER_CONFIG.ADMIN_USERNAME, password=SERVER_CONFIG.ADMIN_PASSWORD)
if existing_index:
- print('{green}[√] Done. Verified and updated the existing ArchiveBox collection.{reset}'.format(**ANSI))
+ print('{green}[√] Done. Verified and updated the existing ArchiveBox collection.{reset}'.format(**SHELL_CONFIG.ANSI))
else:
- print('{green}[√] Done. A new ArchiveBox collection was initialized ({} links).{reset}'.format(len(all_links) + len(pending_links), **ANSI))
+ print('{green}[√] Done. A new ArchiveBox collection was initialized ({} links).{reset}'.format(len(all_links) + len(pending_links), **SHELL_CONFIG.ANSI))
- json_index = out_dir / JSON_INDEX_FILENAME
- html_index = out_dir / HTML_INDEX_FILENAME
+ json_index = out_dir / CONSTANTS.JSON_INDEX_FILENAME
+ html_index = out_dir / CONSTANTS.HTML_INDEX_FILENAME
index_name = f"{date.today()}_index_old"
if json_index.exists():
json_index.rename(f"{index_name}.json")
@@ -440,7 +419,7 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
if Snapshot.objects.count() < 25: # hide the hints for experienced users
print()
- print(' {lightred}Hint:{reset} To view your archive index, run:'.format(**ANSI))
+ print(' {lightred}Hint:{reset} To view your archive index, run:'.format(**SHELL_CONFIG.ANSI))
print(' archivebox server # then visit http://127.0.0.1:8000')
print()
print(' To add new links, you can run:')
@@ -450,7 +429,7 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
print(' archivebox help')
@enforce_types
-def status(out_dir: Path=OUTPUT_DIR) -> None:
+def status(out_dir: Path=DATA_DIR) -> None:
"""Print out some info and statistics about the archive collection"""
check_data_folder(CONFIG)
@@ -459,8 +438,8 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
from django.contrib.auth import get_user_model
User = get_user_model()
- print('{green}[*] Scanning archive main index...{reset}'.format(**ANSI))
- print(ANSI['lightyellow'], f' {out_dir}/*', ANSI['reset'])
+ print('{green}[*] Scanning archive main index...{reset}'.format(**SHELL_CONFIG.ANSI))
+ print(SHELL_CONFIG.ANSI['lightyellow'], f' {out_dir}/*', SHELL_CONFIG.ANSI['reset'])
num_bytes, num_dirs, num_files = get_dir_size(out_dir, recursive=False, pattern='index.')
size = printable_filesize(num_bytes)
print(f' Index size: {size} across {num_files} files')
@@ -469,15 +448,15 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
links = load_main_index(out_dir=out_dir)
num_sql_links = links.count()
num_link_details = sum(1 for link in parse_json_links_details(out_dir=out_dir))
- print(f' > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {SQL_INDEX_FILENAME})')
+ print(f' > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {CONSTANTS.SQL_INDEX_FILENAME})')
print(f' > JSON Link Details: {num_link_details} links'.ljust(36), f'(found in {ARCHIVE_DIR.name}/*/index.json)')
print()
- print('{green}[*] Scanning archive data directories...{reset}'.format(**ANSI))
- print(ANSI['lightyellow'], f' {ARCHIVE_DIR}/*', ANSI['reset'])
+ print('{green}[*] Scanning archive data directories...{reset}'.format(**SHELL_CONFIG.ANSI))
+ print(SHELL_CONFIG.ANSI['lightyellow'], f' {ARCHIVE_DIR}/*', SHELL_CONFIG.ANSI['reset'])
num_bytes, num_dirs, num_files = get_dir_size(ARCHIVE_DIR)
size = printable_filesize(num_bytes)
print(f' Size: {size} across {num_files} files in {num_dirs} directories')
- print(ANSI['black'])
+ print(SHELL_CONFIG.ANSI['black'])
num_indexed = len(get_indexed_folders(links, out_dir=out_dir))
num_archived = len(get_archived_folders(links, out_dir=out_dir))
num_unarchived = len(get_unarchived_folders(links, out_dir=out_dir))
@@ -502,23 +481,23 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
print(f' > corrupted: {len(corrupted)}'.ljust(36), f'({get_corrupted_folders.__doc__})')
print(f' > unrecognized: {len(unrecognized)}'.ljust(36), f'({get_unrecognized_folders.__doc__})')
- print(ANSI['reset'])
+ print(SHELL_CONFIG.ANSI['reset'])
if num_indexed:
- print(' {lightred}Hint:{reset} You can list link data directories by status like so:'.format(**ANSI))
+ print(' {lightred}Hint:{reset} You can list link data directories by status like so:'.format(**SHELL_CONFIG.ANSI))
print(' archivebox list --status= (e.g. indexed, corrupted, archived, etc.)')
if orphaned:
- print(' {lightred}Hint:{reset} To automatically import orphaned data directories into the main index, run:'.format(**ANSI))
+ print(' {lightred}Hint:{reset} To automatically import orphaned data directories into the main index, run:'.format(**SHELL_CONFIG.ANSI))
print(' archivebox init')
if num_invalid:
- print(' {lightred}Hint:{reset} You may need to manually remove or fix some invalid data directories, afterwards make sure to run:'.format(**ANSI))
+ print(' {lightred}Hint:{reset} You may need to manually remove or fix some invalid data directories, afterwards make sure to run:'.format(**SHELL_CONFIG.ANSI))
print(' archivebox init')
print()
- print('{green}[*] Scanning recent archive changes and user logins:{reset}'.format(**ANSI))
- print(ANSI['lightyellow'], f' {CONSTANTS.LOGS_DIR}/*', ANSI['reset'])
+ print('{green}[*] Scanning recent archive changes and user logins:{reset}'.format(**SHELL_CONFIG.ANSI))
+ print(SHELL_CONFIG.ANSI['lightyellow'], f' {CONSTANTS.LOGS_DIR}/*', SHELL_CONFIG.ANSI['reset'])
users = get_admins().values_list('username', flat=True)
print(f' UI users {len(users)}: {", ".join(users)}')
last_login = User.objects.order_by('last_login').last()
@@ -530,7 +509,7 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
if not users:
print()
- print(' {lightred}Hint:{reset} You can create an admin user by running:'.format(**ANSI))
+ print(' {lightred}Hint:{reset} You can create an admin user by running:'.format(**SHELL_CONFIG.ANSI))
print(' archivebox manage createsuperuser')
print()
@@ -538,19 +517,19 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
if not snapshot.downloaded_at:
continue
print(
- ANSI['black'],
+ SHELL_CONFIG.ANSI['black'],
(
f' > {str(snapshot.downloaded_at)[:16]} '
f'[{snapshot.num_outputs} {("X", "√")[snapshot.is_archived]} {printable_filesize(snapshot.archive_size)}] '
f'"{snapshot.title}": {snapshot.url}'
)[:SHELL_CONFIG.TERM_WIDTH],
- ANSI['reset'],
+ SHELL_CONFIG.ANSI['reset'],
)
- print(ANSI['black'], ' ...', ANSI['reset'])
+ print(SHELL_CONFIG.ANSI['black'], ' ...', SHELL_CONFIG.ANSI['reset'])
@enforce_types
-def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> List[Link]:
+def oneshot(url: str, extractors: str="", out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> List[Link]:
"""
Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
You can run this to archive single pages without needing to create a whole collection with archivebox init.
@@ -571,7 +550,7 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR, created_by_i
def add(urls: Union[str, List[str]],
tag: str='',
depth: int=0,
- update: bool=not ONLY_NEW,
+ update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
update_all: bool=False,
index_only: bool=False,
overwrite: bool=False,
@@ -580,7 +559,7 @@ def add(urls: Union[str, List[str]],
extractors: str="",
parser: str="auto",
created_by_id: int | None=None,
- out_dir: Path=OUTPUT_DIR) -> List[Link]:
+ out_dir: Path=DATA_DIR) -> List[Link]:
"""Add a new URL or list of URLs to your archive"""
from core.models import Snapshot, Tag
@@ -693,7 +672,7 @@ def remove(filter_str: Optional[str]=None,
before: Optional[float]=None,
yes: bool=False,
delete: bool=False,
- out_dir: Path=OUTPUT_DIR) -> List[Link]:
+ out_dir: Path=DATA_DIR) -> List[Link]:
"""Remove the specified URLs from the archive"""
check_data_folder(CONFIG)
@@ -767,7 +746,7 @@ def remove(filter_str: Optional[str]=None,
@enforce_types
def update(resume: Optional[float]=None,
- only_new: bool=ONLY_NEW,
+ only_new: bool=ARCHIVING_CONFIG.ONLY_NEW,
index_only: bool=False,
overwrite: bool=False,
filter_patterns_str: Optional[str]=None,
@@ -777,7 +756,7 @@ def update(resume: Optional[float]=None,
after: Optional[str]=None,
before: Optional[str]=None,
extractors: str="",
- out_dir: Path=OUTPUT_DIR) -> List[Link]:
+ out_dir: Path=DATA_DIR) -> List[Link]:
"""Import any new links from subscriptions and retry any previously failed/skipped links"""
from core.models import ArchiveResult
@@ -853,7 +832,7 @@ def list_all(filter_patterns_str: Optional[str]=None,
json: bool=False,
html: bool=False,
with_headers: bool=False,
- out_dir: Path=OUTPUT_DIR) -> Iterable[Link]:
+ out_dir: Path=DATA_DIR) -> Iterable[Link]:
"""List, filter, and export information about archive entries"""
check_data_folder(CONFIG)
@@ -902,7 +881,7 @@ def list_links(snapshots: Optional[QuerySet]=None,
filter_type: str='exact',
after: Optional[float]=None,
before: Optional[float]=None,
- out_dir: Path=OUTPUT_DIR) -> Iterable[Link]:
+ out_dir: Path=DATA_DIR) -> Iterable[Link]:
check_data_folder(CONFIG)
@@ -926,7 +905,7 @@ def list_links(snapshots: Optional[QuerySet]=None,
@enforce_types
def list_folders(links: List[Link],
status: str,
- out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+ out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
check_data_folder(CONFIG)
@@ -949,7 +928,7 @@ def list_folders(links: List[Link],
raise ValueError('Status not recognized.')
@enforce_types
-def setup(out_dir: Path=OUTPUT_DIR) -> None:
+def setup(out_dir: Path=DATA_DIR) -> None:
"""Automatically install all ArchiveBox dependencies and extras"""
from rich import print
@@ -996,7 +975,7 @@ def config(config_options_str: Optional[str]=None,
get: bool=False,
set: bool=False,
reset: bool=False,
- out_dir: Path=OUTPUT_DIR) -> None:
+ out_dir: Path=DATA_DIR) -> None:
"""Get and set your ArchiveBox project configuration values"""
check_data_folder(CONFIG)
@@ -1014,7 +993,7 @@ def config(config_options_str: Optional[str]=None,
no_args = not (get or set or reset or config_options)
- matching_config: ConfigDict = {}
+ matching_config = {}
if get or no_args:
if config_options:
config_options = [get_real_name(key) for key in config_options]
@@ -1054,11 +1033,11 @@ def config(config_options_str: Optional[str]=None,
if new_config:
before = CONFIG
- matching_config = write_config_file(new_config, out_dir=OUTPUT_DIR)
+ matching_config = write_config_file(new_config, out_dir=DATA_DIR)
after = load_all_config()
print(printable_config(matching_config))
- side_effect_changes: ConfigDict = {}
+ side_effect_changes = {}
for key, val in after.items():
if key in USER_CONFIG and (before[key] != after[key]) and (key not in matching_config):
side_effect_changes[key] = after[key]
@@ -1095,14 +1074,13 @@ def schedule(add: bool=False,
tag: str='',
depth: int=0,
overwrite: bool=False,
- update: bool=not ONLY_NEW,
+ update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
import_path: Optional[str]=None,
- out_dir: Path=OUTPUT_DIR):
+ out_dir: Path=DATA_DIR):
"""Set ArchiveBox to regularly import URLs at specific times using cron"""
check_data_folder(CONFIG)
from plugins_pkg.pip.apps import ARCHIVEBOX_BINARY
- from plugins_sys.config.apps import SHELL_CONFIG, CONSTANTS
Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
@@ -1222,7 +1200,7 @@ def server(runserver_args: Optional[List[str]]=None,
init: bool=False,
quick_init: bool=False,
createsuperuser: bool=False,
- out_dir: Path=OUTPUT_DIR) -> None:
+ out_dir: Path=DATA_DIR) -> None:
"""Run the ArchiveBox HTTP server"""
runserver_args = runserver_args or []
@@ -1238,10 +1216,6 @@ def server(runserver_args: Optional[List[str]]=None,
run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
print()
- # setup config for django runserver
- from . import config
- config.SHOW_PROGRESS = False
- config.DEBUG = config.DEBUG or debug
check_data_folder(CONFIG)
@@ -1250,20 +1224,17 @@ def server(runserver_args: Optional[List[str]]=None,
- print('{green}[+] Starting ArchiveBox webserver... {reset}'.format(**ANSI))
+ print('{green}[+] Starting ArchiveBox webserver... {reset}'.format(**SHELL_CONFIG.ANSI))
print(' > Logging errors to ./logs/errors.log')
if not User.objects.filter(is_superuser=True).exists():
- print('{lightyellow}[!] No admin users exist yet, you will not be able to edit links in the UI.{reset}'.format(**ANSI))
+ print('{lightyellow}[!] No admin users exist yet, you will not be able to edit links in the UI.{reset}'.format(**SHELL_CONFIG.ANSI))
print()
print(' To create an admin user, run:')
print(' archivebox manage createsuperuser')
print()
- # toggle autoreloading when archivebox code changes
- config.SHOW_PROGRESS = False
- config.DEBUG = config.DEBUG or debug
- if debug:
+ if SHELL_CONFIG.DEBUG:
if not reload:
runserver_args.append('--noreload') # '--insecure'
call_command("runserver", *runserver_args)
@@ -1295,13 +1266,13 @@ def server(runserver_args: Optional[List[str]]=None,
@enforce_types
-def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
+def manage(args: Optional[List[str]]=None, out_dir: Path=DATA_DIR) -> None:
"""Run an ArchiveBox Django management command"""
check_data_folder(CONFIG)
from django.core.management import execute_from_command_line
- if (args and "createsuperuser" in args) and (IN_DOCKER and not IS_TTY):
+ if (args and "createsuperuser" in args) and (SHELL_CONFIG.IN_DOCKER and not SHELL_CONFIG.IS_TTY):
stderr('[!] Warning: you need to pass -it to use interactive commands in docker', color='lightyellow')
stderr(' docker run -it archivebox manage {}'.format(' '.join(args or ['...'])), color='lightyellow')
stderr('')
@@ -1312,7 +1283,7 @@ def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
@enforce_types
-def shell(out_dir: Path=OUTPUT_DIR) -> None:
+def shell(out_dir: Path=DATA_DIR) -> None:
"""Enter an interactive ArchiveBox Django shell"""
check_data_folder(CONFIG)
diff --git a/archivebox/misc/checks.py b/archivebox/misc/checks.py
index c4d3db79..69e0c52c 100644
--- a/archivebox/misc/checks.py
+++ b/archivebox/misc/checks.py
@@ -2,45 +2,42 @@ __package__ = 'archivebox.misc'
from benedict import benedict
-import archivebox
+from archivebox.config import DATA_DIR, ARCHIVE_DIR, CONSTANTS, SHELL_CONFIG
-from .logging import stderr, ANSI
+from .logging import stderr
def check_data_folder(config: benedict) -> None:
- output_dir = archivebox.DATA_DIR
- archive_dir_exists = (archivebox.CONSTANTS.ARCHIVE_DIR).exists()
+ archive_dir_exists = ARCHIVE_DIR.exists()
if not archive_dir_exists:
stderr('[X] No archivebox index found in the current directory.', color='red')
- stderr(f' {output_dir}', color='lightyellow')
+ stderr(f' {DATA_DIR}', color='lightyellow')
stderr()
- stderr(' {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**ANSI))
+ stderr(' {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**SHELL_CONFIG.ANSI))
stderr(' cd path/to/your/archive/folder')
stderr(' archivebox [command]')
stderr()
- stderr(' {lightred}Hint{reset}: To create a new archive collection or import existing data in this folder, run:'.format(**ANSI))
+ stderr(' {lightred}Hint{reset}: To create a new archive collection or import existing data in this folder, run:'.format(**SHELL_CONFIG.ANSI))
stderr(' archivebox init')
raise SystemExit(2)
def check_migrations(config: benedict):
- output_dir = archivebox.DATA_DIR
-
from ..index.sql import list_migrations
pending_migrations = [name for status, name in list_migrations() if not status]
if pending_migrations:
stderr('[X] This collection was created with an older version of ArchiveBox and must be upgraded first.', color='lightyellow')
- stderr(f' {output_dir}')
+ stderr(f' {DATA_DIR}')
stderr()
stderr(f' To upgrade it to the latest version and apply the {len(pending_migrations)} pending migrations, run:')
stderr(' archivebox init')
raise SystemExit(3)
- archivebox.CONSTANTS.SOURCES_DIR.mkdir(exist_ok=True)
- archivebox.CONSTANTS.LOGS_DIR.mkdir(exist_ok=True)
- archivebox.CONSTANTS.CACHE_DIR.mkdir(exist_ok=True)
- (archivebox.CONSTANTS.LIB_DIR / 'bin').mkdir(exist_ok=True, parents=True)
- (archivebox.CONSTANTS.PERSONAS_DIR / 'Default').mkdir(exist_ok=True, parents=True)
+ CONSTANTS.SOURCES_DIR.mkdir(exist_ok=True)
+ CONSTANTS.LOGS_DIR.mkdir(exist_ok=True)
+ CONSTANTS.CACHE_DIR.mkdir(exist_ok=True)
+ (CONSTANTS.LIB_DIR / 'bin').mkdir(exist_ok=True, parents=True)
+ (CONSTANTS.PERSONAS_DIR / 'Default').mkdir(exist_ok=True, parents=True)
diff --git a/archivebox/parsers/__init__.py b/archivebox/parsers/__init__.py
index 99cd690d..9464c53b 100644
--- a/archivebox/parsers/__init__.py
+++ b/archivebox/parsers/__init__.py
@@ -14,7 +14,7 @@ from datetime import datetime, timezone
from pathlib import Path
from ..system import atomic_write
-from ..config import (
+from ..config.legacy import (
ANSI,
OUTPUT_DIR,
SOURCES_DIR_NAME,
diff --git a/archivebox/parsers/pocket_api.py b/archivebox/parsers/pocket_api.py
index 429f4a9d..10daf54e 100644
--- a/archivebox/parsers/pocket_api.py
+++ b/archivebox/parsers/pocket_api.py
@@ -2,24 +2,25 @@ __package__ = 'archivebox.parsers'
import re
-import archivebox
from typing import IO, Iterable, Optional
from configparser import ConfigParser
from pocket import Pocket
+from archivebox.config import CONSTANTS
+
from ..index.schema import Link
from ..util import enforce_types
from ..system import atomic_write
-from ..config import (
+from ..config.legacy import (
POCKET_CONSUMER_KEY,
POCKET_ACCESS_TOKENS,
)
COUNT_PER_PAGE = 500
-API_DB_PATH = archivebox.DATA_DIR / 'sources' / 'pocket_api.db'
+API_DB_PATH = CONSTANTS.SOURCES_DIR / 'pocket_api.db'
# search for broken protocols that sometimes come from the Pocket API
_BROKEN_PROTOCOL_RE = re.compile('^(http[s]?)(:/(?!/))')
diff --git a/archivebox/parsers/readwise_reader_api.py b/archivebox/parsers/readwise_reader_api.py
index b676dfe8..c12bdc24 100644
--- a/archivebox/parsers/readwise_reader_api.py
+++ b/archivebox/parsers/readwise_reader_api.py
@@ -3,19 +3,20 @@ __package__ = "archivebox.parsers"
import re
import requests
-import archivebox
from datetime import datetime
from typing import IO, Iterable, Optional
from configparser import ConfigParser
+from archivebox.config import CONSTANTS
+
from ..index.schema import Link
from ..util import enforce_types
from ..system import atomic_write
-from ..config import READWISE_READER_TOKENS
+from ..config.legacy import READWISE_READER_TOKENS
-API_DB_PATH = archivebox.DATA_DIR / "sources" / "readwise_reader_api.db"
+API_DB_PATH = CONSTANTS.SOURCES_DIR / "readwise_reader_api.db"
class ReadwiseReaderAPI:
diff --git a/archivebox/plugins_extractor/chrome/apps.py b/archivebox/plugins_extractor/chrome/apps.py
index 8b08ae30..35a0f77b 100644
--- a/archivebox/plugins_extractor/chrome/apps.py
+++ b/archivebox/plugins_extractor/chrome/apps.py
@@ -5,8 +5,6 @@ import platform
from pathlib import Path
from typing import List, Optional, Dict, ClassVar
-from django.conf import settings
-
# Depends on other PyPI/vendor packages:
from rich import print
from pydantic import InstanceOf, Field, model_validator
@@ -18,8 +16,6 @@ from pydantic_pkgr import (
bin_abspath,
)
-import archivebox
-
# Depends on other Django apps:
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
@@ -29,7 +25,7 @@ from abx.archivebox.base_binary import BaseBinary, env
from abx.archivebox.base_hook import BaseHook
# Depends on Other Plugins:
-from plugins_sys.config.apps import ARCHIVING_CONFIG, SHELL_CONFIG
+from archivebox.config import CONSTANTS, ARCHIVING_CONFIG, SHELL_CONFIG
from plugins_pkg.puppeteer.apps import PUPPETEER_BINPROVIDER
from plugins_pkg.playwright.apps import PLAYWRIGHT_BINPROVIDER
@@ -217,7 +213,7 @@ class ChromeBinary(BaseBinary):
}
@staticmethod
- def symlink_to_lib(binary, bin_dir=archivebox.CONSTANTS.LIB_BIN_DIR) -> None:
+ def symlink_to_lib(binary, bin_dir=CONSTANTS.LIB_BIN_DIR) -> None:
if not (binary.abspath and binary.abspath.exists()):
return
diff --git a/archivebox/plugins_extractor/readability/apps.py b/archivebox/plugins_extractor/readability/apps.py
index 14b0a3a6..5af8de7a 100644
--- a/archivebox/plugins_extractor/readability/apps.py
+++ b/archivebox/plugins_extractor/readability/apps.py
@@ -18,7 +18,7 @@ from abx.archivebox.base_extractor import BaseExtractor
from abx.archivebox.base_hook import BaseHook
# Depends on Other Plugins:
-from plugins_sys.config.apps import ARCHIVING_CONFIG
+from archivebox.config import ARCHIVING_CONFIG
from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
###################### Config ##########################
diff --git a/archivebox/plugins_extractor/singlefile/apps.py b/archivebox/plugins_extractor/singlefile/apps.py
index e737e87a..cabfe67f 100644
--- a/archivebox/plugins_extractor/singlefile/apps.py
+++ b/archivebox/plugins_extractor/singlefile/apps.py
@@ -19,7 +19,7 @@ from abx.archivebox.base_queue import BaseQueue
from abx.archivebox.base_hook import BaseHook
# Depends on Other Plugins:
-from plugins_sys.config.apps import ARCHIVING_CONFIG
+from archivebox.config import ARCHIVING_CONFIG
from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
###################### Config ##########################
diff --git a/archivebox/plugins_extractor/ytdlp/apps.py b/archivebox/plugins_extractor/ytdlp/apps.py
index e6355103..fdab408f 100644
--- a/archivebox/plugins_extractor/ytdlp/apps.py
+++ b/archivebox/plugins_extractor/ytdlp/apps.py
@@ -12,7 +12,7 @@ from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
from abx.archivebox.base_hook import BaseHook
-from plugins_sys.config.apps import ARCHIVING_CONFIG
+from archivebox.config import ARCHIVING_CONFIG
from plugins_pkg.pip.apps import pip
###################### Config ##########################
diff --git a/archivebox/plugins_pkg/npm/apps.py b/archivebox/plugins_pkg/npm/apps.py
index 31e92c4f..5923b9e6 100644
--- a/archivebox/plugins_pkg/npm/apps.py
+++ b/archivebox/plugins_pkg/npm/apps.py
@@ -1,16 +1,14 @@
-__package__ = 'archivebox.plugins_pkg.npm'
-
-import archivebox
+__package__ = 'plugins_pkg.npm'
from pathlib import Path
from typing import List, Optional
-from django.conf import settings
-
from pydantic import InstanceOf, model_validator
from pydantic_pkgr import BinProvider, NpmProvider, BinName, PATHStr, BinProviderName
+from archivebox.config import DATA_DIR, CONSTANTS
+
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet
from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
@@ -36,8 +34,8 @@ DEFAULT_GLOBAL_CONFIG = {
NPM_CONFIG = NpmDependencyConfigs(**DEFAULT_GLOBAL_CONFIG)
-OLD_NODE_BIN_PATH = archivebox.DATA_DIR / 'node_modules' / '.bin'
-NEW_NODE_BIN_PATH = archivebox.CONSTANTS.LIB_NPM_DIR / 'node_modules' / '.bin'
+OLD_NODE_BIN_PATH = DATA_DIR / 'node_modules' / '.bin'
+NEW_NODE_BIN_PATH = CONSTANTS.LIB_NPM_DIR / 'node_modules' / '.bin'
class SystemNpmProvider(NpmProvider, BaseBinProvider):
name: BinProviderName = "sys_npm"
@@ -48,7 +46,7 @@ class LibNpmProvider(NpmProvider, BaseBinProvider):
name: BinProviderName = "lib_npm"
PATH: PATHStr = str(OLD_NODE_BIN_PATH)
- npm_prefix: Optional[Path] = archivebox.CONSTANTS.LIB_NPM_DIR
+ npm_prefix: Optional[Path] = CONSTANTS.LIB_NPM_DIR
@model_validator(mode='after')
def validate_path(self):
diff --git a/archivebox/plugins_pkg/pip/apps.py b/archivebox/plugins_pkg/pip/apps.py
index 78021c48..d7596aec 100644
--- a/archivebox/plugins_pkg/pip/apps.py
+++ b/archivebox/plugins_pkg/pip/apps.py
@@ -3,18 +3,19 @@ __package__ = 'archivebox.plugins_pkg.pip'
import os
import sys
import inspect
-import archivebox
from pathlib import Path
from typing import List, Dict, Optional, ClassVar
from pydantic import InstanceOf, Field, model_validator
-import abx
import django
from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type]
from django.core.checks import Error, Tags
-
from pydantic_pkgr import BinProvider, PipProvider, BinName, BinProviderName, ProviderLookupDict, SemVer
+
+from archivebox.config import CONSTANTS, VERSION
+
+import abx
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_check import BaseCheck
@@ -70,7 +71,7 @@ class LibPipBinProvider(PipProvider, BaseBinProvider):
name: BinProviderName = "lib_pip"
INSTALLER_BIN: BinName = "pip"
- pip_venv: Optional[Path] = archivebox.CONSTANTS.LIB_PIP_DIR / 'venv'
+ pip_venv: Optional[Path] = CONSTANTS.LIB_PIP_DIR / 'venv'
SYS_PIP_BINPROVIDER = SystemPipBinProvider()
PIPX_PIP_BINPROVIDER = SystemPipxBinProvider()
@@ -84,10 +85,10 @@ class ArchiveboxBinary(BaseBinary):
binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
- VENV_PIP_BINPROVIDER.name: {'packages': lambda: [], 'version': lambda: archivebox.__version__},
- SYS_PIP_BINPROVIDER.name: {'packages': lambda: [], 'version': lambda: archivebox.__version__},
- apt.name: {'packages': lambda: [], 'version': lambda: archivebox.__version__},
- brew.name: {'packages': lambda: [], 'version': lambda: archivebox.__version__},
+ VENV_PIP_BINPROVIDER.name: {'packages': lambda: [], 'version': lambda: VERSION},
+ SYS_PIP_BINPROVIDER.name: {'packages': lambda: [], 'version': lambda: VERSION},
+ apt.name: {'packages': lambda: [], 'version': lambda: VERSION},
+ brew.name: {'packages': lambda: [], 'version': lambda: VERSION},
}
ARCHIVEBOX_BINARY = ArchiveboxBinary()
diff --git a/archivebox/plugins_pkg/playwright/apps.py b/archivebox/plugins_pkg/playwright/apps.py
index 8c01c997..1cb5d765 100644
--- a/archivebox/plugins_pkg/playwright/apps.py
+++ b/archivebox/plugins_pkg/playwright/apps.py
@@ -2,8 +2,6 @@ import platform
from pathlib import Path
from typing import List, Optional, Dict, ClassVar
-from django.conf import settings
-
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, computed_field, Field
from pydantic_pkgr import (
@@ -19,7 +17,7 @@ from pydantic_pkgr import (
DEFAULT_ENV_PATH,
)
-import archivebox
+from archivebox.config import CONSTANTS
# Depends on other Django apps:
from abx.archivebox.base_plugin import BasePlugin
@@ -47,7 +45,7 @@ class PlaywrightConfigs(BaseConfigSet):
PLAYWRIGHT_CONFIG = PlaywrightConfigs()
-LIB_DIR_BROWSERS = archivebox.CONSTANTS.LIB_BROWSERS_DIR
+LIB_DIR_BROWSERS = CONSTANTS.LIB_BROWSERS_DIR
@@ -65,7 +63,7 @@ class PlaywrightBinProvider(BaseBinProvider):
name: BinProviderName = "playwright"
INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name
- PATH: PATHStr = f"{archivebox.CONSTANTS.LIB_BIN_DIR}:{DEFAULT_ENV_PATH}"
+ PATH: PATHStr = f"{CONSTANTS.LIB_BIN_DIR}:{DEFAULT_ENV_PATH}"
puppeteer_browsers_dir: Optional[Path] = (
Path("~/Library/Caches/ms-playwright").expanduser() # macos playwright cache dir
diff --git a/archivebox/plugins_pkg/puppeteer/apps.py b/archivebox/plugins_pkg/puppeteer/apps.py
index f2d4adf0..8314fb5a 100644
--- a/archivebox/plugins_pkg/puppeteer/apps.py
+++ b/archivebox/plugins_pkg/puppeteer/apps.py
@@ -2,8 +2,6 @@ import platform
from pathlib import Path
from typing import List, Optional, Dict, ClassVar
-from django.conf import settings
-
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field
from pydantic_pkgr import (
@@ -16,7 +14,7 @@ from pydantic_pkgr import (
HostBinPath,
)
-import archivebox
+from archivebox.config import CONSTANTS
# Depends on other Django apps:
from abx.archivebox.base_plugin import BasePlugin
@@ -45,7 +43,7 @@ class PuppeteerConfigs(BaseConfigSet):
PUPPETEER_CONFIG = PuppeteerConfigs()
-LIB_DIR_BROWSERS = archivebox.CONSTANTS.LIB_BROWSERS_DIR
+LIB_DIR_BROWSERS = CONSTANTS.LIB_BROWSERS_DIR
class PuppeteerBinary(BaseBinary):
@@ -61,7 +59,7 @@ class PuppeteerBinProvider(BaseBinProvider):
name: BinProviderName = "puppeteer"
INSTALLER_BIN: BinName = "npx"
- PATH: PATHStr = str(archivebox.CONSTANTS.LIB_BIN_DIR)
+ PATH: PATHStr = str(CONSTANTS.LIB_BIN_DIR)
puppeteer_browsers_dir: Optional[Path] = LIB_DIR_BROWSERS
puppeteer_install_args: List[str] = ["@puppeteer/browsers", "install", "--path", str(LIB_DIR_BROWSERS)]
@@ -140,7 +138,7 @@ PUPPETEER_BINPROVIDER = PuppeteerBinProvider()
# ALTERNATIVE INSTALL METHOD using Ansible:
# install_playbook = self.plugin_dir / 'install_puppeteer.yml'
-# chrome_bin = run_playbook(install_playbook, data_dir=archivebox.DATA_DIR, quiet=quiet).BINARIES.chrome
+# chrome_bin = run_playbook(install_playbook, data_dir=DATA_DIR, quiet=quiet).BINARIES.chrome
# return self.__class__.model_validate(
# {
# **self.model_dump(),
diff --git a/archivebox/plugins_search/ripgrep/apps.py b/archivebox/plugins_search/ripgrep/apps.py
index 0e597f8e..1d44d84b 100644
--- a/archivebox/plugins_search/ripgrep/apps.py
+++ b/archivebox/plugins_search/ripgrep/apps.py
@@ -6,8 +6,6 @@ from subprocess import run
from typing import List, Dict, ClassVar, Iterable
# from typing_extensions import Self
-import archivebox
-
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName
@@ -20,7 +18,7 @@ from abx.archivebox.base_hook import BaseHook
from abx.archivebox.base_searchbackend import BaseSearchBackend
# Depends on Other Plugins:
-from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG
+from archivebox.config import CONSTANTS, SEARCH_BACKEND_CONFIG
###################### Config ##########################
@@ -38,7 +36,7 @@ class RipgrepConfig(BaseConfigSet):
'--files-with-matches',
'--regexp',
])
- RIPGREP_SEARCH_DIR: Path = archivebox.CONSTANTS.ARCHIVE_DIR
+ RIPGREP_SEARCH_DIR: Path = CONSTANTS.ARCHIVE_DIR
RIPGREP_CONFIG = RipgrepConfig()
diff --git a/archivebox/plugins_search/sonic/apps.py b/archivebox/plugins_search/sonic/apps.py
index 5bf37044..97f7b816 100644
--- a/archivebox/plugins_search/sonic/apps.py
+++ b/archivebox/plugins_search/sonic/apps.py
@@ -1,11 +1,8 @@
__package__ = 'archivebox.plugins_search.sonic'
-import os
import sys
from typing import List, Dict, ClassVar, Generator, cast
-from django.conf import settings
-
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field, model_validator
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName
@@ -18,7 +15,7 @@ from abx.archivebox.base_hook import BaseHook
from abx.archivebox.base_searchbackend import BaseSearchBackend
# Depends on Other Plugins:
-from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG
+from archivebox.config import SEARCH_BACKEND_CONFIG
SONIC_LIB = None
try:
diff --git a/archivebox/plugins_search/sqlite/apps.py b/archivebox/plugins_search/sqlite/apps.py
index fe5949f6..28209b0f 100644
--- a/archivebox/plugins_search/sqlite/apps.py
+++ b/archivebox/plugins_search/sqlite/apps.py
@@ -17,7 +17,7 @@ from abx.archivebox.base_hook import BaseHook
from abx.archivebox.base_searchbackend import BaseSearchBackend
# Depends on Other Plugins:
-from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG
+from archivebox.config import SEARCH_BACKEND_CONFIG
diff --git a/archivebox/plugins_sys/config/__init__.py b/archivebox/plugins_sys/config/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/archivebox/plugins_sys/config/constants.py b/archivebox/plugins_sys/config/constants.py
deleted file mode 100644
index 7a5c63e0..00000000
--- a/archivebox/plugins_sys/config/constants.py
+++ /dev/null
@@ -1 +0,0 @@
-from archivebox.constants import *
diff --git a/archivebox/queues/settings.py b/archivebox/queues/settings.py
index 50a60ce2..0244e740 100644
--- a/archivebox/queues/settings.py
+++ b/archivebox/queues/settings.py
@@ -1,11 +1,10 @@
from pathlib import Path
+from archivebox.config import DATA_DIR, CONSTANTS
-import archivebox
-OUTPUT_DIR = archivebox.DATA_DIR
-LOGS_DIR = archivebox.CONSTANTS.LOGS_DIR
-
-TMP_DIR = archivebox.CONSTANTS.TMP_DIR
+OUTPUT_DIR = DATA_DIR
+LOGS_DIR = CONSTANTS.LOGS_DIR
+TMP_DIR = CONSTANTS.TMP_DIR
Path.mkdir(TMP_DIR, exist_ok=True)
CONFIG_FILE = TMP_DIR / "supervisord.conf"
diff --git a/archivebox/search/__init__.py b/archivebox/search/__init__.py
index befbd675..29eccee5 100644
--- a/archivebox/search/__init__.py
+++ b/archivebox/search/__init__.py
@@ -6,9 +6,9 @@ from django.conf import settings
from archivebox.index.schema import Link
from archivebox.util import enforce_types
-from archivebox.config import stderr
+from archivebox.misc.logging import stderr
-# from archivebox.plugins_sys.config.apps import settings.CONFIGS.SearchBackendConfig
+# from archivebox.config import settings.CONFIGS.SearchBackendConfig
from .utils import get_indexable_content, log_index_started
diff --git a/archivebox/search/utils.py b/archivebox/search/utils.py
index 723c7fb5..55a1fa7a 100644
--- a/archivebox/search/utils.py
+++ b/archivebox/search/utils.py
@@ -1,7 +1,7 @@
from django.db.models import QuerySet
from archivebox.util import enforce_types
-from archivebox.config import ANSI
+from archivebox.config.legacy import ANSI
def log_index_started(url):
print('{green}[*] Indexing url: {} in the search index {reset}'.format(url, **ANSI))
diff --git a/archivebox/system.py b/archivebox/system.py
index cae487e5..4eaa94a0 100644
--- a/archivebox/system.py
+++ b/archivebox/system.py
@@ -15,7 +15,7 @@ from crontab import CronTab
from atomicwrites import atomic_write as lib_atomic_write
from .util import enforce_types, ExtendedEncoder
-from .config import OUTPUT_PERMISSIONS, DIR_OUTPUT_PERMISSIONS, ENFORCE_ATOMIC_WRITES
+from .config.legacy import OUTPUT_PERMISSIONS, DIR_OUTPUT_PERMISSIONS, ENFORCE_ATOMIC_WRITES
def run(cmd, *args, input=None, capture_output=True, timeout=None, check=False, text=False, start_new_session=True, **kwargs):
diff --git a/archivebox/util.py b/archivebox/util.py
index b26333e0..8c30670e 100644
--- a/archivebox/util.py
+++ b/archivebox/util.py
@@ -25,8 +25,8 @@ except ImportError:
detect_encoding = lambda rawdata: "utf-8"
-from archivebox.constants import STATICFILE_EXTENSIONS
-from plugins_sys.config.apps import ARCHIVING_CONFIG
+from archivebox.config.constants import STATICFILE_EXTENSIONS
+from archivebox.config import ARCHIVING_CONFIG
from .misc.logging import COLOR_DICT