diff --git a/archivebox/__init__.py b/archivebox/__init__.py index bccb2314..5eaa9e8a 100755 --- a/archivebox/__init__.py +++ b/archivebox/__init__.py @@ -16,7 +16,7 @@ if str(PACKAGE_DIR) not in sys.path: from .config.constants import CONSTANTS, VERSION, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa -os.environ['OUTPUT_DIR'] = str(DATA_DIR) +os.environ['ARCHIVEBOX_DATA_DIR'] = str(DATA_DIR) os.environ['DJANGO_SETTINGS_MODULE'] = 'core.settings' # print('INSTALLING MONKEY PATCHES') diff --git a/archivebox/cli/archivebox_add.py b/archivebox/cli/archivebox_add.py index 8b976474..8c44b18b 100644 --- a/archivebox/cli/archivebox_add.py +++ b/archivebox/cli/archivebox_add.py @@ -8,10 +8,11 @@ import argparse from typing import List, Optional, IO -from ..main import add from archivebox.misc.util import docstring +from archivebox.config import DATA_DIR, ARCHIVING_CONFIG + +from ..main import add from ..parsers import PARSERS -from ..config.legacy import OUTPUT_DIR, ONLY_NEW from ..logging_util import SmartFormatter, accept_stdin, stderr @@ -32,7 +33,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional parser.add_argument( '--update', #'-u', action='store_true', - default=not ONLY_NEW, # when ONLY_NEW=True we skip updating old links + default=not ARCHIVING_CONFIG.ONLY_NEW, # when ONLY_NEW=True we skip updating old links help="Also retry previously skipped/failed links when adding new links", ) parser.add_argument( @@ -117,7 +118,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional init=command.init, extractors=command.extract, parser=command.parser, - out_dir=pwd or OUTPUT_DIR, + out_dir=pwd or DATA_DIR, ) diff --git a/archivebox/cli/archivebox_config.py b/archivebox/cli/archivebox_config.py index 50d1a3af..f96829ed 100644 --- a/archivebox/cli/archivebox_config.py +++ b/archivebox/cli/archivebox_config.py @@ -5,12 +5,13 @@ __command__ = 'archivebox config' import sys import argparse +from pathlib import Path from 
typing import Optional, List, IO -from ..main import config from archivebox.misc.util import docstring -from ..config.legacy import OUTPUT_DIR +from archivebox.config import DATA_DIR +from ..main import config from ..logging_util import SmartFormatter, accept_stdin @@ -56,7 +57,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional get=command.get, set=command.set, reset=command.reset, - out_dir=pwd or OUTPUT_DIR, + out_dir=Path(pwd) if pwd else DATA_DIR, ) diff --git a/archivebox/cli/archivebox_help.py b/archivebox/cli/archivebox_help.py index 425b25d1..de47e6a8 100755 --- a/archivebox/cli/archivebox_help.py +++ b/archivebox/cli/archivebox_help.py @@ -5,12 +5,12 @@ __command__ = 'archivebox help' import sys import argparse - +from pathlib import Path from typing import Optional, List, IO -from ..main import help from archivebox.misc.util import docstring -from ..config.legacy import OUTPUT_DIR +from archivebox.config import DATA_DIR +from ..main import help from ..logging_util import SmartFormatter, reject_stdin @@ -25,7 +25,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional parser.parse_args(args or ()) reject_stdin(__command__, stdin) - help(out_dir=pwd or OUTPUT_DIR) + help(out_dir=Path(pwd) if pwd else DATA_DIR) if __name__ == '__main__': diff --git a/archivebox/cli/archivebox_init.py b/archivebox/cli/archivebox_init.py index f94576b3..0c3e8417 100755 --- a/archivebox/cli/archivebox_init.py +++ b/archivebox/cli/archivebox_init.py @@ -10,7 +10,7 @@ from typing import Optional, List, IO from ..main import init from archivebox.misc.util import docstring -from ..config.legacy import OUTPUT_DIR +from archivebox.config import DATA_DIR from ..logging_util import SmartFormatter, reject_stdin @@ -44,7 +44,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional force=command.force, quick=command.quick, setup=command.setup, - out_dir=pwd or OUTPUT_DIR, + out_dir=pwd or 
DATA_DIR, ) diff --git a/archivebox/cli/archivebox_list.py b/archivebox/cli/archivebox_list.py index 0276f1a1..a9733377 100644 --- a/archivebox/cli/archivebox_list.py +++ b/archivebox/cli/archivebox_list.py @@ -5,12 +5,12 @@ __command__ = 'archivebox list' import sys import argparse - +from pathlib import Path from typing import Optional, List, IO -from ..main import list_all from archivebox.misc.util import docstring -from ..config.legacy import OUTPUT_DIR +from archivebox.config import DATA_DIR +from ..main import list_all from ..index import ( LINK_FILTERS, get_indexed_folders, @@ -131,7 +131,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional json=command.json, html=command.html, with_headers=command.with_headers, - out_dir=pwd or OUTPUT_DIR, + out_dir=Path(pwd) if pwd else DATA_DIR, ) raise SystemExit(not matching_folders) diff --git a/archivebox/cli/archivebox_manage.py b/archivebox/cli/archivebox_manage.py index 2aa5288f..1ae8e2d5 100644 --- a/archivebox/cli/archivebox_manage.py +++ b/archivebox/cli/archivebox_manage.py @@ -4,19 +4,19 @@ __package__ = 'archivebox.cli' __command__ = 'archivebox manage' import sys - +from pathlib import Path from typing import Optional, List, IO -from ..main import manage from archivebox.misc.util import docstring -from ..config.legacy import OUTPUT_DIR +from archivebox.config import DATA_DIR +from ..main import manage @docstring(manage.__doc__) def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None: manage( args=args, - out_dir=pwd or OUTPUT_DIR, + out_dir=Path(pwd) if pwd else DATA_DIR, ) diff --git a/archivebox/cli/archivebox_oneshot.py b/archivebox/cli/archivebox_oneshot.py index 784091c3..61dc2bb4 100644 --- a/archivebox/cli/archivebox_oneshot.py +++ b/archivebox/cli/archivebox_oneshot.py @@ -9,10 +9,10 @@ import argparse from pathlib import Path from typing import List, Optional, IO -from ..main import oneshot from archivebox.misc.util import 
docstring -from ..config.legacy import OUTPUT_DIR +from archivebox.config import DATA_DIR from ..logging_util import SmartFormatter, accept_stdin, stderr +from ..main import oneshot @docstring(oneshot.__doc__) @@ -46,7 +46,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional parser.add_argument( '--out-dir', type=str, - default=OUTPUT_DIR, + default=DATA_DIR, help= "Path to save the single archive folder to, e.g. ./example.com_archive" ) command = parser.parse_args(args or ()) diff --git a/archivebox/cli/archivebox_remove.py b/archivebox/cli/archivebox_remove.py index 92bb98ac..9e7e434b 100644 --- a/archivebox/cli/archivebox_remove.py +++ b/archivebox/cli/archivebox_remove.py @@ -5,13 +5,13 @@ __command__ = 'archivebox remove' import sys import argparse - +from pathlib import Path from typing import Optional, List, IO -from ..main import remove from archivebox.misc.util import docstring -from ..config.legacy import OUTPUT_DIR +from archivebox.config import DATA_DIR from ..logging_util import SmartFormatter, accept_stdin +from ..main import remove @docstring(remove.__doc__) @@ -74,7 +74,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional after=command.after, yes=command.yes, delete=command.delete, - out_dir=pwd or OUTPUT_DIR, + out_dir=Path(pwd) if pwd else DATA_DIR, ) diff --git a/archivebox/cli/archivebox_schedule.py b/archivebox/cli/archivebox_schedule.py index bd4331af..f6920b05 100644 --- a/archivebox/cli/archivebox_schedule.py +++ b/archivebox/cli/archivebox_schedule.py @@ -5,13 +5,13 @@ __command__ = 'archivebox schedule' import sys import argparse - +from pathlib import Path from typing import Optional, List, IO -from ..main import schedule from archivebox.misc.util import docstring -from ..config.legacy import OUTPUT_DIR +from archivebox.config import DATA_DIR from ..logging_util import SmartFormatter, reject_stdin +from ..main import schedule @docstring(schedule.__doc__) @@ -108,7 +108,7 @@ 
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional overwrite=command.overwrite, update=command.update, import_path=command.import_path, - out_dir=pwd or OUTPUT_DIR, + out_dir=Path(pwd) if pwd else DATA_DIR, ) diff --git a/archivebox/cli/archivebox_server.py b/archivebox/cli/archivebox_server.py index e37b1f87..05ac96e4 100644 --- a/archivebox/cli/archivebox_server.py +++ b/archivebox/cli/archivebox_server.py @@ -5,13 +5,13 @@ __command__ = 'archivebox server' import sys import argparse - +from pathlib import Path from typing import Optional, List, IO -from ..main import server from archivebox.misc.util import docstring -from ..config.legacy import OUTPUT_DIR, BIND_ADDR +from archivebox.config import DATA_DIR, SERVER_CONFIG from ..logging_util import SmartFormatter, reject_stdin +from ..main import server @docstring(server.__doc__) def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None: @@ -25,7 +25,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional 'runserver_args', nargs='*', type=str, - default=[BIND_ADDR], + default=[SERVER_CONFIG.BIND_ADDR], help='Arguments to pass to Django runserver' ) parser.add_argument( @@ -68,7 +68,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional init=command.init, quick_init=command.quick_init, createsuperuser=command.createsuperuser, - out_dir=pwd or OUTPUT_DIR, + out_dir=Path(pwd) if pwd else DATA_DIR, ) diff --git a/archivebox/cli/archivebox_setup.py b/archivebox/cli/archivebox_setup.py index 160a25d1..62a686b3 100755 --- a/archivebox/cli/archivebox_setup.py +++ b/archivebox/cli/archivebox_setup.py @@ -5,13 +5,13 @@ __command__ = 'archivebox setup' import sys import argparse - +from pathlib import Path from typing import Optional, List, IO -from ..main import setup from archivebox.misc.util import docstring -from ..config.legacy import OUTPUT_DIR +from archivebox.config import DATA_DIR from 
..logging_util import SmartFormatter, reject_stdin +from ..main import setup @docstring(setup.__doc__) @@ -32,7 +32,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional setup( # force=command.force, - out_dir=pwd or OUTPUT_DIR, + out_dir=Path(pwd) if pwd else DATA_DIR, ) diff --git a/archivebox/cli/archivebox_shell.py b/archivebox/cli/archivebox_shell.py index f084560e..c904e0a2 100644 --- a/archivebox/cli/archivebox_shell.py +++ b/archivebox/cli/archivebox_shell.py @@ -5,13 +5,13 @@ __command__ = 'archivebox shell' import sys import argparse - +from pathlib import Path from typing import Optional, List, IO -from ..main import shell from archivebox.misc.util import docstring -from ..config.legacy import OUTPUT_DIR +from archivebox.config import DATA_DIR from ..logging_util import SmartFormatter, reject_stdin +from ..main import shell @docstring(shell.__doc__) @@ -26,7 +26,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional reject_stdin(__command__, stdin) shell( - out_dir=pwd or OUTPUT_DIR, + out_dir=Path(pwd) if pwd else DATA_DIR, ) diff --git a/archivebox/cli/archivebox_status.py b/archivebox/cli/archivebox_status.py index 3401cade..22e37973 100644 --- a/archivebox/cli/archivebox_status.py +++ b/archivebox/cli/archivebox_status.py @@ -5,13 +5,13 @@ __command__ = 'archivebox status' import sys import argparse - +from pathlib import Path from typing import Optional, List, IO -from ..main import status from archivebox.misc.util import docstring -from ..config.legacy import OUTPUT_DIR +from archivebox.config import DATA_DIR from ..logging_util import SmartFormatter, reject_stdin +from ..main import status @docstring(status.__doc__) @@ -25,7 +25,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional parser.parse_args(args or ()) reject_stdin(__command__, stdin) - status(out_dir=pwd or OUTPUT_DIR) + status(out_dir=Path(pwd) if pwd else DATA_DIR) if __name__ == '__main__': 
diff --git a/archivebox/cli/archivebox_update.py b/archivebox/cli/archivebox_update.py index 389ad79d..4491c356 100644 --- a/archivebox/cli/archivebox_update.py +++ b/archivebox/cli/archivebox_update.py @@ -5,12 +5,11 @@ __command__ = 'archivebox update' import sys import argparse - +from pathlib import Path from typing import List, Optional, IO -from ..main import update from archivebox.misc.util import docstring -from ..config.legacy import OUTPUT_DIR +from archivebox.config import DATA_DIR from ..index import ( LINK_FILTERS, get_indexed_folders, @@ -25,6 +24,7 @@ from ..index import ( get_unrecognized_folders, ) from ..logging_util import SmartFormatter, accept_stdin +from ..main import update @docstring(update.__doc__) @@ -127,7 +127,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional status=command.status, after=command.after, before=command.before, - out_dir=pwd or OUTPUT_DIR, + out_dir=Path(pwd) if pwd else DATA_DIR, extractors=command.extract, ) diff --git a/archivebox/cli/archivebox_version.py b/archivebox/cli/archivebox_version.py index 1e0c74b8..a5d41dbb 100755 --- a/archivebox/cli/archivebox_version.py +++ b/archivebox/cli/archivebox_version.py @@ -5,13 +5,13 @@ __command__ = 'archivebox version' import sys import argparse - +from pathlib import Path from typing import Optional, List, IO -from ..main import version from archivebox.misc.util import docstring -from ..config.legacy import OUTPUT_DIR +from archivebox.config import DATA_DIR from ..logging_util import SmartFormatter, reject_stdin +from ..main import version @docstring(version.__doc__) @@ -32,7 +32,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional version( quiet=command.quiet, - out_dir=pwd or OUTPUT_DIR, + out_dir=Path(pwd) if pwd else DATA_DIR, ) diff --git a/archivebox/cli/tests.py b/archivebox/cli/tests.py index cc9a8e52..ec4de0e5 100644 --- a/archivebox/cli/tests.py +++ b/archivebox/cli/tests.py @@ -15,7 +15,7 @@ 
TEST_CONFIG = { 'USE_COLOR': 'False', 'SHOW_PROGRESS': 'False', - 'OUTPUT_DIR': 'data.tests', + 'DATA_DIR': 'data.tests', 'SAVE_ARCHIVE_DOT_ORG': 'False', 'SAVE_TITLE': 'False', @@ -27,12 +27,12 @@ TEST_CONFIG = { 'USE_YOUTUBEDL': 'False', } -OUTPUT_DIR = 'data.tests' +DATA_DIR = 'data.tests' os.environ.update(TEST_CONFIG) from ..main import init from ..index import load_main_index -from ..config.legacy import ( +from archivebox.config.constants import ( SQL_INDEX_FILENAME, JSON_INDEX_FILENAME, HTML_INDEX_FILENAME, @@ -101,22 +101,22 @@ def output_hidden(show_failing=True): class TestInit(unittest.TestCase): def setUp(self): - os.makedirs(OUTPUT_DIR, exist_ok=True) + os.makedirs(DATA_DIR, exist_ok=True) def tearDown(self): - shutil.rmtree(OUTPUT_DIR, ignore_errors=True) + shutil.rmtree(DATA_DIR, ignore_errors=True) def test_basic_init(self): with output_hidden(): archivebox_init.main([]) - assert (Path(OUTPUT_DIR) / SQL_INDEX_FILENAME).exists() - assert (Path(OUTPUT_DIR) / JSON_INDEX_FILENAME).exists() - assert (Path(OUTPUT_DIR) / HTML_INDEX_FILENAME).exists() - assert len(load_main_index(out_dir=OUTPUT_DIR)) == 0 + assert (Path(DATA_DIR) / SQL_INDEX_FILENAME).exists() + assert (Path(DATA_DIR) / JSON_INDEX_FILENAME).exists() + assert (Path(DATA_DIR) / HTML_INDEX_FILENAME).exists() + assert len(load_main_index(out_dir=DATA_DIR)) == 0 def test_conflicting_init(self): - with open(Path(OUTPUT_DIR) / 'test_conflict.txt', 'w+', encoding='utf-8') as f: + with open(Path(DATA_DIR) / 'test_conflict.txt', 'w+', encoding='utf-8') as f: f.write('test') try: @@ -126,11 +126,11 @@ class TestInit(unittest.TestCase): except SystemExit: pass - assert not (Path(OUTPUT_DIR) / SQL_INDEX_FILENAME).exists() - assert not (Path(OUTPUT_DIR) / JSON_INDEX_FILENAME).exists() - assert not (Path(OUTPUT_DIR) / HTML_INDEX_FILENAME).exists() + assert not (Path(DATA_DIR) / SQL_INDEX_FILENAME).exists() + assert not (Path(DATA_DIR) / JSON_INDEX_FILENAME).exists() + assert not (Path(DATA_DIR) / 
HTML_INDEX_FILENAME).exists() try: - load_main_index(out_dir=OUTPUT_DIR) + load_main_index(out_dir=DATA_DIR) assert False, 'load_main_index should raise an exception when no index is present' except Exception: pass @@ -138,36 +138,36 @@ class TestInit(unittest.TestCase): def test_no_dirty_state(self): with output_hidden(): init() - shutil.rmtree(OUTPUT_DIR, ignore_errors=True) + shutil.rmtree(DATA_DIR, ignore_errors=True) with output_hidden(): init() class TestAdd(unittest.TestCase): def setUp(self): - os.makedirs(OUTPUT_DIR, exist_ok=True) + os.makedirs(DATA_DIR, exist_ok=True) with output_hidden(): init() def tearDown(self): - shutil.rmtree(OUTPUT_DIR, ignore_errors=True) + shutil.rmtree(DATA_DIR, ignore_errors=True) def test_add_arg_url(self): with output_hidden(): archivebox_add.main(['https://getpocket.com/users/nikisweeting/feed/all']) - all_links = load_main_index(out_dir=OUTPUT_DIR) + all_links = load_main_index(out_dir=DATA_DIR) assert len(all_links) == 30 def test_add_arg_file(self): - test_file = Path(OUTPUT_DIR) / 'test.txt' + test_file = Path(DATA_DIR) / 'test.txt' with open(test_file, 'w+', encoding='utf') as f: f.write(test_urls) with output_hidden(): archivebox_add.main([test_file]) - all_links = load_main_index(out_dir=OUTPUT_DIR) + all_links = load_main_index(out_dir=DATA_DIR) assert len(all_links) == 12 os.remove(test_file) @@ -175,40 +175,40 @@ class TestAdd(unittest.TestCase): with output_hidden(): archivebox_add.main([], stdin=test_urls) - all_links = load_main_index(out_dir=OUTPUT_DIR) + all_links = load_main_index(out_dir=DATA_DIR) assert len(all_links) == 12 class TestRemove(unittest.TestCase): def setUp(self): - os.makedirs(OUTPUT_DIR, exist_ok=True) + os.makedirs(DATA_DIR, exist_ok=True) with output_hidden(): init() archivebox_add.main([], stdin=test_urls) # def tearDown(self): - # shutil.rmtree(OUTPUT_DIR, ignore_errors=True) + # shutil.rmtree(DATA_DIR, ignore_errors=True) def test_remove_exact(self): with output_hidden(): 
archivebox_remove.main(['--yes', '--delete', 'https://example5.com/']) - all_links = load_main_index(out_dir=OUTPUT_DIR) + all_links = load_main_index(out_dir=DATA_DIR) assert len(all_links) == 11 def test_remove_regex(self): with output_hidden(): archivebox_remove.main(['--yes', '--delete', '--filter-type=regex', r'http(s)?:\/\/(.+\.)?(example\d\.com)']) - all_links = load_main_index(out_dir=OUTPUT_DIR) + all_links = load_main_index(out_dir=DATA_DIR) assert len(all_links) == 4 def test_remove_domain(self): with output_hidden(): archivebox_remove.main(['--yes', '--delete', '--filter-type=domain', 'example5.com', 'example6.com']) - all_links = load_main_index(out_dir=OUTPUT_DIR) + all_links = load_main_index(out_dir=DATA_DIR) assert len(all_links) == 10 def test_remove_none(self): diff --git a/archivebox/config/config_stubs.py b/archivebox/config/config_stubs.py index ff8566b9..20c803bb 100644 --- a/archivebox/config/config_stubs.py +++ b/archivebox/config/config_stubs.py @@ -36,7 +36,6 @@ class ConfigDict(BaseConfig, benedict, total=False): IN_DOCKER: bool PACKAGE_DIR: Path - OUTPUT_DIR: Path CONFIG_FILE: Path ONLY_NEW: bool TIMEOUT: int diff --git a/archivebox/config/constants.py b/archivebox/config/constants.py index 43f59f55..a3afe3e4 100644 --- a/archivebox/config/constants.py +++ b/archivebox/config/constants.py @@ -60,7 +60,6 @@ class ConstantsDict(Mapping): LIB_DIR_NAME: str = 'lib' TMP_DIR_NAME: str = 'tmp' - OUTPUT_DIR: Path = DATA_DIR ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME diff --git a/archivebox/config/legacy.py b/archivebox/config/legacy.py index 48ed1a56..46695fd7 100644 --- a/archivebox/config/legacy.py +++ b/archivebox/config/legacy.py @@ -44,7 +44,7 @@ import django from django.db.backends.sqlite3.base import Database as sqlite3 -from .constants import CONSTANTS, TIMEZONE, OUTPUT_DIR +from .constants import CONSTANTS, TIMEZONE from .constants 
import * from .config_stubs import ( ConfigValue, @@ -57,8 +57,9 @@ from ..misc.logging import ( ) from .defaults import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG -from ..plugins_auth.ldap.apps import LDAP_CONFIG -from ..plugins_extractor.favicon.apps import FAVICON_CONFIG +from archivebox.plugins_auth.ldap.apps import LDAP_CONFIG +from archivebox.plugins_extractor.favicon.apps import FAVICON_CONFIG + ANSI = SHELL_CONFIG.ANSI LDAP = LDAP_CONFIG.LDAP_ENABLED @@ -331,7 +332,7 @@ def load_config_val(key: str, def load_config_file(out_dir: str | None=CONSTANTS.DATA_DIR) -> Optional[benedict]: - """load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf""" + """load the ini-formatted config file from DATA_DIR/ArchiveBox.conf""" config_path = CONSTANTS.CONFIG_FILE if config_path.exists(): @@ -351,7 +352,7 @@ def load_config_file(out_dir: str | None=CONSTANTS.DATA_DIR) -> Optional[benedic def write_config_file(config: Dict[str, str], out_dir: str | None=CONSTANTS.DATA_DIR) -> benedict: - """load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf""" + """write the ini-formatted config file to DATA_DIR/ArchiveBox.conf""" from archivebox.misc.system import atomic_write @@ -785,7 +786,7 @@ def bump_startup_progress_bar(): def setup_django_minimal(): # sys.path.append(str(CONSTANTS.PACKAGE_DIR)) - # os.environ.setdefault('OUTPUT_DIR', str(CONSTANTS.DATA_DIR)) + # os.environ.setdefault('ARCHIVEBOX_DATA_DIR', str(CONSTANTS.DATA_DIR)) # os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings') # django.setup() raise Exception('dont use this anymore') diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py index baa38200..113b97db 100644 --- a/archivebox/core/admin.py +++ b/archivebox/core/admin.py @@ -21,8 +21,7 @@ from django import forms from signal_webhooks.admin import WebhookAdmin from signal_webhooks.utils import get_webhook_model -from archivebox.config import VERSION - +from
archivebox.config import VERSION, DATA_DIR from archivebox.misc.util import htmldecode, urldecode from core.models import Snapshot, ArchiveResult, Tag @@ -536,11 +535,11 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin): links = [snapshot.as_link() for snapshot in queryset] if len(links) < 3: # run syncronously if there are only 1 or 2 links - archive_links(links, overwrite=True, methods=('title','favicon'), out_dir=CONFIG.OUTPUT_DIR) + archive_links(links, overwrite=True, methods=('title','favicon'), out_dir=DATA_DIR) messages.success(request, f"Title and favicon have been fetched and saved for {len(links)} URLs.") else: # otherwise run in a background worker - result = bg_archive_links((links,), kwargs={"overwrite": True, "methods": ["title", "favicon"], "out_dir": CONFIG.OUTPUT_DIR}) + result = bg_archive_links((links,), kwargs={"overwrite": True, "methods": ["title", "favicon"], "out_dir": DATA_DIR}) messages.success( request, mark_safe(f"Title and favicon are updating in the background for {len(links)} URLs. 
{result_url(result)}"), @@ -552,7 +551,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin): def update_snapshots(self, request, queryset): links = [snapshot.as_link() for snapshot in queryset] - result = bg_archive_links((links,), kwargs={"overwrite": False, "out_dir": CONFIG.OUTPUT_DIR}) + result = bg_archive_links((links,), kwargs={"overwrite": False, "out_dir": DATA_DIR}) messages.success( request, @@ -581,7 +580,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin): def overwrite_snapshots(self, request, queryset): links = [snapshot.as_link() for snapshot in queryset] - result = bg_archive_links((links,), kwargs={"overwrite": True, "out_dir": CONFIG.OUTPUT_DIR}) + result = bg_archive_links((links,), kwargs={"overwrite": True, "out_dir": DATA_DIR}) messages.success( request, @@ -592,7 +591,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin): description="☠️ Delete" ) def delete_snapshots(self, request, queryset): - remove(snapshots=queryset, yes=True, delete=True, out_dir=CONFIG.OUTPUT_DIR) + remove(snapshots=queryset, yes=True, delete=True, out_dir=DATA_DIR) messages.success( request, mark_safe(f"Succesfully deleted {queryset.count()} Snapshots. Don't forget to scrub URLs from import logs (data/sources) and error logs (data/logs) if needed."), @@ -732,7 +731,7 @@ class ArchiveResultAdmin(ABIDModelAdmin): ) def output_summary(self, result): - snapshot_dir = Path(CONFIG.OUTPUT_DIR) / str(result.pwd).split('data/', 1)[-1] + snapshot_dir = Path(DATA_DIR) / str(result.pwd).split('data/', 1)[-1] output_str = format_html( '
{}

', result.output, diff --git a/archivebox/index/__init__.py b/archivebox/index/__init__.py index 91943e72..de591332 100644 --- a/archivebox/index/__init__.py +++ b/archivebox/index/__init__.py @@ -243,7 +243,7 @@ def write_main_index(links: List[Link], out_dir: Path=DATA_DIR, created_by_id: i log_indexing_process_finished() @enforce_types -def load_main_index(out_dir: Path=DATA_DIR, warn: bool=True) -> List[Link]: +def load_main_index(out_dir: Path | str=DATA_DIR, warn: bool=True) -> List[Link]: """parse and load existing index with any new links from import_path merged in""" from core.models import Snapshot try: diff --git a/archivebox/index/sql.py b/archivebox/index/sql.py index da3329ca..892f11b7 100644 --- a/archivebox/index/sql.py +++ b/archivebox/index/sql.py @@ -8,18 +8,15 @@ from typing import List, Tuple, Iterator from django.db.models import QuerySet from django.db import transaction -from .schema import Link from archivebox.misc.util import enforce_types, parse_date -from ..config.legacy import ( - OUTPUT_DIR, - TAG_SEPARATOR_PATTERN, -) +from archivebox.config import DATA_DIR, GENERAL_CONFIG +from .schema import Link ### Main Links Index @enforce_types -def parse_sql_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]: +def parse_sql_main_index(out_dir: Path=DATA_DIR) -> Iterator[Link]: from core.models import Snapshot return ( @@ -28,7 +25,7 @@ def parse_sql_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]: ) @enforce_types -def remove_from_sql_main_index(snapshots: QuerySet, atomic: bool=False, out_dir: Path=OUTPUT_DIR) -> None: +def remove_from_sql_main_index(snapshots: QuerySet, atomic: bool=False, out_dir: Path=DATA_DIR) -> None: if atomic: with transaction.atomic(): return snapshots.delete() @@ -44,7 +41,7 @@ def write_link_to_sql_index(link: Link, created_by_id: int | None=None): info['created_by_id'] = created_by_id or get_or_create_system_user_pk() tag_list = list(dict.fromkeys( - tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, 
link.tags or '') + tag.strip() for tag in re.split(GENERAL_CONFIG.TAG_SEPARATOR_PATTERN, link.tags or '') )) info.pop('tags') @@ -95,7 +92,7 @@ def write_link_to_sql_index(link: Link, created_by_id: int | None=None): @enforce_types -def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> None: +def write_sql_main_index(links: List[Link], out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> None: for link in links: # with transaction.atomic(): # write_link_to_sql_index(link) @@ -103,7 +100,7 @@ def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, created_by @enforce_types -def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> None: +def write_sql_link_details(link: Link, out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> None: from core.models import Snapshot # with transaction.atomic(): @@ -120,7 +117,7 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR, created_by_id: snap.title = link.title tag_list = list( - {tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')} + {tag.strip() for tag in re.split(GENERAL_CONFIG.TAG_SEPARATOR_PATTERN, link.tags or '')} | set(snap.tags.values_list('name', flat=True)) ) @@ -130,7 +127,7 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR, created_by_id: @enforce_types -def list_migrations(out_dir: Path=OUTPUT_DIR) -> List[Tuple[bool, str]]: +def list_migrations(out_dir: Path=DATA_DIR) -> List[Tuple[bool, str]]: from django.core.management import call_command out = StringIO() call_command("showmigrations", list=True, stdout=out) @@ -146,7 +143,7 @@ def list_migrations(out_dir: Path=OUTPUT_DIR) -> List[Tuple[bool, str]]: return migrations @enforce_types -def apply_migrations(out_dir: Path=OUTPUT_DIR) -> List[str]: +def apply_migrations(out_dir: Path=DATA_DIR) -> List[str]: from django.core.management import call_command out1, out2 = StringIO(), StringIO() @@ 
-160,6 +157,6 @@ def apply_migrations(out_dir: Path=OUTPUT_DIR) -> List[str]: ] @enforce_types -def get_admins(out_dir: Path=OUTPUT_DIR) -> List[str]: +def get_admins(out_dir: Path=DATA_DIR) -> List[str]: from django.contrib.auth.models import User return User.objects.filter(is_superuser=True) diff --git a/archivebox/misc/tests.py b/archivebox/misc/tests.py index fca938ce..4cb34b2e 100644 --- a/archivebox/misc/tests.py +++ b/archivebox/misc/tests.py @@ -13,7 +13,6 @@ IN_DOCKER=False IN_QEMU=False PUID=501 PGID=20 -OUTPUT_DIR=/opt/archivebox/data CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf ONLY_NEW=True TIMEOUT=60 @@ -173,7 +172,6 @@ IN_DOCKER = false IN_QEMU = false PUID = 501 PGID = 20 -OUTPUT_DIR = "/opt/archivebox/data" CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf" ONLY_NEW = true TIMEOUT = 60 diff --git a/archivebox/parsers/__init__.py b/archivebox/parsers/__init__.py index cd29b6aa..e89bf155 100644 --- a/archivebox/parsers/__init__.py +++ b/archivebox/parsers/__init__.py @@ -13,21 +13,16 @@ from typing import IO, Tuple, List, Optional from datetime import datetime, timezone from pathlib import Path +from archivebox.config import DATA_DIR, CONSTANTS, SHELL_CONFIG, ARCHIVING_CONFIG from archivebox.misc.system import atomic_write -from ..config.legacy import ( - ANSI, - OUTPUT_DIR, - SOURCES_DIR_NAME, - TIMEOUT, - stderr, - hint, -) +from archivebox.misc.logging import stderr, hint from archivebox.misc.util import ( basename, htmldecode, download_url, enforce_types, ) + from ..index.schema import Link from ..logging_util import TimedProgress, log_source_saved @@ -38,7 +33,6 @@ from . import pocket_html from . import pinboard_rss from . import shaarli_rss from . import medium_rss - from . import netscape_html from . import generic_rss from . 
import generic_json @@ -79,7 +73,7 @@ def parse_links_memory(urls: List[str], root_url: Optional[str]=None): parse a list of URLS without touching the filesystem """ - timer = TimedProgress(TIMEOUT * 4) + timer = TimedProgress(ARCHIVING_CONFIG.TIMEOUT * 4) #urls = list(map(lambda x: x + "\n", urls)) file = StringIO() file.writelines(urls) @@ -98,7 +92,7 @@ def parse_links(source_file: str, root_url: Optional[str]=None, parser: str="aut RSS feed, bookmarks export, or text file """ - timer = TimedProgress(TIMEOUT * 4) + timer = TimedProgress(ARCHIVING_CONFIG.TIMEOUT * 4) with open(source_file, 'r', encoding='utf-8') as file: links, parser = run_parser_functions(file, timer, root_url=root_url, parser=parser) @@ -148,9 +142,9 @@ def run_parser_functions(to_parse: IO[str], timer, root_url: Optional[str]=None, @enforce_types -def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str: +def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=DATA_DIR) -> str: ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0] - source_path = str(out_dir / SOURCES_DIR_NAME / filename.format(ts=ts)) + source_path = str(CONSTANTS.SOURCES_DIR / filename.format(ts=ts)) referenced_texts = '' @@ -167,10 +161,10 @@ def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: @enforce_types -def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=OUTPUT_DIR) -> str: +def save_file_as_source(path: str, timeout: int=ARCHIVING_CONFIG.TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=DATA_DIR) -> str: """download a given url's content into output/sources/domain-.txt""" ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0] - source_path = str(OUTPUT_DIR / SOURCES_DIR_NAME / filename.format(basename=basename(path), ts=ts)) + source_path = str(CONSTANTS.SOURCES_DIR / filename.format(basename=basename(path), ts=ts)) if 
any(path.startswith(s) for s in ('http://', 'https://', 'ftp://')): # Source is a URL that needs to be downloaded @@ -183,9 +177,9 @@ def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{ba except Exception as e: timer.end() print('{}[!] Failed to download {}{}\n'.format( - ANSI['red'], + SHELL_CONFIG.ANSI['red'], path, - ANSI['reset'], + SHELL_CONFIG.ANSI['reset'], )) print(' ', e) raise e diff --git a/archivebox/queues/semaphores.py b/archivebox/queues/semaphores.py index e798e59c..1be98ee3 100644 --- a/archivebox/queues/semaphores.py +++ b/archivebox/queues/semaphores.py @@ -1,10 +1,11 @@ -import time import uuid from functools import wraps from django.db import connection, transaction from django.utils import timezone from huey.exceptions import TaskLockedException +from archivebox.config import CONSTANTS + class SqliteSemaphore: def __init__(self, db_path, table_name, name, value=1, timeout=None): self.db_path = db_path @@ -68,7 +69,8 @@ class SqliteSemaphore: return cursor.rowcount > 0 -LOCKS_DB_PATH = settings.CONFIG.OUTPUT_DIR / 'locks.sqlite3' +LOCKS_DB_PATH = CONSTANTS.DATABASE_FILE.parent / 'locks.sqlite3' + def lock_task_semaphore(db_path, table_name, lock_name, value=1, timeout=None): """ diff --git a/archivebox/queues/settings.py b/archivebox/queues/settings.py index 0244e740..8634cf93 100644 --- a/archivebox/queues/settings.py +++ b/archivebox/queues/settings.py @@ -2,7 +2,6 @@ from pathlib import Path from archivebox.config import DATA_DIR, CONSTANTS -OUTPUT_DIR = DATA_DIR LOGS_DIR = CONSTANTS.LOGS_DIR TMP_DIR = CONSTANTS.TMP_DIR diff --git a/etc/uwsgi.ini b/etc/uwsgi.ini index 9fa83abe..258fdb04 100644 --- a/etc/uwsgi.ini +++ b/etc/uwsgi.ini @@ -2,7 +2,7 @@ socket = 127.0.0.1:3031 chdir = ../ http = 0.0.0.0:8001 -env = OUTPUT_DIR=./data +env = DATA_DIR=./data wsgi-file = archivebox/core/wsgi.py processes = 4 threads = 1