diff --git a/archivebox/config/constants.py b/archivebox/config/constants.py index 6e1a7646..f324259d 100644 --- a/archivebox/config/constants.py +++ b/archivebox/config/constants.py @@ -202,16 +202,6 @@ class ConstantsDict(Mapping): 'enabled': True, 'is_valid': (PACKAGE_DIR / '__main__.py').exists(), }, - 'LIB_DIR': { - 'path': LIB_DIR.resolve(), - 'enabled': True, - 'is_valid': LIB_DIR.is_dir(), - }, - 'RUNTIME_CONFIG': { - 'path': TMP_DIR.resolve(), - 'enabled': True, - 'is_valid': TMP_DIR.is_dir(), - }, 'TEMPLATES_DIR': { 'path': TEMPLATES_DIR.resolve(), 'enabled': True, @@ -222,6 +212,16 @@ class ConstantsDict(Mapping): 'enabled': True, 'is_valid': CUSTOM_TEMPLATES_DIR.is_dir(), }, + 'LIB_DIR': { + 'path': LIB_DIR.resolve(), + 'enabled': True, + 'is_valid': LIB_DIR.is_dir(), + }, + 'TMP_DIR': { + 'path': TMP_DIR.resolve(), + 'enabled': True, + 'is_valid': TMP_DIR.is_dir(), + }, }) DATA_LOCATIONS = benedict({ diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py index 00c2428a..966a2380 100644 --- a/archivebox/extractors/__init__.py +++ b/archivebox/extractors/__init__.py @@ -10,7 +10,7 @@ from datetime import datetime, timezone from django.db.models import QuerySet -from ..config.legacy import ( +from archivebox.config.legacy import ( SAVE_ALLOWLIST_PTN, SAVE_DENYLIST_PTN, ) diff --git a/archivebox/index/__init__.py b/archivebox/index/__init__.py index de591332..bff099cd 100644 --- a/archivebox/index/__init__.py +++ b/archivebox/index/__init__.py @@ -12,19 +12,11 @@ from urllib.parse import urlparse from django.db.models import QuerySet, Q -from archivebox.config import DATA_DIR, CONSTANTS, SEARCH_BACKEND_CONFIG -from archivebox.misc.util import ( - scheme, - enforce_types, - ExtendedEncoder, -) -from ..misc.logging import stderr -from ..config.legacy import ( - TIMEOUT, - URL_DENYLIST_PTN, - URL_ALLOWLIST_PTN, - OUTPUT_PERMISSIONS -) +from archivebox.config import DATA_DIR, CONSTANTS, ARCHIVING_CONFIG, STORAGE_CONFIG, SEARCH_BACKEND_CONFIG +from archivebox.misc.util import scheme, enforce_types, ExtendedEncoder +from archivebox.misc.logging import stderr +from archivebox.config.legacy import URL_DENYLIST_PTN, URL_ALLOWLIST_PTN + from ..logging_util import ( TimedProgress, log_indexing_process_started, @@ -119,7 +111,7 @@ def merge_links(a: Link, b: Link) -> Link: @enforce_types def validate_links(links: Iterable[Link]) -> List[Link]: - timer = TimedProgress(TIMEOUT * 4) + timer = TimedProgress(ARCHIVING_CONFIG.TIMEOUT * 4) try: links = archivable_links(links) # remove chrome://, about:, mailto: etc. links = sorted_links(links) # deterministically sort the links based on timestamp, url @@ -211,7 +203,7 @@ def lowest_uniq_timestamp(used_timestamps: OrderedDict, timestamp: str) -> str: @enforce_types def timed_index_update(out_path: Path): log_indexing_started(out_path) - timer = TimedProgress(TIMEOUT * 2, prefix=' ') + timer = TimedProgress(ARCHIVING_CONFIG.TIMEOUT * 2, prefix=' ') try: yield finally: @@ -230,14 +222,14 @@ def write_main_index(links: List[Link], out_dir: Path=DATA_DIR, created_by_id: i try: with timed_index_update(CONSTANTS.DATABASE_FILE): write_sql_main_index(links, out_dir=out_dir, created_by_id=created_by_id) - os.chmod(CONSTANTS.DATABASE_FILE, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes + os.chmod(CONSTANTS.DATABASE_FILE, int(STORAGE_CONFIG.OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes except (KeyboardInterrupt, SystemExit): stderr('[!] Warning: Still writing index to disk...', color='lightyellow') stderr(' Run archivebox init to fix any inconsistencies from an ungraceful exit.') with timed_index_update(CONSTANTS.DATABASE_FILE): write_sql_main_index(links, out_dir=out_dir, created_by_id=created_by_id) - os.chmod(CONSTANTS.DATABASE_FILE, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes + os.chmod(CONSTANTS.DATABASE_FILE, int(STORAGE_CONFIG.OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes raise SystemExit(0) log_indexing_process_finished() diff --git a/archivebox/index/html.py b/archivebox/index/html.py index 384562a9..307add0d 100644 --- a/archivebox/index/html.py +++ b/archivebox/index/html.py @@ -8,9 +8,7 @@ from typing import List, Optional, Iterator, Mapping from django.utils.html import format_html, mark_safe # type: ignore from django.core.cache import cache -from .schema import Link from archivebox.misc.system import atomic_write -from ..logging_util import printable_filesize from archivebox.misc.util import ( enforce_types, ts_to_date_str, @@ -18,11 +16,11 @@ from archivebox.misc.util import ( htmlencode, urldecode, ) -from archivebox.config.legacy import ( - SAVE_ARCHIVE_DOT_ORG, - PREVIEW_ORIGINALS, -) from archivebox.config import CONSTANTS, DATA_DIR, VERSION, SHELL_CONFIG, SERVER_CONFIG +from archivebox.plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG + +from .schema import Link +from ..logging_util import printable_filesize MAIN_INDEX_TEMPLATE = 'static_index.html' MINIMAL_INDEX_TEMPLATE = 'minimal_index.html' @@ -102,8 +100,8 @@ def link_details_template(link: Link) -> str: 'status': 'archived' if link.is_archived else 'not yet archived', 'status_color': 'success' if link.is_archived else 'danger', 'oldest_archive_date': ts_to_date_str(link.oldest_archive_date), - 'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG, - 'PREVIEW_ORIGINALS': PREVIEW_ORIGINALS, + 'SAVE_ARCHIVE_DOT_ORG': ARCHIVEDOTORG_CONFIG.SAVE_ARCHIVE_DOT_ORG, + 'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS, }) @enforce_types diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py index fdc34c86..bdd93df4 100644 --- a/archivebox/index/schema.py +++ b/archivebox/index/schema.py @@ -17,12 +17,13 @@ from dataclasses import dataclass, asdict, field, fields from django.utils.functional import cached_property -from archivebox.config.constants import ARCHIVE_DIR, ARCHIVE_DIR_NAME +from archivebox.config import ARCHIVE_DIR, CONSTANTS from plugins_extractor.favicon.apps import FAVICON_CONFIG from archivebox.misc.system import get_dir_size from archivebox.misc.util import ts_to_date_str, parse_date +from archivebox.misc.logging import stderr, ANSI class ArchiveError(Exception): @@ -67,7 +68,6 @@ class ArchiveResult: @classmethod def guess_ts(_cls, dict_info): - from archivebox.misc.util import parse_date parsed_timestamp = parse_date(dict_info["timestamp"]) start_ts = parsed_timestamp end_ts = parsed_timestamp + timedelta(seconds=int(dict_info["duration"])) @@ -75,8 +75,6 @@ class ArchiveResult: @classmethod def from_json(cls, json_info, guess=False): - from archivebox.misc.util import parse_date - info = { key: val for key, val in json_info.items() @@ -160,7 +158,6 @@ class Link: return float(self.timestamp) > float(other.timestamp) def typecheck(self) -> None: - from ..config.legacy import stderr, ANSI try: assert self.schema == self.__class__.__name__ assert isinstance(self.timestamp, str) and self.timestamp @@ -231,8 +228,6 @@ class Link: @classmethod def from_json(cls, json_info, guess=False): - from archivebox.misc.util import parse_date - info = { key: val for key, val in json_info.items() @@ -287,7 +282,7 @@ class Link: @property def archive_path(self) -> str: - return '{}/{}'.format(ARCHIVE_DIR_NAME, self.timestamp) + return '{}/{}'.format(CONSTANTS.ARCHIVE_DIR_NAME, self.timestamp) @property def archive_size(self) -> float: diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py index 90576d9e..c8f9cfe5 100644 --- a/archivebox/logging_util.py +++ b/archivebox/logging_util.py @@ -604,14 +604,14 @@ def printable_folder_status(name: str, folder: Dict) -> str: path = pretty_path(folder['path']) return ' '.join(( - ANSI[color], + f'[{color}]', symbol, - ANSI['reset'], + '[/]', name.ljust(21), num_files.ljust(14), - ANSI[color], + f'[{color}]', note.ljust(8), - ANSI['reset'], + '[/]', path.ljust(76), )) diff --git a/archivebox/main.py b/archivebox/main.py index 1380cc8b..a51fcf0d 100755 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -69,8 +69,6 @@ from archivebox.misc.checks import check_data_folder from archivebox.config.legacy import ( write_config_file, load_all_config, - CONFIG, - USER_CONFIG, get_real_name, ) from .logging_util import ( @@ -85,7 +83,6 @@ from .logging_util import ( printable_folders, printable_filesize, printable_folder_status, - printable_dependency_version, ) @@ -167,7 +164,9 @@ def version(quiet: bool=False, out_dir: Path=DATA_DIR) -> None: """Print the ArchiveBox version and dependency information""" - from rich import print + from rich.console import Console + console = Console() + print = console.print print(VERSION) if quiet: return @@ -227,21 +226,27 @@ def version(quiet: bool=False, loaded_bin = binary raise provider_summary = f'[dark_sea_green3]{loaded_bin.binprovider.name.ljust(10)}[/dark_sea_green3]' if loaded_bin.binprovider else '[grey23]not found[/grey23]' - print('', '[green]√[/green]' if loaded_bin.is_valid else '[red]X[/red]', '', loaded_bin.name.ljust(21), str(loaded_bin.version).ljust(12), provider_summary, loaded_bin.abspath or f'[red]{err}[/red]') + if loaded_bin.abspath: + abspath = str(loaded_bin.abspath).replace(str(Path('~').expanduser()), '~') + if ' ' in abspath: + abspath = abspath.replace(' ', r'\ ') + else: + abspath = f'[red]{err}[/red]' + print('', '[green]√[/green]' if loaded_bin.is_valid else '[red]X[/red]', '', loaded_bin.name.ljust(21), str(loaded_bin.version).ljust(12), provider_summary, abspath, overflow='ignore', crop=False) print() - print('[white][i] Source-code locations:[/white]') + print('[deep_sky_blue3][i] Source-code locations:[/deep_sky_blue3]') for name, path in CONSTANTS.CODE_LOCATIONS.items(): - print(printable_folder_status(name, path)) + print(printable_folder_status(name, path), overflow='ignore', crop=False) print() if CONSTANTS.DATABASE_FILE.exists() or CONSTANTS.ARCHIVE_DIR.exists() or CONSTANTS.CONFIG_FILE.exists(): - print('[white][i] Data locations:[/]') + print('[bright_yellow][i] Data locations:[/bright_yellow]') for name, path in CONSTANTS.DATA_LOCATIONS.items(): - print(printable_folder_status(name, path)) + print(printable_folder_status(name, path), overflow='ignore', crop=False) else: print() - print('[white][i] Data locations:[/white] (not in a data directory)') + print('[red][i] Data locations:[/red] (not in a data directory)') print() @@ -984,6 +989,8 @@ def config(config_options_str: Optional[str]=None, elif config_options_str: config_options = config_options_str.split('\n') + from django.conf import settings + config_options = config_options or [] no_args = not (get or set or reset or config_options) @@ -992,15 +999,15 @@ def config(config_options_str: Optional[str]=None, if get or no_args: if config_options: config_options = [get_real_name(key) for key in config_options] - matching_config = {key: CONFIG[key] for key in config_options if key in CONFIG} - failed_config = [key for key in config_options if key not in CONFIG] + matching_config = {key: settings.FLAT_CONFIG[key] for key in config_options if key in settings.FLAT_CONFIG} + failed_config = [key for key in config_options if key not in settings.FLAT_CONFIG] if failed_config: stderr() stderr('[X] These options failed to get', color='red') stderr(' {}'.format('\n '.join(config_options))) raise SystemExit(1) else: - matching_config = CONFIG + matching_config = settings.FLAT_CONFIG print(printable_config(matching_config)) raise SystemExit(not matching_config) @@ -1021,20 +1028,20 @@ def config(config_options_str: Optional[str]=None, if key != raw_key: stderr(f'[i] Note: The config option {raw_key} has been renamed to {key}, please use the new name going forwards.', color='lightyellow') - if key in CONFIG: + if key in settings.FLAT_CONFIG: new_config[key] = val.strip() else: failed_options.append(line) if new_config: - before = CONFIG + before = settings.FLAT_CONFIG matching_config = write_config_file(new_config, out_dir=DATA_DIR) after = load_all_config() print(printable_config(matching_config)) side_effect_changes = {} for key, val in after.items(): - if key in USER_CONFIG and (before[key] != after[key]) and (key not in matching_config): + if key in settings.FLAT_CONFIG and (before[key] != after[key]) and (key not in matching_config): side_effect_changes[key] = after[key] if side_effect_changes: diff --git a/archivebox/misc/system.py b/archivebox/misc/system.py index 690b22d2..2a65ac48 100644 --- a/archivebox/misc/system.py +++ b/archivebox/misc/system.py @@ -14,7 +14,7 @@ from subprocess import _mswindows, PIPE, Popen, CalledProcessError, CompletedPro from crontab import CronTab from atomicwrites import atomic_write as lib_atomic_write -from archivebox.config.legacy import OUTPUT_PERMISSIONS, DIR_OUTPUT_PERMISSIONS, ENFORCE_ATOMIC_WRITES +from archivebox.config import STORAGE_CONFIG from archivebox.misc.util import enforce_types, ExtendedEncoder @@ -94,7 +94,7 @@ def atomic_write(path: Union[Path, str], contents: Union[dict, str, bytes], over elif isinstance(contents, (bytes, str)): f.write(contents) except OSError as e: - if ENFORCE_ATOMIC_WRITES: + if STORAGE_CONFIG.ENFORCE_ATOMIC_WRITES: print(f"[X] OSError: Failed to write {path} with fcntl.F_FULLFSYNC. ({e})") print(" You can store the archive/ subfolder on a hard drive or network share that doesn't support support syncronous writes,") print(" but the main folder containing the index.sqlite3 and ArchiveBox.conf files must be on a filesystem that supports FSYNC.") @@ -108,7 +108,7 @@ def atomic_write(path: Union[Path, str], contents: Union[dict, str, bytes], over f.write(contents) # set file permissions - os.chmod(path, int(OUTPUT_PERMISSIONS, base=8)) + os.chmod(path, int(STORAGE_CONFIG.OUTPUT_PERMISSIONS, base=8)) @enforce_types def chmod_file(path: str, cwd: str='.') -> None: @@ -120,14 +120,14 @@ def chmod_file(path: str, cwd: str='.') -> None: if not root.is_dir(): # path is just a plain file - os.chmod(root, int(OUTPUT_PERMISSIONS, base=8)) + os.chmod(root, int(STORAGE_CONFIG.OUTPUT_PERMISSIONS, base=8)) else: for subpath in Path(path).glob('**/*'): if subpath.is_dir(): # directories need execute permissions to be able to list contents - os.chmod(subpath, int(DIR_OUTPUT_PERMISSIONS, base=8)) + os.chmod(subpath, int(STORAGE_CONFIG.DIR_OUTPUT_PERMISSIONS, base=8)) else: - os.chmod(subpath, int(OUTPUT_PERMISSIONS, base=8)) + os.chmod(subpath, int(STORAGE_CONFIG.OUTPUT_PERMISSIONS, base=8)) @enforce_types @@ -230,31 +230,3 @@ class suppress_output(object): if self.stderr: os.dup2(self.real_stderr, 2) os.close(self.null_stderr) - - -def get_system_user() -> str: - # some host OS's are unable to provide a username (k3s, Windows), making this complicated - # uid 999 is especially problematic and breaks many attempts - SYSTEM_USER = None - FALLBACK_USER_PLACHOLDER = f'user_{os.getuid()}' - - # Option 1 - try: - import pwd - SYSTEM_USER = SYSTEM_USER or pwd.getpwuid(os.geteuid()).pw_name - except (ModuleNotFoundError, Exception): - pass - - # Option 2 - try: - SYSTEM_USER = SYSTEM_USER or getpass.getuser() - except Exception: - pass - - # Option 3 - try: - SYSTEM_USER = SYSTEM_USER or os.getlogin() - except Exception: - pass - - return SYSTEM_USER or FALLBACK_USER_PLACHOLDER diff --git a/archivebox/parsers/pocket_api.py b/archivebox/parsers/pocket_api.py index 7cfd5c2a..fd513840 100644 --- a/archivebox/parsers/pocket_api.py +++ b/archivebox/parsers/pocket_api.py @@ -9,15 +9,15 @@ from configparser import ConfigParser from pocket import Pocket from archivebox.config import CONSTANTS - -from ..index.schema import Link from archivebox.misc.util import enforce_types from archivebox.misc.system import atomic_write -from ..config.legacy import ( +from archivebox.config.legacy import ( POCKET_CONSUMER_KEY, POCKET_ACCESS_TOKENS, ) +from ..index.schema import Link + COUNT_PER_PAGE = 500 API_DB_PATH = CONSTANTS.SOURCES_DIR / 'pocket_api.db' diff --git a/archivebox/parsers/readwise_reader_api.py b/archivebox/parsers/readwise_reader_api.py index b94858e8..65dfbd39 100644 --- a/archivebox/parsers/readwise_reader_api.py +++ b/archivebox/parsers/readwise_reader_api.py @@ -9,12 +9,11 @@ from typing import IO, Iterable, Optional from configparser import ConfigParser from archivebox.config import CONSTANTS - -from ..index.schema import Link from archivebox.misc.util import enforce_types from archivebox.misc.system import atomic_write -from ..config.legacy import READWISE_READER_TOKENS +from archivebox.config.legacy import READWISE_READER_TOKENS +from ..index.schema import Link API_DB_PATH = CONSTANTS.SOURCES_DIR / "readwise_reader_api.db"