mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-12 22:25:44 -04:00
move config out of legacy files and improve version output
This commit is contained in:
parent
d21bc86075
commit
18474f452b
10 changed files with 67 additions and 104 deletions
|
@ -202,16 +202,6 @@ class ConstantsDict(Mapping):
|
|||
'enabled': True,
|
||||
'is_valid': (PACKAGE_DIR / '__main__.py').exists(),
|
||||
},
|
||||
'LIB_DIR': {
|
||||
'path': LIB_DIR.resolve(),
|
||||
'enabled': True,
|
||||
'is_valid': LIB_DIR.is_dir(),
|
||||
},
|
||||
'RUNTIME_CONFIG': {
|
||||
'path': TMP_DIR.resolve(),
|
||||
'enabled': True,
|
||||
'is_valid': TMP_DIR.is_dir(),
|
||||
},
|
||||
'TEMPLATES_DIR': {
|
||||
'path': TEMPLATES_DIR.resolve(),
|
||||
'enabled': True,
|
||||
|
@ -222,6 +212,16 @@ class ConstantsDict(Mapping):
|
|||
'enabled': True,
|
||||
'is_valid': CUSTOM_TEMPLATES_DIR.is_dir(),
|
||||
},
|
||||
'LIB_DIR': {
|
||||
'path': LIB_DIR.resolve(),
|
||||
'enabled': True,
|
||||
'is_valid': LIB_DIR.is_dir(),
|
||||
},
|
||||
'TMP_DIR': {
|
||||
'path': TMP_DIR.resolve(),
|
||||
'enabled': True,
|
||||
'is_valid': TMP_DIR.is_dir(),
|
||||
},
|
||||
})
|
||||
|
||||
DATA_LOCATIONS = benedict({
|
||||
|
|
|
@ -10,7 +10,7 @@ from datetime import datetime, timezone
|
|||
|
||||
from django.db.models import QuerySet
|
||||
|
||||
from ..config.legacy import (
|
||||
from archivebox.config.legacy import (
|
||||
SAVE_ALLOWLIST_PTN,
|
||||
SAVE_DENYLIST_PTN,
|
||||
)
|
||||
|
|
|
@ -12,19 +12,11 @@ from urllib.parse import urlparse
|
|||
from django.db.models import QuerySet, Q
|
||||
|
||||
|
||||
from archivebox.config import DATA_DIR, CONSTANTS, SEARCH_BACKEND_CONFIG
|
||||
from archivebox.misc.util import (
|
||||
scheme,
|
||||
enforce_types,
|
||||
ExtendedEncoder,
|
||||
)
|
||||
from ..misc.logging import stderr
|
||||
from ..config.legacy import (
|
||||
TIMEOUT,
|
||||
URL_DENYLIST_PTN,
|
||||
URL_ALLOWLIST_PTN,
|
||||
OUTPUT_PERMISSIONS
|
||||
)
|
||||
from archivebox.config import DATA_DIR, CONSTANTS, ARCHIVING_CONFIG, STORAGE_CONFIG, SEARCH_BACKEND_CONFIG
|
||||
from archivebox.misc.util import scheme, enforce_types, ExtendedEncoder
|
||||
from archivebox.misc.logging import stderr
|
||||
from archivebox.config.legacy import URL_DENYLIST_PTN, URL_ALLOWLIST_PTN
|
||||
|
||||
from ..logging_util import (
|
||||
TimedProgress,
|
||||
log_indexing_process_started,
|
||||
|
@ -119,7 +111,7 @@ def merge_links(a: Link, b: Link) -> Link:
|
|||
|
||||
@enforce_types
|
||||
def validate_links(links: Iterable[Link]) -> List[Link]:
|
||||
timer = TimedProgress(TIMEOUT * 4)
|
||||
timer = TimedProgress(ARCHIVING_CONFIG.TIMEOUT * 4)
|
||||
try:
|
||||
links = archivable_links(links) # remove chrome://, about:, mailto: etc.
|
||||
links = sorted_links(links) # deterministically sort the links based on timestamp, url
|
||||
|
@ -211,7 +203,7 @@ def lowest_uniq_timestamp(used_timestamps: OrderedDict, timestamp: str) -> str:
|
|||
@enforce_types
|
||||
def timed_index_update(out_path: Path):
|
||||
log_indexing_started(out_path)
|
||||
timer = TimedProgress(TIMEOUT * 2, prefix=' ')
|
||||
timer = TimedProgress(ARCHIVING_CONFIG.TIMEOUT * 2, prefix=' ')
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
|
@ -230,14 +222,14 @@ def write_main_index(links: List[Link], out_dir: Path=DATA_DIR, created_by_id: i
|
|||
try:
|
||||
with timed_index_update(CONSTANTS.DATABASE_FILE):
|
||||
write_sql_main_index(links, out_dir=out_dir, created_by_id=created_by_id)
|
||||
os.chmod(CONSTANTS.DATABASE_FILE, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes
|
||||
os.chmod(CONSTANTS.DATABASE_FILE, int(STORAGE_CONFIG.OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes
|
||||
|
||||
except (KeyboardInterrupt, SystemExit):
|
||||
stderr('[!] Warning: Still writing index to disk...', color='lightyellow')
|
||||
stderr(' Run archivebox init to fix any inconsistencies from an ungraceful exit.')
|
||||
with timed_index_update(CONSTANTS.DATABASE_FILE):
|
||||
write_sql_main_index(links, out_dir=out_dir, created_by_id=created_by_id)
|
||||
os.chmod(CONSTANTS.DATABASE_FILE, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes
|
||||
os.chmod(CONSTANTS.DATABASE_FILE, int(STORAGE_CONFIG.OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes
|
||||
raise SystemExit(0)
|
||||
|
||||
log_indexing_process_finished()
|
||||
|
|
|
@ -8,9 +8,7 @@ from typing import List, Optional, Iterator, Mapping
|
|||
from django.utils.html import format_html, mark_safe # type: ignore
|
||||
from django.core.cache import cache
|
||||
|
||||
from .schema import Link
|
||||
from archivebox.misc.system import atomic_write
|
||||
from ..logging_util import printable_filesize
|
||||
from archivebox.misc.util import (
|
||||
enforce_types,
|
||||
ts_to_date_str,
|
||||
|
@ -18,11 +16,11 @@ from archivebox.misc.util import (
|
|||
htmlencode,
|
||||
urldecode,
|
||||
)
|
||||
from archivebox.config.legacy import (
|
||||
SAVE_ARCHIVE_DOT_ORG,
|
||||
PREVIEW_ORIGINALS,
|
||||
)
|
||||
from archivebox.config import CONSTANTS, DATA_DIR, VERSION, SHELL_CONFIG, SERVER_CONFIG
|
||||
from archivebox.plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
|
||||
|
||||
from .schema import Link
|
||||
from ..logging_util import printable_filesize
|
||||
|
||||
MAIN_INDEX_TEMPLATE = 'static_index.html'
|
||||
MINIMAL_INDEX_TEMPLATE = 'minimal_index.html'
|
||||
|
@ -102,8 +100,8 @@ def link_details_template(link: Link) -> str:
|
|||
'status': 'archived' if link.is_archived else 'not yet archived',
|
||||
'status_color': 'success' if link.is_archived else 'danger',
|
||||
'oldest_archive_date': ts_to_date_str(link.oldest_archive_date),
|
||||
'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG,
|
||||
'PREVIEW_ORIGINALS': PREVIEW_ORIGINALS,
|
||||
'SAVE_ARCHIVE_DOT_ORG': ARCHIVEDOTORG_CONFIG.SAVE_ARCHIVE_DOT_ORG,
|
||||
'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS,
|
||||
})
|
||||
|
||||
@enforce_types
|
||||
|
|
|
@ -17,12 +17,13 @@ from dataclasses import dataclass, asdict, field, fields
|
|||
|
||||
from django.utils.functional import cached_property
|
||||
|
||||
from archivebox.config.constants import ARCHIVE_DIR, ARCHIVE_DIR_NAME
|
||||
from archivebox.config import ARCHIVE_DIR, CONSTANTS
|
||||
|
||||
from plugins_extractor.favicon.apps import FAVICON_CONFIG
|
||||
|
||||
from archivebox.misc.system import get_dir_size
|
||||
from archivebox.misc.util import ts_to_date_str, parse_date
|
||||
from archivebox.misc.logging import stderr, ANSI
|
||||
|
||||
|
||||
class ArchiveError(Exception):
|
||||
|
@ -67,7 +68,6 @@ class ArchiveResult:
|
|||
|
||||
@classmethod
|
||||
def guess_ts(_cls, dict_info):
|
||||
from archivebox.misc.util import parse_date
|
||||
parsed_timestamp = parse_date(dict_info["timestamp"])
|
||||
start_ts = parsed_timestamp
|
||||
end_ts = parsed_timestamp + timedelta(seconds=int(dict_info["duration"]))
|
||||
|
@ -75,8 +75,6 @@ class ArchiveResult:
|
|||
|
||||
@classmethod
|
||||
def from_json(cls, json_info, guess=False):
|
||||
from archivebox.misc.util import parse_date
|
||||
|
||||
info = {
|
||||
key: val
|
||||
for key, val in json_info.items()
|
||||
|
@ -160,7 +158,6 @@ class Link:
|
|||
return float(self.timestamp) > float(other.timestamp)
|
||||
|
||||
def typecheck(self) -> None:
|
||||
from ..config.legacy import stderr, ANSI
|
||||
try:
|
||||
assert self.schema == self.__class__.__name__
|
||||
assert isinstance(self.timestamp, str) and self.timestamp
|
||||
|
@ -231,8 +228,6 @@ class Link:
|
|||
|
||||
@classmethod
|
||||
def from_json(cls, json_info, guess=False):
|
||||
from archivebox.misc.util import parse_date
|
||||
|
||||
info = {
|
||||
key: val
|
||||
for key, val in json_info.items()
|
||||
|
@ -287,7 +282,7 @@ class Link:
|
|||
|
||||
@property
|
||||
def archive_path(self) -> str:
|
||||
return '{}/{}'.format(ARCHIVE_DIR_NAME, self.timestamp)
|
||||
return '{}/{}'.format(CONSTANTS.ARCHIVE_DIR_NAME, self.timestamp)
|
||||
|
||||
@property
|
||||
def archive_size(self) -> float:
|
||||
|
|
|
@ -604,14 +604,14 @@ def printable_folder_status(name: str, folder: Dict) -> str:
|
|||
path = pretty_path(folder['path'])
|
||||
|
||||
return ' '.join((
|
||||
ANSI[color],
|
||||
f'[{color}]',
|
||||
symbol,
|
||||
ANSI['reset'],
|
||||
'[/]',
|
||||
name.ljust(21),
|
||||
num_files.ljust(14),
|
||||
ANSI[color],
|
||||
f'[{color}]',
|
||||
note.ljust(8),
|
||||
ANSI['reset'],
|
||||
'[/]',
|
||||
path.ljust(76),
|
||||
))
|
||||
|
||||
|
|
|
@ -69,8 +69,6 @@ from archivebox.misc.checks import check_data_folder
|
|||
from archivebox.config.legacy import (
|
||||
write_config_file,
|
||||
load_all_config,
|
||||
CONFIG,
|
||||
USER_CONFIG,
|
||||
get_real_name,
|
||||
)
|
||||
from .logging_util import (
|
||||
|
@ -85,7 +83,6 @@ from .logging_util import (
|
|||
printable_folders,
|
||||
printable_filesize,
|
||||
printable_folder_status,
|
||||
printable_dependency_version,
|
||||
)
|
||||
|
||||
|
||||
|
@ -167,7 +164,9 @@ def version(quiet: bool=False,
|
|||
out_dir: Path=DATA_DIR) -> None:
|
||||
"""Print the ArchiveBox version and dependency information"""
|
||||
|
||||
from rich import print
|
||||
from rich.console import Console
|
||||
console = Console()
|
||||
print = console.print
|
||||
print(VERSION)
|
||||
if quiet:
|
||||
return
|
||||
|
@ -227,21 +226,27 @@ def version(quiet: bool=False,
|
|||
loaded_bin = binary
|
||||
raise
|
||||
provider_summary = f'[dark_sea_green3]{loaded_bin.binprovider.name.ljust(10)}[/dark_sea_green3]' if loaded_bin.binprovider else '[grey23]not found[/grey23]'
|
||||
print('', '[green]√[/green]' if loaded_bin.is_valid else '[red]X[/red]', '', loaded_bin.name.ljust(21), str(loaded_bin.version).ljust(12), provider_summary, loaded_bin.abspath or f'[red]{err}[/red]')
|
||||
if loaded_bin.abspath:
|
||||
abspath = str(loaded_bin.abspath).replace(str(Path('~').expanduser()), '~')
|
||||
if ' ' in abspath:
|
||||
abspath = abspath.replace(' ', r'\ ')
|
||||
else:
|
||||
abspath = f'[red]{err}[/red]'
|
||||
print('', '[green]√[/green]' if loaded_bin.is_valid else '[red]X[/red]', '', loaded_bin.name.ljust(21), str(loaded_bin.version).ljust(12), provider_summary, abspath, overflow='ignore', crop=False)
|
||||
|
||||
print()
|
||||
print('[white][i] Source-code locations:[/white]')
|
||||
print('[deep_sky_blue3][i] Source-code locations:[/deep_sky_blue3]')
|
||||
for name, path in CONSTANTS.CODE_LOCATIONS.items():
|
||||
print(printable_folder_status(name, path))
|
||||
print(printable_folder_status(name, path), overflow='ignore', crop=False)
|
||||
|
||||
print()
|
||||
if CONSTANTS.DATABASE_FILE.exists() or CONSTANTS.ARCHIVE_DIR.exists() or CONSTANTS.CONFIG_FILE.exists():
|
||||
print('[white][i] Data locations:[/]')
|
||||
print('[bright_yellow][i] Data locations:[/bright_yellow]')
|
||||
for name, path in CONSTANTS.DATA_LOCATIONS.items():
|
||||
print(printable_folder_status(name, path))
|
||||
print(printable_folder_status(name, path), overflow='ignore', crop=False)
|
||||
else:
|
||||
print()
|
||||
print('[white][i] Data locations:[/white] (not in a data directory)')
|
||||
print('[red][i] Data locations:[/red] (not in a data directory)')
|
||||
|
||||
print()
|
||||
|
||||
|
@ -984,6 +989,8 @@ def config(config_options_str: Optional[str]=None,
|
|||
elif config_options_str:
|
||||
config_options = config_options_str.split('\n')
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
config_options = config_options or []
|
||||
|
||||
no_args = not (get or set or reset or config_options)
|
||||
|
@ -992,15 +999,15 @@ def config(config_options_str: Optional[str]=None,
|
|||
if get or no_args:
|
||||
if config_options:
|
||||
config_options = [get_real_name(key) for key in config_options]
|
||||
matching_config = {key: CONFIG[key] for key in config_options if key in CONFIG}
|
||||
failed_config = [key for key in config_options if key not in CONFIG]
|
||||
matching_config = {key: settings.FLAT_CONFIG[key] for key in config_options if key in settings.FLAT_CONFIG}
|
||||
failed_config = [key for key in config_options if key not in settings.FLAT_CONFIG]
|
||||
if failed_config:
|
||||
stderr()
|
||||
stderr('[X] These options failed to get', color='red')
|
||||
stderr(' {}'.format('\n '.join(config_options)))
|
||||
raise SystemExit(1)
|
||||
else:
|
||||
matching_config = CONFIG
|
||||
matching_config = settings.FLAT_CONFIG
|
||||
|
||||
print(printable_config(matching_config))
|
||||
raise SystemExit(not matching_config)
|
||||
|
@ -1021,20 +1028,20 @@ def config(config_options_str: Optional[str]=None,
|
|||
if key != raw_key:
|
||||
stderr(f'[i] Note: The config option {raw_key} has been renamed to {key}, please use the new name going forwards.', color='lightyellow')
|
||||
|
||||
if key in CONFIG:
|
||||
if key in settings.FLAT_CONFIG:
|
||||
new_config[key] = val.strip()
|
||||
else:
|
||||
failed_options.append(line)
|
||||
|
||||
if new_config:
|
||||
before = CONFIG
|
||||
before = settings.FLAT_CONFIG
|
||||
matching_config = write_config_file(new_config, out_dir=DATA_DIR)
|
||||
after = load_all_config()
|
||||
print(printable_config(matching_config))
|
||||
|
||||
side_effect_changes = {}
|
||||
for key, val in after.items():
|
||||
if key in USER_CONFIG and (before[key] != after[key]) and (key not in matching_config):
|
||||
if key in settings.FLAT_CONFIG and (before[key] != after[key]) and (key not in matching_config):
|
||||
side_effect_changes[key] = after[key]
|
||||
|
||||
if side_effect_changes:
|
||||
|
|
|
@ -14,7 +14,7 @@ from subprocess import _mswindows, PIPE, Popen, CalledProcessError, CompletedPro
|
|||
from crontab import CronTab
|
||||
from atomicwrites import atomic_write as lib_atomic_write
|
||||
|
||||
from archivebox.config.legacy import OUTPUT_PERMISSIONS, DIR_OUTPUT_PERMISSIONS, ENFORCE_ATOMIC_WRITES
|
||||
from archivebox.config import STORAGE_CONFIG
|
||||
from archivebox.misc.util import enforce_types, ExtendedEncoder
|
||||
|
||||
|
||||
|
@ -94,7 +94,7 @@ def atomic_write(path: Union[Path, str], contents: Union[dict, str, bytes], over
|
|||
elif isinstance(contents, (bytes, str)):
|
||||
f.write(contents)
|
||||
except OSError as e:
|
||||
if ENFORCE_ATOMIC_WRITES:
|
||||
if STORAGE_CONFIG.ENFORCE_ATOMIC_WRITES:
|
||||
print(f"[X] OSError: Failed to write {path} with fcntl.F_FULLFSYNC. ({e})")
|
||||
print(" You can store the archive/ subfolder on a hard drive or network share that doesn't support support syncronous writes,")
|
||||
print(" but the main folder containing the index.sqlite3 and ArchiveBox.conf files must be on a filesystem that supports FSYNC.")
|
||||
|
@ -108,7 +108,7 @@ def atomic_write(path: Union[Path, str], contents: Union[dict, str, bytes], over
|
|||
f.write(contents)
|
||||
|
||||
# set file permissions
|
||||
os.chmod(path, int(OUTPUT_PERMISSIONS, base=8))
|
||||
os.chmod(path, int(STORAGE_CONFIG.OUTPUT_PERMISSIONS, base=8))
|
||||
|
||||
@enforce_types
|
||||
def chmod_file(path: str, cwd: str='.') -> None:
|
||||
|
@ -120,14 +120,14 @@ def chmod_file(path: str, cwd: str='.') -> None:
|
|||
|
||||
if not root.is_dir():
|
||||
# path is just a plain file
|
||||
os.chmod(root, int(OUTPUT_PERMISSIONS, base=8))
|
||||
os.chmod(root, int(STORAGE_CONFIG.OUTPUT_PERMISSIONS, base=8))
|
||||
else:
|
||||
for subpath in Path(path).glob('**/*'):
|
||||
if subpath.is_dir():
|
||||
# directories need execute permissions to be able to list contents
|
||||
os.chmod(subpath, int(DIR_OUTPUT_PERMISSIONS, base=8))
|
||||
os.chmod(subpath, int(STORAGE_CONFIG.DIR_OUTPUT_PERMISSIONS, base=8))
|
||||
else:
|
||||
os.chmod(subpath, int(OUTPUT_PERMISSIONS, base=8))
|
||||
os.chmod(subpath, int(STORAGE_CONFIG.OUTPUT_PERMISSIONS, base=8))
|
||||
|
||||
|
||||
@enforce_types
|
||||
|
@ -230,31 +230,3 @@ class suppress_output(object):
|
|||
if self.stderr:
|
||||
os.dup2(self.real_stderr, 2)
|
||||
os.close(self.null_stderr)
|
||||
|
||||
|
||||
def get_system_user() -> str:
    """Return a best-effort username for the user running this process.

    Some host OSes are unable to provide a username (k3s, Windows), and
    uid 999 is especially problematic, so several lookups are attempted in
    order and the first one that yields a non-empty name wins:

    1. ``pwd.getpwuid(os.geteuid()).pw_name``
    2. ``getpass.getuser()``
    3. ``os.getlogin()``

    Returns:
        The first non-empty username found. If every lookup fails, a
        ``user_<uid>`` placeholder is returned, or ``user_unknown`` on
        platforms that do not provide ``os.getuid``.
    """
    import getpass

    def _from_passwd_db() -> str:
        # pwd is a Unix-only module, so import it lazily inside the guarded call
        import pwd
        return pwd.getpwuid(os.geteuid()).pw_name

    lookups = (
        _from_passwd_db,
        lambda: getpass.getuser(),
        lambda: os.getlogin(),
    )
    for lookup in lookups:
        try:
            username = lookup()
        except Exception:
            # deliberate best-effort: any failure just means "try the next source"
            continue
        if username:
            return username

    # Build the placeholder only when it is actually needed: os.getuid does not
    # exist on Windows, so evaluating it up-front would crash before any of the
    # lookups above had a chance to run.
    getuid = getattr(os, 'getuid', None)
    if getuid is None:
        return 'user_unknown'
    return f'user_{getuid()}'
|
||||
|
|
|
@ -9,15 +9,15 @@ from configparser import ConfigParser
|
|||
from pocket import Pocket
|
||||
|
||||
from archivebox.config import CONSTANTS
|
||||
|
||||
from ..index.schema import Link
|
||||
from archivebox.misc.util import enforce_types
|
||||
from archivebox.misc.system import atomic_write
|
||||
from ..config.legacy import (
|
||||
from archivebox.config.legacy import (
|
||||
POCKET_CONSUMER_KEY,
|
||||
POCKET_ACCESS_TOKENS,
|
||||
)
|
||||
|
||||
from ..index.schema import Link
|
||||
|
||||
|
||||
COUNT_PER_PAGE = 500
|
||||
API_DB_PATH = CONSTANTS.SOURCES_DIR / 'pocket_api.db'
|
||||
|
|
|
@ -9,12 +9,11 @@ from typing import IO, Iterable, Optional
|
|||
from configparser import ConfigParser
|
||||
|
||||
from archivebox.config import CONSTANTS
|
||||
|
||||
from ..index.schema import Link
|
||||
from archivebox.misc.util import enforce_types
|
||||
from archivebox.misc.system import atomic_write
|
||||
from ..config.legacy import READWISE_READER_TOKENS
|
||||
from archivebox.config.legacy import READWISE_READER_TOKENS
|
||||
|
||||
from ..index.schema import Link
|
||||
|
||||
API_DB_PATH = CONSTANTS.SOURCES_DIR / "readwise_reader_api.db"
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue