mirror of https://github.com/ArchiveBox/ArchiveBox.git (synced 2025-05-13 22:54:27 -04:00)

use new mypy TypedDict to manage config typing

commit 11fd436305, parent 461a8b0d71
5 changed files with 240 additions and 93 deletions
Pipfile (1 line changed)

@@ -19,6 +19,7 @@ youtube-dl = "*"
 python-crontab = "*"
 croniter = "*"
 ipython = "*"
+mypy-extensions = "*"

 [requires]
 python_version = "3.7"
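The only Pipfile change is the new mypy-extensions dependency: on Python 3.7, TypedDict is not yet part of the standard typing module, so this backport package supplies it for the config typing introduced below. A minimal sketch of what the package provides (the Point type here is made up, not part of the commit):

# Minimal sketch, illustrative only: on Python 3.7 TypedDict comes from
# the mypy_extensions package rather than from typing.
from mypy_extensions import TypedDict

class Point(TypedDict, total=False):   # total=False makes every key optional
    x: int
    y: int

p: Point = {'x': 1}        # OK at runtime and under mypy
# p = {'x': 'one'}         # mypy error: "x" expects int
# p = {'z': 2}             # mypy error: unexpected key "z"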
Pipfile.lock (generated; 11 lines changed)

@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "b0dd2536174ddcdc8fe711dd92b577f270c5a34cdb3bcb06cc70842358c80fe2"
+            "sha256": "ad0264907c26fe4227c5c0b6376733422b4e506a9eef057e0a563428373046dd"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -100,6 +100,14 @@
             ],
             "version": "==0.13.3"
         },
+        "mypy-extensions": {
+            "hashes": [
+                "sha256:37e0e956f41369209a3d5f34580150bcacfabaa57b33a15c0b25f4b5725e0812",
+                "sha256:b16cabe759f55e3409a7d231ebd2841378fb0c27a5d1994719e340e4f429ac3e"
+            ],
+            "index": "pypi",
+            "version": "==0.4.1"
+        },
         "parso": {
             "hashes": [
                 "sha256:17cc2d7a945eb42c3569d4564cdf49bde221bc2b552af3eca9c1aad517dcdd33",
@@ -306,6 +314,7 @@
                 "sha256:37e0e956f41369209a3d5f34580150bcacfabaa57b33a15c0b25f4b5725e0812",
                 "sha256:b16cabe759f55e3409a7d231ebd2841378fb0c27a5d1994719e340e4f429ac3e"
             ],
+            "index": "pypi",
             "version": "==0.4.1"
         },
         "parso": {
archivebox/legacy/config.py

@@ -9,10 +9,17 @@ import getpass
 import shutil

 from hashlib import md5
-from typing import Any, Optional, Dict, Tuple
+from typing import Optional, Type, Tuple
 from subprocess import run, PIPE, DEVNULL

-CONFIG_TYPE = Dict[str, Any]
+from .config_stubs import (
+    SimpleConfigValueDict,
+    ConfigValue,
+    ConfigDict,
+    ConfigDefaultValue,
+    ConfigDefaultDict,
+)
+

 # ******************************************************************************
 # Documentation: https://github.com/pirate/ArchiveBox/wiki/Configuration
@@ -22,13 +29,13 @@ CONFIG_TYPE = Dict[str, Any]

 ################################# User Config ##################################

-SHELL_CONFIG_DEFAULTS = {
+SHELL_CONFIG_DEFAULTS: ConfigDefaultDict = {
     'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()},
     'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']},
     'SHOW_PROGRESS': {'type': bool, 'default': lambda c: c['IS_TTY']},
 }

-ARCHIVE_CONFIG_DEFAULTS = {
+ARCHIVE_CONFIG_DEFAULTS: ConfigDefaultDict = {
     'OUTPUT_DIR': {'type': str, 'default': None},
     'ONLY_NEW': {'type': bool, 'default': False},
     'TIMEOUT': {'type': int, 'default': 60},
@@ -38,7 +45,7 @@ ARCHIVE_CONFIG_DEFAULTS = {
     'URL_BLACKLIST': {'type': str, 'default': None},
 }

-ARCHIVE_METHOD_TOGGLES_DEFAULTS = {
+ARCHIVE_METHOD_TOGGLES_DEFAULTS: ConfigDefaultDict = {
     'SAVE_TITLE': {'type': bool, 'default': True, 'aliases': ('FETCH_TITLE',)},
     'SAVE_FAVICON': {'type': bool, 'default': True, 'aliases': ('FETCH_FAVICON',)},
     'SAVE_WGET': {'type': bool, 'default': True, 'aliases': ('FETCH_WGET',)},
@@ -52,8 +59,8 @@ ARCHIVE_METHOD_TOGGLES_DEFAULTS = {
     'SAVE_ARCHIVE_DOT_ORG': {'type': bool, 'default': True, 'aliases': ('SUBMIT_ARCHIVE_DOT_ORG',)},
 }

-ARCHIVE_METHOD_OPTIONS_DEFAULTS = {
-    'RESOLUTION': {'type': str, 'default': '1440,2000'},
+ARCHIVE_METHOD_OPTIONS_DEFAULTS: ConfigDefaultDict = {
+    'RESOLUTION': {'type': str, 'default': '1440,2000', 'aliases': ('SCREENSHOT_RESOLUTION',)},
     'GIT_DOMAINS': {'type': str, 'default': 'github.com,bitbucket.org,gitlab.com'},
     'CHECK_SSL_VALIDITY': {'type': bool, 'default': True},

@@ -67,7 +74,7 @@ ARCHIVE_METHOD_OPTIONS_DEFAULTS = {
     'CHROME_SANDBOX': {'type': bool, 'default': True},
 }

-DEPENDENCY_CONFIG_DEFAULTS = {
+DEPENDENCY_CONFIG_DEFAULTS: ConfigDefaultDict = {
     'USE_CURL': {'type': bool, 'default': True},
     'USE_WGET': {'type': bool, 'default': True},
     'USE_GIT': {'type': bool, 'default': True},
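Annotating the *_DEFAULTS dicts above as ConfigDefaultDict gives mypy a schema for each entry: a 'default', an optional 'type', and optional 'aliases'. A rough, self-contained sketch of that schema, using simplified type aliases rather than the exact ones defined in config_stubs.py later in this commit:

# Rough sketch, illustrative only: a simplified ConfigDefault/ConfigDefaultDict
# (the real aliases live in archivebox/legacy/config_stubs.py, shown below).
from typing import Any, Dict, Optional, Tuple, Type
from mypy_extensions import TypedDict

ConfigDefault = TypedDict('ConfigDefault', {
    'default': Any,                        # simplified; the real type is ConfigDefaultValue
    'type': Optional[Type],
    'aliases': Optional[Tuple[str, ...]],
}, total=False)
ConfigDefaultDict = Dict[str, ConfigDefault]

EXAMPLE_DEFAULTS: ConfigDefaultDict = {
    'TIMEOUT': {'type': int, 'default': 60},
    'SAVE_TITLE': {'type': bool, 'default': True, 'aliases': ('FETCH_TITLE',)},
    # 'BROKEN': {'typ': int, 'default': 0},   # mypy error: extra key "typ"
}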
@@ -116,7 +123,7 @@ FAVICON_FILENAME = 'favicon.ico'



-DERIVED_CONFIG_DEFAULTS = {
+DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
     'TERM_WIDTH': {'default': lambda c: lambda: shutil.get_terminal_size((100, 10)).columns},
     'USER': {'default': lambda c: getpass.getuser() or os.getlogin()},
     'ANSI': {'default': lambda c: DEFAULT_CLI_COLORS if c['USE_COLOR'] else {k: '' for k in DEFAULT_CLI_COLORS.keys()}},
@@ -131,7 +138,7 @@ DERIVED_CONFIG_DEFAULTS = {
     'SOURCES_DIR': {'default': lambda c: os.path.join(c['OUTPUT_DIR'], SOURCES_DIR_NAME)},
     'LOGS_DIR': {'default': lambda c: os.path.join(c['OUTPUT_DIR'], LOGS_DIR_NAME)},
     'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and os.path.abspath(os.path.expanduser(c['COOKIES_FILE']))},
-    'CHROME_USER_DATA_DIR': {'default': lambda c: c['CHROME_USER_DATA_DIR'] and os.path.abspath(os.path.expanduser(c['CHROME_USER_DATA_DIR']))},
+    'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (os.path.abspath(os.path.expanduser(c['CHROME_USER_DATA_DIR'])) or None)},
     'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'], re.IGNORECASE)},

     'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0]},
@@ -168,7 +175,6 @@ DERIVED_CONFIG_DEFAULTS = {
     'USE_CHROME': {'default': lambda c: c['USE_CHROME'] and (c['SAVE_PDF'] or c['SAVE_SCREENSHOT'] or c['SAVE_DOM'])},
     'CHROME_BINARY': {'default': lambda c: c['CHROME_BINARY'] if c['CHROME_BINARY'] else find_chrome_binary()},
     'CHROME_VERSION': {'default': lambda c: bin_version(c['CHROME_BINARY']) if c['USE_CHROME'] else None},
-    'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (c['CHROME_USER_DATA_DIR'] or None)},
     'SAVE_PDF': {'default': lambda c: c['USE_CHROME']},
     'SAVE_SCREENSHOT': {'default': lambda c: c['USE_CHROME']},
     'SAVE_DOM': {'default': lambda c: c['USE_CHROME']},
@@ -184,7 +190,12 @@ DERIVED_CONFIG_DEFAULTS = {

 ################################### Helpers ####################################

-def get_config_val(key: str, default: Any=None, type=None, aliases: Optional[Tuple[str, ...]]=None, config: CONFIG_TYPE=None) -> Any:
+def load_config_val(key: str,
+                    default: ConfigDefaultValue=None,
+                    type: Optional[Type]=None,
+                    aliases: Optional[Tuple[str, ...]]=None,
+                    config: Optional[ConfigDict]=None) -> ConfigValue:
+
     # check the canonical option name first, then check any older aliases
     possible_env_keys = (key, *(aliases or ()))
     for key in possible_env_keys:
@@ -193,7 +204,8 @@ def get_config_val(key: str, default: Any=None, type=None, aliases: Optional[Tup
             break

     if type is None or val is None:
-        if hasattr(default, '__call__'):
+        if callable(default):
+            assert isinstance(config, dict)
             return default(config)

         return default
@@ -218,16 +230,22 @@ def get_config_val(key: str, default: Any=None, type=None, aliases: Optional[Tup

     raise Exception('Config values can only be str, bool, or int')

-def load_config(defaults: dict, config: Optional[CONFIG_TYPE]=None) -> CONFIG_TYPE:
-    config = {**(config or {})}
+def load_config(defaults: ConfigDefaultDict, config: Optional[ConfigDict]=None) -> ConfigDict:
+    extended_config: ConfigDict = config.copy() if config else {}
     for key, default in defaults.items():
         try:
-            config[key] = get_config_val(key, **default, config=config)
+            extended_config[key] = load_config_val(
+                key,
+                default=default['default'],
+                type=default.get('type'),
+                aliases=default.get('aliases'),
+                config=extended_config,
+            )
         except KeyboardInterrupt:
             raise SystemExit(1)
         except Exception as e:
             stderr()
-            stderr(f'[X] Error while loading configuration value: {key}', color='red', config=config)
+            stderr(f'[X] Error while loading configuration value: {key}', color='red', config=extended_config)
             stderr(' {}: {}'.format(e.__class__.__name__, e))
             stderr()
             stderr(' Check your config for mistakes and try again (your archive data is unaffected).')
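For context on the load_config_val()/load_config() pair changed in the hunks above and continued below: load_config() folds each defaults dict into the config built so far, passing 'default', 'type', and 'aliases' explicitly instead of **default so mypy can check the call against the ConfigDefault schema. A condensed, self-contained rendition of the lookup logic (the environment handling and bool parsing are simplified here, not copied verbatim from the module):

# Condensed sketch of the lookup pattern, illustrative only.
import os
from typing import Any, Dict, Optional, Tuple, Type

def load_config_val(key: str,
                    default: Any = None,
                    type: Optional[Type] = None,
                    aliases: Optional[Tuple[str, ...]] = None,
                    config: Optional[Dict[str, Any]] = None) -> Any:
    val = None
    # check the canonical option name first, then any older aliases
    for env_key in (key, *(aliases or ())):
        val = os.getenv(env_key, None)
        if val is not None:
            break

    if type is None or val is None:
        if callable(default):
            assert isinstance(config, dict)
            return default(config)     # computed defaults receive the config built so far
        return default

    if type is bool:
        return val.lower() in ('true', '1', 'yes')
    return type(val)

def load_config(defaults: Dict[str, Dict[str, Any]],
                config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    extended_config: Dict[str, Any] = config.copy() if config else {}
    for key, default in defaults.items():
        extended_config[key] = load_config_val(
            key,
            default=default['default'],
            type=default.get('type'),
            aliases=default.get('aliases'),
            config=extended_config,
        )
    return extended_config

# Usage with a made-up defaults dict:
DEFAULTS = {
    'TIMEOUT': {'type': int, 'default': 60},
    'USE_COLOR': {'type': bool, 'default': lambda c: c['TIMEOUT'] > 0},
}
print(load_config(DEFAULTS))   # e.g. {'TIMEOUT': 60, 'USE_COLOR': True}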
@@ -237,27 +255,27 @@ def load_config(defaults: dict, config: Optional[CONFIG_TY
             stderr()
             raise SystemExit(1)

-    return config
+    return extended_config

-def stderr(*args, color: Optional[str]=None, config: Optional[CONFIG_TYPE]=None) -> None:
+def stderr(*args, color: Optional[str]=None, config: Optional[ConfigDict]=None) -> None:
     ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI

     if color:
-        strs = (ansi[color], ' '.join(str(a) for a in args), ansi['reset'], '\n')
+        strs = [ansi[color], ' '.join(str(a) for a in args), ansi['reset'], '\n']
     else:
-        strs = (' '.join(str(a) for a in args), '\n')
+        strs = [' '.join(str(a) for a in args), '\n']

     sys.stderr.write(''.join(strs))

-def bin_version(binary: str) -> Optional[str]:
+def bin_version(binary: Optional[str]) -> Optional[str]:
     """check the presence and return valid version line of a specified binary"""

-    binary = os.path.expanduser(binary)
-    try:
-        if not shutil.which(binary):
-            raise Exception
+    abspath = bin_path(binary)
+    if not abspath:
+        return None

-        version_str = run([binary, "--version"], stdout=PIPE).stdout.strip().decode()
+    try:
+        version_str = run([abspath, "--version"], stdout=PIPE).stdout.strip().decode()
         # take first 3 columns of first line of version info
         return ' '.join(version_str.split('\n')[0].strip().split()[:3])
     except Exception:
@@ -270,13 +288,19 @@ def bin_version(binary: str) -> Optional[str]:
         # stderr()
         return None

-def bin_hash(binary: str) -> Optional[str]:
-    bin_path = binary and shutil.which(os.path.expanduser(binary))
-    if not bin_path:
+def bin_path(binary: Optional[str]) -> Optional[str]:
+    if binary is None:
+        return None
+
+    return shutil.which(os.path.expanduser(binary)) or binary
+
+def bin_hash(binary: Optional[str]) -> Optional[str]:
+    abs_path = bin_path(binary)
+    if abs_path is None:
         return None

     file_hash = md5()
-    with io.open(bin_path, mode='rb') as f:
+    with io.open(abs_path, mode='rb') as f:
         for chunk in iter(lambda: f.read(io.DEFAULT_BUFFER_SIZE), b''):
             file_hash.update(chunk)

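The new bin_path() helper centralizes the Optional handling, so bin_version() and bin_hash() can take Optional[str] and short-circuit to None instead of raising when a *_BINARY setting is unset. A condensed usage sketch (simplified copies of the helpers above, not imports from the module; the binary names are just examples):

# Usage sketch, illustrative only: every helper returns None for missing binaries.
import shutil
from subprocess import run, PIPE
from typing import Optional

def bin_path(binary: Optional[str]) -> Optional[str]:
    if binary is None:
        return None
    return shutil.which(binary) or binary

def bin_version(binary: Optional[str]) -> Optional[str]:
    abspath = bin_path(binary)
    if not abspath:
        return None
    try:
        version_str = run([abspath, '--version'], stdout=PIPE).stdout.strip().decode()
        # take first 3 columns of first line of version info
        return ' '.join(version_str.split('\n')[0].strip().split()[:3])
    except Exception:
        return None

print(bin_version('git'))    # e.g. "git version 2.39.2" if git is installed
print(bin_version(None))     # None, no crash when the *_BINARY setting is unset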
@@ -340,7 +364,7 @@ def wget_supports_compression(config):
     ]
     return not run(cmd, stdout=DEVNULL, stderr=DEVNULL).returncode

-def get_code_locations(config: CONFIG_TYPE) -> Dict[str, CONFIG_TYPE]:
+def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict:
     return {
         'REPO_DIR': {
             'path': os.path.abspath(config['REPO_DIR']),
@@ -364,21 +388,22 @@ def get_code_locations(config: CONFIG_TYPE) -> Dict[str, CONFIG_TYPE]:
         },
     }

-def get_config_locations(config: CONFIG_TYPE) -> Dict[str, CONFIG_TYPE]:
+def get_config_locations(config: ConfigDict) -> ConfigValue:
+    abspath = lambda path: None if path is None else os.path.abspath(path)
     return {
         'CHROME_USER_DATA_DIR': {
-            'path': config['CHROME_USER_DATA_DIR'] and os.path.abspath(config['CHROME_USER_DATA_DIR']),
+            'path': abspath(config['CHROME_USER_DATA_DIR']),
             'enabled': config['USE_CHROME'] and config['CHROME_USER_DATA_DIR'],
-            'is_valid': os.path.exists(os.path.join(config['CHROME_USER_DATA_DIR'], 'Default')) if config['CHROME_USER_DATA_DIR'] else False,
+            'is_valid': False if config['CHROME_USER_DATA_DIR'] is None else os.path.exists(os.path.join(config['CHROME_USER_DATA_DIR'], 'Default')),
         },
         'COOKIES_FILE': {
-            'path': config['COOKIES_FILE'] and os.path.abspath(config['COOKIES_FILE']),
+            'path': abspath(config['COOKIES_FILE']),
             'enabled': config['USE_WGET'] and config['COOKIES_FILE'],
-            'is_valid': config['COOKIES_FILE'] and os.path.exists(config['COOKIES_FILE']),
+            'is_valid': False if config['COOKIES_FILE'] is None else os.path.exists(config['COOKIES_FILE']),
         },
     }

-def get_data_locations(config: CONFIG_TYPE) -> Dict[str, CONFIG_TYPE]:
+def get_data_locations(config: ConfigDict) -> ConfigValue:
     return {
         'OUTPUT_DIR': {
             'path': os.path.abspath(config['OUTPUT_DIR']),
@@ -400,54 +425,59 @@ def get_data_locations(config: CONFIG_TYPE) -> Dict[str, CONFIG_TYPE]:
             'enabled': True,
             'is_valid': os.path.exists(config['ARCHIVE_DIR']),
         },
+        'SQL_INDEX': {
+            'path': os.path.abspath(os.path.join(config['OUTPUT_DIR'], JSON_INDEX_FILENAME)),
+            'enabled': True,
+            'is_valid': os.path.exists(os.path.join(config['OUTPUT_DIR'], JSON_INDEX_FILENAME)),
+        },
     }

-def get_dependency_info(config: CONFIG_TYPE) -> Dict[str, CONFIG_TYPE]:
+def get_dependency_info(config: ConfigDict) -> ConfigValue:
     return {
         'PYTHON_BINARY': {
-            'path': config['PYTHON_BINARY'],
+            'path': bin_path(config['PYTHON_BINARY']),
             'version': config['PYTHON_VERSION'],
             'hash': bin_hash(config['PYTHON_BINARY']),
             'enabled': True,
             'is_valid': bool(config['DJANGO_VERSION']),
         },
         'DJANGO_BINARY': {
-            'path': config['DJANGO_BINARY'],
+            'path': bin_path(config['DJANGO_BINARY']),
             'version': config['DJANGO_VERSION'],
             'hash': bin_hash(config['DJANGO_BINARY']),
             'enabled': True,
             'is_valid': bool(config['DJANGO_VERSION']),
         },
         'CURL_BINARY': {
-            'path': (config['CURL_BINARY'] and shutil.which(config['CURL_BINARY'])) or config['CURL_BINARY'],
+            'path': bin_path(config['CURL_BINARY']),
             'version': config['CURL_VERSION'],
             'hash': bin_hash(config['PYTHON_BINARY']),
             'enabled': config['USE_CURL'],
             'is_valid': bool(config['CURL_VERSION']),
         },
         'WGET_BINARY': {
-            'path': (config['WGET_BINARY'] and shutil.which(config['WGET_BINARY'])) or config['WGET_BINARY'],
+            'path': bin_path(config['WGET_BINARY']),
             'version': config['WGET_VERSION'],
             'hash': bin_hash(config['WGET_BINARY']),
             'enabled': config['USE_WGET'],
             'is_valid': bool(config['WGET_VERSION']),
         },
         'GIT_BINARY': {
-            'path': (config['GIT_BINARY'] and shutil.which(config['GIT_BINARY'])) or config['GIT_BINARY'],
+            'path': bin_path(config['GIT_BINARY']),
             'version': config['GIT_VERSION'],
             'hash': bin_hash(config['GIT_BINARY']),
             'enabled': config['USE_GIT'],
             'is_valid': bool(config['GIT_VERSION']),
         },
         'YOUTUBEDL_BINARY': {
-            'path': (config['YOUTUBEDL_BINARY'] and shutil.which(config['YOUTUBEDL_BINARY'])) or config['YOUTUBEDL_BINARY'],
+            'path': bin_path(config['YOUTUBEDL_BINARY']),
             'version': config['YOUTUBEDL_VERSION'],
             'hash': bin_hash(config['YOUTUBEDL_BINARY']),
             'enabled': config['USE_YOUTUBEDL'],
             'is_valid': bool(config['YOUTUBEDL_VERSION']),
         },
         'CHROME_BINARY': {
-            'path': (config['CHROME_BINARY'] and shutil.which(config['CHROME_BINARY'])) or config['CHROME_BINARY'],
+            'path': bin_path(config['CHROME_BINARY']),
             'version': config['CHROME_VERSION'],
             'hash': bin_hash(config['CHROME_BINARY']),
             'enabled': config['USE_CHROME'],
@@ -455,7 +485,7 @@ def get_dependency_info(config: CONFIG_TYPE) -> Dict[str, CONFIG_TYPE]:
         },
     }

-def get_chrome_info(config: CONFIG_TYPE) -> Dict[str, CONFIG_TYPE]:
+def get_chrome_info(config: ConfigDict) -> ConfigValue:
     return {
         'TIMEOUT': config['TIMEOUT'],
         'RESOLUTION': config['RESOLUTION'],
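Every entry built by get_dependency_info() now routes its 'path' through bin_path(), so a missing binary shows up as None rather than a broken path string. An illustrative example of the resulting shape for a single dependency (all values below are invented placeholders, not real output):

# Illustrative only: the shape of one get_dependency_info() entry.
example_dependency_info = {
    'WGET_BINARY': {
        'path': '/usr/bin/wget',                     # bin_path() result, or None if not found
        'version': 'GNU Wget 1.20.1',                # derived via bin_version(), or None
        'hash': 'd41d8cd98f00b204e9800998ecf8427e',  # placeholder md5 digest from bin_hash()
        'enabled': True,                             # config['USE_WGET']
        'is_valid': True,                            # bool(config['WGET_VERSION'])
    },
}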
@@ -470,6 +500,8 @@ def get_chrome_info(config: CONFIG_TYPE) -> Dict[str, CONFIG_TYPE]:

 ################################## Load Config #################################

+CONFIG: ConfigDict
+
 CONFIG = load_config(SHELL_CONFIG_DEFAULTS)
 CONFIG = load_config(ARCHIVE_CONFIG_DEFAULTS, CONFIG)
 CONFIG = load_config(ARCHIVE_METHOD_TOGGLES_DEFAULTS, CONFIG)
@@ -480,9 +512,7 @@ globals().update(CONFIG)

 ############################## Importable Checkers #############################

-def check_system_config(config: CONFIG_TYPE=CONFIG) -> None:
-    ANSI = config['ANSI']
-
+def check_system_config(config: ConfigDict=CONFIG) -> None:
     ### Check system environment
     if config['USER'] == 'root':
         stderr('[!] ArchiveBox should never be run as root!', color='red')
@@ -507,7 +537,8 @@ def check_system_config(config: CONFIG_TYPE=CONFIG) -> None:

     # stderr('[i] Using Chrome binary: {}'.format(shutil.which(CHROME_BINARY) or CHROME_BINARY))
     # stderr('[i] Using Chrome data dir: {}'.format(os.path.abspath(CHROME_USER_DATA_DIR)))
-    if config['CHROME_USER_DATA_DIR'] and not os.path.exists(os.path.join(config['CHROME_USER_DATA_DIR'], 'Default')):
+    if config['CHROME_USER_DATA_DIR'] is not None:
+        if not os.path.exists(os.path.join(config['CHROME_USER_DATA_DIR'], 'Default')):
             stderr('[X] Could not find profile "Default" in CHROME_USER_DATA_DIR.', color='red')
             stderr(f' {config["CHROME_USER_DATA_DIR"]}')
             stderr(' Make sure you set it to a Chrome user data directory containing a Default profile folder.')
@@ -519,7 +550,7 @@ def check_system_config(config: CONFIG_TYPE=CONFIG) -> None:
             stderr(' CHROME_USER_DATA_DIR="{}"'.format(config['CHROME_USER_DATA_DIR'].split('/Default')[0]))
             raise SystemExit(1)

-def check_dependencies(config: CONFIG_TYPE=CONFIG, show_help: bool=True) -> None:
+def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
     invalid = [
         '{}: {} ({})'.format(name, info['path'] or 'unable to find binary', info['version'] or 'unable to detect version')
         for name, info in config['DEPENDENCIES'].items()
@@ -564,12 +595,14 @@ def check_dependencies(config: CONFIG_TYPE=CONFIG, show_help: bool=True) -> None
         stderr(' https://github.com/pirate/ArchiveBox/wiki/Configuration#save_media')


-def check_data_folder(out_dir: Optional[str]=None, config: CONFIG_TYPE=CONFIG) -> None:
-    out_dir = out_dir or config['OUTPUT_DIR']
-    json_index_exists = os.path.exists(os.path.join(out_dir, JSON_INDEX_FILENAME))
+def check_data_folder(out_dir: Optional[str]=None, config: ConfigDict=CONFIG) -> None:
+    output_dir = out_dir or config['OUTPUT_DIR']
+    assert isinstance(output_dir, str)
+
+    json_index_exists = os.path.exists(os.path.join(output_dir, JSON_INDEX_FILENAME))
     if not json_index_exists:
         stderr('[X] No archive index was found in current directory.', color='red')
-        stderr(f' {out_dir}')
+        stderr(f' {output_dir}')
         stderr()
         stderr(' Are you running archivebox in the right folder?')
         stderr(' cd path/to/your/archive/folder')
@@ -579,7 +612,7 @@ def check_data_folder(out_dir: Optional[str]=None, config: CONFIG_TYPE=CONFIG) -
         stderr(' archivebox init')
         raise SystemExit(1)

-    sql_index_exists = os.path.exists(os.path.join(out_dir, SQL_INDEX_FILENAME))
+    sql_index_exists = os.path.exists(os.path.join(output_dir, SQL_INDEX_FILENAME))
     from .storage.sql import list_migrations

     pending_migrations = [name for status, name in list_migrations() if not status]
@@ -591,7 +624,7 @@ def check_data_folder(out_dir: Optional[str]=None, config: CONFIG_TYPE=CONFIG) -
         pending_operation = 'generate the new SQL main index'

         stderr('[X] This collection was created with an older version of ArchiveBox and must be upgraded first.', color='lightyellow')
-        stderr(f' {out_dir}')
+        stderr(f' {output_dir}')
         stderr()
         stderr(f' To upgrade it to the latest version and {pending_operation} run:')
         stderr(' archivebox init')
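The new assert isinstance(output_dir, str) in check_data_folder() exists for mypy: out_dir or config['OUTPUT_DIR'] is still Optional from the type checker's point of view, and the assert narrows it to str before os.path.join() is called. A minimal sketch of that narrowing pattern, using generic names rather than the real config module:

# Minimal sketch of assert-based type narrowing, illustrative only.
import os
from typing import Optional

def check_folder(out_dir: Optional[str], fallback: Optional[str]) -> bool:
    output_dir = out_dir or fallback
    # Without this assert, mypy flags os.path.join() below because
    # output_dir is still Optional[str]; the assert narrows it to str.
    assert isinstance(output_dir, str)
    return os.path.exists(os.path.join(output_dir, 'index.json'))

print(check_folder(None, '.'))   # True only if ./index.json exists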
@@ -599,26 +632,21 @@ def check_data_folder(out_dir: Optional[str]=None, config: CONFIG_TYPE=CONFIG) -



-def setup_django(out_dir: str=None, check_db=False, config: CONFIG_TYPE=CONFIG) -> None:
+def setup_django(out_dir: str=None, check_db=False, config: ConfigDict=CONFIG) -> None:
+    output_dir = out_dir or config['OUTPUT_DIR']
+
+    assert isinstance(output_dir, str) and isinstance(config['PYTHON_DIR'], str)
+
     import django
     sys.path.append(config['PYTHON_DIR'])
-    os.environ.setdefault('OUTPUT_DIR', out_dir or config['OUTPUT_DIR'])
+    os.environ.setdefault('OUTPUT_DIR', output_dir)
     os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
     django.setup()

     if check_db:
-        sql_index_path = os.path.join(out_dir or config['OUTPUT_DIR'], SQL_INDEX_FILENAME)
+        sql_index_path = os.path.join(output_dir, SQL_INDEX_FILENAME)
         assert os.path.exists(sql_index_path), (
             f'No database file {SQL_INDEX_FILENAME} found in OUTPUT_DIR: {config["OUTPUT_DIR"]}')


 check_system_config()
-
-
-__all__ = (
-    'stderr',
-    'check_data_folder',
-    'check_dependencies',
-    'setup_django',
-    *CONFIG,
-)
archivebox/legacy/config_stubs.py (new file; 108 lines)

@@ -0,0 +1,108 @@
+from typing import Optional, Dict, Union, Tuple, Callable, Pattern, Type, Any
+from mypy_extensions import TypedDict
+
+
+SimpleConfigValue = Union[str, bool, int, None, Pattern, Dict[str, Any]]
+SimpleConfigValueDict = Dict[str, SimpleConfigValue]
+SimpleConfigValueGetter = Callable[[], SimpleConfigValue]
+ConfigValue = Union[SimpleConfigValue, SimpleConfigValueDict, SimpleConfigValueGetter]
+
+
+class BaseConfig(TypedDict):
+    pass
+
+class ConfigDict(BaseConfig, total=False):
+    IS_TTY: bool
+    USE_COLOR: bool
+    SHOW_PROGRESS: bool
+
+    OUTPUT_DIR: str
+    ONLY_NEW: bool
+    TIMEOUT: int
+    MEDIA_TIMEOUT: int
+    OUTPUT_PERMISSIONS: str
+    FOOTER_INFO: str
+    URL_BLACKLIST: Optional[str]
+
+    SAVE_TITLE: bool
+    SAVE_FAVICON: bool
+    SAVE_WGET: bool
+    SAVE_WGET_REQUISITES: bool
+    SAVE_PDF: bool
+    SAVE_SCREENSHOT: bool
+    SAVE_DOM: bool
+    SAVE_WARC: bool
+    SAVE_GIT: bool
+    SAVE_MEDIA: bool
+    SAVE_ARCHIVE_DOT_ORG: bool
+
+    RESOLUTION: str
+    GIT_DOMAINS: str
+    CHECK_SSL_VALIDITY: bool
+    WGET_USER_AGENT: str
+    CHROME_USER_AGENT: str
+    COOKIES_FILE: Optional[str]
+    CHROME_USER_DATA_DIR: Optional[str]
+    CHROME_HEADLESS: bool
+    CHROME_SANDBOX: bool
+
+    USE_CURL: bool
+    USE_WGET: bool
+    USE_GIT: bool
+    USE_CHROME: bool
+    USE_YOUTUBEDL: bool
+
+    CURL_BINARY: Optional[str]
+    GIT_BINARY: Optional[str]
+    WGET_BINARY: Optional[str]
+    YOUTUBEDL_BINARY: Optional[str]
+    CHROME_BINARY: Optional[str]
+
+    TERM_WIDTH: Callable[[], int]
+    USER: str
+    ANSI: Dict[str, str]
+    REPO_DIR: str
+    PYTHON_DIR: str
+    LEGACY_DIR: str
+    TEMPLATES_DIR: str
+    ARCHIVE_DIR: str
+    SOURCES_DIR: str
+    LOGS_DIR: str
+
+    URL_BLACKLIST_PTN: Optional[Pattern]
+    WGET_AUTO_COMPRESSION: bool
+
+    ARCHIVEBOX_BINARY: str
+    VERSION: str
+    GIT_SHA: str
+
+    PYTHON_BINARY: str
+    PYTHON_ENCODING: str
+    PYTHON_VERSION: str
+
+    DJANGO_BINARY: str
+    DJANGO_VERSION: str
+
+    CURL_VERSION: str
+    WGET_VERSION: str
+    YOUTUBEDL_VERSION: str
+    GIT_VERSION: str
+    CHROME_VERSION: str
+
+    DEPENDENCIES: Dict[str, SimpleConfigValueDict]
+    CODE_LOCATIONS: Dict[str, SimpleConfigValueDict]
+    CONFIG_LOCATIONS: Dict[str, SimpleConfigValueDict]
+    DATA_LOCATIONS: Dict[str, SimpleConfigValueDict]
+    CHROME_OPTIONS: Dict[str, SimpleConfigValue]
+
+
+ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]
+ConfigDefaultValue = Union[ConfigValue, ConfigDefaultValueGetter]
+
+ConfigDefault = TypedDict('ConfigDefault', {
+    'default': ConfigDefaultValue,
+    'type': Optional[Type],
+    'aliases': Optional[Tuple[str, ...]],
+}, total=False)
+
+ConfigDefaultDict = Dict[str, ConfigDefault]
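Because ConfigDict is declared with total=False, every key is optional, but each declared key has a fixed value type, so mypy can flag both misspelled keys and wrong value types wherever the config dict is used. A hedged sketch of the kind of mistake this catches (the assignments are made up; the import assumes this checkout is on the path with mypy-extensions installed):

# Illustrative only; assumes the archivebox package from this checkout is importable.
from archivebox.legacy.config_stubs import ConfigDict

config: ConfigDict = {
    'TIMEOUT': 60,         # OK: declared as int
    'USE_COLOR': True,     # OK: declared as bool
}

# config['TIMEOUT'] = 'sixty'    # mypy error: value must be int
# config['TIMEOUTS'] = 60        # mypy error: ConfigDict has no key "TIMEOUTS"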
setup.py (3 lines changed)

@@ -37,10 +37,11 @@ setuptools.setup(
     python_requires='>=3.6',
     install_requires=[
         "dataclasses==0.6",
+        "mypy-extensions==0.4.1",
         "base32-crockford==0.3.0",
         "django==2.2",
         "django-extensions==2.1.6",
-        "python-crontab",
+        "python-crontab==2.3.6",
         "youtube-dl",
         "ipython",