Support for Reverse Proxy authentication backends (like Authelia)

parent 63693bdc77
commit 95cf85f8cf
3 changed files with 59 additions and 32 deletions
archivebox/config.py

@@ -82,17 +82,19 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
     },

     'SERVER_CONFIG': {
         'SECRET_KEY': {'type': str, 'default': None},
         'BIND_ADDR': {'type': str, 'default': lambda c: ['127.0.0.1:8000', '0.0.0.0:8000'][c['IN_DOCKER']]},
         'ALLOWED_HOSTS': {'type': str, 'default': '*'},
         'DEBUG': {'type': bool, 'default': False},
         'PUBLIC_INDEX': {'type': bool, 'default': True},
         'PUBLIC_SNAPSHOTS': {'type': bool, 'default': True},
         'PUBLIC_ADD_VIEW': {'type': bool, 'default': False},
         'FOOTER_INFO': {'type': str, 'default': 'Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.'},
         'SNAPSHOTS_PER_PAGE': {'type': int, 'default': 40},
         'CUSTOM_TEMPLATES_DIR': {'type': str, 'default': None},
         'TIME_ZONE': {'type': str, 'default': 'UTC'},
+        'REVERSE_PROXY_USER_HEADER': {'type': str, 'default': 'Remote-User'},
+        'REVERSE_PROXY_WHITELIST': {'type': str, 'default': ''},
     },

     'ARCHIVE_METHOD_TOGGLES': {
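Since both new keys live in SERVER_CONFIG, they can be set like any other option, including via the `archivebox config --set KEY=VALUE` command referenced in the config-file header later in this diff. A minimal usage sketch, with illustrative (not default) CIDR values, assuming the reverse proxy is the only trusted source of the header:

    archivebox config --set REVERSE_PROXY_USER_HEADER=Remote-User
    archivebox config --set REVERSE_PROXY_WHITELIST=172.17.0.0/16,127.0.0.1/32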
@@ -145,7 +147,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
         '--add-metadata',
         '--max-filesize={}'.format(c['MEDIA_MAX_SIZE']),
     ]},


     'WGET_ARGS': {'type': list, 'default': ['--no-verbose',
         '--adjust-extension',

@@ -187,7 +189,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
     'USE_NODE': {'type': bool, 'default': True},
     'USE_YOUTUBEDL': {'type': bool, 'default': True},
     'USE_RIPGREP': {'type': bool, 'default': True},

     'CURL_BINARY': {'type': str, 'default': 'curl'},
     'GIT_BINARY': {'type': str, 'default': 'git'},
     'WGET_BINARY': {'type': str, 'default': 'wget'},

@@ -268,7 +270,7 @@ STATICFILE_EXTENSIONS = {
    # that can be downloaded as-is, not html pages that need to be rendered
    'gif', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'wbmp', 'ico', 'jng', 'bmp',
    'svg', 'svgz', 'webp', 'ps', 'eps', 'ai',
    'mp3', 'mp4', 'm4a', 'mpeg', 'mpg', 'mkv', 'mov', 'webm', 'm4v',
    'flv', 'wmv', 'avi', 'ogg', 'ts', 'm3u8',
    'pdf', 'txt', 'rtf', 'rtfd', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx',
    'atom', 'rss', 'css', 'js', 'json',

@@ -277,7 +279,7 @@ STATICFILE_EXTENSIONS = {

    # Less common extensions to consider adding later
    # jar, swf, bin, com, exe, dll, deb
    # ear, hqx, eot, wmlc, kml, kmz, cco, jardiff, jnlp, run, msi, msp, msm,
    # pl pm, prc pdb, rar, rpm, sea, sit, tcl tk, der, pem, crt, xpi, xspf,
    # ra, mng, asx, asf, 3gpp, 3gp, mid, midi, kar, jad, wml, htc, mml

@@ -389,14 +391,14 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
    'CHROME_BINARY': {'default': lambda c: c['CHROME_BINARY'] or find_chrome_binary()},
    'USE_CHROME': {'default': lambda c: c['USE_CHROME'] and c['CHROME_BINARY'] and (c['SAVE_PDF'] or c['SAVE_SCREENSHOT'] or c['SAVE_DOM'] or c['SAVE_SINGLEFILE'])},
    'CHROME_VERSION': {'default': lambda c: bin_version(c['CHROME_BINARY']) if c['USE_CHROME'] else None},

    'SAVE_PDF': {'default': lambda c: c['USE_CHROME'] and c['SAVE_PDF']},
    'SAVE_SCREENSHOT': {'default': lambda c: c['USE_CHROME'] and c['SAVE_SCREENSHOT']},
    'SAVE_DOM': {'default': lambda c: c['USE_CHROME'] and c['SAVE_DOM']},
    'SAVE_SINGLEFILE': {'default': lambda c: c['USE_CHROME'] and c['SAVE_SINGLEFILE'] and c['USE_NODE']},
    'SAVE_READABILITY': {'default': lambda c: c['USE_READABILITY'] and c['USE_NODE']},
    'SAVE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['USE_NODE']},

    'USE_NODE': {'default': lambda c: c['USE_NODE'] and (c['SAVE_READABILITY'] or c['SAVE_SINGLEFILE'] or c['SAVE_MERCURY'])},
    'NODE_VERSION': {'default': lambda c: bin_version(c['NODE_BINARY']) if c['USE_NODE'] else None},

@@ -446,7 +448,7 @@ def load_config_val(key: str,
        elif val.lower() in ('false', 'no', '0'):
            return False
        else:
            raise ValueError(f'Invalid configuration option {key}={val} (expected a boolean: True/False)')

    elif type is str:
        if val.lower() in ('true', 'false', 'yes', 'no', '1', '0'):

@@ -471,7 +473,7 @@ def load_config_file(out_dir: str=None) -> Optional[Dict[str, str]]:
    config_path = Path(out_dir) / CONFIG_FILENAME
    if config_path.exists():
        config_file = ConfigParser()
        config_file.optionxform = str
        config_file.read(config_path)
        # flatten into one namespace
        config_file_vars = {

@@ -495,7 +497,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
#
# You can add options here manually in INI format, or automatically by running:
#    archivebox config --set KEY=VALUE
#
# If you modify this file manually, make sure to update your archive after by running:
#    archivebox init
#

@@ -506,7 +508,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:

    out_dir = out_dir or Path(os.getenv('OUTPUT_DIR', '.')).resolve()
    config_path = Path(out_dir) / CONFIG_FILENAME

    if not config_path.exists():
        atomic_write(config_path, CONFIG_HEADER)

@@ -544,7 +546,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:

    with open(config_path, 'w+', encoding='utf-8') as new:
        config_file.write(new)

    try:
        # validate the config by attempting to re-parse it
        CONFIG = load_all_config()

@@ -557,20 +559,20 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:

    if Path(f'{config_path}.bak').exists():
        os.remove(f'{config_path}.bak')

    return {
        key.upper(): CONFIG.get(key.upper())
        for key in config.keys()
    }


def load_config(defaults: ConfigDefaultDict,
                config: Optional[ConfigDict]=None,
                out_dir: Optional[str]=None,
                env_vars: Optional[os._Environ]=None,
                config_file_vars: Optional[Dict[str, str]]=None) -> ConfigDict:

    env_vars = env_vars or os.environ
    config_file_vars = config_file_vars or load_config_file(out_dir=out_dir)

@@ -600,7 +602,7 @@ def load_config(defaults: ConfigDefaultDict,
        stderr()
        # raise
        raise SystemExit(2)

    return extended_config

# def write_config(config: ConfigDict):

@@ -683,7 +685,7 @@ def bin_hash(binary: Optional[str]) -> Optional[str]:
    with io.open(abs_path, mode='rb') as f:
        for chunk in iter(lambda: f.read(io.DEFAULT_BUFFER_SIZE), b''):
            file_hash.update(chunk)

    return f'md5:{file_hash.hexdigest()}'

def find_chrome_binary() -> Optional[str]:

@@ -708,7 +710,7 @@ def find_chrome_binary() -> Optional[str]:
        full_path_exists = shutil.which(name)
        if full_path_exists:
            return name

    return None

def find_chrome_data_dir() -> Optional[str]:

@@ -1078,7 +1080,7 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
        stderr('    If you want to disable media archiving entirely, set SAVE_MEDIA=False instead:')
        stderr('        https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_media')
        stderr()

def check_data_folder(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG) -> None:
    output_dir = out_dir or config['OUTPUT_DIR']
    assert isinstance(output_dir, (str, Path))

@@ -1117,7 +1119,7 @@ def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG):

def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
    check_system_config()

    output_dir = out_dir or Path(config['OUTPUT_DIR'])

    assert isinstance(output_dir, Path) and isinstance(config['PACKAGE_DIR'], Path)

@@ -1152,7 +1154,7 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
    # Otherwise use default sqlite3 file-based database and initialize django
    # without running migrations automatically (user runs them manually by calling init)
    django.setup()


    from django.conf import settings
archivebox/core/middleware.py

@@ -1,8 +1,11 @@
 __package__ = 'archivebox.core'

+import ipaddress
 from django.utils import timezone
+from django.contrib.auth.middleware import RemoteUserMiddleware
+from django.core.exceptions import ImproperlyConfigured

-from ..config import PUBLIC_SNAPSHOTS
+from ..config import PUBLIC_SNAPSHOTS, REVERSE_PROXY_USER_HEADER, REVERSE_PROXY_WHITELIST


 def detect_timezone(request, activate: bool=True):
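Subclassing RemoteUserMiddleware is Django's documented mechanism for trusting a non-standard proxy header: the subclass only needs to override the `header` attribute with the normalized `HTTP_*` META key. A generic sketch of that pattern (the header name here is a hypothetical example, not one used by this commit):

    from django.contrib.auth.middleware import RemoteUserMiddleware

    class CustomHeaderRemoteUserMiddleware(RemoteUserMiddleware):
        # A proxy header named 'X-Auth-User' (hypothetical) appears in
        # request.META as 'HTTP_X_AUTH_USER' after WSGI header normalization.
        header = 'HTTP_X_AUTH_USER'

The ReverseProxyAuthMiddleware added below follows this pattern, but computes `header` from the REVERSE_PROXY_USER_HEADER config value and additionally gates it behind an IP whitelist.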
@@ -35,3 +38,23 @@ def CacheControlMiddleware(get_response):
         return response

     return middleware

+
+class ReverseProxyAuthMiddleware(RemoteUserMiddleware):
+    header = 'HTTP_{normalized}'.format(normalized=REVERSE_PROXY_USER_HEADER.replace('-', '_').upper())
+
+    def process_request(self, request):
+        if REVERSE_PROXY_WHITELIST == '':
+            return
+
+        ip = request.META.get('REMOTE_ADDR')
+
+        for cidr in REVERSE_PROXY_WHITELIST.split(','):
+            try:
+                network = ipaddress.ip_network(cidr)
+            except ValueError:
+                raise ImproperlyConfigured(
+                    "The REVERSE_PROXY_WHITELIST config parameter is in an invalid format, or "
+                    "contains an invalid CIDR. The correct format is a comma-separated list of IPv4/IPv6 CIDRs.")
+
+            if ipaddress.ip_address(ip) in network:
+                return super().process_request(request)
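Two details of the class above are worth making concrete: the header name from the config is normalized into a WSGI META key, and header-based auth only happens when the request's REMOTE_ADDR falls inside a whitelisted CIDR (with an empty whitelist, the default, the middleware is a no-op). A small standalone sketch of the same two checks, not part of the commit, using only the stdlib ipaddress module the middleware imports (it mirrors the matching logic, not the ImproperlyConfigured error handling):

    import ipaddress

    # 'Remote-User' normalizes to the WSGI META key 'HTTP_REMOTE_USER',
    # matching the `header` attribute computed on the class above.
    assert 'HTTP_{}'.format('Remote-User'.replace('-', '_').upper()) == 'HTTP_REMOTE_USER'

    def ip_is_whitelisted(ip: str, whitelist: str) -> bool:
        """True if `ip` falls inside any CIDR in the comma-separated `whitelist`."""
        if whitelist == '':
            return False
        return any(
            ipaddress.ip_address(ip) in ipaddress.ip_network(cidr)
            for cidr in whitelist.split(',')
        )

    assert ip_is_whitelisted('10.0.0.5', '10.0.0.0/8,192.168.0.0/16') is True
    assert ip_is_whitelisted('8.8.8.8', '10.0.0.0/8') is False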
archivebox/core/settings.py

@@ -61,11 +61,13 @@ MIDDLEWARE = [
     'django.middleware.common.CommonMiddleware',
     'django.middleware.csrf.CsrfViewMiddleware',
     'django.contrib.auth.middleware.AuthenticationMiddleware',
+    'core.middleware.ReverseProxyAuthMiddleware',
     'django.contrib.messages.middleware.MessageMiddleware',
     'core.middleware.CacheControlMiddleware',
 ]

 AUTHENTICATION_BACKENDS = [
+    'django.contrib.auth.backends.RemoteUserBackend',
     'django.contrib.auth.backends.ModelBackend',
 ]
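Placement matters in both lists: ReverseProxyAuthMiddleware must come after Django's AuthenticationMiddleware (RemoteUserMiddleware raises ImproperlyConfigured otherwise), and RemoteUserBackend is listed first so header-authenticated users resolve through it, while ModelBackend stays available for normal password logins. A rough sketch, assuming a configured Django test environment for this project and a REVERSE_PROXY_WHITELIST covering 127.0.0.1, of how a proxy-set header would flow through:

    from django.test import Client

    client = Client()
    # The test client passes extra kwargs straight into the WSGI environ, so
    # a proxy's 'Remote-User: alice' header arrives as META['HTTP_REMOTE_USER'].
    response = client.get('/', HTTP_REMOTE_USER='alice', REMOTE_ADDR='127.0.0.1')
    # With RemoteUserBackend enabled, 'alice' is looked up (and auto-created
    # by default) and attached as request.user for the duration of the request.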