mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 06:34:25 -04:00
Support for Reverse Proxy authentication backends (like authelia)
This commit is contained in:
parent
63693bdc77
commit
95cf85f8cf
3 changed files with 59 additions and 32 deletions
|
@ -82,17 +82,19 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
|
|||
},
|
||||
|
||||
'SERVER_CONFIG': {
|
||||
'SECRET_KEY': {'type': str, 'default': None},
|
||||
'BIND_ADDR': {'type': str, 'default': lambda c: ['127.0.0.1:8000', '0.0.0.0:8000'][c['IN_DOCKER']]},
|
||||
'ALLOWED_HOSTS': {'type': str, 'default': '*'},
|
||||
'DEBUG': {'type': bool, 'default': False},
|
||||
'PUBLIC_INDEX': {'type': bool, 'default': True},
|
||||
'PUBLIC_SNAPSHOTS': {'type': bool, 'default': True},
|
||||
'PUBLIC_ADD_VIEW': {'type': bool, 'default': False},
|
||||
'FOOTER_INFO': {'type': str, 'default': 'Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.'},
|
||||
'SNAPSHOTS_PER_PAGE': {'type': int, 'default': 40},
|
||||
'CUSTOM_TEMPLATES_DIR': {'type': str, 'default': None},
|
||||
'TIME_ZONE': {'type': str, 'default': 'UTC'},
|
||||
'SECRET_KEY': {'type': str, 'default': None},
|
||||
'BIND_ADDR': {'type': str, 'default': lambda c: ['127.0.0.1:8000', '0.0.0.0:8000'][c['IN_DOCKER']]},
|
||||
'ALLOWED_HOSTS': {'type': str, 'default': '*'},
|
||||
'DEBUG': {'type': bool, 'default': False},
|
||||
'PUBLIC_INDEX': {'type': bool, 'default': True},
|
||||
'PUBLIC_SNAPSHOTS': {'type': bool, 'default': True},
|
||||
'PUBLIC_ADD_VIEW': {'type': bool, 'default': False},
|
||||
'FOOTER_INFO': {'type': str, 'default': 'Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.'},
|
||||
'SNAPSHOTS_PER_PAGE': {'type': int, 'default': 40},
|
||||
'CUSTOM_TEMPLATES_DIR': {'type': str, 'default': None},
|
||||
'TIME_ZONE': {'type': str, 'default': 'UTC'},
|
||||
'REVERSE_PROXY_USER_HEADER': {'type': str, 'default': 'Remote-User'},
|
||||
'REVERSE_PROXY_WHITELIST': {'type': str, 'default': ''},
|
||||
},
|
||||
|
||||
'ARCHIVE_METHOD_TOGGLES': {
|
||||
|
@ -145,7 +147,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
|
|||
'--add-metadata',
|
||||
'--max-filesize={}'.format(c['MEDIA_MAX_SIZE']),
|
||||
]},
|
||||
|
||||
|
||||
|
||||
'WGET_ARGS': {'type': list, 'default': ['--no-verbose',
|
||||
'--adjust-extension',
|
||||
|
@ -187,7 +189,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
|
|||
'USE_NODE': {'type': bool, 'default': True},
|
||||
'USE_YOUTUBEDL': {'type': bool, 'default': True},
|
||||
'USE_RIPGREP': {'type': bool, 'default': True},
|
||||
|
||||
|
||||
'CURL_BINARY': {'type': str, 'default': 'curl'},
|
||||
'GIT_BINARY': {'type': str, 'default': 'git'},
|
||||
'WGET_BINARY': {'type': str, 'default': 'wget'},
|
||||
|
@ -268,7 +270,7 @@ STATICFILE_EXTENSIONS = {
|
|||
# that can be downloaded as-is, not html pages that need to be rendered
|
||||
'gif', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'wbmp', 'ico', 'jng', 'bmp',
|
||||
'svg', 'svgz', 'webp', 'ps', 'eps', 'ai',
|
||||
'mp3', 'mp4', 'm4a', 'mpeg', 'mpg', 'mkv', 'mov', 'webm', 'm4v',
|
||||
'mp3', 'mp4', 'm4a', 'mpeg', 'mpg', 'mkv', 'mov', 'webm', 'm4v',
|
||||
'flv', 'wmv', 'avi', 'ogg', 'ts', 'm3u8',
|
||||
'pdf', 'txt', 'rtf', 'rtfd', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx',
|
||||
'atom', 'rss', 'css', 'js', 'json',
|
||||
|
@ -277,7 +279,7 @@ STATICFILE_EXTENSIONS = {
|
|||
|
||||
# Less common extensions to consider adding later
|
||||
# jar, swf, bin, com, exe, dll, deb
|
||||
# ear, hqx, eot, wmlc, kml, kmz, cco, jardiff, jnlp, run, msi, msp, msm,
|
||||
# ear, hqx, eot, wmlc, kml, kmz, cco, jardiff, jnlp, run, msi, msp, msm,
|
||||
# pl pm, prc pdb, rar, rpm, sea, sit, tcl tk, der, pem, crt, xpi, xspf,
|
||||
# ra, mng, asx, asf, 3gpp, 3gp, mid, midi, kar, jad, wml, htc, mml
|
||||
|
||||
|
@ -389,14 +391,14 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
|
|||
'CHROME_BINARY': {'default': lambda c: c['CHROME_BINARY'] or find_chrome_binary()},
|
||||
'USE_CHROME': {'default': lambda c: c['USE_CHROME'] and c['CHROME_BINARY'] and (c['SAVE_PDF'] or c['SAVE_SCREENSHOT'] or c['SAVE_DOM'] or c['SAVE_SINGLEFILE'])},
|
||||
'CHROME_VERSION': {'default': lambda c: bin_version(c['CHROME_BINARY']) if c['USE_CHROME'] else None},
|
||||
|
||||
|
||||
'SAVE_PDF': {'default': lambda c: c['USE_CHROME'] and c['SAVE_PDF']},
|
||||
'SAVE_SCREENSHOT': {'default': lambda c: c['USE_CHROME'] and c['SAVE_SCREENSHOT']},
|
||||
'SAVE_DOM': {'default': lambda c: c['USE_CHROME'] and c['SAVE_DOM']},
|
||||
'SAVE_SINGLEFILE': {'default': lambda c: c['USE_CHROME'] and c['SAVE_SINGLEFILE'] and c['USE_NODE']},
|
||||
'SAVE_READABILITY': {'default': lambda c: c['USE_READABILITY'] and c['USE_NODE']},
|
||||
'SAVE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['USE_NODE']},
|
||||
|
||||
|
||||
'USE_NODE': {'default': lambda c: c['USE_NODE'] and (c['SAVE_READABILITY'] or c['SAVE_SINGLEFILE'] or c['SAVE_MERCURY'])},
|
||||
'NODE_VERSION': {'default': lambda c: bin_version(c['NODE_BINARY']) if c['USE_NODE'] else None},
|
||||
|
||||
|
@ -446,7 +448,7 @@ def load_config_val(key: str,
|
|||
elif val.lower() in ('false', 'no', '0'):
|
||||
return False
|
||||
else:
|
||||
raise ValueError(f'Invalid configuration option {key}={val} (expected a boolean: True/False)')
|
||||
raise ValueError(f'Invalid configuration option {key}={val} (expected a boolean: True/False)')
|
||||
|
||||
elif type is str:
|
||||
if val.lower() in ('true', 'false', 'yes', 'no', '1', '0'):
|
||||
|
@ -471,7 +473,7 @@ def load_config_file(out_dir: str=None) -> Optional[Dict[str, str]]:
|
|||
config_path = Path(out_dir) / CONFIG_FILENAME
|
||||
if config_path.exists():
|
||||
config_file = ConfigParser()
|
||||
config_file.optionxform = str
|
||||
config_file.optionxform = str
|
||||
config_file.read(config_path)
|
||||
# flatten into one namespace
|
||||
config_file_vars = {
|
||||
|
@ -495,7 +497,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
|
|||
#
|
||||
# You can add options here manually in INI format, or automatically by running:
|
||||
# archivebox config --set KEY=VALUE
|
||||
#
|
||||
#
|
||||
# If you modify this file manually, make sure to update your archive after by running:
|
||||
# archivebox init
|
||||
#
|
||||
|
@ -506,7 +508,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
|
|||
|
||||
out_dir = out_dir or Path(os.getenv('OUTPUT_DIR', '.')).resolve()
|
||||
config_path = Path(out_dir) / CONFIG_FILENAME
|
||||
|
||||
|
||||
if not config_path.exists():
|
||||
atomic_write(config_path, CONFIG_HEADER)
|
||||
|
||||
|
@ -544,7 +546,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
|
|||
|
||||
with open(config_path, 'w+', encoding='utf-8') as new:
|
||||
config_file.write(new)
|
||||
|
||||
|
||||
try:
|
||||
# validate the config by attempting to re-parse it
|
||||
CONFIG = load_all_config()
|
||||
|
@ -557,20 +559,20 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
|
|||
|
||||
if Path(f'{config_path}.bak').exists():
|
||||
os.remove(f'{config_path}.bak')
|
||||
|
||||
|
||||
return {
|
||||
key.upper(): CONFIG.get(key.upper())
|
||||
for key in config.keys()
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
def load_config(defaults: ConfigDefaultDict,
|
||||
config: Optional[ConfigDict]=None,
|
||||
out_dir: Optional[str]=None,
|
||||
env_vars: Optional[os._Environ]=None,
|
||||
config_file_vars: Optional[Dict[str, str]]=None) -> ConfigDict:
|
||||
|
||||
|
||||
env_vars = env_vars or os.environ
|
||||
config_file_vars = config_file_vars or load_config_file(out_dir=out_dir)
|
||||
|
||||
|
@ -600,7 +602,7 @@ def load_config(defaults: ConfigDefaultDict,
|
|||
stderr()
|
||||
# raise
|
||||
raise SystemExit(2)
|
||||
|
||||
|
||||
return extended_config
|
||||
|
||||
# def write_config(config: ConfigDict):
|
||||
|
@ -683,7 +685,7 @@ def bin_hash(binary: Optional[str]) -> Optional[str]:
|
|||
with io.open(abs_path, mode='rb') as f:
|
||||
for chunk in iter(lambda: f.read(io.DEFAULT_BUFFER_SIZE), b''):
|
||||
file_hash.update(chunk)
|
||||
|
||||
|
||||
return f'md5:{file_hash.hexdigest()}'
|
||||
|
||||
def find_chrome_binary() -> Optional[str]:
|
||||
|
@ -708,7 +710,7 @@ def find_chrome_binary() -> Optional[str]:
|
|||
full_path_exists = shutil.which(name)
|
||||
if full_path_exists:
|
||||
return name
|
||||
|
||||
|
||||
return None
|
||||
|
||||
def find_chrome_data_dir() -> Optional[str]:
|
||||
|
@ -1078,7 +1080,7 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
|
|||
stderr(' If you want to disable media archiving entirely, set SAVE_MEDIA=False instead:')
|
||||
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_media')
|
||||
stderr()
|
||||
|
||||
|
||||
def check_data_folder(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG) -> None:
|
||||
output_dir = out_dir or config['OUTPUT_DIR']
|
||||
assert isinstance(output_dir, (str, Path))
|
||||
|
@ -1117,7 +1119,7 @@ def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CO
|
|||
|
||||
def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
|
||||
check_system_config()
|
||||
|
||||
|
||||
output_dir = out_dir or Path(config['OUTPUT_DIR'])
|
||||
|
||||
assert isinstance(output_dir, Path) and isinstance(config['PACKAGE_DIR'], Path)
|
||||
|
@ -1152,7 +1154,7 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
|
|||
# Otherwise use default sqlite3 file-based database and initialize django
|
||||
# without running migrations automatically (user runs them manually by calling init)
|
||||
django.setup()
|
||||
|
||||
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
|
|
|
@ -1,8 +1,11 @@
|
|||
__package__ = 'archivebox.core'
|
||||
|
||||
import ipaddress
|
||||
from django.utils import timezone
|
||||
from django.contrib.auth.middleware import RemoteUserMiddleware
|
||||
from django.core.exceptions import ImproperlyConfigured
|
||||
|
||||
from ..config import PUBLIC_SNAPSHOTS
|
||||
from ..config import PUBLIC_SNAPSHOTS, REVERSE_PROXY_USER_HEADER, REVERSE_PROXY_WHITELIST
|
||||
|
||||
|
||||
def detect_timezone(request, activate: bool=True):
|
||||
|
@ -35,3 +38,23 @@ def CacheControlMiddleware(get_response):
|
|||
return response
|
||||
|
||||
return middleware
|
||||
|
||||
class ReverseProxyAuthMiddleware(RemoteUserMiddleware):
|
||||
header = 'HTTP_{normalized}'.format(normalized=REVERSE_PROXY_USER_HEADER.replace('-', '_').upper())
|
||||
|
||||
def process_request(self, request):
|
||||
if REVERSE_PROXY_WHITELIST == '':
|
||||
return
|
||||
|
||||
ip = request.META.get('REMOTE_ADDR')
|
||||
|
||||
for cidr in REVERSE_PROXY_WHITELIST.split(','):
|
||||
try:
|
||||
network = ipaddress.ip_network(cidr)
|
||||
except ValueError:
|
||||
raise ImproperlyConfigured(
|
||||
"The REVERSE_PROXY_WHITELIST config paramater is in invalid format, or "
|
||||
"contains invalid CIDR. Correct format is a coma-separated list of IPv4/IPv6 CIDRs.")
|
||||
|
||||
if ipaddress.ip_address(ip) in network:
|
||||
return super().process_request(request)
|
||||
|
|
|
@ -61,11 +61,13 @@ MIDDLEWARE = [
|
|||
'django.middleware.common.CommonMiddleware',
|
||||
'django.middleware.csrf.CsrfViewMiddleware',
|
||||
'django.contrib.auth.middleware.AuthenticationMiddleware',
|
||||
'core.middleware.ReverseProxyAuthMiddleware',
|
||||
'django.contrib.messages.middleware.MessageMiddleware',
|
||||
'core.middleware.CacheControlMiddleware',
|
||||
]
|
||||
|
||||
AUTHENTICATION_BACKENDS = [
|
||||
'django.contrib.auth.backends.RemoteUserBackend',
|
||||
'django.contrib.auth.backends.ModelBackend',
|
||||
]
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue