mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 14:44:29 -04:00
delete dead code
This commit is contained in:
parent
97695bda5e
commit
7ffb81f61b
19 changed files with 345 additions and 689 deletions
|
@ -1,26 +0,0 @@
|
||||||
# from django.contrib import admin
|
|
||||||
# from django import forms
|
|
||||||
|
|
||||||
# from django_jsonform.widgets import JSONFormWidget
|
|
||||||
|
|
||||||
# from django_pydantic_field.v2.fields import PydanticSchemaField
|
|
||||||
|
|
||||||
# from .models import CustomPlugin
|
|
||||||
|
|
||||||
|
|
||||||
# class PluginForm(forms.ModelForm):
|
|
||||||
# class Meta:
|
|
||||||
# model = CustomPlugin
|
|
||||||
# fields = '__all__'
|
|
||||||
# widgets = {
|
|
||||||
# 'items': JSONFormWidget(schema=PluginSchema),
|
|
||||||
# }
|
|
||||||
|
|
||||||
|
|
||||||
# class PluginAdmin(admin.ModelAdmin):
|
|
||||||
# formfield_overrides = {
|
|
||||||
# PydanticSchemaField: {"widget": JSONFormWidget},
|
|
||||||
# }
|
|
||||||
# form = PluginForm
|
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,12 @@
|
||||||
from typing import Dict, Any, List
|
from typing import Any, List, Callable
|
||||||
|
|
||||||
import configparser
|
|
||||||
import json
|
import json
|
||||||
import ast
|
import ast
|
||||||
|
import inspect
|
||||||
|
import configparser
|
||||||
|
|
||||||
|
from pydantic.json_schema import GenerateJsonSchema
|
||||||
|
from pydantic_core import to_jsonable_python
|
||||||
|
|
||||||
JSONValue = str | bool | int | None | List['JSONValue']
|
JSONValue = str | bool | int | None | List['JSONValue']
|
||||||
|
|
||||||
|
@ -63,333 +67,351 @@ def convert(ini_str: str) -> str:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class JSONSchemaWithLambdas(GenerateJsonSchema):
    """JSON-schema generator that can render callable field defaults.

    Overrides ``encode_default`` so that a callable default is emitted as a
    ``{{lambda ...}}`` placeholder string built from its source text instead
    of being handed to the JSON serializer.
    """

    def encode_default(self, default: Any) -> Any:
        """Return a JSON-compatible representation of a field default.

        Callable defaults become the string ``'{{lambda <source>}}'``; every
        other value is converted with ``to_jsonable_python`` using the
        timedelta/bytes modes read from the generator's config.
        """
        settings = self._config  # looked up first, before any branching

        if not isinstance(default, Callable):
            return to_jsonable_python(
                default,
                timedelta_mode=settings.ser_json_timedelta,
                bytes_mode=settings.ser_json_bytes,
                serialize_unknown=True,
            )

        # Keep whatever follows the last '=lambda ' in the callable's source,
        # trim surrounding whitespace, then drop the final character.
        # NOTE(review): the dropped character is presumably a trailing ')' or
        # ',' from the enclosing field declaration -- confirm against callers.
        source_text = inspect.getsource(default)
        lambda_tail = source_text.split('=lambda ')[-1]
        lambda_body = lambda_tail.strip()[:-1]
        return '{{lambda ' + lambda_body + '}}'
|
||||||
|
|
||||||
|
# for computed_field properties render them like this instead:
|
||||||
|
# inspect.getsource(field.wrapped_property.fget).split('def ', 1)[-1].split('\n', 1)[-1].strip().strip('return '),
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### Basic Assertions
|
### Basic Assertions
|
||||||
|
|
||||||
test_input = """
|
# test_input = """
|
||||||
[SERVER_CONFIG]
|
# [SERVER_CONFIG]
|
||||||
IS_TTY=False
|
# IS_TTY=False
|
||||||
USE_COLOR=False
|
# USE_COLOR=False
|
||||||
SHOW_PROGRESS=False
|
# SHOW_PROGRESS=False
|
||||||
IN_DOCKER=False
|
# IN_DOCKER=False
|
||||||
IN_QEMU=False
|
# IN_QEMU=False
|
||||||
PUID=501
|
# PUID=501
|
||||||
PGID=20
|
# PGID=20
|
||||||
OUTPUT_DIR=/opt/archivebox/data
|
# OUTPUT_DIR=/opt/archivebox/data
|
||||||
CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf
|
# CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf
|
||||||
ONLY_NEW=True
|
# ONLY_NEW=True
|
||||||
TIMEOUT=60
|
# TIMEOUT=60
|
||||||
MEDIA_TIMEOUT=3600
|
# MEDIA_TIMEOUT=3600
|
||||||
OUTPUT_PERMISSIONS=644
|
# OUTPUT_PERMISSIONS=644
|
||||||
RESTRICT_FILE_NAMES=windows
|
# RESTRICT_FILE_NAMES=windows
|
||||||
URL_DENYLIST=\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$
|
# URL_DENYLIST=\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$
|
||||||
URL_ALLOWLIST=None
|
# URL_ALLOWLIST=None
|
||||||
ADMIN_USERNAME=None
|
# ADMIN_USERNAME=None
|
||||||
ADMIN_PASSWORD=None
|
# ADMIN_PASSWORD=None
|
||||||
ENFORCE_ATOMIC_WRITES=True
|
# ENFORCE_ATOMIC_WRITES=True
|
||||||
TAG_SEPARATOR_PATTERN=[,]
|
# TAG_SEPARATOR_PATTERN=[,]
|
||||||
SECRET_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
# SECRET_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
||||||
BIND_ADDR=127.0.0.1:8000
|
# BIND_ADDR=127.0.0.1:8000
|
||||||
ALLOWED_HOSTS=*
|
# ALLOWED_HOSTS=*
|
||||||
DEBUG=False
|
# DEBUG=False
|
||||||
PUBLIC_INDEX=True
|
# PUBLIC_INDEX=True
|
||||||
PUBLIC_SNAPSHOTS=True
|
# PUBLIC_SNAPSHOTS=True
|
||||||
PUBLIC_ADD_VIEW=False
|
# PUBLIC_ADD_VIEW=False
|
||||||
FOOTER_INFO=Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.
|
# FOOTER_INFO=Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.
|
||||||
SNAPSHOTS_PER_PAGE=40
|
# SNAPSHOTS_PER_PAGE=40
|
||||||
CUSTOM_TEMPLATES_DIR=None
|
# CUSTOM_TEMPLATES_DIR=None
|
||||||
TIME_ZONE=UTC
|
# TIME_ZONE=UTC
|
||||||
TIMEZONE=UTC
|
# TIMEZONE=UTC
|
||||||
REVERSE_PROXY_USER_HEADER=Remote-User
|
# REVERSE_PROXY_USER_HEADER=Remote-User
|
||||||
REVERSE_PROXY_WHITELIST=
|
# REVERSE_PROXY_WHITELIST=
|
||||||
LOGOUT_REDIRECT_URL=/
|
# LOGOUT_REDIRECT_URL=/
|
||||||
PREVIEW_ORIGINALS=True
|
# PREVIEW_ORIGINALS=True
|
||||||
LDAP=False
|
# LDAP=False
|
||||||
LDAP_SERVER_URI=None
|
# LDAP_SERVER_URI=None
|
||||||
LDAP_BIND_DN=None
|
# LDAP_BIND_DN=None
|
||||||
LDAP_BIND_PASSWORD=None
|
# LDAP_BIND_PASSWORD=None
|
||||||
LDAP_USER_BASE=None
|
# LDAP_USER_BASE=None
|
||||||
LDAP_USER_FILTER=None
|
# LDAP_USER_FILTER=None
|
||||||
LDAP_USERNAME_ATTR=None
|
# LDAP_USERNAME_ATTR=None
|
||||||
LDAP_FIRSTNAME_ATTR=None
|
# LDAP_FIRSTNAME_ATTR=None
|
||||||
LDAP_LASTNAME_ATTR=None
|
# LDAP_LASTNAME_ATTR=None
|
||||||
LDAP_EMAIL_ATTR=None
|
# LDAP_EMAIL_ATTR=None
|
||||||
LDAP_CREATE_SUPERUSER=False
|
# LDAP_CREATE_SUPERUSER=False
|
||||||
SAVE_TITLE=True
|
# SAVE_TITLE=True
|
||||||
SAVE_FAVICON=True
|
# SAVE_FAVICON=True
|
||||||
SAVE_WGET=True
|
# SAVE_WGET=True
|
||||||
SAVE_WGET_REQUISITES=True
|
# SAVE_WGET_REQUISITES=True
|
||||||
SAVE_SINGLEFILE=True
|
# SAVE_SINGLEFILE=True
|
||||||
SAVE_READABILITY=True
|
# SAVE_READABILITY=True
|
||||||
SAVE_MERCURY=True
|
# SAVE_MERCURY=True
|
||||||
SAVE_HTMLTOTEXT=True
|
# SAVE_HTMLTOTEXT=True
|
||||||
SAVE_PDF=True
|
# SAVE_PDF=True
|
||||||
SAVE_SCREENSHOT=True
|
# SAVE_SCREENSHOT=True
|
||||||
SAVE_DOM=True
|
# SAVE_DOM=True
|
||||||
SAVE_HEADERS=True
|
# SAVE_HEADERS=True
|
||||||
SAVE_WARC=True
|
# SAVE_WARC=True
|
||||||
SAVE_GIT=True
|
# SAVE_GIT=True
|
||||||
SAVE_MEDIA=True
|
# SAVE_MEDIA=True
|
||||||
SAVE_ARCHIVE_DOT_ORG=True
|
# SAVE_ARCHIVE_DOT_ORG=True
|
||||||
RESOLUTION=1440,2000
|
# RESOLUTION=1440,2000
|
||||||
GIT_DOMAINS=github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht
|
# GIT_DOMAINS=github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht
|
||||||
CHECK_SSL_VALIDITY=True
|
# CHECK_SSL_VALIDITY=True
|
||||||
MEDIA_MAX_SIZE=750m
|
# MEDIA_MAX_SIZE=750m
|
||||||
USER_AGENT=None
|
# USER_AGENT=None
|
||||||
CURL_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)
|
# CURL_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)
|
||||||
WGET_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5
|
# WGET_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5
|
||||||
CHROME_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)
|
# CHROME_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)
|
||||||
COOKIES_FILE=None
|
# COOKIES_FILE=None
|
||||||
CHROME_USER_DATA_DIR=None
|
# CHROME_USER_DATA_DIR=None
|
||||||
CHROME_TIMEOUT=0
|
# CHROME_TIMEOUT=0
|
||||||
CHROME_HEADLESS=True
|
# CHROME_HEADLESS=True
|
||||||
CHROME_SANDBOX=True
|
# CHROME_SANDBOX=True
|
||||||
CHROME_EXTRA_ARGS=[]
|
# CHROME_EXTRA_ARGS=[]
|
||||||
YOUTUBEDL_ARGS=['--restrict-filenames', '--trim-filenames', '128', '--write-description', '--write-info-json', '--write-annotations', '--write-thumbnail', '--no-call-home', '--write-sub', '--write-auto-subs', '--convert-subs=srt', '--yes-playlist', '--continue', '--no-abort-on-error', '--ignore-errors', '--geo-bypass', '--add-metadata', '--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)']
|
# YOUTUBEDL_ARGS=['--restrict-filenames', '--trim-filenames', '128', '--write-description', '--write-info-json', '--write-annotations', '--write-thumbnail', '--no-call-home', '--write-sub', '--write-auto-subs', '--convert-subs=srt', '--yes-playlist', '--continue', '--no-abort-on-error', '--ignore-errors', '--geo-bypass', '--add-metadata', '--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)']
|
||||||
YOUTUBEDL_EXTRA_ARGS=[]
|
# YOUTUBEDL_EXTRA_ARGS=[]
|
||||||
WGET_ARGS=['--no-verbose', '--adjust-extension', '--convert-links', '--force-directories', '--backup-converted', '--span-hosts', '--no-parent', '-e', 'robots=off']
|
# WGET_ARGS=['--no-verbose', '--adjust-extension', '--convert-links', '--force-directories', '--backup-converted', '--span-hosts', '--no-parent', '-e', 'robots=off']
|
||||||
WGET_EXTRA_ARGS=[]
|
# WGET_EXTRA_ARGS=[]
|
||||||
CURL_ARGS=['--silent', '--location', '--compressed']
|
# CURL_ARGS=['--silent', '--location', '--compressed']
|
||||||
CURL_EXTRA_ARGS=[]
|
# CURL_EXTRA_ARGS=[]
|
||||||
GIT_ARGS=['--recursive']
|
# GIT_ARGS=['--recursive']
|
||||||
SINGLEFILE_ARGS=[]
|
# SINGLEFILE_ARGS=[]
|
||||||
SINGLEFILE_EXTRA_ARGS=[]
|
# SINGLEFILE_EXTRA_ARGS=[]
|
||||||
MERCURY_ARGS=['--format=text']
|
# MERCURY_ARGS=['--format=text']
|
||||||
MERCURY_EXTRA_ARGS=[]
|
# MERCURY_EXTRA_ARGS=[]
|
||||||
FAVICON_PROVIDER=https://www.google.com/s2/favicons?domain={}
|
# FAVICON_PROVIDER=https://www.google.com/s2/favicons?domain={}
|
||||||
USE_INDEXING_BACKEND=True
|
# USE_INDEXING_BACKEND=True
|
||||||
USE_SEARCHING_BACKEND=True
|
# USE_SEARCHING_BACKEND=True
|
||||||
SEARCH_BACKEND_ENGINE=ripgrep
|
# SEARCH_BACKEND_ENGINE=ripgrep
|
||||||
SEARCH_BACKEND_HOST_NAME=localhost
|
# SEARCH_BACKEND_HOST_NAME=localhost
|
||||||
SEARCH_BACKEND_PORT=1491
|
# SEARCH_BACKEND_PORT=1491
|
||||||
SEARCH_BACKEND_PASSWORD=SecretPassword
|
# SEARCH_BACKEND_PASSWORD=SecretPassword
|
||||||
SEARCH_PROCESS_HTML=True
|
# SEARCH_PROCESS_HTML=True
|
||||||
SONIC_COLLECTION=archivebox
|
# SONIC_COLLECTION=archivebox
|
||||||
SONIC_BUCKET=snapshots
|
# SONIC_BUCKET=snapshots
|
||||||
SEARCH_BACKEND_TIMEOUT=90
|
# SEARCH_BACKEND_TIMEOUT=90
|
||||||
FTS_SEPARATE_DATABASE=True
|
# FTS_SEPARATE_DATABASE=True
|
||||||
FTS_TOKENIZERS=porter unicode61 remove_diacritics 2
|
# FTS_TOKENIZERS=porter unicode61 remove_diacritics 2
|
||||||
FTS_SQLITE_MAX_LENGTH=1000000000
|
# FTS_SQLITE_MAX_LENGTH=1000000000
|
||||||
USE_CURL=True
|
# USE_CURL=True
|
||||||
USE_WGET=True
|
# USE_WGET=True
|
||||||
USE_SINGLEFILE=True
|
# USE_SINGLEFILE=True
|
||||||
USE_READABILITY=True
|
# USE_READABILITY=True
|
||||||
USE_MERCURY=True
|
# USE_MERCURY=True
|
||||||
USE_GIT=True
|
# USE_GIT=True
|
||||||
USE_CHROME=True
|
# USE_CHROME=True
|
||||||
USE_NODE=True
|
# USE_NODE=True
|
||||||
USE_YOUTUBEDL=True
|
# USE_YOUTUBEDL=True
|
||||||
USE_RIPGREP=True
|
# USE_RIPGREP=True
|
||||||
CURL_BINARY=curl
|
# CURL_BINARY=curl
|
||||||
GIT_BINARY=git
|
# GIT_BINARY=git
|
||||||
WGET_BINARY=wget
|
# WGET_BINARY=wget
|
||||||
SINGLEFILE_BINARY=single-file
|
# SINGLEFILE_BINARY=single-file
|
||||||
READABILITY_BINARY=readability-extractor
|
# READABILITY_BINARY=readability-extractor
|
||||||
MERCURY_BINARY=postlight-parser
|
# MERCURY_BINARY=postlight-parser
|
||||||
YOUTUBEDL_BINARY=yt-dlp
|
# YOUTUBEDL_BINARY=yt-dlp
|
||||||
NODE_BINARY=node
|
# NODE_BINARY=node
|
||||||
RIPGREP_BINARY=rg
|
# RIPGREP_BINARY=rg
|
||||||
CHROME_BINARY=chrome
|
# CHROME_BINARY=chrome
|
||||||
POCKET_CONSUMER_KEY=None
|
# POCKET_CONSUMER_KEY=None
|
||||||
USER=squash
|
# USER=squash
|
||||||
PACKAGE_DIR=/opt/archivebox/archivebox
|
# PACKAGE_DIR=/opt/archivebox/archivebox
|
||||||
TEMPLATES_DIR=/opt/archivebox/archivebox/templates
|
# TEMPLATES_DIR=/opt/archivebox/archivebox/templates
|
||||||
ARCHIVE_DIR=/opt/archivebox/data/archive
|
# ARCHIVE_DIR=/opt/archivebox/data/archive
|
||||||
SOURCES_DIR=/opt/archivebox/data/sources
|
# SOURCES_DIR=/opt/archivebox/data/sources
|
||||||
LOGS_DIR=/opt/archivebox/data/logs
|
# LOGS_DIR=/opt/archivebox/data/logs
|
||||||
PERSONAS_DIR=/opt/archivebox/data/personas
|
# PERSONAS_DIR=/opt/archivebox/data/personas
|
||||||
URL_DENYLIST_PTN=re.compile('\\.(css|js|otf|ttf|woff|woff2|gstatic\\.com|googleapis\\.com/css)(\\?.*)?$', re.IGNORECASE|re.MULTILINE)
|
# URL_DENYLIST_PTN=re.compile('\\.(css|js|otf|ttf|woff|woff2|gstatic\\.com|googleapis\\.com/css)(\\?.*)?$', re.IGNORECASE|re.MULTILINE)
|
||||||
URL_ALLOWLIST_PTN=None
|
# URL_ALLOWLIST_PTN=None
|
||||||
DIR_OUTPUT_PERMISSIONS=755
|
# DIR_OUTPUT_PERMISSIONS=755
|
||||||
ARCHIVEBOX_BINARY=/opt/archivebox/.venv/bin/archivebox
|
# ARCHIVEBOX_BINARY=/opt/archivebox/.venv/bin/archivebox
|
||||||
VERSION=0.8.0
|
# VERSION=0.8.0
|
||||||
COMMIT_HASH=102e87578c6036bb0132dd1ebd17f8f05ffc880f
|
# COMMIT_HASH=102e87578c6036bb0132dd1ebd17f8f05ffc880f
|
||||||
BUILD_TIME=2024-05-15 03:28:05 1715768885
|
# BUILD_TIME=2024-05-15 03:28:05 1715768885
|
||||||
VERSIONS_AVAILABLE=None
|
# VERSIONS_AVAILABLE=None
|
||||||
CAN_UPGRADE=False
|
# CAN_UPGRADE=False
|
||||||
PYTHON_BINARY=/opt/archivebox/.venv/bin/python3.10
|
# PYTHON_BINARY=/opt/archivebox/.venv/bin/python3.10
|
||||||
PYTHON_ENCODING=UTF-8
|
# PYTHON_ENCODING=UTF-8
|
||||||
PYTHON_VERSION=3.10.14
|
# PYTHON_VERSION=3.10.14
|
||||||
DJANGO_BINARY=/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py
|
# DJANGO_BINARY=/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py
|
||||||
DJANGO_VERSION=5.0.6 final (0)
|
# DJANGO_VERSION=5.0.6 final (0)
|
||||||
SQLITE_BINARY=/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py
|
# SQLITE_BINARY=/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py
|
||||||
SQLITE_VERSION=2.6.0
|
# SQLITE_VERSION=2.6.0
|
||||||
CURL_VERSION=curl 8.4.0 (x86_64-apple-darwin23.0)
|
# CURL_VERSION=curl 8.4.0 (x86_64-apple-darwin23.0)
|
||||||
WGET_VERSION=GNU Wget 1.24.5
|
# WGET_VERSION=GNU Wget 1.24.5
|
||||||
WGET_AUTO_COMPRESSION=True
|
# WGET_AUTO_COMPRESSION=True
|
||||||
RIPGREP_VERSION=ripgrep 14.1.0
|
# RIPGREP_VERSION=ripgrep 14.1.0
|
||||||
SINGLEFILE_VERSION=None
|
# SINGLEFILE_VERSION=None
|
||||||
READABILITY_VERSION=None
|
# READABILITY_VERSION=None
|
||||||
MERCURY_VERSION=None
|
# MERCURY_VERSION=None
|
||||||
GIT_VERSION=git version 2.44.0
|
# GIT_VERSION=git version 2.44.0
|
||||||
YOUTUBEDL_VERSION=2024.04.09
|
# YOUTUBEDL_VERSION=2024.04.09
|
||||||
CHROME_VERSION=Google Chrome 124.0.6367.207
|
# CHROME_VERSION=Google Chrome 124.0.6367.207
|
||||||
NODE_VERSION=v21.7.3
|
# NODE_VERSION=v21.7.3
|
||||||
"""
|
# """
|
||||||
|
|
||||||
|
|
||||||
expected_output = TOML_HEADER + '''[SERVER_CONFIG]
|
# expected_output = TOML_HEADER + '''[SERVER_CONFIG]
|
||||||
IS_TTY = false
|
# IS_TTY = false
|
||||||
USE_COLOR = false
|
# USE_COLOR = false
|
||||||
SHOW_PROGRESS = false
|
# SHOW_PROGRESS = false
|
||||||
IN_DOCKER = false
|
# IN_DOCKER = false
|
||||||
IN_QEMU = false
|
# IN_QEMU = false
|
||||||
PUID = 501
|
# PUID = 501
|
||||||
PGID = 20
|
# PGID = 20
|
||||||
OUTPUT_DIR = "/opt/archivebox/data"
|
# OUTPUT_DIR = "/opt/archivebox/data"
|
||||||
CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf"
|
# CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf"
|
||||||
ONLY_NEW = true
|
# ONLY_NEW = true
|
||||||
TIMEOUT = 60
|
# TIMEOUT = 60
|
||||||
MEDIA_TIMEOUT = 3600
|
# MEDIA_TIMEOUT = 3600
|
||||||
OUTPUT_PERMISSIONS = 644
|
# OUTPUT_PERMISSIONS = 644
|
||||||
RESTRICT_FILE_NAMES = "windows"
|
# RESTRICT_FILE_NAMES = "windows"
|
||||||
URL_DENYLIST = "\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$"
|
# URL_DENYLIST = "\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$"
|
||||||
URL_ALLOWLIST = null
|
# URL_ALLOWLIST = null
|
||||||
ADMIN_USERNAME = null
|
# ADMIN_USERNAME = null
|
||||||
ADMIN_PASSWORD = null
|
# ADMIN_PASSWORD = null
|
||||||
ENFORCE_ATOMIC_WRITES = true
|
# ENFORCE_ATOMIC_WRITES = true
|
||||||
TAG_SEPARATOR_PATTERN = "[,]"
|
# TAG_SEPARATOR_PATTERN = "[,]"
|
||||||
SECRET_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
# SECRET_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
||||||
BIND_ADDR = "127.0.0.1:8000"
|
# BIND_ADDR = "127.0.0.1:8000"
|
||||||
ALLOWED_HOSTS = "*"
|
# ALLOWED_HOSTS = "*"
|
||||||
DEBUG = false
|
# DEBUG = false
|
||||||
PUBLIC_INDEX = true
|
# PUBLIC_INDEX = true
|
||||||
PUBLIC_SNAPSHOTS = true
|
# PUBLIC_SNAPSHOTS = true
|
||||||
PUBLIC_ADD_VIEW = false
|
# PUBLIC_ADD_VIEW = false
|
||||||
FOOTER_INFO = "Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests."
|
# FOOTER_INFO = "Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests."
|
||||||
SNAPSHOTS_PER_PAGE = 40
|
# SNAPSHOTS_PER_PAGE = 40
|
||||||
CUSTOM_TEMPLATES_DIR = null
|
# CUSTOM_TEMPLATES_DIR = null
|
||||||
TIME_ZONE = "UTC"
|
# TIME_ZONE = "UTC"
|
||||||
TIMEZONE = "UTC"
|
# TIMEZONE = "UTC"
|
||||||
REVERSE_PROXY_USER_HEADER = "Remote-User"
|
# REVERSE_PROXY_USER_HEADER = "Remote-User"
|
||||||
REVERSE_PROXY_WHITELIST = ""
|
# REVERSE_PROXY_WHITELIST = ""
|
||||||
LOGOUT_REDIRECT_URL = "/"
|
# LOGOUT_REDIRECT_URL = "/"
|
||||||
PREVIEW_ORIGINALS = true
|
# PREVIEW_ORIGINALS = true
|
||||||
LDAP = false
|
# LDAP = false
|
||||||
LDAP_SERVER_URI = null
|
# LDAP_SERVER_URI = null
|
||||||
LDAP_BIND_DN = null
|
# LDAP_BIND_DN = null
|
||||||
LDAP_BIND_PASSWORD = null
|
# LDAP_BIND_PASSWORD = null
|
||||||
LDAP_USER_BASE = null
|
# LDAP_USER_BASE = null
|
||||||
LDAP_USER_FILTER = null
|
# LDAP_USER_FILTER = null
|
||||||
LDAP_USERNAME_ATTR = null
|
# LDAP_USERNAME_ATTR = null
|
||||||
LDAP_FIRSTNAME_ATTR = null
|
# LDAP_FIRSTNAME_ATTR = null
|
||||||
LDAP_LASTNAME_ATTR = null
|
# LDAP_LASTNAME_ATTR = null
|
||||||
LDAP_EMAIL_ATTR = null
|
# LDAP_EMAIL_ATTR = null
|
||||||
LDAP_CREATE_SUPERUSER = false
|
# LDAP_CREATE_SUPERUSER = false
|
||||||
SAVE_TITLE = true
|
# SAVE_TITLE = true
|
||||||
SAVE_FAVICON = true
|
# SAVE_FAVICON = true
|
||||||
SAVE_WGET = true
|
# SAVE_WGET = true
|
||||||
SAVE_WGET_REQUISITES = true
|
# SAVE_WGET_REQUISITES = true
|
||||||
SAVE_SINGLEFILE = true
|
# SAVE_SINGLEFILE = true
|
||||||
SAVE_READABILITY = true
|
# SAVE_READABILITY = true
|
||||||
SAVE_MERCURY = true
|
# SAVE_MERCURY = true
|
||||||
SAVE_HTMLTOTEXT = true
|
# SAVE_HTMLTOTEXT = true
|
||||||
SAVE_PDF = true
|
# SAVE_PDF = true
|
||||||
SAVE_SCREENSHOT = true
|
# SAVE_SCREENSHOT = true
|
||||||
SAVE_DOM = true
|
# SAVE_DOM = true
|
||||||
SAVE_HEADERS = true
|
# SAVE_HEADERS = true
|
||||||
SAVE_WARC = true
|
# SAVE_WARC = true
|
||||||
SAVE_GIT = true
|
# SAVE_GIT = true
|
||||||
SAVE_MEDIA = true
|
# SAVE_MEDIA = true
|
||||||
SAVE_ARCHIVE_DOT_ORG = true
|
# SAVE_ARCHIVE_DOT_ORG = true
|
||||||
RESOLUTION = [1440, 2000]
|
# RESOLUTION = [1440, 2000]
|
||||||
GIT_DOMAINS = "github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht"
|
# GIT_DOMAINS = "github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht"
|
||||||
CHECK_SSL_VALIDITY = true
|
# CHECK_SSL_VALIDITY = true
|
||||||
MEDIA_MAX_SIZE = "750m"
|
# MEDIA_MAX_SIZE = "750m"
|
||||||
USER_AGENT = null
|
# USER_AGENT = null
|
||||||
CURL_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)"
|
# CURL_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)"
|
||||||
WGET_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5"
|
# WGET_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5"
|
||||||
CHROME_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)"
|
# CHROME_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)"
|
||||||
COOKIES_FILE = null
|
# COOKIES_FILE = null
|
||||||
CHROME_USER_DATA_DIR = null
|
# CHROME_USER_DATA_DIR = null
|
||||||
CHROME_TIMEOUT = false
|
# CHROME_TIMEOUT = false
|
||||||
CHROME_HEADLESS = true
|
# CHROME_HEADLESS = true
|
||||||
CHROME_SANDBOX = true
|
# CHROME_SANDBOX = true
|
||||||
CHROME_EXTRA_ARGS = []
|
# CHROME_EXTRA_ARGS = []
|
||||||
YOUTUBEDL_ARGS = ["--restrict-filenames", "--trim-filenames", "128", "--write-description", "--write-info-json", "--write-annotations", "--write-thumbnail", "--no-call-home", "--write-sub", "--write-auto-subs", "--convert-subs=srt", "--yes-playlist", "--continue", "--no-abort-on-error", "--ignore-errors", "--geo-bypass", "--add-metadata", "--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)"]
|
# YOUTUBEDL_ARGS = ["--restrict-filenames", "--trim-filenames", "128", "--write-description", "--write-info-json", "--write-annotations", "--write-thumbnail", "--no-call-home", "--write-sub", "--write-auto-subs", "--convert-subs=srt", "--yes-playlist", "--continue", "--no-abort-on-error", "--ignore-errors", "--geo-bypass", "--add-metadata", "--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)"]
|
||||||
YOUTUBEDL_EXTRA_ARGS = []
|
# YOUTUBEDL_EXTRA_ARGS = []
|
||||||
WGET_ARGS = ["--no-verbose", "--adjust-extension", "--convert-links", "--force-directories", "--backup-converted", "--span-hosts", "--no-parent", "-e", "robots=off"]
|
# WGET_ARGS = ["--no-verbose", "--adjust-extension", "--convert-links", "--force-directories", "--backup-converted", "--span-hosts", "--no-parent", "-e", "robots=off"]
|
||||||
WGET_EXTRA_ARGS = []
|
# WGET_EXTRA_ARGS = []
|
||||||
CURL_ARGS = ["--silent", "--location", "--compressed"]
|
# CURL_ARGS = ["--silent", "--location", "--compressed"]
|
||||||
CURL_EXTRA_ARGS = []
|
# CURL_EXTRA_ARGS = []
|
||||||
GIT_ARGS = ["--recursive"]
|
# GIT_ARGS = ["--recursive"]
|
||||||
SINGLEFILE_ARGS = []
|
# SINGLEFILE_ARGS = []
|
||||||
SINGLEFILE_EXTRA_ARGS = []
|
# SINGLEFILE_EXTRA_ARGS = []
|
||||||
MERCURY_ARGS = ["--format=text"]
|
# MERCURY_ARGS = ["--format=text"]
|
||||||
MERCURY_EXTRA_ARGS = []
|
# MERCURY_EXTRA_ARGS = []
|
||||||
FAVICON_PROVIDER = "https://www.google.com/s2/favicons?domain={}"
|
# FAVICON_PROVIDER = "https://www.google.com/s2/favicons?domain={}"
|
||||||
USE_INDEXING_BACKEND = true
|
# USE_INDEXING_BACKEND = true
|
||||||
USE_SEARCHING_BACKEND = true
|
# USE_SEARCHING_BACKEND = true
|
||||||
SEARCH_BACKEND_ENGINE = "ripgrep"
|
# SEARCH_BACKEND_ENGINE = "ripgrep"
|
||||||
SEARCH_BACKEND_HOST_NAME = "localhost"
|
# SEARCH_BACKEND_HOST_NAME = "localhost"
|
||||||
SEARCH_BACKEND_PORT = 1491
|
# SEARCH_BACKEND_PORT = 1491
|
||||||
SEARCH_BACKEND_PASSWORD = "SecretPassword"
|
# SEARCH_BACKEND_PASSWORD = "SecretPassword"
|
||||||
SEARCH_PROCESS_HTML = true
|
# SEARCH_PROCESS_HTML = true
|
||||||
SONIC_COLLECTION = "archivebox"
|
# SONIC_COLLECTION = "archivebox"
|
||||||
SONIC_BUCKET = "snapshots"
|
# SONIC_BUCKET = "snapshots"
|
||||||
SEARCH_BACKEND_TIMEOUT = 90
|
# SEARCH_BACKEND_TIMEOUT = 90
|
||||||
FTS_SEPARATE_DATABASE = true
|
# FTS_SEPARATE_DATABASE = true
|
||||||
FTS_TOKENIZERS = "porter unicode61 remove_diacritics 2"
|
# FTS_TOKENIZERS = "porter unicode61 remove_diacritics 2"
|
||||||
FTS_SQLITE_MAX_LENGTH = 1000000000
|
# FTS_SQLITE_MAX_LENGTH = 1000000000
|
||||||
USE_CURL = true
|
# USE_CURL = true
|
||||||
USE_WGET = true
|
# USE_WGET = true
|
||||||
USE_SINGLEFILE = true
|
# USE_SINGLEFILE = true
|
||||||
USE_READABILITY = true
|
# USE_READABILITY = true
|
||||||
USE_MERCURY = true
|
# USE_MERCURY = true
|
||||||
USE_GIT = true
|
# USE_GIT = true
|
||||||
USE_CHROME = true
|
# USE_CHROME = true
|
||||||
USE_NODE = true
|
# USE_NODE = true
|
||||||
USE_YOUTUBEDL = true
|
# USE_YOUTUBEDL = true
|
||||||
USE_RIPGREP = true
|
# USE_RIPGREP = true
|
||||||
CURL_BINARY = "curl"
|
# CURL_BINARY = "curl"
|
||||||
GIT_BINARY = "git"
|
# GIT_BINARY = "git"
|
||||||
WGET_BINARY = "wget"
|
# WGET_BINARY = "wget"
|
||||||
SINGLEFILE_BINARY = "single-file"
|
# SINGLEFILE_BINARY = "single-file"
|
||||||
READABILITY_BINARY = "readability-extractor"
|
# READABILITY_BINARY = "readability-extractor"
|
||||||
MERCURY_BINARY = "postlight-parser"
|
# MERCURY_BINARY = "postlight-parser"
|
||||||
YOUTUBEDL_BINARY = "yt-dlp"
|
# YOUTUBEDL_BINARY = "yt-dlp"
|
||||||
NODE_BINARY = "node"
|
# NODE_BINARY = "node"
|
||||||
RIPGREP_BINARY = "rg"
|
# RIPGREP_BINARY = "rg"
|
||||||
CHROME_BINARY = "chrome"
|
# CHROME_BINARY = "chrome"
|
||||||
POCKET_CONSUMER_KEY = null
|
# POCKET_CONSUMER_KEY = null
|
||||||
USER = "squash"
|
# USER = "squash"
|
||||||
PACKAGE_DIR = "/opt/archivebox/archivebox"
|
# PACKAGE_DIR = "/opt/archivebox/archivebox"
|
||||||
TEMPLATES_DIR = "/opt/archivebox/archivebox/templates"
|
# TEMPLATES_DIR = "/opt/archivebox/archivebox/templates"
|
||||||
ARCHIVE_DIR = "/opt/archivebox/data/archive"
|
# ARCHIVE_DIR = "/opt/archivebox/data/archive"
|
||||||
SOURCES_DIR = "/opt/archivebox/data/sources"
|
# SOURCES_DIR = "/opt/archivebox/data/sources"
|
||||||
LOGS_DIR = "/opt/archivebox/data/logs"
|
# LOGS_DIR = "/opt/archivebox/data/logs"
|
||||||
PERSONAS_DIR = "/opt/archivebox/data/personas"
|
# PERSONAS_DIR = "/opt/archivebox/data/personas"
|
||||||
URL_DENYLIST_PTN = "re.compile(\'\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$\', re.IGNORECASE|re.MULTILINE)"
|
# URL_DENYLIST_PTN = "re.compile(\'\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$\', re.IGNORECASE|re.MULTILINE)"
|
||||||
URL_ALLOWLIST_PTN = null
|
# URL_ALLOWLIST_PTN = null
|
||||||
DIR_OUTPUT_PERMISSIONS = 755
|
# DIR_OUTPUT_PERMISSIONS = 755
|
||||||
ARCHIVEBOX_BINARY = "/opt/archivebox/.venv/bin/archivebox"
|
# ARCHIVEBOX_BINARY = "/opt/archivebox/.venv/bin/archivebox"
|
||||||
VERSION = "0.8.0"
|
# VERSION = "0.8.0"
|
||||||
COMMIT_HASH = "102e87578c6036bb0132dd1ebd17f8f05ffc880f"
|
# COMMIT_HASH = "102e87578c6036bb0132dd1ebd17f8f05ffc880f"
|
||||||
BUILD_TIME = "2024-05-15 03:28:05 1715768885"
|
# BUILD_TIME = "2024-05-15 03:28:05 1715768885"
|
||||||
VERSIONS_AVAILABLE = null
|
# VERSIONS_AVAILABLE = null
|
||||||
CAN_UPGRADE = false
|
# CAN_UPGRADE = false
|
||||||
PYTHON_BINARY = "/opt/archivebox/.venv/bin/python3.10"
|
# PYTHON_BINARY = "/opt/archivebox/.venv/bin/python3.10"
|
||||||
PYTHON_ENCODING = "UTF-8"
|
# PYTHON_ENCODING = "UTF-8"
|
||||||
PYTHON_VERSION = "3.10.14"
|
# PYTHON_VERSION = "3.10.14"
|
||||||
DJANGO_BINARY = "/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py"
|
# DJANGO_BINARY = "/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py"
|
||||||
DJANGO_VERSION = "5.0.6 final (0)"
|
# DJANGO_VERSION = "5.0.6 final (0)"
|
||||||
SQLITE_BINARY = "/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py"
|
# SQLITE_BINARY = "/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py"
|
||||||
SQLITE_VERSION = "2.6.0"
|
# SQLITE_VERSION = "2.6.0"
|
||||||
CURL_VERSION = "curl 8.4.0 (x86_64-apple-darwin23.0)"
|
# CURL_VERSION = "curl 8.4.0 (x86_64-apple-darwin23.0)"
|
||||||
WGET_VERSION = "GNU Wget 1.24.5"
|
# WGET_VERSION = "GNU Wget 1.24.5"
|
||||||
WGET_AUTO_COMPRESSION = true
|
# WGET_AUTO_COMPRESSION = true
|
||||||
RIPGREP_VERSION = "ripgrep 14.1.0"
|
# RIPGREP_VERSION = "ripgrep 14.1.0"
|
||||||
SINGLEFILE_VERSION = null
|
# SINGLEFILE_VERSION = null
|
||||||
READABILITY_VERSION = null
|
# READABILITY_VERSION = null
|
||||||
MERCURY_VERSION = null
|
# MERCURY_VERSION = null
|
||||||
GIT_VERSION = "git version 2.44.0"
|
# GIT_VERSION = "git version 2.44.0"
|
||||||
YOUTUBEDL_VERSION = "2024.04.09"
|
# YOUTUBEDL_VERSION = "2024.04.09"
|
||||||
CHROME_VERSION = "Google Chrome 124.0.6367.207"
|
# CHROME_VERSION = "Google Chrome 124.0.6367.207"
|
||||||
NODE_VERSION = "v21.7.3"'''
|
# NODE_VERSION = "v21.7.3"'''
|
||||||
|
|
||||||
|
|
||||||
first_output = convert(test_input) # make sure ini -> toml parses correctly
|
# first_output = convert(test_input) # make sure ini -> toml parses correctly
|
||||||
second_output = convert(first_output) # make sure toml -> toml parses/dumps consistently
|
# second_output = convert(first_output) # make sure toml -> toml parses/dumps consistently
|
||||||
assert first_output == second_output == expected_output # make sure parsing is idempotent
|
# assert first_output == second_output == expected_output # make sure parsing is idempotent
|
||||||
|
|
||||||
# # DEBUGGING
|
# # DEBUGGING
|
||||||
# import sys
|
# import sys
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
__package__ = 'archivebox.plugantic'
|
|
|
@ -1,336 +0,0 @@
|
||||||
import re
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import toml
|
|
||||||
import json
|
|
||||||
import platform
|
|
||||||
import inspect
|
|
||||||
import tomllib
|
|
||||||
|
|
||||||
from typing import Callable, Any, Optional, Pattern, Type, Tuple, Dict, List
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from pydantic import BaseModel, Field, FieldValidationInfo, AliasChoices, model_validator, FilePath, DirectoryPath, computed_field, TypeAdapter
|
|
||||||
from pydantic.fields import FieldInfo
|
|
||||||
|
|
||||||
from pydantic_settings import BaseSettings, SettingsConfigDict, PydanticBaseSettingsSource
|
|
||||||
from pydantic_settings.sources import InitSettingsSource, ConfigFileSourceMixin, TomlConfigSettingsSource
|
|
||||||
|
|
||||||
from pydantic.json_schema import GenerateJsonSchema
|
|
||||||
from pydantic_core import PydanticOmit, core_schema, to_jsonable_python, ValidationError
|
|
||||||
from pydantic.json_schema import GenerateJsonSchema, JsonSchemaValue
|
|
||||||
|
|
||||||
import ini_to_toml
|
|
||||||
|
|
||||||
|
|
||||||
class JSONSchemaWithLambdas(GenerateJsonSchema):
    """JSON-schema generator that can render callable field defaults.

    Overrides ``encode_default`` so that a callable default is emitted as a
    ``{{lambda ...}}`` placeholder string built from its source text instead
    of being handed to the JSON serializer.
    """

    def encode_default(self, default: Any) -> Any:
        """Return a JSON-compatible representation of a field default.

        Callable defaults become the string ``'{{lambda <source>}}'``; every
        other value is converted with ``to_jsonable_python`` using the
        timedelta/bytes modes read from the generator's config.
        """
        settings = self._config  # looked up first, before any branching

        if not isinstance(default, Callable):
            return to_jsonable_python(
                default,
                timedelta_mode=settings.ser_json_timedelta,
                bytes_mode=settings.ser_json_bytes,
                serialize_unknown=True,
            )

        # Keep whatever follows the last '=lambda ' in the callable's source,
        # trim surrounding whitespace, then drop the final character.
        # NOTE(review): the dropped character is presumably a trailing ')' or
        # ',' from the enclosing field declaration -- confirm against callers.
        source_text = inspect.getsource(default)
        lambda_tail = source_text.split('=lambda ')[-1]
        lambda_body = lambda_tail.strip()[:-1]
        return '{{lambda ' + lambda_body + '}}'
|
|
||||||
|
|
||||||
# for computed_field properties render them like this instead:
|
|
||||||
# inspect.getsource(field.wrapped_property.fget).split('def ', 1)[-1].split('\n', 1)[-1].strip().strip('return '),
|
|
||||||
|
|
||||||
|
|
||||||
# Settings base class whose field defaults may be callables: a callable default is invoked with
# a dict of the config collected so far and its result becomes the field's value.
class ModelWithDefaults(BaseSettings):
    model_config = SettingsConfigDict(validate_default=False, case_sensitive=False, extra='ignore')

    @model_validator(mode='after')
    def fill_defaults(self):
        """Populate any unset values using the function provided as their default.

        Returns:
            This instance, with every callable field value replaced by the validated
            result of calling the field's default.
        """
        for key, field in type(self).model_fields.items():
            if not callable(getattr(self, key)):
                continue

            # the value is a function: execute it to get the actual value,
            # passing the config collected so far as a dict
            config_so_far = self.model_dump(exclude_unset=True)
            fallback_value = field.default(config_so_far)

            # make sure the generated value matches the type annotation, and keep the
            # validated result so that what gets stored is what was checked
            validated_value = TypeAdapter(field.annotation).validate_python(fallback_value)
            setattr(self, key, validated_value)
        return self

    def _section_dumps(self, model_fields=True, computed_fields=True):
        """Yield ``(section_name, values_dict)`` for each leading ALL-CAPS base class in the MRO."""
        for section in type(self).__mro__[1:]:
            if not section.__name__.isupper():
                # only the upper-case-named classes at the front of the MRO count as sections
                break

            names = set()
            if model_fields:
                names.update(section.model_fields)
            if computed_fields:
                names.update(section.model_computed_fields)

            # round-trip through JSON so every value ends up as a plain JSON-compatible type
            yield section.__name__, json.loads(section.model_dump_json(self, include=names))

    def as_json(self, model_fields=True, computed_fields=True):
        """Return the config as ``{section_name: {field_name: json_value}}``.

        Args:
            model_fields: include each section's declared fields.
            computed_fields: include each section's computed fields.
        """
        output_dict = {}
        for section_name, values in self._section_dumps(model_fields, computed_fields):
            output_dict.setdefault(section_name, {}).update(values)
        return output_dict

    def as_toml(self, model_fields=True, computed_fields=True):
        """Return the config as TOML text with one ``[SECTION]`` table per section.

        Args:
            model_fields: include each section's declared fields.
            computed_fields: include each section's computed fields.
        """
        return ''.join(
            f'[{section_name}]\n' + toml.dumps(values) + '\n'
            for section_name, values in self._section_dumps(model_fields, computed_fields)
        )

    def as_legacy_schema(self, model_fields=True, computed_fields=True):
        """Convert a newer Pydantic Settings BaseModel into the old-style archivebox.config CONFIG_SCHEMA format.

        Returns:
            ``{defining_class_name: {UPPERCASE_KEY: {'value', 'type', 'default', 'aliases'}}}``
        """
        include = {}
        if model_fields:
            include.update(self.model_fields)
        if computed_fields:
            include.update(self.model_computed_fields)

        schemas = {}
        for name, field in include.items():
            # find the first base class (in MRO order) that declares this field
            defining_class = None
            for cls in type(self).__mro__[1:]:
                if not hasattr(cls, 'model_fields'):
                    # e.g. `object`: not a model class, cannot declare fields
                    continue
                if name in cls.model_fields or name in cls.model_computed_fields:
                    defining_class = cls
                    break

            assert defining_class is not None, f"No defining class found for field {name}! (should be impossible)"

            if hasattr(field, 'annotation'):
                annotation = field.annotation
                type_name = str(getattr(annotation, '__name__', annotation)).lower()
            else:
                # no `annotation` attribute (computed fields): describe the return type instead
                type_name = str(field.return_type).lower()

            # extra `aliases=` passed to Field() is looked up under json_schema_extra
            extra = getattr(field, 'json_schema_extra', None)
            alias_choices = extra.get('aliases') if isinstance(extra, dict) else None
            aliases = getattr(alias_choices, 'choices', None) or []

            # output keys are upper-cased; attribute lookups use the declared name
            schemas.setdefault(defining_class.__name__, {})[name.upper()] = {
                'value': getattr(self, name),
                'type': type_name,
                'default': field.default if hasattr(field, 'default') else field.wrapped_property.fget,
                'aliases': aliases,
            }

        return schemas

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: Type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> Tuple[PydanticBaseSettingsSource, ...]:
        """Return the settings sources to use: init kwargs, environment, then the config file.

        If the config file cannot be decoded as TOML it is converted with
        ``ini_to_toml.convert`` into a sibling ``.ArchiveBox.toml`` file, which is loaded instead.
        ``dotenv_settings`` and ``file_secret_settings`` are not used.
        """
        # NOTE(review): hard-coded absolute path to one developer's checkout -- confirm where
        # the config file location should really come from before relying on this.
        ARCHIVEBOX_CONFIG_FILE = Path('/Users/squash/Local/Code/archiveboxes/ArchiveBox/data/ArchiveBox.conf')
        ARCHIVEBOX_CONFIG_FILE_TOML = ARCHIVEBOX_CONFIG_FILE.parent / '.ArchiveBox.toml'

        try:
            # presumably the source parses the file on construction, which is where a
            # TOMLDecodeError would surface -- TODO confirm
            config_file_source = TomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE)
        except tomllib.TOMLDecodeError:
            toml_str = ini_to_toml.convert(ARCHIVEBOX_CONFIG_FILE.read_text())
            ARCHIVEBOX_CONFIG_FILE_TOML.write_text(toml_str)
            config_file_source = TomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE_TOML)

        return (
            init_settings,
            env_settings,
            config_file_source,
        )
|
|
||||||
|
|
||||||
|
|
||||||
# Config section for the shell / process environment.
class SHELL_CONFIG(ModelWithDefaults):
    # callable defaults take one argument `c` and index it by config key
    IS_TTY: bool = Field(default=lambda c: sys.stdout.isatty())
    USE_COLOR: bool = Field(default=lambda c: c['IS_TTY'])
    SHOW_PROGRESS: bool = Field(default=lambda c: c['IS_TTY'] and (platform.system() != 'Darwin'))

    IN_DOCKER: bool = False
    IN_QEMU: bool = False

    # user / group id of the current process
    PUID: int = Field(default=lambda c: os.getuid())
    PGID: int = Field(default=lambda c: os.getgid())
|
|
||||||
|
|
||||||
|
|
||||||
# Config section for general archiving options.
class GENERAL_CONFIG(ModelWithDefaults):
    # OUTPUT_DIR: DirectoryPath
    CONFIG_FILE: FilePath = Field(default=lambda c: c['OUTPUT_DIR'] / 'ArchiveBox.conf')

    ONLY_NEW: bool = True
    TIMEOUT: int = 60
    MEDIA_TIMEOUT: int = 3600

    ENFORCE_ATOMIC_WRITES: bool = True
    OUTPUT_PERMISSIONS: str = '644'
    RESTRICT_FILE_NAMES: str = 'windows'

    # the extra `aliases=` keyword carries the legacy name of each option
    URL_DENYLIST: Pattern = Field(
        default=re.compile(r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'),
        aliases=AliasChoices('URL_BLACKLIST'),
    )
    URL_ALLOWLIST: Pattern = Field(default=re.compile(r''), aliases=AliasChoices('URL_WHITELIST'))

    ADMIN_USERNAME: Optional[str] = Field(default=None, min_length=1, max_length=63, pattern=r'^[\S]+$')
    ADMIN_PASSWORD: Optional[str] = Field(default=None, min_length=1, max_length=63)

    TAG_SEPARATOR_PATTERN: Pattern = re.compile(r'[,]')

    @computed_field
    @property
    def OUTPUT_DIR(self) -> DirectoryPath:
        """Return the resolved path of ``'.'``."""
        here = Path('.')
        return here.resolve()
|
|
||||||
|
|
||||||
# class PackageInstalled(ModelWithDefaults):
|
|
||||||
# binary_abs: HostBinPathStr
|
|
||||||
# version_str: str
|
|
||||||
# is_valid: True
|
|
||||||
# provider: PackageProvider
|
|
||||||
# date_installed: datetime
|
|
||||||
# date_checked: datetime
|
|
||||||
|
|
||||||
# Describes a single entrypoint: which dependency it belongs to, the runtime it uses,
# and the command/arguments involved.
# NOTE(review): `Literal` is not among the names imported from `typing` at the top of this
# file and `EntrypointSchema` is not defined in the visible source -- confirm before use.
class EntrypointConfig(ModelWithDefaults):
    name: str
    dependency: str
    runtime: Literal['python.eval', 'node.eval', 'puppeteer', 'shell.run', 'ansible']

    CMD: str
    DEFAULT_ARGS: List[str]
    EXTRA_ARGS: List[str]
    ARGS: List[str]

    SCHEMA: EntrypointSchema

    # NOTE(review): defaults to the builtin `eval` -- never feed it untrusted input
    validator: Callable = eval
|
|
||||||
|
|
||||||
# Entrypoint settings whose default arguments are `--version`.
class VersionEntrypointConfig(ModelWithDefaults):
    # pydantic v2 refuses un-annotated class attributes on a model, so the default is typed
    DEFAULT_ARGS: List[str] = ['--version']
|
|
||||||
|
|
||||||
# A named source of packages/binaries (one of the literal provider names below).
# NOTE(review): `Literal`, `PackageInstall` and `shell` are not imported or defined in the
# visible source -- confirm where they are expected to come from.
class PackageProvider(ModelWithDefaults):
    name: Literal['config', 'PATH', 'pip', 'apt', 'brew', 'npm', 'vendor']

    def install_bin(self, name):
        """Placeholder: currently just returns the ``PackageInstall`` name."""
        # ...
        return PackageInstall

    def get_bin_path(self, name, install=True):
        """Return the result of running ``which <name>`` via ``shell``; ``install`` is unused."""
        which_cmd = ['which', name]
        return shell(which_cmd)
|
|
||||||
|
|
||||||
# Sketch of a dependency declaration: provider names, package names, and named entrypoints.
class DependencyConfig(ModelWithDefaults):
|
|
||||||
# NOTE(review): `List[X, ...]` uses the tuple-style ellipsis form -- confirm the intended type.
providers: List[Literal['config', 'PATH', 'pip', 'apt', 'brew', 'npm', 'vendor'], ...]
|
|
||||||
name: str
|
|
||||||
packages: List[str]
|
|
||||||
entrypoints: Dict[str, EntrypointConfig]
|
|
||||||
# NOTE(review): the lambda below has no body and `field` is lowercase (the file imports
# `Field`); this line is syntactically incomplete as written.
version_cmd: EntrypointConfig = field(default=lambda c: )
|
|
||||||
|
|
||||||
# Describes one extractor: its name, a description, the dependency it needs and the
# entrypoint it runs.
class ExtractorConfig(ModelWithDefaults):
    name: str
    description: str = Field(examples=['WGET Extractor'])
    # the class declared in this file is spelled `DependencyConfig`
    depends_on: DependencyConfig
    entrypoint: EntrypointConfig = Field(description='Which entrypoint to use for this extractor')
|
|
||||||
|
|
||||||
class ReplayerConfig(ModelWithDefaults):
|
|
||||||
"""Describes how to render an ArchiveResult in several contexts"""
|
|
||||||
name: str
|
|
||||||
# NOTE(review): the three lines below use a string literal in the annotation position;
# presumably these were meant to be `str` fields with these template paths as defaults -- confirm.
row_template: 'plugins/wget/templates/row.html'
|
|
||||||
embed_template: 'plugins/wget/templates/embed.html'
|
|
||||||
fullpage_template: 'plugins/wget/templates/fullpage.html'
|
|
||||||
|
|
||||||
# NOTE(review): the next line has no `=` between the annotation and the string, so it is
# not valid syntax as written; `ImportString` is also not imported in the visible source.
icon_view: ImportString 'plugins.wget.replayers.wget.get_icon'
|
|
||||||
thumbnail_getter: ImportString = 'plugins.wget.replayers.wget.get_icon'
|
|
||||||
|
|
||||||
# Sketch of a plugin declaration that groups dependency, entrypoint, extractor and replayer
# configs, each keyed by a string.
class PluginConfig(ModelWithDefaults):
|
|
||||||
dependencies: Dict[str, DependencyConfig]
|
|
||||||
entrypoints: Dict[str, EntrypointConfig]
|
|
||||||
extractors: Dict[str, ExtractorConfig]
|
|
||||||
replayers: Dict[str, ReplayerConfig]
|
|
||||||
|
|
||||||
name: str
|
|
||||||
|
|
||||||
# NOTE(review): the next two annotations are incomplete (`BINARY:` has no type and
# `List[, ...]` has no element type) -- not valid syntax as written.
BINARY:
|
|
||||||
PROVIDERS: List[, ...]
|
|
||||||
|
|
||||||
ENTRYPOINTS: Dict[str, EntrypointConfig]
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# NOTE(review): `HostBinName` is not imported or defined in the visible source.
WGET_BINARY: HostBinName = Field(default='wget')
|
|
||||||
|
|
||||||
@computed_field
|
|
||||||
@property
|
|
||||||
# NOTE(review): this property has no body in the visible source and `Provider` is not
# defined -- unfinished stub.
def WGET_PROVIDERS(self) -> List[Provider]:
|
|
||||||
|
|
||||||
# Empty subclass of `DEPENDENCY_CONFIG`.
# NOTE(review): `DEPENDENCY_CONFIG` is not defined in the visible source -- confirm its origin.
class WGET_DEPENDENCY_CONFIG(DEPENDENCY_CONFIG):
    pass
|
|
||||||
|
|
||||||
# wget plugin settings: its extractors and its dependencies.
# NOTE(review): `EXTRACTORS`, `EXTRACTOR_CONFIG` and `DEPENDENCY_CONFIG` are not defined in
# the visible source, and `DEPDENCIES` looks like a misspelling of `DEPENDENCIES` -- confirm.
class WGET_CONFIG(ModelWithDefaults):
    EXTRACTORS: List[EXTRACTORS] = EXTRACTOR_CONFIG('')
    DEPDENCIES: List[DEPENDENCY_CONFIG] = [DEPENDENCY_CONFIG]
|
|
||||||
|
|
||||||
# Singleton holding four `WGET_CONFIG`-typed schema fields.
# NOTE(review): `SingletonModel` and `SchemaField` are not imported in the visible source.
class WgetConfiguration(SingletonModel):
    singleton_instance_id = 1

    dependency_config: WGET_CONFIG = SchemaField()
    extractor_config: WGET_CONFIG = SchemaField()
    replay_config: WGET_CONFIG = SchemaField()
    pkg_config: WGET_CONFIG = SchemaField()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# NOTE(review): this second `WGET_CONFIG` declaration has no body in the visible source and
# reuses the name of a class declared earlier in this file -- presumably an unfinished stub.
class WGET_CONFIG(ModelWithDefaults):
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# class ConfigSet(models.Model):
|
|
||||||
# # scope = when should this config set be active
|
|
||||||
# # host: on a specific host running archivebox
|
|
||||||
# #
|
|
||||||
# # snapshot__added: on or during a specific timeperiod
|
|
||||||
# # user: for actions initiated by a specific archivebox user
|
|
||||||
# # extractor: for specific extractors running under a snapshot
|
|
||||||
# # snapshot_id: for a specific snapshot pk
|
|
||||||
# # snapshot__url: for a specific snapshot url
|
|
||||||
# scope = models.CharField(choices=('host', 'date', 'user', 'extractor', 'domain', 'url', 'custom'))
|
|
||||||
# lookup = models.CharField(choices=('__eq', '__icontains', '__gte', '__lt', '__startswith', '__endswith', '__in', '__isnull'))
|
|
||||||
# match = models.CharField(max_length=128)
|
|
||||||
|
|
||||||
# config = models.JSONField(default={}, schema=Dict[str, JSONValue])
|
|
||||||
# getter = models.ImportString(default='django.utils.model_loading.import_string')
|
|
||||||
|
|
||||||
# label = models.CharField(max_length=64)
|
|
||||||
# created_by = models.ForeignKey(User, on_delete=models.CASCADE)
|
|
||||||
# config = JSONField(schema=Dict[str, JSONValue])
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Section classes that are combined, in this order, as the bases of USER_CONFIG.
CONFIG_SECTIONS = (GENERAL_CONFIG, SHELL_CONFIG)


# The combined config model: inherits from every class in CONFIG_SECTIONS.
class USER_CONFIG(*CONFIG_SECTIONS):
    pass
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Manual smoke test: build a USER_CONFIG from init kwargs plus one environment
    # variable, then print it in each supported output format.

    # print(ShellConfig(**{'IS_TTY': False, 'PGID': 911}).model_dump())
    # print(json.dumps(SHELL_CONFIG.model_json_schema(schema_generator=JSONSchemaWithLambdas), indent=4))
    # print(json.dumps(GENERAL_CONFIG.model_json_schema(schema_generator=JSONSchemaWithLambdas), indent=4))
    print()

    # os.environ['PGID'] = '422'
    os.environ['URL_ALLOWLIST'] = r'worked!!!!!\\.com'
    config = USER_CONFIG(SHOW_PROGRESS=False, ADMIN_USERNAME='kip', PGID=911)

    print('==========archivebox.config.CONFIG_SCHEMA======================')
    legacy_schema = config.as_legacy_schema()
    print(json.dumps(legacy_schema, indent=4, default=str))

    print('==========JSON=================================================')
    # print(config.__class__.__name__, '=', config.model_dump_json(indent=4))
    sections_json = config.as_json()
    print(json.dumps(sections_json, indent=4))

    print('==========TOML=================================================')
    sections_toml = config.as_toml()
    print(sections_toml)
|
|
||||||
|
|
||||||
|
|
|
@ -1,3 +0,0 @@
|
||||||
from django.test import TestCase
|
|
||||||
|
|
||||||
# Create your tests here.
|
|
Loading…
Add table
Add a link
Reference in a new issue