diff --git a/archivebox/plugantic/ansible_utils.py b/archivebox/plugantic/_ansible_unused/ansible_utils.py similarity index 100% rename from archivebox/plugantic/ansible_utils.py rename to archivebox/plugantic/_ansible_unused/ansible_utils.py diff --git a/archivebox/plugantic/ansible/install_puppeteer.yml b/archivebox/plugantic/_ansible_unused/install_puppeteer.yml similarity index 100% rename from archivebox/plugantic/ansible/install_puppeteer.yml rename to archivebox/plugantic/_ansible_unused/install_puppeteer.yml diff --git a/archivebox/plugantic/ansible/install_singlefile.yml b/archivebox/plugantic/_ansible_unused/install_singlefile.yml similarity index 100% rename from archivebox/plugantic/ansible/install_singlefile.yml rename to archivebox/plugantic/_ansible_unused/install_singlefile.yml diff --git a/archivebox/plugantic/ansible/install_ytdlp.yml b/archivebox/plugantic/_ansible_unused/install_ytdlp.yml similarity index 100% rename from archivebox/plugantic/ansible/install_ytdlp.yml rename to archivebox/plugantic/_ansible_unused/install_ytdlp.yml diff --git a/archivebox/plugantic/ansible/roles/load_binary/meta/argument_specs.yml b/archivebox/plugantic/_ansible_unused/roles/load_binary/meta/argument_specs.yml similarity index 100% rename from archivebox/plugantic/ansible/roles/load_binary/meta/argument_specs.yml rename to archivebox/plugantic/_ansible_unused/roles/load_binary/meta/argument_specs.yml diff --git a/archivebox/plugantic/ansible/roles/load_binary/tasks/main.yml b/archivebox/plugantic/_ansible_unused/roles/load_binary/tasks/main.yml similarity index 100% rename from archivebox/plugantic/ansible/roles/load_binary/tasks/main.yml rename to archivebox/plugantic/_ansible_unused/roles/load_binary/tasks/main.yml diff --git a/archivebox/plugantic/ansible/roles/load_binary/vars/main.yml b/archivebox/plugantic/_ansible_unused/roles/load_binary/vars/main.yml similarity index 100% rename from archivebox/plugantic/ansible/roles/load_binary/vars/main.yml rename to archivebox/plugantic/_ansible_unused/roles/load_binary/vars/main.yml diff --git a/archivebox/plugantic/ansible/roles/setup_lib_npm/meta/argument_specs.yml b/archivebox/plugantic/_ansible_unused/roles/setup_lib_npm/meta/argument_specs.yml similarity index 100% rename from archivebox/plugantic/ansible/roles/setup_lib_npm/meta/argument_specs.yml rename to archivebox/plugantic/_ansible_unused/roles/setup_lib_npm/meta/argument_specs.yml diff --git a/archivebox/plugantic/ansible/roles/setup_lib_npm/tasks/main.yml b/archivebox/plugantic/_ansible_unused/roles/setup_lib_npm/tasks/main.yml similarity index 100% rename from archivebox/plugantic/ansible/roles/setup_lib_npm/tasks/main.yml rename to archivebox/plugantic/_ansible_unused/roles/setup_lib_npm/tasks/main.yml diff --git a/archivebox/plugantic/ansible/roles/setup_lib_npm/vars/main.yml b/archivebox/plugantic/_ansible_unused/roles/setup_lib_npm/vars/main.yml similarity index 100% rename from archivebox/plugantic/ansible/roles/setup_lib_npm/vars/main.yml rename to archivebox/plugantic/_ansible_unused/roles/setup_lib_npm/vars/main.yml diff --git a/archivebox/plugantic/ansible/roles/setup_lib_pip/meta/argument_specs.yml b/archivebox/plugantic/_ansible_unused/roles/setup_lib_pip/meta/argument_specs.yml similarity index 100% rename from archivebox/plugantic/ansible/roles/setup_lib_pip/meta/argument_specs.yml rename to archivebox/plugantic/_ansible_unused/roles/setup_lib_pip/meta/argument_specs.yml diff --git a/archivebox/plugantic/ansible/roles/setup_lib_pip/tasks/main.yml b/archivebox/plugantic/_ansible_unused/roles/setup_lib_pip/tasks/main.yml similarity index 100% rename from archivebox/plugantic/ansible/roles/setup_lib_pip/tasks/main.yml rename to archivebox/plugantic/_ansible_unused/roles/setup_lib_pip/tasks/main.yml diff --git a/archivebox/plugantic/ansible/roles/setup_lib_pip/vars/main.yml b/archivebox/plugantic/_ansible_unused/roles/setup_lib_pip/vars/main.yml similarity index 100% rename from archivebox/plugantic/ansible/roles/setup_lib_pip/vars/main.yml rename to archivebox/plugantic/_ansible_unused/roles/setup_lib_pip/vars/main.yml diff --git a/archivebox/plugantic/admin.py b/archivebox/plugantic/admin.py deleted file mode 100644 index 832a820d..00000000 --- a/archivebox/plugantic/admin.py +++ /dev/null @@ -1,26 +0,0 @@ -# from django.contrib import admin -# from django import forms - -# from django_jsonform.widgets import JSONFormWidget - -# from django_pydantic_field.v2.fields import PydanticSchemaField - -# from .models import CustomPlugin - - -# class PluginForm(forms.ModelForm): -# class Meta: -# model = CustomPlugin -# fields = '__all__' -# widgets = { -# 'items': JSONFormWidget(schema=PluginSchema), -# } - - -# class PluginAdmin(admin.ModelAdmin): -# formfield_overrides = { -# PydanticSchemaField: {"widget": JSONFormWidget}, -# } -# form = PluginForm - - diff --git a/archivebox/plugantic/ini_to_toml.py b/archivebox/plugantic/ini_to_toml.py index fed31992..415b99aa 100644 --- a/archivebox/plugantic/ini_to_toml.py +++ b/archivebox/plugantic/ini_to_toml.py @@ -1,8 +1,12 @@ -from typing import Dict, Any, List +from typing import Any, List, Callable -import configparser import json import ast +import inspect +import configparser + +from pydantic.json_schema import GenerateJsonSchema +from pydantic_core import to_jsonable_python JSONValue = str | bool | int | None | List['JSONValue'] @@ -63,333 +67,351 @@ def convert(ini_str: str) -> str: +class JSONSchemaWithLambdas(GenerateJsonSchema): + def encode_default(self, default: Any) -> Any: + """Encode lambda functions in default values properly""" + config = self._config + if isinstance(default, Callable): + return '{{lambda ' + inspect.getsource(default).split('=lambda ')[-1].strip()[:-1] + '}}' + return to_jsonable_python( + default, + timedelta_mode=config.ser_json_timedelta, + bytes_mode=config.ser_json_bytes, + serialize_unknown=True + ) + + # for computed_field properties render them like this instead: + # inspect.getsource(field.wrapped_property.fget).split('def ', 1)[-1].split('\n', 1)[-1].strip().strip('return '), + + + ### Basic Assertions -test_input = """ -[SERVER_CONFIG] -IS_TTY=False -USE_COLOR=False -SHOW_PROGRESS=False -IN_DOCKER=False -IN_QEMU=False -PUID=501 -PGID=20 -OUTPUT_DIR=/opt/archivebox/data -CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf -ONLY_NEW=True -TIMEOUT=60 -MEDIA_TIMEOUT=3600 -OUTPUT_PERMISSIONS=644 -RESTRICT_FILE_NAMES=windows -URL_DENYLIST=\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$ -URL_ALLOWLIST=None -ADMIN_USERNAME=None -ADMIN_PASSWORD=None -ENFORCE_ATOMIC_WRITES=True -TAG_SEPARATOR_PATTERN=[,] -SECRET_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx -BIND_ADDR=127.0.0.1:8000 -ALLOWED_HOSTS=* -DEBUG=False -PUBLIC_INDEX=True -PUBLIC_SNAPSHOTS=True -PUBLIC_ADD_VIEW=False -FOOTER_INFO=Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests. -SNAPSHOTS_PER_PAGE=40 -CUSTOM_TEMPLATES_DIR=None -TIME_ZONE=UTC -TIMEZONE=UTC -REVERSE_PROXY_USER_HEADER=Remote-User -REVERSE_PROXY_WHITELIST= -LOGOUT_REDIRECT_URL=/ -PREVIEW_ORIGINALS=True -LDAP=False -LDAP_SERVER_URI=None -LDAP_BIND_DN=None -LDAP_BIND_PASSWORD=None -LDAP_USER_BASE=None -LDAP_USER_FILTER=None -LDAP_USERNAME_ATTR=None -LDAP_FIRSTNAME_ATTR=None -LDAP_LASTNAME_ATTR=None -LDAP_EMAIL_ATTR=None -LDAP_CREATE_SUPERUSER=False -SAVE_TITLE=True -SAVE_FAVICON=True -SAVE_WGET=True -SAVE_WGET_REQUISITES=True -SAVE_SINGLEFILE=True -SAVE_READABILITY=True -SAVE_MERCURY=True -SAVE_HTMLTOTEXT=True -SAVE_PDF=True -SAVE_SCREENSHOT=True -SAVE_DOM=True -SAVE_HEADERS=True -SAVE_WARC=True -SAVE_GIT=True -SAVE_MEDIA=True -SAVE_ARCHIVE_DOT_ORG=True -RESOLUTION=1440,2000 -GIT_DOMAINS=github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht -CHECK_SSL_VALIDITY=True -MEDIA_MAX_SIZE=750m -USER_AGENT=None -CURL_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0) -WGET_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5 -CHROME_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) -COOKIES_FILE=None -CHROME_USER_DATA_DIR=None -CHROME_TIMEOUT=0 -CHROME_HEADLESS=True -CHROME_SANDBOX=True -CHROME_EXTRA_ARGS=[] -YOUTUBEDL_ARGS=['--restrict-filenames', '--trim-filenames', '128', '--write-description', '--write-info-json', '--write-annotations', '--write-thumbnail', '--no-call-home', '--write-sub', '--write-auto-subs', '--convert-subs=srt', '--yes-playlist', '--continue', '--no-abort-on-error', '--ignore-errors', '--geo-bypass', '--add-metadata', '--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)'] -YOUTUBEDL_EXTRA_ARGS=[] -WGET_ARGS=['--no-verbose', '--adjust-extension', '--convert-links', '--force-directories', '--backup-converted', '--span-hosts', '--no-parent', '-e', 'robots=off'] -WGET_EXTRA_ARGS=[] -CURL_ARGS=['--silent', '--location', '--compressed'] -CURL_EXTRA_ARGS=[] -GIT_ARGS=['--recursive'] -SINGLEFILE_ARGS=[] -SINGLEFILE_EXTRA_ARGS=[] -MERCURY_ARGS=['--format=text'] -MERCURY_EXTRA_ARGS=[] -FAVICON_PROVIDER=https://www.google.com/s2/favicons?domain={} -USE_INDEXING_BACKEND=True -USE_SEARCHING_BACKEND=True -SEARCH_BACKEND_ENGINE=ripgrep -SEARCH_BACKEND_HOST_NAME=localhost -SEARCH_BACKEND_PORT=1491 -SEARCH_BACKEND_PASSWORD=SecretPassword -SEARCH_PROCESS_HTML=True -SONIC_COLLECTION=archivebox -SONIC_BUCKET=snapshots -SEARCH_BACKEND_TIMEOUT=90 -FTS_SEPARATE_DATABASE=True -FTS_TOKENIZERS=porter unicode61 remove_diacritics 2 -FTS_SQLITE_MAX_LENGTH=1000000000 -USE_CURL=True -USE_WGET=True -USE_SINGLEFILE=True -USE_READABILITY=True -USE_MERCURY=True -USE_GIT=True -USE_CHROME=True -USE_NODE=True -USE_YOUTUBEDL=True -USE_RIPGREP=True -CURL_BINARY=curl -GIT_BINARY=git -WGET_BINARY=wget -SINGLEFILE_BINARY=single-file -READABILITY_BINARY=readability-extractor -MERCURY_BINARY=postlight-parser -YOUTUBEDL_BINARY=yt-dlp -NODE_BINARY=node -RIPGREP_BINARY=rg -CHROME_BINARY=chrome -POCKET_CONSUMER_KEY=None -USER=squash -PACKAGE_DIR=/opt/archivebox/archivebox -TEMPLATES_DIR=/opt/archivebox/archivebox/templates -ARCHIVE_DIR=/opt/archivebox/data/archive -SOURCES_DIR=/opt/archivebox/data/sources -LOGS_DIR=/opt/archivebox/data/logs -PERSONAS_DIR=/opt/archivebox/data/personas -URL_DENYLIST_PTN=re.compile('\\.(css|js|otf|ttf|woff|woff2|gstatic\\.com|googleapis\\.com/css)(\\?.*)?$', re.IGNORECASE|re.MULTILINE) -URL_ALLOWLIST_PTN=None -DIR_OUTPUT_PERMISSIONS=755 -ARCHIVEBOX_BINARY=/opt/archivebox/.venv/bin/archivebox -VERSION=0.8.0 -COMMIT_HASH=102e87578c6036bb0132dd1ebd17f8f05ffc880f -BUILD_TIME=2024-05-15 03:28:05 1715768885 -VERSIONS_AVAILABLE=None -CAN_UPGRADE=False -PYTHON_BINARY=/opt/archivebox/.venv/bin/python3.10 -PYTHON_ENCODING=UTF-8 -PYTHON_VERSION=3.10.14 -DJANGO_BINARY=/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py -DJANGO_VERSION=5.0.6 final (0) -SQLITE_BINARY=/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py -SQLITE_VERSION=2.6.0 -CURL_VERSION=curl 8.4.0 (x86_64-apple-darwin23.0) -WGET_VERSION=GNU Wget 1.24.5 -WGET_AUTO_COMPRESSION=True -RIPGREP_VERSION=ripgrep 14.1.0 -SINGLEFILE_VERSION=None -READABILITY_VERSION=None -MERCURY_VERSION=None -GIT_VERSION=git version 2.44.0 -YOUTUBEDL_VERSION=2024.04.09 -CHROME_VERSION=Google Chrome 124.0.6367.207 -NODE_VERSION=v21.7.3 -""" +# test_input = """ +# [SERVER_CONFIG] +# IS_TTY=False +# USE_COLOR=False +# SHOW_PROGRESS=False +# IN_DOCKER=False +# IN_QEMU=False +# PUID=501 +# PGID=20 +# OUTPUT_DIR=/opt/archivebox/data +# CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf +# ONLY_NEW=True +# TIMEOUT=60 +# MEDIA_TIMEOUT=3600 +# OUTPUT_PERMISSIONS=644 +# RESTRICT_FILE_NAMES=windows +# URL_DENYLIST=\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$ +# URL_ALLOWLIST=None +# ADMIN_USERNAME=None +# ADMIN_PASSWORD=None +# ENFORCE_ATOMIC_WRITES=True +# TAG_SEPARATOR_PATTERN=[,] +# SECRET_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +# BIND_ADDR=127.0.0.1:8000 +# ALLOWED_HOSTS=* +# DEBUG=False +# PUBLIC_INDEX=True +# PUBLIC_SNAPSHOTS=True +# PUBLIC_ADD_VIEW=False +# FOOTER_INFO=Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests. +# SNAPSHOTS_PER_PAGE=40 +# CUSTOM_TEMPLATES_DIR=None +# TIME_ZONE=UTC +# TIMEZONE=UTC +# REVERSE_PROXY_USER_HEADER=Remote-User +# REVERSE_PROXY_WHITELIST= +# LOGOUT_REDIRECT_URL=/ +# PREVIEW_ORIGINALS=True +# LDAP=False +# LDAP_SERVER_URI=None +# LDAP_BIND_DN=None +# LDAP_BIND_PASSWORD=None +# LDAP_USER_BASE=None +# LDAP_USER_FILTER=None +# LDAP_USERNAME_ATTR=None +# LDAP_FIRSTNAME_ATTR=None +# LDAP_LASTNAME_ATTR=None +# LDAP_EMAIL_ATTR=None +# LDAP_CREATE_SUPERUSER=False +# SAVE_TITLE=True +# SAVE_FAVICON=True +# SAVE_WGET=True +# SAVE_WGET_REQUISITES=True +# SAVE_SINGLEFILE=True +# SAVE_READABILITY=True +# SAVE_MERCURY=True +# SAVE_HTMLTOTEXT=True +# SAVE_PDF=True +# SAVE_SCREENSHOT=True +# SAVE_DOM=True +# SAVE_HEADERS=True +# SAVE_WARC=True +# SAVE_GIT=True +# SAVE_MEDIA=True +# SAVE_ARCHIVE_DOT_ORG=True +# RESOLUTION=1440,2000 +# GIT_DOMAINS=github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht +# CHECK_SSL_VALIDITY=True +# MEDIA_MAX_SIZE=750m +# USER_AGENT=None +# CURL_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0) +# WGET_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5 +# CHROME_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) +# COOKIES_FILE=None +# CHROME_USER_DATA_DIR=None +# CHROME_TIMEOUT=0 +# CHROME_HEADLESS=True +# CHROME_SANDBOX=True +# CHROME_EXTRA_ARGS=[] +# YOUTUBEDL_ARGS=['--restrict-filenames', '--trim-filenames', '128', '--write-description', '--write-info-json', '--write-annotations', '--write-thumbnail', '--no-call-home', '--write-sub', '--write-auto-subs', '--convert-subs=srt', '--yes-playlist', '--continue', '--no-abort-on-error', '--ignore-errors', '--geo-bypass', '--add-metadata', '--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)'] +# YOUTUBEDL_EXTRA_ARGS=[] +# WGET_ARGS=['--no-verbose', '--adjust-extension', '--convert-links', '--force-directories', '--backup-converted', '--span-hosts', '--no-parent', '-e', 'robots=off'] +# WGET_EXTRA_ARGS=[] +# CURL_ARGS=['--silent', '--location', '--compressed'] +# CURL_EXTRA_ARGS=[] +# GIT_ARGS=['--recursive'] +# SINGLEFILE_ARGS=[] +# SINGLEFILE_EXTRA_ARGS=[] +# MERCURY_ARGS=['--format=text'] +# MERCURY_EXTRA_ARGS=[] +# FAVICON_PROVIDER=https://www.google.com/s2/favicons?domain={} +# USE_INDEXING_BACKEND=True +# USE_SEARCHING_BACKEND=True +# SEARCH_BACKEND_ENGINE=ripgrep +# SEARCH_BACKEND_HOST_NAME=localhost +# SEARCH_BACKEND_PORT=1491 +# SEARCH_BACKEND_PASSWORD=SecretPassword +# SEARCH_PROCESS_HTML=True +# SONIC_COLLECTION=archivebox +# SONIC_BUCKET=snapshots +# SEARCH_BACKEND_TIMEOUT=90 +# FTS_SEPARATE_DATABASE=True +# FTS_TOKENIZERS=porter unicode61 remove_diacritics 2 +# FTS_SQLITE_MAX_LENGTH=1000000000 +# USE_CURL=True +# USE_WGET=True +# USE_SINGLEFILE=True +# USE_READABILITY=True +# USE_MERCURY=True +# USE_GIT=True +# USE_CHROME=True +# USE_NODE=True +# USE_YOUTUBEDL=True +# USE_RIPGREP=True +# CURL_BINARY=curl +# GIT_BINARY=git +# WGET_BINARY=wget +# SINGLEFILE_BINARY=single-file +# READABILITY_BINARY=readability-extractor +# MERCURY_BINARY=postlight-parser +# YOUTUBEDL_BINARY=yt-dlp +# NODE_BINARY=node +# RIPGREP_BINARY=rg +# CHROME_BINARY=chrome +# POCKET_CONSUMER_KEY=None +# USER=squash +# PACKAGE_DIR=/opt/archivebox/archivebox +# TEMPLATES_DIR=/opt/archivebox/archivebox/templates +# ARCHIVE_DIR=/opt/archivebox/data/archive +# SOURCES_DIR=/opt/archivebox/data/sources +# LOGS_DIR=/opt/archivebox/data/logs +# PERSONAS_DIR=/opt/archivebox/data/personas +# URL_DENYLIST_PTN=re.compile('\\.(css|js|otf|ttf|woff|woff2|gstatic\\.com|googleapis\\.com/css)(\\?.*)?$', re.IGNORECASE|re.MULTILINE) +# URL_ALLOWLIST_PTN=None +# DIR_OUTPUT_PERMISSIONS=755 +# ARCHIVEBOX_BINARY=/opt/archivebox/.venv/bin/archivebox +# VERSION=0.8.0 +# COMMIT_HASH=102e87578c6036bb0132dd1ebd17f8f05ffc880f +# BUILD_TIME=2024-05-15 03:28:05 1715768885 +# VERSIONS_AVAILABLE=None +# CAN_UPGRADE=False +# PYTHON_BINARY=/opt/archivebox/.venv/bin/python3.10 +# PYTHON_ENCODING=UTF-8 +# PYTHON_VERSION=3.10.14 +# DJANGO_BINARY=/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py +# DJANGO_VERSION=5.0.6 final (0) +# SQLITE_BINARY=/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py +# SQLITE_VERSION=2.6.0 +# CURL_VERSION=curl 8.4.0 (x86_64-apple-darwin23.0) +# WGET_VERSION=GNU Wget 1.24.5 +# WGET_AUTO_COMPRESSION=True +# RIPGREP_VERSION=ripgrep 14.1.0 +# SINGLEFILE_VERSION=None +# READABILITY_VERSION=None +# MERCURY_VERSION=None +# GIT_VERSION=git version 2.44.0 +# YOUTUBEDL_VERSION=2024.04.09 +# CHROME_VERSION=Google Chrome 124.0.6367.207 +# NODE_VERSION=v21.7.3 +# """ -expected_output = TOML_HEADER + '''[SERVER_CONFIG] -IS_TTY = false -USE_COLOR = false -SHOW_PROGRESS = false -IN_DOCKER = false -IN_QEMU = false -PUID = 501 -PGID = 20 -OUTPUT_DIR = "/opt/archivebox/data" -CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf" -ONLY_NEW = true -TIMEOUT = 60 -MEDIA_TIMEOUT = 3600 -OUTPUT_PERMISSIONS = 644 -RESTRICT_FILE_NAMES = "windows" -URL_DENYLIST = "\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$" -URL_ALLOWLIST = null -ADMIN_USERNAME = null -ADMIN_PASSWORD = null -ENFORCE_ATOMIC_WRITES = true -TAG_SEPARATOR_PATTERN = "[,]" -SECRET_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" -BIND_ADDR = "127.0.0.1:8000" -ALLOWED_HOSTS = "*" -DEBUG = false -PUBLIC_INDEX = true -PUBLIC_SNAPSHOTS = true -PUBLIC_ADD_VIEW = false -FOOTER_INFO = "Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests." -SNAPSHOTS_PER_PAGE = 40 -CUSTOM_TEMPLATES_DIR = null -TIME_ZONE = "UTC" -TIMEZONE = "UTC" -REVERSE_PROXY_USER_HEADER = "Remote-User" -REVERSE_PROXY_WHITELIST = "" -LOGOUT_REDIRECT_URL = "/" -PREVIEW_ORIGINALS = true -LDAP = false -LDAP_SERVER_URI = null -LDAP_BIND_DN = null -LDAP_BIND_PASSWORD = null -LDAP_USER_BASE = null -LDAP_USER_FILTER = null -LDAP_USERNAME_ATTR = null -LDAP_FIRSTNAME_ATTR = null -LDAP_LASTNAME_ATTR = null -LDAP_EMAIL_ATTR = null -LDAP_CREATE_SUPERUSER = false -SAVE_TITLE = true -SAVE_FAVICON = true -SAVE_WGET = true -SAVE_WGET_REQUISITES = true -SAVE_SINGLEFILE = true -SAVE_READABILITY = true -SAVE_MERCURY = true -SAVE_HTMLTOTEXT = true -SAVE_PDF = true -SAVE_SCREENSHOT = true -SAVE_DOM = true -SAVE_HEADERS = true -SAVE_WARC = true -SAVE_GIT = true -SAVE_MEDIA = true -SAVE_ARCHIVE_DOT_ORG = true -RESOLUTION = [1440, 2000] -GIT_DOMAINS = "github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht" -CHECK_SSL_VALIDITY = true -MEDIA_MAX_SIZE = "750m" -USER_AGENT = null -CURL_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)" -WGET_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5" -CHROME_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)" -COOKIES_FILE = null -CHROME_USER_DATA_DIR = null -CHROME_TIMEOUT = false -CHROME_HEADLESS = true -CHROME_SANDBOX = true -CHROME_EXTRA_ARGS = [] -YOUTUBEDL_ARGS = ["--restrict-filenames", "--trim-filenames", "128", "--write-description", "--write-info-json", "--write-annotations", "--write-thumbnail", "--no-call-home", "--write-sub", "--write-auto-subs", "--convert-subs=srt", "--yes-playlist", "--continue", "--no-abort-on-error", "--ignore-errors", "--geo-bypass", "--add-metadata", "--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)"] -YOUTUBEDL_EXTRA_ARGS = [] -WGET_ARGS = ["--no-verbose", "--adjust-extension", "--convert-links", "--force-directories", "--backup-converted", "--span-hosts", "--no-parent", "-e", "robots=off"] -WGET_EXTRA_ARGS = [] -CURL_ARGS = ["--silent", "--location", "--compressed"] -CURL_EXTRA_ARGS = [] -GIT_ARGS = ["--recursive"] -SINGLEFILE_ARGS = [] -SINGLEFILE_EXTRA_ARGS = [] -MERCURY_ARGS = ["--format=text"] -MERCURY_EXTRA_ARGS = [] -FAVICON_PROVIDER = "https://www.google.com/s2/favicons?domain={}" -USE_INDEXING_BACKEND = true -USE_SEARCHING_BACKEND = true -SEARCH_BACKEND_ENGINE = "ripgrep" -SEARCH_BACKEND_HOST_NAME = "localhost" -SEARCH_BACKEND_PORT = 1491 -SEARCH_BACKEND_PASSWORD = "SecretPassword" -SEARCH_PROCESS_HTML = true -SONIC_COLLECTION = "archivebox" -SONIC_BUCKET = "snapshots" -SEARCH_BACKEND_TIMEOUT = 90 -FTS_SEPARATE_DATABASE = true -FTS_TOKENIZERS = "porter unicode61 remove_diacritics 2" -FTS_SQLITE_MAX_LENGTH = 1000000000 -USE_CURL = true -USE_WGET = true -USE_SINGLEFILE = true -USE_READABILITY = true -USE_MERCURY = true -USE_GIT = true -USE_CHROME = true -USE_NODE = true -USE_YOUTUBEDL = true -USE_RIPGREP = true -CURL_BINARY = "curl" -GIT_BINARY = "git" -WGET_BINARY = "wget" -SINGLEFILE_BINARY = "single-file" -READABILITY_BINARY = "readability-extractor" -MERCURY_BINARY = "postlight-parser" -YOUTUBEDL_BINARY = "yt-dlp" -NODE_BINARY = "node" -RIPGREP_BINARY = "rg" -CHROME_BINARY = "chrome" -POCKET_CONSUMER_KEY = null -USER = "squash" -PACKAGE_DIR = "/opt/archivebox/archivebox" -TEMPLATES_DIR = "/opt/archivebox/archivebox/templates" -ARCHIVE_DIR = "/opt/archivebox/data/archive" -SOURCES_DIR = "/opt/archivebox/data/sources" -LOGS_DIR = "/opt/archivebox/data/logs" -PERSONAS_DIR = "/opt/archivebox/data/personas" -URL_DENYLIST_PTN = "re.compile(\'\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$\', re.IGNORECASE|re.MULTILINE)" -URL_ALLOWLIST_PTN = null -DIR_OUTPUT_PERMISSIONS = 755 -ARCHIVEBOX_BINARY = "/opt/archivebox/.venv/bin/archivebox" -VERSION = "0.8.0" -COMMIT_HASH = "102e87578c6036bb0132dd1ebd17f8f05ffc880f" -BUILD_TIME = "2024-05-15 03:28:05 1715768885" -VERSIONS_AVAILABLE = null -CAN_UPGRADE = false -PYTHON_BINARY = "/opt/archivebox/.venv/bin/python3.10" -PYTHON_ENCODING = "UTF-8" -PYTHON_VERSION = "3.10.14" -DJANGO_BINARY = "/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py" -DJANGO_VERSION = "5.0.6 final (0)" -SQLITE_BINARY = "/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py" -SQLITE_VERSION = "2.6.0" -CURL_VERSION = "curl 8.4.0 (x86_64-apple-darwin23.0)" -WGET_VERSION = "GNU Wget 1.24.5" -WGET_AUTO_COMPRESSION = true -RIPGREP_VERSION = "ripgrep 14.1.0" -SINGLEFILE_VERSION = null -READABILITY_VERSION = null -MERCURY_VERSION = null -GIT_VERSION = "git version 2.44.0" -YOUTUBEDL_VERSION = "2024.04.09" -CHROME_VERSION = "Google Chrome 124.0.6367.207" -NODE_VERSION = "v21.7.3"''' +# expected_output = TOML_HEADER + '''[SERVER_CONFIG] +# IS_TTY = false +# USE_COLOR = false +# SHOW_PROGRESS = false +# IN_DOCKER = false +# IN_QEMU = false +# PUID = 501 +# PGID = 20 +# OUTPUT_DIR = "/opt/archivebox/data" +# CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf" +# ONLY_NEW = true +# TIMEOUT = 60 +# MEDIA_TIMEOUT = 3600 +# OUTPUT_PERMISSIONS = 644 +# RESTRICT_FILE_NAMES = "windows" +# URL_DENYLIST = "\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$" +# URL_ALLOWLIST = null +# ADMIN_USERNAME = null +# ADMIN_PASSWORD = null +# ENFORCE_ATOMIC_WRITES = true +# TAG_SEPARATOR_PATTERN = "[,]" +# SECRET_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +# BIND_ADDR = "127.0.0.1:8000" +# ALLOWED_HOSTS = "*" +# DEBUG = false +# PUBLIC_INDEX = true +# PUBLIC_SNAPSHOTS = true +# PUBLIC_ADD_VIEW = false +# FOOTER_INFO = "Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests." +# SNAPSHOTS_PER_PAGE = 40 +# CUSTOM_TEMPLATES_DIR = null +# TIME_ZONE = "UTC" +# TIMEZONE = "UTC" +# REVERSE_PROXY_USER_HEADER = "Remote-User" +# REVERSE_PROXY_WHITELIST = "" +# LOGOUT_REDIRECT_URL = "/" +# PREVIEW_ORIGINALS = true +# LDAP = false +# LDAP_SERVER_URI = null +# LDAP_BIND_DN = null +# LDAP_BIND_PASSWORD = null +# LDAP_USER_BASE = null +# LDAP_USER_FILTER = null +# LDAP_USERNAME_ATTR = null +# LDAP_FIRSTNAME_ATTR = null +# LDAP_LASTNAME_ATTR = null +# LDAP_EMAIL_ATTR = null +# LDAP_CREATE_SUPERUSER = false +# SAVE_TITLE = true +# SAVE_FAVICON = true +# SAVE_WGET = true +# SAVE_WGET_REQUISITES = true +# SAVE_SINGLEFILE = true +# SAVE_READABILITY = true +# SAVE_MERCURY = true +# SAVE_HTMLTOTEXT = true +# SAVE_PDF = true +# SAVE_SCREENSHOT = true +# SAVE_DOM = true +# SAVE_HEADERS = true +# SAVE_WARC = true +# SAVE_GIT = true +# SAVE_MEDIA = true +# SAVE_ARCHIVE_DOT_ORG = true +# RESOLUTION = [1440, 2000] +# GIT_DOMAINS = "github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht" +# CHECK_SSL_VALIDITY = true +# MEDIA_MAX_SIZE = "750m" +# USER_AGENT = null +# CURL_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)" +# WGET_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5" +# CHROME_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)" +# COOKIES_FILE = null +# CHROME_USER_DATA_DIR = null +# CHROME_TIMEOUT = false +# CHROME_HEADLESS = true +# CHROME_SANDBOX = true +# CHROME_EXTRA_ARGS = [] +# YOUTUBEDL_ARGS = ["--restrict-filenames", "--trim-filenames", "128", "--write-description", "--write-info-json", "--write-annotations", "--write-thumbnail", "--no-call-home", "--write-sub", "--write-auto-subs", "--convert-subs=srt", "--yes-playlist", "--continue", "--no-abort-on-error", "--ignore-errors", "--geo-bypass", "--add-metadata", "--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)"] +# YOUTUBEDL_EXTRA_ARGS = [] +# WGET_ARGS = ["--no-verbose", "--adjust-extension", "--convert-links", "--force-directories", "--backup-converted", "--span-hosts", "--no-parent", "-e", "robots=off"] +# WGET_EXTRA_ARGS = [] +# CURL_ARGS = ["--silent", "--location", "--compressed"] +# CURL_EXTRA_ARGS = [] +# GIT_ARGS = ["--recursive"] +# SINGLEFILE_ARGS = [] +# SINGLEFILE_EXTRA_ARGS = [] +# MERCURY_ARGS = ["--format=text"] +# MERCURY_EXTRA_ARGS = [] +# FAVICON_PROVIDER = "https://www.google.com/s2/favicons?domain={}" +# USE_INDEXING_BACKEND = true +# USE_SEARCHING_BACKEND = true +# SEARCH_BACKEND_ENGINE = "ripgrep" +# SEARCH_BACKEND_HOST_NAME = "localhost" +# SEARCH_BACKEND_PORT = 1491 +# SEARCH_BACKEND_PASSWORD = "SecretPassword" +# SEARCH_PROCESS_HTML = true +# SONIC_COLLECTION = "archivebox" +# SONIC_BUCKET = "snapshots" +# SEARCH_BACKEND_TIMEOUT = 90 +# FTS_SEPARATE_DATABASE = true +# FTS_TOKENIZERS = "porter unicode61 remove_diacritics 2" +# FTS_SQLITE_MAX_LENGTH = 1000000000 +# USE_CURL = true +# USE_WGET = true +# USE_SINGLEFILE = true +# USE_READABILITY = true +# USE_MERCURY = true +# USE_GIT = true +# USE_CHROME = true +# USE_NODE = true +# USE_YOUTUBEDL = true +# USE_RIPGREP = true +# CURL_BINARY = "curl" +# GIT_BINARY = "git" +# WGET_BINARY = "wget" +# SINGLEFILE_BINARY = "single-file" +# READABILITY_BINARY = "readability-extractor" +# MERCURY_BINARY = "postlight-parser" +# YOUTUBEDL_BINARY = "yt-dlp" +# NODE_BINARY = "node" +# RIPGREP_BINARY = "rg" +# CHROME_BINARY = "chrome" +# POCKET_CONSUMER_KEY = null +# USER = "squash" +# PACKAGE_DIR = "/opt/archivebox/archivebox" +# TEMPLATES_DIR = "/opt/archivebox/archivebox/templates" +# ARCHIVE_DIR = "/opt/archivebox/data/archive" +# SOURCES_DIR = "/opt/archivebox/data/sources" +# LOGS_DIR = "/opt/archivebox/data/logs" +# PERSONAS_DIR = "/opt/archivebox/data/personas" +# URL_DENYLIST_PTN = "re.compile(\'\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$\', re.IGNORECASE|re.MULTILINE)" +# URL_ALLOWLIST_PTN = null +# DIR_OUTPUT_PERMISSIONS = 755 +# ARCHIVEBOX_BINARY = "/opt/archivebox/.venv/bin/archivebox" +# VERSION = "0.8.0" +# COMMIT_HASH = "102e87578c6036bb0132dd1ebd17f8f05ffc880f" +# BUILD_TIME = "2024-05-15 03:28:05 1715768885" +# VERSIONS_AVAILABLE = null +# CAN_UPGRADE = false +# PYTHON_BINARY = "/opt/archivebox/.venv/bin/python3.10" +# PYTHON_ENCODING = "UTF-8" +# PYTHON_VERSION = "3.10.14" +# DJANGO_BINARY = "/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py" +# DJANGO_VERSION = "5.0.6 final (0)" +# SQLITE_BINARY = "/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py" +# SQLITE_VERSION = "2.6.0" +# CURL_VERSION = "curl 8.4.0 (x86_64-apple-darwin23.0)" +# WGET_VERSION = "GNU Wget 1.24.5" +# WGET_AUTO_COMPRESSION = true +# RIPGREP_VERSION = "ripgrep 14.1.0" +# SINGLEFILE_VERSION = null +# READABILITY_VERSION = null +# MERCURY_VERSION = null +# GIT_VERSION = "git version 2.44.0" +# YOUTUBEDL_VERSION = "2024.04.09" +# CHROME_VERSION = "Google Chrome 124.0.6367.207" +# NODE_VERSION = "v21.7.3"''' -first_output = convert(test_input) # make sure ini -> toml parses correctly -second_output = convert(first_output) # make sure toml -> toml parses/dumps consistently -assert first_output == second_output == expected_output # make sure parsing is indempotent +# first_output = convert(test_input) # make sure ini -> toml parses correctly +# second_output = convert(first_output) # make sure toml -> toml parses/dumps consistently +# assert first_output == second_output == expected_output # make sure parsing is indempotent # # DEBUGGING # import sys diff --git a/archivebox/plugantic/migrations/__init__.py b/archivebox/plugantic/migrations/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/archivebox/plugantic/models.py b/archivebox/plugantic/models.py deleted file mode 100644 index 98372eb4..00000000 --- a/archivebox/plugantic/models.py +++ /dev/null @@ -1 +0,0 @@ -__package__ = 'archivebox.plugantic' diff --git a/archivebox/plugantic/settings.py b/archivebox/plugantic/settings.py deleted file mode 100644 index d2e96bdf..00000000 --- a/archivebox/plugantic/settings.py +++ /dev/null @@ -1,336 +0,0 @@ -import re -import os -import sys -import toml -import json -import platform -import inspect -import tomllib - -from typing import Callable, Any, Optional, Pattern, Type, Tuple, Dict, List -from pathlib import Path - -from pydantic import BaseModel, Field, FieldValidationInfo, AliasChoices, model_validator, FilePath, DirectoryPath, computed_field, TypeAdapter -from pydantic.fields import FieldInfo - -from pydantic_settings import BaseSettings, SettingsConfigDict, PydanticBaseSettingsSource -from pydantic_settings.sources import InitSettingsSource, ConfigFileSourceMixin, TomlConfigSettingsSource - -from pydantic.json_schema import GenerateJsonSchema -from pydantic_core import PydanticOmit, core_schema, to_jsonable_python, ValidationError -from pydantic.json_schema import GenerateJsonSchema, JsonSchemaValue - -import ini_to_toml - - -class JSONSchemaWithLambdas(GenerateJsonSchema): - def encode_default(self, default: Any) -> Any: - """Encode lambda functions in default values properly""" - config = self._config - if isinstance(default, Callable): - return '{{lambda ' + inspect.getsource(default).split('=lambda ')[-1].strip()[:-1] + '}}' - return to_jsonable_python( - default, - timedelta_mode=config.ser_json_timedelta, - bytes_mode=config.ser_json_bytes, - serialize_unknown=True - ) - - # for computed_field properties render them like this instead: - # inspect.getsource(field.wrapped_property.fget).split('def ', 1)[-1].split('\n', 1)[-1].strip().strip('return '), - - -class ModelWithDefaults(BaseSettings): - model_config = SettingsConfigDict(validate_default=False, case_sensitive=False, extra='ignore') - - @model_validator(mode='after') - def fill_defaults(self): - """Populate any unset values using function provided as their default""" - for key, field in self.model_fields.items(): - value = getattr(self, key) - if isinstance(value, Callable): - # if value is a function, execute it to get the actual value, passing CONFIG dict as an arg - config_so_far = self.dict(exclude_unset=True) - fallback_value = field.default(config_so_far) - - # check to make sure default factory return value matches type annotation - TypeAdapter(field.annotation).validate_python(fallback_value) - - # set generated default value as final validated value - setattr(self, key, fallback_value) - return self - - def as_json(self, model_fields=True, computed_fields=True): - output_dict = {} - for section in self.__class__.__mro__[1:]: - if not section.__name__.isupper(): - break - output_dict[section.__name__] = output_dict.get(section.__name__) or {} - include = {} - if model_fields: - include.update(**section.model_fields) - if computed_fields: - include.update(**section.model_computed_fields) - output_dict[section.__name__].update(json.loads(section.json(self, include=include))) - return output_dict - - def as_toml(self, model_fields=True, computed_fields=True): - output_text = '' - for section in self.__class__.__mro__[1:]: - if not section.__name__.isupper(): - break - include = {} - if model_fields: - include.update(**section.model_fields) - if computed_fields: - include.update(**section.model_computed_fields) - - output_text += ( - f'[{section.__name__}]\n' + - toml.dumps(json.loads(section.json(self, include=include))) + '\n' - ) - return output_text - - def as_legacy_schema(self, model_fields=True, computed_fields=True): - """Convert a newer Pydantic Settings BaseModel into the old-style archivebox.config CONFIG_SCHEMA format""" - - schemas = {} - - include = {} - if model_fields: - include.update(**self.model_fields) - if computed_fields: - include.update(**self.model_computed_fields) - - for key, field in include.items(): - key = key.upper() - defining_class = None - for cls in self.__class__.__mro__[1:]: - if key in cls.model_fields or key in cls.model_computed_fields: - defining_class = cls - break - - assert defining_class is not None, f"No defining class found for field {key}! (should be impossible)" - - schemas[defining_class.__name__] = schemas.get(defining_class.__name__) or {} - schemas[defining_class.__name__][key] = { - 'value': getattr(self, key), - 'type': str(field.annotation.__name__).lower() if hasattr(field, 'annotation') else str(field.return_type).lower(), - 'default': field.default if hasattr(field, 'default') else field.wrapped_property.fget, - 'aliases': (getattr(field.json_schema_extra.get('aliases', {}), 'choices') or []) if getattr(field, 'json_schema_extra') else [], - } - - return schemas - - @classmethod - def settings_customise_sources( - cls, - settings_cls: Type[BaseSettings], - init_settings: PydanticBaseSettingsSource, - env_settings: PydanticBaseSettingsSource, - dotenv_settings: PydanticBaseSettingsSource, - file_secret_settings: PydanticBaseSettingsSource, - ) -> Tuple[PydanticBaseSettingsSource, ...]: - ARCHIVEBOX_CONFIG_FILE = Path('/Users/squash/Local/Code/archiveboxes/ArchiveBox/data/ArchiveBox.conf') - ARCHIVEBOX_CONFIG_FILE_TOML = ARCHIVEBOX_CONFIG_FILE.parent / f'.ArchiveBox.toml' - try: - return ( - init_settings, - env_settings, - TomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE), - ) - except tomllib.TOMLDecodeError: - toml_str = ini_to_toml.convert(ARCHIVEBOX_CONFIG_FILE.read_text()) - with open(ARCHIVEBOX_CONFIG_FILE_TOML, 'w+') as f: - f.write(toml_str) - - return ( - init_settings, - env_settings, - TomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE_TOML), - ) - - -class SHELL_CONFIG(ModelWithDefaults): - IS_TTY: bool = Field(default=lambda c: sys.stdout.isatty()) - USE_COLOR: bool = Field(default=lambda c: c['IS_TTY']) - SHOW_PROGRESS: bool = Field(default=lambda c: c['IS_TTY'] and (platform.system() != 'Darwin')) - - IN_DOCKER: bool = Field(default=False) - IN_QEMU: bool = Field(default=False) - PUID: int = Field(default=lambda c: os.getuid()) - PGID: int = Field(default=lambda c: os.getgid()) - - -class GENERAL_CONFIG(ModelWithDefaults): - # OUTPUT_DIR: DirectoryPath - CONFIG_FILE: FilePath = Field(default=lambda c: c['OUTPUT_DIR'] / 'ArchiveBox.conf') - - ONLY_NEW: bool = Field(default=True) - TIMEOUT: int = Field(default=60) - MEDIA_TIMEOUT: int = Field(default=3600) - - ENFORCE_ATOMIC_WRITES: bool = Field(default=True) - OUTPUT_PERMISSIONS: str = Field(default='644') - RESTRICT_FILE_NAMES: str = Field(default='windows') - - URL_DENYLIST: Pattern = Field(default=re.compile(r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'), aliases=AliasChoices('URL_BLACKLIST')) - URL_ALLOWLIST: Pattern = Field(default=re.compile(r''), aliases=AliasChoices('URL_WHITELIST')) - - ADMIN_USERNAME: Optional[str] = Field(default=None, min_length=1, max_length=63, pattern=r'^[\S]+$') - ADMIN_PASSWORD: Optional[str] = Field(default=None, min_length=1, max_length=63) - - TAG_SEPARATOR_PATTERN: Pattern = Field(default=re.compile(r'[,]')) - - @computed_field - @property - def OUTPUT_DIR(self) -> DirectoryPath: - return Path('.').resolve() - -# class PackageInstalled(ModelWithDefaults): -# binary_abs: HostBinPathStr -# version_str: str -# is_valid: True -# provider: PackageProvider -# date_installed: datetime -# date_checked: datetime - -class EntrypointConfig(ModelWithDefaults): - name: str - dependency: str - runtime: Literal['python.eval', 'node.eval', 'puppeteer', 'shell.run', 'ansible'] - CMD: str - DEFAULT_ARGS: List[str] - EXTRA_ARGS: List[str] - ARGS: List[str] - SCHEMA: EntrypointSchema - validator: Callable = eval - -class VersionEntrypointConfig(ModelWithDefaults): - DEFAULT_ARGS = ['--version'] - -class PackageProvider(ModelWithDefaults): - name: Literal['config', 'PATH', 'pip', 'apt', 'brew', 'npm', 'vendor'] - - def install_bin(self, name): - # ... - return PackageInstall - - def get_bin_path(self, name, install=True): - return shell(['which', name]) - -class DependencyConfig(ModelWithDefaults): - providers: List[Literal['config', 'PATH', 'pip', 'apt', 'brew', 'npm', 'vendor'], ...] - name: str - packages: List[str] - entrypoints: Dict[str, EntrypointConfig] - version_cmd: EntrypointConfig = field(default=lambda c: ) - -class ExtractorConfig(ModelWithDefaults): - name: str - description: str = Field(examples=['WGET Extractor']) - depends_on: DepencencyConfig - entrypoint: EntrypointConfig = Field(description='Which entrypoint to use for this extractor') - -class ReplayerConfig(ModelWithDefaults): - """Describes how to render an ArchiveResult in several contexts""" - name: str - row_template: 'plugins/wget/templates/row.html' - embed_template: 'plugins/wget/templates/embed.html' - fullpage_template: 'plugins/wget/templates/fullpage.html' - - icon_view: ImportString 'plugins.wget.replayers.wget.get_icon' - thumbnail_getter: ImportString = 'plugins.wget.replayers.wget.get_icon' - -class PluginConfig(ModelWithDefaults): - dependencies: Dict[str, DependencyConfig] - entrypoints: Dict[str, EntrypointConfig] - extractors: Dict[str, ExtractorConfig] - replayers: Dict[str, ReplayerConfig] - - name: str - - BINARY: - PROVIDERS: List[, ...] - - ENTRYPOINTS: Dict[str, EntrypointConfig] - - - - WGET_BINARY: HostBinName = Field(default='wget') - - @computed_field - @property - def WGET_PROVIDERS(self) -> List[Provider]: - -class WGET_DEPENDENCY_CONFIG(DEPENDENCY_CONFIG): - pass - -class WGET_CONFIG(ModelWithDefaults): - EXTRACTORS: List[EXTRACTORS] = EXTRACTOR_CONFIG('') - DEPDENCIES: List[DEPENDENCY_CONFIG] = [DEPENDENCY_CONFIG] - -class WgetConfiguration(SingletonModel): - singleton_instance_id = 1 - - dependency_config: WGET_CONFIG = SchemaField() - extractor_config: WGET_CONFIG = SchemaField() - replay_config: WGET_CONFIG = SchemaField() - pkg_config: WGET_CONFIG = SchemaField() - - - - - -class WGET_CONFIG(ModelWithDefaults): - - - -# class ConfigSet(models.Model): -# # scope = when should this config set be active -# # host: on a specific host running archivebox -# # -# # snapshot__added: on or during a specific timeperiod -# # user: for actions initiated by a specific archivebox user -# # extractor: for specific extractors running under a snapshot -# # snapshot_id: for a specific snapshot pk -# # snapshot__url: for a specific snapshot url -# scope = models.CharField(choices=('host', 'date', 'user', 'extractor', 'domain', 'url', 'custom')) -# lookup = models.CharField(choices=('__eq', '__icontains', '__gte', '__lt', '__startswith', '__endswith', '__in', '__isnull')) -# match = models.CharField(max_length=128) - -# config = models.JSONField(default={}, schema=Dict[str, JSONValue]) -# getter = models.ImportString(default='django.utils.model_loading.import_string') - -# label = models.CharField(max_length=64) -# created_by = models.ForeignKey(User, on_delete=models.CASCADE) -# config = JSONField(schema=Dict[str, JSONValue]) - - - -CONFIG_SECTIONS = (GENERAL_CONFIG, SHELL_CONFIG) - -class USER_CONFIG(*CONFIG_SECTIONS): - pass - - -if __name__ == '__main__': - # print(ShellConfig(**{'IS_TTY': False, 'PGID': 911}).model_dump()) - # print(json.dumps(SHELL_CONFIG.model_json_schema(schema_generator=JSONSchemaWithLambdas), indent=4)) - # print(json.dumps(GENERAL_CONFIG.model_json_schema(schema_generator=JSONSchemaWithLambdas), indent=4)) - print() - # os.environ['PGID'] = '422' - os.environ['URL_ALLOWLIST'] = r'worked!!!!!\\.com' - config = USER_CONFIG(**{'SHOW_PROGRESS': False, 'ADMIN_USERNAME': 'kip', 'PGID': 911}) - - print('==========archivebox.config.CONFIG_SCHEMA======================') - print(json.dumps(config.as_legacy_schema(), indent=4, default=str)) - - print('==========JSON=================================================') - # print(config.__class__.__name__, '=', config.model_dump_json(indent=4)) - print(json.dumps(config.as_json(), indent=4)) - - print('==========TOML=================================================') - print(config.as_toml()) - - diff --git a/archivebox/plugantic/tests.py b/archivebox/plugantic/tests.py deleted file mode 100644 index 7ce503c2..00000000 --- a/archivebox/plugantic/tests.py +++ /dev/null @@ -1,3 +0,0 @@ -from django.test import TestCase - -# Create your tests here.