diff --git a/archivebox/config_stubs.py b/archivebox/config_stubs.py index c8cc9ecb..399472ca 100644 --- a/archivebox/config_stubs.py +++ b/archivebox/config_stubs.py @@ -16,10 +16,10 @@ class BaseConfig(TypedDict): class ConfigDict(BaseConfig, total=False): """ # Regenerate by pasting this quine into `archivebox shell` 🥚 - from archivebox.config import ConfigDict, CONFIG_DEFAULTS + from archivebox.config import ConfigDict, CONFIG_SCHEMA print('class ConfigDict(BaseConfig, total=False):') print(' ' + '"'*3 + ConfigDict.__doc__ + '"'*3) - for section, configs in CONFIG_DEFAULTS.items(): + for section, configs in CONFIG_SCHEMA.items(): for key, attrs in configs.items(): Type, default = attrs['type'], attrs['default'] if default is None: @@ -32,16 +32,23 @@ class ConfigDict(BaseConfig, total=False): USE_COLOR: bool SHOW_PROGRESS: bool IN_DOCKER: bool + IN_QEMU: bool + PUID: int + PGID: int - PACKAGE_DIR: Path - OUTPUT_DIR: Path - CONFIG_FILE: Path + OUTPUT_DIR: Optional[str] + CONFIG_FILE: Optional[str] ONLY_NEW: bool TIMEOUT: int MEDIA_TIMEOUT: int OUTPUT_PERMISSIONS: str RESTRICT_FILE_NAMES: str URL_DENYLIST: str + URL_ALLOWLIST: Optional[str] + ADMIN_USERNAME: Optional[str] + ADMIN_PASSWORD: Optional[str] + ENFORCE_ATOMIC_WRITES: bool + TAG_SEPARATOR_PATTERN: str SECRET_KEY: Optional[str] BIND_ADDR: str @@ -49,7 +56,27 @@ class ConfigDict(BaseConfig, total=False): DEBUG: bool PUBLIC_INDEX: bool PUBLIC_SNAPSHOTS: bool + PUBLIC_ADD_VIEW: bool FOOTER_INFO: str + SNAPSHOTS_PER_PAGE: int + CUSTOM_TEMPLATES_DIR: Optional[str] + TIME_ZONE: str + TIMEZONE: str + REVERSE_PROXY_USER_HEADER: str + REVERSE_PROXY_WHITELIST: str + LOGOUT_REDIRECT_URL: str + PREVIEW_ORIGINALS: bool + LDAP: bool + LDAP_SERVER_URI: Optional[str] + LDAP_BIND_DN: Optional[str] + LDAP_BIND_PASSWORD: Optional[str] + LDAP_USER_BASE: Optional[str] + LDAP_USER_FILTER: Optional[str] + LDAP_USERNAME_ATTR: Optional[str] + LDAP_FIRSTNAME_ATTR: Optional[str] + LDAP_LASTNAME_ATTR: Optional[str] + LDAP_EMAIL_ATTR: Optional[str] + LDAP_CREATE_SUPERUSER: bool SAVE_TITLE: bool SAVE_FAVICON: bool @@ -58,25 +85,50 @@ class ConfigDict(BaseConfig, total=False): SAVE_SINGLEFILE: bool SAVE_READABILITY: bool SAVE_MERCURY: bool + SAVE_HTMLTOTEXT: bool SAVE_PDF: bool SAVE_SCREENSHOT: bool SAVE_DOM: bool + SAVE_HEADERS: bool SAVE_WARC: bool SAVE_GIT: bool SAVE_MEDIA: bool SAVE_ARCHIVE_DOT_ORG: bool + SAVE_ALLOWLIST: dict + SAVE_DENYLIST: dict RESOLUTION: str GIT_DOMAINS: str CHECK_SSL_VALIDITY: bool + MEDIA_MAX_SIZE: str CURL_USER_AGENT: str WGET_USER_AGENT: str CHROME_USER_AGENT: str - COOKIES_FILE: Union[str, Path, None] - CHROME_USER_DATA_DIR: Union[str, Path, None] + COOKIES_FILE: Optional[str] + CHROME_USER_DATA_DIR: Optional[str] CHROME_TIMEOUT: int CHROME_HEADLESS: bool CHROME_SANDBOX: bool + YOUTUBEDL_ARGS: list + WGET_ARGS: list + CURL_ARGS: list + GIT_ARGS: list + SINGLEFILE_ARGS: Optional[list] + FAVICON_PROVIDER: str + + USE_INDEXING_BACKEND: bool + USE_SEARCHING_BACKEND: bool + SEARCH_BACKEND_ENGINE: str + SEARCH_BACKEND_HOST_NAME: str + SEARCH_BACKEND_PORT: int + SEARCH_BACKEND_PASSWORD: str + SEARCH_PROCESS_HTML: bool + SONIC_COLLECTION: str + SONIC_BUCKET: str + SEARCH_BACKEND_TIMEOUT: int + FTS_SEPARATE_DATABASE: bool + FTS_TOKENIZERS: str + FTS_SQLITE_MAX_LENGTH: int USE_CURL: bool USE_WGET: bool @@ -85,7 +137,9 @@ class ConfigDict(BaseConfig, total=False): USE_MERCURY: bool USE_GIT: bool USE_CHROME: bool + USE_NODE: bool USE_YOUTUBEDL: bool + USE_RIPGREP: bool CURL_BINARY: str GIT_BINARY: str WGET_BINARY: str @@ -93,13 +147,12 @@ class ConfigDict(BaseConfig, total=False): READABILITY_BINARY: str MERCURY_BINARY: str YOUTUBEDL_BINARY: str + NODE_BINARY: str + RIPGREP_BINARY: str CHROME_BINARY: Optional[str] - - YOUTUBEDL_ARGS: List[str] - WGET_ARGS: List[str] - CURL_ARGS: List[str] - GIT_ARGS: List[str] - TAG_SEPARATOR_PATTERN: str + POCKET_CONSUMER_KEY: Optional[str] + POCKET_ACCESS_TOKENS: dict + READWISE_READER_TOKENS: dict ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue] diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py index a865eb24..b5cc569d 100644 --- a/archivebox/core/admin.py +++ b/archivebox/core/admin.py @@ -106,6 +106,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin): action_form = SnapshotActionForm + def changelist_view(self, request, extra_context=None): extra_context = extra_context or {} return super().changelist_view(request, extra_context | GLOBAL_CONTEXT) diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index 7c0f164f..86a962e0 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -1,5 +1,10 @@ __package__ = 'archivebox.core' +# TODO: add this after we upgrade to Django >=3.2 +# https://github.com/typeddjango/django-stubs +# import django_stubs_ext +# django_stubs_ext.monkeypatch() + import os import sys import re @@ -357,21 +362,21 @@ IGNORABLE_404_URLS = [ ] class NoisyRequestsFilter(logging.Filter): - def filter(self, record): + def filter(self, record) -> bool: logline = record.getMessage() # ignore harmless 404s for the patterns in IGNORABLE_404_URLS for ignorable_url_pattern in IGNORABLE_404_URLS: ignorable_log_pattern = re.compile(f'^"GET /.*/?{ignorable_url_pattern.pattern[:-1]} HTTP/.*" (200|30.|404) .+$', re.I | re.M) if ignorable_log_pattern.match(logline): - return 0 + return False # ignore staticfile requests that 200 or 30* ignoreable_200_log_pattern = re.compile(r'"GET /static/.* HTTP/.*" (200|30.) .+', re.I | re.M) if ignoreable_200_log_pattern.match(logline): - return 0 + return False - return 1 + return True if LOGS_DIR.exists(): ERROR_LOG = (LOGS_DIR / 'errors.log') diff --git a/archivebox/mypy.ini b/archivebox/mypy.ini deleted file mode 100644 index b1b4489a..00000000 --- a/archivebox/mypy.ini +++ /dev/null @@ -1,3 +0,0 @@ -[mypy] -plugins = - mypy_django_plugin.main diff --git a/archivebox/plugins/defaults/models.py b/archivebox/plugins/defaults/models.py index c8202136..ba86bdcf 100644 --- a/archivebox/plugins/defaults/models.py +++ b/archivebox/plugins/defaults/models.py @@ -8,7 +8,7 @@ from pathlib import Path from django.db import models, transaction from django.utils.functional import cached_property -from solo.models import SingletonModel +from solo.models import SingletonModel # type: ignore[import-untyped] from config import bin_path, bin_version @@ -22,6 +22,14 @@ ConfigDict = Dict[str, Any] # def bin_version(bin_path: str, cmd: str | None=None) -> str | None: # return '0.0.0' +# def pretty_path(path: Path) -> str: +# """take a Path object and return the path as a string relative to the current directory""" + +# if not path: +# return '' + +# return str(path.expanduser().resolve().relative_to(Path.cwd().resolve())) + class ArchiveBoxBaseDependency(models.Model): singleton_instance_id = 1 @@ -96,7 +104,7 @@ class ArchiveBoxBaseDependency(models.Model): @cached_property def pretty_version(self): - if self.enabled: + if self.is_enabled: if self.is_valid: color, symbol, note, version = 'green', '√', 'valid', '' diff --git a/docker-compose.yml b/docker-compose.yml index e2af00a6..b781c8a5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,6 +8,8 @@ # Documentation: # https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose +--- + version: '3.9' services: @@ -43,8 +45,8 @@ services: # ... # add further configuration options from archivebox/config.py as needed (to apply them only to this container) # or set using `docker compose run archivebox config --set SOME_KEY=someval` (to persist config across all containers) - - # For ad-blocking during archiving, uncomment this section and pihole service section below + + # For ad-blocking during archiving, uncomment this section and pihole service section below # networks: # - dns # dns: @@ -75,8 +77,8 @@ services: # volumes: # - ./sonic.cfg:/etc/sonic.cfg:ro # - ./data/sonic:/var/lib/sonic/store - - + + ### Example: To run pihole in order to block ad/tracker requests during archiving, # uncomment this block and set up pihole using its admin interface diff --git a/pyproject.toml b/pyproject.toml index a996b81b..f50752b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,9 +98,27 @@ dev = [ # linting "flake8", "mypy", - "django-stubs", + "django-stubs[compatible-mypy]>=4.2.7", + "types-requests>=2.31.0.20240125", ] +[tool.pyright] +include = ["archivebox"] +exclude = ["data", "data2", "data3", "data4", "data5", "pip_dist", "brew_dist", "dist", "vendor", "migrations", "tests"] + +[tool.mypy] +mypy_path = "archivebox" +explicit_package_bases = true +check_untyped_defs = true +plugins = ["mypy_django_plugin.main"] +# TODO: remove this eventually https://github.com/hauntsaninja/no_implicit_optional +implicit_optional = true + +[tool.django-stubs] +django_settings_module = "core.settings" +strict_settings = false + + [tool.pdm.scripts] lint = "./bin/lint.sh" test = "./bin/test.sh"