use constants in more places

2025-05-12 22:25:44 -04:00 · 2024-09-26 02:41:09 -07:00 · 2024-09-26 02:41:09 -07:00 · ed45f58758
commit ed45f58758
parent eb360f188a
5 changed files with 53 additions and 64 deletions
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -2,7 +2,6 @@ __package__ = 'archivebox.core'
 from typing import Callable
 import threading
 from pathlib import Path
 from django.shortcuts import render, redirect
@@ -12,6 +11,7 @@ from django.views import View
 from django.views.generic.list import ListView
 from django.views.generic import FormView
 from django.db.models import Q
 from django.conf import settings
 from django.contrib import messages
 from django.contrib.auth.mixins import UserPassesTestMixin
 from django.views.decorators.csrf import csrf_exempt
@@ -20,6 +20,8 @@ from django.utils.decorators import method_decorator
 from admin_data_views.typing import TableContext, ItemContext
 from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
 import archivebox
 from archivebox.constants import CONSTANTS
 from core.models import Snapshot
 from core.forms import AddLinkForm
@@ -27,28 +29,17 @@ from core.admin import result_url
 from queues.tasks import bg_add
 from ..plugins_sys.config.apps import SHELL_CONFIG, SERVER_CONFIG
 from ..plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
 from ..config import (
    OUTPUT_DIR,
    PUBLIC_INDEX,
    PUBLIC_SNAPSHOTS,
    PUBLIC_ADD_VIEW,
    VERSION,
    COMMIT_HASH,
    FOOTER_INFO,
    SNAPSHOTS_PER_PAGE,
    CONFIG,
    CONFIG_SCHEMA,
    DYNAMIC_CONFIG_SCHEMA,
    USER_CONFIG,
    SAVE_ARCHIVE_DOT_ORG,
    PREVIEW_ORIGINALS,
    CONSTANTS,
 )
 from ..logging_util import printable_filesize
-from ..main import add
+from ..util import base_url, htmlencode, ts_to_date_str
 from ..util import base_url, ansi_to_html, htmlencode, urldecode, urlencode, ts_to_date_str
 from ..search import query_search_index
 from ..extractors.wget import wget_output_path
 from .serve_static import serve_static_with_byterange_support
@@ -57,7 +48,7 @@ class HomepageView(View):
        if request.user.is_authenticated:
            return redirect('/admin/core/snapshot/')
-        if PUBLIC_INDEX:
+        if SERVER_CONFIG.PUBLIC_INDEX:
            return redirect('/public')
        return redirect(f'/admin/login/?next={request.path}')
@@ -166,8 +157,8 @@ class SnapshotView(View):
            'status_color': 'success' if link.is_archived else 'danger',
            'oldest_archive_date': ts_to_date_str(link.oldest_archive_date),
            'warc_path': warc_path,
-            'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG,
+            'SAVE_ARCHIVE_DOT_ORG': ARCHIVEDOTORG_CONFIG.SAVE_ARCHIVE_DOT_ORG,
-            'PREVIEW_ORIGINALS': PREVIEW_ORIGINALS,
+            'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS,
            'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name']) if r['name'] in all_types else -r['size']),
            'best_result': best_result,
            # 'tags_str': 'somealskejrewlkrjwer,werlmwrwlekrjewlkrjwer324m532l,4m32,23m324234',
@@ -176,7 +167,7 @@ class SnapshotView(View):
    def get(self, request, path):
-        if not request.user.is_authenticated and not PUBLIC_SNAPSHOTS:
+        if not request.user.is_authenticated and not SERVER_CONFIG.PUBLIC_SNAPSHOTS:
            return redirect(f'/admin/login/?next={request.path}')
        snapshot = None
@@ -381,15 +372,15 @@ class SnapshotView(View):
 class PublicIndexView(ListView):
    template_name = 'public_index.html'
    model = Snapshot
-    paginate_by = SNAPSHOTS_PER_PAGE
+    paginate_by = SERVER_CONFIG.SNAPSHOTS_PER_PAGE
    ordering = ['-bookmarked_at', '-created_at']
    def get_context_data(self, **kwargs):
        return {
            **super().get_context_data(**kwargs),
-            'VERSION': VERSION,
+            'VERSION': archivebox.VERSION,
-            'COMMIT_HASH': COMMIT_HASH,
+            'COMMIT_HASH': SHELL_CONFIG.COMMIT_HASH,
-            'FOOTER_INFO': FOOTER_INFO,
+            'FOOTER_INFO': SERVER_CONFIG.FOOTER_INFO,
        }
    def get_queryset(self, **kwargs):
@@ -428,7 +419,7 @@ class PublicIndexView(ListView):
        return qs.distinct()
    def get(self, *args, **kwargs):
-        if PUBLIC_INDEX or self.request.user.is_authenticated:
+        if SERVER_CONFIG.PUBLIC_INDEX or self.request.user.is_authenticated:
            response = super().get(*args, **kwargs)
            return response
        else:
@@ -449,7 +440,7 @@ class AddView(UserPassesTestMixin, FormView):
        return super().get_initial()
    def test_func(self):
-        return PUBLIC_ADD_VIEW or self.request.user.is_authenticated
+        return SERVER_CONFIG.PUBLIC_ADD_VIEW or self.request.user.is_authenticated
    def get_context_data(self, **kwargs):
        return {
@@ -457,8 +448,8 @@ class AddView(UserPassesTestMixin, FormView):
            'title': "Add URLs",
            # We can't just call request.build_absolute_uri in the template, because it would include query parameters
            'absolute_add_path': self.request.build_absolute_uri(self.request.path),
-            'VERSION': VERSION,
+            'VERSION': archivebox.VERSION,
-            'FOOTER_INFO': FOOTER_INFO,
+            'FOOTER_INFO': SERVER_CONFIG.FOOTER_INFO,
            'stdout': '',
        }
@@ -475,7 +466,7 @@ class AddView(UserPassesTestMixin, FormView):
            "depth": depth,
            "parser": parser,
            "update_all": False,
-            "out_dir": OUTPUT_DIR,
+            "out_dir": archivebox.DATA_DIR,
            "created_by_id": self.request.user.pk,
        }
        if extractors:
--- a/archivebox/index/schema.py
+++ b/archivebox/index/schema.py
@@ -9,8 +9,6 @@ These are the old types we used to use before ArchiveBox v0.4 (before we switche
 __package__ = 'archivebox.index'
 from pathlib import Path
 from datetime import datetime, timezone, timedelta
 from typing import List, Dict, Any, Optional, Union
@@ -19,9 +17,13 @@ from dataclasses import dataclass, asdict, field, fields
 from django.utils.functional import cached_property
 from archivebox.constants import ARCHIVE_DIR, ARCHIVE_DIR_NAME
 from plugins_extractor.favicon.apps import FAVICON_CONFIG
 from ..system import get_dir_size
 from ..util import ts_to_date_str, parse_date
-from ..config import OUTPUT_DIR, ARCHIVE_DIR_NAME, FAVICON_PROVIDER
+
 class ArchiveError(Exception):
    def __init__(self, message, hints=None):
@@ -88,7 +90,7 @@ class ArchiveResult:
                info['start_ts'] = parse_date(info['start_ts'])
                info['end_ts'] = parse_date(info['end_ts'])
            if "pwd" not in keys:
-                info["pwd"] = str(Path(OUTPUT_DIR) / ARCHIVE_DIR_NAME / json_info["timestamp"])
+                info["pwd"] = str(ARCHIVE_DIR / json_info["timestamp"])
            if "cmd_version" not in keys:
                info["cmd_version"] = "Undefined"
            if "cmd" not in keys:
@@ -281,12 +283,10 @@ class Link:
    @property
    def link_dir(self) -> str:
-        from ..config import CONFIG
+        return str(ARCHIVE_DIR / self.timestamp)
        return str(Path(CONFIG['ARCHIVE_DIR']) / self.timestamp)
    @property
    def archive_path(self) -> str:
        from ..config import ARCHIVE_DIR_NAME
        return '{}/{}'.format(ARCHIVE_DIR_NAME, self.timestamp)
    @property
@@ -385,7 +385,6 @@ class Link:
    @property
    def is_archived(self) -> bool:
        from ..config import ARCHIVE_DIR
        from ..util import domain
        output_paths = (
@@ -402,7 +401,7 @@ class Link:
        )
        return any(
-            (Path(ARCHIVE_DIR) / self.timestamp / path).exists()
+            (ARCHIVE_DIR / self.timestamp / path).exists()
            for path in output_paths
        )
@@ -438,7 +437,7 @@ class Link:
        canonical = {
            'index_path': 'index.html',
            'favicon_path': 'favicon.ico',
-            'google_favicon_path': FAVICON_PROVIDER.format(self.domain),
+            'google_favicon_path': FAVICON_CONFIG.FAVICON_PROVIDER.format(self.domain),
            'wget_path': wget_output_path(self),
            'warc_path': 'warc/',
            'singlefile_path': 'singlefile.html',
--- a/archivebox/plugantic/views.py
+++ b/archivebox/plugantic/views.py
@@ -12,6 +12,8 @@ from django.utils.html import format_html, mark_safe
 from admin_data_views.typing import TableContext, ItemContext
 from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
 import archivebox
 from ..config_stubs import AttrDict
 from ..util import parse_date
@@ -378,9 +380,8 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
 def log_list_view(request: HttpRequest, **kwargs) -> TableContext:
    assert request.user.is_superuser, "Must be a superuser to view configuration settings."
    from django.conf import settings
-    log_files = settings.CONFIG.LOGS_DIR.glob("*.log")
+    log_files = archivebox.CONSTANTS.LOGS_DIR.glob("*.log")
    log_files = sorted(log_files, key=os.path.getmtime)[::-1]
    rows = {
@@ -418,7 +419,7 @@ def log_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
    from django.conf import settings
-    log_file = [logfile for logfile in settings.CONFIG.LOGS_DIR.glob('*.log') if key in logfile.name][0]
+    log_file = [logfile for logfile in archivebox.CONSTANTS.LOGS_DIR.glob('*.log') if key in logfile.name][0]
    log_text = log_file.read_text()
    log_stat = log_file.stat()
--- a/archivebox/plugins_search/ripgrep/apps.py
+++ b/archivebox/plugins_search/ripgrep/apps.py
@@ -37,7 +37,7 @@ class RipgrepConfig(BaseConfigSet):
        '--files-with-matches',
        '--regexp',
    ])
-    RIPGREP_SEARCH_DIR: str = Field(default=lambda: str(settings.ARCHIVE_DIR))
+    RIPGREP_SEARCH_DIR: Path = archivebox.CONSTANTS.ARCHIVE_DIR
 RIPGREP_CONFIG = RipgrepConfig()
@@ -81,7 +81,7 @@ class RipgrepSearchBackend(BaseSearchBackend):
            ripgrep_binary.abspath, 
            *RIPGREP_CONFIG.RIPGREP_ARGS_DEFAULT,
            text,
-            RIPGREP_CONFIG.RIPGREP_SEARCH_DIR,
+            str(RIPGREP_CONFIG.RIPGREP_SEARCH_DIR),
        ]
        proc = run(cmd, timeout=SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_TIMEOUT, capture_output=True, text=True)
        timestamps = set()
--- a/archivebox/util.py
+++ b/archivebox/util.py
@@ -18,13 +18,19 @@ from requests.exceptions import RequestException, ReadTimeout
 from base32_crockford import encode as base32_encode                            # type: ignore
 from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
 try:
    import chardet
    detect_encoding = lambda rawdata: chardet.detect(rawdata)["encoding"]
 except ImportError:
    detect_encoding = lambda rawdata: "utf-8"
 from archivebox.constants import STATICFILE_EXTENSIONS
 from archivebox.plugins_sys.config.apps import ARCHIVING_CONFIG
 from .misc.logging import COLOR_DICT
 ### Parsing Helpers
 # All of these are (str) -> str
@@ -114,7 +120,6 @@ def find_all_urls(urls_str: str):
 def is_static_file(url: str):
    # TODO: the proper way is with MIME type detection + ext, not only extension
    from .config import STATICFILE_EXTENSIONS
    return extension(url).lower() in STATICFILE_EXTENSIONS
@@ -206,25 +211,20 @@ def parse_date(date: Any) -> Optional[datetime]:
 @enforce_types
 def download_url(url: str, timeout: int=None) -> str:
    """Download the contents of a remote url and return the text"""
-    from .config import (
+
-        TIMEOUT,
+    timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
        CHECK_SSL_VALIDITY,
        WGET_USER_AGENT,
        COOKIES_FILE,
    )
    timeout = timeout or TIMEOUT
    session = requests.Session()
-    if COOKIES_FILE and Path(COOKIES_FILE).is_file():
+    if ARCHIVING_CONFIG.COOKIES_FILE and Path(ARCHIVING_CONFIG.COOKIES_FILE).is_file():
-        cookie_jar = http.cookiejar.MozillaCookieJar(COOKIES_FILE)
+        cookie_jar = http.cookiejar.MozillaCookieJar(ARCHIVING_CONFIG.COOKIES_FILE)
        cookie_jar.load(ignore_discard=True, ignore_expires=True)
        for cookie in cookie_jar:
            session.cookies.set(cookie.name, cookie.value, domain=cookie.domain, path=cookie.path)
    response = session.get(
        url,
-        headers={'User-Agent': WGET_USER_AGENT},
+        headers={'User-Agent': ARCHIVING_CONFIG.USER_AGENT},
-        verify=CHECK_SSL_VALIDITY,
+        verify=ARCHIVING_CONFIG.CHECK_SSL_VALIDITY,
        timeout=timeout,
    )
@@ -243,14 +243,13 @@ def download_url(url: str, timeout: int=None) -> str:
 @enforce_types
 def get_headers(url: str, timeout: int=None) -> str:
    """Download the contents of a remote url and return the headers"""
-    from .config import TIMEOUT, CHECK_SSL_VALIDITY, WGET_USER_AGENT
+    timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
    timeout = timeout or TIMEOUT
    try:
        response = requests.head(
            url,
-            headers={'User-Agent': WGET_USER_AGENT},
+            headers={'User-Agent': ARCHIVING_CONFIG.USER_AGENT},
-            verify=CHECK_SSL_VALIDITY,
+            verify=ARCHIVING_CONFIG.CHECK_SSL_VALIDITY,
            timeout=timeout,
            allow_redirects=True,
        )
@@ -261,8 +260,8 @@ def get_headers(url: str, timeout: int=None) -> str:
    except RequestException:
        response = requests.get(
            url,
-            headers={'User-Agent': WGET_USER_AGENT},
+            headers={'User-Agent': ARCHIVING_CONFIG.USER_AGENT},
-            verify=CHECK_SSL_VALIDITY,
+            verify=ARCHIVING_CONFIG.CHECK_SSL_VALIDITY,
            timeout=timeout,
            stream=True
        )
@@ -285,7 +284,6 @@ def ansi_to_html(text: str) -> str:
    """
    Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html
    """
    from .config import COLOR_DICT
    TEMPLATE = '<span style="color: rgb{}"><br>'
    text = text.replace('[m', '</span>')