From 0ef3a0913bd9cedb50e0fccdafe7bb07428a6847 Mon Sep 17 00:00:00 2001
From: Nick Sweeting <github@sweeting.me>
Date: Wed, 25 Sep 2024 01:14:48 -0700
Subject: [PATCH] check python encoding in SHELL_CONFIG validation

---
 archivebox/config.py                  |  1 -
 archivebox/logging_util.py            |  3 +--
 archivebox/plugantic/tests.py         |  2 --
 archivebox/plugins_pkg/pip/apps.py    | 38 ++++++++++-----------------
 archivebox/plugins_sys/config/apps.py | 13 +++++++++
 archivebox/system.py                  |  1 +
 6 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/archivebox/config.py b/archivebox/config.py
index 6a209621..e05fbe5a 100644
--- a/archivebox/config.py
+++ b/archivebox/config.py
@@ -559,7 +559,6 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
     'CAN_UPGRADE':              {'default': lambda c: False},             # can_upgrade(c)},
 
     'PYTHON_BINARY':            {'default': lambda c: sys.executable},
-    'PYTHON_ENCODING':          {'default': lambda c: sys.stdout.encoding.upper()},
     'PYTHON_VERSION':           {'default': lambda c: '{}.{}.{}'.format(*sys.version_info[:3])},
 
     'DJANGO_BINARY':            {'default': lambda c: inspect.getfile(django)},
diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py
index 4ad2cad3..9563011e 100644
--- a/archivebox/logging_util.py
+++ b/archivebox/logging_util.py
@@ -25,7 +25,6 @@ from .util import enforce_types
 from .config import (
     ConfigDict,
     OUTPUT_DIR,
-    PYTHON_ENCODING,
     VERSION,
     ANSI,
     IS_TTY,
@@ -184,7 +183,7 @@ class TimedProgress:
 @enforce_types
 def progress_bar(seconds: int, prefix: str='') -> None:
     """show timer in the form of progress bar, with percentage and seconds remaining"""
-    chunk = '█' if PYTHON_ENCODING == 'UTF-8' else '#'
+    chunk = '█' if (sys.stdout or sys.__stdout__).encoding.upper() == 'UTF-8' else '#'
     last_width = TERM_WIDTH()
     chunks = last_width - len(prefix) - 20  # number of progress chunks to show (aka max bar width)
     try:
diff --git a/archivebox/plugantic/tests.py b/archivebox/plugantic/tests.py
index 0e7899ad..e1b0623c 100644
--- a/archivebox/plugantic/tests.py
+++ b/archivebox/plugantic/tests.py
@@ -146,7 +146,6 @@ BUILD_TIME=2024-05-15 03:28:05 1715768885
 VERSIONS_AVAILABLE=None
 CAN_UPGRADE=False
 PYTHON_BINARY=/opt/archivebox/.venv/bin/python3.10
-PYTHON_ENCODING=UTF-8
 PYTHON_VERSION=3.10.14
 DJANGO_BINARY=/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py
 DJANGO_VERSION=5.0.6 final (0)
@@ -307,7 +306,6 @@ BUILD_TIME = "2024-05-15 03:28:05 1715768885"
 VERSIONS_AVAILABLE = null
 CAN_UPGRADE = false
 PYTHON_BINARY = "/opt/archivebox/.venv/bin/python3.10"
-PYTHON_ENCODING = "UTF-8"
 PYTHON_VERSION = "3.10.14"
 DJANGO_BINARY = "/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py"
 DJANGO_VERSION = "5.0.6 final (0)"
diff --git a/archivebox/plugins_pkg/pip/apps.py b/archivebox/plugins_pkg/pip/apps.py
index 0a96db90..4da87a69 100644
--- a/archivebox/plugins_pkg/pip/apps.py
+++ b/archivebox/plugins_pkg/pip/apps.py
@@ -19,7 +19,6 @@ from plugantic.base_check import BaseCheck
 from plugantic.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
 from plugantic.base_hook import BaseHook
 
-PYTHON_ENCODING = sys.__stdout__.encoding.upper()
 
 ###################### Config ##########################
 
@@ -172,8 +171,21 @@ class CheckPipEnvironment(BaseCheck):
 
     @staticmethod
     def check(settings, logger) -> List[Warning]:
+        # hard error: refuse to run on Python older than 3.10 (exits with status 2)
+        if sys.version_info[:3] < (3, 10, 0):
+            print(f'[red][X] Python version is not new enough: {sys.version} (>=3.10 is required)[/red]', file=sys.stderr)
+            print('    See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.', file=sys.stderr)
+            raise SystemExit(2)
+
+        # hard error: refuse to run on Django older than 5.0 (exits with status 2)
+        if int(django.VERSION[0]) < 5:
+            print(f'[red][X] Django version is not new enough: {django.VERSION[:3]} (>=5.0 is required)[/red]', file=sys.stderr)
+            print('    Upgrade django using pip or your system package manager: pip3 install --upgrade django', file=sys.stderr)
+            raise SystemExit(2)
+        
+        # soft errors: check that lib/pip virtualenv is setup properly
         errors = []
-       
+        
         LIB_PIP_BINPROVIDER.setup()
         if not LIB_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH:
             errors.append(
@@ -185,28 +197,6 @@ class CheckPipEnvironment(BaseCheck):
             )
         # logger.debug("[√] CheckPipEnvironment: data/lib/pip virtualenv is setup properly")
         return errors
-    
-    # check python version
-    if sys.version_info[:3] < (3, 10, 0):
-        print('[red][X] Python version is not new enough: {sys.version} (>3.10 is required)[/red]', file=sys.stderr)
-        print('    See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.', file=sys.stderr)
-        raise SystemExit(2)
-    
-    # check django version
-    if int(django.VERSION[0]) < 5:
-        print('[red][X] Django version is not new enough: {django.VERSION[:3]} (>=5.0 is required)[/red]', file=sys.stderr)
-        print('    Upgrade django using pip or your system package manager: pip3 install --upgrade django', file=sys.stderr)
-        raise SystemExit(2)
-    
-    # check python locale
-    if PYTHON_ENCODING not in ('UTF-8', 'UTF8'):
-        print(f'[red][X] Your system is running python3 scripts with a bad locale setting: {PYTHON_ENCODING} (it should be UTF-8).[/red]', file=sys.stderr)
-        print('    To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)', file=sys.stderr)
-        print('    Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"', file=sys.stderr)
-        print('')
-        print('    Confirm that it\'s fixed by opening a new shell and running:', file=sys.stderr)
-        print('        python3 -c "import sys; print(sys.stdout.encoding)"   # should output UTF-8', file=sys.stderr)
-        raise SystemExit(2)
 
 
 USER_IS_NOT_ROOT_CHECK = CheckUserIsNotRoot()
diff --git a/archivebox/plugins_sys/config/apps.py b/archivebox/plugins_sys/config/apps.py
index 61e6ea8b..d5c68d25 100644
--- a/archivebox/plugins_sys/config/apps.py
+++ b/archivebox/plugins_sys/config/apps.py
@@ -32,6 +32,9 @@ class ShellConfig(BaseConfigSet):
     PUID: int                           = Field(default=os.getuid())
     PGID: int                           = Field(default=os.getgid())
     
+    PYTHON_ENCODING: str                = Field(default=(sys.__stdout__ or sys.stdout or sys.__stderr__ or sys.stderr).encoding.upper().replace('UTF8', 'UTF-8'))
+
+    
     @model_validator(mode='after')
     def validate_not_running_as_root(self):
         attempted_command = ' '.join(sys.argv[:3])
@@ -52,6 +55,16 @@ class ShellConfig(BaseConfigSet):
                 print(f'        docker exec -it --user=archivebox <container id> /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
             raise SystemExit(2)
         
+        # hard error: PYTHON_ENCODING must be UTF-8; anything else aborts with exit status 2
+        if self.PYTHON_ENCODING != 'UTF-8':
+            print(f'[red][X] Your system is running python3 scripts with a bad locale setting: {self.PYTHON_ENCODING} (it should be UTF-8).[/red]', file=sys.stderr)
+            print('    To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)', file=sys.stderr)
+            print('    Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"', file=sys.stderr)
+            print('', file=sys.stderr)  # keep the blank separator on the same stream as the rest of the message
+            print('    Confirm that it\'s fixed by opening a new shell and running:', file=sys.stderr)
+            print('        python3 -c "import sys; print(sys.stdout.encoding)"   # should output UTF-8', file=sys.stderr)
+            raise SystemExit(2)
+        
         return self
 
 SHELL_CONFIG = ShellConfig()
diff --git a/archivebox/system.py b/archivebox/system.py
index ab5d30ea..f37d9d92 100644
--- a/archivebox/system.py
+++ b/archivebox/system.py
@@ -38,6 +38,7 @@ def run(cmd, *args, input=None, capture_output=True, timeout=None, check=False,
     pgid = None
     try:
         if isinstance(cmd, (list, tuple)) and cmd[0].endswith('.py'):
+            PYTHON_BINARY = sys.executable
             cmd = (PYTHON_BINARY, *cmd)
 
         with Popen(cmd, *args, start_new_session=start_new_session, text=text, **kwargs) as process: