From 3512dc7e606e67b126100dc8bb2d56874c9025c5 Mon Sep 17 00:00:00 2001
From: Nick Sweeting <git@sweeting.me>
Date: Thu, 14 Mar 2024 00:58:45 -0700
Subject: [PATCH] Disable searching for existing chrome user profiles by
 default

---
 archivebox/config.py | 53 ++++++++++++++++++++++++++------------------
 1 file changed, 31 insertions(+), 22 deletions(-)

diff --git a/archivebox/config.py b/archivebox/config.py
index 1edd2eeb..fad2db53 100644
--- a/archivebox/config.py
+++ b/archivebox/config.py
@@ -500,7 +500,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
     'LOGS_DIR':                 {'default': lambda c: c['OUTPUT_DIR'] / LOGS_DIR_NAME},
     'CONFIG_FILE':              {'default': lambda c: Path(c['CONFIG_FILE']).resolve() if c['CONFIG_FILE'] else c['OUTPUT_DIR'] / CONFIG_FILENAME},
     'COOKIES_FILE':             {'default': lambda c: c['COOKIES_FILE'] and Path(c['COOKIES_FILE']).resolve()},
-    'CHROME_USER_DATA_DIR':     {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None)},   # None means unset, so we autodetect it with find_chrome_Data_dir(), but emptystring '' means user manually set it to '', and we should store it as None
+    'CHROME_USER_DATA_DIR':     {'default': lambda c: Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None},
     'URL_DENYLIST_PTN':         {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
     'URL_ALLOWLIST_PTN':        {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
     'DIR_OUTPUT_PERMISSIONS':   {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')},  # exec is always needed to list directories
@@ -910,27 +910,36 @@ def find_chrome_binary() -> Optional[str]:
 
 def find_chrome_data_dir() -> Optional[str]:
     """find any installed chrome user data directories in the default locations"""
-    # Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev
-    # make sure data dir finding precedence order always matches binary finding order
-    default_profile_paths = (
-        '~/.config/chromium',
-        '~/Library/Application Support/Chromium',
-        '~/AppData/Local/Chromium/User Data',
-        '~/.config/chrome',
-        '~/.config/google-chrome',
-        '~/Library/Application Support/Google/Chrome',
-        '~/AppData/Local/Google/Chrome/User Data',
-        '~/.config/google-chrome-stable',
-        '~/.config/google-chrome-beta',
-        '~/Library/Application Support/Google/Chrome Canary',
-        '~/AppData/Local/Google/Chrome SxS/User Data',
-        '~/.config/google-chrome-unstable',
-        '~/.config/google-chrome-dev',
-    )
-    for path in default_profile_paths:
-        full_path = Path(path).resolve()
-        if full_path.exists():
-            return full_path
+    # deprecated because this is DANGEROUS, do not re-implement/uncomment this behavior.
+
+    # Going forward we want to discourage people from using their main chrome profile for archiving.
+    # Session tokens, personal data, and cookies are often returned in server responses,
+    # when they get archived, they are essentially burned as anyone who can view the archive
+    # can use that data to masquerade as the logged-in user that did the archiving.
+    # For this reason users should always create dedicated burner profiles for archiving and not use
+    # their daily driver main accounts.
+
+    # # Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev
+    # # make sure data dir finding precedence order always matches binary finding order
+    # default_profile_paths = (
+    #     '~/.config/chromium',
+    #     '~/Library/Application Support/Chromium',
+    #     '~/AppData/Local/Chromium/User Data',
+    #     '~/.config/chrome',
+    #     '~/.config/google-chrome',
+    #     '~/Library/Application Support/Google/Chrome',
+    #     '~/AppData/Local/Google/Chrome/User Data',
+    #     '~/.config/google-chrome-stable',
+    #     '~/.config/google-chrome-beta',
+    #     '~/Library/Application Support/Google/Chrome Canary',
+    #     '~/AppData/Local/Google/Chrome SxS/User Data',
+    #     '~/.config/google-chrome-unstable',
+    #     '~/.config/google-chrome-dev',
+    # )
+    # for path in default_profile_paths:
+    #     full_path = Path(path).resolve()
+    #     if full_path.exists():
+    #         return full_path
     return None
 
 def wget_supports_compression(config):