From 9fbc2d3818f2757e2bac443cf7329fa65dadee98 Mon Sep 17 00:00:00 2001
From: Phil Crockett <contact@philcrockett.com>
Date: Tue, 18 Feb 2025 21:08:56 +0100
Subject: [PATCH] fix chrome browser history export on Linux

---
 bin/export_browser_history.sh | 85 ++++++++++++++++++++++++++---------
 1 file changed, 64 insertions(+), 21 deletions(-)

diff --git a/bin/export_browser_history.sh b/bin/export_browser_history.sh
index b3cd1d01..d14c84ef 100755
--- a/bin/export_browser_history.sh
+++ b/bin/export_browser_history.sh
@@ -1,4 +1,5 @@
 #!/usr/bin/env bash
+#
 # Helper script to export browser history and bookmarks to a format ArchiveBox can ingest.
 # Usage:
 #    curl -O 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/dev/bin/export_browser_history.sh'
@@ -7,53 +8,95 @@
 #    bash export_browser_history.sh --safari
 #    ls
 #        chrome_history.json
+#        chrome_bookmarks.json
 #        firefox_history.json
 #        firefox_bookmarks.json
 #        safari_history.json
+#
+# Assumptions:
+#
+# * you're running this on macOS or Linux
+# * you're running a reasonably modern version of Bash
+#   * macOS users: `brew install bash`
+#
+# Dependencies:
+#
+# * sqlite3 (the command-line shell, with JSON function support)
+# * jq (for chrome bookmarks)
+#
 
 set -eo pipefail
 
 BROWSER_TO_EXPORT="${1?Please specify --chrome, --firefox, or --safari}"
 OUTPUT_DIR="$(pwd)"
 
-export_chrome() {
-    if [[ -e "$2" ]]; then
-        cp "$2" "$OUTPUT_DIR/chrome_history.db.tmp"
-    else
-        default=$(ls ~/Library/Application\ Support/Google/Chrome/Default/History)
-        echo "Defaulting to history db: $default"
-        echo "Optionally specify the path to a different sqlite history database as the 2nd argument."
-        cp "$default" "$OUTPUT_DIR/chrome_history.db.tmp"
-    fi
-
-    sqlite3 "$OUTPUT_DIR/chrome_history.db.tmp" "SELECT \"[\" || group_concat(json_object('timestamp', last_visit_time, 'description', title, 'href', url)) || \"]\" FROM urls;" > "$OUTPUT_DIR/chrome_history.json"
-    jq < "$(dirname "${2:-$default}")"/Bookmarks '.roots.other.children[] | {href: .url, description: .name, timestamp: .date_added}' > "$OUTPUT_DIR/chrome_bookmarks.json"
-
-    rm "$OUTPUT_DIR"/chrome_history.db.*
-    echo "Chrome history exported to:"
-    echo "    $OUTPUT_DIR/chrome_history.json"
+is_linux() {  # succeeds (exit status 0) only when `uname -s` reports exactly "Linux"
+    [[ "$(uname -s)" == "Linux" ]]
 }
 
-get_places_sqlite() {
-    # shellcheck disable=SC2012  # `ls` is good enough, don't need `find`
-    if [[ "$(uname -s)" == "Linux" ]]; then
+find_firefox_places_db() {  # prints the first places.sqlite path matched by the profile glob
+    # shellcheck disable=SC2012  # `ls` with path expansion is good enough, don't need `find`
+    if is_linux; then  # Linux profiles are globbed under ~/.mozilla; otherwise the macOS Library path is used
         ls ~/.mozilla/firefox/*.default*/places.sqlite | head -n 1
     else
         ls ~/Library/Application\ Support/Firefox/Profiles/*.default*/places.sqlite | head -n 1
     fi
 }
 
+get_chrome_history_db() {
+    # Print the default History db path: macOS Chrome, or on Linux Google Chrome if present, else Chromium.
+    local dir=~/Library/Application\ Support/Google/Chrome  # macOS default
+    is_linux && dir=~/.config/chromium  # Linux fallback when Google Chrome is not installed
+    is_linux && [[ -e ~/.config/google-chrome/Default/History ]] && dir=~/.config/google-chrome
+    echo "$dir/Default/History"
+}
+
+export_chrome() {
+    # Export Chrome history and bookmarks (Bookmarks file beside the History db) as JSON into $OUTPUT_DIR.
+    local history_db="$2"  # $2: optional History db path; replaced by the OS default when it does not exist
+    if [[ ! -e "$history_db" ]]; then
+        history_db="$(get_chrome_history_db)"
+        echo "Defaulting to history db: $history_db"
+        echo "Optionally specify the path to a different sqlite history database as the 2nd argument."
+    fi
+    cp "$history_db" "$OUTPUT_DIR/chrome_history.db.tmp"  # query a copy, not the original file
+
+    sqlite3 "$OUTPUT_DIR/chrome_history.db.tmp" "
+    SELECT '[' || group_concat(
+        json_object('timestamp', last_visit_time, 'description', title, 'href', url)
+    ) || ']'
+    FROM urls;" > "$OUTPUT_DIR/chrome_history.json"
+
+    jq '.roots.other.children[] | {href: .url, description: .name, timestamp: .date_added}' \
+       < "$(dirname "$history_db")"/Bookmarks \
+       > "$OUTPUT_DIR/chrome_bookmarks.json"
+
+    rm "$OUTPUT_DIR"/chrome_history.db.*
+    echo "Chrome history exported to:"
+    echo "    $OUTPUT_DIR/chrome_history.json"
+    echo "    $OUTPUT_DIR/chrome_bookmarks.json"
+}
+
 export_firefox() {
     if [[ -e "$2" ]]; then
         cp "$2" "$OUTPUT_DIR/firefox_history.db.tmp"
     else
-        default="$(get_places_sqlite)"
+        default="$(find_firefox_places_db)"
         echo "Defaulting to history db: $default"
         echo "Optionally specify the path to a different sqlite history database as the 2nd argument."
         cp "$default" "$OUTPUT_DIR/firefox_history.db.tmp"
     fi
 
-    sqlite3 "$OUTPUT_DIR/firefox_history.db.tmp" "SELECT '[' || group_concat(json_object('timestamp', last_visit_date, 'description', title, 'href', url)) || ']' FROM moz_places;" > "$OUTPUT_DIR/firefox_history.json"
+    sqlite3 "$OUTPUT_DIR/firefox_history.db.tmp" "
+    SELECT
+        '[' || group_concat(
+            json_object(
+                'timestamp', last_visit_date,
+                'description', title,
+                'href', url
+            )
+        ) || ']'
+    FROM moz_places;" > "$OUTPUT_DIR/firefox_history.json"
 
     sqlite3 "$OUTPUT_DIR/firefox_history.db.tmp" "
     with recursive tags AS (