From 0043b59bc8c7d3d26970eb34a368f660ca8e85aa Mon Sep 17 00:00:00 2001 From: Phil Crockett Date: Sun, 16 Feb 2025 08:22:17 +0100 Subject: [PATCH 1/8] fix(export_browser_history): tilde doesn't expand in quotes --- bin/export_browser_history.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/export_browser_history.sh b/bin/export_browser_history.sh index 24eaf185..1312cbe0 100755 --- a/bin/export_browser_history.sh +++ b/bin/export_browser_history.sh @@ -74,7 +74,7 @@ if [[ "$1" == "--safari" ]]; then if [[ -e "$2" ]]; then cp "$2" "$OUTPUT_DIR/safari_history.db.tmp" else - default="~/Library/Safari/History.db" + default=~"/Library/Safari/History.db" echo "Defaulting to history db: $default" echo "Optionally specify the path to a different sqlite history database as the 2nd argument." cp "$default" "$OUTPUT_DIR/safari_history.db.tmp" From 2ff3fc434e818e42188638bc091ad85585f93643 Mon Sep 17 00:00:00 2001 From: Phil Crockett Date: Sun, 16 Feb 2025 08:31:21 +0100 Subject: [PATCH 2/8] feat(export_browser_history): basic arg parsing error message --- bin/export_browser_history.sh | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/bin/export_browser_history.sh b/bin/export_browser_history.sh index 1312cbe0..0f14fe7c 100755 --- a/bin/export_browser_history.sh +++ b/bin/export_browser_history.sh @@ -11,10 +11,10 @@ # firefox_bookmarks.json # safari_history.json +BROWSER_TO_EXPORT="${1?Please specify --chrome, --firefox, or --safari}" OUTPUT_DIR="$(pwd)" -if [[ "$1" == "--chrome" ]]; then - # Google Chrome / Chromium +export_chrome() { if [[ -e "$2" ]]; then cp "$2" "$OUTPUT_DIR/chrome_history.db.tmp" else @@ -26,14 +26,13 @@ if [[ "$1" == "--chrome" ]]; then sqlite3 "$OUTPUT_DIR/chrome_history.db.tmp" "SELECT \"[\" || group_concat(json_object('timestamp', last_visit_time, 'description', title, 'href', url)) || \"]\" FROM urls;" > "$OUTPUT_DIR/chrome_history.json" jq < "$(dirname "${2:-$default}")"/Bookmarks '.roots.other.children[] | {href: .url, description: .name, timestamp: .date_added}' > "$OUTPUT_DIR/chrome_bookmarks.json" - + rm "$OUTPUT_DIR"/chrome_history.db.* echo "Chrome history exported to:" echo " $OUTPUT_DIR/chrome_history.json" -fi +} -if [[ "$1" == "--firefox" ]]; then - # Firefox +export_firefox() { if [[ -e "$2" ]]; then cp "$2" "$OUTPUT_DIR/firefox_history.db.tmp" else @@ -42,7 +41,7 @@ if [[ "$1" == "--firefox" ]]; then echo "Optionally specify the path to a different sqlite history database as the 2nd argument." cp "$default" "$OUTPUT_DIR/firefox_history.db.tmp" fi - + sqlite3 "$OUTPUT_DIR/firefox_history.db.tmp" "SELECT \"[\" || group_concat(json_object('timestamp', last_visit_date, 'description', title, 'href', url)) || \"]\" FROM moz_places;" > "$OUTPUT_DIR/firefox_history.json" sqlite3 "$OUTPUT_DIR/firefox_history.db.tmp" " @@ -62,15 +61,14 @@ if [[ "$1" == "--firefox" ]]; then JOIN moz_places AS f ON f.id = b.fk JOIN tags ON tags.id = b.parent WHERE f.url LIKE '%://%';" > "$OUTPUT_DIR/firefox_bookmarks.json" - + rm "$OUTPUT_DIR"/firefox_history.db.* echo "Firefox history exported to:" echo " $OUTPUT_DIR/firefox_history.json" echo " $OUTPUT_DIR/firefox_bookmarks.json" -fi +} -if [[ "$1" == "--safari" ]]; then - # Safari +export_safari() { if [[ -e "$2" ]]; then cp "$2" "$OUTPUT_DIR/safari_history.db.tmp" else @@ -79,10 +77,21 @@ if [[ "$1" == "--safari" ]]; then echo "Optionally specify the path to a different sqlite history database as the 2nd argument." cp "$default" "$OUTPUT_DIR/safari_history.db.tmp" fi - + sqlite3 "$OUTPUT_DIR/safari_history.db.tmp" "select url from history_items" > "$OUTPUT_DIR/safari_history.json" - + rm "$OUTPUT_DIR"/safari_history.db.* echo "Safari history exported to:" echo " $OUTPUT_DIR/safari_history.json" +} + +if [[ "$BROWSER_TO_EXPORT" == "--chrome" ]]; then + export_chrome "$@" +elif [[ "$BROWSER_TO_EXPORT" == "--firefox" ]]; then + export_firefox "@" +elif [[ "$BROWSER_TO_EXPORT" == "--safari" ]]; then + export_safari "$@" +else + echo "Unrecognized argument: $1" >&2 + exit 1 fi From 2e1ac0409dd814e7dd646286a5e4cdee4f9b2cad Mon Sep 17 00:00:00 2001 From: Phil Crockett Date: Sun, 16 Feb 2025 08:34:41 +0100 Subject: [PATCH 3/8] feat(export_browser_history): fail script when errors occur --- bin/export_browser_history.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/export_browser_history.sh b/bin/export_browser_history.sh index 0f14fe7c..4a0124d8 100755 --- a/bin/export_browser_history.sh +++ b/bin/export_browser_history.sh @@ -11,6 +11,8 @@ # firefox_bookmarks.json # safari_history.json +set -eo pipefail + BROWSER_TO_EXPORT="${1?Please specify --chrome, --firefox, or --safari}" OUTPUT_DIR="$(pwd)" From feded9e3d4db9bb91aef3721502685c7214e6b23 Mon Sep 17 00:00:00 2001 From: Phil Crockett Date: Sun, 16 Feb 2025 10:24:13 +0100 Subject: [PATCH 4/8] fix(export_browser_history): fix sqlite quote syntax error --- bin/export_browser_history.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/export_browser_history.sh b/bin/export_browser_history.sh index 4a0124d8..4560b328 100755 --- a/bin/export_browser_history.sh +++ b/bin/export_browser_history.sh @@ -44,7 +44,7 @@ export_firefox() { cp "$default" "$OUTPUT_DIR/firefox_history.db.tmp" fi - sqlite3 "$OUTPUT_DIR/firefox_history.db.tmp" "SELECT \"[\" || group_concat(json_object('timestamp', last_visit_date, 'description', title, 'href', url)) || \"]\" FROM moz_places;" > "$OUTPUT_DIR/firefox_history.json" + sqlite3 "$OUTPUT_DIR/firefox_history.db.tmp" "SELECT '[' || group_concat(json_object('timestamp', last_visit_date, 'description', title, 'href', url)) || ']' FROM moz_places;" > "$OUTPUT_DIR/firefox_history.json" sqlite3 "$OUTPUT_DIR/firefox_history.db.tmp" " with recursive tags AS ( From 58bf8d07e1eb439691cb4ba6ad2c10847c92bddb Mon Sep 17 00:00:00 2001 From: Phil Crockett Date: Sun, 16 Feb 2025 10:24:37 +0100 Subject: [PATCH 5/8] feat(export_browser_history): add linux support for firefox --- bin/export_browser_history.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/bin/export_browser_history.sh b/bin/export_browser_history.sh index 4560b328..b3cd1d01 100755 --- a/bin/export_browser_history.sh +++ b/bin/export_browser_history.sh @@ -34,11 +34,20 @@ export_chrome() { echo " $OUTPUT_DIR/chrome_history.json" } +get_places_sqlite() { + # shellcheck disable=SC2012 # `ls` is good enough, don't need `find` + if [[ "$(uname -s)" == "Linux" ]]; then + ls ~/.mozilla/firefox/*.default*/places.sqlite | head -n 1 + else + ls ~/Library/Application\ Support/Firefox/Profiles/*.default*/places.sqlite | head -n 1 + fi +} + export_firefox() { if [[ -e "$2" ]]; then cp "$2" "$OUTPUT_DIR/firefox_history.db.tmp" else - default=$(ls ~/Library/Application\ Support/Firefox/Profiles/*.default/places.sqlite) + default="$(get_places_sqlite)" echo "Defaulting to history db: $default" echo "Optionally specify the path to a different sqlite history database as the 2nd argument." cp "$default" "$OUTPUT_DIR/firefox_history.db.tmp" From 9fbc2d3818f2757e2bac443cf7329fa65dadee98 Mon Sep 17 00:00:00 2001 From: Phil Crockett Date: Tue, 18 Feb 2025 21:08:56 +0100 Subject: [PATCH 6/8] fix chrome browser history export on Linux --- bin/export_browser_history.sh | 85 ++++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 21 deletions(-) diff --git a/bin/export_browser_history.sh b/bin/export_browser_history.sh index b3cd1d01..d14c84ef 100755 --- a/bin/export_browser_history.sh +++ b/bin/export_browser_history.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# # Helper script to export browser history and bookmarks to a format ArchiveBox can ingest. # Usage: # curl -O 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/dev/bin/export_browser_history.sh' @@ -7,53 +8,95 @@ # bash export_browser_history.sh --safari # ls # chrome_history.json +# chrome_bookmarks.json # firefox_history.json # firefox_bookmarks.json # safari_history.json +# +# Assumptions: +# +# * you're running this on macOS or Linux +# * you're running a reasonably modern version of Bash +# * macOS users: `brew install bash` +# +# Dependencies: +# +# * sqlite +# * jq (for chrome bookmarks) +# set -eo pipefail BROWSER_TO_EXPORT="${1?Please specify --chrome, --firefox, or --safari}" OUTPUT_DIR="$(pwd)" -export_chrome() { - if [[ -e "$2" ]]; then - cp "$2" "$OUTPUT_DIR/chrome_history.db.tmp" - else - default=$(ls ~/Library/Application\ Support/Google/Chrome/Default/History) - echo "Defaulting to history db: $default" - echo "Optionally specify the path to a different sqlite history database as the 2nd argument." - cp "$default" "$OUTPUT_DIR/chrome_history.db.tmp" - fi - - sqlite3 "$OUTPUT_DIR/chrome_history.db.tmp" "SELECT \"[\" || group_concat(json_object('timestamp', last_visit_time, 'description', title, 'href', url)) || \"]\" FROM urls;" > "$OUTPUT_DIR/chrome_history.json" - jq < "$(dirname "${2:-$default}")"/Bookmarks '.roots.other.children[] | {href: .url, description: .name, timestamp: .date_added}' > "$OUTPUT_DIR/chrome_bookmarks.json" - - rm "$OUTPUT_DIR"/chrome_history.db.* - echo "Chrome history exported to:" - echo " $OUTPUT_DIR/chrome_history.json" +is_linux() { + [[ "$(uname -s)" == "Linux" ]] } -get_places_sqlite() { - # shellcheck disable=SC2012 # `ls` is good enough, don't need `find` - if [[ "$(uname -s)" == "Linux" ]]; then +find_firefox_places_db() { + # shellcheck disable=SC2012 # `ls` with path expansion is good enough, don't need `find` + if is_linux; then ls ~/.mozilla/firefox/*.default*/places.sqlite | head -n 1 else ls ~/Library/Application\ Support/Firefox/Profiles/*.default*/places.sqlite | head -n 1 fi } +get_chrome_history_db() { + if is_linux; then + echo ~/.config/chromium/Default/History + else + echo ~/Library/Application\ Support/Google/Chrome/Default/History + fi +} + +export_chrome() { + if [[ -e "$2" ]]; then + cp "$2" "$OUTPUT_DIR/chrome_history.db.tmp" + else + default="$(get_chrome_history_db)" + echo "Defaulting to history db: $default" + echo "Optionally specify the path to a different sqlite history database as the 2nd argument." + cp "$default" "$OUTPUT_DIR/chrome_history.db.tmp" + fi + + sqlite3 "$OUTPUT_DIR/chrome_history.db.tmp" " + SELECT '[' || group_concat( + json_object('timestamp', last_visit_time, 'description', title, 'href', url) + ) || ']' + FROM urls;" > "$OUTPUT_DIR/chrome_history.json" + + jq '.roots.other.children[] | {href: .url, description: .name, timestamp: .date_added}' \ + < "$(dirname "${2:-$default}")"/Bookmarks \ + > "$OUTPUT_DIR/chrome_bookmarks.json" + + rm "$OUTPUT_DIR"/chrome_history.db.* + echo "Chrome history exported to:" + echo " $OUTPUT_DIR/chrome_history.json" + echo " $OUTPUT_DIR/chrome_bookmarks.json" +} + export_firefox() { if [[ -e "$2" ]]; then cp "$2" "$OUTPUT_DIR/firefox_history.db.tmp" else - default="$(get_places_sqlite)" + default="$(find_firefox_places_db)" echo "Defaulting to history db: $default" echo "Optionally specify the path to a different sqlite history database as the 2nd argument." cp "$default" "$OUTPUT_DIR/firefox_history.db.tmp" fi - sqlite3 "$OUTPUT_DIR/firefox_history.db.tmp" "SELECT '[' || group_concat(json_object('timestamp', last_visit_date, 'description', title, 'href', url)) || ']' FROM moz_places;" > "$OUTPUT_DIR/firefox_history.json" + sqlite3 "$OUTPUT_DIR/firefox_history.db.tmp" " + SELECT + '[' || group_concat( + json_object( + 'timestamp', last_visit_date, + 'description', title, + 'href', url + ) + ) || ']' + FROM moz_places;" > "$OUTPUT_DIR/firefox_history.json" sqlite3 "$OUTPUT_DIR/firefox_history.db.tmp" " with recursive tags AS ( From 639aa7242b2d30edacab852316692935ffdbcbc0 Mon Sep 17 00:00:00 2001 From: Philip Crockett Date: Tue, 18 Feb 2025 21:22:52 +0100 Subject: [PATCH 7/8] fix typo --- bin/export_browser_history.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/export_browser_history.sh b/bin/export_browser_history.sh index d14c84ef..e650d928 100755 --- a/bin/export_browser_history.sh +++ b/bin/export_browser_history.sh @@ -142,7 +142,7 @@ export_safari() { if [[ "$BROWSER_TO_EXPORT" == "--chrome" ]]; then export_chrome "$@" elif [[ "$BROWSER_TO_EXPORT" == "--firefox" ]]; then - export_firefox "@" + export_firefox "$@" elif [[ "$BROWSER_TO_EXPORT" == "--safari" ]]; then export_safari "$@" else From ba6a8c2da55df8da408ba7b26e8d9203d99eee92 Mon Sep 17 00:00:00 2001 From: Philip Crockett Date: Tue, 18 Feb 2025 21:38:52 +0100 Subject: [PATCH 8/8] support XDG standard, search for chrome and chromium DBs --- bin/export_browser_history.sh | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/bin/export_browser_history.sh b/bin/export_browser_history.sh index e650d928..6aa8f4d5 100755 --- a/bin/export_browser_history.sh +++ b/bin/export_browser_history.sh @@ -43,9 +43,21 @@ find_firefox_places_db() { fi } -get_chrome_history_db() { +find_chrome_history_db() { if is_linux; then - echo ~/.config/chromium/Default/History + local config_home="${XDG_CONFIG_HOME:-${HOME}/.config}" + for path in \ + "${config_home}/chromium/Default/History" \ + "${config_home}/google-chrome/Default/History"; + do + if [ -f "${path}" ]; then + echo "${path}" + return + fi + done + + echo "Unable to find Chrome history database. You can supply it manually as a second parameter." >&2 + exit 1 else echo ~/Library/Application\ Support/Google/Chrome/Default/History fi @@ -55,7 +67,7 @@ export_chrome() { if [[ -e "$2" ]]; then cp "$2" "$OUTPUT_DIR/chrome_history.db.tmp" else - default="$(get_chrome_history_db)" + default="$(find_chrome_history_db)" echo "Defaulting to history db: $default" echo "Optionally specify the path to a different sqlite history database as the 2nd argument." cp "$default" "$OUTPUT_DIR/chrome_history.db.tmp"