From 38e54b93fe36cfdb5d3f5b529e26a286e6d242f1 Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Mon, 9 May 2022 19:56:24 -0700
Subject: [PATCH] allow parsing to continue even when fetching URL contents fails

---
 archivebox/main.py             | 7 +++++--
 archivebox/parsers/__init__.py | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/archivebox/main.py b/archivebox/main.py
index ed1df69b..d4e7d9c3 100755
--- a/archivebox/main.py
+++ b/archivebox/main.py
@@ -594,8 +594,11 @@ def add(urls: Union[str, List[str]],
     if new_links and depth == 1:
         log_crawl_started(new_links)
         for new_link in new_links:
-            downloaded_file = save_file_as_source(new_link.url, filename=f'{new_link.timestamp}-crawl-{new_link.domain}.txt', out_dir=out_dir)
-            new_links_depth += parse_links_from_source(downloaded_file, root_url=new_link.url)
+            try:
+                downloaded_file = save_file_as_source(new_link.url, filename=f'{new_link.timestamp}-crawl-{new_link.domain}.txt', out_dir=out_dir)
+                new_links_depth += parse_links_from_source(downloaded_file, root_url=new_link.url)
+            except Exception as err:
+                stderr(f'[!] Failed to get contents of URL {new_link.url}', err, color='red')
 
     imported_links = list({link.url: link for link in (new_links + new_links_depth)}.values())
 
diff --git a/archivebox/parsers/__init__.py b/archivebox/parsers/__init__.py
index 2451f0f5..0ae958f2 100644
--- a/archivebox/parsers/__init__.py
+++ b/archivebox/parsers/__init__.py
@@ -176,7 +176,7 @@ def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{ba
             ANSI['reset'],
         ))
         print('    ', e)
-        raise SystemExit(1)
+        raise e
 
     else:
         # Source is a path to a local file on the filesystem
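
Note: the pattern above is a per-item try/except, so one failed fetch during a
depth=1 crawl is logged and skipped instead of aborting the whole run via
SystemExit(1). A minimal, self-contained sketch of the same idea follows;
fetch_and_parse() here is a hypothetical stand-in for the real
save_file_as_source() + parse_links_from_source() pair, not ArchiveBox code:

    from urllib.request import urlopen

    def fetch_and_parse(url: str) -> list:
        # Hypothetical stand-in: fetch the page and "parse" it into results.
        with urlopen(url, timeout=10) as resp:
            return [resp.status]

    def crawl(urls: list) -> list:
        results = []
        for url in urls:
            try:
                results += fetch_and_parse(url)
            except Exception as err:
                # Log and continue; before this patch the equivalent code
                # path raised SystemExit(1) and killed the entire crawl.
                print(f'[!] Failed to get contents of URL {url}', err)
        return results

    if __name__ == '__main__':
        # The second URL fails to fetch, but the first is still crawled.
        print(crawl(['https://example.com', 'not-a-valid-url']))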