diff --git a/archivebox/parse.py b/archivebox/parse.py
index 88f7f3f1..bd311288 100644
--- a/archivebox/parse.py
+++ b/archivebox/parse.py
@@ -27,12 +27,15 @@ from util import (
     str_between,
     get_link_type,
     URL_REGEX,
+    check_url_parsing,
 )
 
 
 def parse_links(path):
     """parse a list of links dictionaries from a bookmark export file"""
     
+    check_url_parsing()
+
     links = []
     with open(path, 'r', encoding='utf-8') as file:
         print('{green}[*] [{}] Parsing new links from output/sources/{}...{reset}'.format(
@@ -192,7 +195,6 @@ def parse_shaarli_rss_export(rss_file):
 
         yield info
 
-
 def parse_netscape_html_export(html_file):
     """Parse netscape-format bookmarks export files (produced by all browsers)"""
 
diff --git a/archivebox/util.py b/archivebox/util.py
index 9c93a9fd..3e679825 100644
--- a/archivebox/util.py
+++ b/archivebox/util.py
@@ -58,8 +58,19 @@ base_url = lambda url: without_scheme(url)  # uniq base url used to dedupe links
 
 short_ts = lambda ts: ts.split('.')[0]
 
-URL_REGEX = 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))[^<\""]+'
-HTML_TITLE_REGEX = '<title>(.[^<>]+)'
+URL_REGEX = re.compile(
+    r'http[s]?://'                    # start matching at an http:// or https:// scheme
+    r'(?:[a-zA-Z]|[0-9]'              # followed by exactly one alphanumeric character
+    r'|[$-_@.&+]|[!*\(\),]'           #    or one allowed symbol (note: $-_ is a character range)
+    r'|(?:%[0-9a-fA-F][0-9a-fA-F]))'  #    or one percent-encoded byte (e.g. %2F)
+    r'[^\]\[\(\)<>\""\'\s]+',         # then one or more chars, stopping at brackets, parens, <>, quotes or whitespace
+    re.IGNORECASE,  # the scheme may be written in any letter case
+)
+HTML_TITLE_REGEX = re.compile(
+    r'<title>'                         # start matching text right after a <title> tag
+    r'(.[^<>]+)',                      # capture any one char plus everything up to the next < or >
+    re.IGNORECASE,  # also match <TITLE> and other casings of the tag
+)
 
 
 def check_dependencies():
@@ -124,6 +135,30 @@ def check_dependencies():
             raise SystemExit(1)
 
 
+def check_url_parsing():
+    """Sanity-check URL_REGEX on a fixed text sample; raise AssertionError unless exactly 12 URLs are found."""
+    test_urls = '''
+    https://example1.com/what/is/happening.html?what=1#how-about-this=1
+    https://example2.com/what/is/happening/?what=1#how-about-this=1
+    HTtpS://example3.com/what/is/happening/?what=1#how-about-this=1f
+    https://example4.com/what/is/happening.html
+    https://example5.com/
+    https://example6.com
+
+    <test>http://example7.com</test>
+    [https://example8.com/what/is/this.php?what=1]
+    [and http://example9.com?what=1&other=3#and-thing=2]
+    <what>https://example10.com#and-thing=2 "</about>
+    abc<this["https://example11.com/what/is#and-thing=2?whoami=23&where=1"]that>def
+    sdflkf[what](https://example12.com/who/what.php?whoami=1#whatami=2)?am=hi
+    example13.bada
+    and example14.badb
+    <or>htt://example15.badc</that>
+    '''
+    if len(re.findall(URL_REGEX, test_urls)) != 12:  # explicit raise: a bare `assert` is stripped under python -O
+        raise AssertionError('URL_REGEX failed to extract exactly 12 URLs from the built-in test sample')
+
+
 def chmod_file(path, cwd='.', permissions=OUTPUT_PERMISSIONS, timeout=30):
     """chmod -R <permissions> <cwd>/<path>"""