From 3658153cf8347a831a74f3a80e4af2fb8ec39c5b Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 18 Aug 2020 08:04:57 -0400 Subject: [PATCH] fix url parsing through quotes --- archivebox/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archivebox/util.py b/archivebox/util.py index 7f33ca10..86c1e4aa 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -59,7 +59,7 @@ URL_REGEX = re.compile( r'(?:[a-zA-Z]|[0-9]' # followed by allowed alphanum characters r'|[$-_@.&+]|[!*\(\),]' # or allowed symbols r'|(?:%[0-9a-fA-F][0-9a-fA-F]))' # or allowed unicode bytes - r'[^\]\[\(\)<>\""\'\s]+', # stop parsing at these symbols + r'[^\]\[\(\)<>"\'\s]+', # stop parsing at these symbols re.IGNORECASE, )