mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 22:54:27 -04:00
fix url parsing through quotes
This commit is contained in:
parent
a218ceb4e8
commit
3658153cf8
1 changed file with 1 addition and 1 deletion
|
@@ -59,7 +59,7 @@ URL_REGEX = re.compile(
|
||||||
r'(?:[a-zA-Z]|[0-9]' # followed by allowed alphanum characters
|
r'(?:[a-zA-Z]|[0-9]' # followed by allowed alphanum characters
|
||||||
r'|[$-_@.&+]|[!*\(\),]' # or allowed symbols
|
r'|[$-_@.&+]|[!*\(\),]' # or allowed symbols
|
||||||
r'|(?:%[0-9a-fA-F][0-9a-fA-F]))' # or allowed unicode bytes
|
r'|(?:%[0-9a-fA-F][0-9a-fA-F]))' # or allowed unicode bytes
|
||||||
r'[^\]\[\(\)<>\""\'\s]+', # stop parsing at these symbols
|
r'[^\]\[\(\)<>"\'\s]+', # stop parsing at these symbols
|
||||||
re.IGNORECASE,
|
re.IGNORECASE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue