mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 22:54:27 -04:00
Fix RSS parsing when items have newlines between them
This commit is contained in:
parent
c48b1bbb3c
commit
58c9b47d43
1 changed file with 3 additions and 2 deletions
|
@@ -154,7 +154,8 @@ def parse_rss_export(rss_file):
|
||||||
"""Parse RSS XML-format files into links"""
|
"""Parse RSS XML-format files into links"""
|
||||||
|
|
||||||
rss_file.seek(0)
|
rss_file.seek(0)
|
||||||
items = rss_file.read().split('</item>\n<item>')
|
items = rss_file.read().split('<item>')
|
||||||
|
items = items[1:] if items else []
|
||||||
for item in items:
|
for item in items:
|
||||||
# example item:
|
# example item:
|
||||||
# <item>
|
# <item>
|
||||||
|
@@ -166,7 +167,7 @@ def parse_rss_export(rss_file):
|
||||||
# </item>
|
# </item>
|
||||||
|
|
||||||
trailing_removed = item.split('</item>', 1)[0]
|
trailing_removed = item.split('</item>', 1)[0]
|
||||||
leading_removed = trailing_removed.split('<item>', 1)[-1]
|
leading_removed = trailing_removed.split('<item>', 1)[-1].strip()
|
||||||
rows = leading_removed.split('\n')
|
rows = leading_removed.split('\n')
|
||||||
|
|
||||||
def get_row(key):
|
def get_row(key):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue