mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 06:34:25 -04:00
fix pinboard RSS parser
This commit is contained in:
parent
61ec4971e9
commit
f9a7c530b9
2 changed files with 5 additions and 5 deletions
|
@@ -228,9 +228,9 @@ def parse_pinboard_rss_export(rss_file):
|
||||||
items = root.findall("{http://purl.org/rss/1.0/}item")
|
items = root.findall("{http://purl.org/rss/1.0/}item")
|
||||||
for item in items:
|
for item in items:
|
||||||
url = item.find("{http://purl.org/rss/1.0/}link").text
|
url = item.find("{http://purl.org/rss/1.0/}link").text
|
||||||
tags = item.find("{http://purl.org/dc/elements/1.1/}subject").text
|
tags = item.find("{http://purl.org/dc/elements/1.1/}subject").text if item.find("{http://purl.org/dc/elements/1.1/}subject") else None
|
||||||
title = item.find("{http://purl.org/rss/1.0/}title").text.strip()
|
title = item.find("{http://purl.org/rss/1.0/}title").text.strip() if item.find("{http://purl.org/rss/1.0/}title").text.strip() else None
|
||||||
ts_str = item.find("{http://purl.org/dc/elements/1.1/}date").text
|
ts_str = item.find("{http://purl.org/dc/elements/1.1/}date").text if item.find("{http://purl.org/dc/elements/1.1/}date").text else None
|
||||||
# = 🌈🌈🌈🌈
|
# = 🌈🌈🌈🌈
|
||||||
# = 🌈🌈🌈🌈
|
# = 🌈🌈🌈🌈
|
||||||
# = 🏆🏆🏆🏆
|
# = 🏆🏆🏆🏆
|
||||||
|
@@ -243,7 +243,7 @@ def parse_pinboard_rss_export(rss_file):
|
||||||
info = {
|
info = {
|
||||||
'url': url,
|
'url': url,
|
||||||
'timestamp': str(time.timestamp()),
|
'timestamp': str(time.timestamp()),
|
||||||
'tags': tags,
|
'tags': tags or '',
|
||||||
'title': title or None,
|
'title': title or None,
|
||||||
'sources': [rss_file.name],
|
'sources': [rss_file.name],
|
||||||
}
|
}
|
||||||
|
|
|
@@ -413,7 +413,7 @@ def derived_link_info(link):
|
||||||
'is_archived': os.path.exists(os.path.join(
|
'is_archived': os.path.exists(os.path.join(
|
||||||
ARCHIVE_DIR,
|
ARCHIVE_DIR,
|
||||||
link['timestamp'],
|
link['timestamp'],
|
||||||
wget_output_path(link) or domain(url)
|
domain(url),
|
||||||
)),
|
)),
|
||||||
'num_outputs': len([entry for entry in link['latest'].values() if entry]) if 'latest' in link else 0,
|
'num_outputs': len([entry for entry in link['latest'].values() if entry]) if 'latest' in link else 0,
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue