mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-14 15:14:31 -04:00
fix logic for ONLY_NEW accidentally replacing all links
This commit is contained in:
parent
1b36d5b29c
commit
3571ef24e4
2 changed files with 23 additions and 32 deletions
|
@ -81,15 +81,6 @@ def validate_links(links):
|
|||
|
||||
return list(links)
|
||||
|
||||
def new_links(all_links, existing_links):
    """
    Filter all_links down to the entries whose 'url' is not already present
    in existing_links.

    Both arguments are iterated as collections of mappings indexed by the
    'url' key. The result is a new list that keeps the relative order of
    all_links. This helper is intended for the ONLY_NEW mode, where only
    links that have not been indexed yet should be processed; the function
    itself does not read that setting.
    """
    # Collect every already-known URL first so each membership test is O(1).
    known_urls = set()
    for indexed in existing_links:
        known_urls.add(indexed['url'])

    unseen = []
    for candidate in all_links:
        if candidate['url'] in known_urls:
            continue
        unseen.append(candidate)
    return unseen
|
||||
|
||||
def archivable_links(links):
|
||||
"""remove chrome://, about:// or other schemed links that cant be archived"""
|
||||
return (
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue