Merge branch 'master' into dev

This commit is contained in:
Nick Sweeting 2019-03-30 15:36:54 -04:00
commit 5d0185b6dd
6 changed files with 41 additions and 8 deletions

View file

@@ -8,6 +8,9 @@ from .util import (
merge_links,
)
from config import (
URL_BLACKLIST,
)
def validate_links(links: Iterable[Link]) -> Iterable[Link]:
links = archivable_links(links) # remove chrome://, about:, mailto: etc.
@@ -22,11 +25,11 @@ def validate_links(links: Iterable[Link]) -> Iterable[Link]:
def archivable_links(links: Iterable[Link]) -> Iterable[Link]:
    """Yield only the links that can be archived.

    A link is kept when both conditions hold:

    * its URL scheme is one of ``http``, ``https`` or ``ftp`` (this drops
      chrome://, about:, mailto: and other schemed links that can't be
      archived), and
    * its URL is not matched by ``URL_BLACKLIST``; a falsy ``URL_BLACKLIST``
      disables this check entirely.

    Links are yielded lazily, in the order they were received.
    """
    for link in links:
        if scheme(link.url) not in ('http', 'https', 'ftp'):
            continue

        # NOTE(review): URL_BLACKLIST is presumably a compiled regex (or a
        # falsy value when no blacklist is configured) -- confirm in config.
        # If so, `.match()` only tests from the start of the URL, so a
        # pattern has to cover the URL's prefix to take effect.
        if URL_BLACKLIST and URL_BLACKLIST.match(link.url):
            continue

        yield link
def uniquefied_links(sorted_links: Iterable[Link]) -> Iterable[Link]:
@@ -87,3 +90,5 @@ def lowest_uniq_timestamp(used_timestamps: OrderedDict, timestamp: str) -> str:
new_timestamp = '{}.{}'.format(timestamp, nonce)
return new_timestamp