Fix decoding of titles: unescape HTML entities instead of URL-unquoting

2025-05-13 22:54:27 -04:00 · 2018-04-17 10:30:25 -04:00 · 2018-04-17 10:30:25 -04:00 · dbe4660da3
commit dbe4660da3
parent 650380efce
2 changed files with 16 additions and 3 deletions
--- a/links.py
+++ b/links.py
@@ -33,7 +33,7 @@ Link {
 """

 import datetime
-from urllib.parse import unquote
+from html import unescape

 from util import (
    domain,
@@ -41,6 +41,7 @@ from util import (
    str_between,
    get_link_type,
    merge_links,
+    wget_output_path,
 )
 from config import ANSI

@@ -54,6 +55,19 @@ def validate_links(links):
        print('[X] No links found :(')
        raise SystemExit(1)

+    for link in links:
+        link['title'] = unescape(link['title'])
+        link['latest'] = link.get('latest') or {}
+        
+        if not link['latest'].get('wget'):
+            link['latest']['wget'] = wget_output_path(link)
+
+        if not link['latest'].get('pdf'):
+            link['latest']['pdf'] = wget_output_path(link)
+
+        if not link['latest'].get('screenshot'):
+            link['latest']['screenshot'] = wget_output_path(link)
+
    return list(links)


@@ -86,7 +100,6 @@ def uniquefied_links(sorted_links):
    unique_timestamps = {}
    for link in unique_urls.values():
        link['timestamp'] = lowest_uniq_timestamp(unique_timestamps, link['timestamp'])
-        link['title'] = unquote(link['title'])
        unique_timestamps[link['timestamp']] = link

    return unique_timestamps.values()