Mirror of https://github.com/ArchiveBox/ArchiveBox.git (synced 2025-05-13 14:44:29 -04:00)

Commit 6a8f6f52af (parent f4e018ba0c): 0 mypy errors

5 changed files with 38 additions and 37 deletions
@@ -3,7 +3,7 @@ import json
 
 from datetime import datetime
 from string import Template
-from typing import List, Tuple, Iterator, Optional
+from typing import List, Tuple, Iterator, Optional, Mapping
 
 from .schema import Link, ArchiveResult
 from .config import (
@@ -132,8 +132,6 @@ def parse_json_links_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
 def write_html_links_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=False) -> None:
     """write the html link index to a given path"""
 
-    path = os.path.join(out_dir, 'index.html')
-
     copy_and_overwrite(
         os.path.join(TEMPLATES_DIR, 'static'),
         os.path.join(out_dir, 'static'),
@@ -147,8 +145,9 @@ def write_html_links_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=False) -> None:
     with open(os.path.join(TEMPLATES_DIR, 'index_row.html'), 'r', encoding='utf-8') as f:
         link_row_html = f.read()
 
-    link_rows = '\n'.join(
-        Template(link_row_html).substitute(**{
+    link_rows = []
+    for link in links:
+        template_row_vars: Mapping[str, str] = {
             **derived_link_info(link),
             'title': (
                 link.title
@@ -162,22 +161,22 @@ def write_html_links_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=False) -> None:
             'archive_url': urlencode(
                 wget_output_path(link) or 'index.html'
             ),
-        })
-        for link in links
-    )
+        }
+        link_rows.append(Template(link_row_html).substitute(**template_row_vars))
 
-    template_vars = {
-        'num_links': len(links),
+    template_vars: Mapping[str, str] = {
+        'num_links': str(len(links)),
         'date_updated': datetime.now().strftime('%Y-%m-%d'),
         'time_updated': datetime.now().strftime('%Y-%m-%d %H:%M'),
         'footer_info': FOOTER_INFO,
         'version': VERSION,
         'git_sha': GIT_SHA,
-        'rows': link_rows,
+        'rows': '\n'.join(link_rows),
         'status': 'finished' if finished else 'running',
     }
+    template_html = Template(index_html).substitute(**template_vars)
 
-    atomic_write(Template(index_html).substitute(**template_vars), path)
+    atomic_write(template_html, os.path.join(out_dir, 'index.html'))
 
 
 
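Note on the index refactor above: building the rows inside a generator expression passed straight to '\n'.join() hides the per-row dict from the type checker, while an explicit loop with an annotated Mapping[str, str] gives every Template substitution a checkable type, and converting num_links to str keeps template_vars homogeneous. A minimal self-contained sketch of the same pattern (the row template, field names, and render_index helper here are hypothetical stand-ins, not ArchiveBox code):

from string import Template
from typing import List, Mapping

ROW_TEMPLATE = Template('<tr><td>$title</td><td>$url</td></tr>')        # hypothetical row template
PAGE_TEMPLATE = Template('<table>$rows</table><p>$num_links links</p>')  # hypothetical page template

def render_index(links: List[dict]) -> str:
    # Build each row with an explicitly typed mapping so mypy can check it.
    link_rows: List[str] = []
    for link in links:
        row_vars: Mapping[str, str] = {
            'title': link.get('title') or 'Untitled',
            'url': link['url'],
        }
        link_rows.append(ROW_TEMPLATE.substitute(**row_vars))

    # All template values are str, so the mapping stays Mapping[str, str].
    page_vars: Mapping[str, str] = {
        'rows': '\n'.join(link_rows),
        'num_links': str(len(links)),
    }
    return PAGE_TEMPLATE.substitute(**page_vars)

print(render_index([{'url': 'https://example.com', 'title': 'Example'}]))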
@@ -111,6 +111,7 @@ def log_archiving_paused(num_links: int, idx: int, timestamp: str):
 def log_archiving_finished(num_links: int):
     end_ts = datetime.now()
     _LAST_RUN_STATS.archiving_end_ts = end_ts
+    assert _LAST_RUN_STATS.archiving_start_ts is not None
     seconds = end_ts.timestamp() - _LAST_RUN_STATS.archiving_start_ts.timestamp()
     if seconds > 60:
         duration = '{0:.2f} min'.format(seconds / 60, 2)
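The added assert is there for the type checker as much as for runtime safety: archiving_start_ts is presumably Optional on the stats object, so calling .timestamp() on it is a mypy error until the None case is ruled out, and asserting "is not None" narrows the type for the lines that follow. A small illustrative sketch; the RunStats class below is a made-up stand-in, not the actual _LAST_RUN_STATS definition:

from datetime import datetime
from typing import Optional

class RunStats:
    # Hypothetical stand-in for the module-level stats object.
    archiving_start_ts: Optional[datetime] = None
    archiving_end_ts: Optional[datetime] = None

def log_finished(stats: RunStats) -> float:
    end_ts = datetime.now()
    stats.archiving_end_ts = end_ts
    # Without this assert, mypy flags .timestamp() on an Optional[datetime].
    assert stats.archiving_start_ts is not None
    return end_ts.timestamp() - stats.archiving_start_ts.timestamp()

stats = RunStats()
stats.archiving_start_ts = datetime.now()
print(log_finished(stats))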
@@ -194,7 +195,7 @@ def log_archive_method_finished(result: ArchiveResult):
         ),
         *hints,
         '{}Run to see full output:{}'.format(ANSI['lightred'], ANSI['reset']),
-        *((' cd {};'.format(result.pwd),) if result.pwd else ()),
+        *([' cd {};'.format(result.pwd)] if result.pwd else []),
         '    {}'.format(quoted_cmd),
     ]
     print('\n'.join(
@@ -266,10 +266,12 @@ def parse_pinboard_rss_export(rss_file: IO[str]) -> Iterable[Link]:
     root = etree.parse(rss_file).getroot()
     items = root.findall("{http://purl.org/rss/1.0/}item")
     for item in items:
-        url = item.find("{http://purl.org/rss/1.0/}link").text
-        tags = item.find("{http://purl.org/dc/elements/1.1/}subject").text if item.find("{http://purl.org/dc/elements/1.1/}subject") else None
-        title = item.find("{http://purl.org/rss/1.0/}title").text.strip() if item.find("{http://purl.org/rss/1.0/}title").text.strip() else None
-        ts_str = item.find("{http://purl.org/dc/elements/1.1/}date").text if item.find("{http://purl.org/dc/elements/1.1/}date").text else None
+        find = lambda p: item.find(p).text.strip() if item.find(p) else None  # type: ignore
+
+        url = find("{http://purl.org/rss/1.0/}link")
+        tags = find("{http://purl.org/dc/elements/1.1/}subject")
+        title = find("{http://purl.org/rss/1.0/}title")
+        ts_str = find("{http://purl.org/dc/elements/1.1/}date")
 
         # Pinboard includes a colon in its date stamp timezone offsets, which
         # Python can't parse. Remove it:
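The Pinboard parser change collapses four copy-pasted Optional checks into one local helper: item.find() returns None when a namespaced element is missing, so every .text access was both a potential AttributeError and a mypy error, and one lambda (with a single # type: ignore) keeps the call sites short. A rough standalone sketch of the same idea, using an inline RSS 1.0 snippet rather than a real Pinboard export:

from xml.etree import ElementTree as etree

RSS = '''<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
                  xmlns="http://purl.org/rss/1.0/"
                  xmlns:dc="http://purl.org/dc/elements/1.1/">
  <item>
    <link>https://example.com</link>
    <title> Example bookmark </title>
    <dc:date>2019-03-27T12:00:00+00:00</dc:date>
  </item>
</rdf:RDF>'''

root = etree.fromstring(RSS)
for item in root.findall("{http://purl.org/rss/1.0/}item"):
    # One helper handles both "element missing" and "strip the text" cases.
    find = lambda p: item.find(p).text.strip() if item.find(p) is not None else None  # type: ignore

    url = find("{http://purl.org/rss/1.0/}link")
    title = find("{http://purl.org/rss/1.0/}title")
    tags = find("{http://purl.org/dc/elements/1.1/}subject")   # missing element -> None
    ts_str = find("{http://purl.org/dc/elements/1.1/}date")
    print(url, title, tags, ts_str)

One detail worth noting: the sketch tests "is not None" explicitly, since a bare truthiness test on an Element is False when the element has no child nodes, which is a classic ElementTree gotcha.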
@@ -296,12 +298,12 @@ def parse_medium_rss_export(rss_file: IO[str]) -> Iterable[Link]:
 
     rss_file.seek(0)
     root = etree.parse(rss_file).getroot()
-    items = root.find("channel").findall("item")
+    items = root.find("channel").findall("item")  # type: ignore
     for item in items:
-        url = item.find("link").text
-        title = item.find("title").text.strip()
-        ts_str = item.find("pubDate").text
-        time = datetime.strptime(ts_str, "%a, %d %b %Y %H:%M:%S %Z")
+        url = item.find("link").text  # type: ignore
+        title = item.find("title").text.strip()  # type: ignore
+        ts_str = item.find("pubDate").text  # type: ignore
+        time = datetime.strptime(ts_str, "%a, %d %b %Y %H:%M:%S %Z")  # type: ignore
 
         yield Link(
             url=htmldecode(url),
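The Medium parser takes the blunter route: each .find(...).text access is silenced with a per-line # type: ignore rather than restructured, because .find() returns Optional[Element] and .text is Optional[str], both of which mypy flags when fed straight into strip() or strptime(). A stricter hypothetical alternative (not what the commit does) would check for None explicitly instead of suppressing the errors, roughly:

from datetime import datetime
from typing import Optional
from xml.etree import ElementTree as etree

FEED = '''<rss><channel><item>
  <link>https://medium.com/@someone/post</link>
  <title>A post</title>
  <pubDate>Wed, 27 Mar 2019 12:00:00 GMT</pubDate>
</item></channel></rss>'''

def text_of(item: etree.Element, tag: str) -> str:
    # Raise early instead of sprinkling "# type: ignore" at each call site.
    el: Optional[etree.Element] = item.find(tag)
    if el is None or el.text is None:
        raise ValueError('missing <{}> in feed item'.format(tag))
    return el.text

root = etree.fromstring(FEED)
channel = root.find("channel")
assert channel is not None
for item in channel.findall("item"):
    url = text_of(item, "link")
    title = text_of(item, "title").strip()
    ts = datetime.strptime(text_of(item, "pubDate"), "%a, %d %b %Y %H:%M:%S %Z")
    print(url, title, ts)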
@@ -319,7 +321,7 @@ def parse_plain_text_export(text_file: IO[str]) -> Iterable[Link]:
     text_file.seek(0)
     for line in text_file.readlines():
         urls = re.findall(URL_REGEX, line) if line.strip() else ()
-        for url in urls:
+        for url in urls:  # type: ignore
             yield Link(
                 url=htmldecode(url),
                 timestamp=str(datetime.now().timestamp()),
@@ -6,9 +6,8 @@ from os.path import exists, join
 from shutil import rmtree
 from typing import List
 
-from archive import parse_json_link_index
-from config import ARCHIVE_DIR, OUTPUT_DIR
-from index import write_html_links_index, write_json_links_index
+from .config import ARCHIVE_DIR, OUTPUT_DIR
+from .index import parse_json_links_index, write_html_links_index, write_json_links_index
 
 
 def cleanup_index(regexes: List[str], proceed: bool, delete: bool) -> None:
@@ -16,18 +15,18 @@ def cleanup_index(regexes: List[str], proceed: bool, delete: bool) -> None:
         exit('index.json is missing; nothing to do')
 
     compiled = [re.compile(r) for r in regexes]
-    links = parse_json_link_index(OUTPUT_DIR)['links']
+    links = parse_json_links_index(OUTPUT_DIR)
     filtered = []
    remaining = []
 
-    for l in links:
-        url = l['url']
+    for link in links:
+        url = link.url
         for r in compiled:
             if r.search(url):
-                filtered.append((l, r))
+                filtered.append((link, r))
                 break
         else:
-            remaining.append(l)
+            remaining.append(link)
 
     if not filtered:
         exit('Search did not match any entries.')
@@ -35,7 +34,7 @@ def cleanup_index(regexes: List[str], proceed: bool, delete: bool) -> None:
     print('Filtered out {}/{} urls:'.format(len(filtered), len(links)))
 
     for link, regex in filtered:
-        url = link['url']
+        url = link.url
         print('    {url} via {regex}'.format(url=url, regex=regex.pattern))
 
     if not proceed:
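The cleanup helper previously treated index entries as plain dicts (l['url']); after switching to parse_json_links_index, the entries are Link schema objects, so field access becomes attribute access (link.url) that mypy can verify against the schema. A toy sketch of the same filtering loop over attribute-typed records; the Bookmark NamedTuple here is only a stand-in for the real Link schema:

import re
from typing import List, NamedTuple, Pattern, Tuple

class Bookmark(NamedTuple):
    # Stand-in for the Link schema object; mypy knows .url is a str.
    url: str
    title: str

def split_by_regexes(links: List[Bookmark], regexes: List[str]) -> Tuple[list, list]:
    compiled: List[Pattern[str]] = [re.compile(r) for r in regexes]
    filtered, remaining = [], []
    for link in links:
        for r in compiled:
            if r.search(link.url):          # attribute access, checked by mypy
                filtered.append((link, r))
                break
        else:
            remaining.append(link)
    return filtered, remaining

links = [Bookmark('https://example.com/a', 'A'), Bookmark('https://other.org/b', 'B')]
filtered, remaining = split_by_regexes(links, [r'example\.com'])
print(len(filtered), len(remaining))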
@@ -7,7 +7,7 @@ import shutil
 
 from json import JSONEncoder
 from typing import List, Optional, Any, Union
-from inspect import signature, _empty
+from inspect import signature
 from functools import wraps
 from hashlib import sha256
 from urllib.request import Request, urlopen
@@ -24,7 +24,7 @@ from subprocess import (
     CalledProcessError,
 )
 
-from base32_crockford import encode as base32_encode
+from base32_crockford import encode as base32_encode  # type: ignore
 
 from .schema import Link
 from .config import (
@@ -127,9 +127,9 @@ def enforce_types(func):
         try:
             annotation = sig.parameters[arg_key].annotation
         except KeyError:
-            annotation = _empty
+            annotation = None
 
-        if annotation is not _empty and annotation.__class__ is type:
+        if annotation is not None and annotation.__class__ is type:
             if not isinstance(arg_val, annotation):
                 raise TypeError(
                     '{}(..., {}: {}) got unexpected {} argument {}={}'.format(
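In enforce_types, inspect._empty was the "no annotation" sentinel, but _empty is a private name with no stub for mypy to see, so the commit swaps it for a plain None sentinel with the same guard logic. A compact sketch of the decorator pattern under that assumption, simplified to positional arguments only and not the full ArchiveBox implementation:

from functools import wraps
from inspect import signature

def enforce_types(func):
    """Reject calls whose positional args don't match simple class annotations."""
    sig = signature(func)
    param_names = list(sig.parameters)

    @wraps(func)
    def wrapper(*args, **kwargs):
        for arg_key, arg_val in zip(param_names, args):
            try:
                annotation = sig.parameters[arg_key].annotation
            except KeyError:
                annotation = None
            if annotation is sig.empty:
                # Unannotated parameter: normalize to None via the public
                # Signature.empty attribute instead of importing inspect._empty.
                annotation = None

            # Only enforce plain classes (str, int, ...), not typing constructs.
            if annotation is not None and annotation.__class__ is type:
                if not isinstance(arg_val, annotation):
                    raise TypeError('{}() got {} for {!r}, expected {}'.format(
                        func.__name__, type(arg_val).__name__, arg_key, annotation.__name__))
        return func(*args, **kwargs)
    return wrapper

@enforce_types
def repeat(word: str, times: int) -> str:
    return word * times

print(repeat('ha', 3))       # ok
# repeat('ha', '3')          # raises TypeError at call time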
@@ -605,7 +605,7 @@ def download_url(url: str, timeout: int=TIMEOUT) -> str:
     insecure = ssl._create_unverified_context()
     resp = urlopen(req, timeout=timeout, context=insecure)
 
-    encoding = resp.headers.get_content_charset() or 'utf-8'
+    encoding = resp.headers.get_content_charset() or 'utf-8'  # type: ignore
     return resp.read().decode(encoding)
 
 