use requests.get to fetch and decode instead of urllib

This commit is contained in:
Nick Sweeting 2020-06-30 05:55:54 -04:00
parent df593dea0a
commit 9f440c2cf8

View file

@@ -13,6 +13,7 @@ from html import escape, unescape
from datetime import datetime
from dateutil import parser as dateparser
import requests
from base32_crockford import encode as base32_encode  # type: ignore
from .config import (
@@ -155,18 +156,13 @@ def parse_date(date: Any) -> Optional[datetime]:
@enforce_types
def download_url(url: str, timeout: int=TIMEOUT) -> str:
    """Download the contents of a remote url and return the text.

    Args:
        url: the remote URL to fetch.
        timeout: per-request timeout in seconds (defaults to the global TIMEOUT).

    Returns:
        The response body decoded to text.

    Raises:
        requests.RequestException: on connection failure, timeout, or invalid URL.
    """
    response = requests.get(
        url,
        # Spoof the same User-Agent wget uses so servers return consistent content
        headers={'User-Agent': WGET_USER_AGENT},
        # When the user has disabled CHECK_SSL_VALIDITY, skip certificate
        # verification (the old urllib code used ssl._create_unverified_context())
        verify=CHECK_SSL_VALIDITY,
        timeout=timeout,
    )
    # NOTE(review): response.text relies on requests' charset guess from the
    # Content-Type header; the previous urllib implementation fell back to
    # detect_encoding(rawdata) when no charset was sent — confirm that losing
    # that sniffing step is acceptable for servers that omit the charset.
    return response.text
@enforce_types @enforce_types