Merge branch 'master' into django

This commit is contained in:
Nick Sweeting 2020-06-25 21:30:29 -04:00
commit cb67b09f9d
29 changed files with 418 additions and 911 deletions

View file

@ -1,5 +1,6 @@
import re
import ssl
import json as pyjson
from typing import List, Optional, Any
@ -12,8 +13,7 @@ from html import escape, unescape
from datetime import datetime
from dateutil import parser as dateparser
from base32_crockford import encode as base32_encode # type: ignore
import json as pyjson
from base32_crockford import encode as base32_encode # type: ignore
from .config import (
TIMEOUT,
@ -23,6 +23,12 @@ from .config import (
CHROME_OPTIONS,
)
# Optional dependency: use chardet to sniff the encoding of raw bytes when it
# is installed; otherwise assume UTF-8.
try:
    import chardet

    def detect_encoding(rawdata):
        """Return the name of the text encoding guessed for *rawdata*.

        chardet reports an encoding of ``None`` when it cannot identify the
        data (for example, an empty byte string). Fall back to "utf-8" in
        that case so the result is always a string that can be handed to
        ``bytes.decode``.
        """
        return chardet.detect(rawdata)["encoding"] or "utf-8"
except ImportError:
    def detect_encoding(rawdata):
        """Return "utf-8"; chardet is unavailable, so no detection is done."""
        return "utf-8"
### Parsing Helpers
# All of these are (str) -> str
@ -158,8 +164,9 @@ def download_url(url: str, timeout: int=TIMEOUT) -> str:
insecure = ssl._create_unverified_context()
resp = urlopen(req, timeout=timeout, context=insecure)
encoding = resp.headers.get_content_charset() or 'utf-8' # type: ignore
return resp.read().decode(encoding)
rawdata = resp.read()
encoding = resp.headers.get_content_charset() or detect_encoding(rawdata)
return rawdata.decode(encoding)
@enforce_types