fix: Use w3lib to improve the encoding extraction

This commit is contained in:
Cristian 2020-07-22 10:24:08 -05:00
parent 0965031d8f
commit 949f78aa65
5 changed files with 787 additions and 11 deletions

View file

@@ -2,4 +2,9 @@ from archivebox import util
def test_download_url_downloads_content():
    """Check that ``util.download_url`` returns the body of a served page.

    Requests the ``example.com.html`` static fixture from the server at
    127.0.0.1:8080 and asserts that the returned text contains the
    "Example Domain" marker.
    """
    # NOTE(review): relies on a fixture server already listening on
    # 127.0.0.1:8080 -- confirm how the test suite starts it.
    text = util.download_url("http://127.0.0.1:8080/static/example.com.html")
    assert "Example Domain" in text
def test_download_url_gets_encoding_from_body():
    """Check that ``util.download_url`` returns readable Japanese text.

    Requests the ``shift_jis.html`` static fixture from the server at
    127.0.0.1:8080 and asserts that two Japanese phrases appear intact in
    the returned text, which only holds if the response bytes were decoded
    with the correct character set.
    """
    # NOTE(review): presumably the fixture is Shift_JIS-encoded and declares
    # its charset only inside the HTML body (not in the HTTP headers) --
    # verify against the fixture file and the server configuration.
    text = util.download_url("http://127.0.0.1:8080/static/shift_jis.html")
    assert "鹿児島のニュースMBC南日本放送" in text
    assert "掲載された全ての記事・画像等の無断転載、二次利用をお断りいたします" in text