mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 22:54:27 -04:00
fix: Use w3lib to improve the encoding extraction
This commit is contained in:
parent
0965031d8f
commit
949f78aa65
5 changed files with 787 additions and 11 deletions
|
@ -2,4 +2,9 @@ from archivebox import util
|
|||
|
||||
def test_download_url_downloads_content():
    """Download the example.com fixture and check the expected text is present.

    The page is requested from the test server on 127.0.0.1:8080 through
    ``util.download_url``; the returned text must contain "Example Domain".
    """
    fixture_url = "http://127.0.0.1:8080/static/example.com.html"
    page = util.download_url(fixture_url)
    assert "Example Domain" in page
|
||||
|
||||
def test_download_url_gets_encoding_from_body():
    """Download the shift_jis.html fixture and check its Japanese text is intact.

    Both expected strings must appear in the text returned by
    ``util.download_url``.
    NOTE(review): presumably the fixture declares its charset only inside the
    HTML body, as the test name suggests; confirm against the fixture file.
    """
    fixture_url = "http://127.0.0.1:8080/static/shift_jis.html"
    page = util.download_url(fixture_url)
    expected_snippets = (
        "鹿児島のニュース|MBC南日本放送",
        "掲載された全ての記事・画像等の無断転載、二次利用をお断りいたします",
    )
    # Check in the original order so the first missing snippet fails first.
    for snippet in expected_snippets:
        assert snippet in page
|
Loading…
Add table
Add a link
Reference in a new issue