mirror of https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-06-01 15:28:24 -04:00

commit bd6d9c165b (parent 185d2f9f9b)

enforce utf8 on literally all file operations because windows sucks

9 changed files with 29 additions and 28 deletions
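Why this matters: on Windows, open() without an explicit encoding= falls back to the locale code page (commonly cp1252) rather than UTF-8, so a file containing non-ASCII text can raise UnicodeDecodeError or come back garbled when read on another machine. A minimal sketch of the failure mode this commit guards against; the file name and title string below are hypothetical, not taken from the test suite:

import json

# Hypothetical record standing in for an ArchiveBox index.json entry;
# the non-ASCII title is what trips up a cp1252 default on Windows.
details = {"title": "Beispieldomäne 例"}

# Pinning encoding="utf-8" on both the write and the read makes the
# round-trip independent of the platform's locale code page.
with open("index.json", "w", encoding="utf-8") as f:
    json.dump(details, f, ensure_ascii=False)

with open("index.json", "r", encoding="utf-8") as f:
    assert json.load(f)["title"] == details["title"]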
@@ -33,7 +33,7 @@ def test_depth_flag_0_crawls_only_the_arg_page(tmp_path, process, disable_extractors_dict):
     )
 
     archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
-    with open(archived_item_path / "index.json", "r") as f:
+    with open(archived_item_path / "index.json", "r", encoding='utf-8') as f:
         output_json = json.load(f)
     assert output_json["base_url"] == "127.0.0.1:8080/static/example.com.html"
 
@@ -79,7 +79,7 @@ def test_add_updates_history_json_index(tmp_path, process, disable_extractors_dict):
 
     archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
 
-    with open(archived_item_path / "index.json", "r") as f:
+    with open(archived_item_path / "index.json", "r", encoding="utf-8") as f:
         output_json = json.load(f)
     assert output_json["history"] != {}
 
@@ -90,4 +90,4 @@ def test_extract_input_uses_only_passed_extractors(tmp_path, process):
     archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
 
     assert (archived_item_path / "warc").exists()
-    assert not (archived_item_path / "singlefile.html").exists()
+    assert not (archived_item_path / "singlefile.html").exists()

@@ -86,7 +86,7 @@ def test_headers_retrieved(tmp_path, process, disable_extractors_dict):
     output_file = archived_item_path / "headers.json"
     assert output_file.exists()
     headers_file = archived_item_path / 'headers.json'
-    with open(headers_file) as f:
+    with open(headers_file, 'r', encoding='utf-8') as f:
         headers = pyjson.load(f)
     assert headers['Content-Language'] == 'en'
     assert headers['Content-Script-Type'] == 'text/javascript'
 
@@ -98,7 +98,7 @@ def test_headers_redirect_chain(tmp_path, process, disable_extractors_dict):
                              capture_output=True, env=disable_extractors_dict)
     archived_item_path = list(tmp_path.glob("archive/**/*"))[0]
     output_file = archived_item_path / "headers.json"
-    with open(output_file) as f:
+    with open(output_file, 'r', encoding='utf-8') as f:
         headers = pyjson.load(f)
     assert headers['Content-Language'] == 'en'
     assert headers['Content-Script-Type'] == 'text/javascript'
 
@@ -110,6 +110,6 @@ def test_headers_400_plus(tmp_path, process, disable_extractors_dict):
                              capture_output=True, env=disable_extractors_dict)
     archived_item_path = list(tmp_path.glob("archive/**/*"))[0]
     output_file = archived_item_path / "headers.json"
-    with open(output_file) as f:
+    with open(output_file, 'r', encoding='utf-8') as f:
         headers = pyjson.load(f)
-    assert headers["Status-Code"] == "200"
+    assert headers["Status-Code"] == "200"

@@ -28,11 +28,11 @@ def test_add_link(tmp_path, process, disable_extractors_dict):
 
     assert "index.json" in [x.name for x in archived_item_path.iterdir()]
 
-    with open(archived_item_path / "index.json", "r") as f:
+    with open(archived_item_path / "index.json", "r", encoding="utf-8") as f:
         output_json = json.load(f)
     assert "Example Domain" == output_json['history']['title'][0]['output']
 
-    with open(archived_item_path / "index.html", "r") as f:
+    with open(archived_item_path / "index.html", "r", encoding="utf-8") as f:
         output_html = f.read()
     assert "Example Domain" in output_html
 
@@ -47,7 +47,7 @@ def test_add_link_support_stdin(tmp_path, process, disable_extractors_dict):
 
     assert "index.json" in [x.name for x in archived_item_path.iterdir()]
 
-    with open(archived_item_path / "index.json", "r") as f:
+    with open(archived_item_path / "index.json", "r", encoding="utf-8") as f:
         output_json = json.load(f)
     assert "Example Domain" == output_json['history']['title'][0]['output']
 
@@ -75,11 +75,11 @@ def test_collision_urls_different_timestamps(tmp_path, process, disable_extractors_dict):
 
     first_archive = tmp_path / "archive" / str(min([float(folder) for folder in archive_folders]))
     json_index = str(first_archive / "index.json")
-    with open(json_index, "r") as f:
+    with open(json_index, "r", encoding="utf-8") as f:
         link_details = json.loads(f.read())
 
     link_details["url"] = "http://127.0.0.1:8080/static/iana.org.html"
-    with open(json_index, "w") as f:
+    with open(json_index, "w", encoding="utf-8") as f:
         json.dump(link_details, f)
 
     init_process = subprocess.run(['archivebox', 'init'], capture_output=True, env=disable_extractors_dict)
 
@@ -98,12 +98,12 @@ def test_collision_timestamps_different_urls(tmp_path, process, disable_extractors_dict):
     archive_folders.remove(first_archive.name)
     json_index = str(first_archive / "index.json")
 
-    with open(json_index, "r") as f:
+    with open(json_index, "r", encoding="utf-8") as f:
         link_details = json.loads(f.read())
 
     link_details["timestamp"] = archive_folders[0]
 
-    with open(json_index, "w") as f:
+    with open(json_index, "w", encoding="utf-8") as f:
         json.dump(link_details, f)
 
     init_process = subprocess.run(['archivebox', 'init'], capture_output=True, env=disable_extractors_dict)
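As an aside, an alternative to pinning encoding= at every call site (and not what this commit does) is Python's UTF-8 mode, available since 3.7, which makes utf-8 the default for open() regardless of the Windows locale:

# UTF-8 mode is enabled via an environment variable or interpreter flag:
#   PYTHONUTF8=1 python -m pytest tests/
#   python -X utf8 -m pytest tests/
import sys
print(sys.flags.utf8_mode)  # prints 1 when UTF-8 mode is active

The explicit per-call encoding keeps the tests deterministic even when that flag is absent, which is presumably why the commit patches each open() individually.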