Handle list of tags in JSON, and be more clever about comma vs. space

This commit is contained in:
jim winstead 2024-02-28 17:38:49 -08:00
parent 178e676e0f
commit ccabda4c7d
3 changed files with 21 additions and 4 deletions

View file

@ -110,12 +110,19 @@ def test_json(tmp_path, process, disable_extractors_dict):
urls = list(map(lambda x: x[0], urls))
assert "http://127.0.0.1:8080/static/example.com.html" in urls
assert "http://127.0.0.1:8080/static/iana.org.html" in urls
assert "http://127.0.0.1:8080/static/shift_jis.html" in urls
assert "http://127.0.0.1:8080/static/title_og_with_html" in urls
# if the following URL appears, we must have fallen back to another parser
assert not "http://www.example.com/should-not-exist" in urls
tags = list(map(lambda x: x[0], tags))
assert "Tag1" in tags
assert "Tag2" in tags
assert "Tag3" in tags
assert "Tag4 with Space" in tags
assert "Tag5" in tags
assert "Tag6 with Space" in tags
def test_json_with_leading_garbage(tmp_path, process, disable_extractors_dict):
with open('../../mock_server/templates/example.json.bad', 'r', encoding='utf-8') as f: