diff --git a/setup.py b/setup.py index 9ca39608..66112bfd 100755 --- a/setup.py +++ b/setup.py @@ -65,6 +65,7 @@ setuptools.setup( "sphinx-rtd-theme", "recommonmark", "pytest", + "bottle", ], # 'redis': ['redis', 'django-redis'], # 'pywb': ['pywb', 'redis'], diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..20128da7 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,19 @@ +from multiprocessing import Process + +import pytest +from .mock_server.server import start + +server_process = None + +@pytest.hookimpl +def pytest_sessionstart(session): + global server_process + server_process = Process(target=start) + server_process.start() + +@pytest.hookimpl +def pytest_sessionfinish(session): + if server_process is not None: + server_process.terminate() + server_process.join() + \ No newline at end of file diff --git a/tests/mock_server/__init__.py b/tests/mock_server/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/mock_server/server.py b/tests/mock_server/server.py new file mode 100644 index 00000000..0c546e99 --- /dev/null +++ b/tests/mock_server/server.py @@ -0,0 +1,17 @@ +from os.path import abspath +from os import getcwd +from pathlib import Path + +from bottle import route, run, static_file + +@route("/") +def index(): + return "Hello" + +@route("/static/<filename>") +def static_path(filename): + template_path = abspath(getcwd()) / Path("tests/mock_server/templates") + return static_file(filename, root=template_path) + +def start(): + run(host='localhost', port=8080) \ No newline at end of file diff --git a/tests/mock_server/templates/example.com.html b/tests/mock_server/templates/example.com.html new file mode 100644 index 00000000..8492e858 --- /dev/null +++ b/tests/mock_server/templates/example.com.html @@ -0,0 +1,49 @@ + + + + Example Domain + + + + + + + + +
+

Example Domain

+

This domain is for use in illustrative examples in documents. You may use this + domain in literature without prior coordination or asking for permission.

+

+ More information... +

+
+ + diff --git a/tests/mock_server/templates/iana.org.html b/tests/mock_server/templates/iana.org.html new file mode 100644 index 00000000..c1e60a2e --- /dev/null +++ b/tests/mock_server/templates/iana.org.html @@ -0,0 +1,390 @@ + + + + IANA — IANA-managed Reserved Domains + + + + + + + + + + + + + + + + + +
+ +
+ +
+ + +
+ + +

IANA-managed Reserved Domains

+ +

Certain domains are set aside, and nominally registered to “IANA”, for specific + policy or technical purposes.

+ +

Example domains

+ +

As described in + RFC 2606 + and + RFC 6761, + a number of domains such as + example.com + and + example.org + are maintained for documentation purposes. These domains may be used as illustrative + examples in documents without prior coordination with us. They are + not available for registration or transfer.

+ +

Test IDN top-level domains

+ +

These domains were temporarily delegated by IANA for the + IDN Evaluation + being conducted by + ICANN.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DomainDomain (A-label)LanguageScript
إختبار + + XN--KGBECHTV + + ArabicArabic
آزمایشی + + XN--HGBK6AJ7F53BBA + + PersianArabic
测试 + + XN--0ZWM56D + + ChineseHan (Simplified variant)
測試 + + XN--G6W251D + + ChineseHan (Traditional variant)
испытание + + XN--80AKHBYKNJ4F + + RussianCyrillic
परीक्षा + + XN--11B5BS3A9AJ6G + + HindiDevanagari (Nagari)
δοκιμή + + XN--JXALPDLP + + Greek, Modern (1453-)Greek
테스트 + + XN--9T4B11YI5A + + KoreanHangul (Hangŭl, Hangeul)
טעסט + + XN--DEBA0AD + + YiddishHebrew
テスト + + XN--ZCKZAH + + JapaneseKatakana
பரிட்சை + + XN--HLCJ6AYA9ESC7A + + TamilTamil
+
+ +

Policy-reserved domains

+ +

We act as both the registrant and registrar for a select number of domains + which have been reserved under policy grounds. These exclusions are + typically indicated in either technical standards (RFC documents), + or + contractual limitations.

+ +

Domains which are described as registered to IANA or ICANN on policy + grounds are not available for registration or transfer, with the exception + of + + country-name.info + domains. These domains are available for release + by the ICANN Governmental Advisory Committee Secretariat.

+ +

Other Special-Use Domains

+ +

There is additionally a + Special-Use Domain Names + registry documenting special-use domains designated by technical standards. For further information, see + Special-Use Domain Names + (RFC 6761).

+ + +
+ + + + +
+ + diff --git a/tests/test_args.py b/tests/test_args.py index 91264ef2..f52626fb 100644 --- a/tests/test_args.py +++ b/tests/test_args.py @@ -4,25 +4,25 @@ import json from .fixtures import * def test_depth_flag_is_accepted(process): - arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=0"], capture_output=True) + arg_process = subprocess.run(["archivebox", "add", "http://localhost:8080/static/example.com.html", "--depth=0"], capture_output=True) assert 'unrecognized arguments: --depth' not in arg_process.stderr.decode("utf-8") def test_depth_flag_fails_if_it_is_not_0_or_1(process): - arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=5"], capture_output=True) + arg_process = subprocess.run(["archivebox", "add", "http://localhost:8080/static/example.com.html", "--depth=5"], capture_output=True) assert 'invalid choice' in arg_process.stderr.decode("utf-8") - arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=-1"], capture_output=True) + arg_process = subprocess.run(["archivebox", "add", "http://localhost:8080/static/example.com.html", "--depth=-1"], capture_output=True) assert 'invalid choice' in arg_process.stderr.decode("utf-8") def test_depth_flag_0_crawls_only_the_arg_page(tmp_path, process): - arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=0"], capture_output=True) + arg_process = subprocess.run(["archivebox", "add", "http://localhost:8080/static/example.com.html", "--depth=0"], capture_output=True) archived_item_path = list(tmp_path.glob('archive/**/*'))[0] with open(archived_item_path / "index.json", "r") as f: output_json = json.load(f) - assert output_json["base_url"] == "example.com" + assert output_json["base_url"] == "localhost:8080/static/example.com.html" def test_depth_flag_1_crawls_the_page_AND_links(tmp_path, process): - arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=1"], 
capture_output=True) + arg_process = subprocess.run(["archivebox", "add", "http://localhost:8080/static/example.com.html", "--depth=1"], capture_output=True) with open(tmp_path / "index.json", "r") as f: archive_file = f.read() - assert "https://example.com" in archive_file - assert "https://www.iana.org/domains/example" in archive_file + assert "http://localhost:8080/static/example.com.html" in archive_file + assert "http://localhost:8080/static/iana.org.html" in archive_file diff --git a/tests/test_init.py b/tests/test_init.py index 97870459..24d3ed52 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -18,7 +18,7 @@ def test_update(tmp_path, process): def test_add_link(tmp_path, process): os.chdir(tmp_path) - add_process = subprocess.run(['archivebox', 'add', 'http://example.com'], capture_output=True) + add_process = subprocess.run(['archivebox', 'add', 'http://localhost:8080/static/example.com.html'], capture_output=True) archived_item_path = list(tmp_path.glob('archive/**/*'))[0] assert "index.json" in [x.name for x in archived_item_path.iterdir()] @@ -34,7 +34,7 @@ def test_add_link(tmp_path, process): def test_add_link_support_stdin(tmp_path, process): os.chdir(tmp_path) stdin_process = subprocess.Popen(["archivebox", "add"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - stdin_process.communicate(input="http://example.com".encode()) + stdin_process.communicate(input="http://localhost:8080/static/example.com.html".encode()) archived_item_path = list(tmp_path.glob('archive/**/*'))[0] assert "index.json" in [x.name for x in archived_item_path.iterdir()] diff --git a/tests/test_util.py b/tests/test_util.py index 1497de5a..0a076344 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,5 +1,5 @@ from archivebox import util def test_download_url_downloads_content(): - text = util.download_url("https://example.com") + text = util.download_url("http://localhost:8080/static/example.com.html") assert "Example Domain" in 
text \ No newline at end of file