diff --git a/archivebox/parsers/__init__.py b/archivebox/parsers/__init__.py
index 520b9609..5d0d5ca5 100644
--- a/archivebox/parsers/__init__.py
+++ b/archivebox/parsers/__init__.py
@@ -8,7 +8,6 @@ For examples of supported import formats see tests/.
 __package__ = 'archivebox.parsers'
 
 import re
-import os
 from io import StringIO
 
 from typing import IO, Tuple, List, Optional
@@ -128,7 +127,7 @@ def run_parser_functions(to_parse: IO[str], timer, root_url: Optional[str]=None)
 @enforce_types
 def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str:
     ts = str(datetime.now().timestamp()).split('.', 1)[0]
-    source_path = os.path.join(out_dir, SOURCES_DIR_NAME, filename.format(ts=ts))
+    source_path = str(out_dir / SOURCES_DIR_NAME / filename.format(ts=ts))
     atomic_write(source_path, raw_text)
     log_source_saved(source_file=source_path)
     return source_path
@@ -138,7 +137,7 @@ def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir:
 def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=OUTPUT_DIR) -> str:
     """download a given url's content into output/sources/domain-.txt"""
     ts = str(datetime.now().timestamp()).split('.', 1)[0]
-    source_path = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME, filename.format(basename=basename(path), ts=ts))
+    source_path = str(out_dir / SOURCES_DIR_NAME / filename.format(basename=basename(path), ts=ts))
 
     if any(path.startswith(s) for s in ('http://', 'https://', 'ftp://')):
         # Source is a URL that needs to be downloaded