mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-20 09:55:10 -04:00
Replaced os.path in init parsers
This commit is contained in:
parent
73418836f8
commit
2c62abb270
1 changed file with 2 additions and 3 deletions
|
@@ -8,7 +8,6 @@ For examples of supported import formats see tests/.
|
||||||
__package__ = 'archivebox.parsers'
|
__package__ = 'archivebox.parsers'
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import os
|
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
|
|
||||||
from typing import IO, Tuple, List, Optional
|
from typing import IO, Tuple, List, Optional
|
||||||
|
@@ -128,7 +127,7 @@ def run_parser_functions(to_parse: IO[str], timer, root_url: Optional[str]=None)
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str:
|
def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str:
|
||||||
ts = str(datetime.now().timestamp()).split('.', 1)[0]
|
ts = str(datetime.now().timestamp()).split('.', 1)[0]
|
||||||
source_path = os.path.join(out_dir, SOURCES_DIR_NAME, filename.format(ts=ts))
|
source_path = str(out_dir / SOURCES_DIR_NAME / filename.format(ts=ts))
|
||||||
atomic_write(source_path, raw_text)
|
atomic_write(source_path, raw_text)
|
||||||
log_source_saved(source_file=source_path)
|
log_source_saved(source_file=source_path)
|
||||||
return source_path
|
return source_path
|
||||||
|
@@ -138,7 +137,7 @@ def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir:
|
||||||
def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=OUTPUT_DIR) -> str:
|
def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=OUTPUT_DIR) -> str:
|
||||||
"""download a given url's content into output/sources/domain-<timestamp>.txt"""
|
"""download a given url's content into output/sources/domain-<timestamp>.txt"""
|
||||||
ts = str(datetime.now().timestamp()).split('.', 1)[0]
|
ts = str(datetime.now().timestamp()).split('.', 1)[0]
|
||||||
source_path = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME, filename.format(basename=basename(path), ts=ts))
|
source_path = str(OUTPUT_DIR / SOURCES_DIR_NAME / filename.format(basename=basename(path), ts=ts))
|
||||||
|
|
||||||
if any(path.startswith(s) for s in ('http://', 'https://', 'ftp://')):
|
if any(path.startswith(s) for s in ('http://', 'https://', 'ftp://')):
|
||||||
# Source is a URL that needs to be downloaded
|
# Source is a URL that needs to be downloaded
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue