mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-18 17:14:39 -04:00
test: Fix tests post-rebase
This commit is contained in:
parent
422664079a
commit
b18bbf8874
15 changed files with 114 additions and 121 deletions
|
@ -1,7 +1,7 @@
|
|||
__package__ = 'archivebox.extractors'
|
||||
|
||||
import os
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Optional, List, Dict, Tuple
|
||||
from collections import defaultdict
|
||||
|
||||
|
@ -24,22 +24,22 @@ from ..logging_util import TimedProgress
|
|||
|
||||
|
||||
@enforce_types
|
||||
def should_save_archive_dot_org(link: Link, out_dir: Optional[str]=None) -> bool:
|
||||
out_dir = out_dir or link.link_dir
|
||||
def should_save_archive_dot_org(link: Link, out_dir: Optional[Path]=None) -> bool:
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if is_static_file(link.url):
|
||||
return False
|
||||
|
||||
if os.path.exists(os.path.join(out_dir, 'archive.org.txt')):
|
||||
if (out_dir / "archive.org.txt").exists():
|
||||
# if open(path, 'r').read().strip() != 'None':
|
||||
return False
|
||||
|
||||
return SAVE_ARCHIVE_DOT_ORG
|
||||
|
||||
@enforce_types
|
||||
def save_archive_dot_org(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT) -> ArchiveResult:
|
||||
def save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
|
||||
"""submit site to archive.org for archiving via their service, save returned archive url"""
|
||||
|
||||
out_dir = out_dir or link.link_dir
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
output: ArchiveOutput = 'archive.org.txt'
|
||||
archive_org_url = None
|
||||
submit_url = 'https://web.archive.org/save/{}'.format(link.url)
|
||||
|
@ -57,7 +57,7 @@ def save_archive_dot_org(link: Link, out_dir: Optional[str]=None, timeout: int=T
|
|||
status = 'succeeded'
|
||||
timer = TimedProgress(timeout, prefix=' ')
|
||||
try:
|
||||
result = run(cmd, cwd=out_dir, timeout=timeout)
|
||||
result = run(cmd, cwd=str(out_dir), timeout=timeout)
|
||||
content_location, errors = parse_archive_dot_org_response(result.stdout)
|
||||
if content_location:
|
||||
archive_org_url = 'https://web.archive.org{}'.format(content_location[0])
|
||||
|
@ -80,14 +80,14 @@ def save_archive_dot_org(link: Link, out_dir: Optional[str]=None, timeout: int=T
|
|||
# the URL in person, it will attempt to re-archive it, and it'll show the
|
||||
# nicer error message explaining why the url was rejected if it fails.
|
||||
archive_org_url = archive_org_url or submit_url
|
||||
with open(os.path.join(out_dir, str(output)), 'w', encoding='utf-8') as f:
|
||||
with open(str(out_dir / output), 'w', encoding='utf-8') as f:
|
||||
f.write(archive_org_url)
|
||||
chmod_file('archive.org.txt', cwd=out_dir)
|
||||
chmod_file('archive.org.txt', cwd=str(out_dir))
|
||||
output = archive_org_url
|
||||
|
||||
return ArchiveResult(
|
||||
cmd=cmd,
|
||||
pwd=out_dir,
|
||||
pwd=str(out_dir),
|
||||
cmd_version=CURL_VERSION,
|
||||
output=output,
|
||||
status=status,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue