mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-30 06:25:28 -04:00
load EXTRACTORS dynamically using importlib.import_module
This commit is contained in:
parent
c7f55fc3ba
commit
457c42bf84
18 changed files with 198 additions and 40 deletions
|
@@ -24,6 +24,8 @@ from ..config import (
|
|||
)
|
||||
from ..logging_util import TimedProgress
|
||||
|
||||
def get_output_path():
|
||||
return 'archive.org.txt'
|
||||
|
||||
|
||||
@enforce_types
|
||||
|
@@ -32,7 +34,7 @@ def should_save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, overwr
|
|||
return False
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
if not overwrite and (out_dir / 'archive.org.txt').exists():
|
||||
if not overwrite and (out_dir / get_output_path()).exists():
|
||||
# if open(path, 'r', encoding='utf-8').read().strip() != 'None':
|
||||
return False
|
||||
|
||||
|
@@ -43,7 +45,7 @@ def save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, timeout: int=
|
|||
"""submit site to archive.org for archiving via their service, save returned archive url"""
|
||||
|
||||
out_dir = out_dir or Path(link.link_dir)
|
||||
output: ArchiveOutput = 'archive.org.txt'
|
||||
output: ArchiveOutput = get_output_path()
|
||||
archive_org_url = None
|
||||
submit_url = 'https://web.archive.org/save/{}'.format(link.url)
|
||||
# later options take precedence
|
||||
|
@@ -88,7 +90,7 @@ def save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, timeout: int=
|
|||
archive_org_url = archive_org_url or submit_url
|
||||
with open(str(out_dir / output), 'w', encoding='utf-8') as f:
|
||||
f.write(archive_org_url)
|
||||
chmod_file('archive.org.txt', cwd=str(out_dir))
|
||||
chmod_file(str(out_dir / output), cwd=str(out_dir))
|
||||
output = archive_org_url
|
||||
|
||||
return ArchiveResult(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue