From 276a505cae99136f4f8aaa2ea4734705334066ac Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 1 Oct 2024 21:44:56 -0700 Subject: [PATCH] fix extractor path calculation --- archivebox/core/settings.py | 2 +- archivebox/plugins_extractor/wget/apps.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index d97c8529..c76979e1 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -40,7 +40,7 @@ BUILTIN_PLUGIN_DIRS = { 'plugins_extractor': PACKAGE_DIR / 'plugins_extractor', } USER_PLUGIN_DIRS = { - 'user_plugins': DATA_DIR / 'user_plugins', + 'user_plugins': DATA_DIR / 'user_plugins', } BUILTIN_PLUGINS = abx.get_plugins_in_dirs(BUILTIN_PLUGIN_DIRS) diff --git a/archivebox/plugins_extractor/wget/apps.py b/archivebox/plugins_extractor/wget/apps.py index 7cda7059..171bebc4 100644 --- a/archivebox/plugins_extractor/wget/apps.py +++ b/archivebox/plugins_extractor/wget/apps.py @@ -86,7 +86,7 @@ WGET_BINARY = WgetBinary() class WgetExtractor(BaseExtractor): name: ExtractorName = 'wget' - binary: str = WGET_BINARY.name + binary: BinName = WGET_BINARY.name def get_output_path(self, snapshot) -> Path | None: wget_index_path = wget_output_path(snapshot.as_link()) @@ -99,10 +99,10 @@ WGET_EXTRACTOR = WgetExtractor() class WarcExtractor(BaseExtractor): name: ExtractorName = 'warc' - binary: str = WGET_BINARY.name + binary: BinName = WGET_BINARY.name def get_output_path(self, snapshot) -> Path | None: - warc_files = (snapshot.link_dir / 'warc').glob('*.warc.gz') + warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz')) if warc_files: return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0] return None