mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-15 07:34:27 -04:00
fix extractor path calculation
This commit is contained in:
parent
8498ca5c64
commit
276a505cae
2 changed files with 4 additions and 4 deletions
|
@@ -40,7 +40,7 @@ BUILTIN_PLUGIN_DIRS = {
|
||||||
'plugins_extractor': PACKAGE_DIR / 'plugins_extractor',
|
'plugins_extractor': PACKAGE_DIR / 'plugins_extractor',
|
||||||
}
|
}
|
||||||
USER_PLUGIN_DIRS = {
|
USER_PLUGIN_DIRS = {
|
||||||
'user_plugins': DATA_DIR / 'user_plugins',
|
'user_plugins': DATA_DIR / 'user_plugins',
|
||||||
}
|
}
|
||||||
|
|
||||||
BUILTIN_PLUGINS = abx.get_plugins_in_dirs(BUILTIN_PLUGIN_DIRS)
|
BUILTIN_PLUGINS = abx.get_plugins_in_dirs(BUILTIN_PLUGIN_DIRS)
|
||||||
|
|
|
@@ -86,7 +86,7 @@ WGET_BINARY = WgetBinary()
|
||||||
|
|
||||||
class WgetExtractor(BaseExtractor):
|
class WgetExtractor(BaseExtractor):
|
||||||
name: ExtractorName = 'wget'
|
name: ExtractorName = 'wget'
|
||||||
binary: str = WGET_BINARY.name
|
binary: BinName = WGET_BINARY.name
|
||||||
|
|
||||||
def get_output_path(self, snapshot) -> Path | None:
|
def get_output_path(self, snapshot) -> Path | None:
|
||||||
wget_index_path = wget_output_path(snapshot.as_link())
|
wget_index_path = wget_output_path(snapshot.as_link())
|
||||||
|
@@ -99,10 +99,10 @@ WGET_EXTRACTOR = WgetExtractor()
|
||||||
|
|
||||||
class WarcExtractor(BaseExtractor):
|
class WarcExtractor(BaseExtractor):
|
||||||
name: ExtractorName = 'warc'
|
name: ExtractorName = 'warc'
|
||||||
binary: str = WGET_BINARY.name
|
binary: BinName = WGET_BINARY.name
|
||||||
|
|
||||||
def get_output_path(self, snapshot) -> Path | None:
|
def get_output_path(self, snapshot) -> Path | None:
|
||||||
warc_files = (snapshot.link_dir / 'warc').glob('*.warc.gz')
|
warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz'))
|
||||||
if warc_files:
|
if warc_files:
|
||||||
return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0]
|
return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0]
|
||||||
return None
|
return None
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue