ArchiveBox/packages/abx-plugin-readability-extractor/extractors.py
Nick Sweeting 5d9a32c364
wip
2024-10-25 01:06:12 -07:00

20 lines
498 B
Python

__package__ = 'plugins_extractor.readability'
from pathlib import Path
from pydantic_pkgr import BinName
from abx.archivebox.base_extractor import BaseExtractor
from .binaries import READABILITY_BINARY
class ReadabilityExtractor(BaseExtractor):
name: str = 'readability'
binary: BinName = READABILITY_BINARY.name
def get_output_path(self, snapshot) -> Path:
return Path(snapshot.link_dir) / 'readability' / 'content.html'
READABILITY_EXTRACTOR = ReadabilityExtractor()