load EXTRACTORS dynamically using importlib.import_module

2025-05-12 22:25:44 -04:00 · 2024-05-11 22:28:59 -07:00 · 2024-05-11 22:28:59 -07:00 · 457c42bf84
commit 457c42bf84
parent c7f55fc3ba
18 changed files with 198 additions and 40 deletions
--- a/archivebox/index/html.py
+++ b/archivebox/index/html.py
@@ -121,7 +121,7 @@ def snapshot_icons(snapshot) -> str:
    cache_key = f'{snapshot.id}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
    
    def calc_snapshot_icons():
-        from core.models import EXTRACTORS
+        from core.models import EXTRACTOR_CHOICES
        # start = datetime.now(timezone.utc)

        archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False)
@@ -147,12 +147,12 @@ def snapshot_icons(snapshot) -> str:
        # Missing specific entry for WARC

        extractor_outputs = defaultdict(lambda: None)
-        for extractor, _ in EXTRACTORS:
+        for extractor, _ in EXTRACTOR_CHOICES:
            for result in archive_results:
                if result.extractor == extractor and result:
                    extractor_outputs[extractor] = result

-        for extractor, _ in EXTRACTORS:
+        for extractor, _ in EXTRACTOR_CHOICES:
            if extractor not in exclude:
                existing = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
                # Check filesystem to see if anything is actually present (too slow, needs optimization/caching)