diff --git a/archivebox/index/html.py b/archivebox/index/html.py
index 3eca5f01..cff50085 100644
--- a/archivebox/index/html.py
+++ b/archivebox/index/html.py
@@ -146,9 +146,15 @@ def snapshot_icons(snapshot) -> str:
for extractor, _ in EXTRACTORS:
if extractor not in exclude:
- exists = extractor_items[extractor] is not None
+ exists = False
+ if extractor_items[extractor] is not None:
+ outpath = (Path(path) / canon[f"{extractor}_path"])
+ if outpath.is_dir():
+ exists = any(outpath.glob('*.*'))
+ elif outpath.is_file():
+ exists = outpath.stat().st_size > 100
output += format_html(output_template, path, canon[f"{extractor}_path"], str(exists),
- extractor, icons.get(extractor, "?"))
+ extractor, icons.get(extractor, "?"))
if extractor == "wget":
# warc isn't technically it's own extractor, so we have to add it after wget
exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))