diff --git a/archivebox/parse.py b/archivebox/parse.py index 8093672a..5e3b2981 100644 --- a/archivebox/parse.py +++ b/archivebox/parse.py @@ -70,8 +70,11 @@ def parse_links(path): links += list(parser_func(file)) if links: break - except (ValueError, TypeError, IndexError, AttributeError, etree.ParseError) as err: - # parser not supported on this file + except Exception as err: + # we try each parser one by one, each parser will throw an exception if unsupported + # so we accept the first one that doesn't throw any exceptions and produces some non-0 output. + # uncomment the following line to see why the parser was unsupported for each attempted format + # print('[!] Parser {} failed: {} {}'.format(parser_name, err.__class__.__name__, err)) pass