From bb5879a4f73b1b5d5afcdbec27c5f1dc7a79567a Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 18 Feb 2019 23:45:49 -0500 Subject: [PATCH] fix some parser errors not being caught by bail out process --- archivebox/parse.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/archivebox/parse.py b/archivebox/parse.py index 8093672a..5e3b2981 100644 --- a/archivebox/parse.py +++ b/archivebox/parse.py @@ -70,8 +70,11 @@ def parse_links(path): links += list(parser_func(file)) if links: break - except (ValueError, TypeError, IndexError, AttributeError, etree.ParseError) as err: - # parser not supported on this file + except Exception as err: + # we try each parser one by one, each parser will throw an exception if unsupported + # so we accept the first one that doesn't throw any exceptions and produces some non-0 output. + # uncomment the following line to see why the parser was unsupported for each attempted format + # print('[!] Parser {} failed: {} {}'.format(parser_name, err.__class__.__name__, err)) pass