mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-16 08:04:26 -04:00
fix parsing errors for older archive index formats
This commit is contained in:
parent
1ac99621ab
commit
204de37eb9
2 changed files with 4 additions and 3 deletions
|
@ -126,7 +126,7 @@ class Link:
|
||||||
assert isinstance(self.url, str) and '://' in self.url
|
assert isinstance(self.url, str) and '://' in self.url
|
||||||
assert self.updated is None or isinstance(self.updated, datetime)
|
assert self.updated is None or isinstance(self.updated, datetime)
|
||||||
assert self.title is None or (isinstance(self.title, str) and self.title)
|
assert self.title is None or (isinstance(self.title, str) and self.title)
|
||||||
assert self.tags is None or (isinstance(self.tags, str) and self.tags)
|
assert self.tags is None or isinstance(self.tags, str)
|
||||||
assert isinstance(self.sources, list)
|
assert isinstance(self.sources, list)
|
||||||
assert all(isinstance(source, str) and source for source in self.sources)
|
assert all(isinstance(source, str) and source for source in self.sources)
|
||||||
assert isinstance(self.history, dict)
|
assert isinstance(self.history, dict)
|
||||||
|
@ -186,7 +186,7 @@ class Link:
|
||||||
for key, val in json_info.items()
|
for key, val in json_info.items()
|
||||||
if key in cls.field_names()
|
if key in cls.field_names()
|
||||||
}
|
}
|
||||||
info['updated'] = parse_date(info['updated'])
|
info['updated'] = parse_date(info.get('updated'))
|
||||||
info['sources'] = info.get('sources') or []
|
info['sources'] = info.get('sources') or []
|
||||||
|
|
||||||
json_history = info.get('history') or {}
|
json_history = info.get('history') or {}
|
||||||
|
|
|
@ -71,11 +71,12 @@ def parse_links(source_file: str) -> Tuple[List[Link], str]:
|
||||||
timer.end()
|
timer.end()
|
||||||
return links, parser_name
|
return links, parser_name
|
||||||
except Exception as err: # noqa
|
except Exception as err: # noqa
|
||||||
|
pass
|
||||||
# Parsers are tried one by one down the list, and the first one
|
# Parsers are tried one by one down the list, and the first one
|
||||||
# that succeeds is used. To see why a certain parser was not used
|
# that succeeds is used. To see why a certain parser was not used
|
||||||
# due to error or format incompatibility, uncomment this line:
|
# due to error or format incompatibility, uncomment this line:
|
||||||
# print('[!] Parser {} failed: {} {}'.format(parser_name, err.__class__.__name__, err))
|
# print('[!] Parser {} failed: {} {}'.format(parser_name, err.__class__.__name__, err))
|
||||||
pass
|
# raise
|
||||||
|
|
||||||
timer.end()
|
timer.end()
|
||||||
return [], 'Failed to parse'
|
return [], 'Failed to parse'
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue