diff --git a/archivebox/extractors/title.py b/archivebox/extractors/title.py index 816c0484..194c57ad 100644 --- a/archivebox/extractors/title.py +++ b/archivebox/extractors/title.py @@ -62,9 +62,6 @@ class TitleParser(HTMLParser): @enforce_types def should_save_title(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool: - if is_static_file(link.url): - return False - # if link already has valid title, skip it if not overwrite and link.title and not link.title.lower().startswith('http'): return False @@ -113,7 +110,11 @@ def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) - timestamp=link.timestamp)\ .update(title=output) else: - raise ArchiveError('Unable to detect page title') + # if no content was returned, don't save a title (because it might be a temporary error) + if not html: + raise ArchiveError('Unable to detect page title') + # output = html[:128] # use first bit of content as the title + output = link.base_url # use the filename as the title (better UX) except Exception as err: status = 'failed' output = err