mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-17 08:34:26 -04:00
improve latest title logic to take longest title
This commit is contained in:
parent
ad4bd49163
commit
f1823381d0
1 changed files with 21 additions and 8 deletions
|
@@ -170,18 +170,31 @@ class Snapshot(models.Model):
|
|||
@cached_property
def latest_title(self):
    """Return the best available title for this snapshot, or None.

    Lookup order:
      1. ``self.title`` when it is truthy.
      2. The longest non-blank output among this snapshot's succeeded
         'title' results in ``self.archiveresult_set``.
      3. The longest non-blank output among the succeeded results stored
         under ``self.history['title']``.

    Candidate outputs are stripped of surrounding whitespace before their
    lengths are compared, and blank candidates are ignored so that an empty
    result in one source never hides a real title in the next one.  When
    several candidates share the maximum length, the last one seen wins.
    """
    if self.title:
        return self.title

    def longest(candidates):
        # Single pass instead of sorting; `>=` keeps the last of equally
        # long candidates, matching what a stable sort followed by [-1]
        # would have selected.
        best = None
        for raw in candidates:
            text = (raw or '').strip()
            if text and (best is None or len(text) >= len(best)):
                best = text
        return best

    # take longest successful title from ArchiveResult db history
    db_outputs = (
        self.archiveresult_set
        .filter(extractor='title', status='succeeded', output__isnull=False)
        .values_list('output', flat=True)
    )
    title = longest(db_outputs)
    if title:
        return title

    # take longest successful title from Link json index file history
    try:
        history_results = self.history['title'] or ()
    except KeyError:
        history_results = ()

    return longest(
        result.output
        for result in history_results
        if result.status == 'succeeded'
    )
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue