mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-17 16:44:26 -04:00
improve latest title logic to take longest title
This commit is contained in:
parent
ad4bd49163
commit
f1823381d0
1 changed file with 21 additions and 8 deletions
|
@@ -170,18 +170,31 @@ class Snapshot(models.Model):
|
||||||
@cached_property
|
@cached_property
|
||||||
def latest_title(self):
|
def latest_title(self):
|
||||||
if self.title:
|
if self.title:
|
||||||
return self.title
|
return self.title # whoopdedoo that was easy
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return self.archiveresult_set.filter(extractor='title', status='succeeded')[0].output
|
# take longest successful title from ArchiveResult db history
|
||||||
except ArchiveResult.DoesNotExist:
|
return sorted(
|
||||||
|
self.archiveresult_set\
|
||||||
|
.filter(extractor='title', status='succeeded', output__isnull=False)\
|
||||||
|
.values_list('output', flat=True),
|
||||||
|
key=lambda r: len(r),
|
||||||
|
)[-1]
|
||||||
|
except IndexError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if ('title' in self.history
|
try:
|
||||||
and self.history['title']
|
# take longest successful title from Link json index file history
|
||||||
and (self.history['title'][-1].status == 'succeeded')
|
return sorted(
|
||||||
and self.history['title'][-1].output.strip()):
|
(
|
||||||
return self.history['title'][-1].output.strip()
|
result.output.strip()
|
||||||
|
for result in self.history['title']
|
||||||
|
if result.status == 'succeeded' and result.output.strip()
|
||||||
|
),
|
||||||
|
key=lambda r: len(r),
|
||||||
|
)[-1]
|
||||||
|
except (KeyError, IndexError):
|
||||||
|
pass
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue