mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-29 05:55:28 -04:00
fix statemachine create_root_snapshot and retry timing
This commit is contained in:
parent
67c22b2df0
commit
1ec2753664
8 changed files with 48 additions and 40 deletions
|
@ -190,22 +190,28 @@ class Crawl(ABIDModel, ModelWithHealthStats, ModelWithStateMachine):
|
|||
from core.models import ArchiveResult
|
||||
|
||||
snapshot_ids = self.snapshot_set.values_list('id', flat=True)
|
||||
pending_archiveresults = ArchiveResult.objects.filter(snapshot_id__in=snapshot_ids, retry_at__isnull=True)
|
||||
pending_archiveresults = ArchiveResult.objects.filter(snapshot_id__in=snapshot_ids, retry_at__isnull=False)
|
||||
return pending_archiveresults
|
||||
|
||||
def create_root_snapshot(self) -> 'Snapshot':
    """Return the root Snapshot for this crawl's seed URL, creating it if needed.

    The snapshot is looked up by ``(crawl=self, url=self.seed.uri)``. When a
    matching row already exists it is returned untouched, so its ``status``,
    ``retry_at`` and ``timestamp`` are never reset. Otherwise a new row is
    created with ``status=Snapshot.INITIAL_STATE``, ``retry_at`` set to the
    current time, and ``timestamp`` set to the string form of that same
    instant's POSIX timestamp.

    Returns:
        The existing or newly created root ``Snapshot``.

    Raises:
        Snapshot.MultipleObjectsReturned: if more than one snapshot matches
            the ``(crawl, url)`` lookup (propagated from the ORM).
    """
    # Function-scope import, as in the original
    # (presumably avoids a circular import with core.models -- confirm).
    from core.models import Snapshot

    # Take one reading of the clock so retry_at and timestamp describe the
    # exact same instant.
    now = timezone.now()

    # get_or_create() does the "fetch existing, else insert" step in a single
    # ORM call. Unlike a separate get() followed by update_or_create(), it
    # cannot overwrite the fields of a row that another worker created in
    # between the two calls.
    root_snapshot, created = Snapshot.objects.get_or_create(
        crawl=self,
        url=self.seed.uri,
        defaults={
            'status': Snapshot.INITIAL_STATE,
            'retry_at': now,
            'timestamp': str(now.timestamp()),
            # 'config': self.seed.config,
        },
    )

    if created:
        # NOTE(review): get_or_create() has already saved the new row. This
        # extra save() is retained from the original creation path in case
        # Snapshot.save() does additional work on a second pass -- confirm
        # and drop if it does not.
        root_snapshot.save()

    return root_snapshot
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue