diff --git a/archivebox/actors/actor.py b/archivebox/actors/actor.py index e31b55f1..b5a82724 100644 --- a/archivebox/actors/actor.py +++ b/archivebox/actors/actor.py @@ -32,7 +32,7 @@ class ActorQueueIsEmpty(Exception): CPU_COUNT = cpu_count() DEFAULT_MAX_TICK_TIME = 60 -DEFAULT_MAX_CONCURRENT_ACTORS = min(max(2, int(CPU_COUNT * 0.6)), 8) # 2 < 60% * num available cpu cores < 8 +DEFAULT_MAX_CONCURRENT_ACTORS = min(max(2, int(CPU_COUNT * 0.6)), 8) # 2 < (60% * num available cpu cores) < 8 limit = lambda n, max: min(n, max) @@ -569,3 +569,4 @@ def compile_sql_update(queryset: QuerySet, update_kwargs: dict[str, Any], filter # e.g. UPDATE core_archiveresult SET status='%s', retry_at='%s' WHERE status NOT IN (%s, %s, %s) AND retry_at <= %s update_sql, update_params = query.get_compiler(queryset.db).as_sql() return update_sql, update_params + diff --git a/archivebox/core/actors.py b/archivebox/core/actors.py index 44b6d31d..18281336 100644 --- a/archivebox/core/actors.py +++ b/archivebox/core/actors.py @@ -10,12 +10,16 @@ from actors.actor import ActorType class SnapshotActor(ActorType[Snapshot]): + """ + The primary actor for progressing Snapshot objects + through their lifecycle using the SnapshotMachine. + """ Model = Snapshot StateMachineClass = SnapshotMachine - ACTIVE_STATE: ClassVar[State] = SnapshotMachine.started - FINAL_STATES: ClassVar[list[State]] = SnapshotMachine.final_states - STATE_FIELD_NAME: ClassVar[str] = SnapshotMachine.state_field_name + ACTIVE_STATE: ClassVar[State] = SnapshotMachine.started # 'started' + FINAL_STATES: ClassVar[list[State]] = SnapshotMachine.final_states # ['sealed'] + STATE_FIELD_NAME: ClassVar[str] = SnapshotMachine.state_field_name # status MAX_CONCURRENT_ACTORS: ClassVar[int] = 3 MAX_TICK_TIME: ClassVar[int] = 10 @@ -24,12 +28,16 @@ class SnapshotActor(ActorType[Snapshot]): class ArchiveResultActor(ActorType[ArchiveResult]): + """ + The primary actor for progressing ArchiveResult objects + through their lifecycle using the ArchiveResultMachine. + """ Model = ArchiveResult StateMachineClass = ArchiveResultMachine - ACTIVE_STATE: ClassVar[State] = ArchiveResultMachine.started - FINAL_STATES: ClassVar[list[State]] = ArchiveResultMachine.final_states - STATE_FIELD_NAME: ClassVar[str] = ArchiveResultMachine.state_field_name + ACTIVE_STATE: ClassVar[State] = ArchiveResultMachine.started # 'started' + FINAL_STATES: ClassVar[list[State]] = ArchiveResultMachine.final_states # ['succeeded', 'failed', 'skipped'] + STATE_FIELD_NAME: ClassVar[str] = ArchiveResultMachine.state_field_name # status MAX_CONCURRENT_ACTORS: ClassVar[int] = 6 MAX_TICK_TIME: ClassVar[int] = 60 diff --git a/archivebox/core/statemachines.py b/archivebox/core/statemachines.py index 9dc2ddb3..88a48b82 100644 --- a/archivebox/core/statemachines.py +++ b/archivebox/core/statemachines.py @@ -11,7 +11,11 @@ from core.models import Snapshot, ArchiveResult class SnapshotMachine(StateMachine, strict_states=True): - """State machine for managing Snapshot lifecycle.""" + """ + State machine for managing Snapshot lifecycle. + + https://github.com/ArchiveBox/ArchiveBox/wiki/ArchiveBox-Architecture-Diagrams + """ model: Snapshot @@ -47,8 +51,13 @@ class SnapshotMachine(StateMachine, strict_states=True): self.snapshot.retry_at = None self.snapshot.save() + class ArchiveResultMachine(StateMachine, strict_states=True): - """State machine for managing ArchiveResult lifecycle.""" + """ + State machine for managing ArchiveResult lifecycle. + + https://github.com/ArchiveBox/ArchiveBox/wiki/ArchiveBox-Architecture-Diagrams + """ model: ArchiveResult diff --git a/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/models.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/models.py index 7e5cd41f..e5f79a99 100644 --- a/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/models.py +++ b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/models.py @@ -1,14 +1,14 @@ -from django.db import models +# from django.db import models -from core.models import ArchiveResult +# from core.models import ArchiveResult -class SinglefileResultManager(models.Manager): - def get_queryset(self): - return super().get_queryset().filter(extractor='singlefile') +# class SinglefileResultManager(models.Manager): +# def get_queryset(self): +# return super().get_queryset().filter(extractor='singlefile') -class SinglefileResult(ArchiveResult): - objects = SinglefileResultManager() +# class SinglefileResult(ArchiveResult): +# objects = SinglefileResultManager() - class Meta: - proxy = True +# class Meta: +# proxy = True