mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-17 00:24:26 -04:00
fix: History is now correctly being generated from ArchiveResult. Removed old divergent logic.
This commit is contained in:
parent
d5cabe05af
commit
075d310a9f
2 changed files with 32 additions and 4 deletions
|
@ -4,6 +4,7 @@ import uuid
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, Optional, List
|
from typing import Dict, Optional, List
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
from django.db import models, transaction
|
from django.db import models, transaction
|
||||||
from django.utils.functional import cached_property
|
from django.utils.functional import cached_property
|
||||||
|
@ -107,7 +108,33 @@ class Snapshot(models.Model):
|
||||||
info.pop("tags")
|
info.pop("tags")
|
||||||
return cls(**info)
|
return cls(**info)
|
||||||
|
|
||||||
|
def get_history(self) -> dict:
    """
    Generates the history dictionary out of the stored ArchiveResults.

    Every ArchiveResult related to this snapshot is turned into a
    JSON-friendly dict and grouped under the name of the extractor that
    produced it. Entries keep the order in which the related manager
    yields them.

    Returns:
        A plain ``dict`` mapping extractor name -> list of result dicts
        with the keys ``cmd``, ``cmd_version``, ``end_ts``, ``start_ts``,
        ``pwd``, ``output``, ``schema`` and ``status``. A snapshot without
        any ArchiveResult yields ``{}``.
    """
    def _iso(timestamp):
        # Timestamps are serialized as ISO-8601 strings. A missing timestamp
        # is kept as None instead of raising AttributeError, so one
        # incomplete row cannot break serialization of the whole snapshot.
        return timestamp.isoformat() if timestamp is not None else None

    history = defaultdict(list)
    for result in self.archiveresult_set.all():
        history[result.extractor].append(
            {
                "cmd": result.cmd,
                "cmd_version": result.cmd_version,
                "end_ts": _iso(result.end_ts),
                "start_ts": _iso(result.start_ts),
                "pwd": result.pwd,
                "output": result.output,
                "schema": "ArchiveResult",
                "status": result.status,
            }
        )

    # Return a plain dict so that looking up an extractor with no results
    # raises KeyError rather than silently inserting an empty list.
    return dict(history)
|
||||||
|
|
||||||
def as_json(self, *args) -> dict:
|
def as_json(self, *args) -> dict:
|
||||||
|
"""
|
||||||
|
Returns the snapshot in json format.
|
||||||
|
id is converted to str
|
||||||
|
history is extracted from ArchiveResult
|
||||||
|
"""
|
||||||
args = args or self.keys
|
args = args or self.keys
|
||||||
output = {
|
output = {
|
||||||
key: getattr(self, key)
|
key: getattr(self, key)
|
||||||
|
@ -116,6 +143,8 @@ class Snapshot(models.Model):
|
||||||
}
|
}
|
||||||
if "id" in output.keys():
|
if "id" in output.keys():
|
||||||
output["id"] = str(output["id"])
|
output["id"] = str(output["id"])
|
||||||
|
|
||||||
|
output["history"] = self.get_history()
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -88,7 +88,8 @@ def archive_snapshot(snapshot: Model, overwrite: bool=False, methods: Optional[I
|
||||||
details = {"history": {}}
|
details = {"history": {}}
|
||||||
write_snapshot_details(snapshot, out_dir=out_dir, skip_sql_index=False)
|
write_snapshot_details(snapshot, out_dir=out_dir, skip_sql_index=False)
|
||||||
else:
|
else:
|
||||||
details = snapshot.details
|
details = snapshot.details #TODO: This can be retrieved from the sqlite database too.
|
||||||
|
# If reading it from the database makes more sense, this can easily be changed.
|
||||||
|
|
||||||
#log_link_archiving_started(link, out_dir, is_new)
|
#log_link_archiving_started(link, out_dir, is_new)
|
||||||
stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}
|
stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}
|
||||||
|
@ -103,8 +104,6 @@ def archive_snapshot(snapshot: Model, overwrite: bool=False, methods: Optional[I
|
||||||
|
|
||||||
result = method_function(snapshot=snapshot, out_dir=out_dir)
|
result = method_function(snapshot=snapshot, out_dir=out_dir)
|
||||||
|
|
||||||
details["history"][method_name].append(result)
|
|
||||||
|
|
||||||
stats[result.status] += 1
|
stats[result.status] += 1
|
||||||
log_archive_method_finished(result)
|
log_archive_method_finished(result)
|
||||||
write_search_index(snapshot=snapshot, texts=result.index_texts)
|
write_search_index(snapshot=snapshot, texts=result.index_texts)
|
||||||
|
@ -135,7 +134,7 @@ def archive_snapshot(snapshot: Model, overwrite: bool=False, methods: Optional[I
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
try:
|
try:
|
||||||
write_snapshot_details(snapshot, out_dir=link.link_dir)
|
write_snapshot_details(snapshot, out_dir=snapshot.snapshot_dir)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
raise
|
raise
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue