Mirror of https://github.com/ArchiveBox/ArchiveBox.git, synced 2025-05-17 00:24:26 -04:00
make created_by_id autoapply to any ArchiveResults created under Snapshot
Some checks failed
Build GitHub Pages website / build (push) Has been cancelled
Run linters / lint (push) Has been cancelled
Build Debian package / build (push) Has been cancelled
Build Docker image / buildx (push) Has been cancelled
Build Homebrew package / build (push) Has been cancelled
CodeQL / Analyze (python) (push) Has been cancelled
Build Pip package / build (push) Has been cancelled
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Has been cancelled
Run tests / docker_tests (push) Has been cancelled
Build GitHub Pages website / deploy (push) Has been cancelled
parent c30ae1d2cb
commit 9b1659c72f
4 changed files with 13 additions and 12 deletions
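What the diff below accomplishes, end to end: a created_by_id passed in at the top level (add()/oneshot()) is now threaded down through archive_links() and archive_link() into write_link_to_sql_index(), and every ArchiveResult written during extraction inherits snapshot.created_by_id instead of being saved without an owner. A minimal, framework-free sketch of that attribution chain (the Snapshot/ArchiveResult classes below are hypothetical stand-ins for the real Django models, not ArchiveBox's actual API):

    from dataclasses import dataclass

    @dataclass
    class Snapshot:                       # stand-in for the real Snapshot model
        url: str
        created_by_id: int | None = None

    @dataclass
    class ArchiveResult:                  # stand-in for the real ArchiveResult model
        snapshot: Snapshot
        extractor: str
        created_by_id: int | None = None

    def archive_link(snapshot: Snapshot, created_by_id: int | None = None) -> list[ArchiveResult]:
        # the Snapshot keeps the creator it was first indexed with...
        if snapshot.created_by_id is None:
            snapshot.created_by_id = created_by_id
        # ...and each ArchiveResult inherits the Snapshot's owner, not the caller's argument
        return [
            ArchiveResult(snapshot, extractor=name, created_by_id=snapshot.created_by_id)
            for name in ('title', 'wget', 'screenshot')
        ]

    results = archive_link(Snapshot('https://example.com'), created_by_id=1)
    assert all(r.created_by_id == 1 for r in results)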
@@ -532,7 +532,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):

 @admin.register(Tag, site=archivebox_admin)
 class TagAdmin(ABIDModelAdmin):
-    list_display = ('abid', 'name', 'created', 'created_by', 'num_snapshots', 'snapshots')
+    list_display = ('created', 'created_by', 'abid', 'name', 'num_snapshots', 'snapshots')
     sort_fields = ('name', 'slug', 'abid', 'created_by', 'created')
     readonly_fields = ('slug', 'abid', 'created', 'modified', 'API', 'num_snapshots', 'snapshots')
     search_fields = ('abid', 'name', 'slug')

@@ -107,7 +107,7 @@ def ignore_methods(to_ignore: List[str]) -> Iterable[str]:
     return [x[0] for x in ARCHIVE_METHODS if x[0] not in to_ignore]

 @enforce_types
-def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None) -> Link:
+def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None, created_by_id: int | None=None) -> Link:
     """download the DOM, PDF, and a screenshot into a folder named after the link's timestamp"""

     # TODO: Remove when the input is changed to be a snapshot. Suboptimal approach.

@@ -115,7 +115,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
     try:
         snapshot = Snapshot.objects.get(url=link.url) # TODO: This will be unnecessary once everything is a snapshot
     except Snapshot.DoesNotExist:
-        snapshot = write_link_to_sql_index(link)
+        snapshot = write_link_to_sql_index(link, created_by_id=created_by_id)

     active_methods = get_archive_methods_for_link(link)

@@ -154,7 +154,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
                 log_archive_method_finished(result)
                 write_search_index(link=link, texts=result.index_texts)
                 ArchiveResult.objects.create(snapshot=snapshot, extractor=method_name, cmd=result.cmd, cmd_version=result.cmd_version,
-                                             output=result.output, pwd=result.pwd, start_ts=result.start_ts, end_ts=result.end_ts, status=result.status)
+                                             output=result.output, pwd=result.pwd, start_ts=result.start_ts, end_ts=result.end_ts, status=result.status, created_by_id=snapshot.created_by_id)


                 # bump the updated time on the main Snapshot here, this is critical

@@ -213,7 +213,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
     return link

 @enforce_types
-def archive_links(all_links: Union[Iterable[Link], QuerySet], overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None) -> List[Link]:
+def archive_links(all_links: Union[Iterable[Link], QuerySet], overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None, created_by_id: int | None=None) -> List[Link]:

     if type(all_links) is QuerySet:
         num_links: int = all_links.count()

@@ -232,7 +232,7 @@ def archive_links(all_links: Union[Iterable[Link], QuerySet], overwrite: bool=Fa
         for link in all_links:
             idx += 1
             to_archive = get_link(link)
-            archive_link(to_archive, overwrite=overwrite, methods=methods, out_dir=Path(link.link_dir))
+            archive_link(to_archive, overwrite=overwrite, methods=methods, out_dir=Path(link.link_dir), created_by_id=created_by_id)
     except KeyboardInterrupt:
         log_archiving_paused(num_links, idx, link.timestamp)
         raise SystemExit(0)

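With the archive_link()/archive_links() signatures above, any caller that knows the acting user can attribute an entire crawl. A hypothetical example (assumes a configured Django environment inside an initialized collection, e.g. via archivebox shell; the import paths are assumptions, only the keyword argument comes from this diff):

    from django.contrib.auth import get_user_model
    from core.models import Snapshot                  # assumed app path for the Snapshot model
    from archivebox.extractors import archive_links   # assumed import path for archive_links()

    user = get_user_model().objects.first()           # hypothetical acting user
    # re-run extractors over that user's snapshots, keeping attribution intact
    archive_links(Snapshot.objects.filter(created_by_id=user.pk),
                  overwrite=False,
                  created_by_id=user.pk)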
@@ -70,7 +70,7 @@ def write_link_to_sql_index(link: Link, created_by_id: int | None=None):
                         'cmd_version': entry.get('cmd_version') or 'unknown',
                         'pwd': entry['pwd'],
                         'status': entry['status'],
-                        'created_by_id': created_by_id,
+                        'created_by_id': snapshot.created_by_id,
                     }
                 )
         else:

@@ -85,7 +85,7 @@ def write_link_to_sql_index(link: Link, created_by_id: int | None=None):
                         'cmd_version': entry.cmd_version or 'unknown',
                         'pwd': entry.pwd,
                         'status': entry.status,
-                        'created_by_id': created_by_id,
+                        'created_by_id': snapshot.created_by_id,
                     }
                 )

@@ -566,7 +566,7 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:


 @enforce_types
-def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
+def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> List[Link]:
     """
     Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
     You can run this to archive single pages without needing to create a whole collection with archivebox init.

@@ -580,7 +580,7 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
         raise SystemExit(2)

     methods = extractors.split(",") if extractors else ignore_methods(['title'])
-    archive_link(oneshot_link[0], out_dir=out_dir, methods=methods)
+    archive_link(oneshot_link[0], out_dir=out_dir, methods=methods, created_by_id=created_by_id)
     return oneshot_link

 @enforce_types

@@ -659,13 +659,14 @@ def add(urls: Union[str, List[str]],
     if index_only:
         # mock archive all the links using the fake index_only extractor method in order to update their state
         if overwrite:
-            archive_links(imported_links, overwrite=overwrite, methods=['index_only'], out_dir=out_dir)
+            archive_links(imported_links, overwrite=overwrite, methods=['index_only'], out_dir=out_dir, created_by_id=created_by_id)
         else:
-            archive_links(new_links, overwrite=False, methods=['index_only'], out_dir=out_dir)
+            archive_links(new_links, overwrite=False, methods=['index_only'], out_dir=out_dir, created_by_id=created_by_id)
     else:
         # fully run the archive extractor methods for each link
         archive_kwargs = {
             "out_dir": out_dir,
+            "created_by_id": created_by_id,
         }
         if extractors:
             archive_kwargs["methods"] = extractors
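At the top level, oneshot() now forwards the same keyword (and add() passes it into archive_links() through archive_kwargs), so attribution can be set at the point of ingestion. A hypothetical call, again assuming a configured collection and that archivebox.main is the import path:

    from archivebox.main import oneshot

    # archive a single page, attributing the Snapshot and all of its
    # ArchiveResults to user id 1 via the parameter added in this commit
    oneshot('https://example.com', created_by_id=1)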