Make created_by_id auto-apply to any ArchiveResults created under a Snapshot
Some checks failed
Build GitHub Pages website / build (push) Has been cancelled
Run linters / lint (push) Has been cancelled
Build Debian package / build (push) Has been cancelled
Build Docker image / buildx (push) Has been cancelled
Build Homebrew package / build (push) Has been cancelled
CodeQL / Analyze (python) (push) Has been cancelled
Build Pip package / build (push) Has been cancelled
Run tests / python_tests (ubuntu-22.04, 3.11) (push) Has been cancelled
Run tests / docker_tests (push) Has been cancelled
Build GitHub Pages website / deploy (push) Has been cancelled

This commit is contained in:
Nick Sweeting 2024-08-20 19:43:07 -07:00
parent c30ae1d2cb
commit 9b1659c72f
No known key found for this signature in database
4 changed files with 13 additions and 12 deletions

View file

@ -566,7 +566,7 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
@enforce_types
def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> List[Link]:
"""
Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
You can run this to archive single pages without needing to create a whole collection with archivebox init.
@ -580,7 +580,7 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
raise SystemExit(2)
methods = extractors.split(",") if extractors else ignore_methods(['title'])
archive_link(oneshot_link[0], out_dir=out_dir, methods=methods)
archive_link(oneshot_link[0], out_dir=out_dir, methods=methods, created_by_id=created_by_id)
return oneshot_link
@enforce_types
@ -659,13 +659,14 @@ def add(urls: Union[str, List[str]],
if index_only:
# mock archive all the links using the fake index_only extractor method in order to update their state
if overwrite:
archive_links(imported_links, overwrite=overwrite, methods=['index_only'], out_dir=out_dir)
archive_links(imported_links, overwrite=overwrite, methods=['index_only'], out_dir=out_dir, created_by_id=created_by_id)
else:
archive_links(new_links, overwrite=False, methods=['index_only'], out_dir=out_dir)
archive_links(new_links, overwrite=False, methods=['index_only'], out_dir=out_dir, created_by_id=created_by_id)
else:
# fully run the archive extractor methods for each link
archive_kwargs = {
"out_dir": out_dir,
"created_by_id": created_by_id,
}
if extractors:
archive_kwargs["methods"] = extractors