mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-16 16:14:28 -04:00
refactor: oneshot command is functional
This commit is contained in:
parent
973f8b6abc
commit
c51d789ad4
5 changed files with 13 additions and 10 deletions
|
@ -14,7 +14,7 @@ from ..index.schema import Link
|
||||||
from ..config import CONFIG
|
from ..config import CONFIG
|
||||||
|
|
||||||
#EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()]
|
#EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()]
|
||||||
EXTRACTORS = ["title", "wget"]
|
EXTRACTORS = [("title", "title"), ("wget", "wget")]
|
||||||
STATUS_CHOICES = [
|
STATUS_CHOICES = [
|
||||||
("succeeded", "succeeded"),
|
("succeeded", "succeeded"),
|
||||||
("failed", "failed"),
|
("failed", "failed"),
|
||||||
|
|
|
@ -41,6 +41,7 @@ class MainIndex(View):
|
||||||
|
|
||||||
|
|
||||||
class LinkDetails(View):
|
class LinkDetails(View):
|
||||||
|
|
||||||
def get(self, request, path):
|
def get(self, request, path):
|
||||||
# missing trailing slash -> redirect to index
|
# missing trailing slash -> redirect to index
|
||||||
if '/' not in path:
|
if '/' not in path:
|
||||||
|
|
|
@ -61,7 +61,7 @@ def write_sql_snapshot_details(snapshot: Model, out_dir: Path=OUTPUT_DIR) -> Non
|
||||||
try:
|
try:
|
||||||
snap = Snapshot.objects.get(url=snapshot.url)
|
snap = Snapshot.objects.get(url=snapshot.url)
|
||||||
except Snapshot.DoesNotExist:
|
except Snapshot.DoesNotExist:
|
||||||
snap = write_snapshot_to_sql_index(snapshot)
|
snap = write_snapshot_to_index(snapshot)
|
||||||
snap.title = snapshot.title
|
snap.title = snapshot.title
|
||||||
|
|
||||||
# TODO: If there are actual tags, this will break
|
# TODO: If there are actual tags, this will break
|
||||||
|
|
|
@ -22,7 +22,7 @@ from .cli import (
|
||||||
from .parsers import (
|
from .parsers import (
|
||||||
save_text_as_source,
|
save_text_as_source,
|
||||||
save_file_as_source,
|
save_file_as_source,
|
||||||
parse_links_memory,
|
parse_snapshots_memory,
|
||||||
)
|
)
|
||||||
from .index.schema import Link
|
from .index.schema import Link
|
||||||
from .util import enforce_types # type: ignore
|
from .util import enforce_types # type: ignore
|
||||||
|
@ -516,8 +516,8 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
|
||||||
Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
|
Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
|
||||||
You can run this to archive single pages without needing to create a whole collection with archivebox init.
|
You can run this to archive single pages without needing to create a whole collection with archivebox init.
|
||||||
"""
|
"""
|
||||||
oneshot_link, _ = parse_links_memory([url])
|
oneshot_snapshots, _ = parse_snapshots_memory([url])
|
||||||
if len(oneshot_link) > 1:
|
if len(oneshot_snapshots) > 1:
|
||||||
stderr(
|
stderr(
|
||||||
'[X] You should pass a single url to the oneshot command',
|
'[X] You should pass a single url to the oneshot command',
|
||||||
color='red'
|
color='red'
|
||||||
|
@ -525,8 +525,10 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
|
||||||
raise SystemExit(2)
|
raise SystemExit(2)
|
||||||
|
|
||||||
methods = extractors.split(",") if extractors else ignore_methods(['title'])
|
methods = extractors.split(",") if extractors else ignore_methods(['title'])
|
||||||
archive_link(oneshot_link[0], out_dir=out_dir, methods=methods)
|
snapshot = oneshot_snapshots[0]
|
||||||
return oneshot_link
|
snapshot.save() # Oneshot uses an in-memory database, so this is safe
|
||||||
|
archive_snapshot(snapshot, out_dir=out_dir, methods=methods)
|
||||||
|
return snapshot
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def add(urls: Union[str, List[str]],
|
def add(urls: Union[str, List[str]],
|
||||||
|
|
|
@ -66,7 +66,7 @@ PARSERS = (
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def parse_links_memory(urls: List[str], root_url: Optional[str]=None):
|
def parse_snapshots_memory(urls: List[str], root_url: Optional[str]=None):
|
||||||
"""
|
"""
|
||||||
parse a list of URLS without touching the filesystem
|
parse a list of URLS without touching the filesystem
|
||||||
"""
|
"""
|
||||||
|
@ -77,12 +77,12 @@ def parse_links_memory(urls: List[str], root_url: Optional[str]=None):
|
||||||
file = StringIO()
|
file = StringIO()
|
||||||
file.writelines(urls)
|
file.writelines(urls)
|
||||||
file.name = "io_string"
|
file.name = "io_string"
|
||||||
links, parser = run_parser_functions(file, timer, root_url=root_url)
|
snapshots, parser = run_parser_functions(file, timer, root_url=root_url)
|
||||||
timer.end()
|
timer.end()
|
||||||
|
|
||||||
if parser is None:
|
if parser is None:
|
||||||
return [], 'Failed to parse'
|
return [], 'Failed to parse'
|
||||||
return links, parser
|
return snapshots, parser
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue