mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-13 14:44:29 -04:00
refactor: Remove skip_index from archive related functions
This commit is contained in:
parent
9745a5ac56
commit
275ad22db7
3 changed files with 16 additions and 16 deletions
|
@@ -19,7 +19,7 @@ meta_cmds = ('help', 'version')
|
||||||
main_cmds = ('init', 'info', 'config')
|
main_cmds = ('init', 'info', 'config')
|
||||||
archive_cmds = ('add', 'remove', 'update', 'list', 'status')
|
archive_cmds = ('add', 'remove', 'update', 'list', 'status')
|
||||||
|
|
||||||
fake_db = ("oneshot",) + meta_cmds
|
fake_db = ("oneshot",)
|
||||||
|
|
||||||
display_first = (*meta_cmds, *main_cmds, *archive_cmds)
|
display_first = (*meta_cmds, *main_cmds, *archive_cmds)
|
||||||
|
|
||||||
|
@@ -60,6 +60,8 @@ def run_subcommand(subcommand: str,
|
||||||
stdin: Optional[IO]=None,
|
stdin: Optional[IO]=None,
|
||||||
pwd: Union[Path, str, None]=None) -> None:
|
pwd: Union[Path, str, None]=None) -> None:
|
||||||
"""Run a given ArchiveBox subcommand with the given list of args"""
|
"""Run a given ArchiveBox subcommand with the given list of args"""
|
||||||
|
|
||||||
|
if subcommand not in meta_cmds:
|
||||||
from ..config import setup_django
|
from ..config import setup_django
|
||||||
setup_django(in_memory_db=subcommand in fake_db)
|
setup_django(in_memory_db=subcommand in fake_db)
|
||||||
|
|
||||||
|
|
|
@@ -67,11 +67,10 @@ def ignore_methods(to_ignore: List[str]):
|
||||||
return list(methods)
|
return list(methods)
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None, skip_index: bool=False) -> Link:
|
def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None) -> Link:
|
||||||
"""download the DOM, PDF, and a screenshot into a folder named after the link's timestamp"""
|
"""download the DOM, PDF, and a screenshot into a folder named after the link's timestamp"""
|
||||||
|
|
||||||
# TODO: Remove when the input is changed to be a snapshot. Suboptimal approach.
|
# TODO: Remove when the input is changed to be a snapshot. Suboptimal approach.
|
||||||
if not skip_index:
|
|
||||||
from core.models import Snapshot, ArchiveResult
|
from core.models import Snapshot, ArchiveResult
|
||||||
try:
|
try:
|
||||||
snapshot = Snapshot.objects.get(url=link.url) # TODO: This will be unnecessary once everything is a snapshot
|
snapshot = Snapshot.objects.get(url=link.url) # TODO: This will be unnecessary once everything is a snapshot
|
||||||
|
@@ -93,7 +92,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
|
||||||
os.makedirs(out_dir)
|
os.makedirs(out_dir)
|
||||||
|
|
||||||
link = load_link_details(link, out_dir=out_dir)
|
link = load_link_details(link, out_dir=out_dir)
|
||||||
write_link_details(link, out_dir=out_dir, skip_sql_index=skip_index)
|
write_link_details(link, out_dir=out_dir, skip_sql_index=False)
|
||||||
log_link_archiving_started(link, out_dir, is_new)
|
log_link_archiving_started(link, out_dir, is_new)
|
||||||
link = link.overwrite(updated=datetime.now())
|
link = link.overwrite(updated=datetime.now())
|
||||||
stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}
|
stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}
|
||||||
|
@@ -112,7 +111,6 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
|
||||||
|
|
||||||
stats[result.status] += 1
|
stats[result.status] += 1
|
||||||
log_archive_method_finished(result)
|
log_archive_method_finished(result)
|
||||||
if not skip_index:
|
|
||||||
write_search_index(link=link, texts=result.index_texts)
|
write_search_index(link=link, texts=result.index_texts)
|
||||||
ArchiveResult.objects.create(snapshot=snapshot, extractor=method_name, cmd=result.cmd, cmd_version=result.cmd_version,
|
ArchiveResult.objects.create(snapshot=snapshot, extractor=method_name, cmd=result.cmd, cmd_version=result.cmd_version,
|
||||||
output=result.output, pwd=result.pwd, start_ts=result.start_ts, end_ts=result.end_ts, status=result.status)
|
output=result.output, pwd=result.pwd, start_ts=result.start_ts, end_ts=result.end_ts, status=result.status)
|
||||||
|
@@ -135,7 +133,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
write_link_details(link, out_dir=out_dir, skip_sql_index=skip_index)
|
write_link_details(link, out_dir=out_dir, skip_sql_index=False)
|
||||||
|
|
||||||
log_link_archiving_finished(link, link.link_dir, is_new, stats)
|
log_link_archiving_finished(link, link.link_dir, is_new, stats)
|
||||||
|
|
||||||
|
|
|
@@ -524,7 +524,7 @@ def oneshot(url: str, out_dir: Path=OUTPUT_DIR):
|
||||||
)
|
)
|
||||||
raise SystemExit(2)
|
raise SystemExit(2)
|
||||||
methods = ignore_methods(['title'])
|
methods = ignore_methods(['title'])
|
||||||
archive_link(oneshot_link[0], out_dir=out_dir, methods=methods, skip_index=False)
|
archive_link(oneshot_link[0], out_dir=out_dir, methods=methods)
|
||||||
return oneshot_link
|
return oneshot_link
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue