#!/usr/bin/env python3 __package__ = 'archivebox.cli' __command__ = 'archivebox remove' import shutil from pathlib import Path from typing import Iterable import rich_click as click from django.db.models import QuerySet from archivebox.config import DATA_DIR from archivebox.index.schema import Link from archivebox.config.django import setup_django from archivebox.index import load_main_index from archivebox.index.sql import remove_from_sql_main_index from archivebox.misc.util import enforce_types, docstring from archivebox.misc.checks import check_data_folder from archivebox.misc.logging_util import ( log_list_started, log_list_finished, log_removal_started, log_removal_finished, TimedProgress, ) @enforce_types def remove(filter_patterns: Iterable[str]=(), filter_type: str='exact', snapshots: QuerySet | None=None, after: float | None=None, before: float | None=None, yes: bool=False, delete: bool=False, out_dir: Path=DATA_DIR) -> Iterable[Link]: """Remove the specified URLs from the archive""" setup_django() check_data_folder() from archivebox.cli.archivebox_search import list_links list_kwargs = { "filter_patterns": filter_patterns, "filter_type": filter_type, "after": after, "before": before, } if snapshots: list_kwargs["snapshots"] = snapshots log_list_started(filter_patterns, filter_type) timer = TimedProgress(360, prefix=' ') try: snapshots = list_links(**list_kwargs) finally: timer.end() if not snapshots.exists(): log_removal_finished(0, 0) raise SystemExit(1) log_links = [link.as_link() for link in snapshots] log_list_finished(log_links) log_removal_started(log_links, yes=yes, delete=delete) timer = TimedProgress(360, prefix=' ') try: for snapshot in snapshots: if delete: shutil.rmtree(snapshot.as_link().link_dir, ignore_errors=True) finally: timer.end() to_remove = snapshots.count() from archivebox.search import flush_search_index flush_search_index(snapshots=snapshots) remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir) all_snapshots = load_main_index(out_dir=out_dir) log_removal_finished(all_snapshots.count(), to_remove) return all_snapshots @click.command() @click.option('--yes', is_flag=True, help='Remove links instantly without prompting to confirm') @click.option('--delete', is_flag=True, help='Delete the archived content and metadata folder in addition to removing from index') @click.option('--before', type=float, help='Remove only URLs bookmarked before timestamp') @click.option('--after', type=float, help='Remove only URLs bookmarked after timestamp') @click.option('--filter-type', '-f', type=click.Choice(('exact', 'substring', 'domain', 'regex', 'tag')), default='exact', help='Type of pattern matching to use when filtering URLs') @click.argument('filter_patterns', nargs=-1) @docstring(remove.__doc__) def main(**kwargs): """Remove the specified URLs from the archive""" remove(**kwargs) if __name__ == '__main__': main()