From fe3320eff00dcc41cdc63ac40ad17f8b2b6f86ba Mon Sep 17 00:00:00 2001
From: Nick Sweeting <github@sweeting.me>
Date: Tue, 19 Nov 2024 05:07:12 -0800
Subject: [PATCH] restore missing archivebox_remove work

---
 archivebox/cli/archivebox_remove.py | 141 ++++++++--------------------
 1 file changed, 40 insertions(+), 101 deletions(-)

diff --git a/archivebox/cli/archivebox_remove.py b/archivebox/cli/archivebox_remove.py
index 317dc792..0f03d686 100644
--- a/archivebox/cli/archivebox_remove.py
+++ b/archivebox/cli/archivebox_remove.py
@@ -3,53 +3,45 @@
 __package__ = 'archivebox.cli'
 __command__ = 'archivebox remove'
 
-import sys
-import argparse
+import shutil
 from pathlib import Path
-from typing import Optional, List, IO
+from typing import Iterable
+
+import rich_click as click
 
 from django.db.models import QuerySet
 
-from archivebox.misc.util import docstring
 from archivebox.config import DATA_DIR
-from archivebox.misc.logging_util import SmartFormatter, accept_stdin
 from archivebox.index.schema import Link
+from archivebox.config.django import setup_django
+from archivebox.index import load_main_index
+from archivebox.index.sql import remove_from_sql_main_index
+from archivebox.misc.util import enforce_types, docstring
+from archivebox.misc.checks import check_data_folder
+from archivebox.misc.logging_util import (
+    log_list_started,
+    log_list_finished,
+    log_removal_started,
+    log_removal_finished,
+    TimedProgress,
+)
 
 
-def remove(filter_str: Optional[str]=None,
-           filter_patterns: Optional[list[str]]=None,
-           filter_type: str='exact',
-           snapshots: Optional[QuerySet]=None,
-           after: Optional[float]=None,
-           before: Optional[float]=None,
-           yes: bool=False,
-           delete: bool=False,
-           out_dir: Path=DATA_DIR) -> list[Link]:
+@enforce_types
+def remove(filter_patterns: Iterable[str]=(),
+          filter_type: str='exact',
+          snapshots: QuerySet | None=None,
+          after: float | None=None,
+          before: float | None=None,
+          yes: bool=False,
+          delete: bool=False,
+          out_dir: Path=DATA_DIR) -> Iterable[Link]:
     """Remove the specified URLs from the archive"""
     
+    setup_django()
     check_data_folder()
-
-    if snapshots is None:
-        if filter_str and filter_patterns:
-            stderr(
-                '[X] You should pass either a pattern as an argument, '
-                'or pass a list of patterns via stdin, but not both.\n',
-                color='red',
-            )
-            raise SystemExit(2)
-        elif not (filter_str or filter_patterns):
-            stderr(
-                '[X] You should pass either a pattern as an argument, '
-                'or pass a list of patterns via stdin.',
-                color='red',
-            )
-            stderr()
-            hint(('To remove all urls you can run:',
-                'archivebox remove --filter-type=regex ".*"'))
-            stderr()
-            raise SystemExit(2)
-        elif filter_str:
-            filter_patterns = [ptn.strip() for ptn in filter_str.split('\n')]
+    
+    from archivebox.cli.archivebox_search import list_links
 
     list_kwargs = {
         "filter_patterns": filter_patterns,
@@ -67,12 +59,10 @@ def remove(filter_str: Optional[str]=None,
     finally:
         timer.end()
 
-
     if not snapshots.exists():
         log_removal_finished(0, 0)
         raise SystemExit(1)
 
-
     log_links = [link.as_link() for link in snapshots]
     log_list_finished(log_links)
     log_removal_started(log_links, yes=yes, delete=delete)
@@ -87,7 +77,7 @@ def remove(filter_str: Optional[str]=None,
 
     to_remove = snapshots.count()
 
-    from .search import flush_search_index
+    from archivebox.search import flush_search_index
 
     flush_search_index(snapshots=snapshots)
     remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir)
@@ -97,69 +87,18 @@ def remove(filter_str: Optional[str]=None,
     return all_snapshots
 
 
+@click.command()
+@click.option('--yes', is_flag=True, help='Remove links instantly without prompting to confirm')
+@click.option('--delete', is_flag=True, help='Delete the archived content and metadata folder in addition to removing from index')
+@click.option('--before', type=float, help='Remove only URLs bookmarked before timestamp')
+@click.option('--after', type=float, help='Remove only URLs bookmarked after timestamp')
+@click.option('--filter-type', '-f', type=click.Choice(('exact', 'substring', 'domain', 'regex', 'tag')), default='exact', help='Type of pattern matching to use when filtering URLs')
+@click.argument('filter_patterns', nargs=-1)
 @docstring(remove.__doc__)
-def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
-    parser = argparse.ArgumentParser(
-        prog=__command__,
-        description=remove.__doc__,
-        add_help=True,
-        formatter_class=SmartFormatter,
-    )
-    parser.add_argument(
-        '--yes', # '-y',
-        action='store_true',
-        help='Remove links instantly without prompting to confirm.',
-    )
-    parser.add_argument(
-        '--delete', # '-r',
-        action='store_true',
-        help=(
-            "In addition to removing the link from the index, "
-            "also delete its archived content and metadata folder."
-        ),
-    )
-    parser.add_argument(
-        '--before', #'-b',
-        type=float,
-        help="List only URLs bookmarked before the given timestamp.",
-        default=None,
-    )
-    parser.add_argument(
-        '--after', #'-a',
-        type=float,
-        help="List only URLs bookmarked after the given timestamp.",
-        default=None,
-    )
-    parser.add_argument(
-        '--filter-type',
-        type=str,
-        choices=('exact', 'substring', 'domain', 'regex','tag'),
-        default='exact',
-        help='Type of pattern matching to use when filtering URLs',
-    )
-    parser.add_argument(
-        'filter_patterns',
-        nargs='*',
-        type=str,
-        help='URLs matching this filter pattern will be removed from the index.'
-    )
-    command = parser.parse_args(args or ())
-    
-    filter_str = None
-    if not command.filter_patterns:
-        filter_str = accept_stdin(stdin)
+def main(**kwargs):
+    """Remove the specified URLs from the archive"""
+    remove(**kwargs)
 
-    remove(
-        filter_str=filter_str,
-        filter_patterns=command.filter_patterns,
-        filter_type=command.filter_type,
-        before=command.before,
-        after=command.after,
-        yes=command.yes,
-        delete=command.delete,
-        out_dir=Path(pwd) if pwd else DATA_DIR,
-    )
-    
 
 if __name__ == '__main__':
-    main(args=sys.argv[1:], stdin=sys.stdin)
+    main()