working consistent list and remove with filtering

This commit is contained in:
Nick Sweeting 2019-04-11 07:00:26 -04:00
parent 4ca9a0beac
commit d8d8f7c2cc
6 changed files with 242 additions and 31 deletions

View file

@ -5,12 +5,11 @@ __command__ = 'archivebox list'
__description__ = 'List all the URLs currently in the archive.'
import sys
import json
import argparse
from ..legacy.util import reject_stdin, ExtendedEncoder
from ..legacy.main import list_archive_data, csv_format
from ..legacy.util import reject_stdin, to_json, to_csv
from ..legacy.main import list_archive_data
def main(args=None):
@ -33,16 +32,10 @@ def main(args=None):
action='store_true',
help="Print the output in JSON format with all columns included.",
)
parser.add_argument(
'--filter', #'-f',
type=str,
help="List only URLs matching the given regex pattern.",
default=None,
)
parser.add_argument(
'--sort', #'-s',
type=str,
help="List the links sorted using the given key, e.g. timestamp or updated",
help="List the links sorted using the given key, e.g. timestamp or updated.",
default=None,
)
parser.add_argument(
@ -57,11 +50,26 @@ def main(args=None):
help="List only URLs bookmarked after the given timestamp.",
default=None,
)
parser.add_argument(
'--filter-type',
type=str,
choices=('exact', 'substring', 'domain', 'regex'),
default='exact',
help='Type of pattern matching to use when filtering URLs',
)
parser.add_argument(
'patterns',
nargs='*',
type=str,
default=None,
help='List only URLs matching these filter patterns.'
)
command = parser.parse_args(args)
reject_stdin(__command__)
links = list_archive_data(
filter_regex=command.filter,
filter_patterns=command.patterns,
filter_type=command.filter_type,
before=command.before,
after=command.after,
)
@ -69,10 +77,9 @@ def main(args=None):
links = sorted(links, key=lambda link: getattr(link, command.sort))
if command.csv:
print(command.csv)
print('\n'.join(csv_format(link, command.csv) for link in links))
print(to_csv(links, csv_cols=command.csv.split(','), header=True))
elif command.json:
print(json.dumps(list(links), indent=4, cls=ExtendedEncoder))
print(to_json(links, indent=4, sort_keys=True))
else:
print('\n'.join(link.url for link in links))

View file

@ -0,0 +1,87 @@
#!/usr/bin/env python3
# Set explicitly; presumably so the relative `..legacy` imports below resolve
# when this file is executed directly as a script -- TODO confirm.
__package__ = 'archivebox.cli'
# Full command name; used as the argparse `prog` and passed to reject_stdin().
__command__ = 'archivebox remove'
# One-line summary used as the argparse parser description.
__description__ = 'Remove the specified URLs from the archive.'
import sys
import argparse
from ..legacy.main import list_archive_data, remove_archive_links
from ..legacy.util import reject_stdin, to_csv, TimedProgress
from ..legacy.config import ANSI
def main(args=None):
    """Parse the ``archivebox remove`` arguments and remove the matching links.

    Filter patterns come from exactly one of two places: the optional
    positional ``pattern`` argument, or stdin (one pattern per line, blank
    lines ignored).  The parsed options are forwarded to
    ``remove_archive_links``.

    Args:
        args: Argument strings to parse.  Defaults to ``sys.argv[1:]`` when
            ``None``.

    Raises:
        SystemExit: With status 1 when patterns are supplied both as an
            argument and via stdin.  argparse also exits on invalid
            arguments.
    """
    args = sys.argv[1:] if args is None else args

    parser = argparse.ArgumentParser(
        prog=__command__,
        description=__description__,
        add_help=True,
    )
    parser.add_argument(
        '--yes', # '-y',
        action='store_true',
        help='Remove links instantly without prompting to confirm.',
    )
    parser.add_argument(
        '--delete', # '-r',
        action='store_true',
        help=(
            "In addition to removing the link from the index, "
            "also delete its archived content and metadata folder."
        ),
    )
    parser.add_argument(
        '--before', #'-b',
        type=float,
        help="List only URLs bookmarked before the given timestamp.",
        default=None,
    )
    parser.add_argument(
        '--after', #'-a',
        type=float,
        help="List only URLs bookmarked after the given timestamp.",
        default=None,
    )
    parser.add_argument(
        '--filter-type',
        type=str,
        choices=('exact', 'substring', 'domain', 'regex'),
        default='exact',
        help='Type of pattern matching to use when filtering URLs',
    )
    parser.add_argument(
        'pattern',
        nargs='?',
        type=str,
        default=None,
        help='URLs matching this filter pattern will be removed from the index.'
    )
    command = parser.parse_args(args)

    # NOTE(review): reject_stdin is defined elsewhere.  If it exits whenever
    # stdin carries data, the stdin handling below can never run -- confirm
    # whether this call should remain for a command that accepts stdin.
    reject_stdin(__command__)

    stdin_patterns = []
    if not sys.stdin.isatty():
        # One pattern per line; drop blank lines so that a trailing newline
        # does not turn into an empty-string pattern.
        stdin_patterns = [
            line.strip()
            for line in sys.stdin.read().split('\n')
            if line.strip()
        ]

    # The positional argument is stored as `pattern`; the parser defines no
    # `url` attribute, so reading `command.url` raised AttributeError.
    if stdin_patterns and command.pattern:
        print(
            '[X] You should pass either a pattern as an argument, '
            'or pass a list of patterns via stdin, but not both.\n'
        )
        raise SystemExit(1)

    # Fall back to the CLI pattern when stdin is a TTY *or* is piped but
    # empty (e.g. cron or a subprocess), instead of discarding the argument.
    patterns = stdin_patterns or [command.pattern]

    remove_archive_links(
        filter_patterns=patterns, filter_type=command.filter_type,
        before=command.before, after=command.after,
        yes=command.yes, delete=command.delete,
    )
# Run the command only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()