mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-05-28 13:44:14 -04:00
feat: add search filter-type to list command
This commit is contained in:
parent
fb67d6684c
commit
0f7dba07df
2 changed files with 34 additions and 2 deletions
|
@ -98,7 +98,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--filter-type',
|
'--filter-type',
|
||||||
type=str,
|
type=str,
|
||||||
choices=('exact', 'substring', 'domain', 'regex','tag'),
|
choices=('exact', 'substring', 'domain', 'regex', 'tag', 'search'),
|
||||||
default='exact',
|
default='exact',
|
||||||
help='Type of pattern matching to use when filtering URLs',
|
help='Type of pattern matching to use when filtering URLs',
|
||||||
)
|
)
|
||||||
|
|
|
@ -51,6 +51,8 @@ from .sql import (
|
||||||
write_sql_link_details,
|
write_sql_link_details,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from ..search import search_backend_enabled, query_search_index
|
||||||
|
|
||||||
### Link filtering and checking
|
### Link filtering and checking
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
|
@ -365,7 +367,7 @@ LINK_FILTERS = {
|
||||||
}
|
}
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
|
def q_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
|
||||||
q_filter = Q()
|
q_filter = Q()
|
||||||
for pattern in filter_patterns:
|
for pattern in filter_patterns:
|
||||||
try:
|
try:
|
||||||
|
@ -380,6 +382,36 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type
|
||||||
raise SystemExit(2)
|
raise SystemExit(2)
|
||||||
return snapshots.filter(q_filter)
|
return snapshots.filter(q_filter)
|
||||||
|
|
||||||
|
def search_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='search') -> QuerySet:
    """Narrow `snapshots` to the entries the search backend returns for any pattern.

    Each pattern in `filter_patterns` is passed to `query_search_index`; the
    per-pattern results are OR-ed together and the union is then intersected
    with `snapshots`.

    `filter_type` is accepted so the signature lines up with the other filter
    functions, but it is not read here.

    Raises:
        SystemExit: with code 2 when `search_backend_enabled()` is falsy, or
            when querying the search index raises any `Exception`.
    """
    if not search_backend_enabled():
        stderr()
        stderr('[X] The search backend is not enabled', color='red')
        raise SystemExit(2)

    # Accumulator for the union of all per-pattern hits.
    # NOTE(review): presumably an empty Snapshot queryset, going by the helper's name -- confirm.
    matched = get_empty_snapshot_queryset()
    for pattern in filter_patterns:
        try:
            matched |= query_search_index(pattern)
        except Exception as err:
            # Any backend failure is reported on stderr and aborts the command.
            stderr()
            stderr(f'[X] The search backend threw an exception={err}:', color='red')
            raise SystemExit(2)

    # Keep only rows present in both the caller's queryset and the search hits.
    return snapshots & matched
|
||||||
|
|
||||||
|
@enforce_types
def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
    """Dispatch to the filter implementation selected by `filter_type`.

    The value 'search' is routed to `search_filter`; every other value is
    forwarded unchanged to `q_filter`.
    """
    if filter_type == 'search':
        return search_filter(snapshots, filter_patterns, filter_type)

    return q_filter(snapshots, filter_patterns, filter_type)
|
||||||
|
|
||||||
|
|
||||||
def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||||
"""indexed links without checking archive status or data directory validity"""
|
"""indexed links without checking archive status or data directory validity"""
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue