Mirror of https://github.com/ArchiveBox/ArchiveBox.git, synced 2025-05-13 06:34:25 -04:00
add tag cli option

Commit fea0b89dbe (parent 334061f17e)
3 changed files with 39 additions and 13 deletions
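The change threads a single new option end to end: the add subcommand's parser (archivebox/cli/archivebox_add.py) gains a --tag/-t flag, its parsed value is passed into add() (archivebox/main.py), and add() splits the comma-separated string into Tag rows (archivebox/core/models.py) that get attached to each imported snapshot. Assuming the flag behaves as its help text describes, a typical invocation would be archivebox add --tag=tag1,tag2,tag3 'https://example.com' (the URL here is illustrative).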
archivebox/cli/archivebox_add.py

@@ -22,6 +22,12 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         add_help=True,
         formatter_class=SmartFormatter,
     )
+    parser.add_argument(
+        '--tag', '-t',
+        type=str,
+        default='',
+        help="Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3",
+    )
     parser.add_argument(
         '--update-all', #'-n',
         action='store_true',
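As a sanity check on the new flag's behavior, here is a minimal standalone sketch (plain stdlib argparse, not ArchiveBox code): the comma-separated value lands on the parsed namespace as a single string, and splitting on ',' only happens later, inside add().

    import argparse

    # Mirror of the option definition added above, wired into a bare parser.
    parser = argparse.ArgumentParser(add_help=True)
    parser.add_argument(
        '--tag', '-t',
        type=str,
        default='',
        help="Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3",
    )

    command = parser.parse_args(['--tag', 'tag1,tag2,tag3'])
    assert command.tag == 'tag1,tag2,tag3'  # still one string; add() does the split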
@@ -89,6 +95,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     add(
         urls=stdin_urls or urls,
         depth=command.depth,
+        tag=command.tag,
         update_all=command.update_all,
         index_only=command.index_only,
         overwrite=command.overwrite,
archivebox/core/models.py

@@ -33,8 +33,11 @@ class Tag(models.Model):
     Based on django-taggit model
     """
     name = models.CharField(unique=True, blank=False, max_length=100)
+
+    # slug is autoset on save from name, never set it manually
     slug = models.SlugField(unique=True, blank=True, max_length=100)
+
 
     class Meta:
         verbose_name = "Tag"
         verbose_name_plural = "Tags"
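The new comment promises that slug is autoset on save from name, but the save() override itself is outside this hunk. A hypothetical sketch of what that usually looks like, following the django-taggit pattern the docstring cites (the save() body below is an assumption, not the committed code):

    from django.db import models
    from django.utils.text import slugify

    class Tag(models.Model):
        name = models.CharField(unique=True, blank=False, max_length=100)
        # slug is autoset on save from name, never set it manually
        slug = models.SlugField(unique=True, blank=True, max_length=100)

        def save(self, *args, **kwargs):
            # Derive the slug from name on first save instead of trusting callers.
            if not self.slug:
                self.slug = slugify(self.name)
            super().save(*args, **kwargs)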
archivebox/main.py

@@ -561,6 +561,7 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
 
 @enforce_types
 def add(urls: Union[str, List[str]],
+        tag: str='',
         depth: int=0,
         update_all: bool=not ONLY_NEW,
         index_only: bool=False,
@@ -570,6 +571,8 @@ def add(urls: Union[str, List[str]],
         out_dir: Path=OUTPUT_DIR) -> List[Link]:
     """Add a new URL or list of URLs to your archive"""
 
+    from core.models import Tag
+
     assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
 
     extractors = extractors.split(",") if extractors else []
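Note that Tag is imported inside add() rather than at the top of main.py, presumably so the module can still be imported before Django's app registry is initialized; deferring model imports to call time is a common workaround when a module is also used outside a fully configured Django context.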
@@ -602,31 +605,44 @@ def add(urls: Union[str, List[str]],
             new_links_depth += parse_links_from_source(downloaded_file, root_url=new_link.url)
 
     imported_links = list({link.url: link for link in (new_links + new_links_depth)}.values())
 
     new_links = dedupe_links(all_links, imported_links)
 
     write_main_index(links=new_links, out_dir=out_dir)
     all_links = load_main_index(out_dir=out_dir)
 
+    # add any tags to imported links
+    tags = [
+        Tag.objects.get_or_create(name=name.strip())
+        for name in tag.split(',')
+        if name.strip()
+    ]
+    if tags:
+        for link in imported_links:
+            link.as_snapshot().tags.add(*tags)
+
+
     if index_only:
+        # mock archive all the links using the fake index_only extractor method in order to update their state
         if overwrite:
             archive_links(imported_links, overwrite=overwrite, methods=['index_only'], out_dir=out_dir)
         else:
             archive_links(new_links, overwrite=False, methods=['index_only'], out_dir=out_dir)
-        return all_links
-
-    # Run the archive methods for each link
-    archive_kwargs = {
-        "out_dir": out_dir,
-    }
-    if extractors:
-        archive_kwargs["methods"] = extractors
+    else:
+        # fully run the archive extractor methods for each link
+        archive_kwargs = {
+            "out_dir": out_dir,
+        }
+        if extractors:
+            archive_kwargs["methods"] = extractors
 
-    if update_all:
-        archive_links(all_links, overwrite=overwrite, **archive_kwargs)
-    elif overwrite:
-        archive_links(imported_links, overwrite=True, **archive_kwargs)
-    elif new_links:
-        archive_links(new_links, overwrite=False, **archive_kwargs)
+        if update_all:
+            archive_links(all_links, overwrite=overwrite, **archive_kwargs)
+        elif overwrite:
+            archive_links(imported_links, overwrite=True, **archive_kwargs)
+        elif new_links:
+            archive_links(new_links, overwrite=False, **archive_kwargs)
 
     return all_links
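One caveat about the Django API used in the tagging block: QuerySet.get_or_create() returns an (object, created) tuple rather than the bare instance, so as committed the tags list holds tuples. A sketch of the unpacked form, reusing the tag string and Tag model from the diff above:

    tags = [
        Tag.objects.get_or_create(name=name.strip())[0]  # [0] keeps the Tag, drops the created flag
        for name in tag.split(',')
        if name.strip()
    ]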