Added TAG_SEPARATORS option to supply a regex of characters to use when splitting tags

This commit is contained in:
hannah98 2021-12-30 20:19:48 +00:00
parent 745f6573c1
commit 049f88def9
3 changed files with 10 additions and 3 deletions

View file

@@ -1,5 +1,7 @@
__package__ = 'archivebox.index'
+import re
from io import StringIO
from pathlib import Path
from typing import List, Tuple, Iterator
@@ -8,7 +10,10 @@ from django.db import transaction
from .schema import Link
from ..util import enforce_types, parse_date
-from ..config import OUTPUT_DIR
+from ..config import (
+OUTPUT_DIR,
+TAG_SEPARATORS,
+)
### Main Links Index
@@ -35,7 +40,7 @@ def write_link_to_sql_index(link: Link):
info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys}
tag_list = list(dict.fromkeys(
-tag.strip() for tag in (link.tags or '').split(',')
+tag.strip() for tag in re.split(TAG_SEPARATORS, link.tags or '')
))
info.pop('tags')
@@ -107,7 +112,7 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None:
snap.title = link.title
tag_list = list(dict.fromkeys(
-tag.strip() for tag in (link.tags or '').split(',')
+tag.strip() for tag in re.split(TAG_SEPARATORS, link.tags or '')
))
snap.save()