1
0
Fork 0
mirror of https://github.com/nathom/streamrip.git synced 2025-05-25 04:24:49 -04:00

Prefer explicit tracks when downloading discography

This commit is contained in:
Nathan Thomas 2025-03-10 09:18:16 -07:00
parent d0dfab82ab
commit 1bf4e682f3
3 changed files with 142 additions and 22 deletions

View file

@ -98,9 +98,10 @@ class Artist(Media):
return list(_albums) return list(_albums)
# Will not fail on any nonempty string # Will not fail on any nonempty string
_essence = re.compile(r"([^\(]+)(?:\s*[\(\[][^\)][\)\]])*") _essence_re = re.compile(r"([^\(\[]+)(?:\s*[\(\[][^\)][\)\]])*")
def _filter_repeats(self, albums: list[Album]) -> list[Album]: @classmethod
def _filter_repeats(cls, albums: list[Album]) -> list[Album]:
"""When there are different versions of an album on the artist, """When there are different versions of an album on the artist,
choose the one with the best quality. choose the one with the best quality.
@ -109,33 +110,35 @@ class Artist(Media):
""" """
groups: dict[str, list[Album]] = {} groups: dict[str, list[Album]] = {}
for a in albums: for a in albums:
match = self._essence.match(a.meta.album) match = cls._essence_re.match(a.meta.album)
assert match is not None assert match is not None
title = match.group(1).strip().lower() title = match.group(1).strip().lower()
items = groups.get(title, []) items = groups.get(title, [])
items.append(a) items.append(a)
groups[title] = items groups[title] = items
ret: list[Album] = [] unique_albums: list[Album] = []
for group in groups.values(): for group in groups.values():
best = None # Move explicit versions to the beginning
max_bd, max_sr = 0, 0 group = sorted(
# assume that highest bd is always with highest sr group,
for album in group: key=lambda album: album.meta.info.explicit,
bd = album.meta.info.bit_depth or 0 reverse=True,
if bd > max_bd: )
max_bd = bd group = sorted(
best = album group,
key=lambda album: album.meta.info.sampling_rate or 0,
reverse=True,
)
group = sorted(
group,
key=lambda album: album.meta.info.bit_depth or 0,
reverse=True,
)
# group guaranteed to be nonempty
unique_albums.append(group[0])
sr = album.meta.info.sampling_rate or 0 return unique_albums
if sr > max_sr:
max_sr = sr
best = album
assert best is not None # true because all g != []
ret.append(best)
return ret
_extra_re = re.compile( _extra_re = re.compile(
r"(?i)(anniversary|deluxe|live|collector|demo|expanded|remix)" r"(?i)(anniversary|deluxe|live|collector|demo|expanded|remix)"

View file

@ -18,7 +18,7 @@ class Summary(ABC):
@classmethod @classmethod
@abstractmethod @abstractmethod
def from_item(cls, item: dict) -> str: def from_item(cls, item: dict) -> "Summary":
pass pass
@abstractmethod @abstractmethod

View file

@ -0,0 +1,117 @@
from typing import Optional
from streamrip.media import Album, Artist
from streamrip.metadata import AlbumInfo, AlbumMetadata
# helper function to create an album with given parameters
def create_album(
    title: str,
    explicit: bool,
    sampling_rate: Optional[float],
    bit_depth: Optional[int],
    id: str,
) -> Album:
    """Build a track-less ``Album`` fixture for the de-duplication tests.

    Only the title, explicit flag, sampling rate, bit depth and id vary
    between fixtures; every other metadata field is a fixed placeholder.

    Args:
        title: Album title stored in the metadata's ``album`` field.
        explicit: Value for the info's ``explicit`` flag.
        sampling_rate: Value for the info's ``sampling_rate``; may be None.
        bit_depth: Value for the info's ``bit_depth``; may be None.
        id: Identifier stored on the info. The name shadows the builtin but
            is kept because the tests pass it by keyword.

    Returns:
        An ``Album`` with an empty track list and no config or database.
    """
    album_info = AlbumInfo(
        id=id,
        quality=0,
        container="mp3",
        explicit=explicit,
        sampling_rate=sampling_rate,
        bit_depth=bit_depth,
    )
    album_meta = AlbumMetadata(
        info=album_info,
        album=title,
        albumartist="artist",
        year="2020",
        genre=["genre"],
        covers=None,  # type: ignore
        tracktotal=10,
    )
    # config and db are deliberately None for these fixtures.
    return Album(  # type: ignore
        meta=album_meta,
        tracks=[],
        config=None,  # type: ignore
        folder="folder",
        db=None,  # type: ignore
    )
# tests
def test_single_album():
    """A lone album comes back unchanged as the only result."""
    only_album = create_album("Test Album", False, 44.1, 16, id="a1")

    filtered = Artist._filter_repeats([only_album])

    assert len(filtered) == 1
    assert filtered[0] == only_album
def test_different_titles():
    """Albums whose titles differ are never merged into one group."""
    first = create_album("Test Album", False, 44.1, 16, id="a1")
    second = create_album("Another Album", True, 96, 24, id="a2")

    filtered = Artist._filter_repeats([first, second])

    assert len(filtered) == 2
    # Normalize the same way on both sides before checking membership.
    seen_titles = {album.meta.album.strip().lower() for album in filtered}
    assert "test album" in seen_titles
    assert "another album" in seen_titles
def test_same_title_different_bit_depth():
    """Within one title group, the higher bit depth is selected."""
    low_depth = create_album("Test Album", False, 44.1, 16, id="a1")
    high_depth = create_album("Test Album (Deluxe)", False, 44.1, 24, id="a2")

    filtered = Artist._filter_repeats([low_depth, high_depth])

    assert len(filtered) == 1
    assert filtered[0] == high_depth
def test_same_title_tie_bit_depth_different_sampling():
    """With equal bit depth, the higher sampling rate breaks the tie."""
    low_rate = create_album("Test Album", False, 44.1, 24, id="a1")
    high_rate = create_album("Test Album (Live)", False, 96, 24, id="a2")

    filtered = Artist._filter_repeats([low_rate, high_rate])

    assert len(filtered) == 1
    assert filtered[0] == high_rate
def test_same_title_tie_bit_depth_and_sampling_different_explicit():
    """With bit depth and sampling rate tied, the explicit version wins."""
    clean = create_album("Test Album", False, 96, 24, id="a1")
    explicit = create_album("Test Album (Edited)", True, 96, 24, id="a2")

    filtered = Artist._filter_repeats([clean, explicit])

    assert len(filtered) == 1
    assert filtered[0] == explicit
def test_grouping_normalization():
    """Titles that differ only by a bracketed suffix share one group."""
    plain = create_album("Album X", False, 44.1, 16, id="a1")
    deluxe = create_album("Album X (Deluxe)", False, 96, 24, id="a2")
    special = create_album("Album X [Special Edition]", True, 44.1, 16, id="a3")

    filtered = Artist._filter_repeats([plain, deluxe, special])

    assert len(filtered) == 1
    # The deluxe version has the highest bit depth and sampling rate, which
    # outranks the explicit flag on the special edition.
    assert filtered[0] == deluxe
def test_multiple_groups():
    """Each distinct title group contributes exactly one winner."""
    candidates = [
        create_album("Album A", False, 44.1, 16, id="a1"),
        create_album("Album A (Remastered)", True, 96, 24, id="a2"),
        create_album("Album B", False, 96, 24, id="b1"),
        create_album("Album B (Live)", True, 44.1, 16, id="b2"),
        create_album("Album C", False, None, None, id="c1"),
    ]

    filtered = Artist._filter_repeats(candidates)

    assert len(filtered) == 3
    winner_ids = {album.meta.info.id for album in filtered}
    # Expected: a2 and b1 on quality, c1 as the only member of its group.
    assert winner_ids == {"a2", "b1", "c1"}
def test_missing_values():
    """When quality metrics are missing, the explicit flag decides."""
    # Both albums lack sampling_rate and bit_depth, so they tie on quality.
    clean = create_album("Test Album", False, None, None, id="a1")
    explicit = create_album("Test Album", True, None, None, id="a2")

    filtered = Artist._filter_repeats([clean, explicit])

    assert len(filtered) == 1
    # The explicit version is preferred over the clean one on a quality tie.
    assert filtered[0] == explicit