mirror of
https://github.com/nathom/streamrip.git
synced 2025-05-09 14:11:55 -04:00
Prefer explicit tracks when downloading discography
This commit is contained in:
parent
d0dfab82ab
commit
1bf4e682f3
3 changed files with 142 additions and 22 deletions
|
@ -98,9 +98,10 @@ class Artist(Media):
|
|||
return list(_albums)
|
||||
|
||||
# Captures the part of a title before any bracketed suffix; a title that is empty or starts with "(" or "[" may not match
|
||||
_essence = re.compile(r"([^\(]+)(?:\s*[\(\[][^\)][\)\]])*")
|
||||
_essence_re = re.compile(r"([^\(\[]+)(?:\s*[\(\[][^\)][\)\]])*")
|
||||
|
||||
@classmethod
def _filter_repeats(cls, albums: "list[Album]") -> "list[Album]":
    """Keep one album per title, preferring the best-quality version.

    Albums are grouped by their title with any bracketed suffix removed
    (as captured by ``cls._essence_re``), compared case-insensitively and
    ignoring surrounding whitespace. Within each group the winner is the
    album with the highest bit depth; ties are broken by the highest
    sampling rate, then by preferring explicit versions. When albums are
    still tied, the one that appears first in ``albums`` is kept.

    Args:
        albums: Candidate albums; missing bit depth or sampling rate is
            treated as 0.

    Returns:
        One album per distinct normalized title, in order of each
        title's first appearance in ``albums``.
    """
    groups: "dict[str, list[Album]]" = {}
    for candidate in albums:
        raw_title = candidate.meta.album
        match = cls._essence_re.match(raw_title)
        # The pattern needs a leading non-bracket character, so titles such
        # as "(Deluxe) Foo" do not match; group those by their full title
        # instead of aborting the whole discography.
        essence = match.group(1) if match is not None else raw_title
        groups.setdefault(essence.strip().lower(), []).append(candidate)

    def _rank(album):
        # Lexicographic priority: bit depth, then sampling rate, then explicit.
        info = album.meta.info
        return (info.bit_depth or 0, info.sampling_rate or 0, info.explicit)

    # max() returns the first maximal item, which matches taking element 0
    # of a stable descending sort. Every group is nonempty by construction.
    return [max(group, key=_rank) for group in groups.values()]
|
||||
|
||||
_extra_re = re.compile(
|
||||
r"(?i)(anniversary|deluxe|live|collector|demo|expanded|remix)"
|
||||
|
|
|
@ -18,7 +18,7 @@ class Summary(ABC):
|
|||
|
||||
@classmethod
@abstractmethod
def from_item(cls, item: dict) -> "Summary":
    """Build a summary instance from ``item``.

    Abstract: concrete subclasses must provide the implementation.

    Args:
        item: Source dictionary for the summary (presumably a raw API
            result entry; confirm against callers).

    Returns:
        A summary object describing ``item``.
    """
    pass
|
||||
|
||||
@abstractmethod
|
||||
|
|
117
tests/test_discography_filter.py
Normal file
117
tests/test_discography_filter.py
Normal file
|
@ -0,0 +1,117 @@
|
|||
from typing import Optional
|
||||
|
||||
from streamrip.media import Album, Artist
|
||||
from streamrip.metadata import AlbumInfo, AlbumMetadata
|
||||
|
||||
# helper function to create an album with given parameters
|
||||
|
||||
|
||||
def create_album(
    title: str,
    explicit: bool,
    sampling_rate: Optional[float],
    bit_depth: Optional[int],
    id: str,
) -> Album:
    """Build a track-less ``Album`` fixture for the discography filter tests.

    Only the title, explicit flag, sampling rate, bit depth and id vary;
    every other metadata field is filled with a fixed placeholder.
    """
    return Album(
        meta=AlbumMetadata(
            info=AlbumInfo(
                id=id,
                quality=0,
                container="mp3",
                explicit=explicit,
                sampling_rate=sampling_rate,
                bit_depth=bit_depth,
            ),
            album=title,
            albumartist="artist",
            year="2020",
            genre=["genre"],
            covers=None,  # type: ignore
            tracktotal=10,
        ),
        tracks=[],
        config=None,  # type: ignore
        folder="folder",
        db=None,  # type: ignore
    )
|
||||
|
||||
|
||||
# tests
|
||||
|
||||
|
||||
def test_single_album():
    """A lone album passes through the filter unchanged."""
    only = create_album("Test Album", False, 44.1, 16, id="a1")
    kept = Artist._filter_repeats([only])
    assert len(kept) == 1
    (survivor,) = kept
    assert survivor == only
|
||||
|
||||
|
||||
def test_different_titles():
    """Albums whose titles differ are never merged into one group."""
    candidates = [
        create_album("Test Album", False, 44.1, 16, id="a1"),
        create_album("Another Album", True, 96, 24, id="a2"),
    ]
    kept = Artist._filter_repeats(candidates)
    assert len(kept) == 2
    normalized = set()
    for album in kept:
        normalized.add(album.meta.album.strip().lower())
    assert "test album" in normalized
    assert "another album" in normalized
|
||||
|
||||
|
||||
def test_same_title_different_bit_depth():
    """Within one title group, the higher bit depth is kept."""
    low_depth = create_album("Test Album", False, 44.1, 16, id="a1")
    high_depth = create_album("Test Album (Deluxe)", False, 44.1, 24, id="a2")
    kept = Artist._filter_repeats([low_depth, high_depth])
    assert len(kept) == 1
    assert kept[0] == high_depth
|
||||
|
||||
|
||||
def test_same_title_tie_bit_depth_different_sampling():
    """With equal bit depth, the higher sampling rate decides the winner."""
    low_rate = create_album("Test Album", False, 44.1, 24, id="a1")
    high_rate = create_album("Test Album (Live)", False, 96, 24, id="a2")
    kept = Artist._filter_repeats([low_rate, high_rate])
    assert len(kept) == 1
    assert kept[0] == high_rate
|
||||
|
||||
|
||||
def test_same_title_tie_bit_depth_and_sampling_different_explicit():
    """With bit depth and sampling rate tied, the explicit version is kept."""
    clean = create_album("Test Album", False, 96, 24, id="a1")
    explicit = create_album("Test Album (Edited)", True, 96, 24, id="a2")
    kept = Artist._filter_repeats([clean, explicit])
    assert len(kept) == 1
    assert kept[0] == explicit
|
||||
|
||||
|
||||
def test_grouping_normalization():
    """Titles that differ only by a bracketed suffix share one group."""
    plain = create_album("Album X", False, 44.1, 16, id="a1")
    deluxe = create_album("Album X (Deluxe)", False, 96, 24, id="a2")
    special = create_album("Album X [Special Edition]", True, 44.1, 16, id="a3")
    kept = Artist._filter_repeats([plain, deluxe, special])
    assert len(kept) == 1
    # The deluxe release has the highest bit depth and sampling rate.
    assert kept[0] == deluxe
|
||||
|
||||
|
||||
def test_multiple_groups():
    """Each distinct title yields exactly one winner."""
    candidates = [
        create_album("Album A", False, 44.1, 16, id="a1"),
        create_album("Album A (Remastered)", True, 96, 24, id="a2"),
        create_album("Album B", False, 96, 24, id="b1"),
        create_album("Album B (Live)", True, 44.1, 16, id="b2"),
        create_album("Album C", False, None, None, id="c1"),
    ]
    kept = Artist._filter_repeats(candidates)
    assert len(kept) == 3
    # Best of group A is a2, best of group B is b1, and c1 is alone.
    winner_ids = set()
    for album in kept:
        winner_ids.add(album.meta.info.id)
    assert winner_ids == {"a2", "b1", "c1"}
|
||||
|
||||
|
||||
def test_missing_values():
    """Missing quality values count as 0, so the explicit flag decides."""
    clean = create_album("Test Album", False, None, None, id="a1")
    explicit = create_album("Test Album", True, None, None, id="a2")
    kept = Artist._filter_repeats([clean, explicit])
    assert len(kept) == 1
    # Nothing else distinguishes the two, so the explicit version wins.
    assert kept[0] == explicit
|
Loading…
Add table
Add a link
Reference in a new issue