mirror of
https://github.com/nathom/streamrip.git
synced 2025-05-25 04:24:49 -04:00
Prefer explicit tracks when downloading discography
This commit is contained in:
parent
d0dfab82ab
commit
1bf4e682f3
3 changed files with 142 additions and 22 deletions
|
@ -98,9 +98,10 @@ class Artist(Media):
|
||||||
return list(_albums)
|
return list(_albums)
|
||||||
|
|
||||||
# Will not fail on any nonempty string
|
# Will not fail on any nonempty string
|
||||||
_essence = re.compile(r"([^\(]+)(?:\s*[\(\[][^\)][\)\]])*")
|
_essence_re = re.compile(r"([^\(\[]+)(?:\s*[\(\[][^\)][\)\]])*")
|
||||||
|
|
||||||
def _filter_repeats(self, albums: list[Album]) -> list[Album]:
|
@classmethod
|
||||||
|
def _filter_repeats(cls, albums: list[Album]) -> list[Album]:
|
||||||
"""When there are different versions of an album on the artist,
|
"""When there are different versions of an album on the artist,
|
||||||
choose the one with the best quality.
|
choose the one with the best quality.
|
||||||
|
|
||||||
|
@ -109,33 +110,35 @@ class Artist(Media):
|
||||||
"""
|
"""
|
||||||
groups: dict[str, list[Album]] = {}
|
groups: dict[str, list[Album]] = {}
|
||||||
for a in albums:
|
for a in albums:
|
||||||
match = self._essence.match(a.meta.album)
|
match = cls._essence_re.match(a.meta.album)
|
||||||
assert match is not None
|
assert match is not None
|
||||||
title = match.group(1).strip().lower()
|
title = match.group(1).strip().lower()
|
||||||
items = groups.get(title, [])
|
items = groups.get(title, [])
|
||||||
items.append(a)
|
items.append(a)
|
||||||
groups[title] = items
|
groups[title] = items
|
||||||
|
|
||||||
ret: list[Album] = []
|
unique_albums: list[Album] = []
|
||||||
for group in groups.values():
|
for group in groups.values():
|
||||||
best = None
|
# Move explicit versions to the beginning
|
||||||
max_bd, max_sr = 0, 0
|
group = sorted(
|
||||||
# assume that highest bd is always with highest sr
|
group,
|
||||||
for album in group:
|
key=lambda album: album.meta.info.explicit,
|
||||||
bd = album.meta.info.bit_depth or 0
|
reverse=True,
|
||||||
if bd > max_bd:
|
)
|
||||||
max_bd = bd
|
group = sorted(
|
||||||
best = album
|
group,
|
||||||
|
key=lambda album: album.meta.info.sampling_rate or 0,
|
||||||
|
reverse=True,
|
||||||
|
)
|
||||||
|
group = sorted(
|
||||||
|
group,
|
||||||
|
key=lambda album: album.meta.info.bit_depth or 0,
|
||||||
|
reverse=True,
|
||||||
|
)
|
||||||
|
# group guaranteed to be nonempty
|
||||||
|
unique_albums.append(group[0])
|
||||||
|
|
||||||
sr = album.meta.info.sampling_rate or 0
|
return unique_albums
|
||||||
if sr > max_sr:
|
|
||||||
max_sr = sr
|
|
||||||
best = album
|
|
||||||
|
|
||||||
assert best is not None # true because all g != []
|
|
||||||
ret.append(best)
|
|
||||||
|
|
||||||
return ret
|
|
||||||
|
|
||||||
_extra_re = re.compile(
|
_extra_re = re.compile(
|
||||||
r"(?i)(anniversary|deluxe|live|collector|demo|expanded|remix)"
|
r"(?i)(anniversary|deluxe|live|collector|demo|expanded|remix)"
|
||||||
|
|
|
@ -18,7 +18,7 @@ class Summary(ABC):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def from_item(cls, item: dict) -> str:
|
def from_item(cls, item: dict) -> "Summary":
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
|
117
tests/test_discography_filter.py
Normal file
117
tests/test_discography_filter.py
Normal file
|
@ -0,0 +1,117 @@
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from streamrip.media import Album, Artist
|
||||||
|
from streamrip.metadata import AlbumInfo, AlbumMetadata
|
||||||
|
|
||||||
|
# helper function to create an album with given parameters
|
||||||
|
|
||||||
|
|
||||||
|
def create_album(
    title: str,
    explicit: bool,
    sampling_rate: Optional[float],
    bit_depth: Optional[int],
    id: str,
) -> Album:
    """Build a track-less ``Album`` fixture for the discography filter tests.

    Only the title, explicit flag, sampling rate, bit depth and id are taken
    from the arguments; every other metadata field is a fixed placeholder.
    """
    return Album(
        meta=AlbumMetadata(
            info=AlbumInfo(
                id=id,
                quality=0,
                container="mp3",
                explicit=explicit,
                sampling_rate=sampling_rate,
                bit_depth=bit_depth,
            ),
            album=title,
            albumartist="artist",
            year="2020",
            genre=["genre"],
            covers=None,  # type: ignore
            tracktotal=10,
        ),
        tracks=[],
        config=None,  # type: ignore
        folder="folder",
        db=None,  # type: ignore
    )
|
||||||
|
|
||||||
|
|
||||||
|
# tests
|
||||||
|
|
||||||
|
|
||||||
|
def test_single_album():
    """A lone album is handed back as the only result."""
    only = create_album("Test Album", False, 44.1, 16, id="a1")

    kept = Artist._filter_repeats([only])

    assert len(kept) == 1
    assert kept[0] == only
|
||||||
|
|
||||||
|
|
||||||
|
def test_different_titles():
    """Albums whose titles differ are not grouped together."""
    plain = create_album("Test Album", False, 44.1, 16, id="a1")
    other = create_album("Another Album", True, 96, 24, id="a2")

    kept = Artist._filter_repeats([plain, other])

    assert len(kept) == 2
    # Compare on normalized titles so the check does not depend on order.
    normalized = set()
    for entry in kept:
        normalized.add(entry.meta.album.strip().lower())
    assert "test album" in normalized
    assert "another album" in normalized
|
||||||
|
|
||||||
|
|
||||||
|
def test_same_title_different_bit_depth():
    """Within one title group, the higher bit depth is expected to win."""
    low_depth = create_album("Test Album", False, 44.1, 16, id="a1")
    high_depth = create_album("Test Album (Deluxe)", False, 44.1, 24, id="a2")

    kept = Artist._filter_repeats([low_depth, high_depth])

    assert len(kept) == 1
    assert kept[0] == high_depth
|
||||||
|
|
||||||
|
|
||||||
|
def test_same_title_tie_bit_depth_different_sampling():
    """With equal bit depth, the higher sampling rate is expected to win."""
    low_rate = create_album("Test Album", False, 44.1, 24, id="a1")
    high_rate = create_album("Test Album (Live)", False, 96, 24, id="a2")

    kept = Artist._filter_repeats([low_rate, high_rate])

    assert len(kept) == 1
    assert kept[0] == high_rate
|
||||||
|
|
||||||
|
|
||||||
|
def test_same_title_tie_bit_depth_and_sampling_different_explicit():
    """With bit depth and sampling rate tied, the explicit album is preferred."""
    clean = create_album("Test Album", False, 96, 24, id="a1")
    explicit = create_album("Test Album (Edited)", True, 96, 24, id="a2")

    kept = Artist._filter_repeats([clean, explicit])

    assert len(kept) == 1
    assert kept[0] == explicit
|
||||||
|
|
||||||
|
|
||||||
|
def test_grouping_normalization():
    """Titles that differ only by a bracketed suffix share one group."""
    base = create_album("Album X", False, 44.1, 16, id="a1")
    deluxe = create_album("Album X (Deluxe)", False, 96, 24, id="a2")
    special = create_album("Album X [Special Edition]", True, 44.1, 16, id="a3")

    kept = Artist._filter_repeats([base, deluxe, special])

    assert len(kept) == 1
    # The deluxe edition has the highest bit depth and sampling rate of the three.
    assert kept[0] == deluxe
|
||||||
|
|
||||||
|
|
||||||
|
def test_multiple_groups():
    """Several title groups yield exactly one winner each."""
    candidates = [
        create_album("Album A", False, 44.1, 16, id="a1"),
        create_album("Album A (Remastered)", True, 96, 24, id="a2"),
        create_album("Album B", False, 96, 24, id="b1"),
        create_album("Album B (Live)", True, 44.1, 16, id="b2"),
        create_album("Album C", False, None, None, id="c1"),
    ]

    kept = Artist._filter_repeats(candidates)

    assert len(kept) == 3
    # Expected winners: a2 for "Album A", b1 for "Album B", c1 for "Album C".
    kept_ids = set(entry.meta.info.id for entry in kept)
    assert kept_ids == {"a2", "b1", "c1"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_missing_values():
    """When sampling rate and bit depth are both missing, explicit decides."""
    clean = create_album("Test Album", False, None, None, id="a1")
    explicit = create_album("Test Album", True, None, None, id="a2")

    kept = Artist._filter_repeats([clean, explicit])

    assert len(kept) == 1
    # Neither album carries quality metrics, so the explicit one is expected to win.
    assert kept[0] == explicit
|
Loading…
Add table
Add a link
Reference in a new issue