From e06e017ec96055dc746994301f66a87b8dd69ba6 Mon Sep 17 00:00:00 2001 From: Nathan Thomas Date: Tue, 17 Aug 2021 10:36:41 -0700 Subject: [PATCH] Add metadata.exclude list to config --- rip/core.py | 91 ++++++++++++++++++++++--------- streamrip/media.py | 121 ++++++++++++++++++++++++++++++++---------- streamrip/metadata.py | 68 ++++++++++++++++++------ 3 files changed, 209 insertions(+), 71 deletions(-) diff --git a/rip/core.py b/rip/core.py index b9ca3cf..9bc59d9 100644 --- a/rip/core.py +++ b/rip/core.py @@ -111,14 +111,18 @@ class RipCore(list): else: self.config = config - if (theme := self.config.file["theme"]["progress_bar"]) != TQDM_DEFAULT_THEME: + if ( + theme := self.config.file["theme"]["progress_bar"] + ) != TQDM_DEFAULT_THEME: set_progress_bar_theme(theme.lower()) def get_db(db_type: str) -> db.Database: db_settings = self.config.session["database"] db_class = db.CLASS_MAP[db_type] - if db_settings[db_type]["enabled"] and db_settings.get("enabled", True): + if db_settings[db_type]["enabled"] and db_settings.get( + "enabled", True + ): default_db_path = DB_PATH_MAP[db_type] path = db_settings[db_type]["path"] @@ -212,8 +216,9 @@ class RipCore(list): session = self.config.session logger.debug(session) # So that the dictionary isn't searched for the same keys multiple times - artwork, conversion, filepaths = tuple( - session[key] for key in ("artwork", "conversion", "filepaths") + artwork, conversion, filepaths, metadata = ( + session[key] + for key in ("artwork", "conversion", "filepaths", "metadata") ) concurrency = session["downloads"]["concurrency"] return { @@ -223,12 +228,12 @@ class RipCore(list): "embed_cover": artwork["embed"], "embed_cover_size": artwork["size"], "keep_hires_cover": artwork["keep_hires_cover"], - "set_playlist_to_album": session["metadata"]["set_playlist_to_album"], + "set_playlist_to_album": metadata["set_playlist_to_album"], "stay_temp": conversion["enabled"], "conversion": conversion, "concurrent_downloads": concurrency["enabled"], "max_connections": concurrency["max_connections"], - "new_tracknumbers": session["metadata"]["new_playlist_tracknumbers"], + "new_tracknumbers": metadata["new_playlist_tracknumbers"], "download_videos": session["tidal"]["download_videos"], "download_booklets": session["qobuz"]["download_booklets"], "download_youtube_videos": session["youtube"]["download_videos"], @@ -238,6 +243,7 @@ class RipCore(list): "add_singles_to_folder": filepaths["add_singles_to_folder"], "max_artwork_width": int(artwork["max_width"]), "max_artwork_height": int(artwork["max_height"]), + "exclude_tags": metadata["exclude"], } def repair(self, max_items=None): @@ -257,7 +263,9 @@ class RipCore(list): ) exit() - for counter, (source, media_type, item_id) in enumerate(self.failed_db): + for counter, (source, media_type, item_id) in enumerate( + self.failed_db + ): if counter >= max_items: break @@ -280,7 +288,9 @@ class RipCore(list): logger.debug("Arguments from config: %s", arguments) - source_subdirs = self.config.session["downloads"]["source_subdirectories"] + source_subdirs = self.config.session["downloads"][ + "source_subdirectories" + ] for item in self: # Item already checked in database in handle_urls if source_subdirs: @@ -292,20 +302,26 @@ class RipCore(list): item.download(**arguments) continue - arguments["quality"] = self.config.session[item.client.source]["quality"] + arguments["quality"] = self.config.session[item.client.source][ + "quality" + ] if isinstance(item, Artist): filters_ = tuple( k for k, v in self.config.session["filters"].items() if v ) arguments["filters"] = filters_ - logger.debug("Added filter argument for artist/label: %s", filters_) + logger.debug( + "Added filter argument for artist/label: %s", filters_ + ) if not isinstance(item, Tracklist) or not item.loaded: logger.debug("Loading metadata") try: item.load_meta(**arguments) except NonStreamable: - self.failed_db.add((item.client.source, item.type, item.id)) + self.failed_db.add( + (item.client.source, item.type, item.id) + ) secho(f"{item!s} is not available, skipping.", fg="red") continue @@ -332,7 +348,7 @@ class RipCore(list): self.db.add(str(item_id)) if isinstance(item, Track): - item.tag() + item.tag(exclude_tags=arguments["exclude_tags"]) if arguments["conversion"]["enabled"]: item.convert(**arguments["conversion"]) @@ -342,7 +358,9 @@ class RipCore(list): :param featured_list: The name of the list. See `rip discover --help`. :type featured_list: str """ - self.extend(self.search("qobuz", featured_list, "featured", limit=max_items)) + self.extend( + self.search("qobuz", featured_list, "featured", limit=max_items) + ) def get_client(self, source: str) -> Client: """Get a client given the source and log in. @@ -448,12 +466,15 @@ class RipCore(list): fg="yellow", ) parsed.extend( - ("deezer", *extract_deezer_dynamic_link(url)) for url in dynamic_urls + ("deezer", *extract_deezer_dynamic_link(url)) + for url in dynamic_urls ) parsed.extend(URL_REGEX.findall(url)) # Qobuz, Tidal, Dezer soundcloud_urls = SOUNDCLOUD_URL_REGEX.findall(url) - soundcloud_items = [self.clients["soundcloud"].get(u) for u in soundcloud_urls] + soundcloud_items = [ + self.clients["soundcloud"].get(u) for u in soundcloud_urls + ] parsed.extend( ("soundcloud", item["kind"], url) @@ -484,11 +505,15 @@ class RipCore(list): # For testing: # https://www.last.fm/user/nathan3895/playlists/12058911 - user_regex = re.compile(r"https://www\.last\.fm/user/([^/]+)/playlists/\d+") + user_regex = re.compile( + r"https://www\.last\.fm/user/([^/]+)/playlists/\d+" + ) lastfm_urls = LASTFM_URL_REGEX.findall(urls) try: lastfm_source = self.config.session["lastfm"]["source"] - lastfm_fallback_source = self.config.session["lastfm"]["fallback_source"] + lastfm_fallback_source = self.config.session["lastfm"][ + "fallback_source" + ] except KeyError: self._config_updating_message() self.config.update() @@ -522,12 +547,16 @@ class RipCore(list): ) query_is_clean = banned_words_plain.search(query) is None - search_results = self.search(source, query, media_type="track") + search_results = self.search( + source, query, media_type="track" + ) track = next(search_results) if query_is_clean: while banned_words.search(track["title"]) is not None: - logger.debug("Track title banned for query=%s", query) + logger.debug( + "Track title banned for query=%s", query + ) track = next(search_results) # Because the track is searched as a single we need to set @@ -537,7 +566,9 @@ class RipCore(list): except (NoResultsFound, StopIteration): return None - track = try_search(lastfm_source) or try_search(lastfm_fallback_source) + track = try_search(lastfm_source) or try_search( + lastfm_fallback_source + ) if track is None: return False @@ -561,7 +592,9 @@ class RipCore(list): pl.creator = creator_match.group(1) tracks_not_found = 0 - with concurrent.futures.ThreadPoolExecutor(max_workers=15) as executor: + with concurrent.futures.ThreadPoolExecutor( + max_workers=15 + ) as executor: futures = [ executor.submit(search_query, title, artist, pl) for title, artist in queries @@ -639,7 +672,9 @@ class RipCore(list): else: logger.debug("Not generator") items = ( - results.get("data") or results.get("items") or results.get("collection") + results.get("data") + or results.get("items") + or results.get("collection") ) if items is None: raise NoResultsFound(query) @@ -679,7 +714,9 @@ class RipCore(list): raise NotImplementedError fields = (fname for _, fname, _, _ in Formatter().parse(fmt) if fname) - ret = fmt.format(**{k: media.get(k, default="Unknown") for k in fields}) + ret = fmt.format( + **{k: media.get(k, default="Unknown") for k in fields} + ) return ret def interactive_search( @@ -817,7 +854,9 @@ class RipCore(list): playlist_title = html.unescape(playlist_title_match.group(1)) if remaining_tracks > 0: - with concurrent.futures.ThreadPoolExecutor(max_workers=15) as executor: + with concurrent.futures.ThreadPoolExecutor( + max_workers=15 + ) as executor: last_page = int(remaining_tracks // 50) + int( remaining_tracks % 50 != 0 ) @@ -872,7 +911,9 @@ class RipCore(list): fg="blue", ) - self.config.file["deezer"]["arl"] = input(style("ARL: ", fg="green")) + self.config.file["deezer"]["arl"] = input( + style("ARL: ", fg="green") + ) self.config.save() secho( f'Credentials saved to config file at "{self.config._path}"', diff --git a/streamrip/media.py b/streamrip/media.py index 5f09b2e..109f278 100644 --- a/streamrip/media.py +++ b/streamrip/media.py @@ -13,7 +13,17 @@ import re import shutil import subprocess from tempfile import gettempdir -from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple, Union +from typing import ( + Any, + Dict, + Generator, + Iterable, + List, + Optional, + Sequence, + Tuple, + Union, +) from click import echo, secho, style from mutagen.flac import FLAC, Picture @@ -56,7 +66,9 @@ logger = logging.getLogger("streamrip") TYPE_REGEXES = { "remaster": re.compile(r"(?i)(re)?master(ed)?"), - "extra": re.compile(r"(?i)(anniversary|deluxe|live|collector|demo|expanded)"), + "extra": re.compile( + r"(?i)(anniversary|deluxe|live|collector|demo|expanded)" + ), } @@ -198,12 +210,15 @@ class Track(Media): if source == "qobuz": self.cover_url = self.resp["album"]["image"]["large"] elif source == "tidal": - self.cover_url = tidal_cover_url(self.resp["album"]["cover"], 320) + self.cover_url = tidal_cover_url( + self.resp["album"]["cover"], 320 + ) elif source == "deezer": self.cover_url = self.resp["album"]["cover_medium"] elif source == "soundcloud": self.cover_url = ( - self.resp["artwork_url"] or self.resp["user"].get("avatar_url") + self.resp["artwork_url"] + or self.resp["user"].get("avatar_url") ).replace("large", "t500x500") else: raise InvalidSourceError(source) @@ -251,7 +266,9 @@ class Track(Media): except ItemExists as e: logger.debug(e) - self.path = os.path.join(gettempdir(), f"{hash(self.id)}_{self.quality}.tmp") + self.path = os.path.join( + gettempdir(), f"{hash(self.id)}_{self.quality}.tmp" + ) def download( # noqa self, @@ -306,9 +323,14 @@ class Track(Media): except KeyError as e: if restrictions := dl_info["restrictions"]: # Turn CamelCase code into a readable sentence - words = re.findall(r"([A-Z][a-z]+)", restrictions[0]["code"]) + words = re.findall( + r"([A-Z][a-z]+)", restrictions[0]["code"] + ) raise NonStreamable( - words[0] + " " + " ".join(map(str.lower, words[1:])) + "." + words[0] + + " " + + " ".join(map(str.lower, words[1:])) + + "." ) secho(f"Panic: {e} dl_info = {dl_info}", fg="red") @@ -317,7 +339,9 @@ class Track(Media): _quick_download(download_url, self.path, desc=self._progress_desc) elif isinstance(self.client, DeezloaderClient): - _quick_download(dl_info["url"], self.path, desc=self._progress_desc) + _quick_download( + dl_info["url"], self.path, desc=self._progress_desc + ) elif self.client.source == "deezer": # We can only find out if the requested quality is available @@ -437,7 +461,9 @@ class Track(Media): ] ) elif dl_info["type"] == "original": - _quick_download(dl_info["url"], self.path, desc=self._progress_desc) + _quick_download( + dl_info["url"], self.path, desc=self._progress_desc + ) # if a wav is returned, convert to flac engine = converter.FLAC(self.path) @@ -465,7 +491,9 @@ class Track(Media): def download_cover(self, width=999999, height=999999): """Download the cover art, if cover_url is given.""" - self.cover_path = os.path.join(gettempdir(), f"cover{hash(self.cover_url)}.jpg") + self.cover_path = os.path.join( + gettempdir(), f"cover{hash(self.cover_url)}.jpg" + ) logger.debug("Downloading cover from %s", self.cover_url) if not os.path.exists(self.cover_path): @@ -485,9 +513,9 @@ class Track(Media): formatter = self.meta.get_formatter(max_quality=self.quality) logger.debug("Track meta formatter %s", formatter) filename = clean_format(self.file_format, formatter) - self.final_path = os.path.join(self.folder, filename)[:250].strip() + ext( - self.quality, self.client.source - ) + self.final_path = os.path.join(self.folder, filename)[ + :250 + ].strip() + ext(self.quality, self.client.source) logger.debug("Formatted path: %s", self.final_path) @@ -500,7 +528,9 @@ class Track(Media): return self.final_path @classmethod - def from_album_meta(cls, album: TrackMetadata, track: dict, client: Client): + def from_album_meta( + cls, album: TrackMetadata, track: dict, client: Client + ): """Return a new Track object initialized with info. :param album: album metadata returned by API @@ -510,7 +540,9 @@ class Track(Media): :raises: IndexError """ meta = TrackMetadata(album=album, track=track, source=client.source) - return cls(client=client, meta=meta, id=track["id"], part_of_tracklist=True) + return cls( + client=client, meta=meta, id=track["id"], part_of_tracklist=True + ) @classmethod def from_api(cls, item: dict, client: Client): @@ -554,6 +586,7 @@ class Track(Media): album_meta: dict = None, cover: Union[Picture, APIC, MP4Cover] = None, embed_cover: bool = True, + exclude_tags: Optional[Sequence] = None, **kwargs, ): """Tag the track using the stored metadata. @@ -569,7 +602,9 @@ class Track(Media): :param embed_cover: Embed cover art into file :type embed_cover: bool """ - assert isinstance(self.meta, TrackMetadata), "meta must be TrackMetadata" + assert isinstance( + self.meta, TrackMetadata + ), "meta must be TrackMetadata" if not self.downloaded: logger.info( "Track %s not tagged because it was not downloaded", @@ -620,7 +655,10 @@ class Track(Media): raise InvalidQuality(f'Invalid quality: "{self.quality}"') # automatically generate key, value pairs based on container - tags = self.meta.tags(self.container) + tags = self.meta.tags( + self.container, + set(exclude_tags) if exclude_tags is not None else None, + ) for k, v in tags: logger.debug("Setting %s tag to %s", k, v) audio[k] = v @@ -690,7 +728,9 @@ class Track(Media): self.format_final_path() if not os.path.isfile(self.path): - logger.info("File %s does not exist. Skipping conversion.", self.path) + logger.info( + "File %s does not exist. Skipping conversion.", self.path + ) secho(f"{self!s} does not exist. Skipping conversion.", fg="red") return @@ -1093,7 +1133,8 @@ class Tracklist(list): kwargs.get("max_connections", 3) ) as executor: future_map = { - executor.submit(target, item, **kwargs): item for item in self + executor.submit(target, item, **kwargs): item + for item in self } try: concurrent.futures.wait(future_map.keys()) @@ -1124,7 +1165,9 @@ class Tracklist(list): secho(f"{item!s} exists. Skipping.", fg="yellow") except NonStreamable as e: e.print(item) - failed_downloads.append((item.client.source, item.type, item.id)) + failed_downloads.append( + (item.client.source, item.type, item.id) + ) self.downloaded = True @@ -1453,7 +1496,9 @@ class Album(Tracklist, Media): _cover_download(embed_cover_url, cover_path) hires_cov_path = os.path.join(self.folder, "cover.jpg") - if kwargs.get("keep_hires_cover", True) and not os.path.exists(hires_cov_path): + if kwargs.get("keep_hires_cover", True) and not os.path.exists( + hires_cov_path + ): logger.debug("Downloading hires cover") _cover_download(self.cover_urls["original"], hires_cov_path) @@ -1507,7 +1552,9 @@ class Album(Tracklist, Media): and isinstance(item, Track) and kwargs.get("folder_format") ): - disc_folder = os.path.join(self.folder, f"Disc {item.meta.discnumber}") + disc_folder = os.path.join( + self.folder, f"Disc {item.meta.discnumber}" + ) kwargs["parent_folder"] = disc_folder else: kwargs["parent_folder"] = self.folder @@ -1522,6 +1569,7 @@ class Album(Tracklist, Media): item.tag( cover=self.cover_obj, embed_cover=kwargs.get("embed_cover", True), + exclude_tags=kwargs.get("exclude_tags"), ) self.downloaded_ids.add(item.id) @@ -1601,7 +1649,9 @@ class Album(Tracklist, Media): :rtype: str """ - formatted_folder = clean_format(self.folder_format, self._get_formatter()) + formatted_folder = clean_format( + self.folder_format, self._get_formatter() + ) return os.path.join(parent_folder, formatted_folder) @@ -1719,7 +1769,9 @@ class Playlist(Tracklist, Media): if self.client.source == "qobuz": self.name = self.meta["name"] self.image = self.meta["images"] - self.creator = safe_get(self.meta, "owner", "name", default="Qobuz") + self.creator = safe_get( + self.meta, "owner", "name", default="Qobuz" + ) tracklist = self.meta["tracks"]["items"] @@ -1729,7 +1781,9 @@ class Playlist(Tracklist, Media): elif self.client.source == "tidal": self.name = self.meta["title"] self.image = tidal_cover_url(self.meta["image"], 640) - self.creator = safe_get(self.meta, "creator", "name", default="TIDAL") + self.creator = safe_get( + self.meta, "creator", "name", default="TIDAL" + ) tracklist = self.meta["tracks"] @@ -1742,7 +1796,9 @@ class Playlist(Tracklist, Media): elif self.client.source == "deezer": self.name = self.meta["title"] self.image = self.meta["picture_big"] - self.creator = safe_get(self.meta, "creator", "name", default="Deezer") + self.creator = safe_get( + self.meta, "creator", "name", default="Deezer" + ) tracklist = self.meta["tracks"] @@ -1783,7 +1839,9 @@ class Playlist(Tracklist, Media): logger.debug("Loaded %d tracks from playlist %s", len(self), self.name) - def _prepare_download(self, parent_folder: str = "StreamripDownloads", **kwargs): + def _prepare_download( + self, parent_folder: str = "StreamripDownloads", **kwargs + ): if kwargs.get("folder_format"): fname = sanitize_filename(self.name) self.folder = os.path.join(parent_folder, fname) @@ -1995,7 +2053,9 @@ class Artist(Tracklist, Media): final = self if isinstance(filters, tuple) and self.client.source == "qobuz": - filter_funcs = (getattr(self, f"_{filter_}") for filter_ in filters) + filter_funcs = ( + getattr(self, f"_{filter_}") for filter_ in filters + ) for func in filter_funcs: final = filter(func, final) @@ -2108,7 +2168,10 @@ class Artist(Tracklist, Media): best_bd = bit_depth(a["bit_depth"] for a in group) best_sr = sampling_rate(a["sampling_rate"] for a in group) for album in group: - if album["bit_depth"] == best_bd and album["sampling_rate"] == best_sr: + if ( + album["bit_depth"] == best_bd + and album["sampling_rate"] == best_sr + ): yield album break diff --git a/streamrip/metadata.py b/streamrip/metadata.py index 6af025a..f82f61a 100644 --- a/streamrip/metadata.py +++ b/streamrip/metadata.py @@ -131,7 +131,9 @@ class TrackMetadata: self.album = resp.get("title", "Unknown Album") self.tracktotal = resp.get("tracks_count", 1) self.genre = resp.get("genres_list") or resp.get("genre") or [] - self.date = resp.get("release_date_original") or resp.get("release_date") + self.date = resp.get("release_date_original") or resp.get( + "release_date" + ) self.copyright = resp.get("copyright") self.albumartist = safe_get(resp, "artist", "name") self.albumcomposer = safe_get(resp, "composer", "name") @@ -140,7 +142,9 @@ class TrackMetadata: self.disctotal = ( max( track.get("media_number", 1) - for track in safe_get(resp, "tracks", "items", default=[{}]) + for track in safe_get( + resp, "tracks", "items", default=[{}] + ) ) or 1 ) @@ -179,14 +183,22 @@ class TrackMetadata: self.cover_urls = get_cover_urls(resp, self.__source) self.streamable = resp.get("allowStreaming", False) - if q := resp.get("audioQuality"): # for album entries in single tracks + if q := resp.get( + "audioQuality" + ): # for album entries in single tracks self._get_tidal_quality(q) elif self.__source == "deezer": self.album = resp.get("title", "Unknown Album") - self.tracktotal = resp.get("track_total", 0) or resp.get("nb_tracks", 0) + self.tracktotal = resp.get("track_total", 0) or resp.get( + "nb_tracks", 0 + ) self.disctotal = ( - max(track.get("disk_number") for track in resp.get("tracks", [{}])) or 1 + max( + track.get("disk_number") + for track in resp.get("tracks", [{}]) + ) + or 1 ) self.genre = safe_get(resp, "genres", "data") self.date = resp.get("release_date") @@ -343,7 +355,9 @@ class TrackMetadata: if isinstance(self._genres, list): if self.__source == "qobuz": - genres: Iterable = re.findall(r"([^\u2192\/]+)", "/".join(self._genres)) + genres: Iterable = re.findall( + r"([^\u2192\/]+)", "/".join(self._genres) + ) genres = set(genres) elif self.__source == "deezer": genres = (g["name"] for g in self._genres) @@ -377,7 +391,9 @@ class TrackMetadata: if hasattr(self, "_copyright"): if self._copyright is None: return None - copyright: str = re.sub(r"(?i)\(P\)", PHON_COPYRIGHT, self._copyright) + copyright: str = re.sub( + r"(?i)\(P\)", PHON_COPYRIGHT, self._copyright + ) copyright = re.sub(r"(?i)\(C\)", COPYRIGHT, copyright) return copyright @@ -437,7 +453,9 @@ class TrackMetadata: formatter["sampling_rate"] /= 1000 return formatter - def tags(self, container: str = "flac") -> Generator: + def tags( + self, container: str = "flac", exclude: Optional[set] = None + ) -> Generator: """Create a generator of key, value pairs for use with mutagen. The *_KEY dicts are organized in the format: @@ -459,41 +477,52 @@ class TrackMetadata: :type container: str :rtype: Generator """ + if exclude is None: + exclude = set() + logger.debug("Excluded tags: %s", exclude) + container = container.lower() if container in ("flac", "vorbis"): - return self.__gen_flac_tags() + return self.__gen_flac_tags(exclude) if container in ("mp3", "id3"): - return self.__gen_mp3_tags() + return self.__gen_mp3_tags(exclude) if container in ("alac", "m4a", "mp4", "aac"): - return self.__gen_mp4_tags() + return self.__gen_mp4_tags(exclude) raise InvalidContainerError(f"Invalid container {container}") - def __gen_flac_tags(self) -> Generator: + def __gen_flac_tags(self, exclude: set) -> Generator: """Generate key, value pairs to tag FLAC files. :rtype: Tuple[str, str] """ for k, v in FLAC_KEY.items(): + logger.debug("attr: %s", k) + if k in exclude: + continue + tag = getattr(self, k) if tag: - if k in ( + if k in { "tracknumber", "discnumber", "tracktotal", "disctotal", - ): + }: tag = f"{int(tag):02}" logger.debug("Adding tag %s: %s", v, tag) yield (v, str(tag)) - def __gen_mp3_tags(self) -> Generator: + def __gen_mp3_tags(self, exclude: set) -> Generator: """Generate key, value pairs to tag MP3 files. :rtype: Tuple[str, str] """ for k, v in MP3_KEY.items(): + if k in exclude: + continue + if k == "tracknumber": text = f"{self.tracknumber}/{self.tracktotal}" elif k == "discnumber": @@ -504,12 +533,15 @@ class TrackMetadata: if text is not None and v is not None: yield (v.__name__, v(encoding=3, text=text)) - def __gen_mp4_tags(self) -> Generator: + def __gen_mp4_tags(self, exclude: set) -> Generator: """Generate key, value pairs to tag ALAC or AAC files. :rtype: Tuple[str, str] """ for k, v in MP4_KEY.items(): + if k in exclude: + continue + if k == "tracknumber": text = [(self.tracknumber, self.tracktotal)] elif k == "discnumber": @@ -581,7 +613,9 @@ class TrackMetadata: :rtype: int """ - return sum(hash(v) for v in self.asdict().values() if isinstance(v, Hashable)) + return sum( + hash(v) for v in self.asdict().values() if isinstance(v, Hashable) + ) def __repr__(self) -> str: """Return the string representation of the metadata object.