diff --git a/streamrip/clients.py b/streamrip/clients.py index 6714ada..e11249e 100644 --- a/streamrip/clients.py +++ b/streamrip/clients.py @@ -4,19 +4,23 @@ import base64 import binascii import hashlib import json +from Cryptodome.Cipher import Blowfish, AES # type: ignore import logging import re import time -import deezer +from pprint import pformat +import requests +import deezer # type: ignore from abc import ABC, abstractmethod -from typing import Generator, Sequence, Tuple, Union +from typing import Generator, Sequence, Tuple, Union, Dict, Any, Optional -import click +import click # type: ignore from .constants import ( AGENT, AVAILABLE_QUALITY_IDS, DEEZER_BASE, + DEEZER_FORMATS, DEEZER_DL, DEEZER_MAX_Q, QOBUZ_BASE, @@ -39,7 +43,7 @@ from .exceptions import ( MissingCredentials, ) from .spoofbuz import Spoofer -from .utils import gen_threadsafe_session, get_quality +from .utils import gen_threadsafe_session, get_quality, safe_get logger = logging.getLogger("streamrip") @@ -342,6 +346,7 @@ class QobuzClient(Client): if status_code == 401: raise AuthenticationError(f"Invalid credentials from params {params}") elif status_code == 400: + logger.debug(resp) raise InvalidAppIdError(f"Invalid app id from params {params}") else: logger.info("Logged in to Qobuz") @@ -412,6 +417,7 @@ class QobuzClient(Client): logging.debug(f"Calling API with endpoint {epoint} params {params}") r = self.session.get(f"{QOBUZ_BASE}/{epoint}", params=params) try: + logger.debug(r.text) return r.json(), r.status_code except Exception: logger.error("Problem getting JSON. Status code: %s", r.status_code) @@ -444,7 +450,7 @@ class DeezerClient(Client): # self.session = gen_threadsafe_session() # no login required - # self.logged_in = True + self.logged_in = False def search(self, query: str, media_type: str = "album", limit: int = 200) -> dict: """Search API for query. @@ -457,20 +463,25 @@ class DeezerClient(Client): :type limit: int """ # TODO: use limit parameter - response = self.session.get( - f"{DEEZER_BASE}/search/{media_type}", params={"q": query} - ) - response.raise_for_status() - return response.json() + try: + search_function = getattr(self.client.api, f"search_{media_type}") + except AttributeError: + raise Exception + + response = search_function(query, limit=limit) + return response def login(self, **kwargs): - """Return None. + try: + arl = kwargs["arl"] + except KeyError: + raise MissingCredentials - Dummy method. + success = self.client.login_via_arl(arl) + if not success: + raise AuthenticationError - :param kwargs: - """ - assert self.client.login_via_arl(kwargs["arl"]) + self.logged_in = True def get(self, meta_id: Union[str, int], media_type: str = "album"): """Get metadata. @@ -485,26 +496,25 @@ class DeezerClient(Client): "track": self.client.api.get_track, "album": self.client.api.get_album, "playlist": self.client.api.get_playlist, - "artist": self.client.api.get_artist_discography, + "artist": self.client.api.get_artist, } get_item = GET_FUNCTIONS[media_type] - return get_item(meta_id) + item = get_item(meta_id) + if media_type in ("album", "playlist"): + tracks = getattr(self.client.api, f"get_{media_type}_tracks")( + meta_id, limit=-1 + ) + item["tracks"] = tracks["data"] + item["track_total"] = len(tracks["data"]) + elif media_type == "artist": + albums = self.client.api.get_artist_albums(meta_id) + item["albums"] = albums["data"] - # url = f"{DEEZER_BASE}/{media_type}/{meta_id}" - # item = self.session.get(url).json() - # if media_type in ("album", "playlist"): - # tracks = self.session.get(f"{url}/tracks", params={"limit": 1000}).json() - # item["tracks"] = tracks["data"] - # item["track_total"] = len(tracks["data"]) - # elif media_type == "artist": - # albums = self.session.get(f"{url}/albums").json() - # item["albums"] = albums["data"] + logger.debug(item) + return item - # logger.debug(item) - # return item - - def get_file_url(self, meta_id: Union[str, int], quality: int = 2): + def get_file_url(self, meta_id: str, quality: int = 2) -> dict: """Get downloadable url for a track. :param meta_id: The track ID. @@ -512,35 +522,127 @@ class DeezerClient(Client): :param quality: :type quality: int """ - track_info = self.client.gw.get_track( - meta_id, - ) + # TODO: optimize such that all of the ids are requested at once + dl_info: Dict[str, Any] = {"quality": quality} + + track_info = self.client.gw.get_track(meta_id) + logger.debug("Track info: %s", pformat(track_info)) + + dl_info["fallback_id"] = safe_get(track_info, "FALLBACK", "SNG_ID") + + format_info = get_quality(quality, "deezer") + assert isinstance(format_info, tuple) + format_no, format_str = format_info + + dl_info["size_to_quality"] = { + int(track_info.get(f"FILESIZE_{format}")): self._quality_id_from_filetype( + format + ) + for format in DEEZER_FORMATS + } + token = track_info["TRACK_TOKEN"] - url = self.client.get_track_url(token, "FLAC") + url = self.client.get_track_url(token, format_str) + if url is None: - md5 = track_info["MD5_ORIGIN"] - media_version = track_info["MEDIA_VERSION"] - format_number = 1 - - url_bytes = b"\xa4".join( - [ - md5.encode(), - str(format_number).encode(), - str(meta_id).encode(), - str(media_version).encode(), - ] + url = self._get_encrypted_file_url( + meta_id, track_info["MD5_ORIGIN"], track_info["MEDIA_VERSION"] ) - md5val = hashlib.md5(url_bytes).hexdigest() - step2 = ( - md5val.encode() - + b"\xa4" - + url_bytes - + b"\xa4" - + (b"." * (16 - (len(step2) % 16))) + dl_info["url"] = url + logger.debug(pformat(dl_info)) + return dl_info + + def _get_encrypted_file_url( + self, meta_id: str, track_hash: str, media_version: str + ): + format_number = 1 + + url_bytes = b"\xa4".join( + ( + track_hash.encode(), + str(format_number).encode(), + str(meta_id).encode(), + str(media_version).encode(), ) - urlPart = _ecbCrypt("jo6aey6haid2Teih", step2) - return urlPart.decode("utf-8") + ) + url_hash = hashlib.md5(url_bytes).hexdigest() + info_bytes = bytearray(url_hash.encode()) + info_bytes.extend(b"\xa4") + info_bytes.extend(url_bytes) + info_bytes.extend(b"\xa4") + # Pad the bytes so that len(info_bytes) % 16 == 0 + padding_len = 16 - (len(info_bytes) % 16) + info_bytes.extend(b"." * padding_len) + + logger.debug("Info bytes: %s", info_bytes) + path = self._gen_url_path(info_bytes) + logger.debug(path) + return f"https://e-cdns-proxy-{track_hash[0]}.dzcdn.net/mobile/1/{path}" + + def _gen_url_path(self, data): + return binascii.hexlify( + AES.new("jo6aey6haid2Teih".encode(), AES.MODE_ECB).encrypt(data) + ).decode("utf-8") + + def _decrypt_stream(self, url: str, meta_id: str, output_stream): + headers = {"User-Agent": AGENT} + chunk_len = 0 + # isCryptedStream = ( + # "/mobile/" in track.downloadURL or "/media/" in track.downloadURL + # ) + + # itemData = { + # "id": track.id, + # "title": track.title, + # "artist": track.mainArtist.name, + # } + + with requests.get(url, headers=headers, stream=True, timeout=10) as request: + request.raise_for_status() + blowfish_key = generate_blowfish_key(meta_id) + + file_size = int(request.headers["Content-Length"]) + if file_size == 0: + raise Exception + + for chunk in request.iter_content(2048 * 3): + if len(chunk) >= 2048: + chunk = decrypt_chunk(blowfish_key, chunk[0:2048]) + chunk[2048:] + + output_stream.write(chunk) + chunk_len += len(chunk) + + # except (SSLError, u3SSLError): + # streamTrack(outputStream, track, chunkLength, downloadObject, listener) + # except (RequestsConnectionError, ReadTimeout, ChunkedEncodingError): + # sleep(2) + # streamTrack(outputStream, track, start, downloadObject, listener) + + @staticmethod + def _quality_id_from_filetype(filetype: str) -> Optional[int]: + return { + "MP3_128": 0, + "MP3_256": 0, + "MP3_320": 1, + "FLAC": 2, + }.get(filetype) + + +def generate_blowfish_key(trackId: str): + SECRET = "g4el58wc0zvf9na1" + md5_hash = hashlib.md5(trackId.encode()).hexdigest() + key = "".join( + chr(ord(md5_hash[i]) ^ ord(md5_hash[i + 16]) ^ ord(SECRET[i])) + for i in range(16) + ) + return key.encode() + + +def decrypt_chunk(key, data): + return Blowfish.new( + key, Blowfish.MODE_CBC, b"\x00\x01\x02\x03\x04\x05\x06\x07" + ).decrypt(data) class TidalClient(Client): diff --git a/streamrip/constants.py b/streamrip/constants.py index 570f8bb..6846392 100644 --- a/streamrip/constants.py +++ b/streamrip/constants.py @@ -154,6 +154,14 @@ TIDAL_Q_MAP = { DEEZER_MAX_Q = 6 AVAILABLE_QUALITY_IDS = (0, 1, 2, 3, 4) +DEEZER_FORMATS = { + "AAC_64", + "MP3_64", + "MP3_128", + "MP3_256", + "MP3_320", + "FLAC", +} # video only for tidal MEDIA_TYPES = {"track", "album", "artist", "label", "playlist", "video"} diff --git a/streamrip/media.py b/streamrip/media.py index fcf65d4..62c0f81 100644 --- a/streamrip/media.py +++ b/streamrip/media.py @@ -273,7 +273,7 @@ class Track(Media): dl_info = self.client.get_file_url(url_id, self.quality) except Exception as e: # click.secho(f"Unable to download track. {e}", fg="red") - raise NonStreamable(e) + raise NonStreamable(repr(e)) if self.client.source == "qobuz": assert isinstance(dl_info, dict) # for typing @@ -285,14 +285,43 @@ class Track(Media): self.bit_depth = dl_info.get("bit_depth") # --------- Download Track ---------- - if self.client.source in ("qobuz", "tidal", "deezer"): - assert isinstance(dl_info, dict) + if self.client.source in {"qobuz", "tidal"}: + assert isinstance(dl_info, dict), dl_info logger.debug("Downloadable URL found: %s", dl_info.get("url")) try: download_url = dl_info["url"] except KeyError as e: click.secho(f"Panic: {e} dl_info = {dl_info}", fg="red") - tqdm_download(download_url, self.path, desc=self._progress_desc) + + _quick_download(download_url, self.path, desc=self._progress_desc) + + elif self.client.source == "deezer": + # We can only find out if the requested quality is available + # after the streaming request is sent for deezer + assert isinstance(dl_info, dict) + + try: + stream = DownloadStream( + dl_info["url"], source="deezer", item_id=self.id + ) + except NonStreamable: + self.id = dl_info["fallback_id"] + dl_info = self.client.get_file_url(self.id, self.quality) + assert isinstance(dl_info, dict) + stream = DownloadStream( + dl_info["url"], source="deezer", item_id=self.id + ) + + stream_size = len(stream) + stream_quality = dl_info["size_to_quality"][stream_size] + if self.quality != stream_quality: + # The chosen quality is not available + self.quality = stream_quality + self.format_final_path() # If the extension is different + + with open(self.path, "wb") as file: + for chunk in tqdm_stream(stream, desc=self._progress_desc): + file.write(chunk) elif self.client.source == "soundcloud": assert isinstance(dl_info, dict) # for typing @@ -1699,19 +1728,18 @@ class Playlist(Tracklist, Media): item.download(**kwargs) - if self.client.source != "deezer": - item.tag(embed_cover=kwargs.get("embed_cover", True)) + item.tag(embed_cover=kwargs.get("embed_cover", True)) - if playlist_to_album and self.client.source == "deezer": - # Because Deezer tracks come pre-tagged, the `set_playlist_to_album` - # option is never set. Here, we manually do this - from mutagen.flac import FLAC + # if playlist_to_album and self.client.source == "deezer": + # # Because Deezer tracks come pre-tagged, the `set_playlist_to_album` + # # option is never set. Here, we manually do this + # from mutagen.flac import FLAC - audio = FLAC(item.path) - audio["ALBUM"] = self.name - audio["ALBUMARTIST"] = self.creator - audio["TRACKNUMBER"] = f"{item['tracknumber']:02}" - audio.save() + # audio = FLAC(item.path) + # audio["ALBUM"] = self.name + # audio["ALBUMARTIST"] = self.creator + # audio["TRACKNUMBER"] = f"{item['tracknumber']:02}" + # audio.save() self.downloaded_ids.add(item.id) diff --git a/streamrip/utils.py b/streamrip/utils.py index 29e590d..9d63ffc 100644 --- a/streamrip/utils.py +++ b/streamrip/utils.py @@ -6,8 +6,19 @@ import base64 import logging import os from string import Formatter -from typing import Dict, Hashable, Optional, Tuple, Union +from typing import ( + Dict, + Hashable, + Optional, + Tuple, + Union, + Generator, +) from collections import OrderedDict +import functools +from Cryptodome.Cipher import Blowfish +import hashlib +import re from json import JSONDecodeError import click @@ -28,9 +39,9 @@ def safe_get(d: dict, *keys: Hashable, default=None): Usage: >>> d = {'foo': {'bar': 'baz'}} - >>> _safe_get(d, 'baz') + >>> safe_get(d, 'baz') None - >>> _safe_get(d, 'foo', 'bar') + >>> safe_get(d, 'foo', 'bar') 'baz' :param d: @@ -49,8 +60,21 @@ def safe_get(d: dict, *keys: Hashable, default=None): curr = res return res + """ + FLAC = 9 + MP3_320 = 3 + MP3_128 = 1 + MP4_RA3 = 15 + MP4_RA2 = 14 + MP4_RA1 = 13 + DEFAULT = 8 + LOCAL = 0 -__QUALITY_MAP: Dict[str, Dict[int, Union[int, str]]] = { + + """ + + +__QUALITY_MAP: Dict[str, Dict[int, Union[int, str, Tuple[int, str]]]] = { "qobuz": { 1: 5, 2: 6, @@ -58,9 +82,9 @@ __QUALITY_MAP: Dict[str, Dict[int, Union[int, str]]] = { 4: 27, }, "deezer": { - 0: 9, - 1: 3, - 2: 1, + 0: (9, "MP3_128"), + 1: (3, "MP3_320"), + 2: (1, "FLAC"), }, "tidal": { 0: "LOW", # AAC @@ -71,7 +95,7 @@ __QUALITY_MAP: Dict[str, Dict[int, Union[int, str]]] = { } -def get_quality(quality_id: int, source: str) -> Union[str, int]: +def get_quality(quality_id: int, source: str) -> Union[str, int, Tuple[int, str]]: """Get the source-specific quality id. :param quality_id: the universal quality id (0, 1, 2, 4) @@ -171,6 +195,84 @@ def tqdm_download(url: str, filepath: str, params: dict = None, desc: str = None raise +class DownloadStream: + """An iterator over chunks of a stream. + + Usage: + + >>> stream = DownloadStream('https://google.com', None) + >>> with open('google.html', 'wb') as file: + >>> for chunk in stream: + >>> file.write(chunk) + + """ + + is_encrypted = re.compile("/m(?:obile|edia)/") + + def __init__( + self, + url: str, + source: str = None, + params: dict = None, + headers: dict = None, + item_id: str = None, + ): + self.source = source + self.session = gen_threadsafe_session(headers=headers) + + self.id = item_id + if isinstance(self.id, int): + self.id = str(self.id) + + if params is None: + params = {} + + self.request = self.session.get( + url, allow_redirects=True, stream=True, params=params + ) + self.file_size = int(self.request.headers["Content-Length"]) + + if self.file_size == 0: + raise NonStreamable + + def __iter__(self) -> Generator: + if self.source == "deezer" and self.is_encrypted.search(self.url) is not None: + assert isinstance(self.id, str), self.id + + blowfish_key = self._generate_blowfish_key(self.id) + return ( + (self._decrypt_chunk(blowfish_key, chunk[:2048]) + chunk[2048:]) + if len(chunk) >= 2048 + else chunk + for chunk in self.request.iter_content(2048 * 3) + ) + + return self.request.iter_content(chunk_size=1024) + + @property + def url(self): + return self.request.url + + def __len__(self): + return self.file_size + + @staticmethod + def _generate_blowfish_key(track_id: str): + SECRET = "g4el58wc0zvf9na1" + md5_hash = hashlib.md5(track_id.encode()).hexdigest() + # good luck :) + return "".join( + chr(functools.reduce(lambda x, y: x ^ y, map(ord, t))) + for t in zip(md5_hash[:16], md5_hash[16:], SECRET) + ).encode() + + @staticmethod + def _decrypt_chunk(key, data): + return Blowfish.new( + key, Blowfish.MODE_CBC, b"\x00\x01\x02\x03\x04\x05\x06\x07" + ).decrypt(data) + + def clean_format(formatter: str, format_info): """Format track or folder names sanitizing every formatter key. @@ -396,3 +498,27 @@ def downsize_image(filepath: str, width: int, height: int): resized_image = image.resize((width, height)) resized_image.save(filepath) + + +TQDM_BAR_FORMAT = ( + "{desc} |{bar}| (" + + click.style("{elapsed}", fg="magenta") + + " at " + + click.style("{rate_fmt}{postfix}", fg="cyan", bold=True) + + ")" +) + + +def tqdm_stream(iterator: DownloadStream, desc: Optional[str] = None) -> Generator: + with tqdm( + total=len(iterator), + unit="B", + unit_scale=True, + unit_divisor=1024, + desc=desc, + dynamic_ncols=True, + bar_format=TQDM_BAR_FORMAT, + ) as bar: + for chunk in iterator: + bar.update(len(chunk)) + yield chunk