From c480462edf9624cd5dc0113f8cc39a1ce40bd5bc Mon Sep 17 00:00:00 2001 From: nathom Date: Sat, 3 Apr 2021 13:07:45 -0700 Subject: [PATCH 1/4] Initial commit for SoundCloud --- .gitignore | 2 + streamrip/cli.py | 2 +- streamrip/clients.py | 81 ++++++++++++++++++++++++++++++++++++++++- streamrip/core.py | 6 +-- streamrip/db.py | 2 +- streamrip/downloader.py | 52 ++++++++++++++++++-------- streamrip/utils.py | 2 +- 7 files changed, 124 insertions(+), 23 deletions(-) diff --git a/.gitignore b/.gitignore index a2f3ca7..1f53559 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ test.py /urls.txt *.flac /Downloads +*.mp3 +StreamripDownloads diff --git a/streamrip/cli.py b/streamrip/cli.py index da0d178..9e5d956 100644 --- a/streamrip/cli.py +++ b/streamrip/cli.py @@ -206,7 +206,7 @@ def config(ctx, **kwargs): config.reset() if kwargs["open"]: - click.secho(f"Opening {CONFIG_PATH}", fg='green') + click.secho(f"Opening {CONFIG_PATH}", fg="green") click.launch(CONFIG_PATH) if kwargs["qobuz"]: diff --git a/streamrip/clients.py b/streamrip/clients.py index a4b9d35..ea86b59 100644 --- a/streamrip/clients.py +++ b/streamrip/clients.py @@ -4,7 +4,7 @@ import json import logging import time from abc import ABC, abstractmethod -from pprint import pformat # , pprint +from pprint import pformat, pprint from typing import Generator, Sequence, Tuple, Union import click @@ -50,6 +50,10 @@ QOBUZ_BASE = "https://www.qobuz.com/api.json/0.2" DEEZER_BASE = "https://api.deezer.com" DEEZER_DL = "http://dz.loaderapp.info/deezer" +# SoundCloud +SOUNDCLOUD_BASE = "https://api-v2.soundcloud.com" +SOUNDCLOUD_CLIENT_ID = "a3e059563d7fd3372b49b37f00a00bcf" + # ----------- Abstract Classes ----------------- @@ -639,3 +643,78 @@ class TidalClient(ClientInterface): def _api_post(self, url, data, auth=None): r = requests.post(url, data=data, auth=auth, verify=False).json() return r + + +class SoundCloudClient(ClientInterface): + source = "soundcloud" + + def __init__(self): + self.session = 
requests.Session() + self.session.headers.update( + { + "User-Agent": AGENT, + } + ) + + def login(self): + raise NotImplementedError + + def get(self, id=None, url=None, media_type="track"): + assert media_type in ("track", "playlist", "album"), f"{media_type} not supported" + if media_type == 'album': + media_type = 'playlist' + + if url is not None: + resp, status = self._get(f"resolve?url={url}") + elif id is not None: + resp, _ = self._get(f"tracks/{id}") + else: + raise Exception("Must provide id or url") + + return resp + + def get_file_url(self, track: dict, **kwargs) -> str: + if not track['streamable'] or track['policy'] == 'BLOCK': + raise Exception + + if track['downloadable'] and track['has_downloads_left']: + resp, status = self._get("tracks/{id}/download") + return resp['redirectUri'] + + else: + url = None + for tc in track['media']['transcodings']: + fmt = tc['format'] + if fmt['protocol'] == 'hls' and fmt['mime_type'] == 'audio/mpeg': + url = tc['url'] + break + + assert url is not None + + resp, _ = self._get(url, no_base=True) + return resp['url'] + + pprint(resp) + + if status in (401, 404): + raise Exception + + return resp["redirectUri"] + + def search(self, query: str, media_type='album'): + params = {'q': query} + resp, _ = self._get(f"search/{media_type}s", params=params) + return resp + + def _get(self, path, params=None, no_base=False): + if params is None: + params = {} + params["client_id"] = SOUNDCLOUD_CLIENT_ID + if no_base: + url = path + else: + url = f"{SOUNDCLOUD_BASE}/{path}" + + r = self.session.get(url, params=params) + print(r.text) + return r.json(), r.status_code diff --git a/streamrip/core.py b/streamrip/core.py index 6104ceb..efcb1d1 100644 --- a/streamrip/core.py +++ b/streamrip/core.py @@ -71,9 +71,9 @@ class MusicDL(list): f"Enter {capitalize(source)} password (will not show on screen):", fg="green", ) - self.config.file[source]["password"] = md5(getpass( - prompt="" - ).encode('utf-8')).hexdigest() + 
self.config.file[source]["password"] = md5( + getpass(prompt="").encode("utf-8") + ).hexdigest() self.config.save() click.secho(f'Credentials saved to config file at "{self.config._path}"') diff --git a/streamrip/db.py b/streamrip/db.py index b5647b8..cee20d6 100644 --- a/streamrip/db.py +++ b/streamrip/db.py @@ -61,5 +61,5 @@ class MusicDB: ) conn.commit() except sqlite3.Error as e: - if 'UNIQUE' not in str(e): + if "UNIQUE" not in str(e): raise diff --git a/streamrip/downloader.py b/streamrip/downloader.py index eefd8b2..5bff7c4 100644 --- a/streamrip/downloader.py +++ b/streamrip/downloader.py @@ -1,4 +1,5 @@ import logging +import sys import os import re import shutil @@ -251,7 +252,7 @@ class Track: self.cover_path = os.path.join(self.folder, f"cover{hash(self.meta.album)}.jpg") logger.debug(f"Downloading cover from {self.cover_url}") - click.secho(f"\nDownloading cover art for {self!s}", fg='blue') + click.secho(f"\nDownloading cover art for {self!s}", fg="blue") if not os.path.exists(self.cover_path): tqdm_download(self.cover_url, self.cover_path) @@ -573,7 +574,7 @@ class Tracklist(list): :type quality: int :rtype: Union[Picture, APIC] """ - cover_type = {1: APIC, 2: Picture, 3: Picture, 4: Picture} + cover_type = {0: APIC, 1: APIC, 2: Picture, 3: Picture, 4: Picture} cover = cover_type.get(quality) if cover is Picture: @@ -731,7 +732,6 @@ class Album(Tracklist): "tracktotal": resp.get("numberOfTracks"), } elif client.source == "deezer": - logger.debug(pformat(resp)) return { "id": resp.get("id"), "title": resp.get("title"), @@ -752,6 +752,25 @@ class Album(Tracklist): "sampling_rate": 44100, "tracktotal": resp.get("track_total") or resp.get("nb_tracks"), } + elif client.source == 'soundcloud': + print(resp.keys()) + return { + "id": resp['id'], + "title": resp['title'], + "_artist": resp['user']['username'], + "albumartist": resp['user']['username'], + "year": resp['created_at'][:4], + "cover_urls": { + "small": resp['artwork_url'], + "large": 
resp['artwork_url'].replace('large', 't500x500') if resp['artwork_url'] is not None else None + }, + "url": resp['uri'], + "streamable": True, # assume to be true for convenience + "quality": 0, # always 128 kbps mp3 + # no bit depth + # no sampling rate + "tracktotal": resp['track_count'], + } raise InvalidSourceError(client.source) @@ -794,7 +813,7 @@ class Album(Tracklist): def download( self, - quality: int = 7, + quality: int = 3, parent_folder: Union[str, os.PathLike] = "StreamripDownloads", database: MusicDB = None, **kwargs, @@ -829,7 +848,7 @@ class Album(Tracklist): logger.debug("Cover already downloaded: %s. Skipping", cover_path) else: click.secho("Downloading cover art", fg="magenta") - if kwargs.get("large_cover", False): + if kwargs.get("large_cover", True): cover_url = self.cover_urls.get("large") if self.client.source == "qobuz": tqdm_download(cover_url.replace("600", "org"), cover_path) @@ -847,7 +866,7 @@ class Album(Tracklist): else: tqdm_download(self.cover_urls["small"], cover_path) - embed_cover = kwargs.get('embed_cover', True) # embed by default + embed_cover = kwargs.get("embed_cover", True) # embed by default if self.client.source != "deezer" and embed_cover: cover = self.get_cover_obj(cover_path, quality) @@ -881,17 +900,18 @@ class Album(Tracklist): else: fmt[key] = None - fmt["sampling_rate"] /= 1000 - # 48.0kHz -> 48kHz, 44.1kHz -> 44.1kHz - if fmt["sampling_rate"] % 1 == 0.0: - fmt["sampling_rate"] = int(fmt["sampling_rate"]) + if fmt.get('sampling_rate', False): + fmt["sampling_rate"] /= 1000 + # 48.0kHz -> 48kHz, 44.1kHz -> 44.1kHz + if fmt["sampling_rate"] % 1 == 0.0: + fmt["sampling_rate"] = int(fmt["sampling_rate"]) return fmt def _get_formatted_folder(self, parent_folder: str) -> str: if self.bit_depth is not None and self.sampling_rate is not None: self.container = "FLAC" - elif self.client.source in ("qobuz", "deezer"): + elif self.client.source in ("qobuz", "deezer", "soundcloud"): self.container = "MP3" elif 
self.client.source == "tidal": self.container = "AAC" @@ -983,7 +1003,7 @@ class Playlist(Tracklist): :type new_tracknumbers: bool """ if self.client.source == "qobuz": - self.name = self.meta['name'] + self.name = self.meta["name"] tracklist = self.meta["tracks"]["items"] def gen_cover(track): # ? @@ -993,7 +1013,7 @@ class Playlist(Tracklist): return {"track": track, "album": track["album"]} elif self.client.source == "tidal": - self.name = self.meta['title'] + self.name = self.meta["title"] tracklist = self.meta["tracks"] def gen_cover(track): @@ -1007,7 +1027,7 @@ class Playlist(Tracklist): } elif self.client.source == "deezer": - self.name = self.meta['title'] + self.name = self.meta["title"] tracklist = self.meta["tracks"] def gen_cover(track): @@ -1063,7 +1083,7 @@ class Playlist(Tracklist): for track in self: track.download(parent_folder=folder, quality=quality, database=database) if self.client.source != "deezer": - track.tag(embed_cover=kwargs.get('embed_cover', True)) + track.tag(embed_cover=kwargs.get("embed_cover", True)) @staticmethod def _parse_get_resp(item: dict, client: ClientInterface): @@ -1079,7 +1099,7 @@ class Playlist(Tracklist): if client.source == "qobuz": return { "name": item["name"], - "id": item['id'], + "id": item["id"], } elif client.source == "tidal": return { diff --git a/streamrip/utils.py b/streamrip/utils.py index 9d01134..fb3d735 100644 --- a/streamrip/utils.py +++ b/streamrip/utils.py @@ -108,7 +108,7 @@ def tqdm_download(url: str, filepath: str): r = requests.get(url, allow_redirects=True, stream=True) total = int(r.headers.get("content-length", 0)) logger.debug(f"File size = {total}") - if total < 1000: + if total < 1000 and not url.endswith('jpg'): raise NonStreamable try: From 74aca34e6a080b5a9d7b1e583c393068f8abd27b Mon Sep 17 00:00:00 2001 From: nathom Date: Sun, 4 Apr 2021 13:28:55 -0700 Subject: [PATCH 2/4] Move soundcloud album parsing to Playlist --- streamrip/clients.py | 6 ++---- streamrip/downloader.py | 28 
+++++++--------------------- 2 files changed, 9 insertions(+), 25 deletions(-) diff --git a/streamrip/clients.py b/streamrip/clients.py index ea86b59..70b84de 100644 --- a/streamrip/clients.py +++ b/streamrip/clients.py @@ -660,14 +660,12 @@ class SoundCloudClient(ClientInterface): raise NotImplementedError def get(self, id=None, url=None, media_type="track"): - assert media_type in ("track", "playlist", "album"), f"{media_type} not supported" - if media_type == 'album': - media_type = 'playlist' + assert media_type in ("track", "playlist"), f"{media_type} not supported" if url is not None: resp, status = self._get(f"resolve?url={url}") elif id is not None: - resp, _ = self._get(f"tracks/{id}") + resp, _ = self._get(f"{media_type}s/{id}") else: raise Exception("Must provide id or url") diff --git a/streamrip/downloader.py b/streamrip/downloader.py index 5bff7c4..119b5f8 100644 --- a/streamrip/downloader.py +++ b/streamrip/downloader.py @@ -1,4 +1,5 @@ import logging +from pprint import pprint import sys import os import re @@ -752,25 +753,6 @@ class Album(Tracklist): "sampling_rate": 44100, "tracktotal": resp.get("track_total") or resp.get("nb_tracks"), } - elif client.source == 'soundcloud': - print(resp.keys()) - return { - "id": resp['id'], - "title": resp['title'], - "_artist": resp['user']['username'], - "albumartist": resp['user']['username'], - "year": resp['created_at'][:4], - "cover_urls": { - "small": resp['artwork_url'], - "large": resp['artwork_url'].replace('large', 't500x500') if resp['artwork_url'] is not None else None - }, - "url": resp['uri'], - "streamable": True, # assume to be true for convenience - "quality": 0, # always 128 kbps mp3 - # no bit depth - # no sampling rate - "tracktotal": resp['track_count'], - } raise InvalidSourceError(client.source) @@ -950,7 +932,7 @@ class Playlist(Tracklist): """Represents a downloadable playlist. 
Usage: - >>> resp = client.get('hip hop', 'playlist') + >>> resp = client.search('hip hop', 'playlist') >>> pl = Playlist.from_api(resp['items'][0], client) >>> pl.load_meta() >>> pl.download() @@ -993,7 +975,7 @@ class Playlist(Tracklist): :type new_tracknumbers: bool :param kwargs: """ - self.meta = self.client.get(self.id, "playlist") + self.meta = self.client.get(id=self.id, media_type="playlist") self._load_tracks(**kwargs) def _load_tracks(self, new_tracknumbers: bool = True): @@ -1036,6 +1018,10 @@ class Playlist(Tracklist): def meta_args(track): return {"track": track, "source": self.client.source} + elif self.client.source == 'soundcloud': + self.name = self.meta['title'] + tracklist = self.meta['tracks'] + else: raise NotImplementedError From fa72e827690866b3adc2da3231306faba528e3e4 Mon Sep 17 00:00:00 2001 From: nathom Date: Mon, 5 Apr 2021 14:40:14 -0700 Subject: [PATCH 3/4] stash --- .gitignore | 1 + streamrip/clients.py | 59 +++++++++-------- streamrip/constants.py | 3 + streamrip/converter.py | 2 +- streamrip/core.py | 23 +++++-- streamrip/downloader.py | 143 ++++++++++++++++++++++++++-------------- streamrip/metadata.py | 27 +++++++- streamrip/utils.py | 11 ++-- 8 files changed, 181 insertions(+), 88 deletions(-) diff --git a/.gitignore b/.gitignore index 1f53559..2cf9931 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ test.py /Downloads *.mp3 StreamripDownloads +*.wav diff --git a/streamrip/clients.py b/streamrip/clients.py index 70b84de..e6f6cb9 100644 --- a/streamrip/clients.py +++ b/streamrip/clients.py @@ -16,6 +16,7 @@ from .constants import ( AVAILABLE_QUALITY_IDS, DEEZER_MAX_Q, QOBUZ_FEATURED_KEYS, + SOUNDCLOUD_CLIENT_ID, TIDAL_MAX_Q, ) from .exceptions import ( @@ -52,7 +53,6 @@ DEEZER_DL = "http://dz.loaderapp.info/deezer" # SoundCloud SOUNDCLOUD_BASE = "https://api-v2.soundcloud.com" -SOUNDCLOUD_CLIENT_ID = "a3e059563d7fd3372b49b37f00a00bcf" # ----------- Abstract Classes ----------------- @@ -105,12 +105,18 @@ class 
ClientInterface(ABC): def source(self): pass + @property + @abstractmethod + def max_quality(self): + pass + # ------------- Clients ----------------- class QobuzClient(ClientInterface): source = "qobuz" + max_quality = 4 # ------- Public Methods ------------- def __init__(self): @@ -365,6 +371,7 @@ class QobuzClient(ClientInterface): class DeezerClient(ClientInterface): source = "deezer" + max_quality = 2 def __init__(self): self.session = requests.Session() @@ -425,6 +432,7 @@ class DeezerClient(ClientInterface): class TidalClient(ClientInterface): source = "tidal" + max_quality = 3 def __init__(self): self.logged_in = False @@ -647,6 +655,7 @@ class TidalClient(ClientInterface): class SoundCloudClient(ClientInterface): source = "soundcloud" + max_quality = 0 def __init__(self): self.session = requests.Session() @@ -659,52 +668,45 @@ class SoundCloudClient(ClientInterface): def login(self): raise NotImplementedError - def get(self, id=None, url=None, media_type="track"): + def get(self, id, media_type="track"): assert media_type in ("track", "playlist"), f"{media_type} not supported" - if url is not None: - resp, status = self._get(f"resolve?url={url}") - elif id is not None: + if media_type == "track": resp, _ = self._get(f"{media_type}s/{id}") + elif "http" in id: + resp, _ = self._get(f"resolve?url={id}") else: - raise Exception("Must provide id or url") + raise Exception(id) return resp - def get_file_url(self, track: dict, **kwargs) -> str: - if not track['streamable'] or track['policy'] == 'BLOCK': + def get_file_url(self, track: dict, quality) -> dict: + if not track["streamable"] or track["policy"] == "BLOCK": raise Exception - if track['downloadable'] and track['has_downloads_left']: - resp, status = self._get("tracks/{id}/download") - return resp['redirectUri'] + if track["downloadable"] and track["has_downloads_left"]: + r = self._get(f"tracks/{track['id']}/download", resp_obj=True) + return {"url": r.json()["redirectUri"], "type": "original"} else: 
url = None - for tc in track['media']['transcodings']: - fmt = tc['format'] - if fmt['protocol'] == 'hls' and fmt['mime_type'] == 'audio/mpeg': - url = tc['url'] + for tc in track["media"]["transcodings"]: + fmt = tc["format"] + if fmt["protocol"] == "hls" and fmt["mime_type"] == "audio/mpeg": + url = tc["url"] break assert url is not None resp, _ = self._get(url, no_base=True) - return resp['url'] + return {"url": resp["url"], "type": "mp3"} - pprint(resp) - - if status in (401, 404): - raise Exception - - return resp["redirectUri"] - - def search(self, query: str, media_type='album'): - params = {'q': query} + def search(self, query: str, media_type="album"): + params = {"q": query} resp, _ = self._get(f"search/{media_type}s", params=params) return resp - def _get(self, path, params=None, no_base=False): + def _get(self, path, params=None, no_base=False, resp_obj=False): if params is None: params = {} params["client_id"] = SOUNDCLOUD_CLIENT_ID @@ -713,6 +715,9 @@ class SoundCloudClient(ClientInterface): else: url = f"{SOUNDCLOUD_BASE}/{path}" + logger.debug(f"Fetching url {url}") r = self.session.get(url, params=params) - print(r.text) + if resp_obj: + return r + return r.json(), r.status_code diff --git a/streamrip/constants.py b/streamrip/constants.py index 753eeff..31f689c 100644 --- a/streamrip/constants.py +++ b/streamrip/constants.py @@ -19,6 +19,7 @@ AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:83.0) Gecko/20100101 Firef TIDAL_COVER_URL = "https://resources.tidal.com/images/{uuid}/{width}x{height}.jpg" EXT = { + 0: ".mp3", 1: ".mp3", 2: ".flac", 3: ".flac", @@ -137,6 +138,8 @@ URL_REGEX = ( r"https:\/\/(?:www|open|play|listen)?\.?(\w+)\.com(?:(?:\/(track|playlist|album|" r"artist|label))|(?:\/[-\w]+?))+\/([-\w]+)" ) +SOUNDCLOUD_URL_REGEX = r"https://soundcloud.com/[-\w:]+" +SOUNDCLOUD_CLIENT_ID = "a3e059563d7fd3372b49b37f00a00bcf" TIDAL_MAX_Q = 7 diff --git a/streamrip/converter.py b/streamrip/converter.py index 95a81f6..4d44344 100644 --- 
a/streamrip/converter.py +++ b/streamrip/converter.py @@ -97,7 +97,7 @@ class Converter: "-i", self.filename, "-loglevel", - "warning", + "panic", "-c:a", self.codec_lib, ] diff --git a/streamrip/core.py b/streamrip/core.py index efcb1d1..545b3e8 100644 --- a/streamrip/core.py +++ b/streamrip/core.py @@ -9,9 +9,9 @@ from typing import Generator, Optional, Tuple, Union import click -from .clients import DeezerClient, QobuzClient, TidalClient +from .clients import DeezerClient, QobuzClient, SoundCloudClient, TidalClient from .config import Config -from .constants import CONFIG_PATH, DB_PATH, URL_REGEX +from .constants import CONFIG_PATH, DB_PATH, SOUNDCLOUD_URL_REGEX, URL_REGEX from .db import MusicDB from .downloader import Album, Artist, Label, Playlist, Track from .exceptions import AuthenticationError, ParsingError @@ -27,7 +27,6 @@ MEDIA_CLASS = { "track": Track, "label": Label, } -CLIENTS = {"qobuz": QobuzClient, "tidal": TidalClient, "deezer": DeezerClient} Media = Union[Album, Playlist, Artist, Track] @@ -38,6 +37,7 @@ class MusicDL(list): ): self.url_parse = re.compile(URL_REGEX) + self.soundcloud_url_parse = re.compile(SOUNDCLOUD_URL_REGEX) self.config = config if self.config is None: self.config = Config(CONFIG_PATH) @@ -46,6 +46,7 @@ class MusicDL(list): "qobuz": QobuzClient(), "tidal": TidalClient(), "deezer": DeezerClient(), + "soundcloud": SoundCloudClient(), } if config.session["database"]["enabled"]: @@ -81,11 +82,19 @@ class MusicDL(list): raise Exception def assert_creds(self, source: str): - assert source in ("qobuz", "tidal", "deezer"), f"Invalid source {source}" + assert source in ( + "qobuz", + "tidal", + "deezer", + "soundcloud", + ), f"Invalid source {source}" if source == "deezer": # no login for deezer return + if source == "soundcloud": + return + if source == "qobuz" and ( self.config.file[source]["email"] is None or self.config.file[source]["password"] is None @@ -201,6 +210,12 @@ class MusicDL(list): :raises exceptions.ParsingError """ 
parsed = self.url_parse.findall(url) + soundcloud_urls = self.soundcloud_url_parse.findall(url) + if len(soundcloud_urls) > 0: + parsed.extend( + self.clients["soundcloud"].resolve(u) for u in soundcloud_urls + ) + logger.debug(f"Parsed urls: {parsed}") if parsed != []: diff --git a/streamrip/downloader.py b/streamrip/downloader.py index 119b5f8..1714d63 100644 --- a/streamrip/downloader.py +++ b/streamrip/downloader.py @@ -1,14 +1,15 @@ import logging -from pprint import pprint -import sys import os import re import shutil -from pprint import pformat +import subprocess +import sys +from pprint import pformat, pprint from tempfile import gettempdir from typing import Any, Callable, Optional, Tuple, Union import click +import requests from mutagen.flac import FLAC, Picture from mutagen.id3 import APIC, ID3, ID3NoHeaderError from pathvalidate import sanitize_filename, sanitize_filepath @@ -20,6 +21,7 @@ from .constants import ( EXT, FLAC_MAX_BLOCKSIZE, FOLDER_FORMAT, + SOUNDCLOUD_CLIENT_ID, TRACK_FORMAT, ) from .db import MusicDB @@ -118,17 +120,20 @@ class Track: assert hasattr(self, "id"), "id must be set before loading metadata" - track_meta = self.client.get(self.id, media_type="track") + self.resp = self.client.get(self.id, media_type="track") + pprint(self.resp) self.meta = TrackMetadata( - track=track_meta, source=self.client.source + track=self.resp, source=self.client.source ) # meta dict -> TrackMetadata object try: if self.client.source == "qobuz": - self.cover_url = track_meta["album"]["image"]["small"] + self.cover_url = self.resp["album"]["image"]["small"] elif self.client.source == "tidal": - self.cover_url = tidal_cover_url(track_meta["album"]["cover"], 320) + self.cover_url = tidal_cover_url(self.resp["album"]["cover"], 320) elif self.client.source == "deezer": - self.cover_url = track_meta["album"]["cover_medium"] + self.cover_url = self.resp["album"]["cover_medium"] + elif self.client.source == "soundcloud": + self.cover_url = 
self.resp["artwork_url"].replace("large", "t500x500") else: raise InvalidSourceError(self.client.source) except KeyError: @@ -146,7 +151,7 @@ class Track: def download( self, - quality: int = 7, + quality: int = 3, parent_folder: str = "StreamripDownloads", progress_bar: bool = True, database: MusicDB = None, @@ -164,10 +169,8 @@ class Track: :type progress_bar: bool """ # args override attributes - self.quality, self.folder = ( - quality or self.quality, - parent_folder or self.folder, - ) + self.quality = min((quality or self.quality), self.client.max_quality) + self.folder = parent_folder or self.folder self.file_format = kwargs.get("track_format", TRACK_FORMAT) self.folder = sanitize_filepath(self.folder, platform="auto") @@ -193,7 +196,12 @@ class Track: if hasattr(self, "cover_url"): # only for playlists and singles self.download_cover() - dl_info = self.client.get_file_url(self.id, quality) + if self.client.source == "soundcloud": + url_id = self.resp + else: + url_id = self.id + + dl_info = self.client.get_file_url(url_id, self.quality) temp_file = os.path.join(gettempdir(), f"~{self.id}_{quality}.tmp") logger.debug("Temporary file path: %s", temp_file) @@ -214,7 +222,8 @@ class Track: if self.client.source in ("qobuz", "tidal"): logger.debug("Downloadable URL found: %s", dl_info.get("url")) tqdm_download(dl_info["url"], temp_file) # downloads file - elif isinstance(dl_info, str): # Deezer + + elif self.client.source == "deezer": # Deezer logger.debug("Downloadable URL found: %s", dl_info) try: tqdm_download(dl_info, temp_file) # downloads file @@ -222,6 +231,34 @@ class Track: logger.debug(f"Track is not downloadable {dl_info}") click.secho("Track is not available for download", fg="red") return False + + elif self.client.source == "soundcloud": + if dl_info["type"] == "mp3": + temp_file += ".mp3" + # convert hls stream to mp3 + subprocess.call( + [ + "ffmpeg", + "-i", + dl_info, + "-c", + "copy", + "-y", + temp_file, + "-loglevel", + "fatal", + ] + ) + 
elif dl_info["type"] == "original": + tqdm_download(dl_info["url"], temp_file) + + # if a wav is returned, convert to flac + engine = converter.FLAC(temp_file) + temp_file = f"{temp_file}.flac" + engine.convert(custom_fn=temp_file) + + self.final_path = self.final_path.replace(".mp3", ".flac") + self.quality = 2 else: raise InvalidSourceError(self.client.source) @@ -260,9 +297,6 @@ class Track: else: logger.debug("Cover already exists, skipping download") - self.cover = Tracklist.get_cover_obj(self.cover_path, self.quality) - logger.debug(f"Cover obj: {self.cover}") - def format_final_path(self) -> str: """Return the final filepath of the downloaded file. @@ -361,16 +395,13 @@ class Track: self.container = "FLAC" logger.debug("Tagging file with %s container", self.container) audio = FLAC(self.final_path) - elif self.quality == 1: + elif self.quality <= 1: self.container = "MP3" logger.debug("Tagging file with %s container", self.container) try: audio = ID3(self.final_path) except ID3NoHeaderError: audio = ID3() - elif self.quality == 0: # tidal and deezer - # TODO: add compatibility with MP4 container - raise NotImplementedError("Qualities < 320kbps not implemented") else: raise InvalidQuality(f'Invalid quality: "{self.quality}"') @@ -379,9 +410,9 @@ class Track: audio[k] = v if embed_cover and cover is None: - assert hasattr(self, "cover") - cover = self.cover + assert hasattr(self, "cover_path") + cover = Tracklist.get_cover_obj(self.cover_path, self.quality) if isinstance(audio, FLAC): if embed_cover: audio.add_picture(cover) @@ -882,7 +913,7 @@ class Album(Tracklist): else: fmt[key] = None - if fmt.get('sampling_rate', False): + if fmt.get("sampling_rate", False): fmt["sampling_rate"] /= 1000 # 48.0kHz -> 48kHz, 44.1kHz -> 44.1kHz if fmt["sampling_rate"] % 1 == 0.0: @@ -1015,39 +1046,44 @@ class Playlist(Tracklist): def gen_cover(track): return track["album"]["cover_medium"] - def meta_args(track): - return {"track": track, "source": self.client.source} + elif 
self.client.source == "soundcloud": + pprint(self.meta) + self.name = self.meta["title"] + tracklist = self.meta["tracks"] - elif self.client.source == 'soundcloud': - self.name = self.meta['title'] - tracklist = self.meta['tracks'] + def gen_cover(track): + return track["artwork_url"].replace("large", "t500x500") else: raise NotImplementedError - for i, track in enumerate(tracklist): - # TODO: This should be managed with .m3u files and alike. Arbitrary - # tracknumber tags might cause conflicts if the playlist files are - # inside of a library folder - meta = TrackMetadata(**meta_args(track)) - if new_tracknumbers: - meta["tracknumber"] = str(i + 1) + if self.client.source == "soundcloud": + # No meta is included in soundcloud playlist + # response, so it is loaded at download time + for track in tracklist: + self.append(Track(self.client, id=track["id"])) + else: + for track in tracklist: + # TODO: This should be managed with .m3u files and alike. Arbitrary + # tracknumber tags might cause conflicts if the playlist files are + # inside of a library folder + meta = TrackMetadata(track=track, source=self.client.source) - self.append( - Track( - self.client, - id=track.get("id"), - meta=meta, - cover_url=gen_cover(track), + self.append( + Track( + self.client, + id=track.get("id"), + meta=meta, + cover_url=gen_cover(track), + ) ) - ) logger.debug(f"Loaded {len(self)} tracks from playlist {self.name}") def download( self, - parent_folder: str = "Downloads", - quality: int = 6, + parent_folder: str = "StreamripDownloads", + quality: int = 3, filters: Callable = None, database: MusicDB = None, **kwargs, @@ -1066,9 +1102,18 @@ class Playlist(Tracklist): logger.debug(f"Parent folder {folder}") self.download_message() - for track in self: - track.download(parent_folder=folder, quality=quality, database=database) - if self.client.source != "deezer": + for i, track in enumerate(self): + if self.client.source == "soundcloud": + track.load_meta() + + if 
kwargs.get("new_tracknumbers", True): + track.meta["tracknumber"] = str(i + 1) + + if ( + track.download(parent_folder=folder, quality=quality, database=database) + and self.client.source != "deezer" + ): + track.tag(embed_cover=kwargs.get("embed_cover", True)) @staticmethod diff --git a/streamrip/metadata.py b/streamrip/metadata.py index 481e280..b021e1e 100644 --- a/streamrip/metadata.py +++ b/streamrip/metadata.py @@ -2,6 +2,7 @@ import json import logging import re import sys +from pprint import pprint from typing import Generator, Optional, Tuple, Union from .constants import ( @@ -113,9 +114,10 @@ class TrackMetadata: self.date = resp.get("release_date") self.albumartist = resp.get("artist", {}).get("name") self.label = resp.get("label") - + elif self.__source == "soundcloud": + raise Exception else: - raise ValueError + raise ValueError(self.__source) def add_track_meta(self, track: dict): """Parse the metadata from a track dict returned by the @@ -150,8 +152,27 @@ class TrackMetadata: self.discnumber = track.get("disk_number") self.artist = track.get("artist", {}).get("name") + elif self.__source == "soundcloud": + self.title = track["title"].strip() + print(f"{self.title=}") + self.genre = track["genre"] + print(f"{self.genre=}") + self.artist = track["user"]["username"] + self.albumartist = self.artist + print(f"{self.artist=}") + self.year = track["created_at"][:4] + print(f"{self.year=}") + self.label = track["label_name"] + print(f"{self.label=}") + self.comment = track["description"] + print(f"{self.comment=}") + self.tracknumber = 0 + print(f"{self.tracknumber=}") + self.tracktotal = 0 + print(f"{self.tracktotal=}") + else: - raise ValueError + raise ValueError(self.__source) if track.get("album"): self.add_album_meta(track["album"]) diff --git a/streamrip/utils.py b/streamrip/utils.py index fb3d735..5a0b886 100644 --- a/streamrip/utils.py +++ b/streamrip/utils.py @@ -96,7 +96,7 @@ def get_quality_id(bit_depth: Optional[int], sampling_rate: 
Optional[int]): return 4 -def tqdm_download(url: str, filepath: str): +def tqdm_download(url: str, filepath: str, params: dict = None): """Downloads a file with a progress bar. :param url: url to direct download @@ -104,11 +104,14 @@ def tqdm_download(url: str, filepath: str): :type url: str :type filepath: str """ - logger.debug(f"Downloading {url} to {filepath}") - r = requests.get(url, allow_redirects=True, stream=True) + logger.debug(f"Downloading {url} to {filepath} with params {params}") + if params is None: + params = {} + + r = requests.get(url, allow_redirects=True, stream=True, params=params) total = int(r.headers.get("content-length", 0)) logger.debug(f"File size = {total}") - if total < 1000 and not url.endswith('jpg'): + if total < 1000 and not url.endswith("jpg"): raise NonStreamable try: From f3274693bb2eacee9551e2633cc4feeb58a9094d Mon Sep 17 00:00:00 2001 From: nathom Date: Mon, 5 Apr 2021 17:42:24 -0700 Subject: [PATCH 4/4] Soundcloud downloads working --- streamrip/clients.py | 17 +++++------------ streamrip/config.py | 3 +++ streamrip/constants.py | 5 +++-- streamrip/core.py | 20 ++++++++++++++------ streamrip/downloader.py | 18 ++++++++---------- streamrip/metadata.py | 10 +--------- 6 files changed, 34 insertions(+), 39 deletions(-) diff --git a/streamrip/clients.py b/streamrip/clients.py index e6f6cb9..67c45c9 100644 --- a/streamrip/clients.py +++ b/streamrip/clients.py @@ -656,14 +656,7 @@ class TidalClient(ClientInterface): class SoundCloudClient(ClientInterface): source = "soundcloud" max_quality = 0 - - def __init__(self): - self.session = requests.Session() - self.session.headers.update( - { - "User-Agent": AGENT, - } - ) + logged_in = True def login(self): raise NotImplementedError @@ -671,10 +664,10 @@ class SoundCloudClient(ClientInterface): def get(self, id, media_type="track"): assert media_type in ("track", "playlist"), f"{media_type} not supported" - if media_type == "track": - resp, _ = self._get(f"{media_type}s/{id}") - elif 
"http" in id: + if "http" in str(id): resp, _ = self._get(f"resolve?url={id}") + elif media_type == "track": + resp, _ = self._get(f"{media_type}s/{id}") else: raise Exception(id) @@ -716,7 +709,7 @@ class SoundCloudClient(ClientInterface): url = f"{SOUNDCLOUD_BASE}/{path}" logger.debug(f"Fetching url {url}") - r = self.session.get(url, params=params) + r = requests.get(url, params=params) if resp_obj: return r diff --git a/streamrip/config.py b/streamrip/config.py index 9b4a524..4915aa4 100644 --- a/streamrip/config.py +++ b/streamrip/config.py @@ -54,6 +54,9 @@ class Config: "deezer": { "quality": 2, }, + "soundcloud": { + "quality": 0, + }, "database": {"enabled": True, "path": None}, "conversion": { "enabled": False, diff --git a/streamrip/constants.py b/streamrip/constants.py index 31f689c..ee1fe1c 100644 --- a/streamrip/constants.py +++ b/streamrip/constants.py @@ -135,13 +135,14 @@ FOLDER_FORMAT = ( TRACK_FORMAT = "{tracknumber}. {artist} - {title}" URL_REGEX = ( - r"https:\/\/(?:www|open|play|listen)?\.?(\w+)\.com(?:(?:\/(track|playlist|album|" + r"https:\/\/(?:www|open|play|listen)?\.?(qobuz|tidal|deezer)\.com(?:(?:\/(track|playlist|album|" r"artist|label))|(?:\/[-\w]+?))+\/([-\w]+)" ) -SOUNDCLOUD_URL_REGEX = r"https://soundcloud.com/[-\w:]+" +SOUNDCLOUD_URL_REGEX = r"https://soundcloud.com/[-\w:/]+" SOUNDCLOUD_CLIENT_ID = "a3e059563d7fd3372b49b37f00a00bcf" TIDAL_MAX_Q = 7 DEEZER_MAX_Q = 6 AVAILABLE_QUALITY_IDS = (0, 1, 2, 3, 4) +MEDIA_TYPES = ("track", "album", "artist", "label", "playlist") diff --git a/streamrip/core.py b/streamrip/core.py index 545b3e8..13e6349 100644 --- a/streamrip/core.py +++ b/streamrip/core.py @@ -1,4 +1,5 @@ import logging +from pprint import pprint import os import re import sys @@ -11,7 +12,7 @@ import click from .clients import DeezerClient, QobuzClient, SoundCloudClient, TidalClient from .config import Config -from .constants import CONFIG_PATH, DB_PATH, SOUNDCLOUD_URL_REGEX, URL_REGEX +from .constants import (CONFIG_PATH, 
DB_PATH, SOUNDCLOUD_URL_REGEX, URL_REGEX, MEDIA_TYPES) from .db import MusicDB from .downloader import Album, Artist, Label, Playlist, Track from .exceptions import AuthenticationError, ParsingError @@ -127,6 +128,11 @@ class MusicDL(list): client = self.get_client(source) + if media_type not in MEDIA_TYPES: + if 'playlist' in media_type: # for SoundCloud + media_type = 'playlist' + + assert media_type in MEDIA_TYPES, media_type item = MEDIA_CLASS[media_type](client=client, id=item_id) self.append(item) @@ -209,12 +215,14 @@ class MusicDL(list): :raises exceptions.ParsingError """ - parsed = self.url_parse.findall(url) + parsed = self.url_parse.findall(url) # Qobuz, Tidal, Deezer soundcloud_urls = self.soundcloud_url_parse.findall(url) - if len(soundcloud_urls) > 0: - parsed.extend( - self.clients["soundcloud"].resolve(u) for u in soundcloud_urls - ) + soundcloud_items = [self.clients["soundcloud"].get(u) for u in soundcloud_urls] + + parsed.extend( + ("soundcloud", item["kind"], url) + for item, url in zip(soundcloud_items, soundcloud_urls) + ) logger.debug(f"Parsed urls: {parsed}") diff --git a/streamrip/downloader.py b/streamrip/downloader.py index 1714d63..889f6e3 100644 --- a/streamrip/downloader.py +++ b/streamrip/downloader.py @@ -121,7 +121,6 @@ class Track: assert hasattr(self, "id"), "id must be set before loading metadata" self.resp = self.client.get(self.id, media_type="track") - pprint(self.resp) self.meta = TrackMetadata( track=self.resp, source=self.client.source ) # meta dict -> TrackMetadata object @@ -133,7 +132,7 @@ class Track: elif self.client.source == "deezer": self.cover_url = self.resp["album"]["cover_medium"] elif self.client.source == "soundcloud": - self.cover_url = self.resp["artwork_url"].replace("large", "t500x500") + self.cover_url = (self.resp["artwork_url"] or self.resp['user'].get("avatar_url")).replace("large", "t500x500") else: raise InvalidSourceError(self.client.source) except KeyError: @@ -169,7 +168,7 @@ class Track: :type
progress_bar: bool """ # args override attributes - self.quality = min((quality or self.quality), self.client.max_quality) + self.quality = min(quality, self.client.max_quality) self.folder = parent_folder or self.folder self.file_format = kwargs.get("track_format", TRACK_FORMAT) @@ -194,6 +193,7 @@ class Track: return False if hasattr(self, "cover_url"): # only for playlists and singles + logger.debug("Downloading cover") self.download_cover() if self.client.source == "soundcloud": @@ -203,7 +203,7 @@ class Track: dl_info = self.client.get_file_url(url_id, self.quality) - temp_file = os.path.join(gettempdir(), f"~{self.id}_{quality}.tmp") + temp_file = os.path.join(gettempdir(), f"~{hash(self.id)}_{quality}.tmp") logger.debug("Temporary file path: %s", temp_file) if self.client.source == "qobuz": @@ -240,7 +240,7 @@ class Track: [ "ffmpeg", "-i", - dl_info, + dl_info['url'], "-c", "copy", "-y", @@ -288,7 +288,7 @@ class Track: assert hasattr(self, "cover_url"), "must set cover_url attribute" - self.cover_path = os.path.join(self.folder, f"cover{hash(self.meta.album)}.jpg") + self.cover_path = os.path.join(self.folder, f"cover{hash(self.cover_url)}.jpg") logger.debug(f"Downloading cover from {self.cover_url}") click.secho(f"\nDownloading cover art for {self!s}", fg="blue") @@ -1019,7 +1019,7 @@ class Playlist(Tracklist): self.name = self.meta["name"] tracklist = self.meta["tracks"]["items"] - def gen_cover(track): # ? 
+ def gen_cover(track): return track["album"]["image"]["small"] def meta_args(track): @@ -1047,7 +1047,6 @@ class Playlist(Tracklist): return track["album"]["cover_medium"] elif self.client.source == "soundcloud": - pprint(self.meta) self.name = self.meta["title"] tracklist = self.meta["tracks"] @@ -1126,7 +1125,6 @@ class Playlist(Tracklist): :param client: :type client: ClientInterface """ - print(item.keys()) if client.source == "qobuz": return { "name": item["name"], @@ -1223,7 +1221,7 @@ class Artist(Tracklist): def download( self, - parent_folder: str = "Downloads", + parent_folder: str = "StreamripDownloads", filters: Optional[Tuple] = None, no_repeats: bool = False, quality: int = 6, diff --git a/streamrip/metadata.py b/streamrip/metadata.py index b021e1e..99bfc7a 100644 --- a/streamrip/metadata.py +++ b/streamrip/metadata.py @@ -154,22 +154,14 @@ class TrackMetadata: elif self.__source == "soundcloud": self.title = track["title"].strip() - print(f"{self.title=}") self.genre = track["genre"] - print(f"{self.genre=}") self.artist = track["user"]["username"] self.albumartist = self.artist - print(f"{self.artist=}") self.year = track["created_at"][:4] - print(f"{self.year=}") self.label = track["label_name"] - print(f"{self.label=}") - self.comment = track["description"] - print(f"{self.comment=}") + self.description = track["description"] self.tracknumber = 0 - print(f"{self.tracknumber=}") self.tracktotal = 0 - print(f"{self.tracktotal=}") else: raise ValueError(self.__source)