From 0d2ca55be5d404e81a40ef54052b10754fd8c87d Mon Sep 17 00:00:00 2001 From: nathom Date: Fri, 9 Apr 2021 16:20:03 -0700 Subject: [PATCH] last.fm working --- requirements.txt | 1 - streamrip/cli.py | 27 ++++++++++++ streamrip/config.py | 2 +- streamrip/core.py | 95 ++++++++++++++++++++++++----------------- streamrip/downloader.py | 3 +- streamrip/exceptions.py | 4 ++ streamrip/metadata.py | 10 +++-- 7 files changed, 97 insertions(+), 45 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9b1d5f5..3f60fad 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,6 @@ click ruamel.yaml packaging -bs4 pathvalidate requests mutagen diff --git a/streamrip/cli.py b/streamrip/cli.py index 1d92948..b7f8b58 100644 --- a/streamrip/cli.py +++ b/streamrip/cli.py @@ -216,6 +216,33 @@ def discover(ctx, **kwargs): none_chosen() +@cli.command() +@click.option( + "-s", "--source", help="Qobuz, Tidal, Deezer, or SoundCloud. Default: Qobuz." +) +@click.argument("URL") +@click.pass_context +def lastfm(ctx, source, url): + """Searches for tracks from a last.fm playlist on a given source. + + Examples: + + $ rip lastfm https://www.last.fm/user/nathan3895/playlists/12059037 + + Download a playlist using Qobuz as the source + + $ rip lastfm -s tidal https://www.last.fm/user/nathan3895/playlists/12059037 + + Download a playlist using Tidal as the source + """ + + if source is not None: + config.session["lastfm"]["source"] = source + + core.handle_lastfm_urls(url) + core.download() + + @cli.command() @click.option("-o", "--open", is_flag=True, help="Open the config file") @click.option("-q", "--qobuz", is_flag=True, help="Set Qobuz credentials") diff --git a/streamrip/config.py b/streamrip/config.py index 72a19ee..7b47104 100644 --- a/streamrip/config.py +++ b/streamrip/config.py @@ -82,7 +82,7 @@ class Config: }, "path_format": {"folder": FOLDER_FORMAT, "track": TRACK_FORMAT}, "check_for_updates": True, - "lastfm": {"source": "qobuz"} + "lastfm": {"source": "qobuz"}, } def __init__(self, path: str = None): diff --git a/streamrip/core.py b/streamrip/core.py index eee1f46..2569069 100644 --- a/streamrip/core.py +++ b/streamrip/core.py @@ -1,8 +1,8 @@ import logging -import time import os import re import sys +import time from getpass import getpass from hashlib import md5 from string import Formatter @@ -10,21 +10,21 @@ from typing import Generator, Optional, Tuple, Union import click import requests -from bs4 import BeautifulSoup +from tqdm import tqdm from .clients import DeezerClient, QobuzClient, SoundCloudClient, TidalClient from .config import Config from .constants import ( CONFIG_PATH, DB_PATH, + LASTFM_URL_REGEX, MEDIA_TYPES, SOUNDCLOUD_URL_REGEX, - LASTFM_URL_REGEX, URL_REGEX, ) from .db import MusicDB from .downloader import Album, Artist, Label, Playlist, Track, Tracklist -from .exceptions import AuthenticationError, ParsingError +from .exceptions import AuthenticationError, NoResultsFound, ParsingError from .utils import capitalize logger = logging.getLogger(__name__) @@ -114,18 +114,15 @@ class MusicDL(list): self.prompt_creds(source) def handle_urls(self, url: str): - """Download an url + """Download a url :param url: :type url: str :raises InvalidSourceError :raises ParsingError """ - parsed_info = self.parse_urls(url) - if parsed_info is None: - return - for source, url_type, item_id in parsed_info: + for source, url_type, item_id in self.parse_urls(url): if item_id in self.db: logger.info( f"ID {item_id} already downloaded, use --no-db to override." @@ -152,7 +149,6 @@ class MusicDL(list): self.append(item) def download(self): - arguments = { "database": self.db, "parent_folder": self.config.session["downloads"]["folder"], @@ -192,7 +188,7 @@ class MusicDL(list): else: item.download(**arguments) - if self.db != [] and hasattr(item, 'id'): + if self.db != [] and hasattr(item, "id"): self.db.add(item.id) if self.config.session["conversion"]["enabled"]: @@ -246,9 +242,6 @@ class MusicDL(list): parsed = self.url_parse.findall(url) # Qobuz, Tidal, Dezer soundcloud_urls = self.soundcloud_url_parse.findall(url) soundcloud_items = [self.clients["soundcloud"].get(u) for u in soundcloud_urls] - lastfm_urls = self.lastfm_url_parse.findall(url) - if lastfm_urls: - self.handle_lastfm_urls(lastfm_urls) parsed.extend( ("soundcloud", item["kind"], url) @@ -260,22 +253,30 @@ class MusicDL(list): if parsed != []: return parsed - if not lastfm_urls: - raise ParsingError(f"Error parsing URL: `{url}`") + raise ParsingError(f"Error parsing URL: `{url}`") - def handle_lastfm_urls(self, lastfm_urls): - lastfm_source = self.config.session['lastfm']['source'] + def handle_lastfm_urls(self, urls): + lastfm_urls = self.lastfm_url_parse.findall(urls) + lastfm_source = self.config.session["lastfm"]["source"] for purl in lastfm_urls: + click.secho(f"Fetching playlist at {purl}", fg="blue") title, queries = self.get_lastfm_playlist(purl) pl = Playlist(client=self.clients[lastfm_source], name=title) - for query in queries: - click.secho(f'Searching for "{query}"', fg='cyan') - track = next(self.search(lastfm_source, query, media_type='track')) + tracks_not_found = 0 + for title, artist in tqdm(queries, unit="tracks", desc="Searching"): + query = f"{title} {artist}" + + try: + track = next(self.search(lastfm_source, query, media_type="track")) + except NoResultsFound: + tracks_not_found += 1 + continue + pl.append(track) pl.loaded = True - time.sleep(0.2) # max 5 requests/s + click.secho(f"{tracks_not_found} tracks not found.", fg='yellow') self.append(pl) def handle_txt(self, filepath: Union[str, os.PathLike]): @@ -312,9 +313,13 @@ class MusicDL(list): if i > limit: return else: - for item in ( + items = ( results.get("data") or results.get("items") or results.get("collection") - ): + ) + if items is None: + raise NoResultsFound(query) + + for item in items: yield MEDIA_CLASS[media_type].from_api(item, client) i += 1 if i > limit: @@ -424,22 +429,34 @@ class MusicDL(list): return True def get_lastfm_playlist(self, url: str) -> Tuple[str, list]: - # code from qobuz-dl - try: - r = requests.get(url, timeout=10) - except requests.exceptions.RequestException: - click.secho("Unable to fetch playlist", fg="red") - return + info = [] + words = re.compile(r"[\w\s]+") + title_tags = re.compile('title="([^"]+)"') - soup = BeautifulSoup(r.content, "html.parser") - artists = (artist.text for artist in soup.select("td.chartlist-artist > a")) - titles = (title.text for title in soup.select("td.chartlist-name > a")) + def essence(s): + s = re.sub(r"&#\d+;", "", s) # remove HTML entities + return "".join(words.findall(s)) - queries = [f"{artist} {title}" for artist, title in zip(artists, titles)] + def get_titles(s): + titles = title_tags.findall(s)[2:] + for i in range(0, len(titles) - 1, 2): + info.append((essence(titles[i]), essence(titles[i + 1]))) - if not queries: - click.secho("No tracks found", fg="red") - return + r = requests.get(url) + get_titles(r.text) + remaining_tracks = ( + int(re.search(r'data-playlisting-entry-count="(\d+)"', r.text).group(1)) + - 50 + ) + playlist_title = re.search( + r'

([^<]+)

', r.text + ).group(1) - title = soup.select_one("h1").text - return title, queries + page = 1 + while remaining_tracks > 0: + page += 1 + r = requests.get(f"{url}?page={page}") + get_titles(r.text) + remaining_tracks -= 50 + + return playlist_title, info diff --git a/streamrip/downloader.py b/streamrip/downloader.py index 44a905b..e69eda4 100644 --- a/streamrip/downloader.py +++ b/streamrip/downloader.py @@ -912,7 +912,8 @@ class Album(Tracklist): tqdm_download(self.cover_urls[embed_cover_size], cover_path) if ( self.cover_urls.get(download_cover_size, embed_cover_size) - != embed_cover_size or os.path.size(cover_path) > FLAC_MAX_BLOCKSIZE + != embed_cover_size + or os.path.size(cover_path) > FLAC_MAX_BLOCKSIZE ): # download cover at another resolution but don't use for embed embed_cover_path = cover_path.replace(".jpg", "_embed.jpg") diff --git a/streamrip/exceptions.py b/streamrip/exceptions.py index 11b4d9b..40e657c 100644 --- a/streamrip/exceptions.py +++ b/streamrip/exceptions.py @@ -44,3 +44,7 @@ class BadEncoderOption(Exception): class ConversionError(Exception): pass + + +class NoResultsFound(Exception): + pass diff --git a/streamrip/metadata.py b/streamrip/metadata.py index ec1eaa9..c4d2fe8 100644 --- a/streamrip/metadata.py +++ b/streamrip/metadata.py @@ -100,9 +100,13 @@ class TrackMetadata: self.albumartist = safe_get(resp, "artist", "name") self.label = resp.get("label") self.description = resp.get("description") - self.disctotal = max( - track.get("media_number", 1) for track in safe_get(resp, 'tracks', 'items', default=[{}]) - ) or 1 + self.disctotal = ( + max( + track.get("media_number", 1) + for track in safe_get(resp, "tracks", "items", default=[{}]) + ) + or 1 + ) self.explicit = resp.get("parental_warning", False) if isinstance(self.label, dict):