last.fm working

This commit is contained in:
nathom 2021-04-09 16:20:03 -07:00
parent b2f75cda5c
commit 0d2ca55be5
7 changed files with 97 additions and 45 deletions

View file

@ -1,7 +1,6 @@
click click
ruamel.yaml ruamel.yaml
packaging packaging
bs4
pathvalidate pathvalidate
requests requests
mutagen mutagen

View file

@ -216,6 +216,33 @@ def discover(ctx, **kwargs):
none_chosen() none_chosen()
@cli.command()
@click.option(
    "-s", "--source", help="Qobuz, Tidal, Deezer, or SoundCloud. Default: Qobuz."
)
@click.argument("URL")
@click.pass_context
def lastfm(ctx, source, url):
    """Searches for tracks from a last.fm playlist on a given source.

    Examples:

        $ rip lastfm https://www.last.fm/user/nathan3895/playlists/12059037

            Download a playlist using Qobuz as the source

        $ rip lastfm -s tidal https://www.last.fm/user/nathan3895/playlists/12059037

            Download a playlist using Tidal as the source
    """
    # NOTE: the docstring above doubles as this command's `--help` text, so
    # maintainer notes live in comments instead.
    #
    # `ctx` is injected by `click.pass_context` and is not used here.
    if source is not None:
        # The help text spells the sources capitalised ("Qobuz", "Tidal", ...)
        # while the configured default is the lowercase "qobuz". Normalise the
        # user's input so `-s Tidal` and `-s tidal` select the same source.
        config.session["lastfm"]["source"] = source.strip().lower()

    # Queue the playlist found at the last.fm URL, then download the queue.
    core.handle_lastfm_urls(url)
    core.download()
@cli.command() @cli.command()
@click.option("-o", "--open", is_flag=True, help="Open the config file") @click.option("-o", "--open", is_flag=True, help="Open the config file")
@click.option("-q", "--qobuz", is_flag=True, help="Set Qobuz credentials") @click.option("-q", "--qobuz", is_flag=True, help="Set Qobuz credentials")

View file

@ -82,7 +82,7 @@ class Config:
}, },
"path_format": {"folder": FOLDER_FORMAT, "track": TRACK_FORMAT}, "path_format": {"folder": FOLDER_FORMAT, "track": TRACK_FORMAT},
"check_for_updates": True, "check_for_updates": True,
"lastfm": {"source": "qobuz"} "lastfm": {"source": "qobuz"},
} }
def __init__(self, path: str = None): def __init__(self, path: str = None):

View file

@ -1,8 +1,8 @@
import logging import logging
import time
import os import os
import re import re
import sys import sys
import time
from getpass import getpass from getpass import getpass
from hashlib import md5 from hashlib import md5
from string import Formatter from string import Formatter
@ -10,21 +10,21 @@ from typing import Generator, Optional, Tuple, Union
import click import click
import requests import requests
from bs4 import BeautifulSoup from tqdm import tqdm
from .clients import DeezerClient, QobuzClient, SoundCloudClient, TidalClient from .clients import DeezerClient, QobuzClient, SoundCloudClient, TidalClient
from .config import Config from .config import Config
from .constants import ( from .constants import (
CONFIG_PATH, CONFIG_PATH,
DB_PATH, DB_PATH,
LASTFM_URL_REGEX,
MEDIA_TYPES, MEDIA_TYPES,
SOUNDCLOUD_URL_REGEX, SOUNDCLOUD_URL_REGEX,
LASTFM_URL_REGEX,
URL_REGEX, URL_REGEX,
) )
from .db import MusicDB from .db import MusicDB
from .downloader import Album, Artist, Label, Playlist, Track, Tracklist from .downloader import Album, Artist, Label, Playlist, Track, Tracklist
from .exceptions import AuthenticationError, ParsingError from .exceptions import AuthenticationError, NoResultsFound, ParsingError
from .utils import capitalize from .utils import capitalize
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -114,18 +114,15 @@ class MusicDL(list):
self.prompt_creds(source) self.prompt_creds(source)
def handle_urls(self, url: str): def handle_urls(self, url: str):
"""Download an url """Download a url
:param url: :param url:
:type url: str :type url: str
:raises InvalidSourceError :raises InvalidSourceError
:raises ParsingError :raises ParsingError
""" """
parsed_info = self.parse_urls(url)
if parsed_info is None:
return
for source, url_type, item_id in parsed_info: for source, url_type, item_id in self.parse_urls(url):
if item_id in self.db: if item_id in self.db:
logger.info( logger.info(
f"ID {item_id} already downloaded, use --no-db to override." f"ID {item_id} already downloaded, use --no-db to override."
@ -152,7 +149,6 @@ class MusicDL(list):
self.append(item) self.append(item)
def download(self): def download(self):
arguments = { arguments = {
"database": self.db, "database": self.db,
"parent_folder": self.config.session["downloads"]["folder"], "parent_folder": self.config.session["downloads"]["folder"],
@ -192,7 +188,7 @@ class MusicDL(list):
else: else:
item.download(**arguments) item.download(**arguments)
if self.db != [] and hasattr(item, 'id'): if self.db != [] and hasattr(item, "id"):
self.db.add(item.id) self.db.add(item.id)
if self.config.session["conversion"]["enabled"]: if self.config.session["conversion"]["enabled"]:
@ -246,9 +242,6 @@ class MusicDL(list):
parsed = self.url_parse.findall(url) # Qobuz, Tidal, Dezer parsed = self.url_parse.findall(url) # Qobuz, Tidal, Dezer
soundcloud_urls = self.soundcloud_url_parse.findall(url) soundcloud_urls = self.soundcloud_url_parse.findall(url)
soundcloud_items = [self.clients["soundcloud"].get(u) for u in soundcloud_urls] soundcloud_items = [self.clients["soundcloud"].get(u) for u in soundcloud_urls]
lastfm_urls = self.lastfm_url_parse.findall(url)
if lastfm_urls:
self.handle_lastfm_urls(lastfm_urls)
parsed.extend( parsed.extend(
("soundcloud", item["kind"], url) ("soundcloud", item["kind"], url)
@ -260,22 +253,30 @@ class MusicDL(list):
if parsed != []: if parsed != []:
return parsed return parsed
if not lastfm_urls: raise ParsingError(f"Error parsing URL: `{url}`")
raise ParsingError(f"Error parsing URL: `{url}`")
def handle_lastfm_urls(self, lastfm_urls): def handle_lastfm_urls(self, urls):
lastfm_source = self.config.session['lastfm']['source'] lastfm_urls = self.lastfm_url_parse.findall(urls)
lastfm_source = self.config.session["lastfm"]["source"]
for purl in lastfm_urls: for purl in lastfm_urls:
click.secho(f"Fetching playlist at {purl}", fg="blue")
title, queries = self.get_lastfm_playlist(purl) title, queries = self.get_lastfm_playlist(purl)
pl = Playlist(client=self.clients[lastfm_source], name=title) pl = Playlist(client=self.clients[lastfm_source], name=title)
for query in queries: tracks_not_found = 0
click.secho(f'Searching for "{query}"', fg='cyan') for title, artist in tqdm(queries, unit="tracks", desc="Searching"):
track = next(self.search(lastfm_source, query, media_type='track')) query = f"{title} {artist}"
try:
track = next(self.search(lastfm_source, query, media_type="track"))
except NoResultsFound:
tracks_not_found += 1
continue
pl.append(track) pl.append(track)
pl.loaded = True pl.loaded = True
time.sleep(0.2) # max 5 requests/s
click.secho(f"{tracks_not_found} tracks not found.", fg='yellow')
self.append(pl) self.append(pl)
def handle_txt(self, filepath: Union[str, os.PathLike]): def handle_txt(self, filepath: Union[str, os.PathLike]):
@ -312,9 +313,13 @@ class MusicDL(list):
if i > limit: if i > limit:
return return
else: else:
for item in ( items = (
results.get("data") or results.get("items") or results.get("collection") results.get("data") or results.get("items") or results.get("collection")
): )
if items is None:
raise NoResultsFound(query)
for item in items:
yield MEDIA_CLASS[media_type].from_api(item, client) yield MEDIA_CLASS[media_type].from_api(item, client)
i += 1 i += 1
if i > limit: if i > limit:
@ -424,22 +429,34 @@ class MusicDL(list):
return True return True
def get_lastfm_playlist(self, url: str) -> Tuple[str, list]: def get_lastfm_playlist(self, url: str) -> Tuple[str, list]:
# code from qobuz-dl info = []
try: words = re.compile(r"[\w\s]+")
r = requests.get(url, timeout=10) title_tags = re.compile('title="([^"]+)"')
except requests.exceptions.RequestException:
click.secho("Unable to fetch playlist", fg="red")
return
soup = BeautifulSoup(r.content, "html.parser") def essence(s):
artists = (artist.text for artist in soup.select("td.chartlist-artist > a")) s = re.sub(r"&#\d+;", "", s) # remove HTML entities
titles = (title.text for title in soup.select("td.chartlist-name > a")) return "".join(words.findall(s))
queries = [f"{artist} {title}" for artist, title in zip(artists, titles)] def get_titles(s):
titles = title_tags.findall(s)[2:]
for i in range(0, len(titles) - 1, 2):
info.append((essence(titles[i]), essence(titles[i + 1])))
if not queries: r = requests.get(url)
click.secho("No tracks found", fg="red") get_titles(r.text)
return remaining_tracks = (
int(re.search(r'data-playlisting-entry-count="(\d+)"', r.text).group(1))
- 50
)
playlist_title = re.search(
r'<h1 class="playlisting-playlist-header-title">([^<]+)</h1>', r.text
).group(1)
title = soup.select_one("h1").text page = 1
return title, queries while remaining_tracks > 0:
page += 1
r = requests.get(f"{url}?page={page}")
get_titles(r.text)
remaining_tracks -= 50
return playlist_title, info

View file

@ -912,7 +912,8 @@ class Album(Tracklist):
tqdm_download(self.cover_urls[embed_cover_size], cover_path) tqdm_download(self.cover_urls[embed_cover_size], cover_path)
if ( if (
self.cover_urls.get(download_cover_size, embed_cover_size) self.cover_urls.get(download_cover_size, embed_cover_size)
!= embed_cover_size or os.path.size(cover_path) > FLAC_MAX_BLOCKSIZE != embed_cover_size
or os.path.size(cover_path) > FLAC_MAX_BLOCKSIZE
): ):
# download cover at another resolution but don't use for embed # download cover at another resolution but don't use for embed
embed_cover_path = cover_path.replace(".jpg", "_embed.jpg") embed_cover_path = cover_path.replace(".jpg", "_embed.jpg")

View file

@ -44,3 +44,7 @@ class BadEncoderOption(Exception):
class ConversionError(Exception): class ConversionError(Exception):
pass pass
class NoResultsFound(Exception):
    """Signal that a search produced no results."""

View file

@ -100,9 +100,13 @@ class TrackMetadata:
self.albumartist = safe_get(resp, "artist", "name") self.albumartist = safe_get(resp, "artist", "name")
self.label = resp.get("label") self.label = resp.get("label")
self.description = resp.get("description") self.description = resp.get("description")
self.disctotal = max( self.disctotal = (
track.get("media_number", 1) for track in safe_get(resp, 'tracks', 'items', default=[{}]) max(
) or 1 track.get("media_number", 1)
for track in safe_get(resp, "tracks", "items", default=[{}])
)
or 1
)
self.explicit = resp.get("parental_warning", False) self.explicit = resp.get("parental_warning", False)
if isinstance(self.label, dict): if isinstance(self.label, dict):