Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Lyrics w. Timestamps #662

Closed
wants to merge 12 commits into from
21 changes: 19 additions & 2 deletions tests/mixins/test_browsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pytest

from tests.test_helpers import is_ci
from ytmusicapi import LyricLine


class TestBrowsing:
Expand Down Expand Up @@ -164,9 +165,25 @@ def test_get_song_related_content(self, yt_oauth, sample_video):

def test_get_lyrics(self, config, yt, sample_video):
playlist = yt.get_watch_playlist(sample_video)
# test normal lyrics
lyrics_song = yt.get_lyrics(playlist["lyrics"])
assert lyrics_song["lyrics"] is not None
assert lyrics_song["source"] is not None
assert lyrics_song is not None
assert isinstance(lyrics_song["lyrics"], str)
assert lyrics_song["hasTimestamps"] is False

# test lyrics with timestamps
lyrics_song = yt.get_lyrics(playlist["lyrics"], timestamps = True)
assert lyrics_song is not None
assert isinstance(lyrics_song["lyrics"], list)
assert lyrics_song["hasTimestamps"] is True

# check the LyricLine object
song = lyrics_song["lyrics"][0]
assert isinstance(song, LyricLine)
assert isinstance(song.text, str)
assert isinstance(song.start_time, int)
assert isinstance(song.end_time, int)
assert isinstance(song.id, int)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure how useful these asserts are. Better to verify the length of the text or the time being greater than a specific value.

I.e., verify the usefulness of the actual values, not just the type

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The text could easily be empty if there was a pause. One-character lines are quite common at the beginning of a song (there's that note character).


playlist = yt.get_watch_playlist(config["uploads"]["private_upload_id"])
assert playlist["lyrics"] is None
Expand Down
6 changes: 4 additions & 2 deletions ytmusicapi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@

from ytmusicapi.setup import setup, setup_oauth
from ytmusicapi.ytmusic import YTMusic
from .mixins.browsing import Lyrics, TimedLyrics, LyricLine
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please undo the changes to this file. They are not needed and I don't want to start exporting everything from root

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking something similar, but then you'll need to create a submodule for these classes to be accessible.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can already access them: from ytmusicapi.mixins.browsing import TimedLyrics

It's similar for https://github.com/sigma67/ytmusicapi/blob/main/ytmusicapi/parsers/podcasts.py

Although with #621 in mind, it might be wise to move them to a ytmusicapi.models module (one for podcasts and one for lyrics, I guess)


try:
__version__ = version("ytmusicapi")
except PackageNotFoundError:
# package is not installed
pass

__copyright__ = "Copyright 2023 sigma67"
__copyright__ = "Copyright 2024 sigma67"
__license__ = "MIT"
__title__ = "ytmusicapi"
__all__ = ["YTMusic", "setup_oauth", "setup"]
__all__ = ["YTMusic", "setup_oauth", "setup",
"Lyrics", "TimedLyrics", "LyricLine"]
11 changes: 9 additions & 2 deletions ytmusicapi/mixins/_protocol.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""protocol that defines the functions available to mixins"""

from typing import Optional, Protocol
from typing import Mapping, Optional, Protocol

from requests import Response
from requests.structures import CaseInsensitiveDict

from ytmusicapi.auth.types import AuthType
from ytmusicapi.parsers.i18n import Parser
Expand All @@ -17,15 +18,21 @@ class MixinProtocol(Protocol):

proxies: Optional[dict[str, str]]

context: dict

def _check_auth(self) -> None:
"""checks if self has authentication"""
...

def _send_request(self, endpoint: str, body: dict, additionalParams: str = "") -> dict:
"""for sending post requests to YouTube Music"""
...
sigma67 marked this conversation as resolved.
Show resolved Hide resolved

def _send_get_request(self, url: str, params: Optional[dict] = None) -> Response:
"""for sending get requests to YouTube Music"""
...

@property
def headers(self) -> dict[str, str]:
def headers(self) -> CaseInsensitiveDict[str]:
"""property for getting request headers"""
...
6 changes: 5 additions & 1 deletion ytmusicapi/mixins/_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import re
from datetime import date
from typing import Literal

from ytmusicapi.exceptions import YTMusicUserError


OrderType = Literal['a_to_z', 'z_to_a', 'recently_added']
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good change in general, but the name is too broad. Since you introduced an ArtistOrderType elsewhere, this should have a more specific name as well. Maybe LibraryOrderType ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes



def prepare_like_endpoint(rating):
if rating == "LIKE":
return "like/like"
Expand All @@ -24,7 +28,7 @@ def validate_order_parameter(order):
)


def prepare_order_params(order):
def prepare_order_params(order: OrderType):
orders = ["a_to_z", "z_to_a", "recently_added"]
if order is not None:
# determine order_params via `.contents.singleColumnBrowseResultsRenderer.tabs[0].tabRenderer.content.sectionListRenderer.contents[1].itemSectionRenderer.header.itemSectionTabbedHeaderRenderer.endItems[1].dropdownRenderer.entries[].dropdownItemRenderer.onSelectCommand.browseEndpoint.params` of `/youtubei/v1/browse` response
Expand Down
236 changes: 214 additions & 22 deletions ytmusicapi/mixins/browsing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from dataclasses import dataclass
import re
import warnings
from typing import Any, Optional
from typing import Any, Optional, TypedDict, cast

from ytmusicapi.continuations import (
get_continuations,
Expand All @@ -24,6 +25,49 @@
from ._utils import get_datestamp


@dataclass
class LyricLine:
"""Represents a line of lyrics with timestamps (in milliseconds).

Args:
text (str): The lyric text.
start_time (int): Start of the lyric in milliseconds.
end_time (int): End of the lyric in milliseconds.
id (int): A Metadata-Id that probably uniquely identifies each lyric line.
"""
text: str
start_time: int
end_time: int
id: int

@classmethod
def from_raw(cls, raw_lyric: dict):
"""
Converts lyrics in the format from the api to a more reasonable format

:param raw_lyric: The raw lyric-data returned by the mobile api.
:return LyricLine: A `LyricLine`
"""
text = raw_lyric["lyricLine"]
cue_range = raw_lyric["cueRange"]
start_time = int(cue_range["startTimeMilliseconds"])
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this break if the cueRange values are not valid ints?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure, but it would also break if there was no cueRange or any other keys

end_time = int(cue_range["endTimeMilliseconds"])
id = int(cue_range["metadata"]["id"])
return cls(text, start_time, end_time, id)


class Lyrics(TypedDict):
    """Lyrics payload without timing data.

    ``lyrics`` holds the whole text as one string and ``hasTimestamps`` is
    always ``False``, which lets callers tell this shape apart from
    ``TimedLyrics``.
    """

    lyrics: str
    # attribution text for the lyrics; may be None
    source: Optional[str]
    hasTimestamps: Literal[False]


class TimedLyrics(TypedDict):
    """Lyrics payload with per-line timing data.

    ``lyrics`` is a list of ``LyricLine`` entries and ``hasTimestamps`` is
    always ``True``, which lets callers tell this shape apart from ``Lyrics``.
    """

    lyrics: list[LyricLine]
    # attribution text for the lyrics; may be None
    source: Optional[str]
    hasTimestamps: Literal[True]


class BrowsingMixin(MixinProtocol):
def get_home(self, limit=3) -> list[dict]:
"""
Expand Down Expand Up @@ -271,13 +315,15 @@ def get_artist(self, channelId: str) -> dict:
musicShelf = nav(results[0], MUSIC_SHELF)
if "navigationEndpoint" in nav(musicShelf, TITLE):
artist["songs"]["browseId"] = nav(musicShelf, TITLE + NAVIGATION_BROWSE_ID)
artist["songs"]["results"] = parse_playlist_items(musicShelf["contents"])
artist["songs"]["results"] = parse_playlist_items(musicShelf["contents"]) # type: ignore
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why type ignore? please remove

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pylance believes you're accessing a None value because browseId was assigned to None before 🤷


artist.update(self.parser.parse_channel_contents(results))
return artist

ArtistOrderType = Literal['Recency', 'Popularity', 'Alphabetical order']

def get_artist_albums(
self, channelId: str, params: str, limit: Optional[int] = 100, order: Optional[str] = None
self, channelId: str, params: str, limit: Optional[int] = 100, order: Optional[ArtistOrderType] = None
) -> list[dict]:
"""
Get the full list of an artist's albums, singles or shows
Expand Down Expand Up @@ -836,34 +882,180 @@ def get_song_related(self, browseId: str):
sections = nav(response, ["contents", *SECTION_LIST])
return parse_mixed_content(sections)

def get_lyrics(self, browseId: str) -> dict:

@overload
def get_lyrics(self, browseId: str, timestamps: Literal[False] = False) -> Optional[Lyrics]:
"""
Returns lyrics of a song or video.
Returns lyrics of a song or video. When `timestamps` is set, lyrics are returned with
timestamps, if available.

:param browseId: Lyrics browse id obtained from `get_watch_playlist`
:return: Dictionary with song lyrics.
:param browseId: Lyrics browse-id obtained from :py:func:`get_watch_playlist` (startswith `MPLYt`).
:param timestamps: Whether to return bare lyrics or lyrics with timestamps, if available.
:return: Dictionary with song lyrics or `None`, if no lyrics are found.
The `hasTimestamps`-key determines the format of the data.

Example::

Example when `timestamps` is set to `False`, or no timestamps are available::

{
"lyrics": "Today is gonna be the day\\nThat they're gonna throw it back to you\\n",
"source": "Source: LyricFind"
}
{
"lyrics": "Today is gonna be the day\\nThat they're gonna throw it back to you\\n",
"source": "Source: LyricFind",
"hasTimestamps": False
}

Example when `timestamps` is set to `True` and timestamps are available::

{
"lyrics": [
LyricLine(
text="I was a liar",
start_time=9200,
end_time=10630,
id=1
),
LyricLine(
text="I gave in to the fire",
start_time=10680,
end_time=12540,
id=2
),
],
"source": "Source: LyricFind",
"hasTimestamps": True
}

"""
lyrics = {}

@overload
def get_lyrics(self, browseId: str, timestamps: Literal[True] = True) -> Optional[Lyrics|TimedLyrics]:
"""
Returns lyrics of a song or video. When `timestamps` is set, lyrics are returned with
sigma67 marked this conversation as resolved.
Show resolved Hide resolved
timestamps, if available.

:param browseId: Lyrics browse-id obtained from :py:func:`get_watch_playlist` (startswith `MPLYt`).
:param timestamps: Whether to return bare lyrics or lyrics with timestamps, if available.
:return: Dictionary with song lyrics or `None`, if no lyrics are found.
The `hasTimestamps`-key determines the format of the data.


Example when `timestamps` is set to `False`, or no timestamps are available::

{
"lyrics": "Today is gonna be the day\\nThat they're gonna throw it back to you\\n",
"source": "Source: LyricFind",
"hasTimestamps": False
}

Example when `timestamps` is set to `True` and timestamps are available::

{
"lyrics": [
LyricLine(
text="I was a liar",
start_time=9200,
end_time=10630,
id=1
),
LyricLine(
text="I gave in to the fire",
start_time=10680,
end_time=12540,
id=2
),
],
"source": "Source: LyricFind",
"hasTimestamps": True
}

"""

def get_lyrics(self, browseId: str, timestamps: bool = False) -> Optional[Lyrics|TimedLyrics]:
"""
Returns lyrics of a song or video. When `timestamps` is set, lyrics are returned with
timestamps, if available.

:param browseId: Lyrics browse-id obtained from :py:func:`get_watch_playlist` (startswith `MPLYt`).
:param timestamps: Whether to return bare lyrics or lyrics with timestamps, if available.
:return: Dictionary with song lyrics or `None`, if no lyrics are found.
The `hasTimestamps`-key determines the format of the data.


Example when `timestamps` is set to `False`, or no timestamps are available::

{
"lyrics": "Today is gonna be the day\\nThat they're gonna throw it back to you\\n",
"source": "Source: LyricFind",
"hasTimestamps": False
}

Example when `timestamps` is set to `True` and timestamps are available::

{
"lyrics": [
LyricLine(
text="I was a liar",
start_time=9200,
end_time=10630,
id=1
),
LyricLine(
text="I gave in to the fire",
start_time=10680,
end_time=12540,
id=2
),
],
"source": "Source: LyricFind",
"hasTimestamps": True
}

"""

lyrics: dict = {}
if not browseId:
raise YTMusicUserError("Invalid browseId provided. This song might not have lyrics.")
raise YTMusicUserError(
"Invalid browseId provided. This song might not have lyrics.")

if timestamps:
# change the client to get lyrics with timestamps (mobile only)
copied_context_client = self.context["context"]["client"].copy()
sigma67 marked this conversation as resolved.
Show resolved Hide resolved
self.context["context"]["client"].update({
"clientName": "ANDROID_MUSIC",
"clientVersion": "7.21.50"
})

response = self._send_request("browse", {"browseId": browseId})
lyrics["lyrics"] = nav(
response, ["contents", *SECTION_LIST_ITEM, *DESCRIPTION_SHELF, *DESCRIPTION], True
)
lyrics["source"] = nav(
response, ["contents", *SECTION_LIST_ITEM, *DESCRIPTION_SHELF, "footer", *RUN_TEXT], True
)

return lyrics
if timestamps:
# restore the old context
self.context["context"]["client"] = copied_context_client # type: ignore
sigma67 marked this conversation as resolved.
Show resolved Hide resolved

# unpack the response

# we got lyrics with timestamps
if timestamps and (data := nav(response, TIMESTAMPED_LYRICS, True)) is not None:
assert isinstance(data, dict)

if not "timedLyricsData" in data:
return None

lyrics["lyrics"] = list(map(LyricLine.from_raw, data["timedLyricsData"]))
lyrics["source"] = data.get("sourceMessage")
lyrics["hasTimestamps"] = True
sigma67 marked this conversation as resolved.
Show resolved Hide resolved
else:
lyrics["lyrics"] = nav(
response, ["contents", *SECTION_LIST_ITEM, *DESCRIPTION_SHELF, *DESCRIPTION], True
)

if lyrics["lyrics"] is None:
return None

lyrics["source"] = nav(
response, ["contents", *SECTION_LIST_ITEM, *DESCRIPTION_SHELF, "footer", *RUN_TEXT], True
)
lyrics["hasTimestamps"] = False

return cast(Lyrics | TimedLyrics, lyrics)

def get_basejs_url(self):
"""
Expand All @@ -876,7 +1068,7 @@ def get_basejs_url(self):
if match is None:
raise YTMusicError("Could not identify the URL for base.js player.")

return YTM_DOMAIN + match.group(1)
return cast(str, YTM_DOMAIN + match.group(1))
sigma67 marked this conversation as resolved.
Show resolved Hide resolved

def get_signatureTimestamp(self, url: Optional[str] = None) -> int:
"""
Expand Down
Loading
Loading