-
Notifications
You must be signed in to change notification settings - Fork 216
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added Lyrics w. Timestamps #662
Changes from 6 commits
4d4a76f
14313be
0885616
28a8828
ab032f5
049b0b0
3198b1f
cfd9a4a
0c2444c
a8a1f5f
b546eed
2e1e89f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,14 +2,16 @@ | |
|
||
from ytmusicapi.setup import setup, setup_oauth | ||
from ytmusicapi.ytmusic import YTMusic | ||
from .mixins.browsing import Lyrics, TimedLyrics, LyricLine | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please undo the changes to this file. They are not needed and I don't want to start exporting everything from root. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I was thinking something similar, but then you'll need to create a submodule for these classes to be accessible. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can already access them: It's similar for https://github.com/sigma67/ytmusicapi/blob/main/ytmusicapi/parsers/podcasts.py Although with #621 in mind, it might be wise to move them to a |
||
|
||
try: | ||
__version__ = version("ytmusicapi") | ||
except PackageNotFoundError: | ||
# package is not installed | ||
pass | ||
|
||
__copyright__ = "Copyright 2023 sigma67" | ||
__copyright__ = "Copyright 2024 sigma67" | ||
__license__ = "MIT" | ||
__title__ = "ytmusicapi" | ||
__all__ = ["YTMusic", "setup_oauth", "setup"] | ||
__all__ = ["YTMusic", "setup_oauth", "setup", | ||
"Lyrics", "TimedLyrics", "LyricLine"] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,13 @@ | ||
import re | ||
from datetime import date | ||
from typing import Literal | ||
|
||
from ytmusicapi.exceptions import YTMusicUserError | ||
|
||
|
||
OrderType = Literal['a_to_z', 'z_to_a', 'recently_added'] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a good change in general, but the name is too broad. Since you introduced an There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes. |
||
|
||
|
||
def prepare_like_endpoint(rating): | ||
if rating == "LIKE": | ||
return "like/like" | ||
|
@@ -24,7 +28,7 @@ def validate_order_parameter(order): | |
) | ||
|
||
|
||
def prepare_order_params(order): | ||
def prepare_order_params(order: OrderType): | ||
orders = ["a_to_z", "z_to_a", "recently_added"] | ||
if order is not None: | ||
# determine order_params via `.contents.singleColumnBrowseResultsRenderer.tabs[0].tabRenderer.content.sectionListRenderer.contents[1].itemSectionRenderer.header.itemSectionTabbedHeaderRenderer.endItems[1].dropdownRenderer.entries[].dropdownItemRenderer.onSelectCommand.browseEndpoint.params` of `/youtubei/v1/browse` response | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
from dataclasses import dataclass | ||
import re | ||
import warnings | ||
from typing import Any, Optional | ||
from typing import Any, Optional, TypedDict, cast | ||
|
||
from ytmusicapi.continuations import ( | ||
get_continuations, | ||
|
@@ -24,6 +25,49 @@ | |
from ._utils import get_datestamp | ||
|
||
|
||
@dataclass | ||
class LyricLine: | ||
"""Represents a line of lyrics with timestamps (in milliseconds). | ||
|
||
Args: | ||
text (str): The Songtext. | ||
start_time (int): Begin of the lyric in milliseconds. | ||
end_time (int): End of the lyric in milliseconds. | ||
id (int): A Metadata-Id that probably uniquely identifies each lyric line. | ||
""" | ||
text: str | ||
start_time: int | ||
end_time: int | ||
id: int | ||
|
||
@classmethod | ||
def from_raw(cls, raw_lyric: dict): | ||
""" | ||
Converts lyrics in the format from the api to a more reasonable format | ||
|
||
:param raw_lyric: The raw lyric-data returned by the mobile api. | ||
:return LyricLine: A `LyricLine` | ||
""" | ||
text = raw_lyric["lyricLine"] | ||
cue_range = raw_lyric["cueRange"] | ||
start_time = int(cue_range["startTimeMilliseconds"]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could this break if the cueRange values are not valid ints? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sure, but it would also break if there were no cueRange or if any of the other keys were missing. |
||
end_time = int(cue_range["endTimeMilliseconds"]) | ||
id = int(cue_range["metadata"]["id"]) | ||
return cls(text, start_time, end_time, id) | ||
|
||
|
||
class Lyrics(TypedDict): | ||
lyrics: str | ||
source: Optional[str] | ||
hasTimestamps: Literal[False] | ||
|
||
|
||
class TimedLyrics(TypedDict): | ||
lyrics: list[LyricLine] | ||
source: Optional[str] | ||
hasTimestamps: Literal[True] | ||
|
||
|
||
class BrowsingMixin(MixinProtocol): | ||
def get_home(self, limit=3) -> list[dict]: | ||
""" | ||
|
@@ -271,13 +315,15 @@ def get_artist(self, channelId: str) -> dict: | |
musicShelf = nav(results[0], MUSIC_SHELF) | ||
if "navigationEndpoint" in nav(musicShelf, TITLE): | ||
artist["songs"]["browseId"] = nav(musicShelf, TITLE + NAVIGATION_BROWSE_ID) | ||
artist["songs"]["results"] = parse_playlist_items(musicShelf["contents"]) | ||
artist["songs"]["results"] = parse_playlist_items(musicShelf["contents"]) # type: ignore | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why type ignore? Please remove. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Pylance believes you're accessing a |
||
|
||
artist.update(self.parser.parse_channel_contents(results)) | ||
return artist | ||
|
||
ArtistOrderType = Literal['Recency', 'Popularity', 'Alphabetical order'] | ||
|
||
def get_artist_albums( | ||
self, channelId: str, params: str, limit: Optional[int] = 100, order: Optional[str] = None | ||
self, channelId: str, params: str, limit: Optional[int] = 100, order: Optional[ArtistOrderType] = None | ||
) -> list[dict]: | ||
""" | ||
Get the full list of an artist's albums, singles or shows | ||
|
@@ -836,34 +882,180 @@ def get_song_related(self, browseId: str): | |
sections = nav(response, ["contents", *SECTION_LIST]) | ||
return parse_mixed_content(sections) | ||
|
||
def get_lyrics(self, browseId: str) -> dict: | ||
|
||
@overload | ||
def get_lyrics(self, browseId: str, timestamps: Literal[False] = False) -> Optional[Lyrics]: | ||
""" | ||
Returns lyrics of a song or video. | ||
Returns lyrics of a song or video. When `timestamps` is set, lyrics are returned with | ||
timestamps, if available. | ||
|
||
:param browseId: Lyrics browse id obtained from `get_watch_playlist` | ||
:return: Dictionary with song lyrics. | ||
:param browseId: Lyrics browse-id obtained from :py:func:`get_watch_playlist` (startswith `MPLYt`). | ||
:param timestamps: Whether to return bare lyrics or lyrics with timestamps, if available. | ||
:return: Dictionary with song lyrics or `None`, if no lyrics are found. | ||
The `hasTimestamps`-key determines the format of the data. | ||
|
||
Example:: | ||
|
||
Example when `timestamps` is set to `False`, or not timestamps are available:: | ||
|
||
{ | ||
"lyrics": "Today is gonna be the day\\nThat they're gonna throw it back to you\\n", | ||
"source": "Source: LyricFind" | ||
} | ||
{ | ||
"lyrics": "Today is gonna be the day\\nThat they're gonna throw it back to you\\n", | ||
"source": "Source: LyricFind", | ||
"hasTimestamps": False | ||
} | ||
|
||
Example when `timestamps` is set to `True` and timestamps are available:: | ||
|
||
{ | ||
"lyrics": [ | ||
LyricLine( | ||
text="I was a liar", | ||
start_time=9200, | ||
end_time=10630, | ||
id=1 | ||
), | ||
LyricLine( | ||
text="I gave in to the fire", | ||
start_time=10680, | ||
end_time=12540, | ||
id=2 | ||
), | ||
], | ||
"source": "Source: LyricFind", | ||
"hasTimestamps": True | ||
} | ||
|
||
""" | ||
lyrics = {} | ||
|
||
@overload | ||
def get_lyrics(self, browseId: str, timestamps: Literal[True] = True) -> Optional[Lyrics|TimedLyrics]: | ||
""" | ||
Returns lyrics of a song or video. When `timestamps` is set, lyrics are returned with | ||
sigma67 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
timestamps, if available. | ||
|
||
:param browseId: Lyrics browse-id obtained from :py:func:`get_watch_playlist` (startswith `MPLYt`). | ||
:param timestamps: Whether to return bare lyrics or lyrics with timestamps, if available. | ||
:return: Dictionary with song lyrics or `None`, if no lyrics are found. | ||
The `hasTimestamps`-key determines the format of the data. | ||
|
||
|
||
Example when `timestamps` is set to `False`, or not timestamps are available:: | ||
|
||
{ | ||
"lyrics": "Today is gonna be the day\\nThat they're gonna throw it back to you\\n", | ||
"source": "Source: LyricFind", | ||
"hasTimestamps": False | ||
} | ||
|
||
Example when `timestamps` is set to `True` and timestamps are available:: | ||
|
||
{ | ||
"lyrics": [ | ||
LyricLine( | ||
text="I was a liar", | ||
start_time=9200, | ||
end_time=10630, | ||
id=1 | ||
), | ||
LyricLine( | ||
text="I gave in to the fire", | ||
start_time=10680, | ||
end_time=12540, | ||
id=2 | ||
), | ||
], | ||
"source": "Source: LyricFind", | ||
"hasTimestamps": True | ||
} | ||
|
||
""" | ||
|
||
def get_lyrics(self, browseId: str, timestamps: bool = False) -> Optional[Lyrics|TimedLyrics]: | ||
""" | ||
Returns lyrics of a song or video. When `timestamps` is set, lyrics are returned with | ||
timestamps, if available. | ||
|
||
:param browseId: Lyrics browse-id obtained from :py:func:`get_watch_playlist` (startswith `MPLYt`). | ||
:param timestamps: Whether to return bare lyrics or lyrics with timestamps, if available. | ||
:return: Dictionary with song lyrics or `None`, if no lyrics are found. | ||
The `hasTimestamps`-key determines the format of the data. | ||
|
||
|
||
Example when `timestamps` is set to `False`, or not timestamps are available:: | ||
|
||
{ | ||
"lyrics": "Today is gonna be the day\\nThat they're gonna throw it back to you\\n", | ||
"source": "Source: LyricFind", | ||
"hasTimestamps": False | ||
} | ||
|
||
Example when `timestamps` is set to `True` and timestamps are available:: | ||
|
||
{ | ||
"lyrics": [ | ||
LyricLine( | ||
text="I was a liar", | ||
start_time=9200, | ||
end_time=10630, | ||
id=1 | ||
), | ||
LyricLine( | ||
text="I gave in to the fire", | ||
start_time=10680, | ||
end_time=12540, | ||
id=2 | ||
), | ||
], | ||
"source": "Source: LyricFind", | ||
"hasTimestamps": True | ||
} | ||
|
||
""" | ||
|
||
lyrics: dict = {} | ||
if not browseId: | ||
raise YTMusicUserError("Invalid browseId provided. This song might not have lyrics.") | ||
raise YTMusicUserError( | ||
"Invalid browseId provided. This song might not have lyrics.") | ||
|
||
if timestamps: | ||
# change the client to get lyrics with timestamps (mobile only) | ||
copied_context_client = self.context["context"]["client"].copy() | ||
sigma67 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self.context["context"]["client"].update({ | ||
"clientName": "ANDROID_MUSIC", | ||
"clientVersion": "7.21.50" | ||
}) | ||
|
||
response = self._send_request("browse", {"browseId": browseId}) | ||
lyrics["lyrics"] = nav( | ||
response, ["contents", *SECTION_LIST_ITEM, *DESCRIPTION_SHELF, *DESCRIPTION], True | ||
) | ||
lyrics["source"] = nav( | ||
response, ["contents", *SECTION_LIST_ITEM, *DESCRIPTION_SHELF, "footer", *RUN_TEXT], True | ||
) | ||
|
||
return lyrics | ||
if timestamps: | ||
# restore the old context | ||
self.context["context"]["client"] = copied_context_client # type: ignore | ||
sigma67 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
# unpack the response | ||
|
||
# we got lyrics with timestamps | ||
if timestamps and (data := nav(response, TIMESTAMPED_LYRICS, True)) is not None: | ||
assert isinstance(data, dict) | ||
|
||
if not "timedLyricsData" in data: | ||
return None | ||
|
||
lyrics["lyrics"] = list(map(LyricLine.from_raw, data["timedLyricsData"])) | ||
lyrics["source"] = data.get("sourceMessage") | ||
lyrics["hasTimestamps"] = True | ||
sigma67 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
else: | ||
lyrics["lyrics"] = nav( | ||
response, ["contents", *SECTION_LIST_ITEM, *DESCRIPTION_SHELF, *DESCRIPTION], True | ||
) | ||
|
||
if lyrics["lyrics"] is None: | ||
return None | ||
|
||
lyrics["source"] = nav( | ||
response, ["contents", *SECTION_LIST_ITEM, *DESCRIPTION_SHELF, "footer", *RUN_TEXT], True | ||
) | ||
lyrics["hasTimestamps"] = False | ||
|
||
return cast(Lyrics | TimedLyrics, lyrics) | ||
|
||
def get_basejs_url(self): | ||
""" | ||
|
@@ -876,7 +1068,7 @@ def get_basejs_url(self): | |
if match is None: | ||
raise YTMusicError("Could not identify the URL for base.js player.") | ||
|
||
return YTM_DOMAIN + match.group(1) | ||
return cast(str, YTM_DOMAIN + match.group(1)) | ||
sigma67 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def get_signatureTimestamp(self, url: Optional[str] = None) -> int: | ||
""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure how useful these asserts are. Better to verify the length of the text or the time being greater than a specific value.
I.e., verify the usefulness of the actual values, not just the type
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The text could easily be empty if there was a pause. One-character lines are quite common at the beginning of a song (there's that note character, ♪).