Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: extract request-id from response headers #479

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/elevenlabs/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import httpx

from typing import Iterator, Optional, Union, \
Optional, AsyncIterator
Optional, AsyncIterator, Tuple

from .base_client import \
BaseElevenLabs, AsyncBaseElevenLabs
Expand Down Expand Up @@ -130,7 +130,7 @@ def generate(
typing.Sequence[PronunciationDictionaryVersionLocator]
] = OMIT,
request_options: typing.Optional[RequestOptions] = None
) -> Iterator[bytes]:
) -> Tuple[str, Iterator[bytes]]:
"""
- text: Union[str, Iterator[str]]. The string or stream of strings that will get converted into speech.

Expand Down Expand Up @@ -310,7 +310,7 @@ async def generate(
typing.Sequence[PronunciationDictionaryVersionLocator]
] = OMIT,
request_options: typing.Optional[RequestOptions] = None
) -> AsyncIterator[bytes]:
) -> Tuple[str, AsyncIterator[bytes]]:
"""
This is a manually maintained helper function that generates a
voice from provided text.
Expand Down Expand Up @@ -383,7 +383,7 @@ async def generate(
model_id = model.model_id

if stream:
return self.text_to_speech.convert_as_stream(
return await self.text_to_speech.convert_as_stream(
voice_id=voice_id,
model_id=model_id,
voice_settings=voice_settings,
Expand All @@ -396,7 +396,7 @@ async def generate(
else:
if not isinstance(text, str):
raise ApiError(body="Text must be a string when stream is False.")
return self.text_to_speech.convert(
return await self.text_to_speech.convert(
voice_id=voice_id,
model_id=model_id,
voice_settings=voice_settings,
Expand Down
193 changes: 132 additions & 61 deletions src/elevenlabs/text_to_speech/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from .types.text_to_speech_stream_with_timestamps_response import TextToSpeechStreamWithTimestampsResponse
import json
from ..core.client_wrapper import AsyncClientWrapper
from typing import Tuple

# this is used as the default value for optional parameters
OMIT = typing.cast(typing.Any, ...)
Expand Down Expand Up @@ -61,9 +62,9 @@ def convert(
BodyTextToSpeechV1TextToSpeechVoiceIdPostApplyTextNormalization
] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> typing.Iterator[bytes]:
) -> Tuple[str, typing.Iterator[bytes]]:
"""
Converts text into speech using a voice of your choice and returns audio.
Converts text into speech using a voice of your choice and returns the request ID and audio stream.

Parameters
----------
Expand Down Expand Up @@ -126,9 +127,11 @@ def convert(
Request-specific configuration. You can pass in configuration such as `chunk_size`, and more to customize the request and response.

Yields
------
typing.Iterator[bytes]
Successful Response
-------
tuple[str, typing.Iterator[bytes]]
A tuple containing:
- request_id: The ID of the request
- audio_stream: Iterator of audio bytes chunks

Examples
--------
Expand Down Expand Up @@ -180,10 +183,20 @@ def convert(
) as _response:
try:
if 200 <= _response.status_code < 300:
_chunk_size = request_options.get("chunk_size", 1024) if request_options is not None else 1024
for _chunk in _response.iter_bytes(chunk_size=_chunk_size):
yield _chunk
return
request_id = _response.headers.get('request-id')
if not request_id:
raise ApiError(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How likely is this to happen?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Impossible, because we set it ourselves if the user does not provide it.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alternatively, I can just leave it as optional?

status_code=_response.status_code,
body="Missing request-id in response headers."
)

def audio_iterator():
_chunk_size = request_options.get("chunk_size", 1024) if request_options is not None else 1024
for _chunk in _response.iter_bytes(chunk_size=_chunk_size):
yield _chunk

return request_id, audio_iterator()

_response.read()
if _response.status_code == 422:
raise UnprocessableEntityError(
Expand Down Expand Up @@ -224,7 +237,7 @@ def convert_with_timestamps(
BodyTextToSpeechWithTimestampsV1TextToSpeechVoiceIdWithTimestampsPostApplyTextNormalization
] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> typing.Optional[typing.Any]:
) -> Tuple[str, typing.Optional[typing.Any]]:
"""
Converts text into speech using a voice of your choice and returns JSON containing audio as a base64 encoded string together with information on when which character was spoken.

Expand Down Expand Up @@ -389,7 +402,7 @@ def convert_as_stream(
BodyTextToSpeechStreamingV1TextToSpeechVoiceIdStreamPostApplyTextNormalization
] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> typing.Iterator[bytes]:
) -> Tuple[str, typing.Iterator[bytes]]:
"""
Converts text into speech using a voice of your choice and returns audio as an audio stream.

Expand Down Expand Up @@ -508,10 +521,20 @@ def convert_as_stream(
) as _response:
try:
if 200 <= _response.status_code < 300:
_chunk_size = request_options.get("chunk_size", 1024) if request_options is not None else 1024
for _chunk in _response.iter_bytes(chunk_size=_chunk_size):
yield _chunk
return
request_id = _response.headers.get('request-id')
if not request_id:
raise ApiError(
status_code=_response.status_code,
body="Missing request-id in response headers."
)

def audio_iterator():
_chunk_size = request_options.get("chunk_size", 1024) if request_options is not None else 1024
for _chunk in _response.iter_bytes(chunk_size=_chunk_size):
yield _chunk

return request_id, audio_iterator()

_response.read()
if _response.status_code == 422:
raise UnprocessableEntityError(
Expand Down Expand Up @@ -552,7 +575,7 @@ def stream_with_timestamps(
BodyTextToSpeechStreamingWithTimestampsV1TextToSpeechVoiceIdStreamWithTimestampsPostApplyTextNormalization
] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> typing.Iterator[TextToSpeechStreamWithTimestampsResponse]:
) -> Tuple[str, typing.Iterator[TextToSpeechStreamWithTimestampsResponse]]:
"""
Converts text into speech using a voice of your choice and returns a stream of JSONs containing audio as a base64 encoded string together with information on when which character was spoken.

Expand Down Expand Up @@ -673,20 +696,30 @@ def stream_with_timestamps(
) as _response:
try:
if 200 <= _response.status_code < 300:
for _text in _response.iter_lines():
try:
if len(_text) == 0:
continue
yield typing.cast(
TextToSpeechStreamWithTimestampsResponse,
construct_type(
type_=TextToSpeechStreamWithTimestampsResponse, # type: ignore
object_=json.loads(_text),
),
)
except:
pass
return
request_id = _response.headers.get('request-id')
if not request_id:
raise ApiError(
status_code=_response.status_code,
body="Missing request-id in response headers."
)

def response_iterator():
for _text in _response.iter_lines():
try:
if len(_text) == 0:
continue
yield typing.cast(
TextToSpeechStreamWithTimestampsResponse,
construct_type(
type_=TextToSpeechStreamWithTimestampsResponse, # type: ignore
object_=json.loads(_text),
),
)
except:
pass

return request_id, response_iterator()

_response.read()
if _response.status_code == 422:
raise UnprocessableEntityError(
Expand Down Expand Up @@ -732,9 +765,9 @@ async def convert(
BodyTextToSpeechV1TextToSpeechVoiceIdPostApplyTextNormalization
] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> typing.AsyncIterator[bytes]:
) -> Tuple[str, typing.AsyncIterator[bytes]]:
"""
Converts text into speech using a voice of your choice and returns audio.
Converts text into speech using a voice of your choice and returns the request ID and audio stream.

Parameters
----------
Expand Down Expand Up @@ -798,9 +831,11 @@ async def convert(

Yields
------
typing.AsyncIterator[bytes]
Successful Response

tuple[str, typing.AsyncIterator[bytes]]
A tuple containing:
- request_id: The ID of the request
- audio_stream: Iterator of audio bytes chunks

Examples
--------
import asyncio
Expand Down Expand Up @@ -859,10 +894,20 @@ async def main() -> None:
) as _response:
try:
if 200 <= _response.status_code < 300:
_chunk_size = request_options.get("chunk_size", 1024) if request_options is not None else 1024
async for _chunk in _response.aiter_bytes(chunk_size=_chunk_size):
yield _chunk
return
request_id = _response.headers.get('request-id')
if not request_id:
raise ApiError(
status_code=_response.status_code,
body="Missing request-id in response headers."
)

async def audio_iterator():
_chunk_size = request_options.get("chunk_size", 1024) if request_options is not None else 1024
async for _chunk in _response.aiter_bytes(chunk_size=_chunk_size):
yield _chunk

return request_id, audio_iterator()

await _response.aread()
if _response.status_code == 422:
raise UnprocessableEntityError(
Expand Down Expand Up @@ -903,7 +948,7 @@ async def convert_with_timestamps(
BodyTextToSpeechWithTimestampsV1TextToSpeechVoiceIdWithTimestampsPostApplyTextNormalization
] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> typing.Optional[typing.Any]:
) -> Tuple[str, typing.Optional[typing.Any]]:
"""
Converts text into speech using a voice of your choice and returns JSON containing audio as a base64 encoded string together with information on when which character was spoken.

Expand Down Expand Up @@ -1030,7 +1075,14 @@ async def main() -> None:
)
try:
if 200 <= _response.status_code < 300:
return typing.cast(
request_id = _response.headers.get('request-id')
if not request_id:
raise ApiError(
status_code=_response.status_code,
body="Missing request-id in response headers."
)

return request_id, typing.cast(
typing.Optional[typing.Any],
construct_type(
type_=typing.Optional[typing.Any], # type: ignore
Expand Down Expand Up @@ -1076,7 +1128,7 @@ async def convert_as_stream(
BodyTextToSpeechStreamingV1TextToSpeechVoiceIdStreamPostApplyTextNormalization
] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> typing.AsyncIterator[bytes]:
) -> Tuple[str, typing.AsyncIterator[bytes]]:
"""
Converts text into speech using a voice of your choice and returns audio as an audio stream.

Expand Down Expand Up @@ -1203,10 +1255,20 @@ async def main() -> None:
) as _response:
try:
if 200 <= _response.status_code < 300:
_chunk_size = request_options.get("chunk_size", 1024) if request_options is not None else 1024
async for _chunk in _response.aiter_bytes(chunk_size=_chunk_size):
yield _chunk
return
request_id = _response.headers.get('request-id')
if not request_id:
raise ApiError(
status_code=_response.status_code,
body="Missing request-id in response headers."
)

async def audio_iterator():
_chunk_size = request_options.get("chunk_size", 1024) if request_options is not None else 1024
async for _chunk in _response.aiter_bytes(chunk_size=_chunk_size):
yield _chunk

return request_id, audio_iterator()

await _response.aread()
if _response.status_code == 422:
raise UnprocessableEntityError(
Expand Down Expand Up @@ -1247,7 +1309,7 @@ async def stream_with_timestamps(
BodyTextToSpeechStreamingWithTimestampsV1TextToSpeechVoiceIdStreamWithTimestampsPostApplyTextNormalization
] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> typing.AsyncIterator[TextToSpeechStreamWithTimestampsResponse]:
) -> Tuple[str,typing.AsyncIterator[TextToSpeechStreamWithTimestampsResponse]]:
"""
Converts text into speech using a voice of your choice and returns a stream of JSONs containing audio as a base64 encoded string together with information on when which character was spoken.

Expand Down Expand Up @@ -1376,20 +1438,29 @@ async def main() -> None:
) as _response:
try:
if 200 <= _response.status_code < 300:
async for _text in _response.aiter_lines():
try:
if len(_text) == 0:
continue
yield typing.cast(
TextToSpeechStreamWithTimestampsResponse,
construct_type(
type_=TextToSpeechStreamWithTimestampsResponse, # type: ignore
object_=json.loads(_text),
),
)
except:
pass
return
request_id = _response.headers.get('request-id')
if not request_id:
raise ApiError(
status_code=_response.status_code,
body="Missing request-id in response headers."
)

async def response_iterator():
async for _text in _response.aiter_lines():
try:
if len(_text) == 0:
continue
yield typing.cast(
TextToSpeechStreamWithTimestampsResponse,
construct_type(
type_=TextToSpeechStreamWithTimestampsResponse, # type: ignore
object_=json.loads(_text),
),
)
except:
pass

return request_id, response_iterator()
await _response.aread()
if _response.status_code == 422:
raise UnprocessableEntityError(
Expand Down
Loading
Loading