elevenlabs · AngeloGiacco · Feb 10, 2025 · Feb 10, 2025 · Feb 10, 2025 · Feb 10, 2025
diff --git a/src/elevenlabs/client.py b/src/elevenlabs/client.py
@@ -5,7 +5,7 @@
 import httpx
 
 from typing import Iterator, Optional, Union, \
-  Optional, AsyncIterator
+  Optional, AsyncIterator, Tuple
 
 from .base_client import \
   BaseElevenLabs, AsyncBaseElevenLabs
@@ -130,7 +130,7 @@ def generate(
             typing.Sequence[PronunciationDictionaryVersionLocator]
         ] = OMIT,
       request_options: typing.Optional[RequestOptions] = None
-    ) -> Iterator[bytes]:
+    ) -> Tuple[str, Iterator[bytes]]:
         """
             - text: Union[str, Iterator[str]]. The string or stream of strings that will get converted into speech.
 
@@ -310,7 +310,7 @@ async def generate(
             typing.Sequence[PronunciationDictionaryVersionLocator]
         ] = OMIT,
       request_options: typing.Optional[RequestOptions] = None
-    ) -> AsyncIterator[bytes]:
+    ) -> Tuple[str, AsyncIterator[bytes]]:
         """
           This is a manually mnaintained helper function that generates a 
           voice from provided text.
@@ -383,7 +383,7 @@ async def generate(
             model_id = model.model_id
 
         if stream:
-            return self.text_to_speech.convert_as_stream(
+            return await self.text_to_speech.convert_as_stream(
                 voice_id=voice_id,
                 model_id=model_id,
                 voice_settings=voice_settings,
@@ -396,7 +396,7 @@ async def generate(
         else:
             if not isinstance(text, str):
                 raise ApiError(body="Text must be a string when stream is False.")
-            return self.text_to_speech.convert(
+            return await self.text_to_speech.convert(
                 voice_id=voice_id,
                 model_id=model_id,
                 voice_settings=voice_settings,

diff --git a/src/elevenlabs/text_to_speech/client.py b/src/elevenlabs/text_to_speech/client.py
@@ -28,6 +28,7 @@
 from .types.text_to_speech_stream_with_timestamps_response import TextToSpeechStreamWithTimestampsResponse
 import json
 from ..core.client_wrapper import AsyncClientWrapper
+from typing import Tuple
 
 # this is used as the default value for optional parameters
 OMIT = typing.cast(typing.Any, ...)
@@ -61,9 +62,9 @@ def convert(
             BodyTextToSpeechV1TextToSpeechVoiceIdPostApplyTextNormalization
         ] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> typing.Iterator[bytes]:
+    ) -> Tuple[str, typing.Iterator[bytes]]:
         """
-        Converts text into speech using a voice of your choice and returns audio.
+        Converts text into speech using a voice of your choice and returns the request ID and audio stream.
 
         Parameters
         ----------
@@ -126,9 +127,11 @@ def convert(
             Request-specific configuration. You can pass in configuration such as `chunk_size`, and more to customize the request and response.
 
-        Yields
+        Returns
-        ------
-        typing.Iterator[bytes]
-            Successful Response
+        -------
+        tuple[str, typing.Iterator[bytes]]
+            A tuple containing:
+            - request_id: The ID of the request
+            - audio_stream: Iterator of audio byte chunks
 
         Examples
         --------
@@ -180,10 +183,20 @@ def convert(
         ) as _response:
             try:
                 if 200 <= _response.status_code < 300:
-                    _chunk_size = request_options.get("chunk_size", 1024) if request_options is not None else 1024
-                    for _chunk in _response.iter_bytes(chunk_size=_chunk_size):
-                        yield _chunk
-                    return
+                    request_id = _response.headers.get('request-id')
+                    if not request_id:
+                        raise ApiError(
+                            status_code=_response.status_code,
+                            body="Missing request-id in response headers."
+                        )
+
+                    def audio_iterator():
+                        _chunk_size = request_options.get("chunk_size", 1024) if request_options is not None else 1024
+                        for _chunk in _response.iter_bytes(chunk_size=_chunk_size):
+                            yield _chunk
+
+                    return request_id, audio_iterator()
+
                 _response.read()
                 if _response.status_code == 422:
                     raise UnprocessableEntityError(
@@ -224,7 +237,7 @@ def convert_with_timestamps(
             BodyTextToSpeechWithTimestampsV1TextToSpeechVoiceIdWithTimestampsPostApplyTextNormalization
         ] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> typing.Optional[typing.Any]:
+    ) -> Tuple[str, typing.Optional[typing.Any]]:
         """
         Converts text into speech using a voice of your choice and returns JSON containing audio as a base64 encoded string together with information on when which character was spoken.
 
@@ -389,7 +402,7 @@ def convert_as_stream(
             BodyTextToSpeechStreamingV1TextToSpeechVoiceIdStreamPostApplyTextNormalization
         ] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> typing.Iterator[bytes]:
+    ) -> Tuple[str, typing.Iterator[bytes]]:
         """
         Converts text into speech using a voice of your choice and returns audio as an audio stream.
 
@@ -508,10 +521,20 @@ def convert_as_stream(
         ) as _response:
             try:
                 if 200 <= _response.status_code < 300:
-                    _chunk_size = request_options.get("chunk_size", 1024) if request_options is not None else 1024
-                    for _chunk in _response.iter_bytes(chunk_size=_chunk_size):
-                        yield _chunk
-                    return
+                    request_id = _response.headers.get('request-id')
+                    if not request_id:
+                        raise ApiError(
+                            status_code=_response.status_code,
+                            body="Missing request-id in response headers."
+                        )
+
+                    def audio_iterator():
+                        _chunk_size = request_options.get("chunk_size", 1024) if request_options is not None else 1024
+                        for _chunk in _response.iter_bytes(chunk_size=_chunk_size):
+                            yield _chunk
+
+                    return request_id, audio_iterator()
+
                 _response.read()
                 if _response.status_code == 422:
                     raise UnprocessableEntityError(
@@ -552,7 +575,7 @@ def stream_with_timestamps(
             BodyTextToSpeechStreamingWithTimestampsV1TextToSpeechVoiceIdStreamWithTimestampsPostApplyTextNormalization
         ] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> typing.Iterator[TextToSpeechStreamWithTimestampsResponse]:
+    ) -> Tuple[str, typing.Iterator[TextToSpeechStreamWithTimestampsResponse]]:
         """
         Converts text into speech using a voice of your choice and returns a stream of JSONs containing audio as a base64 encoded string together with information on when which character was spoken.
 
@@ -673,20 +696,30 @@ def stream_with_timestamps(
         ) as _response:
             try:
                 if 200 <= _response.status_code < 300:
-                    for _text in _response.iter_lines():
-                        try:
-                            if len(_text) == 0:
-                                continue
-                            yield typing.cast(
-                                TextToSpeechStreamWithTimestampsResponse,
-                                construct_type(
-                                    type_=TextToSpeechStreamWithTimestampsResponse,  # type: ignore
-                                    object_=json.loads(_text),
-                                ),
-                            )
-                        except:
-                            pass
-                    return
+                    request_id = _response.headers.get('request-id')
+                    if not request_id:
+                        raise ApiError(
+                            status_code=_response.status_code,
+                            body="Missing request-id in response headers."
+                        )
+
+                    def response_iterator():
+                        for _text in _response.iter_lines():
+                            try:
+                                if len(_text) == 0:
+                                    continue
+                                yield typing.cast(
+                                    TextToSpeechStreamWithTimestampsResponse,
+                                    construct_type(
+                                        type_=TextToSpeechStreamWithTimestampsResponse,  # type: ignore
+                                        object_=json.loads(_text),
+                                    ),
+                                )
+                            except:
+                                pass
+
+                    return request_id, response_iterator()
+
                 _response.read()
                 if _response.status_code == 422:
                     raise UnprocessableEntityError(
@@ -732,9 +765,9 @@ async def convert(
             BodyTextToSpeechV1TextToSpeechVoiceIdPostApplyTextNormalization
         ] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> typing.AsyncIterator[bytes]:
+    ) -> Tuple[str, typing.AsyncIterator[bytes]]:
         """
-        Converts text into speech using a voice of your choice and returns audio.
+        Converts text into speech using a voice of your choice and returns the request ID and audio stream.
 
         Parameters
         ----------
@@ -798,9 +831,11 @@ async def convert(
 
-        Yields
-        ------
+        Returns
+        -------
-        typing.AsyncIterator[bytes]
-            Successful Response
-
+        tuple[str, typing.AsyncIterator[bytes]]
+            A tuple containing:
+            - request_id: The ID of the request
+            - audio_stream: Async iterator of audio byte chunks
+
         Examples
         --------
         import asyncio
@@ -859,10 +894,20 @@ async def main() -> None:
         ) as _response:
             try:
                 if 200 <= _response.status_code < 300:
-                    _chunk_size = request_options.get("chunk_size", 1024) if request_options is not None else 1024
-                    async for _chunk in _response.aiter_bytes(chunk_size=_chunk_size):
-                        yield _chunk
-                    return
+                    request_id = _response.headers.get('request-id')
+                    if not request_id:
+                        raise ApiError(
+                            status_code=_response.status_code,
+                            body="Missing request-id in response headers."
+                        )
+
+                    async def audio_iterator():
+                        _chunk_size = request_options.get("chunk_size", 1024) if request_options is not None else 1024
+                        async for _chunk in _response.aiter_bytes(chunk_size=_chunk_size):
+                            yield _chunk
+
+                    return request_id, audio_iterator()
+
                 await _response.aread()
                 if _response.status_code == 422:
                     raise UnprocessableEntityError(
@@ -903,7 +948,7 @@ async def convert_with_timestamps(
             BodyTextToSpeechWithTimestampsV1TextToSpeechVoiceIdWithTimestampsPostApplyTextNormalization
         ] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> typing.Optional[typing.Any]:
+    ) -> Tuple[str, typing.Optional[typing.Any]]:
         """
         Converts text into speech using a voice of your choice and returns JSON containing audio as a base64 encoded string together with information on when which character was spoken.
 
@@ -1030,7 +1075,14 @@ async def main() -> None:
         )
         try:
             if 200 <= _response.status_code < 300:
-                return typing.cast(
+                request_id = _response.headers.get('request-id')
+                if not request_id:
+                    raise ApiError(
+                        status_code=_response.status_code,
+                        body="Missing request-id in response headers."
+                    )
+
+                return request_id, typing.cast(
                     typing.Optional[typing.Any],
                     construct_type(
                         type_=typing.Optional[typing.Any],  # type: ignore
@@ -1076,7 +1128,7 @@ async def convert_as_stream(
             BodyTextToSpeechStreamingV1TextToSpeechVoiceIdStreamPostApplyTextNormalization
         ] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> typing.AsyncIterator[bytes]:
+    ) -> Tuple[str, typing.AsyncIterator[bytes]]:
         """
         Converts text into speech using a voice of your choice and returns audio as an audio stream.
 
@@ -1203,10 +1255,20 @@ async def main() -> None:
         ) as _response:
             try:
                 if 200 <= _response.status_code < 300:
-                    _chunk_size = request_options.get("chunk_size", 1024) if request_options is not None else 1024
-                    async for _chunk in _response.aiter_bytes(chunk_size=_chunk_size):
-                        yield _chunk
-                    return
+                    request_id = _response.headers.get('request-id')
+                    if not request_id:
+                        raise ApiError(
+                            status_code=_response.status_code,
+                            body="Missing request-id in response headers."
+                        )
+
+                    async def audio_iterator():
+                        _chunk_size = request_options.get("chunk_size", 1024) if request_options is not None else 1024
+                        async for _chunk in _response.aiter_bytes(chunk_size=_chunk_size):
+                            yield _chunk
+
+                    return request_id, audio_iterator()
+
                 await _response.aread()
                 if _response.status_code == 422:
                     raise UnprocessableEntityError(
@@ -1247,7 +1309,7 @@ async def stream_with_timestamps(
             BodyTextToSpeechStreamingWithTimestampsV1TextToSpeechVoiceIdStreamWithTimestampsPostApplyTextNormalization
         ] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> typing.AsyncIterator[TextToSpeechStreamWithTimestampsResponse]:
+    ) -> Tuple[str,typing.AsyncIterator[TextToSpeechStreamWithTimestampsResponse]]:
         """
         Converts text into speech using a voice of your choice and returns a stream of JSONs containing audio as a base64 encoded string together with information on when which character was spoken.
 
@@ -1376,20 +1438,29 @@ async def main() -> None:
         ) as _response:
             try:
                 if 200 <= _response.status_code < 300:
-                    async for _text in _response.aiter_lines():
-                        try:
-                            if len(_text) == 0:
-                                continue
-                            yield typing.cast(
-                                TextToSpeechStreamWithTimestampsResponse,
-                                construct_type(
-                                    type_=TextToSpeechStreamWithTimestampsResponse,  # type: ignore
-                                    object_=json.loads(_text),
-                                ),
-                            )
-                        except:
-                            pass
-                    return
+                    request_id = _response.headers.get('request-id')
+                    if not request_id:
+                        raise ApiError(
+                            status_code=_response.status_code,
+                            body="Missing request-id in response headers."
+                        )
+
+                    async def response_iterator():
+                        async for _text in _response.aiter_lines():
+                            try:
+                                if len(_text) == 0:
+                                    continue
+                                yield typing.cast(
+                                    TextToSpeechStreamWithTimestampsResponse,
+                                    construct_type(
+                                        type_=TextToSpeechStreamWithTimestampsResponse,  # type: ignore
+                                        object_=json.loads(_text),
+                                    ),
+                                )
+                            except:
+                                pass
+
+                    return request_id, response_iterator()
                 await _response.aread()
                 if _response.status_code == 422:
                     raise UnprocessableEntityError(