55import json
66import base64
77import websockets
8+ import asyncio
89
910from websockets .sync .client import connect
11+ from websockets .client import connect as async_connect
1012
1113from .core .api_error import ApiError
1214from .core .jsonable_encoder import jsonable_encoder
1315from .core .remove_none_from_dict import remove_none_from_dict
1416from .core .request_options import RequestOptions
1517from .types .voice_settings import VoiceSettings
16- from .text_to_speech .client import TextToSpeechClient
18+ from .text_to_speech .client import TextToSpeechClient , AsyncTextToSpeechClient
1719from .types import OutputFormat
1820
1921# this is used as the default value for optional parameters
@@ -37,6 +39,22 @@ def text_chunker(chunks: typing.Iterator[str]) -> typing.Iterator[str]:
3739 if buffer != "" :
3840 yield buffer + " "
3941
async def async_text_chunker(chunks: typing.AsyncIterator[str]) -> typing.AsyncIterator[str]:
    """Regroup an async stream of text fragments into space-terminated chunks.

    Fragments are accumulated until a boundary character (punctuation,
    bracket or space) shows up either at the end of the accumulated text or
    at the start of the incoming fragment. Every emitted chunk ends with a
    space; whatever is left once the stream is exhausted is emitted with a
    space appended.
    """
    boundary_chars = (".", ",", "?", "!", ";", ":", "—", "-", "(", ")", "[", "]", "}", " ")

    pending = ""
    async for piece in chunks:
        if pending.endswith(boundary_chars):
            # Accumulated text already stops on a boundary: flush it and
            # start over with the incoming fragment.
            ready, pending = pending, piece
        elif piece.startswith(boundary_chars):
            # The incoming fragment opens with a boundary: attach that single
            # character to the accumulated text and keep the rest for later.
            ready, pending = pending + piece[0], piece[1:]
        else:
            # No boundary in sight yet; keep accumulating.
            pending += piece
            continue

        if not ready.endswith(" "):
            ready += " "
        yield ready

    if pending:
        yield pending + " "
4058
4159class RealtimeTextToSpeechClient (TextToSpeechClient ):
4260
@@ -137,3 +155,105 @@ def get_text() -> typing.Iterator[str]:
137155 raise ApiError (body = data , status_code = ce .code )
138156 elif ce .code != 1000 :
139157 raise ApiError (body = ce .reason , status_code = ce .code )
158+
159+
class AsyncRealtimeTextToSpeechClient(AsyncTextToSpeechClient):
    """Async text-to-speech client extended with websocket input streaming."""

    async def convert_realtime(
        self,
        voice_id: str,
        *,
        text: typing.AsyncIterator[str],
        model_id: typing.Optional[str] = OMIT,
        output_format: typing.Optional[OutputFormat] = "mp3_44100_128",
        voice_settings: typing.Optional[VoiceSettings] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.AsyncIterator[bytes]:
        """
        Converts streamed text into speech using a voice of your choice and yields audio chunks as they arrive.

        Parameters:
            - voice_id: str. Voice ID to be used, you can use https://api.elevenlabs.io/v1/voices to list all the available voices.

            - text: typing.AsyncIterator[str]. The text that will get converted into speech.

            - model_id: typing.Optional[str]. Identifier of the model that will be used, you can query them using GET /v1/models. The model needs to have support for text to speech, you can check this using the can_do_text_to_speech property.

            - output_format: typing.Optional[OutputFormat]. Sent as the `output_format` query parameter of the websocket URL.

            - voice_settings: typing.Optional[VoiceSettings]. Voice settings overriding stored settings for the given voice. They are applied only on the given request.

            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.

        Yields:
            bytes. Base64-decoded audio payloads received from the websocket.

        Raises:
            ApiError: if the connection is closed with an error while the initial
                message is being sent, or if it is later closed either after a
                received payload that carries a "message" key or with a close
                code other than 1000.
        ---
        from elevenlabs import VoiceSettings

        async def get_text() -> typing.AsyncIterator[str]:
            yield "Hello, how are you?"
            yield "I am fine, thank you."

        # `tts` is an AsyncRealtimeTextToSpeechClient instance
        async for audio_chunk in tts.convert_realtime(
            voice_id="string",
            text=get_text(),
            model_id="string",
            voice_settings=VoiceSettings(
                stability=1.1,
                similarity_boost=1.1,
                style=1.1,
                use_speaker_boost=True,
            ),
        ):
            ...
        """
        # OMIT is the "not provided" default; never call .dict() on the sentinel
        # itself, whatever its truthiness happens to be.
        settings_payload = (
            voice_settings.dict() if voice_settings is not OMIT and voice_settings else None
        )

        # NOTE(review): model_id is interpolated into the query string even when
        # it is left at OMIT -- confirm the endpoint tolerates that value.
        async with async_connect(
            urllib.parse.urljoin(
                "wss://api.elevenlabs.io/",
                f"v1/text-to-speech/{jsonable_encoder(voice_id)}/stream-input?model_id={model_id}&output_format={output_format}"
            ),
            extra_headers=jsonable_encoder(
                remove_none_from_dict(
                    {
                        **self._client_wrapper.get_headers(),
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
                    }
                )
            )
        ) as socket:
            try:
                # Opening message: a single space plus the generation settings.
                await socket.send(json.dumps(
                    dict(
                        text=" ",
                        try_trigger_generation=True,
                        voice_settings=settings_payload,
                        generation_config=dict(
                            chunk_length_schedule=[50],
                        ),
                    )
                ))
            except websockets.exceptions.ConnectionClosedError as ce:
                raise ApiError(body=ce.reason, status_code=ce.code) from ce

            # Last payload received from the server. Bound up front so the
            # close handler below can always inspect it, even when the
            # connection drops before anything was received.
            last_message: typing.Any = {}
            try:
                async for text_chunk in async_text_chunker(text):
                    await socket.send(json.dumps(dict(text=text_chunk, try_trigger_generation=True)))
                    try:
                        # Poll briefly for audio that is already available. Only
                        # the recv() is time-limited: keeping the yield outside
                        # the timeout prevents the deadline from firing while the
                        # consumer holds control. wait_for is also available on
                        # Python versions that lack asyncio.timeout.
                        raw = await asyncio.wait_for(socket.recv(), timeout=1e-4)
                    except asyncio.TimeoutError:
                        continue
                    last_message = json.loads(raw)
                    if "audio" in last_message and last_message["audio"]:
                        yield base64.b64decode(last_message["audio"])  # type: ignore

                # An empty text message is sent once all input has been forwarded.
                await socket.send(json.dumps(dict(text="")))

                # Drain remaining audio; the loop ends when recv() raises
                # ConnectionClosed.
                while True:
                    last_message = json.loads(await socket.recv())
                    if "audio" in last_message and last_message["audio"]:
                        yield base64.b64decode(last_message["audio"])  # type: ignore
            except websockets.exceptions.ConnectionClosed as ce:
                if "message" in last_message:
                    raise ApiError(body=last_message, status_code=ce.code) from ce
                elif ce.code != 1000:
                    raise ApiError(body=ce.reason, status_code=ce.code) from ce
0 commit comments