File tree Expand file tree Collapse file tree 5 files changed +57
-12
lines changed
Expand file tree Collapse file tree 5 files changed +57
-12
lines changed Original file line number Diff line number Diff line change @@ -299,18 +299,18 @@ async def generate():
299299 rt_encode_output = True ,
300300 rt_voice = " marin" ,
301301 output_format = " audio" ,
302- audio_output_format = " m4a " ,
302+ audio_output_format = " mp3 " ,
303303 audio_input_format = " mp4" ,
304304 ):
305305 yield chunk
306306
307307return StreamingResponse(
308308 content = generate(),
309- media_type = " audio/mp4 " ,
309+ media_type = " audio/mp3 " ,
310310 headers = {
311311 " Cache-Control" : " no-store" ,
312312 " Pragma" : " no-cache" ,
313- " Content-Disposition" : " inline; filename=stream.m4a " ,
313+ " Content-Disposition" : " inline; filename=stream.mp3 " ,
314314 " X-Accel-Buffering" : " no" ,
315315 },
316316)
Original file line number Diff line number Diff line change @@ -224,18 +224,18 @@ This example will work using expo-audio on Android and iOS.
224224 rt_encode_output = True ,
225225 rt_voice = " marin" ,
226226 output_format = " audio" ,
227- audio_output_format = " m4a " ,
228- audio_input_format = " mp4 " ,
227+ audio_output_format = " mp3 " ,
228+ audio_input_format = " m4a " ,
229229 ):
230230 yield chunk
231231
232232 return StreamingResponse(
233233 content = generate(),
234- media_type = " audio/mp4 " ,
234+ media_type = " audio/mp3 " ,
235235 headers = {
236236 " Cache-Control" : " no-store" ,
237237 " Pragma" : " no-cache" ,
238- " Content-Disposition" : " inline; filename=stream.m4a " ,
238+ " Content-Disposition" : " inline; filename=stream.mp3 " ,
239239 " X-Accel-Buffering" : " no" ,
240240 },
241241 )
Original file line number Diff line number Diff line change 11[tool .poetry ]
22name = " solana-agent"
3- version = " 31.2.3 "
3+ version = " 31.2.4 "
44description = " AI Agents for Solana"
55authors = [" Bevan Hunt <bevan@bevanhunt.com>" ]
66license = " MIT"
Original file line number Diff line number Diff line change 44import contextlib
55import logging
66from typing import List , AsyncGenerator
7+ import tempfile
8+ import os
79
810from solana_agent .interfaces .providers .audio import AudioTranscoder
911
@@ -49,11 +51,45 @@ async def to_pcm16( # pragma: no cover
4951 rate_hz ,
5052 len (audio_bytes ),
5153 )
52- # Prefer hinting the format for common containers/codecs; ffmpeg can still autodetect if the hint is wrong.
53- hinted_format = None
54+ # iOS-recorded MP4/M4A often requires a seekable input for reliable demuxing.
55+ # Decode from a temporary file instead of stdin for MP4/M4A.
5456 if input_mime in ("audio/mp4" , "audio/m4a" ):
55- hinted_format = "mp4"
56- elif input_mime in ("audio/aac" ,):
57+ suffix = ".m4a" if input_mime == "audio/m4a" else ".mp4"
58+ tmp_path = None
59+ try :
60+ with tempfile .NamedTemporaryFile (delete = False , suffix = suffix ) as tf :
61+ tmp_path = tf .name
62+ tf .write (audio_bytes )
63+ args = [
64+ "-hide_banner" ,
65+ "-loglevel" ,
66+ "error" ,
67+ "-i" ,
68+ tmp_path ,
69+ "-vn" , # ignore any video tracks
70+ "-acodec" ,
71+ "pcm_s16le" ,
72+ "-ac" ,
73+ "1" ,
74+ "-ar" ,
75+ str (rate_hz ),
76+ "-f" ,
77+ "s16le" ,
78+ "pipe:1" ,
79+ ]
80+ out = await self ._run_ffmpeg (args , b"" )
81+ logger .info (
82+ "Transcoded (MP4/M4A temp-file) to PCM16: output_len=%d" , len (out )
83+ )
84+ return out
85+ finally :
86+ if tmp_path :
87+ with contextlib .suppress (Exception ):
88+ os .remove (tmp_path )
89+
90+ # For other formats, prefer a format hint when helpful and decode from stdin.
91+ hinted_format = None
92+ if input_mime in ("audio/aac" ,):
5793 # Raw AAC is typically in ADTS stream format
5894 hinted_format = "adts"
5995 elif input_mime in ("audio/ogg" , "audio/webm" ):
Original file line number Diff line number Diff line change @@ -1037,6 +1037,15 @@ def _strip_tool_strict(tools_val):
10371037 if "tools" in patch :
10381038 patch ["tools" ] = _strip_tool_strict (patch ["tools" ]) # idempotent
10391039
1040+ # Per server requirements, always include session.type and output_modalities
1041+ try :
1042+ patch ["type" ] = "realtime"
1043+ # Preserve caller-provided output_modalities if present, otherwise default to audio
1044+ if "output_modalities" not in patch :
1045+ patch ["output_modalities" ] = ["audio" ]
1046+ except Exception :
1047+ pass
1048+
10401049 payload = {"type" : "session.update" , "session" : patch }
10411050 # Mark awaiting updated and store last patch
10421051 self ._last_session_patch = patch or {}
You can’t perform that action at this time.
0 commit comments