File tree Expand file tree Collapse file tree 2 files changed +41
-5
lines changed
Expand file tree Collapse file tree 2 files changed +41
-5
lines changed Original file line number Diff line number Diff line change 11[tool .poetry ]
22name = " solana-agent"
3- version = " 31.2.3 "
3+ version = " 31.2.4-dev1 "
44description = " AI Agents for Solana"
55authors = [" Bevan Hunt <bevan@bevanhunt.com>" ]
66license = " MIT"
Original file line number Diff line number Diff line change 44import contextlib
55import logging
66from typing import List , AsyncGenerator
7+ import tempfile
8+ import os
79
810from solana_agent .interfaces .providers .audio import AudioTranscoder
911
@@ -49,11 +51,45 @@ async def to_pcm16( # pragma: no cover
4951 rate_hz ,
5052 len (audio_bytes ),
5153 )
52- # Prefer to hint format for common containers/codecs; ffmpeg can still autodetect if hint is wrong .
53- hinted_format = None
54+ # iOS-recorded MP4/M4A often requires a seekable input for reliable demuxing,
55+ # so decode MP4/M4A from a temporary file instead of from stdin.
5456 if input_mime in ("audio/mp4" , "audio/m4a" ):
55- hinted_format = "mp4"
56- elif input_mime in ("audio/aac" ,):
57+ suffix = ".m4a" if input_mime == "audio/m4a" else ".mp4"
58+ tmp_path = None
59+ try :
60+ with tempfile .NamedTemporaryFile (delete = False , suffix = suffix ) as tf :
61+ tmp_path = tf .name
62+ tf .write (audio_bytes )
63+ args = [
64+ "-hide_banner" ,
65+ "-loglevel" ,
66+ "error" ,
67+ "-i" ,
68+ tmp_path ,
69+ "-vn" , # ignore any video tracks
70+ "-acodec" ,
71+ "pcm_s16le" ,
72+ "-ac" ,
73+ "1" ,
74+ "-ar" ,
75+ str (rate_hz ),
76+ "-f" ,
77+ "s16le" ,
78+ "pipe:1" ,
79+ ]
80+ out = await self ._run_ffmpeg (args , b"" )
81+ logger .info (
82+ "Transcoded (MP4/M4A temp-file) to PCM16: output_len=%d" , len (out )
83+ )
84+ return out
85+ finally :
86+ if tmp_path :
87+ with contextlib .suppress (Exception ):
88+ os .remove (tmp_path )
89+
90+ # For other formats, prefer a format hint when helpful and decode from stdin.
91+ hinted_format = None
92+ if input_mime in ("audio/aac" ,):
5793 # Raw AAC is typically in ADTS stream format
5894 hinted_format = "adts"
5995 elif input_mime in ("audio/ogg" , "audio/webm" ):
You can’t perform that action at this time.
0 commit comments