Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -299,18 +299,18 @@ async def generate():
rt_encode_output=True,
rt_voice="marin",
output_format="audio",
audio_output_format="m4a",
audio_output_format="mp3",
audio_input_format="mp4",
):
yield chunk

return StreamingResponse(
content=generate(),
media_type="audio/mp4",
media_type="audio/mp3",
headers={
"Cache-Control": "no-store",
"Pragma": "no-cache",
"Content-Disposition": "inline; filename=stream.m4a",
"Content-Disposition": "inline; filename=stream.mp3",
"X-Accel-Buffering": "no",
},
)
Expand Down
8 changes: 4 additions & 4 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -224,18 +224,18 @@ This example will work using expo-audio on Android and iOS.
rt_encode_output=True,
rt_voice="marin",
output_format="audio",
audio_output_format="m4a",
audio_input_format="mp4",
audio_output_format="mp3",
audio_input_format="m4a",
):
yield chunk

return StreamingResponse(
content=generate(),
media_type="audio/mp4",
media_type="audio/mp3",
headers={
"Cache-Control": "no-store",
"Pragma": "no-cache",
"Content-Disposition": "inline; filename=stream.m4a",
"Content-Disposition": "inline; filename=stream.mp3",
"X-Accel-Buffering": "no",
},
)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "solana-agent"
version = "31.2.3"
version = "31.2.4"
description = "AI Agents for Solana"
authors = ["Bevan Hunt <bevan@bevanhunt.com>"]
license = "MIT"
Expand Down
44 changes: 40 additions & 4 deletions solana_agent/adapters/ffmpeg_transcoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import contextlib
import logging
from typing import List, AsyncGenerator
import tempfile
import os

from solana_agent.interfaces.providers.audio import AudioTranscoder

Expand Down Expand Up @@ -49,11 +51,45 @@ async def to_pcm16( # pragma: no cover
rate_hz,
len(audio_bytes),
)
# Prefer hinting the format for common containers/codecs; ffmpeg can still autodetect if the hint is wrong.
hinted_format = None
# iOS-recorded MP4/M4A often requires a seekable input for reliable demuxing.
# Decode from a temporary file instead of stdin for MP4/M4A.
if input_mime in ("audio/mp4", "audio/m4a"):
hinted_format = "mp4"
elif input_mime in ("audio/aac",):
suffix = ".m4a" if input_mime == "audio/m4a" else ".mp4"
tmp_path = None
try:
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tf:
tmp_path = tf.name
tf.write(audio_bytes)
args = [
"-hide_banner",
"-loglevel",
"error",
"-i",
tmp_path,
"-vn", # ignore any video tracks
"-acodec",
"pcm_s16le",
"-ac",
"1",
"-ar",
str(rate_hz),
"-f",
"s16le",
"pipe:1",
]
out = await self._run_ffmpeg(args, b"")
logger.info(
"Transcoded (MP4/M4A temp-file) to PCM16: output_len=%d", len(out)
)
return out
finally:
if tmp_path:
with contextlib.suppress(Exception):
os.remove(tmp_path)

# For other formats, prefer a format hint when helpful and decode from stdin.
hinted_format = None
if input_mime in ("audio/aac",):
# Raw AAC is typically in ADTS stream format
hinted_format = "adts"
elif input_mime in ("audio/ogg", "audio/webm"):
Expand Down
9 changes: 9 additions & 0 deletions solana_agent/adapters/openai_realtime_ws.py
Original file line number Diff line number Diff line change
Expand Up @@ -1037,6 +1037,15 @@ def _strip_tool_strict(tools_val):
if "tools" in patch:
patch["tools"] = _strip_tool_strict(patch["tools"]) # idempotent

# Per server requirements, always include session.type and output_modalities
try:
patch["type"] = "realtime"
# Preserve caller-provided output_modalities if present, otherwise default to audio
if "output_modalities" not in patch:
patch["output_modalities"] = ["audio"]
except Exception:
pass

payload = {"type": "session.update", "session": patch}
# Mark that we are awaiting acknowledgement of this session update, and store the last patch
self._last_session_patch = patch or {}
Expand Down
Loading