diff --git a/ai/gen-ai-agents/voice-ai-agent/LICENSE b/ai/gen-ai-agents/voice-ai-agent/LICENSE
new file mode 100644
index 000000000..f5385ce4e
--- /dev/null
+++ b/ai/gen-ai-agents/voice-ai-agent/LICENSE
@@ -0,0 +1,35 @@
+Copyright (c) 2025 Oracle and/or its affiliates.
+ 
+The Universal Permissive License (UPL), Version 1.0
+ 
+Subject to the condition set forth below, permission is hereby granted to any
+person obtaining a copy of this software, associated documentation and/or data
+(collectively the "Software"), free of charge and under any and all copyright
+rights in the Software, and any and all patent rights owned or freely
+licensable by each licensor hereunder covering either (i) the unmodified
+Software as contributed to or provided by such licensor, or (ii) the Larger
+Works (as defined below), to deal in both
+ 
+(a) the Software, and
+(b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
+one is included with the Software (each a "Larger Work" to which the Software
+is contributed by such licensors),
+ 
+without restriction, including without limitation the rights to copy, create
+derivative works of, display, perform, and distribute the Software and make,
+use, sell, offer for sale, import, export, have made, and have sold the
+Software and the Larger Work(s), and to sublicense the foregoing rights on
+either these or other terms.
+ 
+This license is subject to the following condition:
+The above copyright notice and either this complete permission notice or at
+a minimum a reference to the UPL must be included in all copies or
+substantial portions of the Software.
+ 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/ai/gen-ai-agents/voice-ai-agent/README.md b/ai/gen-ai-agents/voice-ai-agent/README.md
new file mode 100644
index 000000000..dea5c30e5
--- /dev/null
+++ b/ai/gen-ai-agents/voice-ai-agent/README.md
@@ -0,0 +1,52 @@
+# Voice AI Agent (OCI Realtime Speech + Generative AI Agent)
+
+**Author:** msliwins  
+**Last review date:** 2025-12-05  
+
+A small voice assistant that:
+
+1. Listens to your microphone with VAD (voice activity detection),
+2. Streams audio to **OCI Realtime Speech** for STT,
+3. Sends the recognized text to an **OCI Generative AI Agent Endpoint**,
+4. Uses **OCI Text-to-Speech** to speak the answer back.
+
+Everything runs in a loop until you stop it with `Ctrl+C`.
+
+---
+
+## Features
+
+- 🎙️ Voice Activity Detection (VAD)  
+  Automatically starts recording when you speak and stops after a short silence.
+
+- 🧠 Generative AI Agent integration  
+  Uses an OCI Generative AI Agent Endpoint to handle conversation and tools.
+
+- 🗣️ Text-to-Speech  
+  Uses OCI AI Speech to synthesize responses and plays them locally.
+
+- 🔁 Persistent agent session  
+  Single agent session reused across turns for conversational context.
+
+- 🧪 Debug traces  
+  Saves agent traces to `traces.json` (whenever the agent returns any) for debugging.
+
+---
+
+## Project Structure (key files)
+
+- `main.py` – the main script; runs the whole listen → transcribe → ask → speak loop.
+- `requirements.txt` – Python dependencies.
+- `example.env` – safe template with placeholder values for others.
+
+---
+
+## Requirements
+
+- Python 3.11+ (recommended)
+- Valid OCI tenancy and user with:  
+  - Permission to use **AI Speech** (STT + TTS),
+  - Permission to use **Generative AI Agent Runtime**.
+- Configured `~/.oci/config` with a profile matching your env (`OCI_PROFILE`).
+
+- A Windows machine with a working microphone and speakers (playback uses the Windows-only `winsound` module).
diff --git a/ai/gen-ai-agents/voice-ai-agent/example.env b/ai/gen-ai-agents/voice-ai-agent/example.env
new file mode 100644
index 000000000..40b447598
--- /dev/null
+++ b/ai/gen-ai-agents/voice-ai-agent/example.env
@@ -0,0 +1,3 @@
+OCI_PROFILE=PHOENIX
+OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..
+OCI_AGENT_ENDPOINT_ID=ocid1.genaiagentendpoint.oc1.phx.
diff --git a/ai/gen-ai-agents/voice-ai-agent/main.py b/ai/gen-ai-agents/voice-ai-agent/main.py
new file mode 100644
index 000000000..d0dd91dae
--- /dev/null
+++ b/ai/gen-ai-agents/voice-ai-agent/main.py
@@ -0,0 +1,399 @@
+import asyncio
+import json
+import sys
+import io
+import wave
+import sounddevice as sd
+import winsound
+import oci
+import numpy as np
+import time
+
+from oci.ai_speech.models import RealtimeParameters
+from oci_ai_speech_realtime import RealtimeSpeechClient, RealtimeSpeechClientListener
+from oci.ai_speech import AIServiceSpeechClient
+from oci.ai_speech.models import (
+    SynthesizeSpeechDetails, TtsOracleConfiguration,
+    TtsOracleTts2NaturalModelDetails, TtsOracleTts1StandardModelDetails,
+    TtsOracleSpeechSettings
+)
+from oci.generative_ai_agent_runtime import GenerativeAiAgentRuntimeClient
+from oci.generative_ai_agent_runtime.models import ChatDetails, CreateSessionDetails
+
+import os
+from dotenv import load_dotenv
+# Load environment variables through python-dotenv before the settings below read them.
+load_dotenv()
+# --- Configuration ---
+# Profile name passed to oci.config.from_file(); "PHOENIX" is used when OCI_PROFILE is unset.
+PROFILE = os.getenv("OCI_PROFILE", "PHOENIX")
+# Compartment handed to the STT client, list_voices and speech synthesis (None when unset).
+COMPARTMENT_OCID = os.getenv("OCI_COMPARTMENT_OCID")
+# Agent endpoint used for session creation and chat calls (None when unset).
+AGENT_ENDPOINT_ID = os.getenv("OCI_AGENT_ENDPOINT_ID")
+# STT settings
+STT_LANGUAGE = "en-US"
+SAMPLE_RATE = 16000
+CHANNELS = 1
+BYTES_PER_SAMPLE = 2
+CHUNK_MS = 100
+# Size in bytes of one CHUNK_MS slice of audio; CHANNELS is not factored in (it is 1).
+CHUNK_BYTES = (SAMPLE_RATE * CHUNK_MS // 1000) * BYTES_PER_SAMPLE
+
+# VAD settings
+VAD_THRESHOLD = 0.01      # RMS threshold for detecting speech
+SILENCE_DURATION = 1.5    # Seconds of silence to stop listening
+PRE_SPEECH_BUFFER = 0.5   # Seconds of audio kept just before speech starts
+
+# --- STT Helper Classes ---
+
+class STTListener(RealtimeSpeechClientListener):
+    """Collect transcripts delivered by one OCI Realtime Speech session.
+
+    The caller reads two plain-string attributes once the session is over:
+
+    * ``final_text``   - all finalized segments, space-joined in arrival order.
+    * ``last_partial`` - the latest non-final transcript received since the
+      most recent final segment ("" if none).
+    """
+
+    def __init__(self, ready_evt: asyncio.Event):
+        # Event set in on_connect() so the sender knows it may start streaming.
+        self.ready = ready_evt
+        self.final_text = ""
+        self.last_partial = ""
+
+    def on_connect(self):
+        """Signal that the connection is established."""
+        self.ready.set()
+
+    def on_connect_message(self, message):
+        """Ignore the connect message."""
+
+    def on_network_event(self, event):
+        """Ignore network events."""
+
+    def on_ack_message(self, message):
+        """Ignore audio acknowledgements."""
+
+    def on_error(self, error):
+        """Report a service error on stderr."""
+        print(f"STT Error: {error}", file=sys.stderr)
+
+    def on_result(self, message):
+        """Store the first transcription carried by ``message``.
+
+        Non-dict messages, messages without transcriptions and empty
+        transcripts are ignored. A result flagged ``isFinal`` (or ``final``)
+        is appended to ``final_text`` rather than replacing it, so an
+        utterance that is finalized in several segments is kept in full.
+        """
+        if not isinstance(message, dict):
+            return
+
+        transcriptions = message.get("transcriptions") or []
+        if not transcriptions:
+            return
+
+        first = transcriptions[0] or {}
+        text = first.get("transcription") or ""
+        if not text:
+            return
+
+        if first.get("isFinal") or first.get("final"):
+            self.final_text = f"{self.final_text} {text}".strip()
+            # The partial that led up to this final segment is now obsolete.
+            self.last_partial = ""
+        else:
+            self.last_partial = text
+
+async def _maybe_await(result):
+    """Await ``result`` when it is a coroutine; otherwise return it as is."""
+    if asyncio.iscoroutine(result):
+        return await result
+    return result
+
+
+async def send_audio_to_oci(config, audio_data):
+    """Send recorded audio to OCI Realtime Speech and return the transcript.
+
+    Connects to the realtime endpoint of ``config["region"]``, streams
+    ``audio_data`` in ``CHUNK_BYTES`` pieces between a "start" and an "end"
+    message, requests the final result, waits one second for it to arrive and
+    then closes the session. The connection task is always cleaned up, also on
+    timeout or when sending fails.
+
+    Args:
+        config: OCI SDK configuration dict (must contain ``"region"``).
+        audio_data: Raw audio bytes to transcribe.
+
+    Returns:
+        The final transcript, else the last partial transcript, else ``""``
+        (also ``""`` when the connection is not ready within 10 seconds).
+    """
+    print(f"[STT] Sending {len(audio_data)} bytes to OCI...")
+
+    region = config["region"]
+    endpoint = f"wss://realtime.aiservice.{region}.oci.oraclecloud.com"
+
+    params = RealtimeParameters(
+        # Keep the declared rate in sync with the recording sample rate.
+        encoding=f"audio/raw;rate={SAMPLE_RATE}",
+        language_code=STT_LANGUAGE,
+        model_type="ORACLE",
+        is_ack_enabled=True,
+    )
+
+    ready = asyncio.Event()
+    listener = STTListener(ready)
+    client = RealtimeSpeechClient(
+        config=config,
+        service_endpoint=endpoint,
+        realtime_speech_parameters=params,
+        listener=listener,
+        compartment_id=COMPARTMENT_OCID,
+    )
+
+    connect_task = asyncio.create_task(client.connect())
+    try:
+        try:
+            await asyncio.wait_for(ready.wait(), timeout=10)
+        except asyncio.TimeoutError:
+            print("[STT] Timeout connecting to OCI Speech.")
+            return ""
+
+        await _maybe_await(client.send_data(json.dumps({"type": "start"})))
+
+        # Send audio in small chunks, yielding briefly between them.
+        for offset in range(0, len(audio_data), CHUNK_BYTES):
+            await _maybe_await(client.send_data(audio_data[offset:offset + CHUNK_BYTES]))
+            await asyncio.sleep(0.003)
+
+        await _maybe_await(client.send_data(json.dumps({"type": "end"})))
+
+        # Best effort: a failure here must not discard what was already received.
+        try:
+            await _maybe_await(client.request_final_result())
+        except Exception as exc:
+            print(f"[STT] Could not request final result: {exc}", file=sys.stderr)
+
+        await asyncio.sleep(1.0)  # Give a moment for final result
+    finally:
+        # Only close a connection that was actually established.
+        if ready.is_set():
+            try:
+                await _maybe_await(client.close())
+            except Exception as exc:
+                print(f"[STT] Error while closing connection: {exc}", file=sys.stderr)
+
+        if not connect_task.done():
+            connect_task.cancel()
+            try:
+                await connect_task
+            except asyncio.CancelledError:
+                pass
+
+    text = listener.final_text or listener.last_partial
+    if text:
+        print(f"[STT] User said: {text}")
+    else:
+        print("[STT] No speech detected from audio.")
+
+    return text
+
+def listen_for_speech():
+    """Synchronous placeholder for speech listening (not used).
+
+    The recording logic lives in ``listen_for_speech_async``. This stub no
+    longer opens the microphone or touches the asyncio event loop, because
+    it never consumed any audio.
+
+    Returns:
+        Always ``b""``.
+    """
+    print("\n[VAD] Listening... (Speak now)")
+    return b""
+
+async def listen_for_speech_async():
+    """Record a single utterance from the microphone using energy-based VAD.
+
+    Audio is read in ``CHUNK_MS`` blocks of int16 samples. Recording starts
+    when a block's level exceeds ``VAD_THRESHOLD`` and stops after
+    ``SILENCE_DURATION`` seconds of consecutive blocks below it. Up to
+    ``PRE_SPEECH_BUFFER`` seconds of audio preceding the first loud block are
+    prepended so the start of the utterance is not clipped.
+
+    Returns:
+        The raw bytes of the recorded blocks, in capture order.
+    """
+    from collections import deque  # local import keeps this function self-contained
+
+    print("\n[VAD] Listening... (Speak now)")
+
+    q = asyncio.Queue()
+    loop = asyncio.get_running_loop()
+
+    def callback(indata, frames, time_info, status):
+        # Called by sounddevice outside the event loop, hence the thread-safe
+        # hand-off; the block is copied before it is queued.
+        if status:
+            print(status, file=sys.stderr)
+        loop.call_soon_threadsafe(q.put_nowait, indata.copy())
+
+    chunk_samples = int(SAMPLE_RATE * CHUNK_MS / 1000)
+    pre_speech_chunks_count = int(PRE_SPEECH_BUFFER * 1000 / CHUNK_MS)
+    silence_chunks_limit = int(SILENCE_DURATION * 1000 / CHUNK_MS)
+
+    audio_data = []
+    # Ring buffer of the most recent quiet blocks seen before speech starts.
+    pre_buffer = deque(maxlen=pre_speech_chunks_count)
+
+    is_speaking = False
+    silence_counter = 0
+
+    # The context manager starts the stream and always stops/closes it.
+    with sd.InputStream(
+        samplerate=SAMPLE_RATE,
+        channels=CHANNELS,
+        dtype='int16',
+        callback=callback,
+        blocksize=chunk_samples,
+    ):
+        while True:
+            indata = await q.get()
+
+            # Approximate RMS in [0,1]
+            rms = np.std(indata) / 32768.0
+
+            if is_speaking:
+                audio_data.append(indata)
+                silence_counter = silence_counter + 1 if rms < VAD_THRESHOLD else 0
+
+                if silence_counter >= silence_chunks_limit:
+                    print(f"\n[VAD] Silence detected ({SILENCE_DURATION}s). Stopping.")
+                    break
+            elif rms > VAD_THRESHOLD:
+                print(f"\n[VAD] Speech detected! (RMS: {rms:.4f})")
+                is_speaking = True
+                # Buffered lead-in first, then the triggering block exactly once.
+                audio_data.extend(pre_buffer)
+                audio_data.append(indata)
+                silence_counter = 0
+            else:
+                # Still waiting for speech: remember this block as lead-in.
+                pre_buffer.append(indata)
+
+    return b"".join(block.tobytes() for block in audio_data)
+
+# --- Agent Helper Functions ---
+
+def create_agent_session(config):
+    """Create a new session on the configured agent endpoint.
+
+    Args:
+        config: OCI SDK configuration used to build the runtime client.
+
+    Returns:
+        The id of the newly created session.
+    """
+    print("[Agent] Creating session...")
+    runtime_client = GenerativeAiAgentRuntimeClient(config)
+    details = CreateSessionDetails(description="Voice Agent Session")
+    created = runtime_client.create_session(
+        details,
+        agent_endpoint_id=AGENT_ENDPOINT_ID,
+    )
+    new_session_id = created.data.id
+    print(f"[Agent] Session created: {new_session_id}")
+    return new_session_id
+
+def query_agent(config, user_text, session_id):
+    """Send one user message to the agent and return its text reply.
+
+    Args:
+        config: OCI SDK configuration used to build the runtime client.
+        user_text: The message to send.
+        session_id: Id of an existing agent session (keeps conversation context).
+
+    Returns:
+        The text of the agent's message, or a fixed fallback sentence when the
+        response carries no message content.
+    """
+    print("[Agent] Thinking...")
+    agent_runtime = GenerativeAiAgentRuntimeClient(config)
+
+    chat_details = ChatDetails(
+        user_message=user_text,
+        session_id=session_id
+    )
+
+    response = agent_runtime.chat(agent_endpoint_id=AGENT_ENDPOINT_ID, chat_details=chat_details)
+
+    # Save Agent's traces to JSON for debugging (best effort; never fails the turn).
+    if response.data.traces:
+        try:
+            # Explicit UTF-8: the platform default encoding may be unable to
+            # represent non-ASCII characters contained in the traces.
+            with open("traces.json", "w", encoding="utf-8") as f:
+                f.write(str(response.data.traces))
+            print("[Debug] Traces saved to traces.json")
+        except Exception as e:
+            print(f"[Debug] Failed to save traces: {e}")
+
+    if response.data.message and response.data.message.content:
+        response_text = response.data.message.content.text
+        print(f"[Agent] Response: {response_text}")
+        return response_text
+
+    print("[Agent] No text response. Checking for tool calls...")
+    return "I am processing your request, but I couldn't find the tool execution details."
+
+# --- TTS Helper Functions ---
+
+def speak_text(config, text):
+    """Synthesize ``text`` with OCI Text-to-Speech and play it locally.
+
+    Uses the first voice returned by ``list_voices`` for the configured
+    compartment, preferring the TTS_2_NATURAL model when that voice supports
+    it. The PCM response is wrapped in an in-memory WAV (mono, 16-bit) and
+    played with ``winsound``.
+
+    Args:
+        config: OCI SDK configuration used to build the speech client.
+        text: Text to speak; nothing happens when it is empty or ``None``.
+    """
+    if not text:
+        return
+
+    print("[TTS] Synthesizing...")
+    tts_client = AIServiceSpeechClient(config)
+
+    voices = tts_client.list_voices(compartment_id=COMPARTMENT_OCID).data.items
+    if not voices:
+        print("[TTS] No voices found.", file=sys.stderr)
+        return
+
+    v = voices[0]
+    voice_id = v.voice_id
+    use_tts2 = "TTS_2_NATURAL" in (v.supported_models or [])
+    if use_tts2:
+        model_details = TtsOracleTts2NaturalModelDetails(
+            model_name="TTS_2_NATURAL", voice_id=voice_id, language_code=v.language_code
+        )
+    else:
+        model_details = TtsOracleTts1StandardModelDetails(
+            model_name="TTS_1_STANDARD", voice_id=voice_id
+        )
+
+    # Fall back to 22050 Hz when the attribute is missing *or* present but None
+    # (int(None) would raise TypeError).
+    sample_rate = int(getattr(v, "sample_rate_in_hertz", None) or 22050)
+
+    details = SynthesizeSpeechDetails(
+        text=text,
+        is_stream_enabled=False,
+        compartment_id=COMPARTMENT_OCID,
+        configuration=TtsOracleConfiguration(
+            model_family="ORACLE",
+            model_details=model_details,
+            speech_settings=TtsOracleSpeechSettings(
+                text_type="TEXT",
+                sample_rate_in_hz=sample_rate,
+                output_format="PCM"
+            )
+        )
+    )
+
+    resp = tts_client.synthesize_speech(details)
+    pcm_bytes = resp.data.content
+
+    if not pcm_bytes:
+        print("[TTS] Empty audio received.")
+        return
+
+    # winsound needs a complete WAV image, so add a header around the raw PCM.
+    buf = io.BytesIO()
+    with wave.open(buf, "wb") as wf:
+        wf.setnchannels(1)
+        wf.setsampwidth(2)
+        wf.setframerate(sample_rate)
+        wf.writeframes(pcm_bytes)
+    wav_bytes = buf.getvalue()
+
+    print("[TTS] Playing...")
+    winsound.PlaySound(wav_bytes, winsound.SND_MEMORY)
+    print("[TTS] Done.")
+
+# --- Main Loop ---
+
+async def main():
+    """Run the voice loop: record -> transcribe -> ask the agent -> speak.
+
+    Loads the OCI config for ``PROFILE``, creates one agent session and reuses
+    it for every turn. An error inside a turn is printed and the loop resumes
+    after a 2-second pause. Ctrl+C ends the loop and prints the stop banner.
+    """
+    try:
+        config = oci.config.from_file("~/.oci/config", PROFILE)
+    except Exception as e:
+        print(f"Error loading OCI config: {e}")
+        return
+
+    # Fail early with a clear message instead of an opaque SDK error later.
+    required = (
+        ("OCI_COMPARTMENT_OCID", COMPARTMENT_OCID),
+        ("OCI_AGENT_ENDPOINT_ID", AGENT_ENDPOINT_ID),
+    )
+    missing = [name for name, value in required if not value]
+    if missing:
+        print(f"Missing required environment variable(s): {', '.join(missing)}")
+        return
+
+    print("--- Voice AI Agent Started ---")
+    print("Listening... (Press Ctrl+C to exit)")
+
+    # Create session once and reuse
+    try:
+        session_id = create_agent_session(config)
+    except Exception as e:
+        print(f"Failed to create agent session: {e}")
+        return
+
+    try:
+        while True:
+            try:
+                # 1. Record speech
+                audio_bytes = await listen_for_speech_async()
+                if not audio_bytes:
+                    continue
+
+                # 2. Transcribe
+                user_text = await send_audio_to_oci(config, audio_bytes)
+                if not user_text:
+                    continue
+
+                # 3. Query agent
+                agent_response = query_agent(config, user_text, session_id)
+
+                # 4. Speak response
+                speak_text(config, agent_response)
+
+                # Small pause to reduce feedback from speakers
+                await asyncio.sleep(0.5)
+            except Exception as e:
+                print(f"An error occurred: {e}")
+                await asyncio.sleep(2)
+    except (KeyboardInterrupt, asyncio.CancelledError):
+        # Under asyncio.run() on Python 3.11+, Ctrl+C reaches this coroutine as
+        # a task cancellation rather than KeyboardInterrupt; treat both as a
+        # request to stop so the banner below is always printed.
+        pass
+
+    print("\n--- Voice AI Agent Stopped ---")
+
+if __name__ == "__main__":
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        # asyncio.run() re-raises Ctrl+C after cancelling main(); exit quietly
+        # instead of ending the program with a traceback.
+        pass
diff --git a/ai/gen-ai-agents/voice-ai-agent/requirements.txt b/ai/gen-ai-agents/voice-ai-agent/requirements.txt
new file mode 100644
index 000000000..4a564354a
--- /dev/null
+++ b/ai/gen-ai-agents/voice-ai-agent/requirements.txt
@@ -0,0 +1,6 @@
+oci
+oci-ai-speech-realtime
+python-dotenv
+sounddevice
+numpy
+websocket-client