diff --git a/src/browser/components/ChatInput/RecordingOverlay.tsx b/src/browser/components/ChatInput/RecordingOverlay.tsx
new file mode 100644
index 000000000..2f7e41f79
--- /dev/null
+++ b/src/browser/components/ChatInput/RecordingOverlay.tsx
@@ -0,0 +1,262 @@
+/**
+ * Recording overlay - shows live audio visualization during voice recording.
+ * Replaces the chat textarea when voice input is active.
+ */
+
+import React, { useRef, useState, useLayoutEffect, useEffect, useCallback } from "react";
+import { Loader2 } from "lucide-react";
+import { cn } from "@/common/lib/utils";
+import { formatKeybind, KEYBINDS } from "@/browser/utils/ui/keybinds";
+import type { UIMode } from "@/common/types/mode";
+import type { VoiceInputState } from "@/browser/hooks/useVoiceInput";
+
+/** Canvas fill colors for the waveform (slightly lighter than the CSS vars for visibility) */
+const MODE_COLORS: Record<UIMode, string> = {
+  plan: "hsl(210, 70%, 55%)",
+  exec: "hsl(268, 94%, 65%)",
+};
+
+/** Tailwind classes for the recording state, keyed by mode */
+const RECORDING_CLASSES: Record<UIMode, string> = {
+  plan: "cursor-pointer border-plan-mode bg-plan-mode/10",
+  exec: "cursor-pointer border-exec-mode bg-exec-mode/10",
+};
+
+const TEXT_CLASSES: Record<UIMode, string> = {
+  plan: "text-plan-mode-light",
+  exec: "text-exec-mode-light",
+};
+
+// Waveform shows the last 10 seconds of audio, sampled every 50ms (200 samples)
+const WINDOW_DURATION_MS = 10_000;
+const SAMPLE_INTERVAL_MS = 50;
+const NUM_SAMPLES = WINDOW_DURATION_MS / SAMPLE_INTERVAL_MS;
+
+interface RecordingOverlayProps {
+  state: VoiceInputState;
+  mode: UIMode;
+  mediaRecorder: MediaRecorder | null;
+  onStop: () => void;
+}
+
+export const RecordingOverlay: React.FC<RecordingOverlayProps> = (props) => {
+  const isRecording = props.state === "recording";
+  const isTranscribing = props.state === "transcribing";
+
+  const containerClasses = cn(
+    "mb-1 flex w-full flex-col items-center justify-center gap-1 rounded-md border px-3 py-2 transition-all focus:outline-none",
+    isRecording ? RECORDING_CLASSES[props.mode] : "cursor-wait border-amber-500 bg-amber-500/10"
+  );
+
+  return (
+    <button type="button" className={containerClasses} onClick={props.onStop} disabled={isTranscribing}>
+      {isRecording && props.mediaRecorder && (
+        <SlidingWaveform mediaRecorder={props.mediaRecorder} color={MODE_COLORS[props.mode]} height={40} />
+      )}
+      <span className={cn("text-xs", isTranscribing ? "text-amber-500" : TEXT_CLASSES[props.mode])}>
+        {isTranscribing ? (
+          <span className="flex items-center gap-1">
+            <Loader2 className="h-3 w-3 animate-spin" /> Transcribing...
+          </span>
+        ) : (
+          <RecordingHints />
+        )}
+      </span>
+    </button>
+  );
+};
+
+/** Keyboard hint display for the recording state */
+const RecordingHints: React.FC = () => (
+  <>
+    <kbd>space</kbd> send ·{" "}
+    <kbd>{formatKeybind(KEYBINDS.TOGGLE_VOICE_INPUT)}</kbd> review ·{" "}
+    <kbd>esc</kbd> cancel
+  </>
+);
+
+// =============================================================================
+// SlidingWaveform - Canvas-based amplitude visualization
+// =============================================================================
+
+interface SlidingWaveformProps {
+  mediaRecorder: MediaRecorder;
+  color: string;
+  height: number;
+}
+
+/**
+ * Renders a sliding window of audio amplitude over time.
+ * New samples appear on the right and scroll left as time passes.
+ * Falls back to a simple pulsing indicator if the Web Audio API fails.
+ */
+const SlidingWaveform: React.FC<SlidingWaveformProps> = (props) => {
+  const canvasRef = useRef<HTMLCanvasElement>(null);
+  const containerRef = useRef<HTMLDivElement>(null);
+  const [containerWidth, setContainerWidth] = useState(600);
+  const [audioError, setAudioError] = useState(false);
+
+  // Audio analysis state (refs to avoid re-renders)
+  const audioContextRef = useRef<AudioContext | null>(null);
+  const analyserRef = useRef<AnalyserNode | null>(null);
+  const samplesRef = useRef<number[]>(new Array(NUM_SAMPLES).fill(0));
+  const animationFrameRef = useRef(0);
+  const lastSampleTimeRef = useRef(0);
+
+  // Track container width for responsive canvas
+  useLayoutEffect(() => {
+    const container = containerRef.current;
+    if (!container) return;
+
+    const observer = new ResizeObserver((entries) => {
+      for (const entry of entries) {
+        setContainerWidth(entry.contentRect.width);
+      }
+    });
+    observer.observe(container);
+    setContainerWidth(container.offsetWidth);
+
+    return () => observer.disconnect();
+  }, []);
+
+  // Initialize the Web Audio API analyser; cleanup closes the AudioContext when the recorder changes
+  useEffect(() => {
+    const stream = props.mediaRecorder.stream;
+    if (!stream) return;
+
+    try {
+      const audioContext = new AudioContext();
+      const analyser = audioContext.createAnalyser();
+      analyser.fftSize = 256;
+      analyser.smoothingTimeConstant = 0.3;
+
+      const source = audioContext.createMediaStreamSource(stream);
+      source.connect(analyser);
+
+      audioContextRef.current = audioContext;
+      analyserRef.current = analyser;
+      samplesRef.current = new Array(NUM_SAMPLES).fill(0);
+      lastSampleTimeRef.current = performance.now();
+
+      return () => {
+        void audioContext.close();
+        audioContextRef.current = null;
+        analyserRef.current = null;
+      };
+    } catch (err) {
+      console.error("Failed to initialize audio visualization:", err);
+      setAudioError(true);
+    }
+  }, [props.mediaRecorder]);
+
+  // Animation loop: sample audio amplitude and render bars
+  const draw = useCallback(() => {
+    const canvas = canvasRef.current;
+    const analyser = analyserRef.current;
+    if (!canvas || !analyser) return;
+
+    const ctx = canvas.getContext("2d");
+    if (!ctx) return;
+
+    // Sample audio at fixed intervals
+    const now = performance.now();
+    if (now - lastSampleTimeRef.current >= SAMPLE_INTERVAL_MS) {
+      const dataArray = new Uint8Array(analyser.frequencyBinCount);
+      analyser.getByteTimeDomainData(dataArray);
+
+      // Calculate RMS (root mean square) amplitude; time-domain bytes are centered at 128 (silence)
+      let sum = 0;
+      for (const sample of dataArray) {
+        const normalized = (sample - 128) / 128;
+        sum += normalized * normalized;
+      }
+      const rms = Math.sqrt(sum / dataArray.length);
+
+      // Slide the window: drop the oldest sample, append the newest
+      samplesRef.current.shift();
+      samplesRef.current.push(rms);
+      lastSampleTimeRef.current = now;
+    }
+
+    // Render bars
+    ctx.clearRect(0, 0, canvas.width, canvas.height);
+
+    const samples = samplesRef.current;
+    const numBars = samples.length;
+    // Bar sizing: bars fill the full width with a 40% gap ratio
+    const barWidth = canvas.width / (1.4 * numBars - 0.4);
+    const gap = barWidth * 0.4;
+    const centerY = canvas.height / 2;
+
+    ctx.fillStyle = props.color;
+
+    for (let i = 0; i < numBars; i++) {
+      const scaledAmplitude = Math.min(1, samples[i] * 3); // Boost for visibility
+      const barHeight = Math.max(2, scaledAmplitude * canvas.height * 0.9);
+      const x = i * (barWidth + gap);
+      const y = centerY - barHeight / 2;
+
+      ctx.beginPath();
+      // roundRect fallback for older browsers (though Electron 38+ supports it)
+      if (ctx.roundRect) {
+        ctx.roundRect(x, y, barWidth, barHeight, 1);
+      } else {
+        ctx.rect(x, y, barWidth, barHeight);
+      }
+      ctx.fill();
+    }
+
+    animationFrameRef.current = requestAnimationFrame(draw);
+  }, [props.color]);
+
+  // Run animation loop
+  useEffect(() => {
+    if (audioError) return;
+    animationFrameRef.current = requestAnimationFrame(draw);
+    return () => cancelAnimationFrame(animationFrameRef.current);
+  }, [draw, audioError]);
+
+  // Fallback: simple pulsing indicator if the Web Audio API is unavailable
+  if (audioError) {
+    return (
+      <div className="flex items-center justify-center gap-1" style={{ height: props.height }}>
+        {[0, 1, 2, 3, 4].map((i) => (
+          <div
+            key={i}
+            className="h-1.5 w-1.5 animate-pulse rounded-full"
+            style={{ backgroundColor: props.color, animationDelay: `${i * 150}ms` }}
+          />
+        ))}
+      </div>
+    );
+  }
+
+  return (
+    <div ref={containerRef} className="w-full">
+      <canvas ref={canvasRef} width={containerWidth} height={props.height} />
+    </div>
+  );
+};
diff --git a/src/browser/components/ChatInput/VoiceInputButton.tsx b/src/browser/components/ChatInput/VoiceInputButton.tsx
index 288cfeaa6..6d4eb817b 100644
--- a/src/browser/components/ChatInput/VoiceInputButton.tsx
+++ b/src/browser/components/ChatInput/VoiceInputButton.tsx
@@ -9,6 +9,7 @@ import { TooltipWrapper, Tooltip } from "../Tooltip";
 import { formatKeybind, KEYBINDS } from "@/browser/utils/ui/keybinds";
 import { cn } from "@/common/lib/utils";
 import type { VoiceInputState } from "@/browser/hooks/useVoiceInput";
+import type { UIMode } from "@/common/types/mode";
 
 interface VoiceInputButtonProps {
   state: VoiceInputState;
@@ -17,29 +18,43 @@ interface VoiceInputButtonProps {
   requiresSecureContext: boolean;
   onToggle: () => void;
   disabled?: boolean;
+  mode: UIMode;
 }
 
-const STATE_CONFIG: Record<VoiceInputState, { label: string; colorClass: string }> = {
-  idle: { label: "Voice input", colorClass: "text-muted/50 hover:text-muted" },
-  recording: { label: "Stop recording", colorClass: "text-blue-500 animate-pulse" },
-  transcribing: { label: "Transcribing...", colorClass: "text-amber-500" },
+/** Color classes for each voice input state */
+const STATE_COLORS: Record<VoiceInputState, string> = {
+  idle: "text-muted/50 hover:text-muted",
+  recording: "", // Set dynamically based on mode
+  transcribing: "text-amber-500",
 };
 
+const RECORDING_COLORS: Record<UIMode, string> = {
+  plan: "text-plan-mode-light animate-pulse",
+  exec: "text-exec-mode-light animate-pulse",
+};
+
+function getColorClass(state: VoiceInputState, mode: UIMode): string {
+  return state === "recording" ? RECORDING_COLORS[mode] : STATE_COLORS[state];
+}
+
 export const VoiceInputButton: React.FC<VoiceInputButtonProps> = (props) => {
   if (!props.shouldShowUI) return null;
 
   const needsHttps = props.requiresSecureContext;
   const needsApiKey = !needsHttps && !props.isApiKeySet;
-  const isDisabledReason = needsHttps || needsApiKey;
+  const isDisabled = needsHttps || needsApiKey;
+
+  const label = isDisabled
+    ? needsHttps
+      ? "Voice input (requires HTTPS)"
+      : "Voice input (requires OpenAI API key)"
+    : props.state === "recording"
+      ? "Stop recording"
+      : props.state === "transcribing"
+        ? "Transcribing..."
+        : "Voice input";
 
-  const { label, colorClass } = isDisabledReason
-    ? {
-        label: needsHttps
-          ? "Voice input (requires HTTPS)"
-          : "Voice input (requires OpenAI API key)",
-        colorClass: "text-muted/50",
-      }
-    : STATE_CONFIG[props.state];
+  const colorClass = isDisabled ? "text-muted/50" : getColorClass(props.state, props.mode);
 
   const Icon = props.state === "transcribing" ? Loader2 : Mic;
   const isTranscribing = props.state === "transcribing";
@@ -49,7 +64,7 @@ export const VoiceInputButton: React.FC<VoiceInputButtonProps> = (props) => {
       ) : (
         <>
[...]

diff --git [...]
@@ [...] @@ = (props) => {
         requiresSecureContext={voiceInput.requiresSecureContext}
         onToggle={voiceInput.toggle}
         disabled={disabled || isSending}
+        mode={mode}
       />
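
Aside: the bar-sizing constant in SlidingWaveform's draw loop is plain geometry. With the gap fixed at 40% of the bar width, n bars and n - 1 gaps must span the canvas exactly: n*w + (n - 1)*0.4w = (1.4n - 0.4)*w, so w = canvasWidth / (1.4n - 0.4). A minimal standalone TypeScript check of that layout (the helper name barLayout is illustrative, not part of the patch):

function barLayout(canvasWidth: number, numBars: number, gapRatio = 0.4) {
  // Same formula as the draw loop: bars plus inter-bar gaps fill the width exactly
  const barWidth = canvasWidth / ((1 + gapRatio) * numBars - gapRatio);
  return { barWidth, gap: barWidth * gapRatio };
}

// NUM_SAMPLES = 200 bars on a 600px-wide canvas: 200 bars + 199 gaps = 600px
const { barWidth, gap } = barLayout(600, 200);
console.log((200 * barWidth + 199 * gap).toFixed(3)); // "600.000"
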
diff --git a/src/browser/hooks/useVoiceInput.ts b/src/browser/hooks/useVoiceInput.ts
index d729dda3a..681ec38b8 100644
--- a/src/browser/hooks/useVoiceInput.ts
+++ b/src/browser/hooks/useVoiceInput.ts
@@ -37,6 +37,8 @@ export interface UseVoiceInputResult {
   shouldShowUI: boolean;
   /** True when running over HTTP (not localhost) - microphone requires secure context */
   requiresSecureContext: boolean;
+  /** The active MediaRecorder instance when recording, for visualization */
+  mediaRecorder: MediaRecorder | null;
   start: () => void;
   stop: (options?: { send?: boolean }) => void;
   cancel: () => void;
@@ -73,7 +75,9 @@ export function useVoiceInput(options: UseVoiceInputOptions): UseVoiceInputResult {
   const [state, setState] = useState<VoiceInputState>("idle");
 
   // Refs for MediaRecorder lifecycle
+  // We use both a ref (for callbacks) and state (to trigger a re-render for the visualizer)
   const recorderRef = useRef<MediaRecorder | null>(null);
+  const [mediaRecorder, setMediaRecorder] = useState<MediaRecorder | null>(null);
   const streamRef = useRef<MediaStream | null>(null);
   const chunksRef = useRef<Blob[]>([]);
@@ -197,6 +201,7 @@ export function useVoiceInput(options: UseVoiceInputOptions): UseVoiceInputResult {
      };
 
      recorderRef.current = recorder;
+     setMediaRecorder(recorder);
      recorder.start();
      setState("recording");
    } catch (err) {
@@ -221,6 +226,7 @@ export function useVoiceInput(options: UseVoiceInputOptions): UseVoiceInputResult {
    if (recorderRef.current?.state !== "inactive") {
      recorderRef.current?.stop();
      recorderRef.current = null;
+     setMediaRecorder(null);
    }
  }, []);
@@ -305,6 +311,7 @@ export function useVoiceInput(options: UseVoiceInputOptions): UseVoiceInputResult {
    isApiKeySet: callbacksRef.current.openAIKeySet,
    shouldShowUI: HAS_MEDIA_RECORDER && !HAS_TOUCH_DICTATION,
    requiresSecureContext: HAS_MEDIA_RECORDER && !HAS_GET_USER_MEDIA,
+   mediaRecorder,
    start: () => void start(),
    stop,
    cancel,
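
Taken together: useVoiceInput owns the MediaRecorder and now also exposes it as state, the chat input passes it (plus the active mode) down, and RecordingOverlay hands it to the canvas visualizer. A hedged sketch of a call site follows; the component name VoiceInputArea, its prop shape, the send-on-stop choice, and the export of UseVoiceInputOptions are illustrative assumptions. Only RecordingOverlay, useVoiceInput, and their props come from this diff.

import React from "react";
import { RecordingOverlay } from "@/browser/components/ChatInput/RecordingOverlay";
import { useVoiceInput } from "@/browser/hooks/useVoiceInput";
import type { UseVoiceInputOptions } from "@/browser/hooks/useVoiceInput";
import type { UIMode } from "@/common/types/mode";

// Hook options are app-specific; their construction is elided from this sketch
declare const options: UseVoiceInputOptions;

// Hypothetical parent: swaps the textarea for the overlay while voice input is active
export const VoiceInputArea: React.FC<{ mode: UIMode }> = ({ mode }) => {
  const voice = useVoiceInput(options);

  if (voice.state === "idle") return null; // the normal textarea renders instead

  return (
    <RecordingOverlay
      state={voice.state}
      mode={mode}
      mediaRecorder={voice.mediaRecorder}
      onStop={() => voice.stop({ send: true })} // assumed: clicking the overlay stops and sends
    />
  );
};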