Skip to content

Commit 69460b0

Browse files
improve audio recording pipeline on iOS and Android
1 parent cb7b2d9 commit 69460b0

File tree

8 files changed

+212
-92
lines changed

8 files changed

+212
-92
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ const voiceConfig: RecordingConfig = {
109109
channels: 1, // Mono for voice
110110
encoding: RecordingEncodingTypes.PCM_16BIT,
111111
interval: 50, // Fast response for real-time voice
112+
voiceProcessing: true, // Enable platform AEC/NS/AGC when needed
113+
preGainDb: 6, // Optional gain boost for softer microphones
112114
onAudioStream: async (event: AudioDataEvent) => {
113115
// Process voice data with optimal settings
114116
console.log('Voice data:', {
@@ -131,6 +133,8 @@ const soundConfig: SoundConfig = {
131133

132134
await ExpoStreamer.startRecording(voiceConfig);
133135
await ExpoStreamer.setSoundConfig(soundConfig);
136+
137+
> **Tip:** `voiceProcessing` now defaults to `false` so recordings capture the hotter, unprocessed microphone signal. Toggle it on when you need the built-in echo cancellation/noise suppression pipeline. Use `preGainDb` (range -24 dB to +24 dB) to fine-tune input loudness without clipping.
134138
```
135139

136140
### Event Handling with TypeScript

android/src/main/java/expo/modules/audiostream/AudioRecorderManager.kt

Lines changed: 55 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import java.nio.ByteOrder
2020
import java.util.concurrent.atomic.AtomicBoolean
2121
import kotlin.math.max
2222
import kotlin.math.min
23+
import kotlin.math.pow
2324

2425

2526
class AudioRecorderManager(
@@ -48,6 +49,8 @@ class AudioRecorderManager(
4849
private val audioRecordLock = Any()
4950
private var audioFileHandler: AudioFileHandler = AudioFileHandler(filesDir)
5051
private var hardwareSampleRate: Int = 0
52+
private var voiceProcessingEnabled = false
53+
private var gainMultiplier = 1.0f
5154

5255
// Flag to control whether actual audio data or silence is sent
5356
private var isSilent = false
@@ -132,6 +135,10 @@ class AudioRecorderManager(
132135
// Update with validated values
133136
audioFormat = formatValidationResult.first
134137
recordingConfig = formatValidationResult.second
138+
voiceProcessingEnabled = recordingConfig.voiceProcessing
139+
val clampedGainDb = recordingConfig.preGainDb.coerceIn(-24.0, 24.0)
140+
gainMultiplier = 10.0.pow(clampedGainDb / 20.0).toFloat()
141+
Log.d(Constants.TAG, "Voice processing enabled: $voiceProcessingEnabled, preGain: $clampedGainDb dB")
135142

136143
// Initialize the AudioRecord if it's a new recording or if it's not currently paused
137144
if (audioRecord == null || !isPaused.get()) {
@@ -159,8 +166,12 @@ class AudioRecorderManager(
159166
}
160167

161168
audioRecord?.startRecording()
162-
// Apply audio effects after starting recording using the manager
163-
audioRecord?.let { audioEffectsManager.setupAudioEffects(it) }
169+
// Apply audio effects after starting recording when voice processing is requested
170+
if (voiceProcessingEnabled) {
171+
audioRecord?.let { audioEffectsManager.setupAudioEffects(it) }
172+
} else {
173+
audioEffectsManager.releaseAudioEffects()
174+
}
164175

165176
isPaused.set(false)
166177
isRecording.set(true)
@@ -227,6 +238,8 @@ class AudioRecorderManager(
227238
lastEmitTime = SystemClock.elapsedRealtime()
228239
lastEmittedSize = 0
229240
hardwareSampleRate = 0
241+
voiceProcessingEnabled = false
242+
gainMultiplier = 1.0f
230243

231244
Log.d(Constants.TAG, "Audio resources cleaned up")
232245
} catch (e: Exception) {
@@ -330,8 +343,10 @@ class AudioRecorderManager(
330343
isPaused.set(false)
331344
audioRecord?.startRecording()
332345

333-
// Re-apply audio effects when resuming using the manager
334-
audioRecord?.let { audioEffectsManager.setupAudioEffects(it) }
346+
// Re-apply audio effects when resuming, only if voice processing is enabled
347+
if (voiceProcessingEnabled) {
348+
audioRecord?.let { audioEffectsManager.setupAudioEffects(it) }
349+
}
335350

336351
promise.resolve("Recording resumed")
337352
}
@@ -422,10 +437,6 @@ class AudioRecorderManager(
422437
return ByteArray(0)
423438
}
424439

425-
if (hardwareSampleRate == 0 || hardwareSampleRate == recordingConfig.sampleRate) {
426-
return rawData.copyOfRange(0, bytesRead)
427-
}
428-
429440
if (audioFormat != AudioFormat.ENCODING_PCM_16BIT) {
430441
Log.w(
431442
Constants.TAG,
@@ -445,26 +456,41 @@ class AudioRecorderManager(
445456
.asShortBuffer()
446457
.get(sourceSamples)
447458

448-
val targetSampleCount = ((sourceSampleCount.toLong() * recordingConfig.sampleRate + hardwareSampleRate / 2) / hardwareSampleRate).toInt()
449-
if (targetSampleCount <= 0) {
450-
return ByteArray(0)
459+
val needsResample = hardwareSampleRate != 0 && hardwareSampleRate != recordingConfig.sampleRate
460+
val processedSamples: ShortArray = if (needsResample) {
461+
val targetSampleCount = ((sourceSampleCount.toLong() * recordingConfig.sampleRate + hardwareSampleRate / 2) / hardwareSampleRate).toInt()
462+
if (targetSampleCount <= 0) {
463+
return ByteArray(0)
464+
}
465+
466+
val targetSamples = ShortArray(targetSampleCount)
467+
val step = sourceSampleCount.toDouble() / targetSampleCount
468+
var position = 0.0
469+
470+
for (i in 0 until targetSampleCount) {
471+
val index = position.toInt().coerceAtMost(sourceSampleCount - 1)
472+
val fraction = position - index
473+
val nextIndex = min(index + 1, sourceSampleCount - 1)
474+
val interpolated = ((1 - fraction) * sourceSamples[index] + fraction * sourceSamples[nextIndex]).toInt()
475+
targetSamples[i] = interpolated.toShort()
476+
position += step
477+
}
478+
479+
targetSamples
480+
} else {
481+
sourceSamples
451482
}
452483

453-
val targetSamples = ShortArray(targetSampleCount)
454-
val step = sourceSampleCount.toDouble() / targetSampleCount
455-
var position = 0.0
456-
457-
for (i in 0 until targetSampleCount) {
458-
val index = position.toInt().coerceAtMost(sourceSampleCount - 1)
459-
val fraction = position - index
460-
val nextIndex = min(index + 1, sourceSampleCount - 1)
461-
val interpolated = ((1 - fraction) * sourceSamples[index] + fraction * sourceSamples[nextIndex]).toInt()
462-
targetSamples[i] = interpolated.toShort()
463-
position += step
484+
if (gainMultiplier != 1.0f) {
485+
val multiplier = gainMultiplier
486+
for (i in processedSamples.indices) {
487+
val scaled = (processedSamples[i] * multiplier).toInt()
488+
processedSamples[i] = scaled.coerceIn(Short.MIN_VALUE.toInt(), Short.MAX_VALUE.toInt()).toShort()
489+
}
464490
}
465491

466-
val outBuffer = ByteBuffer.allocate(targetSampleCount * 2).order(ByteOrder.LITTLE_ENDIAN)
467-
outBuffer.asShortBuffer().put(targetSamples)
492+
val outBuffer = ByteBuffer.allocate(processedSamples.size * 2).order(ByteOrder.LITTLE_ENDIAN)
493+
outBuffer.asShortBuffer().put(processedSamples)
468494
return outBuffer.array()
469495
}
470496

@@ -564,8 +590,11 @@ class AudioRecorderManager(
564590
return null
565591
}
566592

567-
// Always use VOICE_COMMUNICATION for better echo cancellation
568-
val audioSource = MediaRecorder.AudioSource.VOICE_COMMUNICATION
593+
val audioSource = if (config.voiceProcessing) {
594+
MediaRecorder.AudioSource.VOICE_COMMUNICATION
595+
} else {
596+
MediaRecorder.AudioSource.MIC
597+
}
569598

570599
val channelConfig = if (config.channels == 1) {
571600
AudioFormat.CHANNEL_IN_MONO

android/src/main/java/expo/modules/audiostream/RecordingConfig.kt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ data class RecordingConfig(
55
val channels: Int = 1,
66
val encoding: String = "pcm_16bit",
77
val interval: Long = Constants.DEFAULT_INTERVAL,
8-
val pointsPerSecond: Double = 20.0
8+
val pointsPerSecond: Double = 20.0,
9+
val voiceProcessing: Boolean = false,
10+
val preGainDb: Double = 0.0
911
) {
1012
/**
1113
* Validates the recording configuration
@@ -44,7 +46,9 @@ data class RecordingConfig(
4446
channels = (options["channels"] as? Number)?.toInt() ?: 1,
4547
encoding = options["encoding"] as? String ?: "pcm_16bit",
4648
interval = (options["interval"] as? Number)?.toLong() ?: Constants.DEFAULT_INTERVAL,
47-
pointsPerSecond = (options["pointsPerSecond"] as? Number)?.toDouble() ?: 20.0
49+
pointsPerSecond = (options["pointsPerSecond"] as? Number)?.toDouble() ?: 20.0,
50+
voiceProcessing = options["voiceProcessing"] as? Boolean ?: false,
51+
preGainDb = (options["preGainDb"] as? Number)?.toDouble() ?: 0.0
4852
)
4953
}
5054
}

0 commit comments

Comments
 (0)