fix(android): keep talk mode on realtime relay

This commit is contained in:
Ayaan Zaidi
2026-05-25 07:36:32 +05:30
parent d7aa1f31de
commit 70614f88cc
4 changed files with 3 additions and 95 deletions

View File

@@ -11,14 +11,8 @@ internal data class TalkModeGatewayConfigState(
val mainSessionKey: String,
val interruptOnSpeech: Boolean?,
val silenceTimeoutMs: Long,
val executionMode: TalkModeExecutionMode,
)
/**
 * Execution path used when talk mode starts.
 *
 * The value is resolved from the gateway `talk` config by
 * `TalkModeGatewayConfigParser.resolvedExecutionMode`.
 */
internal enum class TalkModeExecutionMode {
// Returned when `talk.realtime` is missing or does not match the relay settings.
Native,
// Returned when `talk.realtime` selects mode "realtime" over the "gateway-relay" transport.
RealtimeRelay,
}
internal object TalkModeGatewayConfigParser {
fun parse(config: JsonObject?): TalkModeGatewayConfigState {
val talk = config?.get("talk").asObjectOrNull()
@@ -27,22 +21,9 @@ internal object TalkModeGatewayConfigParser {
mainSessionKey = normalizeMainKey(sessionCfg?.get("mainKey").asStringOrNull()),
interruptOnSpeech = talk?.get("interruptOnSpeech").asBooleanOrNull(),
silenceTimeoutMs = resolvedSilenceTimeoutMs(talk),
executionMode = resolvedExecutionMode(talk),
)
}
/**
 * Resolves the talk execution mode from the gateway `talk` config section.
 *
 * [TalkModeExecutionMode.RealtimeRelay] is returned only when `talk.realtime` is an object whose
 * `mode` is `"realtime"`, whose `transport` is `"gateway-relay"`, and whose `brain` string is
 * either null or `"agent-consult"`. Every other input, including a null [talk] or a missing
 * `realtime` object, resolves to [TalkModeExecutionMode.Native].
 *
 * @param talk the `talk` section of the gateway config, or null when it is not present.
 * @return the execution mode selected by the config.
 */
fun resolvedExecutionMode(talk: JsonObject?): TalkModeExecutionMode {
    val realtimeCfg = talk?.get("realtime").asObjectOrNull()
    if (realtimeCfg == null) return TalkModeExecutionMode.Native

    val configuredMode = realtimeCfg["mode"].asStringOrNull()
    val configuredTransport = realtimeCfg["transport"].asStringOrNull()
    val configuredBrain = realtimeCfg["brain"].asStringOrNull()

    // A null brain is accepted alongside the explicit "agent-consult" value.
    val brainAllowsRelay = configuredBrain == null || configuredBrain == "agent-consult"

    return when {
        configuredMode != "realtime" -> TalkModeExecutionMode.Native
        configuredTransport != "gateway-relay" -> TalkModeExecutionMode.Native
        brainAllowsRelay -> TalkModeExecutionMode.RealtimeRelay
        else -> TalkModeExecutionMode.Native
    }
}
fun resolvedSilenceTimeoutMs(talk: JsonObject?): Long {
val fallback = TalkDefaults.defaultSilenceTimeoutMs
val primitive = talk?.get("silenceTimeoutMs") as? JsonPrimitive ?: return fallback

View File

@@ -157,7 +157,6 @@ class TalkModeManager internal constructor(
private val completedRunStates = LinkedHashMap<String, Boolean>()
private val completedRunTexts = LinkedHashMap<String, String>()
private var configLoaded = false
private var executionMode = TalkModeExecutionMode.Native
private val startGeneration = AtomicLong(0L)
@Volatile private var realtimeSessionId: String? = null
@@ -526,53 +525,17 @@ class TalkModeManager internal constructor(
try {
ensureConfigLoaded()
if (generation != startGeneration.get() || !_isEnabled.value || stopRequested) return@launch
if (executionMode == TalkModeExecutionMode.RealtimeRelay) {
startRealtimeRelay(generation)
} else {
startNativeRecognition(generation)
}
startRealtimeRelay(generation)
} catch (err: Throwable) {
if (err is CancellationException) return@launch
_statusText.value = "Start failed: ${err.message ?: err::class.simpleName}"
Log.w(tag, "start failed: ${err.message ?: err::class.simpleName}")
if (executionMode == TalkModeExecutionMode.RealtimeRelay) {
stopRealtimeRelay(closeSession = false, preserveStatus = true)
disableRealtimeModeAndNotifyOwner()
}
stopRealtimeRelay(closeSession = false, preserveStatus = true)
disableRealtimeModeAndNotifyOwner()
}
}
}
/**
 * Starts listening through the platform [SpeechRecognizer] for the start attempt [generation].
 *
 * The work runs on [Dispatchers.Main]. Nothing happens when [generation] no longer matches the
 * current start generation, talk mode is disabled, a stop was requested, or listening is already
 * active. When speech recognition is unavailable or the microphone permission is not granted,
 * only the status text is updated and a warning is logged.
 */
private suspend fun startNativeRecognition(generation: Long) {
    withContext(Dispatchers.Main) {
        // Same checks, same order: stale attempt, disabled, stop requested, already listening.
        val shouldSkip =
            generation != startGeneration.get() ||
                !_isEnabled.value ||
                stopRequested ||
                _isListening.value
        if (shouldSkip) return@withContext

        Log.d(tag, "start native")

        val recognitionAvailable = SpeechRecognizer.isRecognitionAvailable(context)
        if (!recognitionAvailable) {
            _statusText.value = "Speech recognizer unavailable"
            Log.w(tag, "speech recognizer unavailable")
            return@withContext
        }

        val micPermission = ContextCompat.checkSelfPermission(context, Manifest.permission.RECORD_AUDIO)
        if (micPermission != PackageManager.PERMISSION_GRANTED) {
            _statusText.value = "Microphone permission required"
            Log.w(tag, "microphone permission required")
            return@withContext
        }

        // Drop any previous recognizer before creating a fresh one wired to our listener.
        recognizer?.destroy()
        val freshRecognizer = SpeechRecognizer.createSpeechRecognizer(context)
        freshRecognizer.setRecognitionListener(listener)
        recognizer = freshRecognizer

        startListeningInternal(markListening = true)
        startSilenceMonitor()
        Log.d(tag, "listening")
    }
}
private fun stop() {
stopRequested = true
finalizeInFlight = false
@@ -2221,11 +2184,9 @@ class TalkModeManager internal constructor(
val parsed = TalkModeGatewayConfigParser.parse(root?.get("config").asObjectOrNull())
silenceWindowMs = parsed.silenceTimeoutMs
parsed.interruptOnSpeech?.let { interruptOnSpeech = it }
executionMode = parsed.executionMode
configLoaded = true
} catch (_: Throwable) {
silenceWindowMs = TalkDefaults.defaultSilenceTimeoutMs
executionMode = TalkModeExecutionMode.Native
configLoaded = false
}
}

View File

@@ -62,37 +62,4 @@ class TalkModeConfigParsingTest {
TalkModeGatewayConfigParser.resolvedSilenceTimeoutMs(talk),
)
}
/** A `realtime` object that only names a non-relay transport resolves to native mode. */
@Test
fun defaultsToNativeTalkMode() {
    val realtime = buildJsonObject { put("transport", "webrtc") }
    val talk = buildJsonObject { put("realtime", realtime) }

    val resolved = TalkModeGatewayConfigParser.resolvedExecutionMode(talk)

    assertEquals(TalkModeExecutionMode.Native, resolved)
}
/** Realtime mode over the gateway relay with the agent-consult brain resolves to relay mode. */
@Test
fun usesRealtimeRelayWhenGatewayRelayIsConfigured() {
    val realtime =
        buildJsonObject {
            put("mode", "realtime")
            put("transport", "gateway-relay")
            put("brain", "agent-consult")
        }
    val talk = buildJsonObject { put("realtime", realtime) }

    val resolved = TalkModeGatewayConfigParser.resolvedExecutionMode(talk)

    assertEquals(TalkModeExecutionMode.RealtimeRelay, resolved)
}
}

View File

@@ -362,7 +362,6 @@ class TalkModeManagerTest {
onStoppedByRelay = { stoppedByRelay.set(true) },
)
setPrivateField(manager, "executionMode", TalkModeExecutionMode.RealtimeRelay)
setPrivateField(manager, "configLoaded", true)
manager.setEnabled(true)
advanceUntilIdle()