feat(ios): add realtime talk relay mode

Adds realtime Gateway Talk relay support for iOS, including OpenAI realtime provider selection and voice selection controls.

Maintainer fixups preserved provider auth fallback resolution, kept setup-code/manual auth through TLS trust prompts, recomputed pairing auth from current form fields, fixed the realtime voice label Swift compile issue, added provider auth regression coverage, and refreshed shrinkwrap metadata for the current CI merge base.

Verification:
- `fnm exec --using 24.15.0 pnpm deps:shrinkwrap:check`
- `git diff --check`
- `swiftformat --lint --config config/swiftformat --unexclude apps/ios/Sources apps/ios/Sources/Gateway/GatewayConnectionController.swift apps/ios/Sources/Onboarding/GatewayOnboardingView.swift apps/ios/Sources/Onboarding/OnboardingWizardView.swift apps/ios/Sources/Settings/SettingsTab.swift apps/ios/Sources/Voice/TalkModeGatewayConfig.swift`
- `swiftlint lint --config apps/ios/.swiftlint.yml apps/ios/Sources/Gateway/GatewayConnectionController.swift apps/ios/Sources/Onboarding/GatewayOnboardingView.swift apps/ios/Sources/Onboarding/OnboardingWizardView.swift apps/ios/Sources/Settings/SettingsTab.swift apps/ios/Sources/Voice/TalkModeGatewayConfig.swift`
- `AUTOREVIEW_AUTO_TESTS=0 .agents/skills/autoreview/scripts/autoreview --mode branch --base origin/main`
- GitHub CI clean for `8a76c829611c0eb70d4c3b5328f1868aaf3516e1` (cancelled `auto-response` ignored)

Co-authored-by: Colin Johnson <colin@solvely.net>
This commit is contained in:
Colin Johnson
2026-05-22 12:34:06 -04:00
committed by GitHub
parent 933f01cb39
commit e730e9bd0b
17 changed files with 1443 additions and 142 deletions

View File

@@ -52,7 +52,6 @@ struct OpenClawLiveActivity: Widget {
.padding(.vertical, 8)
}
@ViewBuilder
private func trailingView(state: OpenClawActivityAttributes.ContentState) -> some View {
self.statusIcon(state: state)
.font(.system(size: 16, weight: .semibold))
@@ -65,7 +64,6 @@ struct OpenClawLiveActivity: Widget {
.frame(width: 6, height: 6)
}
@ViewBuilder
private func compactStatusIcon(state: OpenClawActivityAttributes.ContentState) -> some View {
self.statusIcon(state: state)
.font(.system(size: 12, weight: .semibold))

View File

@@ -4,6 +4,8 @@
Maintenance update for the current OpenClaw release.
- Added realtime Gateway Talk relay support for iOS voice sessions, including OpenAI realtime provider and voice selection controls. Thanks @Solvely-Colin.
## 2026.5.20 - 2026-05-20
Maintenance update for the current OpenClaw release.

View File

@@ -20,6 +20,47 @@ import UIKit
@MainActor
@Observable
final class GatewayConnectionController {
/// Credentials supplied by the caller for a manual gateway connection.
///
/// Every field is either a trimmed, non-empty string or `nil`.
struct ManualAuthOverride: Equatable {
    let token: String?
    let bootstrapToken: String?
    let password: String?

    /// Builds an override from raw input, always returning a value.
    ///
    /// Each argument is trimmed of whitespace and newlines; an argument that is
    /// `nil` or blank after trimming becomes `nil` in the result.
    static func explicit(
        token: String?,
        bootstrapToken: String?,
        password: String?) -> ManualAuthOverride
    {
        ManualAuthOverride(
            token: Self.nonBlank(token),
            bootstrapToken: Self.nonBlank(bootstrapToken),
            password: Self.nonBlank(password))
    }

    /// Same trimming as `explicit`, but returns `nil` when no field survives it.
    static func normalized(
        token: String?,
        bootstrapToken: String?,
        password: String?) -> ManualAuthOverride?
    {
        let candidate = Self.explicit(
            token: token,
            bootstrapToken: bootstrapToken,
            password: password)
        let hasNoCredential =
            candidate.token == nil && candidate.bootstrapToken == nil && candidate.password == nil
        return hasNoCredential ? nil : candidate
    }

    /// Trims `raw` and maps a missing or blank value to `nil`.
    private static func nonBlank(_ raw: String?) -> String? {
        guard let cleaned = raw?.trimmingCharacters(in: .whitespacesAndNewlines),
              !cleaned.isEmpty
        else { return nil }
        return cleaned
    }
}
/// Connection attempt recorded in `pendingTrustConnect` alongside the TLS trust prompt,
/// so the connect can be resumed once the prompt is answered.
private struct PendingTrustConnect {
/// Gateway URL the connection will be made to.
let url: URL
/// Stable identifier of the gateway; also used for the matching `TrustPrompt`.
let stableID: String
/// `true` when created by `connectManual`, `false` for a discovered gateway.
let isManual: Bool
/// Credentials to use when the connect resumes; when `nil`, the stored
/// token/bootstrap token/password are loaded from `GatewaySettingsStore` instead.
let authOverride: ManualAuthOverride?
}
struct TrustPrompt: Identifiable, Equatable {
let stableID: String
let gatewayName: String
@@ -42,7 +83,7 @@ final class GatewayConnectionController {
private weak var appModel: NodeAppModel?
private var didAutoConnect = false
private var pendingServiceResolvers: [String: GatewayServiceResolver] = [:]
private var pendingTrustConnect: (url: URL, stableID: String, isManual: Bool)?
private var pendingTrustConnect: PendingTrustConnect?
init(appModel: NodeAppModel, startDiscovery: Bool = true) {
self.appModel = appModel
@@ -125,7 +166,11 @@ final class GatewayConnectionController {
guard let fp = await self.probeTLSFingerprint(url: url) else {
return "Failed to read TLS fingerprint from discovered gateway."
}
self.pendingTrustConnect = (url: url, stableID: stableID, isManual: false)
self.pendingTrustConnect = PendingTrustConnect(
url: url,
stableID: stableID,
isManual: false,
authOverride: nil)
self.pendingTrustPrompt = TrustPrompt(
stableID: stableID,
gatewayName: gateway.name,
@@ -162,12 +207,23 @@ final class GatewayConnectionController {
_ = await self.connectWithDiagnostics(gateway)
}
func connectManual(host: String, port: Int, useTLS: Bool) async {
let instanceId = UserDefaults.standard.string(forKey: "node.instanceId")?
.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
let token = GatewaySettingsStore.loadGatewayToken(instanceId: instanceId)
let bootstrapToken = GatewaySettingsStore.loadGatewayBootstrapToken(instanceId: instanceId)
let password = GatewaySettingsStore.loadGatewayPassword(instanceId: instanceId)
func connectManual(
host: String,
port: Int,
useTLS: Bool,
authOverride: ManualAuthOverride? = nil) async
{
let instanceId = GatewaySettingsStore.currentInstanceID()
let token =
authOverride.map(\.token) ?? GatewaySettingsStore.loadGatewayToken(instanceId: instanceId)
let bootstrapToken =
authOverride.map(\.bootstrapToken) ?? GatewaySettingsStore.loadGatewayBootstrapToken(instanceId: instanceId)
let password =
authOverride.map(\.password) ?? GatewaySettingsStore.loadGatewayPassword(instanceId: instanceId)
let pendingAuthOverride = authOverride ?? ManualAuthOverride.normalized(
token: token,
bootstrapToken: bootstrapToken,
password: password)
let resolvedUseTLS = self.resolveManualUseTLS(host: host, useTLS: useTLS)
guard let resolvedPort = self.resolveManualPort(host: host, port: port, useTLS: resolvedUseTLS)
else { return }
@@ -181,7 +237,11 @@ final class GatewayConnectionController {
+ "Remote gateways must use HTTPS/WSS."
return
}
self.pendingTrustConnect = (url: url, stableID: stableID, isManual: true)
self.pendingTrustConnect = PendingTrustConnect(
url: url,
stableID: stableID,
isManual: true,
authOverride: pendingAuthOverride)
self.pendingTrustPrompt = TrustPrompt(
stableID: stableID,
gatewayName: "\(host):\(resolvedPort)",
@@ -269,11 +329,14 @@ final class GatewayConnectionController {
GatewaySettingsStore.saveLastGatewayConnectionDiscovered(stableID: pending.stableID, useTLS: true)
}
let instanceId = UserDefaults.standard.string(forKey: "node.instanceId")?
.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
let token = GatewaySettingsStore.loadGatewayToken(instanceId: instanceId)
let bootstrapToken = GatewaySettingsStore.loadGatewayBootstrapToken(instanceId: instanceId)
let password = GatewaySettingsStore.loadGatewayPassword(instanceId: instanceId)
let instanceId = GatewaySettingsStore.currentInstanceID()
let token =
pending.authOverride.map(\.token) ?? GatewaySettingsStore.loadGatewayToken(instanceId: instanceId)
let bootstrapToken =
pending.authOverride.map(\.bootstrapToken) ?? GatewaySettingsStore.loadGatewayBootstrapToken(
instanceId: instanceId)
let password =
pending.authOverride.map(\.password) ?? GatewaySettingsStore.loadGatewayPassword(instanceId: instanceId)
let tlsParams = GatewayTLSParams(
required: true,
expectedFingerprint: prompt.fingerprintSha256,

View File

@@ -34,6 +34,17 @@ enum GatewaySettingsStore {
self.ensureLastDiscoveredGatewayStableID()
}
/// Returns the node instance identifier to use for credential lookups.
///
/// Runs `bootstrapPersistence()` first, then prefers the trimmed value stored in
/// `defaults` under `instanceIdDefaultsKey`. When that value is missing or blank,
/// falls back to `loadStableInstanceID()`, and finally to an empty string.
///
/// - Parameter defaults: The defaults database to read the instance id from.
/// - Returns: A non-empty identifier when one is available, otherwise `""`.
static func currentInstanceID(defaults: UserDefaults = .standard) -> String {
    self.bootstrapPersistence()
    let storedID = defaults.string(forKey: self.instanceIdDefaultsKey)?
        .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
    guard storedID.isEmpty else { return storedID }
    return self.loadStableInstanceID() ?? ""
}
static func loadStableInstanceID() -> String? {
if let value = KeychainStore.loadString(service: self.nodeService, account: self.instanceIdAccount)?
.trimmingCharacters(in: .whitespacesAndNewlines),
@@ -110,8 +121,15 @@ enum GatewaySettingsStore {
}
/// Persists the gateway token for `instanceId` in the keychain.
///
/// The token is trimmed of whitespace and newlines first. A blank token deletes
/// the stored keychain entry instead of saving an empty value.
static func saveGatewayToken(_ token: String, instanceId: String) {
let trimmed = token.trimmingCharacters(in: .whitespacesAndNewlines)
if trimmed.isEmpty {
// Blank input clears the stored token rather than writing an empty string.
_ = KeychainStore.delete(
service: self.gatewayService,
account: self.gatewayTokenAccount(instanceId: instanceId))
return
}
_ = KeychainStore.saveString(
// NOTE(review): diff view — `token,` appears to be the removed pre-change argument
// and `trimmed,` its replacement; confirm only `trimmed` is passed in the final file.
token,
trimmed,
service: self.gatewayService,
account: self.gatewayTokenAccount(instanceId: instanceId))
}
@@ -125,8 +143,13 @@ enum GatewaySettingsStore {
}
/// Persists the gateway bootstrap token for `instanceId` in the keychain.
///
/// The token is trimmed of whitespace and newlines first. A blank token is
/// routed to `clearGatewayBootstrapToken(instanceId:)` instead of being saved.
static func saveGatewayBootstrapToken(_ token: String, instanceId: String) {
let trimmed = token.trimmingCharacters(in: .whitespacesAndNewlines)
if trimmed.isEmpty {
// Blank input clears any stored bootstrap token.
self.clearGatewayBootstrapToken(instanceId: instanceId)
return
}
_ = KeychainStore.saveString(
// NOTE(review): diff view — `token,` appears to be the removed pre-change argument
// and `trimmed,` its replacement; confirm only `trimmed` is passed in the final file.
token,
trimmed,
service: self.gatewayService,
account: self.gatewayBootstrapTokenAccount(instanceId: instanceId))
}
@@ -145,8 +168,15 @@ enum GatewaySettingsStore {
}
/// Persists the gateway password for `instanceId` in the keychain.
///
/// The password is trimmed of whitespace and newlines first. A blank password
/// deletes the stored keychain entry instead of saving an empty value.
static func saveGatewayPassword(_ password: String, instanceId: String) {
let trimmed = password.trimmingCharacters(in: .whitespacesAndNewlines)
if trimmed.isEmpty {
// Blank input clears the stored password rather than writing an empty string.
_ = KeychainStore.delete(
service: self.gatewayService,
account: self.gatewayPasswordAccount(instanceId: instanceId))
return
}
_ = KeychainStore.saveString(
// NOTE(review): diff view — `password,` appears to be the removed pre-change argument
// and `trimmed,` its replacement; confirm only `trimmed` is passed in the final file.
password,
trimmed,
service: self.gatewayService,
account: self.gatewayPasswordAccount(instanceId: instanceId))
}

View File

@@ -597,6 +597,18 @@ final class NodeAppModel {
}
}
/// Stores the Talk provider choice and tells Talk mode that it changed.
///
/// `rawValue` is passed through `TalkModeProviderSelection.resolved(_:)` before
/// being written to `UserDefaults.standard`, so the persisted value is always the
/// raw value of a resolved selection.
func setTalkProviderSelection(_ rawValue: String) {
    let resolvedRaw = TalkModeProviderSelection.resolved(rawValue).rawValue
    let defaults = UserDefaults.standard
    defaults.set(resolvedRaw, forKey: TalkModeProviderSelection.storageKey)
    self.talkMode.applyProviderSelectionChanged()
}
/// Stores the realtime voice override and tells Talk mode that it changed.
///
/// When `TalkModeRealtimeVoiceSelection.resolvedOverride(_:)` yields no override,
/// an empty string is persisted in its place.
func setTalkRealtimeVoiceSelection(_ rawValue: String) {
    let defaults = UserDefaults.standard
    if let override = TalkModeRealtimeVoiceSelection.resolvedOverride(rawValue) {
        defaults.set(override, forKey: TalkModeRealtimeVoiceSelection.storageKey)
    } else {
        defaults.set("", forKey: TalkModeRealtimeVoiceSelection.storageKey)
    }
    self.talkMode.applyProviderSelectionChanged()
}
func requestLocationPermissions(mode: OpenClawLocationMode) async -> Bool {
guard mode != .off else { return true }
let status = await self.locationService.ensureAuthorization(mode: mode)

View File

@@ -3,7 +3,7 @@ import OpenClawKit
enum GatewayOnboardingReset {
@MainActor
static func reset(
static func prepareForBootstrapPairing(
appModel: NodeAppModel,
instanceId: String,
defaults: UserDefaults = .standard)
@@ -15,10 +15,24 @@ enum GatewayOnboardingReset {
GatewaySettingsStore.deleteGatewayCredentials(instanceId: trimmedInstanceId)
}
GatewaySettingsStore.clearLastGatewayConnection()
GatewaySettingsStore.clearPreferredGatewayStableID()
GatewaySettingsStore.clearLastDiscoveredGatewayStableID()
let deviceId = DeviceIdentityStore.loadOrCreate().deviceId
DeviceAuthStore.clearToken(deviceId: deviceId, role: "node")
DeviceAuthStore.clearToken(deviceId: deviceId, role: "operator")
GatewaySettingsStore.clearLastGatewayConnection(defaults: defaults)
GatewaySettingsStore.clearPreferredGatewayStableID(defaults: defaults)
GatewaySettingsStore.clearLastDiscoveredGatewayStableID(defaults: defaults)
GatewayTLSStore.clearAllFingerprints()
defaults.set(false, forKey: "gateway.autoconnect")
}
@MainActor
static func reset(
appModel: NodeAppModel,
instanceId: String,
defaults: UserDefaults = .standard)
{
self.prepareForBootstrapPairing(appModel: appModel, instanceId: instanceId, defaults: defaults)
OnboardingStateStore.reset(defaults: defaults)
defaults.set(false, forKey: "gateway.onboardingComplete")

View File

@@ -109,6 +109,7 @@ private struct ManualEntryStep: View {
@State private var manualUseTLS: Bool = true
@State private var manualToken: String = ""
@State private var manualPassword: String = ""
@State private var pendingManualAuthOverride: GatewayConnectionController.ManualAuthOverride?
@State private var connectingGatewayID: String?
@State private var connectStatusText: String?
@@ -207,9 +208,8 @@ private struct ManualEntryStep: View {
defaults.set(self.manualPortValue() ?? 0, forKey: "gateway.manual.port")
defaults.set(self.manualUseTLS, forKey: "gateway.manual.tls")
if let instanceId = defaults.string(forKey: "node.instanceId")?.trimmingCharacters(in: .whitespacesAndNewlines),
!instanceId.isEmpty
{
let instanceId = GatewaySettingsStore.currentInstanceID()
if !instanceId.isEmpty {
let trimmedToken = self.manualToken.trimmingCharacters(in: .whitespacesAndNewlines)
let trimmedPassword = self.manualPassword.trimmingCharacters(in: .whitespacesAndNewlines)
if !trimmedToken.isEmpty {
@@ -220,10 +220,21 @@ private struct ManualEntryStep: View {
self.connectingGatewayID = "manual"
defer { self.connectingGatewayID = nil }
let authOverride = self.pendingManualAuthOverride.map { pending in
GatewayConnectionController.ManualAuthOverride.explicit(
token: self.manualToken,
bootstrapToken: pending.bootstrapToken,
password: self.manualPassword)
} ?? GatewayConnectionController.ManualAuthOverride.normalized(
token: self.manualToken,
bootstrapToken: nil,
password: self.manualPassword)
self.pendingManualAuthOverride = nil
await self.gatewayController.connectManual(
host: host,
port: self.manualPortValue() ?? 0,
useTLS: self.manualUseTLS)
useTLS: self.manualUseTLS,
authOverride: authOverride)
}
private func manualPortValue() -> Int? {
@@ -258,24 +269,38 @@ private struct ManualEntryStep: View {
self.manualPortText = String(link.port)
self.manualUseTLS = link.tls
if let token = link.token, !token.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
self.manualToken = token.trimmingCharacters(in: .whitespacesAndNewlines)
let trimmedToken = link.token?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
let trimmedPassword = link.password?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
let trimmedBootstrapToken =
link.bootstrapToken?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
if !trimmedToken.isEmpty {
self.manualToken = trimmedToken
} else if link.bootstrapToken?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false {
self.manualToken = ""
}
if let password = link.password, !password.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
self.manualPassword = password.trimmingCharacters(in: .whitespacesAndNewlines)
if !trimmedPassword.isEmpty {
self.manualPassword = trimmedPassword
} else if link.bootstrapToken?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false {
self.manualPassword = ""
}
let trimmedInstanceId = UserDefaults.standard.string(forKey: "node.instanceId")?
.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
let trimmedInstanceId = GatewaySettingsStore.currentInstanceID()
if !trimmedInstanceId.isEmpty {
let trimmedBootstrapToken =
link.bootstrapToken?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
if !trimmedBootstrapToken.isEmpty {
GatewayOnboardingReset.prepareForBootstrapPairing(
appModel: self.appModel,
instanceId: trimmedInstanceId)
}
GatewaySettingsStore.saveGatewayBootstrapToken(trimmedBootstrapToken, instanceId: trimmedInstanceId)
}
if !trimmedBootstrapToken.isEmpty || !trimmedToken.isEmpty || !trimmedPassword.isEmpty {
self.pendingManualAuthOverride = GatewayConnectionController.ManualAuthOverride.normalized(
token: trimmedToken,
bootstrapToken: trimmedBootstrapToken,
password: trimmedPassword)
} else {
self.pendingManualAuthOverride = nil
}
self.setupStatusText = "Setup code applied."
}

View File

@@ -71,6 +71,7 @@ struct OnboardingWizardView: View {
@State private var selectedPhoto: PhotosPickerItem?
@State private var showGatewayProblemDetails: Bool = false
@State private var lastPairingAutoResumeAttemptAt: Date?
@State private var pendingManualAuthOverride: GatewayConnectionController.ManualAuthOverride?
private static let pairingAutoResumeTicker = Timer.publish(every: 2.0, on: .main, in: .common).autoconnect()
let allowSkip: Bool
@@ -745,17 +746,32 @@ struct OnboardingWizardView: View {
self.manualPort = link.port
self.manualTLS = link.tls
let trimmedBootstrapToken = link.bootstrapToken?.trimmingCharacters(in: .whitespacesAndNewlines)
if trimmedBootstrapToken?.isEmpty == false {
GatewayOnboardingReset.prepareForBootstrapPairing(
appModel: self.appModel,
instanceId: GatewaySettingsStore.currentInstanceID())
}
self.saveGatewayBootstrapToken(trimmedBootstrapToken)
if let token = link.token?.trimmingCharacters(in: .whitespacesAndNewlines), !token.isEmpty {
self.gatewayToken = token
let trimmedToken = link.token?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
let trimmedPassword = link.password?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
if !trimmedToken.isEmpty {
self.gatewayToken = trimmedToken
} else if trimmedBootstrapToken?.isEmpty == false {
self.gatewayToken = ""
}
if let password = link.password?.trimmingCharacters(in: .whitespacesAndNewlines), !password.isEmpty {
self.gatewayPassword = password
if !trimmedPassword.isEmpty {
self.gatewayPassword = trimmedPassword
} else if trimmedBootstrapToken?.isEmpty == false {
self.gatewayPassword = ""
}
if trimmedBootstrapToken?.isEmpty == false || !trimmedToken.isEmpty || !trimmedPassword.isEmpty {
self.pendingManualAuthOverride = GatewayConnectionController.ManualAuthOverride.normalized(
token: trimmedToken,
bootstrapToken: trimmedBootstrapToken,
password: trimmedPassword)
} else {
self.pendingManualAuthOverride = nil
}
self.saveGatewayCredentials(token: self.gatewayToken, password: self.gatewayPassword)
self.showQRScanner = false
self.connectMessage = "Connecting via QR code…"
@@ -937,7 +953,7 @@ struct OnboardingWizardView: View {
}
private func saveGatewayCredentials(token: String, password: String) {
let trimmedInstanceId = self.instanceId.trimmingCharacters(in: .whitespacesAndNewlines)
let trimmedInstanceId = GatewaySettingsStore.currentInstanceID()
guard !trimmedInstanceId.isEmpty else { return }
let trimmedToken = token.trimmingCharacters(in: .whitespacesAndNewlines)
GatewaySettingsStore.saveGatewayToken(trimmedToken, instanceId: trimmedInstanceId)
@@ -946,7 +962,7 @@ struct OnboardingWizardView: View {
}
private func saveGatewayBootstrapToken(_ token: String?) {
let trimmedInstanceId = self.instanceId.trimmingCharacters(in: .whitespacesAndNewlines)
let trimmedInstanceId = GatewaySettingsStore.currentInstanceID()
guard !trimmedInstanceId.isEmpty else { return }
let trimmedToken = token?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
GatewaySettingsStore.saveGatewayBootstrapToken(trimmedToken, instanceId: trimmedInstanceId)
@@ -1001,7 +1017,21 @@ struct OnboardingWizardView: View {
self.connectMessage = "Connecting to \(host)"
self.statusLine = "Connecting to \(host):\(self.manualPort)"
defer { self.connectingGatewayID = nil }
await self.gatewayController.connectManual(host: host, port: self.manualPort, useTLS: self.manualTLS)
let authOverride = self.pendingManualAuthOverride.map { pending in
GatewayConnectionController.ManualAuthOverride.explicit(
token: self.gatewayToken,
bootstrapToken: pending.bootstrapToken,
password: self.gatewayPassword)
} ?? GatewayConnectionController.ManualAuthOverride.normalized(
token: self.gatewayToken,
bootstrapToken: nil,
password: self.gatewayPassword)
self.pendingManualAuthOverride = nil
await self.gatewayController.connectManual(
host: host,
port: self.manualPort,
useTLS: self.manualTLS,
authOverride: authOverride)
}
private func retryLastAttempt(silent: Bool = false) async {

View File

@@ -21,6 +21,9 @@ struct SettingsTab: View {
@AppStorage("node.instanceId") private var instanceId: String = UUID().uuidString
@AppStorage("voiceWake.enabled") private var voiceWakeEnabled: Bool = false
@AppStorage("talk.enabled") private var talkEnabled: Bool = false
@AppStorage(TalkModeProviderSelection.storageKey) private var talkProviderSelectionRaw: String =
TalkModeProviderSelection.gatewayDefault.rawValue
@AppStorage(TalkModeRealtimeVoiceSelection.storageKey) private var talkRealtimeVoiceSelectionRaw: String = ""
@AppStorage(TalkSpeechLocale.storageKey) private var talkSpeechLocale: String = TalkSpeechLocale.automaticID
@AppStorage("talk.button.enabled") private var talkButtonEnabled: Bool = true
@AppStorage("talk.background.enabled") private var talkBackgroundEnabled: Bool = false
@@ -54,6 +57,7 @@ struct SettingsTab: View {
@State private var manualGatewayPortText: String = ""
@State private var gatewayExpanded: Bool = true
@State private var selectedAgentPickerId: String = ""
@State private var pendingManualAuthOverride: GatewayConnectionController.ManualAuthOverride?
@State private var showResetOnboardingAlert: Bool = false
@State private var showGatewayProblemDetails: Bool = false
@@ -344,64 +348,7 @@ struct SettingsTab: View {
help: "Keeps the screen awake while OpenClaw is open.")
DisclosureGroup("Advanced") {
VStack(alignment: .leading, spacing: 8) {
Text("Talk Voice (Gateway)")
.font(.footnote.weight(.semibold))
.foregroundStyle(.secondary)
LabeledContent("Provider", value: "ElevenLabs")
LabeledContent(
"API Key",
value: self.appModel.talkMode.gatewayTalkConfigLoaded
? (
self.appModel.talkMode.gatewayTalkApiKeyConfigured
? "Configured"
: "Not configured")
: "Not loaded")
LabeledContent(
"Default Model",
value: self.appModel.talkMode.gatewayTalkDefaultModelId ?? "eleven_v3 (fallback)")
LabeledContent(
"Default Voice",
value: self.appModel.talkMode.gatewayTalkDefaultVoiceId ?? "auto (first available)")
Text("Configured on gateway via talk.apiKey, talk.modelId, and talk.voiceId.")
.font(.footnote)
.foregroundStyle(.secondary)
}
self.featureToggle(
"Show Talk Control",
isOn: self.$talkButtonEnabled,
help: "Shows the Talk control in the main toolbar.")
TextField("Default Share Instruction", text: self.$defaultShareInstruction, axis: .vertical)
.lineLimit(2...6)
.textInputAutocapitalization(.sentences)
HStack(spacing: 8) {
Text("Default Share Instruction")
.font(.footnote)
.foregroundStyle(.secondary)
Spacer()
Button {
self.activeFeatureHelp = FeatureHelp(
title: "Default Share Instruction",
message: "Appends this instruction when sharing content "
+ "into OpenClaw from iOS.")
} label: {
Image(systemName: "info.circle")
.foregroundStyle(.secondary)
}
.buttonStyle(.plain)
.accessibilityLabel("Default Share Instruction info")
}
VStack(alignment: .leading, spacing: 8) {
Button {
Task { await self.appModel.runSharePipelineSelfTest() }
} label: {
Label("Run Share Self-Test", systemImage: "checkmark.seal")
}
Text(self.appModel.lastShareEventText)
.font(.footnote)
.foregroundStyle(.secondary)
}
self.advancedAppSettingsView()
}
}
@@ -662,6 +609,120 @@ struct SettingsTab: View {
return trimmed.isEmpty ? "Not connected" : trimmed
}
/// Whether the "Realtime Voice" picker should be visible.
///
/// True when the locally selected provider resolves to `.openAIRealtime`, or
/// when Talk mode reports that the gateway uses the realtime relay.
private var shouldShowRealtimeVoicePicker: Bool {
    if TalkModeProviderSelection.resolved(self.talkProviderSelectionRaw) == .openAIRealtime {
        return true
    }
    return self.appModel.talkMode.gatewayTalkUsesRealtimeRelay
}
/// Builds the "Talk Voice (Gateway)" section: provider picker, optional realtime
/// voice picker, and read-only rows describing the gateway's Talk configuration.
///
/// The result is type-erased to `AnyView`, matching the declared return type.
private func talkVoiceSettingsView() -> AnyView {
    let talkMode = self.appModel.talkMode

    // Derived row values are computed up front so the view tree below stays flat.
    let apiKeyStatus: String
    if !talkMode.gatewayTalkConfigLoaded {
        apiKeyStatus = "Not loaded"
    } else if talkMode.gatewayTalkApiKeyConfigured {
        apiKeyStatus = "Configured"
    } else {
        apiKeyStatus = "Not configured"
    }
    let defaultModel = talkMode.gatewayTalkDefaultModelId ?? "eleven_v3 (fallback)"
    let defaultVoice = talkMode.gatewayTalkDefaultVoiceId ?? "auto (first available)"

    let section = VStack(alignment: .leading, spacing: 8) {
        Text("Talk Voice (Gateway)")
            .font(.footnote.weight(.semibold))
            .foregroundStyle(.secondary)

        Picker("Provider", selection: self.talkProviderSelectionBinding) {
            ForEach(TalkModeProviderSelection.allCases) { option in
                Text(option.label).tag(option.rawValue)
            }
        }

        if self.shouldShowRealtimeVoicePicker {
            Picker("Realtime Voice", selection: self.talkRealtimeVoiceSelectionBinding) {
                // The empty tag means "no override".
                Text("Gateway Default").tag("")
                ForEach(TalkModeRealtimeVoiceSelection.voices, id: \.self) { voice in
                    Text(TalkModeRealtimeVoiceSelection.label(for: voice)).tag(voice)
                }
            }
        }

        LabeledContent("Active Provider", value: talkMode.gatewayTalkProviderLabel)
        LabeledContent("Transport", value: talkMode.gatewayTalkTransportLabel)
        LabeledContent("API Key", value: apiKeyStatus)
        LabeledContent("Default Model", value: defaultModel)
        LabeledContent("Default Voice", value: defaultVoice)

        if let realtimeProvider = talkMode.gatewayTalkRealtimeProviderLabel {
            LabeledContent("Realtime Provider", value: realtimeProvider)
        }

        Text("Realtime uses gateway auth via OpenAI API key or OAuth.")
            .font(.footnote)
            .foregroundStyle(.secondary)
    }
    return AnyView(section)
}
/// Binding for the provider picker.
///
/// Reads the stored raw selection; on write, resolves the picked value, stores
/// the resolved raw value, and forwards it to the app model.
private var talkProviderSelectionBinding: Binding<String> {
    Binding<String>(
        get: { self.talkProviderSelectionRaw },
        set: { picked in
            let resolvedRaw = TalkModeProviderSelection.resolved(picked).rawValue
            self.talkProviderSelectionRaw = resolvedRaw
            self.appModel.setTalkProviderSelection(resolvedRaw)
        })
}
/// Binding for the realtime voice picker.
///
/// Reads the stored raw voice; on write, resolves the picked value to an override
/// (or `""` when there is none), stores it, and forwards it to the app model.
private var talkRealtimeVoiceSelectionBinding: Binding<String> {
    Binding<String>(
        get: { self.talkRealtimeVoiceSelectionRaw },
        set: { picked in
            let resolvedVoice = TalkModeRealtimeVoiceSelection.resolvedOverride(picked) ?? ""
            self.talkRealtimeVoiceSelectionRaw = resolvedVoice
            self.appModel.setTalkRealtimeVoiceSelection(resolvedVoice)
        })
}
/// Builds the contents of the "Advanced" disclosure group: the Talk voice section,
/// the Talk control toggle, the default share instruction editor, and the share
/// self-test button. The result is type-erased to `AnyView`.
private func advancedAppSettingsView() -> AnyView {
AnyView(Group {
// Talk provider / voice configuration section.
self.talkVoiceSettingsView()
self.featureToggle(
"Show Talk Control",
isOn: self.$talkButtonEnabled,
help: "Shows the Talk control in the main toolbar.")
// Multi-line editor for the default share instruction (2 to 6 visible lines).
TextField("Default Share Instruction", text: self.$defaultShareInstruction, axis: .vertical)
.lineLimit(2...6)
.textInputAutocapitalization(.sentences)
// Caption row with an info button that presents the help text via `activeFeatureHelp`.
HStack(spacing: 8) {
Text("Default Share Instruction")
.font(.footnote)
.foregroundStyle(.secondary)
Spacer()
Button {
self.activeFeatureHelp = FeatureHelp(
title: "Default Share Instruction",
message: "Appends this instruction when sharing content "
+ "into OpenClaw from iOS.")
} label: {
Image(systemName: "info.circle")
.foregroundStyle(.secondary)
}
.buttonStyle(.plain)
.accessibilityLabel("Default Share Instruction info")
}
// Share pipeline self-test: runs asynchronously and shows the last share event text below.
VStack(alignment: .leading, spacing: 8) {
Button {
Task { await self.appModel.runSharePipelineSelfTest() }
} label: {
Label("Run Share Self-Test", systemImage: "checkmark.seal")
}
Text(self.appModel.lastShareEventText)
.font(.footnote)
.foregroundStyle(.secondary)
}
})
}
private func featureToggle(
_ title: String,
isOn: Binding<Bool>,
@@ -819,14 +880,20 @@ struct SettingsTab: View {
self.manualGatewayPortText = String(link.port)
self.manualGatewayTLS = link.tls
let trimmedInstanceId = self.instanceId.trimmingCharacters(in: .whitespacesAndNewlines)
let trimmedInstanceId = GatewaySettingsStore.currentInstanceID()
let trimmedBootstrapToken =
link.bootstrapToken?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
if !trimmedBootstrapToken.isEmpty {
GatewayOnboardingReset.prepareForBootstrapPairing(
appModel: self.appModel,
instanceId: trimmedInstanceId)
}
if !trimmedInstanceId.isEmpty {
GatewaySettingsStore.saveGatewayBootstrapToken(trimmedBootstrapToken, instanceId: trimmedInstanceId)
}
if let token = link.token, !token.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
let trimmedToken = token.trimmingCharacters(in: .whitespacesAndNewlines)
let trimmedToken = link.token?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
let trimmedPassword = link.password?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
if !trimmedToken.isEmpty {
self.gatewayToken = trimmedToken
if !trimmedInstanceId.isEmpty {
GatewaySettingsStore.saveGatewayToken(trimmedToken, instanceId: trimmedInstanceId)
@@ -837,8 +904,7 @@ struct SettingsTab: View {
GatewaySettingsStore.saveGatewayToken("", instanceId: trimmedInstanceId)
}
}
if let password = link.password, !password.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
let trimmedPassword = password.trimmingCharacters(in: .whitespacesAndNewlines)
if !trimmedPassword.isEmpty {
self.gatewayPassword = trimmedPassword
if !trimmedInstanceId.isEmpty {
GatewaySettingsStore.saveGatewayPassword(trimmedPassword, instanceId: trimmedInstanceId)
@@ -849,6 +915,14 @@ struct SettingsTab: View {
GatewaySettingsStore.saveGatewayPassword("", instanceId: trimmedInstanceId)
}
}
if !trimmedBootstrapToken.isEmpty || !trimmedToken.isEmpty || !trimmedPassword.isEmpty {
self.pendingManualAuthOverride = GatewayConnectionController.ManualAuthOverride.normalized(
token: trimmedToken,
bootstrapToken: trimmedBootstrapToken,
password: trimmedPassword)
} else {
self.pendingManualAuthOverride = nil
}
}
private func openGatewayQRScanner() {
@@ -940,10 +1014,21 @@ struct SettingsTab: View {
GatewayDiagnostics.log(
"connect manual host=\(host) port=\(self.manualGatewayPort) tls=\(self.manualGatewayTLS)")
let authOverride = self.pendingManualAuthOverride.map { pending in
GatewayConnectionController.ManualAuthOverride.explicit(
token: self.gatewayToken,
bootstrapToken: pending.bootstrapToken,
password: self.gatewayPassword)
} ?? GatewayConnectionController.ManualAuthOverride.normalized(
token: self.gatewayToken,
bootstrapToken: nil,
password: self.gatewayPassword)
self.pendingManualAuthOverride = nil
await self.gatewayController.connectManual(
host: host,
port: self.manualGatewayPort,
useTLS: self.manualGatewayTLS)
useTLS: self.manualGatewayTLS,
authOverride: authOverride)
}
private var setupStatusLine: String? {

View File

@@ -0,0 +1,551 @@
import AVFAudio
import Foundation
import OpenClawChatUI
import OpenClawKit
import OpenClawProtocol
import OSLog
/// Builds the audio tap callback that converts captured buffers to PCM16 and
/// hands them to `onAudio`.
///
/// - Parameters:
///   - inputSampleRate: Sample rate of the buffers delivered to the tap.
///   - targetSampleRate: Sample rate passed to `encodePCM16` as the conversion target.
///   - onAudio: Receives the encoded bytes and a timestamp in milliseconds derived
///     from `ProcessInfo.systemUptime`. Not called when encoding yields no bytes.
/// - Returns: A tap block suitable for installing on an audio node.
private func makeRealtimeAudioTapBlock(
    inputSampleRate: Double,
    targetSampleRate: Double,
    onAudio: @escaping (Data, Double) -> Void) -> AVAudioNodeTapBlock
{
    let tapBlock: AVAudioNodeTapBlock = { pcmBuffer, _ in
        // This callback runs on Core Audio's realtime queue, not MainActor.
        let pcm16 = RealtimeTalkRelaySession.encodePCM16(
            buffer: pcmBuffer,
            inputSampleRate: inputSampleRate,
            targetSampleRate: targetSampleRate)
        if pcm16.isEmpty { return }
        let uptimeMs = ProcessInfo.processInfo.systemUptime * 1000
        onAudio(pcm16, uptimeMs)
    }
    return tapBlock
}
/// Uploads captured audio chunks to the gateway's realtime relay session.
///
/// Being an actor, it serializes access to the session id and the in-flight
/// counter. Sends suspend while awaiting the gateway, so several can be in flight
/// at once; `maxPendingSends` caps that number and excess chunks are dropped.
private actor RealtimeAudioSender {
    private let gateway: GatewayNodeSession
    /// Relay session to append audio to; `nil` once `close()` has been called.
    private var relaySessionId: String?
    /// Number of `send` calls currently awaiting the gateway.
    private var pendingSends = 0
    private let maxPendingSends = 4

    init(gateway: GatewayNodeSession, relaySessionId: String) {
        self.gateway = gateway
        self.relaySessionId = relaySessionId
    }

    /// Stops further uploads; subsequent `send` calls return without sending.
    func close() {
        self.relaySessionId = nil
    }

    /// Appends one audio chunk to the relay session.
    ///
    /// - Parameters:
    ///   - data: Encoded audio bytes; sent base64-encoded.
    ///   - timestampMs: Capture timestamp in milliseconds, forwarded as `timestamp`.
    /// - Returns: `nil` when the chunk was sent, or was skipped because the sender
    ///   is closed or `maxPendingSends` requests are already in flight; otherwise
    ///   the localized description of the failure.
    func send(_ data: Data, timestampMs: Double) async -> String? {
        guard let relaySessionId else { return nil }
        // Drop the chunk rather than queueing behind a slow gateway.
        guard self.pendingSends < self.maxPendingSends else { return nil }
        self.pendingSends += 1
        defer { self.pendingSends -= 1 }

        let payload: [String: Any] = [
            "sessionId": relaySessionId,
            "audioBase64": data.base64EncodedString(),
            "timestamp": timestampMs,
        ]
        do {
            _ = try await Self.requestJSON(
                gateway: self.gateway,
                method: "talk.session.appendAudio",
                payload: payload,
                decodeAs: TalkSessionOkResult.self,
                timeoutSeconds: 8)
            return nil
        } catch {
            return error.localizedDescription
        }
    }

    /// Sends `payload` as a JSON request and decodes the response as `type`.
    ///
    /// - Throws: An `NSError` in the `RealtimeTalkRelay` domain (code 4) when the
    ///   payload cannot be encoded as JSON text, plus any error thrown by
    ///   serialization, the gateway request, or response decoding.
    private static func requestJSON<T: Decodable>(
        gateway: GatewayNodeSession,
        method: String,
        payload: [String: Any],
        decodeAs type: T.Type,
        timeoutSeconds: Int) async throws -> T
    {
        let encodingError = NSError(domain: "RealtimeTalkRelay", code: 4, userInfo: [
            NSLocalizedDescriptionKey: "Failed to encode \(method) payload",
        ])
        // `JSONSerialization.data(withJSONObject:)` raises an Objective-C exception,
        // which Swift cannot catch, for objects that are not valid JSON (for example
        // a NaN or infinite number). Validate first so the failure is a thrown error.
        guard JSONSerialization.isValidJSONObject(payload) else {
            throw encodingError
        }
        let data = try JSONSerialization.data(withJSONObject: payload)
        guard let json = String(data: data, encoding: .utf8) else {
            throw encodingError
        }
        let response = try await gateway.request(
            method: method,
            paramsJSON: json,
            timeoutSeconds: timeoutSeconds)
        return try JSONDecoder().decode(type, from: response)
    }
}
@MainActor
final class RealtimeTalkRelaySession {
struct Options {
let sessionKey: String
let provider: String?
let model: String?
let voice: String?
}
private struct ToolCallStartResponse: Decodable {
let runId: String?
let idempotencyKey: String?
}
private struct ChatCompletionResult {
let text: String?
let failed: Bool
}
private nonisolated static let expectedInputEncoding = "pcm16"
private nonisolated static let expectedOutputEncoding = "pcm16"
private nonisolated static let defaultSampleRateHz = 24000
private nonisolated static let audioFrameBufferSize: AVAudioFrameCount = 2048
private let gateway: GatewayNodeSession
private let options: Options
private let pcmPlayer: PCMStreamingAudioPlaying
private let logger = Logger(subsystem: "ai.openclaw", category: "RealtimeTalkRelay")
private let onStatus: (String) -> Void
private let onSpeakingChanged: (Bool) -> Void
private let audioEngine = AVAudioEngine()
private var relaySessionId: String?
private var inputSampleRateHz = Double(RealtimeTalkRelaySession.defaultSampleRateHz)
private var outputSampleRateHz = Double(RealtimeTalkRelaySession.defaultSampleRateHz)
private var eventTask: Task<Void, Never>?
private var outputTask: Task<Void, Never>?
private var outputContinuation: AsyncThrowingStream<Data, Error>.Continuation?
private var audioSender: RealtimeAudioSender?
private var isClosed = false
private var isOutputPlaying = false
/// Creates an idle relay session; nothing is sent to the gateway until `start()` is called.
///
/// - Parameters:
///   - gateway: Session used for all relay requests and event subscriptions.
///   - options: Session key plus optional provider/model/voice for the create request.
///   - pcmPlayer: Player that renders the PCM audio received from the gateway.
///   - onStatus: Called with user-visible status text as the session progresses.
///   - onSpeakingChanged: Called with `true` when output audio arrives and `false` when
///     playback stops.
init(
gateway: GatewayNodeSession,
options: Options,
pcmPlayer: PCMStreamingAudioPlaying,
onStatus: @escaping (String) -> Void,
onSpeakingChanged: @escaping (Bool) -> Void)
{
self.gateway = gateway
self.options = options
self.pcmPlayer = pcmPlayer
self.onStatus = onStatus
self.onSpeakingChanged = onSpeakingChanged
}
/// Opens a realtime relay session on the gateway and starts streaming audio both ways.
///
/// Sequence: request a relay session, subscribe to server events, apply the audio contract
/// returned by the gateway, start output playback, then start the microphone tap.
///
/// `stop()` may run on the main actor while one of the gateway round-trips below is
/// suspended. In that case the session is abandoned: a relay session the gateway already
/// created is closed, and this method returns without starting capture or playback.
///
/// - Throws: An `NSError` (code 1) when the gateway returns no relay session id, errors from
///   the create request, or errors from starting the microphone pipeline. When setup fails
///   after the session was created, local state is torn down and the gateway is asked to
///   close the session before the error is rethrown.
func start() async throws {
    self.isClosed = false
    self.onStatus("Connecting realtime…")
    let result = try await self.createRelaySession()
    guard let relaySessionId = result.relaysessionid?.trimmingCharacters(in: .whitespacesAndNewlines),
          !relaySessionId.isEmpty
    else {
        throw NSError(domain: "RealtimeTalkRelay", code: 1, userInfo: [
            NSLocalizedDescriptionKey: "Gateway did not return a realtime relay session",
        ])
    }
    // `stop()` ran while the create request was in flight. `close` could not see the
    // session id yet, so release the gateway-side session here instead of going live.
    guard !self.isClosed else {
        await Self.closeRelaySession(gateway: self.gateway, relaySessionId: relaySessionId)
        return
    }
    self.relaySessionId = relaySessionId
    do {
        self.audioSender = RealtimeAudioSender(gateway: self.gateway, relaySessionId: relaySessionId)
        let eventStream = await self.gateway.subscribeServerEvents(bufferingNewest: 200)
        // A stop during the subscribe await already tore everything down (including the
        // gateway close request); starting the pumps now would leave them running unowned.
        guard !self.isClosed else { return }
        self.startEventPump(stream: eventStream)
        self.configureAudioContract(result.audio)
        self.startOutputPlayback()
        try self.startMicrophonePump()
        self.onStatus("Listening (Realtime)")
    } catch {
        // Capture the id first: `close` clears it, and the gateway close must be awaited
        // here so the caller sees the failure only after cleanup was attempted.
        let createdRelaySessionId = self.relaySessionId
        self.close(sendClose: false)
        if let createdRelaySessionId {
            await Self.closeRelaySession(gateway: self.gateway, relaySessionId: createdRelaySessionId)
        }
        throw error
    }
}
/// Tears down the session locally and asks the gateway to close the relay session.
func stop() {
self.close(sendClose: true)
}
/// Stops capture, event handling, and playback, then clears the relay session id.
///
/// Runs at most once per started session: later calls return immediately while `isClosed`
/// is set.
///
/// - Parameter sendClose: When `true` and a relay session id is known, a detached task asks
///   the gateway to close that session.
private func close(sendClose: Bool) {
guard !self.isClosed else { return }
self.isClosed = true
self.stopMicrophonePump()
self.eventTask?.cancel()
self.eventTask = nil
// Detach the sender before closing it so no new reference is handed out meanwhile.
let audioSender = self.audioSender
self.audioSender = nil
Task { await audioSender?.close() }
self.stopOutputPlayback()
// The id is captured by the task before it is cleared below.
if sendClose, let relaySessionId = self.relaySessionId {
Task { [gateway] in
await Self.closeRelaySession(gateway: gateway, relaySessionId: relaySessionId)
}
}
self.relaySessionId = nil
self.onSpeakingChanged(false)
}
/// Best-effort `talk.session.close` request for the given relay session.
///
/// Encoding and request failures are deliberately ignored; if the payload cannot be
/// encoded the request is still issued with `nil` parameters.
private nonisolated static func closeRelaySession(
    gateway: GatewayNodeSession,
    relaySessionId: String) async
{
    let body = try? JSONSerialization.data(withJSONObject: ["sessionId": relaySessionId])
    var paramsJSON: String?
    if let body {
        paramsJSON = String(data: body, encoding: .utf8)
    }
    _ = try? await gateway.request(
        method: "talk.session.close",
        paramsJSON: paramsJSON,
        timeoutSeconds: 8)
}
/// Interrupts the current spoken output and tells the gateway to cancel it.
///
/// Local playback is restarted with an empty stream so later audio can still be played.
/// The gateway request (`talk.session.cancelOutput`) is fire-and-forget and only sent when
/// a relay session id is known.
///
/// Does nothing once the session is closed: restarting playback at that point would create
/// a player task that no later teardown stops.
///
/// - Parameter reason: Forwarded to the gateway as `reason`; defaults to `"user"`.
func cancelOutput(reason: String = "user") {
    guard !self.isClosed else { return }
    self.stopOutputPlayback()
    self.startOutputPlayback()
    guard let relaySessionId else { return }
    Task { [gateway] in
        let payload: [String: Any] = [
            "sessionId": relaySessionId,
            "reason": reason,
        ]
        let data = try? JSONSerialization.data(withJSONObject: payload)
        let json = data.flatMap { String(data: $0, encoding: .utf8) }
        _ = try? await gateway.request(
            method: "talk.session.cancelOutput",
            paramsJSON: json,
            timeoutSeconds: 8)
    }
}
/// Sends `talk.session.create` for a realtime gateway-relay session and decodes the reply.
///
/// `provider`, `model`, and `voice` from `options` are included only when they are
/// non-empty after trimming.
///
/// - Throws: Serialization errors, an `NSError` (code 2) when the JSON bytes are not valid
///   UTF-8, gateway request errors, or decoding errors.
private func createRelaySession() async throws -> TalkSessionCreateResult {
    var payload: [String: Any] = [
        "sessionKey": self.options.sessionKey,
        "mode": "realtime",
        "transport": "gateway-relay",
        "brain": "agent-consult",
    ]
    let optionalFields: [(key: String, value: String?)] = [
        ("provider", self.options.provider),
        ("model", self.options.model),
        ("voice", self.options.voice),
    ]
    for field in optionalFields {
        if let value = self.nonEmpty(field.value) {
            payload[field.key] = value
        }
    }

    let encoded = try JSONSerialization.data(withJSONObject: payload)
    guard let paramsJSON = String(data: encoded, encoding: .utf8) else {
        throw NSError(domain: "RealtimeTalkRelay", code: 2, userInfo: [
            NSLocalizedDescriptionKey: "Failed to encode realtime relay request",
        ])
    }
    let reply = try await self.gateway.request(
        method: "talk.session.create",
        paramsJSON: paramsJSON,
        timeoutSeconds: 20)
    return try JSONDecoder().decode(TalkSessionCreateResult.self, from: reply)
}
/// Applies the audio contract returned with the created session.
///
/// Logs a warning when either encoding differs from the expected `pcm16`, and stores the
/// input/output sample rates, falling back to `defaultSampleRateHz` for missing values.
/// Does nothing when `raw` is not a dictionary.
private func configureAudioContract(_ raw: AnyCodable?) {
    guard let contract = raw?.dictionaryValue else { return }

    let inputEncoding = contract["inputEncoding"]?.stringValue ?? Self.expectedInputEncoding
    let outputEncoding = contract["outputEncoding"]?.stringValue ?? Self.expectedOutputEncoding
    let encodingsMatch = inputEncoding == Self.expectedInputEncoding
        && outputEncoding == Self.expectedOutputEncoding
    if !encodingsMatch {
        let message = "unexpected realtime relay audio contract input=\(inputEncoding) output=\(outputEncoding)"
        self.logger.warning("\(message, privacy: .public)")
    }

    let fallbackRate = Double(Self.defaultSampleRateHz)
    self.inputSampleRateHz = contract["inputSampleRateHz"]?.doubleValue ?? fallbackRate
    self.outputSampleRateHz = contract["outputSampleRateHz"]?.doubleValue ?? fallbackRate
}
/// Replaces any running event task with one that forwards each frame of `stream` to
/// `handleGatewayEvent`, one at a time, until the stream ends or the task is cancelled.
private func startEventPump(stream: AsyncStream<EventFrame>) {
    self.eventTask?.cancel()
    let pump = Task { [weak self] in
        for await frame in stream {
            guard !Task.isCancelled else { return }
            await self?.handleGatewayEvent(frame)
        }
    }
    self.eventTask = pump
}
/// Dispatches one gateway event frame.
///
/// Only `talk.event` frames with a dictionary payload are considered. While a relay session
/// id is set, frames whose `relaySessionId` differs are dropped. The payload `type` selects
/// the action; unknown types are ignored.
private func handleGatewayEvent(_ event: EventFrame) async {
guard event.event == "talk.event",
let payload = event.payload?.dictionaryValue
else { return }
// NOTE(review): when no relay session id is set this filter is skipped entirely.
if let relaySessionId,
payload["relaySessionId"]?.stringValue != relaySessionId
{
return
}
guard let type = payload["type"]?.stringValue else { return }
switch type {
case "ready":
self.onStatus("Listening (Realtime)")
case "audio":
// Decode the base64 chunk and feed it to the current playback stream.
guard let base64 = payload["audioBase64"]?.stringValue,
let data = Data(base64Encoded: base64)
else { return }
self.isOutputPlaying = true
self.onSpeakingChanged(true)
self.outputContinuation?.yield(data)
case "clear":
// Drop queued audio by restarting playback with a fresh stream.
self.stopOutputPlayback()
self.startOutputPlayback()
case "transcript":
self.handleTranscriptEvent(payload)
case "toolCall":
// Awaited inline: later frames are not handled until the tool call finishes.
await self.handleToolCall(payload)
case "error":
let message = payload["message"]?.stringValue ?? "Realtime failed"
GatewayDiagnostics.log("talk realtime: error=\(Self.safeLogMessage(message))")
self.onStatus(message)
case "close":
// The gateway already closed the session, so no close request is sent back.
self.onStatus("Ready")
self.close(sendClose: false)
default:
return
}
}
/// Updates the status text from a final transcript event: a final user transcript shows
/// "Thinking…", a final assistant transcript returns to listening. Non-final transcripts
/// and other roles are ignored.
private func handleTranscriptEvent(_ payload: [String: AnyCodable]) {
    guard payload["final"]?.boolValue == true else { return }
    switch payload["role"]?.stringValue ?? "" {
    case "user":
        self.onStatus("Thinking…")
    case "assistant":
        self.onStatus("Listening (Realtime)")
    default:
        break
    }
}
/// Runs a tool call requested by the realtime session and submits its result.
///
/// Starts the call via `talk.client.toolCall`, waits up to 120 seconds for the matching
/// chat run to finish, then submits either the assistant text or an error object through
/// `submitToolResult`. Any thrown error is reported to the gateway as the tool result.
/// Requires a relay session id plus `callId` and `name` in the payload; otherwise returns.
private func handleToolCall(_ payload: [String: AnyCodable]) async {
guard let relaySessionId,
let callId = payload["callId"]?.stringValue,
let name = payload["name"]?.stringValue
else { return }
self.onStatus("Thinking…")
do {
// Subscribe before starting the call so completion events emitted early are not missed.
let completionStream = await self.gateway.subscribeServerEvents(bufferingNewest: 200)
let args = payload["args"]?.foundationValue ?? [:]
let startPayload: [String: Any] = [
"sessionKey": self.options.sessionKey,
"callId": callId,
"name": name,
"args": args,
"relaySessionId": relaySessionId,
]
let startResponse = try await self.requestJSON(
method: "talk.client.toolCall",
payload: startPayload,
decodeAs: ToolCallStartResponse.self,
timeoutSeconds: 30)
guard let runId = startResponse.runId ?? startResponse.idempotencyKey else {
throw NSError(domain: "RealtimeTalkRelay", code: 3, userInfo: [
NSLocalizedDescriptionKey: "Realtime tool call did not return a run id",
])
}
let completion = await self.waitForChatCompletion(
runId: runId,
stream: completionStream,
timeoutSeconds: 120)
let result: [String: Any] = completion.failed
? ["error": "OpenClaw tool call failed"]
: ["text": completion.text ?? "OpenClaw finished with no text."]
try await self.submitToolResult(callId: callId, result: result)
self.onStatus("Listening (Realtime)")
} catch {
// Best effort: report the failure as the tool result; a second failure is ignored.
try? await self.submitToolResult(callId: callId, result: [
"error": error.localizedDescription,
])
self.onStatus("Listening (Realtime)")
}
}
/// Sends a tool result to the gateway via `talk.session.submitToolResult`.
///
/// Returns without sending when no relay session id is set.
///
/// - Parameters:
///   - callId: Identifier of the tool call being answered.
///   - result: JSON-serializable result object.
/// - Throws: Errors from encoding, the gateway request, or decoding the reply.
private func submitToolResult(callId: String, result: [String: Any]) async throws {
    guard let sessionId = self.relaySessionId else { return }
    _ = try await self.requestJSON(
        method: "talk.session.submitToolResult",
        payload: [
            "sessionId": sessionId,
            "callId": callId,
            "result": result,
        ],
        decodeAs: TalkSessionOkResult.self,
        timeoutSeconds: 30)
}
/// Waits for the chat run `runId` to reach a terminal state or for the timeout to elapse.
///
/// Two child tasks race: one scans `stream` for `chat` events of the run, the other sleeps
/// for `timeoutSeconds`. The first result wins and the other task is cancelled.
///
/// - Returns: The assistant text with `failed == false` for a `final` event; a failed result
///   for `aborted`/`error` states, stream end, cancellation, or timeout.
private func waitForChatCompletion(
runId: String,
stream: AsyncStream<EventFrame>,
timeoutSeconds: Int) async -> ChatCompletionResult
{
await withTaskGroup(of: ChatCompletionResult.self) { group in
group.addTask {
for await event in stream {
if Task.isCancelled {
return ChatCompletionResult(text: nil, failed: true)
}
// Skip frames that are not decodable chat events for this run.
guard event.event == "chat",
let payload = event.payload,
let chatEvent = try? GatewayPayloadDecoding.decode(
payload,
as: OpenClawChatEventPayload.self),
chatEvent.runId == runId
else { continue }
if chatEvent.state == "final" {
return ChatCompletionResult(
text: OpenClawChatEventText.assistantText(from: chatEvent),
failed: false)
}
if chatEvent.state == "aborted" || chatEvent.state == "error" {
return ChatCompletionResult(text: nil, failed: true)
}
}
// The stream ended without a terminal event for the run.
return ChatCompletionResult(text: nil, failed: true)
}
group.addTask {
// Timeout arm: a cancelled sleep also falls through to a failed result.
try? await Task.sleep(nanoseconds: UInt64(timeoutSeconds) * 1_000_000_000)
return ChatCompletionResult(text: nil, failed: true)
}
let result = await group.next() ?? ChatCompletionResult(text: nil, failed: true)
group.cancelAll()
return result
}
}
/// Serializes `payload` to JSON, sends it through this session's gateway, and decodes the
/// reply as `type`.
///
/// - Parameters:
///   - method: Gateway method name; also used in the encoding error message.
///   - payload: JSON-serializable request parameters.
///   - type: Expected `Decodable` type of the response body.
///   - timeoutSeconds: Timeout forwarded to the gateway request.
/// - Throws: Serialization errors, an `NSError` (code 4) when the JSON bytes are not valid
///   UTF-8, gateway request errors, or decoding errors.
private func requestJSON<T: Decodable>(
    method: String,
    payload: [String: Any],
    decodeAs type: T.Type,
    timeoutSeconds: Int) async throws -> T
{
    let encoded = try JSONSerialization.data(withJSONObject: payload)
    guard let paramsJSON = String(data: encoded, encoding: .utf8) else {
        let info: [String: Any] = [
            NSLocalizedDescriptionKey: "Failed to encode \(method) payload",
        ]
        throw NSError(domain: "RealtimeTalkRelay", code: 4, userInfo: info)
    }
    let reply = try await self.gateway.request(
        method: method,
        paramsJSON: paramsJSON,
        timeoutSeconds: timeoutSeconds)
    return try JSONDecoder().decode(type, from: reply)
}
/// Installs a tap on the audio engine's input node and starts the engine.
///
/// Each encoded chunk delivered by the tap block is handed to the audio sender; a send
/// failure is surfaced through `onStatus` unless the session has been closed meanwhile.
///
/// - Throws: An `NSError` (code 5) when the input format reports no sample rate or no
///   channels, or the error from starting the audio engine.
private func startMicrophonePump() throws {
// Remove any previous tap first so only one tap is ever installed on bus 0.
self.stopMicrophonePump()
let input = self.audioEngine.inputNode
let format = input.inputFormat(forBus: 0)
let targetSampleRate = self.inputSampleRateHz
guard format.sampleRate > 0, format.channelCount > 0 else {
throw NSError(domain: "RealtimeTalkRelay", code: 5, userInfo: [
NSLocalizedDescriptionKey: "Invalid realtime audio input format",
])
}
// NOTE(review): makeRealtimeAudioTapBlock is defined outside this block; presumably it
// converts each buffer to the target rate before invoking the closure — confirm.
// The sender is captured by value so the tap keeps using the sender that was current
// when the pump started.
let tapBlock = makeRealtimeAudioTapBlock(
inputSampleRate: format.sampleRate,
targetSampleRate: targetSampleRate)
{ [weak self, audioSender = self.audioSender] encoded, timestampMs in
guard let audioSender else { return }
Task {
guard let message = await audioSender.send(encoded, timestampMs: timestampMs) else { return }
await MainActor.run { [weak self] in
guard let self, !self.isClosed else { return }
self.onStatus("Realtime audio failed: \(message)")
}
}
}
input.installTap(
onBus: 0,
bufferSize: Self.audioFrameBufferSize,
format: format,
block: tapBlock)
self.audioEngine.prepare()
try self.audioEngine.start()
}
/// Removes the input tap on bus 0 and stops the audio engine.
private func stopMicrophonePump() {
self.audioEngine.inputNode.removeTap(onBus: 0)
self.audioEngine.stop()
}
/// Starts a fresh playback pipeline for incoming audio.
///
/// Stops any existing playback, creates a new stream whose continuation is stored in
/// `outputContinuation`, and plays that stream on `pcmPlayer` at `outputSampleRateHz`.
/// When playback ends the speaking state is reset.
private func startOutputPlayback() {
self.stopOutputPlayback()
let stream = AsyncThrowingStream<Data, Error> { continuation in
self.outputContinuation = continuation
}
self.outputTask = Task { [weak self] in
guard let self else { return }
let result = await self.pcmPlayer.play(stream: stream, sampleRate: self.outputSampleRateHz)
await MainActor.run {
// Log only interrupted playback that reports a position.
if !result.finished, let interruptedAt = result.interruptedAt {
self.logger.info("realtime output interrupted at \(interruptedAt, privacy: .public)s")
}
self.isOutputPlaying = false
self.onSpeakingChanged(false)
}
}
}
/// Ends the current playback pipeline: finishes the audio stream, cancels the playback
/// task, stops the PCM player, and reports that speaking has stopped.
private func stopOutputPlayback() {
self.outputContinuation?.finish()
self.outputContinuation = nil
self.outputTask?.cancel()
self.outputTask = nil
// The player's stop result is intentionally discarded.
_ = self.pcmPlayer.stop()
self.isOutputPlaying = false
self.onSpeakingChanged(false)
}
/// Converts a float PCM buffer to mono little-endian 16-bit PCM at `targetSampleRate`.
///
/// Channels are averaged into one, samples are linearly interpolated between neighbouring
/// source frames, clamped to `[-1, 1]`, and scaled to `Int16`.
///
/// - Parameters:
///   - buffer: Source audio; must expose float channel data.
///   - inputSampleRate: Sample rate of `buffer` in Hz.
///   - targetSampleRate: Desired output sample rate in Hz.
/// - Returns: The encoded bytes (at least one sample), or empty `Data` when the buffer has
///   no float data, no frames, or either rate is not positive.
fileprivate nonisolated static func encodePCM16(
    buffer: AVAudioPCMBuffer,
    inputSampleRate: Double,
    targetSampleRate: Double) -> Data
{
    guard let channels = buffer.floatChannelData,
          buffer.frameLength > 0,
          inputSampleRate > 0,
          targetSampleRate > 0
    else { return Data() }

    let frames = Int(buffer.frameLength)
    let lastFrame = frames - 1
    let channelTotal = max(1, Int(buffer.format.channelCount))
    let resampledFrames = Double(frames) * targetSampleRate / inputSampleRate
    let outputCount = max(1, Int(resampledFrames.rounded(.down)))

    var pcm = Data(capacity: outputCount * MemoryLayout<Int16>.size)
    for outputIndex in 0..<outputCount {
        // Fractional position of this output sample within the source buffer.
        let position = Double(outputIndex) * inputSampleRate / targetSampleRate
        let left = min(lastFrame, Int(position.rounded(.down)))
        let right = min(lastFrame, left + 1)
        let weight = Float(position - Double(left))

        var sum: Float = 0
        for channelIndex in 0..<channelTotal {
            let samples = channels[channelIndex]
            sum += samples[left] + ((samples[right] - samples[left]) * weight)
        }

        let clamped = max(-1, min(1, sum / Float(channelTotal)))
        let quantized = Int16((clamped * Float(Int16.max)).rounded())
        withUnsafeBytes(of: quantized.littleEndian) { pcm.append(contentsOf: $0) }
    }
    return pcm
}
/// Makes a message safe for a single-line log entry: line feeds and carriage returns become
/// spaces, and text longer than 180 characters is cut to 180 characters plus "...".
private nonisolated static func safeLogMessage(_ value: String) -> String {
    let limit = 180
    let flattened = value
        .replacingOccurrences(of: "\n", with: " ")
        .replacingOccurrences(of: "\r", with: " ")
    guard flattened.count > limit else { return flattened }
    return String(flattened.prefix(limit)) + "..."
}
/// Returns `value` trimmed of surrounding whitespace and newlines, or `nil` when it is
/// `nil` or empty after trimming.
private func nonEmpty(_ value: String?) -> String? {
    guard let candidate = value?.trimmingCharacters(in: .whitespacesAndNewlines),
          !candidate.isEmpty
    else { return nil }
    return candidate
}
}

View File

@@ -1,14 +1,77 @@
import Foundation
import OpenClawKit
/// How Talk mode runs: the native on-device pipeline, or a realtime session relayed through
/// the gateway.
enum TalkModeExecutionMode {
case native
case realtimeRelay
}
/// User-selectable Talk provider, persisted by raw value under `storageKey`.
enum TalkModeProviderSelection: String, CaseIterable, Identifiable {
    case gatewayDefault = "gateway"
    case nativeElevenLabs = "elevenlabs"
    case openAIRealtime = "openai-realtime"

    /// Storage key under which the selection's raw value is persisted.
    static let storageKey = "talk.providerSelection"

    var id: String { self.rawValue }

    /// Human-readable name shown for the selection.
    var label: String {
        switch self {
        case .gatewayDefault:
            return "Gateway Default"
        case .nativeElevenLabs:
            return "ElevenLabs"
        case .openAIRealtime:
            return "Realtime-2 (OpenAI)"
        }
    }

    /// Maps a stored raw value to a selection; missing or unknown values resolve to
    /// `.gatewayDefault`. Surrounding whitespace is ignored, matching is case-sensitive.
    static func resolved(_ raw: String?) -> TalkModeProviderSelection {
        guard let raw else { return .gatewayDefault }
        let key = raw.trimmingCharacters(in: .whitespacesAndNewlines)
        return Self(rawValue: key) ?? .gatewayDefault
    }
}
/// Realtime voice picker support: the known voice ids, validation of a stored override,
/// and display labels.
enum TalkModeRealtimeVoiceSelection {
    /// Storage key under which the chosen voice override is persisted.
    static let storageKey = "talk.realtime.voiceSelection"

    /// Voice ids offered by the picker, in display order.
    static let voices = [
        "alloy",
        "ash",
        "ballad",
        "coral",
        "echo",
        "sage",
        "shimmer",
        "verse",
        "marin",
        "cedar",
    ]

    /// Normalizes a stored override (trimmed, lowercased) and returns it only when it is one
    /// of `voices`; otherwise `nil`.
    static func resolvedOverride(_ raw: String?) -> String? {
        guard let raw else { return nil }
        let candidate = raw.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
        return Self.voices.contains(candidate) ? candidate : nil
    }

    /// Display label for a voice id: the id with its first character uppercased.
    static func label(for voice: String) -> String {
        guard let initial = voice.first else { return "" }
        return String(initial).uppercased() + voice.dropFirst()
    }
}
struct TalkModeGatewayConfigState {
let activeProvider: String
let normalizedPayload: Bool
let missingResolvedPayload: Bool
let executionMode: TalkModeExecutionMode
let defaultVoiceId: String?
let voiceAliases: [String: String]
let defaultModelId: String
let defaultOutputFormat: String?
let realtimeProvider: String?
let realtimeModelId: String?
let realtimeVoiceId: String?
let rawConfigApiKey: String?
let interruptOnSpeech: Bool?
let silenceTimeoutMs: Int
@@ -20,6 +83,7 @@ enum TalkModeGatewayConfigParser {
config: [String: Any],
defaultProvider: String,
defaultModelIdFallback: String,
defaultRealtimeModelIdFallback: String,
defaultSilenceTimeoutMs: Int) -> TalkModeGatewayConfigState
{
let talk = TalkConfigParsing.bridgeFoundationDictionary(config["talk"] as? [String: Any])
@@ -29,8 +93,6 @@ enum TalkModeGatewayConfigParser {
allowLegacyFallback: false)
let activeProvider = selection?.provider ?? defaultProvider
let activeConfig = selection?.config
let defaultVoiceId = activeConfig?["voiceId"]?.stringValue?
.trimmingCharacters(in: .whitespacesAndNewlines)
let voiceAliases: [String: String]
if let aliases = activeConfig?["voiceAliases"]?.dictionaryValue {
var resolved: [String: String] = [:]
@@ -45,10 +107,22 @@ enum TalkModeGatewayConfigParser {
} else {
voiceAliases = [:]
}
let model = activeConfig?["modelId"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines)
let model = Self.firstString(activeConfig, keys: ["modelId", "model"])
let defaultModelId = (model?.isEmpty == false) ? model! : defaultModelIdFallback
let defaultOutputFormat = activeConfig?["outputFormat"]?.stringValue?
.trimmingCharacters(in: .whitespacesAndNewlines)
let defaultVoiceId = Self.firstString(activeConfig, keys: ["voiceId", "voice"])
let defaultOutputFormat = Self.firstString(activeConfig, keys: ["outputFormat"])
let realtime = talk?["realtime"]?.dictionaryValue
let realtimeProvider = Self.firstString(realtime, keys: ["provider"])
let realtimeProviders = realtime?["providers"]?.dictionaryValue
let realtimeProviderConfig = Self.realtimeProviderConfig(
providers: realtimeProviders,
provider: realtimeProvider)
let realtimeModel = Self.firstString(realtime, keys: ["model"])
?? Self.firstString(realtimeProviderConfig, keys: ["model"])
let realtimeModelId = realtimeModel ?? defaultRealtimeModelIdFallback
let realtimeVoiceId = Self.firstString(realtime, keys: ["voice"])
?? Self.firstString(realtimeProviderConfig, keys: ["voice"])
let executionMode = Self.resolvedExecutionMode(realtime)
let rawConfigApiKey = activeConfig?["apiKey"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines)
let interruptOnSpeech = talk?["interruptOnSpeech"]?.boolValue
let silenceTimeoutMs = TalkConfigParsing.resolvedSilenceTimeoutMs(
@@ -60,13 +134,53 @@ enum TalkModeGatewayConfigParser {
activeProvider: activeProvider,
normalizedPayload: selection?.normalizedPayload == true,
missingResolvedPayload: talk != nil && selection == nil,
executionMode: executionMode,
defaultVoiceId: defaultVoiceId,
voiceAliases: voiceAliases,
defaultModelId: defaultModelId,
defaultOutputFormat: defaultOutputFormat,
realtimeProvider: realtimeProvider,
realtimeModelId: realtimeModelId,
realtimeVoiceId: realtimeVoiceId,
rawConfigApiKey: rawConfigApiKey,
interruptOnSpeech: interruptOnSpeech,
silenceTimeoutMs: silenceTimeoutMs,
speechLocaleID: speechLocaleID)
}
/// Returns the first non-empty trimmed string found under `keys`, checked in order, or
/// `nil` when `config` is `nil` or none of the keys holds a non-empty string.
private static func firstString(_ config: [String: AnyCodable]?, keys: [String]) -> String? {
    guard let config else { return nil }
    return keys.lazy
        .compactMap { config[$0]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) }
        .first { !$0.isEmpty }
}
/// Chooses the execution mode from the `realtime` config section.
///
/// Realtime relay is selected only when `mode` is `realtime`, `transport` is
/// `gateway-relay`, and `brain` is absent or `agent-consult` (all compared
/// case-insensitively). Everything else, including a missing section, is native.
private static func resolvedExecutionMode(_ realtime: [String: AnyCodable]?) -> TalkModeExecutionMode {
    guard let realtime else { return .native }

    func field(_ key: String) -> String? {
        Self.firstString(realtime, keys: [key])?.lowercased()
    }

    let brain = field("brain")
    let brainAllowsRelay = brain == nil || brain == "agent-consult"
    let usesRelay = field("mode") == "realtime"
        && field("transport") == "gateway-relay"
        && brainAllowsRelay
    return usesRelay ? .realtimeRelay : .native
}
/// Picks the realtime provider's config dictionary.
///
/// With a named `provider`, that provider's entry is returned (or `nil` when absent). With
/// no provider named, the sole entry is used when exactly one provider is configured.
private static func realtimeProviderConfig(
    providers: [String: AnyCodable]?,
    provider: String?) -> [String: AnyCodable]?
{
    guard let providers else { return nil }
    guard let provider else {
        // Ambiguous unless there is exactly one candidate.
        return providers.count == 1 ? providers.values.first?.dictionaryValue : nil
    }
    return providers[provider]?.dictionaryValue
}
}

View File

@@ -33,6 +33,7 @@ private final class StreamFailureBox: @unchecked Sendable {
final class TalkModeManager: NSObject {
private typealias SpeechRequest = SFSpeechAudioBufferRecognitionRequest
private static let defaultModelIdFallback = "eleven_v3"
private static let defaultRealtimeModelIdFallback = "gpt-realtime-2"
private static let defaultTalkProvider = "elevenlabs"
private static let defaultSilenceTimeoutMs = TalkDefaults.silenceTimeoutMs
private static let redactedConfigSentinel = "__OPENCLAW_REDACTED__"
@@ -47,6 +48,12 @@ final class TalkModeManager: NSObject {
var gatewayTalkApiKeyConfigured: Bool = false
var gatewayTalkDefaultModelId: String?
var gatewayTalkDefaultVoiceId: String?
var gatewayTalkProviderLabel: String = "Not loaded"
var gatewayTalkTransportLabel: String = "Not loaded"
var gatewayTalkUsesRealtimeRelay: Bool = false
var gatewayTalkRealtimeProviderLabel: String?
var gatewayTalkRealtimeModelId: String?
var gatewayTalkRealtimeVoiceId: String?
private enum CaptureMode {
case idle
@@ -84,6 +91,11 @@ final class TalkModeManager: NSObject {
private var voiceOverrideActive = false
private var modelOverrideActive = false
private var defaultOutputFormat: String?
private var activeTalkProvider: String = TalkModeManager.defaultTalkProvider
private var executionMode: TalkModeExecutionMode = .native
private var realtimeProvider: String?
private var realtimeModelId: String?
private var realtimeVoiceId: String?
private var apiKey: String?
private var voiceAliases: [String: String] = [:]
private var interruptOnSpeech: Bool = true
@@ -96,6 +108,7 @@ final class TalkModeManager: NSObject {
private var pcmFormatUnavailable: Bool = false
var pcmPlayer: PCMStreamingAudioPlaying = PCMStreamingAudioPlayer.shared
var mp3Player: StreamingAudioPlaying = StreamingAudioPlayer.shared
private var realtimeRelaySession: RealtimeTalkRelaySession?
private var gateway: GatewayNodeSession?
private var gatewayConnected = false
@@ -164,6 +177,17 @@ final class TalkModeManager: NSObject {
}
}
/// Reacts to a change of the provider selection.
///
/// When Talk mode is enabled it is stopped and started again so the new selection takes
/// effect; otherwise only the configuration is reloaded.
func applyProviderSelectionChanged() {
    guard self.isEnabled else {
        Task { await self.reloadConfig() }
        return
    }
    self.stop()
    // `stop()` clears `isEnabled`; restore it so `start()` does not bail out.
    self.isEnabled = true
    Task { await self.start() }
}
func start() async {
guard self.isEnabled else { return }
guard self.captureMode != .pushToTalk else { return }
@@ -181,6 +205,11 @@ final class TalkModeManager: NSObject {
self.statusText = "Microphone permission denied"
return
}
await self.reloadConfig()
if self.shouldUseRealtimeRelay() {
await self.startRealtimeRelay()
return
}
let speechOk = await Self.requestSpeechPermission()
guard speechOk else {
self.logger.warning("start blocked: speech permission denied")
@@ -190,7 +219,6 @@ final class TalkModeManager: NSObject {
return
}
await self.reloadConfig()
do {
try Self.configureAudioSession()
// Set this before starting recognition so any early speech errors are classified correctly.
@@ -208,6 +236,58 @@ final class TalkModeManager: NSObject {
}
}
/// Whether the currently resolved execution mode is the gateway realtime relay.
private func shouldUseRealtimeRelay() -> Bool {
self.executionMode == .realtimeRelay
}
/// Starts Talk mode through the gateway realtime relay.
///
/// Configures the audio session, replaces any existing relay session with a new one built
/// from the resolved realtime provider/model/voice, and starts it. Relay status and
/// speaking changes are mirrored into `statusText` and `isSpeaking`.
///
/// On failure the current relay session is stopped (not merely dropped), listening state
/// is reset, and the error is shown in `statusText` and logged. Requires a gateway;
/// without one only the status text is updated.
private func startRealtimeRelay() async {
    guard let gateway else {
        self.statusText = "Gateway not connected"
        return
    }
    do {
        try Self.configureAudioSession()
        self.stopRealtimeRelay()
        self.captureMode = .continuous
        self.isListening = true
        self.statusText = "Connecting realtime…"
        let relay = RealtimeTalkRelaySession(
            gateway: gateway,
            options: RealtimeTalkRelaySession.Options(
                sessionKey: self.mainSessionKey,
                provider: self.realtimeProvider,
                model: self.realtimeModelId,
                voice: self.realtimeVoiceId),
            pcmPlayer: self.pcmPlayer,
            onStatus: { [weak self] status in
                self?.statusText = status
            },
            onSpeakingChanged: { [weak self] speaking in
                self?.isSpeaking = speaking
            })
        // Store the relay before awaiting so `stop()` can reach it while it connects.
        self.realtimeRelaySession = relay
        try await relay.start()
        await self.subscribeChatIfNeeded(sessionKey: self.mainSessionKey)
        self.logger.info("realtime relay listening")
        let provider = self.realtimeProvider ?? "configured"
        let model = self.realtimeModelId ?? "default"
        GatewayDiagnostics.log(
            "talk realtime: provider=\(provider) model=\(model)")
    } catch {
        // Stop rather than just release the session: when the audio session setup throws,
        // a previously started relay is still running and would otherwise be orphaned.
        self.stopRealtimeRelay()
        self.isListening = false
        self.captureMode = .idle
        self.statusText = "Realtime failed: \(error.localizedDescription)"
        self.logger.error("realtime relay failed: \(error.localizedDescription, privacy: .public)")
        GatewayDiagnostics.log("talk realtime: failed error=\(error.localizedDescription)")
    }
}
/// Stops the current realtime relay session, if any, and releases it.
private func stopRealtimeRelay() {
self.realtimeRelaySession?.stop()
self.realtimeRelaySession = nil
}
func stop() {
self.isEnabled = false
self.isListening = false
@@ -218,6 +298,7 @@ final class TalkModeManager: NSObject {
self.lastHeard = nil
self.silenceTask?.cancel()
self.silenceTask = nil
self.stopRealtimeRelay()
self.stopRecognition()
self.stopSpeaking()
self.lastInterruptedAtSeconds = nil
@@ -263,6 +344,7 @@ final class TalkModeManager: NSObject {
self.silenceTask?.cancel()
self.silenceTask = nil
self.stopRealtimeRelay()
self.stopRecognition()
self.stopSpeaking()
self.lastInterruptedAtSeconds = nil
@@ -1167,6 +1249,9 @@ final class TalkModeManager: NSObject {
}
private func stopSpeaking(storeInterruption: Bool = true) {
if let realtimeRelaySession {
realtimeRelaySession.cancelOutput()
}
let hasIncremental = self.incrementalSpeechActive ||
self.incrementalSpeechTask != nil ||
!self.incrementalSpeechQueue.isEmpty
@@ -1993,26 +2078,87 @@ extension TalkModeManager {
return trimmed
}
/// Display name for a provider id.
///
/// Known ids (matched after trimming and lowercasing) get their branded spelling, an empty
/// id becomes "Gateway Default", and any other id is returned in its normalized form.
private static func displayName(forProvider provider: String) -> String {
    let key = provider.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
    let branded = [
        "elevenlabs": "ElevenLabs",
        "openai": "OpenAI",
        "google": "Google",
    ]
    if let name = branded[key] {
        return name
    }
    return key.isEmpty ? "Gateway Default" : key
}
func reloadConfig() async {
guard let gateway else { return }
self.pcmFormatUnavailable = false
do {
let res = try await gateway.request(
method: "talk.config",
paramsJSON: "{\"includeSecrets\":true}",
timeoutSeconds: 8)
guard let json = try JSONSerialization.jsonObject(with: res) as? [String: Any] else { return }
guard let config = json["config"] as? [String: Any] else { return }
func fetchConfig(includeSecrets: Bool) async throws -> [String: Any]? {
let paramsJSON = includeSecrets ? "{\"includeSecrets\":true}" : "{}"
let res = try await gateway.request(
method: "talk.config",
paramsJSON: paramsJSON,
timeoutSeconds: 8)
guard let json = try JSONSerialization.jsonObject(with: res) as? [String: Any] else {
return nil
}
return json["config"] as? [String: Any]
}
let config: [String: Any]
do {
guard let fetched = try await fetchConfig(includeSecrets: true) else { return }
config = fetched
} catch {
let message = String(describing: error)
guard message.contains("operator.talk.secrets"),
let fetched = try await fetchConfig(includeSecrets: false)
else {
throw error
}
config = fetched
GatewayDiagnostics.log("talk config secrets unavailable; loaded redacted config")
}
let parsed = TalkModeGatewayConfigParser.parse(
config: config,
defaultProvider: Self.defaultTalkProvider,
defaultModelIdFallback: Self.defaultModelIdFallback,
defaultRealtimeModelIdFallback: Self.defaultRealtimeModelIdFallback,
defaultSilenceTimeoutMs: Self.defaultSilenceTimeoutMs)
if parsed.missingResolvedPayload {
GatewayDiagnostics.log(
"talk config ignored: normalized payload missing talk.resolved")
}
let activeProvider = parsed.activeProvider
let providerSelection = TalkModeProviderSelection.resolved(
UserDefaults.standard.string(forKey: TalkModeProviderSelection.storageKey))
var activeProvider = parsed.activeProvider
var executionMode = parsed.executionMode
var realtimeProvider = parsed.realtimeProvider
var realtimeModelId = parsed.realtimeModelId
let realtimeVoiceOverride = TalkModeRealtimeVoiceSelection.resolvedOverride(
UserDefaults.standard.string(forKey: TalkModeRealtimeVoiceSelection.storageKey))
let realtimeVoiceId = realtimeVoiceOverride ?? parsed.realtimeVoiceId
switch providerSelection {
case .gatewayDefault:
break
case .nativeElevenLabs:
activeProvider = Self.defaultTalkProvider
executionMode = .native
case .openAIRealtime:
activeProvider = "openai"
executionMode = .realtimeRelay
realtimeProvider = realtimeProvider ?? "openai"
realtimeModelId = realtimeModelId ?? Self.defaultRealtimeModelIdFallback
}
self.activeTalkProvider = activeProvider
self.executionMode = executionMode
self.realtimeProvider = realtimeProvider
self.realtimeModelId = realtimeModelId
self.realtimeVoiceId = realtimeVoiceId
self.defaultVoiceId = parsed.defaultVoiceId
self.voiceAliases = parsed.voiceAliases
if !self.voiceOverrideActive {
@@ -2033,14 +2179,23 @@ extension TalkModeManager {
} else {
self.apiKey = (localApiKey?.isEmpty == false) ? localApiKey : configApiKey
}
if activeProvider != Self.defaultTalkProvider {
if activeProvider != Self.defaultTalkProvider, executionMode != .realtimeRelay {
self.apiKey = nil
GatewayDiagnostics.log(
"talk provider '\(activeProvider)' not yet supported on iOS; using system voice fallback")
}
self.gatewayTalkDefaultVoiceId = self.defaultVoiceId
self.gatewayTalkDefaultModelId = self.defaultModelId
self.gatewayTalkApiKeyConfigured = (self.apiKey?.isEmpty == false)
self.gatewayTalkDefaultVoiceId = executionMode == .realtimeRelay ? realtimeVoiceId : self.defaultVoiceId
self.gatewayTalkDefaultModelId = executionMode == .realtimeRelay ? realtimeModelId : self.defaultModelId
self.gatewayTalkProviderLabel = providerSelection == .gatewayDefault
? Self.displayName(forProvider: activeProvider)
: providerSelection.label
self.gatewayTalkUsesRealtimeRelay = executionMode == .realtimeRelay
self.gatewayTalkTransportLabel = executionMode == .realtimeRelay ? "Gateway relay" : "Native"
self.gatewayTalkRealtimeProviderLabel = realtimeProvider.map { Self.displayName(forProvider: $0) }
self.gatewayTalkRealtimeModelId = realtimeModelId
self.gatewayTalkRealtimeVoiceId = realtimeVoiceId
self.gatewayTalkApiKeyConfigured = executionMode == .realtimeRelay ||
(self.apiKey?.isEmpty == false)
self.gatewayTalkConfigLoaded = true
if let interrupt = parsed.interruptOnSpeech {
self.interruptOnSpeech = interrupt
@@ -2052,6 +2207,17 @@ extension TalkModeManager {
"talk config provider=\(activeProvider) silenceTimeoutMs=\(parsed.silenceTimeoutMs)")
}
} catch {
self.activeTalkProvider = Self.defaultTalkProvider
self.executionMode = .native
self.realtimeProvider = nil
self.realtimeModelId = nil
self.realtimeVoiceId = nil
self.gatewayTalkProviderLabel = "Not loaded"
self.gatewayTalkTransportLabel = "Not loaded"
self.gatewayTalkUsesRealtimeRelay = false
self.gatewayTalkRealtimeProviderLabel = nil
self.gatewayTalkRealtimeModelId = nil
self.gatewayTalkRealtimeVoiceId = nil
self.defaultModelId = Self.defaultModelIdFallback
if !self.modelOverrideActive {
self.currentModelId = self.defaultModelId

View File

@@ -76,6 +76,7 @@ Sources/Voice/TalkDefaults.swift
Sources/Voice/TalkModeGatewayConfig.swift
Sources/Voice/TalkModeManager.swift
Sources/Voice/TalkOrbOverlay.swift
Sources/Voice/RealtimeTalkRelaySession.swift
Sources/Voice/TalkSpeechLocale.swift
Sources/Voice/VoiceTab.swift
Sources/Voice/VoiceWakeManager.swift

View File

@@ -4,6 +4,102 @@ import Testing
@MainActor
@Suite struct TalkModeManagerTests {
/// A full `talk.realtime` section yields relay mode with trimmed provider/model/voice,
/// while the native provider's model and voice are still taken from `talk.resolved`.
@Test func parsesOpenAIRealtimeProviderModelAndVoice() {
let config: [String: Any] = [
"talk": [
"provider": "elevenlabs",
"providers": [
"elevenlabs": [
"modelId": "eleven_v3",
"voiceId": "eleven-voice",
],
],
"resolved": [
"provider": "elevenlabs",
"config": [
"modelId": "eleven_v3",
"voiceId": "eleven-voice",
],
],
// Padded values verify that the parser trims whitespace.
"realtime": [
"provider": " openai ",
"model": " gpt-realtime-2 ",
"voice": " marin ",
"mode": "realtime",
"transport": "gateway-relay",
"brain": "agent-consult",
],
],
]
let parsed = TalkModeGatewayConfigParser.parse(
config: config,
defaultProvider: "elevenlabs",
defaultModelIdFallback: "eleven_v3",
defaultRealtimeModelIdFallback: "gpt-realtime-2",
defaultSilenceTimeoutMs: 900)
#expect(parsed.activeProvider == "elevenlabs")
#expect(parsed.executionMode == .realtimeRelay)
#expect(parsed.defaultModelId == "eleven_v3")
#expect(parsed.defaultVoiceId == "eleven-voice")
#expect(parsed.realtimeProvider == "openai")
#expect(parsed.realtimeModelId == "gpt-realtime-2")
#expect(parsed.realtimeVoiceId == "marin")
}
/// Without a realtime model in the config, the realtime model falls back to the supplied
/// default and the realtime voice stays unset.
@Test func defaultsOpenAIRealtimeModelWhenProviderOmitsModel() {
let config: [String: Any] = [
"talk": [
"realtime": [
"provider": "openai",
"mode": "realtime",
"transport": "gateway-relay",
],
],
]
let parsed = TalkModeGatewayConfigParser.parse(
config: config,
defaultProvider: "elevenlabs",
defaultModelIdFallback: "eleven_v3",
defaultRealtimeModelIdFallback: "gpt-realtime-2",
defaultSilenceTimeoutMs: 900)
#expect(parsed.executionMode == .realtimeRelay)
#expect(parsed.defaultModelId == "eleven_v3")
#expect(parsed.realtimeModelId == "gpt-realtime-2")
#expect(parsed.realtimeVoiceId == nil)
}
/// Exercises `TalkModeRealtimeVoiceSelection.resolvedOverride` with a missing
/// value, an empty string, a padded mixed-case voice name, and an unrecognised
/// name.
@Test func resolvesRealtimeVoicePickerOverrides() {
// Missing and empty inputs are expected to yield no override.
#expect(TalkModeRealtimeVoiceSelection.resolvedOverride(nil) == nil)
#expect(TalkModeRealtimeVoiceSelection.resolvedOverride("") == nil)
// A padded, capitalised name is expected back trimmed and lowercased.
#expect(TalkModeRealtimeVoiceSelection.resolvedOverride(" Cedar ") == "cedar")
// "unknown" is expected to be rejected rather than passed through.
#expect(TalkModeRealtimeVoiceSelection.resolvedOverride("unknown") == nil)
}
/// Parses a realtime section whose transport is "webrtc" rather than
/// "gateway-relay" and checks that the execution mode is `.native`.
@Test func leavesNativeModeWhenRealtimeTransportIsNotGatewayRelay() {
    let realtimeSettings = [
        "provider": "openai",
        "mode": "realtime",
        "transport": "webrtc",
    ]
    let gatewayConfig: [String: Any] = [
        "talk": [
            "realtime": realtimeSettings,
        ],
    ]

    let result = TalkModeGatewayConfigParser.parse(
        config: gatewayConfig,
        defaultProvider: "elevenlabs",
        defaultModelIdFallback: "eleven_v3",
        defaultRealtimeModelIdFallback: "gpt-realtime-2",
        defaultSilenceTimeoutMs: 900)

    #expect(result.executionMode == .native)
}
@Test func detectsPCMFormatRejectionFromElevenLabsError() {
let error = NSError(
domain: "ElevenLabsTTS",

View File

@@ -20,9 +20,9 @@
<string>$(OPENCLAW_MARKETING_VERSION)</string>
<key>CFBundleVersion</key>
<string>$(OPENCLAW_BUILD_VERSION)</string>
<key>WKApplication</key>
<true/>
<key>WKCompanionAppBundleIdentifier</key>
<string>$(OPENCLAW_APP_BUNDLE_ID)</string>
<key>WKWatchKitApp</key>
<true/>
</dict>
</plist>

View File

@@ -0,0 +1,87 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import type { AuthProfileStore } from "../agents/auth-profiles/types.js";
/**
 * Regression coverage for provider auth profile lookup: when the first store
 * has no profiles for the provider, profile ids and the API key must both come
 * from the fallback store.
 */
describe("provider auth profile helpers", () => {
  // Drop the per-test module mocks and the module cache after every test.
  afterEach(() => {
    vi.doUnmock("../agents/agent-scope-config.js");
    vi.doUnmock("../agents/auth-profiles/oauth.js");
    vi.doUnmock("../agents/auth-profiles/order.js");
    vi.doUnmock("../agents/auth-profiles/store.js");
    vi.resetModules();
  });

  it("resolves API keys from the fallback store that supplied usable profile ids", async () => {
    vi.resetModules();

    // Returned by every store loader except the "without external profiles" one.
    const emptyPrimaryStore: AuthProfileStore = { version: 1, profiles: {} };
    // Returned only by loadAuthProfileStoreWithoutExternalProfiles.
    const storeWithApiKey: AuthProfileStore = {
      version: 1,
      profiles: {
        "openai-codex:default": {
          type: "api_key",
          provider: "openai-codex",
          key: "fallback-key",
        },
      },
    };

    // Stand-in resolver: yields a key only for api_key profiles present in the
    // store it was handed, so a lookup against the wrong store returns null.
    const resolveApiKeyStub = vi.fn(
      async ({ store, profileId }: { store: AuthProfileStore; profileId: string }) => {
        const profile = store.profiles[profileId];
        if (profile?.type !== "api_key" || !profile.key) {
          return null;
        }
        return {
          apiKey: profile.key,
          provider: profile.provider,
          profileId,
          profileType: profile.type,
        };
      },
    );

    vi.doMock("../agents/agent-scope-config.js", () => ({
      resolveDefaultAgentDir: () => "/tmp/openclaw-agent",
    }));
    vi.doMock("../agents/auth-profiles/oauth.js", () => ({
      resolveApiKeyForProfile: resolveApiKeyStub,
    }));
    vi.doMock("../agents/auth-profiles/order.js", () => ({
      // Simplified ordering: every profile id in the store that matches the provider.
      resolveAuthProfileOrder: (args: { provider: string; store: AuthProfileStore }) =>
        Object.keys(args.store.profiles).filter(
          (profileId) => args.store.profiles[profileId]?.provider === args.provider,
        ),
    }));
    vi.doMock("../agents/auth-profiles/store.js", () => ({
      ensureAuthProfileStore: vi.fn(() => emptyPrimaryStore),
      ensureAuthProfileStoreForLocalUpdate: vi.fn(() => emptyPrimaryStore),
      loadAuthProfileStoreForSecretsRuntime: vi.fn(() => emptyPrimaryStore),
      loadAuthProfileStoreWithoutExternalProfiles: vi.fn(() => storeWithApiKey),
      updateAuthProfileStoreWithLock: vi.fn(),
    }));

    // Import after the mocks are registered so the module under test binds to them.
    const providerAuth = await import("./provider-auth.js");

    const listed = providerAuth.listUsableProviderAuthProfileIds({ provider: "openai-codex" });
    expect(listed.profileIds).toEqual(["openai-codex:default"]);

    const apiKey = await providerAuth.resolveProviderAuthProfileApiKey({
      provider: "openai-codex",
    });
    expect(apiKey).toBe("fallback-key");

    // The key must have been resolved against the store that supplied the ids.
    expect(resolveApiKeyStub).toHaveBeenCalledWith(
      expect.objectContaining({
        agentDir: "/tmp/openclaw-agent",
        profileId: "openai-codex:default",
        store: storeWithApiKey,
      }),
    );
  });
});

View File

@@ -5,7 +5,12 @@ import { resolveDefaultAgentDir } from "../agents/agent-scope-config.js";
import { resolveApiKeyForProfile } from "../agents/auth-profiles/oauth.js";
import { resolveAuthProfileOrder } from "../agents/auth-profiles/order.js";
import { listProfilesForProvider } from "../agents/auth-profiles/profiles.js";
import { ensureAuthProfileStore } from "../agents/auth-profiles/store.js";
import {
ensureAuthProfileStore,
loadAuthProfileStoreForSecretsRuntime,
loadAuthProfileStoreWithoutExternalProfiles,
} from "../agents/auth-profiles/store.js";
import type { AuthProfileStore } from "../agents/auth-profiles/types.js";
import {
COPILOT_INTEGRATION_ID,
buildCopilotIdeHeaders,
@@ -282,20 +287,11 @@ export function listUsableProviderAuthProfileIds(params: {
provider: string;
cfg?: OpenClawConfig;
agentDir?: string;
allowKeychainPrompt?: boolean;
}): { agentDir: string; profileIds: string[] } {
try {
const agentDir = params.agentDir?.trim() || resolveDefaultAgentDir(params.cfg ?? {});
const store = ensureAuthProfileStore(agentDir, {
allowKeychainPrompt: false,
});
return {
agentDir,
profileIds: resolveAuthProfileOrder({
cfg: params.cfg,
store,
provider: params.provider,
}),
};
const { agentDir, profileIds } = resolveUsableProviderAuthProfiles(params);
return { agentDir, profileIds };
} catch {
return { agentDir: "", profileIds: [] };
}
@@ -305,6 +301,7 @@ export function isProviderAuthProfileConfigured(params: {
provider: string;
cfg?: OpenClawConfig;
agentDir?: string;
allowKeychainPrompt?: boolean;
}): boolean {
return listUsableProviderAuthProfileIds(params).profileIds.length > 0;
}
@@ -313,14 +310,12 @@ export async function resolveProviderAuthProfileApiKey(params: {
provider: string;
cfg?: OpenClawConfig;
agentDir?: string;
allowKeychainPrompt?: boolean;
}): Promise<string | undefined> {
const { agentDir, profileIds } = listUsableProviderAuthProfileIds(params);
const { agentDir, profileIds, store } = resolveUsableProviderAuthProfiles(params);
if (!agentDir || profileIds.length === 0) {
return undefined;
}
const store = ensureAuthProfileStore(agentDir, {
allowKeychainPrompt: false,
});
for (const profileId of profileIds) {
const resolved = await resolveApiKeyForProfile({
cfg: params.cfg,
@@ -334,3 +329,35 @@ export async function resolveProviderAuthProfileApiKey(params: {
}
return undefined;
}
/**
 * Picks the auth profile store to use for a provider, together with the
 * ordered profile ids found in it.
 *
 * The store from `loadAuthProfileStoreForSecretsRuntime` is tried first. When
 * `resolveAuthProfileOrder` yields no ids for it, the store from
 * `loadAuthProfileStoreWithoutExternalProfiles` is used instead, whether or
 * not that one yields any ids. Errors from the loaders are not caught here.
 *
 * @param params.provider Provider id passed to `resolveAuthProfileOrder`.
 * @param params.cfg Optional config; also used to derive the default agent dir.
 * @param params.agentDir Optional agent dir; an empty or whitespace-only value
 *   falls back to `resolveDefaultAgentDir`.
 * @param params.allowKeychainPrompt Forwarded to the fallback store load only;
 *   treated as `false` when omitted.
 * @returns The agent dir, the ordered profile ids, and the store they came from.
 */
function resolveUsableProviderAuthProfiles(params: {
  provider: string;
  cfg?: OpenClawConfig;
  agentDir?: string;
  allowKeychainPrompt?: boolean;
}): { agentDir: string; profileIds: string[]; store: AuthProfileStore } {
  const agentDir = params.agentDir?.trim() || resolveDefaultAgentDir(params.cfg ?? {});

  // Ordered profile ids for this provider within a given store.
  const orderedIdsIn = (candidate: AuthProfileStore): string[] =>
    resolveAuthProfileOrder({
      cfg: params.cfg,
      store: candidate,
      provider: params.provider,
    });

  const runtimeStore = loadAuthProfileStoreForSecretsRuntime(agentDir);
  const runtimeIds = orderedIdsIn(runtimeStore);
  if (runtimeIds.length > 0) {
    return { agentDir, profileIds: runtimeIds, store: runtimeStore };
  }

  // Nothing usable in the runtime store: fall back to the store loaded
  // without external profiles.
  const fallbackStore = loadAuthProfileStoreWithoutExternalProfiles(agentDir, {
    allowKeychainPrompt: params.allowKeychainPrompt ?? false,
    resolveLegacyOAuthSidecars: true,
  });
  const fallbackIds = orderedIdsIn(fallbackStore);
  return { agentDir, profileIds: fallbackIds, store: fallbackStore };
}