diff --git a/extensions/google/realtime-voice-provider.test.ts b/extensions/google/realtime-voice-provider.test.ts index 9968b2f586dc..ca5ca583f271 100644 --- a/extensions/google/realtime-voice-provider.test.ts +++ b/extensions/google/realtime-voice-provider.test.ts @@ -192,6 +192,19 @@ describe("buildGoogleRealtimeVoiceProvider", () => { it("connects with Google Live setup config and tool declarations", async () => { const provider = buildGoogleRealtimeVoiceProvider(); + const unreadableToolName = Object.defineProperty( + { + type: "function", + description: "Unreadable", + parameters: { type: "object", properties: {} }, + }, + "name", + { + get() { + throw new Error("tool name getter exploded"); + }, + }, + ); const bridge = provider.createBridge({ providerConfig: { apiKey: "gemini-key", @@ -217,6 +230,43 @@ describe("buildGoogleRealtimeVoiceProvider", () => { required: ["query"], }, }, + { + type: "function", + name: "calendar.lookup:next", + description: "Google lookup", + parameters: { + type: "object", + properties: {}, + }, + }, + { + type: "function", + name: "1_lookup", + description: "OpenAI-only lookup", + parameters: { + type: "object", + properties: {}, + }, + }, + { + type: "function", + name: "bad/name", + description: "Malformed lookup", + parameters: { + type: "object", + properties: {}, + }, + }, + { + type: "function", + name: `x${"a".repeat(128)}`, + description: "Too long", + parameters: { + type: "object", + properties: {}, + }, + }, + unreadableToolName as never, { type: "function", name: "openclaw_agent_consult", @@ -289,16 +339,18 @@ describe("buildGoogleRealtimeVoiceProvider", () => { }, required: ["query"], }); - expect(declarations[1]?.name).toBe("openclaw_agent_consult"); - expect(declarations[1]?.description).toBe("Ask OpenClaw"); - expect(declarations[1]?.parametersJsonSchema).toEqual({ + expect(declarations[1]?.name).toBe("calendar.lookup:next"); + expect(declarations[1]?.description).toBe("Google lookup"); + expect(declarations[2]?.name).toBe("openclaw_agent_consult"); + expect(declarations[2]?.description).toBe("Ask OpenClaw"); + expect(declarations[2]?.parametersJsonSchema).toEqual({ type: "object", properties: { question: { type: "string" }, }, required: ["question"], }); - expect(declarations[1]?.behavior).toBe("NON_BLOCKING"); + expect(declarations[2]?.behavior).toBe("NON_BLOCKING"); }); it("omits zero temperature for native audio responses", async () => { diff --git a/extensions/google/realtime-voice-provider.ts b/extensions/google/realtime-voice-provider.ts index d2ca9c166d6a..1e476caad859 100644 --- a/extensions/google/realtime-voice-provider.ts +++ b/extensions/google/realtime-voice-provider.ts @@ -62,6 +62,7 @@ const GOOGLE_REALTIME_BROWSER_NEW_SESSION_TTL_MS = 60 * 1000; const GOOGLE_REALTIME_RECONNECT_MAX_ATTEMPTS = 3; const GOOGLE_REALTIME_RECONNECT_BASE_DELAY_MS = 250; const GOOGLE_REALTIME_RECONNECT_MAX_DELAY_MS = 2_000; +const GOOGLE_REALTIME_TOOL_NAME_RE = /^[A-Za-z_][A-Za-z0-9_.:-]{0,127}$/; const MULAW_LINEAR_SAMPLES = new Int16Array(256); for (let i = 0; i < MULAW_LINEAR_SAMPLES.length; i += 1) { @@ -338,16 +339,25 @@ function buildRealtimeInputConfig( } function buildFunctionDeclarations(tools: RealtimeVoiceTool[] | undefined): FunctionDeclaration[] { - return (tools ?? []).map((tool) => { + return (tools ?? []).flatMap((tool) => { + let name: unknown; + try { + name = (tool as { name?: unknown }).name; + } catch { + return []; + } + if (typeof name !== "string" || !GOOGLE_REALTIME_TOOL_NAME_RE.test(name)) { + return []; + } const declaration: FunctionDeclaration = { - name: tool.name, + name, description: tool.description, parametersJsonSchema: tool.parameters, }; - if (tool.name === REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME) { + if (name === REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME) { declaration.behavior = "NON_BLOCKING" as Behavior; } - return declaration; + return [declaration]; }); } diff --git a/extensions/openai/realtime-voice-provider.test.ts b/extensions/openai/realtime-voice-provider.test.ts index 54e60b9c27d6..54d5b56a2527 100644 --- a/extensions/openai/realtime-voice-provider.test.ts +++ b/extensions/openai/realtime-voice-provider.test.ts @@ -123,6 +123,7 @@ type SentRealtimeEvent = { }; }; item?: unknown; + tools?: Array<{ name?: string }>; }; }; @@ -457,11 +458,39 @@ describe("buildOpenAIRealtimeVoiceProvider", () => { if (!provider.createBrowserSession) { throw new Error("expected OpenAI realtime provider to support browser sessions"); } + const unreadableToolName = Object.defineProperty( + { + type: "function", + description: "Unreadable", + parameters: { type: "object", properties: {} }, + }, + "name", + { + get() { + throw new Error("tool name getter exploded"); + }, + }, + ); const session = await provider.createBrowserSession({ providerConfig: { apiKey: "sk-test" }, // pragma: allowlist secret instructions: "Be concise.", voice: " Marin ", + tools: [ + { + type: "function", + name: "1_lookup", + description: "OpenAI-compatible lookup", + parameters: { type: "object", properties: {} }, + }, + { + type: "function", + name: "calendar.lookup:next", + description: "Google-only lookup", + parameters: { type: "object", properties: {} }, + }, + unreadableToolName as never, + ], }); expectRecordFields(requireFetchRequest(), "fetch request", { @@ -488,6 +517,9 @@ describe("buildOpenAIRealtimeVoiceProvider", () => { transcription: { model: "gpt-4o-mini-transcribe" }, }); expect(requireNestedRecord(bodySession, ["audio", "output"])).toEqual({ voice: "marin" }); + expect( + (bodySession.tools as Array<{ name?: string }> | undefined)?.map((tool) => tool.name), + ).toEqual(["1_lookup"]); expect(bodySession).not.toHaveProperty("temperature"); expectRecordFields(session, "browser session", { provider: "openai", @@ -743,9 +775,43 @@ describe("buildOpenAIRealtimeVoiceProvider", () => { it("waits for session.updated before draining audio and firing onReady", async () => { const provider = buildOpenAIRealtimeVoiceProvider(); const onReady = vi.fn(); + const unreadableToolName = Object.defineProperty( + { + type: "function", + description: "Unreadable", + parameters: { type: "object", properties: {} }, + }, + "name", + { + get() { + throw new Error("tool name getter exploded"); + }, + }, + ); const bridge = provider.createBridge({ providerConfig: { apiKey: "sk-test" }, // pragma: allowlist secret instructions: "Be helpful.", + tools: [ + { + type: "function", + name: "1_lookup", + description: "OpenAI-compatible lookup", + parameters: { type: "object", properties: {} }, + }, + { + type: "function", + name: "calendar.lookup:next", + description: "Google-only lookup", + parameters: { type: "object", properties: {} }, + }, + { + type: "function", + name: "x".repeat(65), + description: "Too long", + parameters: { type: "object", properties: {} }, + }, + unreadableToolName as never, + ], onAudio: vi.fn(), onClearAudio: vi.fn(), onReady, @@ -776,6 +842,7 @@ describe("buildOpenAIRealtimeVoiceProvider", () => { model: "gpt-realtime-2", output_modalities: ["audio"], }); + expect(session.tools?.map((tool) => tool.name)).toEqual(["1_lookup"]); const inputAudio = requireNestedRecord(session, ["audio", "input"]); expectRecordFields(inputAudio, "session audio input", { format: { type: "audio/pcmu" }, diff --git a/extensions/openai/realtime-voice-provider.ts b/extensions/openai/realtime-voice-provider.ts index 9bce483ee012..55a9bda955a7 100644 --- a/extensions/openai/realtime-voice-provider.ts +++ b/extensions/openai/realtime-voice-provider.ts @@ -92,6 +92,7 @@ const OPENAI_REALTIME_NO_ACTIVE_RESPONSE_CANCEL_ERROR = "Cancellation failed: no active response found"; const OPENAI_REALTIME_MAX_SESSION_DURATION_FRAGMENT = "maximum duration"; const OPENAI_REALTIME_DEFAULT_MIN_BARGE_IN_AUDIO_END_MS = 250; +const OPENAI_REALTIME_TOOL_NAME_RE = /^[A-Za-z0-9_-]{1,64}$/; const OPENAI_REALTIME_VOICES = [ "alloy", "ash", @@ -345,6 +346,24 @@ function isOpenAIRealtimeMaxSessionDurationError(detail: string): boolean { ); } +function normalizeOpenAIRealtimeTools( + tools: RealtimeVoiceTool[] | undefined, +): RealtimeVoiceTool[] | undefined { + const normalized: RealtimeVoiceTool[] = []; + for (const tool of tools ?? []) { + let name: unknown; + try { + name = (tool as { name?: unknown }).name; + } catch { + continue; + } + if (typeof name === "string" && OPENAI_REALTIME_TOOL_NAME_RE.test(name)) { + normalized.push({ ...tool, name }); + } + } + return normalized.length > 0 ? normalized : undefined; +} + async function resolveOpenAIRealtimeDefaultAuth(params: { configuredApiKey: string | undefined; cfg: RealtimeVoiceBrowserSessionCreateRequest["cfg"] | undefined; @@ -902,6 +921,7 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge { private buildGaSessionUpdate(): RealtimeGaSessionUpdate { const cfg = this.config; + const tools = normalizeOpenAIRealtimeTools(cfg.tools); const autoRespondToAudio = cfg.autoRespondToAudio ?? true; const interruptResponseOnInputAudio = cfg.interruptResponseOnInputAudio ?? autoRespondToAudio; return { @@ -931,9 +951,9 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge { }, }, ...(cfg.reasoningEffort ? { reasoning: { effort: cfg.reasoningEffort } } : {}), - ...(cfg.tools && cfg.tools.length > 0 + ...(tools ? { - tools: cfg.tools, + tools, tool_choice: "auto", } : {}), @@ -948,6 +968,7 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge { private buildAzureDeploymentSessionUpdate(): RealtimeAzureDeploymentSessionUpdate { const cfg = this.config; const format = this.resolveLegacyRealtimeAudioFormat(); + const tools = normalizeOpenAIRealtimeTools(cfg.tools); return { type: "session.update", session: { @@ -965,9 +986,9 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge { create_response: cfg.autoRespondToAudio ?? true, }, temperature: cfg.temperature ?? 0.8, - ...(cfg.tools && cfg.tools.length > 0 + ...(tools ? { - tools: cfg.tools, + tools, tool_choice: "auto", } : {}), @@ -1375,6 +1396,7 @@ async function createOpenAIRealtimeBrowserSession( model, }); const voice = normalizeOpenAIRealtimeVoice(req.voice) ?? config.voice ?? "alloy"; + const tools = normalizeOpenAIRealtimeTools(req.tools); const session: Record = { type: "realtime", model, @@ -1401,8 +1423,8 @@ async function createOpenAIRealtimeBrowserSession( output: { voice }, }, }; - if (req.tools && req.tools.length > 0) { - session.tools = req.tools; + if (tools) { + session.tools = tools; session.tool_choice = "auto"; } const reasoningEffort = trimToUndefined(req.reasoningEffort) ?? config.reasoningEffort; diff --git a/src/talk/agent-consult-tool.test.ts b/src/talk/agent-consult-tool.test.ts index 910a70247f1e..b2cdccf3ec67 100644 --- a/src/talk/agent-consult-tool.test.ts +++ b/src/talk/agent-consult-tool.test.ts @@ -114,4 +114,52 @@ describe("realtime voice agent consult tool", () => { ).toStrictEqual([REALTIME_VOICE_AGENT_CONSULT_TOOL, customTool]); expect(resolveRealtimeVoiceAgentConsultTools("none", [customTool])).toEqual([customTool]); }); + + it("skips malformed custom realtime tool names without dropping valid tools", () => { + const validTool = { + type: "function" as const, + name: "custom_lookup", + description: "Custom lookup", + parameters: { type: "object" as const, properties: {} }, + }; + const googleCompatibleTool = { + ...validTool, + name: "calendar.lookup:next", + }; + const providerSpecificTool = { + ...validTool, + name: "bad/name", + }; + const unreadableToolName = Object.defineProperty( + { + type: "function", + description: "Unreadable", + parameters: { type: "object", properties: {} }, + }, + "name", + { + get() { + throw new Error("tool name getter exploded"); + }, + }, + ); + + expect( + resolveRealtimeVoiceAgentConsultTools("safe-read-only", [ + { ...validTool, name: 123 } as never, + unreadableToolName as never, + { ...validTool, name: "" }, + { ...validTool, name: " " }, + { ...validTool, name: "bad name" }, + validTool, + googleCompatibleTool, + providerSpecificTool, + ]), + ).toStrictEqual([ + REALTIME_VOICE_AGENT_CONSULT_TOOL, + validTool, + googleCompatibleTool, + providerSpecificTool, + ]); + }); }); diff --git a/src/talk/agent-consult-tool.ts b/src/talk/agent-consult-tool.ts index 17aa57080d08..4992324bd571 100644 --- a/src/talk/agent-consult-tool.ts +++ b/src/talk/agent-consult-tool.ts @@ -80,6 +80,8 @@ const SAFE_READ_ONLY_TOOLS = [ "memory_get", ] as const; +const REALTIME_VOICE_TOOL_NAME_WHITESPACE_RE = /\s/u; + /** Type guard for user/config supplied consult tool policies. */ export function isRealtimeVoiceAgentConsultToolPolicy( value: unknown, @@ -101,6 +103,20 @@ export function resolveRealtimeVoiceAgentConsultToolPolicy( return isRealtimeVoiceAgentConsultToolPolicy(normalized) ? normalized : fallback; } +function readCustomRealtimeVoiceToolName(tool: RealtimeVoiceTool): string | undefined { + let name: unknown; + try { + name = (tool as { name?: unknown }).name; + } catch { + return undefined; + } + return typeof name === "string" && + name.length > 0 && + !REALTIME_VOICE_TOOL_NAME_WHITESPACE_RE.test(name) + ? name + : undefined; +} + /** Merge the shared consult tool with provider/plugin custom realtime tools. */ export function resolveRealtimeVoiceAgentConsultTools( policy: RealtimeVoiceAgentConsultToolPolicy, @@ -113,8 +129,9 @@ export function resolveRealtimeVoiceAgentConsultTools( // Keep the built-in consult tool first and prevent custom tools from // replacing its provider-facing contract by name. for (const tool of customTools) { - if (!tools.has(tool.name)) { - tools.set(tool.name, tool); + const name = readCustomRealtimeVoiceToolName(tool); + if (name && !tools.has(name)) { + tools.set(name, tool); } } return [...tools.values()];