fix(talk): quarantine malformed realtime tool names

This commit is contained in:
Vincent Koc
2026-06-01 20:07:22 +02:00
parent d249e25a64
commit 88abeb6c25
6 changed files with 232 additions and 16 deletions

View File

@@ -192,6 +192,19 @@ describe("buildGoogleRealtimeVoiceProvider", () => {
it("connects with Google Live setup config and tool declarations", async () => {
const provider = buildGoogleRealtimeVoiceProvider();
const unreadableToolName = Object.defineProperty(
{
type: "function",
description: "Unreadable",
parameters: { type: "object", properties: {} },
},
"name",
{
get() {
throw new Error("tool name getter exploded");
},
},
);
const bridge = provider.createBridge({
providerConfig: {
apiKey: "gemini-key",
@@ -217,6 +230,43 @@ describe("buildGoogleRealtimeVoiceProvider", () => {
required: ["query"],
},
},
{
type: "function",
name: "calendar.lookup:next",
description: "Google lookup",
parameters: {
type: "object",
properties: {},
},
},
{
type: "function",
name: "1_lookup",
description: "OpenAI-only lookup",
parameters: {
type: "object",
properties: {},
},
},
{
type: "function",
name: "bad/name",
description: "Malformed lookup",
parameters: {
type: "object",
properties: {},
},
},
{
type: "function",
name: `x${"a".repeat(128)}`,
description: "Too long",
parameters: {
type: "object",
properties: {},
},
},
unreadableToolName as never,
{
type: "function",
name: "openclaw_agent_consult",
@@ -289,16 +339,18 @@ describe("buildGoogleRealtimeVoiceProvider", () => {
},
required: ["query"],
});
expect(declarations[1]?.name).toBe("openclaw_agent_consult");
expect(declarations[1]?.description).toBe("Ask OpenClaw");
expect(declarations[1]?.parametersJsonSchema).toEqual({
expect(declarations[1]?.name).toBe("calendar.lookup:next");
expect(declarations[1]?.description).toBe("Google lookup");
expect(declarations[2]?.name).toBe("openclaw_agent_consult");
expect(declarations[2]?.description).toBe("Ask OpenClaw");
expect(declarations[2]?.parametersJsonSchema).toEqual({
type: "object",
properties: {
question: { type: "string" },
},
required: ["question"],
});
expect(declarations[1]?.behavior).toBe("NON_BLOCKING");
expect(declarations[2]?.behavior).toBe("NON_BLOCKING");
});
it("omits zero temperature for native audio responses", async () => {

View File

@@ -62,6 +62,7 @@ const GOOGLE_REALTIME_BROWSER_NEW_SESSION_TTL_MS = 60 * 1000;
const GOOGLE_REALTIME_RECONNECT_MAX_ATTEMPTS = 3;
const GOOGLE_REALTIME_RECONNECT_BASE_DELAY_MS = 250;
const GOOGLE_REALTIME_RECONNECT_MAX_DELAY_MS = 2_000;
const GOOGLE_REALTIME_TOOL_NAME_RE = /^[A-Za-z_][A-Za-z0-9_.:-]{0,127}$/;
const MULAW_LINEAR_SAMPLES = new Int16Array(256);
for (let i = 0; i < MULAW_LINEAR_SAMPLES.length; i += 1) {
@@ -338,16 +339,25 @@ function buildRealtimeInputConfig(
}
/**
 * Convert the configured realtime voice tools into Google function declarations.
 *
 * A missing `tools` list is treated as empty. A tool is dropped (the callback
 * yields an empty array) when reading its `name` throws, when the name is not a
 * string, or when it does not match `GOOGLE_REALTIME_TOOL_NAME_RE`. The agent
 * consult tool is additionally marked `NON_BLOCKING`.
 *
 * NOTE(review): this span comes from a rendered diff, so superseded lines
 * (`map`, `tool.name`, `return declaration;`) appear next to their replacements.
 *
 * @param tools - Realtime tools supplied by the caller, possibly undefined.
 * @returns One declaration per tool whose name passed validation.
 */
function buildFunctionDeclarations(tools: RealtimeVoiceTool[] | undefined): FunctionDeclaration[] {
return (tools ?? []).map((tool) => {
return (tools ?? []).flatMap((tool) => {
let name: unknown;
// Reading the property can itself throw (e.g. a throwing getter); treat that as malformed.
try {
name = (tool as { name?: unknown }).name;
} catch {
return [];
}
if (typeof name !== "string" || !GOOGLE_REALTIME_TOOL_NAME_RE.test(name)) {
return [];
}
const declaration: FunctionDeclaration = {
name: tool.name,
name,
description: tool.description,
parametersJsonSchema: tool.parameters,
};
if (tool.name === REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME) {
if (name === REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME) {
declaration.behavior = "NON_BLOCKING" as Behavior;
}
return declaration;
return [declaration];
});
}

View File

@@ -123,6 +123,7 @@ type SentRealtimeEvent = {
};
};
item?: unknown;
tools?: Array<{ name?: string }>;
};
};
@@ -457,11 +458,39 @@ describe("buildOpenAIRealtimeVoiceProvider", () => {
if (!provider.createBrowserSession) {
throw new Error("expected OpenAI realtime provider to support browser sessions");
}
const unreadableToolName = Object.defineProperty(
{
type: "function",
description: "Unreadable",
parameters: { type: "object", properties: {} },
},
"name",
{
get() {
throw new Error("tool name getter exploded");
},
},
);
const session = await provider.createBrowserSession({
providerConfig: { apiKey: "sk-test" }, // pragma: allowlist secret
instructions: "Be concise.",
voice: " Marin ",
tools: [
{
type: "function",
name: "1_lookup",
description: "OpenAI-compatible lookup",
parameters: { type: "object", properties: {} },
},
{
type: "function",
name: "calendar.lookup:next",
description: "Google-only lookup",
parameters: { type: "object", properties: {} },
},
unreadableToolName as never,
],
});
expectRecordFields(requireFetchRequest(), "fetch request", {
@@ -488,6 +517,9 @@ describe("buildOpenAIRealtimeVoiceProvider", () => {
transcription: { model: "gpt-4o-mini-transcribe" },
});
expect(requireNestedRecord(bodySession, ["audio", "output"])).toEqual({ voice: "marin" });
expect(
(bodySession.tools as Array<{ name?: string }> | undefined)?.map((tool) => tool.name),
).toEqual(["1_lookup"]);
expect(bodySession).not.toHaveProperty("temperature");
expectRecordFields(session, "browser session", {
provider: "openai",
@@ -743,9 +775,43 @@ describe("buildOpenAIRealtimeVoiceProvider", () => {
it("waits for session.updated before draining audio and firing onReady", async () => {
const provider = buildOpenAIRealtimeVoiceProvider();
const onReady = vi.fn();
const unreadableToolName = Object.defineProperty(
{
type: "function",
description: "Unreadable",
parameters: { type: "object", properties: {} },
},
"name",
{
get() {
throw new Error("tool name getter exploded");
},
},
);
const bridge = provider.createBridge({
providerConfig: { apiKey: "sk-test" }, // pragma: allowlist secret
instructions: "Be helpful.",
tools: [
{
type: "function",
name: "1_lookup",
description: "OpenAI-compatible lookup",
parameters: { type: "object", properties: {} },
},
{
type: "function",
name: "calendar.lookup:next",
description: "Google-only lookup",
parameters: { type: "object", properties: {} },
},
{
type: "function",
name: "x".repeat(65),
description: "Too long",
parameters: { type: "object", properties: {} },
},
unreadableToolName as never,
],
onAudio: vi.fn(),
onClearAudio: vi.fn(),
onReady,
@@ -776,6 +842,7 @@ describe("buildOpenAIRealtimeVoiceProvider", () => {
model: "gpt-realtime-2",
output_modalities: ["audio"],
});
expect(session.tools?.map((tool) => tool.name)).toEqual(["1_lookup"]);
const inputAudio = requireNestedRecord(session, ["audio", "input"]);
expectRecordFields(inputAudio, "session audio input", {
format: { type: "audio/pcmu" },

View File

@@ -92,6 +92,7 @@ const OPENAI_REALTIME_NO_ACTIVE_RESPONSE_CANCEL_ERROR =
"Cancellation failed: no active response found";
const OPENAI_REALTIME_MAX_SESSION_DURATION_FRAGMENT = "maximum duration";
const OPENAI_REALTIME_DEFAULT_MIN_BARGE_IN_AUDIO_END_MS = 250;
const OPENAI_REALTIME_TOOL_NAME_RE = /^[A-Za-z0-9_-]{1,64}$/;
const OPENAI_REALTIME_VOICES = [
"alloy",
"ash",
@@ -345,6 +346,24 @@ function isOpenAIRealtimeMaxSessionDurationError(detail: string): boolean {
);
}
/**
 * Keep only the realtime tools whose names OpenAI Realtime can accept.
 *
 * A tool is quarantined (silently dropped) when its `name` is not a string,
 * does not match `OPENAI_REALTIME_TOOL_NAME_RE`, or when reading or copying the
 * tool throws. Surviving tools are shallow-copied with the validated name
 * pinned as a plain data property.
 *
 * @param tools - Caller-supplied realtime tools, possibly undefined.
 * @returns The accepted tools in their original order, or `undefined` when none
 *   remain so callers can omit `tools`/`tool_choice` from the session payload.
 */
function normalizeOpenAIRealtimeTools(
  tools: RealtimeVoiceTool[] | undefined,
): RealtimeVoiceTool[] | undefined {
  const normalized: RealtimeVoiceTool[] = [];
  for (const tool of tools ?? []) {
    try {
      const name: unknown = (tool as { name?: unknown }).name;
      if (typeof name === "string" && OPENAI_REALTIME_TOOL_NAME_RE.test(name)) {
        // The spread re-invokes every enumerable accessor on the tool (including
        // `name`), so the copy must stay inside the guard: one exploding getter
        // should drop this tool only, not abort the whole session setup.
        normalized.push({ ...tool, name });
      }
    } catch {
      // Malformed tool object: skip it and keep processing the remaining tools.
    }
  }
  return normalized.length > 0 ? normalized : undefined;
}
async function resolveOpenAIRealtimeDefaultAuth(params: {
configuredApiKey: string | undefined;
cfg: RealtimeVoiceBrowserSessionCreateRequest["cfg"] | undefined;
@@ -902,6 +921,7 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
private buildGaSessionUpdate(): RealtimeGaSessionUpdate {
const cfg = this.config;
const tools = normalizeOpenAIRealtimeTools(cfg.tools);
const autoRespondToAudio = cfg.autoRespondToAudio ?? true;
const interruptResponseOnInputAudio = cfg.interruptResponseOnInputAudio ?? autoRespondToAudio;
return {
@@ -931,9 +951,9 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
},
},
...(cfg.reasoningEffort ? { reasoning: { effort: cfg.reasoningEffort } } : {}),
...(cfg.tools && cfg.tools.length > 0
...(tools
? {
tools: cfg.tools,
tools,
tool_choice: "auto",
}
: {}),
@@ -948,6 +968,7 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
private buildAzureDeploymentSessionUpdate(): RealtimeAzureDeploymentSessionUpdate {
const cfg = this.config;
const format = this.resolveLegacyRealtimeAudioFormat();
const tools = normalizeOpenAIRealtimeTools(cfg.tools);
return {
type: "session.update",
session: {
@@ -965,9 +986,9 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
create_response: cfg.autoRespondToAudio ?? true,
},
temperature: cfg.temperature ?? 0.8,
...(cfg.tools && cfg.tools.length > 0
...(tools
? {
tools: cfg.tools,
tools,
tool_choice: "auto",
}
: {}),
@@ -1375,6 +1396,7 @@ async function createOpenAIRealtimeBrowserSession(
model,
});
const voice = normalizeOpenAIRealtimeVoice(req.voice) ?? config.voice ?? "alloy";
const tools = normalizeOpenAIRealtimeTools(req.tools);
const session: Record<string, unknown> = {
type: "realtime",
model,
@@ -1401,8 +1423,8 @@ async function createOpenAIRealtimeBrowserSession(
output: { voice },
},
};
if (req.tools && req.tools.length > 0) {
session.tools = req.tools;
if (tools) {
session.tools = tools;
session.tool_choice = "auto";
}
const reasoningEffort = trimToUndefined(req.reasoningEffort) ?? config.reasoningEffort;

View File

@@ -114,4 +114,52 @@ describe("realtime voice agent consult tool", () => {
).toStrictEqual([REALTIME_VOICE_AGENT_CONSULT_TOOL, customTool]);
expect(resolveRealtimeVoiceAgentConsultTools("none", [customTool])).toEqual([customTool]);
});
it("skips malformed custom realtime tool names without dropping valid tools", () => {
  // Well-formed baseline; every variant below only overrides `name`.
  const baseTool = {
    type: "function" as const,
    name: "custom_lookup",
    description: "Custom lookup",
    parameters: { type: "object" as const, properties: {} },
  };
  // Names with provider-specific punctuation must pass through this shared layer.
  const dottedColonTool = { ...baseTool, name: "calendar.lookup:next" };
  const slashTool = { ...baseTool, name: "bad/name" };
  // Any read of `name` on this tool throws.
  const explodingNameTool = Object.defineProperty(
    {
      type: "function",
      description: "Unreadable",
      parameters: { type: "object", properties: {} },
    },
    "name",
    {
      get() {
        throw new Error("tool name getter exploded");
      },
    },
  );

  // Non-string, unreadable, empty, and whitespace-bearing names are rejected.
  const rejected = [
    { ...baseTool, name: 123 } as never,
    explodingNameTool as never,
    { ...baseTool, name: "" },
    { ...baseTool, name: " " },
    { ...baseTool, name: "bad name" },
  ];
  const accepted = [baseTool, dottedColonTool, slashTool];

  const resolved = resolveRealtimeVoiceAgentConsultTools("safe-read-only", [
    ...rejected,
    ...accepted,
  ]);

  // The built-in consult tool stays first, followed by the accepted tools in input order.
  expect(resolved).toStrictEqual([REALTIME_VOICE_AGENT_CONSULT_TOOL, ...accepted]);
});
});

View File

@@ -80,6 +80,8 @@ const SAFE_READ_ONLY_TOOLS = [
"memory_get",
] as const;
const REALTIME_VOICE_TOOL_NAME_WHITESPACE_RE = /\s/u;
/** Type guard for user/config supplied consult tool policies. */
export function isRealtimeVoiceAgentConsultToolPolicy(
value: unknown,
@@ -101,6 +103,20 @@ export function resolveRealtimeVoiceAgentConsultToolPolicy(
return isRealtimeVoiceAgentConsultToolPolicy(normalized) ? normalized : fallback;
}
/**
 * Safely read the name of a custom realtime tool.
 *
 * @param tool - Custom tool whose `name` may be missing, mistyped, or throw on access.
 * @returns The name when it is a non-empty string without whitespace; otherwise
 *   `undefined` (including when reading the property throws).
 */
function readCustomRealtimeVoiceToolName(tool: RealtimeVoiceTool): string | undefined {
  let candidate: unknown;
  try {
    candidate = (tool as { name?: unknown }).name;
  } catch {
    // A throwing accessor counts as "no usable name".
    return undefined;
  }
  if (typeof candidate !== "string" || candidate.length === 0) {
    return undefined;
  }
  if (REALTIME_VOICE_TOOL_NAME_WHITESPACE_RE.test(candidate)) {
    return undefined;
  }
  return candidate;
}
/** Merge the shared consult tool with provider/plugin custom realtime tools. */
export function resolveRealtimeVoiceAgentConsultTools(
policy: RealtimeVoiceAgentConsultToolPolicy,
@@ -113,8 +129,9 @@ export function resolveRealtimeVoiceAgentConsultTools(
// Keep the built-in consult tool first and prevent custom tools from
// replacing its provider-facing contract by name.
for (const tool of customTools) {
if (!tools.has(tool.name)) {
tools.set(tool.name, tool);
const name = readCustomRealtimeVoiceToolName(tool);
if (name && !tools.has(name)) {
tools.set(name, tool);
}
}
return [...tools.values()];