mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-06 05:51:15 +08:00
fix(talk): quarantine malformed realtime tool names
This commit is contained in:
@@ -192,6 +192,19 @@ describe("buildGoogleRealtimeVoiceProvider", () => {
|
||||
|
||||
it("connects with Google Live setup config and tool declarations", async () => {
|
||||
const provider = buildGoogleRealtimeVoiceProvider();
|
||||
const unreadableToolName = Object.defineProperty(
|
||||
{
|
||||
type: "function",
|
||||
description: "Unreadable",
|
||||
parameters: { type: "object", properties: {} },
|
||||
},
|
||||
"name",
|
||||
{
|
||||
get() {
|
||||
throw new Error("tool name getter exploded");
|
||||
},
|
||||
},
|
||||
);
|
||||
const bridge = provider.createBridge({
|
||||
providerConfig: {
|
||||
apiKey: "gemini-key",
|
||||
@@ -217,6 +230,43 @@ describe("buildGoogleRealtimeVoiceProvider", () => {
|
||||
required: ["query"],
|
||||
},
|
||||
},
|
||||
{
|
||||
type: "function",
|
||||
name: "calendar.lookup:next",
|
||||
description: "Google lookup",
|
||||
parameters: {
|
||||
type: "object",
|
||||
properties: {},
|
||||
},
|
||||
},
|
||||
{
|
||||
type: "function",
|
||||
name: "1_lookup",
|
||||
description: "OpenAI-only lookup",
|
||||
parameters: {
|
||||
type: "object",
|
||||
properties: {},
|
||||
},
|
||||
},
|
||||
{
|
||||
type: "function",
|
||||
name: "bad/name",
|
||||
description: "Malformed lookup",
|
||||
parameters: {
|
||||
type: "object",
|
||||
properties: {},
|
||||
},
|
||||
},
|
||||
{
|
||||
type: "function",
|
||||
name: `x${"a".repeat(128)}`,
|
||||
description: "Too long",
|
||||
parameters: {
|
||||
type: "object",
|
||||
properties: {},
|
||||
},
|
||||
},
|
||||
unreadableToolName as never,
|
||||
{
|
||||
type: "function",
|
||||
name: "openclaw_agent_consult",
|
||||
@@ -289,16 +339,18 @@ describe("buildGoogleRealtimeVoiceProvider", () => {
|
||||
},
|
||||
required: ["query"],
|
||||
});
|
||||
expect(declarations[1]?.name).toBe("openclaw_agent_consult");
|
||||
expect(declarations[1]?.description).toBe("Ask OpenClaw");
|
||||
expect(declarations[1]?.parametersJsonSchema).toEqual({
|
||||
expect(declarations[1]?.name).toBe("calendar.lookup:next");
|
||||
expect(declarations[1]?.description).toBe("Google lookup");
|
||||
expect(declarations[2]?.name).toBe("openclaw_agent_consult");
|
||||
expect(declarations[2]?.description).toBe("Ask OpenClaw");
|
||||
expect(declarations[2]?.parametersJsonSchema).toEqual({
|
||||
type: "object",
|
||||
properties: {
|
||||
question: { type: "string" },
|
||||
},
|
||||
required: ["question"],
|
||||
});
|
||||
expect(declarations[1]?.behavior).toBe("NON_BLOCKING");
|
||||
expect(declarations[2]?.behavior).toBe("NON_BLOCKING");
|
||||
});
|
||||
|
||||
it("omits zero temperature for native audio responses", async () => {
|
||||
|
||||
@@ -62,6 +62,7 @@ const GOOGLE_REALTIME_BROWSER_NEW_SESSION_TTL_MS = 60 * 1000;
|
||||
const GOOGLE_REALTIME_RECONNECT_MAX_ATTEMPTS = 3;
|
||||
const GOOGLE_REALTIME_RECONNECT_BASE_DELAY_MS = 250;
|
||||
const GOOGLE_REALTIME_RECONNECT_MAX_DELAY_MS = 2_000;
|
||||
const GOOGLE_REALTIME_TOOL_NAME_RE = /^[A-Za-z_][A-Za-z0-9_.:-]{0,127}$/;
|
||||
const MULAW_LINEAR_SAMPLES = new Int16Array(256);
|
||||
|
||||
for (let i = 0; i < MULAW_LINEAR_SAMPLES.length; i += 1) {
|
||||
@@ -338,16 +339,25 @@ function buildRealtimeInputConfig(
|
||||
}
|
||||
|
||||
/**
 * Convert realtime voice tools into Google Live function declarations.
 *
 * A tool is kept only when its `name` is a string accepted by
 * `GOOGLE_REALTIME_TOOL_NAME_RE` (a leading letter or underscore followed by
 * up to 127 letters, digits, `_`, `.`, `:` or `-`). Anything else is dropped
 * so that one malformed tool cannot prevent the remaining tools from being
 * declared.
 *
 * @param tools - Tools requested for the session; `undefined` is treated as
 *   an empty list.
 * @returns One declaration per accepted tool, in input order. The agent
 *   consult tool is additionally marked `NON_BLOCKING`.
 */
function buildFunctionDeclarations(tools: RealtimeVoiceTool[] | undefined): FunctionDeclaration[] {
  return (tools ?? []).flatMap((tool) => {
    // Every property read on the tool stays inside the try: a null entry or a
    // throwing accessor on `name`, `description` or `parameters` quarantines
    // just this tool instead of aborting the whole declaration build.
    try {
      const name: unknown = (tool as { name?: unknown }).name;
      if (typeof name !== "string" || !GOOGLE_REALTIME_TOOL_NAME_RE.test(name)) {
        return [];
      }
      const declaration: FunctionDeclaration = {
        name,
        description: tool.description,
        parametersJsonSchema: tool.parameters,
      };
      if (name === REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME) {
        declaration.behavior = "NON_BLOCKING" as Behavior;
      }
      return [declaration];
    } catch {
      return [];
    }
  });
}
|
||||
|
||||
|
||||
@@ -123,6 +123,7 @@ type SentRealtimeEvent = {
|
||||
};
|
||||
};
|
||||
item?: unknown;
|
||||
tools?: Array<{ name?: string }>;
|
||||
};
|
||||
};
|
||||
|
||||
@@ -457,11 +458,39 @@ describe("buildOpenAIRealtimeVoiceProvider", () => {
|
||||
if (!provider.createBrowserSession) {
|
||||
throw new Error("expected OpenAI realtime provider to support browser sessions");
|
||||
}
|
||||
const unreadableToolName = Object.defineProperty(
|
||||
{
|
||||
type: "function",
|
||||
description: "Unreadable",
|
||||
parameters: { type: "object", properties: {} },
|
||||
},
|
||||
"name",
|
||||
{
|
||||
get() {
|
||||
throw new Error("tool name getter exploded");
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
const session = await provider.createBrowserSession({
|
||||
providerConfig: { apiKey: "sk-test" }, // pragma: allowlist secret
|
||||
instructions: "Be concise.",
|
||||
voice: " Marin ",
|
||||
tools: [
|
||||
{
|
||||
type: "function",
|
||||
name: "1_lookup",
|
||||
description: "OpenAI-compatible lookup",
|
||||
parameters: { type: "object", properties: {} },
|
||||
},
|
||||
{
|
||||
type: "function",
|
||||
name: "calendar.lookup:next",
|
||||
description: "Google-only lookup",
|
||||
parameters: { type: "object", properties: {} },
|
||||
},
|
||||
unreadableToolName as never,
|
||||
],
|
||||
});
|
||||
|
||||
expectRecordFields(requireFetchRequest(), "fetch request", {
|
||||
@@ -488,6 +517,9 @@ describe("buildOpenAIRealtimeVoiceProvider", () => {
|
||||
transcription: { model: "gpt-4o-mini-transcribe" },
|
||||
});
|
||||
expect(requireNestedRecord(bodySession, ["audio", "output"])).toEqual({ voice: "marin" });
|
||||
expect(
|
||||
(bodySession.tools as Array<{ name?: string }> | undefined)?.map((tool) => tool.name),
|
||||
).toEqual(["1_lookup"]);
|
||||
expect(bodySession).not.toHaveProperty("temperature");
|
||||
expectRecordFields(session, "browser session", {
|
||||
provider: "openai",
|
||||
@@ -743,9 +775,43 @@ describe("buildOpenAIRealtimeVoiceProvider", () => {
|
||||
it("waits for session.updated before draining audio and firing onReady", async () => {
|
||||
const provider = buildOpenAIRealtimeVoiceProvider();
|
||||
const onReady = vi.fn();
|
||||
const unreadableToolName = Object.defineProperty(
|
||||
{
|
||||
type: "function",
|
||||
description: "Unreadable",
|
||||
parameters: { type: "object", properties: {} },
|
||||
},
|
||||
"name",
|
||||
{
|
||||
get() {
|
||||
throw new Error("tool name getter exploded");
|
||||
},
|
||||
},
|
||||
);
|
||||
const bridge = provider.createBridge({
|
||||
providerConfig: { apiKey: "sk-test" }, // pragma: allowlist secret
|
||||
instructions: "Be helpful.",
|
||||
tools: [
|
||||
{
|
||||
type: "function",
|
||||
name: "1_lookup",
|
||||
description: "OpenAI-compatible lookup",
|
||||
parameters: { type: "object", properties: {} },
|
||||
},
|
||||
{
|
||||
type: "function",
|
||||
name: "calendar.lookup:next",
|
||||
description: "Google-only lookup",
|
||||
parameters: { type: "object", properties: {} },
|
||||
},
|
||||
{
|
||||
type: "function",
|
||||
name: "x".repeat(65),
|
||||
description: "Too long",
|
||||
parameters: { type: "object", properties: {} },
|
||||
},
|
||||
unreadableToolName as never,
|
||||
],
|
||||
onAudio: vi.fn(),
|
||||
onClearAudio: vi.fn(),
|
||||
onReady,
|
||||
@@ -776,6 +842,7 @@ describe("buildOpenAIRealtimeVoiceProvider", () => {
|
||||
model: "gpt-realtime-2",
|
||||
output_modalities: ["audio"],
|
||||
});
|
||||
expect(session.tools?.map((tool) => tool.name)).toEqual(["1_lookup"]);
|
||||
const inputAudio = requireNestedRecord(session, ["audio", "input"]);
|
||||
expectRecordFields(inputAudio, "session audio input", {
|
||||
format: { type: "audio/pcmu" },
|
||||
|
||||
@@ -92,6 +92,7 @@ const OPENAI_REALTIME_NO_ACTIVE_RESPONSE_CANCEL_ERROR =
|
||||
"Cancellation failed: no active response found";
|
||||
const OPENAI_REALTIME_MAX_SESSION_DURATION_FRAGMENT = "maximum duration";
|
||||
const OPENAI_REALTIME_DEFAULT_MIN_BARGE_IN_AUDIO_END_MS = 250;
|
||||
const OPENAI_REALTIME_TOOL_NAME_RE = /^[A-Za-z0-9_-]{1,64}$/;
|
||||
const OPENAI_REALTIME_VOICES = [
|
||||
"alloy",
|
||||
"ash",
|
||||
@@ -345,6 +346,24 @@ function isOpenAIRealtimeMaxSessionDurationError(detail: string): boolean {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Keep only the realtime tools whose names OpenAI session payloads can carry.
 *
 * A tool survives when its `name` is a string accepted by
 * `OPENAI_REALTIME_TOOL_NAME_RE` (1 to 64 letters, digits, `_` or `-`).
 * Malformed entries are skipped so that they cannot take valid tools down
 * with them.
 *
 * @param tools - Tools requested for the session; `undefined` is treated as
 *   an empty list.
 * @returns Shallow copies of the accepted tools in input order, or
 *   `undefined` when none are accepted so callers can omit `tools` and
 *   `tool_choice` entirely.
 */
function normalizeOpenAIRealtimeTools(
  tools: RealtimeVoiceTool[] | undefined,
): RealtimeVoiceTool[] | undefined {
  const normalized: RealtimeVoiceTool[] = [];
  for (const tool of tools ?? []) {
    try {
      const name: unknown = (tool as { name?: unknown }).name;
      if (typeof name === "string" && OPENAI_REALTIME_TOOL_NAME_RE.test(name)) {
        // The spread invokes every enumerable own getter on the tool, so it
        // has to sit inside the try as well: a throwing accessor on any field
        // quarantines this tool rather than failing the whole session setup.
        normalized.push({ ...tool, name });
      }
    } catch {
      // Unreadable tool (null entry or throwing accessor): skip it.
    }
  }
  return normalized.length > 0 ? normalized : undefined;
}
|
||||
|
||||
async function resolveOpenAIRealtimeDefaultAuth(params: {
|
||||
configuredApiKey: string | undefined;
|
||||
cfg: RealtimeVoiceBrowserSessionCreateRequest["cfg"] | undefined;
|
||||
@@ -902,6 +921,7 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
|
||||
|
||||
private buildGaSessionUpdate(): RealtimeGaSessionUpdate {
|
||||
const cfg = this.config;
|
||||
const tools = normalizeOpenAIRealtimeTools(cfg.tools);
|
||||
const autoRespondToAudio = cfg.autoRespondToAudio ?? true;
|
||||
const interruptResponseOnInputAudio = cfg.interruptResponseOnInputAudio ?? autoRespondToAudio;
|
||||
return {
|
||||
@@ -931,9 +951,9 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
|
||||
},
|
||||
},
|
||||
...(cfg.reasoningEffort ? { reasoning: { effort: cfg.reasoningEffort } } : {}),
|
||||
...(cfg.tools && cfg.tools.length > 0
|
||||
...(tools
|
||||
? {
|
||||
tools: cfg.tools,
|
||||
tools,
|
||||
tool_choice: "auto",
|
||||
}
|
||||
: {}),
|
||||
@@ -948,6 +968,7 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
|
||||
private buildAzureDeploymentSessionUpdate(): RealtimeAzureDeploymentSessionUpdate {
|
||||
const cfg = this.config;
|
||||
const format = this.resolveLegacyRealtimeAudioFormat();
|
||||
const tools = normalizeOpenAIRealtimeTools(cfg.tools);
|
||||
return {
|
||||
type: "session.update",
|
||||
session: {
|
||||
@@ -965,9 +986,9 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
|
||||
create_response: cfg.autoRespondToAudio ?? true,
|
||||
},
|
||||
temperature: cfg.temperature ?? 0.8,
|
||||
...(cfg.tools && cfg.tools.length > 0
|
||||
...(tools
|
||||
? {
|
||||
tools: cfg.tools,
|
||||
tools,
|
||||
tool_choice: "auto",
|
||||
}
|
||||
: {}),
|
||||
@@ -1375,6 +1396,7 @@ async function createOpenAIRealtimeBrowserSession(
|
||||
model,
|
||||
});
|
||||
const voice = normalizeOpenAIRealtimeVoice(req.voice) ?? config.voice ?? "alloy";
|
||||
const tools = normalizeOpenAIRealtimeTools(req.tools);
|
||||
const session: Record<string, unknown> = {
|
||||
type: "realtime",
|
||||
model,
|
||||
@@ -1401,8 +1423,8 @@ async function createOpenAIRealtimeBrowserSession(
|
||||
output: { voice },
|
||||
},
|
||||
};
|
||||
if (req.tools && req.tools.length > 0) {
|
||||
session.tools = req.tools;
|
||||
if (tools) {
|
||||
session.tools = tools;
|
||||
session.tool_choice = "auto";
|
||||
}
|
||||
const reasoningEffort = trimToUndefined(req.reasoningEffort) ?? config.reasoningEffort;
|
||||
|
||||
@@ -114,4 +114,52 @@ describe("realtime voice agent consult tool", () => {
|
||||
).toStrictEqual([REALTIME_VOICE_AGENT_CONSULT_TOOL, customTool]);
|
||||
expect(resolveRealtimeVoiceAgentConsultTools("none", [customTool])).toEqual([customTool]);
|
||||
});
|
||||
|
||||
// Pins the shared merge contract: the resolver discards only custom tools
// whose name is unreadable, not a string, empty, or contains whitespace.
// Names that merely look unusual ("calendar.lookup:next", "bad/name") are
// kept at this layer; this test expects them in the merged output.
it("skips malformed custom realtime tool names without dropping valid tools", () => {
|
||||
const validTool = {
|
||||
type: "function" as const,
|
||||
name: "custom_lookup",
|
||||
description: "Custom lookup",
|
||||
parameters: { type: "object" as const, properties: {} },
|
||||
};
|
||||
const googleCompatibleTool = {
|
||||
...validTool,
|
||||
name: "calendar.lookup:next",
|
||||
};
|
||||
const providerSpecificTool = {
|
||||
...validTool,
|
||||
name: "bad/name",
|
||||
};
|
||||
// Fixture whose `name` accessor throws on every read, standing in for a
// tool object that cannot be inspected safely.
const unreadableToolName = Object.defineProperty(
|
||||
{
|
||||
type: "function",
|
||||
description: "Unreadable",
|
||||
parameters: { type: "object", properties: {} },
|
||||
},
|
||||
"name",
|
||||
{
|
||||
get() {
|
||||
throw new Error("tool name getter exploded");
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
// The first five inputs are malformed and must vanish; the last three must
// survive in their original order, after the built-in consult tool.
expect(
|
||||
resolveRealtimeVoiceAgentConsultTools("safe-read-only", [
|
||||
{ ...validTool, name: 123 } as never,
|
||||
unreadableToolName as never,
|
||||
{ ...validTool, name: "" },
|
||||
{ ...validTool, name: " " },
|
||||
{ ...validTool, name: "bad name" },
|
||||
validTool,
|
||||
googleCompatibleTool,
|
||||
providerSpecificTool,
|
||||
]),
|
||||
).toStrictEqual([
|
||||
REALTIME_VOICE_AGENT_CONSULT_TOOL,
|
||||
validTool,
|
||||
googleCompatibleTool,
|
||||
providerSpecificTool,
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -80,6 +80,8 @@ const SAFE_READ_ONLY_TOOLS = [
|
||||
"memory_get",
|
||||
] as const;
|
||||
|
||||
const REALTIME_VOICE_TOOL_NAME_WHITESPACE_RE = /\s/u;
|
||||
|
||||
/** Type guard for user/config supplied consult tool policies. */
|
||||
export function isRealtimeVoiceAgentConsultToolPolicy(
|
||||
value: unknown,
|
||||
@@ -101,6 +103,20 @@ export function resolveRealtimeVoiceAgentConsultToolPolicy(
|
||||
return isRealtimeVoiceAgentConsultToolPolicy(normalized) ? normalized : fallback;
|
||||
}
|
||||
|
||||
/**
 * Safely extract the name of a custom realtime tool.
 *
 * @param tool - Custom tool whose `name` may be missing, mistyped, or backed
 *   by an accessor that throws.
 * @returns The name when it is a non-empty string containing no whitespace;
 *   otherwise `undefined` (including when reading the property throws).
 */
function readCustomRealtimeVoiceToolName(tool: RealtimeVoiceTool): string | undefined {
  let candidate: unknown;
  try {
    candidate = (tool as { name?: unknown }).name;
  } catch {
    // The property could not be read at all; treat the tool as nameless.
    return undefined;
  }
  if (typeof candidate !== "string") {
    return undefined;
  }
  if (candidate.length === 0 || REALTIME_VOICE_TOOL_NAME_WHITESPACE_RE.test(candidate)) {
    return undefined;
  }
  return candidate;
}
|
||||
|
||||
/** Merge the shared consult tool with provider/plugin custom realtime tools. */
|
||||
export function resolveRealtimeVoiceAgentConsultTools(
|
||||
policy: RealtimeVoiceAgentConsultToolPolicy,
|
||||
@@ -113,8 +129,9 @@ export function resolveRealtimeVoiceAgentConsultTools(
|
||||
// Keep the built-in consult tool first and prevent custom tools from
|
||||
// replacing its provider-facing contract by name.
|
||||
for (const tool of customTools) {
|
||||
if (!tools.has(tool.name)) {
|
||||
tools.set(tool.name, tool);
|
||||
const name = readCustomRealtimeVoiceToolName(tool);
|
||||
if (name && !tools.has(name)) {
|
||||
tools.set(name, tool);
|
||||
}
|
||||
}
|
||||
return [...tools.values()];
|
||||
|
||||
Reference in New Issue
Block a user