From 9ead0ae9219e87ba2c223ada385f380fea9415cc Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 1 Jun 2026 23:03:27 -0400 Subject: [PATCH] fix: repair live model inference edge cases Fix live model inference edge cases across provider streaming, model switching, outbound delivery, and gateway tool resolution. Includes live/provider issue fixes and leaves #89100 explicitly partial for the remaining FM-2 group routing case. --- .../Sources/OpenClaw/GatewayConnection.swift | 14 +- .../Sources/OpenClaw/TalkModeRuntime.swift | 2 +- .../Sources/OpenClaw/VoiceWakeForwarder.swift | 3 +- .../GatewayConnectionControlTests.swift | 59 +++ .../VoiceWakeForwarderTests.swift | 3 +- docs/cli/onboard.md | 1 + docs/start/wizard-cli-reference.md | 2 +- extensions/acpx/src/runtime.test.ts | 6 +- extensions/acpx/src/runtime.ts | 17 +- .../src/app-server/dynamic-tool-build.test.ts | 18 + .../src/app-server/dynamic-tool-profile.ts | 27 ++ .../src/app-server/event-projector.test.ts | 75 ++++ .../codex/src/app-server/event-projector.ts | 77 ++++ .../codex/src/app-server/run-attempt.ts | 6 +- .../codex/src/app-server/session-binding.ts | 6 + .../thread-lifecycle.binding.test.ts | 51 ++- .../src/app-server/thread-lifecycle.test.ts | 26 +- .../codex/src/app-server/thread-lifecycle.ts | 42 +- extensions/google/api.test.ts | 41 ++ extensions/google/api.ts | 2 + extensions/google/model-id.test.ts | 5 + extensions/google/model-id.ts | 3 + extensions/google/provider-models.test.ts | 18 + extensions/google/provider-models.ts | 4 + extensions/google/provider-policy.ts | 31 +- .../google/provider-registration.test.ts | 67 +++ extensions/google/provider-registration.ts | 11 +- .../provider-model-id-normalization.test.ts | 3 + .../src/provider-model-id-normalize.test.ts | 5 + .../src/provider-model-id-normalize.ts | 3 + src/agents/acp-spawn.test.ts | 11 +- src/agents/acp-spawn.ts | 1 - .../agent-command.live-model-switch.test.ts | 179 +++++++- src/agents/agent-command.ts | 36 +- src/agents/cli-runner.reliability.test.ts | 38 ++ src/agents/cli-runner.ts | 8 +- src/agents/cli-runner/types.ts | 1 + .../model.provider-runtime.test-support.ts | 11 + .../embedded-agent-runner/model.test.ts | 21 + .../google-simple-completion-stream.test.ts | 147 +++++++ src/agents/google-simple-completion-stream.ts | 60 +++ src/agents/openai-transport-stream.test.ts | 261 +++++++++++- src/agents/openai-transport-stream.ts | 390 ++++++++++++++++-- src/agents/provider-transport-fetch.test.ts | 39 ++ src/agents/provider-transport-fetch.ts | 42 ++ .../simple-completion-transport.test.ts | 72 ++++ src/agents/simple-completion-transport.ts | 5 + src/agents/tools/cron-tool-canonicalize.ts | 29 ++ src/agents/tools/cron-tool.test.ts | 132 ++++++ src/agents/tools/message-tool.test.ts | 76 ++++ src/agents/tools/message-tool.ts | 96 +++-- .../reply/get-reply-run.media-only.test.ts | 46 +++ src/auto-reply/reply/get-reply-run.ts | 26 +- src/auto-reply/reply/strip-inbound-meta.ts | 4 + src/auto-reply/status.test.ts | 32 ++ src/cli/program/register.onboard.ts | 8 +- src/commands/agent-command.test-mocks.ts | 13 + ...re.gateway-auth.prompt-auth-config.test.ts | 38 ++ src/commands/configure.gateway-auth.ts | 26 ++ src/commands/onboard-custom-config.test.ts | 49 ++- src/commands/onboard-custom-config.ts | 35 +- src/commands/onboard-custom.test.ts | 82 +++- src/commands/onboard-custom.ts | 90 ++-- .../local/auth-choice.test.ts | 20 + src/commands/onboard-types.ts | 2 +- .../node-child-process.test-support.ts | 2 +- src/gateway/sessions-patch.test.ts | 7 +- src/gateway/sessions-patch.ts | 2 +- src/gateway/tool-resolution.exclude.test.ts | 79 ++++ src/gateway/tool-resolution.ts | 1 - src/llm/utils/json-parse.test.ts | 17 + src/llm/utils/json-parse.ts | 32 +- src/status/status-message.ts | 2 +- src/wizard/i18n/locales/en.ts | 8 +- src/wizard/i18n/locales/zh-CN.ts | 8 +- src/wizard/i18n/locales/zh-TW.ts | 8 +- 76 files changed, 2704 insertions(+), 216 deletions(-) create mode 100644 extensions/google/provider-registration.test.ts create mode 100644 src/agents/google-simple-completion-stream.test.ts create mode 100644 src/agents/google-simple-completion-stream.ts create mode 100644 src/gateway/tool-resolution.exclude.test.ts diff --git a/apps/macos/Sources/OpenClaw/GatewayConnection.swift b/apps/macos/Sources/OpenClaw/GatewayConnection.swift index a2b87d6af8ad..33757a134632 100644 --- a/apps/macos/Sources/OpenClaw/GatewayConnection.swift +++ b/apps/macos/Sources/OpenClaw/GatewayConnection.swift @@ -514,12 +514,16 @@ extension GatewayConnection { var params: [String: AnyCodable] = [ "message": AnyCodable(trimmed), "sessionKey": AnyCodable(sessionKey), - "thinking": AnyCodable(invocation.thinking ?? "default"), "deliver": AnyCodable(invocation.deliver), "to": AnyCodable(invocation.to ?? ""), "channel": AnyCodable(invocation.channel.rawValue), "idempotencyKey": AnyCodable(invocation.idempotencyKey), ] + if let thinking = invocation.thinking?.trimmingCharacters(in: .whitespacesAndNewlines), + !thinking.isEmpty + { + params["thinking"] = AnyCodable(thinking) + } if let timeout = invocation.timeoutSeconds { params["timeout"] = AnyCodable(timeout) } @@ -664,7 +668,7 @@ extension GatewayConnection { func chatSend( sessionKey: String, message: String, - thinking: String, + thinking: String?, idempotencyKey: String, attachments: [OpenClawChatAttachmentPayload], timeoutMs: Int = 30000) async throws -> OpenClawChatSendResponse @@ -673,10 +677,14 @@ extension GatewayConnection { var params: [String: AnyCodable] = [ "sessionKey": AnyCodable(resolvedKey), "message": AnyCodable(message), - "thinking": AnyCodable(thinking), "idempotencyKey": AnyCodable(idempotencyKey), "timeoutMs": AnyCodable(timeoutMs), ] + if let thinking = thinking?.trimmingCharacters(in: .whitespacesAndNewlines), + !thinking.isEmpty + { + params["thinking"] = AnyCodable(thinking) + } if !attachments.isEmpty { let encoded = attachments.map { att in diff --git a/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift b/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift index da89b9ee0980..4edfd4b27591 100644 --- a/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift +++ b/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift @@ -387,7 +387,7 @@ actor TalkModeRuntime { let response = try await GatewayConnection.shared.chatSend( sessionKey: sessionKey, message: prompt, - thinking: "low", + thinking: nil, idempotencyKey: runId, attachments: []) guard self.isCurrent(gen) else { return } diff --git a/apps/macos/Sources/OpenClaw/VoiceWakeForwarder.swift b/apps/macos/Sources/OpenClaw/VoiceWakeForwarder.swift index 962cfc838868..62b31304e052 100644 --- a/apps/macos/Sources/OpenClaw/VoiceWakeForwarder.swift +++ b/apps/macos/Sources/OpenClaw/VoiceWakeForwarder.swift @@ -34,7 +34,7 @@ enum VoiceWakeForwarder { struct ForwardOptions { var sessionKey: String = "main" - var thinking: String = "low" + var thinking: String? var deliver: Bool = true var to: String? var channel: GatewayAgentChannel = .webchat @@ -97,7 +97,6 @@ enum VoiceWakeForwarder { return ForwardOptions( sessionKey: sessionKey, - thinking: "low", deliver: true, to: to, channel: channel, diff --git a/apps/macos/Tests/OpenClawIPCTests/GatewayConnectionControlTests.swift b/apps/macos/Tests/OpenClawIPCTests/GatewayConnectionControlTests.swift index d4b18a42a8d0..0fdaf8a425e8 100644 --- a/apps/macos/Tests/OpenClawIPCTests/GatewayConnectionControlTests.swift +++ b/apps/macos/Tests/OpenClawIPCTests/GatewayConnectionControlTests.swift @@ -173,9 +173,57 @@ private func makeTestGatewayConnection() -> (GatewayConnection, FakeWebSocketSes let json = try JSONSerialization.jsonObject(with: payloadData) as? [String: Any] let params = json?["params"] as? [String: Any] + #expect(params?["thinking"] == nil) #expect(params?["voiceWakeTrigger"] as? String == "") } + @Test func `chat send omits thinking when inheriting session default`() async throws { + let recorder = WebSocketMessageRecorder() + let session = GatewayTestWebSocketSession(taskFactory: { + GatewayTestWebSocketTask(sendHook: { task, message, sendIndex in + recorder.append(message) + guard sendIndex > 0, + let data = Self.messageData(message), + let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any], + let id = json["id"] as? String + else { return } + task.emitReceiveSuccess(.data(Self.chatSendOkResponseData(id: id))) + }) + }) + let connection = GatewayConnection( + configProvider: { + (url: URL(string: "ws://127.0.0.1:1")!, token: nil, password: nil) + }, + sessionBox: WebSocketSessionBox(session: session)) + + _ = try await connection.chatSend( + sessionKey: "main", + message: "hello", + thinking: nil, + idempotencyKey: "chat-1", + attachments: []) + await connection.shutdown() + + guard let chatMessage = recorder.snapshot().reversed().first(where: { message in + guard let data = Self.messageData(message), + let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any] + else { return false } + return json["method"] as? String == "chat.send" + }) else { + Issue.record("expected chat.send websocket payload") + return + } + + guard let payloadData = Self.messageData(chatMessage) else { + Issue.record("unexpected chat.send websocket message type") + return + } + + let json = try JSONSerialization.jsonObject(with: payloadData) as? [String: Any] + let params = json?["params"] as? [String: Any] + #expect(params?["thinking"] == nil) + } + private static func messageData(_ message: URLSessionWebSocketTask.Message) -> Data? { switch message { case let .string(text): @@ -186,4 +234,15 @@ private func makeTestGatewayConnection() -> (GatewayConnection, FakeWebSocketSes nil } } + + private static func chatSendOkResponseData(id: String) -> Data { + Data(""" + { + "type": "res", + "id": "\(id)", + "ok": true, + "payload": { "runId": "chat-1", "status": "ok" } + } + """.utf8) + } } diff --git a/apps/macos/Tests/OpenClawIPCTests/VoiceWakeForwarderTests.swift b/apps/macos/Tests/OpenClawIPCTests/VoiceWakeForwarderTests.swift index 8b5059d8bf84..8be786c8cf5d 100644 --- a/apps/macos/Tests/OpenClawIPCTests/VoiceWakeForwarderTests.swift +++ b/apps/macos/Tests/OpenClawIPCTests/VoiceWakeForwarderTests.swift @@ -14,7 +14,7 @@ import Testing @Test func `forward options defaults`() { let opts = VoiceWakeForwarder.ForwardOptions() #expect(opts.sessionKey == "main") - #expect(opts.thinking == "low") + #expect(opts.thinking == nil) #expect(opts.deliver == true) #expect(opts.to == nil) #expect(opts.channel == .webchat) @@ -38,6 +38,7 @@ import Testing #expect(opts.channel == .telegram) #expect(opts.to == "telegram:6812765697") #expect(opts.voiceWakeTrigger == "open claw") + #expect(opts.thinking == nil) #expect(opts.channel.shouldDeliver(opts.deliver) == true) } diff --git a/docs/cli/onboard.md b/docs/cli/onboard.md index 76b40f3aff21..b21a287aab45 100644 --- a/docs/cli/onboard.md +++ b/docs/cli/onboard.md @@ -93,6 +93,7 @@ openclaw onboard --non-interactive \ `--custom-api-key` is optional in non-interactive mode. If omitted, onboarding checks `CUSTOM_API_KEY`. OpenClaw marks common vision model IDs as image-capable automatically. Pass `--custom-image-input` for unknown custom vision IDs, or `--custom-text-input` to force text-only metadata. +Use `--custom-compatibility openai-responses` for OpenAI-compatible endpoints that support `/v1/responses` but not `/v1/chat/completions`. LM Studio also supports a provider-specific key flag in non-interactive mode: diff --git a/docs/start/wizard-cli-reference.md b/docs/start/wizard-cli-reference.md index 7b04eb98d9ec..5adce84f4215 100644 --- a/docs/start/wizard-cli-reference.md +++ b/docs/start/wizard-cli-reference.md @@ -219,7 +219,7 @@ What you set: - `--custom-model-id` - `--custom-api-key` (optional; falls back to `CUSTOM_API_KEY`) - `--custom-provider-id` (optional) - - `--custom-compatibility ` (optional; default `openai`) + - `--custom-compatibility ` (optional; default `openai`) - `--custom-image-input` / `--custom-text-input` (optional; override inferred model input capability) diff --git a/extensions/acpx/src/runtime.test.ts b/extensions/acpx/src/runtime.test.ts index 536e0c2f41b7..ca3b7518e476 100644 --- a/extensions/acpx/src/runtime.test.ts +++ b/extensions/acpx/src/runtime.test.ts @@ -215,6 +215,7 @@ describe("AcpxRuntime fresh reset wrapper", () => { agent: "codex", mode: "persistent", model: "gpt-5.4", + sessionOptions: { model: "gpt-5.4" }, }); }); @@ -619,7 +620,7 @@ describe("AcpxRuntime fresh reset wrapper", () => { ); }); - it("does not normalize model startup for non-Codex ACP agents", async () => { + it("passes model startup through sessionOptions for non-Codex ACP agents", async () => { const baseStore: TestSessionStore = { load: vi.fn(async () => undefined), save: vi.fn(async () => {}), @@ -648,6 +649,7 @@ describe("AcpxRuntime fresh reset wrapper", () => { agent: "main", mode: "persistent", model: "openai/gpt-5.5", + sessionOptions: { model: "openai/gpt-5.5" }, }); }); @@ -694,6 +696,7 @@ describe("AcpxRuntime fresh reset wrapper", () => { agent: "codex", mode: "persistent", model: "gpt-5.5", + sessionOptions: { model: "gpt-5.5" }, }); }); @@ -728,6 +731,7 @@ describe("AcpxRuntime fresh reset wrapper", () => { mode: "persistent", model: "gpt-5.4/xhigh", thinking: "x-high", + sessionOptions: { model: "gpt-5.4/xhigh" }, }); }); diff --git a/extensions/acpx/src/runtime.ts b/extensions/acpx/src/runtime.ts index d66828b3c2b0..349b6af46b9c 100644 --- a/extensions/acpx/src/runtime.ts +++ b/extensions/acpx/src/runtime.ts @@ -17,6 +17,7 @@ import { type AcpRuntimeStatus, type AcpRuntimeTurn, type AcpRuntimeTurnResult, + type SessionAgentOptions, } from "acpx/runtime"; import { parseStrictPositiveInteger } from "openclaw/plugin-sdk/number-runtime"; import { redactSensitiveText } from "openclaw/plugin-sdk/security-runtime"; @@ -49,6 +50,8 @@ type AcpxRuntimeTestOptions = Record & { openclawProcessCleanup?: AcpxProcessCleanupDeps; }; type OpenClawRuntimeTurnInput = Parameters>[0]; +type OpenClawRuntimeEnsureInput = Parameters[0]; +type AcpxDelegateEnsureInput = Parameters[0]; type ResetAwareSessionStore = AcpSessionStore & { markFresh: (sessionKey: string) => void; @@ -547,6 +550,16 @@ function codexAcpSessionModelId(override: CodexAcpModelOverride): string { : override.model; } +function withAcpxSessionOptions(input: OpenClawRuntimeEnsureInput): AcpxDelegateEnsureInput { + const existingOptions = (input as { sessionOptions?: SessionAgentOptions }).sessionOptions; + const model = input.model?.trim() || existingOptions?.model; + const sessionOptions = model ? { ...existingOptions, model } : existingOptions; + return { + ...input, + ...(sessionOptions ? { sessionOptions } : {}), + } as AcpxDelegateEnsureInput; +} + function quoteShellArg(value: string): string { if (/^[A-Za-z0-9_./:=@+-]+$/.test(value)) { return value; @@ -942,7 +955,7 @@ export class AcpxRuntime implements AcpRuntime { this.withCodexWrapperDiagnostics({ command: stableLaunchCommand, fallbackCode: "ACP_SESSION_INIT_FAILED", - run: () => delegate.ensureSession(input), + run: () => delegate.ensureSession(withAcpxSessionOptions(input)), }), }); } @@ -962,7 +975,7 @@ export class AcpxRuntime implements AcpRuntime { this.withCodexWrapperDiagnostics({ command: stableLaunchCommand, fallbackCode: "ACP_SESSION_INIT_FAILED", - run: () => delegate.ensureSession(normalizedInput), + run: () => delegate.ensureSession(withAcpxSessionOptions(normalizedInput)), }), ), }); diff --git a/extensions/codex/src/app-server/dynamic-tool-build.test.ts b/extensions/codex/src/app-server/dynamic-tool-build.test.ts index 206b131f3bb5..6a36829547b7 100644 --- a/extensions/codex/src/app-server/dynamic-tool-build.test.ts +++ b/extensions/codex/src/app-server/dynamic-tool-build.test.ts @@ -18,6 +18,8 @@ import { import { filterCodexDynamicTools, resolveCodexDynamicToolsLoading, + resolveCodexDynamicToolsLoadingForModel, + shouldUseDirectCodexDynamicToolsForModel, } from "./dynamic-tool-profile.js"; import { createCodexDynamicToolBridge } from "./dynamic-tools.js"; import { createCodexTestModel } from "./test-support.js"; @@ -179,6 +181,22 @@ describe("Codex app-server dynamic tool build", () => { expect(resolveCodexDynamicToolsLoading({}, privateQaCodexEnv)).toBe("direct"); }); + it("uses direct dynamic tools for OpenAI nano models without tool_search support", () => { + const tools = [createRuntimeDynamicTool("message"), createRuntimeDynamicTool("web_search")]; + const toolBridge = createCodexDynamicToolBridge({ + tools, + signal: new AbortController().signal, + loading: resolveCodexDynamicToolsLoadingForModel({}, "openai/gpt-5.4-nano"), + }); + + expect(shouldUseDirectCodexDynamicToolsForModel("gpt-5.4-nano")).toBe(true); + expect(resolveCodexDynamicToolsLoadingForModel({}, "gpt-5.4-nano")).toBe("direct"); + expect(resolveCodexDynamicToolsLoadingForModel({}, "gpt-5.5")).toBe("searchable"); + const webSearch = toolBridge.specs.find((tool) => tool.name === "web_search"); + expect(webSearch).not.toHaveProperty("deferLoading"); + expect(webSearch).not.toHaveProperty("namespace"); + }); + it("quarantines unreadable tool entries before Codex-specific filtering", async () => { const messageTool = createRuntimeDynamicTool("message"); const sourceTools = new Proxy([messageTool] as RuntimeDynamicToolForTest[], { diff --git a/extensions/codex/src/app-server/dynamic-tool-profile.ts b/extensions/codex/src/app-server/dynamic-tool-profile.ts index e6a2e30b95f4..dfb1e87e9fe4 100644 --- a/extensions/codex/src/app-server/dynamic-tool-profile.ts +++ b/extensions/codex/src/app-server/dynamic-tool-profile.ts @@ -47,6 +47,33 @@ export function resolveCodexDynamicToolsLoading( : (config.codexDynamicToolsLoading ?? "searchable"); } +function normalizeCodexModelId(modelId: string | undefined): string { + const normalized = modelId?.trim().toLowerCase(); + if (!normalized) { + return ""; + } + return normalized.includes("/") ? normalized.split("/").at(-1)! : normalized; +} + +export function shouldUseDirectCodexDynamicToolsForModel(modelId: string | undefined): boolean { + return shouldDisableCodexToolSearchForModel(modelId); +} + +export function shouldDisableCodexToolSearchForModel(modelId: string | undefined): boolean { + return normalizeCodexModelId(modelId) === "gpt-5.4-nano"; +} + +export function resolveCodexDynamicToolsLoadingForModel( + config: Pick, + modelId: string | undefined, + env: CodexDynamicToolProfileEnv = process.env, +): CodexDynamicToolsLoading { + const loading = resolveCodexDynamicToolsLoading(config, env); + return loading === "searchable" && shouldUseDirectCodexDynamicToolsForModel(modelId) + ? "direct" + : loading; +} + export function filterCodexDynamicTools( tools: T[], config: Pick, diff --git a/extensions/codex/src/app-server/event-projector.test.ts b/extensions/codex/src/app-server/event-projector.test.ts index 4bf004b1ed30..53f8a1c7dcc1 100644 --- a/extensions/codex/src/app-server/event-projector.test.ts +++ b/extensions/codex/src/app-server/event-projector.test.ts @@ -1652,6 +1652,81 @@ describe("CodexAppServerEventProjector", () => { }); }); + it("fails closed when a native tool call finishes without a matching result", async () => { + const trajectoryRecorder = { + filePath: "trajectory.jsonl", + recordEvent: vi.fn(), + flush: vi.fn(async () => undefined), + }; + const projector = await createProjector(await createParams(), { trajectoryRecorder }); + + await projector.handleNotification( + forCurrentTurn("item/started", { + item: { + type: "commandExecution", + id: "cmd-denied", + command: "node scripts/report.js --publish", + cwd: "/workspace", + processId: null, + source: "agent", + status: "inProgress", + commandActions: [], + aggregatedOutput: null, + exitCode: null, + durationMs: null, + }, + }), + ); + await projector.handleNotification( + turnCompleted([ + { + type: "agentMessage", + id: "msg-denied", + text: "The requested publish command was denied before execution.", + }, + ]), + ); + + const result = projector.buildResult(buildEmptyToolTelemetry()); + + expect(String(result.promptError)).toContain("without a matching tool.result"); + expect(result.promptErrorSource).toBe("prompt"); + expect(result.messagesSnapshot.map((message) => message.role)).toEqual([ + "user", + "assistant", + "toolResult", + "assistant", + ]); + const toolResultMessage = requireRecord(result.messagesSnapshot[2], "tool result message"); + expect(toolResultMessage.toolCallId).toBe("cmd-denied"); + expect(toolResultMessage.toolName).toBe("bash"); + expect(toolResultMessage.isError).toBe(true); + const toolResultContent = requireArray(toolResultMessage.content, "tool result content"); + expect(JSON.stringify(toolResultContent)).toContain("matching tool.result"); + expect(trajectoryRecorder.recordEvent).toHaveBeenCalledWith("tool.call", { + threadId: THREAD_ID, + turnId: TURN_ID, + itemId: "cmd-denied", + toolCallId: "cmd-denied", + name: "bash", + arguments: { + command: "node scripts/report.js --publish", + cwd: "/workspace", + }, + }); + expect(trajectoryRecorder.recordEvent).toHaveBeenCalledWith("tool.result", { + threadId: THREAD_ID, + turnId: TURN_ID, + itemId: "cmd-denied", + toolCallId: "cmd-denied", + name: "bash", + status: "failed", + isError: true, + result: { status: "failed", reason: "missing_tool_result" }, + output: expect.stringContaining("without a matching tool.result"), + }); + }); + it("uses streamed command output when final command snapshots omit aggregated output", async () => { const onAgentEvent = vi.fn(); const trajectoryRecorder = { diff --git a/extensions/codex/src/app-server/event-projector.ts b/extensions/codex/src/app-server/event-projector.ts index e73132bd953b..ae377c89f8ac 100644 --- a/extensions/codex/src/app-server/event-projector.ts +++ b/extensions/codex/src/app-server/event-projector.ts @@ -109,6 +109,8 @@ const CODEX_PROMPT_TOTAL_INPUT_KEYS = [ const MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM = 20; const TOOL_TRANSCRIPT_OUTPUT_MAX_CHARS = 12_000; +const MISSING_TOOL_RESULT_ERROR = + "OpenClaw recorded a native Codex tool.call without a matching tool.result before the turn completed."; const GENERATED_IMAGE_MEDIA_SUBDIR = "tool-image-generation"; const BYTES_PER_MB = 1024 * 1024; // Match OpenClaw's default image media cap for generated image tool outputs. @@ -172,6 +174,10 @@ export class CodexAppServerEventProjector { private readonly toolTranscriptMessages: AgentMessage[] = []; private readonly toolTranscriptCallIds = new Set(); private readonly toolTranscriptResultIds = new Set(); + private readonly toolTranscriptNamesById = new Map(); + private readonly toolTrajectoryCallIds = new Set(); + private readonly toolTrajectoryResultIds = new Set(); + private readonly toolTrajectoryNamesById = new Map(); private readonly transcriptToolProgressCallIds = new Set(); private lastNativeToolError: EmbeddedRunAttemptResult["lastToolError"]; private readonly nativeGeneratedMediaUrls = new Set(); @@ -185,6 +191,7 @@ export class CodexAppServerEventProjector { private completedTurn: CodexTurn | undefined; private promptError: unknown; private promptErrorSource: EmbeddedRunAttemptResult["promptErrorSource"] = null; + private synthesizedMissingToolResultError: string | null = null; private aborted = false; private tokenUsage: ReturnType; private guardianReviewCount = 0; @@ -285,6 +292,12 @@ export class CodexAppServerEventProjector { this.reasoningItemOrder, ).join("\n\n"); const planText = collectTextValues(this.planTextByItem).join("\n\n"); + this.synthesizeMissingToolResults({ + failClosed: + !this.completedTurn || + this.completedTurn.status !== "completed" || + assistantTexts.length > 0, + }); const lastAssistant = assistantTexts.length > 0 ? this.createAssistantMessage(assistantTexts.join("\n\n")) @@ -328,6 +341,7 @@ export class CodexAppServerEventProjector { const turnFailed = this.completedTurn?.status === "failed"; const promptError = this.promptError ?? + this.synthesizedMissingToolResultError ?? (turnFailed ? (this.completedTurn?.error?.message ?? "codex app-server turn failed") : null); const agentHarnessResultClassification = classifyAgentHarnessTerminalOutcome({ assistantTexts, @@ -1125,6 +1139,8 @@ export class CodexAppServerEventProjector { status: ReturnType; }): void { if (params.phase === "start") { + this.toolTrajectoryCallIds.add(params.item.id); + this.toolTrajectoryNamesById.set(params.item.id, params.name); this.options.trajectoryRecorder?.recordEvent("tool.call", { threadId: this.threadId, turnId: this.turnId, @@ -1135,6 +1151,7 @@ export class CodexAppServerEventProjector { }); return; } + this.toolTrajectoryResultIds.add(params.item.id); const toolResult = itemToolResult(params.item).result; const output = itemOutputText(params.item, this.toolResultOutputTextByItem); this.options.trajectoryRecorder?.recordEvent("tool.result", { @@ -1396,6 +1413,7 @@ export class CodexAppServerEventProjector { return; } this.toolTranscriptCallIds.add(params.id); + this.toolTranscriptNamesById.set(params.id, params.name); this.toolTranscriptArgumentsById.set(params.id, params.arguments); if (!shouldEmitTranscriptToolProgress(params.name, params.arguments)) { this.transcriptToolProgressSuppressedIds.add(params.id); @@ -1425,6 +1443,61 @@ export class CodexAppServerEventProjector { ); } + private synthesizeMissingToolResults(params: { failClosed: boolean }): void { + if (!params.failClosed) { + return; + } + const missingTranscriptIds = [...this.toolTranscriptCallIds].filter( + (id) => !this.toolTranscriptResultIds.has(id), + ); + const missingTrajectoryIds = [...this.toolTrajectoryCallIds].filter( + (id) => !this.toolTrajectoryResultIds.has(id), + ); + if (missingTranscriptIds.length === 0 && missingTrajectoryIds.length === 0) { + return; + } + + for (const id of missingTranscriptIds) { + const name = this.toolTranscriptNamesById.get(id) ?? this.toolTrajectoryNamesById.get(id); + if (!name) { + continue; + } + this.recordToolTranscriptResult({ + id, + name, + text: formatMissingToolResultError({ id, name }), + isError: true, + }); + } + + for (const id of missingTrajectoryIds) { + const name = this.toolTrajectoryNamesById.get(id) ?? this.toolTranscriptNamesById.get(id); + if (!name) { + continue; + } + this.toolTrajectoryResultIds.add(id); + const text = formatMissingToolResultError({ id, name }); + this.options.trajectoryRecorder?.recordEvent("tool.result", { + threadId: this.threadId, + turnId: this.turnId, + itemId: id, + toolCallId: id, + name, + status: "failed", + isError: true, + result: { status: "failed", reason: "missing_tool_result" }, + output: text, + }); + } + + const missingCount = new Set([...missingTranscriptIds, ...missingTrajectoryIds]).size; + this.synthesizedMissingToolResultError = + missingCount === 1 + ? MISSING_TOOL_RESULT_ERROR + : `${MISSING_TOOL_RESULT_ERROR} missingToolResultCount=${missingCount}`; + this.promptErrorSource = this.promptErrorSource ?? "prompt"; + } + private emitTranscriptToolCallProgress(params: ToolTranscriptCallInput): void { if (!shouldEmitTranscriptToolProgress(params.name, params.arguments)) { return; @@ -1954,6 +2027,10 @@ function itemStatus(item: CodexThreadItem): "completed" | "failed" | "running" | return "completed"; } +function formatMissingToolResultError(params: { id: string; name: string }): string { + return `${MISSING_TOOL_RESULT_ERROR} toolCallId=${params.id}; toolName=${params.name}`; +} + function isNonSuccessItemStatus(status: ReturnType): boolean { return status === "failed" || status === "blocked"; } diff --git a/extensions/codex/src/app-server/run-attempt.ts b/extensions/codex/src/app-server/run-attempt.ts index bc63d4fde833..6061a71530f4 100644 --- a/extensions/codex/src/app-server/run-attempt.ts +++ b/extensions/codex/src/app-server/run-attempt.ts @@ -165,7 +165,7 @@ import { } from "./dynamic-tool-execution.js"; import { filterCodexDynamicTools, - resolveCodexDynamicToolsLoading, + resolveCodexDynamicToolsLoadingForModel, } from "./dynamic-tool-profile.js"; import { createCodexDynamicToolBridge } from "./dynamic-tools.js"; import { handleCodexAppServerElicitationRequest } from "./elicitation-bridge.js"; @@ -595,7 +595,7 @@ export async function runCodexAppServerAttempt( tools, registeredTools, signal: runAbortController.signal, - loading: resolveCodexDynamicToolsLoading(pluginConfig), + loading: resolveCodexDynamicToolsLoadingForModel(pluginConfig, params.modelId), directToolNames: shouldForceMessageTool(params) ? ["message"] : [], hookContext: { agentId: sessionAgentId, @@ -2640,7 +2640,7 @@ export const testing = { buildDynamicTools, filterCodexDynamicToolsForAllowlist, includeForcedCodexDynamicToolAllow, - resolveCodexDynamicToolsLoading, + resolveCodexDynamicToolsLoadingForModel, resolveCodexAppServerHookChannelId, buildCodexAppServerPromptTimeoutOutcome, resolveOpenClawCodingToolsSessionKeys, diff --git a/extensions/codex/src/app-server/session-binding.ts b/extensions/codex/src/app-server/session-binding.ts index c67b2c8342f9..80401ca007e6 100644 --- a/extensions/codex/src/app-server/session-binding.ts +++ b/extensions/codex/src/app-server/session-binding.ts @@ -40,6 +40,7 @@ export type CodexAppServerThreadBinding = { sandbox?: CodexAppServerSandboxMode; serviceTier?: CodexServiceTier; dynamicToolsFingerprint?: string; + dynamicToolsContainDeferred?: boolean; userMcpServersFingerprint?: string; mcpServersFingerprint?: string; nativeHookRelayGeneration?: string; @@ -111,6 +112,10 @@ export async function readCodexAppServerBinding( typeof parsed.dynamicToolsFingerprint === "string" ? parsed.dynamicToolsFingerprint : undefined, + dynamicToolsContainDeferred: + typeof parsed.dynamicToolsContainDeferred === "boolean" + ? parsed.dynamicToolsContainDeferred + : undefined, userMcpServersFingerprint: typeof parsed.userMcpServersFingerprint === "string" ? parsed.userMcpServersFingerprint @@ -170,6 +175,7 @@ export async function writeCodexAppServerBinding( sandbox: binding.sandbox, serviceTier: binding.serviceTier, dynamicToolsFingerprint: binding.dynamicToolsFingerprint, + dynamicToolsContainDeferred: binding.dynamicToolsContainDeferred, userMcpServersFingerprint: binding.userMcpServersFingerprint, mcpServersFingerprint: binding.mcpServersFingerprint, nativeHookRelayGeneration: binding.nativeHookRelayGeneration, diff --git a/extensions/codex/src/app-server/thread-lifecycle.binding.test.ts b/extensions/codex/src/app-server/thread-lifecycle.binding.test.ts index 9dad32225132..1feb44eb5cba 100644 --- a/extensions/codex/src/app-server/thread-lifecycle.binding.test.ts +++ b/extensions/codex/src/app-server/thread-lifecycle.binding.test.ts @@ -63,6 +63,16 @@ function createNamedDynamicTool( }; } +function createDeferredNamedDynamicTool( + name: string, +): Parameters[0]["dynamicTools"][number] { + return { + ...createNamedDynamicTool(name), + namespace: "openclaw", + deferLoading: true, + }; +} + function createPluginAppConfigPatch() { return { apps: { @@ -243,6 +253,42 @@ describe("Codex app-server thread lifecycle bindings", () => { expect(request.mock.calls.map(([method]) => method)).toEqual(["thread/start", "thread/resume"]); }); + it("starts a fresh Codex thread when dynamic tools switch from deferred to direct", async () => { + const sessionFile = path.join(tempDir, "session.jsonl"); + const workspaceDir = path.join(tempDir, "workspace"); + const params = createParams(sessionFile, workspaceDir); + const appServer = createThreadLifecycleAppServerOptions(); + let starts = 0; + const request = vi.fn(async (method: string) => { + if (method === "thread/start") { + starts += 1; + return threadStartResult(`thread-${starts}`); + } + if (method === "thread/resume") { + return threadStartResult("thread-existing"); + } + throw new Error(`unexpected method: ${method}`); + }); + + await startOrResumeThread({ + client: { request } as never, + params, + cwd: workspaceDir, + dynamicTools: [createDeferredNamedDynamicTool("web_search")], + appServer, + }); + const binding = await startOrResumeThread({ + client: { request } as never, + params, + cwd: workspaceDir, + dynamicTools: [createNamedDynamicTool("web_search")], + appServer, + }); + + expect(binding.threadId).toBe("thread-2"); + expect(request.mock.calls.map(([method]) => method)).toEqual(["thread/start", "thread/start"]); + }); + it("resumes a bound Codex thread when dynamic tools are reordered", async () => { const sessionFile = path.join(tempDir, "session.jsonl"); const workspaceDir = path.join(tempDir, "workspace"); @@ -489,7 +535,7 @@ describe("Codex app-server thread lifecycle bindings", () => { client: { request } as never, params, cwd: workspaceDir, - dynamicTools: [createMessageDynamicTool("Send and manage messages.")], + dynamicTools: [createDeferredNamedDynamicTool("message")], appServer, }); const fingerprint = (await readCodexAppServerBinding(sessionFile))?.dynamicToolsFingerprint; @@ -504,12 +550,13 @@ describe("Codex app-server thread lifecycle bindings", () => { client: { request } as never, params, cwd: workspaceDir, - dynamicTools: [createMessageDynamicTool("Send and manage messages.")], + dynamicTools: [createDeferredNamedDynamicTool("message")], appServer, }); const binding = await readCodexAppServerBinding(sessionFile); expect(binding?.dynamicToolsFingerprint).toBe(fingerprint); + expect(binding?.dynamicToolsContainDeferred).toBe(true); expect(binding?.threadId).toBe("thread-1"); expect(request.mock.calls.map(([method]) => method)).toEqual([ "thread/start", diff --git a/extensions/codex/src/app-server/thread-lifecycle.test.ts b/extensions/codex/src/app-server/thread-lifecycle.test.ts index 6cc92b7bb7a7..008939e6bd9b 100644 --- a/extensions/codex/src/app-server/thread-lifecycle.test.ts +++ b/extensions/codex/src/app-server/thread-lifecycle.test.ts @@ -21,6 +21,7 @@ function createAttemptParams(params: { bootstrapContextMode?: "full" | "lightweight"; bootstrapContextRunKind?: "default" | "heartbeat" | "cron"; images?: EmbeddedRunAttemptParams["images"]; + modelId?: string; }): EmbeddedRunAttemptParams { const authProfileProviders = params.authProfileProviders ?? @@ -30,7 +31,7 @@ function createAttemptParams(params: { const authProfileType = params.authProfileType ?? "oauth"; return { provider: params.provider, - modelId: "gpt-5.4", + modelId: params.modelId ?? "gpt-5.4", prompt: "test prompt", authProfileId: params.authProfileId, ...(params.bootstrapContextMode ? { bootstrapContextMode: params.bootstrapContextMode } : {}), @@ -151,7 +152,7 @@ describe("Codex app-server native code mode config", () => { expect(instructions).not.toContain("Deferred searchable OpenClaw dynamic tools available"); }); - it("keeps durable dynamic tool fingerprints independent from presentation mode", () => { + it("keeps durable dynamic tool fingerprints scoped to loading mode", () => { const inputSchema = { type: "object", additionalProperties: false, @@ -177,7 +178,7 @@ describe("Codex app-server native code mode config", () => { }, ]); - expect(searchableFingerprint).toBe(directFingerprint); + expect(searchableFingerprint).not.toBe(directFingerprint); }); it("keeps OpenClaw skill catalogs out of developer instructions", () => { @@ -214,6 +215,25 @@ describe("Codex app-server native code mode config", () => { expect(request.personality).toBe("none"); }); + it("disables Codex tool-search features for nano models", () => { + const request = buildThreadStartParams( + createAttemptParams({ provider: "openai", modelId: "gpt-5.4-nano" }), + { + cwd: "/repo", + dynamicTools: [], + appServer: createAppServerOptions() as never, + developerInstructions: "test instructions", + }, + ); + + expect(request.config).toEqual({ + "features.code_mode": true, + "features.code_mode_only": false, + "features.apply_patch_streaming_events": true, + "features.multi_agent": false, + }); + }); + it("removes Codex model personality on thread/resume", () => { const request = buildThreadResumeParams(createAttemptParams({ provider: "openai" }), { threadId: "thread-1", diff --git a/extensions/codex/src/app-server/thread-lifecycle.ts b/extensions/codex/src/app-server/thread-lifecycle.ts index c8fa957cf65c..c41c2d013592 100644 --- a/extensions/codex/src/app-server/thread-lifecycle.ts +++ b/extensions/codex/src/app-server/thread-lifecycle.ts @@ -20,6 +20,7 @@ import { resolveCodexContextEngineProjectionMaxChars, resolveCodexContextEngineProjectionReserveTokens, } from "./context-engine-projection.js"; +import { shouldDisableCodexToolSearchForModel } from "./dynamic-tool-profile.js"; import { invalidInlineImageText, sanitizeInlineImageDataUrl } from "./image-payload-sanitizer.js"; import { isCodexPluginThreadBindingStale, @@ -114,6 +115,10 @@ const CODEX_LIGHTWEIGHT_CONTEXT_THREAD_CONFIG: JsonObject = { project_doc_max_bytes: 0, }; +const CODEX_TOOL_SEARCH_UNSUPPORTED_THREAD_CONFIG: JsonObject = { + "features.multi_agent": false, +}; + type CodexThreadLifecycleTimingSpan = { name: string; durationMs: number; @@ -253,6 +258,9 @@ export async function startOrResumeThread(params: { const dynamicToolsFingerprint = lifecycleTiming.measureSync("fingerprint_dynamic_tools", () => fingerprintDynamicTools(params.dynamicTools), ); + const dynamicToolsContainDeferred = params.dynamicTools.some( + (tool) => tool.deferLoading === true, + ); const contextEngineBinding = lifecycleTiming.measureSync("context_engine_binding", () => buildContextEngineBinding(params.params, params.contextEngineProjection), ); @@ -404,6 +412,23 @@ export async function startOrResumeThread(params: { await clearCodexAppServerBinding(params.params.sessionFile); binding = undefined; } + if (binding?.threadId) { + if ( + binding.dynamicToolsFingerprint && + params.dynamicTools.length > 0 && + binding.dynamicToolsContainDeferred !== dynamicToolsContainDeferred && + (binding.dynamicToolsContainDeferred !== undefined || !dynamicToolsContainDeferred) + ) { + embeddedAgentLog.debug( + "codex app-server dynamic tool loading changed; starting a new thread", + { + threadId: binding.threadId, + }, + ); + await clearCodexAppServerBinding(params.params.sessionFile); + binding = undefined; + } + } if (binding?.threadId) { // `/codex resume ` writes a binding before the next turn can know // the dynamic tool catalog, so only invalidate fingerprints we actually have. @@ -489,6 +514,7 @@ export async function startOrResumeThread(params: { model: params.params.modelId, modelProvider: response.modelProvider ?? fallbackModelProvider, dynamicToolsFingerprint, + dynamicToolsContainDeferred, userMcpServersFingerprint, mcpServersFingerprint: nextMcpServersFingerprint, nativeHookRelayGeneration: @@ -533,6 +559,7 @@ export async function startOrResumeThread(params: { model: params.params.modelId, modelProvider: response.modelProvider ?? fallbackModelProvider, dynamicToolsFingerprint, + dynamicToolsContainDeferred, userMcpServersFingerprint, mcpServersFingerprint: nextMcpServersFingerprint, nativeHookRelayGeneration: @@ -619,6 +646,7 @@ export async function startOrResumeThread(params: { model: response.model ?? params.params.modelId, modelProvider: response.modelProvider ?? modelProvider, dynamicToolsFingerprint, + dynamicToolsContainDeferred, userMcpServersFingerprint, mcpServersFingerprint: nextMcpServersFingerprint, nativeHookRelayGeneration: finalConfigPatch.nativeHookRelayGeneration, @@ -664,6 +692,7 @@ export async function startOrResumeThread(params: { model: response.model ?? params.params.modelId, modelProvider: response.modelProvider ?? modelProvider, dynamicToolsFingerprint, + dynamicToolsContainDeferred, userMcpServersFingerprint, mcpServersFingerprint: nextMcpServersFingerprint, nativeHookRelayGeneration: finalConfigPatch.nativeHookRelayGeneration, @@ -924,7 +953,14 @@ function buildCodexRuntimeThreadConfigForRun( config: JsonObject | undefined, options: { nativeCodeModeEnabled?: boolean; nativeCodeModeOnlyEnabled?: boolean } = {}, ): JsonObject { - const runtimeConfig = buildCodexRuntimeThreadConfig(config, options); + const baseConfig = buildCodexRuntimeThreadConfig(config, options); + const runtimeConfig = + mergeCodexThreadConfigs( + baseConfig, + shouldDisableCodexToolSearchForModel(params.modelId) + ? CODEX_TOOL_SEARCH_UNSUPPORTED_THREAD_CONFIG + : undefined, + ) ?? baseConfig; if (params.bootstrapContextMode !== "lightweight") { return runtimeConfig; } @@ -1114,9 +1150,7 @@ function fingerprintDynamicToolSpec(tool: JsonValue): JsonValue { for (const [key, child] of Object.entries(tool).toSorted(([left], [right]) => left.localeCompare(right), )) { - // Tool-search presentation can change per turn without changing the - // durable app-server execution contract for an existing thread. - if (key === "description" || key === "deferLoading" || key === "namespace") { + if (key === "description") { continue; } stable[key] = stabilizeJsonValue(child); diff --git a/extensions/google/api.test.ts b/extensions/google/api.test.ts index 8af508dd223b..8019f72aa90f 100644 --- a/extensions/google/api.test.ts +++ b/extensions/google/api.test.ts @@ -1,6 +1,8 @@ import { describe, expect, it } from "vitest"; import { isGoogleGenerativeAiApi, + isGoogleVertexBaseUrl, + isGoogleVertexHostname, normalizeGoogleApiBaseUrl, normalizeGoogleGenerativeAiBaseUrl, normalizeGoogleProviderConfig, @@ -83,6 +85,23 @@ describe("google generative ai helpers", () => { models: [{ api: "openai-completions" }], }), ).toBe(false); + expect( + shouldNormalizeGoogleGenerativeAiProviderConfig("google-vertex", { + baseUrl: "https://aiplatform.googleapis.com", + }), + ).toBe(false); + }); + + it("detects native Google Vertex hosts by hostname only", () => { + expect(isGoogleVertexHostname("aiplatform.googleapis.com")).toBe(true); + expect(isGoogleVertexHostname("us-central1-aiplatform.googleapis.com")).toBe(true); + expect(isGoogleVertexHostname("generativelanguage.googleapis.com")).toBe(false); + expect(isGoogleVertexHostname("evil-aiplatform.googleapis.com.attacker.com")).toBe(false); + expect( + isGoogleVertexBaseUrl( + "https://generativelanguage.googleapis.com/v1beta/proxy/aiplatform.googleapis.com", + ), + ).toBe(false); }); it("normalizes transport baseUrls only for Google Generative AI", () => { @@ -114,6 +133,28 @@ describe("google generative ai helpers", () => { api: "openai-completions", baseUrl: "https://generativelanguage.googleapis.com", }); + expect( + resolveGoogleGenerativeAiTransport({ + provider: "google-vertex", + api: undefined, + baseUrl: "https://us-central1-aiplatform.googleapis.com", + }), + ).toEqual({ + api: "google-vertex", + baseUrl: "https://us-central1-aiplatform.googleapis.com", + }); + expect( + resolveGoogleGenerativeAiTransport({ + provider: "google-vertex", + api: "openai-completions", + baseUrl: + "https://aiplatform.googleapis.com/v1/projects/test/locations/us-central1/endpoints/openapi", + }), + ).toEqual({ + api: "openai-completions", + baseUrl: + "https://aiplatform.googleapis.com/v1/projects/test/locations/us-central1/endpoints/openapi", + }); }); it("normalizes google-vertex model ids without rewriting the OpenAI-compatible baseUrl", () => { diff --git a/extensions/google/api.ts b/extensions/google/api.ts index 81fa6fce0045..626c9567d84a 100644 --- a/extensions/google/api.ts +++ b/extensions/google/api.ts @@ -30,6 +30,8 @@ export { export { DEFAULT_GOOGLE_API_BASE_URL, isGoogleGenerativeAiApi, + isGoogleVertexBaseUrl, + isGoogleVertexHostname, normalizeGoogleApiBaseUrl, normalizeGoogleGenerativeAiBaseUrl, normalizeGoogleProviderConfig, diff --git a/extensions/google/model-id.test.ts b/extensions/google/model-id.test.ts index a3d4b3e7f9eb..ea92aa56f180 100644 --- a/extensions/google/model-id.test.ts +++ b/extensions/google/model-id.test.ts @@ -40,4 +40,9 @@ describe("google model id helpers", () => { expect(normalizeGoogleModelId("gemini-3.1-flash-lite")).toBe("gemini-3.1-flash-lite"); expect(normalizeGoogleModelId("gemini-3.1-flash-lite-preview")).toBe("gemini-3.1-flash-lite"); }); + + it("maps the old Gemma 4 26B shorthand to Google's canonical API id", () => { + expect(normalizeGoogleModelId("gemma-4-26b")).toBe("gemma-4-26b-a4b-it"); + expect(normalizeGoogleModelId("google/gemma-4-26b")).toBe("google/gemma-4-26b-a4b-it"); + }); }); diff --git a/extensions/google/model-id.ts b/extensions/google/model-id.ts index 8a0037722d39..e4f9007acc16 100644 --- a/extensions/google/model-id.ts +++ b/extensions/google/model-id.ts @@ -27,6 +27,9 @@ export function normalizeGoogleModelId(id: string): string { if (id === "gemini-3.1-flash" || id === "gemini-3.1-flash-preview") { return "gemini-3-flash-preview"; } + if (id === "gemma-4-26b") { + return "gemma-4-26b-a4b-it"; + } return id; } diff --git a/extensions/google/provider-models.test.ts b/extensions/google/provider-models.test.ts index b039e639f46b..8198e7d7eeee 100644 --- a/extensions/google/provider-models.test.ts +++ b/extensions/google/provider-models.test.ts @@ -494,6 +494,24 @@ describe("resolveGoogleGeminiForwardCompatModel", () => { }); }); + it("canonicalizes Gemma 4 26B shorthand before cloning templates", () => { + const model = resolveGoogleGeminiForwardCompatModel({ + providerId: "google", + ctx: createContext({ + provider: "google", + modelId: "gemma-4-26b", + models: [createTemplateModel("google", "gemini-3-flash-preview", { reasoning: false })], + }), + }); + + expectModelFields(model, { + provider: "google", + id: "gemma-4-26b-a4b-it", + api: "google-generative-ai", + reasoning: true, + }); + }); + it("preserves template reasoning for non-Gemma 4 gemma models", () => { const model = resolveGoogleGeminiForwardCompatModel({ providerId: "google", diff --git a/extensions/google/provider-models.ts b/extensions/google/provider-models.ts index d9158d005a19..45b967e4267d 100644 --- a/extensions/google/provider-models.ts +++ b/extensions/google/provider-models.ts @@ -4,6 +4,7 @@ import type { } from "openclaw/plugin-sdk/plugin-entry"; import { cloneFirstTemplateModel } from "openclaw/plugin-sdk/provider-model-shared"; import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/string-coerce-runtime"; +import { normalizeGoogleModelId } from "./model-id.js"; const GOOGLE_GEMINI_CLI_PROVIDER_ID = "google-gemini-cli"; const GOOGLE_ANTIGRAVITY_PROVIDER_ID = "google-antigravity"; @@ -41,6 +42,9 @@ function normalizeGeminiProRequestId(id: string): string { if (id === "gemini-3-pro" || id === "gemini-3-pro-preview" || id === "gemini-3.1-pro") { return "gemini-3.1-pro-preview"; } + if (id === "gemma-4-26b") { + return normalizeGoogleModelId(id); + } return id; } diff --git a/extensions/google/provider-policy.ts b/extensions/google/provider-policy.ts index 97993e887a8c..3d92fb41061c 100644 --- a/extensions/google/provider-policy.ts +++ b/extensions/google/provider-policy.ts @@ -12,6 +12,7 @@ type GoogleApiCarrier = { }; type GoogleProviderConfigLike = GoogleApiCarrier & { + baseUrl?: string | null; models?: ReadonlyArray | null; }; @@ -37,6 +38,28 @@ function stripUrlUserInfo(url: URL): void { url.password = ""; } +const GOOGLE_VERTEX_HOST = "aiplatform.googleapis.com"; +const GOOGLE_VERTEX_REGION_HOST_SUFFIX = "-aiplatform.googleapis.com"; + +export function isGoogleVertexHostname(hostname: string): boolean { + const normalized = hostname.toLowerCase(); + return ( + normalized === GOOGLE_VERTEX_HOST || normalized.endsWith(GOOGLE_VERTEX_REGION_HOST_SUFFIX) + ); +} + +export function isGoogleVertexBaseUrl(baseUrl?: string | null): boolean { + const raw = normalizeOptionalString(baseUrl); + if (!raw) { + return false; + } + try { + return isGoogleVertexHostname(new URL(raw).hostname); + } catch { + return false; + } +} + export function normalizeGoogleApiBaseUrl(baseUrl?: string): string { const raw = trimTrailingSlashes(normalizeOptionalString(baseUrl) || DEFAULT_GOOGLE_API_BASE_URL); try { @@ -85,9 +108,12 @@ export function resolveGoogleGenerativeAiTransport ({ + createGenerativeAi: vi.fn(() => vi.fn()), + createVertex: vi.fn(() => vi.fn()), +})); + +vi.mock("./transport-stream.js", () => ({ + createGoogleGenerativeAiTransportStreamFn: streamFns.createGenerativeAi, + createGoogleVertexTransportStreamFn: streamFns.createVertex, +})); + +function model(overrides: Partial = {}): Model { + return { + id: "gemini-2.5-flash", + name: "Gemini 2.5 Flash", + provider: "google-vertex", + api: "google-generative-ai", + baseUrl: "https://aiplatform.googleapis.com", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1_048_576, + maxTokens: 65_536, + ...overrides, + } as Model; +} + +describe("buildGoogleProvider createStreamFn", () => { + beforeEach(() => { + streamFns.createGenerativeAi.mockClear(); + streamFns.createVertex.mockClear(); + }); + + it("routes native Vertex hosts through the Vertex transport", () => { + const provider = buildGoogleProvider(); + + provider.createStreamFn?.({ + provider: "google-vertex", + modelId: "gemini-2.5-flash", + model: model(), + } as never); + + expect(streamFns.createVertex).toHaveBeenCalledTimes(1); + expect(streamFns.createGenerativeAi).not.toHaveBeenCalled(); + }); + + it("preserves explicit OpenAI-compatible Vertex endpoint configs", () => { + const provider = buildGoogleProvider(); + + const result = provider.createStreamFn?.({ + provider: "google-vertex", + modelId: "gemini-2.5-flash", + model: model({ + api: "openai-completions", + baseUrl: + "https://aiplatform.googleapis.com/v1/projects/test/locations/us-central1/endpoints/openapi", + }), + } as never); + + expect(result).toBeUndefined(); + expect(streamFns.createVertex).not.toHaveBeenCalled(); + expect(streamFns.createGenerativeAi).not.toHaveBeenCalled(); + }); +}); diff --git a/extensions/google/provider-registration.ts b/extensions/google/provider-registration.ts index 40cce9256b65..a14164501bb6 100644 --- a/extensions/google/provider-registration.ts +++ b/extensions/google/provider-registration.ts @@ -10,6 +10,7 @@ import { import { GOOGLE_GEMINI_PROVIDER_HOOKS } from "./provider-hooks.js"; import { isModernGoogleModel, resolveGoogleGeminiForwardCompatModel } from "./provider-models.js"; import { + isGoogleVertexBaseUrl, normalizeGoogleProviderConfig, resolveGoogleGenerativeAiTransport, } from "./provider-policy.js"; @@ -67,12 +68,16 @@ export function buildGoogleProvider(): ProviderPlugin { ctx, }), createStreamFn: ({ model }) => { + if ( + model.api === "google-vertex" || + (model.api === "google-generative-ai" && + (model.provider === "google-vertex" || isGoogleVertexBaseUrl(model.baseUrl))) + ) { + return createGoogleVertexTransportStreamFn(); + } if (model.api === "google-generative-ai") { return createGoogleGenerativeAiTransportStreamFn(); } - if (model.api === "google-vertex") { - return createGoogleVertexTransportStreamFn(); - } return undefined; }, ...GOOGLE_GEMINI_PROVIDER_HOOKS, diff --git a/packages/model-catalog-core/src/provider-model-id-normalization.test.ts b/packages/model-catalog-core/src/provider-model-id-normalization.test.ts index 6a0a1b390394..5a8a904e716d 100644 --- a/packages/model-catalog-core/src/provider-model-id-normalization.test.ts +++ b/packages/model-catalog-core/src/provider-model-id-normalization.test.ts @@ -34,6 +34,9 @@ describe("provider model id policy normalization", () => { "openrouter/google/gemini-3-pro-preview", ), ).toBe("openrouter/google/gemini-3.1-pro-preview"); + expect( + normalizeConfiguredProviderCatalogModelId("openrouter", "openrouter/google/gemma-4-26b"), + ).toBe("openrouter/google/gemma-4-26b-a4b-it"); }); it("normalizes native Anthropic catalog refs without retaining the provider prefix", () => { diff --git a/packages/model-catalog-core/src/provider-model-id-normalize.test.ts b/packages/model-catalog-core/src/provider-model-id-normalize.test.ts index 956fcc069607..2f409ea2fed3 100644 --- a/packages/model-catalog-core/src/provider-model-id-normalize.test.ts +++ b/packages/model-catalog-core/src/provider-model-id-normalize.test.ts @@ -31,4 +31,9 @@ describe("provider model id normalization", () => { it("does not rewrite stable GA flash-lite", () => { expect(normalizeGooglePreviewModelId("gemini-3.1-flash-lite")).toBe("gemini-3.1-flash-lite"); }); + + it("routes Gemma 4 26B shorthand to Google's canonical API id", () => { + expect(normalizeGooglePreviewModelId("gemma-4-26b")).toBe("gemma-4-26b-a4b-it"); + expect(normalizeGooglePreviewModelId("google/gemma-4-26b")).toBe("google/gemma-4-26b-a4b-it"); + }); }); diff --git a/packages/model-catalog-core/src/provider-model-id-normalize.ts b/packages/model-catalog-core/src/provider-model-id-normalize.ts index 9259d9723055..acc21d30ce18 100644 --- a/packages/model-catalog-core/src/provider-model-id-normalize.ts +++ b/packages/model-catalog-core/src/provider-model-id-normalize.ts @@ -25,6 +25,9 @@ export function normalizeGooglePreviewModelId(id: string): string { if (id === "gemini-3.1-flash" || id === "gemini-3.1-flash-preview") { return "gemini-3-flash-preview"; } + if (id === "gemma-4-26b") { + return "gemma-4-26b-a4b-it"; + } return id; } diff --git a/src/agents/acp-spawn.test.ts b/src/agents/acp-spawn.test.ts index 6660a065a3d1..6e806e880951 100644 --- a/src/agents/acp-spawn.test.ts +++ b/src/agents/acp-spawn.test.ts @@ -1033,7 +1033,7 @@ describe("spawnAcpDirect", () => { }); }); - it("does not treat a configured runtime=acp agent primary model as an ACP startup model", async () => { + it("uses configured runtime=acp agent primary model as an ACP startup model", async () => { replaceSpawnConfig({ ...createDefaultSpawnConfig(), agents: { @@ -1067,8 +1067,13 @@ describe("spawnAcpDirect", () => { ); expectAcceptedSpawn(result); - const initInput = expectInitializeSessionFields({ agent: "codex" }); - expect(initInput.runtimeOptions).toBeUndefined(); + expectInitializeSessionFields({ + agent: "codex", + runtimeOptions: { + model: "anthropic/claude-sonnet-4-6", + thinking: "adaptive", + }, + }); }); it("applies ACP spawn run timeout to runtime options and dispatch", async () => { diff --git a/src/agents/acp-spawn.ts b/src/agents/acp-spawn.ts index e1b319c162d8..8249aaad5ef1 100644 --- a/src/agents/acp-spawn.ts +++ b/src/agents/acp-spawn.ts @@ -1008,7 +1008,6 @@ function resolveAcpSpawnRuntimeOptions(params: { cfg: params.cfg, agentId: policyAgentId, modelOverride: params.model, - includeAgentPrimary: false, }); const targetAgentConfig = resolveAgentConfig(params.cfg, policyAgentId); const thinkingPlan = resolveSubagentThinkingOverride({ diff --git a/src/agents/agent-command.live-model-switch.test.ts b/src/agents/agent-command.live-model-switch.test.ts index bbe92f07469b..4d7995c31127 100644 --- a/src/agents/agent-command.live-model-switch.test.ts +++ b/src/agents/agent-command.live-model-switch.test.ts @@ -45,6 +45,7 @@ const state = vi.hoisted(() => ({ persistSessionEntryMock: vi.fn(async (..._args: unknown[]): Promise => undefined), clearSessionAuthProfileOverrideMock: vi.fn(), isThinkingLevelSupportedMock: vi.fn((_args: unknown) => true), + resolveSupportedThinkingLevelMock: vi.fn(({ level }: { level?: string }) => level), resolveThinkingDefaultMock: vi.fn((_args: unknown) => "low"), loadManifestModelCatalogMock: vi.fn(() => []), buildWorkspaceSkillSnapshotMock: vi.fn((..._args: unknown[]): unknown => ({ @@ -56,7 +57,7 @@ const state = vi.hoisted(() => ({ prepareInternalSessionEffectsTranscriptMock: vi.fn(), removeInternalSessionEffectsTranscriptMock: vi.fn(), authProfileStoreMock: { profiles: {} } as { profiles: Record }, - sessionEntryMock: undefined as unknown, + sessionEntryMock: undefined as SessionEntry | undefined, sessionStoreMock: undefined as unknown, storePathMock: undefined as string | undefined, resolvedSessionKeyMock: undefined as string | undefined, @@ -121,20 +122,24 @@ vi.mock("./command/session-store.runtime.js", () => ({ })); vi.mock("./command/session.js", () => ({ - resolveSession: () => ({ - sessionId: "session-1", - sessionKey: state.resolvedSessionKeyMock ?? "agent:main:main", - sessionEntry: state.sessionEntryMock ?? { + resolveSession: () => { + const sessionEntry: SessionEntry = state.sessionEntryMock ?? { sessionId: "session-1", updatedAt: Date.now(), skillsSnapshot: { prompt: "", skills: [], version: 0 }, - }, - sessionStore: state.sessionStoreMock, - storePath: state.storePathMock, - isNewSession: false, - persistedThinking: undefined, - persistedVerbose: undefined, - }), + }; + return { + sessionId: "session-1", + sessionKey: state.resolvedSessionKeyMock ?? "agent:main:main", + sessionEntry, + sessionStore: state.sessionStoreMock, + storePath: state.storePathMock, + isNewSession: false, + persistedThinking: + typeof sessionEntry.thinkingLevel === "string" ? sessionEntry.thinkingLevel : undefined, + persistedVerbose: undefined, + }; + }, })); vi.mock("./command/types.js", () => ({})); @@ -167,7 +172,8 @@ vi.mock("../auto-reply/thinking.js", () => ({ normalizeThinkLevel: (v?: string) => v || undefined, normalizeVerboseLevel: (v?: string) => v || undefined, isThinkingLevelSupported: (args: unknown) => state.isThinkingLevelSupportedMock(args), - resolveSupportedThinkingLevel: ({ level }: { level?: string }) => level, + resolveSupportedThinkingLevel: (args: { level?: string }) => + state.resolveSupportedThinkingLevelMock(args), supportsXHighThinking: () => false, })); @@ -536,11 +542,61 @@ vi.mock("./model-selection.js", () => { const fallback = allowedCatalog[0]; return fallback ? { provider: fallback.provider, model: fallback.id } : null; }, + buildModelAliasIndex: ({ + cfg, + }: { + cfg?: { agents?: { defaults?: { models?: Record } } }; + }) => { + const byAlias = new Map< + string, + { alias: string; ref: { provider: string; model: string } } + >(); + const byKey = new Map(); + for (const [ref, entry] of Object.entries(cfg?.agents?.defaults?.models ?? {})) { + const alias = entry?.alias?.trim(); + if (!alias) { + continue; + } + const [provider, ...modelParts] = ref.split("/"); + const model = modelParts.join("/"); + byAlias.set(alias.toLowerCase(), { alias, ref: { provider, model } }); + byKey.set(`${provider}/${model}`, [alias]); + } + return { byAlias, byKey }; + }, modelKey: (p: string, m: string) => `${p}/${m}`, normalizeModelRef: (p: string, m: string) => ({ provider: normalizeProviderId(p), model: m }), normalizeProviderId, normalizeProviderIdForAuth: normalizeProviderId, - parseModelRef: (m: string, p: string) => ({ provider: p, model: m }), + parseModelRef: (m: string, p: string) => { + const slash = m.indexOf("/"); + return slash > 0 + ? { provider: m.slice(0, slash), model: m.slice(slash + 1) } + : { provider: p, model: m }; + }, + resolveModelRefFromString: ({ + raw, + defaultProvider, + aliasIndex, + }: { + raw: string; + defaultProvider: string; + aliasIndex?: { + byAlias: Map; + }; + }) => { + const aliasMatch = aliasIndex?.byAlias.get(raw.trim().toLowerCase()); + if (aliasMatch) { + return { ref: aliasMatch.ref, alias: aliasMatch.alias }; + } + const slash = raw.indexOf("/"); + return { + ref: + slash > 0 + ? { provider: raw.slice(0, slash), model: raw.slice(slash + 1) } + : { provider: defaultProvider, model: raw }, + }; + }, resolveConfiguredModelRef: ({ cfg }: { cfg?: unknown }) => { const raw = (cfg as { agents?: { defaults?: { model?: string | { primary?: string } } } }) ?.agents?.defaults?.model; @@ -842,6 +898,9 @@ describe("agentCommand – LiveSessionModelSwitchError retry", () => { state.runtimeConfigMock = undefined; delete (state.defaultRuntimeConfig.agents as { list?: unknown }).list; state.isThinkingLevelSupportedMock.mockReturnValue(true); + state.resolveSupportedThinkingLevelMock.mockImplementation( + ({ level }: { level?: string }) => level, + ); state.resolveThinkingDefaultMock.mockReturnValue("low"); state.resolveAgentSkillsFilterMock.mockReturnValue(undefined); state.loadManifestModelCatalogMock.mockReturnValue([]); @@ -1154,6 +1213,36 @@ describe("agentCommand – LiveSessionModelSwitchError retry", () => { expect(state.updateSessionStoreAfterAgentRunMock).toHaveBeenCalledTimes(1); }); + it("does not persist turn-local thinking fallback over a stored session override", async () => { + setupSingleAttemptFallback(); + const sessionEntry: SessionEntry = { + sessionId: "session-1", + updatedAt: 1, + skillsSnapshot: { prompt: "", skills: [], version: 0 }, + thinkingLevel: "high", + }; + const sessionStore: Record = { "agent:main:main": sessionEntry }; + state.sessionEntryMock = sessionEntry; + state.sessionStoreMock = sessionStore; + state.storePathMock = "/tmp/openclaw-sessions.json"; + state.isThinkingLevelSupportedMock.mockReturnValue(false); + state.resolveSupportedThinkingLevelMock.mockReturnValue("off"); + state.runAgentAttemptMock.mockResolvedValue(makeSuccessResult("openai", "gpt-5.4")); + + await runBasicAgentCommand(); + + expectRecordFields(mockCallArg(state.runAgentAttemptMock), { + resolvedThinkLevel: "off", + }); + expect(sessionEntry.thinkingLevel).toBe("high"); + expect(sessionStore["agent:main:main"]?.thinkingLevel).toBe("high"); + expect(state.persistSessionEntryMock).not.toHaveBeenCalledWith( + expect.objectContaining({ + entry: expect.objectContaining({ thinkingLevel: "off" }), + }), + ); + }); + it("persists and clears current run delivery context for restart recovery", async () => { setupSingleAttemptFallback(); state.runAgentAttemptMock.mockResolvedValue(makeSuccessResult("openai", "gpt-5.4")); @@ -1827,6 +1916,66 @@ describe("agentCommand – LiveSessionModelSwitchError retry", () => { }); }); + it("resolves explicit model aliases before thinking validation", async () => { + state.runtimeConfigMock = { + agents: { + defaults: { + model: { primary: "openai/gpt-5.4" }, + models: { + "openai/*": {}, + "codex/gpt-5.5": { + alias: "code", + }, + }, + }, + }, + models: { + providers: { + codex: { + models: [ + { + id: "gpt-5.5", + name: "GPT 5.5 Codex", + reasoning: true, + compat: { supportedReasoningEfforts: ["low", "medium", "high", "xhigh"] }, + }, + ], + }, + }, + }, + }; + state.loadManifestModelCatalogMock.mockReturnValue([]); + state.runWithModelFallbackMock.mockImplementation(async (params: FallbackRunnerParams) => { + const result = await params.run(params.provider, params.model); + return { + result, + provider: params.provider, + model: params.model, + attempts: [], + }; + }); + state.runAgentAttemptMock.mockResolvedValue(makeSuccessResult("codex", "gpt-5.5")); + + await agentCommand({ + message: "hello", + to: "+1234567890", + model: "code", + thinking: "xhigh", + allowModelOverride: true, + }); + + const fallbackParams = mockCallArg(state.runWithModelFallbackMock) as FallbackRunnerParams; + expect(fallbackParams.provider).toBe("codex"); + expect(fallbackParams.model).toBe("gpt-5.5"); + const thinkingArgs = requireRecord( + mockCallArg(state.isThinkingLevelSupportedMock), + "thinking args", + ); + expect(thinkingArgs.provider).toBe("codex"); + expect(thinkingArgs.model).toBe("gpt-5.5"); + expect(thinkingArgs.level).toBe("xhigh"); + }); + it("records fallback steps to the session trajectory runtime", async () => { state.runWithModelFallbackMock.mockImplementation(async (params: FallbackRunnerParams) => { await params.onFallbackStep?.({ @@ -2064,7 +2213,7 @@ describe("agentCommand – LiveSessionModelSwitchError retry", () => { authProfileOverride: "openai:work", authProfileOverrideSource: "user", skillsSnapshot: { prompt: "", skills: [], version: 0 }, - }; + } satisfies SessionEntry; state.sessionEntryMock = sessionEntry; state.runtimeConfigMock = { agents: { diff --git a/src/agents/agent-command.ts b/src/agents/agent-command.ts index 1dc3d45bc871..e287df8e3aed 100644 --- a/src/agents/agent-command.ts +++ b/src/agents/agent-command.ts @@ -105,12 +105,13 @@ import { normalizeConfiguredProviderCatalogModelId } from "./model-ref-shared.js import type { ModelManifestNormalizationContext } from "./model-selection-normalize.js"; import { buildConfiguredModelCatalog, + buildModelAliasIndex, modelKey, normalizeModelRef, normalizeProviderId, - parseModelRef, resolveConfiguredModelRef, resolveDefaultModelForAgent, + resolveModelRefFromString, resolveThinkingDefault, } from "./model-selection.js"; import { @@ -201,10 +202,19 @@ function parseAgentCommandModelRef( defaultProvider: string, modelManifestContext: ModelManifestNormalizationContext, ) { - const parsed = parseModelRef(raw, defaultProvider, { + const parsed = resolveModelRefFromString({ + cfg, + raw, + defaultProvider, + aliasIndex: buildModelAliasIndex({ + cfg, + defaultProvider, + ...modelManifestContext, + allowPluginNormalization: false, + }), ...modelManifestContext, allowPluginNormalization: false, - }); + })?.ref; return parsed ? normalizeAgentCommandModelRef(cfg, parsed.provider, parsed.model, modelManifestContext) : null; @@ -1497,25 +1507,9 @@ async function agentCommandInternal( catalog: thinkingCatalog, }); if (fallbackThinkLevel !== resolvedThinkLevel) { - const previousThinkLevel = resolvedThinkLevel; + // Execution fallbacks are turn-local; directive/model persistence owns + // durable thinking remaps so explicit session overrides survive runs. resolvedThinkLevel = fallbackThinkLevel; - if ( - sessionEntry && - sessionStore && - sessionKey && - sessionEntry.thinkingLevel === previousThinkLevel && - !suppressVisibleSessionEffects - ) { - const entry = sessionEntry; - entry.thinkingLevel = fallbackThinkLevel; - entry.updatedAt = Date.now(); - await persistSessionEntry({ - sessionStore, - sessionKey, - storePath, - entry, - }); - } } } const { resolveSessionTranscriptFile } = await loadTranscriptResolveRuntime(); diff --git a/src/agents/cli-runner.reliability.test.ts b/src/agents/cli-runner.reliability.test.ts index 636290b93196..4658f8c7a44e 100644 --- a/src/agents/cli-runner.reliability.test.ts +++ b/src/agents/cli-runner.reliability.test.ts @@ -7,6 +7,7 @@ import { createReplyOperation, replyRunRegistry, } from "../auto-reply/reply/reply-run-registry.js"; +import { SILENT_REPLY_TOKEN } from "../auto-reply/tokens.js"; import { CURRENT_SESSION_VERSION } from "../config/sessions/version.js"; import type { OpenClawConfig } from "../config/types.openclaw.js"; import { getGlobalHookRunner } from "../plugins/hook-runner-global.js"; @@ -131,6 +132,7 @@ function buildPreparedContext(params?: { openClawHistoryPrompt?: string; provider?: string; model?: string; + allowEmptyAssistantReplyAsSilent?: boolean; }): PreparedCliRunContext { const provider = params?.provider ?? "codex-cli"; const model = params?.model ?? "gpt-5.4"; @@ -156,6 +158,7 @@ function buildPreparedContext(params?: { timeoutMs: 1_000, runId: params?.runId ?? "run-2", lane: params?.lane, + allowEmptyAssistantReplyAsSilent: params?.allowEmptyAssistantReplyAsSilent, }, started: Date.now(), workspaceDir: "/tmp", @@ -1712,6 +1715,41 @@ describe("runCliAgent reliability", () => { expect(hookRunner.runLlmOutput).not.toHaveBeenCalled(); }); + it("returns silent payload for empty CLI output when silence is allowed", async () => { + const hookRunner = { + hasHooks: vi.fn((hookName: string) => hookName === "llm_output"), + runLlmInput: vi.fn(async () => undefined), + runLlmOutput: vi.fn(async () => undefined), + runAgentEnd: vi.fn(async () => undefined), + }; + setHookRunnerForTest(hookRunner); + + supervisorSpawnMock.mockResolvedValueOnce( + createManagedRun({ + reason: "exit", + exitCode: 0, + exitSignal: null, + durationMs: 50, + stdout: " ", + stderr: "", + timedOut: false, + noOutputTimedOut: false, + }), + ); + + const result = await runPreparedCliAgent( + buildPreparedContext({ + provider: "claude-cli", + model: "claude-sonnet-4-6", + allowEmptyAssistantReplyAsSilent: true, + }), + ); + + expect(result.payloads).toEqual([{ text: SILENT_REPLY_TOKEN }]); + expect(result.meta.executionTrace?.fallbackUsed).toBe(false); + expect(hookRunner.runLlmOutput).not.toHaveBeenCalled(); + }); + it("emits agent_end with failure details when the CLI run fails", async () => { let releaseAgentEnd: () => void = () => undefined; const agentEndSettled = new Promise((resolve) => { diff --git a/src/agents/cli-runner.ts b/src/agents/cli-runner.ts index 704b419be177..c05e19c0abb9 100644 --- a/src/agents/cli-runner.ts +++ b/src/agents/cli-runner.ts @@ -534,7 +534,7 @@ export async function runPreparedCliAgent( }; const output = await executePreparedCliRun(attemptContext, cliSessionIdToUse); const assistantText = output.text.trim(); - if (!assistantText) { + if (!assistantText && params.allowEmptyAssistantReplyAsSilent !== true) { throw new FailoverError("CLI backend returned an empty response.", { reason: "empty_response", provider: params.provider, @@ -588,7 +588,11 @@ export async function runPreparedCliAgent( }): EmbeddedAgentRunResult => { const text = resultParams.output.text?.trim(); const rawText = resultParams.output.rawText?.trim(); - const payloads = text ? [{ text }] : undefined; + const payloads = text + ? [{ text }] + : params.allowEmptyAssistantReplyAsSilent === true + ? [{ text: SILENT_REPLY_TOKEN }] + : undefined; const unflushedCliSessionId = resultParams.effectiveCliSessionId && resultParams.bindingFlushOk === false ? resultParams.effectiveCliSessionId diff --git a/src/agents/cli-runner/types.ts b/src/agents/cli-runner/types.ts index 740d1506de46..7d71510f172b 100644 --- a/src/agents/cli-runner/types.ts +++ b/src/agents/cli-runner/types.ts @@ -55,6 +55,7 @@ export type RunCliAgentParams = { extraSystemPrompt?: string; sourceReplyDeliveryMode?: SourceReplyDeliveryMode; silentReplyPromptMode?: SilentReplyPromptMode; + allowEmptyAssistantReplyAsSilent?: boolean; /** Static portion of extraSystemPrompt (excluding per-message inbound metadata) for session reuse hashing. */ extraSystemPromptStatic?: string; streamParams?: import("../command/types.js").AgentStreamParams; diff --git a/src/agents/embedded-agent-runner/model.provider-runtime.test-support.ts b/src/agents/embedded-agent-runner/model.provider-runtime.test-support.ts index 9024903a9210..d3e2f3523db0 100644 --- a/src/agents/embedded-agent-runner/model.provider-runtime.test-support.ts +++ b/src/agents/embedded-agent-runner/model.provider-runtime.test-support.ts @@ -11,6 +11,7 @@ const XAI_BASE_URL = "https://api.x.ai/v1"; const ZAI_BASE_URL = "https://api.z.ai/api/paas/v4"; const GOOGLE_GENERATIVE_AI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"; const GOOGLE_GEMINI_CLI_BASE_URL = "https://cloudcode-pa.googleapis.com"; +const GOOGLE_VERTEX_BASE_URL = "https://aiplatform.googleapis.com"; const DEFAULT_CONTEXT_WINDOW = 200_000; const DEFAULT_MAX_TOKENS = 8192; const OPENROUTER_FALLBACK_COST = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }; @@ -182,6 +183,16 @@ function normalizeTransport(params: { baseUrl: GOOGLE_GENERATIVE_AI_BASE_URL, }; } + if ( + params.provider === "google-vertex" && + params.context.api == null && + params.context.baseUrl === GOOGLE_VERTEX_BASE_URL + ) { + return { + api: "google-vertex", + baseUrl: GOOGLE_VERTEX_BASE_URL, + }; + } if (isNativeOpenAiTransport) { return { api: "openai-responses", diff --git a/src/agents/embedded-agent-runner/model.test.ts b/src/agents/embedded-agent-runner/model.test.ts index 2d28864cb0dd..ff90830e6adb 100644 --- a/src/agents/embedded-agent-runner/model.test.ts +++ b/src/agents/embedded-agent-runner/model.test.ts @@ -1010,6 +1010,27 @@ describe("resolveModel", () => { expect(model.baseUrl).toBe("https://generativelanguage.googleapis.com/v1beta"); }); + it("defaults baseUrl-only Google Vertex fallback models to native Vertex transport", () => { + const cfg = { + models: { + providers: { + "google-vertex": { + baseUrl: "https://aiplatform.googleapis.com", + models: [], + }, + }, + }, + } as unknown as OpenClawConfig; + + const result = resolveModelForTest("google-vertex", "gemini-2.5-flash", "/tmp/agent", cfg); + const model = expectResolvedModel(result); + + expect(model.provider).toBe("google-vertex"); + expect(model.id).toBe("gemini-2.5-flash"); + expect(model.api).toBe("google-vertex"); + expect(model.baseUrl).toBe("https://aiplatform.googleapis.com"); + }); + it("uses bundled static metadata for configured provider fallback token limits", () => { resolveBundledStaticCatalogModelMock.mockReturnValueOnce({ provider: "xiaomi-token-plan", diff --git a/src/agents/google-simple-completion-stream.test.ts b/src/agents/google-simple-completion-stream.test.ts new file mode 100644 index 000000000000..4837a714dbc5 --- /dev/null +++ b/src/agents/google-simple-completion-stream.test.ts @@ -0,0 +1,147 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import type { Model } from "../llm/types.js"; + +const streamSimple = vi.fn(); +const sanitizeGoogleThinkingPayload = vi.fn(); +const ensureCustomApiRegistered = vi.fn(); + +vi.mock("../llm/stream.js", () => ({ + streamSimple, +})); + +vi.mock("../plugin-sdk/provider-stream-shared.js", async () => { + const actual = await vi.importActual( + "../plugin-sdk/provider-stream-shared.js", + ); + return { + ...actual, + sanitizeGoogleThinkingPayload, + }; +}); + +vi.mock("./custom-api-registry.js", () => ({ + ensureCustomApiRegistered, +})); + +const { GOOGLE_SIMPLE_COMPLETION_API, prepareGoogleSimpleCompletionModel } = + await import("./google-simple-completion-stream.js"); + +function makeGoogleModel(id = "gemini-flash-latest"): Model<"google-generative-ai"> { + return { + id, + name: id, + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1_000_000, + maxTokens: 8192, + headers: {}, + }; +} + +describe("prepareGoogleSimpleCompletionModel", () => { + beforeEach(() => { + streamSimple.mockReset(); + sanitizeGoogleThinkingPayload.mockReset(); + ensureCustomApiRegistered.mockReset(); + streamSimple.mockImplementation((_model, _context, options) => { + const payload = { + generationConfig: { + thinkingConfig: { thinkingBudget: -1 }, + }, + }; + options?.onPayload?.(payload, _model); + return { content: [{ type: "text", text: "ok" }], payload }; + }); + }); + + it("returns non-Google models unchanged", () => { + const model = { + ...makeGoogleModel("gpt-5"), + api: "openai-responses", + } as unknown as Model<"openai-responses">; + + const result = prepareGoogleSimpleCompletionModel(model); + + expect(result).toBe(model); + expect(ensureCustomApiRegistered).not.toHaveBeenCalled(); + }); + + it("registers an OpenClaw-owned Google simple-completion api alias", () => { + const model = makeGoogleModel(); + + const result = prepareGoogleSimpleCompletionModel(model); + + expect(result).toEqual({ + ...model, + api: GOOGLE_SIMPLE_COMPLETION_API, + }); + expect(ensureCustomApiRegistered).toHaveBeenCalledTimes(1); + expect(ensureCustomApiRegistered.mock.calls[0]?.[0]).toBe(GOOGLE_SIMPLE_COMPLETION_API); + }); + + it.each(["off", "low", "medium", "high", "adaptive"] as const)( + "sanitizes outbound thinking payload for gemini-flash-latest with reasoning=%s", + async (reasoning) => { + const model = makeGoogleModel(); + const wrapped = prepareGoogleSimpleCompletionModel(model); + const streamFn = ensureCustomApiRegistered.mock.calls[0]?.[1] as ( + ...args: unknown[] + ) => unknown; + + await streamFn(wrapped, { messages: [] }, { apiKey: "key", reasoning }); + + expect(streamSimple).toHaveBeenCalledTimes(1); + expect(streamSimple.mock.calls[0]?.[0]).toEqual({ + ...model, + api: "google-generative-ai", + }); + expect(sanitizeGoogleThinkingPayload).toHaveBeenCalledWith({ + payload: { + generationConfig: { + thinkingConfig: { thinkingBudget: -1 }, + }, + }, + modelId: "gemini-flash-latest", + thinkingLevel: reasoning, + }); + }, + ); + + it("returns the sanitizer-mutated payload shape", async () => { + sanitizeGoogleThinkingPayload.mockImplementationOnce((args: { payload: unknown }) => { + const payload = args.payload as { + generationConfig: { thinkingConfig: Record }; + }; + delete payload.generationConfig.thinkingConfig.thinkingBudget; + payload.generationConfig.thinkingConfig.thinkingLevel = "MINIMAL"; + }); + const model = makeGoogleModel(); + prepareGoogleSimpleCompletionModel(model); + const streamFn = ensureCustomApiRegistered.mock.calls[0]?.[1] as ( + ...args: unknown[] + ) => unknown; + + const result = await streamFn(model, { messages: [] }, { apiKey: "key", reasoning: "off" }); + + expect(result).toMatchObject({ + payload: { + generationConfig: { + thinkingConfig: { + thinkingLevel: "MINIMAL", + }, + }, + }, + }); + expect( + ( + result as { + payload: { generationConfig: { thinkingConfig: Record } }; + } + ).payload.generationConfig.thinkingConfig, + ).not.toHaveProperty("thinkingBudget"); + }); +}); diff --git a/src/agents/google-simple-completion-stream.ts b/src/agents/google-simple-completion-stream.ts new file mode 100644 index 000000000000..14a78f023618 --- /dev/null +++ b/src/agents/google-simple-completion-stream.ts @@ -0,0 +1,60 @@ +import { streamSimple } from "../llm/stream.js"; +import type { Api, Model } from "../llm/types.js"; +import { + sanitizeGoogleThinkingPayload, + streamWithPayloadPatch, + type GoogleThinkingInputLevel, +} from "../plugin-sdk/provider-stream-shared.js"; +import { ensureCustomApiRegistered } from "./custom-api-registry.js"; +import type { StreamFn } from "./runtime/index.js"; + +export const GOOGLE_SIMPLE_COMPLETION_API: Api = "openclaw-google-generative-ai-simple"; + +const SOURCE_API: Api = "google-generative-ai"; + +function resolveGoogleSimpleThinkingLevel( + reasoning: unknown, +): GoogleThinkingInputLevel | undefined { + switch (reasoning) { + case "off": + case "minimal": + case "low": + case "medium": + case "adaptive": + case "high": + case "max": + case "xhigh": + return reasoning; + default: + return undefined; + } +} + +function buildGoogleSimpleCompletionStreamFn(): StreamFn { + return (model, context, options) => { + const googleModel = { ...model, api: SOURCE_API }; + return streamWithPayloadPatch( + streamSimple as unknown as StreamFn, + googleModel, + context, + options, + (payload) => { + sanitizeGoogleThinkingPayload({ + payload, + modelId: model.id, + thinkingLevel: resolveGoogleSimpleThinkingLevel( + (options as { reasoning?: unknown } | undefined)?.reasoning, + ), + }); + }, + ); + }; +} + +export function prepareGoogleSimpleCompletionModel(model: Model): Model { + if (model.api !== SOURCE_API) { + return model; + } + ensureCustomApiRegistered(GOOGLE_SIMPLE_COMPLETION_API, buildGoogleSimpleCompletionStreamFn()); + return { ...model, api: GOOGLE_SIMPLE_COMPLETION_API }; +} diff --git a/src/agents/openai-transport-stream.test.ts b/src/agents/openai-transport-stream.test.ts index 90eb766bd9c3..c02c29645b83 100644 --- a/src/agents/openai-transport-stream.test.ts +++ b/src/agents/openai-transport-stream.test.ts @@ -383,6 +383,84 @@ describe("openai transport stream", () => { }); }); + it("backfills Azure Responses completed message output when item events are absent", async () => { + const model = createAzureResponsesModel(); + const output = createResponsesAssistantOutput(model); + + await testing.processResponsesStream( + streamChunks([ + { + type: "response.completed", + response: { + id: "resp-azure-completed-message", + status: "completed", + output: [ + { type: "reasoning", id: "rs_123", summary: [] }, + { + type: "message", + id: "msg_123", + role: "assistant", + content: [{ type: "text", text: "AZURE_RESPONSES_CANARY_OK" }], + }, + ], + }, + }, + ]), + output, + { push: vi.fn() }, + model, + ); + + expect(output.stopReason).toBe("stop"); + expect(output.content).toEqual([ + { + type: "text", + text: "AZURE_RESPONSES_CANARY_OK", + textSignature: '{"v":1,"id":"msg_123"}', + }, + ]); + }); + + it("backfills Azure Responses completed function calls when item events are absent", async () => { + const model = createAzureResponsesModel(); + const output = createResponsesAssistantOutput(model); + + await testing.processResponsesStream( + streamChunks([ + { + type: "response.completed", + response: { + id: "resp-azure-completed-tool", + status: "completed", + output: [ + { + type: "function_call", + id: "fc_123", + call_id: "call_123", + name: "session_status", + arguments: '{"sessionKey":"current"}', + }, + ], + }, + }, + ]), + output, + { push: vi.fn() }, + model, + ); + + expect(output.stopReason).toBe("toolUse"); + expect(output.content).toEqual([ + { + type: "toolCall", + id: "call_123|fc_123", + name: "session_status", + arguments: { sessionKey: "current" }, + partialJson: '{"sessionKey":"current"}', + }, + ]); + }); + it("summarizes model payload tools with full names when requested", () => { const previous = process.env.OPENCLAW_DEBUG_MODEL_PAYLOAD; process.env.OPENCLAW_DEBUG_MODEL_PAYLOAD = "tools"; @@ -1852,6 +1930,148 @@ describe("openai transport stream", () => { expect(JSON.stringify(events)).not.toContain("DSML"); }); + it("recovers DeepSeek DSML parameter tool calls emitted as text", async () => { + const model = createDeepSeekCompletionsModel(); + const output = createAssistantOutput(model); + const events: CapturedStreamEvent[] = []; + + await testing.processOpenAICompletionsStream( + streamChunks([ + { + id: "chatcmpl-deepseek-dsml-tool", + object: "chat.completion.chunk", + created: 1, + model: model.id, + choices: [ + { + index: 0, + delta: { + content: + '<|DSML|tool_calls>\n<|DSML|invoke name="session_status">\n<|DSML|parameter name="sessionKey" string="true">current\n\n', + }, + logprobs: null, + finish_reason: "stop", + }, + ], + }, + ]), + output, + model, + { push: (event) => events.push(event as CapturedStreamEvent) }, + ); + + expect(output.stopReason).toBe("toolUse"); + expect(output.content).toEqual([ + { + type: "toolCall", + id: "call_deepseek_dsml_1", + name: "session_status", + arguments: { sessionKey: "current" }, + partialArgs: '{"sessionKey":"current"}', + }, + ]); + expect(JSON.stringify(events)).not.toContain("DSML"); + }); + + it("recovers split DeepSeek DSML JSON tool calls emitted as text", async () => { + const model = createDeepSeekCompletionsModel(); + const output = createAssistantOutput(model); + + await testing.processOpenAICompletionsStream( + streamChunks([ + { + id: "chatcmpl-deepseek-split-dsml-tool", + object: "chat.completion.chunk", + created: 1, + model: model.id, + choices: [ + { + index: 0, + delta: { content: '<|DSML|tool_calls><|DSML|invoke name="read">' }, + logprobs: null, + finish_reason: null, + }, + ], + }, + { + id: "chatcmpl-deepseek-split-dsml-tool", + object: "chat.completion.chunk", + created: 1, + model: model.id, + choices: [ + { + index: 0, + delta: { content: '{"path":"/tmp/native.md"}' }, + logprobs: null, + finish_reason: null, + }, + ], + }, + { + id: "chatcmpl-deepseek-split-dsml-tool", + object: "chat.completion.chunk", + created: 1, + model: model.id, + choices: [ + { + index: 0, + delta: { content: "" }, + logprobs: null, + finish_reason: "stop", + }, + ], + }, + ]), + output, + model, + { push() {} }, + ); + + expect(output.stopReason).toBe("toolUse"); + expect(output.content).toEqual([ + { + type: "toolCall", + id: "call_deepseek_dsml_1", + name: "read", + arguments: { path: "/tmp/native.md" }, + partialArgs: '{"path":"/tmp/native.md"}', + }, + ]); + }); + + it("does not recover malformed DeepSeek DSML tool calls", async () => { + const model = createDeepSeekCompletionsModel(); + const output = createAssistantOutput(model); + + await testing.processOpenAICompletionsStream( + streamChunks([ + { + id: "chatcmpl-deepseek-malformed-dsml-tool", + object: "chat.completion.chunk", + created: 1, + model: model.id, + choices: [ + { + index: 0, + delta: { + content: + '<|DSML|tool_calls>\n<|DSML|invoke name="session_status">\n\n', + }, + logprobs: null, + finish_reason: "stop", + }, + ], + }, + ]), + output, + model, + { push() {} }, + ); + + expect(output.stopReason).toBe("stop"); + expect(output.content).toEqual([]); + }); + it("keeps OpenRouter thinking format for declared OpenRouter providers on custom proxy URLs", () => { const params = buildOpenAICompletionsParams( attachModelProviderRequestTransport( @@ -2218,6 +2438,42 @@ describe("openai transport stream", () => { expect(params.input?.[0]?.role).toBe("developer"); }); + it("serializes Responses input messages with explicit message type and content parts", () => { + const params = buildOpenAIResponsesParams( + { + id: "gpt-5.4", + name: "GPT-5.4", + api: "openai-responses", + provider: "microsoft-foundry", + baseUrl: "https://example.services.ai.azure.com/api/projects/demo/openai/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-responses">, + { + systemPrompt: "system", + messages: [{ role: "user", content: "hello", timestamp: 1 }], + tools: [], + } as never, + undefined, + ) as { input?: unknown }; + + expect(params.input).toEqual([ + { + type: "message", + role: "system", + content: [{ type: "input_text", text: "system" }], + }, + { + type: "message", + role: "user", + content: [{ type: "input_text", text: "hello" }], + }, + ]); + }); + it("uses model maxTokens for Responses params when runtime maxTokens is omitted", () => { const params = buildOpenAIResponsesParams( { @@ -3684,6 +3940,7 @@ describe("openai transport stream", () => { expect(params.instructions).toBe("Stable prefix\nDynamic suffix"); expect(params.input).toEqual([ { + type: "message", role: "user", content: [{ type: "input_text", text: " " }], }, @@ -4051,9 +4308,9 @@ describe("openai transport stream", () => { tools: [], } as never, undefined, - ) as { input?: Array<{ content?: string }> }; + ) as { input?: Array<{ content?: Array<{ text?: string }> }> }; - expect(params.input?.[0]?.content).toBe("Stable prefix\nDynamic suffix"); + expect(params.input?.[0]?.content?.[0]?.text).toBe("Stable prefix\nDynamic suffix"); }); it("defaults responses tool schemas to strict on native OpenAI routes", () => { diff --git a/src/agents/openai-transport-stream.ts b/src/agents/openai-transport-stream.ts index 7058c2227f99..f4302a5db592 100644 --- a/src/agents/openai-transport-stream.ts +++ b/src/agents/openai-transport-stream.ts @@ -1030,6 +1030,13 @@ function parseTextSignature( return { id: signature }; } +function buildResponsesInputMessage( + role: "user" | "system" | "developer", + content: ResponseInputMessageContentList, +): ResponseInputItem.Message { + return { type: "message", role, content }; +} + function convertResponsesMessages( model: Model, context: Context, @@ -1098,19 +1105,29 @@ function convertResponsesMessages( ); const includeSystemPrompt = options?.includeSystemPrompt ?? true; if (includeSystemPrompt && context.systemPrompt) { - messages.push({ - role: model.reasoning && options?.supportsDeveloperRole !== false ? "developer" : "system", - content: sanitizeTransportPayloadText(stripSystemPromptCacheBoundary(context.systemPrompt)), - }); + messages.push( + buildResponsesInputMessage( + model.reasoning && options?.supportsDeveloperRole !== false ? "developer" : "system", + [ + { + type: "input_text", + text: sanitizeTransportPayloadText( + stripSystemPromptCacheBoundary(context.systemPrompt), + ), + }, + ], + ), + ); } let msgIndex = 0; for (const msg of transformedMessages) { if (msg.role === "user") { if (typeof msg.content === "string") { - messages.push({ - role: "user", - content: [{ type: "input_text", text: sanitizeTransportPayloadText(msg.content) }], - }); + messages.push( + buildResponsesInputMessage("user", [ + { type: "input_text", text: sanitizeTransportPayloadText(msg.content) }, + ]), + ); } else { const content = ( msg.content.map((item) => @@ -1124,7 +1141,7 @@ function convertResponsesMessages( ) as ResponseInputMessageContentList ).filter((item) => model.input.includes("image") || item.type !== "input_image"); if (content.length > 0) { - messages.push({ role: "user", content }); + messages.push(buildResponsesInputMessage("user", content)); } } } else if (msg.role === "assistant") { @@ -1426,6 +1443,66 @@ async function processResponsesStream( const eventTypes = new Map(); const sseDebugMode = resolveModelSseDebugMode(); const blockIndex = () => output.content.length - 1; + const appendCompletedResponseTextItem = (item: Record) => { + const text = readResponsesOutputMessageText(item); + if (!text) { + return; + } + const block: Record = { + type: "text", + text, + textSignature: encodeTextSignatureV1( + stringifyUnknown(item.id), + (item.phase as "commentary" | "final_answer" | undefined) ?? undefined, + ), + }; + output.content.push(block); + stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output }); + stream.push({ + type: "text_end", + contentIndex: blockIndex(), + content: text, + partial: output, + }); + }; + const appendCompletedResponseToolCallItem = (item: Record) => { + const args = parseStreamingJson(stringifyJsonLike(item.arguments, "{}")); + const block = { + type: "toolCall", + id: `${stringifyUnknown(item.call_id)}|${stringifyUnknown(item.id)}`, + name: stringifyUnknown(item.name), + arguments: args, + partialJson: stringifyJsonLike(item.arguments, "{}"), + }; + output.content.push(block); + stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output }); + stream.push({ + type: "toolcall_end", + contentIndex: blockIndex(), + toolCall: { + type: "toolCall", + id: block.id, + name: block.name, + arguments: args, + }, + partial: output, + }); + }; + const backfillCompletedResponseOutput = (response: Record | undefined) => { + if (output.content.length > 0 || !Array.isArray(response?.output)) { + return; + } + for (const rawItem of response.output) { + if (!isRecord(rawItem)) { + continue; + } + if (rawItem.type === "message") { + appendCompletedResponseTextItem(rawItem); + } else if (rawItem.type === "function_call") { + appendCompletedResponseToolCallItem(rawItem); + } + } + }; const guardedStream = withResponsesFirstEventTimeout( openaiStream, model, @@ -1580,6 +1657,7 @@ async function processResponsesStream( if (typeof response?.id === "string") { output.responseId = response.id; } + backfillCompletedResponseOutput(response); const usage = response?.usage as | { input_tokens?: number; @@ -1672,6 +1750,24 @@ function mapResponsesStopReason(status: string | undefined): string { } } +function readResponsesOutputMessageText(item: Record): string { + const content = Array.isArray(item.content) ? item.content : []; + return content + .map((part) => { + if (!isRecord(part)) { + return ""; + } + if (part.type === "output_text" || part.type === "text") { + return stringifyUnknown(part.text); + } + if (part.type === "refusal") { + return stringifyUnknown(part.refusal); + } + return ""; + }) + .join(""); +} + function buildOpenAIClientHeaders( model: Model, context: Context, @@ -2058,10 +2154,11 @@ function ensureOpenAICodexResponsesInput(messages: ResponseInput, context: Conte "OpenAI Codex Responses requires non-empty input when only systemPrompt is provided.", ); } - messages.push({ - role: "user", - content: [{ type: "input_text", text: OPENAI_CODEX_RESPONSES_EMPTY_INPUT_TEXT }], - }); + messages.push( + buildResponsesInputMessage("user", [ + { type: "input_text", text: OPENAI_CODEX_RESPONSES_EMPTY_INPUT_TEXT }, + ]), + ); } function resolveOpenAIResponsesTextFormat( @@ -2527,6 +2624,9 @@ async function processOpenAICompletionsStream( const deepSeekTextFilter = shouldFilterDeepSeekDsmlText(compat) ? createDeepSeekTextFilter() : null; + const deepSeekToolCallRecoverer = shouldFilterDeepSeekDsmlText(compat) + ? createDeepSeekDsmlToolCallRecoverer() + : null; const reasoningTagTextPartitioner = createReasoningTagTextPartitioner(); type ToolCallBlock = { type: "toolCall"; @@ -2544,6 +2644,7 @@ async function processOpenAICompletionsStream( let pendingPostToolCallDeltas: CompletionsReasoningDelta[] = []; let pendingPostToolCallBytes = 0; let isFlushingPendingPostToolCallDeltas = false; + let recoveredDeepSeekToolCallIndex = 0; const toolCallBlocksByIndex = new Map(); const toolCallBlocksById = new Map(); const toolCallBlockBytes = new WeakMap(); @@ -2657,8 +2758,72 @@ async function processOpenAICompletionsStream( appendTextDelta(text); } }; + const appendRecoveredToolCall = (toolCall: RecoveredDeepSeekDsmlToolCall) => { + const switchingToolCall = currentBlock?.type === "toolCall"; + finishCurrentBlock(); + if (switchingToolCall) { + currentBlock = null; + flushPendingPostToolCallDeltas(); + } + output.stopReason = "toolUse"; + recoveredDeepSeekToolCallIndex += 1; + const block: ToolCallBlock = { + type: "toolCall", + id: `call_deepseek_dsml_${recoveredDeepSeekToolCallIndex}`, + name: toolCall.name, + arguments: toolCall.arguments, + partialArgs: toolCall.partialArgs, + }; + currentBlock = block; + output.content.push(block); + stream.push({ + type: "toolcall_start", + contentIndex: output.content.indexOf(block), + partial: output, + }); + stream.push({ + type: "toolcall_delta", + contentIndex: output.content.indexOf(block), + delta: toolCall.partialArgs, + partial: output, + }); + }; const appendFilteredVisibleTextDelta = (text: string) => { - const parts = deepSeekTextFilter?.push(text) ?? [text]; + const recoveredParts = deepSeekToolCallRecoverer?.push(text) ?? [ + { kind: "text" as const, text }, + ]; + for (const recoveredPart of recoveredParts) { + if (recoveredPart.kind === "toolCall") { + appendRecoveredToolCall(recoveredPart); + continue; + } + const parts = deepSeekTextFilter?.push(recoveredPart.text) ?? [recoveredPart.text]; + for (const part of parts) { + appendVisibleTextDelta(part); + } + } + }; + const flushDeepSeekToolCallRecovererAtEnd = () => { + const recoveredParts = deepSeekToolCallRecoverer?.flush(); + if (!recoveredParts) { + return; + } + for (const recoveredPart of recoveredParts) { + if (recoveredPart.kind === "toolCall") { + appendRecoveredToolCall(recoveredPart); + continue; + } + const parts = deepSeekTextFilter?.push(recoveredPart.text) ?? [recoveredPart.text]; + for (const part of parts) { + appendVisibleTextDelta(part); + } + } + }; + const flushDeepSeekTextFilterAtEnd = () => { + const parts = deepSeekTextFilter?.flush(); + if (!parts) { + return; + } for (const part of parts) { appendVisibleTextDelta(part); } @@ -2679,15 +2844,6 @@ async function processOpenAICompletionsStream( appendFilteredVisibleTextDelta(delta.text); } }; - const flushDeepSeekTextFilterAtEnd = () => { - const parts = deepSeekTextFilter?.flush(); - if (!parts) { - return; - } - for (const part of parts) { - appendVisibleTextDelta(part); - } - }; const flushReasoningTagTextPartitionerAtEnd = () => { for (const delta of reasoningTagTextPartitioner.flush()) { appendPartitionedVisibleDelta(delta); @@ -2836,6 +2992,7 @@ async function processOpenAICompletionsStream( await cooperativeScheduler.afterEvent(); } flushReasoningTagTextPartitionerAtEnd(); + flushDeepSeekToolCallRecovererAtEnd(); flushDeepSeekTextFilterAtEnd(); finishAllToolCallBlocks(); currentBlock = null; @@ -2871,6 +3028,193 @@ function shouldFilterDeepSeekDsmlText(compat: ReturnType) { return compat.thinkingFormat === "deepseek"; } +type RecoveredDeepSeekDsmlToolCall = { + kind: "toolCall"; + name: string; + arguments: Record; + partialArgs: string; +}; + +type DeepSeekDsmlRecoveredPart = { kind: "text"; text: string } | RecoveredDeepSeekDsmlToolCall; + +const DEEPSEEK_DSML_BARS = ["|", "|"] as const; +const DEEPSEEK_DSML_TOOL_KINDS = ["tool_calls", "tool_call", "function_calls"] as const; +const DEEPSEEK_DSML_TOOL_OPEN_TOKENS = DEEPSEEK_DSML_BARS.flatMap((bar) => + DEEPSEEK_DSML_TOOL_KINDS.map((kind) => `<${bar}DSML${bar}${kind}>`), +); +const DEEPSEEK_DSML_TOOL_CLOSE_TOKENS = DEEPSEEK_DSML_BARS.flatMap((bar) => + DEEPSEEK_DSML_TOOL_KINDS.map((kind) => ``), +); +const DEEPSEEK_DSML_TOOL_MAX_OPEN_TOKEN_LEN = Math.max( + ...DEEPSEEK_DSML_TOOL_OPEN_TOKENS.map((token) => token.length), +); + +function createDeepSeekDsmlToolCallRecoverer() { + let buffer = ""; + + const consume = (final: boolean): DeepSeekDsmlRecoveredPart[] => { + const output: DeepSeekDsmlRecoveredPart[] = []; + while (buffer) { + const open = findEarliestStringToken(buffer, DEEPSEEK_DSML_TOOL_OPEN_TOKENS); + if (!open) { + if (final) { + output.push({ kind: "text", text: buffer }); + buffer = ""; + return output; + } + const keep = longestDeepSeekDsmlToolOpenPrefixSuffixLength(buffer); + const emitLength = buffer.length - keep; + if (emitLength > 0) { + output.push({ kind: "text", text: buffer.slice(0, emitLength) }); + buffer = buffer.slice(emitLength); + } + return output; + } + + if (open.index > 0) { + output.push({ kind: "text", text: buffer.slice(0, open.index) }); + buffer = buffer.slice(open.index); + } + + const afterOpen = buffer.slice(open.token.length); + const close = findEarliestStringToken(afterOpen, DEEPSEEK_DSML_TOOL_CLOSE_TOKENS); + if (!close) { + if (final) { + output.push({ kind: "text", text: buffer }); + buffer = ""; + } + return output; + } + + const body = afterOpen.slice(0, close.index); + const blockLength = open.token.length + close.index + close.token.length; + const recoveredToolCalls = parseDeepSeekDsmlToolCallBlock(body); + if (recoveredToolCalls.length > 0) { + output.push(...recoveredToolCalls); + } else { + output.push({ kind: "text", text: buffer.slice(0, blockLength) }); + } + buffer = buffer.slice(blockLength); + } + return output; + }; + + return { + push(chunk: string) { + buffer += chunk; + return consume(false); + }, + flush() { + return consume(true); + }, + }; +} + +function parseDeepSeekDsmlToolCallBlock(body: string): RecoveredDeepSeekDsmlToolCall[] { + const toolCalls: RecoveredDeepSeekDsmlToolCall[] = []; + const invokeOpenRegex = /<[||]DSML[||]invoke\b([^>]*)>/g; + let openMatch: RegExpExecArray | null; + while ((openMatch = invokeOpenRegex.exec(body)) !== null) { + const invokeName = parseXmlAttribute(openMatch[1] ?? "", "name"); + if (!invokeName) { + continue; + } + const invokeBodyStart = openMatch.index + openMatch[0].length; + const invokeClose = findEarliestStringToken(body.slice(invokeBodyStart), [ + "", + "", + ]); + if (!invokeClose) { + continue; + } + const invokeBody = body.slice(invokeBodyStart, invokeBodyStart + invokeClose.index); + invokeOpenRegex.lastIndex = invokeBodyStart + invokeClose.index + invokeClose.token.length; + const parsedArguments = parseDeepSeekDsmlInvokeArguments(invokeBody); + if (!parsedArguments) { + continue; + } + toolCalls.push({ + kind: "toolCall", + name: invokeName, + arguments: parsedArguments, + partialArgs: JSON.stringify(parsedArguments), + }); + } + return toolCalls; +} + +function parseDeepSeekDsmlInvokeArguments(body: string): Record | null { + const args: Record = {}; + const parameterRegex = /<[||]DSML[||]parameter\b([^>]*)>([\s\S]*?)<\/[||]DSML[||]parameter>/g; + let parameterMatch: RegExpExecArray | null; + while ((parameterMatch = parameterRegex.exec(body)) !== null) { + const name = parseXmlAttribute(parameterMatch[1] ?? "", "name"); + if (!name) { + continue; + } + const rawValue = parameterMatch[2] ?? ""; + if (rawValue.length === 0) { + continue; + } + args[name] = decodeDeepSeekDsmlText(rawValue); + } + if (Object.keys(args).length > 0) { + return args; + } + + const trimmed = body.trim(); + if (!trimmed.startsWith("{")) { + return null; + } + try { + const parsed = JSON.parse(trimmed) as unknown; + if (isRecord(parsed) && Object.keys(parsed).length > 0) { + return parsed; + } + } catch { + return null; + } + return null; +} + +function parseXmlAttribute(attributes: string, name: string): string | null { + const pattern = new RegExp(`\\b${name}=("([^"]*)"|'([^']*)'|([^\\s>]+))`); + const match = pattern.exec(attributes); + const value = match?.[2] ?? match?.[3] ?? match?.[4]; + return value ? decodeDeepSeekDsmlText(value) : null; +} + +function decodeDeepSeekDsmlText(value: string): string { + return value + .replaceAll(""", '"') + .replaceAll("'", "'") + .replaceAll("<", "<") + .replaceAll(">", ">") + .replaceAll("&", "&"); +} + +function findEarliestStringToken(text: string, tokens: readonly string[]) { + let best: { index: number; token: string } | null = null; + for (const token of tokens) { + const index = text.indexOf(token); + if (index !== -1 && (!best || index < best.index)) { + best = { index, token }; + } + } + return best; +} + +function longestDeepSeekDsmlToolOpenPrefixSuffixLength(text: string) { + const maxLength = Math.min(text.length, DEEPSEEK_DSML_TOOL_MAX_OPEN_TOKEN_LEN - 1); + for (let length = maxLength; length > 0; length -= 1) { + const suffix = text.slice(text.length - length); + if (DEEPSEEK_DSML_TOOL_OPEN_TOKENS.some((token) => token.startsWith(suffix))) { + return length; + } + } + return 0; +} + function getCompletionsContentDeltas(content: unknown): CompletionsReasoningDelta[] { if (typeof content === "string") { return content ? [{ kind: "text", text: content }] : []; diff --git a/src/agents/provider-transport-fetch.test.ts b/src/agents/provider-transport-fetch.test.ts index ef0c32575492..e35d2b6ea456 100644 --- a/src/agents/provider-transport-fetch.test.ts +++ b/src/agents/provider-transport-fetch.test.ts @@ -164,6 +164,45 @@ describe("buildGuardedModelFetch", () => { }); }); + it("rejects successful streamed OpenAI-compatible responses with HTML content", async () => { + const release = vi.fn(async () => undefined); + const model = { + id: "private-model", + provider: "custom-openai", + api: "openai-completions", + baseUrl: "https://proxy.example.com", + } as unknown as Model<"openai-completions">; + fetchWithSsrFGuardMock.mockResolvedValue({ + response: new Response("not the API", { + status: 200, + headers: { "content-type": "text/html; charset=utf-8" }, + }), + finalUrl: "https://proxy.example.com/chat/completions", + release, + }); + + let error: unknown; + try { + await buildGuardedModelFetch(model)("https://proxy.example.com/chat/completions", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ model: "private-model", stream: true }), + }); + } catch (caught) { + error = caught; + } + + expect(error).toMatchObject({ + name: "ProviderHttpError", + status: 200, + code: "invalid_provider_content_type", + errorType: "invalid_response", + }); + expect(error).toBeInstanceOf(Error); + expect((error as Error).message).toMatch(/baseUrl.*\/v1 path prefix/); + expect(release).toHaveBeenCalled(); + }); + it("ensures configured local services before the model request", async () => { const release = vi.fn(); ensureModelProviderLocalServiceMock.mockResolvedValue({ release }); diff --git a/src/agents/provider-transport-fetch.ts b/src/agents/provider-transport-fetch.ts index 7eea2768faac..18c9e0341d3e 100644 --- a/src/agents/provider-transport-fetch.ts +++ b/src/agents/provider-transport-fetch.ts @@ -25,6 +25,7 @@ import { createSubsystemLogger } from "../logging/subsystem.js"; import { resolveDebugProxySettings } from "../proxy-capture/env.js"; import { emitModelTransportDebug } from "./model-transport-debug.js"; import { formatModelTransportDebugUrl } from "./model-transport-url.js"; +import { ProviderHttpError, readResponseTextLimited } from "./provider-http-errors.js"; import { ensureModelProviderLocalService, type ProviderLocalServiceLease, @@ -219,6 +220,39 @@ function shouldSanitizeOpenAISdkSseResponse(model: Model): boolean { } } +function isJsonContentType(contentType: string): boolean { + return /\bapplication\/json\b/i.test(contentType) || /\+json\b/i.test(contentType); +} + +function isOpenAISdkStreamContentType(contentType: string): boolean { + return /\btext\/event-stream\b/i.test(contentType) || isJsonContentType(contentType); +} + +async function assertOpenAISdkStreamContentType(params: { + response: Response; + model: Model; + release: () => Promise; + localServiceLease?: ProviderLocalServiceLease; +}): Promise { + const contentType = params.response.headers.get("content-type") ?? ""; + if (!params.response.ok || !params.response.body || isOpenAISdkStreamContentType(contentType)) { + return; + } + const body = await readResponseTextLimited(params.response).catch(() => ""); + await params.release().catch(() => undefined); + params.localServiceLease?.release(); + const hint = + "OpenAI-compatible streamed responses must be text/event-stream or JSON; got " + + `${contentType || "missing content-type"}. Check the provider baseUrl; ` + + "OpenAI-compatible APIs commonly require a /v1 path prefix."; + throw new ProviderHttpError(`${params.model.provider}/${params.model.id}: ${hint}`, { + status: params.response.status, + code: "invalid_provider_content_type", + type: "invalid_response", + body, + }); +} + async function requestBodyHasStreamTrue( request: Request | undefined, init: RequestInit | undefined, @@ -720,6 +754,14 @@ export function buildGuardedModelFetch( headers, }); } + if (synthesizeJsonAsSse && options?.sanitizeSse !== false) { + await assertOpenAISdkStreamContentType({ + response, + model, + release: result.release, + localServiceLease, + }); + } response = buildManagedResponse( response, result.release, diff --git a/src/agents/simple-completion-transport.test.ts b/src/agents/simple-completion-transport.test.ts index bf7cfbc8ab60..d4a56f4954c2 100644 --- a/src/agents/simple-completion-transport.test.ts +++ b/src/agents/simple-completion-transport.test.ts @@ -10,6 +10,7 @@ const createOpenClawTransportStreamFnForModel = vi.fn(); const createTransportAwareStreamFnForModel = vi.fn(); const prepareTransportAwareSimpleModel = vi.fn(); const resolveTransportAwareSimpleApi = vi.fn(); +const prepareGoogleSimpleCompletionModel = vi.fn((model: unknown) => model); vi.mock("./anthropic-vertex-stream.js", () => ({ createAnthropicVertexStreamFnForModel, @@ -19,6 +20,10 @@ vi.mock("./custom-api-registry.js", () => ({ ensureCustomApiRegistered, })); +vi.mock("./google-simple-completion-stream.js", () => ({ + prepareGoogleSimpleCompletionModel, +})); + vi.mock("./provider-transport-stream.js", () => ({ buildTransportAwareSimpleStreamFn, createOpenClawTransportStreamFnForModel, @@ -53,6 +58,7 @@ describe("prepareModelForSimpleCompletion", () => { createTransportAwareStreamFnForModel.mockReset(); prepareTransportAwareSimpleModel.mockReset(); resolveTransportAwareSimpleApi.mockReset(); + prepareGoogleSimpleCompletionModel.mockReset(); createAnthropicVertexStreamFnForModel.mockReturnValue("vertex-stream"); resolveProviderStreamFn.mockReturnValue("ollama-stream"); buildTransportAwareSimpleStreamFn.mockReturnValue(undefined); @@ -60,6 +66,7 @@ describe("prepareModelForSimpleCompletion", () => { createTransportAwareStreamFnForModel.mockReturnValue(undefined); prepareTransportAwareSimpleModel.mockImplementation((model) => model); resolveTransportAwareSimpleApi.mockReturnValue(undefined); + prepareGoogleSimpleCompletionModel.mockImplementation((model) => model); }); it("registers the configured Ollama transport and keeps the original api", () => { @@ -173,6 +180,71 @@ describe("prepareModelForSimpleCompletion", () => { }); }); + it("uses the Google simple-completion sanitizer alias after transport checks pass through", () => { + const model: Model<"google-generative-ai"> = { + id: "gemini-flash-latest", + name: "Gemini Flash Latest", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1_000_000, + maxTokens: 8192, + headers: {}, + }; + prepareGoogleSimpleCompletionModel.mockImplementationOnce((m: unknown) => ({ + ...(m as Model<"google-generative-ai">), + api: "openclaw-google-generative-ai-simple", + })); + resolveProviderStreamFn.mockReturnValueOnce(undefined); + + const result = prepareModelForSimpleCompletion({ model }); + + expect(prepareTransportAwareSimpleModel).toHaveBeenCalledWith(model, { cfg: undefined }); + expect(prepareGoogleSimpleCompletionModel).toHaveBeenCalledWith(model); + expect(buildTransportAwareSimpleStreamFn).not.toHaveBeenCalled(); + expect(result).toEqual({ + ...model, + api: "openclaw-google-generative-ai-simple", + }); + }); + + it("keeps Google transport-aware models on the transport alias", () => { + const model: Model<"google-generative-ai"> = { + id: "gemini-flash-latest", + name: "Gemini Flash Latest", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1_000_000, + maxTokens: 8192, + headers: {}, + }; + + const transportModel = { + ...model, + api: "openclaw-google-generative-ai-transport", + }; + resolveProviderStreamFn.mockReturnValueOnce(undefined); + buildTransportAwareSimpleStreamFn.mockReturnValueOnce("google-transport-stream"); + prepareTransportAwareSimpleModel.mockReturnValueOnce(transportModel); + + const result = prepareModelForSimpleCompletion({ model }); + + expect(buildTransportAwareSimpleStreamFn).toHaveBeenCalledWith(model, { cfg: undefined }); + expect(ensureCustomApiRegistered).toHaveBeenCalledWith( + "openclaw-google-generative-ai-transport", + "google-transport-stream", + ); + expect(prepareGoogleSimpleCompletionModel).not.toHaveBeenCalled(); + expect(result).toBe(transportModel); + }); + it.each([ ["https://chatgpt.com/backend-api", "https://chatgpt.com/backend-api/codex"], ["https://chatgpt.com/backend-api/v1", "https://chatgpt.com/backend-api/codex"], diff --git a/src/agents/simple-completion-transport.ts b/src/agents/simple-completion-transport.ts index e36d9143a450..6061865016c5 100644 --- a/src/agents/simple-completion-transport.ts +++ b/src/agents/simple-completion-transport.ts @@ -3,6 +3,7 @@ import { getApiProvider } from "../llm/api-registry.js"; import type { Api, Model } from "../llm/types.js"; import { createAnthropicVertexStreamFnForModel } from "./anthropic-vertex-stream.js"; import { ensureCustomApiRegistered } from "./custom-api-registry.js"; +import { prepareGoogleSimpleCompletionModel } from "./google-simple-completion-stream.js"; import { registerProviderStreamForModel } from "./provider-stream.js"; import { buildTransportAwareSimpleStreamFn, @@ -99,6 +100,10 @@ export function prepareModelForSimpleCompletion(params: { } } + if (model.api === "google-generative-ai") { + return prepareGoogleSimpleCompletionModel(model); + } + if (model.provider === "anthropic-vertex") { const api = resolveAnthropicVertexSimpleApi(model.baseUrl); ensureCustomApiRegistered(api, createAnthropicVertexStreamFnForModel(model)); diff --git a/src/agents/tools/cron-tool-canonicalize.ts b/src/agents/tools/cron-tool-canonicalize.ts index cc75787c9b46..5332839ca233 100644 --- a/src/agents/tools/cron-tool-canonicalize.ts +++ b/src/agents/tools/cron-tool-canonicalize.ts @@ -41,6 +41,9 @@ const CRON_RECOVERABLE_OBJECT_KEYS: ReadonlySet = new Set([ "agentId", "sessionKey", "failureAlert", + "namePayload", + "scheduleKind", + "sessionTargetName", ...CRON_FLAT_PAYLOAD_KEYS, ...CRON_FLAT_SCHEDULE_KEYS, ]); @@ -77,6 +80,31 @@ function moveDefinedField(params: { return true; } +function repairConcatenatedCronToolKeys(value: Record): void { + // Some small/local tool-call parsers can return valid JSON with adjacent cron + // key names merged. Recover only the observed schema-specific pairs before + // strict gateway validation sees the malformed property names. + if (!isRecord(value.payload) && isRecord(value.namePayload)) { + value.payload = { ...value.namePayload }; + } + const rawScheduleKind = value.scheduleKind; + if (!isRecord(value.schedule)) { + if (isRecord(rawScheduleKind)) { + value.schedule = { ...rawScheduleKind }; + } else if (isCronScheduleKind(rawScheduleKind)) { + value.schedule = { kind: rawScheduleKind }; + } + } else if (isCronScheduleKind(rawScheduleKind) && !isCronScheduleKind(value.schedule.kind)) { + value.schedule = { ...value.schedule, kind: rawScheduleKind }; + } + if (!isNonEmptyString(value.name) && isNonEmptyString(value.sessionTargetName)) { + value.name = value.sessionTargetName; + } + delete value.namePayload; + delete value.scheduleKind; + delete value.sessionTargetName; +} + function setScheduleAtMs(schedule: Record, value: unknown): void { const atMs = typeof value === "number" ? value : Number(value); schedule.at = Number.isFinite(atMs) ? (timestampMsToIsoString(Math.floor(atMs)) ?? value) : value; @@ -213,6 +241,7 @@ export function canonicalizeCronToolObject( ): Record { const unwrapped = isRecord(value.data) ? value.data : isRecord(value.job) ? value.job : value; const next = { ...unwrapped }; + repairConcatenatedCronToolKeys(next); canonicalizeCronToolSchedule(next); canonicalizeCronToolPayload(next); return next; diff --git a/src/agents/tools/cron-tool.test.ts b/src/agents/tools/cron-tool.test.ts index ae13a4e0dfd0..845a94b72461 100644 --- a/src/agents/tools/cron-tool.test.ts +++ b/src/agents/tools/cron-tool.test.ts @@ -668,6 +668,54 @@ describe("cron tool", () => { expect(params?.failureAlert).toEqual({ after: 3, cooldownMs: 60_000 }); }); + it("recovers concatenated cron add keys from local tool-call parsers", async () => { + const tool = createTestCronTool(); + await tool.execute("call-concatenated-add", { + action: "add", + job: { + delivery: { mode: "none" }, + enabled: true, + namePayload: { kind: "agentTurn", message: "Evidence test.", timeoutSeconds: 10 }, + scheduleKind: { everyMs: 999_999, kind: "every" }, + sessionTargetName: "evidence-test", + }, + }); + + const params = expectSingleGatewayCallMethod("cron.add"); + expect(params).toEqual({ + delivery: { mode: "none" }, + enabled: true, + name: "evidence-test", + payload: { kind: "agentTurn", message: "Evidence test.", timeoutSeconds: 10 }, + schedule: { everyMs: 999_999, kind: "every" }, + sessionTarget: "isolated", + wakeMode: "now", + }); + }); + + it("recovers flat concatenated cron add keys from local tool-call parsers", async () => { + const tool = createTestCronTool(); + await tool.execute("call-flat-concatenated-add", { + action: "add", + delivery: { mode: "none" }, + enabled: true, + namePayload: { kind: "agentTurn", message: "Evidence test.", timeoutSeconds: 10 }, + scheduleKind: { everyMs: 999_999, kind: "every" }, + sessionTargetName: "evidence-test", + }); + + const params = expectSingleGatewayCallMethod("cron.add"); + expect(params).toEqual({ + delivery: { mode: "none" }, + enabled: true, + name: "evidence-test", + payload: { kind: "agentTurn", message: "Evidence test.", timeoutSeconds: 10 }, + schedule: { everyMs: 999_999, kind: "every" }, + sessionTarget: "isolated", + wakeMode: "now", + }); + }); + it("stamps cron.add with caller sessionKey when missing", async () => { callGatewayMock.mockResolvedValueOnce({ ok: true }); @@ -1403,6 +1451,90 @@ describe("cron tool", () => { }); }); + it("recovers concatenated cron update keys from local tool-call parsers", async () => { + callGatewayMock.mockResolvedValueOnce({ ok: true }); + + const tool = createTestCronTool(); + await tool.execute("call-update-concatenated", { + action: "update", + id: "job-concat", + patch: { + namePayload: { kind: "agentTurn", message: "Updated prompt.", timeoutSeconds: 20 }, + scheduleKind: { everyMs: 60_000, kind: "every" }, + sessionTargetName: "updated-name", + }, + }); + + const params = expectSingleGatewayCallMethod("cron.update") as + | { + id?: string; + patch?: { + name?: string; + payload?: { kind?: string; message?: string; timeoutSeconds?: number }; + schedule?: { kind?: string; everyMs?: number }; + }; + } + | undefined; + expect(params?.id).toBe("job-concat"); + expect(params?.patch).toEqual({ + name: "updated-name", + payload: { kind: "agentTurn", message: "Updated prompt.", timeoutSeconds: 20 }, + schedule: { everyMs: 60_000, kind: "every" }, + }); + }); + + it("recovers flat concatenated cron update keys from local tool-call parsers", async () => { + callGatewayMock.mockResolvedValueOnce({ ok: true }); + + const tool = createTestCronTool(); + await tool.execute("call-update-flat-concatenated", { + action: "update", + id: "job-concat", + namePayload: { kind: "agentTurn", message: "Updated prompt.", timeoutSeconds: 20 }, + scheduleKind: { everyMs: 60_000, kind: "every" }, + sessionTargetName: "updated-name", + }); + + const params = expectSingleGatewayCallMethod("cron.update") as + | { + id?: string; + patch?: { + name?: string; + payload?: { kind?: string; message?: string; timeoutSeconds?: number }; + schedule?: { kind?: string; everyMs?: number }; + }; + } + | undefined; + expect(params?.id).toBe("job-concat"); + expect(params?.patch).toEqual({ + name: "updated-name", + payload: { kind: "agentTurn", message: "Updated prompt.", timeoutSeconds: 20 }, + schedule: { everyMs: 60_000, kind: "every" }, + }); + }); + + it("uses flat string scheduleKind without leaking it to cron update", async () => { + callGatewayMock.mockResolvedValueOnce({ ok: true }); + + const tool = createTestCronTool(); + await tool.execute("call-update-string-schedule-kind", { + action: "update", + id: "job-kind", + expr: "0 8 * * *", + scheduleKind: "cron", + }); + + const params = expectSingleGatewayCallMethod("cron.update") as + | { + id?: string; + patch?: { schedule?: { kind?: string; expr?: string }; scheduleKind?: unknown }; + } + | undefined; + expect(params?.id).toBe("job-kind"); + expect(params?.patch).toEqual({ schedule: { expr: "0 8 * * *", kind: "cron" } }); + expect(params?.patch?.scheduleKind).toBeUndefined(); + }); + it("rejects malformed flattened fallback-only payload patch params for update action", async () => { const tool = createTestCronTool(); diff --git a/src/agents/tools/message-tool.test.ts b/src/agents/tools/message-tool.test.ts index cfd1b03d1f22..692beba153fc 100644 --- a/src/agents/tools/message-tool.test.ts +++ b/src/agents/tools/message-tool.test.ts @@ -2345,6 +2345,82 @@ describe("message tool internal-runtime-context sanitization", () => { }, ); + it("strips inbound metadata and delivery hints from outbound message text before dispatch (#89100)", async () => { + mockSendResult({ channel: "signal", to: "signal:group-1" }); + + const call = await executeSend({ + action: { + target: "signal:group-1", + message: [ + "Delivery: Final assistant text is not automatically delivered in this run. Use the `message` tool to send user-visible output.", + "", + "Conversation info (untrusted metadata):", + "```json", + '{"chat_id":"group:abc","sender_id":"+15551234567","is_group_chat":true}', + "```", + "", + "Sender (untrusted metadata):", + "```json", + '{"label":"Bob (+15551234567)","id":"+15551234567"}', + "```", + "", + "Visible reply only.", + ].join("\n"), + }, + }); + + expect(call?.params?.message).toBe("Visible reply only."); + expect(JSON.stringify(call?.params)).not.toContain("sender_id"); + expect(JSON.stringify(call?.params)).not.toContain("+15551234567"); + }); + + it.each([ + { + name: "delivery hint only", + message: + "Delivery: Final assistant text is not automatically delivered in this run. Use the `message` tool to send user-visible output.", + }, + { + name: "inbound metadata only", + message: [ + "Conversation info (untrusted metadata):", + "```json", + '{"chat_id":"group:abc","sender_id":"+15551234567"}', + "```", + ].join("\n"), + }, + ])("suppresses outbound sends that contain only $name (#89100)", async ({ message }) => { + const { call, result } = await executeSendWithResult({ + action: { + target: "signal:group-1", + message, + }, + }); + + expect(call).toBeUndefined(); + expect(mocks.runMessageAction).not.toHaveBeenCalled(); + expect(result.details).toMatchObject({ + status: "suppressed", + reason: "inbound_metadata_echo", + }); + expect(JSON.stringify(result)).not.toContain("sender_id"); + expect(JSON.stringify(result)).not.toContain("+15551234567"); + }); + + it("preserves legitimate outbound messages that start with timestamp-like text", async () => { + mockSendResult({ channel: "signal", to: "signal:group-1" }); + + const message = "[Wed 2026-03-11 23:51 PDT] Standup starts now"; + const call = await executeSend({ + action: { + target: "signal:group-1", + message, + }, + }); + + expect(call?.params?.message).toBe(message); + }); + it("strips internal-runtime-context blocks from poll creation text before dispatch", async () => { mockSendResult({ channel: "telegram", to: "telegram:123" }); diff --git a/src/agents/tools/message-tool.ts b/src/agents/tools/message-tool.ts index ece66ee00567..348bb8f9e840 100644 --- a/src/agents/tools/message-tool.ts +++ b/src/agents/tools/message-tool.ts @@ -6,6 +6,10 @@ import { GATEWAY_CLIENT_MODES, } from "../../../packages/gateway-protocol/src/client-info.js"; import type { SourceReplyDeliveryMode } from "../../auto-reply/get-reply-options.types.js"; +import { + hasInboundMetadataSentinel, + stripInboundMetadata, +} from "../../auto-reply/reply/strip-inbound-meta.js"; import type { InboundEventKind } from "../../channels/inbound-event/kind.js"; import { getChannelPlugin, @@ -96,20 +100,35 @@ function normalizeEscapedLineBreaksForVisibleText(text: string): string { return text.replace(/\\r\\n|\\n|\\r/g, "\n"); } +type VisibleTextSuppressionReason = "internal_runtime_context_echo" | "inbound_metadata_echo"; + function sanitizeUserVisibleToolTextResult( text: string, bootPrompt: string | undefined, -): { text: string; suppressed: boolean } { +): { + text: string; + suppressionReason?: VisibleTextSuppressionReason; +} { const normalized = normalizeEscapedLineBreaksForVisibleText(text); const strippedReasoning = stripFormattedReasoningMessage(normalized); const strippedInternal = stripInternalRuntimeContext(strippedReasoning); const strippedBoot = stripBootEchoFromOutboundText(strippedInternal, bootPrompt); + const strippedInbound = hasInboundMetadataSentinel(strippedBoot) + ? stripInboundMetadata(strippedBoot) + : strippedBoot; + const suppressionReason = + strippedBoot.trim().length === 0 && + strippedReasoning.trim().length > 0 && + (strippedInternal !== strippedReasoning || strippedBoot !== strippedInternal) + ? "internal_runtime_context_echo" + : strippedInbound.trim().length === 0 && + strippedBoot.trim().length > 0 && + strippedInbound !== strippedBoot + ? "inbound_metadata_echo" + : undefined; return { - text: strippedBoot, - suppressed: - strippedBoot.trim().length === 0 && - strippedReasoning.trim().length > 0 && - (strippedInternal !== strippedReasoning || strippedBoot !== strippedInternal), + text: strippedInbound, + ...(suppressionReason ? { suppressionReason } : {}), }; } @@ -117,54 +136,54 @@ function sanitizeStringParam( params: Record, field: string, bootPrompt: string | undefined, -): boolean { +): VisibleTextSuppressionReason | undefined { if (typeof params[field] !== "string") { - return false; + return undefined; } const sanitized = sanitizeUserVisibleToolTextResult(params[field], bootPrompt); params[field] = sanitized.text; - return sanitized.suppressed; + return sanitized.suppressionReason; } function sanitizeStringArrayParam( params: Record, field: string, bootPrompt: string | undefined, -): boolean { +): VisibleTextSuppressionReason | undefined { const value = params[field]; if (typeof value === "string") { const sanitized = sanitizeUserVisibleToolTextResult(value, bootPrompt); params[field] = sanitized.text; - return sanitized.suppressed; + return sanitized.suppressionReason; } if (!Array.isArray(value)) { - return false; + return undefined; } - let suppressed = false; + let suppressionReason: VisibleTextSuppressionReason | undefined; params[field] = value.map((entry) => { if (typeof entry !== "string") { return entry; } const sanitized = sanitizeUserVisibleToolTextResult(entry, bootPrompt); - suppressed ||= sanitized.suppressed; + suppressionReason ??= sanitized.suppressionReason; return sanitized.text; }); - return suppressed; + return suppressionReason; } function sanitizePresentationTextFieldsResult( value: unknown, bootPrompt: string | undefined, -): { value: unknown; suppressed: boolean } { +): { value: unknown; suppressionReason?: VisibleTextSuppressionReason } { if (!value || typeof value !== "object" || Array.isArray(value)) { - return { value, suppressed: false }; + return { value }; } - let suppressed = false; + let suppressionReason: VisibleTextSuppressionReason | undefined; const presentation = { ...(value as Record) }; if (typeof presentation.title === "string") { const sanitized = sanitizeUserVisibleToolTextResult(presentation.title, bootPrompt); presentation.title = sanitized.text; - suppressed ||= sanitized.suppressed; + suppressionReason ??= sanitized.suppressionReason; } if (Array.isArray(presentation.blocks)) { presentation.blocks = presentation.blocks.map((block) => { @@ -176,7 +195,7 @@ function sanitizePresentationTextFieldsResult( if (typeof sanitizedBlock[field] === "string") { const sanitized = sanitizeUserVisibleToolTextResult(sanitizedBlock[field], bootPrompt); sanitizedBlock[field] = sanitized.text; - suppressed ||= sanitized.suppressed; + suppressionReason ??= sanitized.suppressionReason; } } if (Array.isArray(sanitizedBlock.buttons)) { @@ -188,7 +207,7 @@ function sanitizePresentationTextFieldsResult( if (typeof sanitizedButton.label === "string") { const sanitized = sanitizeUserVisibleToolTextResult(sanitizedButton.label, bootPrompt); sanitizedButton.label = sanitized.text; - suppressed ||= sanitized.suppressed; + suppressionReason ??= sanitized.suppressionReason; } if (typeof sanitizedButton.url === "string") { const sanitized = sanitizeUserVisibleToolTextResult(sanitizedButton.url, bootPrompt); @@ -197,7 +216,7 @@ function sanitizePresentationTextFieldsResult( } else { delete sanitizedButton.url; } - suppressed ||= sanitized.suppressed; + suppressionReason ??= sanitized.suppressionReason; } for (const webAppField of ["webApp", "web_app"]) { const webApp = sanitizedButton[webAppField]; @@ -215,7 +234,7 @@ function sanitizePresentationTextFieldsResult( } else { delete sanitizedButton[webAppField]; } - suppressed ||= sanitized.suppressed; + suppressionReason ??= sanitized.suppressionReason; } return sanitizedButton; }); @@ -229,7 +248,7 @@ function sanitizePresentationTextFieldsResult( if (typeof sanitizedOption.label === "string") { const sanitized = sanitizeUserVisibleToolTextResult(sanitizedOption.label, bootPrompt); sanitizedOption.label = sanitized.text; - suppressed ||= sanitized.suppressed; + suppressionReason ??= sanitized.suppressionReason; } return sanitizedOption; }); @@ -237,7 +256,7 @@ function sanitizePresentationTextFieldsResult( return sanitizedBlock; }); } - return { value: presentation, suppressed }; + return { value: presentation, ...(suppressionReason ? { suppressionReason } : {}) }; } function readFirstStringParam(params: Record, keys: readonly string[]): string { @@ -1150,7 +1169,7 @@ export function createMessageTool(options?: MessageToolOptions): AnyAgentTool { // that paraphrase out the wrapper markers but reproduce a // substantial chunk of the boot prompt content. Refs #53732. const bootPromptForSession = getBootEchoContextForSession(options?.agentSessionKey); - let suppressedVisiblePayload = false; + let suppressedVisiblePayloadReason: VisibleTextSuppressionReason | undefined; parseJsonMessageParam(params, "presentation"); parseInteractiveParam(params); for (const field of [ @@ -1162,42 +1181,45 @@ export function createMessageTool(options?: MessageToolOptions): AnyAgentTool { "quoteText", "quote_text", ]) { - suppressedVisiblePayload = - sanitizeStringParam(params, field, bootPromptForSession) || suppressedVisiblePayload; + const suppressionReason = sanitizeStringParam(params, field, bootPromptForSession); + suppressedVisiblePayloadReason ??= suppressionReason; } for (const field of ["pollQuestion", "poll_question"]) { - suppressedVisiblePayload = - sanitizeStringParam(params, field, bootPromptForSession) || suppressedVisiblePayload; + const suppressionReason = sanitizeStringParam(params, field, bootPromptForSession); + suppressedVisiblePayloadReason ??= suppressionReason; } for (const field of ["pollOption", "poll_option"]) { - suppressedVisiblePayload = - sanitizeStringArrayParam(params, field, bootPromptForSession) || suppressedVisiblePayload; + const suppressionReason = sanitizeStringArrayParam(params, field, bootPromptForSession); + suppressedVisiblePayloadReason ??= suppressionReason; } const sanitizedPresentation = sanitizePresentationTextFieldsResult( params.presentation, bootPromptForSession, ); params.presentation = sanitizedPresentation.value; - suppressedVisiblePayload ||= sanitizedPresentation.suppressed; + suppressedVisiblePayloadReason ??= sanitizedPresentation.suppressionReason; const sanitizedInteractive = sanitizePresentationTextFieldsResult( params.interactive, bootPromptForSession, ); params.interactive = sanitizedInteractive.value; - suppressedVisiblePayload ||= sanitizedInteractive.suppressed; + suppressedVisiblePayloadReason ??= sanitizedInteractive.suppressionReason; const action = readStringParam(params, "action", { required: true, }) as ChannelMessageActionName; if ( - suppressedVisiblePayload && + suppressedVisiblePayloadReason && action === "send" && !hasSanitizedSendPayloadContent(params) ) { return jsonResult({ status: "suppressed", - reason: "internal_runtime_context_echo", - message: "Suppressed outbound message text because it matched internal runtime context.", + reason: suppressedVisiblePayloadReason, + message: + suppressedVisiblePayloadReason === "inbound_metadata_echo" + ? "Suppressed outbound message text because it matched inbound runtime metadata." + : "Suppressed outbound message text because it matched internal runtime context.", }); } const requireExplicitTarget = options?.requireExplicitTarget === true; diff --git a/src/auto-reply/reply/get-reply-run.media-only.test.ts b/src/auto-reply/reply/get-reply-run.media-only.test.ts index 454ab2cff0f5..60de543a7660 100644 --- a/src/auto-reply/reply/get-reply-run.media-only.test.ts +++ b/src/auto-reply/reply/get-reply-run.media-only.test.ts @@ -372,6 +372,52 @@ describe("runPreparedReply media-only handling", () => { expect(call.followupRun.run.thinkLevel).toBe("off"); }); + it("does not persist turn-local thinking fallback over a stored session override", async () => { + const sessionEntry: SessionEntry = { + sessionId: "session-thinking", + sessionFile: "/tmp/session-thinking.jsonl", + thinkingLevel: "high", + updatedAt: 1, + }; + const sessionStore: Record = { + "session-key": sessionEntry, + }; + + await runPreparedReply( + baseParams({ + provider: "openai", + model: "chat-latest", + resolvedThinkLevel: "high", + sessionEntry, + sessionStore, + storePath: "/tmp/openclaw-sessions.json", + modelState: { + resolveDefaultThinkingLevel: async () => "high", + resolveThinkingCatalog: async () => [ + { + provider: "openai", + id: "chat-latest", + reasoning: false, + }, + ], + allowedModelCatalog: [ + { + provider: "openai", + id: "chat-latest", + name: "Chat Latest", + }, + ], + } as never, + }), + ); + + const call = requireRunReplyAgentCall(); + expect(call.followupRun.run.thinkLevel).toBe("off"); + expect(sessionEntry.thinkingLevel).toBe("high"); + expect(sessionStore["session-key"]?.thinkingLevel).toBe("high"); + expect(updateSessionStore).not.toHaveBeenCalled(); + }); + it("keeps empty-assistant silence disabled for direct runs by default", async () => { await runPreparedReply( baseParams({ diff --git a/src/auto-reply/reply/get-reply-run.ts b/src/auto-reply/reply/get-reply-run.ts index 4e8254739256..c9076aa90471 100644 --- a/src/auto-reply/reply/get-reply-run.ts +++ b/src/auto-reply/reply/get-reply-run.ts @@ -335,9 +335,6 @@ const agentRunnerRuntimeLoader = createLazyImportLoader(() => import("./agent-ru const sessionUpdatesRuntimeLoader = createLazyImportLoader( () => import("./session-updates.runtime.js"), ); -const sessionStoreRuntimeLoader = createLazyImportLoader( - () => import("../../config/sessions/store.runtime.js"), -); function loadEmbeddedAgentRuntime() { return embeddedAgentRuntimeLoader.load(); @@ -351,10 +348,6 @@ function loadSessionUpdatesRuntime() { return sessionUpdatesRuntimeLoader.load(); } -function loadSessionStoreRuntime() { - return sessionStoreRuntimeLoader.load(); -} - function stripPromptThinkingDirectives(body: string): string { return body .split("\n") @@ -903,24 +896,9 @@ export async function runPreparedReply( catalog: thinkingCatalog, }); if (fallbackThinkLevel !== resolvedThinkLevel) { - const previousThinkLevel = resolvedThinkLevel; + // Execution fallbacks are turn-local; directive/model persistence owns + // durable thinking remaps so explicit session overrides survive replies. resolvedThinkLevel = fallbackThinkLevel; - if ( - sessionEntry && - sessionStore && - sessionKey && - sessionEntry.thinkingLevel === previousThinkLevel - ) { - sessionEntry.thinkingLevel = fallbackThinkLevel; - sessionEntry.updatedAt = Date.now(); - sessionStore[sessionKey] = sessionEntry; - if (storePath) { - const { updateSessionStore } = await loadSessionStoreRuntime(); - await updateSessionStore(storePath, (store) => { - store[sessionKey] = sessionEntry; - }); - } - } } } const internalOpts = opts as InternalGetReplyOptions | undefined; diff --git a/src/auto-reply/reply/strip-inbound-meta.ts b/src/auto-reply/reply/strip-inbound-meta.ts index a9d34c7929d1..6ae170cd8321 100644 --- a/src/auto-reply/reply/strip-inbound-meta.ts +++ b/src/auto-reply/reply/strip-inbound-meta.ts @@ -46,6 +46,10 @@ const SENTINEL_FAST_RE = new RegExp( .join("|"), ); +export function hasInboundMetadataSentinel(text: string): boolean { + return Boolean(text && SENTINEL_FAST_RE.test(text)); +} + function isMessageToolDeliveryHintLine(line: string): boolean { const trimmed = line.trim(); return MESSAGE_TOOL_DELIVERY_HINTS.some((hint) => hint === trimmed); diff --git a/src/auto-reply/status.test.ts b/src/auto-reply/status.test.ts index 2980ccf52662..72a64ffa7179 100644 --- a/src/auto-reply/status.test.ts +++ b/src/auto-reply/status.test.ts @@ -1590,6 +1590,38 @@ describe("buildStatusMessage", () => { expect(normalized).toContain("Fallbacks: google/gemini-2.5-flash, openai/gpt-5-mini"); }); + it("omits configured fallbacks for a session-selected model", () => { + const text = buildStatusMessage({ + configuredDefaultModelLabel: "google/gemini-3-flash-preview", + agent: { + model: { + primary: "google/gemini-3-flash-preview", + fallbacks: [ + "google/gemini-3.1-flash-lite", + "google/gemini-2.5-flash", + "google/gemini-3.1-pro-preview", + ], + }, + }, + sessionEntry: { + sessionId: "fb-session-selected", + updatedAt: 0, + modelProvider: "google", + model: "gemini-3.1-flash-lite", + modelOverride: "gemini-3.1-flash-lite", + modelOverrideSource: "user", + }, + sessionKey: "agent:main:main", + sessionScope: "per-sender", + queue: { mode: "collect", depth: 0 }, + modelAuth: "api-key", + }); + + const normalized = normalizeTestText(text); + expect(normalized).toContain("Session selected: google/gemini-3.1-flash-lite"); + expect(normalized).not.toContain("Fallbacks:"); + }); + it("omits configured fallbacks line when no fallbacks provided", () => { const text = buildStatusMessage({ agent: { diff --git a/src/cli/program/register.onboard.ts b/src/cli/program/register.onboard.ts index b4750fdfeb23..a864486621fb 100644 --- a/src/cli/program/register.onboard.ts +++ b/src/cli/program/register.onboard.ts @@ -141,7 +141,7 @@ export function registerOnboardCommand(program: Command): void { .option("--custom-provider-id ", "Custom provider ID (optional; auto-derived by default)") .option( "--custom-compatibility ", - "Custom provider API compatibility: openai|anthropic (default: openai)", + "Custom provider API compatibility: openai|openai-responses|anthropic (default: openai)", ) .option("--custom-image-input", "Mark the custom provider model as image-capable") .option("--custom-text-input", "Mark the custom provider model as text-only") @@ -217,7 +217,11 @@ export function registerOnboardCommand(program: Command): void { customApiKey: opts.customApiKey as string | undefined, customModelId: opts.customModelId as string | undefined, customProviderId: opts.customProviderId as string | undefined, - customCompatibility: opts.customCompatibility as "openai" | "anthropic" | undefined, + customCompatibility: opts.customCompatibility as + | "openai" + | "openai-responses" + | "anthropic" + | undefined, customImageInput: opts.customTextInput === true ? false diff --git a/src/commands/agent-command.test-mocks.ts b/src/commands/agent-command.test-mocks.ts index e471c90ca6e5..de09c6764d30 100644 --- a/src/commands/agent-command.test-mocks.ts +++ b/src/commands/agent-command.test-mocks.ts @@ -181,6 +181,7 @@ vi.mock("../agents/model-selection.js", () => { }, ), buildConfiguredModelCatalog: vi.fn(() => []), + buildModelAliasIndex: vi.fn(() => new Map()), isModelKeyAllowedBySet, isCliProvider: vi.fn(() => false), modelKey, @@ -195,6 +196,18 @@ vi.mock("../agents/model-selection.js", () => { resolveDefaultModelForAgent: vi.fn(({ cfg }: { cfg?: ConfigWithModels }) => resolveDefaultRef(cfg), ), + resolveModelRefFromString: vi.fn( + ({ + raw, + defaultProvider, + }: { + raw: string; + defaultProvider?: string; + }) => { + const ref = parseModelRef(raw, defaultProvider ?? "openai"); + return ref ? { ref, source: "parsed" } : null; + }, + ), resolveThinkingDefault: vi.fn( ({ cfg, diff --git a/src/commands/configure.gateway-auth.prompt-auth-config.test.ts b/src/commands/configure.gateway-auth.prompt-auth-config.test.ts index f3b0aaac8712..852cc0182c49 100644 --- a/src/commands/configure.gateway-auth.prompt-auth-config.test.ts +++ b/src/commands/configure.gateway-auth.prompt-auth-config.test.ts @@ -457,6 +457,44 @@ describe("promptAuthConfig", () => { expect(promptModelAllowlistOptions()?.preferredProvider).toBe("openai"); }); + it("canonicalizes a legacy Codex primary when OpenAI OAuth selects the matching model", async () => { + vi.clearAllMocks(); + mocks.promptAuthChoiceGrouped.mockResolvedValue("openai-device-code"); + mocks.resolvePreferredProviderForAuthChoice.mockResolvedValue("openai"); + mocks.applyAuthChoice.mockResolvedValue({ + config: { + agents: { + defaults: { + model: { primary: "codex/gpt-5.5" }, + models: { + "openai/gpt-5.5": {}, + "openai/gpt-5.3-codex": {}, + }, + }, + }, + }, + }); + mocks.promptModelAllowlist.mockResolvedValue({ + models: ["openai/gpt-5.5", "openai/gpt-5.3-codex"], + scopeKeys: ["openai/gpt-5.5", "openai/gpt-5.3-codex"], + }); + mocks.resolveProviderPluginChoice.mockReturnValue(null); + + const result = await promptAuthConfig({}, makeRuntime(), noopPrompter); + + expect(mocks.promptModelAllowlist).toHaveBeenCalledOnce(); + expect(promptModelAllowlistOptions()?.preferredProvider).toBe("openai"); + expect(mocks.applyPrimaryModel).toHaveBeenCalledWith(expect.any(Object), "openai/gpt-5.5"); + expect(result.agents?.defaults?.model).toEqual({ + primary: "openai/gpt-5.5", + fallbacks: ["openai/gpt-5.3-codex"], + }); + expect(Object.keys(result.agents?.defaults?.models ?? {})).toEqual([ + "openai/gpt-5.5", + "openai/gpt-5.3-codex", + ]); + }); + it("keeps the selected provider scope when existing config has another provider", async () => { vi.clearAllMocks(); mocks.promptAuthChoiceGrouped.mockResolvedValue("github-copilot"); diff --git a/src/commands/configure.gateway-auth.ts b/src/commands/configure.gateway-auth.ts index 664932b7db88..9b2b3745f528 100644 --- a/src/commands/configure.gateway-auth.ts +++ b/src/commands/configure.gateway-auth.ts @@ -110,6 +110,25 @@ function resolveProviderFromModelRef(model: string | undefined): string | undefi return slashIndex > 0 ? trimmed?.slice(0, slashIndex) : undefined; } +function resolveCanonicalOpenAISelectionForLegacyCodexPrimary( + cfg: OpenClawConfig, + selectedModels: readonly string[], +): string | undefined { + const currentModel = cfg.agents?.defaults?.model; + const primary = + typeof currentModel === "string" + ? currentModel.trim() + : currentModel && typeof currentModel === "object" && typeof currentModel.primary === "string" + ? currentModel.primary.trim() + : undefined; + const modelId = primary?.startsWith("codex/") ? primary.slice("codex/".length).trim() : ""; + if (!modelId) { + return undefined; + } + const canonical = `openai/${modelId}`; + return selectedModels.find((model) => model.trim() === canonical); +} + function resolveConfiguredProviderFromAuthChange(params: { before: OpenClawConfig; after: OpenClawConfig; @@ -285,6 +304,13 @@ export async function promptAuthConfig( loadCatalog: shouldLoadModelCatalog, }); if (allowlistSelection.models) { + const canonicalPrimary = resolveCanonicalOpenAISelectionForLegacyCodexPrimary( + next, + allowlistSelection.models, + ); + if (canonicalPrimary) { + next = applyPrimaryModel(next, canonicalPrimary); + } next = applyModelFallbacksFromSelection(next, allowlistSelection.models, { scopeKeys: allowlistSelection.scopeKeys, }); diff --git a/src/commands/onboard-custom-config.test.ts b/src/commands/onboard-custom-config.test.ts index e990ba9a1028..48e5dfc12c67 100644 --- a/src/commands/onboard-custom-config.test.ts +++ b/src/commands/onboard-custom-config.test.ts @@ -57,6 +57,25 @@ it("uses expanded max_tokens for openai verification probes", () => { expect(request.body.max_tokens).toBe(16); }); + +it("uses responses probes for custom OpenAI Responses endpoints", () => { + const request = buildOpenAiVerificationProbeRequest({ + baseUrl: "https://example.com/v1", + apiKey: "test-key", + modelId: "gpt-5.4", + responsesApi: true, + }); + + expect(request.endpoint).toBe("https://example.com/v1/responses"); + expect(request.headers.Authorization).toBe("Bearer test-key"); + expect(request.body).toEqual({ + model: "gpt-5.4", + input: "Hi", + max_output_tokens: 16, + stream: false, + }); +}); + it("uses azure responses-specific headers and body for openai verification probes", () => { const request = buildOpenAiVerificationProbeRequest({ baseUrl: "https://my-resource.openai.azure.com", @@ -148,7 +167,8 @@ describe("applyCustomApiConfig", () => { modelId: "foo-large", compatibility: "invalid" as unknown as "openai", }, - expectedMessage: 'Custom provider compatibility must be "openai" or "anthropic".', + expectedMessage: + 'Custom provider compatibility must be "openai", "openai-responses", or "anthropic".', }, { name: "explicit provider ids that normalize to empty", @@ -190,6 +210,20 @@ describe("applyCustomApiConfig", () => { expect(result.config.agents?.defaults?.models?.[modelRef]?.params?.thinking).toBe("medium"); }); + it("saves explicit custom OpenAI Responses compatibility", () => { + const result = applyCustomApiConfig({ + config: {}, + baseUrl: "https://responses.example.com/v1", + modelId: "gpt-5.4", + compatibility: "openai-responses", + apiKey: "abcd1234", + }); + + const provider = result.config.models?.providers?.[result.providerId!]; + expect(provider?.baseUrl).toBe("https://responses.example.com/v1"); + expect(provider?.api).toBe("openai-responses"); + }); + it("keeps selected compatibility for Azure AI Foundry URLs", () => { const result = applyCustomApiConfig({ config: {}, @@ -473,6 +507,16 @@ describe("parseNonInteractiveCustomApiFlags", () => { expect(result.supportsImageInput).toBe(true); }); + it("parses OpenAI Responses compatibility", () => { + const result = parseNonInteractiveCustomApiFlags({ + baseUrl: "https://llm.example.com/v1", + modelId: "gpt-5.4", + compatibility: "openai-responses", + }); + + expect(result.compatibility).toBe("openai-responses"); + }); + it.each([ { name: "missing required flags", @@ -486,7 +530,8 @@ describe("parseNonInteractiveCustomApiFlags", () => { modelId: "foo-large", compatibility: "xmlrpc", }, - expectedMessage: 'Invalid --custom-compatibility (use "openai" or "anthropic").', + expectedMessage: + 'Invalid --custom-compatibility (use "openai", "openai-responses", or "anthropic").', }, { name: "invalid explicit provider ids", diff --git a/src/commands/onboard-custom-config.ts b/src/commands/onboard-custom-config.ts index 2c85b1f482f5..5ef40372ef31 100644 --- a/src/commands/onboard-custom-config.ts +++ b/src/commands/onboard-custom-config.ts @@ -167,7 +167,7 @@ function hasSameHost(a: string, b: string): boolean { } } -export type CustomApiCompatibility = "openai" | "anthropic"; +export type CustomApiCompatibility = "openai" | "openai-responses" | "anthropic"; export type CustomApiResult = { config: OpenClawConfig; providerId?: string; @@ -349,7 +349,7 @@ export function normalizeOptionalProviderApiKey(value: unknown): SecretInput | u function resolveVerificationEndpoint(params: { baseUrl: string; modelId: string; - endpointPath: "chat/completions" | "messages"; + endpointPath: "chat/completions" | "responses" | "messages"; }) { const resolvedUrl = isAzureUrl(params.baseUrl) ? transformAzureUrl(params.baseUrl, params.modelId) @@ -368,15 +368,19 @@ export function buildOpenAiVerificationProbeRequest(params: { baseUrl: string; apiKey: string; modelId: string; + responsesApi?: boolean; }): VerificationRequest { const isBaseUrlAzureUrl = isAzureUrl(params.baseUrl); const headers = isBaseUrlAzureUrl ? buildAzureOpenAiHeaders(params.apiKey) : buildOpenAiHeaders(params.apiKey); - if (isAzureOpenAiUrl(params.baseUrl)) { + if (isAzureOpenAiUrl(params.baseUrl) || params.responsesApi === true) { const endpoint = new URL( "responses", - transformAzureConfigUrl(params.baseUrl).replace(/\/?$/, "/"), + (isBaseUrlAzureUrl ? transformAzureConfigUrl(params.baseUrl) : params.baseUrl).replace( + /\/?$/, + "/", + ), ).href; return { endpoint, @@ -437,8 +441,11 @@ export function buildAnthropicVerificationProbeRequest(params: { function resolveProviderApi( compatibility: CustomApiCompatibility, -): "openai-completions" | "anthropic-messages" { - return compatibility === "anthropic" ? "anthropic-messages" : "openai-completions"; +): "openai-completions" | "openai-responses" | "anthropic-messages" { + if (compatibility === "anthropic") { + return "anthropic-messages"; + } + return compatibility === "openai-responses" ? "openai-responses" : "openai-completions"; } function parseCustomApiCompatibility(raw?: string): CustomApiCompatibility { @@ -446,10 +453,14 @@ function parseCustomApiCompatibility(raw?: string): CustomApiCompatibility { if (!compatibilityRaw) { return "openai"; } - if (compatibilityRaw !== "openai" && compatibilityRaw !== "anthropic") { + if ( + compatibilityRaw !== "openai" && + compatibilityRaw !== "openai-responses" && + compatibilityRaw !== "anthropic" + ) { throw new CustomApiError( "invalid_compatibility", - 'Invalid --custom-compatibility (use "openai" or "anthropic").', + 'Invalid --custom-compatibility (use "openai", "openai-responses", or "anthropic").', ); } return compatibilityRaw; @@ -525,10 +536,14 @@ export function applyCustomApiConfig(params: ApplyCustomApiConfigParams): Custom throw new CustomApiError("invalid_base_url", "Custom provider base URL must be a valid URL."); } - if (params.compatibility !== "openai" && params.compatibility !== "anthropic") { + if ( + params.compatibility !== "openai" && + params.compatibility !== "openai-responses" && + params.compatibility !== "anthropic" + ) { throw new CustomApiError( "invalid_compatibility", - 'Custom provider compatibility must be "openai" or "anthropic".', + 'Custom provider compatibility must be "openai", "openai-responses", or "anthropic".', ); } diff --git a/src/commands/onboard-custom.test.ts b/src/commands/onboard-custom.test.ts index 45f0efc65971..77be7319e6c6 100644 --- a/src/commands/onboard-custom.test.ts +++ b/src/commands/onboard-custom.test.ts @@ -56,6 +56,7 @@ function stubFetchSequence( fetchMock.mockResolvedValueOnce({ ok: response.ok, status: response.status, + headers: new Headers({ "content-type": "application/json; charset=utf-8" }), json: async () => ({}), }); } @@ -106,6 +107,24 @@ describe("promptCustomApiConfig", () => { expect(prompter.confirm).not.toHaveBeenCalled(); }); + it("handles explicit OpenAI Responses flow", async () => { + const prompter = createTestPrompter({ + text: ["https://proxy.example.com/v1", "test-key", "gpt-5.4", "custom", ""], + select: ["plaintext", "openai-responses"], + }); + const fetchMock = stubFetchSequence([{ ok: true }]); + + const result = await runPromptCustomApi(prompter); + + expect(result.config.models?.providers?.custom?.api).toBe("openai-responses"); + expect(fetchMock.mock.calls[0]?.[0]).toBe("https://proxy.example.com/v1/responses"); + expect(JSON.parse(String(fetchMock.mock.calls[0]?.[1]?.body))).toMatchObject({ + model: "gpt-5.4", + input: "Hi", + max_output_tokens: 16, + }); + }); + it("skips the image-input prompt for known custom vision models", async () => { const prompter = createTestPrompter({ text: ["https://proxy.example.com/v1", "test-key", "gpt-4o", "custom", ""], @@ -163,6 +182,44 @@ describe("promptCustomApiConfig", () => { expect(prompter.select).toHaveBeenCalledTimes(3); }); + it("rejects successful-looking HTML verification responses with a base URL hint", async () => { + const prompter = createTestPrompter({ + text: [ + "https://proxy.example.com", + "test-key", + "bad-model", + "https://proxy.example.com/v1", + "test-key", + "custom", + "", + ], + select: ["plaintext", "openai", "baseUrl", "plaintext"], + }); + const fetchMock = vi + .fn() + .mockResolvedValueOnce({ + ok: true, + status: 200, + headers: new Headers({ "content-type": "text/html; charset=utf-8" }), + text: async () => "not the API", + }) + .mockResolvedValueOnce({ + ok: true, + status: 200, + headers: new Headers({ "content-type": "application/json" }), + json: async () => ({}), + }); + vi.stubGlobal("fetch", fetchMock); + + await runPromptCustomApi(prompter); + + expect(prompter.progress.mock.results[0]?.value.stop).toHaveBeenCalledWith( + expect.stringContaining("usually need a /v1 path prefix"), + ); + expect(fetchMock.mock.calls[0]?.[0]).toBe("https://proxy.example.com/chat/completions"); + expect(fetchMock.mock.calls[1]?.[0]).toBe("https://proxy.example.com/v1/chat/completions"); + }); + it("detects openai compatibility when unknown", async () => { const prompter = createTestPrompter({ text: ["https://example.com/v1", "test-key", "detected-model", "custom", "alias"], @@ -174,6 +231,22 @@ describe("promptCustomApiConfig", () => { expectOpenAiCompatResult({ prompter, textCalls: 5, selectCalls: 2, result }); }); + it("detects OpenAI Responses compatibility when chat completions fail", async () => { + const prompter = createTestPrompter({ + text: ["https://example.com/v1", "test-key", "detected-model", "custom", "alias"], + select: ["plaintext", "unknown"], + }); + const fetchMock = stubFetchSequence([{ ok: false, status: 503 }, { ok: true }]); + + const result = await runPromptCustomApi(prompter); + + expect(result.config.models?.providers?.custom?.api).toBe("openai-responses"); + expect(fetchMock.mock.calls[0]?.[0]).toBe("https://example.com/v1/chat/completions"); + expect(fetchMock.mock.calls[1]?.[0]).toBe("https://example.com/v1/responses"); + expect(prompter.text).toHaveBeenCalledTimes(5); + expect(prompter.select).toHaveBeenCalledTimes(2); + }); + it("re-prompts base url when unknown detection fails", async () => { const prompter = createTestPrompter({ text: [ @@ -187,11 +260,16 @@ describe("promptCustomApiConfig", () => { ], select: ["plaintext", "unknown", "baseUrl", "plaintext"], }); - stubFetchSequence([{ ok: false, status: 404 }, { ok: false, status: 404 }, { ok: true }]); + stubFetchSequence([ + { ok: false, status: 404 }, + { ok: false, status: 404 }, + { ok: false, status: 404 }, + { ok: true }, + ]); await runPromptCustomApi(prompter); expect(prompter.note).toHaveBeenCalledWith( - "This endpoint did not respond to OpenAI or Anthropic style requests.", + "This endpoint did not respond to OpenAI Chat, OpenAI Responses, or Anthropic style requests.", "Endpoint detection", ); }); diff --git a/src/commands/onboard-custom.ts b/src/commands/onboard-custom.ts index 8b599415cc05..70596bb0398d 100644 --- a/src/commands/onboard-custom.ts +++ b/src/commands/onboard-custom.ts @@ -54,6 +54,11 @@ const COMPATIBILITY_OPTIONS: Array<{ labelKey: "wizard.customProvider.compatibilityOpenAi", hintKey: "wizard.customProvider.compatibilityOpenAiHint", }, + { + value: "openai-responses", + labelKey: "wizard.customProvider.compatibilityOpenAiResponses", + hintKey: "wizard.customProvider.compatibilityOpenAiResponsesHint", + }, { value: "anthropic", labelKey: "wizard.customProvider.compatibilityAnthropic", @@ -89,6 +94,18 @@ type VerificationResult = { error?: unknown; }; +function isJsonVerificationResponse(res: Response): boolean { + const contentType = + typeof res.headers?.get === "function" ? (res.headers.get("content-type") ?? "") : ""; + if (!contentType.trim()) { + return true; + } + const mediaType = contentType.split(";", 1)[0]?.trim().toLowerCase(); + return ( + mediaType === "application/json" || (mediaType !== undefined && mediaType.endsWith("+json")) + ); +} + async function requestVerification(params: { endpoint: string; headers: Record; @@ -107,6 +124,13 @@ async function requestVerification(params: { }, VERIFY_TIMEOUT_MS, ); + if (res.ok && !isJsonVerificationResponse(res)) { + const contentType = res.headers.get("content-type") || "missing content-type"; + return { + ok: false, + error: `Verification returned ${contentType} instead of JSON. Check the provider base URL; OpenAI-compatible endpoints usually need a /v1 path prefix.`, + }; + } return { ok: res.ok, status: res.status }; } catch (error) { return { ok: false, error }; @@ -117,6 +141,7 @@ async function requestOpenAiVerification(params: { baseUrl: string; apiKey: string; modelId: string; + responsesApi?: boolean; }): Promise { return await requestVerification(buildOpenAiVerificationProbeRequest(params)); } @@ -259,30 +284,42 @@ export async function promptCustomApiConfig(params: { compatibility = "openai"; verifiedFromProbe = true; } else { - const anthropicProbe = await requestAnthropicVerification({ + const openaiResponsesProbe = await requestOpenAiVerification({ baseUrl, apiKey: resolvedApiKey, modelId, + responsesApi: true, }); - if (anthropicProbe.ok) { - probeSpinner.stop(t("wizard.customProvider.detectedAnthropic")); - compatibility = "anthropic"; + if (openaiResponsesProbe.ok) { + probeSpinner.stop(t("wizard.customProvider.detectedOpenAiResponses")); + compatibility = "openai-responses"; verifiedFromProbe = true; } else { - probeSpinner.stop(t("wizard.customProvider.detectionFailed")); - await prompter.note( - t("wizard.customProvider.detectionFailedNote"), - t("wizard.customProvider.detectionNoteTitle"), - ); - const retryChoice = await promptCustomApiRetryChoice(prompter); - ({ baseUrl, apiKey, resolvedApiKey, modelId } = await applyCustomApiRetryChoice({ - prompter, - config, - secretInputMode: params.secretInputMode, - retryChoice, - current: { baseUrl, apiKey, resolvedApiKey, modelId }, - })); - continue; + const anthropicProbe = await requestAnthropicVerification({ + baseUrl, + apiKey: resolvedApiKey, + modelId, + }); + if (anthropicProbe.ok) { + probeSpinner.stop(t("wizard.customProvider.detectedAnthropic")); + compatibility = "anthropic"; + verifiedFromProbe = true; + } else { + probeSpinner.stop(t("wizard.customProvider.detectionFailed")); + await prompter.note( + t("wizard.customProvider.detectionFailedNote"), + t("wizard.customProvider.detectionNoteTitle"), + ); + const retryChoice = await promptCustomApiRetryChoice(prompter); + ({ baseUrl, apiKey, resolvedApiKey, modelId } = await applyCustomApiRetryChoice({ + prompter, + config, + secretInputMode: params.secretInputMode, + retryChoice, + current: { baseUrl, apiKey, resolvedApiKey, modelId }, + })); + continue; + } } } } @@ -295,21 +332,26 @@ export async function promptCustomApiConfig(params: { const result = compatibility === "anthropic" ? await requestAnthropicVerification({ baseUrl, apiKey: resolvedApiKey, modelId }) - : await requestOpenAiVerification({ baseUrl, apiKey: resolvedApiKey, modelId }); + : await requestOpenAiVerification({ + baseUrl, + apiKey: resolvedApiKey, + modelId, + responsesApi: compatibility === "openai-responses", + }); if (result.ok) { verifySpinner.stop(t("wizard.customProvider.verificationSuccessful")); break; } - if (result.status !== undefined) { - verifySpinner.stop( - t("wizard.customProvider.verificationFailedStatus", { status: result.status }), - ); - } else { + if (result.error !== undefined) { verifySpinner.stop( t("wizard.customProvider.verificationFailedError", { error: formatVerificationError(result.error), }), ); + } else { + verifySpinner.stop( + t("wizard.customProvider.verificationFailedStatus", { status: result.status }), + ); } const retryChoice = await promptCustomApiRetryChoice(prompter); ({ baseUrl, apiKey, resolvedApiKey, modelId } = await applyCustomApiRetryChoice({ diff --git a/src/commands/onboard-non-interactive/local/auth-choice.test.ts b/src/commands/onboard-non-interactive/local/auth-choice.test.ts index 366d7c8f7394..62b34d2ee820 100644 --- a/src/commands/onboard-non-interactive/local/auth-choice.test.ts +++ b/src/commands/onboard-non-interactive/local/auth-choice.test.ts @@ -142,6 +142,26 @@ describe("applyNonInteractiveAuthChoice", () => { expect(apiKeyParams?.secretInputMode).toBe("ref"); }); + it("stores custom provider OpenAI Responses compatibility", async () => { + const runtime = createRuntime(); + const nextConfig = { agents: { defaults: {} } } as OpenClawConfig; + resolveNonInteractiveApiKey.mockResolvedValueOnce(undefined); + + const result = await applyNonInteractiveAuthChoice({ + nextConfig, + authChoice: "custom-api-key", + opts: { + customBaseUrl: "https://models.custom.local/v1", + customModelId: "gpt-5.4", + customCompatibility: "openai-responses", + } as never, + runtime: runtime as never, + baseConfig: nextConfig, + }); + + expect(result?.models?.providers?.["custom-models-custom-local"]?.api).toBe("openai-responses"); + }); + it("marks non-interactive custom provider models as image-capable when requested", async () => { const runtime = createRuntime(); const nextConfig = { agents: { defaults: {} } } as OpenClawConfig; diff --git a/src/commands/onboard-types.ts b/src/commands/onboard-types.ts index de3e8f9da86b..cd92885532cc 100644 --- a/src/commands/onboard-types.ts +++ b/src/commands/onboard-types.ts @@ -59,7 +59,7 @@ export type OnboardOptions = OnboardDynamicProviderOptions & { lmstudioApiKey?: string; customModelId?: string; customProviderId?: string; - customCompatibility?: "openai" | "anthropic"; + customCompatibility?: "openai" | "openai-responses" | "anthropic"; customImageInput?: boolean; gatewayPort?: number; gatewayBind?: GatewayBind; diff --git a/src/gateway/server-methods/node-child-process.test-support.ts b/src/gateway/server-methods/node-child-process.test-support.ts index e86212abccff..a06b95e93b37 100644 --- a/src/gateway/server-methods/node-child-process.test-support.ts +++ b/src/gateway/server-methods/node-child-process.test-support.ts @@ -1,9 +1,9 @@ import { vi } from "vitest"; +import { mockNodeBuiltinModule } from "../../plugin-sdk/test-helpers/node-builtin-mocks.js"; export async function mockNodeChildProcessModule( overrides: Partial, ) { - const { mockNodeBuiltinModule } = await import("openclaw/plugin-sdk/test-node-mocks"); return mockNodeBuiltinModule( () => vi.importActual("node:child_process"), overrides, diff --git a/src/gateway/sessions-patch.test.ts b/src/gateway/sessions-patch.test.ts index 22d9e9955fd2..0c600b338598 100644 --- a/src/gateway/sessions-patch.test.ts +++ b/src/gateway/sessions-patch.test.ts @@ -479,11 +479,13 @@ describe("gateway sessions patch", () => { expect(entry.liveModelSwitchPending).toBe(true); }); - test("marks model reset patches as pending live model switches", async () => { + test("clears pending live model switches for model reset patches", async () => { const store = mainStoreEntry({ sessionId: "sess-live-reset", providerOverride: "anthropic", modelOverride: ANTHROPIC_SONNET_ID, + modelOverrideSource: "user", + liveModelSwitchPending: true, }); const entry = await applyMainModelPatch({ store, @@ -492,7 +494,8 @@ describe("gateway sessions patch", () => { }); expectModelSelection(entry, undefined, undefined); - expect(entry.liveModelSwitchPending).toBe(true); + expect(entry.modelOverrideSource).toBeUndefined(); + expect(entry.liveModelSwitchPending).toBeUndefined(); }); test.each([ diff --git a/src/gateway/sessions-patch.ts b/src/gateway/sessions-patch.ts index ab34ab3e6e5e..202450f953dd 100644 --- a/src/gateway/sessions-patch.ts +++ b/src/gateway/sessions-patch.ts @@ -530,8 +530,8 @@ export async function applySessionsPatchToStore(params: { entry: next, provider: resolvedDefault.provider, }), - markLiveSwitchPending: true, }); + delete next.liveModelSwitchPending; } else if (raw !== undefined) { const trimmed = normalizeOptionalString(raw) ?? ""; if (!trimmed) { diff --git a/src/gateway/tool-resolution.exclude.test.ts b/src/gateway/tool-resolution.exclude.test.ts new file mode 100644 index 000000000000..6199f1d29bcd --- /dev/null +++ b/src/gateway/tool-resolution.exclude.test.ts @@ -0,0 +1,79 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import type { OpenClawConfig } from "../config/types.openclaw.js"; + +type CreateOpenClawToolsArg = { + inheritedToolDenylist?: string[]; + pluginToolDenylist?: string[]; +}; + +const hoisted = vi.hoisted(() => ({ + createOpenClawToolsMock: vi.fn((_args: CreateOpenClawToolsArg) => [ + { + name: "read", + description: "Read files", + parameters: { type: "object", properties: {} }, + execute: vi.fn(), + }, + { + name: "sessions_spawn", + description: "Spawn sessions", + parameters: { type: "object", properties: {} }, + execute: vi.fn(), + }, + ]), +})); + +vi.mock("../agents/openclaw-tools.js", () => ({ + createOpenClawTools: (args: CreateOpenClawToolsArg) => hoisted.createOpenClawToolsMock(args), +})); + +import { resolveGatewayScopedTools } from "./tool-resolution.js"; + +describe("resolveGatewayScopedTools excludeToolNames", () => { + beforeEach(() => { + hoisted.createOpenClawToolsMock.mockClear(); + }); + + function readCreateToolsArgs(): { + inheritedToolDenylist?: string[]; + pluginToolDenylist?: string[]; + } { + const args = hoisted.createOpenClawToolsMock.mock.calls[0]?.[0]; + if (!args || typeof args !== "object") { + throw new Error("expected createOpenClawTools args"); + } + return args as { + inheritedToolDenylist?: string[]; + pluginToolDenylist?: string[]; + }; + } + + it("filters loopback dedup exclusions without inheriting policy denies", () => { + const result = resolveGatewayScopedTools({ + cfg: {} as OpenClawConfig, + sessionKey: "agent:main:direct:test", + surface: "loopback", + excludeToolNames: ["read", "apply_patch"], + }); + + expect(result.tools.map((tool) => tool.name)).toEqual(["sessions_spawn"]); + const args = readCreateToolsArgs(); + expect(args.pluginToolDenylist).toEqual([]); + expect(args.inheritedToolDenylist).toEqual([]); + }); + + it("keeps real gateway deny policy inheritable while excluding native dedup tools", () => { + resolveGatewayScopedTools({ + cfg: { + gateway: { tools: { deny: ["exec"] } }, + } as OpenClawConfig, + sessionKey: "agent:main:direct:test", + surface: "loopback", + excludeToolNames: ["read", "apply_patch"], + }); + + const args = readCreateToolsArgs(); + expect(args.pluginToolDenylist).toEqual(["exec"]); + expect(args.inheritedToolDenylist).toEqual(["exec"]); + }); +}); diff --git a/src/gateway/tool-resolution.ts b/src/gateway/tool-resolution.ts index 1068e4717671..af772346d952 100644 --- a/src/gateway/tool-resolution.ts +++ b/src/gateway/tool-resolution.ts @@ -126,7 +126,6 @@ export function resolveGatewayScopedTools(params: { inheritedToolPolicy, defaultGatewayDeny.length > 0 ? { deny: defaultGatewayDeny } : undefined, Array.isArray(gatewayToolsCfg?.deny) ? { deny: gatewayToolsCfg.deny } : undefined, - excludedToolNames.length > 0 ? { deny: excludedToolNames } : undefined, ]); const inheritedToolDenylist = [...explicitDenylist]; // Passed by reference to sessions_spawn and populated after the final policy diff --git a/src/llm/utils/json-parse.test.ts b/src/llm/utils/json-parse.test.ts index 566b031f2463..e5a6eb17342f 100644 --- a/src/llm/utils/json-parse.test.ts +++ b/src/llm/utils/json-parse.test.ts @@ -13,6 +13,23 @@ describe("json-parse repairJson invalid \\u escapes", () => { expect(parseJsonWithRepair('{"e":"\\u0041"}')).toEqual({ e: "A" }); }); + it.each([ + ['{"path":"C:\\bin\\app.exe"}', "C:\\bin\\app.exe"], + ['{"path":"C:\\temp\\x"}', "C:\\temp\\x"], + ['{"path":"C:\\new\\file"}', "C:\\new\\file"], + ['{"path":"D:\\reports\\q"}', "D:\\reports\\q"], + ['{"path":"C:\\users\\bob"}', "C:\\users\\bob"], + ])("preserves unescaped Windows path control-letter segments: %s", (input, expected) => { + expect(parseStreamingJson(input)).toEqual({ path: expected }); + expect(parseJsonWithRepair(input)).toEqual({ path: expected }); + }); + + it("preserves legitimate JSON control escapes outside Windows paths", () => { + expect(parseJsonWithRepair('{"message":"line\\nnext\\ttabbed"}')).toEqual({ + message: "line\nnext\ttabbed", + }); + }); + it("recovers streaming tool-call arguments instead of dropping them to {}", () => { // LaTeX-style \u (\underline) is a valid string value the model may emit in args. const args = '{"cmd":"\\underline{x}"}'; diff --git a/src/llm/utils/json-parse.ts b/src/llm/utils/json-parse.ts index 8ab7a61e346a..bd1c016f379e 100644 --- a/src/llm/utils/json-parse.ts +++ b/src/llm/utils/json-parse.ts @@ -1,6 +1,7 @@ import { parse as partialParse } from "partial-json"; const VALID_JSON_ESCAPES = new Set(['"', "\\", "/", "b", "f", "n", "r", "t", "u"]); +const JSON_CONTROL_ESCAPES = new Set(["b", "f", "n", "r", "t"]); function isControlCharacter(char: string): boolean { const codePoint = char.codePointAt(0); @@ -32,6 +33,7 @@ function escapeControlCharacter(char: string): string { export function repairJson(json: string): string { let repaired = ""; let inString = false; + let stringValuePrefix = ""; for (let index = 0; index < json.length; index++) { const char = json[index]; @@ -40,6 +42,7 @@ export function repairJson(json: string): string { repaired += char; if (char === '"') { inString = true; + stringValuePrefix = ""; } continue; } @@ -47,6 +50,7 @@ export function repairJson(json: string): string { if (char === '"') { repaired += char; inString = false; + stringValuePrefix = ""; continue; } @@ -61,6 +65,7 @@ export function repairJson(json: string): string { const unicodeDigits = json.slice(index + 2, index + 6); if (/^[0-9a-fA-F]{4}$/.test(unicodeDigits)) { repaired += `\\u${unicodeDigits}`; + stringValuePrefix += `\\u${unicodeDigits}`; index += 5; continue; } @@ -69,35 +74,46 @@ export function repairJson(json: string): string { // hit the valid-escape branch (VALID_JSON_ESCAPES contains "u") and // re-emit the broken \u, leaving the JSON unparseable. repaired += "\\\\"; + stringValuePrefix += "\\"; + continue; + } + + if (JSON_CONTROL_ESCAPES.has(nextChar) && looksLikeWindowsPathPrefix(stringValuePrefix)) { + repaired += "\\\\"; + stringValuePrefix += "\\"; continue; } if (VALID_JSON_ESCAPES.has(nextChar)) { repaired += `\\${nextChar}`; + stringValuePrefix += nextChar === "\\" ? "\\" : `\\${nextChar}`; index += 1; continue; } repaired += "\\\\"; + stringValuePrefix += "\\"; continue; } repaired += isControlCharacter(char) ? escapeControlCharacter(char) : char; + stringValuePrefix += char; } return repaired; } export function parseJsonWithRepair(json: string): unknown { - try { - return JSON.parse(json) as unknown; - } catch (error) { - const repairedJson = repairJson(json); - if (repairedJson !== json) { - return JSON.parse(repairedJson) as unknown; - } - throw error; + const repairedJson = repairJson(json); + if (repairedJson !== json) { + return JSON.parse(repairedJson) as unknown; } + return JSON.parse(json) as unknown; +} + +function looksLikeWindowsPathPrefix(prefix: string): boolean { + const tail = prefix.slice(-160); + return /(?:^|[^A-Za-z0-9])[A-Za-z]:(?:[\\/][^"\\/:*?<>|\r\n]*)*$/.test(tail); } /** diff --git a/src/status/status-message.ts b/src/status/status-message.ts index 176e76ffc058..aa1fbb518b38 100644 --- a/src/status/status-message.ts +++ b/src/status/status-message.ts @@ -997,7 +997,7 @@ export function buildStatusMessage(args: StatusArgs): string { const configuredFallbacks = (() => { const modelConfig = args.agent?.model; if (typeof modelConfig === "object" && modelConfig && Array.isArray(modelConfig.fallbacks)) { - return modelConfig.fallbacks; + return sessionHasPersistedModelSelection ? undefined : modelConfig.fallbacks; } return undefined; })(); diff --git a/src/wizard/i18n/locales/en.ts b/src/wizard/i18n/locales/en.ts index 4988f88de963..85096d7097f4 100644 --- a/src/wizard/i18n/locales/en.ts +++ b/src/wizard/i18n/locales/en.ts @@ -29,14 +29,18 @@ export const en = { compatibilityAnthropicHint: "Uses /messages", compatibilityOpenAi: "OpenAI-compatible", compatibilityOpenAiHint: "Uses /chat/completions", + compatibilityOpenAiResponses: "OpenAI Responses-compatible", + compatibilityOpenAiResponsesHint: "Uses /responses", compatibilityUnknown: "Unknown (detect automatically)", - compatibilityUnknownHint: "Probes OpenAI then Anthropic endpoints", + compatibilityUnknownHint: "Probes OpenAI Chat, OpenAI Responses, then Anthropic endpoints", detectionFailed: "Could not detect endpoint type.", - detectionFailedNote: "This endpoint did not respond to OpenAI or Anthropic style requests.", + detectionFailedNote: + "This endpoint did not respond to OpenAI Chat, OpenAI Responses, or Anthropic style requests.", detectionNoteTitle: "Endpoint detection", detectionProgress: "Detecting endpoint type...", detectedAnthropic: "Detected Anthropic-compatible endpoint.", detectedOpenAi: "Detected OpenAI-compatible endpoint.", + detectedOpenAiResponses: "Detected OpenAI Responses-compatible endpoint.", endpointId: "Endpoint ID", endpointIdRequired: "Endpoint ID is required.", endpointIdRenamed: diff --git a/src/wizard/i18n/locales/zh-CN.ts b/src/wizard/i18n/locales/zh-CN.ts index e5167ddd7236..46242a84a45f 100644 --- a/src/wizard/i18n/locales/zh-CN.ts +++ b/src/wizard/i18n/locales/zh-CN.ts @@ -29,14 +29,18 @@ export const zh_CN = { compatibilityAnthropicHint: "使用 /messages", compatibilityOpenAi: "兼容 OpenAI", compatibilityOpenAiHint: "使用 /chat/completions", + compatibilityOpenAiResponses: "兼容 OpenAI Responses", + compatibilityOpenAiResponsesHint: "使用 /responses", compatibilityUnknown: "未知(自动检测)", - compatibilityUnknownHint: "先探测 OpenAI,再探测 Anthropic 端点", + compatibilityUnknownHint: "先探测 OpenAI Chat、OpenAI Responses,再探测 Anthropic 端点", detectionFailed: "无法检测端点类型。", - detectionFailedNote: "这个端点没有响应 OpenAI 或 Anthropic 风格的请求。", + detectionFailedNote: + "这个端点没有响应 OpenAI Chat、OpenAI Responses 或 Anthropic 风格的请求。", detectionNoteTitle: "端点检测", detectionProgress: "正在检测端点类型...", detectedAnthropic: "检测到兼容 Anthropic 的端点。", detectedOpenAi: "检测到兼容 OpenAI 的端点。", + detectedOpenAiResponses: "检测到兼容 OpenAI Responses 的端点。", endpointId: "端点 ID", endpointIdRequired: "端点 ID 必填。", endpointIdRenamed: '端点 ID "{from}" 已用于不同的基础 URL。将使用 "{to}"。', diff --git a/src/wizard/i18n/locales/zh-TW.ts b/src/wizard/i18n/locales/zh-TW.ts index 91c9231dcd37..aa68de4e818e 100644 --- a/src/wizard/i18n/locales/zh-TW.ts +++ b/src/wizard/i18n/locales/zh-TW.ts @@ -29,14 +29,18 @@ export const zh_TW = { compatibilityAnthropicHint: "使用 /messages", compatibilityOpenAi: "相容 OpenAI", compatibilityOpenAiHint: "使用 /chat/completions", + compatibilityOpenAiResponses: "相容 OpenAI Responses", + compatibilityOpenAiResponsesHint: "使用 /responses", compatibilityUnknown: "未知(自動偵測)", - compatibilityUnknownHint: "先探測 OpenAI,再探測 Anthropic 端點", + compatibilityUnknownHint: "先探測 OpenAI Chat、OpenAI Responses,再探測 Anthropic 端點", detectionFailed: "無法偵測端點類型。", - detectionFailedNote: "這個端點沒有回應 OpenAI 或 Anthropic 風格的請求。", + detectionFailedNote: + "這個端點沒有回應 OpenAI Chat、OpenAI Responses 或 Anthropic 風格的請求。", detectionNoteTitle: "端點偵測", detectionProgress: "正在偵測端點類型...", detectedAnthropic: "偵測到相容 Anthropic 的端點。", detectedOpenAi: "偵測到相容 OpenAI 的端點。", + detectedOpenAiResponses: "偵測到相容 OpenAI Responses 的端點。", endpointId: "端點 ID", endpointIdRequired: "端點 ID 必填。", endpointIdRenamed: '端點 ID "{from}" 已用於不同的基礎 URL。將使用 "{to}"。',