Compare commits

...

4 Commits

Author SHA1 Message Date
Peter Steinberger
7685a75fb6 test(opencode): avoid forced tool choice in live replay 2026-05-28 16:15:03 +01:00
Peter Steinberger
9e977d1590 test(opencode): add live DeepSeek replay probe 2026-05-28 16:15:03 +01:00
Pluviobyte
2474911e4d fix(agents): avoid spread-rebuild when iterating allowlist candidates
oxlint flagged the [...candidates] spread as an unnecessary array copy. Use an explicit baseCount loop bound instead so we still iterate the original entries while pushing tier-stripped variants onto the same array.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-28 16:15:03 +01:00
Pluviobyte
2acd256c9d fix(agents): preserve reasoning_content replay across DeepSeek tier suffixes
OpenCode Zen exposes DeepSeek V4 as `deepseek-v4-flash-free`, which keeps the upstream DeepSeek thinking-mode contract that requires `reasoning_content` to be passed back on follow-up requests. The existing replay allowlist only matched the bare ids (`deepseek-v4-flash`, `kimi-k2-thinking`, ...), so the tier-suffixed id missed every candidate and the sanitizer stripped `reasoning_content` from the assistant turn. DeepSeek then rejected the second API call with HTTP 400 and the session deadlocked.

Strip the well-known tier suffixes (`-free`, `-paid`, `-trial`) when generating allowlist candidates so the base model id matches and the reasoning replay survives. Existing matching for prefixed / colon-suffixed routes is unchanged.

Fixes #87575

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-28 16:15:03 +01:00
3 changed files with 204 additions and 0 deletions

View File

@@ -0,0 +1,131 @@
import {
completeSimple,
type AssistantMessage,
type Model,
type Tool,
} from "openclaw/plugin-sdk/llm";
import { extractNonEmptyAssistantText, isLiveTestEnabled } from "openclaw/plugin-sdk/test-env";
import { Type } from "typebox";
import { describe, expect, it } from "vitest";
// API key for the live probe: the first non-empty (after trimming) value of
// OPENCODE_API_KEY, then OPENCODE_ZEN_API_KEY; empty string when neither is set.
const OPENCODE_API_KEY =
  [process.env.OPENCODE_API_KEY, process.env.OPENCODE_ZEN_API_KEY]
    .map((raw) => raw?.trim())
    .find((value) => Boolean(value)) ?? "";

// Model id under test; overridable through the environment, otherwise the
// tier-suffixed DeepSeek id.
const LIVE_MODEL_ID =
  (process.env.OPENCLAW_LIVE_OPENCODE_DEEPSEEK_MODEL ?? "").trim() || "deepseek-v4-flash-free";

// The suite only runs when the live-test switch is on AND a key is available.
const LIVE = isLiveTestEnabled(["OPENCODE_LIVE_TEST"]) && OPENCODE_API_KEY !== "";

// Resolves to `describe` for live runs and `describe.skip` otherwise.
const describeLive = !LIVE ? describe.skip : describe;
/**
 * Builds the model descriptor used by the live probe.
 *
 * The id and display name both come from `LIVE_MODEL_ID`; the descriptor
 * points at the `https://opencode.ai/zen/v1` endpoint through the
 * `openai-completions` API with reasoning enabled and text-only input.
 *
 * @returns A fresh descriptor object on every call.
 */
function resolveOpencodeDeepSeekLiveModel(): Model<"openai-completions"> {
  const liveModel: Model<"openai-completions"> = {
    id: LIVE_MODEL_ID,
    name: LIVE_MODEL_ID,
    api: "openai-completions",
    provider: "opencode",
    baseUrl: "https://opencode.ai/zen/v1",
    reasoning: true,
    input: ["text"],
    // All cost fields are zero in this descriptor.
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 65536,
    maxTokens: 8192,
  };
  return liveModel;
}
/**
 * Describes the single tool offered to the model during the live probe.
 *
 * @returns A `live_echo` tool definition whose parameters are an object with
 *   one string property, `value`, and `additionalProperties: false`.
 */
function liveEchoTool(): Tool {
  const echoParameters = Type.Object(
    { value: Type.String() },
    { additionalProperties: false },
  );
  return {
    name: "live_echo",
    description: "Return the supplied value.",
    parameters: echoParameters,
  };
}
/**
 * Returns the first `toolCall` content block of an assistant message.
 *
 * @param message Assistant turn to inspect.
 * @returns The first block whose `type` is `"toolCall"`.
 * @throws Error when the message holds no tool call; the error text includes
 *   the message's `stopReason`.
 */
function requireToolCall(message: AssistantMessage) {
  for (const block of message.content) {
    if (block.type === "toolCall") {
      return block;
    }
  }
  throw new Error(`OpenCode DeepSeek live model did not call a tool: ${message.stopReason}`);
}
/**
 * Reports whether an assistant message carries a thinking block tagged for
 * `reasoning_content` replay.
 *
 * @param message Assistant turn to inspect.
 * @returns `true` when at least one `thinking` block has
 *   `thinkingSignature === "reasoning_content"`, otherwise `false`.
 */
function hasReasoningContentReplay(message: AssistantMessage): boolean {
  for (const block of message.content) {
    if (block.type !== "thinking") {
      continue;
    }
    if (block.thinkingSignature === "reasoning_content") {
      return true;
    }
  }
  return false;
}
describeLive("opencode plugin live", () => {
  it(
    "accepts DeepSeek V4 tier-suffixed thinking replay after a tool call",
    async () => {
      const liveModel = resolveOpencodeDeepSeekLiveModel();
      const echoTool = liveEchoTool();
      const toolPrompt = "You must call the live_echo tool with value ok. Do not answer directly.";

      // Turn 1: prompt the model to call the tool (tool choice is not forced).
      const toolTurn = await completeSimple(
        liveModel,
        {
          messages: [{ role: "user", content: toolPrompt, timestamp: Date.now() }],
          tools: [echoTool],
        },
        { apiKey: OPENCODE_API_KEY, reasoning: "low", maxTokens: 128 },
      );
      if (toolTurn.stopReason === "error") {
        throw new Error(toolTurn.errorMessage || "OpenCode DeepSeek first turn returned an error");
      }
      const echoCall = requireToolCall(toolTurn);
      // The replay below is only meaningful if the first turn carried
      // reasoning_content-tagged thinking.
      expect(hasReasoningContentReplay(toolTurn)).toBe(true);

      // Turn 2: replay the full history (including the assistant turn with its
      // thinking block) plus the tool result, then ask for a fixed reply.
      const replayOptions = {
        apiKey: OPENCODE_API_KEY,
        reasoning: "low",
        maxTokens: 64,
      } as const;
      const replayTurn = await completeSimple(
        liveModel,
        {
          messages: [
            { role: "user", content: toolPrompt, timestamp: Date.now() - 3 },
            toolTurn,
            {
              role: "toolResult",
              toolCallId: echoCall.id,
              toolName: echoCall.name,
              content: [{ type: "text", text: "ok" }],
              isError: false,
              timestamp: Date.now() - 1,
            },
            { role: "user", content: "Reply with exactly: ok", timestamp: Date.now() },
          ],
          tools: [echoTool],
        },
        replayOptions,
      );
      if (replayTurn.stopReason === "error") {
        throw new Error(replayTurn.errorMessage || "OpenCode DeepSeek replay returned an error");
      }

      const replyText = extractNonEmptyAssistantText(replayTurn.content);
      expect(replyText).toMatch(/^ok[.!]?$/i);
    },
    120_000,
  );
});

View File

@@ -8154,6 +8154,51 @@ describe("buildOpenAICompletionsParams sanitizes reasoning replay fields", () =>
expect(assistant.reasoning_content).toBe("Need to answer politely.");
});
// Regression coverage for #87575. OpenCode Zen lists DeepSeek V4 under a
// `-free` tier suffix, yet the upstream replay contract is the same as for the
// bare id. When only the bare id matched, reasoning_content was stripped from
// the follow-up request and DeepSeek answered the assistant turn with HTTP 400.
it.each([
  [
    "OpenCode Zen DeepSeek V4 Flash Free",
    {
      id: "deepseek-v4-flash-free",
      name: "DeepSeek V4 Flash Free",
      api: "openai-completions" as const,
      provider: "opencode",
      baseUrl: "https://opencode.ai/zen/v1",
      reasoning: true,
      input: ["text"] as ("text" | "image")[],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 65_536,
      maxTokens: 8192,
    },
  ],
  ["OpenRouter MiMo V2 Pro Free", { ...customMiMoProxyModel, id: "xiaomi/mimo-v2-pro-free" }],
  [
    "OpenRouter Kimi K2 Thinking Free",
    { ...customKimiProxyModel, id: "moonshotai/kimi-k2-thinking-free" },
  ],
] as const)(
  "preserves reasoning_content replay despite the %s tier suffix",
  (_caseName, tierSuffixedModel) => {
    const replayParams = buildReplayParams(
      tierSuffixedModel as Model<"openai-completions">,
      "reasoning_content",
    );
    const assistant = getAssistantMessage(replayParams);

    // The replayed field must survive unchanged...
    expect(assistant.reasoning_content).toBe("Need to answer politely.");
    // ...and none of the alternative reasoning fields may be present.
    for (const absentField of ["reasoning_details", "reasoning", "reasoning_text"]) {
      expect(assistant).not.toHaveProperty(absentField);
    }
  },
);
it("preserves OpenRouter array reasoning_details from tool-call signatures", () => {
const reasoningDetail = { type: "reasoning.encrypted", id: "rs_1", data: "ciphertext" };
const params = buildOpenAICompletionsParams(

View File

@@ -3398,6 +3398,23 @@ const REASONING_CONTENT_REPLAY_MODEL_IDS = new Set([
"mimo-v2.6-pro",
]);
// Some providers append a tier/access marker to an otherwise identical model
// id (OpenCode Zen exposes `deepseek-v4-flash-free`; OpenRouter uses
// `:free` / `:cloud`, etc.). The base id in front of the marker keeps the same
// DeepSeek-style reasoning_content replay contract, so a catalog id that
// merely gained a marketing suffix must not lose its reasoning replay (#87575).
// Only the dash-delimited markers listed here are handled by this helper.
const REASONING_CONTENT_REPLAY_TIER_SUFFIXES = ["-free", "-paid", "-trial"] as const;

/**
 * Removes one trailing tier suffix from a model id.
 *
 * The first entry of `REASONING_CONTENT_REPLAY_TIER_SUFFIXES` that the id ends
 * with is removed; at most one suffix is stripped per call. An id that consists
 * of nothing but the suffix is left untouched so the result is never empty.
 *
 * @param modelId Model id to normalise.
 * @returns The id without its tier suffix, or `modelId` unchanged when no
 *   suffix applies.
 */
function stripReasoningContentReplayTierSuffix(modelId: string): string {
  const matchedTier = REASONING_CONTENT_REPLAY_TIER_SUFFIXES.find(
    (tier) => modelId.length > tier.length && modelId.endsWith(tier),
  );
  if (matchedTier === undefined) {
    return modelId;
  }
  return modelId.slice(0, modelId.length - matchedTier.length);
}
function getReasoningContentReplayModelIdCandidates(modelId: unknown): string[] {
if (typeof modelId !== "string") {
return [];
@@ -3413,6 +3430,17 @@ function getReasoningContentReplayModelIdCandidates(modelId: unknown): string[]
if (colonParts.length > 1) {
candidates.push(colonParts[0] ?? "", colonParts[colonParts.length - 1] ?? "");
}
const baseCount = candidates.length;
for (let index = 0; index < baseCount; index += 1) {
const candidate = candidates[index];
if (typeof candidate !== "string") {
continue;
}
const stripped = stripReasoningContentReplayTierSuffix(candidate);
if (stripped !== candidate) {
candidates.push(stripped);
}
}
return uniqueStrings(candidates.filter(Boolean));
}