fix(ollama): suppress disabled reasoning output

This commit is contained in:
Peter Steinberger
2026-06-01 01:01:49 +01:00
parent 27dde7a4d6
commit 7562afdca3
4 changed files with 110 additions and 7 deletions

View File

@@ -946,6 +946,29 @@ describe("buildAssistantMessage", () => {
expect(result.content).toEqual([{ type: "thinking", thinking: "Reasoning output" }]);
});
it("drops provider-returned thinking for non-reasoning models", () => {
  // Fixture: the provider sends a `thinking` payload with empty visible content.
  const providerResponse = {
    model: "minimax-m2.7:cloud",
    created_at: "2026-01-01T00:00:00Z",
    message: {
      role: "assistant" as const,
      content: "",
      thinking: "Thinking output",
    },
    done: true,
    prompt_eval_count: 10,
    eval_count: 6,
  };
  // Same descriptor as the shared `modelInfo`, but explicitly flagged as non-reasoning.
  const nonReasoningModel = {
    ...modelInfo,
    id: "minimax-m2.7:cloud",
    reasoning: false,
  };

  const assistantMessage = buildAssistantMessage(providerResponse, nonReasoningModel);

  expect(assistantMessage.stopReason).toBe("stop");
  // The thinking payload must not surface as a content part.
  expect(assistantMessage.content).toEqual([]);
  // Output usage is expected to equal the fixture's `eval_count`.
  expect(assistantMessage.usage.output).toBe(6);
});
it("strips inline reasoning prefix from kimi cloud visible text", () => {
const response = {
model: "kimi-k2.6:cloud",
@@ -2717,12 +2740,37 @@ describe("createOllamaStreamFn", () => {
[
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","reasoning":"reasoned"},"done":false}',
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","reasoning":" output"},"done":false}',
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":2}',
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1}',
],
[{ type: "thinking", thinking: "reasoned output" }],
);
});
it("drops streamed reasoning chunks for non-reasoning models", async () => {
  // Two reasoning-only chunks, then the terminating chunk that carries the token counts.
  const ndjsonLines = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"","reasoning":"reasoned"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"","reasoning":" output"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":2}',
  ];

  await withMockNdjsonFetch(ndjsonLines, async () => {
    const stream = await createOllamaTestStream({
      baseUrl: "http://ollama-host:11434",
      model: { reasoning: false },
    });
    const streamEvents = await collectStreamEvents(stream);

    // The last event must be the terminal "done" event; anything else is a broken stream.
    const lastEvent = streamEvents.at(-1);
    if (!lastEvent || lastEvent.type !== "done") {
      throw new Error("Expected done event");
    }

    // No thinking part in the final message, yet output usage is still reported as nonzero.
    expect(lastEvent.message.content).toEqual([]);
    expect(lastEvent.message.usage.output).toBeGreaterThan(0);
    // No incremental thinking events may have been emitted along the way.
    expect(streamEvents.some((streamEvent) => streamEvent.type === "thinking_delta")).toBe(false);
  });
});
it("keeps streamed content after earlier reasoning chunks", async () => {
await expectDoneEventContent(
[

View File

@@ -510,6 +510,7 @@ type StreamModelDescriptor = {
api: string;
provider: string;
id: string;
reasoning?: boolean;
};
type OllamaUsageFallback = {
@@ -666,8 +667,12 @@ function estimateOllamaPromptTokens(params: {
return estimateTokensFromChars(chars);
}
function estimateOllamaCompletionTokens(response: OllamaChatResponse): number {
function estimateOllamaCompletionTokens(
response: OllamaChatResponse,
extraOutputChars = 0,
): number {
const chars =
extraOutputChars +
response.message.content.length +
(response.message.thinking?.length ?? 0) +
(response.message.reasoning?.length ?? 0) +
@@ -1021,7 +1026,10 @@ export function buildAssistantMessage(
options: OllamaAssistantMessageBuildOptions = {},
): AssistantMessage {
const content: (TextContent | ThinkingContent | ToolCall)[] = [];
const thinking = response.message.thinking ?? response.message.reasoning ?? "";
const thinking =
modelInfo.reasoning === false
? ""
: (response.message.thinking ?? response.message.reasoning ?? "");
if (thinking) {
content.push({ type: "thinking", thinking });
}
@@ -1206,10 +1214,17 @@ function createRawOllamaStreamFn(
let accumulatedRawContent = "";
let accumulatedVisibleContent = "";
let accumulatedThinking = "";
let suppressedThinking = "";
const accumulatedToolCalls: OllamaToolCall[] = [];
let finalResponse: OllamaChatResponse | undefined;
let pendingFinalVisibleContent: string | undefined;
const modelInfo = { api: model.api, provider: model.provider, id: model.id };
const modelInfo = {
api: model.api,
provider: model.provider,
id: model.id,
reasoning: model.reasoning,
};
const shouldEmitThinking = model.reasoning !== false;
const visibleContentSanitizer = createOllamaVisibleContentSanitizer(model.id);
const cooperativeScheduler = createOllamaStreamCooperativeScheduler(options?.signal);
let streamStarted = false;
@@ -1334,7 +1349,7 @@ function createRawOllamaStreamFn(
for await (const chunk of parseNdjsonStream(reader)) {
throwIfOllamaStreamAborted(options?.signal);
const thinkingDelta = chunk.message?.thinking ?? chunk.message?.reasoning;
if (thinkingDelta) {
if (thinkingDelta && shouldEmitThinking) {
if (!streamStarted) {
streamStarted = true;
const emptyPartial = buildStreamAssistantMessage({
@@ -1369,6 +1384,9 @@ function createRawOllamaStreamFn(
partial,
});
}
if (thinkingDelta && !shouldEmitThinking) {
suppressedThinking += thinkingDelta;
}
if (chunk.message?.content) {
const rawDelta = chunk.message.content;
@@ -1419,7 +1437,7 @@ function createRawOllamaStreamFn(
const usageFallback = {
input: estimateOllamaPromptTokens({ messages: ollamaMessages, tools: ollamaTools }),
output: estimateOllamaCompletionTokens(finalResponse),
output: estimateOllamaCompletionTokens(finalResponse, suppressedThinking.length),
};
const assistantMessage = buildAssistantMessage(finalResponse, modelInfo, usageFallback, {
...toolCallNameOptions,

View File

@@ -1736,6 +1736,29 @@ describe("runEmbeddedAgent incomplete-turn safety", () => {
expect(retryInstruction).toBe(EMPTY_RESPONSE_RETRY_INSTRUCTION);
});
it("retries empty Ollama stop turns when nonzero output tokens were generated", () => {
  // Assistant turn that stopped cleanly with no content but reports six output tokens.
  const emptyStopAssistant = {
    role: "assistant",
    stopReason: "stop",
    provider: "ollama",
    model: "minimax-m2.7:cloud",
    content: [],
    usage: { input: 100, output: 6, totalTokens: 106 },
  } as unknown as EmbeddedRunAttemptResult["lastAssistant"];
  const attempt = makeAttemptResult({
    assistantTexts: [],
    lastAssistant: emptyStopAssistant,
  });

  const instruction = resolveEmptyResponseRetryInstruction({
    provider: "ollama",
    modelId: "minimax-m2.7:cloud",
    payloadCount: 0,
    aborted: false,
    timedOut: false,
    attempt,
  });

  // The empty-response retry instruction is expected for this turn.
  expect(instruction).toBe(EMPTY_RESPONSE_RETRY_INSTRUCTION);
});
it("does not retry empty turns after an accepted sessions_spawn delivery", () => {
const retryInstruction = resolveEmptyResponseRetryInstruction({
provider: "ollama",

View File

@@ -1,3 +1,4 @@
import { asFiniteNumber } from "@openclaw/normalization-core/number-coercion";
import { normalizeLowercaseStringOrEmpty } from "@openclaw/normalization-core/string-coerce";
import { normalizeStringEntries } from "@openclaw/normalization-core/string-normalization";
import {
@@ -71,6 +72,18 @@ type PlanningOnlyAttempt = Pick<
| "toolMetas"
>;
/**
 * Tells whether `message` carries a `usage.output` value greater than zero.
 *
 * @param message - Message to inspect; `null` and non-object values yield `false`.
 * @returns `true` only when `usage` is an object and `asFiniteNumber` produces a
 *   value for `usage.output` that is greater than zero.
 */
function hasPositiveOutputTokenUsage(message: AgentMessage | null): boolean {
  if (typeof message !== "object" || message === null) {
    return false;
  }
  // `usage` is read through a loose shape so any message variant can be probed.
  const { usage } = message as { usage?: unknown };
  if (typeof usage !== "object" || usage === null) {
    return false;
  }
  const outputTokens = asFiniteNumber((usage as { output?: unknown }).output);
  if (outputTokens === undefined) {
    return false;
  }
  return outputTokens > 0;
}
type SilentToolResultAttempt = Pick<
EmbeddedRunAttemptResult,
| "clientToolCalls"
@@ -660,7 +673,8 @@ export function resolveEmptyResponseRetryInstruction(params: {
assistant?.stopReason === "stop" &&
OLLAMA_INCOMPLETE_TURN_PROVIDER_ID_PATTERN.test(
normalizeLowercaseStringOrEmpty(params.provider ?? ""),
)
) &&
!hasPositiveOutputTokenUsage(assistant)
) {
return null;
}