mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-06 05:51:15 +08:00
fix(ollama): suppress disabled reasoning output
This commit is contained in:
@@ -946,6 +946,29 @@ describe("buildAssistantMessage", () => {
|
||||
expect(result.content).toEqual([{ type: "thinking", thinking: "Reasoning output" }]);
|
||||
});
|
||||
|
||||
// Non-streaming case: the provider response carries a `thinking` string and
// empty `content`, but the model descriptor passed to buildAssistantMessage
// has `reasoning: false`. The test expects no content entries at all (the
// thinking text is not surfaced) while `usage.output` still equals the
// response's `eval_count` (6) and the stop reason is "stop".
it("drops provider-returned thinking for non-reasoning models", () => {
|
||||
const response = {
|
||||
model: "minimax-m2.7:cloud",
|
||||
created_at: "2026-01-01T00:00:00Z",
|
||||
message: {
|
||||
role: "assistant" as const,
|
||||
content: "",
|
||||
thinking: "Thinking output",
|
||||
},
|
||||
done: true,
|
||||
prompt_eval_count: 10,
|
||||
eval_count: 6,
|
||||
};
|
||||
// Override the shared `modelInfo` fixture so reasoning is explicitly disabled.
const result = buildAssistantMessage(response, {
|
||||
...modelInfo,
|
||||
id: "minimax-m2.7:cloud",
|
||||
reasoning: false,
|
||||
});
|
||||
expect(result.stopReason).toBe("stop");
|
||||
expect(result.content).toEqual([]);
|
||||
expect(result.usage.output).toBe(6);
|
||||
});
|
||||
|
||||
it("strips inline reasoning prefix from kimi cloud visible text", () => {
|
||||
const response = {
|
||||
model: "kimi-k2.6:cloud",
|
||||
@@ -2717,12 +2740,37 @@ describe("createOllamaStreamFn", () => {
|
||||
[
|
||||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","reasoning":"reasoned"},"done":false}',
|
||||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","reasoning":" output"},"done":false}',
|
||||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":2}',
|
||||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1}',
|
||||
],
|
||||
[{ type: "thinking", thinking: "reasoned output" }],
|
||||
);
|
||||
});
|
||||
|
||||
// Streaming case: two NDJSON chunks carry only a `reasoning` delta, followed
// by a final `done` chunk. With the test stream created for a model whose
// `reasoning` is false, the test expects the final message to have no
// content, a positive `usage.output`, and no `thinking_delta` event anywhere
// in the collected stream.
it("drops streamed reasoning chunks for non-reasoning models", async () => {
|
||||
await withMockNdjsonFetch(
|
||||
[
|
||||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","reasoning":"reasoned"},"done":false}',
|
||||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","reasoning":" output"},"done":false}',
|
||||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":2}',
|
||||
],
|
||||
async () => {
|
||||
const stream = await createOllamaTestStream({
|
||||
baseUrl: "http://ollama-host:11434",
|
||||
model: { reasoning: false },
|
||||
});
|
||||
const events = await collectStreamEvents(stream);
|
||||
// The last collected event must be the `done` event; fail loudly otherwise
// so the assertions below never run against a different event type.
const doneEvent = events.at(-1);
|
||||
if (!doneEvent || doneEvent.type !== "done") {
|
||||
throw new Error("Expected done event");
|
||||
}
|
||||
|
||||
expect(doneEvent.message.content).toEqual([]);
|
||||
expect(doneEvent.message.usage.output).toBeGreaterThan(0);
|
||||
expect(events.some((event) => event.type === "thinking_delta")).toBe(false);
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
it("keeps streamed content after earlier reasoning chunks", async () => {
|
||||
await expectDoneEventContent(
|
||||
[
|
||||
|
||||
@@ -510,6 +510,7 @@ type StreamModelDescriptor = {
|
||||
api: string;
|
||||
provider: string;
|
||||
id: string;
|
||||
reasoning?: boolean;
|
||||
};
|
||||
|
||||
type OllamaUsageFallback = {
|
||||
@@ -666,8 +667,12 @@ function estimateOllamaPromptTokens(params: {
|
||||
return estimateTokensFromChars(chars);
|
||||
}
|
||||
|
||||
function estimateOllamaCompletionTokens(response: OllamaChatResponse): number {
|
||||
function estimateOllamaCompletionTokens(
|
||||
response: OllamaChatResponse,
|
||||
extraOutputChars = 0,
|
||||
): number {
|
||||
const chars =
|
||||
extraOutputChars +
|
||||
response.message.content.length +
|
||||
(response.message.thinking?.length ?? 0) +
|
||||
(response.message.reasoning?.length ?? 0) +
|
||||
@@ -1021,7 +1026,10 @@ export function buildAssistantMessage(
|
||||
options: OllamaAssistantMessageBuildOptions = {},
|
||||
): AssistantMessage {
|
||||
const content: (TextContent | ThinkingContent | ToolCall)[] = [];
|
||||
const thinking = response.message.thinking ?? response.message.reasoning ?? "";
|
||||
const thinking =
|
||||
modelInfo.reasoning === false
|
||||
? ""
|
||||
: (response.message.thinking ?? response.message.reasoning ?? "");
|
||||
if (thinking) {
|
||||
content.push({ type: "thinking", thinking });
|
||||
}
|
||||
@@ -1206,10 +1214,17 @@ function createRawOllamaStreamFn(
|
||||
let accumulatedRawContent = "";
|
||||
let accumulatedVisibleContent = "";
|
||||
let accumulatedThinking = "";
|
||||
let suppressedThinking = "";
|
||||
const accumulatedToolCalls: OllamaToolCall[] = [];
|
||||
let finalResponse: OllamaChatResponse | undefined;
|
||||
let pendingFinalVisibleContent: string | undefined;
|
||||
const modelInfo = { api: model.api, provider: model.provider, id: model.id };
|
||||
const modelInfo = {
|
||||
api: model.api,
|
||||
provider: model.provider,
|
||||
id: model.id,
|
||||
reasoning: model.reasoning,
|
||||
};
|
||||
const shouldEmitThinking = model.reasoning !== false;
|
||||
const visibleContentSanitizer = createOllamaVisibleContentSanitizer(model.id);
|
||||
const cooperativeScheduler = createOllamaStreamCooperativeScheduler(options?.signal);
|
||||
let streamStarted = false;
|
||||
@@ -1334,7 +1349,7 @@ function createRawOllamaStreamFn(
|
||||
for await (const chunk of parseNdjsonStream(reader)) {
|
||||
throwIfOllamaStreamAborted(options?.signal);
|
||||
const thinkingDelta = chunk.message?.thinking ?? chunk.message?.reasoning;
|
||||
if (thinkingDelta) {
|
||||
if (thinkingDelta && shouldEmitThinking) {
|
||||
if (!streamStarted) {
|
||||
streamStarted = true;
|
||||
const emptyPartial = buildStreamAssistantMessage({
|
||||
@@ -1369,6 +1384,9 @@ function createRawOllamaStreamFn(
|
||||
partial,
|
||||
});
|
||||
}
|
||||
if (thinkingDelta && !shouldEmitThinking) {
|
||||
suppressedThinking += thinkingDelta;
|
||||
}
|
||||
|
||||
if (chunk.message?.content) {
|
||||
const rawDelta = chunk.message.content;
|
||||
@@ -1419,7 +1437,7 @@ function createRawOllamaStreamFn(
|
||||
|
||||
const usageFallback = {
|
||||
input: estimateOllamaPromptTokens({ messages: ollamaMessages, tools: ollamaTools }),
|
||||
output: estimateOllamaCompletionTokens(finalResponse),
|
||||
output: estimateOllamaCompletionTokens(finalResponse, suppressedThinking.length),
|
||||
};
|
||||
const assistantMessage = buildAssistantMessage(finalResponse, modelInfo, usageFallback, {
|
||||
...toolCallNameOptions,
|
||||
|
||||
@@ -1736,6 +1736,29 @@ describe("runEmbeddedAgent incomplete-turn safety", () => {
|
||||
expect(retryInstruction).toBe(EMPTY_RESPONSE_RETRY_INSTRUCTION);
|
||||
});
|
||||
|
||||
// An Ollama turn that ended with stopReason "stop", produced no assistant
// text and no content, but reports `usage.output: 6`, is expected to resolve
// to EMPTY_RESPONSE_RETRY_INSTRUCTION rather than being treated as a clean
// empty stop.
it("retries empty Ollama stop turns when nonzero output tokens were generated", () => {
|
||||
const retryInstruction = resolveEmptyResponseRetryInstruction({
|
||||
provider: "ollama",
|
||||
modelId: "minimax-m2.7:cloud",
|
||||
payloadCount: 0,
|
||||
aborted: false,
|
||||
timedOut: false,
|
||||
attempt: makeAttemptResult({
|
||||
assistantTexts: [],
|
||||
lastAssistant: {
|
||||
role: "assistant",
|
||||
stopReason: "stop",
|
||||
provider: "ollama",
|
||||
model: "minimax-m2.7:cloud",
|
||||
content: [],
|
||||
usage: { input: 100, output: 6, totalTokens: 106 },
|
||||
} as unknown as EmbeddedRunAttemptResult["lastAssistant"],
|
||||
}),
|
||||
});
|
||||
|
||||
expect(retryInstruction).toBe(EMPTY_RESPONSE_RETRY_INSTRUCTION);
|
||||
});
|
||||
|
||||
it("does not retry empty turns after an accepted sessions_spawn delivery", () => {
|
||||
const retryInstruction = resolveEmptyResponseRetryInstruction({
|
||||
provider: "ollama",
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { asFiniteNumber } from "@openclaw/normalization-core/number-coercion";
|
||||
import { normalizeLowercaseStringOrEmpty } from "@openclaw/normalization-core/string-coerce";
|
||||
import { normalizeStringEntries } from "@openclaw/normalization-core/string-normalization";
|
||||
import {
|
||||
@@ -71,6 +72,18 @@ type PlanningOnlyAttempt = Pick<
|
||||
| "toolMetas"
|
||||
>;
|
||||
|
||||
/**
 * Reports whether a message carries a `usage.output` value greater than zero.
 *
 * Returns `false` when the message is null/non-object, when it has no
 * object-valued `usage` property, or when `usage.output` does not coerce to a
 * number above zero.
 *
 * @param message - The message to inspect; may be `null`.
 * @returns `true` only when `usage.output` coerces to a number greater than 0.
 */
function hasPositiveOutputTokenUsage(message: AgentMessage | null): boolean {
|
||||
if (!message || typeof message !== "object") {
|
||||
return false;
|
||||
}
|
||||
// `usage` is read through a structural cast and treated as unknown, so its
// shape is checked at runtime before any field access.
const usage = (message as { usage?: unknown }).usage;
|
||||
if (!usage || typeof usage !== "object") {
|
||||
return false;
|
||||
}
|
||||
// NOTE(review): asFiniteNumber is imported from normalization-core and is
// presumably undefined for values that are not finite numbers; confirm
// whether it also coerces numeric strings.
const output = asFiniteNumber((usage as { output?: unknown }).output);
|
||||
return output !== undefined && output > 0;
|
||||
}
|
||||
|
||||
type SilentToolResultAttempt = Pick<
|
||||
EmbeddedRunAttemptResult,
|
||||
| "clientToolCalls"
|
||||
@@ -660,7 +673,8 @@ export function resolveEmptyResponseRetryInstruction(params: {
|
||||
assistant?.stopReason === "stop" &&
|
||||
OLLAMA_INCOMPLETE_TURN_PROVIDER_ID_PATTERN.test(
|
||||
normalizeLowercaseStringOrEmpty(params.provider ?? ""),
|
||||
)
|
||||
) &&
|
||||
!hasPositiveOutputTokenUsage(assistant)
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user