fix(agents): detect unsigned thinking-only stall when reasoning payload inflates payloadCount

When a model (e.g. Qwen3 via llama.cpp) produces stopReason="stop" with
only an unsigned thinking block, the reasoning text is pushed into
replyItems as { isReasoning: true }, making payloadCount=1. The early-
return guard `payloadCount !== 0 && !toolUseTerminal` in
resolveIncompleteTurnPayloadText then returned null before any stall
detector ran, leaving the session with livenessState="working" and no
recovery path — a silent, permanent stall.

Add isUnsignedThinkingOnlyAssistantTurn helper and wire it into:
- resolveIncompleteTurnPayloadText: bypass the visible-text guard so
  stall detection fires below
- resolveReasoningOnlyRetryInstruction: retry unsigned thinking-only
  turns with REASONING_ONLY_RETRY_INSTRUCTION before surfacing an error

Fixes #89787
This commit is contained in:
openperf
2026-06-03 22:38:23 +08:00
parent 8f6f2617ec
commit c613c3884f
2 changed files with 136 additions and 5 deletions

View File

@@ -1519,6 +1519,63 @@ describe("runEmbeddedAgent incomplete-turn safety", () => {
expect(incompleteTurnText).toBeNull();
});
it("surfaces stall on clean stop with only an unsigned thinking payload (payloadCount=1, no visible text)", () => {
  // Regression guard for #89787. An unsigned thinking payload bumps payloadCount
  // yet carries nothing the user can see, so the visible-text guard must not
  // short-circuit incomplete-turn detection when the model stopped cleanly after
  // producing only a thinking block.
  const thinkingOnlyAssistant = {
    role: "assistant",
    stopReason: "stop",
    provider: "openai",
    model: "qwen3.6-35b-a3b",
    content: [
      {
        type: "thinking",
        thinking: "let me plan the tool calls I need to make...",
        // deliberately unsigned: there is no signature field on this block
      },
    ],
  } as unknown as EmbeddedRunAttemptResult["lastAssistant"];
  const attempt = makeAttemptResult({
    assistantTexts: [],
    lastAssistant: thinkingOnlyAssistant,
  });

  const surfacedText = resolveIncompleteTurnPayloadText({
    payloadCount: 1,
    aborted: false,
    timedOut: false,
    attempt,
  });

  expect(surfacedText).toContain("couldn't generate a response");
});
it("does not surface a stall when unsigned thinking accompanies visible text (payloadCount=1)", () => {
  // A turn that carries both an unsigned thinking block and a visible text
  // answer is a successful turn: no stall text is expected here.
  const visibleAnswer = "Here is the answer to your question.";
  const answeredAssistant = {
    role: "assistant",
    stopReason: "stop",
    provider: "openai",
    model: "qwen3.6-35b-a3b",
    content: [
      {
        type: "thinking",
        thinking: "let me answer this...",
      },
      { type: "text", text: visibleAnswer },
    ],
  } as unknown as EmbeddedRunAttemptResult["lastAssistant"];
  const attempt = makeAttemptResult({
    assistantTexts: [visibleAnswer],
    lastAssistant: answeredAssistant,
  });

  const surfacedText = resolveIncompleteTurnPayloadText({
    payloadCount: 1,
    aborted: false,
    timedOut: false,
    attempt,
  });

  expect(surfacedText).toBeNull();
});
it("surfaces an error for tool-use terminal turn with pre-tool text via runEmbeddedAgent (#76477)", async () => {
mockedClassifyFailoverReason.mockReturnValue(null);
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
@@ -1687,6 +1744,59 @@ describe("runEmbeddedAgent incomplete-turn safety", () => {
expect(retryInstruction).toBe(REASONING_ONLY_RETRY_INSTRUCTION);
});
it("retries unsigned thinking-only turns via the reasoning-only path (openai-completions)", () => {
  // The final assistant message holds a single thinking block without a
  // signature and no visible text; the resolver is expected to hand back the
  // reasoning-only retry instruction for it.
  const thinkingOnlyAssistant = {
    role: "assistant",
    stopReason: "stop",
    provider: "openai",
    model: "qwen3.6-35b-a3b",
    content: [
      {
        type: "thinking",
        thinking: "let me plan the tool calls I need to make...",
      },
    ],
  } as unknown as EmbeddedRunAttemptResult["lastAssistant"];
  const attempt = makeAttemptResult({
    assistantTexts: [],
    lastAssistant: thinkingOnlyAssistant,
  });

  const instruction = resolveReasoningOnlyRetryInstruction({
    provider: "openai",
    modelId: "qwen3.6-35b-a3b",
    modelApi: "openai-completions",
    aborted: false,
    timedOut: false,
    attempt,
  });

  expect(instruction).toBe(REASONING_ONLY_RETRY_INSTRUCTION);
});
it("retries unsigned thinking-only Ollama turns via the reasoning-only path", () => {
  // Same scenario as the openai-completions case, but with the Ollama provider,
  // an "end_turn" stop reason, and no modelApi passed to the resolver.
  const thinkingOnlyAssistant = {
    role: "assistant",
    stopReason: "end_turn",
    provider: "ollama",
    model: "gemma4:31b",
    content: [
      {
        type: "thinking",
        thinking: "internal reasoning",
      },
    ],
  } as unknown as EmbeddedRunAttemptResult["lastAssistant"];
  const attempt = makeAttemptResult({
    assistantTexts: [],
    lastAssistant: thinkingOnlyAssistant,
  });

  const instruction = resolveReasoningOnlyRetryInstruction({
    provider: "ollama",
    modelId: "gemma4:31b",
    aborted: false,
    timedOut: false,
    attempt,
  });

  expect(instruction).toBe(REASONING_ONLY_RETRY_INSTRUCTION);
});
it("retries unsigned-thinking Ollama turns via the empty-response path", () => {
const retryInstruction = resolveEmptyResponseRetryInstruction({
provider: "ollama",

View File

@@ -266,9 +266,17 @@ export function resolveIncompleteTurnPayloadText(params: {
// turn check in that case — the final post-tool response was never
// produced. (#76477)
const toolUseTerminal = params.attempt.lastAssistant?.stopReason === "toolUse";
const assistant = params.attempt.currentAttemptAssistant ?? params.attempt.lastAssistant;
// Unsigned thinking payloads count toward payloadCount but carry no user-visible
// content; bypass the visible-text guard when unsigned thinking was the only output
// so that incomplete-turn stall detection fires below. (#89787)
const unsignedThinkingOnlyTerminal =
params.payloadCount !== 0 &&
!joinAssistantTexts(params.attempt.assistantTexts).length &&
isUnsignedThinkingOnlyAssistantTurn(assistant);
if (
(params.payloadCount !== 0 && !toolUseTerminal) ||
(params.payloadCount !== 0 && !toolUseTerminal && !unsignedThinkingOnlyTerminal) ||
(params.aborted && params.externalAbort) ||
params.timedOut ||
params.attempt.clientToolCalls ||
@@ -300,9 +308,7 @@ export function resolveIncompleteTurnPayloadText(params: {
hasAssistantVisibleText: params.payloadCount > 0,
lastAssistant: params.attempt.lastAssistant,
});
const reasoningOnlyAssistant = isReasoningOnlyAssistantTurn(
params.attempt.currentAttemptAssistant ?? params.attempt.lastAssistant,
);
const reasoningOnlyAssistant = isReasoningOnlyAssistantTurn(assistant);
const emptyResponseAssistant = isEmptyResponseAssistantTurn({
payloadCount: params.payloadCount,
attempt: params.attempt,
@@ -310,6 +316,7 @@ export function resolveIncompleteTurnPayloadText(params: {
if (
!incompleteTerminalAssistant &&
!reasoningOnlyAssistant &&
!unsignedThinkingOnlyTerminal &&
!emptyResponseAssistant &&
stopReason !== "error"
) {
@@ -508,6 +515,20 @@ function isReasoningOnlyAssistantTurn(message: unknown): boolean {
return assessLastAssistantMessage(message as AgentMessage) === "incomplete-text";
}
/**
 * Reports whether an assistant message consists solely of unsigned thinking.
 *
 * An unsigned thinking block carries no cryptographic signature, and
 * assessLastAssistantMessage classifies such a message as "incomplete-thinking".
 * It reports the same classification for a message whose content is empty, so
 * this helper additionally insists on a non-empty content array to tell the two
 * situations apart.
 *
 * @param message - Candidate assistant message; any non-object value (including
 *   null and undefined) is rejected.
 * @returns true only when the message is an object with a non-empty `content`
 *   array and assessLastAssistantMessage yields "incomplete-thinking".
 */
function isUnsignedThinkingOnlyAssistantTurn(message: unknown): boolean {
  const isObjectLike = typeof message === "object" && message !== null;
  if (!isObjectLike) {
    return false;
  }
  const blocks = (message as { content?: unknown }).content;
  const hasBlocks = Array.isArray(blocks) && blocks.length > 0;
  // Short-circuit keeps the assessment call off the empty/missing-content path.
  return hasBlocks && assessLastAssistantMessage(message as AgentMessage) === "incomplete-thinking";
}
function isEmptyResponseAssistantTurn(params: {
payloadCount: number;
attempt: Pick<
@@ -638,7 +659,7 @@ export function resolveReasoningOnlyRetryInstruction(params: {
if (assistant?.stopReason === "error") {
return null;
}
if (!isReasoningOnlyAssistantTurn(assistant)) {
if (!isReasoningOnlyAssistantTurn(assistant) && !isUnsignedThinkingOnlyAssistantTurn(assistant)) {
return null;
}