diff --git a/src/agents/embedded-agent-runner/run.before-agent-finalize.test.ts b/src/agents/embedded-agent-runner/run.before-agent-finalize.test.ts index b00adbf9d869..2142c2652f73 100644 --- a/src/agents/embedded-agent-runner/run.before-agent-finalize.test.ts +++ b/src/agents/embedded-agent-runner/run.before-agent-finalize.test.ts @@ -1,3 +1,4 @@ +// Coverage for before_agent_finalize revision handling in embedded runs. import { beforeAll, beforeEach, describe, expect, it } from "vitest"; import { makeAttemptResult } from "./run.overflow-compaction.fixture.js"; import { @@ -15,6 +16,8 @@ function finalAnswerAttempt( text: string, overrides?: Partial, ): EmbeddedRunAttemptResult { + // Finalize tests need a successful assistant turn with both surfaced text and + // snapshot content so the runner can decide whether to request a revision. return makeAttemptResult({ assistantTexts: [text], lastAssistant: { @@ -75,6 +78,8 @@ describe("runEmbeddedAgent before_agent_finalize", () => { }); it("turns a revise decision into one more hidden continuation", async () => { + // Revision prompts are hidden continuations; they must not persist the + // original user prompt a second time. mockedRunEmbeddedAttempt .mockResolvedValueOnce( finalAnswerAttempt("First answer.", { @@ -123,6 +128,8 @@ describe("runEmbeddedAgent before_agent_finalize", () => { }); it("does not retry finalize revisions after a timed-out attempt", async () => { + // A timed-out attempt may have partial assistant text, but asking for a + // finalize revision would replay an invalid or blocked provider turn. mockedRunEmbeddedAttempt.mockResolvedValueOnce( finalAnswerAttempt("Late answer.", { timedOut: true, diff --git a/src/agents/embedded-agent-runner/run.before-agent-reply-cron.test.ts b/src/agents/embedded-agent-runner/run.before-agent-reply-cron.test.ts index e51e12a1b210..c9fdb7bc56b8 100644 --- a/src/agents/embedded-agent-runner/run.before-agent-reply-cron.test.ts +++ b/src/agents/embedded-agent-runner/run.before-agent-reply-cron.test.ts @@ -1,3 +1,4 @@ +// Coverage for cron before_agent_reply hook handling before embedded attempts. import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; import { SILENT_REPLY_TOKEN } from "../../auto-reply/tokens.js"; import { makeAttemptResult } from "./run.overflow-compaction.fixture.js"; @@ -12,6 +13,8 @@ import { let runEmbeddedAgent: typeof import("./run.js").runEmbeddedAgent; function firstBeforeAgentReplyCall() { + // Helper keeps assertions on the hook payload and context close to the tests + // without leaking mock tuple details into every case. const call = mockedGlobalHookRunner.runBeforeAgentReply.mock.calls[0]; if (!call) { throw new Error("expected before_agent_reply hook call"); @@ -43,6 +46,8 @@ describe("runEmbeddedAgent cron before_agent_reply seam", () => { }); it("lets before_agent_reply claim cron runs before the embedded attempt starts", async () => { + // Cron hooks can fully handle maintenance prompts before the model is + // invoked, which avoids unnecessary prompt-cache and setup work. mockedGlobalHookRunner.hasHooks.mockImplementation( (hookName: string) => hookName === "before_agent_reply", ); @@ -134,6 +139,8 @@ describe("runEmbeddedAgent cron before_agent_reply seam", () => { }); it("forwards one-shot model-run flags into the embedded attempt", async () => { + // Model-run mode is request-scoped; it must pass through to the first + // attempt without becoming a persistent session setting. mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError: null })); await runEmbeddedAgent({ diff --git a/src/agents/embedded-agent-runner/run.codex-app-server-recovery.test.ts b/src/agents/embedded-agent-runner/run.codex-app-server-recovery.test.ts index c5d742181caf..91192f55049e 100644 --- a/src/agents/embedded-agent-runner/run.codex-app-server-recovery.test.ts +++ b/src/agents/embedded-agent-runner/run.codex-app-server-recovery.test.ts @@ -1,3 +1,4 @@ +// Coverage for replay-safe Codex app-server recovery retries. import { beforeAll, beforeEach, describe, expect, it } from "vitest"; import { makeModelFallbackCfg } from "../test-helpers/model-fallback-config-fixture.js"; import { makeAttemptResult } from "./run.overflow-compaction.fixture.js"; @@ -16,6 +17,8 @@ let runEmbeddedAgent: typeof import("./run.js").runEmbeddedAgent; function codexClientClosedAttempt( overrides: Partial = {}, ): EmbeddedRunAttemptResult { + // Stdio client-close failures can be replay-safe when Codex reports that the + // turn ended before completion and no user-visible side effect escaped. return makeAttemptResult({ assistantTexts: [], promptError: new Error("codex app-server client closed before turn completed"), @@ -34,6 +37,8 @@ function codexClientClosedAttempt( function codexTurnCompletionIdleTimeoutAttempt( overrides: Partial = {}, ): EmbeddedRunAttemptResult { + // Completion-watch idle timeouts are retried separately from progress timeouts + // because only the former indicates Codex may have lost the final event. return makeAttemptResult({ assistantTexts: [], aborted: true, @@ -106,6 +111,8 @@ describe("runEmbeddedAgent Codex app-server recovery", () => { }); it("suppresses duplicate user persistence when retrying after the inbound message was persisted", async () => { + // If the first attempt already persisted the inbound message, the retry must + // not mirror it again into the transcript. mockedRunEmbeddedAttempt .mockImplementationOnce(async (attemptParams) => { ( diff --git a/src/agents/embedded-agent-runner/run.codex-server-error-fallback.test.ts b/src/agents/embedded-agent-runner/run.codex-server-error-fallback.test.ts index ee3c68842706..5f4e4012cff1 100644 --- a/src/agents/embedded-agent-runner/run.codex-server-error-fallback.test.ts +++ b/src/agents/embedded-agent-runner/run.codex-server-error-fallback.test.ts @@ -1,3 +1,4 @@ +// Coverage for handing Codex server_error turns to model fallback. import { beforeAll, beforeEach, describe, expect, it } from "vitest"; import { makeAssistantMessageFixture } from "../test-helpers/assistant-message-fixtures.js"; import { makeModelFallbackCfg } from "../test-helpers/model-fallback-config-fixture.js"; @@ -27,6 +28,8 @@ describe("runEmbeddedAgent Codex server_error fallback handoff", () => { }); it("throws FailoverError for Codex server_error when model fallbacks are configured", async () => { + // Codex server_error is a provider failure, not a normal assistant reply; + // configured fallbacks should receive it through the failover path. const rawCodexError = 'Codex error: {"type":"error","error":{"type":"server_error","code":"server_error","message":"An error occurred while processing your request."},"sequence_number":2}'; diff --git a/src/agents/embedded-agent-runner/run.compaction-loop-guard.test.ts b/src/agents/embedded-agent-runner/run.compaction-loop-guard.test.ts index f632291f103d..8e5f442b038b 100644 --- a/src/agents/embedded-agent-runner/run.compaction-loop-guard.test.ts +++ b/src/agents/embedded-agent-runner/run.compaction-loop-guard.test.ts @@ -1,3 +1,4 @@ +// Coverage for wiring the post-compaction loop guard into embedded runs. import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; import type { diagnosticSessionStates as DiagnosticSessionStatesType, @@ -29,9 +30,8 @@ import { } from "./run.overflow-compaction.harness.js"; let runEmbeddedAgent: typeof import("./run.js").runEmbeddedAgent; -// These need to be imported AFTER loadRunOverflowCompactionHarness so that -// they reference the same module instances the (re-imported) runner uses. -// vi.resetModules() inside the harness invalidates any earlier import. +// Import after loadRunOverflowCompactionHarness so these references point at the +// same module instances as the re-imported runner graph. let diagnosticSessionStates: typeof DiagnosticSessionStatesType; let getDiagnosticSessionState: typeof GetDiagnosticSessionStateType; let recordToolCall: typeof RecordToolCallType; @@ -51,6 +51,8 @@ function recordToolOutcome( result: unknown, runId?: string, ): void { + // Seed diagnostic history directly for cases that inspect persisted loop + // state without running a wrapped tool. const toolCallId = `${toolName}-${state.toolCallHistory?.length ?? 0}`; const scope = runId ? { runId } : undefined; recordToolCall(state, toolName, toolParams, toolCallId, undefined, scope); @@ -75,6 +77,8 @@ async function executeWrappedToolOutcome( onToolOutcome?: ToolOutcomeObserver, runId = baseParams.runId, ): Promise { + // Exercise the live before_tool_call wrapper so the guard sees the same + // outcome observer path used by real embedded tools. const tool = wrapToolWithBeforeToolCallHook( { name: toolName, @@ -135,15 +139,13 @@ describe("post-compaction loop guard wired into runEmbeddedAgent", () => { let attemptSignalAborted = false; let attemptSignalReason: unknown; - // Attempt 1: overflow → triggers compaction. + // Attempt 1: overflow triggers compaction. mockedRunEmbeddedAttempt.mockImplementationOnce(async () => makeAttemptResult({ promptError: overflowError }), ); - // Attempt 2: post-compaction. The live wrapped-tool path records each - // outcome while the prompt is still running. The third identical result - // must not rely on throwing out of tool execution (the dependency converts - // tool errors into tool results); instead it aborts the attempt signal and - // the runner raises the persisted-loop error after the attempt unwinds. + // Attempt 2: live wrapped-tool outcomes repeat while the prompt is running. + // The guard aborts the attempt signal, then the runner raises the loop error + // after the attempt unwinds. mockedRunEmbeddedAttempt.mockImplementationOnce(async (attemptParams: unknown) => { const { abortSignal, onToolOutcome } = attemptParams as { abortSignal?: AbortSignal; diff --git a/src/agents/embedded-agent-runner/run.cross-provider-fallback-error-context.test.ts b/src/agents/embedded-agent-runner/run.cross-provider-fallback-error-context.test.ts index d363a5201e24..7bd49cca9c5e 100644 --- a/src/agents/embedded-agent-runner/run.cross-provider-fallback-error-context.test.ts +++ b/src/agents/embedded-agent-runner/run.cross-provider-fallback-error-context.test.ts @@ -1,3 +1,4 @@ +// Coverage for preserving current-attempt error context across model fallback. import { beforeAll, beforeEach, describe, expect, it } from "vitest"; import { makeAssistantMessageFixture } from "../test-helpers/assistant-message-fixtures.js"; import { makeModelFallbackCfg } from "../test-helpers/model-fallback-config-fixture.js"; @@ -33,6 +34,8 @@ function isCurrentAttemptAssistant(value: unknown): value is CurrentAttemptAssis } function setupDeepseekFallbackErrorMatchers() { + // DeepSeek matchers prove failover classification uses the current candidate + // assistant instead of stale history from the previous provider. mockedIsFailoverAssistantError.mockImplementation((...args: unknown[]) => { const assistant = args[0]; return isCurrentAttemptAssistant(assistant) && assistant.provider === "deepseek"; @@ -44,6 +47,8 @@ function setupDeepseekFallbackErrorMatchers() { } function captureFormattedAssistant() { + // Capture the assistant passed to formatting so tests can inspect which + // provider/model error object drove the final failover message. let lastFormattedAssistant: unknown; mockedFormatAssistantErrorText.mockImplementation((...args: unknown[]) => { lastFormattedAssistant = args[0]; @@ -173,6 +178,8 @@ describe("runEmbeddedAgent cross-provider fallback error handling", () => { }); it("does not reuse a prior provider session assistant when the current candidate times out", async () => { + // Timeout failover has no reliable current assistant. Reusing the previous + // provider's session error would misattribute the failed candidate. const getLastFormattedAssistant = captureFormattedAssistant(); mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({