docs: document embedded runner run tests

2026-06-06 05:51:15 +08:00 · 2026-06-04 14:38:00 -04:00
parent cd26595d6f
commit a31d3355cd
6 changed files with 42 additions and 9 deletions
--- a/src/agents/embedded-agent-runner/run.before-agent-finalize.test.ts
+++ b/src/agents/embedded-agent-runner/run.before-agent-finalize.test.ts
@@ -1,3 +1,4 @@
+// Coverage for before_agent_finalize revision handling in embedded runs.
 import { beforeAll, beforeEach, describe, expect, it } from "vitest";
 import { makeAttemptResult } from "./run.overflow-compaction.fixture.js";
 import {
@@ -15,6 +16,8 @@ function finalAnswerAttempt(
  text: string,
  overrides?: Partial<EmbeddedRunAttemptResult>,
 ): EmbeddedRunAttemptResult {
+  // Finalize tests need a successful assistant turn with both surfaced text and
+  // snapshot content so the runner can decide whether to request a revision.
  return makeAttemptResult({
    assistantTexts: [text],
    lastAssistant: {
@@ -75,6 +78,8 @@ describe("runEmbeddedAgent before_agent_finalize", () => {
  });

  it("turns a revise decision into one more hidden continuation", async () => {
+    // Revision prompts are hidden continuations; they must not persist the
+    // original user prompt a second time.
    mockedRunEmbeddedAttempt
      .mockResolvedValueOnce(
        finalAnswerAttempt("First answer.", {
@@ -123,6 +128,8 @@ describe("runEmbeddedAgent before_agent_finalize", () => {
  });

  it("does not retry finalize revisions after a timed-out attempt", async () => {
+    // A timed-out attempt may have partial assistant text, but asking for a
+    // finalize revision would replay an invalid or blocked provider turn.
    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
      finalAnswerAttempt("Late answer.", {
        timedOut: true,
--- a/src/agents/embedded-agent-runner/run.before-agent-reply-cron.test.ts
+++ b/src/agents/embedded-agent-runner/run.before-agent-reply-cron.test.ts
@@ -1,3 +1,4 @@
+// Coverage for cron before_agent_reply hook handling before embedded attempts.
 import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
 import { SILENT_REPLY_TOKEN } from "../../auto-reply/tokens.js";
 import { makeAttemptResult } from "./run.overflow-compaction.fixture.js";
@@ -12,6 +13,8 @@ import {
 let runEmbeddedAgent: typeof import("./run.js").runEmbeddedAgent;

 function firstBeforeAgentReplyCall() {
+  // Helper keeps assertions on the hook payload and context close to the tests
+  // without leaking mock tuple details into every case.
  const call = mockedGlobalHookRunner.runBeforeAgentReply.mock.calls[0];
  if (!call) {
    throw new Error("expected before_agent_reply hook call");
@@ -43,6 +46,8 @@ describe("runEmbeddedAgent cron before_agent_reply seam", () => {
  });

  it("lets before_agent_reply claim cron runs before the embedded attempt starts", async () => {
+    // Cron hooks can fully handle maintenance prompts before the model is
+    // invoked, which avoids unnecessary prompt-cache and setup work.
    mockedGlobalHookRunner.hasHooks.mockImplementation(
      (hookName: string) => hookName === "before_agent_reply",
    );
@@ -134,6 +139,8 @@ describe("runEmbeddedAgent cron before_agent_reply seam", () => {
  });

  it("forwards one-shot model-run flags into the embedded attempt", async () => {
+    // Model-run mode is request-scoped; it must pass through to the first
+    // attempt without becoming a persistent session setting.
    mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));

    await runEmbeddedAgent({
--- a/src/agents/embedded-agent-runner/run.codex-app-server-recovery.test.ts
+++ b/src/agents/embedded-agent-runner/run.codex-app-server-recovery.test.ts
@@ -1,3 +1,4 @@
+// Coverage for replay-safe Codex app-server recovery retries.
 import { beforeAll, beforeEach, describe, expect, it } from "vitest";
 import { makeModelFallbackCfg } from "../test-helpers/model-fallback-config-fixture.js";
 import { makeAttemptResult } from "./run.overflow-compaction.fixture.js";
@@ -16,6 +17,8 @@ let runEmbeddedAgent: typeof import("./run.js").runEmbeddedAgent;
 function codexClientClosedAttempt(
  overrides: Partial<EmbeddedRunAttemptResult> = {},
 ): EmbeddedRunAttemptResult {
+  // Stdio client-close failures can be replay-safe when Codex reports that the
+  // turn ended before completion and no user-visible side effect escaped.
  return makeAttemptResult({
    assistantTexts: [],
    promptError: new Error("codex app-server client closed before turn completed"),
@@ -34,6 +37,8 @@ function codexClientClosedAttempt(
 function codexTurnCompletionIdleTimeoutAttempt(
  overrides: Partial<EmbeddedRunAttemptResult> = {},
 ): EmbeddedRunAttemptResult {
+  // Completion-watch idle timeouts are retried separately from progress timeouts
+  // because only the former indicates Codex may have lost the final event.
  return makeAttemptResult({
    assistantTexts: [],
    aborted: true,
@@ -106,6 +111,8 @@ describe("runEmbeddedAgent Codex app-server recovery", () => {
  });

  it("suppresses duplicate user persistence when retrying after the inbound message was persisted", async () => {
+    // If the first attempt already persisted the inbound message, the retry must
+    // not mirror it again into the transcript.
    mockedRunEmbeddedAttempt
      .mockImplementationOnce(async (attemptParams) => {
        (
--- a/src/agents/embedded-agent-runner/run.codex-server-error-fallback.test.ts
+++ b/src/agents/embedded-agent-runner/run.codex-server-error-fallback.test.ts
@@ -1,3 +1,4 @@
+// Coverage for handing Codex server_error turns to model fallback.
 import { beforeAll, beforeEach, describe, expect, it } from "vitest";
 import { makeAssistantMessageFixture } from "../test-helpers/assistant-message-fixtures.js";
 import { makeModelFallbackCfg } from "../test-helpers/model-fallback-config-fixture.js";
@@ -27,6 +28,8 @@ describe("runEmbeddedAgent Codex server_error fallback handoff", () => {
  });

  it("throws FailoverError for Codex server_error when model fallbacks are configured", async () => {
+    // Codex server_error is a provider failure, not a normal assistant reply;
+    // configured fallbacks should receive it through the failover path.
    const rawCodexError =
      'Codex error: {"type":"error","error":{"type":"server_error","code":"server_error","message":"An error occurred while processing your request."},"sequence_number":2}';

--- a/src/agents/embedded-agent-runner/run.compaction-loop-guard.test.ts
+++ b/src/agents/embedded-agent-runner/run.compaction-loop-guard.test.ts
@@ -1,3 +1,4 @@
+// Coverage for wiring the post-compaction loop guard into embedded runs.
 import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
 import type {
  diagnosticSessionStates as DiagnosticSessionStatesType,
@@ -29,9 +30,8 @@ import {
 } from "./run.overflow-compaction.harness.js";

 let runEmbeddedAgent: typeof import("./run.js").runEmbeddedAgent;
-// These need to be imported AFTER loadRunOverflowCompactionHarness so that
-// they reference the same module instances the (re-imported) runner uses.
-// vi.resetModules() inside the harness invalidates any earlier import.
+// Import after loadRunOverflowCompactionHarness: vi.resetModules() inside the harness
+// invalidates earlier imports, so these must share the re-imported runner's modules.
 let diagnosticSessionStates: typeof DiagnosticSessionStatesType;
 let getDiagnosticSessionState: typeof GetDiagnosticSessionStateType;
 let recordToolCall: typeof RecordToolCallType;
@@ -51,6 +51,8 @@ function recordToolOutcome(
  result: unknown,
  runId?: string,
 ): void {
+  // Seed diagnostic history directly for cases that inspect persisted loop
+  // state without running a wrapped tool.
  const toolCallId = `${toolName}-${state.toolCallHistory?.length ?? 0}`;
  const scope = runId ? { runId } : undefined;
  recordToolCall(state, toolName, toolParams, toolCallId, undefined, scope);
@@ -75,6 +77,8 @@ async function executeWrappedToolOutcome(
  onToolOutcome?: ToolOutcomeObserver,
  runId = baseParams.runId,
 ): Promise<unknown> {
+  // Exercise the live before_tool_call wrapper so the guard sees the same
+  // outcome observer path used by real embedded tools.
  const tool = wrapToolWithBeforeToolCallHook(
    {
      name: toolName,
@@ -135,15 +139,13 @@ describe("post-compaction loop guard wired into runEmbeddedAgent", () => {
    let attemptSignalAborted = false;
    let attemptSignalReason: unknown;

-    // Attempt 1: overflow → triggers compaction.
+    // Attempt 1: overflow triggers compaction.
    mockedRunEmbeddedAttempt.mockImplementationOnce(async () =>
      makeAttemptResult({ promptError: overflowError }),
    );
-    // Attempt 2: post-compaction. The live wrapped-tool path records each
-    // outcome while the prompt is still running. The third identical result
-    // must not rely on throwing out of tool execution (the dependency converts
-    // tool errors into tool results); instead it aborts the attempt signal and
-    // the runner raises the persisted-loop error after the attempt unwinds.
+    // Attempt 2: live wrapped-tool outcomes repeat while the prompt is running.
+    // Tool errors become tool results, so the guard cannot rely on throwing: it
+    // aborts the attempt signal and the runner raises the loop error after unwinding.
    mockedRunEmbeddedAttempt.mockImplementationOnce(async (attemptParams: unknown) => {
      const { abortSignal, onToolOutcome } = attemptParams as {
        abortSignal?: AbortSignal;
--- a/src/agents/embedded-agent-runner/run.cross-provider-fallback-error-context.test.ts
+++ b/src/agents/embedded-agent-runner/run.cross-provider-fallback-error-context.test.ts
@@ -1,3 +1,4 @@
+// Coverage for preserving current-attempt error context across model fallback.
 import { beforeAll, beforeEach, describe, expect, it } from "vitest";
 import { makeAssistantMessageFixture } from "../test-helpers/assistant-message-fixtures.js";
 import { makeModelFallbackCfg } from "../test-helpers/model-fallback-config-fixture.js";
@@ -33,6 +34,8 @@ function isCurrentAttemptAssistant(value: unknown): value is CurrentAttemptAssis
 }

 function setupDeepseekFallbackErrorMatchers() {
+  // DeepSeek matchers prove failover classification uses the current candidate
+  // assistant instead of stale history from the previous provider.
  mockedIsFailoverAssistantError.mockImplementation((...args: unknown[]) => {
    const assistant = args[0];
    return isCurrentAttemptAssistant(assistant) && assistant.provider === "deepseek";
@@ -44,6 +47,8 @@ function setupDeepseekFallbackErrorMatchers() {
 }

 function captureFormattedAssistant() {
+  // Capture the assistant passed to formatting so tests can inspect which
+  // provider/model error object drove the final failover message.
  let lastFormattedAssistant: unknown;
  mockedFormatAssistantErrorText.mockImplementation((...args: unknown[]) => {
    lastFormattedAssistant = args[0];
@@ -173,6 +178,8 @@ describe("runEmbeddedAgent cross-provider fallback error handling", () => {
  });

  it("does not reuse a prior provider session assistant when the current candidate times out", async () => {
+    // Timeout failover has no reliable current assistant. Reusing the previous
+    // provider's session error would misattribute the failed candidate.
    const getLastFormattedAssistant = captureFormattedAssistant();
    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
      makeAttemptResult({