mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-06 05:51:15 +08:00
docs: document embedded runner run tests
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
// Coverage for before_agent_finalize revision handling in embedded runs.
|
||||
import { beforeAll, beforeEach, describe, expect, it } from "vitest";
|
||||
import { makeAttemptResult } from "./run.overflow-compaction.fixture.js";
|
||||
import {
|
||||
@@ -15,6 +16,8 @@ function finalAnswerAttempt(
|
||||
text: string,
|
||||
overrides?: Partial<EmbeddedRunAttemptResult>,
|
||||
): EmbeddedRunAttemptResult {
|
||||
// Finalize tests need a successful assistant turn with both surfaced text and
|
||||
// snapshot content so the runner can decide whether to request a revision.
|
||||
return makeAttemptResult({
|
||||
assistantTexts: [text],
|
||||
lastAssistant: {
|
||||
@@ -75,6 +78,8 @@ describe("runEmbeddedAgent before_agent_finalize", () => {
|
||||
});
|
||||
|
||||
it("turns a revise decision into one more hidden continuation", async () => {
|
||||
// Revision prompts are hidden continuations; they must not persist the
|
||||
// original user prompt a second time.
|
||||
mockedRunEmbeddedAttempt
|
||||
.mockResolvedValueOnce(
|
||||
finalAnswerAttempt("First answer.", {
|
||||
@@ -123,6 +128,8 @@ describe("runEmbeddedAgent before_agent_finalize", () => {
|
||||
});
|
||||
|
||||
it("does not retry finalize revisions after a timed-out attempt", async () => {
|
||||
// A timed-out attempt may have partial assistant text, but asking for a
|
||||
// finalize revision would replay an invalid or blocked provider turn.
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
finalAnswerAttempt("Late answer.", {
|
||||
timedOut: true,
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
// Coverage for cron before_agent_reply hook handling before embedded attempts.
|
||||
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { SILENT_REPLY_TOKEN } from "../../auto-reply/tokens.js";
|
||||
import { makeAttemptResult } from "./run.overflow-compaction.fixture.js";
|
||||
@@ -12,6 +13,8 @@ import {
|
||||
let runEmbeddedAgent: typeof import("./run.js").runEmbeddedAgent;
|
||||
|
||||
function firstBeforeAgentReplyCall() {
|
||||
// Helper keeps assertions on the hook payload and context close to the tests
|
||||
// without leaking mock tuple details into every case.
|
||||
const call = mockedGlobalHookRunner.runBeforeAgentReply.mock.calls[0];
|
||||
if (!call) {
|
||||
throw new Error("expected before_agent_reply hook call");
|
||||
@@ -43,6 +46,8 @@ describe("runEmbeddedAgent cron before_agent_reply seam", () => {
|
||||
});
|
||||
|
||||
it("lets before_agent_reply claim cron runs before the embedded attempt starts", async () => {
|
||||
// Cron hooks can fully handle maintenance prompts before the model is
|
||||
// invoked, which avoids unnecessary prompt-cache and setup work.
|
||||
mockedGlobalHookRunner.hasHooks.mockImplementation(
|
||||
(hookName: string) => hookName === "before_agent_reply",
|
||||
);
|
||||
@@ -134,6 +139,8 @@ describe("runEmbeddedAgent cron before_agent_reply seam", () => {
|
||||
});
|
||||
|
||||
it("forwards one-shot model-run flags into the embedded attempt", async () => {
|
||||
// Model-run mode is request-scoped; it must pass through to the first
|
||||
// attempt without becoming a persistent session setting.
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
|
||||
|
||||
await runEmbeddedAgent({
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
// Coverage for replay-safe Codex app-server recovery retries.
|
||||
import { beforeAll, beforeEach, describe, expect, it } from "vitest";
|
||||
import { makeModelFallbackCfg } from "../test-helpers/model-fallback-config-fixture.js";
|
||||
import { makeAttemptResult } from "./run.overflow-compaction.fixture.js";
|
||||
@@ -16,6 +17,8 @@ let runEmbeddedAgent: typeof import("./run.js").runEmbeddedAgent;
|
||||
function codexClientClosedAttempt(
|
||||
overrides: Partial<EmbeddedRunAttemptResult> = {},
|
||||
): EmbeddedRunAttemptResult {
|
||||
// Stdio client-close failures can be replay-safe when Codex reports that the
|
||||
// turn ended before completion and no user-visible side effect escaped.
|
||||
return makeAttemptResult({
|
||||
assistantTexts: [],
|
||||
promptError: new Error("codex app-server client closed before turn completed"),
|
||||
@@ -34,6 +37,8 @@ function codexClientClosedAttempt(
|
||||
function codexTurnCompletionIdleTimeoutAttempt(
|
||||
overrides: Partial<EmbeddedRunAttemptResult> = {},
|
||||
): EmbeddedRunAttemptResult {
|
||||
// Completion-watch idle timeouts are retried separately from progress timeouts
|
||||
// because only the former indicates Codex may have lost the final event.
|
||||
return makeAttemptResult({
|
||||
assistantTexts: [],
|
||||
aborted: true,
|
||||
@@ -106,6 +111,8 @@ describe("runEmbeddedAgent Codex app-server recovery", () => {
|
||||
});
|
||||
|
||||
it("suppresses duplicate user persistence when retrying after the inbound message was persisted", async () => {
|
||||
// If the first attempt already persisted the inbound message, the retry must
|
||||
// not mirror it again into the transcript.
|
||||
mockedRunEmbeddedAttempt
|
||||
.mockImplementationOnce(async (attemptParams) => {
|
||||
(
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
// Coverage for handing Codex server_error turns to model fallback.
|
||||
import { beforeAll, beforeEach, describe, expect, it } from "vitest";
|
||||
import { makeAssistantMessageFixture } from "../test-helpers/assistant-message-fixtures.js";
|
||||
import { makeModelFallbackCfg } from "../test-helpers/model-fallback-config-fixture.js";
|
||||
@@ -27,6 +28,8 @@ describe("runEmbeddedAgent Codex server_error fallback handoff", () => {
|
||||
});
|
||||
|
||||
it("throws FailoverError for Codex server_error when model fallbacks are configured", async () => {
|
||||
// Codex server_error is a provider failure, not a normal assistant reply;
|
||||
// configured fallbacks should receive it through the failover path.
|
||||
const rawCodexError =
|
||||
'Codex error: {"type":"error","error":{"type":"server_error","code":"server_error","message":"An error occurred while processing your request."},"sequence_number":2}';
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
// Coverage for wiring the post-compaction loop guard into embedded runs.
|
||||
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type {
|
||||
diagnosticSessionStates as DiagnosticSessionStatesType,
|
||||
@@ -29,9 +30,8 @@ import {
|
||||
} from "./run.overflow-compaction.harness.js";
|
||||
|
||||
let runEmbeddedAgent: typeof import("./run.js").runEmbeddedAgent;
|
||||
// These need to be imported AFTER loadRunOverflowCompactionHarness so that
|
||||
// they reference the same module instances the (re-imported) runner uses.
|
||||
// vi.resetModules() inside the harness invalidates any earlier import.
|
||||
// Import after loadRunOverflowCompactionHarness so these references point at the
|
||||
// same module instances as the re-imported runner graph.
|
||||
let diagnosticSessionStates: typeof DiagnosticSessionStatesType;
|
||||
let getDiagnosticSessionState: typeof GetDiagnosticSessionStateType;
|
||||
let recordToolCall: typeof RecordToolCallType;
|
||||
@@ -51,6 +51,8 @@ function recordToolOutcome(
|
||||
result: unknown,
|
||||
runId?: string,
|
||||
): void {
|
||||
// Seed diagnostic history directly for cases that inspect persisted loop
|
||||
// state without running a wrapped tool.
|
||||
const toolCallId = `${toolName}-${state.toolCallHistory?.length ?? 0}`;
|
||||
const scope = runId ? { runId } : undefined;
|
||||
recordToolCall(state, toolName, toolParams, toolCallId, undefined, scope);
|
||||
@@ -75,6 +77,8 @@ async function executeWrappedToolOutcome(
|
||||
onToolOutcome?: ToolOutcomeObserver,
|
||||
runId = baseParams.runId,
|
||||
): Promise<unknown> {
|
||||
// Exercise the live before_tool_call wrapper so the guard sees the same
|
||||
// outcome observer path used by real embedded tools.
|
||||
const tool = wrapToolWithBeforeToolCallHook(
|
||||
{
|
||||
name: toolName,
|
||||
@@ -135,15 +139,13 @@ describe("post-compaction loop guard wired into runEmbeddedAgent", () => {
|
||||
let attemptSignalAborted = false;
|
||||
let attemptSignalReason: unknown;
|
||||
|
||||
// Attempt 1: overflow → triggers compaction.
|
||||
// Attempt 1: overflow triggers compaction.
|
||||
mockedRunEmbeddedAttempt.mockImplementationOnce(async () =>
|
||||
makeAttemptResult({ promptError: overflowError }),
|
||||
);
|
||||
// Attempt 2: post-compaction. The live wrapped-tool path records each
|
||||
// outcome while the prompt is still running. The third identical result
|
||||
// must not rely on throwing out of tool execution (the dependency converts
|
||||
// tool errors into tool results); instead it aborts the attempt signal and
|
||||
// the runner raises the persisted-loop error after the attempt unwinds.
|
||||
// Attempt 2: live wrapped-tool outcomes repeat while the prompt is running.
|
||||
// The guard aborts the attempt signal, then the runner raises the loop error
|
||||
// after the attempt unwinds.
|
||||
mockedRunEmbeddedAttempt.mockImplementationOnce(async (attemptParams: unknown) => {
|
||||
const { abortSignal, onToolOutcome } = attemptParams as {
|
||||
abortSignal?: AbortSignal;
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
// Coverage for preserving current-attempt error context across model fallback.
|
||||
import { beforeAll, beforeEach, describe, expect, it } from "vitest";
|
||||
import { makeAssistantMessageFixture } from "../test-helpers/assistant-message-fixtures.js";
|
||||
import { makeModelFallbackCfg } from "../test-helpers/model-fallback-config-fixture.js";
|
||||
@@ -33,6 +34,8 @@ function isCurrentAttemptAssistant(value: unknown): value is CurrentAttemptAssis
|
||||
}
|
||||
|
||||
function setupDeepseekFallbackErrorMatchers() {
|
||||
// DeepSeek matchers prove failover classification uses the current candidate
|
||||
// assistant instead of stale history from the previous provider.
|
||||
mockedIsFailoverAssistantError.mockImplementation((...args: unknown[]) => {
|
||||
const assistant = args[0];
|
||||
return isCurrentAttemptAssistant(assistant) && assistant.provider === "deepseek";
|
||||
@@ -44,6 +47,8 @@ function setupDeepseekFallbackErrorMatchers() {
|
||||
}
|
||||
|
||||
function captureFormattedAssistant() {
|
||||
// Capture the assistant passed to formatting so tests can inspect which
|
||||
// provider/model error object drove the final failover message.
|
||||
let lastFormattedAssistant: unknown;
|
||||
mockedFormatAssistantErrorText.mockImplementation((...args: unknown[]) => {
|
||||
lastFormattedAssistant = args[0];
|
||||
@@ -173,6 +178,8 @@ describe("runEmbeddedAgent cross-provider fallback error handling", () => {
|
||||
});
|
||||
|
||||
it("does not reuse a prior provider session assistant when the current candidate times out", async () => {
|
||||
// Timeout failover has no reliable current assistant. Reusing the previous
|
||||
// provider's session error would misattribute the failed candidate.
|
||||
const getLastFormattedAssistant = captureFormattedAssistant();
|
||||
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
|
||||
makeAttemptResult({
|
||||
|
||||
Reference in New Issue
Block a user