fix(qa): stabilize mock QA scenario contracts

This commit is contained in:
Vincent Koc
2026-05-27 06:22:26 +02:00
parent 7e702bb43d
commit 81c1892c9a
16 changed files with 266 additions and 94 deletions

View File

@@ -3859,6 +3859,14 @@ describe("qa mock openai server", () => {
expect(maxReasoning.summary).toEqual([]);
expect(outputItem(maxPayload, 1).type).toBe("message");
expect(outputText(maxPayload, 1)).toBe("THINKING-MAX-OK");
const maxStream = await expectResponsesText(server, {
stream: true,
model: "gpt-5.5",
input: [makeUserInput(QA_THINKING_VISIBILITY_MAX_PROMPT)],
});
expect(maxStream).toContain('"type":"response.output_text.delta"');
expect(maxStream).toContain('"delta":"THINKING-MAX-OK"');
});
it("keeps the reasoning-only side-effect path ready for no-auto-retry QA coverage", async () => {

View File

@@ -1512,6 +1512,20 @@ function buildReasoningAndAssistantEvents(params: {
status: "in_progress",
},
},
{
type: "response.output_text.delta",
item_id: answerItem.id,
output_index: 1,
content_index: 0,
delta: params.answerText,
},
{
type: "response.output_text.done",
item_id: answerItem.id,
output_index: 1,
content_index: 0,
text: params.answerText,
},
{
type: "response.output_item.done",
item: answerItem,
@@ -1664,13 +1678,13 @@ async function buildResponsesPayload(
}
return buildAssistantEvents("BUG-SHOULD-NOT-AUTO-RETRY");
}
if (QA_THINKING_VISIBILITY_MAX_PROMPT_RE.test(prompt)) {
if (QA_THINKING_VISIBILITY_MAX_PROMPT_RE.test(allInputText)) {
return buildReasoningAndAssistantEvents({
reasoningId: "rs_mock_thinking_visibility_max",
answerText: "THINKING-MAX-OK",
});
}
if (QA_THINKING_VISIBILITY_OFF_PROMPT_RE.test(prompt)) {
if (QA_THINKING_VISIBILITY_OFF_PROMPT_RE.test(allInputText)) {
return buildAssistantEvents("THINKING-OFF-OK");
}
if (QA_EMPTY_RESPONSE_RECOVERY_PROMPT_RE.test(allInputText)) {

View File

@@ -31,7 +31,7 @@ function createMockOpenAiResponsesProvider(baseUrl: string): ModelProviderConfig
id: "gpt-5.5",
name: "gpt-5.5",
api: "openai-responses",
reasoning: false,
reasoning: true,
input: ["text", "image"],
cost: ZERO_COST,
contextWindow: 128_000,
@@ -41,7 +41,7 @@ function createMockOpenAiResponsesProvider(baseUrl: string): ModelProviderConfig
id: "gpt-5.5-alt",
name: "gpt-5.5-alt",
api: "openai-responses",
reasoning: false,
reasoning: true,
input: ["text", "image"],
cost: ZERO_COST,
contextWindow: 128_000,

View File

@@ -55,6 +55,12 @@ describe("buildQaGatewayConfig", () => {
expect(getPrimaryModel(cfg.agents?.defaults?.model)).toBe("mock-openai/gpt-5.5");
expect(cfg.models?.providers?.["mock-openai"]?.baseUrl).toBe("http://127.0.0.1:44080/v1");
expect(cfg.models?.providers?.["mock-openai"]?.request).toEqual({ allowPrivateNetwork: true });
expect(cfg.models?.providers?.["mock-openai"]?.models).toEqual(
expect.arrayContaining([
expect.objectContaining({ id: "gpt-5.5", reasoning: true }),
expect.objectContaining({ id: "gpt-5.5-alt", reasoning: true }),
]),
);
expect(cfg.models?.providers?.openai?.baseUrl).toBe("http://127.0.0.1:44080/v1");
expect(cfg.models?.providers?.openai?.request).toEqual({ allowPrivateNetwork: true });
expect(cfg.models?.providers?.anthropic?.baseUrl).toBe("http://127.0.0.1:44080");

View File

@@ -197,14 +197,18 @@ describe("qa scenario catalog", () => {
expect(readQaScenarioById("long-context-progress-watchdog").sourcePath).toBe(
"qa/scenarios/runtime/long-context-progress-watchdog.md",
);
expect(JSON.stringify(readQaScenarioById("gateway-restart-inflight-run").execution.flow))
.toContain("EmbeddedAttemptSessionTakeoverError");
expect(JSON.stringify(readQaScenarioById("gateway-restart-inflight-run").execution.flow))
.toContain("AbortError");
expect(JSON.stringify(readQaScenarioById("gateway-restart-inflight-run").execution.flow))
.toContain("This operation was aborted");
expect(JSON.stringify(readQaScenarioById("gateway-restart-inflight-run").execution.flow))
.toContain("liveTurnTimeoutMs(env, 180000)");
expect(
JSON.stringify(readQaScenarioById("gateway-restart-inflight-run").execution.flow),
).toContain("EmbeddedAttemptSessionTakeoverError");
expect(
JSON.stringify(readQaScenarioById("gateway-restart-inflight-run").execution.flow),
).toContain("AbortError");
expect(
JSON.stringify(readQaScenarioById("gateway-restart-inflight-run").execution.flow),
).toContain("This operation was aborted");
expect(
JSON.stringify(readQaScenarioById("gateway-restart-inflight-run").execution.flow),
).toContain("liveTurnTimeoutMs(env, 180000)");
expect(readQaScenarioExecutionConfig("long-context-progress-watchdog")).toMatchObject({
requiredProviderMode: "live-frontier",
harnessRuntime: "codex",
@@ -323,8 +327,8 @@ describe("qa scenario catalog", () => {
const scenario = readQaScenarioById("gpt55-thinking-visibility-switch");
const config = readQaScenarioExecutionConfig("gpt55-thinking-visibility-switch") as
| {
requiredLiveProvider?: string;
requiredLiveModel?: string;
requiredProvider?: string;
requiredModel?: string;
offDirective?: string;
maxDirective?: string;
reasoningDirective?: string;
@@ -332,16 +336,15 @@ describe("qa scenario catalog", () => {
| undefined;
expect(scenario.sourcePath).toBe("qa/scenarios/models/gpt55-thinking-visibility-switch.md");
expect(config?.requiredLiveProvider).toBe("openai");
expect(config?.requiredLiveModel).toBe("gpt-5.5");
expect(config?.requiredProvider).toBe("openai");
expect(config?.requiredModel).toBe("gpt-5.5");
expect(config?.offDirective).toBe("/think off");
expect(config?.maxDirective).toBe("/think medium");
expect(config?.reasoningDirective).toBe("/reasoning on");
expect(scenario.execution.flow?.steps.map((step) => step.name)).toEqual([
"enables reasoning display and disables thinking",
"switches to medium thinking",
"verifies medium thinking emits visible reasoning",
"verifies medium thinking completes the answer",
"verifies medium thinking reaches the provider",
]);
});

View File

@@ -413,4 +413,30 @@ describe("qa suite planning helpers", () => {
}).map((scenario) => scenario.id),
).toEqual(["generic", "live-only"]);
});
it("keeps live-only runtime parity scenarios out of implicit mock selections", () => {
const scenarios = [
makeQaSuiteTestScenario("generic"),
makeQaSuiteTestScenario("live-runtime", {
runtimeParityTier: "live-only",
}),
];
expect(
selectQaSuiteScenarios({
scenarios,
providerMode: "mock-openai",
primaryModel: "mock-openai/gpt-5.5",
}).map((scenario) => scenario.id),
).toEqual(["generic"]);
expect(
selectQaSuiteScenarios({
scenarios,
scenarioIds: ["live-runtime"],
providerMode: "mock-openai",
primaryModel: "mock-openai/gpt-5.5",
}).map((scenario) => scenario.id),
).toEqual(["live-runtime"]);
});
});

View File

@@ -33,12 +33,16 @@ function scenarioMatchesLiveLane(params: {
providerMode: QaProviderMode;
claudeCliAuthMode?: QaCliBackendAuthMode;
}) {
const provider = getQaProvider(params.providerMode);
if (params.scenario.runtimeParityTier === "live-only" && provider.kind !== "live") {
return false;
}
const config = params.scenario.execution.config ?? {};
const requiredProviderMode = normalizeQaConfigString(config.requiredProviderMode);
if (requiredProviderMode && params.providerMode !== requiredProviderMode) {
return false;
}
if (getQaProvider(params.providerMode).kind !== "live") {
if (provider.kind !== "live") {
return true;
}
const selected = splitModelRef(params.primaryModel);

View File

@@ -10,12 +10,14 @@ const listToolsMock = vi.hoisted(() => vi.fn(async () => ({ tools: [] })));
const callToolMock = vi.hoisted(() => vi.fn(async () => ({ content: [] })));
const closeMock = vi.hoisted(() => vi.fn(async () => undefined));
const resolveQaNodeExecPathMock = vi.hoisted(() => vi.fn(async () => "/usr/bin/node"));
const stderrOnMock = vi.hoisted(() => vi.fn());
const stdioTransportMock = vi.hoisted(() =>
vi.fn().mockImplementation(function StdioClientTransport(
this: { params?: unknown },
this: { params?: unknown; stderr?: { on: typeof stderrOnMock } },
params: unknown,
) {
this.params = params;
this.stderr = { on: stderrOnMock };
}),
);
@@ -66,6 +68,7 @@ describe("qa suite runtime agent tools helpers", () => {
callToolMock.mockReset();
closeMock.mockClear();
resolveQaNodeExecPathMock.mockClear();
stderrOnMock.mockClear();
stdioTransportMock.mockClear();
});
@@ -146,10 +149,17 @@ describe("qa suite runtime agent tools helpers", () => {
OPENCLAW_KEY: "1",
},
});
expect(callToolMock).toHaveBeenCalledWith({
name: "plugin.echo",
arguments: { text: "hello" },
});
expect(stderrOnMock).toHaveBeenCalledWith("data", expect.any(Function));
expect(connectMock).toHaveBeenCalledWith(expect.anything(), { timeout: 180_000 });
expect(listToolsMock).toHaveBeenCalledWith({}, { timeout: 180_000 });
expect(callToolMock).toHaveBeenCalledWith(
{
name: "plugin.echo",
arguments: { text: "hello" },
},
undefined,
{ timeout: 180_000 },
);
expect(closeMock).toHaveBeenCalled();
});

View File

@@ -13,6 +13,8 @@ import type {
} from "./suite-runtime-types.js";
const requireFromHere = createRequire(import.meta.url);
const MCP_STDERR_TAIL_LIMIT = 8_192;
const MCP_REQUEST_TIMEOUT_MS = 180_000;
function findSkill(skills: QaSkillStatusEntry[], name: string) {
return skills.find((skill) => skill.name === name);
@@ -52,10 +54,17 @@ async function callPluginToolsMcp(params: {
cwd: params.env.gateway.tempRoot,
env: transportEnv,
});
let stderrTail = "";
const stderr = transport.stderr;
if (stderr && typeof stderr.on === "function") {
stderr.on("data", (chunk: unknown) => {
stderrTail = `${stderrTail}${String(chunk)}`.slice(-MCP_STDERR_TAIL_LIMIT);
});
}
const client = new Client({ name: "openclaw-qa-suite", version: "0.0.0" }, {});
try {
await client.connect(transport);
const listed = await client.listTools();
await client.connect(transport, { timeout: MCP_REQUEST_TIMEOUT_MS });
const listed = await client.listTools({}, { timeout: MCP_REQUEST_TIMEOUT_MS });
const tool = listed.tools.find((entry) => entry.name === params.toolName);
if (!tool) {
const availableTools = listed.tools
@@ -66,10 +75,22 @@ async function callPluginToolsMcp(params: {
`MCP tool missing: ${params.toolName}; available tools: ${availableTools.join(", ") || "<none>"}`,
);
}
return await client.callTool({
name: params.toolName,
arguments: params.args,
});
try {
return await client.callTool(
{
name: params.toolName,
arguments: params.args,
},
undefined,
{ timeout: MCP_REQUEST_TIMEOUT_MS },
);
} catch (error) {
const tail = stderrTail.trim();
if (!tail || !(error instanceof Error)) {
throw error;
}
throw new Error(`${error.message}\nMCP stderr tail:\n${tail}`, { cause: error });
}
} finally {
await client.close().catch(() => {});
}

View File

@@ -9,6 +9,7 @@ export function makeQaSuiteTestScenario(
plugins?: string[];
gatewayConfigPatch?: Record<string, unknown>;
gatewayRuntime?: { forwardHostHome?: boolean };
runtimeParityTier?: QaSuiteTestScenario["runtimeParityTier"];
surface?: string;
} = {},
): QaSuiteTestScenario {
@@ -18,6 +19,7 @@ export function makeQaSuiteTestScenario(
surface: params.surface ?? "test",
objective: "test",
successCriteria: ["test"],
...(params.runtimeParityTier ? { runtimeParityTier: params.runtimeParityTier } : {}),
...(params.plugins ? { plugins: params.plugins } : {}),
...(params.gatewayConfigPatch ? { gatewayConfigPatch: params.gatewayConfigPatch } : {}),
...(params.gatewayRuntime ? { gatewayRuntime: params.gatewayRuntime } : {}),

View File

@@ -13,8 +13,9 @@ objective: Verify GPT-5.5 can switch from disabled thinking to medium thinking w
successCriteria:
- Live runs target openai/gpt-5.5, not a mini or pro variant.
- The session enables reasoning display before the comparison turns.
- The disabled-thinking turn returns its visible marker without a non-empty Reasoning summary.
- The medium-thinking turn returns its visible marker and a separate Reasoning-prefixed message.
- The disabled-thinking turn returns its visible marker without sending a reasoning payload to OpenAI-compatible providers.
- The medium-thinking turn sends a medium reasoning request and returns its visible marker.
- Transports with a visible reasoning lane expose a separate Reasoning-prefixed message; qa-channel validates provider behavior because generic delivery suppresses reasoning payloads by design.
docsRefs:
- docs/tools/thinking.md
- docs/help/testing.md
@@ -29,8 +30,8 @@ execution:
kind: flow
summary: Toggle reasoning display and GPT-5.5 thinking between off/none and medium, then verify visible reasoning only on the medium turn.
config:
requiredLiveProvider: openai
requiredLiveModel: gpt-5.5
requiredProvider: openai
requiredModel: gpt-5.5
offDirective: /think off
maxDirective: /think medium
reasoningDirective: /reasoning on
@@ -77,22 +78,25 @@ steps:
- lambda:
expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === config.conversationId && /Reasoning visibility enabled/i.test(candidate.text)).at(-1)"
- expr: liveTurnTimeoutMs(env, 20000)
- call: patchConfig
- set: thinkOffCursor
value:
expr: state.getSnapshot().messages.length
- call: state.addInboundMessage
args:
- env:
ref: env
patch:
agents:
defaults:
thinkingDefault: "off"
- call: waitForGatewayHealthy
- conversation:
id:
expr: config.conversationId
kind: direct
senderId: qa-operator
senderName: QA Operator
text:
expr: config.offDirective
- call: waitForCondition
saveAs: thinkOffAck
args:
- ref: env
- 60000
- call: waitForQaChannelReady
args:
- ref: env
- 60000
- lambda:
expr: "state.getSnapshot().messages.slice(thinkOffCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === config.conversationId && /Thinking disabled/i.test(candidate.text)).at(-1)"
- expr: liveTurnTimeoutMs(env, 20000)
- set: offCursor
value:
expr: state.getSnapshot().messages.length
@@ -111,7 +115,7 @@ steps:
args:
- lambda:
expr: "state.getSnapshot().messages.slice(offCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === config.conversationId && candidate.text.includes(config.offMarker)).at(-1)"
- expr: liveTurnTimeoutMs(env, 30000)
- expr: liveTurnTimeoutMs(env, 90000)
- set: offMessages
value:
expr: "state.getSnapshot().messages.slice(offCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === config.conversationId)"
@@ -136,27 +140,34 @@ steps:
expr: "String(offRequest?.model ?? '').includes('gpt-5.5')"
message:
expr: "`expected GPT-5.5 off mock request, got ${String(offRequest?.model ?? '')}`"
detailsExpr: "`reasoning ack=${reasoningAck.text}; off answer=${offAnswer.text}`"
- assert:
expr: "offRequest?.body && !Object.prototype.hasOwnProperty.call(offRequest.body, 'reasoning')"
message:
expr: "`disabled thinking should omit OpenAI reasoning payload, got ${JSON.stringify(offRequest?.body?.reasoning ?? null)}`"
detailsExpr: "`reasoning ack=${reasoningAck.text}; thinking off=${thinkOffAck.text}; off answer=${offAnswer.text}`"
- name: switches to medium thinking
actions:
- call: patchConfig
- set: thinkMediumCursor
value:
expr: state.getSnapshot().messages.length
- call: state.addInboundMessage
args:
- env:
ref: env
patch:
agents:
defaults:
thinkingDefault: "medium"
- call: waitForGatewayHealthy
- conversation:
id:
expr: config.conversationId
kind: direct
senderId: qa-operator
senderName: QA Operator
text:
expr: config.maxDirective
- call: waitForCondition
saveAs: thinkMediumAck
args:
- ref: env
- 60000
- call: waitForQaChannelReady
args:
- ref: env
- 60000
detailsExpr: "`thinking default patched to medium`"
- name: verifies medium thinking emits visible reasoning
- lambda:
expr: "state.getSnapshot().messages.slice(thinkMediumCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === config.conversationId && /Thinking level set to medium/i.test(candidate.text)).at(-1)"
- expr: liveTurnTimeoutMs(env, 20000)
detailsExpr: "`thinking medium=${thinkMediumAck.text}`"
- name: verifies medium thinking reaches the provider
actions:
- set: maxCursor
value:
@@ -171,19 +182,6 @@ steps:
senderName: QA Operator
text:
expr: config.maxPrompt
- call: waitForCondition
saveAs: maxReasoning
args:
- lambda:
expr: "state.getSnapshot().messages.slice(maxCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === config.conversationId && candidate.text.trimStart().startsWith('Reasoning:')).at(-1)"
- expr: liveTurnTimeoutMs(env, 120000)
- assert:
expr: "maxReasoning.text.trimStart().startsWith('Reasoning:')"
message:
expr: "`missing max reasoning message near answer: ${recentOutboundSummary(state, 6)}`"
detailsExpr: "`reasoning=${maxReasoning.text}`"
- name: verifies medium thinking completes the answer
actions:
- call: waitForCondition
saveAs: maxAnswer
args:
@@ -193,7 +191,7 @@ steps:
- assert:
expr: "maxAnswer.text.includes(config.maxMarker)"
message:
expr: "`missing max marker: ${maxAnswer.text}`"
expr: "`missing max marker near answer: ${recentOutboundSummary(state, 6)}`"
- if:
expr: "Boolean(env.mock)"
then:
@@ -207,5 +205,22 @@ steps:
expr: "String(maxRequest?.model ?? '').includes('gpt-5.5')"
message:
expr: "`expected GPT-5.5 mock request, got ${String(maxRequest?.model ?? '')}`"
detailsExpr: "`answer=${maxAnswer.text}`"
- assert:
expr: "maxRequest?.body?.reasoning?.effort === 'medium'"
message:
expr: "`expected medium OpenAI reasoning payload, got ${JSON.stringify(maxRequest?.body?.reasoning ?? null)}`"
- if:
expr: "env.transport.id !== 'qa-channel'"
then:
- call: waitForCondition
saveAs: maxReasoning
args:
- lambda:
expr: "state.getSnapshot().messages.slice(maxCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === config.conversationId && candidate.text.trimStart().startsWith('Reasoning:')).at(-1)"
- expr: liveTurnTimeoutMs(env, 120000)
- assert:
expr: "maxReasoning.text.trimStart().startsWith('Reasoning:')"
message:
expr: "`missing max reasoning message near answer: ${recentOutboundSummary(state, 6)}`"
detailsExpr: "env.transport.id === 'qa-channel' ? `answer=${maxAnswer.text}; medium reasoning=${env.mock ? String(maxRequest?.body?.reasoning?.effort ?? '') : 'live'}; qa-channel suppresses reasoning delivery` : `answer=${maxAnswer.text}; reasoning=${maxReasoning.text}`"
```

View File

@@ -14,6 +14,9 @@ objective: Verify an explicit pending skill suggestion queues for review, then a
plugins:
- skill-workshop
gatewayConfigPatch:
tools:
alsoAllow:
- skill_workshop
plugins:
entries:
skill-workshop:

View File

@@ -38,6 +38,9 @@ execution:
steps:
- name: keeps tools.effective and skills.status aligned after config changes
actions:
- call: ensureImageGenerationConfigured
args:
- ref: env
- call: writeWorkspaceSkill
args:
- env:

View File

@@ -106,6 +106,10 @@ type HookOutcome =
}
| { blocked: false; params: unknown };
type PluginApprovalRequest = NonNullable<PluginHookBeforeToolCallResult["requireApproval"]>;
type BeforeToolCallWrapperOptions = {
approvalMode?: "request" | "report";
emitDiagnostics: boolean;
};
export type BeforeToolCallPolicyDiagnosticState = {
hasBeforeToolCallHook: boolean;
@@ -910,7 +914,7 @@ export async function runBeforeToolCallHook(args: {
export function wrapToolWithBeforeToolCallHook(
tool: AnyAgentTool,
ctx?: HookContext,
options: { emitDiagnostics?: boolean } = {},
options: { approvalMode?: "request" | "report"; emitDiagnostics?: boolean } = {},
): AnyAgentTool {
const execute = tool.execute;
if (!execute) {
@@ -918,7 +922,10 @@ export function wrapToolWithBeforeToolCallHook(
}
const toolName = tool.name || "tool";
const diagnosticIdentity = resolveToolDiagnosticIdentity(tool);
const diagnosticOptions = { emitDiagnostics: options.emitDiagnostics !== false };
const hookOptions: BeforeToolCallWrapperOptions = {
...(options.approvalMode ? { approvalMode: options.approvalMode } : {}),
emitDiagnostics: options.emitDiagnostics !== false,
};
const wrappedTool: AnyAgentTool = {
...tool,
execute: async (toolCallId, params, signal, onUpdate) => {
@@ -931,6 +938,7 @@ export function wrapToolWithBeforeToolCallHook(
toolCallId,
ctx,
signal,
approvalMode: hookOptions.approvalMode,
});
if (outcome.blocked) {
if (outcome.kind !== "veto") {
@@ -950,7 +958,7 @@ export function wrapToolWithBeforeToolCallHook(
...(toolCallId && { toolCallId }),
paramsSummary: summarizeToolParams(outcome.params ?? hookParams),
};
if (diagnosticOptions.emitDiagnostics) {
if (hookOptions.emitDiagnostics) {
emitTrustedDiagnosticEvent({
type: "tool.execution.blocked",
...eventBase,
@@ -992,7 +1000,7 @@ export function wrapToolWithBeforeToolCallHook(
...(toolCallId && { toolCallId }),
paramsSummary: summarizeToolParams(executeParams),
};
if (diagnosticOptions.emitDiagnostics) {
if (hookOptions.emitDiagnostics) {
emitTrustedDiagnosticEvent({
type: "tool.execution.started",
...eventBase,
@@ -1014,7 +1022,7 @@ export function wrapToolWithBeforeToolCallHook(
toolParams: executeParams,
ctx,
});
if (diagnosticOptions.emitDiagnostics) {
if (hookOptions.emitDiagnostics) {
if (skillMatch) {
emitSkillUsedDiagnostic({
ctx,
@@ -1033,7 +1041,7 @@ export function wrapToolWithBeforeToolCallHook(
} catch (err) {
const cause = unwrapErrorCause(err);
const errorCode = diagnosticHttpStatusCode(cause);
if (diagnosticOptions.emitDiagnostics) {
if (hookOptions.emitDiagnostics) {
emitTrustedDiagnosticEvent({
type: "tool.execution.error",
...eventBase,
@@ -1060,7 +1068,7 @@ export function wrapToolWithBeforeToolCallHook(
enumerable: true,
});
Object.defineProperty(wrappedTool, BEFORE_TOOL_CALL_DIAGNOSTIC_OPTIONS, {
value: diagnosticOptions,
value: hookOptions,
enumerable: false,
});
return wrappedTool;
@@ -1079,6 +1087,17 @@ export function setBeforeToolCallDiagnosticsEnabled(tool: AnyAgentTool, enabled:
}
}
export function setBeforeToolCallApprovalMode(
tool: AnyAgentTool,
approvalMode: "request" | "report" | undefined,
): void {
const taggedTool = tool as unknown as Record<symbol, unknown>;
const options = taggedTool[BEFORE_TOOL_CALL_DIAGNOSTIC_OPTIONS];
if (options && typeof options === "object") {
(options as BeforeToolCallWrapperOptions).approvalMode = approvalMode;
}
}
export function copyBeforeToolCallHookMarker(source: AnyAgentTool, target: AnyAgentTool): void {
if (!isToolWrappedWithBeforeToolCallHook(source)) {
return;

View File

@@ -1,5 +1,6 @@
import {
isToolWrappedWithBeforeToolCallHook,
setBeforeToolCallApprovalMode,
wrapToolWithBeforeToolCallHook,
} from "../agents/pi-tools.before-tool-call.js";
import type { AnyAgentTool } from "../agents/tools/common.js";
@@ -22,11 +23,12 @@ function resolveJsonSchemaForTool(tool: AnyAgentTool): Record<string, unknown> {
export function createPluginToolsMcpHandlers(tools: AnyAgentTool[]) {
const wrappedTools = tools.map((tool) => {
if (isToolWrappedWithBeforeToolCallHook(tool)) {
setBeforeToolCallApprovalMode(tool, "report");
return tool;
}
// The ACPX MCP bridge should enforce the same pre-execution hook boundary
// as the agent and HTTP tool execution paths.
return wrapToolWithBeforeToolCallHook(tool);
return wrapToolWithBeforeToolCallHook(tool, undefined, { approvalMode: "report" });
});
const toolMap = new Map<string, AnyAgentTool>();
for (const tool of wrappedTools) {

View File

@@ -1,4 +1,5 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { wrapToolWithBeforeToolCallHook } from "../agents/pi-tools.before-tool-call.js";
import type { AnyAgentTool } from "../agents/tools/common.js";
import {
initializeGlobalHookRunner,
@@ -226,7 +227,7 @@ describe("plugin tools MCP server", () => {
expect(failed.content).toEqual([{ type: "text", text: "Tool error: boom" }]);
});
it("blocks tool execution when before_tool_call requires approval on the MCP bridge", async () => {
it("reports approval requirements without opening plugin approvals on the MCP bridge", async () => {
let hookCalls = 0;
const execute = vi.fn().mockResolvedValue({
content: "Stored.",
@@ -248,7 +249,6 @@ describe("plugin tools MCP server", () => {
},
]),
);
callGatewayTool.mockRejectedValueOnce(new Error("gateway unavailable"));
const tool = {
name: "memory_store",
description: "Store memory",
@@ -262,10 +262,46 @@ describe("plugin tools MCP server", () => {
arguments: { text: "remember this" },
});
expect(hookCalls).toBe(1);
expect(callGatewayTool).not.toHaveBeenCalled();
expect(execute).not.toHaveBeenCalled();
expect(result.isError).toBe(true);
expect(result.content).toEqual([
{ type: "text", text: "Tool error: Plugin approval required (gateway unavailable)" },
]);
expect(result.content).toEqual([{ type: "text", text: "Tool error: Approval required" }]);
});
it("switches pre-wrapped plugin tools to approval report mode on the MCP bridge", async () => {
const execute = vi.fn().mockResolvedValue({
content: "Stored.",
});
initializeGlobalHookRunner(
createMockPluginRegistry([
{
hookName: "before_tool_call",
handler: async () => ({
requireApproval: {
pluginId: "test-plugin",
title: "Approval required",
description: "Approval required",
},
}),
},
]),
);
callGatewayTool.mockRejectedValue(new Error("gateway unavailable"));
const tool = wrapToolWithBeforeToolCallHook({
name: "memory_store",
description: "Store memory",
parameters: { type: "object", properties: {} },
execute,
} as unknown as AnyAgentTool);
const handlers = createPluginToolsMcpHandlers([tool]);
const result = await handlers.callTool({
name: "memory_store",
arguments: { text: "remember this" },
});
expect(callGatewayTool).not.toHaveBeenCalled();
expect(execute).not.toHaveBeenCalled();
expect(result.isError).toBe(true);
expect(result.content).toEqual([{ type: "text", text: "Tool error: Approval required" }]);
});
});