test(qa): require channel scenario markers

This commit is contained in:
Vincent Koc
2026-06-03 14:19:33 +02:00
parent 2fa60af960
commit a9f099d279
5 changed files with 198 additions and 11 deletions

View File

@@ -67,6 +67,7 @@ Docs: https://docs.openclaw.ai
- Release/CI/E2E: require the Kitchen Sink RPC walk to prove every expected plugin tool is cataloged and effective before invoking tool fixtures.
- Release/CI/E2E: stop tracked Docker build commands when centralized build wrappers receive shutdown signals.
- Release/CI/E2E: cover MCP channel pairing reconnects by asserting the same temporary client state is reused across reconnects.
- Release/CI/E2E: require QA channel baseline and reconnect scenarios to assert their scenario markers instead of accepting any outbound reply.
- Release/CI/E2E: fail secret-provider proof runs when temporary state cleanup still fails after retries instead of hiding the cleanup error.
- Release/CI/E2E: fail package-candidate ref proofs when temporary source worktree cleanup fails instead of leaving stale worktrees behind.
- Release/CI/E2E: remove package tarball extract directories when tar extraction fails before validation can continue.

View File

@@ -1,7 +1,98 @@
import { describe, expect, it } from "vitest";
import { createQaBusState } from "./bus-state.js";
import { readQaScenarioById } from "./scenario-catalog.js";
import { runScenarioFlow } from "./scenario-flow-runner.js";
/** One step of a scenario flow as it is handed to the stubbed `runScenario`. */
type QaFlowStep = {
  /** Label copied into the step's result entry. */
  name: string;
  /** Executes the step; a resolved string is recorded as the step's details. */
  run: () => Promise<string | void>;
};
/**
 * Renders the bus snapshot as a transcript: one `direction:conversationId:text`
 * line per message, joined with newlines.
 */
function formatTestTranscript(state: ReturnType<typeof createQaBusState>) {
  const snapshot = state.getSnapshot();
  const lines = snapshot.messages.map(
    (entry) => `${entry.direction}:${entry.conversation.id}:${entry.text}`,
  );
  return lines.join("\n");
}
/**
 * Loads a scenario from the catalog by id and runs its flow against a fresh
 * in-memory QA bus, with every helper on the flow API replaced by a stub.
 *
 * Stub behavior:
 * - health/readiness/sleep/no-outbound waits and `runAgentPrompt` resolve immediately;
 * - `reset` and `resetBus` both reset the bus state;
 * - `randomUUID` always returns the same fixed UUID and `liveTurnTimeoutMs`
 *   returns the requested timeout unchanged;
 * - `waitForOutboundMessage` never actually waits: it bumps the wait counter,
 *   invokes the optional hook (so a test can inject messages), then searches the
 *   current snapshot once and throws a timeout error when nothing matches;
 * - `runScenario` runs the steps in order and reports each one as passed.
 *
 * @param scenarioId Catalog id of the scenario to load.
 * @param params.onWaitForOutboundMessage Hook called at the start of every
 *   `waitForOutboundMessage` call with the 1-based wait count and the bus state.
 * @returns Whatever `runScenarioFlow` resolves to for the loaded flow.
 * @throws Error when the loaded scenario has no flow.
 */
async function runLoadedScenarioFlow(
  scenarioId: string,
  params: {
    onWaitForOutboundMessage?: (params: {
      waitCount: number;
      state: ReturnType<typeof createQaBusState>;
    }) => void;
  } = {},
) {
  type QaBusState = ReturnType<typeof createQaBusState>;

  const scenario = readQaScenarioById(scenarioId);
  const { flow } = scenario.execution;
  if (!flow) {
    throw new Error(`scenario has no flow: ${scenarioId}`);
  }

  const state = createQaBusState();
  let waitCount = 0;

  // Shared stubs for helpers that have nothing to do in an in-memory run.
  const resolveImmediately = async () => undefined;
  const clearBus = async () => {
    state.reset();
  };

  const waitForOutboundMessage = async (
    stateLocal: QaBusState,
    predicate: (candidate: unknown) => boolean,
    timeoutMs: number,
    options?: { sinceIndex?: number },
  ) => {
    waitCount += 1;
    // The hook runs before the lookup so a test can inject messages for this wait.
    params.onWaitForOutboundMessage?.({ waitCount, state: stateLocal });
    const snapshot = stateLocal.getSnapshot();
    const pending = snapshot.messages.slice(options?.sinceIndex ?? 0);
    const hit = pending.find((candidate) => predicate(candidate));
    if (!hit) {
      throw new Error(`timed out after ${timeoutMs}ms waiting for outbound marker`);
    }
    return hit;
  };

  const runScenario = async (_name: string, steps: QaFlowStep[]) => {
    const stepResults = [];
    for (const current of steps) {
      const outcome = await current.run();
      const entry = { name: current.name, status: "pass" as const };
      // `details` is only attached when the step actually returned something.
      stepResults.push(outcome !== undefined ? { ...entry, details: outcome } : entry);
    }
    return {
      name: scenario.title,
      status: "pass" as const,
      steps: stepResults,
    };
  };

  const api = {
    env: {},
    state,
    scenario,
    config: scenario.execution.config ?? {},
    randomUUID: () => "00000000-0000-4000-8000-000000000000",
    liveTurnTimeoutMs: (_env: unknown, timeoutMs: number) => timeoutMs,
    waitForGatewayHealthy: resolveImmediately,
    waitForQaChannelReady: resolveImmediately,
    waitForNoOutbound: resolveImmediately,
    sleep: resolveImmediately,
    reset: clearBus,
    resetBus: clearBus,
    runAgentPrompt: resolveImmediately,
    formatTransportTranscript: formatTestTranscript,
    waitForOutboundMessage,
    runScenario,
  };

  const flowResult = await runScenarioFlow({
    api,
    scenarioTitle: scenario.title,
    flow,
  });
  return flowResult;
}
describe("scenario-flow-runner", () => {
it("supports qaImport inside flow expressions", async () => {
const result = await runScenarioFlow({
@@ -221,4 +312,78 @@ describe("scenario-flow-runner", () => {
expect(result.status).toBe("pass");
expect(result.steps[0]?.details).toBe("QA_CODEX_PLUGIN_TURN_OK");
});
// Baseline scenarios must reject a reply whose text lacks the scenario marker.
it.each([
  {
    scenarioId: "channel-chat-baseline",
    to: "channel:qa-room",
    text: "generic shared-channel reply without the required marker",
  },
  {
    scenarioId: "dm-chat-baseline",
    to: "dm:alice",
    text: "generic DM reply without the required marker",
  },
])("rejects unmarked outbound replies for $scenarioId", async (unmarkedCase) => {
  // Each wait is answered with the case's unmarked reply.
  const flowRun = runLoadedScenarioFlow(unmarkedCase.scenarioId, {
    onWaitForOutboundMessage: ({ state }) => {
      state.addOutboundMessage({
        accountId: "qa-channel",
        to: unmarkedCase.to,
        text: unmarkedCase.text,
      });
    },
  });

  await expect(flowRun).rejects.toThrow("waiting for outbound marker");
});
it("rejects reconnect follow-up replies that replay the first marker", async () => {
  // Every wait, the follow-up one included, is answered with the first marker
  // again; the flow is expected to reject with the stub's timeout error.
  const replayFirstMarker = ({ state }: { state: ReturnType<typeof createQaBusState> }) => {
    state.addOutboundMessage({
      accountId: "qa-channel",
      to: "channel:qa-room",
      text: "RECONNECT-FIRST-OK",
    });
  };

  const flowRun = runLoadedScenarioFlow("qa-channel-reconnect-dedupe", {
    onWaitForOutboundMessage: replayFirstMarker,
  });

  await expect(flowRun).rejects.toThrow("waiting for outbound marker");
});
it("rejects reconnect follow-up turns with extra unmarked outbound replies", async () => {
  const flowRun = runLoadedScenarioFlow("qa-channel-reconnect-dedupe", {
    onWaitForOutboundMessage: ({ waitCount, state }) => {
      // First wait: only the first marker. Later waits: the second marker
      // followed by one extra reply that carries no marker.
      const replyTexts =
        waitCount === 1
          ? ["RECONNECT-FIRST-OK"]
          : ["RECONNECT-SECOND-OK", "unmarked duplicate delivery"];
      for (const text of replyTexts) {
        state.addOutboundMessage({
          accountId: "qa-channel",
          to: "channel:qa-room",
          text,
        });
      }
    },
  });

  await expect(flowRun).rejects.toThrow("exactly one marked post-restart reply");
});
});

View File

@@ -12,6 +12,7 @@ coverage:
objective: Verify the QA agent can respond correctly in a shared channel and respect mention-driven group semantics.
successCriteria:
- Agent replies in the shared channel transcript.
- Agent's visible reply contains the scenario marker.
- Agent keeps the conversation scoped to the channel.
- Agent respects mention-driven group routing semantics.
docsRefs:
@@ -24,7 +25,8 @@ execution:
kind: flow
summary: Verify the QA agent can respond correctly in a shared channel and respect mention-driven group semantics.
config:
mentionPrompt: "@openclaw explain the QA lab"
expectedMarker: QA-CHANNEL-BASELINE-OK
mentionPrompt: "@openclaw qa channel baseline marker check. Reply exactly: QA-CHANNEL-BASELINE-OK"
```
```yaml qa-flow
@@ -78,7 +80,14 @@ steps:
- ref: state
- lambda:
params: [candidate]
expr: "candidate.conversation.id === 'qa-room' && !candidate.threadId"
expr: "candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room' && candidate.conversation.kind === 'channel' && !candidate.threadId && String(candidate.text ?? '').includes(config.expectedMarker)"
- expr: liveTurnTimeoutMs(env, 180000)
- set: matchingOutbound
value:
expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room' && candidate.conversation.kind === 'channel' && String(candidate.text ?? '').includes(config.expectedMarker))"
- assert:
expr: matchingOutbound.length === 1
message:
expr: "`expected exactly one channel baseline marker reply, saw ${matchingOutbound.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
detailsExpr: message.text
```

View File

@@ -12,6 +12,7 @@ coverage:
objective: Verify the QA agent can chat coherently in a DM, explain the QA setup, and stay in character.
successCriteria:
- Agent replies in DM without channel routing mistakes.
- Agent's visible reply contains the scenario marker.
- Agent explains the QA lab and message bus correctly.
- Agent keeps the dev C-3PO personality.
docsRefs:
@@ -24,7 +25,8 @@ execution:
kind: flow
summary: Verify the QA agent can chat coherently in a DM, explain the QA setup, and stay in character.
config:
prompt: "Hello there, who are you?"
expectedMarker: QA-DM-BASELINE-OK
prompt: "DM baseline marker check. Include exact marker: `QA-DM-BASELINE-OK` and briefly identify the QA lab message bus."
```
```yaml qa-flow
@@ -47,7 +49,14 @@ steps:
- ref: state
- lambda:
params: [candidate]
expr: "candidate.conversation.id === 'alice'"
expr: "candidate.direction === 'outbound' && candidate.conversation.id === 'alice' && candidate.conversation.kind === 'direct' && String(candidate.text ?? '').includes(config.expectedMarker)"
- expr: liveTurnTimeoutMs(env, 45000)
- set: matchingOutbound
value:
expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'alice' && candidate.conversation.kind === 'direct' && String(candidate.text ?? '').includes(config.expectedMarker))"
- assert:
expr: matchingOutbound.length === 1
message:
expr: "`expected exactly one DM baseline marker reply, saw ${matchingOutbound.length}; transcript=${formatTransportTranscript(state, { conversationId: 'alice' })}`"
detailsExpr: outbound.text
```

View File

@@ -64,7 +64,7 @@ steps:
- ref: state
- lambda:
params: [candidate]
expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound'"
expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound' && String(candidate.text ?? '').includes(config.firstMarker)"
- expr: liveTurnTimeoutMs(env, 60000)
- set: beforeRestartCursor
value:
@@ -80,9 +80,9 @@ steps:
value:
expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
- assert:
expr: "firstMatchesBeforeFollowup.length === 1"
expr: "firstMatchesBeforeFollowup.length === 1 && String(firstMatchesBeforeFollowup[0]?.text ?? '').includes(config.firstMarker)"
message:
expr: "`readiness cycle replayed first reply ${firstMatchesBeforeFollowup.length} times; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
expr: "`readiness cycle should preserve exactly one marked first reply, saw ${firstMatchesBeforeFollowup.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
- call: runAgentPrompt
args:
- ref: env
@@ -99,7 +99,7 @@ steps:
- ref: state
- lambda:
params: [candidate]
expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound'"
expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound' && String(candidate.text ?? '').includes(config.secondMarker)"
- expr: liveTurnTimeoutMs(env, 60000)
- sinceIndex:
ref: beforeRestartCursor
@@ -108,13 +108,16 @@ steps:
expr: state.getSnapshot()
- set: firstMatches
value:
expr: "snapshot.messages.slice(0, beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
expr: "snapshot.messages.slice(0, beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room' && String(candidate.text ?? '').includes(config.firstMarker))"
- set: secondMatches
value:
expr: "snapshot.messages.slice(beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room' && String(candidate.text ?? '').includes(config.secondMarker))"
- set: postRestartOutbounds
value:
expr: "snapshot.messages.slice(beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
- assert:
expr: "firstMatches.length === 1 && secondMatches.length === 1"
expr: "firstMatches.length === 1 && secondMatches.length === 1 && postRestartOutbounds.length === 1 && !postRestartOutbounds.some((candidate) => String(candidate.text ?? '').includes(config.firstMarker))"
message:
expr: "`expected one pre-restart and one post-restart reply; first=${firstMatches.length} second=${secondMatches.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
expr: "`expected one marked pre-restart reply and exactly one marked post-restart reply without replaying the first marker; first=${firstMatches.length} second=${secondMatches.length} post=${postRestartOutbounds.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
detailsExpr: "`before=${firstOutbound.text}\\nafter=${secondOutbound.text}`"
```