mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-06 05:51:15 +08:00
test(qa): require channel scenario markers
This commit is contained in:
@@ -67,6 +67,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Release/CI/E2E: require the Kitchen Sink RPC walk to prove every expected plugin tool is cataloged and effective before invoking tool fixtures.
|
||||
- Release/CI/E2E: stop tracked Docker build commands when centralized build wrappers receive shutdown signals.
|
||||
- Release/CI/E2E: cover MCP channel pairing reconnects by asserting the same temporary client state is reused across reconnects.
|
||||
- Release/CI/E2E: require QA channel baseline and reconnect scenarios to assert their scenario markers instead of accepting any outbound reply.
|
||||
- Release/CI/E2E: fail secret-provider proof runs when temporary state cleanup still fails after retries instead of hiding the cleanup error.
|
||||
- Release/CI/E2E: fail package-candidate ref proofs when temporary source worktree cleanup fails instead of leaving stale worktrees behind.
|
||||
- Release/CI/E2E: remove package tarball extract directories when tar extraction fails before validation can continue.
|
||||
|
||||
@@ -1,7 +1,98 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { createQaBusState } from "./bus-state.js";
|
||||
import { readQaScenarioById } from "./scenario-catalog.js";
|
||||
import { runScenarioFlow } from "./scenario-flow-runner.js";
|
||||
|
||||
/**
 * A single step handed to the stubbed `runScenario` helper in these tests.
 */
type QaFlowStep = {
  /** Label copied onto the step's entry in the scenario result. */
  name: string;
  /**
   * Executes the step. A resolved string is recorded as the step's `details`;
   * resolving with nothing leaves `details` off the result entry.
   */
  run: () => Promise<string | void>;
};
|
||||
|
||||
/**
 * Test stand-in for the flow API's `formatTransportTranscript`.
 *
 * Renders one `direction:conversationId:text` line per bus message, joined
 * with newlines.
 *
 * @param state - Bus state whose snapshot messages are rendered.
 * @param options - Optional filter. The scenario flows call this helper as
 *   `formatTransportTranscript(state, { conversationId })`; when
 *   `conversationId` is given, only messages from that conversation are
 *   rendered. Omitting it renders every message, as before.
 *   NOTE(review): assumes the production helper filters by conversation id
 *   the same way; confirm against its implementation.
 * @returns The rendered transcript, or an empty string when nothing matches.
 */
function formatTestTranscript(
  state: ReturnType<typeof createQaBusState>,
  options: { conversationId?: string } = {},
) {
  const { conversationId } = options;
  return state
    .getSnapshot()
    .messages.filter(
      (message) => conversationId === undefined || message.conversation.id === conversationId,
    )
    .map((message) => `${message.direction}:${message.conversation.id}:${message.text}`)
    .join("\n");
}
|
||||
|
||||
/**
 * Runs a catalog scenario's flow against a fresh in-memory bus with stubbed
 * runtime helpers.
 *
 * The scenario is looked up with `readQaScenarioById`; its flow is executed by
 * `runScenarioFlow` with an `api` object whose gateway/channel/sleep/prompt
 * helpers resolve immediately. `waitForOutboundMessage` never actually waits:
 * it first calls the optional hook (so a test can inject outbound messages),
 * then scans the current snapshot once and throws a timeout error if nothing
 * satisfies the predicate.
 *
 * @param scenarioId - Id of the scenario to load from the catalog.
 * @param params - Optional hooks. `onWaitForOutboundMessage` receives the
 *   1-based count of wait calls so far and the bus state passed to the wait.
 * @returns Whatever `runScenarioFlow` resolves with.
 * @throws Error when the scenario has no flow, or when a stubbed wait finds no
 *   matching message.
 */
async function runLoadedScenarioFlow(
  scenarioId: string,
  params: {
    onWaitForOutboundMessage?: (params: {
      waitCount: number;
      state: ReturnType<typeof createQaBusState>;
    }) => void;
  } = {},
) {
  type QaBusState = ReturnType<typeof createQaBusState>;

  const scenario = readQaScenarioById(scenarioId);
  const { flow } = scenario.execution;
  if (!flow) {
    throw new Error(`scenario has no flow: ${scenarioId}`);
  }

  const state = createQaBusState();
  let waitCount = 0;

  // Shared stubs: helpers that would normally wait on live infrastructure.
  const resolveImmediately = async () => undefined;
  const clearBus = async () => {
    state.reset();
  };

  // Single-pass replacement for the real polling wait.
  const waitForOutboundMessage = async (
    stateLocal: QaBusState,
    predicate: (candidate: unknown) => boolean,
    timeoutMs: number,
    options?: { sinceIndex?: number },
  ) => {
    waitCount += 1;
    // Let the test mutate the bus before the snapshot is inspected.
    params.onWaitForOutboundMessage?.({ waitCount, state: stateLocal });

    const startIndex = options?.sinceIndex ?? 0;
    const candidates = stateLocal.getSnapshot().messages.slice(startIndex);
    const match = candidates.find((candidate) => predicate(candidate));
    if (!match) {
      throw new Error(`timed out after ${timeoutMs}ms waiting for outbound marker`);
    }
    return match;
  };

  // Runs the steps one after another and reports every step as passing;
  // a failing step surfaces as a rejection from `step.run()`.
  const runScenario = async (_name: string, steps: QaFlowStep[]) => {
    const stepResults = [];
    for (const step of steps) {
      const details = await step.run();
      stepResults.push(
        details === undefined
          ? { name: step.name, status: "pass" as const }
          : { name: step.name, status: "pass" as const, details },
      );
    }
    return {
      name: scenario.title,
      status: "pass" as const,
      steps: stepResults,
    };
  };

  const api = {
    env: {},
    state,
    scenario,
    config: scenario.execution.config ?? {},
    randomUUID: () => "00000000-0000-4000-8000-000000000000",
    liveTurnTimeoutMs: (_env: unknown, timeoutMs: number) => timeoutMs,
    waitForGatewayHealthy: resolveImmediately,
    waitForQaChannelReady: resolveImmediately,
    waitForNoOutbound: resolveImmediately,
    sleep: resolveImmediately,
    reset: clearBus,
    resetBus: clearBus,
    runAgentPrompt: resolveImmediately,
    formatTransportTranscript: formatTestTranscript,
    waitForOutboundMessage,
    runScenario,
  };

  const result = await runScenarioFlow({
    api,
    scenarioTitle: scenario.title,
    flow,
  });
  return result;
}
|
||||
|
||||
describe("scenario-flow-runner", () => {
|
||||
it("supports qaImport inside flow expressions", async () => {
|
||||
const result = await runScenarioFlow({
|
||||
@@ -221,4 +312,78 @@ describe("scenario-flow-runner", () => {
|
||||
expect(result.status).toBe("pass");
|
||||
expect(result.steps[0]?.details).toBe("QA_CODEX_PLUGIN_TURN_OK");
|
||||
});
|
||||
|
||||
// Baseline scenarios must fail when the only outbound reply lacks the
// scenario's expected marker, even though it lands in the right conversation.
it.each([
  {
    scenarioId: "channel-chat-baseline",
    to: "channel:qa-room",
    text: "generic shared-channel reply without the required marker",
  },
  {
    scenarioId: "dm-chat-baseline",
    to: "dm:alice",
    text: "generic DM reply without the required marker",
  },
])("rejects unmarked outbound replies for $scenarioId", async ({ scenarioId, to, text }) => {
  // Inject the unmarked reply each time the flow starts waiting for outbound traffic.
  const injectUnmarkedReply = ({ state }: { state: ReturnType<typeof createQaBusState> }) => {
    state.addOutboundMessage({ accountId: "qa-channel", to, text });
  };

  const run = runLoadedScenarioFlow(scenarioId, {
    onWaitForOutboundMessage: injectUnmarkedReply,
  });

  await expect(run).rejects.toThrow("waiting for outbound marker");
});
|
||||
|
||||
it("rejects reconnect follow-up replies that replay the first marker", async () => {
  const run = runLoadedScenarioFlow("qa-channel-reconnect-dedupe", {
    // Every wait, the initial turn and the follow-up alike, sees the same
    // first-marker reply, so the follow-up turn is a replay of the first marker.
    onWaitForOutboundMessage: ({ state }) => {
      state.addOutboundMessage({
        accountId: "qa-channel",
        to: "channel:qa-room",
        text: "RECONNECT-FIRST-OK",
      });
    },
  });

  await expect(run).rejects.toThrow("waiting for outbound marker");
});
|
||||
|
||||
it("rejects reconnect follow-up turns with extra unmarked outbound replies", async () => {
  const run = runLoadedScenarioFlow("qa-channel-reconnect-dedupe", {
    onWaitForOutboundMessage: ({ waitCount, state }) => {
      const sendToRoom = (text: string) => {
        state.addOutboundMessage({
          accountId: "qa-channel",
          to: "channel:qa-room",
          text,
        });
      };

      // First wait: the initial turn answers with the first marker only.
      // Later waits: the follow-up turn answers with the second marker plus
      // one additional reply that carries no marker.
      const replies =
        waitCount === 1
          ? ["RECONNECT-FIRST-OK"]
          : ["RECONNECT-SECOND-OK", "unmarked duplicate delivery"];
      for (const text of replies) {
        sendToRoom(text);
      }
    },
  });

  await expect(run).rejects.toThrow("exactly one marked post-restart reply");
});
|
||||
});
|
||||
|
||||
@@ -12,6 +12,7 @@ coverage:
|
||||
objective: Verify the QA agent can respond correctly in a shared channel and respect mention-driven group semantics.
|
||||
successCriteria:
|
||||
- Agent replies in the shared channel transcript.
|
||||
- Agent visible reply contains the scenario marker.
|
||||
- Agent keeps the conversation scoped to the channel.
|
||||
- Agent respects mention-driven group routing semantics.
|
||||
docsRefs:
|
||||
@@ -24,7 +25,8 @@ execution:
|
||||
kind: flow
|
||||
summary: Verify the QA agent can respond correctly in a shared channel and respect mention-driven group semantics.
|
||||
config:
|
||||
mentionPrompt: "@openclaw explain the QA lab"
|
||||
expectedMarker: QA-CHANNEL-BASELINE-OK
|
||||
mentionPrompt: "@openclaw qa channel baseline marker check. Reply exactly: QA-CHANNEL-BASELINE-OK"
|
||||
```
|
||||
|
||||
```yaml qa-flow
|
||||
@@ -78,7 +80,14 @@ steps:
|
||||
- ref: state
|
||||
- lambda:
|
||||
params: [candidate]
|
||||
expr: "candidate.conversation.id === 'qa-room' && !candidate.threadId"
|
||||
expr: "candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room' && candidate.conversation.kind === 'channel' && !candidate.threadId && String(candidate.text ?? '').includes(config.expectedMarker)"
|
||||
- expr: liveTurnTimeoutMs(env, 180000)
|
||||
- set: matchingOutbound
|
||||
value:
|
||||
expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room' && candidate.conversation.kind === 'channel' && String(candidate.text ?? '').includes(config.expectedMarker))"
|
||||
- assert:
|
||||
expr: matchingOutbound.length === 1
|
||||
message:
|
||||
expr: "`expected exactly one channel baseline marker reply, saw ${matchingOutbound.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
|
||||
detailsExpr: message.text
|
||||
```
|
||||
|
||||
@@ -12,6 +12,7 @@ coverage:
|
||||
objective: Verify the QA agent can chat coherently in a DM, explain the QA setup, and stay in character.
|
||||
successCriteria:
|
||||
- Agent replies in DM without channel routing mistakes.
|
||||
- Agent visible reply contains the scenario marker.
|
||||
- Agent explains the QA lab and message bus correctly.
|
||||
- Agent keeps the dev C-3PO personality.
|
||||
docsRefs:
|
||||
@@ -24,7 +25,8 @@ execution:
|
||||
kind: flow
|
||||
summary: Verify the QA agent can chat coherently in a DM, explain the QA setup, and stay in character.
|
||||
config:
|
||||
prompt: "Hello there, who are you?"
|
||||
expectedMarker: QA-DM-BASELINE-OK
|
||||
prompt: "DM baseline marker check. Include exact marker: `QA-DM-BASELINE-OK` and briefly identify the QA lab message bus."
|
||||
```
|
||||
|
||||
```yaml qa-flow
|
||||
@@ -47,7 +49,14 @@ steps:
|
||||
- ref: state
|
||||
- lambda:
|
||||
params: [candidate]
|
||||
expr: "candidate.conversation.id === 'alice'"
|
||||
expr: "candidate.direction === 'outbound' && candidate.conversation.id === 'alice' && candidate.conversation.kind === 'direct' && String(candidate.text ?? '').includes(config.expectedMarker)"
|
||||
- expr: liveTurnTimeoutMs(env, 45000)
|
||||
- set: matchingOutbound
|
||||
value:
|
||||
expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'alice' && candidate.conversation.kind === 'direct' && String(candidate.text ?? '').includes(config.expectedMarker))"
|
||||
- assert:
|
||||
expr: matchingOutbound.length === 1
|
||||
message:
|
||||
expr: "`expected exactly one DM baseline marker reply, saw ${matchingOutbound.length}; transcript=${formatTransportTranscript(state, { conversationId: 'alice' })}`"
|
||||
detailsExpr: outbound.text
|
||||
```
|
||||
|
||||
@@ -64,7 +64,7 @@ steps:
|
||||
- ref: state
|
||||
- lambda:
|
||||
params: [candidate]
|
||||
expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound'"
|
||||
expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound' && String(candidate.text ?? '').includes(config.firstMarker)"
|
||||
- expr: liveTurnTimeoutMs(env, 60000)
|
||||
- set: beforeRestartCursor
|
||||
value:
|
||||
@@ -80,9 +80,9 @@ steps:
|
||||
value:
|
||||
expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
|
||||
- assert:
|
||||
expr: "firstMatchesBeforeFollowup.length === 1"
|
||||
expr: "firstMatchesBeforeFollowup.length === 1 && String(firstMatchesBeforeFollowup[0]?.text ?? '').includes(config.firstMarker)"
|
||||
message:
|
||||
expr: "`readiness cycle replayed first reply ${firstMatchesBeforeFollowup.length} times; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
|
||||
expr: "`readiness cycle should preserve exactly one marked first reply, saw ${firstMatchesBeforeFollowup.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
|
||||
- call: runAgentPrompt
|
||||
args:
|
||||
- ref: env
|
||||
@@ -99,7 +99,7 @@ steps:
|
||||
- ref: state
|
||||
- lambda:
|
||||
params: [candidate]
|
||||
expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound'"
|
||||
expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound' && String(candidate.text ?? '').includes(config.secondMarker)"
|
||||
- expr: liveTurnTimeoutMs(env, 60000)
|
||||
- sinceIndex:
|
||||
ref: beforeRestartCursor
|
||||
@@ -108,13 +108,16 @@ steps:
|
||||
expr: state.getSnapshot()
|
||||
- set: firstMatches
|
||||
value:
|
||||
expr: "snapshot.messages.slice(0, beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
|
||||
expr: "snapshot.messages.slice(0, beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room' && String(candidate.text ?? '').includes(config.firstMarker))"
|
||||
- set: secondMatches
|
||||
value:
|
||||
expr: "snapshot.messages.slice(beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room' && String(candidate.text ?? '').includes(config.secondMarker))"
|
||||
- set: postRestartOutbounds
|
||||
value:
|
||||
expr: "snapshot.messages.slice(beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
|
||||
- assert:
|
||||
expr: "firstMatches.length === 1 && secondMatches.length === 1"
|
||||
expr: "firstMatches.length === 1 && secondMatches.length === 1 && postRestartOutbounds.length === 1 && !postRestartOutbounds.some((candidate) => String(candidate.text ?? '').includes(config.firstMarker))"
|
||||
message:
|
||||
expr: "`expected one pre-restart and one post-restart reply; first=${firstMatches.length} second=${secondMatches.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
|
||||
expr: "`expected one marked pre-restart reply and exactly one marked post-restart reply without replaying the first marker; first=${firstMatches.length} second=${secondMatches.length} post=${postRestartOutbounds.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
|
||||
detailsExpr: "`before=${firstOutbound.text}\\nafter=${secondOutbound.text}`"
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user