mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-06 05:51:15 +08:00
test(qa): require channel scenario markers
This commit is contained in:
@@ -67,6 +67,7 @@ Docs: https://docs.openclaw.ai
|
|||||||
- Release/CI/E2E: require the Kitchen Sink RPC walk to prove every expected plugin tool is cataloged and effective before invoking tool fixtures.
|
- Release/CI/E2E: require the Kitchen Sink RPC walk to prove every expected plugin tool is cataloged and effective before invoking tool fixtures.
|
||||||
- Release/CI/E2E: stop tracked Docker build commands when centralized build wrappers receive shutdown signals.
|
- Release/CI/E2E: stop tracked Docker build commands when centralized build wrappers receive shutdown signals.
|
||||||
- Release/CI/E2E: cover MCP channel pairing reconnects by asserting the same temporary client state is reused across reconnects.
|
- Release/CI/E2E: cover MCP channel pairing reconnects by asserting the same temporary client state is reused across reconnects.
|
||||||
|
- Release/CI/E2E: require QA channel baseline and reconnect scenarios to assert their scenario markers instead of accepting any outbound reply.
|
||||||
- Release/CI/E2E: fail secret-provider proof runs when temporary state cleanup still fails after retries instead of hiding the cleanup error.
|
- Release/CI/E2E: fail secret-provider proof runs when temporary state cleanup still fails after retries instead of hiding the cleanup error.
|
||||||
- Release/CI/E2E: fail package-candidate ref proofs when temporary source worktree cleanup fails instead of leaving stale worktrees behind.
|
- Release/CI/E2E: fail package-candidate ref proofs when temporary source worktree cleanup fails instead of leaving stale worktrees behind.
|
||||||
- Release/CI/E2E: remove package tarball extract directories when tar extraction fails before validation can continue.
|
- Release/CI/E2E: remove package tarball extract directories when tar extraction fails before validation can continue.
|
||||||
|
|||||||
@@ -1,7 +1,98 @@
|
|||||||
import { describe, expect, it } from "vitest";
|
import { describe, expect, it } from "vitest";
|
||||||
import { createQaBusState } from "./bus-state.js";
|
import { createQaBusState } from "./bus-state.js";
|
||||||
|
import { readQaScenarioById } from "./scenario-catalog.js";
|
||||||
import { runScenarioFlow } from "./scenario-flow-runner.js";
|
import { runScenarioFlow } from "./scenario-flow-runner.js";
|
||||||
|
|
||||||
|
/**
 * Shape of a single flow step as consumed by the stubbed `runScenario`
 * helper in this test file.
 */
type QaFlowStep = {
  /** Label copied verbatim into the step result. */
  name: string;
  /**
   * Executes the step. A resolved value other than `undefined` is recorded
   * as the step's `details`.
   */
  run: () => Promise<string | void>;
};
|
||||||
|
|
||||||
|
/**
 * Renders the bus snapshot as a plain-text transcript for assertion messages.
 *
 * Each message becomes one `direction:conversationId:text` line.
 *
 * @param state - QA bus state whose current snapshot is serialized.
 * @returns All rendered lines joined with `\n` (empty string when there are no messages).
 */
function formatTestTranscript(state: ReturnType<typeof createQaBusState>) {
  const lines: string[] = [];
  for (const entry of state.getSnapshot().messages) {
    lines.push(`${entry.direction}:${entry.conversation.id}:${entry.text}`);
  }
  return lines.join("\n");
}
|
||||||
|
|
||||||
|
/**
 * Loads a scenario from the catalog and runs its flow against a fresh
 * in-memory bus state, with every runtime helper replaced by a test stub.
 *
 * The stubbed `waitForOutboundMessage` never actually waits: it bumps a
 * counter, gives the caller's hook a chance to inject messages, then checks
 * the current snapshot once and throws a timeout-style error if nothing
 * matches.
 *
 * @param scenarioId - Catalog id passed to `readQaScenarioById`.
 * @param params - Optional hooks.
 * @param params.onWaitForOutboundMessage - Invoked at the start of every
 *   `waitForOutboundMessage` call with the 1-based call count and the state
 *   the flow passed in.
 * @returns The result of `runScenarioFlow` for the loaded flow.
 * @throws Error when the loaded scenario has no `execution.flow`.
 */
async function runLoadedScenarioFlow(
  scenarioId: string,
  params: {
    onWaitForOutboundMessage?: (params: {
      waitCount: number;
      state: ReturnType<typeof createQaBusState>;
    }) => void;
  } = {},
) {
  const scenario = readQaScenarioById(scenarioId);
  const flow = scenario.execution.flow;
  if (!flow) {
    throw new Error(`scenario has no flow: ${scenarioId}`);
  }

  const state = createQaBusState();
  // Number of waitForOutboundMessage calls observed so far.
  let waitCount = 0;

  // Helpers that have nothing to do in this harness resolve immediately.
  const resolveImmediately = async () => undefined;
  // Both reset entry points clear the same in-memory bus.
  const clearBus = async () => {
    state.reset();
  };

  const api = {
    env: {},
    state,
    scenario,
    config: scenario.execution.config ?? {},
    // Fixed UUID keeps any generated ids stable between runs.
    randomUUID: () => "00000000-0000-4000-8000-000000000000",
    // Pass the requested timeout through unchanged.
    liveTurnTimeoutMs: (_env: unknown, timeoutMs: number) => timeoutMs,
    waitForGatewayHealthy: resolveImmediately,
    waitForQaChannelReady: resolveImmediately,
    waitForNoOutbound: resolveImmediately,
    sleep: resolveImmediately,
    reset: clearBus,
    resetBus: clearBus,
    runAgentPrompt: resolveImmediately,
    formatTransportTranscript: formatTestTranscript,
    waitForOutboundMessage: async (
      stateLocal: ReturnType<typeof createQaBusState>,
      predicate: (candidate: unknown) => boolean,
      timeoutMs: number,
      options?: { sinceIndex?: number },
    ) => {
      waitCount += 1;
      // Let the test inject messages before the snapshot is inspected.
      params.onWaitForOutboundMessage?.({ waitCount, state: stateLocal });

      const startIndex = options?.sinceIndex ?? 0;
      const candidates = stateLocal.getSnapshot().messages.slice(startIndex);
      for (const candidate of candidates) {
        if (predicate(candidate)) {
          return candidate;
        }
      }
      throw new Error(`timed out after ${timeoutMs}ms waiting for outbound marker`);
    },
    runScenario: async (_name: string, steps: QaFlowStep[]) => {
      const stepResults = [];
      // Steps run strictly in order; a throwing step aborts the scenario.
      for (const step of steps) {
        const details = await step.run();
        if (details !== undefined) {
          stepResults.push({ name: step.name, status: "pass" as const, details });
        } else {
          stepResults.push({ name: step.name, status: "pass" as const });
        }
      }
      return {
        name: scenario.title,
        status: "pass" as const,
        steps: stepResults,
      };
    },
  };

  return await runScenarioFlow({
    api,
    scenarioTitle: scenario.title,
    flow,
  });
}
|
||||||
|
|
||||||
describe("scenario-flow-runner", () => {
|
describe("scenario-flow-runner", () => {
|
||||||
it("supports qaImport inside flow expressions", async () => {
|
it("supports qaImport inside flow expressions", async () => {
|
||||||
const result = await runScenarioFlow({
|
const result = await runScenarioFlow({
|
||||||
@@ -221,4 +312,78 @@ describe("scenario-flow-runner", () => {
|
|||||||
expect(result.status).toBe("pass");
|
expect(result.status).toBe("pass");
|
||||||
expect(result.steps[0]?.details).toBe("QA_CODEX_PLUGIN_TURN_OK");
|
expect(result.steps[0]?.details).toBe("QA_CODEX_PLUGIN_TURN_OK");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Baseline scenarios must not accept an arbitrary outbound reply: every wait
// injects a reply to the right destination that lacks the scenario marker,
// so the flow has to fail with the stubbed timeout error.
it.each([
  {
    scenarioId: "channel-chat-baseline",
    to: "channel:qa-room",
    text: "generic shared-channel reply without the required marker",
  },
  {
    scenarioId: "dm-chat-baseline",
    to: "dm:alice",
    text: "generic DM reply without the required marker",
  },
])("rejects unmarked outbound replies for $scenarioId", async ({ scenarioId, to, text }) => {
  const pendingRun = runLoadedScenarioFlow(scenarioId, {
    onWaitForOutboundMessage: ({ state }) => {
      state.addOutboundMessage({ accountId: "qa-channel", to, text });
    },
  });

  await expect(pendingRun).rejects.toThrow("waiting for outbound marker");
});
|
||||||
|
|
||||||
|
it("rejects reconnect follow-up replies that replay the first marker", async () => {
  const pendingRun = runLoadedScenarioFlow("qa-channel-reconnect-dedupe", {
    onWaitForOutboundMessage: ({ state }) => {
      // The first wait gets the legitimate first-marker reply; every later
      // wait gets the very same first marker again, i.e. a replay instead of
      // a fresh follow-up reply.
      state.addOutboundMessage({
        accountId: "qa-channel",
        to: "channel:qa-room",
        text: "RECONNECT-FIRST-OK",
      });
    },
  });

  await expect(pendingRun).rejects.toThrow("waiting for outbound marker");
});
|
||||||
|
|
||||||
|
it("rejects reconnect follow-up turns with extra unmarked outbound replies", async () => {
  const pendingRun = runLoadedScenarioFlow("qa-channel-reconnect-dedupe", {
    onWaitForOutboundMessage: ({ waitCount, state }) => {
      // Adds one outbound reply to the shared QA room with the given text.
      const replyInRoom = (text: string) => {
        state.addOutboundMessage({
          accountId: "qa-channel",
          to: "channel:qa-room",
          text,
        });
      };

      if (waitCount === 1) {
        replyInRoom("RECONNECT-FIRST-OK");
        return;
      }

      // Later waits: the expected second marker plus one stray reply that
      // carries no marker at all.
      replyInRoom("RECONNECT-SECOND-OK");
      replyInRoom("unmarked duplicate delivery");
    },
  });

  await expect(pendingRun).rejects.toThrow("exactly one marked post-restart reply");
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ coverage:
|
|||||||
objective: Verify the QA agent can respond correctly in a shared channel and respect mention-driven group semantics.
|
objective: Verify the QA agent can respond correctly in a shared channel and respect mention-driven group semantics.
|
||||||
successCriteria:
|
successCriteria:
|
||||||
- Agent replies in the shared channel transcript.
|
- Agent replies in the shared channel transcript.
|
||||||
|
- Agent visible reply contains the scenario marker.
|
||||||
- Agent keeps the conversation scoped to the channel.
|
- Agent keeps the conversation scoped to the channel.
|
||||||
- Agent respects mention-driven group routing semantics.
|
- Agent respects mention-driven group routing semantics.
|
||||||
docsRefs:
|
docsRefs:
|
||||||
@@ -24,7 +25,8 @@ execution:
|
|||||||
kind: flow
|
kind: flow
|
||||||
summary: Verify the QA agent can respond correctly in a shared channel and respect mention-driven group semantics.
|
summary: Verify the QA agent can respond correctly in a shared channel and respect mention-driven group semantics.
|
||||||
config:
|
config:
|
||||||
mentionPrompt: "@openclaw explain the QA lab"
|
expectedMarker: QA-CHANNEL-BASELINE-OK
|
||||||
|
mentionPrompt: "@openclaw qa channel baseline marker check. Reply exactly: QA-CHANNEL-BASELINE-OK"
|
||||||
```
|
```
|
||||||
|
|
||||||
```yaml qa-flow
|
```yaml qa-flow
|
||||||
@@ -78,7 +80,14 @@ steps:
|
|||||||
- ref: state
|
- ref: state
|
||||||
- lambda:
|
- lambda:
|
||||||
params: [candidate]
|
params: [candidate]
|
||||||
expr: "candidate.conversation.id === 'qa-room' && !candidate.threadId"
|
expr: "candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room' && candidate.conversation.kind === 'channel' && !candidate.threadId && String(candidate.text ?? '').includes(config.expectedMarker)"
|
||||||
- expr: liveTurnTimeoutMs(env, 180000)
|
- expr: liveTurnTimeoutMs(env, 180000)
|
||||||
|
- set: matchingOutbound
|
||||||
|
value:
|
||||||
|
expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room' && candidate.conversation.kind === 'channel' && String(candidate.text ?? '').includes(config.expectedMarker))"
|
||||||
|
- assert:
|
||||||
|
expr: matchingOutbound.length === 1
|
||||||
|
message:
|
||||||
|
expr: "`expected exactly one channel baseline marker reply, saw ${matchingOutbound.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
|
||||||
detailsExpr: message.text
|
detailsExpr: message.text
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ coverage:
|
|||||||
objective: Verify the QA agent can chat coherently in a DM, explain the QA setup, and stay in character.
|
objective: Verify the QA agent can chat coherently in a DM, explain the QA setup, and stay in character.
|
||||||
successCriteria:
|
successCriteria:
|
||||||
- Agent replies in DM without channel routing mistakes.
|
- Agent replies in DM without channel routing mistakes.
|
||||||
|
- Agent visible reply contains the scenario marker.
|
||||||
- Agent explains the QA lab and message bus correctly.
|
- Agent explains the QA lab and message bus correctly.
|
||||||
- Agent keeps the dev C-3PO personality.
|
- Agent keeps the dev C-3PO personality.
|
||||||
docsRefs:
|
docsRefs:
|
||||||
@@ -24,7 +25,8 @@ execution:
|
|||||||
kind: flow
|
kind: flow
|
||||||
summary: Verify the QA agent can chat coherently in a DM, explain the QA setup, and stay in character.
|
summary: Verify the QA agent can chat coherently in a DM, explain the QA setup, and stay in character.
|
||||||
config:
|
config:
|
||||||
prompt: "Hello there, who are you?"
|
expectedMarker: QA-DM-BASELINE-OK
|
||||||
|
prompt: "DM baseline marker check. Include exact marker: `QA-DM-BASELINE-OK` and briefly identify the QA lab message bus."
|
||||||
```
|
```
|
||||||
|
|
||||||
```yaml qa-flow
|
```yaml qa-flow
|
||||||
@@ -47,7 +49,14 @@ steps:
|
|||||||
- ref: state
|
- ref: state
|
||||||
- lambda:
|
- lambda:
|
||||||
params: [candidate]
|
params: [candidate]
|
||||||
expr: "candidate.conversation.id === 'alice'"
|
expr: "candidate.direction === 'outbound' && candidate.conversation.id === 'alice' && candidate.conversation.kind === 'direct' && String(candidate.text ?? '').includes(config.expectedMarker)"
|
||||||
- expr: liveTurnTimeoutMs(env, 45000)
|
- expr: liveTurnTimeoutMs(env, 45000)
|
||||||
|
- set: matchingOutbound
|
||||||
|
value:
|
||||||
|
expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'alice' && candidate.conversation.kind === 'direct' && String(candidate.text ?? '').includes(config.expectedMarker))"
|
||||||
|
- assert:
|
||||||
|
expr: matchingOutbound.length === 1
|
||||||
|
message:
|
||||||
|
expr: "`expected exactly one DM baseline marker reply, saw ${matchingOutbound.length}; transcript=${formatTransportTranscript(state, { conversationId: 'alice' })}`"
|
||||||
detailsExpr: outbound.text
|
detailsExpr: outbound.text
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -64,7 +64,7 @@ steps:
|
|||||||
- ref: state
|
- ref: state
|
||||||
- lambda:
|
- lambda:
|
||||||
params: [candidate]
|
params: [candidate]
|
||||||
expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound'"
|
expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound' && String(candidate.text ?? '').includes(config.firstMarker)"
|
||||||
- expr: liveTurnTimeoutMs(env, 60000)
|
- expr: liveTurnTimeoutMs(env, 60000)
|
||||||
- set: beforeRestartCursor
|
- set: beforeRestartCursor
|
||||||
value:
|
value:
|
||||||
@@ -80,9 +80,9 @@ steps:
|
|||||||
value:
|
value:
|
||||||
expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
|
expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
|
||||||
- assert:
|
- assert:
|
||||||
expr: "firstMatchesBeforeFollowup.length === 1"
|
expr: "firstMatchesBeforeFollowup.length === 1 && String(firstMatchesBeforeFollowup[0]?.text ?? '').includes(config.firstMarker)"
|
||||||
message:
|
message:
|
||||||
expr: "`readiness cycle replayed first reply ${firstMatchesBeforeFollowup.length} times; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
|
expr: "`readiness cycle should preserve exactly one marked first reply, saw ${firstMatchesBeforeFollowup.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
|
||||||
- call: runAgentPrompt
|
- call: runAgentPrompt
|
||||||
args:
|
args:
|
||||||
- ref: env
|
- ref: env
|
||||||
@@ -99,7 +99,7 @@ steps:
|
|||||||
- ref: state
|
- ref: state
|
||||||
- lambda:
|
- lambda:
|
||||||
params: [candidate]
|
params: [candidate]
|
||||||
expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound'"
|
expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound' && String(candidate.text ?? '').includes(config.secondMarker)"
|
||||||
- expr: liveTurnTimeoutMs(env, 60000)
|
- expr: liveTurnTimeoutMs(env, 60000)
|
||||||
- sinceIndex:
|
- sinceIndex:
|
||||||
ref: beforeRestartCursor
|
ref: beforeRestartCursor
|
||||||
@@ -108,13 +108,16 @@ steps:
|
|||||||
expr: state.getSnapshot()
|
expr: state.getSnapshot()
|
||||||
- set: firstMatches
|
- set: firstMatches
|
||||||
value:
|
value:
|
||||||
expr: "snapshot.messages.slice(0, beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
|
expr: "snapshot.messages.slice(0, beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room' && String(candidate.text ?? '').includes(config.firstMarker))"
|
||||||
- set: secondMatches
|
- set: secondMatches
|
||||||
|
value:
|
||||||
|
expr: "snapshot.messages.slice(beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room' && String(candidate.text ?? '').includes(config.secondMarker))"
|
||||||
|
- set: postRestartOutbounds
|
||||||
value:
|
value:
|
||||||
expr: "snapshot.messages.slice(beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
|
expr: "snapshot.messages.slice(beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
|
||||||
- assert:
|
- assert:
|
||||||
expr: "firstMatches.length === 1 && secondMatches.length === 1"
|
expr: "firstMatches.length === 1 && secondMatches.length === 1 && postRestartOutbounds.length === 1 && !postRestartOutbounds.some((candidate) => String(candidate.text ?? '').includes(config.firstMarker))"
|
||||||
message:
|
message:
|
||||||
expr: "`expected one pre-restart and one post-restart reply; first=${firstMatches.length} second=${secondMatches.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
|
expr: "`expected one marked pre-restart reply and exactly one marked post-restart reply without replaying the first marker; first=${firstMatches.length} second=${secondMatches.length} post=${postRestartOutbounds.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
|
||||||
detailsExpr: "`before=${firstOutbound.text}\\nafter=${secondOutbound.text}`"
|
detailsExpr: "`before=${firstOutbound.text}\\nafter=${secondOutbound.text}`"
|
||||||
```
|
```
|
||||||
|
|||||||
Reference in New Issue
Block a user