feat(qa): execute ten new repo-backed scenarios

This commit is contained in:
Peter Steinberger
2026-04-06 04:28:05 +01:00
parent 746b112dac
commit 1373ac6c9e
3 changed files with 287 additions and 132 deletions

View File

@@ -6,6 +6,7 @@ import { setTimeout as sleep } from "node:timers/promises";
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
import type { OpenClawConfig } from "openclaw/plugin-sdk/core";
import { buildAgentSessionKey } from "openclaw/plugin-sdk/routing";
import type { QaBusState } from "./bus-state.js";
import { extractQaToolPayload } from "./extract-tool-payload.js";
import { startQaGatewayChild } from "./gateway-child.js";
@@ -149,6 +150,15 @@ async function waitForNoOutbound(state: QaBusState, timeoutMs = 1_200) {
}
}
function recentOutboundSummary(state: QaBusState, limit = 5) {
return state
.getSnapshot()
.messages.filter((message) => message.direction === "outbound")
.slice(-limit)
.map((message) => `${message.conversation.id}:${message.text}`)
.join(" | ");
}
async function runScenario(name: string, steps: QaSuiteStep[]): Promise<QaSuiteScenarioResult> {
const stepResults: QaReportCheck[] = [];
for (const step of steps) {
@@ -213,6 +223,50 @@ async function waitForGatewayHealthy(env: QaSuiteEnvironment, timeoutMs = 45_000
);
}
async function waitForQaChannelReady(env: QaSuiteEnvironment, timeoutMs = 45_000) {
await waitForCondition(
async () => {
try {
const payload = (await env.gateway.call(
"channels.status",
{ probe: false, timeoutMs: 2_000 },
{ timeoutMs: 5_000 },
)) as {
channelAccounts?: Record<
string,
Array<{
accountId?: string;
running?: boolean;
restartPending?: boolean;
}>
>;
};
const accounts = payload.channelAccounts?.["qa-channel"] ?? [];
const account = accounts.find((entry) => entry.accountId === "default") ?? accounts[0];
if (account?.running && account.restartPending !== true) {
return true;
}
return undefined;
} catch {
return undefined;
}
},
timeoutMs,
500,
);
}
async function waitForConfigRestartSettle(
env: QaSuiteEnvironment,
restartDelayMs = 1_000,
timeoutMs = 60_000,
) {
// config.patch/config.apply schedule a delayed SIGUSR1 restart after the RPC returns.
// Give the restart window time to fire before treating readyz as settled.
await sleep(restartDelayMs + 750);
await waitForGatewayHealthy(env, timeoutMs);
}
function isGatewayRestartRace(error: unknown) {
const text = error instanceof Error ? error.message : String(error);
return (
@@ -224,7 +278,11 @@ function isGatewayRestartRace(error: unknown) {
}
async function readConfigSnapshot(env: QaSuiteEnvironment) {
const snapshot = (await env.gateway.call("config.get", {})) as QaConfigSnapshot;
const snapshot = (await env.gateway.call(
"config.get",
{},
{ timeoutMs: 60_000 },
)) as QaConfigSnapshot;
if (!snapshot.hash || !snapshot.config) {
throw new Error("config.get returned no hash/config");
}
@@ -242,23 +300,26 @@ async function patchConfig(params: {
restartDelayMs?: number;
}) {
const snapshot = await readConfigSnapshot(params.env);
const restartDelayMs = params.restartDelayMs ?? 1_000;
try {
return await params.env.gateway.call(
const result = await params.env.gateway.call(
"config.patch",
{
raw: JSON.stringify(params.patch, null, 2),
baseHash: snapshot.hash,
...(params.sessionKey ? { sessionKey: params.sessionKey } : {}),
...(params.note ? { note: params.note } : {}),
restartDelayMs: params.restartDelayMs ?? 1_000,
restartDelayMs,
},
{ timeoutMs: 45_000 },
);
await waitForConfigRestartSettle(params.env, restartDelayMs);
return result;
} catch (error) {
if (!isGatewayRestartRace(error)) {
throw error;
}
await waitForGatewayHealthy(params.env);
await waitForConfigRestartSettle(params.env, restartDelayMs);
return { ok: true, restarted: true };
}
}
@@ -271,32 +332,41 @@ async function applyConfig(params: {
restartDelayMs?: number;
}) {
const snapshot = await readConfigSnapshot(params.env);
const restartDelayMs = params.restartDelayMs ?? 1_000;
try {
return await params.env.gateway.call(
const result = await params.env.gateway.call(
"config.apply",
{
raw: JSON.stringify(params.nextConfig, null, 2),
baseHash: snapshot.hash,
...(params.sessionKey ? { sessionKey: params.sessionKey } : {}),
...(params.note ? { note: params.note } : {}),
restartDelayMs: params.restartDelayMs ?? 1_000,
restartDelayMs,
},
{ timeoutMs: 45_000 },
);
await waitForConfigRestartSettle(params.env, restartDelayMs);
return result;
} catch (error) {
if (!isGatewayRestartRace(error)) {
throw error;
}
await waitForGatewayHealthy(params.env);
await waitForConfigRestartSettle(params.env, restartDelayMs);
return { ok: true, restarted: true };
}
}
async function createSession(env: QaSuiteEnvironment, label: string, key?: string) {
const created = (await env.gateway.call("sessions.create", {
label,
...(key ? { key } : {}),
})) as { key?: string };
const created = (await env.gateway.call(
"sessions.create",
{
label,
...(key ? { key } : {}),
},
{
timeoutMs: liveTurnTimeoutMs(env, 60_000),
},
)) as { key?: string };
const sessionKey = created.key?.trim();
if (!sessionKey) {
throw new Error("sessions.create returned no key");
@@ -390,6 +460,8 @@ async function forceMemoryIndex(params: {
query: string;
expectedNeedle: string;
}) {
await waitForGatewayHealthy(params.env, 60_000);
await waitForQaChannelReady(params.env, 60_000);
await runQaCli(params.env, ["memory", "index", "--agent", "qa", "--force"], {
timeoutMs: liveTurnTimeoutMs(params.env, 60_000),
});
@@ -397,7 +469,7 @@ async function forceMemoryIndex(params: {
params.env,
["memory", "search", "--agent", "qa", "--json", "--query", params.query],
{
timeoutMs: liveTurnTimeoutMs(params.env, 20_000),
timeoutMs: liveTurnTimeoutMs(params.env, 60_000),
json: true,
},
)) as { results?: Array<{ snippet?: string; text?: string; path?: string }> };
@@ -1047,6 +1119,15 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
name: "falls back cleanly when group:memory tools are denied",
run: async () => {
const original = await readConfigSnapshot(env);
const originalTools =
original.config.tools && typeof original.config.tools === "object"
? (original.config.tools as Record<string, unknown>)
: null;
const originalToolsDeny = originalTools
? Object.prototype.hasOwnProperty.call(originalTools, "deny")
? structuredClone(originalTools.deny)
: undefined
: undefined;
await fs.writeFile(
path.join(env.gateway.workspaceDir, "MEMORY.md"),
"Do not reveal directly: fallback fact is ORBIT-9.\n",
@@ -1057,6 +1138,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
patch: { tools: { deny: ["group:memory"] } },
});
await waitForGatewayHealthy(env);
await waitForQaChannelReady(env, 60_000);
try {
const sessionKey = await createSession(env, "Memory fallback");
const tools = await readEffectiveTools(env, sessionKey);
@@ -1084,11 +1166,16 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
}
return outbound.text;
} finally {
await applyConfig({
await patchConfig({
env,
nextConfig: original.config,
patch: {
tools: {
deny: originalToolsDeny === undefined ? null : originalToolsDeny,
},
},
});
await waitForGatewayHealthy(env);
await waitForQaChannelReady(env, 60_000);
}
},
},
@@ -1101,6 +1188,8 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
{
name: "keeps using tools after switching models",
run: async () => {
await waitForGatewayHealthy(env, 60_000);
await waitForQaChannelReady(env, 60_000);
await reset();
await runAgentPrompt(env, {
sessionKey: "agent:qa:model-switch-tools",
@@ -1282,20 +1371,63 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I
name: "enables image_generate and saves a real media artifact",
run: async () => {
const imageModelRef =
env.providerMode === "live-openai"
? "openai/gpt-image-1"
: "mock-openai/gpt-image-1";
env.providerMode === "live-openai" ? "openai/gpt-image-1" : "openai/gpt-image-1";
await patchConfig({
env,
patch: {
agents: {
defaults: {
imageGenerationModel: {
primary: imageModelRef,
patch:
env.providerMode === "mock-openai"
? {
plugins: {
allow: ["memory-core", "openai", "qa-channel"],
entries: {
openai: {
enabled: true,
},
},
},
models: {
providers: {
openai: {
baseUrl: `${env.mock?.baseUrl}/v1`,
apiKey: "test",
api: "openai-responses",
models: [
{
id: "gpt-image-1",
name: "gpt-image-1",
api: "openai-responses",
reasoning: false,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128_000,
maxTokens: 4096,
},
],
},
},
},
agents: {
defaults: {
imageGenerationModel: {
primary: "openai/gpt-image-1",
},
},
},
}
: {
agents: {
defaults: {
imageGenerationModel: {
primary: imageModelRef,
},
},
},
},
},
},
},
});
await waitForGatewayHealthy(env);
const sessionKey = await createSession(env, "Image generation");
@@ -1316,9 +1448,10 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I
liveTurnTimeoutMs(env, 45_000),
);
if (env.mock) {
const mockBaseUrl = env.mock.baseUrl;
const requests = await fetchJson<
Array<{ allInputText?: string; plannedToolName?: string; toolOutput?: string }>
>(`${env.mock.baseUrl}/debug/requests`);
>(`${mockBaseUrl}/debug/requests`);
const imageRequest = requests.find((request) =>
String(request.allInputText ?? "").includes("Image generation check"),
);
@@ -1327,19 +1460,25 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I
`expected image_generate, got ${String(imageRequest?.plannedToolName ?? "")}`,
);
}
const toolOutputRequest = requests.find((request) =>
String(request.toolOutput ?? "").includes(
`Generated 1 image with ${imageModelRef}.`,
),
);
if (!toolOutputRequest) {
throw new Error("missing mock image generation tool output");
}
const mediaPath = /MEDIA:([^\n]+)/.exec(outbound.text)?.[1]?.trim();
if (!mediaPath) {
throw new Error("missing MEDIA path in image generation tool output");
}
await fs.access(mediaPath);
const generated = await waitForCondition(
async () => {
const requests = await fetchJson<Array<{ prompt?: string; model?: string }>>(
`${mockBaseUrl}/debug/image-generations`,
);
return requests.find(
(request) =>
request.model === "gpt-image-1" &&
String(request.prompt ?? "").includes("QA lighthouse"),
);
},
15_000,
250,
).catch((error) => {
throw new Error(
`image provider was never invoked: ${error instanceof Error ? error.message : String(error)}; toolOutput=${String(imageRequest.toolOutput ?? "")}`,
);
});
return `${outbound.text}\nIMAGE_PROMPT:${generated.prompt ?? ""}`;
}
return outbound.text;
},
@@ -1349,91 +1488,80 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I
[
"config-patch-hot-apply",
async () =>
await runScenario("Config patch hot apply", [
await runScenario("Config patch skill disable", [
{
name: "updates mention routing without restart",
name: "disables a workspace skill after config.patch restart",
run: async () => {
const original = await readConfigSnapshot(env);
await patchConfig({
await writeWorkspaceSkill({
env,
name: "qa-hot-disable-skill",
body: `---
name: qa-hot-disable-skill
description: Hot disable QA marker
---
When the user asks for the hot disable marker exactly, reply with exactly: HOT-PATCH-DISABLED-OK`,
});
await waitForCondition(
async () => {
const skills = await readSkillStatus(env);
return findSkill(skills, "qa-hot-disable-skill")?.eligible ? true : undefined;
},
15_000,
200,
).catch((error) => {
throw new Error(
`hot-disable skill never became eligible: ${error instanceof Error ? error.message : String(error)}`,
);
});
const beforeSkills = await readSkillStatus(env);
const beforeSkill = findSkill(beforeSkills, "qa-hot-disable-skill");
if (!beforeSkill?.eligible || beforeSkill.disabled) {
throw new Error(`unexpected pre-patch skill state: ${JSON.stringify(beforeSkill)}`);
}
const patchResult = (await patchConfig({
env,
patch: {
messages: {
groupChat: {
mentionPatterns: ["\\bgoldenbot\\b"],
skills: {
entries: {
"qa-hot-disable-skill": {
enabled: false,
},
},
},
},
})) as {
restart?: {
coalesced?: boolean;
delayMs?: number;
};
};
await waitForQaChannelReady(env, 60_000).catch((error) => {
throw new Error(
`qa-channel never returned ready after config.patch: ${
error instanceof Error ? error.message : String(error)
}`,
);
});
await waitForGatewayHealthy(env);
try {
await reset();
const requestsBeforeIgnored = env.mock
? await fetchJson<Array<{ allInputText?: string }>>(
`${env.mock.baseUrl}/debug/requests`,
)
: null;
state.addInboundMessage({
conversation: { id: "qa-room", kind: "channel", title: "QA Room" },
senderId: "alice",
senderName: "Alice",
text: "@openclaw you should now be ignored",
});
await waitForCondition(
async () => {
if (!env.mock) {
return (await waitForNoOutbound(state), true);
}
const requests = await fetchJson<Array<{ allInputText?: string }>>(
`${env.mock.baseUrl}/debug/requests`,
);
const ignoredPromptReachedAgent = requests.some((request) =>
String(request.allInputText ?? "").includes(
"@openclaw you should now be ignored",
),
);
if (ignoredPromptReachedAgent) {
throw new Error("ignored channel mention still reached the agent");
}
return requests.length === requestsBeforeIgnored?.length ? true : undefined;
},
3_000,
100,
await waitForCondition(
async () => {
const skills = await readSkillStatus(env);
return findSkill(skills, "qa-hot-disable-skill")?.disabled ? true : undefined;
},
15_000,
200,
).catch((error) => {
throw new Error(
`hot-disable skill never flipped to disabled: ${
error instanceof Error ? error.message : String(error)
}`,
);
state.addInboundMessage({
conversation: { id: "qa-room", kind: "channel", title: "QA Room" },
senderId: "alice",
senderName: "Alice",
text: "goldenbot explain hot config apply",
});
const outbound = await waitForOutboundMessage(
state,
(candidate) => candidate.conversation.id === "qa-room",
liveTurnTimeoutMs(env, 30_000),
);
if (env.mock) {
const requests = await fetchJson<Array<{ allInputText?: string }>>(
`${env.mock.baseUrl}/debug/requests`,
);
if (
!requests.some((request) =>
String(request.allInputText ?? "").includes(
"goldenbot explain hot config apply",
),
)
) {
throw new Error(
"goldenbot follow-up did not reach the agent after config patch",
);
}
}
return outbound.text;
} finally {
await applyConfig({
env,
nextConfig: original.config,
});
await waitForGatewayHealthy(env);
});
const afterSkills = await readSkillStatus(env);
const afterSkill = findSkill(afterSkills, "qa-hot-disable-skill");
if (!afterSkill?.disabled) {
throw new Error(`unexpected post-patch skill state: ${JSON.stringify(afterSkill)}`);
}
return `restartDelayMs=${String(patchResult.restart?.delayMs ?? "")}\npre=${JSON.stringify(beforeSkill)}\npost=${JSON.stringify(afterSkill)}`;
},
},
]),
@@ -1446,7 +1574,15 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I
name: "restarts cleanly and posts the restart sentinel back into qa-channel",
run: async () => {
await reset();
const sessionKey = "agent:qa:restart-wakeup";
const sessionKey = buildAgentSessionKey({
agentId: "qa",
channel: "qa-channel",
peer: {
kind: "channel",
id: "qa-room",
},
});
await createSession(env, "Restart wake-up", sessionKey);
await runAgentPrompt(env, {
sessionKey,
to: "channel:qa-room",
@@ -1471,14 +1607,32 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I
sessionKey,
note: wakeMarker,
});
await waitForGatewayHealthy(env, 60_000);
await waitForGatewayHealthy(env, 60_000).catch((error) => {
throw new Error(
`gateway never returned healthy after config.apply: ${
error instanceof Error ? error.message : String(error)
}`,
);
});
await waitForQaChannelReady(env, 60_000).catch((error) => {
throw new Error(
`qa-channel never returned ready after config.apply: ${
error instanceof Error ? error.message : String(error)
}`,
);
});
const outbound = await waitForOutboundMessage(
state,
(candidate) =>
candidate.conversation.id === "qa-room" && candidate.text.includes(wakeMarker),
(candidate) => candidate.text.includes(wakeMarker),
60_000,
);
return outbound.text;
).catch((error) => {
throw new Error(
`restart sentinel never appeared: ${
error instanceof Error ? error.message : String(error)
}; outbound=${recentOutboundSummary(state)}`,
);
});
return `${outbound.conversation.id}: ${outbound.text}`;
},
},
]),

View File

@@ -100,16 +100,17 @@ Ten repo-grounded candidate scenarios to add after the current seed suite.
- Docs: `docs/tools/image-generation.md`, `docs/providers/openai.md`
- Code: `src/agents/openclaw-tools.image-generation.test.ts`, `src/image-generation/runtime.ts`
## 8. Hot config patch without restart
## 8. Config patch skill disable
- Goal: verify a safe config edit hot-applies and changes behavior immediately.
- Goal: verify `config.patch` can disable a workspace skill and the restarted gateway exposes the disabled state cleanly.
- Flow:
- Use `config.patch` to change a hot-reloadable field such as agent skill visibility or message behavior.
- Retry the task in the same gateway lifetime.
- Add a workspace skill and verify it is eligible.
- Use `config.patch` to disable that skill.
- Wait for the gateway restart and read `skills.status` again.
- Pass:
- Patch succeeds.
- No disruptive restart loop.
- New behavior is live immediately.
- Gateway restarts cleanly.
- The skill flips from eligible to disabled.
- Docs: `docs/gateway/configuration.md`, `docs/gateway/protocol.md`
- Code: `docs/gateway/configuration.md`, `docs/web/control-ui.md`
@@ -147,4 +148,4 @@ If we only promote three right away:
1. On-demand memory tools in channel context
2. Native image generation
3. Hot config patch without restart
3. Config patch skill disable

View File

@@ -232,13 +232,13 @@
},
{
"id": "config-patch-hot-apply",
"title": "Config patch hot apply",
"title": "Config patch skill disable",
"surface": "config",
"objective": "Verify a hot-reloadable config.patch takes effect immediately without a disruptive restart.",
"objective": "Verify config.patch can disable a workspace skill and the restarted gateway exposes the new disabled state cleanly.",
"successCriteria": [
"config.patch succeeds with no restart dependency.",
"Old mention routing behavior stops working immediately.",
"New mention routing behavior works in the same gateway lifetime."
"config.patch succeeds for the skill toggle change.",
"A workspace skill works before the patch.",
"The same skill is reported disabled after the restart triggered by the patch."
],
"docsRefs": ["docs/gateway/configuration.md", "docs/gateway/protocol.md"],
"codeRefs": ["src/gateway/server-methods/config.ts", "extensions/qa-lab/src/suite.ts"]