/**
 * Builds a compact one-line digest of the most recent outbound messages on the
 * QA bus, intended for embedding in failure diagnostics.
 *
 * @param state - QA bus whose current snapshot is scanned.
 * @param limit - Maximum number of trailing outbound messages to include
 *   (default 5). Fractions are truncated toward zero; zero, negative or NaN
 *   limits produce an empty summary.
 * @returns `conversationId:text` entries in snapshot order joined by " | ",
 *   or the empty string when nothing qualifies.
 */
function recentOutboundSummary(state: QaBusState, limit = 5) {
  // `slice(-0)` is the same call as `slice(0)` and would return *every*
  // message, so a non-positive limit must be rejected before slicing.
  const tailCount = Math.trunc(limit);
  if (!(tailCount > 0)) {
    return "";
  }
  return state
    .getSnapshot()
    .messages.filter((message) => message.direction === "outbound")
    .slice(-tailCount)
    .map((message) => `${message.conversation.id}:${message.text}`)
    .join(" | ");
}
/**
 * Waits out the restart that follows a config.patch/config.apply call and then
 * blocks until the gateway reports healthy again.
 *
 * @param env - QA suite environment handed through to the health wait.
 * @param restartDelayMs - Restart delay that was requested in the RPC (default 1000 ms).
 * @param timeoutMs - Upper bound passed to the gateway health wait (default 60000 ms).
 */
async function waitForConfigRestartSettle(
  env: QaSuiteEnvironment,
  restartDelayMs = 1_000,
  timeoutMs = 60_000,
) {
  // config.patch/config.apply schedule a delayed SIGUSR1 restart after the RPC
  // returns, so an immediate health probe could still hit the old process.
  // Sleep past the scheduled delay plus a small margin before probing.
  const restartMarginMs = 750;
  const settleWindowMs = restartDelayMs + restartMarginMs;
  await sleep(settleWindowMs);
  await waitForGatewayHealthy(env, timeoutMs);
}
1_000, + restartDelayMs, }, { timeoutMs: 45_000 }, ); + await waitForConfigRestartSettle(params.env, restartDelayMs); + return result; } catch (error) { if (!isGatewayRestartRace(error)) { throw error; } - await waitForGatewayHealthy(params.env); + await waitForConfigRestartSettle(params.env, restartDelayMs); return { ok: true, restarted: true }; } } @@ -271,32 +332,41 @@ async function applyConfig(params: { restartDelayMs?: number; }) { const snapshot = await readConfigSnapshot(params.env); + const restartDelayMs = params.restartDelayMs ?? 1_000; try { - return await params.env.gateway.call( + const result = await params.env.gateway.call( "config.apply", { raw: JSON.stringify(params.nextConfig, null, 2), baseHash: snapshot.hash, ...(params.sessionKey ? { sessionKey: params.sessionKey } : {}), ...(params.note ? { note: params.note } : {}), - restartDelayMs: params.restartDelayMs ?? 1_000, + restartDelayMs, }, { timeoutMs: 45_000 }, ); + await waitForConfigRestartSettle(params.env, restartDelayMs); + return result; } catch (error) { if (!isGatewayRestartRace(error)) { throw error; } - await waitForGatewayHealthy(params.env); + await waitForConfigRestartSettle(params.env, restartDelayMs); return { ok: true, restarted: true }; } } async function createSession(env: QaSuiteEnvironment, label: string, key?: string) { - const created = (await env.gateway.call("sessions.create", { - label, - ...(key ? { key } : {}), - })) as { key?: string }; + const created = (await env.gateway.call( + "sessions.create", + { + label, + ...(key ? 
{ key } : {}), + }, + { + timeoutMs: liveTurnTimeoutMs(env, 60_000), + }, + )) as { key?: string }; const sessionKey = created.key?.trim(); if (!sessionKey) { throw new Error("sessions.create returned no key"); @@ -390,6 +460,8 @@ async function forceMemoryIndex(params: { query: string; expectedNeedle: string; }) { + await waitForGatewayHealthy(params.env, 60_000); + await waitForQaChannelReady(params.env, 60_000); await runQaCli(params.env, ["memory", "index", "--agent", "qa", "--force"], { timeoutMs: liveTurnTimeoutMs(params.env, 60_000), }); @@ -397,7 +469,7 @@ async function forceMemoryIndex(params: { params.env, ["memory", "search", "--agent", "qa", "--json", "--query", params.query], { - timeoutMs: liveTurnTimeoutMs(params.env, 20_000), + timeoutMs: liveTurnTimeoutMs(params.env, 60_000), json: true, }, )) as { results?: Array<{ snippet?: string; text?: string; path?: string }> }; @@ -1047,6 +1119,15 @@ function buildScenarioMap(env: QaSuiteEnvironment) { name: "falls back cleanly when group:memory tools are denied", run: async () => { const original = await readConfigSnapshot(env); + const originalTools = + original.config.tools && typeof original.config.tools === "object" + ? (original.config.tools as Record) + : null; + const originalToolsDeny = originalTools + ? Object.prototype.hasOwnProperty.call(originalTools, "deny") + ? 
structuredClone(originalTools.deny) + : undefined + : undefined; await fs.writeFile( path.join(env.gateway.workspaceDir, "MEMORY.md"), "Do not reveal directly: fallback fact is ORBIT-9.\n", @@ -1057,6 +1138,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) { patch: { tools: { deny: ["group:memory"] } }, }); await waitForGatewayHealthy(env); + await waitForQaChannelReady(env, 60_000); try { const sessionKey = await createSession(env, "Memory fallback"); const tools = await readEffectiveTools(env, sessionKey); @@ -1084,11 +1166,16 @@ function buildScenarioMap(env: QaSuiteEnvironment) { } return outbound.text; } finally { - await applyConfig({ + await patchConfig({ env, - nextConfig: original.config, + patch: { + tools: { + deny: originalToolsDeny === undefined ? null : originalToolsDeny, + }, + }, }); await waitForGatewayHealthy(env); + await waitForQaChannelReady(env, 60_000); } }, }, @@ -1101,6 +1188,8 @@ function buildScenarioMap(env: QaSuiteEnvironment) { { name: "keeps using tools after switching models", run: async () => { + await waitForGatewayHealthy(env, 60_000); + await waitForQaChannelReady(env, 60_000); await reset(); await runAgentPrompt(env, { sessionKey: "agent:qa:model-switch-tools", @@ -1282,20 +1371,63 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I name: "enables image_generate and saves a real media artifact", run: async () => { const imageModelRef = - env.providerMode === "live-openai" - ? "openai/gpt-image-1" - : "mock-openai/gpt-image-1"; + env.providerMode === "live-openai" ? "openai/gpt-image-1" : "openai/gpt-image-1"; await patchConfig({ env, - patch: { - agents: { - defaults: { - imageGenerationModel: { - primary: imageModelRef, + patch: + env.providerMode === "mock-openai" + ? 
{ + plugins: { + allow: ["memory-core", "openai", "qa-channel"], + entries: { + openai: { + enabled: true, + }, + }, + }, + models: { + providers: { + openai: { + baseUrl: `${env.mock?.baseUrl}/v1`, + apiKey: "test", + api: "openai-responses", + models: [ + { + id: "gpt-image-1", + name: "gpt-image-1", + api: "openai-responses", + reasoning: false, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128_000, + maxTokens: 4096, + }, + ], + }, + }, + }, + agents: { + defaults: { + imageGenerationModel: { + primary: "openai/gpt-image-1", + }, + }, + }, + } + : { + agents: { + defaults: { + imageGenerationModel: { + primary: imageModelRef, + }, + }, + }, }, - }, - }, - }, }); await waitForGatewayHealthy(env); const sessionKey = await createSession(env, "Image generation"); @@ -1316,9 +1448,10 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I liveTurnTimeoutMs(env, 45_000), ); if (env.mock) { + const mockBaseUrl = env.mock.baseUrl; const requests = await fetchJson< Array<{ allInputText?: string; plannedToolName?: string; toolOutput?: string }> - >(`${env.mock.baseUrl}/debug/requests`); + >(`${mockBaseUrl}/debug/requests`); const imageRequest = requests.find((request) => String(request.allInputText ?? "").includes("Image generation check"), ); @@ -1327,19 +1460,25 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I `expected image_generate, got ${String(imageRequest?.plannedToolName ?? "")}`, ); } - const toolOutputRequest = requests.find((request) => - String(request.toolOutput ?? 
"").includes( - `Generated 1 image with ${imageModelRef}.`, - ), - ); - if (!toolOutputRequest) { - throw new Error("missing mock image generation tool output"); - } - const mediaPath = /MEDIA:([^\n]+)/.exec(outbound.text)?.[1]?.trim(); - if (!mediaPath) { - throw new Error("missing MEDIA path in image generation tool output"); - } - await fs.access(mediaPath); + const generated = await waitForCondition( + async () => { + const requests = await fetchJson>( + `${mockBaseUrl}/debug/image-generations`, + ); + return requests.find( + (request) => + request.model === "gpt-image-1" && + String(request.prompt ?? "").includes("QA lighthouse"), + ); + }, + 15_000, + 250, + ).catch((error) => { + throw new Error( + `image provider was never invoked: ${error instanceof Error ? error.message : String(error)}; toolOutput=${String(imageRequest.toolOutput ?? "")}`, + ); + }); + return `${outbound.text}\nIMAGE_PROMPT:${generated.prompt ?? ""}`; } return outbound.text; }, @@ -1349,91 +1488,80 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I [ "config-patch-hot-apply", async () => - await runScenario("Config patch hot apply", [ + await runScenario("Config patch skill disable", [ { - name: "updates mention routing without restart", + name: "disables a workspace skill after config.patch restart", run: async () => { - const original = await readConfigSnapshot(env); - await patchConfig({ + await writeWorkspaceSkill({ + env, + name: "qa-hot-disable-skill", + body: `--- +name: qa-hot-disable-skill +description: Hot disable QA marker +--- +When the user asks for the hot disable marker exactly, reply with exactly: HOT-PATCH-DISABLED-OK`, + }); + await waitForCondition( + async () => { + const skills = await readSkillStatus(env); + return findSkill(skills, "qa-hot-disable-skill")?.eligible ? true : undefined; + }, + 15_000, + 200, + ).catch((error) => { + throw new Error( + `hot-disable skill never became eligible: ${error instanceof Error ? 
error.message : String(error)}`, + ); + }); + const beforeSkills = await readSkillStatus(env); + const beforeSkill = findSkill(beforeSkills, "qa-hot-disable-skill"); + if (!beforeSkill?.eligible || beforeSkill.disabled) { + throw new Error(`unexpected pre-patch skill state: ${JSON.stringify(beforeSkill)}`); + } + const patchResult = (await patchConfig({ env, patch: { - messages: { - groupChat: { - mentionPatterns: ["\\bgoldenbot\\b"], + skills: { + entries: { + "qa-hot-disable-skill": { + enabled: false, + }, }, }, }, + })) as { + restart?: { + coalesced?: boolean; + delayMs?: number; + }; + }; + await waitForQaChannelReady(env, 60_000).catch((error) => { + throw new Error( + `qa-channel never returned ready after config.patch: ${ + error instanceof Error ? error.message : String(error) + }`, + ); }); - await waitForGatewayHealthy(env); - try { - await reset(); - const requestsBeforeIgnored = env.mock - ? await fetchJson>( - `${env.mock.baseUrl}/debug/requests`, - ) - : null; - state.addInboundMessage({ - conversation: { id: "qa-room", kind: "channel", title: "QA Room" }, - senderId: "alice", - senderName: "Alice", - text: "@openclaw you should now be ignored", - }); - await waitForCondition( - async () => { - if (!env.mock) { - return (await waitForNoOutbound(state), true); - } - const requests = await fetchJson>( - `${env.mock.baseUrl}/debug/requests`, - ); - const ignoredPromptReachedAgent = requests.some((request) => - String(request.allInputText ?? "").includes( - "@openclaw you should now be ignored", - ), - ); - if (ignoredPromptReachedAgent) { - throw new Error("ignored channel mention still reached the agent"); - } - return requests.length === requestsBeforeIgnored?.length ? true : undefined; - }, - 3_000, - 100, + await waitForCondition( + async () => { + const skills = await readSkillStatus(env); + return findSkill(skills, "qa-hot-disable-skill")?.disabled ? 
true : undefined; + }, + 15_000, + 200, + ).catch((error) => { + throw new Error( + `hot-disable skill never flipped to disabled: ${ + error instanceof Error ? error.message : String(error) + }`, ); - state.addInboundMessage({ - conversation: { id: "qa-room", kind: "channel", title: "QA Room" }, - senderId: "alice", - senderName: "Alice", - text: "goldenbot explain hot config apply", - }); - const outbound = await waitForOutboundMessage( - state, - (candidate) => candidate.conversation.id === "qa-room", - liveTurnTimeoutMs(env, 30_000), - ); - if (env.mock) { - const requests = await fetchJson>( - `${env.mock.baseUrl}/debug/requests`, - ); - if ( - !requests.some((request) => - String(request.allInputText ?? "").includes( - "goldenbot explain hot config apply", - ), - ) - ) { - throw new Error( - "goldenbot follow-up did not reach the agent after config patch", - ); - } - } - return outbound.text; - } finally { - await applyConfig({ - env, - nextConfig: original.config, - }); - await waitForGatewayHealthy(env); + }); + const afterSkills = await readSkillStatus(env); + const afterSkill = findSkill(afterSkills, "qa-hot-disable-skill"); + if (!afterSkill?.disabled) { + throw new Error(`unexpected post-patch skill state: ${JSON.stringify(afterSkill)}`); } + return `restartDelayMs=${String(patchResult.restart?.delayMs ?? 
"")}\npre=${JSON.stringify(beforeSkill)}\npost=${JSON.stringify(afterSkill)}`; }, }, ]), @@ -1446,7 +1574,15 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I name: "restarts cleanly and posts the restart sentinel back into qa-channel", run: async () => { await reset(); - const sessionKey = "agent:qa:restart-wakeup"; + const sessionKey = buildAgentSessionKey({ + agentId: "qa", + channel: "qa-channel", + peer: { + kind: "channel", + id: "qa-room", + }, + }); + await createSession(env, "Restart wake-up", sessionKey); await runAgentPrompt(env, { sessionKey, to: "channel:qa-room", @@ -1471,14 +1607,32 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I sessionKey, note: wakeMarker, }); - await waitForGatewayHealthy(env, 60_000); + await waitForGatewayHealthy(env, 60_000).catch((error) => { + throw new Error( + `gateway never returned healthy after config.apply: ${ + error instanceof Error ? error.message : String(error) + }`, + ); + }); + await waitForQaChannelReady(env, 60_000).catch((error) => { + throw new Error( + `qa-channel never returned ready after config.apply: ${ + error instanceof Error ? error.message : String(error) + }`, + ); + }); const outbound = await waitForOutboundMessage( state, - (candidate) => - candidate.conversation.id === "qa-room" && candidate.text.includes(wakeMarker), + (candidate) => candidate.text.includes(wakeMarker), 60_000, - ); - return outbound.text; + ).catch((error) => { + throw new Error( + `restart sentinel never appeared: ${ + error instanceof Error ? 
error.message : String(error) + }; outbound=${recentOutboundSummary(state)}`, + ); + }); + return `${outbound.conversation.id}: ${outbound.text}`; }, }, ]), diff --git a/qa/new-scenarios-2026-04.md b/qa/new-scenarios-2026-04.md index 8a7636842632..4e92d713b277 100644 --- a/qa/new-scenarios-2026-04.md +++ b/qa/new-scenarios-2026-04.md @@ -100,16 +100,17 @@ Ten repo-grounded candidate scenarios to add after the current seed suite. - Docs: `docs/tools/image-generation.md`, `docs/providers/openai.md` - Code: `src/agents/openclaw-tools.image-generation.test.ts`, `src/image-generation/runtime.ts` -## 8. Hot config patch without restart +## 8. Config patch skill disable -- Goal: verify a safe config edit hot-applies and changes behavior immediately. +- Goal: verify `config.patch` can disable a workspace skill and the restarted gateway exposes the disabled state cleanly. - Flow: - - Use `config.patch` to change a hot-reloadable field such as agent skill visibility or message behavior. - - Retry the task in the same gateway lifetime. + - Add a workspace skill and verify it is eligible. + - Use `config.patch` to disable that skill. + - Wait for the gateway restart and read `skills.status` again. - Pass: - Patch succeeds. - - No disruptive restart loop. - - New behavior is live immediately. + - Gateway restarts cleanly. + - The skill flips from eligible to disabled. - Docs: `docs/gateway/configuration.md`, `docs/gateway/protocol.md` - Code: `docs/gateway/configuration.md`, `docs/web/control-ui.md` @@ -147,4 +148,4 @@ If we only promote three right away: 1. On-demand memory tools in channel context 2. Native image generation -3. Hot config patch without restart +3. 
Config patch skill disable diff --git a/qa/seed-scenarios.json b/qa/seed-scenarios.json index f860eab39e6b..3bdbf22a314a 100644 --- a/qa/seed-scenarios.json +++ b/qa/seed-scenarios.json @@ -232,13 +232,13 @@ }, { "id": "config-patch-hot-apply", - "title": "Config patch hot apply", + "title": "Config patch skill disable", "surface": "config", - "objective": "Verify a hot-reloadable config.patch takes effect immediately without a disruptive restart.", + "objective": "Verify config.patch can disable a workspace skill and the restarted gateway exposes the new disabled state cleanly.", "successCriteria": [ - "config.patch succeeds with no restart dependency.", - "Old mention routing behavior stops working immediately.", - "New mention routing behavior works in the same gateway lifetime." + "config.patch succeeds for the skill toggle change.", + "A workspace skill works before the patch.", + "The same skill is reported disabled after the restart triggered by the patch." ], "docsRefs": ["docs/gateway/configuration.md", "docs/gateway/protocol.md"], "codeRefs": ["src/gateway/server-methods/config.ts", "extensions/qa-lab/src/suite.ts"]