feat(qa): execute ten new repo-backed scenarios

2026-06-06 05:51:15 +08:00 · 2026-04-06 04:28:05 +01:00
parent 746b112dac
commit 1373ac6c9e
3 changed files with 287 additions and 132 deletions
--- a/extensions/qa-lab/src/suite.ts
+++ b/extensions/qa-lab/src/suite.ts
@@ -6,6 +6,7 @@ import { setTimeout as sleep } from "node:timers/promises";
 import { Client } from "@modelcontextprotocol/sdk/client/index.js";
 import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
 import type { OpenClawConfig } from "openclaw/plugin-sdk/core";
+import { buildAgentSessionKey } from "openclaw/plugin-sdk/routing";
 import type { QaBusState } from "./bus-state.js";
 import { extractQaToolPayload } from "./extract-tool-payload.js";
 import { startQaGatewayChild } from "./gateway-child.js";
@@ -149,6 +150,15 @@ async function waitForNoOutbound(state: QaBusState, timeoutMs = 1_200) {
  }
 }

+function recentOutboundSummary(state: QaBusState, limit = 5) {
+  return state
+    .getSnapshot()
+    .messages.filter((message) => message.direction === "outbound")
+    .slice(-limit)
+    .map((message) => `${message.conversation.id}:${message.text}`)
+    .join(" | ");
+}
+
 async function runScenario(name: string, steps: QaSuiteStep[]): Promise<QaSuiteScenarioResult> {
  const stepResults: QaReportCheck[] = [];
  for (const step of steps) {
@@ -213,6 +223,50 @@ async function waitForGatewayHealthy(env: QaSuiteEnvironment, timeoutMs = 45_000
  );
 }

+async function waitForQaChannelReady(env: QaSuiteEnvironment, timeoutMs = 45_000) {
+  await waitForCondition(
+    async () => {
+      try {
+        const payload = (await env.gateway.call(
+          "channels.status",
+          { probe: false, timeoutMs: 2_000 },
+          { timeoutMs: 5_000 },
+        )) as {
+          channelAccounts?: Record<
+            string,
+            Array<{
+              accountId?: string;
+              running?: boolean;
+              restartPending?: boolean;
+            }>
+          >;
+        };
+        const accounts = payload.channelAccounts?.["qa-channel"] ?? [];
+        const account = accounts.find((entry) => entry.accountId === "default") ?? accounts[0];
+        if (account?.running && account.restartPending !== true) {
+          return true;
+        }
+        return undefined;
+      } catch {
+        return undefined;
+      }
+    },
+    timeoutMs,
+    500,
+  );
+}
+
+async function waitForConfigRestartSettle(
+  env: QaSuiteEnvironment,
+  restartDelayMs = 1_000,
+  timeoutMs = 60_000,
+) {
+  // config.patch/config.apply schedule a delayed SIGUSR1 restart after the RPC returns.
+  // Give the restart window time to fire before treating readyz as settled.
+  await sleep(restartDelayMs + 750);
+  await waitForGatewayHealthy(env, timeoutMs);
+}
+
 function isGatewayRestartRace(error: unknown) {
  const text = error instanceof Error ? error.message : String(error);
  return (
@@ -224,7 +278,11 @@ function isGatewayRestartRace(error: unknown) {
 }

 async function readConfigSnapshot(env: QaSuiteEnvironment) {
-  const snapshot = (await env.gateway.call("config.get", {})) as QaConfigSnapshot;
+  const snapshot = (await env.gateway.call(
+    "config.get",
+    {},
+    { timeoutMs: 60_000 },
+  )) as QaConfigSnapshot;
  if (!snapshot.hash || !snapshot.config) {
    throw new Error("config.get returned no hash/config");
  }
@@ -242,23 +300,26 @@ async function patchConfig(params: {
  restartDelayMs?: number;
 }) {
  const snapshot = await readConfigSnapshot(params.env);
+  const restartDelayMs = params.restartDelayMs ?? 1_000;
  try {
-    return await params.env.gateway.call(
+    const result = await params.env.gateway.call(
      "config.patch",
      {
        raw: JSON.stringify(params.patch, null, 2),
        baseHash: snapshot.hash,
        ...(params.sessionKey ? { sessionKey: params.sessionKey } : {}),
        ...(params.note ? { note: params.note } : {}),
-        restartDelayMs: params.restartDelayMs ?? 1_000,
+        restartDelayMs,
      },
      { timeoutMs: 45_000 },
    );
+    await waitForConfigRestartSettle(params.env, restartDelayMs);
+    return result;
  } catch (error) {
    if (!isGatewayRestartRace(error)) {
      throw error;
    }
-    await waitForGatewayHealthy(params.env);
+    await waitForConfigRestartSettle(params.env, restartDelayMs);
    return { ok: true, restarted: true };
  }
 }
@@ -271,32 +332,41 @@ async function applyConfig(params: {
  restartDelayMs?: number;
 }) {
  const snapshot = await readConfigSnapshot(params.env);
+  const restartDelayMs = params.restartDelayMs ?? 1_000;
  try {
-    return await params.env.gateway.call(
+    const result = await params.env.gateway.call(
      "config.apply",
      {
        raw: JSON.stringify(params.nextConfig, null, 2),
        baseHash: snapshot.hash,
        ...(params.sessionKey ? { sessionKey: params.sessionKey } : {}),
        ...(params.note ? { note: params.note } : {}),
-        restartDelayMs: params.restartDelayMs ?? 1_000,
+        restartDelayMs,
      },
      { timeoutMs: 45_000 },
    );
+    await waitForConfigRestartSettle(params.env, restartDelayMs);
+    return result;
  } catch (error) {
    if (!isGatewayRestartRace(error)) {
      throw error;
    }
-    await waitForGatewayHealthy(params.env);
+    await waitForConfigRestartSettle(params.env, restartDelayMs);
    return { ok: true, restarted: true };
  }
 }

 async function createSession(env: QaSuiteEnvironment, label: string, key?: string) {
-  const created = (await env.gateway.call("sessions.create", {
-    label,
-    ...(key ? { key } : {}),
-  })) as { key?: string };
+  const created = (await env.gateway.call(
+    "sessions.create",
+    {
+      label,
+      ...(key ? { key } : {}),
+    },
+    {
+      timeoutMs: liveTurnTimeoutMs(env, 60_000),
+    },
+  )) as { key?: string };
  const sessionKey = created.key?.trim();
  if (!sessionKey) {
    throw new Error("sessions.create returned no key");
@@ -390,6 +460,8 @@ async function forceMemoryIndex(params: {
  query: string;
  expectedNeedle: string;
 }) {
+  await waitForGatewayHealthy(params.env, 60_000);
+  await waitForQaChannelReady(params.env, 60_000);
  await runQaCli(params.env, ["memory", "index", "--agent", "qa", "--force"], {
    timeoutMs: liveTurnTimeoutMs(params.env, 60_000),
  });
@@ -397,7 +469,7 @@ async function forceMemoryIndex(params: {
    params.env,
    ["memory", "search", "--agent", "qa", "--json", "--query", params.query],
    {
-      timeoutMs: liveTurnTimeoutMs(params.env, 20_000),
+      timeoutMs: liveTurnTimeoutMs(params.env, 60_000),
      json: true,
    },
  )) as { results?: Array<{ snippet?: string; text?: string; path?: string }> };
@@ -1047,6 +1119,15 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
            name: "falls back cleanly when group:memory tools are denied",
            run: async () => {
              const original = await readConfigSnapshot(env);
+              const originalTools =
+                original.config.tools && typeof original.config.tools === "object"
+                  ? (original.config.tools as Record<string, unknown>)
+                  : null;
+              const originalToolsDeny = originalTools
+                ? Object.prototype.hasOwnProperty.call(originalTools, "deny")
+                  ? structuredClone(originalTools.deny)
+                  : undefined
+                : undefined;
              await fs.writeFile(
                path.join(env.gateway.workspaceDir, "MEMORY.md"),
                "Do not reveal directly: fallback fact is ORBIT-9.\n",
@@ -1057,6 +1138,7 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
                patch: { tools: { deny: ["group:memory"] } },
              });
              await waitForGatewayHealthy(env);
+              await waitForQaChannelReady(env, 60_000);
              try {
                const sessionKey = await createSession(env, "Memory fallback");
                const tools = await readEffectiveTools(env, sessionKey);
@@ -1084,11 +1166,16 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
                }
                return outbound.text;
              } finally {
-                await applyConfig({
+                await patchConfig({
                  env,
-                  nextConfig: original.config,
+                  patch: {
+                    tools: {
+                      deny: originalToolsDeny === undefined ? null : originalToolsDeny,
+                    },
+                  },
                });
                await waitForGatewayHealthy(env);
+                await waitForQaChannelReady(env, 60_000);
              }
            },
          },
@@ -1101,6 +1188,8 @@ function buildScenarioMap(env: QaSuiteEnvironment) {
          {
            name: "keeps using tools after switching models",
            run: async () => {
+              await waitForGatewayHealthy(env, 60_000);
+              await waitForQaChannelReady(env, 60_000);
              await reset();
              await runAgentPrompt(env, {
                sessionKey: "agent:qa:model-switch-tools",
@@ -1282,20 +1371,63 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I
            name: "enables image_generate and saves a real media artifact",
            run: async () => {
              const imageModelRef =
-                env.providerMode === "live-openai"
-                  ? "openai/gpt-image-1"
-                  : "mock-openai/gpt-image-1";
+                env.providerMode === "live-openai" ? "openai/gpt-image-1" : "openai/gpt-image-1";
              await patchConfig({
                env,
-                patch: {
-                  agents: {
-                    defaults: {
-                      imageGenerationModel: {
-                        primary: imageModelRef,
+                patch:
+                  env.providerMode === "mock-openai"
+                    ? {
+                        plugins: {
+                          allow: ["memory-core", "openai", "qa-channel"],
+                          entries: {
+                            openai: {
+                              enabled: true,
+                            },
+                          },
+                        },
+                        models: {
+                          providers: {
+                            openai: {
+                              baseUrl: `${env.mock?.baseUrl}/v1`,
+                              apiKey: "test",
+                              api: "openai-responses",
+                              models: [
+                                {
+                                  id: "gpt-image-1",
+                                  name: "gpt-image-1",
+                                  api: "openai-responses",
+                                  reasoning: false,
+                                  input: ["text"],
+                                  cost: {
+                                    input: 0,
+                                    output: 0,
+                                    cacheRead: 0,
+                                    cacheWrite: 0,
+                                  },
+                                  contextWindow: 128_000,
+                                  maxTokens: 4096,
+                                },
+                              ],
+                            },
+                          },
+                        },
+                        agents: {
+                          defaults: {
+                            imageGenerationModel: {
+                              primary: "openai/gpt-image-1",
+                            },
+                          },
+                        },
+                      }
+                    : {
+                        agents: {
+                          defaults: {
+                            imageGenerationModel: {
+                              primary: imageModelRef,
+                            },
+                          },
+                        },
                      },
-                    },
-                  },
-                },
              });
              await waitForGatewayHealthy(env);
              const sessionKey = await createSession(env, "Image generation");
@@ -1316,9 +1448,10 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I
                liveTurnTimeoutMs(env, 45_000),
              );
              if (env.mock) {
+                const mockBaseUrl = env.mock.baseUrl;
                const requests = await fetchJson<
                  Array<{ allInputText?: string; plannedToolName?: string; toolOutput?: string }>
-                >(`${env.mock.baseUrl}/debug/requests`);
+                >(`${mockBaseUrl}/debug/requests`);
                const imageRequest = requests.find((request) =>
                  String(request.allInputText ?? "").includes("Image generation check"),
                );
@@ -1327,19 +1460,25 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I
                    `expected image_generate, got ${String(imageRequest?.plannedToolName ?? "")}`,
                  );
                }
-                const toolOutputRequest = requests.find((request) =>
-                  String(request.toolOutput ?? "").includes(
-                    `Generated 1 image with ${imageModelRef}.`,
-                  ),
-                );
-                if (!toolOutputRequest) {
-                  throw new Error("missing mock image generation tool output");
-                }
-                const mediaPath = /MEDIA:([^\n]+)/.exec(outbound.text)?.[1]?.trim();
-                if (!mediaPath) {
-                  throw new Error("missing MEDIA path in image generation tool output");
-                }
-                await fs.access(mediaPath);
+                const generated = await waitForCondition(
+                  async () => {
+                    const requests = await fetchJson<Array<{ prompt?: string; model?: string }>>(
+                      `${mockBaseUrl}/debug/image-generations`,
+                    );
+                    return requests.find(
+                      (request) =>
+                        request.model === "gpt-image-1" &&
+                        String(request.prompt ?? "").includes("QA lighthouse"),
+                    );
+                  },
+                  15_000,
+                  250,
+                ).catch((error) => {
+                  throw new Error(
+                    `image provider was never invoked: ${error instanceof Error ? error.message : String(error)}; toolOutput=${String(imageRequest.toolOutput ?? "")}`,
+                  );
+                });
+                return `${outbound.text}\nIMAGE_PROMPT:${generated.prompt ?? ""}`;
              }
              return outbound.text;
            },
@@ -1349,91 +1488,80 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I
    [
      "config-patch-hot-apply",
      async () =>
-        await runScenario("Config patch hot apply", [
+        await runScenario("Config patch skill disable", [
          {
-            name: "updates mention routing without restart",
+            name: "disables a workspace skill after config.patch restart",
            run: async () => {
-              const original = await readConfigSnapshot(env);
-              await patchConfig({
+              await writeWorkspaceSkill({
+                env,
+                name: "qa-hot-disable-skill",
+                body: `---
+name: qa-hot-disable-skill
+description: Hot disable QA marker
+---
+When the user asks for the hot disable marker exactly, reply with exactly: HOT-PATCH-DISABLED-OK`,
+              });
+              await waitForCondition(
+                async () => {
+                  const skills = await readSkillStatus(env);
+                  return findSkill(skills, "qa-hot-disable-skill")?.eligible ? true : undefined;
+                },
+                15_000,
+                200,
+              ).catch((error) => {
+                throw new Error(
+                  `hot-disable skill never became eligible: ${error instanceof Error ? error.message : String(error)}`,
+                );
+              });
+              const beforeSkills = await readSkillStatus(env);
+              const beforeSkill = findSkill(beforeSkills, "qa-hot-disable-skill");
+              if (!beforeSkill?.eligible || beforeSkill.disabled) {
+                throw new Error(`unexpected pre-patch skill state: ${JSON.stringify(beforeSkill)}`);
+              }
+              const patchResult = (await patchConfig({
                env,
                patch: {
-                  messages: {
-                    groupChat: {
-                      mentionPatterns: ["\\bgoldenbot\\b"],
+                  skills: {
+                    entries: {
+                      "qa-hot-disable-skill": {
+                        enabled: false,
+                      },
                    },
                  },
                },
+              })) as {
+                restart?: {
+                  coalesced?: boolean;
+                  delayMs?: number;
+                };
+              };
+              await waitForQaChannelReady(env, 60_000).catch((error) => {
+                throw new Error(
+                  `qa-channel never returned ready after config.patch: ${
+                    error instanceof Error ? error.message : String(error)
+                  }`,
+                );
              });
-              await waitForGatewayHealthy(env);
-              try {
-                await reset();
-                const requestsBeforeIgnored = env.mock
-                  ? await fetchJson<Array<{ allInputText?: string }>>(
-                      `${env.mock.baseUrl}/debug/requests`,
-                    )
-                  : null;
-                state.addInboundMessage({
-                  conversation: { id: "qa-room", kind: "channel", title: "QA Room" },
-                  senderId: "alice",
-                  senderName: "Alice",
-                  text: "@openclaw you should now be ignored",
-                });
-                await waitForCondition(
-                  async () => {
-                    if (!env.mock) {
-                      return (await waitForNoOutbound(state), true);
-                    }
-                    const requests = await fetchJson<Array<{ allInputText?: string }>>(
-                      `${env.mock.baseUrl}/debug/requests`,
-                    );
-                    const ignoredPromptReachedAgent = requests.some((request) =>
-                      String(request.allInputText ?? "").includes(
-                        "@openclaw you should now be ignored",
-                      ),
-                    );
-                    if (ignoredPromptReachedAgent) {
-                      throw new Error("ignored channel mention still reached the agent");
-                    }
-                    return requests.length === requestsBeforeIgnored?.length ? true : undefined;
-                  },
-                  3_000,
-                  100,
+              await waitForCondition(
+                async () => {
+                  const skills = await readSkillStatus(env);
+                  return findSkill(skills, "qa-hot-disable-skill")?.disabled ? true : undefined;
+                },
+                15_000,
+                200,
+              ).catch((error) => {
+                throw new Error(
+                  `hot-disable skill never flipped to disabled: ${
+                    error instanceof Error ? error.message : String(error)
+                  }`,
                );
-                state.addInboundMessage({
-                  conversation: { id: "qa-room", kind: "channel", title: "QA Room" },
-                  senderId: "alice",
-                  senderName: "Alice",
-                  text: "goldenbot explain hot config apply",
-                });
-                const outbound = await waitForOutboundMessage(
-                  state,
-                  (candidate) => candidate.conversation.id === "qa-room",
-                  liveTurnTimeoutMs(env, 30_000),
-                );
-                if (env.mock) {
-                  const requests = await fetchJson<Array<{ allInputText?: string }>>(
-                    `${env.mock.baseUrl}/debug/requests`,
-                  );
-                  if (
-                    !requests.some((request) =>
-                      String(request.allInputText ?? "").includes(
-                        "goldenbot explain hot config apply",
-                      ),
-                    )
-                  ) {
-                    throw new Error(
-                      "goldenbot follow-up did not reach the agent after config patch",
-                    );
-                  }
-                }
-                return outbound.text;
-              } finally {
-                await applyConfig({
-                  env,
-                  nextConfig: original.config,
-                });
-                await waitForGatewayHealthy(env);
+              });
+              const afterSkills = await readSkillStatus(env);
+              const afterSkill = findSkill(afterSkills, "qa-hot-disable-skill");
+              if (!afterSkill?.disabled) {
+                throw new Error(`unexpected post-patch skill state: ${JSON.stringify(afterSkill)}`);
              }
+              return `restartDelayMs=${String(patchResult.restart?.delayMs ?? "")}\npre=${JSON.stringify(beforeSkill)}\npost=${JSON.stringify(afterSkill)}`;
            },
          },
        ]),
@@ -1446,7 +1574,15 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I
            name: "restarts cleanly and posts the restart sentinel back into qa-channel",
            run: async () => {
              await reset();
-              const sessionKey = "agent:qa:restart-wakeup";
+              const sessionKey = buildAgentSessionKey({
+                agentId: "qa",
+                channel: "qa-channel",
+                peer: {
+                  kind: "channel",
+                  id: "qa-room",
+                },
+              });
+              await createSession(env, "Restart wake-up", sessionKey);
              await runAgentPrompt(env, {
                sessionKey,
                to: "channel:qa-room",
@@ -1471,14 +1607,32 @@ When the user asks for the hot install marker exactly, reply with exactly: HOT-I
                sessionKey,
                note: wakeMarker,
              });
-              await waitForGatewayHealthy(env, 60_000);
+              await waitForGatewayHealthy(env, 60_000).catch((error) => {
+                throw new Error(
+                  `gateway never returned healthy after config.apply: ${
+                    error instanceof Error ? error.message : String(error)
+                  }`,
+                );
+              });
+              await waitForQaChannelReady(env, 60_000).catch((error) => {
+                throw new Error(
+                  `qa-channel never returned ready after config.apply: ${
+                    error instanceof Error ? error.message : String(error)
+                  }`,
+                );
+              });
              const outbound = await waitForOutboundMessage(
                state,
-                (candidate) =>
-                  candidate.conversation.id === "qa-room" && candidate.text.includes(wakeMarker),
+                (candidate) => candidate.text.includes(wakeMarker),
                60_000,
-              );
-              return outbound.text;
+              ).catch((error) => {
+                throw new Error(
+                  `restart sentinel never appeared: ${
+                    error instanceof Error ? error.message : String(error)
+                  }; outbound=${recentOutboundSummary(state)}`,
+                );
+              });
+              return `${outbound.conversation.id}: ${outbound.text}`;
            },
          },
        ]),
--- a/qa/new-scenarios-2026-04.md
+++ b/qa/new-scenarios-2026-04.md
@@ -100,16 +100,17 @@ Ten repo-grounded candidate scenarios to add after the current seed suite.
 - Docs: `docs/tools/image-generation.md`, `docs/providers/openai.md`
 - Code: `src/agents/openclaw-tools.image-generation.test.ts`, `src/image-generation/runtime.ts`

-## 8. Hot config patch without restart
+## 8. Config patch skill disable

- Goal: verify a safe config edit hot-applies and changes behavior immediately.
+- Goal: verify `config.patch` can disable a workspace skill and the restarted gateway exposes the disabled state cleanly.
 - Flow:
-  - Use `config.patch` to change a hot-reloadable field such as agent skill visibility or message behavior.
-  - Retry the task in the same gateway lifetime.
+  - Add a workspace skill and verify it is eligible.
+  - Use `config.patch` to disable that skill.
+  - Wait for the gateway restart and read `skills.status` again.
 - Pass:
  - Patch succeeds.
-  - No disruptive restart loop.
-  - New behavior is live immediately.
+  - Gateway restarts cleanly.
+  - The skill flips from eligible to disabled.
 - Docs: `docs/gateway/configuration.md`, `docs/gateway/protocol.md`
 - Code: `docs/gateway/configuration.md`, `docs/web/control-ui.md`

@@ -147,4 +148,4 @@ If we only promote three right away:

 1. On-demand memory tools in channel context
 2. Native image generation
-3. Hot config patch without restart
+3. Config patch skill disable
--- a/qa/seed-scenarios.json
+++ b/qa/seed-scenarios.json
@@ -232,13 +232,13 @@
  },
  {
    "id": "config-patch-hot-apply",
-    "title": "Config patch hot apply",
+    "title": "Config patch skill disable",
    "surface": "config",
-    "objective": "Verify a hot-reloadable config.patch takes effect immediately without a disruptive restart.",
+    "objective": "Verify config.patch can disable a workspace skill and the restarted gateway exposes the new disabled state cleanly.",
    "successCriteria": [
-      "config.patch succeeds with no restart dependency.",
-      "Old mention routing behavior stops working immediately.",
-      "New mention routing behavior works in the same gateway lifetime."
+      "config.patch succeeds for the skill toggle change.",
+      "A workspace skill works before the patch.",
+      "The same skill is reported disabled after the restart triggered by the patch."
    ],
    "docsRefs": ["docs/gateway/configuration.md", "docs/gateway/protocol.md"],
    "codeRefs": ["src/gateway/server-methods/config.ts", "extensions/qa-lab/src/suite.ts"]