From fad1c8a0711e1c06a5d70046b8cae4b732274ed3 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Fri, 22 May 2026 02:06:32 +0800 Subject: [PATCH] test(qa-lab): add long-context watchdog scenario --- CHANGELOG.md | 1 + .../qa-lab/src/scenario-catalog.test.ts | 10 ++ .../runtime/long-context-progress-watchdog.md | 156 ++++++++++++++++++ 3 files changed, 167 insertions(+) create mode 100644 qa/scenarios/runtime/long-context-progress-watchdog.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e3a93fe53c3..88e3558e5370 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai - Agents/tools: remove the old sender-owner tool gating path so configured tools stay visible for trusted sessions while command and channel-action auth still carry real sender identity. - QA-Lab: add curated mock JSONL replay fixtures and first-drift reporting for runtime-parity audits. (#80323, refs #80176) Thanks @100yenadmin. - QA-Lab: include the optional 100-turn runtime parity soak in release-soak artifacts so long-run Codex/Pi transcript drift stays visible outside the default gate. (#80395) Thanks @100yenadmin. +- QA-Lab: add a live-only long-context progress watchdog scenario for Codex app-server timeout and stalled-run sentinels. (#80323) Thanks @100yenadmin. - QA-Lab: add a personal-agent failure recovery scenario that checks honest partial status, retry boundaries, and local recovery artifacts. (#83872) Thanks @iFiras-Max1. - QA-Lab: include an opt-in `update.run` package self-upgrade sentinel for destructive latest-package recovery checks. - QA-Lab: add Codex plugin lifecycle and auth-profile fixture coverage for missing installs, pinned-version drift, first-turn install ordering, and doctor migration safety. (#80323, refs #80174) Thanks @100yenadmin. 
diff --git a/extensions/qa-lab/src/scenario-catalog.test.ts b/extensions/qa-lab/src/scenario-catalog.test.ts
index 2b16dcf9bcf2..7f6b21bf8c2f 100644
--- a/extensions/qa-lab/src/scenario-catalog.test.ts
+++ b/extensions/qa-lab/src/scenario-catalog.test.ts
@@ -177,6 +177,7 @@ describe("qa scenario catalog", () => {
       "plugin-hook-health-sentinel",
       "plugin-manifest-contract-health",
       "webchat-direct-reply-routing",
+      "long-context-progress-watchdog",
     ];
 
     for (const scenarioId of scenarioIds) {
@@ -188,6 +189,15 @@ describe("qa scenario catalog", () => {
     expect(readQaScenarioById("webchat-direct-reply-routing").sourcePath).toBe(
       "qa/scenarios/channels/webchat-direct-reply-routing.md",
     );
+    expect(readQaScenarioById("long-context-progress-watchdog").sourcePath).toBe(
+      "qa/scenarios/runtime/long-context-progress-watchdog.md",
+    );
+    expect(readQaScenarioExecutionConfig("long-context-progress-watchdog")).toMatchObject({
+      requiredProviderMode: "live-frontier",
+      harnessRuntime: "codex",
+    });
+    expect(readQaScenarioById("long-context-progress-watchdog").plugins).toBeUndefined();
+    expect(readQaScenarioById("long-context-progress-watchdog").gatewayConfigPatch).toBeUndefined();
   });
 
   it("loads the opt-in update.run package self-upgrade sentinel", () => {
diff --git a/qa/scenarios/runtime/long-context-progress-watchdog.md b/qa/scenarios/runtime/long-context-progress-watchdog.md
new file mode 100644
index 000000000000..857e6f970378
--- /dev/null
+++ b/qa/scenarios/runtime/long-context-progress-watchdog.md
@@ -0,0 +1,156 @@
+# Long-context progress watchdog
+
+```yaml qa-scenario
+id: long-context-progress-watchdog
+title: Long-context progress watchdog
+surface: runtime
+runtimeParityTier: live-only
+coverage:
+  primary:
+    - runtime.gateway-log-sentinel.codex-progress
+  secondary:
+    - runtime.long-context
+    - runtime.codex-app-server
+objective: Fail live proof when long-context activity triggers Codex app-server timeout or stalled-progress sentinels.
+successCriteria:
+  - Gateway config routes the selected QA model through the Codex app-server runtime.
+  - Agent reads through the seeded long-context fixture and replies with the marker found at the tail.
+  - Gateway logs since the scenario cursor contain no app-server timeout or stalled-progress sentinel.
+docsRefs:
+  - docs/concepts/qa-e2e-automation.md
+  - qa/scenarios/index.md
+codeRefs:
+  - extensions/qa-lab/src/gateway-log-sentinel.ts
+  - extensions/codex/src/app-server
+execution:
+  kind: flow
+  summary: Seed a large workspace fixture, complete a read turn, and scan for Codex app-server progress failures.
+  config: # values below are read by the qa-flow expressions as config.<key>
+    requiredProviderMode: live-frontier
+    harnessRuntime: codex # asserted as agentRuntime.id after the config patch
+    fixtureFile: LONG_CONTEXT_SENTINEL_FIXTURE.txt # joined onto env.gateway.workspaceDir
+    expectedMarker: LONG-CONTEXT-WATCHDOG-OK # written on the fixture's TAIL line and expected in the reply
+    repeatCount: 2000 # number of filler rows between the START and TAIL lines
+```
+
+```yaml qa-flow
+steps:
+  - name: catches app-server timeout or stalled progress during long-context activity
+    actions:
+      - call: waitForGatewayHealthy
+        args:
+          - ref: env
+          - 60000
+      - call: waitForQaChannelReady
+        args:
+          - ref: env
+          - 60000
+      - call: readConfigSnapshot # taken before any mutation; the finally block restores from it
+        saveAs: originalSnapshot
+        args:
+          - ref: env
+      - set: originalModelEntry
+        value:
+          expr: originalSnapshot.config.agents?.defaults?.models?.[env.primaryModel]
+      - set: originalPluginAllow
+        value:
+          expr: originalSnapshot.config.plugins?.allow
+      - set: originalCodexPluginEntry
+        value:
+          expr: originalSnapshot.config.plugins?.entries?.codex
+      - try: # mutation, agent turn and log scan run here so finally always restores config
+          actions:
+            - call: patchConfig
+              args:
+                - env:
+                    ref: env
+                  patch:
+                    plugins:
+                      allow: # existing allow-list (if it is an array) plus 'codex', de-duplicated
+                        expr: "Array.from(new Set([...(Array.isArray(originalPluginAllow) ? originalPluginAllow : []), 'codex']))"
+                      entries:
+                        codex: # keep any existing object entry and force enabled: true
+                          expr: "({ ...((originalCodexPluginEntry && typeof originalCodexPluginEntry === 'object') ? originalCodexPluginEntry : {}), enabled: true })"
+                    agents:
+                      defaults:
+                        models:
+                          expr: "({ [env.primaryModel]: { agentRuntime: { id: config.harnessRuntime } } })"
+            - call: waitForGatewayHealthy
+              args:
+                - ref: env
+                - 60000
+            - call: waitForQaChannelReady
+              args:
+                - ref: env
+                - 60000
+            - call: readConfigSnapshot # re-read to verify the runtime patch took effect
+              saveAs: snapshot
+              args:
+                - ref: env
+            - assert:
+                expr: "snapshot.config.agents?.defaults?.models?.[env.primaryModel]?.agentRuntime?.id === config.harnessRuntime"
+                message:
+                  expr: "`expected ${env.primaryModel} agentRuntime.id=${config.harnessRuntime}, got ${JSON.stringify(snapshot.config.agents?.defaults?.models?.[env.primaryModel]?.agentRuntime)}`"
+            - call: reset
+            - set: logCursor # passed as `since` to the sentinel scan at the end of the try block
+              value:
+                expr: markGatewayLogCursor()
+            - set: fixturePath
+              value:
+                expr: "path.join(env.gateway.workspaceDir, config.fixtureFile)"
+            - call: fs.writeFile # START line, repeatCount filler rows, then the TAIL marker line
+              args:
+                - ref: fixturePath
+                - expr: "`START LONG-CONTEXT-WATCHDOG\\n${Array.from({ length: config.repeatCount }, (_entry, index) => `context row ${index + 1}: alpha beta gamma`).join('\\n')}\\nTAIL ${config.expectedMarker}\\n`"
+                - utf8
+            - set: startIndex # message count before the turn; passed as sinceIndex to the outbound wait
+              value:
+                expr: state.getSnapshot().messages.length
+            - call: runAgentPrompt
+              args:
+                - ref: env
+                - sessionKey:
+                    expr: "`agent:qa:long-context-watchdog:${randomUUID().slice(0, 8)}`"
+                  message:
+                    expr: "`Read ${fixturePath}, find the marker on the TAIL line, and reply with that marker only.`"
+                  timeoutMs:
+                    expr: liveTurnTimeoutMs(env, 90000)
+            - call: waitForOutboundMessage
+              saveAs: outbound
+              args:
+                - ref: state
+                - lambda: # match a qa-operator message whose normalized text contains the marker
+                    params: [candidate]
+                    expr: "candidate.conversation.id === 'qa-operator' && normalizeLowercaseStringOrEmpty(candidate.text).includes(normalizeLowercaseStringOrEmpty(config.expectedMarker))"
+                - expr: liveTurnTimeoutMs(env, 45000)
+                - sinceIndex:
+                    ref: startIndex
+            - call: assertNoGatewayLogSentinels
+              args:
+                - since:
+                    ref: logCursor
+                  kinds:
+                    - codex-app-server-timeout
+                    - stalled-agent-run
+          finally: # restore the captured values; null stands in for keys that were absent (presumably a delete in patchConfig - confirm)
+            - call: patchConfig
+              args:
+                - env:
+                    ref: env
+                  patch:
+                    plugins:
+                      allow:
+                        expr: "originalPluginAllow === undefined ? null : originalPluginAllow"
+                      entries:
+                        codex: # same object guard as the forward patch so a null original cannot throw during cleanup
+                          expr: "(originalCodexPluginEntry && typeof originalCodexPluginEntry === 'object') ? { ...originalCodexPluginEntry, enabled: originalCodexPluginEntry.enabled === undefined ? null : originalCodexPluginEntry.enabled } : (originalCodexPluginEntry === undefined ? null : originalCodexPluginEntry)"
+                    agents:
+                      defaults:
+                        models: # object guard mirrors the codex entry restore above
+                          expr: "({ [env.primaryModel]: (originalModelEntry && typeof originalModelEntry === 'object') ? { ...originalModelEntry, agentRuntime: originalModelEntry.agentRuntime === undefined ? null : originalModelEntry.agentRuntime } : (originalModelEntry === undefined ? null : originalModelEntry) })"
+            - call: waitForGatewayHealthy
+              args:
+                - ref: env
+                - 60000
+    detailsExpr: outbound.text
+```