diff --git a/.github/workflows/mantis-slack-desktop-smoke.yml b/.github/workflows/mantis-slack-desktop-smoke.yml index 855c67aa4844..0f8927049d7b 100644 --- a/.github/workflows/mantis-slack-desktop-smoke.yml +++ b/.github/workflows/mantis-slack-desktop-smoke.yml @@ -17,6 +17,11 @@ on: required: true default: slack-canary type: string + approval_checkpoints: + description: Run native Slack approval checkpoint mode instead of gateway setup + required: false + default: false + type: boolean keep_vm: description: Keep the desktop lease open after a passing run required: false @@ -30,6 +35,14 @@ on: options: - aws - hetzner + crabbox_market: + description: Crabbox capacity market for AWS leases + required: false + default: on-demand + type: choice + options: + - on-demand + - spot crabbox_lease_id: description: Optional existing Crabbox desktop/browser lease id or slug to reuse required: false @@ -227,9 +240,11 @@ jobs: CRABBOX_ACCESS_CLIENT_SECRET: ${{ secrets.CRABBOX_ACCESS_CLIENT_SECRET }} CRABBOX_LEASE_ID: ${{ inputs.crabbox_lease_id }} CRABBOX_PROVIDER: ${{ inputs.crabbox_provider }} + CRABBOX_MARKET: ${{ inputs.crabbox_market }} KEEP_VM: ${{ inputs.keep_vm }} HYDRATE_MODE: ${{ inputs.hydrate_mode }} SCENARIO_ID: ${{ inputs.scenario_id }} + APPROVAL_CHECKPOINTS: ${{ inputs.approval_checkpoints }} shell: bash run: | set -euo pipefail @@ -250,6 +265,15 @@ jobs: require_var OPENCLAW_QA_CONVEX_SITE_URL require_var OPENCLAW_QA_CONVEX_SECRET_CI require_var CRABBOX_COORDINATOR_TOKEN + if [[ -z "${CRABBOX_LEASE_ID:-}" && "$CRABBOX_PROVIDER" == "aws" ]]; then + runner_ip="$(curl -fsS https://checkip.amazonaws.com | tr -d '[:space:]')" + if [[ -z "$runner_ip" ]]; then + echo "Could not resolve GitHub runner public IPv4 for AWS SSH ingress." 
>&2 + exit 1 + fi + export CRABBOX_AWS_SSH_CIDRS="${runner_ip}/32" + echo "Using AWS SSH CIDR ${CRABBOX_AWS_SSH_CIDRS}" + fi candidate_repo="$(pwd)/.artifacts/qa-e2e/mantis/slack-desktop-smoke-worktrees/candidate" output_rel=".artifacts/qa-e2e/mantis/slack-desktop-smoke" @@ -265,6 +289,22 @@ jobs: else keep_args=(--no-keep-lease) fi + market_args=() + if [[ -n "${CRABBOX_MARKET:-}" ]]; then + market_args=(--market "$CRABBOX_MARKET") + fi + gateway_args=(--gateway-setup) + approval_args=() + scenario_args=(--scenario "$SCENARIO_ID") + scenario_label="$SCENARIO_ID" + if [[ "$APPROVAL_CHECKPOINTS" == "true" ]]; then + approval_args=(--approval-checkpoints) + gateway_args=() + if [[ -z "${SCENARIO_ID:-}" || "$SCENARIO_ID" == "slack-canary" || "$SCENARIO_ID" == "approval-checkpoints" ]]; then + scenario_args=() + scenario_label="approval-checkpoints" + fi + fi set +e pnpm openclaw qa mantis slack-desktop-smoke \ @@ -274,7 +314,7 @@ jobs: --class standard \ --idle-timeout 45m \ --ttl 120m \ - --gateway-setup \ + "${gateway_args[@]}" \ --credential-source convex \ --credential-role ci \ --provider-mode live-frontier \ @@ -282,7 +322,9 @@ jobs: --model openai/gpt-5.5 \ --alt-model openai/gpt-5.5 \ --fast \ - --scenario "$SCENARIO_ID" \ + "${scenario_args[@]}" \ + "${approval_args[@]}" \ + "${market_args[@]}" \ "${keep_args[@]}" \ "${lease_args[@]}" mantis_exit=$? @@ -312,27 +354,81 @@ jobs: status="$(jq -r '.status' "$root/mantis-slack-desktop-smoke-summary.json")" screenshot_required=false + desktop_capture_inline=true if [[ "$status" == "pass" ]]; then screenshot_required=true fi + evidence_summary="Mantis ran Slack QA inside a Crabbox Linux VNC desktop, started an OpenClaw Slack gateway in that VM, opened Slack Web in the visible browser, and captured screenshot/video evidence." 
+ expected_result="Slack QA and VM gateway setup pass" + checkpoint_artifacts='[]' + checkpoint_required=false + if [[ "$APPROVAL_CHECKPOINTS" == "true" ]]; then + evidence_summary="Mantis ran Slack native approval QA inside a Crabbox Linux VNC desktop, rendered pending/resolved approval checkpoints from the Slack API messages, and stored Slack QA artifacts." + expected_result="Slack native exec and plugin approval checkpoints pass" + screenshot_required=false + desktop_capture_inline=false + if [[ "$status" == "pass" ]]; then + checkpoint_required=true + fi + checkpoint_scenarios=() + if [[ "$scenario_label" == "approval-checkpoints" ]]; then + checkpoint_scenarios=("slack-approval-exec-native" "slack-approval-plugin-native") + else + checkpoint_scenarios=("$scenario_label") + fi + checkpoint_scenarios_json="$(printf '%s\n' "${checkpoint_scenarios[@]}" | jq -R . | jq -s .)" + checkpoint_artifacts="$( + jq -n \ + --argjson checkpoint_required "$checkpoint_required" \ + --argjson scenario_ids "$checkpoint_scenarios_json" \ + ' + def scenario_kind($id): + if $id == "slack-approval-exec-native" then "exec" + elif $id == "slack-approval-plugin-native" then "plugin" + else error("unsupported approval checkpoint scenario: \($id)") + end; + def scenario_title($id): + if scenario_kind($id) == "exec" then "Exec" else "Plugin" end; + [ + $scenario_ids[] as $id + | ["pending", "resolved"][] as $state + | { + kind: "desktopScreenshot", + lane: "candidate", + label: "\(scenario_title($id)) approval \($state) checkpoint", + path: "approval-checkpoints/\($id)-\($state).png", + targetPath: "approval-checkpoints/\($id)-\($state).png", + alt: "Rendered Slack \(scenario_kind($id)) approval \($state) checkpoint", + width: 720, + inline: true, + required: $checkpoint_required + } + ] + ' + )" + fi jq -n \ --arg status "$status" \ --arg candidate_sha "${{ needs.validate_ref.outputs.candidate_revision }}" \ - --arg scenario "$SCENARIO_ID" \ + --arg scenario "$scenario_label" \ + --arg 
summary "$evidence_summary" \ + --arg expected "$expected_result" \ + --argjson checkpoint_artifacts "$checkpoint_artifacts" \ --argjson screenshot_required "$screenshot_required" \ + --argjson desktop_capture_inline "$desktop_capture_inline" \ '{ schemaVersion: 1, id: "slack-desktop-smoke", title: "Mantis Slack Desktop Smoke QA", - summary: "Mantis ran Slack QA inside a Crabbox Linux VNC desktop, started an OpenClaw Slack gateway in that VM, opened Slack Web in the visible browser, and captured screenshot/video evidence.", + summary: $summary, scenario: $scenario, comparison: { - candidate: { sha: $candidate_sha, expected: "Slack QA and VM gateway setup pass", status: $status, fixed: ($status == "pass") }, + candidate: { sha: $candidate_sha, expected: $expected, status: $status, fixed: ($status == "pass") }, pass: ($status == "pass") }, - artifacts: [ - { kind: "desktopScreenshot", lane: "candidate", label: "Slack desktop/VNC browser", path: "slack-desktop-smoke.png", targetPath: "slack-desktop.png", alt: "Slack Web desktop screenshot from the Mantis VM", width: 720, inline: true, required: $screenshot_required }, - { kind: "motionPreview", lane: "candidate", label: "Slack motion preview", path: "slack-desktop-smoke-preview.gif", targetPath: "slack-desktop-preview.gif", alt: "Animated Slack desktop preview", width: 720, inline: true, required: false }, + artifacts: ([ + { kind: "desktopScreenshot", lane: "candidate", label: "Slack desktop/VNC browser", path: "slack-desktop-smoke.png", targetPath: "slack-desktop.png", alt: "Slack Web desktop screenshot from the Mantis VM", width: 720, inline: $desktop_capture_inline, required: $screenshot_required }, + { kind: "motionPreview", lane: "candidate", label: "Slack motion preview", path: "slack-desktop-smoke-preview.gif", targetPath: "slack-desktop-preview.gif", alt: "Animated Slack desktop preview", width: 720, inline: $desktop_capture_inline, required: false }, { kind: "motionClip", lane: "candidate", label: "Slack 
change MP4", path: "slack-desktop-smoke-change.mp4", targetPath: "slack-desktop-change.mp4", required: false }, { kind: "fullVideo", lane: "candidate", label: "Slack desktop MP4", path: "slack-desktop-smoke.mp4", targetPath: "slack-desktop.mp4", required: false }, { kind: "metadata", lane: "run", label: "Slack desktop summary", path: "mantis-slack-desktop-smoke-summary.json", targetPath: "summary.json" }, @@ -340,7 +436,7 @@ jobs: { kind: "metadata", lane: "run", label: "Slack command log", path: "slack-desktop-command.log", targetPath: "slack-desktop-command.log", required: false }, { kind: "metadata", lane: "run", label: "Slack preview metadata", path: "slack-desktop-smoke-preview.json", targetPath: "slack-desktop-preview.json", required: false }, { kind: "metadata", lane: "run", label: "Slack error", path: "error.txt", targetPath: "error.txt", required: false } - ] + ] + $checkpoint_artifacts) }' > "$root/mantis-evidence.json" cat "$root/mantis-slack-desktop-smoke-report.md" >> "$GITHUB_STEP_SUMMARY" diff --git a/docs/concepts/mantis-slack-desktop-runbook.md b/docs/concepts/mantis-slack-desktop-runbook.md index d699eb802eee..204e2984fa55 100644 --- a/docs/concepts/mantis-slack-desktop-runbook.md +++ b/docs/concepts/mantis-slack-desktop-runbook.md @@ -116,6 +116,34 @@ Use `--hydrate-mode prehydrated` only when the reused remote workspace already has `node_modules` and a built `dist/`. Mantis fails closed if those are missing. +Prove native Slack approval UI: + +```bash +pnpm openclaw qa mantis slack-desktop-smoke \ + --provider aws \ + --class standard \ + --approval-checkpoints \ + --credential-source convex \ + --credential-role maintainer \ + --hydrate-mode source +``` + +Approval checkpoint mode is mutually exclusive with `--gateway-setup`. It runs +the opt-in `slack-approval-exec-native` and `slack-approval-plugin-native` +scenarios unless you pass explicit approval checkpoint `--scenario` flags; other +Slack scenarios are rejected before the VM starts. 
The Slack QA runner writes +each checkpoint JSON file from the real Slack API message it observed, then the +remote watcher renders that message snapshot into +`approval-checkpoints/-pending.png` and +`approval-checkpoints/-resolved.png`. The run fails if any checkpoint +JSON, message evidence, ack JSON, or rendered screenshot is missing or empty. + +Cold GitHub Actions leases do not have Slack Web cookies, so their browser +capture can land on Slack sign-in. For approval checkpoint proof, trust the +rendered checkpoint images and Slack QA artifacts rather than +`slack-desktop-smoke.png`. Use a kept warm lease with a manually logged-in Slack +Web profile only when the browser screenshot itself must show Slack Web. + ## Hydrate modes | Mode | Use when | Remote behavior | Tradeoff | @@ -139,9 +167,9 @@ uses `/var/cache/crabbox/pnpm` when present. - `artifacts.copy`: rsync back from the VM. `crabbox.remote_run` can be marked `accepted` when Crabbox returns a non-zero -remote status after Mantis has copied metadata proving that the OpenClaw gateway -is alive and the setup completed. Treat `accepted` as pass-with-explanation, -not a failed scenario. +remote status after Mantis has copied metadata proving that either the OpenClaw +gateway setup completed or the Slack QA command itself exited successfully. +Treat `accepted` as pass-with-explanation, not a failed scenario. 
If the run is slow: @@ -159,7 +187,8 @@ A good PR comment should show: - scenario id and candidate SHA; - GitHub Actions run URL; - artifact URL; -- inline screenshot; +- inline approval checkpoint screenshot, or a Slack Web screenshot from a + logged-in warm lease; - inline animated preview when available; - full MP4 and trimmed MP4 links; - pass/fail status; diff --git a/docs/concepts/mantis.md b/docs/concepts/mantis.md index 103c38570b50..825184a86f55 100644 --- a/docs/concepts/mantis.md +++ b/docs/concepts/mantis.md @@ -202,6 +202,10 @@ Useful Slack desktop flags: - `--credential-source convex --credential-role ci` uses the shared credential pool instead of direct Slack env tokens. - `--provider-mode`, `--model`, `--alt-model`, and `--fast` pass through to the Slack live lane. +Approval checkpoint runs render Slack API message snapshots into checkpoint PNGs +for CI-safe visual proof. `slack-desktop-smoke.png` is only proof of Slack Web +when the lease uses a warm browser profile that is already logged in. + The GitHub smoke workflow is `Mantis Discord Smoke`. The before and after GitHub workflow for the first real scenario is `Mantis Discord Status Reactions`. It accepts: diff --git a/docs/concepts/qa-e2e-automation.md b/docs/concepts/qa-e2e-automation.md index f3354c17325c..125c3689f812 100644 --- a/docs/concepts/qa-e2e-automation.md +++ b/docs/concepts/qa-e2e-automation.md @@ -165,6 +165,25 @@ With `--gateway-setup`, Mantis leaves a persistent OpenClaw Slack gateway running inside the VM on port `38973`; without it, the command runs the normal bot-to-bot Slack QA lane and exits after artifact capture. +To prove native Slack approval UI with desktop evidence, run the Mantis approval +checkpoint mode: + +```bash +pnpm openclaw qa mantis slack-desktop-smoke \ + --approval-checkpoints \ + --credential-source convex \ + --credential-role maintainer +``` + +This mode is mutually exclusive with `--gateway-setup`. 
It runs the Slack +approval scenarios, rejects non-approval scenario ids, waits at each pending and +resolved approval state, renders the observed Slack API message into +`approval-checkpoints/<scenario-id>-pending.png` and +`approval-checkpoints/<scenario-id>-resolved.png`, then fails if any checkpoint, +message evidence, acknowledgement, or rendered screenshot is missing or empty. +Cold CI leases may still show Slack sign-in in `slack-desktop-smoke.png`; the +approval checkpoint images are the visual proof for this lane. + The operator checklist, GitHub workflow dispatch command, evidence-comment contract, hydrate-mode decision table, timing interpretation, and failure handling steps live in [Mantis Slack Desktop Runbook](/concepts/mantis-slack-desktop-runbook). @@ -400,8 +419,13 @@ Required env when `--credential-source env`: Optional: - `OPENCLAW_QA_SLACK_CAPTURE_CONTENT=1` keeps message bodies in observed-message artifacts. +- `OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_DIR` enables visual approval + checkpoints for Mantis. The runner writes `.pending.json` and + `.resolved.json`, then waits for matching `.ack.json` files. +- `OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_TIMEOUT_MS` overrides the checkpoint + acknowledgement timeout. The default is `120000`. -Scenarios (`extensions/qa-lab/src/live-transports/slack/slack-live.runtime.ts:39`): +Scenarios (`extensions/qa-lab/src/live-transports/slack/slack-live.runtime.ts`): - `slack-canary` - `slack-mention-gating` @@ -410,12 +434,22 @@ Scenarios (`extensions/qa-lab/src/live-transports/slack/slack-live.runtime.ts:39 - `slack-restart-resume` - `slack-thread-follow-up` - `slack-thread-isolation` +- `slack-approval-exec-native` - opt-in native Slack exec approval scenario. + Requests an exec approval through the gateway, verifies the Slack message has + native approval buttons, resolves it, and verifies the resolved Slack update. +- `slack-approval-plugin-native` - opt-in native Slack plugin approval scenario. 
+ Enables exec and plugin approval forwarding together so plugin events are not + suppressed by exec approval routing, then verifies the same pending/resolved + native Slack UI path. Output artifacts: - `slack-qa-report.md` - `slack-qa-summary.json` - `slack-qa-observed-messages.json` - bodies redacted unless `OPENCLAW_QA_SLACK_CAPTURE_CONTENT=1`. +- `approval-checkpoints/` - only when Mantis sets + `OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_DIR`; contains checkpoint JSON, + acknowledgement JSON, and pending/resolved screenshots. #### Setting up the Slack workspace diff --git a/extensions/qa-lab/src/cli.test.ts b/extensions/qa-lab/src/cli.test.ts index 175867247577..dfe2d1056c14 100644 --- a/extensions/qa-lab/src/cli.test.ts +++ b/extensions/qa-lab/src/cli.test.ts @@ -337,10 +337,14 @@ describe("qa cli registration", () => { "/tmp/crabbox", "--provider", "hetzner", + "--market", + "on-demand", "--machine-class", "beast", "--lease-id", "cbx_123abc", + "--fresh-pr", + "openclaw/openclaw#85141", "--idle-timeout", "45m", "--ttl", @@ -369,11 +373,13 @@ describe("qa cli registration", () => { credentialRole: "maintainer", credentialSource: "env", fastMode: true, + freshPr: "openclaw/openclaw#85141", gatewaySetup: undefined, idleTimeout: "45m", keepLease: true, leaseId: "cbx_123abc", machineClass: "beast", + market: "on-demand", outputDir: ".artifacts/qa-e2e/mantis/slack-desktop", primaryModel: "openai/gpt-5.5", provider: "hetzner", diff --git a/extensions/qa-lab/src/live-transports/slack/slack-live.runtime.test.ts b/extensions/qa-lab/src/live-transports/slack/slack-live.runtime.test.ts index 10d533630a39..cc6a0d96b599 100644 --- a/extensions/qa-lab/src/live-transports/slack/slack-live.runtime.test.ts +++ b/extensions/qa-lab/src/live-transports/slack/slack-live.runtime.test.ts @@ -1,7 +1,7 @@ import fs from "node:fs/promises"; import { tmpdir } from "node:os"; import path from "node:path"; -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } 
from "vitest"; import { testing, runSlackQaLive } from "./slack-live.runtime.js"; describe("Slack live QA runtime helpers", () => { @@ -66,6 +66,239 @@ describe("Slack live QA runtime helpers", () => { ]); }); + it("selects native approval scenarios by id without changing standard coverage", () => { + expect( + testing + .findScenario(["slack-approval-exec-native", "slack-approval-plugin-native"]) + .map((scenario) => scenario.id), + ).toEqual(["slack-approval-exec-native", "slack-approval-plugin-native"]); + expect(testing.SLACK_QA_STANDARD_SCENARIO_IDS).not.toContain("slack-approval-exec-native"); + }); + + it("enables Slack native exec and plugin approval delivery for approval scenarios", () => { + const cfg = testing.buildSlackQaConfig( + {}, + { + channelId: "C123456789", + driverBotUserId: "U999999999", + overrides: { + approvals: { + exec: true, + plugin: true, + target: "channel", + }, + }, + sutAccountId: "sut", + sutAppToken: "xapp-sut", + sutBotToken: "xoxb-sut", + }, + ); + + expect(cfg.approvals?.exec).toEqual({ enabled: true, mode: "session" }); + expect(cfg.approvals?.plugin).toEqual({ enabled: true, mode: "session" }); + const account = cfg.channels?.slack?.accounts?.sut; + expect(account?.allowFrom).toEqual(["U999999999"]); + expect(account?.execApprovals).toEqual({ + enabled: true, + approvers: ["U999999999"], + target: "channel", + }); + expect(account?.channels?.C123456789?.users).toEqual(["U999999999"]); + }); + + it("extracts Slack native approval button values from blocks", () => { + expect( + testing.collectSlackActionValues([ + { + type: "actions", + elements: [ + { + type: "button", + text: { type: "plain_text", text: "Allow Once" }, + value: "/approve plugin:abc allow-once", + }, + ], + }, + ]), + ).toEqual(["/approve plugin:abc allow-once"]); + }); + + it("builds approval checkpoint message evidence from Slack blocks", () => { + expect( + testing.buildSlackApprovalCheckpointMessage({ + blocks: [ + { + type: "section", + text: { type: 
"mrkdwn", text: "Plugin approval required" }, + }, + { + type: "actions", + elements: [ + { + type: "button", + text: { type: "plain_text", text: "Allow Once" }, + value: "/approve plugin:abc allow-once", + }, + ], + }, + ], + text: "Plugin approval required", + }), + ).toEqual({ + actionLabels: ["Allow Once"], + blockText: ["Plugin approval required", "Allow Once"], + hasNativeActions: true, + text: "Plugin approval required", + }); + }); + + it("resolves Slack approval checkpoint configuration from env", () => { + expect( + testing.resolveSlackApprovalCheckpointConfig({ + OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_DIR: "/tmp/checkpoints", + OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_TIMEOUT_MS: "5000", + }), + ).toEqual({ + checkpointDir: "/tmp/checkpoints", + timeoutMs: 5000, + }); + expect(testing.resolveSlackApprovalCheckpointConfig({})).toBeUndefined(); + }); + + it("uses started Slack channel readiness for native approval-only scenarios", () => { + const startedStatus = { + lastError: null, + restartPending: false, + running: true, + }; + + expect(testing.isSlackChannelReadyForQa(startedStatus, "started")).toBe(true); + expect(testing.isSlackChannelReadyForQa(startedStatus, "connected")).toBe(false); + expect( + testing.isSlackChannelReadyForQa( + { + ...startedStatus, + connected: false, + }, + "started", + ), + ).toBe(false); + expect( + testing.isSlackChannelReadyForQa( + { + ...startedStatus, + lastError: "socket auth failed", + }, + "started", + ), + ).toBe(false); + }); + + it("keeps Slack readiness stability anchored when connectedAt is absent", () => { + expect( + testing.resolveSlackChannelReadySince({ + observedAt: 2_000, + previousReadySince: undefined, + status: { + lastError: null, + restartPending: false, + running: true, + }, + }), + ).toBe(2_000); + expect( + testing.resolveSlackChannelReadySince({ + observedAt: 3_000, + previousReadySince: 2_000, + status: { + lastError: null, + restartPending: false, + running: true, + }, + }), + ).toBe(2_000); + 
expect( + testing.resolveSlackChannelReadySince({ + observedAt: 4_000, + previousReadySince: 2_000, + status: { + lastConnectedAt: 3_500, + lastError: null, + restartPending: false, + running: true, + }, + }), + ).toBe(3_500); + }); + + it("allows live approval resolve RPCs to take longer than the generic gateway probe timeout", async () => { + const call = vi.fn(async () => ({ decision: "allow-once" })); + + await testing.resolveApprovalDecision({ + approvalId: "plugin:abc", + context: { + gateway: { call }, + } as never, + decision: "allow-once", + kind: "plugin", + }); + + expect(call).toHaveBeenCalledWith( + "plugin.approval.resolve", + { decision: "allow-once", id: "plugin:abc" }, + { + expectFinal: false, + timeoutMs: 35_000, + }, + ); + }); + + it("redacts approval artifact content and Slack metadata in summary-shaped results", () => { + expect( + testing.toSlackQaScenarioArtifactResults({ + includeContent: false, + redactMetadata: true, + scenarios: [ + { + approval: { + approvalId: "plugin:abc", + approvalKind: "plugin", + channelId: "C123456789", + decision: "allow-once", + pendingActionValues: ["/approve plugin:abc allow-once"], + pendingMessageTs: "1.000000", + pendingText: "Plugin approval required", + resolvedActionValues: [], + resolvedMessageTs: "1.000000", + resolvedText: "Plugin approval: Allowed once", + threadTs: "1.000000", + }, + details: "plugin approval resolved", + id: "slack-approval-plugin-native", + status: "pass", + title: "Slack native plugin approval prompt resolves with exec approvals enabled", + }, + ], + })[0]?.approval, + ).toEqual({ + approvalId: "", + approvalKind: "plugin", + channelId: undefined, + decision: "allow-once", + pendingActionValues: undefined, + pendingCheckpointPath: undefined, + pendingMessageTs: undefined, + pendingScreenshotPath: undefined, + pendingText: undefined, + resolvedActionValues: undefined, + resolvedCheckpointPath: undefined, + resolvedMessageTs: undefined, + resolvedScreenshotPath: undefined, + 
resolvedText: undefined, + threadTs: undefined, + }); + }); + it("ignores delayed unrelated SUT replies during mention-gating", async () => { const observedMessages: Array = []; await expect( diff --git a/extensions/qa-lab/src/live-transports/slack/slack-live.runtime.ts b/extensions/qa-lab/src/live-transports/slack/slack-live.runtime.ts index ca5364bd9a1f..16ca7616ce7a 100644 --- a/extensions/qa-lab/src/live-transports/slack/slack-live.runtime.ts +++ b/extensions/qa-lab/src/live-transports/slack/slack-live.runtime.ts @@ -37,18 +37,24 @@ type SlackChannelStatus = { connected?: boolean; lastConnectedAt?: number; lastDisconnect?: unknown; - lastError?: string; + lastError?: string | null; restartPending?: boolean; running?: boolean; }; +type SlackChannelReadinessMode = "connected" | "started"; + const SLACK_QA_READY_TIMEOUT_MS = 45_000; const SLACK_QA_READY_STABILITY_MS = 3_000; const SLACK_QA_GATEWAY_STOP_SETTLE_MS = 3_000; const SLACK_QA_RETRYABLE_SCENARIO_ATTEMPTS = 2; +const SLACK_QA_APPROVAL_DECISION_TIMEOUT_MS = 30_000; +const SLACK_QA_APPROVAL_CHECKPOINT_DEFAULT_TIMEOUT_MS = 120_000; type SlackQaScenarioId = | "slack-allowlist-block" + | "slack-approval-exec-native" + | "slack-approval-plugin-native" | "slack-canary" | "slack-mention-gating" | "slack-restart-resume" @@ -56,7 +62,11 @@ type SlackQaScenarioId = | "slack-thread-isolation" | "slack-top-level-reply-shape"; -type SlackQaScenarioRun = { +type SlackQaApprovalKind = "exec" | "plugin"; +type SlackQaApprovalDecision = "allow-always" | "allow-once" | "deny"; + +type SlackQaMessageScenarioRun = { + kind?: "message"; expectReply: boolean; input: string; matchText: string; @@ -65,6 +75,15 @@ type SlackQaScenarioRun = { afterReply?: (message: SlackMessage, context: SlackQaScenarioContext) => Promise; }; +type SlackQaApprovalScenarioRun = { + approvalKind: SlackQaApprovalKind; + decision: SlackQaApprovalDecision; + kind: "approval"; + token: string; +}; + +type SlackQaScenarioRun = SlackQaApprovalScenarioRun | 
SlackQaMessageScenarioRun; + type SlackQaBeforeRunResult = | string | void @@ -74,6 +93,11 @@ type SlackQaBeforeRunResult = }; type SlackQaConfigOverrides = { + approvals?: { + exec?: boolean; + plugin?: boolean; + target?: "both" | "channel" | "dm"; + }; replyToMode?: "all" | "off"; users?: string[]; }; @@ -102,6 +126,7 @@ type SlackAuthIdentity = { type SlackMessage = { bot_id?: string; + blocks?: unknown[]; text?: string; thread_ts?: string; ts?: string; @@ -115,6 +140,8 @@ type SlackObservedMessage = { scenarioId?: string; scenarioTitle?: string; text: string; + actionValues?: string[]; + blockText?: string[]; threadTs?: string; ts: string; userId?: string; @@ -127,12 +154,47 @@ type SlackObservedMessageArtifact = { scenarioId?: string; scenarioTitle?: string; text?: string; + actionValues?: string[]; + blockText?: string[]; threadTs?: string; ts?: string; userId?: string; }; +type SlackApprovalArtifact = { + approvalId: string; + approvalKind: SlackQaApprovalKind; + channelId?: string; + decision: SlackQaApprovalDecision; + pendingActionValues?: string[]; + pendingCheckpointPath?: string; + pendingMessageTs?: string; + pendingScreenshotPath?: string; + pendingText?: string; + resolvedActionValues?: string[]; + resolvedCheckpointPath?: string; + resolvedMessageTs?: string; + resolvedScreenshotPath?: string; + resolvedText?: string; + threadTs?: string; +}; + +type SlackApprovalCheckpointState = "pending" | "resolved"; + +type SlackApprovalCheckpointAck = { + capturedAt?: string; + screenshotPath?: string; +}; + +type SlackApprovalCheckpointMessage = { + actionLabels: string[]; + blockText: string[]; + hasNativeActions: boolean; + text: string; +}; + type SlackQaScenarioResult = { + approval?: SlackApprovalArtifact; details: string; id: string; requestStartedAt?: string; @@ -175,6 +237,9 @@ type SlackCredentialLease = Awaited; const SLACK_QA_CAPTURE_CONTENT_ENV = "OPENCLAW_QA_SLACK_CAPTURE_CONTENT"; +const SLACK_QA_APPROVAL_CHECKPOINT_DIR_ENV = 
"OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_DIR"; +const SLACK_QA_APPROVAL_CHECKPOINT_TIMEOUT_MS_ENV = + "OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_TIMEOUT_MS"; const QA_REDACT_PUBLIC_METADATA_ENV = "OPENCLAW_QA_REDACT_PUBLIC_METADATA"; const SLACK_QA_WEB_API_TIMEOUT_MS = 45_000; const SLACK_QA_ENV_KEYS = [ @@ -206,6 +271,7 @@ const slackPostMessageSchema = z.object({ const slackHistoryMessageSchema = z.object({ bot_id: z.string().optional(), + blocks: z.array(z.unknown()).optional(), text: z.string().optional(), thread_ts: z.string().optional(), ts: z.string().min(1), @@ -288,6 +354,41 @@ const SLACK_QA_SCENARIOS: SlackQaScenarioDefinition[] = [ }; }, }, + { + id: "slack-approval-exec-native", + title: "Slack native exec approval prompt resolves", + timeoutMs: 60_000, + configOverrides: { + approvals: { + exec: true, + target: "channel", + }, + }, + buildRun: () => ({ + approvalKind: "exec", + decision: "allow-once", + kind: "approval", + token: `SLACK_QA_EXEC_APPROVAL_${randomUUID().slice(0, 8).toUpperCase()}`, + }), + }, + { + id: "slack-approval-plugin-native", + title: "Slack native plugin approval prompt resolves with exec approvals enabled", + timeoutMs: 60_000, + configOverrides: { + approvals: { + exec: true, + plugin: true, + target: "channel", + }, + }, + buildRun: () => ({ + approvalKind: "plugin", + decision: "allow-once", + kind: "approval", + token: `SLACK_QA_PLUGIN_APPROVAL_${randomUUID().slice(0, 8).toUpperCase()}`, + }), + }, { id: "slack-restart-resume", standardId: "restart-resume", @@ -480,8 +581,43 @@ function buildSlackQaConfig( }, ): OpenClawConfig { const pluginAllow = [...new Set([...(baseCfg.plugins?.allow ?? []), "slack"])]; + const approvalOverrides = params.overrides?.approvals; + const approvalForwardingConfig = + approvalOverrides?.exec || approvalOverrides?.plugin + ? { + approvals: { + ...baseCfg.approvals, + ...(approvalOverrides.exec + ? 
{ + exec: { + ...baseCfg.approvals?.exec, + enabled: true, + mode: "session" as const, + }, + } + : {}), + ...(approvalOverrides.plugin + ? { + plugin: { + ...baseCfg.approvals?.plugin, + enabled: true, + mode: "session" as const, + }, + } + : {}), + }, + } + : {}; + const execApprovalsConfig = approvalOverrides + ? { + enabled: true, + approvers: [params.driverBotUserId], + target: approvalOverrides.target ?? ("channel" as const), + } + : undefined; return { ...baseCfg, + ...approvalForwardingConfig, plugins: { ...baseCfg.plugins, allow: pluginAllow, @@ -508,9 +644,11 @@ function buildSlackQaConfig( mode: "socket", botToken: params.sutBotToken, appToken: params.sutAppToken, + allowFrom: [params.driverBotUserId], groupPolicy: "allowlist", allowBots: true, replyToMode: params.overrides?.replyToMode ?? "off", + ...(execApprovalsConfig ? { execApprovals: execApprovalsConfig } : {}), channels: { [params.channelId]: { enabled: true, @@ -593,6 +731,126 @@ async function listSlackThreadMessages(params: { return replies.messages ?? []; } +function formatApprovalResultValue(value: unknown) { + if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") { + return String(value); + } + if (value == null) { + return ""; + } + return JSON.stringify(value) ?? ""; +} + +function readAcceptedApprovalRequest(result: unknown) { + const accepted = + typeof result === "object" && result !== null + ? 
(result as { id?: unknown; status?: unknown }) + : null; + if (accepted?.status !== "accepted") { + throw new Error( + `approval request status was ${formatApprovalResultValue( + accepted?.status, + )} instead of accepted`, + ); + } + return accepted; +} + +function readAcceptedApprovalRequestId(result: unknown) { + const id = readAcceptedApprovalRequest(result).id; + if (typeof id !== "string" || id.trim().length === 0) { + throw new Error(`approval request id was ${formatApprovalResultValue(id)}`); + } + return id; +} + +function collectSlackBlockStringFields( + value: unknown, + fieldName: string, + values: string[] = [], +): string[] { + if (Array.isArray(value)) { + for (const entry of value) { + collectSlackBlockStringFields(entry, fieldName, values); + } + return values; + } + if (!value || typeof value !== "object") { + return values; + } + for (const [key, entry] of Object.entries(value)) { + if (key === fieldName && typeof entry === "string" && entry.trim().length > 0) { + values.push(entry); + continue; + } + collectSlackBlockStringFields(entry, fieldName, values); + } + return values; +} + +function collectSlackBlockText(blocks?: unknown[]) { + return collectSlackBlockStringFields(blocks ?? [], "text"); +} + +function collectSlackActionValues(blocks?: unknown[]) { + return collectSlackBlockStringFields(blocks ?? 
[], "value"); +} + +function collectSlackButtonLabels(blocks?: unknown[]) { + const labels: string[] = []; + function visit(value: unknown) { + if (Array.isArray(value)) { + for (const entry of value) { + visit(entry); + } + return; + } + if (!value || typeof value !== "object") { + return; + } + const candidate = value as Record; + if (candidate.type === "button") { + const text = candidate.text; + if (text && typeof text === "object") { + const label = (text as { text?: unknown }).text; + if (typeof label === "string" && label.trim().length > 0) { + labels.push(label); + } + } + } + for (const entry of Object.values(candidate)) { + visit(entry); + } + } + visit(blocks ?? []); + return labels; +} + +function buildSlackApprovalCheckpointMessage( + message: SlackMessage, +): SlackApprovalCheckpointMessage { + const actionValues = collectSlackActionValues(message.blocks); + return { + actionLabels: collectSlackButtonLabels(message.blocks), + blockText: collectSlackBlockText(message.blocks), + hasNativeActions: actionValues.some((value) => value.includes("/approve")), + text: message.text ?? 
"", + }; +} + +function hasSlackNativeApprovalActions(params: { + actionValues: string[]; + approvalId: string; + decision: SlackQaApprovalDecision; +}) { + return params.actionValues.some( + (value) => + value.includes("/approve") && + value.includes(params.approvalId) && + value.includes(params.decision), + ); +} + function isSutSlackMessage(message: SlackMessage, sutIdentity: SlackAuthIdentity) { return ( (message.user !== undefined && message.user === sutIdentity.userId) || @@ -625,6 +883,8 @@ async function waitForSlackScenarioReply(params: { } const matchedScenario = text.includes(params.matchText); params.observedMessages.push({ + actionValues: collectSlackActionValues(message.blocks), + blockText: collectSlackBlockText(message.blocks), botId: message.bot_id, channelId: params.channelId, matchedScenario, @@ -706,6 +966,8 @@ async function waitForSlackNoReply(params: { } const matchedScenario = text.includes(params.matchText); params.observedMessages.push({ + actionValues: collectSlackActionValues(message.blocks), + blockText: collectSlackBlockText(message.blocks), botId: message.bot_id, channelId: params.channelId, matchedScenario, @@ -724,9 +986,516 @@ async function waitForSlackNoReply(params: { } } +function resolveApprovalDecisionLabel(decision: SlackQaApprovalDecision) { + return decision === "allow-once" + ? "Allowed once" + : decision === "allow-always" + ? "Allowed always" + : "Denied"; +} + +function resolveApprovalHeading(params: { + approvalKind: SlackQaApprovalKind; + state: "pending" | "resolved"; + decision?: SlackQaApprovalDecision; +}) { + if (params.state === "pending") { + return params.approvalKind === "exec" ? "Exec approval required" : "Plugin approval required"; + } + const label = resolveApprovalDecisionLabel(params.decision ?? "allow-once"); + return params.approvalKind === "exec" ? `Exec approval: ${label}` : `Plugin approval: ${label}`; +} + +function getSlackMessageSearchText(message: SlackMessage) { + return [message.text ?? 
"", ...collectSlackBlockText(message.blocks)].join("\n"); +} + +function pushObservedApprovalMessage(params: { + channelId: string; + matchedScenario: boolean; + message: SlackMessage; + observedMessages: SlackObservedMessage[]; + scenarioId: string; + scenarioTitle: string; +}) { + if (!params.message.ts) { + return; + } + params.observedMessages.push({ + actionValues: collectSlackActionValues(params.message.blocks), + blockText: collectSlackBlockText(params.message.blocks), + botId: params.message.bot_id, + channelId: params.channelId, + matchedScenario: params.matchedScenario, + scenarioId: params.scenarioId, + scenarioTitle: params.scenarioTitle, + text: params.message.text ?? "", + threadTs: params.message.thread_ts, + ts: params.message.ts, + userId: params.message.user, + }); +} + +async function waitForSlackApprovalPrompt(params: { + approvalId: string; + approvalKind: SlackQaApprovalKind; + channelId: string; + client: WebClient; + decision: SlackQaApprovalDecision; + observedMessages: SlackObservedMessage[]; + oldestTs: string; + scenarioId: string; + scenarioTitle: string; + sutIdentity: SlackAuthIdentity; + timeoutMs: number; + token: string; +}) { + const startedAt = Date.now(); + const seenObservedMessages = new Set(); + let lastMatchedWithoutActions = ""; + while (Date.now() - startedAt < params.timeoutMs) { + const messages = await listSlackMessages({ + channelId: params.channelId, + client: params.client, + oldestTs: params.oldestTs, + }); + for (const message of messages) { + if (!message.ts || !isSutSlackMessage(message, params.sutIdentity)) { + continue; + } + const text = getSlackMessageSearchText(message); + const actionValues = collectSlackActionValues(message.blocks); + const hasHeading = text.includes( + resolveApprovalHeading({ approvalKind: params.approvalKind, state: "pending" }), + ); + const hasToken = text.includes(params.token); + const observedKey = `${message.ts}:${message.text ?? 
""}:${actionValues.join("|")}`; + if (hasHeading || hasToken || hasSlackNativeApprovalActions({ ...params, actionValues })) { + if (!seenObservedMessages.has(observedKey)) { + seenObservedMessages.add(observedKey); + pushObservedApprovalMessage({ + channelId: params.channelId, + matchedScenario: hasHeading && hasToken, + message, + observedMessages: params.observedMessages, + scenarioId: params.scenarioId, + scenarioTitle: params.scenarioTitle, + }); + } + } + if (!hasHeading || !hasToken) { + continue; + } + if ( + !hasSlackNativeApprovalActions({ + actionValues, + approvalId: params.approvalId, + decision: params.decision, + }) + ) { + lastMatchedWithoutActions = `message ${message.ts} matched approval text but did not expose native approval button values`; + continue; + } + return { + actionValues, + message, + observedAt: new Date().toISOString(), + }; + } + await new Promise((resolve) => setTimeout(resolve, 1_000)); + } + throw new Error( + [ + `timed out after ${params.timeoutMs}ms waiting for Slack ${params.approvalKind} approval prompt`, + lastMatchedWithoutActions, + ] + .filter(Boolean) + .join("; "), + ); +} + +async function waitForSlackApprovalResolvedUpdate(params: { + approvalKind: SlackQaApprovalKind; + channelId: string; + client: WebClient; + decision: SlackQaApprovalDecision; + messageTs: string; + observedMessages: SlackObservedMessage[]; + oldestTs: string; + scenarioId: string; + scenarioTitle: string; + sutIdentity: SlackAuthIdentity; + timeoutMs: number; + token: string; +}) { + const startedAt = Date.now(); + const seenObservedMessages = new Set(); + while (Date.now() - startedAt < params.timeoutMs) { + const messages = await listSlackMessages({ + channelId: params.channelId, + client: params.client, + oldestTs: params.oldestTs, + }); + const message = messages.find((entry) => entry.ts === params.messageTs); + if (message && isSutSlackMessage(message, params.sutIdentity)) { + const text = getSlackMessageSearchText(message); + const 
actionValues = collectSlackActionValues(message.blocks); + const observedKey = `${message.ts}:${message.text ?? ""}:${actionValues.join("|")}`; + if (!seenObservedMessages.has(observedKey)) { + seenObservedMessages.add(observedKey); + pushObservedApprovalMessage({ + channelId: params.channelId, + matchedScenario: text.includes(params.token), + message, + observedMessages: params.observedMessages, + scenarioId: params.scenarioId, + scenarioTitle: params.scenarioTitle, + }); + } + if ( + text.includes( + resolveApprovalHeading({ + approvalKind: params.approvalKind, + decision: params.decision, + state: "resolved", + }), + ) && + text.includes(params.token) && + !actionValues.some((value) => value.includes("/approve")) + ) { + return { + actionValues, + message, + observedAt: new Date().toISOString(), + }; + } + } + await new Promise((resolve) => setTimeout(resolve, 1_000)); + } + throw new Error( + `timed out after ${params.timeoutMs}ms waiting for Slack ${params.approvalKind} approval resolution update`, + ); +} + +function resolveSlackApprovalCheckpointConfig(env: NodeJS.ProcessEnv = process.env) { + const checkpointDir = env[SLACK_QA_APPROVAL_CHECKPOINT_DIR_ENV]?.trim(); + if (!checkpointDir) { + return undefined; + } + const rawTimeout = env[SLACK_QA_APPROVAL_CHECKPOINT_TIMEOUT_MS_ENV]?.trim(); + const timeoutMs = rawTimeout + ? 
Number.parseInt(rawTimeout, 10) + : SLACK_QA_APPROVAL_CHECKPOINT_DEFAULT_TIMEOUT_MS; + if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) { + throw new Error(`${SLACK_QA_APPROVAL_CHECKPOINT_TIMEOUT_MS_ENV} must be a positive integer.`); + } + return { + checkpointDir, + timeoutMs, + }; +} + +async function waitForSlackApprovalCheckpointAck(params: { + ackPath: string; + timeoutMs: number; +}): Promise { + const startedAt = Date.now(); + while (Date.now() - startedAt < params.timeoutMs) { + try { + const parsed = JSON.parse(await fs.readFile(params.ackPath, "utf8")) as { + capturedAt?: unknown; + error?: unknown; + screenshotPath?: unknown; + }; + if (typeof parsed.error === "string" && parsed.error.trim().length > 0) { + throw new Error(`Slack approval checkpoint watcher failed: ${parsed.error}`); + } + return { + capturedAt: typeof parsed.capturedAt === "string" ? parsed.capturedAt : undefined, + screenshotPath: + typeof parsed.screenshotPath === "string" ? parsed.screenshotPath : undefined, + }; + } catch (error) { + if ((error as NodeJS.ErrnoException).code !== "ENOENT") { + throw error; + } + } + await new Promise((resolve) => setTimeout(resolve, 500)); + } + throw new Error(`timed out after ${params.timeoutMs}ms waiting for ${params.ackPath}`); +} + +async function writeSlackApprovalCheckpoint(params: { + approvalId: string; + approvalKind: SlackQaApprovalKind; + channelId: string; + decision?: SlackQaApprovalDecision; + message: SlackMessage; + observedAt: string; + scenarioId: SlackQaScenarioId; + state: SlackApprovalCheckpointState; +}) { + const config = resolveSlackApprovalCheckpointConfig(); + if (!config) { + return undefined; + } + await fs.mkdir(config.checkpointDir, { recursive: true }); + const checkpointPath = path.join( + config.checkpointDir, + `${params.scenarioId}.${params.state}.json`, + ); + const ackPath = path.join(config.checkpointDir, `${params.scenarioId}.${params.state}.ack.json`); + await fs.rm(ackPath, { force: true }).catch(() => 
{}); + await fs.writeFile( + checkpointPath, + `${JSON.stringify( + { + version: 1, + scenarioId: params.scenarioId, + approvalKind: params.approvalKind, + state: params.state, + approvalId: params.approvalId, + channelId: params.channelId, + messageTs: params.message.ts, + threadTs: params.message.thread_ts ?? null, + decision: params.decision ?? null, + observedAt: params.observedAt, + message: buildSlackApprovalCheckpointMessage(params.message), + }, + null, + 2, + )}\n`, + ); + const ack = await waitForSlackApprovalCheckpointAck({ + ackPath, + timeoutMs: config.timeoutMs, + }); + return { + ackPath, + checkpointPath, + screenshotPath: ack.screenshotPath, + }; +} + +async function requestSlackApproval(params: { + approvalId: string; + channelId: string; + context: Omit; + run: SlackQaApprovalScenarioRun; + sutAccountId: string; +}) { + const commonParams = { + timeoutMs: SLACK_QA_APPROVAL_DECISION_TIMEOUT_MS, + turnSourceAccountId: params.sutAccountId, + turnSourceChannel: "slack", + turnSourceTo: `channel:${params.channelId}`, + twoPhase: true, + }; + if (params.run.approvalKind === "exec") { + const result = await params.context.gateway.call( + "exec.approval.request", + { + ...commonParams, + ask: "always", + command: `printf '%s\\n' '${params.run.token}'`, + host: "gateway", + id: params.approvalId, + security: "full", + }, + { + expectFinal: false, + timeoutMs: SLACK_QA_APPROVAL_DECISION_TIMEOUT_MS + 5_000, + }, + ); + const acceptedId = readAcceptedApprovalRequestId(result); + if (acceptedId !== params.approvalId) { + throw new Error( + `accepted exec approval id was ${formatApprovalResultValue( + acceptedId, + )} instead of ${params.approvalId}`, + ); + } + return acceptedId; + } + const result = await params.context.gateway.call( + "plugin.approval.request", + { + ...commonParams, + agentId: "qa", + description: `Slack plugin approval QA request ${params.run.token}`, + pluginId: "qa-slack-plugin", + severity: "warning", + title: `Slack plugin approval QA 
${params.run.token}`, + toolName: "slack_qa_tool", + }, + { + expectFinal: false, + timeoutMs: SLACK_QA_APPROVAL_DECISION_TIMEOUT_MS + 5_000, + }, + ); + return readAcceptedApprovalRequestId(result); +} + +async function waitForApprovalDecision(params: { + approvalId: string; + context: Omit; + kind: SlackQaApprovalKind; +}) { + const method = + params.kind === "exec" ? "exec.approval.waitDecision" : "plugin.approval.waitDecision"; + return await params.context.gateway.call( + method, + { id: params.approvalId }, + { + expectFinal: true, + timeoutMs: SLACK_QA_APPROVAL_DECISION_TIMEOUT_MS + 5_000, + }, + ); +} + +async function resolveApprovalDecision(params: { + approvalId: string; + context: Omit; + decision: SlackQaApprovalDecision; + kind: SlackQaApprovalKind; +}) { + const method = params.kind === "exec" ? "exec.approval.resolve" : "plugin.approval.resolve"; + return await params.context.gateway.call( + method, + { decision: params.decision, id: params.approvalId }, + { + expectFinal: false, + timeoutMs: SLACK_QA_APPROVAL_DECISION_TIMEOUT_MS + 5_000, + }, + ); +} + +function assertApprovalDecisionResult(params: { + decision: SlackQaApprovalDecision; + result: unknown; +}) { + const resultDecision = + typeof params.result === "object" && params.result !== null + ? (params.result as { decision?: unknown }).decision + : undefined; + if (resultDecision !== params.decision) { + throw new Error( + `approval decision was ${formatApprovalResultValue(resultDecision)} instead of ${params.decision}`, + ); + } +} + +async function runSlackApprovalScenario(params: { + channelId: string; + context: Omit; + observedMessages: SlackObservedMessage[]; + run: SlackQaApprovalScenarioRun; + scenario: SlackQaScenarioDefinition; + sutAccountId: string; +}) { + const requestStartedAt = new Date(); + const oldestTs = ((requestStartedAt.getTime() - 5_000) / 1_000).toFixed(6); + const requestedApprovalId = + params.run.approvalKind === "exec" + ? 
`slack-qa-exec-${randomUUID()}` + : `slack-qa-plugin-${randomUUID()}`; + const approvalId = await requestSlackApproval({ + approvalId: requestedApprovalId, + channelId: params.channelId, + context: params.context, + run: params.run, + sutAccountId: params.sutAccountId, + }); + const pending = await waitForSlackApprovalPrompt({ + approvalId, + approvalKind: params.run.approvalKind, + channelId: params.channelId, + client: params.context.sutReadClient, + decision: params.run.decision, + observedMessages: params.observedMessages, + oldestTs, + scenarioId: params.scenario.id, + scenarioTitle: params.scenario.title, + sutIdentity: params.context.sutIdentity, + timeoutMs: params.scenario.timeoutMs, + token: params.run.token, + }); + const pendingCheckpoint = await writeSlackApprovalCheckpoint({ + approvalId, + approvalKind: params.run.approvalKind, + channelId: params.channelId, + message: pending.message, + observedAt: pending.observedAt, + scenarioId: params.scenario.id, + state: "pending", + }); + await resolveApprovalDecision({ + approvalId, + context: params.context, + decision: params.run.decision, + kind: params.run.approvalKind, + }); + assertApprovalDecisionResult({ + decision: params.run.decision, + result: await waitForApprovalDecision({ + approvalId, + context: params.context, + kind: params.run.approvalKind, + }), + }); + const resolved = await waitForSlackApprovalResolvedUpdate({ + approvalKind: params.run.approvalKind, + channelId: params.channelId, + client: params.context.sutReadClient, + decision: params.run.decision, + messageTs: pending.message.ts, + observedMessages: params.observedMessages, + oldestTs, + scenarioId: params.scenario.id, + scenarioTitle: params.scenario.title, + sutIdentity: params.context.sutIdentity, + timeoutMs: params.scenario.timeoutMs, + token: params.run.token, + }); + const resolvedCheckpoint = await writeSlackApprovalCheckpoint({ + approvalId, + approvalKind: params.run.approvalKind, + channelId: params.channelId, + decision: 
params.run.decision, + message: resolved.message, + observedAt: resolved.observedAt, + scenarioId: params.scenario.id, + state: "resolved", + }); + const responseObservedAt = new Date(resolved.observedAt); + return { + artifact: { + approvalId, + approvalKind: params.run.approvalKind, + channelId: params.channelId, + decision: params.run.decision, + pendingActionValues: pending.actionValues, + pendingCheckpointPath: pendingCheckpoint?.checkpointPath, + pendingMessageTs: pending.message.ts, + pendingScreenshotPath: pendingCheckpoint?.screenshotPath, + pendingText: pending.message.text, + resolvedActionValues: resolved.actionValues, + resolvedCheckpointPath: resolvedCheckpoint?.checkpointPath, + resolvedMessageTs: resolved.message.ts, + resolvedScreenshotPath: resolvedCheckpoint?.screenshotPath, + resolvedText: resolved.message.text, + threadTs: pending.message.thread_ts, + } satisfies SlackApprovalArtifact, + requestStartedAt, + responseObservedAt, + rttMs: responseObservedAt.getTime() - requestStartedAt.getTime(), + }; +} + async function waitForSlackChannelRunning( gateway: Awaited>, accountId: string, + mode: SlackChannelReadinessMode, ): Promise { const startedAt = Date.now(); let lastStatus: SlackChannelStatus | undefined; @@ -744,7 +1513,7 @@ async function waitForSlackChannelRunning( connected?: boolean; lastConnectedAt?: number; lastDisconnect?: unknown; - lastError?: string; + lastError?: string | null; restartPending?: boolean; running?: boolean; }> @@ -762,7 +1531,7 @@ async function waitForSlackChannelRunning( running: match.running, } : undefined; - if (match?.running && match.connected === true && match.restartPending !== true) { + if (isSlackChannelReadyForQa(lastStatus, mode)) { if (!lastStatus) { throw new Error(`slack account "${accountId}" status disappeared after readiness check`); } @@ -782,20 +1551,24 @@ async function waitForSlackChannelRunning( async function waitForSlackChannelStable( gateway: Awaited>, accountId: string, + mode: 
SlackChannelReadinessMode, ) { const startedAt = Date.now(); + let readySince: number | undefined; while (Date.now() - startedAt < SLACK_QA_READY_TIMEOUT_MS) { - const status = await waitForSlackChannelRunning(gateway, accountId); - const connectedAt = - typeof status.lastConnectedAt === "number" && status.lastConnectedAt > 0 - ? status.lastConnectedAt - : Date.now(); - const connectedForMs = Date.now() - connectedAt; - if (connectedForMs >= SLACK_QA_READY_STABILITY_MS) { + const status = await waitForSlackChannelRunning(gateway, accountId, mode); + const observedAt = Date.now(); + readySince = resolveSlackChannelReadySince({ + observedAt, + previousReadySince: readySince, + status, + }); + const readyForMs = observedAt - readySince; + if (readyForMs >= SLACK_QA_READY_STABILITY_MS) { return; } await new Promise((resolve) => - setTimeout(resolve, Math.max(500, SLACK_QA_READY_STABILITY_MS - connectedForMs)), + setTimeout(resolve, Math.max(500, SLACK_QA_READY_STABILITY_MS - readyForMs)), ); } throw new Error( @@ -803,6 +1576,32 @@ async function waitForSlackChannelStable( ); } +function isSlackChannelReadyForQa( + status: SlackChannelStatus | undefined, + mode: SlackChannelReadinessMode, +): boolean { + if ( + !status?.running || + status.restartPending === true || + status.lastError != null || + status.connected === false + ) { + return false; + } + return mode === "started" || status.connected === true; +} + +function resolveSlackChannelReadySince(params: { + observedAt: number; + previousReadySince: number | undefined; + status: SlackChannelStatus; +}): number { + if (typeof params.status.lastConnectedAt === "number" && params.status.lastConnectedAt > 0) { + return params.status.lastConnectedAt; + } + return params.previousReadySince ?? 
params.observedAt; +} + function isRetryableSlackQaScenarioError(error: unknown) { return /timed out after \d+ms waiting for Slack message/iu.test(formatErrorMessage(error)); } @@ -813,6 +1612,8 @@ function toObservedSlackArtifacts(params: { redactMetadata: boolean; }): SlackObservedMessageArtifact[] { return params.messages.map((message) => ({ + actionValues: params.includeContent ? message.actionValues : undefined, + blockText: params.includeContent ? message.blockText : undefined, botId: params.redactMetadata ? undefined : message.botId, channelId: params.redactMetadata ? undefined : message.channelId, matchedScenario: message.matchedScenario, @@ -825,6 +1626,39 @@ function toObservedSlackArtifacts(params: { })); } +function toSlackQaScenarioArtifactResults(params: { + includeContent: boolean; + redactMetadata: boolean; + scenarios: SlackQaScenarioResult[]; +}): SlackQaScenarioResult[] { + return params.scenarios.map((scenario) => { + if (!scenario.approval) { + return scenario; + } + const approval = scenario.approval; + return { + ...scenario, + approval: { + approvalId: params.redactMetadata ? "" : approval.approvalId, + approvalKind: approval.approvalKind, + channelId: params.redactMetadata ? undefined : approval.channelId, + decision: approval.decision, + pendingActionValues: params.includeContent ? approval.pendingActionValues : undefined, + pendingCheckpointPath: approval.pendingCheckpointPath, + pendingMessageTs: params.redactMetadata ? undefined : approval.pendingMessageTs, + pendingScreenshotPath: approval.pendingScreenshotPath, + pendingText: params.includeContent ? approval.pendingText : undefined, + resolvedActionValues: params.includeContent ? approval.resolvedActionValues : undefined, + resolvedCheckpointPath: approval.resolvedCheckpointPath, + resolvedMessageTs: params.redactMetadata ? undefined : approval.resolvedMessageTs, + resolvedScreenshotPath: approval.resolvedScreenshotPath, + resolvedText: params.includeContent ? 
approval.resolvedText : undefined, + threadTs: params.redactMetadata ? undefined : approval.threadTs, + }, + }; + }); +} + function renderSlackQaMarkdown(params: { channelId: string; cleanupIssues: string[]; @@ -861,6 +1695,17 @@ function renderSlackQaMarkdown(params: { if (scenario.rttMs !== undefined) { lines.push(`- RTT: ${scenario.rttMs}ms`); } + if (scenario.approval) { + lines.push(`- Approval kind: ${scenario.approval.approvalKind}`); + lines.push(`- Approval ID: \`${scenario.approval.approvalId}\``); + lines.push(`- Decision: ${scenario.approval.decision}`); + if (scenario.approval.pendingScreenshotPath) { + lines.push(`- Pending screenshot: \`${scenario.approval.pendingScreenshotPath}\``); + } + if (scenario.approval.resolvedScreenshotPath) { + lines.push(`- Resolved screenshot: \`${scenario.approval.resolvedScreenshotPath}\``); + } + } lines.push(""); } return lines.join("\n"); @@ -963,8 +1808,14 @@ export async function runSlackQaLive(params: { }), }); const activeGatewayHarness = gatewayHarness; - await waitForSlackChannelStable(activeGatewayHarness.gateway, sutAccountId); const scenarioRun = scenario.buildRun(sutIdentity.userId); + const readinessMode: SlackChannelReadinessMode = + scenarioRun.kind === "approval" ? 
"started" : "connected"; + await waitForSlackChannelStable( + activeGatewayHarness.gateway, + sutAccountId, + readinessMode, + ); const baseScenarioContext = { channelId: activeRuntimeEnv.channelId, driverClient, @@ -979,8 +1830,38 @@ export async function runSlackQaLive(params: { sutIdentity, sutReadClient, waitForReady: async () => - await waitForSlackChannelStable(activeGatewayHarness.gateway, sutAccountId), + await waitForSlackChannelStable( + activeGatewayHarness.gateway, + sutAccountId, + "connected", + ), }; + if (scenarioRun.kind === "approval") { + const approval = await runSlackApprovalScenario({ + channelId: activeRuntimeEnv.channelId, + context: baseScenarioContext, + observedMessages, + run: scenarioRun, + scenario, + sutAccountId, + }); + scenarioResults.push({ + approval: approval.artifact, + id: scenario.id, + title: scenario.title, + status: "pass", + details: [ + `${scenarioRun.approvalKind} approval resolved ${scenarioRun.decision} in ${approval.rttMs}ms`, + scenarioAttempt > 1 ? `retried ${scenarioAttempt - 1}x` : undefined, + ] + .filter(Boolean) + .join("; "), + rttMs: approval.rttMs, + requestStartedAt: approval.requestStartedAt.toISOString(), + responseObservedAt: approval.responseObservedAt.toISOString(), + }); + break; + } const beforeRunResult = await scenarioRun.beforeRun?.(baseScenarioContext); const beforeRunDetails = typeof beforeRunResult === "string" ? 
beforeRunResult : beforeRunResult?.details; @@ -1135,6 +2016,11 @@ export async function runSlackQaLive(params: { const observedMessagesPath = path.join(outputDir, "slack-qa-observed-messages.json"); const passed = scenarioResults.filter((entry) => entry.status === "pass").length; const failed = scenarioResults.filter((entry) => entry.status === "fail").length; + const artifactScenarioResults = toSlackQaScenarioArtifactResults({ + scenarios: scenarioResults, + includeContent: includeObservedMessageContent, + redactMetadata: redactPublicMetadata, + }); const summary: SlackQaSummary = { credentials: credentialLease ? { @@ -1162,7 +2048,7 @@ export async function runSlackQaLive(params: { passed, failed, }, - scenarios: scenarioResults, + scenarios: artifactScenarioResults, }; await fs.writeFile( observedMessagesPath, @@ -1186,7 +2072,7 @@ export async function runSlackQaLive(params: { finishedAt, gatewayDebugDirPath: preservedGatewayDebugArtifacts ? gatewayDebugDirPath : undefined, redactMetadata: redactPublicMetadata, - scenarios: scenarioResults, + scenarios: artifactScenarioResults, startedAt, })}\n`, ); @@ -1196,15 +2082,25 @@ export async function runSlackQaLive(params: { summaryPath, observedMessagesPath, gatewayDebugDirPath: preservedGatewayDebugArtifacts ? 
gatewayDebugDirPath : undefined, - scenarios: scenarioResults, + scenarios: artifactScenarioResults, }; } export const testing = { + buildSlackApprovalCheckpointMessage, + buildSlackQaConfig, + collectSlackActionValues, + collectSlackButtonLabels, + collectSlackBlockText, findScenario, + isSlackChannelReadyForQa, parseSlackQaCredentialPayload, + resolveSlackChannelReadySince, + resolveSlackApprovalCheckpointConfig, + resolveApprovalDecision, resolveSlackQaRuntimeEnv, SLACK_QA_STANDARD_SCENARIO_IDS, + toSlackQaScenarioArtifactResults, waitForSlackNoReply, }; export { testing as __testing }; diff --git a/extensions/qa-lab/src/mantis/cli.runtime.ts b/extensions/qa-lab/src/mantis/cli.runtime.ts index 7ad6bec994b1..82277b504145 100644 --- a/extensions/qa-lab/src/mantis/cli.runtime.ts +++ b/extensions/qa-lab/src/mantis/cli.runtime.ts @@ -62,6 +62,11 @@ export async function runMantisSlackDesktopSmokeCommand(opts: MantisSlackDesktop if (result.videoPath) { process.stdout.write(`Mantis Slack desktop video: ${result.videoPath}\n`); } + for (const screenshotPath of result.approvalCheckpointScreenshotPaths ?? 
[]) { + process.stdout.write( + `Mantis Slack desktop approval checkpoint screenshot: ${screenshotPath}\n`, + ); + } if (result.status === "fail") { process.exitCode = 1; } diff --git a/extensions/qa-lab/src/mantis/cli.ts b/extensions/qa-lab/src/mantis/cli.ts index 70197172ec28..62987f3e4250 100644 --- a/extensions/qa-lab/src/mantis/cli.ts +++ b/extensions/qa-lab/src/mantis/cli.ts @@ -105,17 +105,20 @@ type MantisDesktopBrowserSmokeCommanderOptions = { type MantisSlackDesktopSmokeCommanderOptions = { altModel?: string; + approvalCheckpoints?: boolean; class?: string; crabboxBin?: string; credentialRole?: string; credentialSource?: string; fast?: boolean; + freshPr?: string; gatewaySetup?: boolean; hydrateMode?: MantisSlackDesktopHydrateMode; idleTimeout?: string; keepLease?: boolean; leaseId?: string; machineClass?: string; + market?: string; model?: string; outputDir?: string; provider?: string; @@ -316,12 +319,18 @@ export function registerMantisCli(qa: Command) { .option("--provider ", "Crabbox provider") .option("--machine-class ", "Crabbox machine class") .option("--class ", "Alias for --machine-class") + .option("--market ", "Crabbox capacity market: spot or on-demand") .option("--lease-id ", "Reuse an existing Crabbox lease") + .option("--fresh-pr ", "Use Crabbox fresh PR checkout instead of syncing the local tree") .option("--idle-timeout ", "Crabbox idle timeout") .option("--ttl ", "Crabbox maximum lease lifetime") .option("--keep-lease", "Keep a lease created by this run after a passing smoke") .option("--no-keep-lease", "Stop a lease created by this run after a passing smoke") .option("--gateway-setup", "Start a persistent OpenClaw Slack gateway inside the VNC VM") + .option( + "--approval-checkpoints", + "Run Slack approval scenarios with visual checkpoint screenshot acknowledgements", + ) .option("--slack-url ", "Slack web URL to open in the visible browser") .option("--slack-channel-id ", "Slack channel id for gateway setup allowlist") 
.option("--provider-mode ", "QA provider mode") @@ -338,18 +347,24 @@ export function registerMantisCli(qa: Command) { .option("--credential-role ", "Credential role for convex auth") .option("--fast", "Enable provider fast mode where supported") .action(async (opts: MantisSlackDesktopSmokeCommanderOptions) => { + if (opts.approvalCheckpoints && opts.gatewaySetup) { + throw new Error("--approval-checkpoints cannot be used with --gateway-setup."); + } await runSlackDesktopSmoke({ alternateModel: opts.altModel, + approvalCheckpoints: opts.approvalCheckpoints, crabboxBin: opts.crabboxBin, credentialRole: opts.credentialRole, credentialSource: opts.credentialSource, fastMode: opts.fast, + freshPr: opts.freshPr, gatewaySetup: opts.gatewaySetup, hydrateMode: opts.hydrateMode, idleTimeout: opts.idleTimeout, keepLease: opts.keepLease, leaseId: opts.leaseId, machineClass: opts.machineClass ?? opts.class, + market: opts.market, outputDir: opts.outputDir, primaryModel: opts.model, provider: opts.provider, diff --git a/extensions/qa-lab/src/mantis/crabbox-runtime.ts b/extensions/qa-lab/src/mantis/crabbox-runtime.ts index daeae7770bf1..a1ab3622efb2 100644 --- a/extensions/qa-lab/src/mantis/crabbox-runtime.ts +++ b/extensions/qa-lab/src/mantis/crabbox-runtime.ts @@ -114,10 +114,12 @@ export async function warmupCrabbox(params: { env: NodeJS.ProcessEnv; idleTimeout: string; machineClass: string; + market?: string; provider: string; runner: CommandRunner; ttl: string; }) { + const marketArgs = params.market ? 
["--market", params.market] : []; const result = await runCommand({ command: params.crabboxBin, args: [ @@ -128,6 +130,7 @@ export async function warmupCrabbox(params: { "--browser", "--class", params.machineClass, + ...marketArgs, "--idle-timeout", params.idleTimeout, "--ttl", diff --git a/extensions/qa-lab/src/mantis/slack-desktop-smoke.runtime.test.ts b/extensions/qa-lab/src/mantis/slack-desktop-smoke.runtime.test.ts index 104bf675e474..20a48b7be393 100644 --- a/extensions/qa-lab/src/mantis/slack-desktop-smoke.runtime.test.ts +++ b/extensions/qa-lab/src/mantis/slack-desktop-smoke.runtime.test.ts @@ -1,6 +1,7 @@ import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; +import { Command } from "commander"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { runMantisSlackDesktopSmoke } from "./slack-desktop-smoke.runtime.js"; @@ -34,6 +35,64 @@ function phaseStatus( return phases.find((phase) => phase.name === name)?.status; } +async function writeApprovalCheckpointArtifacts(outputDir: string, scenarioIds: readonly string[]) { + const checkpointDir = path.join(outputDir, "approval-checkpoints"); + await fs.mkdir(checkpointDir, { recursive: true }); + for (const scenarioId of scenarioIds) { + for (const state of ["pending", "resolved"] as const) { + await fs.writeFile( + path.join(checkpointDir, `${scenarioId}.${state}.json`), + `${JSON.stringify({ + version: 1, + scenarioId, + approvalKind: scenarioId.includes("plugin") ? "plugin" : "exec", + state, + approvalId: scenarioId.includes("plugin") ? "plugin:abc" : "exec-abc", + channelId: "C123456789", + messageTs: "1.000000", + threadTs: null, + decision: state === "resolved" ? "allow-once" : null, + observedAt: "2026-05-04T13:00:29.000Z", + message: { + actionLabels: state === "pending" ? ["Allow Once", "Allow Always", "Deny"] : [], + blockText: + state === "pending" + ? 
["Plugin approval required", "Slack plugin approval QA marker"] + : ["Plugin approval: Allowed once", "Slack plugin approval QA marker"], + hasNativeActions: state === "pending", + text: + state === "pending" ? "Plugin approval required" : "Plugin approval: Allowed once", + }, + })}\n`, + ); + await fs.writeFile( + path.join(checkpointDir, `${scenarioId}.${state}.ack.json`), + `${JSON.stringify({ + version: 1, + capturedAt: "2026-05-04T13:00:30.000Z", + scenarioId, + screenshotPath: `${checkpointDir}/${scenarioId}-${state}.png`, + state, + })}\n`, + ); + await fs.writeFile(path.join(checkpointDir, `${scenarioId}-${state}.png`), "png"); + } + } +} + +function mockMantisCliRuntime(runMantisSlackDesktopSmokeCommand = vi.fn()) { + vi.doMock("./cli.runtime.js", () => ({ + runMantisBeforeAfterCommand: vi.fn(), + runMantisDesktopBrowserSmokeCommand: vi.fn(), + runMantisDiscordSmokeCommand: vi.fn(), + runMantisSlackDesktopSmokeCommand, + runMantisTelegramDesktopBuilderCommand: vi.fn(), + runMantisVisualDriverCommand: vi.fn(), + runMantisVisualTaskCommand: vi.fn(), + })); + return runMantisSlackDesktopSmokeCommand; +} + describe("mantis Slack desktop smoke runtime", () => { let repoRoot: string; @@ -86,7 +145,10 @@ describe("mantis Slack desktop smoke runtime", () => { } else { await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.png"), "png"); await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.mp4"), "mp4"); - await fs.writeFile(path.join(outputDir as string, "remote-metadata.json"), "{}\n"); + await fs.writeFile( + path.join(outputDir as string, "remote-metadata.json"), + `${JSON.stringify({ qaExitCode: 0 })}\n`, + ); await fs.writeFile(path.join(outputDir as string, "chrome.log"), "chrome\n"); await fs.writeFile(path.join(outputDir as string, "ffmpeg.log"), "ffmpeg\n"); await fs.writeFile(path.join(outputDir as string, "slack-desktop-command.log"), "qa\n"); @@ -101,6 +163,7 @@ describe("mantis Slack desktop smoke runtime", () => { 
commandRunner: runner, crabboxBin: "/tmp/crabbox", env: runtimeEnv, + freshPr: "openclaw/openclaw#85141", now: () => new Date("2026-05-04T13:00:00.000Z"), outputDir: ".artifacts/qa-e2e/mantis/slack-desktop-test", primaryModel: "openai/gpt-5.4", @@ -124,6 +187,9 @@ describe("mantis Slack desktop smoke runtime", () => { const runArgs = commands.find( (entry) => entry.command === "/tmp/crabbox" && entry.args[0] === "run", )?.args; + expect(runArgs).toContain("--no-hydrate"); + expect(runArgs).toContain("--fresh-pr"); + expect(runArgs).toContain("openclaw/openclaw#85141"); expect(runArgs).not.toContain("--no-sync"); const remoteScript = runArgs?.at(-1); expect(remoteScript).toContain("hydrate_mode='source'"); @@ -139,6 +205,26 @@ describe("mantis Slack desktop smoke runtime", () => { expect(remoteScript).not.toContain("-video_size"); expect(remoteScript).toContain("openclaw qa slack"); expect(remoteScript).toContain("--scenario 'slack-canary'"); + expect(remoteScript).toContain( + 'slack_qa_output_dir=".artifacts/qa-e2e/mantis/$(basename "$out")/slack-qa"', + ); + expect(remoteScript).toContain('--output-dir "$slack_qa_output_dir"'); + expect(remoteScript).toContain("copy_slack_qa_artifacts"); + expect(remoteScript).not.toContain('--output-dir "$out/slack-qa"'); + expect(remoteScript).toContain("remote_command_timeout_seconds="); + expect(remoteScript).toContain("remote-command-timeout.txt"); + expect(remoteScript).toContain( + 'timeout --kill-after=15s "${remote_command_timeout_seconds}s" bash -c run_mantis_remote_body >"$out/slack-desktop-command.log" 2>&1 &', + ); + expect(remoteScript).toContain("MANTIS_REMOTE_HEARTBEAT"); + expect(remoteScript).toContain("qa_status=$?"); + expect(remoteScript).toContain("MANTIS_REMOTE_FAILURE_DIAGNOSTICS_BEGIN"); + expect(remoteScript).toContain("$out/slack-qa/slack-qa-report.md"); + expect(remoteScript).toContain("$out/slack-qa/slack-qa-summary.json"); + 
expect(remoteScript).toContain("$out/slack-qa/slack-qa-observed-messages.json"); + expect(remoteScript).toContain('tail -n 200 "$diagnostic_file"'); + expect(remoteScript).toContain("Slack desktop screenshot is missing or empty"); + expect(remoteScript).not.toContain('test -s "$out/slack-desktop-smoke.png"'); expect(remoteScript).toContain("OPENCLAW_MANTIS_SLACK_BROWSER_PROFILE_DIR"); const rsyncArgs = commands .filter((entry) => entry.command === "rsync") @@ -172,6 +258,193 @@ describe("mantis Slack desktop smoke runtime", () => { expect(summary.timings.phases.map((phase) => phase.name)).toContain("artifacts.copy"); }); + it("runs approval checkpoint mode with default approval scenarios and records screenshots", async () => { + const commands: { args: readonly string[]; command: string }[] = []; + const expectedScenarios = ["slack-approval-exec-native", "slack-approval-plugin-native"]; + const runner = vi.fn(async (command: string, args: readonly string[]) => { + commands.push({ command, args }); + if (command === "/tmp/crabbox" && args[0] === "warmup") { + return { stdout: "ready lease cbx_123abc\n", stderr: "" }; + } + if (command === "/tmp/crabbox" && args[0] === "inspect") { + return { + stdout: `${JSON.stringify({ + host: "203.0.113.10", + id: "cbx_123abc", + provider: "hetzner", + sshKey: "/tmp/key", + sshPort: "2222", + sshUser: "crabbox", + state: "active", + })}\n`, + stderr: "", + }; + } + if (command === "rsync") { + const outputDir = args.at(-1); + await fs.mkdir(outputDir as string, { recursive: true }); + if (String(outputDir).endsWith("slack-qa/")) { + await fs.writeFile(path.join(outputDir as string, "slack-qa-report.md"), "# Slack\n"); + } else { + await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.png"), "png"); + await fs.writeFile( + path.join(outputDir as string, "remote-metadata.json"), + `${JSON.stringify({ qaExitCode: 0 })}\n`, + ); + await fs.writeFile(path.join(outputDir as string, "slack-desktop-command.log"), 
"qa\n"); + await writeApprovalCheckpointArtifacts(outputDir as string, expectedScenarios); + } + } + return { stdout: "", stderr: "" }; + }); + + const result = await runMantisSlackDesktopSmoke({ + approvalCheckpoints: true, + commandRunner: runner, + crabboxBin: "/tmp/crabbox", + now: () => new Date("2026-05-04T13:15:00.000Z"), + outputDir: ".artifacts/qa-e2e/mantis/slack-desktop-checkpoints", + repoRoot, + }); + + expect(result.status).toBe("pass"); + expect(result.approvalCheckpointScreenshotPaths).toHaveLength(4); + const remoteScript = commands + .find((entry) => entry.command === "/tmp/crabbox" && entry.args[0] === "run") + ?.args.at(-1); + expect(remoteScript).toContain("approval_checkpoints=1"); + expect(remoteScript).toContain('export OPENCLAW_QA_SLACK_CHANNEL_ID="$slack_channel_id"'); + expect(remoteScript).toContain("--scenario 'slack-approval-exec-native'"); + expect(remoteScript).toContain("--scenario 'slack-approval-plugin-native'"); + expect(remoteScript).toContain("OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_DIR"); + expect(remoteScript).toContain("OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_TIMEOUT_MS"); + expect(remoteScript).toContain('cat >"$out/approval-checkpoint-watcher.mjs"'); + expect(remoteScript).not.toContain('node >"$out/approval-checkpoint-watcher.mjs"'); + expect(remoteScript).toContain("approval-checkpoint-watcher.mjs"); + expect(remoteScript).toContain("OPENCLAW_MANTIS_APPROVAL_BROWSER_BIN"); + expect(remoteScript).toContain("Rendered from the Slack API message observed by QA"); + expect(remoteScript).toContain("class='wrap'"); + expect(remoteScript).toContain("--headless=new"); + expect(remoteScript).not.toContain('spawn("scrot", [screenshotPath]'); + expect(remoteScript).toContain("Slack QA exited before all expected approval checkpoints"); + expect(remoteScript).toContain('if [ "$qa_exit" -eq 0 ]; then\n wait "$watcher_pid"'); + expect(remoteScript).toContain( + 'cp "$out/approval-checkpoints/slack-approval-plugin-native-pending.png" 
"$out/slack-desktop-smoke.png"', + ); + expect(remoteScript).toContain( + 'cp "$out/approval-checkpoints/slack-approval-exec-native-pending.png" "$out/slack-desktop-smoke.png"', + ); + const summary = JSON.parse(await fs.readFile(result.summaryPath, "utf8")) as { + artifacts: { + approvalCheckpoints?: { + directoryPath: string; + screenshots: { scenarioId: string; screenshotPath: string; state: string }[]; + }; + }; + }; + expect(summary.artifacts.approvalCheckpoints?.screenshots).toHaveLength(4); + expect( + summary.artifacts.approvalCheckpoints?.screenshots.map((screenshot) => + path.relative(result.outputDir, screenshot.screenshotPath), + ), + ).toEqual([ + "approval-checkpoints/slack-approval-exec-native-pending.png", + "approval-checkpoints/slack-approval-exec-native-resolved.png", + "approval-checkpoints/slack-approval-plugin-native-pending.png", + "approval-checkpoints/slack-approval-plugin-native-resolved.png", + ]); + await expect(fs.readFile(result.reportPath, "utf8")).resolves.toContain( + "Approval checkpoint slack-approval-plugin-native resolved", + ); + }); + + it("rejects non-approval scenarios in approval checkpoint mode", async () => { + await expect( + runMantisSlackDesktopSmoke({ + approvalCheckpoints: true, + crabboxBin: "/tmp/crabbox", + repoRoot, + scenarioIds: ["slack-canary"], + }), + ).rejects.toThrow("--approval-checkpoints only supports approval checkpoint scenarios"); + }); + + it("fails approval checkpoint mode when ack metadata does not match the expected state", async () => { + const expectedScenarios = ["slack-approval-exec-native", "slack-approval-plugin-native"]; + const runner = vi.fn(async (command: string, args: readonly string[]) => { + if (command === "/tmp/crabbox" && args[0] === "warmup") { + return { stdout: "ready lease cbx_123abc\n", stderr: "" }; + } + if (command === "/tmp/crabbox" && args[0] === "inspect") { + return { + stdout: `${JSON.stringify({ + host: "203.0.113.10", + id: "cbx_123abc", + provider: "hetzner", + 
sshKey: "/tmp/key", + sshPort: "2222", + sshUser: "crabbox", + state: "active", + })}\n`, + stderr: "", + }; + } + if (command === "rsync") { + const outputDir = args.at(-1) as string; + await fs.mkdir(outputDir, { recursive: true }); + if (!outputDir.endsWith("slack-qa/")) { + await fs.writeFile(path.join(outputDir, "slack-desktop-smoke.png"), "png"); + await fs.writeFile( + path.join(outputDir, "remote-metadata.json"), + `${JSON.stringify({ qaExitCode: 0 })}\n`, + ); + await fs.writeFile(path.join(outputDir, "slack-desktop-command.log"), "qa\n"); + await writeApprovalCheckpointArtifacts(outputDir, expectedScenarios); + await fs.writeFile( + path.join( + outputDir, + "approval-checkpoints", + "slack-approval-plugin-native.resolved.ack.json", + ), + `${JSON.stringify({ + version: 1, + capturedAt: "2026-05-04T13:00:30.000Z", + scenarioId: "slack-approval-plugin-native", + screenshotPath: `${outputDir}/approval-checkpoints/slack-approval-plugin-native-resolved.png`, + state: "pending", + })}\n`, + ); + } + } + return { stdout: "", stderr: "" }; + }); + + const result = await runMantisSlackDesktopSmoke({ + approvalCheckpoints: true, + commandRunner: runner, + crabboxBin: "/tmp/crabbox", + outputDir: ".artifacts/qa-e2e/mantis/slack-desktop-bad-checkpoints", + repoRoot, + }); + + expect(result.status).toBe("fail"); + const summary = JSON.parse(await fs.readFile(result.summaryPath, "utf8")) as { + error?: string; + }; + expect(summary.error).toContain("unexpected state"); + }); + + it("rejects approval checkpoints with gateway setup", async () => { + await expect( + runMantisSlackDesktopSmoke({ + approvalCheckpoints: true, + crabboxBin: "/tmp/crabbox", + gatewaySetup: true, + repoRoot, + }), + ).rejects.toThrow("--approval-checkpoints cannot be used with --gateway-setup"); + }); + it("supports prehydrated remote workspaces without installing or building inside the VM", async () => { const commands: { args: readonly string[]; command: string }[] = []; const runner = 
vi.fn(async (command: string, args: readonly string[]) => { @@ -475,6 +748,66 @@ describe("mantis Slack desktop smoke runtime", () => { expect(phaseStatus(summary.timings.phases, "crabbox.remote_run")).toBe("accepted"); }); + it("passes Slack QA when Crabbox returns non-zero after remote metadata proves QA success", async () => { + const expectedScenarios = ["slack-approval-exec-native", "slack-approval-plugin-native"]; + const runner = vi.fn(async (command: string, args: readonly string[]) => { + if (command === "/tmp/crabbox" && args[0] === "warmup") { + return { stdout: "ready lease cbx_ba5eba11\n", stderr: "" }; + } + if (command === "/tmp/crabbox" && args[0] === "inspect") { + return { + stdout: `${JSON.stringify({ + host: "203.0.113.10", + id: "cbx_ba5eba11", + provider: "hetzner", + sshKey: "/tmp/key", + sshPort: "2222", + sshUser: "crabbox", + state: "active", + })}\n`, + stderr: "", + }; + } + if (command === "/tmp/crabbox" && args[0] === "run") { + throw new Error("remote command exited 1"); + } + if (command === "rsync") { + const outputDir = args.at(-1); + await fs.mkdir(outputDir as string, { recursive: true }); + if (String(outputDir).endsWith("slack-qa/")) { + await fs.writeFile(path.join(outputDir as string, "slack-qa-report.md"), "# Slack\n"); + } else { + await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.png"), "png"); + await fs.writeFile( + path.join(outputDir as string, "remote-metadata.json"), + `${JSON.stringify({ qaExitCode: 0 })}\n`, + ); + await fs.writeFile(path.join(outputDir as string, "slack-desktop-command.log"), "qa\n"); + await writeApprovalCheckpointArtifacts(outputDir as string, expectedScenarios); + } + } + return { stdout: "", stderr: "" }; + }); + + const result = await runMantisSlackDesktopSmoke({ + approvalCheckpoints: true, + commandRunner: runner, + crabboxBin: "/tmp/crabbox", + now: () => new Date("2026-05-04T14:45:00.000Z"), + outputDir: ".artifacts/qa-e2e/mantis/slack-desktop-qa-metadata", + 
repoRoot, + }); + + expect(result.status).toBe("pass"); + expect(result.approvalCheckpointScreenshotPaths).toHaveLength(4); + const summary = JSON.parse(await fs.readFile(result.summaryPath, "utf8")) as { + status: string; + timings: { phases: { name: string; status: string }[] }; + }; + expect(summary.status).toBe("pass"); + expect(phaseStatus(summary.timings.phases, "crabbox.remote_run")).toBe("accepted"); + }); + it("copies the screenshot before reporting a failed remote Slack QA run", async () => { const runner = vi.fn(async (command: string, args: readonly string[]) => { if (command === "/tmp/crabbox" && args[0] === "inspect") { @@ -536,6 +869,55 @@ describe("mantis Slack desktop smoke runtime", () => { expect(summary.artifacts.videoPath).toContain("slack-desktop-smoke.mp4"); }); + it("reports Slack QA failure from copied remote metadata when Crabbox run exits zero", async () => { + const runner = vi.fn(async (command: string, args: readonly string[]) => { + if (command === "/tmp/crabbox" && args[0] === "inspect") { + return { + stdout: `${JSON.stringify({ + host: "203.0.113.10", + id: "cbx_existing", + provider: "hetzner", + sshKey: "/tmp/key", + sshPort: "2222", + sshUser: "crabbox", + })}\n`, + stderr: "", + }; + } + if (command === "rsync") { + const outputDir = args.at(-1); + await fs.mkdir(outputDir as string, { recursive: true }); + if (String(outputDir).endsWith("slack-qa/")) { + return { stdout: "", stderr: "" }; + } + await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.png"), "png"); + await fs.writeFile( + path.join(outputDir as string, "remote-metadata.json"), + `${JSON.stringify({ qaExitCode: 7 })}\n`, + ); + } + return { stdout: "", stderr: "" }; + }); + + const result = await runMantisSlackDesktopSmoke({ + commandRunner: runner, + crabboxBin: "/tmp/crabbox", + leaseId: "cbx_existing", + outputDir: ".artifacts/qa-e2e/mantis/slack-desktop-metadata-fail", + repoRoot, + }); + + expect(result.status).toBe("fail"); + const summary = 
JSON.parse(await fs.readFile(result.summaryPath, "utf8")) as { + error?: string; + status: string; + timings: { phases: { name: string; status: string }[] }; + }; + expect(summary.status).toBe("fail"); + expect(summary.error).toContain("Slack QA exited with code 7"); + expect(phaseStatus(summary.timings.phases, "crabbox.remote_run")).toBe("pass"); + }); + it("accepts Blacksmith Testbox lease ids from Crabbox warmup", async () => { const commands: { args: readonly string[]; command: string }[] = []; const runner = vi.fn(async (command: string, args: readonly string[]) => { @@ -565,7 +947,10 @@ describe("mantis Slack desktop smoke runtime", () => { } else { await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.png"), "png"); await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.mp4"), "mp4"); - await fs.writeFile(path.join(outputDir as string, "remote-metadata.json"), "{}\n"); + await fs.writeFile( + path.join(outputDir as string, "remote-metadata.json"), + `${JSON.stringify({ qaExitCode: 0 })}\n`, + ); await fs.writeFile(path.join(outputDir as string, "chrome.log"), "chrome\n"); await fs.writeFile(path.join(outputDir as string, "ffmpeg.log"), "ffmpeg\n"); await fs.writeFile(path.join(outputDir as string, "slack-desktop-command.log"), "qa\n"); @@ -598,4 +983,56 @@ describe("mantis Slack desktop smoke runtime", () => { expect(summary.crabbox.id).toBe("tbx_abc-123_more"); expect(summary.crabbox.provider).toBe("blacksmith-testbox"); }); + + it("routes the approval checkpoints CLI flag into the Slack desktop runtime", async () => { + vi.resetModules(); + const runMantisSlackDesktopSmokeCommand = mockMantisCliRuntime(vi.fn(async () => undefined)); + const { registerMantisCli } = await import("./cli.js"); + const qa = new Command("qa"); + registerMantisCli(qa); + + await qa.parseAsync([ + "node", + "openclaw", + "mantis", + "slack-desktop-smoke", + "--approval-checkpoints", + "--market", + "on-demand", + "--scenario", + 
"slack-approval-plugin-native", + ]); + + expect(runMantisSlackDesktopSmokeCommand).toHaveBeenCalledWith( + expect.objectContaining({ + approvalCheckpoints: true, + gatewaySetup: undefined, + market: "on-demand", + scenarioIds: ["slack-approval-plugin-native"], + }), + ); + vi.doUnmock("./cli.runtime.js"); + }); + + it("rejects mutually exclusive approval checkpoint and gateway setup CLI flags", async () => { + vi.resetModules(); + const runMantisSlackDesktopSmokeCommand = mockMantisCliRuntime(vi.fn(async () => undefined)); + const { registerMantisCli } = await import("./cli.js"); + const qa = new Command("qa"); + registerMantisCli(qa); + + await expect( + qa.parseAsync([ + "node", + "openclaw", + "mantis", + "slack-desktop-smoke", + "--approval-checkpoints", + "--gateway-setup", + ]), + ).rejects.toThrow("--approval-checkpoints cannot be used with --gateway-setup"); + + expect(runMantisSlackDesktopSmokeCommand).not.toHaveBeenCalled(); + vi.doUnmock("./cli.runtime.js"); + }); }); diff --git a/extensions/qa-lab/src/mantis/slack-desktop-smoke.runtime.ts b/extensions/qa-lab/src/mantis/slack-desktop-smoke.runtime.ts index 5efcd8c88eb9..607956d3fd09 100644 --- a/extensions/qa-lab/src/mantis/slack-desktop-smoke.runtime.ts +++ b/extensions/qa-lab/src/mantis/slack-desktop-smoke.runtime.ts @@ -22,18 +22,21 @@ import { export type MantisSlackDesktopSmokeOptions = { alternateModel?: string; + approvalCheckpoints?: boolean; commandRunner?: CommandRunner; crabboxBin?: string; credentialRole?: string; credentialSource?: string; env?: NodeJS.ProcessEnv; fastMode?: boolean; + freshPr?: string; gatewaySetup?: boolean; hydrateMode?: MantisSlackDesktopHydrateMode; idleTimeout?: string; keepLease?: boolean; leaseId?: string; machineClass?: string; + market?: string; now?: () => Date; outputDir?: string; primaryModel?: string; @@ -49,6 +52,7 @@ export type MantisSlackDesktopSmokeOptions = { export type MantisSlackDesktopHydrateMode = "prehydrated" | "source"; export type 
MantisSlackDesktopSmokeResult = { + approvalCheckpointScreenshotPaths?: string[]; outputDir: string; reportPath: string; screenshotPath?: string; @@ -70,6 +74,7 @@ type SlackGatewayCredentialHeartbeat = ReturnType 0 + ? [...params.scenarioIds] + : params.approvalCheckpoints + ? [...DEFAULT_APPROVAL_CHECKPOINT_SCENARIOS] + : []; + if (params.approvalCheckpoints) { + const allowed = new Set(DEFAULT_APPROVAL_CHECKPOINT_SCENARIOS); + const unsupported = scenarioIds.filter((scenarioId) => !allowed.has(scenarioId)); + if (unsupported.length > 0) { + throw new Error( + `--approval-checkpoints only supports approval checkpoint scenarios: ${[ + ...DEFAULT_APPROVAL_CHECKPOINT_SCENARIOS, + ].join(", ")}. Unsupported: ${unsupported.join(", ")}.`, + ); + } + } + return scenarioIds; +} + +async function assertNonEmptyFile(filePath: string, label: string) { + let stats; + try { + stats = await fs.stat(filePath); + } catch (error) { + throw new Error(`${label} is missing: ${filePath}`, { cause: error }); + } + if (!stats.isFile() || stats.size <= 0) { + throw new Error(`${label} is empty: ${filePath}`); + } +} + +async function readJsonObject(filePath: string, label: string): Promise> { + await assertNonEmptyFile(filePath, label); + let parsed: unknown; + try { + parsed = JSON.parse(await fs.readFile(filePath, "utf8")); + } catch (error) { + throw new Error(`${label} is not valid JSON: ${filePath}`, { cause: error }); + } + if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) { + throw new Error(`${label} must be a JSON object: ${filePath}`); + } + return parsed as Record; +} + +function assertApprovalCheckpointBaseJson(params: { + filePath: string; + label: string; + record: Record; + scenarioId: string; + state: MantisApprovalCheckpointState; +}) { + if (params.record.version !== 1) { + throw new Error(`${params.label} has unexpected version in ${params.filePath}`); + } + if (params.record.scenarioId !== params.scenarioId) { + throw new Error(`${params.label} has 
unexpected scenarioId in ${params.filePath}`); + } + if (params.record.state !== params.state) { + throw new Error(`${params.label} has unexpected state in ${params.filePath}`); + } +} + +function assertApprovalCheckpointJson(params: { + filePath: string; + label: string; + record: Record; + scenarioId: string; + state: MantisApprovalCheckpointState; +}) { + assertApprovalCheckpointBaseJson(params); + const message = params.record.message; + if (!message || typeof message !== "object" || Array.isArray(message)) { + throw new Error(`${params.label} is missing Slack message evidence in ${params.filePath}`); + } + const candidate = message as Record; + if (typeof candidate.text !== "string") { + throw new Error(`${params.label} message evidence is missing text in ${params.filePath}`); + } + if ( + !Array.isArray(candidate.blockText) || + !candidate.blockText.every((entry) => typeof entry === "string") + ) { + throw new Error(`${params.label} message evidence is missing blockText in ${params.filePath}`); + } + if ( + !Array.isArray(candidate.actionLabels) || + !candidate.actionLabels.every((entry) => typeof entry === "string") + ) { + throw new Error( + `${params.label} message evidence is missing actionLabels in ${params.filePath}`, + ); + } + if (typeof candidate.hasNativeActions !== "boolean") { + throw new Error( + `${params.label} message evidence is missing hasNativeActions in ${params.filePath}`, + ); + } + if (params.state === "pending" && candidate.actionLabels.length === 0) { + throw new Error( + `${params.label} pending message evidence has no native action labels in ${params.filePath}`, + ); + } +} + +function assertApprovalCheckpointAckJson(params: { + filePath: string; + label: string; + record: Record; + scenarioId: string; + screenshotPath: string; + state: MantisApprovalCheckpointState; +}) { + assertApprovalCheckpointBaseJson(params); + if (typeof params.record.screenshotPath !== "string" || !params.record.screenshotPath.trim()) { + throw new 
Error(`${params.label} is missing screenshotPath in ${params.filePath}`); + } + if (path.basename(params.record.screenshotPath) !== path.basename(params.screenshotPath)) { + throw new Error(`${params.label} screenshotPath does not match ${params.screenshotPath}`); + } +} + +async function collectApprovalCheckpointArtifacts(params: { + enabled: boolean; + outputDir: string; + scenarioIds: readonly string[]; +}): Promise { + if (!params.enabled) { + return undefined; + } + const directoryPath = path.join(params.outputDir, "approval-checkpoints"); + const screenshots: MantisApprovalCheckpointScreenshot[] = []; + for (const scenarioId of params.scenarioIds) { + for (const state of ["pending", "resolved"] as const) { + const checkpointPath = path.join(directoryPath, `${scenarioId}.${state}.json`); + const ackPath = path.join(directoryPath, `${scenarioId}.${state}.ack.json`); + const screenshotPath = path.join(directoryPath, `${scenarioId}-${state}.png`); + const checkpointLabel = `Approval checkpoint ${scenarioId}.${state}`; + const ackLabel = `Approval checkpoint ack ${scenarioId}.${state}`; + assertApprovalCheckpointJson({ + filePath: checkpointPath, + label: checkpointLabel, + record: await readJsonObject(checkpointPath, checkpointLabel), + scenarioId, + state, + }); + assertApprovalCheckpointAckJson({ + filePath: ackPath, + label: ackLabel, + record: await readJsonObject(ackPath, ackLabel), + scenarioId, + screenshotPath, + state, + }); + await assertNonEmptyFile( + screenshotPath, + `Approval checkpoint screenshot ${scenarioId}.${state}`, + ); + screenshots.push({ + ackPath, + checkpointPath, + scenarioId, + screenshotPath, + state, + }); + } + } + return { + directoryPath, + screenshots, + }; +} + async function readRemoteMetadata( outputDir: string, ): Promise { @@ -347,6 +550,7 @@ async function prepareGatewayCredentialEnv(params: { function renderRemoteScript(params: { alternateModel: string; + approvalCheckpoints: boolean; credentialRole: string; 
credentialSource: string; fastMode: boolean; @@ -369,8 +573,10 @@ function renderRemoteScript(params: { const fastMode = params.fastMode ? "1" : "0"; const hydrateMode = shellQuote(params.hydrateMode); const setupGateway = params.setupGateway ? "1" : "0"; + const approvalCheckpoints = params.approvalCheckpoints ? "1" : "0"; const slackChannelId = shellQuote(params.slackChannelId); const scenarioArgs = params.scenarioIds.flatMap((id) => ["--scenario", shellQuote(id)]).join(" "); + const checkpointScenarioJson = shellQuote(JSON.stringify(params.scenarioIds)); return `set -euo pipefail out=${shellOutputDir} slack_url_override=${slackUrl} @@ -382,7 +588,23 @@ alternate_model=${alternateModel} fast_mode=${fastMode} hydrate_mode=${hydrateMode} setup_gateway=${setupGateway} +approval_checkpoints=${approvalCheckpoints} slack_channel_id=${slackChannelId} +approval_checkpoint_scenarios_json=${checkpointScenarioJson} +remote_command_timeout_seconds="\${OPENCLAW_MANTIS_REMOTE_COMMAND_TIMEOUT_SECONDS:-600}" +if [ -z "\${OPENCLAW_QA_SLACK_CHANNEL_ID:-}" ] && [ -n "$slack_channel_id" ]; then + export OPENCLAW_QA_SLACK_CHANNEL_ID="$slack_channel_id" +fi +case "$remote_command_timeout_seconds" in + ''|*[!0-9]*) + echo "OPENCLAW_MANTIS_REMOTE_COMMAND_TIMEOUT_SECONDS must be an integer number of seconds." >&2 + exit 2 + ;; +esac +if [ "$remote_command_timeout_seconds" -le 0 ]; then + echo "OPENCLAW_MANTIS_REMOTE_COMMAND_TIMEOUT_SECONDS must be greater than zero." >&2 + exit 2 +fi rm -rf "$out" mkdir -p "$out" export DISPLAY="\${DISPLAY:-:99}" @@ -486,7 +708,7 @@ else fi chrome_pid=$! qa_status=0 -{ +run_mantis_remote_body() { set -e echo "remote pwd: $(pwd)" sudo corepack enable || sudo npm install -g pnpm@11 @@ -558,15 +780,287 @@ MANTIS_SLACK_PATCH fi disown "$gateway_pid" >/dev/null 2>&1 || true else - qa_args=(openclaw qa slack --repo-root . 
--output-dir "$out/slack-qa" --provider-mode "$provider_mode" --model "$primary_model" --alt-model "$alternate_model" --credential-source "$credential_source" --credential-role "$credential_role") + slack_qa_output_dir=".artifacts/qa-e2e/mantis/$(basename "$out")/slack-qa" + rm -rf "$slack_qa_output_dir" "$out/slack-qa" + mkdir -p "$(dirname "$slack_qa_output_dir")" "$out/slack-qa" + copy_slack_qa_artifacts() { + rm -rf "$out/slack-qa" + mkdir -p "$out/slack-qa" + if [ -d "$slack_qa_output_dir" ]; then + cp -a "$slack_qa_output_dir"/. "$out/slack-qa"/ + fi + } + qa_args=(openclaw qa slack --repo-root . --output-dir "$slack_qa_output_dir" --provider-mode "$provider_mode" --model "$primary_model" --alt-model "$alternate_model" --credential-source "$credential_source" --credential-role "$credential_role") if [ "$fast_mode" = "1" ]; then qa_args+=(--fast) fi - pnpm "\${qa_args[@]}" ${scenarioArgs} + if [ "$approval_checkpoints" = "1" ]; then + checkpoint_dir="$out/approval-checkpoints" + mkdir -p "$checkpoint_dir" + export OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_DIR="$checkpoint_dir" + export OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_TIMEOUT_MS="\${OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_TIMEOUT_MS:-120000}" + export OPENCLAW_MANTIS_APPROVAL_CHECKPOINT_SCENARIOS_JSON="$approval_checkpoint_scenarios_json" + export OPENCLAW_MANTIS_APPROVAL_BROWSER_BIN="$browser_bin" + cat >"$out/approval-checkpoint-watcher.mjs" <<'MANTIS_APPROVAL_WATCHER' + import { spawn } from "node:child_process"; + import fs from "node:fs/promises"; + import path from "node:path"; + +const checkpointDir = process.env.OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_DIR; +const timeoutMs = Number.parseInt( + process.env.OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_TIMEOUT_MS || "120000", + 10, +); + const scenarioIds = JSON.parse( + process.env.OPENCLAW_MANTIS_APPROVAL_CHECKPOINT_SCENARIOS_JSON || "[]", + ); + const browserBin = process.env.OPENCLAW_MANTIS_APPROVAL_BROWSER_BIN; + +if (!checkpointDir) { + throw new 
Error("OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_DIR is required."); +} +if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) { + throw new Error("OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_TIMEOUT_MS must be a positive integer."); +} +if (!Array.isArray(scenarioIds) || scenarioIds.length === 0) { + throw new Error("At least one approval checkpoint scenario id is required."); +} + + const states = ["pending", "resolved"]; + const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); + const htmlEscape = (value) => + String(value ?? "") + .replaceAll("&", "&") + .replaceAll("<", "<") + .replaceAll(">", ">") + .replaceAll('"', """) + .replaceAll("'", "'"); + + async function readJson(filePath) { + return JSON.parse(await fs.readFile(filePath, "utf8")); + } + +async function waitForCheckpoint(filePath) { + const deadline = Date.now() + timeoutMs; + while (Date.now() <= deadline) { + try { + const stats = await fs.stat(filePath); + if (stats.isFile() && stats.size > 0) { + return; + } + } catch { + // Keep polling until the Slack QA scenario emits the checkpoint or the timeout expires. + } + await delay(500); + } + throw new Error(\`Timed out waiting for approval checkpoint: \${filePath}\`); +} + + function renderCheckpointHtml(checkpoint) { + const message = checkpoint && typeof checkpoint.message === "object" ? checkpoint.message : {}; + const blockText = Array.isArray(message.blockText) + ? message.blockText.filter((entry) => typeof entry === "string" && entry.trim().length > 0) + : []; + const actionLabels = Array.isArray(message.actionLabels) + ? message.actionLabels.filter((entry) => typeof entry === "string" && entry.trim().length > 0) + : []; + const text = typeof message.text === "string" ? message.text : ""; + const lines = blockText.length > 0 ? blockText : text.split("\\n").filter(Boolean); + const title = + lines[0] || + (checkpoint.approvalKind === "plugin" ? 
"Plugin approval required" : "Exec approval required"); + const detailLines = lines.slice(1).filter((line) => !actionLabels.includes(line)); + const stateLabel = checkpoint.state === "resolved" ? "Resolved" : "Pending"; + const decision = typeof checkpoint.decision === "string" ? checkpoint.decision : ""; + const decisionLabel = + decision === "allow-once" + ? "Allowed once" + : decision === "allow-always" + ? "Allowed always" + : decision === "deny" + ? "Denied" + : ""; + const detailHtml = detailLines + .map((line) => '

' + htmlEscape(line) + "

") + .join(""); + const buttonsHtml = + checkpoint.state === "pending" && actionLabels.length > 0 + ? '
' + + actionLabels.map((label) => '").join("") + + "
" + : '
' + htmlEscape(decisionLabel || stateLabel) + "
"; + return '' + + "
" + + '
# Slack native approval checkpoint
' + + '
OC
' + + '
openclawAPP' + + htmlEscape(stateLabel) + + "
" + + '

' + htmlEscape(title) + "

" + + detailHtml + + buttonsHtml + + '
Rendered from the Slack API message observed by QA at ' + + htmlEscape(checkpoint.observedAt || "") + + ".
" + + "
"; + } + + async function captureScreenshot(screenshotPath, checkpoint) { + if (!browserBin) { + throw new Error("OPENCLAW_MANTIS_APPROVAL_BROWSER_BIN is required to render approval checkpoint screenshots."); + } + const htmlPath = screenshotPath + ".html"; + await fs.writeFile(htmlPath, renderCheckpointHtml(checkpoint), "utf8"); + await new Promise((resolve, reject) => { + const child = spawn( + browserBin, + [ + "--headless=new", + "--disable-gpu", + "--no-sandbox", + "--disable-dev-shm-usage", + "--window-size=960,720", + "--screenshot=" + screenshotPath, + new URL("file://" + path.resolve(htmlPath)).href, + ], + { stdio: "inherit" }, + ); + child.on("error", reject); + child.on("exit", (code) => { + if (code === 0) { + resolve(); + } else { + reject(new Error(\`browser screenshot exited with code \${code ?? "unknown"} for \${screenshotPath}\`)); + } + }); + }); + const stats = await fs.stat(screenshotPath); + if (!stats.isFile() || stats.size <= 0) { + throw new Error(\`Approval checkpoint screenshot is missing or empty: \${screenshotPath}\`); + } +} + +async function writeJson(filePath, value) { + const tmpPath = \`\${filePath}.tmp-\${process.pid}\`; + await fs.writeFile(tmpPath, \`\${JSON.stringify(value, null, 2)}\\n\`, "utf8"); + await fs.rename(tmpPath, filePath); +} + +const acknowledgements = []; +for (const scenarioId of scenarioIds) { + if (typeof scenarioId !== "string" || scenarioId.length === 0) { + throw new Error("Approval checkpoint scenario ids must be non-empty strings."); + } + for (const state of states) { + const checkpointPath = path.join(checkpointDir, \`\${scenarioId}.\${state}.json\`); + const screenshotPath = path.join(checkpointDir, \`\${scenarioId}-\${state}.png\`); + const ackPath = path.join(checkpointDir, \`\${scenarioId}.\${state}.ack.json\`); + await waitForCheckpoint(checkpointPath); + const checkpoint = await readJson(checkpointPath); + await captureScreenshot(screenshotPath, checkpoint); + const acknowledgement = { + version: 
1, + scenarioId, + state, + checkpointPath, + screenshotPath, + capturedAt: new Date().toISOString(), + }; + await writeJson(ackPath, acknowledgement); + acknowledgements.push(acknowledgement); + process.stdout.write(\`acknowledged \${scenarioId} \${state}: \${screenshotPath}\\n\`); + } +} + +await writeJson(path.join(checkpointDir, ".watcher-complete.json"), { + version: 1, + acknowledgements, + completedAt: new Date().toISOString(), +}); +MANTIS_APPROVAL_WATCHER + node "$out/approval-checkpoint-watcher.mjs" >"$out/approval-checkpoint-watcher.log" 2>&1 & + watcher_pid="$!" + qa_exit=0 + pnpm "\${qa_args[@]}" ${scenarioArgs} || qa_exit=$? + watcher_exit=0 + if [ "$qa_exit" -eq 0 ]; then + wait "$watcher_pid" || watcher_exit=$? + elif kill -0 "$watcher_pid" >/dev/null 2>&1; then + kill "$watcher_pid" >/dev/null 2>&1 || true + wait "$watcher_pid" >/dev/null 2>&1 || true + echo "Slack QA exited before all expected approval checkpoints were acknowledged." >&2 + watcher_exit=1 + else + wait "$watcher_pid" || watcher_exit=$? + fi + copy_slack_qa_artifacts + if [ "$qa_exit" -ne 0 ]; then + exit "$qa_exit" + fi + if [ "$watcher_exit" -ne 0 ]; then + exit "$watcher_exit" + fi + else + qa_exit=0 + pnpm "\${qa_args[@]}" ${scenarioArgs} || qa_exit=$? + copy_slack_qa_artifacts + if [ "$qa_exit" -ne 0 ]; then + exit "$qa_exit" + fi + fi fi -} >"$out/slack-desktop-command.log" 2>&1 || qa_status=$? +} +export -f run_mantis_remote_body +export out credential_source credential_role provider_mode primary_model alternate_model +export fast_mode hydrate_mode setup_gateway approval_checkpoints slack_channel_id +export approval_checkpoint_scenarios_json browser_bin profile slack_url +set +e +if command -v timeout >/dev/null 2>&1; then + timeout --kill-after=15s "\${remote_command_timeout_seconds}s" bash -c run_mantis_remote_body >"$out/slack-desktop-command.log" 2>&1 & +else + run_mantis_remote_body >"$out/slack-desktop-command.log" 2>&1 & +fi +remote_body_pid="$!" 
+( + while kill -0 "$remote_body_pid" >/dev/null 2>&1; do + echo "MANTIS_REMOTE_HEARTBEAT $(date -u +%Y-%m-%dT%H:%M:%SZ)" + sleep 30 + done +) & +heartbeat_pid="$!" +wait "$remote_body_pid" +qa_status=$? +kill "$heartbeat_pid" >/dev/null 2>&1 || true +wait "$heartbeat_pid" >/dev/null 2>&1 || true +set -e +if [ "$qa_status" -eq 124 ] || [ "$qa_status" -eq 137 ]; then + echo "Remote command timed out after \${remote_command_timeout_seconds}s." >"$out/remote-command-timeout.txt" + qa_status=124 +fi sleep 5 -scrot "$out/slack-desktop-smoke.png" || true +if [ "$approval_checkpoints" = "1" ] && [ -s "$out/approval-checkpoints/slack-approval-plugin-native-pending.png" ]; then + cp "$out/approval-checkpoints/slack-approval-plugin-native-pending.png" "$out/slack-desktop-smoke.png" +elif [ "$approval_checkpoints" = "1" ] && [ -s "$out/approval-checkpoints/slack-approval-exec-native-pending.png" ]; then + cp "$out/approval-checkpoints/slack-approval-exec-native-pending.png" "$out/slack-desktop-smoke.png" +else + scrot "$out/slack-desktop-smoke.png" || true +fi if [ -n "$video_pid" ]; then wait "$video_pid" || true fi @@ -580,6 +1074,7 @@ cat >"$out/remote-metadata.json" </dev/null 2>&1; then echo true; else echo false; fi), "gatewayPid": "$(if [ -f "$out/openclaw-gateway.pid" ]; then cat "$out/openclaw-gateway.pid"; fi)", "gatewayPort": 38973, @@ -588,11 +1083,34 @@ cat >"$out/remote-metadata.json" <&2 +fi +exit 0 `; } @@ -631,6 +1149,15 @@ function renderReport(summary: MantisSlackDesktopSmokeSummary) { ? `- Video: \`${path.basename(summary.artifacts.videoPath)}\`` : "- Video: missing", summary.artifacts.slackQaDir ? "- Slack QA artifacts: `slack-qa/`" : undefined, + summary.artifacts.approvalCheckpoints + ? 
"- Approval checkpoints: `approval-checkpoints/`" + : undefined, + ...(summary.artifacts.approvalCheckpoints?.screenshots.map( + (screenshot) => + `- Approval checkpoint ${screenshot.scenarioId} ${screenshot.state}: \`approval-checkpoints/${path.basename( + screenshot.screenshotPath, + )}\``, + ) ?? []), "- Remote metadata: `remote-metadata.json`", "- Remote command log: `slack-desktop-command.log`", "- FFmpeg log: `ffmpeg.log`", @@ -704,6 +1231,7 @@ export async function runMantisSlackDesktopSmoke( trimToValue(opts.provider) ?? trimToValue(env[CRABBOX_PROVIDER_ENV]) ?? DEFAULT_PROVIDER; const machineClass = trimToValue(opts.machineClass) ?? trimToValue(env[CRABBOX_CLASS_ENV]) ?? DEFAULT_CLASS; + const market = trimToValue(opts.market) ?? trimToValue(env[CRABBOX_MARKET_ENV]); const idleTimeout = trimToValue(opts.idleTimeout) ?? trimToValue(env[CRABBOX_IDLE_TIMEOUT_ENV]) ?? @@ -715,12 +1243,20 @@ export async function runMantisSlackDesktopSmoke( const primaryModel = trimToValue(opts.primaryModel) ?? DEFAULT_MODEL; const alternateModel = trimToValue(opts.alternateModel) ?? primaryModel; const fastMode = opts.fastMode ?? true; + const freshPr = trimToValue(opts.freshPr); const hydrateMode = normalizeHydrateMode(opts.hydrateMode) ?? normalizeHydrateMode(env[HYDRATE_MODE_ENV]) ?? DEFAULT_HYDRATE_MODE; const gatewaySetup = opts.gatewaySetup ?? false; - const scenarioIds = opts.scenarioIds ?? []; + const approvalCheckpoints = opts.approvalCheckpoints ?? false; + if (approvalCheckpoints && gatewaySetup) { + throw new Error("--approval-checkpoints cannot be used with --gateway-setup."); + } + const scenarioIds = resolveScenarioIds({ + approvalCheckpoints, + scenarioIds: opts.scenarioIds, + }); const slackChannelId = trimToValue(opts.slackChannelId) ?? trimToValue(env[SLACK_CHANNEL_ID_ENV]) ?? 
@@ -742,6 +1278,7 @@ export async function runMantisSlackDesktopSmoke( let slackQaDir: string | undefined; let videoPath: string | undefined; let remoteMetadata: SlackDesktopRemoteMetadata | undefined; + let approvalCheckpointArtifacts: MantisApprovalCheckpointArtifacts | undefined; try { leaseId = @@ -753,6 +1290,7 @@ export async function runMantisSlackDesktopSmoke( env, idleTimeout, machineClass, + market, provider, runner, ttl, @@ -784,6 +1322,7 @@ export async function runMantisSlackDesktopSmoke( leaseHeartbeat = preparedCredentialEnv.leaseHeartbeat; let remoteRunError: unknown; const remoteRunStartedAt = new Date(); + const freshPrArgs = freshPr ? ["--fresh-pr", freshPr] : []; await runCommand({ command: crabboxBin, args: [ @@ -794,10 +1333,13 @@ export async function runMantisSlackDesktopSmoke( resolvedLeaseId, "--desktop", "--browser", + "--no-hydrate", + ...freshPrArgs, "--shell", "--", renderRemoteScript({ alternateModel, + approvalCheckpoints, credentialRole, credentialSource, fastMode, @@ -843,22 +1385,37 @@ export async function runMantisSlackDesktopSmoke( } remoteMetadata = await readRemoteMetadata(outputDir); slackQaDir = path.join(outputDir, "slack-qa"); - if (!(await pathExists(screenshotPath))) { - throw new Error("Slack desktop screenshot was not copied back from Crabbox."); - } + await assertNonEmptyFile(screenshotPath, "Slack desktop screenshot"); const gatewaySetupCompleted = gatewaySetup && remoteMetadata?.qaExitCode === 0 && remoteMetadata.gatewayAlive === true; + const slackQaCompleted = !gatewaySetup && remoteMetadata?.qaExitCode === 0; if (remoteRunError && gatewaySetupCompleted) { timer.updatePhaseStatus("crabbox.remote_run", "accepted"); } - if (remoteRunError && !gatewaySetupCompleted) { + if (remoteRunError && slackQaCompleted) { + timer.updatePhaseStatus("crabbox.remote_run", "accepted"); + } + if (remoteRunError && !gatewaySetupCompleted && !slackQaCompleted) { throw remoteRunError; } if (gatewaySetup && !gatewaySetupCompleted) { 
throw new Error("Slack desktop gateway setup did not report a live OpenClaw gateway."); } + if (!gatewaySetup && !slackQaCompleted) { + const detail = + remoteMetadata?.qaExitCode === undefined + ? "Slack QA did not report an exit code." + : `Slack QA exited with code ${remoteMetadata.qaExitCode}.`; + throw new Error(`${detail} See slack-desktop-command.log for details.`); + } + approvalCheckpointArtifacts = await collectApprovalCheckpointArtifacts({ + enabled: approvalCheckpoints, + outputDir, + scenarioIds, + }); summary = { artifacts: { + approvalCheckpoints: approvalCheckpointArtifacts, reportPath, screenshotPath, slackQaDir, @@ -884,6 +1441,9 @@ export async function runMantisSlackDesktopSmoke( timings: timer.snapshot(), }; return { + approvalCheckpointScreenshotPaths: approvalCheckpointArtifacts?.screenshots.map( + (screenshot) => screenshot.screenshotPath, + ), outputDir, reportPath, screenshotPath, @@ -894,6 +1454,7 @@ export async function runMantisSlackDesktopSmoke( } catch (error) { summary = { artifacts: { + approvalCheckpoints: approvalCheckpointArtifacts, reportPath, screenshotPath, slackQaDir, diff --git a/extensions/slack/src/approval-native-gates.ts b/extensions/slack/src/approval-native-gates.ts index e184fe19b6b2..dfec9ef52f37 100644 --- a/extensions/slack/src/approval-native-gates.ts +++ b/extensions/slack/src/approval-native-gates.ts @@ -87,6 +87,7 @@ function hasSlackPluginForwardingTarget(params: { } function requestHasSlackOriginOrSession(params: { + cfg: OpenClawConfig; request: SlackNativeApprovalRequest; accountId?: string | null; }): boolean { @@ -106,7 +107,13 @@ function requestHasSlackOriginOrSession(params: { request: params.request, channel: "slack", bundledFallback: false, - }) !== null + }) !== null && + doesApprovalRequestMatchChannelAccount({ + cfg: params.cfg, + request: params.request, + channel: "slack", + accountId: params.accountId, + }) ); } @@ -135,6 +142,7 @@ function canPluginForwardingRouteToSlack(params: { if ( 
modeIncludesSession(mode) && requestHasSlackOriginOrSession({ + cfg: params.cfg, request: params.request, accountId: params.accountId, }) diff --git a/extensions/slack/src/approval-native.test.ts b/extensions/slack/src/approval-native.test.ts index f476dff77918..2ca2edafff0d 100644 --- a/extensions/slack/src/approval-native.test.ts +++ b/extensions/slack/src/approval-native.test.ts @@ -394,6 +394,61 @@ describe("slack native approval adapter", () => { ).toBe(true); }); + it("does not route plugin session fallback across Slack accounts", async () => { + writeStore({ + "agent:main:slack:channel:c999": { + sessionId: "sess", + updatedAt: Date.now(), + lastChannel: "slack", + lastAccountId: "work", + }, + }); + + const cfg = { + ...buildConfig({ allowFrom: ["U123OWNER"] }), + session: { store: STORE_PATH }, + approvals: { + plugin: { + enabled: true, + mode: "session", + }, + }, + } as OpenClawConfig; + const request = { + id: "plugin:req-account-bound", + request: { + title: "Plugin approval", + description: "Allow access", + sessionKey: "agent:main:slack:channel:c999", + }, + createdAtMs: 0, + expiresAtMs: 1000, + }; + + expect( + slackApprovalCapability.nativeRuntime?.availability.shouldHandle({ + cfg, + accountId: "default", + request, + }), + ).toBe(false); + expect( + await slackNativeApprovalAdapter.native?.resolveApproverDmTargets?.({ + cfg, + accountId: "default", + approvalKind: "plugin", + request, + }), + ).toEqual([]); + expect( + slackApprovalCapability.nativeRuntime?.availability.shouldHandle({ + cfg, + accountId: "work", + request, + }), + ).toBe(true); + }); + it("falls back to the session-bound origin target for plugin approvals", async () => { writeStore({ "agent:main:slack:channel:c123": {