Add Slack approval QA checkpoints (#85141)

* test: add slack approval qa checkpoints

* fix(slack): scope plugin approval session fallback

* ci(mantis): allow slack approval checkpoint dispatch

* ci(mantis): use on-demand aws slack desktops

* ci(mantis): run slack smoke from candidate checkout

* ci(mantis): pin aws ssh ingress to runner

* test(mantis): skip crabbox actions hydrate for slack desktop

* ci(mantis): use fresh pr checkout for slack desktop

* ci(mantis): start slack desktop smoke from source

* fix(mantis): use relative slack qa output dir

* test(mantis): surface slack smoke failure logs

* fix(mantis): write slack approval watcher script

* fix(mantis): accept successful slack qa metadata

* fix(mantis): tighten slack approval evidence

* fix(mantis): repair slack evidence manifest

* fix(mantis): render slack approval checkpoint proof

* fix(mantis): quote approval checkpoint renderer html

* fix(mantis): preserve slack approval failure artifacts

* fix(mantis): timeout silent slack desktop runs

* fix(mantis): keep slack desktop runs chatty

* fix(mantis): keep slack workflow harness trusted

* fix(qa-lab): make slack approval evidence robust

* fix(qa-lab): harden slack approval workflow proof

* test(qa-lab): surface slack approval diagnostics

* test(qa-lab): loosen slack approval readiness
This commit is contained in:
Kevin Lin
2026-05-22 22:04:15 -07:00
committed by GitHub
parent d7a078f196
commit 5656f687c1
14 changed files with 2429 additions and 47 deletions

View File

@@ -17,6 +17,11 @@ on:
required: true
default: slack-canary
type: string
approval_checkpoints:
description: Run native Slack approval checkpoint mode instead of gateway setup
required: false
default: false
type: boolean
keep_vm:
description: Keep the desktop lease open after a passing run
required: false
@@ -30,6 +35,14 @@ on:
options:
- aws
- hetzner
crabbox_market:
description: Crabbox capacity market for AWS leases
required: false
default: on-demand
type: choice
options:
- on-demand
- spot
crabbox_lease_id:
description: Optional existing Crabbox desktop/browser lease id or slug to reuse
required: false
@@ -227,9 +240,11 @@ jobs:
CRABBOX_ACCESS_CLIENT_SECRET: ${{ secrets.CRABBOX_ACCESS_CLIENT_SECRET }}
CRABBOX_LEASE_ID: ${{ inputs.crabbox_lease_id }}
CRABBOX_PROVIDER: ${{ inputs.crabbox_provider }}
CRABBOX_MARKET: ${{ inputs.crabbox_market }}
KEEP_VM: ${{ inputs.keep_vm }}
HYDRATE_MODE: ${{ inputs.hydrate_mode }}
SCENARIO_ID: ${{ inputs.scenario_id }}
APPROVAL_CHECKPOINTS: ${{ inputs.approval_checkpoints }}
shell: bash
run: |
set -euo pipefail
@@ -250,6 +265,15 @@ jobs:
require_var OPENCLAW_QA_CONVEX_SITE_URL
require_var OPENCLAW_QA_CONVEX_SECRET_CI
require_var CRABBOX_COORDINATOR_TOKEN
if [[ -z "${CRABBOX_LEASE_ID:-}" && "$CRABBOX_PROVIDER" == "aws" ]]; then
runner_ip="$(curl -fsS https://checkip.amazonaws.com | tr -d '[:space:]')"
if [[ -z "$runner_ip" ]]; then
echo "Could not resolve GitHub runner public IPv4 for AWS SSH ingress." >&2
exit 1
fi
export CRABBOX_AWS_SSH_CIDRS="${runner_ip}/32"
echo "Using AWS SSH CIDR ${CRABBOX_AWS_SSH_CIDRS}"
fi
candidate_repo="$(pwd)/.artifacts/qa-e2e/mantis/slack-desktop-smoke-worktrees/candidate"
output_rel=".artifacts/qa-e2e/mantis/slack-desktop-smoke"
@@ -265,6 +289,22 @@ jobs:
else
keep_args=(--no-keep-lease)
fi
market_args=()
if [[ -n "${CRABBOX_MARKET:-}" ]]; then
market_args=(--market "$CRABBOX_MARKET")
fi
gateway_args=(--gateway-setup)
approval_args=()
scenario_args=(--scenario "$SCENARIO_ID")
scenario_label="$SCENARIO_ID"
if [[ "$APPROVAL_CHECKPOINTS" == "true" ]]; then
approval_args=(--approval-checkpoints)
gateway_args=()
if [[ -z "${SCENARIO_ID:-}" || "$SCENARIO_ID" == "slack-canary" || "$SCENARIO_ID" == "approval-checkpoints" ]]; then
scenario_args=()
scenario_label="approval-checkpoints"
fi
fi
set +e
pnpm openclaw qa mantis slack-desktop-smoke \
@@ -274,7 +314,7 @@ jobs:
--class standard \
--idle-timeout 45m \
--ttl 120m \
--gateway-setup \
"${gateway_args[@]}" \
--credential-source convex \
--credential-role ci \
--provider-mode live-frontier \
@@ -282,7 +322,9 @@ jobs:
--model openai/gpt-5.5 \
--alt-model openai/gpt-5.5 \
--fast \
--scenario "$SCENARIO_ID" \
"${scenario_args[@]}" \
"${approval_args[@]}" \
"${market_args[@]}" \
"${keep_args[@]}" \
"${lease_args[@]}"
mantis_exit=$?
@@ -312,27 +354,81 @@ jobs:
status="$(jq -r '.status' "$root/mantis-slack-desktop-smoke-summary.json")"
screenshot_required=false
desktop_capture_inline=true
if [[ "$status" == "pass" ]]; then
screenshot_required=true
fi
evidence_summary="Mantis ran Slack QA inside a Crabbox Linux VNC desktop, started an OpenClaw Slack gateway in that VM, opened Slack Web in the visible browser, and captured screenshot/video evidence."
expected_result="Slack QA and VM gateway setup pass"
checkpoint_artifacts='[]'
checkpoint_required=false
if [[ "$APPROVAL_CHECKPOINTS" == "true" ]]; then
evidence_summary="Mantis ran Slack native approval QA inside a Crabbox Linux VNC desktop, rendered pending/resolved approval checkpoints from the Slack API messages, and stored Slack QA artifacts."
expected_result="Slack native exec and plugin approval checkpoints pass"
screenshot_required=false
desktop_capture_inline=false
if [[ "$status" == "pass" ]]; then
checkpoint_required=true
fi
checkpoint_scenarios=()
if [[ "$scenario_label" == "approval-checkpoints" ]]; then
checkpoint_scenarios=("slack-approval-exec-native" "slack-approval-plugin-native")
else
checkpoint_scenarios=("$scenario_label")
fi
checkpoint_scenarios_json="$(printf '%s\n' "${checkpoint_scenarios[@]}" | jq -R . | jq -s .)"
checkpoint_artifacts="$(
jq -n \
--argjson checkpoint_required "$checkpoint_required" \
--argjson scenario_ids "$checkpoint_scenarios_json" \
'
def scenario_kind($id):
if $id == "slack-approval-exec-native" then "exec"
elif $id == "slack-approval-plugin-native" then "plugin"
else error("unsupported approval checkpoint scenario: \($id)")
end;
def scenario_title($id):
if scenario_kind($id) == "exec" then "Exec" else "Plugin" end;
[
$scenario_ids[] as $id
| ["pending", "resolved"][] as $state
| {
kind: "desktopScreenshot",
lane: "candidate",
label: "\(scenario_title($id)) approval \($state) checkpoint",
path: "approval-checkpoints/\($id)-\($state).png",
targetPath: "approval-checkpoints/\($id)-\($state).png",
alt: "Rendered Slack \(scenario_kind($id)) approval \($state) checkpoint",
width: 720,
inline: true,
required: $checkpoint_required
}
]
'
)"
fi
jq -n \
--arg status "$status" \
--arg candidate_sha "${{ needs.validate_ref.outputs.candidate_revision }}" \
--arg scenario "$SCENARIO_ID" \
--arg scenario "$scenario_label" \
--arg summary "$evidence_summary" \
--arg expected "$expected_result" \
--argjson checkpoint_artifacts "$checkpoint_artifacts" \
--argjson screenshot_required "$screenshot_required" \
--argjson desktop_capture_inline "$desktop_capture_inline" \
'{
schemaVersion: 1,
id: "slack-desktop-smoke",
title: "Mantis Slack Desktop Smoke QA",
summary: "Mantis ran Slack QA inside a Crabbox Linux VNC desktop, started an OpenClaw Slack gateway in that VM, opened Slack Web in the visible browser, and captured screenshot/video evidence.",
summary: $summary,
scenario: $scenario,
comparison: {
candidate: { sha: $candidate_sha, expected: "Slack QA and VM gateway setup pass", status: $status, fixed: ($status == "pass") },
candidate: { sha: $candidate_sha, expected: $expected, status: $status, fixed: ($status == "pass") },
pass: ($status == "pass")
},
artifacts: [
{ kind: "desktopScreenshot", lane: "candidate", label: "Slack desktop/VNC browser", path: "slack-desktop-smoke.png", targetPath: "slack-desktop.png", alt: "Slack Web desktop screenshot from the Mantis VM", width: 720, inline: true, required: $screenshot_required },
{ kind: "motionPreview", lane: "candidate", label: "Slack motion preview", path: "slack-desktop-smoke-preview.gif", targetPath: "slack-desktop-preview.gif", alt: "Animated Slack desktop preview", width: 720, inline: true, required: false },
artifacts: ([
{ kind: "desktopScreenshot", lane: "candidate", label: "Slack desktop/VNC browser", path: "slack-desktop-smoke.png", targetPath: "slack-desktop.png", alt: "Slack Web desktop screenshot from the Mantis VM", width: 720, inline: $desktop_capture_inline, required: $screenshot_required },
{ kind: "motionPreview", lane: "candidate", label: "Slack motion preview", path: "slack-desktop-smoke-preview.gif", targetPath: "slack-desktop-preview.gif", alt: "Animated Slack desktop preview", width: 720, inline: $desktop_capture_inline, required: false },
{ kind: "motionClip", lane: "candidate", label: "Slack change MP4", path: "slack-desktop-smoke-change.mp4", targetPath: "slack-desktop-change.mp4", required: false },
{ kind: "fullVideo", lane: "candidate", label: "Slack desktop MP4", path: "slack-desktop-smoke.mp4", targetPath: "slack-desktop.mp4", required: false },
{ kind: "metadata", lane: "run", label: "Slack desktop summary", path: "mantis-slack-desktop-smoke-summary.json", targetPath: "summary.json" },
@@ -340,7 +436,7 @@ jobs:
{ kind: "metadata", lane: "run", label: "Slack command log", path: "slack-desktop-command.log", targetPath: "slack-desktop-command.log", required: false },
{ kind: "metadata", lane: "run", label: "Slack preview metadata", path: "slack-desktop-smoke-preview.json", targetPath: "slack-desktop-preview.json", required: false },
{ kind: "metadata", lane: "run", label: "Slack error", path: "error.txt", targetPath: "error.txt", required: false }
]
] + $checkpoint_artifacts)
}' > "$root/mantis-evidence.json"
cat "$root/mantis-slack-desktop-smoke-report.md" >> "$GITHUB_STEP_SUMMARY"

View File

@@ -116,6 +116,34 @@ Use `--hydrate-mode prehydrated` only when the reused remote workspace already
has `node_modules` and a built `dist/`. Mantis fails closed if those are
missing.
Prove native Slack approval UI:
```bash
pnpm openclaw qa mantis slack-desktop-smoke \
--provider aws \
--class standard \
--approval-checkpoints \
--credential-source convex \
--credential-role maintainer \
--hydrate-mode source
```
Approval checkpoint mode is mutually exclusive with `--gateway-setup`. It runs
the opt-in `slack-approval-exec-native` and `slack-approval-plugin-native`
scenarios unless you pass explicit approval checkpoint `--scenario` flags; other
Slack scenarios are rejected before the VM starts. The Slack QA runner writes
each checkpoint JSON file from the real Slack API message it observed, then the
remote watcher renders that message snapshot into
`approval-checkpoints/<scenario>-pending.png` and
`approval-checkpoints/<scenario>-resolved.png`. The run fails if any checkpoint
JSON, message evidence, ack JSON, or rendered screenshot is missing or empty.
Cold GitHub Actions leases do not have Slack Web cookies, so their browser
capture can land on Slack sign-in. For approval checkpoint proof, trust the
rendered checkpoint images and Slack QA artifacts rather than
`slack-desktop-smoke.png`. Use a kept warm lease with a manually logged-in Slack
Web profile only when the browser screenshot itself must show Slack Web.
## Hydrate modes
| Mode | Use when | Remote behavior | Tradeoff |
@@ -139,9 +167,9 @@ uses `/var/cache/crabbox/pnpm` when present.
- `artifacts.copy`: rsync back from the VM.
`crabbox.remote_run` can be marked `accepted` when Crabbox returns a non-zero
remote status after Mantis has copied metadata proving that the OpenClaw gateway
is alive and the setup completed. Treat `accepted` as pass-with-explanation,
not a failed scenario.
remote status after Mantis has copied metadata proving that either the OpenClaw
gateway setup completed or the Slack QA command itself exited successfully.
Treat `accepted` as pass-with-explanation, not a failed scenario.
If the run is slow:
@@ -159,7 +187,8 @@ A good PR comment should show:
- scenario id and candidate SHA;
- GitHub Actions run URL;
- artifact URL;
- inline screenshot;
- inline approval checkpoint screenshot, or a Slack Web screenshot from a
logged-in warm lease;
- inline animated preview when available;
- full MP4 and trimmed MP4 links;
- pass/fail status;

View File

@@ -202,6 +202,10 @@ Useful Slack desktop flags:
- `--credential-source convex --credential-role ci` uses the shared credential pool instead of direct Slack env tokens.
- `--provider-mode`, `--model`, `--alt-model`, and `--fast` pass through to the Slack live lane.
Approval checkpoint runs render Slack API message snapshots into checkpoint PNGs
for CI-safe visual proof. `slack-desktop-smoke.png` is only proof of Slack Web
when the lease uses a warm browser profile that is already logged in.
The GitHub smoke workflow is `Mantis Discord Smoke`. The before and after GitHub
workflow for the first real scenario is `Mantis Discord Status Reactions`. It
accepts:

View File

@@ -165,6 +165,25 @@ With `--gateway-setup`, Mantis leaves a persistent OpenClaw Slack gateway
running inside the VM on port `38973`; without it, the command runs the normal
bot-to-bot Slack QA lane and exits after artifact capture.
To prove native Slack approval UI with desktop evidence, run the Mantis approval
checkpoint mode:
```bash
pnpm openclaw qa mantis slack-desktop-smoke \
--approval-checkpoints \
--credential-source convex \
--credential-role maintainer
```
This mode is mutually exclusive with `--gateway-setup`. It runs the Slack
approval scenarios, rejects non-approval scenario ids, waits at each pending and
resolved approval state, renders the observed Slack API message into
`approval-checkpoints/<scenario>-pending.png` and
`approval-checkpoints/<scenario>-resolved.png`, then fails if any checkpoint,
message evidence, acknowledgement, or rendered screenshot is missing or empty.
Cold CI leases may still show Slack sign-in in `slack-desktop-smoke.png`; the
approval checkpoint images are the visual proof for this lane.
The operator checklist, GitHub workflow dispatch command, evidence-comment
contract, hydrate-mode decision table, timing interpretation, and failure
handling steps live in [Mantis Slack Desktop Runbook](/concepts/mantis-slack-desktop-runbook).
@@ -400,8 +419,13 @@ Required env when `--credential-source env`:
Optional:
- `OPENCLAW_QA_SLACK_CAPTURE_CONTENT=1` keeps message bodies in observed-message artifacts.
- `OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_DIR` enables visual approval
checkpoints for Mantis. The runner writes `<scenario>.pending.json` and
`<scenario>.resolved.json`, then waits for the matching
`<scenario>.pending.ack.json` and `<scenario>.resolved.ack.json` files.
- `OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_TIMEOUT_MS` overrides the checkpoint
acknowledgement timeout, in milliseconds. The default is `120000` (two minutes).
Scenarios (`extensions/qa-lab/src/live-transports/slack/slack-live.runtime.ts:39`):
Scenarios (`extensions/qa-lab/src/live-transports/slack/slack-live.runtime.ts`):
- `slack-canary`
- `slack-mention-gating`
@@ -410,12 +434,22 @@ Scenarios (`extensions/qa-lab/src/live-transports/slack/slack-live.runtime.ts:39
- `slack-restart-resume`
- `slack-thread-follow-up`
- `slack-thread-isolation`
- `slack-approval-exec-native` - opt-in native Slack exec approval scenario.
Requests an exec approval through the gateway, verifies the Slack message has
native approval buttons, resolves it, and verifies the resolved Slack update.
- `slack-approval-plugin-native` - opt-in native Slack plugin approval scenario.
Enables exec and plugin approval forwarding together so plugin events are not
suppressed by exec approval routing, then verifies the same pending/resolved
native Slack UI path.
Output artifacts:
- `slack-qa-report.md`
- `slack-qa-summary.json`
- `slack-qa-observed-messages.json` - bodies redacted unless `OPENCLAW_QA_SLACK_CAPTURE_CONTENT=1`.
- `approval-checkpoints/` - only when Mantis sets
`OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_DIR`; contains checkpoint JSON,
acknowledgement JSON, and pending/resolved screenshots.
#### Setting up the Slack workspace

View File

@@ -337,10 +337,14 @@ describe("qa cli registration", () => {
"/tmp/crabbox",
"--provider",
"hetzner",
"--market",
"on-demand",
"--machine-class",
"beast",
"--lease-id",
"cbx_123abc",
"--fresh-pr",
"openclaw/openclaw#85141",
"--idle-timeout",
"45m",
"--ttl",
@@ -369,11 +373,13 @@ describe("qa cli registration", () => {
credentialRole: "maintainer",
credentialSource: "env",
fastMode: true,
freshPr: "openclaw/openclaw#85141",
gatewaySetup: undefined,
idleTimeout: "45m",
keepLease: true,
leaseId: "cbx_123abc",
machineClass: "beast",
market: "on-demand",
outputDir: ".artifacts/qa-e2e/mantis/slack-desktop",
primaryModel: "openai/gpt-5.5",
provider: "hetzner",

View File

@@ -1,7 +1,7 @@
import fs from "node:fs/promises";
import { tmpdir } from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";
import { describe, expect, it, vi } from "vitest";
import { testing, runSlackQaLive } from "./slack-live.runtime.js";
describe("Slack live QA runtime helpers", () => {
@@ -66,6 +66,239 @@ describe("Slack live QA runtime helpers", () => {
]);
});
// Approval scenarios must be selectable by explicit id, in the order requested,
// while the exec approval scenario stays out of the standard scenario id list.
it("selects native approval scenarios by id without changing standard coverage", () => {
expect(
testing
.findScenario(["slack-approval-exec-native", "slack-approval-plugin-native"])
.map((scenario) => scenario.id),
).toEqual(["slack-approval-exec-native", "slack-approval-plugin-native"]);
// Only the exec id is checked here; the plugin id is not asserted against the standard list.
expect(testing.SLACK_QA_STANDARD_SCENARIO_IDS).not.toContain("slack-approval-exec-native");
});
// Builds a Slack QA config with an approvals override and checks both the
// top-level approval settings and the per-account Slack settings it produces.
it("enables Slack native exec and plugin approval delivery for approval scenarios", () => {
const cfg = testing.buildSlackQaConfig(
{},
{
channelId: "C123456789",
driverBotUserId: "U999999999",
overrides: {
approvals: {
exec: true,
plugin: true,
target: "channel",
},
},
sutAccountId: "sut",
sutAppToken: "xapp-sut",
sutBotToken: "xoxb-sut",
},
);
// Both approval kinds are expected to be enabled in "session" mode.
expect(cfg.approvals?.exec).toEqual({ enabled: true, mode: "session" });
expect(cfg.approvals?.plugin).toEqual({ enabled: true, mode: "session" });
// The account is looked up under the `sutAccountId` passed above.
const account = cfg.channels?.slack?.accounts?.sut;
// The driver bot user is expected to be the only allowed sender and the only approver.
expect(account?.allowFrom).toEqual(["U999999999"]);
expect(account?.execApprovals).toEqual({
enabled: true,
approvers: ["U999999999"],
target: "channel",
});
expect(account?.channels?.C123456789?.users).toEqual(["U999999999"]);
});
// A single `actions` block with one button should yield that button's `value`
// (the approve command string), not its display text.
it("extracts Slack native approval button values from blocks", () => {
expect(
testing.collectSlackActionValues([
{
type: "actions",
elements: [
{
type: "button",
text: { type: "plain_text", text: "Allow Once" },
value: "/approve plugin:abc allow-once",
},
],
},
]),
).toEqual(["/approve plugin:abc allow-once"]);
});
// For a message with one section block and one actions block, the checkpoint
// evidence is expected to list the button label, the section text followed by
// the button label as block text, and to flag that native actions are present.
it("builds approval checkpoint message evidence from Slack blocks", () => {
expect(
testing.buildSlackApprovalCheckpointMessage({
blocks: [
{
type: "section",
text: { type: "mrkdwn", text: "Plugin approval required" },
},
{
type: "actions",
elements: [
{
type: "button",
text: { type: "plain_text", text: "Allow Once" },
value: "/approve plugin:abc allow-once",
},
],
},
],
text: "Plugin approval required",
}),
).toEqual({
// The button `value` does not appear in the expected evidence; only labels and text do.
actionLabels: ["Allow Once"],
blockText: ["Plugin approval required", "Allow Once"],
hasNativeActions: true,
text: "Plugin approval required",
});
});
// With both env vars set, the resolver returns the directory and the timeout
// parsed to a number; with an empty env it returns undefined.
it("resolves Slack approval checkpoint configuration from env", () => {
expect(
testing.resolveSlackApprovalCheckpointConfig({
OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_DIR: "/tmp/checkpoints",
OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_TIMEOUT_MS: "5000",
}),
).toEqual({
checkpointDir: "/tmp/checkpoints",
timeoutMs: 5000,
});
expect(testing.resolveSlackApprovalCheckpointConfig({})).toBeUndefined();
});
// Compares the "started" and "connected" readiness modes against a status that
// is running, has no error, and carries no `connected` field at all.
it("uses started Slack channel readiness for native approval-only scenarios", () => {
const startedStatus = {
lastError: null,
restartPending: false,
running: true,
};
// "started" accepts the status without a `connected` field; "connected" does not.
expect(testing.isSlackChannelReadyForQa(startedStatus, "started")).toBe(true);
expect(testing.isSlackChannelReadyForQa(startedStatus, "connected")).toBe(false);
// An explicit `connected: false` is rejected even in "started" mode.
expect(
testing.isSlackChannelReadyForQa(
{
...startedStatus,
connected: false,
},
"started",
),
).toBe(false);
// A recorded `lastError` is rejected even in "started" mode.
expect(
testing.isSlackChannelReadyForQa(
{
...startedStatus,
lastError: "socket auth failed",
},
"started",
),
).toBe(false);
});
// Exercises three cases of the ready-since calculation: first observation,
// a later observation without a connection timestamp, and a later observation
// that carries `lastConnectedAt`.
it("keeps Slack readiness stability anchored when connectedAt is absent", () => {
// No previous anchor and no connection timestamp: the observation time is returned.
expect(
testing.resolveSlackChannelReadySince({
observedAt: 2_000,
previousReadySince: undefined,
status: {
lastError: null,
restartPending: false,
running: true,
},
}),
).toBe(2_000);
// A previous anchor exists and the status still has no connection timestamp:
// the earlier anchor is kept rather than the newer observation time.
expect(
testing.resolveSlackChannelReadySince({
observedAt: 3_000,
previousReadySince: 2_000,
status: {
lastError: null,
restartPending: false,
running: true,
},
}),
).toBe(2_000);
// `lastConnectedAt` (3_500) is later than the previous anchor (2_000) and is the value returned.
expect(
testing.resolveSlackChannelReadySince({
observedAt: 4_000,
previousReadySince: 2_000,
status: {
lastConnectedAt: 3_500,
lastError: null,
restartPending: false,
running: true,
},
}),
).toBe(3_500);
});
// Resolving a plugin approval should issue a single gateway call to
// "plugin.approval.resolve" with an explicit 35 second timeout.
it("allows live approval resolve RPCs to take longer than the generic gateway probe timeout", async () => {
// Stub gateway call; the resolved value mirrors the requested decision.
const call = vi.fn(async () => ({ decision: "allow-once" }));
await testing.resolveApprovalDecision({
approvalId: "plugin:abc",
context: {
gateway: { call },
// Only `gateway.call` is stubbed, so the partial context is cast.
} as never,
decision: "allow-once",
kind: "plugin",
});
// The approval id is sent as `id`, and the options carry the extended timeout.
expect(call).toHaveBeenCalledWith(
"plugin.approval.resolve",
{ decision: "allow-once", id: "plugin:abc" },
{
expectFinal: false,
timeoutMs: 35_000,
},
);
});
// With content excluded and metadata redaction on, the approval record of the
// first converted scenario should keep only the approval kind and decision,
// replace the approval id with a placeholder, and blank every other field.
it("redacts approval artifact content and Slack metadata in summary-shaped results", () => {
expect(
testing.toSlackQaScenarioArtifactResults({
includeContent: false,
redactMetadata: true,
scenarios: [
{
approval: {
approvalId: "plugin:abc",
approvalKind: "plugin",
channelId: "C123456789",
decision: "allow-once",
pendingActionValues: ["/approve plugin:abc allow-once"],
pendingMessageTs: "1.000000",
pendingText: "Plugin approval required",
resolvedActionValues: [],
resolvedMessageTs: "1.000000",
resolvedText: "Plugin approval: Allowed once",
threadTs: "1.000000",
},
details: "plugin approval resolved",
id: "slack-approval-plugin-native",
status: "pass",
title: "Slack native plugin approval prompt resolves with exec approvals enabled",
},
],
})[0]?.approval,
).toEqual({
approvalId: "<redacted>",
approvalKind: "plugin",
channelId: undefined,
decision: "allow-once",
pendingActionValues: undefined,
// Checkpoint and screenshot paths were not supplied in the input and are expected to be undefined.
pendingCheckpointPath: undefined,
pendingMessageTs: undefined,
pendingScreenshotPath: undefined,
pendingText: undefined,
resolvedActionValues: undefined,
resolvedCheckpointPath: undefined,
resolvedMessageTs: undefined,
resolvedScreenshotPath: undefined,
resolvedText: undefined,
threadTs: undefined,
});
});
it("ignores delayed unrelated SUT replies during mention-gating", async () => {
const observedMessages: Array<unknown> = [];
await expect(

View File

@@ -62,6 +62,11 @@ export async function runMantisSlackDesktopSmokeCommand(opts: MantisSlackDesktop
if (result.videoPath) {
process.stdout.write(`Mantis Slack desktop video: ${result.videoPath}\n`);
}
for (const screenshotPath of result.approvalCheckpointScreenshotPaths ?? []) {
process.stdout.write(
`Mantis Slack desktop approval checkpoint screenshot: ${screenshotPath}\n`,
);
}
if (result.status === "fail") {
process.exitCode = 1;
}

View File

@@ -105,17 +105,20 @@ type MantisDesktopBrowserSmokeCommanderOptions = {
type MantisSlackDesktopSmokeCommanderOptions = {
altModel?: string;
approvalCheckpoints?: boolean;
class?: string;
crabboxBin?: string;
credentialRole?: string;
credentialSource?: string;
fast?: boolean;
freshPr?: string;
gatewaySetup?: boolean;
hydrateMode?: MantisSlackDesktopHydrateMode;
idleTimeout?: string;
keepLease?: boolean;
leaseId?: string;
machineClass?: string;
market?: string;
model?: string;
outputDir?: string;
provider?: string;
@@ -316,12 +319,18 @@ export function registerMantisCli(qa: Command) {
.option("--provider <provider>", "Crabbox provider")
.option("--machine-class <class>", "Crabbox machine class")
.option("--class <class>", "Alias for --machine-class")
.option("--market <market>", "Crabbox capacity market: spot or on-demand")
.option("--lease-id <id>", "Reuse an existing Crabbox lease")
.option("--fresh-pr <spec>", "Use Crabbox fresh PR checkout instead of syncing the local tree")
.option("--idle-timeout <duration>", "Crabbox idle timeout")
.option("--ttl <duration>", "Crabbox maximum lease lifetime")
.option("--keep-lease", "Keep a lease created by this run after a passing smoke")
.option("--no-keep-lease", "Stop a lease created by this run after a passing smoke")
.option("--gateway-setup", "Start a persistent OpenClaw Slack gateway inside the VNC VM")
.option(
"--approval-checkpoints",
"Run Slack approval scenarios with visual checkpoint screenshot acknowledgements",
)
.option("--slack-url <url>", "Slack web URL to open in the visible browser")
.option("--slack-channel-id <id>", "Slack channel id for gateway setup allowlist")
.option("--provider-mode <mode>", "QA provider mode")
@@ -338,18 +347,24 @@ export function registerMantisCli(qa: Command) {
.option("--credential-role <role>", "Credential role for convex auth")
.option("--fast", "Enable provider fast mode where supported")
.action(async (opts: MantisSlackDesktopSmokeCommanderOptions) => {
if (opts.approvalCheckpoints && opts.gatewaySetup) {
throw new Error("--approval-checkpoints cannot be used with --gateway-setup.");
}
await runSlackDesktopSmoke({
alternateModel: opts.altModel,
approvalCheckpoints: opts.approvalCheckpoints,
crabboxBin: opts.crabboxBin,
credentialRole: opts.credentialRole,
credentialSource: opts.credentialSource,
fastMode: opts.fast,
freshPr: opts.freshPr,
gatewaySetup: opts.gatewaySetup,
hydrateMode: opts.hydrateMode,
idleTimeout: opts.idleTimeout,
keepLease: opts.keepLease,
leaseId: opts.leaseId,
machineClass: opts.machineClass ?? opts.class,
market: opts.market,
outputDir: opts.outputDir,
primaryModel: opts.model,
provider: opts.provider,

View File

@@ -114,10 +114,12 @@ export async function warmupCrabbox(params: {
env: NodeJS.ProcessEnv;
idleTimeout: string;
machineClass: string;
market?: string;
provider: string;
runner: CommandRunner;
ttl: string;
}) {
const marketArgs = params.market ? ["--market", params.market] : [];
const result = await runCommand({
command: params.crabboxBin,
args: [
@@ -128,6 +130,7 @@ export async function warmupCrabbox(params: {
"--browser",
"--class",
params.machineClass,
...marketArgs,
"--idle-timeout",
params.idleTimeout,
"--ttl",

View File

@@ -1,6 +1,7 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { Command } from "commander";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { runMantisSlackDesktopSmoke } from "./slack-desktop-smoke.runtime.js";
@@ -34,6 +35,64 @@ function phaseStatus(
return phases.find((phase) => phase.name === name)?.status;
}
/**
 * Test fixture: fills `<outputDir>/approval-checkpoints` with the files an
 * approval checkpoint run leaves behind.
 *
 * For every scenario id and for each of the `pending` and `resolved` states it
 * writes, in this order:
 * - `<scenario>.<state>.json` (the checkpoint record),
 * - `<scenario>.<state>.ack.json` (the acknowledgement record),
 * - `<scenario>-<state>.png` (a placeholder screenshot body).
 *
 * Only `approvalKind` and `approvalId` depend on whether the scenario id
 * contains "plugin"; the message payload uses the plugin wording for every
 * scenario.
 *
 * @param outputDir Directory that receives the `approval-checkpoints` folder.
 * @param scenarioIds Scenario ids to generate fixture files for.
 */
async function writeApprovalCheckpointArtifacts(outputDir: string, scenarioIds: readonly string[]) {
  const checkpointDir = path.join(outputDir, "approval-checkpoints");
  await fs.mkdir(checkpointDir, { recursive: true });
  const checkpointStates = ["pending", "resolved"] as const;
  for (const scenarioId of scenarioIds) {
    const isPluginScenario = scenarioId.includes("plugin");
    for (const state of checkpointStates) {
      const isPending = state === "pending";
      const screenshotName = `${scenarioId}-${state}.png`;
      const headline = isPending ? "Plugin approval required" : "Plugin approval: Allowed once";

      const checkpoint = {
        version: 1,
        scenarioId,
        approvalKind: isPluginScenario ? "plugin" : "exec",
        state,
        approvalId: isPluginScenario ? "plugin:abc" : "exec-abc",
        channelId: "C123456789",
        messageTs: "1.000000",
        threadTs: null,
        decision: isPending ? null : "allow-once",
        observedAt: "2026-05-04T13:00:29.000Z",
        message: {
          actionLabels: isPending ? ["Allow Once", "Allow Always", "Deny"] : [],
          blockText: [headline, "Slack plugin approval QA marker"],
          hasNativeActions: isPending,
          text: headline,
        },
      };
      const acknowledgement = {
        version: 1,
        capturedAt: "2026-05-04T13:00:30.000Z",
        scenarioId,
        // Joined with "/" rather than path.join, so the value is the same string on every platform.
        screenshotPath: `${checkpointDir}/${screenshotName}`,
        state,
      };

      const checkpointFile = path.join(checkpointDir, `${scenarioId}.${state}.json`);
      await fs.writeFile(checkpointFile, `${JSON.stringify(checkpoint)}\n`);

      const acknowledgementFile = path.join(checkpointDir, `${scenarioId}.${state}.ack.json`);
      await fs.writeFile(acknowledgementFile, `${JSON.stringify(acknowledgement)}\n`);

      await fs.writeFile(path.join(checkpointDir, screenshotName), "png");
    }
  }
}
/**
 * Registers a vitest mock for `./cli.runtime.js` in which every exported
 * command runner is a stub, and returns the stub used for the Slack desktop
 * smoke command so a test can assert on how it was called.
 *
 * @param runMantisSlackDesktopSmokeCommand Stub to install for the Slack
 *   desktop smoke command; a fresh `vi.fn()` is used when omitted.
 * @returns The same stub that was installed.
 */
function mockMantisCliRuntime(runMantisSlackDesktopSmokeCommand = vi.fn()) {
  // The factory is handed to vitest as-is, so the other stubs are created
  // each time vitest invokes it rather than once up front.
  const buildCliRuntimeMock = () => {
    return {
      runMantisBeforeAfterCommand: vi.fn(),
      runMantisDesktopBrowserSmokeCommand: vi.fn(),
      runMantisDiscordSmokeCommand: vi.fn(),
      runMantisSlackDesktopSmokeCommand,
      runMantisTelegramDesktopBuilderCommand: vi.fn(),
      runMantisVisualDriverCommand: vi.fn(),
      runMantisVisualTaskCommand: vi.fn(),
    };
  };
  vi.doMock("./cli.runtime.js", buildCliRuntimeMock);
  return runMantisSlackDesktopSmokeCommand;
}
describe("mantis Slack desktop smoke runtime", () => {
let repoRoot: string;
@@ -86,7 +145,10 @@ describe("mantis Slack desktop smoke runtime", () => {
} else {
await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.png"), "png");
await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.mp4"), "mp4");
await fs.writeFile(path.join(outputDir as string, "remote-metadata.json"), "{}\n");
await fs.writeFile(
path.join(outputDir as string, "remote-metadata.json"),
`${JSON.stringify({ qaExitCode: 0 })}\n`,
);
await fs.writeFile(path.join(outputDir as string, "chrome.log"), "chrome\n");
await fs.writeFile(path.join(outputDir as string, "ffmpeg.log"), "ffmpeg\n");
await fs.writeFile(path.join(outputDir as string, "slack-desktop-command.log"), "qa\n");
@@ -101,6 +163,7 @@ describe("mantis Slack desktop smoke runtime", () => {
commandRunner: runner,
crabboxBin: "/tmp/crabbox",
env: runtimeEnv,
freshPr: "openclaw/openclaw#85141",
now: () => new Date("2026-05-04T13:00:00.000Z"),
outputDir: ".artifacts/qa-e2e/mantis/slack-desktop-test",
primaryModel: "openai/gpt-5.4",
@@ -124,6 +187,9 @@ describe("mantis Slack desktop smoke runtime", () => {
const runArgs = commands.find(
(entry) => entry.command === "/tmp/crabbox" && entry.args[0] === "run",
)?.args;
expect(runArgs).toContain("--no-hydrate");
expect(runArgs).toContain("--fresh-pr");
expect(runArgs).toContain("openclaw/openclaw#85141");
expect(runArgs).not.toContain("--no-sync");
const remoteScript = runArgs?.at(-1);
expect(remoteScript).toContain("hydrate_mode='source'");
@@ -139,6 +205,26 @@ describe("mantis Slack desktop smoke runtime", () => {
expect(remoteScript).not.toContain("-video_size");
expect(remoteScript).toContain("openclaw qa slack");
expect(remoteScript).toContain("--scenario 'slack-canary'");
expect(remoteScript).toContain(
'slack_qa_output_dir=".artifacts/qa-e2e/mantis/$(basename "$out")/slack-qa"',
);
expect(remoteScript).toContain('--output-dir "$slack_qa_output_dir"');
expect(remoteScript).toContain("copy_slack_qa_artifacts");
expect(remoteScript).not.toContain('--output-dir "$out/slack-qa"');
expect(remoteScript).toContain("remote_command_timeout_seconds=");
expect(remoteScript).toContain("remote-command-timeout.txt");
expect(remoteScript).toContain(
'timeout --kill-after=15s "${remote_command_timeout_seconds}s" bash -c run_mantis_remote_body >"$out/slack-desktop-command.log" 2>&1 &',
);
expect(remoteScript).toContain("MANTIS_REMOTE_HEARTBEAT");
expect(remoteScript).toContain("qa_status=$?");
expect(remoteScript).toContain("MANTIS_REMOTE_FAILURE_DIAGNOSTICS_BEGIN");
expect(remoteScript).toContain("$out/slack-qa/slack-qa-report.md");
expect(remoteScript).toContain("$out/slack-qa/slack-qa-summary.json");
expect(remoteScript).toContain("$out/slack-qa/slack-qa-observed-messages.json");
expect(remoteScript).toContain('tail -n 200 "$diagnostic_file"');
expect(remoteScript).toContain("Slack desktop screenshot is missing or empty");
expect(remoteScript).not.toContain('test -s "$out/slack-desktop-smoke.png"');
expect(remoteScript).toContain("OPENCLAW_MANTIS_SLACK_BROWSER_PROFILE_DIR");
const rsyncArgs = commands
.filter((entry) => entry.command === "rsync")
@@ -172,6 +258,193 @@ describe("mantis Slack desktop smoke runtime", () => {
expect(summary.timings.phases.map((phase) => phase.name)).toContain("artifacts.copy");
});
// Happy path for approval checkpoint mode: no scenario ids are passed, and the test expects both
// default approval scenarios in the rendered remote script plus four validated screenshots.
it("runs approval checkpoint mode with default approval scenarios and records screenshots", async () => {
const commands: { args: readonly string[]; command: string }[] = [];
const expectedScenarios = ["slack-approval-exec-native", "slack-approval-plugin-native"];
// Fake command runner: records every invocation and answers the crabbox/rsync calls with fixtures.
const runner = vi.fn(async (command: string, args: readonly string[]) => {
commands.push({ command, args });
if (command === "/tmp/crabbox" && args[0] === "warmup") {
return { stdout: "ready lease cbx_123abc\n", stderr: "" };
}
if (command === "/tmp/crabbox" && args[0] === "inspect") {
return {
stdout: `${JSON.stringify({
host: "203.0.113.10",
id: "cbx_123abc",
provider: "hetzner",
sshKey: "/tmp/key",
sshPort: "2222",
sshUser: "crabbox",
state: "active",
})}\n`,
stderr: "",
};
}
if (command === "rsync") {
// The last rsync argument is treated as the local destination directory and is seeded with
// the artifacts a successful remote run would have produced.
const outputDir = args.at(-1);
await fs.mkdir(outputDir as string, { recursive: true });
if (String(outputDir).endsWith("slack-qa/")) {
await fs.writeFile(path.join(outputDir as string, "slack-qa-report.md"), "# Slack\n");
} else {
await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.png"), "png");
await fs.writeFile(
path.join(outputDir as string, "remote-metadata.json"),
`${JSON.stringify({ qaExitCode: 0 })}\n`,
);
await fs.writeFile(path.join(outputDir as string, "slack-desktop-command.log"), "qa\n");
// NOTE(review): helper defined elsewhere in this file; presumably writes the checkpoint, ack,
// and screenshot fixtures for each scenario — confirm against its definition.
await writeApprovalCheckpointArtifacts(outputDir as string, expectedScenarios);
}
}
return { stdout: "", stderr: "" };
});
const result = await runMantisSlackDesktopSmoke({
approvalCheckpoints: true,
commandRunner: runner,
crabboxBin: "/tmp/crabbox",
now: () => new Date("2026-05-04T13:15:00.000Z"),
outputDir: ".artifacts/qa-e2e/mantis/slack-desktop-checkpoints",
repoRoot,
});
expect(result.status).toBe("pass");
// Two scenarios x two states (pending, resolved).
expect(result.approvalCheckpointScreenshotPaths).toHaveLength(4);
// The final argument of the `crabbox run` invocation is the rendered remote shell script.
const remoteScript = commands
.find((entry) => entry.command === "/tmp/crabbox" && entry.args[0] === "run")
?.args.at(-1);
expect(remoteScript).toContain("approval_checkpoints=1");
expect(remoteScript).toContain('export OPENCLAW_QA_SLACK_CHANNEL_ID="$slack_channel_id"');
expect(remoteScript).toContain("--scenario 'slack-approval-exec-native'");
expect(remoteScript).toContain("--scenario 'slack-approval-plugin-native'");
expect(remoteScript).toContain("OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_DIR");
expect(remoteScript).toContain("OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_TIMEOUT_MS");
// The watcher source must be written to disk with `cat`, not piped through `node`.
expect(remoteScript).toContain('cat >"$out/approval-checkpoint-watcher.mjs"');
expect(remoteScript).not.toContain('node >"$out/approval-checkpoint-watcher.mjs"');
expect(remoteScript).toContain("approval-checkpoint-watcher.mjs");
expect(remoteScript).toContain("OPENCLAW_MANTIS_APPROVAL_BROWSER_BIN");
expect(remoteScript).toContain("Rendered from the Slack API message observed by QA");
expect(remoteScript).toContain("class='wrap'");
// Screenshots are expected to come from a headless browser render, not a `scrot` desktop grab.
expect(remoteScript).toContain("--headless=new");
expect(remoteScript).not.toContain('spawn("scrot", [screenshotPath]');
expect(remoteScript).toContain("Slack QA exited before all expected approval checkpoints");
expect(remoteScript).toContain('if [ "$qa_exit" -eq 0 ]; then\n wait "$watcher_pid"');
expect(remoteScript).toContain(
'cp "$out/approval-checkpoints/slack-approval-plugin-native-pending.png" "$out/slack-desktop-smoke.png"',
);
expect(remoteScript).toContain(
'cp "$out/approval-checkpoints/slack-approval-exec-native-pending.png" "$out/slack-desktop-smoke.png"',
);
// The persisted summary must list the screenshots in scenario order, pending before resolved.
const summary = JSON.parse(await fs.readFile(result.summaryPath, "utf8")) as {
artifacts: {
approvalCheckpoints?: {
directoryPath: string;
screenshots: { scenarioId: string; screenshotPath: string; state: string }[];
};
};
};
expect(summary.artifacts.approvalCheckpoints?.screenshots).toHaveLength(4);
expect(
summary.artifacts.approvalCheckpoints?.screenshots.map((screenshot) =>
path.relative(result.outputDir, screenshot.screenshotPath),
),
).toEqual([
"approval-checkpoints/slack-approval-exec-native-pending.png",
"approval-checkpoints/slack-approval-exec-native-resolved.png",
"approval-checkpoints/slack-approval-plugin-native-pending.png",
"approval-checkpoints/slack-approval-plugin-native-resolved.png",
]);
await expect(fs.readFile(result.reportPath, "utf8")).resolves.toContain(
"Approval checkpoint slack-approval-plugin-native resolved",
);
});
// Approval checkpoint mode must refuse scenario ids that are not approval checkpoint scenarios.
it("rejects non-approval scenarios in approval checkpoint mode", async () => {
  const expectedMessage = "--approval-checkpoints only supports approval checkpoint scenarios";
  const smokeRun = runMantisSlackDesktopSmoke({
    approvalCheckpoints: true,
    crabboxBin: "/tmp/crabbox",
    repoRoot,
    scenarioIds: ["slack-canary"],
  });
  await expect(smokeRun).rejects.toThrow(expectedMessage);
});
// Negative path: every artifact is present, but one acknowledgement file records the wrong state,
// so the run must be reported as failed with an "unexpected state" error.
it("fails approval checkpoint mode when ack metadata does not match the expected state", async () => {
const expectedScenarios = ["slack-approval-exec-native", "slack-approval-plugin-native"];
// Fake command runner answering the crabbox/rsync calls with fixtures.
const runner = vi.fn(async (command: string, args: readonly string[]) => {
if (command === "/tmp/crabbox" && args[0] === "warmup") {
return { stdout: "ready lease cbx_123abc\n", stderr: "" };
}
if (command === "/tmp/crabbox" && args[0] === "inspect") {
return {
stdout: `${JSON.stringify({
host: "203.0.113.10",
id: "cbx_123abc",
provider: "hetzner",
sshKey: "/tmp/key",
sshPort: "2222",
sshUser: "crabbox",
state: "active",
})}\n`,
stderr: "",
};
}
if (command === "rsync") {
// The last rsync argument is treated as the local destination directory.
const outputDir = args.at(-1) as string;
await fs.mkdir(outputDir, { recursive: true });
if (!outputDir.endsWith("slack-qa/")) {
await fs.writeFile(path.join(outputDir, "slack-desktop-smoke.png"), "png");
await fs.writeFile(
path.join(outputDir, "remote-metadata.json"),
`${JSON.stringify({ qaExitCode: 0 })}\n`,
);
await fs.writeFile(path.join(outputDir, "slack-desktop-command.log"), "qa\n");
await writeApprovalCheckpointArtifacts(outputDir, expectedScenarios);
// Overwrite the plugin scenario's *resolved* ack so that it claims state "pending".
await fs.writeFile(
path.join(
outputDir,
"approval-checkpoints",
"slack-approval-plugin-native.resolved.ack.json",
),
`${JSON.stringify({
version: 1,
capturedAt: "2026-05-04T13:00:30.000Z",
scenarioId: "slack-approval-plugin-native",
screenshotPath: `${outputDir}/approval-checkpoints/slack-approval-plugin-native-resolved.png`,
state: "pending",
})}\n`,
);
}
}
return { stdout: "", stderr: "" };
});
const result = await runMantisSlackDesktopSmoke({
approvalCheckpoints: true,
commandRunner: runner,
crabboxBin: "/tmp/crabbox",
outputDir: ".artifacts/qa-e2e/mantis/slack-desktop-bad-checkpoints",
repoRoot,
});
// The run resolves with a failing status and the summary carries the validation error.
expect(result.status).toBe("fail");
const summary = JSON.parse(await fs.readFile(result.summaryPath, "utf8")) as {
error?: string;
};
expect(summary.error).toContain("unexpected state");
});
// Approval checkpoint mode and gateway setup are mutually exclusive runtime options.
it("rejects approval checkpoints with gateway setup", async () => {
  const expectedMessage = "--approval-checkpoints cannot be used with --gateway-setup";
  const smokeRun = runMantisSlackDesktopSmoke({
    approvalCheckpoints: true,
    crabboxBin: "/tmp/crabbox",
    gatewaySetup: true,
    repoRoot,
  });
  await expect(smokeRun).rejects.toThrow(expectedMessage);
});
it("supports prehydrated remote workspaces without installing or building inside the VM", async () => {
const commands: { args: readonly string[]; command: string }[] = [];
const runner = vi.fn(async (command: string, args: readonly string[]) => {
@@ -475,6 +748,66 @@ describe("mantis Slack desktop smoke runtime", () => {
expect(phaseStatus(summary.timings.phases, "crabbox.remote_run")).toBe("accepted");
});
// The `crabbox run` command itself fails, but the copied remote metadata records qaExitCode 0 and
// all checkpoint artifacts are present, so the overall run is still expected to pass.
it("passes Slack QA when Crabbox returns non-zero after remote metadata proves QA success", async () => {
const expectedScenarios = ["slack-approval-exec-native", "slack-approval-plugin-native"];
// Fake command runner answering the crabbox/rsync calls with fixtures.
const runner = vi.fn(async (command: string, args: readonly string[]) => {
if (command === "/tmp/crabbox" && args[0] === "warmup") {
return { stdout: "ready lease cbx_ba5eba11\n", stderr: "" };
}
if (command === "/tmp/crabbox" && args[0] === "inspect") {
return {
stdout: `${JSON.stringify({
host: "203.0.113.10",
id: "cbx_ba5eba11",
provider: "hetzner",
sshKey: "/tmp/key",
sshPort: "2222",
sshUser: "crabbox",
state: "active",
})}\n`,
stderr: "",
};
}
if (command === "/tmp/crabbox" && args[0] === "run") {
// Simulate the remote command reporting a non-zero exit.
throw new Error("remote command exited 1");
}
if (command === "rsync") {
// The last rsync argument is treated as the local destination directory.
const outputDir = args.at(-1);
await fs.mkdir(outputDir as string, { recursive: true });
if (String(outputDir).endsWith("slack-qa/")) {
await fs.writeFile(path.join(outputDir as string, "slack-qa-report.md"), "# Slack\n");
} else {
await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.png"), "png");
await fs.writeFile(
path.join(outputDir as string, "remote-metadata.json"),
`${JSON.stringify({ qaExitCode: 0 })}\n`,
);
await fs.writeFile(path.join(outputDir as string, "slack-desktop-command.log"), "qa\n");
await writeApprovalCheckpointArtifacts(outputDir as string, expectedScenarios);
}
}
return { stdout: "", stderr: "" };
});
const result = await runMantisSlackDesktopSmoke({
approvalCheckpoints: true,
commandRunner: runner,
crabboxBin: "/tmp/crabbox",
now: () => new Date("2026-05-04T14:45:00.000Z"),
outputDir: ".artifacts/qa-e2e/mantis/slack-desktop-qa-metadata",
repoRoot,
});
expect(result.status).toBe("pass");
expect(result.approvalCheckpointScreenshotPaths).toHaveLength(4);
const summary = JSON.parse(await fs.readFile(result.summaryPath, "utf8")) as {
status: string;
timings: { phases: { name: string; status: string }[] };
};
expect(summary.status).toBe("pass");
// The remote-run phase is recorded as "accepted" rather than "pass" in this situation.
expect(phaseStatus(summary.timings.phases, "crabbox.remote_run")).toBe("accepted");
});
it("copies the screenshot before reporting a failed remote Slack QA run", async () => {
const runner = vi.fn(async (command: string, args: readonly string[]) => {
if (command === "/tmp/crabbox" && args[0] === "inspect") {
@@ -536,6 +869,55 @@ describe("mantis Slack desktop smoke runtime", () => {
expect(summary.artifacts.videoPath).toContain("slack-desktop-smoke.mp4");
});
// Inverse of the "accepted" case: the `crabbox run` command succeeds, but the copied remote
// metadata records qaExitCode 7, so the run must be reported as failed.
it("reports Slack QA failure from copied remote metadata when Crabbox run exits zero", async () => {
// Fake command runner; there is no special-case for `run`, so it falls through to the empty
// success result at the bottom.
const runner = vi.fn(async (command: string, args: readonly string[]) => {
if (command === "/tmp/crabbox" && args[0] === "inspect") {
return {
stdout: `${JSON.stringify({
host: "203.0.113.10",
id: "cbx_existing",
provider: "hetzner",
sshKey: "/tmp/key",
sshPort: "2222",
sshUser: "crabbox",
})}\n`,
stderr: "",
};
}
if (command === "rsync") {
// The last rsync argument is treated as the local destination directory.
const outputDir = args.at(-1);
await fs.mkdir(outputDir as string, { recursive: true });
if (String(outputDir).endsWith("slack-qa/")) {
// Leave the slack-qa directory empty.
return { stdout: "", stderr: "" };
}
await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.png"), "png");
await fs.writeFile(
path.join(outputDir as string, "remote-metadata.json"),
`${JSON.stringify({ qaExitCode: 7 })}\n`,
);
}
return { stdout: "", stderr: "" };
});
// An existing lease id is supplied, and the runner above has no `warmup` branch.
const result = await runMantisSlackDesktopSmoke({
commandRunner: runner,
crabboxBin: "/tmp/crabbox",
leaseId: "cbx_existing",
outputDir: ".artifacts/qa-e2e/mantis/slack-desktop-metadata-fail",
repoRoot,
});
expect(result.status).toBe("fail");
const summary = JSON.parse(await fs.readFile(result.summaryPath, "utf8")) as {
error?: string;
status: string;
timings: { phases: { name: string; status: string }[] };
};
expect(summary.status).toBe("fail");
expect(summary.error).toContain("Slack QA exited with code 7");
// The remote-run phase itself still counts as passed; only the QA exit code fails the run.
expect(phaseStatus(summary.timings.phases, "crabbox.remote_run")).toBe("pass");
});
it("accepts Blacksmith Testbox lease ids from Crabbox warmup", async () => {
const commands: { args: readonly string[]; command: string }[] = [];
const runner = vi.fn(async (command: string, args: readonly string[]) => {
@@ -565,7 +947,10 @@ describe("mantis Slack desktop smoke runtime", () => {
} else {
await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.png"), "png");
await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.mp4"), "mp4");
await fs.writeFile(path.join(outputDir as string, "remote-metadata.json"), "{}\n");
await fs.writeFile(
path.join(outputDir as string, "remote-metadata.json"),
`${JSON.stringify({ qaExitCode: 0 })}\n`,
);
await fs.writeFile(path.join(outputDir as string, "chrome.log"), "chrome\n");
await fs.writeFile(path.join(outputDir as string, "ffmpeg.log"), "ffmpeg\n");
await fs.writeFile(path.join(outputDir as string, "slack-desktop-command.log"), "qa\n");
@@ -598,4 +983,56 @@ describe("mantis Slack desktop smoke runtime", () => {
expect(summary.crabbox.id).toBe("tbx_abc-123_more");
expect(summary.crabbox.provider).toBe("blacksmith-testbox");
});
// Parses a full `mantis slack-desktop-smoke` command line and checks that the approval checkpoint,
// market, and scenario flags are forwarded to the mocked runtime command.
it("routes the approval checkpoints CLI flag into the Slack desktop runtime", async () => {
  vi.resetModules();
  const runtimeCommand = mockMantisCliRuntime(vi.fn(async () => undefined));
  const cliModule = await import("./cli.js");
  const program = new Command("qa");
  cliModule.registerMantisCli(program);

  const argv = [
    "node",
    "openclaw",
    "mantis",
    "slack-desktop-smoke",
    "--approval-checkpoints",
    "--market",
    "on-demand",
    "--scenario",
    "slack-approval-plugin-native",
  ];
  await program.parseAsync(argv);

  const forwardedOptions = expect.objectContaining({
    approvalCheckpoints: true,
    gatewaySetup: undefined,
    market: "on-demand",
    scenarioIds: ["slack-approval-plugin-native"],
  });
  expect(runtimeCommand).toHaveBeenCalledWith(forwardedOptions);
  vi.doUnmock("./cli.runtime.js");
});
// Passing both --approval-checkpoints and --gateway-setup must fail during CLI parsing, before the
// mocked runtime command is ever invoked.
it("rejects mutually exclusive approval checkpoint and gateway setup CLI flags", async () => {
  vi.resetModules();
  const runtimeCommand = mockMantisCliRuntime(vi.fn(async () => undefined));
  const cliModule = await import("./cli.js");
  const program = new Command("qa");
  cliModule.registerMantisCli(program);

  const argv = [
    "node",
    "openclaw",
    "mantis",
    "slack-desktop-smoke",
    "--approval-checkpoints",
    "--gateway-setup",
  ];
  const parsing = program.parseAsync(argv);
  await expect(parsing).rejects.toThrow(
    "--approval-checkpoints cannot be used with --gateway-setup",
  );

  expect(runtimeCommand).not.toHaveBeenCalled();
  vi.doUnmock("./cli.runtime.js");
});
});

View File

@@ -22,18 +22,21 @@ import {
export type MantisSlackDesktopSmokeOptions = {
alternateModel?: string;
approvalCheckpoints?: boolean;
commandRunner?: CommandRunner;
crabboxBin?: string;
credentialRole?: string;
credentialSource?: string;
env?: NodeJS.ProcessEnv;
fastMode?: boolean;
freshPr?: string;
gatewaySetup?: boolean;
hydrateMode?: MantisSlackDesktopHydrateMode;
idleTimeout?: string;
keepLease?: boolean;
leaseId?: string;
machineClass?: string;
market?: string;
now?: () => Date;
outputDir?: string;
primaryModel?: string;
@@ -49,6 +52,7 @@ export type MantisSlackDesktopSmokeOptions = {
export type MantisSlackDesktopHydrateMode = "prehydrated" | "source";
export type MantisSlackDesktopSmokeResult = {
approvalCheckpointScreenshotPaths?: string[];
outputDir: string;
reportPath: string;
screenshotPath?: string;
@@ -70,6 +74,7 @@ type SlackGatewayCredentialHeartbeat = ReturnType<typeof startQaCredentialLeaseH
type MantisSlackDesktopSmokeSummary = {
artifacts: {
approvalCheckpoints?: MantisApprovalCheckpointArtifacts;
reportPath: string;
screenshotPath?: string;
slackQaDir?: string;
@@ -118,6 +123,21 @@ type SlackDesktopRemoteMetadata = {
qaExitCode?: number;
};
/** The two states for which an approval checkpoint is captured and validated. */
type MantisApprovalCheckpointState = "pending" | "resolved";
/**
 * Paths and identity of one validated checkpoint capture (one scenario in one state), as built by
 * `collectApprovalCheckpointArtifacts`.
 */
type MantisApprovalCheckpointScreenshot = {
// Acknowledgement JSON: `<scenarioId>.<state>.ack.json` in the checkpoint directory.
ackPath: string;
// Checkpoint JSON: `<scenarioId>.<state>.json` in the checkpoint directory.
checkpointPath: string;
scenarioId: string;
// Screenshot image: `<scenarioId>-<state>.png` in the checkpoint directory.
screenshotPath: string;
state: MantisApprovalCheckpointState;
};
/** All validated checkpoint captures of a run plus the directory that contains them. */
type MantisApprovalCheckpointArtifacts = {
// `<outputDir>/approval-checkpoints`.
directoryPath: string;
// One entry per scenario and state, in scenario order with "pending" before "resolved".
screenshots: MantisApprovalCheckpointScreenshot[];
};
const DEFAULT_PROVIDER = "hetzner";
const DEFAULT_CLASS = "beast";
const DEFAULT_IDLE_TIMEOUT = "90m";
@@ -128,9 +148,14 @@ const DEFAULT_PROVIDER_MODE = "live-frontier";
const DEFAULT_MODEL = "openai/gpt-5.4";
const DEFAULT_SLACK_CHANNEL_ID = "C0AUXUC5AGN";
const DEFAULT_HYDRATE_MODE: MantisSlackDesktopHydrateMode = "source";
// Scenario ids used when approval checkpoint mode is requested without explicit scenarios. In that
// mode `resolveScenarioIds` also uses this list as the allow-list for explicitly passed ids.
const DEFAULT_APPROVAL_CHECKPOINT_SCENARIOS = [
"slack-approval-exec-native",
"slack-approval-plugin-native",
] as const;
const CRABBOX_BIN_ENV = "OPENCLAW_MANTIS_CRABBOX_BIN";
const CRABBOX_PROVIDER_ENV = "OPENCLAW_MANTIS_CRABBOX_PROVIDER";
const CRABBOX_CLASS_ENV = "OPENCLAW_MANTIS_CRABBOX_CLASS";
const CRABBOX_MARKET_ENV = "OPENCLAW_MANTIS_CRABBOX_MARKET";
const CRABBOX_LEASE_ID_ENV = "OPENCLAW_MANTIS_CRABBOX_LEASE_ID";
const CRABBOX_KEEP_ENV = "OPENCLAW_MANTIS_KEEP_VM";
const CRABBOX_IDLE_TIMEOUT_ENV = "OPENCLAW_MANTIS_CRABBOX_IDLE_TIMEOUT";
@@ -206,6 +231,184 @@ function defaultOutputDir(repoRoot: string, startedAt: Date) {
return path.join(repoRoot, ".artifacts", "qa-e2e", "mantis", `slack-desktop-${stamp}`);
}
/**
 * Picks the Slack QA scenario ids for a run.
 *
 * Explicitly requested ids win when at least one is given. Otherwise approval checkpoint mode
 * falls back to the default approval scenarios and every other mode gets an empty list.
 *
 * @param params.approvalCheckpoints - Whether approval checkpoint mode is enabled.
 * @param params.scenarioIds - Explicitly requested scenario ids, if any.
 * @returns A fresh array of scenario ids (never the caller's array).
 * @throws Error when approval checkpoint mode is enabled and any selected id is not one of the
 *   default approval checkpoint scenarios.
 */
function resolveScenarioIds(params: {
  approvalCheckpoints: boolean;
  scenarioIds: readonly string[] | undefined;
}) {
  const requested = params.scenarioIds ?? [];
  let selected: string[];
  if (requested.length > 0) {
    selected = Array.from(requested);
  } else if (params.approvalCheckpoints) {
    selected = Array.from(DEFAULT_APPROVAL_CHECKPOINT_SCENARIOS);
  } else {
    selected = [];
  }

  // Outside approval checkpoint mode any scenario id is accepted as-is.
  if (!params.approvalCheckpoints) {
    return selected;
  }

  const supported = new Set<string>(DEFAULT_APPROVAL_CHECKPOINT_SCENARIOS);
  const rejected = selected.filter((candidate) => !supported.has(candidate));
  if (rejected.length === 0) {
    return selected;
  }
  const supportedList = [...DEFAULT_APPROVAL_CHECKPOINT_SCENARIOS].join(", ");
  throw new Error(
    `--approval-checkpoints only supports approval checkpoint scenarios: ${supportedList}. Unsupported: ${rejected.join(", ")}.`,
  );
}
/**
 * Verifies that `filePath` is an existing regular file with at least one byte of content.
 *
 * @param filePath - Path of the artifact to check.
 * @param label - Human-readable artifact name; used as the prefix of every error message.
 * @throws Error `<label> is missing: <path>` when the path cannot be stat'ed (the underlying
 *   error is attached as `cause`).
 * @throws Error `<label> is not a regular file: <path>` when the path exists but is something
 *   else, such as a directory. Previously this case was misreported as "is empty".
 * @throws Error `<label> is empty: <path>` when the file has zero bytes.
 */
async function assertNonEmptyFile(filePath: string, label: string) {
  let stats;
  try {
    stats = await fs.stat(filePath);
  } catch (error) {
    throw new Error(`${label} is missing: ${filePath}`, { cause: error });
  }
  // Report the wrong file type separately so the diagnostic points at the real problem.
  if (!stats.isFile()) {
    throw new Error(`${label} is not a regular file: ${filePath}`);
  }
  if (stats.size <= 0) {
    throw new Error(`${label} is empty: ${filePath}`);
  }
}
/**
 * Loads a file and returns its contents as a JSON object.
 *
 * @param filePath - Path of the JSON file.
 * @param label - Human-readable artifact name; used as the prefix of every error message.
 * @returns The parsed top-level object.
 * @throws Error from `assertNonEmptyFile` when the file fails that check.
 * @throws Error `<label> is not valid JSON: <path>` when reading or parsing fails (the underlying
 *   error is attached as `cause`).
 * @throws Error `<label> must be a JSON object: <path>` when the top-level value is null, an
 *   array, or a primitive.
 */
async function readJsonObject(filePath: string, label: string): Promise<Record<string, unknown>> {
  await assertNonEmptyFile(filePath, label);

  // Read and parse share one error path, so a failed read is reported as invalid JSON too.
  const decode = async (): Promise<unknown> => {
    try {
      const raw = await fs.readFile(filePath, "utf8");
      return JSON.parse(raw);
    } catch (error) {
      throw new Error(`${label} is not valid JSON: ${filePath}`, { cause: error });
    }
  };
  const value = await decode();

  const isPlainObject = value !== null && typeof value === "object" && !Array.isArray(value);
  if (!isPlainObject) {
    throw new Error(`${label} must be a JSON object: ${filePath}`);
  }
  return value as Record<string, unknown>;
}
/**
 * Checks the fields shared by checkpoint and acknowledgement records: `version` must be exactly
 * `1`, and `scenarioId` and `state` must strictly equal the expected values.
 *
 * Fields are checked in the order version, scenarioId, state; the first mismatch throws
 * `<label> has unexpected <field> in <filePath>`.
 *
 * @param params.filePath - Path the record was read from (only used in error messages).
 * @param params.label - Human-readable artifact name used as the error prefix.
 * @param params.record - Parsed JSON object to validate.
 * @param params.scenarioId - Expected scenario id.
 * @param params.state - Expected checkpoint state.
 */
function assertApprovalCheckpointBaseJson(params: {
  filePath: string;
  label: string;
  record: Record<string, unknown>;
  scenarioId: string;
  state: MantisApprovalCheckpointState;
}) {
  const { filePath, label, record } = params;
  const expectations: readonly (readonly [string, unknown])[] = [
    ["version", 1],
    ["scenarioId", params.scenarioId],
    ["state", params.state],
  ];
  for (const [field, expected] of expectations) {
    if (record[field] !== expected) {
      throw new Error(`${label} has unexpected ${field} in ${filePath}`);
    }
  }
}
/**
 * Validates a checkpoint record: the shared base fields plus the Slack message evidence.
 *
 * The `message` property must be a non-array object with a string `text`, string arrays
 * `blockText` and `actionLabels`, and a boolean `hasNativeActions`. A "pending" checkpoint must
 * additionally carry at least one action label.
 *
 * @param params.filePath - Path the record was read from (only used in error messages).
 * @param params.label - Human-readable artifact name used as the error prefix.
 * @param params.record - Parsed checkpoint JSON object.
 * @param params.scenarioId - Expected scenario id.
 * @param params.state - Expected checkpoint state.
 * @throws Error describing the first failed check.
 */
function assertApprovalCheckpointJson(params: {
  filePath: string;
  label: string;
  record: Record<string, unknown>;
  scenarioId: string;
  state: MantisApprovalCheckpointState;
}) {
  assertApprovalCheckpointBaseJson(params);
  const { filePath, label } = params;
  const isStringList = (value: unknown): value is string[] =>
    Array.isArray(value) && value.every((item) => typeof item === "string");

  const evidence = params.record.message;
  if (evidence === null || typeof evidence !== "object" || Array.isArray(evidence)) {
    throw new Error(`${label} is missing Slack message evidence in ${filePath}`);
  }
  const fields = evidence as Record<string, unknown>;

  if (typeof fields.text !== "string") {
    throw new Error(`${label} message evidence is missing text in ${filePath}`);
  }
  if (!isStringList(fields.blockText)) {
    throw new Error(`${label} message evidence is missing blockText in ${filePath}`);
  }
  const actionLabels = fields.actionLabels;
  if (!isStringList(actionLabels)) {
    throw new Error(`${label} message evidence is missing actionLabels in ${filePath}`);
  }
  if (typeof fields.hasNativeActions !== "boolean") {
    throw new Error(`${label} message evidence is missing hasNativeActions in ${filePath}`);
  }

  // A pending approval with no action labels is rejected as evidence.
  const pendingWithoutActions = params.state === "pending" && actionLabels.length === 0;
  if (pendingWithoutActions) {
    throw new Error(
      `${label} pending message evidence has no native action labels in ${filePath}`,
    );
  }
}
/**
 * Validates an acknowledgement record: the shared base fields plus a `screenshotPath` whose file
 * name matches the expected screenshot.
 *
 * Only the base names are compared, so the recorded path may live under a different directory
 * than `params.screenshotPath`.
 *
 * @param params.filePath - Path the record was read from (only used in error messages).
 * @param params.label - Human-readable artifact name used as the error prefix.
 * @param params.record - Parsed acknowledgement JSON object.
 * @param params.scenarioId - Expected scenario id.
 * @param params.screenshotPath - Expected screenshot path.
 * @param params.state - Expected checkpoint state.
 * @throws Error when a base field mismatches, `screenshotPath` is absent or blank, or the file
 *   names differ.
 */
function assertApprovalCheckpointAckJson(params: {
  filePath: string;
  label: string;
  record: Record<string, unknown>;
  scenarioId: string;
  screenshotPath: string;
  state: MantisApprovalCheckpointState;
}) {
  assertApprovalCheckpointBaseJson(params);

  const recordedPath = params.record.screenshotPath;
  if (typeof recordedPath !== "string" || recordedPath.trim().length === 0) {
    throw new Error(`${params.label} is missing screenshotPath in ${params.filePath}`);
  }

  const recordedName = path.basename(recordedPath);
  const expectedName = path.basename(params.screenshotPath);
  if (recordedName !== expectedName) {
    throw new Error(`${params.label} screenshotPath does not match ${params.screenshotPath}`);
  }
}
/**
 * Validates and indexes the approval checkpoint artifacts under `<outputDir>/approval-checkpoints`.
 *
 * For every scenario, in "pending" then "resolved" order, three files are checked in sequence:
 * the checkpoint JSON, the acknowledgement JSON, and the screenshot (which must be non-empty).
 * The first failing check aborts the whole collection by throwing.
 *
 * @param params.enabled - When false, nothing is checked and `undefined` is returned.
 * @param params.outputDir - Local run output directory containing `approval-checkpoints`.
 * @param params.scenarioIds - Scenario ids whose checkpoints are expected.
 * @returns The checkpoint directory plus one entry per scenario and state, or `undefined` when
 *   disabled.
 */
async function collectApprovalCheckpointArtifacts(params: {
  enabled: boolean;
  outputDir: string;
  scenarioIds: readonly string[];
}): Promise<MantisApprovalCheckpointArtifacts | undefined> {
  if (!params.enabled) {
    return undefined;
  }
  const directoryPath = path.join(params.outputDir, "approval-checkpoints");
  const checkpointStates: readonly MantisApprovalCheckpointState[] = ["pending", "resolved"];
  const screenshots: MantisApprovalCheckpointScreenshot[] = [];

  for (const scenarioId of params.scenarioIds) {
    for (const state of checkpointStates) {
      const stem = `${scenarioId}.${state}`;
      const entry: MantisApprovalCheckpointScreenshot = {
        ackPath: path.join(directoryPath, `${stem}.ack.json`),
        checkpointPath: path.join(directoryPath, `${stem}.json`),
        scenarioId,
        screenshotPath: path.join(directoryPath, `${scenarioId}-${state}.png`),
        state,
      };

      // 1. Checkpoint record.
      const checkpointLabel = `Approval checkpoint ${stem}`;
      const checkpointRecord = await readJsonObject(entry.checkpointPath, checkpointLabel);
      assertApprovalCheckpointJson({
        filePath: entry.checkpointPath,
        label: checkpointLabel,
        record: checkpointRecord,
        scenarioId,
        state,
      });

      // 2. Acknowledgement record.
      const ackLabel = `Approval checkpoint ack ${stem}`;
      const ackRecord = await readJsonObject(entry.ackPath, ackLabel);
      assertApprovalCheckpointAckJson({
        filePath: entry.ackPath,
        label: ackLabel,
        record: ackRecord,
        scenarioId,
        screenshotPath: entry.screenshotPath,
        state,
      });

      // 3. Screenshot referenced by the acknowledgement.
      await assertNonEmptyFile(entry.screenshotPath, `Approval checkpoint screenshot ${stem}`);

      screenshots.push(entry);
    }
  }

  return { directoryPath, screenshots };
}
async function readRemoteMetadata(
outputDir: string,
): Promise<SlackDesktopRemoteMetadata | undefined> {
@@ -347,6 +550,7 @@ async function prepareGatewayCredentialEnv(params: {
function renderRemoteScript(params: {
alternateModel: string;
approvalCheckpoints: boolean;
credentialRole: string;
credentialSource: string;
fastMode: boolean;
@@ -369,8 +573,10 @@ function renderRemoteScript(params: {
const fastMode = params.fastMode ? "1" : "0";
const hydrateMode = shellQuote(params.hydrateMode);
const setupGateway = params.setupGateway ? "1" : "0";
const approvalCheckpoints = params.approvalCheckpoints ? "1" : "0";
const slackChannelId = shellQuote(params.slackChannelId);
const scenarioArgs = params.scenarioIds.flatMap((id) => ["--scenario", shellQuote(id)]).join(" ");
const checkpointScenarioJson = shellQuote(JSON.stringify(params.scenarioIds));
return `set -euo pipefail
out=${shellOutputDir}
slack_url_override=${slackUrl}
@@ -382,7 +588,23 @@ alternate_model=${alternateModel}
fast_mode=${fastMode}
hydrate_mode=${hydrateMode}
setup_gateway=${setupGateway}
approval_checkpoints=${approvalCheckpoints}
slack_channel_id=${slackChannelId}
approval_checkpoint_scenarios_json=${checkpointScenarioJson}
remote_command_timeout_seconds="\${OPENCLAW_MANTIS_REMOTE_COMMAND_TIMEOUT_SECONDS:-600}"
if [ -z "\${OPENCLAW_QA_SLACK_CHANNEL_ID:-}" ] && [ -n "$slack_channel_id" ]; then
export OPENCLAW_QA_SLACK_CHANNEL_ID="$slack_channel_id"
fi
case "$remote_command_timeout_seconds" in
''|*[!0-9]*)
echo "OPENCLAW_MANTIS_REMOTE_COMMAND_TIMEOUT_SECONDS must be an integer number of seconds." >&2
exit 2
;;
esac
if [ "$remote_command_timeout_seconds" -le 0 ]; then
echo "OPENCLAW_MANTIS_REMOTE_COMMAND_TIMEOUT_SECONDS must be greater than zero." >&2
exit 2
fi
rm -rf "$out"
mkdir -p "$out"
export DISPLAY="\${DISPLAY:-:99}"
@@ -486,7 +708,7 @@ else
fi
chrome_pid=$!
qa_status=0
{
run_mantis_remote_body() {
set -e
echo "remote pwd: $(pwd)"
sudo corepack enable || sudo npm install -g pnpm@11
@@ -558,15 +780,287 @@ MANTIS_SLACK_PATCH
fi
disown "$gateway_pid" >/dev/null 2>&1 || true
else
qa_args=(openclaw qa slack --repo-root . --output-dir "$out/slack-qa" --provider-mode "$provider_mode" --model "$primary_model" --alt-model "$alternate_model" --credential-source "$credential_source" --credential-role "$credential_role")
slack_qa_output_dir=".artifacts/qa-e2e/mantis/$(basename "$out")/slack-qa"
rm -rf "$slack_qa_output_dir" "$out/slack-qa"
mkdir -p "$(dirname "$slack_qa_output_dir")" "$out/slack-qa"
copy_slack_qa_artifacts() {
rm -rf "$out/slack-qa"
mkdir -p "$out/slack-qa"
if [ -d "$slack_qa_output_dir" ]; then
cp -a "$slack_qa_output_dir"/. "$out/slack-qa"/
fi
}
qa_args=(openclaw qa slack --repo-root . --output-dir "$slack_qa_output_dir" --provider-mode "$provider_mode" --model "$primary_model" --alt-model "$alternate_model" --credential-source "$credential_source" --credential-role "$credential_role")
if [ "$fast_mode" = "1" ]; then
qa_args+=(--fast)
fi
pnpm "\${qa_args[@]}" ${scenarioArgs}
if [ "$approval_checkpoints" = "1" ]; then
checkpoint_dir="$out/approval-checkpoints"
mkdir -p "$checkpoint_dir"
export OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_DIR="$checkpoint_dir"
export OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_TIMEOUT_MS="\${OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_TIMEOUT_MS:-120000}"
export OPENCLAW_MANTIS_APPROVAL_CHECKPOINT_SCENARIOS_JSON="$approval_checkpoint_scenarios_json"
export OPENCLAW_MANTIS_APPROVAL_BROWSER_BIN="$browser_bin"
cat >"$out/approval-checkpoint-watcher.mjs" <<'MANTIS_APPROVAL_WATCHER'
import { spawn } from "node:child_process";
import fs from "node:fs/promises";
import path from "node:path";
const checkpointDir = process.env.OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_DIR;
const timeoutMs = Number.parseInt(
process.env.OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_TIMEOUT_MS || "120000",
10,
);
const scenarioIds = JSON.parse(
process.env.OPENCLAW_MANTIS_APPROVAL_CHECKPOINT_SCENARIOS_JSON || "[]",
);
const browserBin = process.env.OPENCLAW_MANTIS_APPROVAL_BROWSER_BIN;
if (!checkpointDir) {
throw new Error("OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_DIR is required.");
}
if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) {
throw new Error("OPENCLAW_QA_SLACK_APPROVAL_CHECKPOINT_TIMEOUT_MS must be a positive integer.");
}
if (!Array.isArray(scenarioIds) || scenarioIds.length === 0) {
throw new Error("At least one approval checkpoint scenario id is required.");
}
const states = ["pending", "resolved"];
const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
const htmlEscape = (value) =>
String(value ?? "")
.replaceAll("&", "&amp;")
.replaceAll("<", "&lt;")
.replaceAll(">", "&gt;")
.replaceAll('"', "&quot;")
.replaceAll("'", "&#39;");
async function readJson(filePath) {
return JSON.parse(await fs.readFile(filePath, "utf8"));
}
async function waitForCheckpoint(filePath) {
const deadline = Date.now() + timeoutMs;
while (Date.now() <= deadline) {
try {
const stats = await fs.stat(filePath);
if (stats.isFile() && stats.size > 0) {
return;
}
} catch {
// Keep polling until the Slack QA scenario emits the checkpoint or the timeout expires.
}
await delay(500);
}
throw new Error(\`Timed out waiting for approval checkpoint: \${filePath}\`);
}
function renderCheckpointHtml(checkpoint) {
const message = checkpoint && typeof checkpoint.message === "object" ? checkpoint.message : {};
const blockText = Array.isArray(message.blockText)
? message.blockText.filter((entry) => typeof entry === "string" && entry.trim().length > 0)
: [];
const actionLabels = Array.isArray(message.actionLabels)
? message.actionLabels.filter((entry) => typeof entry === "string" && entry.trim().length > 0)
: [];
const text = typeof message.text === "string" ? message.text : "";
const lines = blockText.length > 0 ? blockText : text.split("\\n").filter(Boolean);
const title =
lines[0] ||
(checkpoint.approvalKind === "plugin" ? "Plugin approval required" : "Exec approval required");
const detailLines = lines.slice(1).filter((line) => !actionLabels.includes(line));
const stateLabel = checkpoint.state === "resolved" ? "Resolved" : "Pending";
const decision = typeof checkpoint.decision === "string" ? checkpoint.decision : "";
const decisionLabel =
decision === "allow-once"
? "Allowed once"
: decision === "allow-always"
? "Allowed always"
: decision === "deny"
? "Denied"
: "";
const detailHtml = detailLines
.map((line) => '<p class="detail">' + htmlEscape(line) + "</p>")
.join("");
const buttonsHtml =
checkpoint.state === "pending" && actionLabels.length > 0
? '<div class="actions">' +
actionLabels.map((label) => '<button>' + htmlEscape(label) + "</button>").join("") +
"</div>"
: '<div class="resolution">' + htmlEscape(decisionLabel || stateLabel) + "</div>";
return '<!doctype html><html><head><meta charset="utf-8">' +
"<style>" +
"body{margin:0;background:#1d1c1d;color:#d1d2d3;font:16px Arial,Helvetica,sans-serif;}" +
".wrap{width:920px;min-height:620px;padding:34px 40px;box-sizing:border-box;}" +
".channel{color:#f8f8f8;font-size:22px;font-weight:700;margin-bottom:28px;}" +
".message{display:flex;gap:14px;align-items:flex-start;}" +
".avatar{width:42px;height:42px;border-radius:8px;background:#36c5f0;display:flex;align-items:center;justify-content:center;color:#101214;font-weight:800;}" +
".content{max-width:760px;}" +
".meta{display:flex;gap:8px;align-items:center;margin-bottom:8px;}" +
".name{font-weight:800;color:#f8f8f8;}.app{font-size:12px;color:#d1d2d3;border:1px solid #55585d;border-radius:4px;padding:1px 4px;}" +
".state{color:#b9babd;font-size:13px;}" +
".title{font-size:20px;color:#f8f8f8;font-weight:800;margin:0 0 10px;}" +
".detail{margin:6px 0;color:#d1d2d3;line-height:1.35;}" +
".actions{display:flex;gap:10px;margin-top:16px;}" +
"button{background:#2c2d30;color:#f8f8f8;border:1px solid #565856;border-radius:4px;font-weight:700;padding:8px 14px;font-size:15px;}" +
".resolution{display:inline-block;margin-top:16px;color:#2eb67d;border:1px solid #2eb67d;border-radius:4px;padding:7px 12px;font-weight:700;}" +
".evidence{margin-top:34px;color:#b9babd;font-size:13px;border-top:1px solid #3a3d42;padding-top:14px;}" +
"</style></head><body><main class='wrap'>" +
'<div class="channel"># Slack native approval checkpoint</div>' +
'<section class="message"><div class="avatar">OC</div><div class="content">' +
'<div class="meta"><span class="name">openclaw</span><span class="app">APP</span><span class="state">' +
htmlEscape(stateLabel) +
"</span></div>" +
'<h1 class="title">' + htmlEscape(title) + "</h1>" +
detailHtml +
buttonsHtml +
'<div class="evidence">Rendered from the Slack API message observed by QA at ' +
htmlEscape(checkpoint.observedAt || "") +
".</div>" +
"</div></section></main></body></html>";
}
async function captureScreenshot(screenshotPath, checkpoint) {
if (!browserBin) {
throw new Error("OPENCLAW_MANTIS_APPROVAL_BROWSER_BIN is required to render approval checkpoint screenshots.");
}
const htmlPath = screenshotPath + ".html";
await fs.writeFile(htmlPath, renderCheckpointHtml(checkpoint), "utf8");
await new Promise((resolve, reject) => {
const child = spawn(
browserBin,
[
"--headless=new",
"--disable-gpu",
"--no-sandbox",
"--disable-dev-shm-usage",
"--window-size=960,720",
"--screenshot=" + screenshotPath,
new URL("file://" + path.resolve(htmlPath)).href,
],
{ stdio: "inherit" },
);
child.on("error", reject);
child.on("exit", (code) => {
if (code === 0) {
resolve();
} else {
reject(new Error(\`browser screenshot exited with code \${code ?? "unknown"} for \${screenshotPath}\`));
}
});
});
const stats = await fs.stat(screenshotPath);
if (!stats.isFile() || stats.size <= 0) {
throw new Error(\`Approval checkpoint screenshot is missing or empty: \${screenshotPath}\`);
}
}
async function writeJson(filePath, value) {
const tmpPath = \`\${filePath}.tmp-\${process.pid}\`;
await fs.writeFile(tmpPath, \`\${JSON.stringify(value, null, 2)}\\n\`, "utf8");
await fs.rename(tmpPath, filePath);
}
// Main watcher loop. For every expected scenario and state, in order:
//   1. wait for the checkpoint file via waitForCheckpoint,
//   2. read it and render its screenshot,
//   3. write an ack file next to it recording what was captured.
// Once every pair has been acknowledged, a completion marker listing all
// acknowledgements is written to the checkpoint directory.
const acknowledgements = [];
for (const scenarioId of scenarioIds) {
  const hasUsableId = typeof scenarioId === "string" && scenarioId.length > 0;
  if (!hasUsableId) {
    throw new Error("Approval checkpoint scenario ids must be non-empty strings.");
  }
  for (const state of states) {
    // Checkpoint and ack files share a "<scenario>.<state>" stem; the screenshot
    // uses "<scenario>-<state>" instead.
    const stem = scenarioId + "." + state;
    const checkpointFile = path.join(checkpointDir, stem + ".json");
    const pngFile = path.join(checkpointDir, scenarioId + "-" + state + ".png");
    const ackFile = path.join(checkpointDir, stem + ".ack.json");

    await waitForCheckpoint(checkpointFile);
    const checkpointData = await readJson(checkpointFile);
    await captureScreenshot(pngFile, checkpointData);

    // The timestamp is taken after the screenshot has been captured.
    const ackRecord = {
      version: 1,
      scenarioId: scenarioId,
      state: state,
      checkpointPath: checkpointFile,
      screenshotPath: pngFile,
      capturedAt: new Date().toISOString(),
    };
    await writeJson(ackFile, ackRecord);
    acknowledgements.push(ackRecord);
    process.stdout.write("acknowledged " + scenarioId + " " + state + ": " + pngFile + "\\n");
  }
}
const completionMarkerPath = path.join(checkpointDir, ".watcher-complete.json");
await writeJson(completionMarkerPath, {
  version: 1,
  acknowledgements,
  completedAt: new Date().toISOString(),
});
MANTIS_APPROVAL_WATCHER
node "$out/approval-checkpoint-watcher.mjs" >"$out/approval-checkpoint-watcher.log" 2>&1 &
watcher_pid="$!"
qa_exit=0
pnpm "\${qa_args[@]}" ${scenarioArgs} || qa_exit=$?
watcher_exit=0
if [ "$qa_exit" -eq 0 ]; then
wait "$watcher_pid" || watcher_exit=$?
elif kill -0 "$watcher_pid" >/dev/null 2>&1; then
kill "$watcher_pid" >/dev/null 2>&1 || true
wait "$watcher_pid" >/dev/null 2>&1 || true
echo "Slack QA exited before all expected approval checkpoints were acknowledged." >&2
watcher_exit=1
else
wait "$watcher_pid" || watcher_exit=$?
fi
copy_slack_qa_artifacts
if [ "$qa_exit" -ne 0 ]; then
exit "$qa_exit"
fi
if [ "$watcher_exit" -ne 0 ]; then
exit "$watcher_exit"
fi
else
qa_exit=0
pnpm "\${qa_args[@]}" ${scenarioArgs} || qa_exit=$?
copy_slack_qa_artifacts
if [ "$qa_exit" -ne 0 ]; then
exit "$qa_exit"
fi
fi
fi
} >"$out/slack-desktop-command.log" 2>&1 || qa_status=$?
}
export -f run_mantis_remote_body
export out credential_source credential_role provider_mode primary_model alternate_model
export fast_mode hydrate_mode setup_gateway approval_checkpoints slack_channel_id
export approval_checkpoint_scenarios_json browser_bin profile slack_url
set +e
if command -v timeout >/dev/null 2>&1; then
timeout --kill-after=15s "\${remote_command_timeout_seconds}s" bash -c run_mantis_remote_body >"$out/slack-desktop-command.log" 2>&1 &
else
run_mantis_remote_body >"$out/slack-desktop-command.log" 2>&1 &
fi
remote_body_pid="$!"
(
while kill -0 "$remote_body_pid" >/dev/null 2>&1; do
echo "MANTIS_REMOTE_HEARTBEAT $(date -u +%Y-%m-%dT%H:%M:%SZ)"
sleep 30
done
) &
heartbeat_pid="$!"
wait "$remote_body_pid"
qa_status=$?
kill "$heartbeat_pid" >/dev/null 2>&1 || true
wait "$heartbeat_pid" >/dev/null 2>&1 || true
set -e
if [ "$qa_status" -eq 124 ] || [ "$qa_status" -eq 137 ]; then
echo "Remote command timed out after \${remote_command_timeout_seconds}s." >"$out/remote-command-timeout.txt"
qa_status=124
fi
sleep 5
scrot "$out/slack-desktop-smoke.png" || true
if [ "$approval_checkpoints" = "1" ] && [ -s "$out/approval-checkpoints/slack-approval-plugin-native-pending.png" ]; then
cp "$out/approval-checkpoints/slack-approval-plugin-native-pending.png" "$out/slack-desktop-smoke.png"
elif [ "$approval_checkpoints" = "1" ] && [ -s "$out/approval-checkpoints/slack-approval-exec-native-pending.png" ]; then
cp "$out/approval-checkpoints/slack-approval-exec-native-pending.png" "$out/slack-desktop-smoke.png"
else
scrot "$out/slack-desktop-smoke.png" || true
fi
if [ -n "$video_pid" ]; then
wait "$video_pid" || true
fi
@@ -580,6 +1074,7 @@ cat >"$out/remote-metadata.json" <<MANTIS_REMOTE_METADATA
"display": "$DISPLAY",
"openedUrl": "$slack_url",
"gatewaySetup": $setup_gateway,
"approvalCheckpoints": $approval_checkpoints,
"gatewayAlive": $(if [ "$setup_gateway" = "1" ] && [ -f "$out/openclaw-gateway.pid" ] && kill -0 "$(cat "$out/openclaw-gateway.pid")" >/dev/null 2>&1; then echo true; else echo false; fi),
"gatewayPid": "$(if [ -f "$out/openclaw-gateway.pid" ]; then cat "$out/openclaw-gateway.pid"; fi)",
"gatewayPort": 38973,
@@ -588,11 +1083,34 @@ cat >"$out/remote-metadata.json" <<MANTIS_REMOTE_METADATA
"credentialRole": "$credential_role",
"providerMode": "$provider_mode",
"hydrateMode": "$hydrate_mode",
"remoteCommandTimedOut": $(if [ -f "$out/remote-command-timeout.txt" ]; then echo true; else echo false; fi),
"capturedAt": "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
}
MANTIS_REMOTE_METADATA
test -s "$out/slack-desktop-smoke.png"
exit "$qa_status"
if [ "$qa_status" -ne 0 ]; then
echo "MANTIS_REMOTE_FAILURE_DIAGNOSTICS_BEGIN"
find "$out" -maxdepth 3 -type f -printf "%p %s bytes\\n" | sort || true
for diagnostic_file in \
"$out/slack-desktop-command.log" \
"$out/slack-qa/slack-qa-report.md" \
"$out/slack-qa/slack-qa-summary.json" \
"$out/slack-qa/slack-qa-observed-messages.json" \
"$out/remote-command-timeout.txt" \
"$out/approval-checkpoint-watcher.log" \
"$out/chrome.log" \
"$out/ffmpeg.log" \
"$out/remote-metadata.json"; do
if [ -f "$diagnostic_file" ]; then
echo "===== tail: $diagnostic_file ====="
tail -n 200 "$diagnostic_file" || true
fi
done
echo "MANTIS_REMOTE_FAILURE_DIAGNOSTICS_END"
fi
if [ ! -s "$out/slack-desktop-smoke.png" ]; then
echo "Slack desktop screenshot is missing or empty: $out/slack-desktop-smoke.png" >&2
fi
exit 0
`;
}
@@ -631,6 +1149,15 @@ function renderReport(summary: MantisSlackDesktopSmokeSummary) {
? `- Video: \`${path.basename(summary.artifacts.videoPath)}\``
: "- Video: missing",
summary.artifacts.slackQaDir ? "- Slack QA artifacts: `slack-qa/`" : undefined,
summary.artifacts.approvalCheckpoints
? "- Approval checkpoints: `approval-checkpoints/`"
: undefined,
...(summary.artifacts.approvalCheckpoints?.screenshots.map(
(screenshot) =>
`- Approval checkpoint ${screenshot.scenarioId} ${screenshot.state}: \`approval-checkpoints/${path.basename(
screenshot.screenshotPath,
)}\``,
) ?? []),
"- Remote metadata: `remote-metadata.json`",
"- Remote command log: `slack-desktop-command.log`",
"- FFmpeg log: `ffmpeg.log`",
@@ -704,6 +1231,7 @@ export async function runMantisSlackDesktopSmoke(
trimToValue(opts.provider) ?? trimToValue(env[CRABBOX_PROVIDER_ENV]) ?? DEFAULT_PROVIDER;
const machineClass =
trimToValue(opts.machineClass) ?? trimToValue(env[CRABBOX_CLASS_ENV]) ?? DEFAULT_CLASS;
const market = trimToValue(opts.market) ?? trimToValue(env[CRABBOX_MARKET_ENV]);
const idleTimeout =
trimToValue(opts.idleTimeout) ??
trimToValue(env[CRABBOX_IDLE_TIMEOUT_ENV]) ??
@@ -715,12 +1243,20 @@ export async function runMantisSlackDesktopSmoke(
const primaryModel = trimToValue(opts.primaryModel) ?? DEFAULT_MODEL;
const alternateModel = trimToValue(opts.alternateModel) ?? primaryModel;
const fastMode = opts.fastMode ?? true;
const freshPr = trimToValue(opts.freshPr);
const hydrateMode =
normalizeHydrateMode(opts.hydrateMode) ??
normalizeHydrateMode(env[HYDRATE_MODE_ENV]) ??
DEFAULT_HYDRATE_MODE;
const gatewaySetup = opts.gatewaySetup ?? false;
const scenarioIds = opts.scenarioIds ?? [];
const approvalCheckpoints = opts.approvalCheckpoints ?? false;
if (approvalCheckpoints && gatewaySetup) {
throw new Error("--approval-checkpoints cannot be used with --gateway-setup.");
}
const scenarioIds = resolveScenarioIds({
approvalCheckpoints,
scenarioIds: opts.scenarioIds,
});
const slackChannelId =
trimToValue(opts.slackChannelId) ??
trimToValue(env[SLACK_CHANNEL_ID_ENV]) ??
@@ -742,6 +1278,7 @@ export async function runMantisSlackDesktopSmoke(
let slackQaDir: string | undefined;
let videoPath: string | undefined;
let remoteMetadata: SlackDesktopRemoteMetadata | undefined;
let approvalCheckpointArtifacts: MantisApprovalCheckpointArtifacts | undefined;
try {
leaseId =
@@ -753,6 +1290,7 @@ export async function runMantisSlackDesktopSmoke(
env,
idleTimeout,
machineClass,
market,
provider,
runner,
ttl,
@@ -784,6 +1322,7 @@ export async function runMantisSlackDesktopSmoke(
leaseHeartbeat = preparedCredentialEnv.leaseHeartbeat;
let remoteRunError: unknown;
const remoteRunStartedAt = new Date();
const freshPrArgs = freshPr ? ["--fresh-pr", freshPr] : [];
await runCommand({
command: crabboxBin,
args: [
@@ -794,10 +1333,13 @@ export async function runMantisSlackDesktopSmoke(
resolvedLeaseId,
"--desktop",
"--browser",
"--no-hydrate",
...freshPrArgs,
"--shell",
"--",
renderRemoteScript({
alternateModel,
approvalCheckpoints,
credentialRole,
credentialSource,
fastMode,
@@ -843,22 +1385,37 @@ export async function runMantisSlackDesktopSmoke(
}
remoteMetadata = await readRemoteMetadata(outputDir);
slackQaDir = path.join(outputDir, "slack-qa");
if (!(await pathExists(screenshotPath))) {
throw new Error("Slack desktop screenshot was not copied back from Crabbox.");
}
await assertNonEmptyFile(screenshotPath, "Slack desktop screenshot");
const gatewaySetupCompleted =
gatewaySetup && remoteMetadata?.qaExitCode === 0 && remoteMetadata.gatewayAlive === true;
const slackQaCompleted = !gatewaySetup && remoteMetadata?.qaExitCode === 0;
if (remoteRunError && gatewaySetupCompleted) {
timer.updatePhaseStatus("crabbox.remote_run", "accepted");
}
if (remoteRunError && !gatewaySetupCompleted) {
if (remoteRunError && slackQaCompleted) {
timer.updatePhaseStatus("crabbox.remote_run", "accepted");
}
if (remoteRunError && !gatewaySetupCompleted && !slackQaCompleted) {
throw remoteRunError;
}
if (gatewaySetup && !gatewaySetupCompleted) {
throw new Error("Slack desktop gateway setup did not report a live OpenClaw gateway.");
}
if (!gatewaySetup && !slackQaCompleted) {
const detail =
remoteMetadata?.qaExitCode === undefined
? "Slack QA did not report an exit code."
: `Slack QA exited with code ${remoteMetadata.qaExitCode}.`;
throw new Error(`${detail} See slack-desktop-command.log for details.`);
}
approvalCheckpointArtifacts = await collectApprovalCheckpointArtifacts({
enabled: approvalCheckpoints,
outputDir,
scenarioIds,
});
summary = {
artifacts: {
approvalCheckpoints: approvalCheckpointArtifacts,
reportPath,
screenshotPath,
slackQaDir,
@@ -884,6 +1441,9 @@ export async function runMantisSlackDesktopSmoke(
timings: timer.snapshot(),
};
return {
approvalCheckpointScreenshotPaths: approvalCheckpointArtifacts?.screenshots.map(
(screenshot) => screenshot.screenshotPath,
),
outputDir,
reportPath,
screenshotPath,
@@ -894,6 +1454,7 @@ export async function runMantisSlackDesktopSmoke(
} catch (error) {
summary = {
artifacts: {
approvalCheckpoints: approvalCheckpointArtifacts,
reportPath,
screenshotPath,
slackQaDir,

View File

@@ -87,6 +87,7 @@ function hasSlackPluginForwardingTarget(params: {
}
function requestHasSlackOriginOrSession(params: {
cfg: OpenClawConfig;
request: SlackNativeApprovalRequest;
accountId?: string | null;
}): boolean {
@@ -106,7 +107,13 @@ function requestHasSlackOriginOrSession(params: {
request: params.request,
channel: "slack",
bundledFallback: false,
}) !== null
}) !== null &&
doesApprovalRequestMatchChannelAccount({
cfg: params.cfg,
request: params.request,
channel: "slack",
accountId: params.accountId,
})
);
}
@@ -135,6 +142,7 @@ function canPluginForwardingRouteToSlack(params: {
if (
modeIncludesSession(mode) &&
requestHasSlackOriginOrSession({
cfg: params.cfg,
request: params.request,
accountId: params.accountId,
})

View File

@@ -394,6 +394,61 @@ describe("slack native approval adapter", () => {
).toBe(true);
});
it("does not route plugin session fallback across Slack accounts", async () => {
  // The stored session for this key was last active on the "work" Slack account.
  const sessionKey = "agent:main:slack:channel:c999";
  writeStore({
    [sessionKey]: {
      sessionId: "sess",
      updatedAt: Date.now(),
      lastChannel: "slack",
      lastAccountId: "work",
    },
  });
  const cfg = {
    ...buildConfig({ allowFrom: ["U123OWNER"] }),
    session: { store: STORE_PATH },
    approvals: {
      plugin: { enabled: true, mode: "session" },
    },
  } as OpenClawConfig;
  const request = {
    id: "plugin:req-account-bound",
    request: {
      title: "Plugin approval",
      description: "Allow access",
      sessionKey,
    },
    createdAtMs: 0,
    expiresAtMs: 1000,
  };
  // Same availability check, evaluated from the point of view of one Slack account.
  const shouldHandleFor = (accountId: string) =>
    slackApprovalCapability.nativeRuntime?.availability.shouldHandle({
      cfg,
      accountId,
      request,
    });

  // The "default" account neither handles the request nor resolves approver DM targets.
  expect(shouldHandleFor("default")).toBe(false);
  const defaultAccountTargets = await slackNativeApprovalAdapter.native?.resolveApproverDmTargets?.({
    cfg,
    accountId: "default",
    approvalKind: "plugin",
    request,
  });
  expect(defaultAccountTargets).toEqual([]);

  // The account recorded on the session still handles it.
  expect(shouldHandleFor("work")).toBe(true);
});
it("falls back to the session-bound origin target for plugin approvals", async () => {
writeStore({
"agent:main:slack:channel:c123": {