ci(mantis): make telegram proof skips public-safe

This commit is contained in:
Ayaan Zaidi
2026-05-18 07:53:52 +05:30
parent 48f7db23f0
commit 6baa2b38b2
3 changed files with 116 additions and 10 deletions

View File

@@ -16,8 +16,11 @@ Hard limits:
- Do not finish with tiny, cropped-wrong, off-bottom, or sidebar-heavy GIFs.
- Do not invent a generic proof. The proof must match the PR behavior.
- Do not force GIFs for internal-only, workflow-only, test-only, docs-only, or
otherwise non-visual PRs. A no-visual-proof manifest is a successful outcome
when GIFs would be misleading.
otherwise non-visual PRs. A no-visual-proof manifest is a successful workflow
outcome when GIFs would be misleading, but it is not proof that the PR passed.
- Keep public-facing manifest summaries short and user-domain. Do not mention
harness internals, mock-provider limits, secret/trust boundaries, local paths,
transcript seeding, or workflow implementation details in the summary.
Inputs are provided as environment variables:
@@ -42,9 +45,10 @@ Required workflow:
before/after. If it does not, write
`${MANTIS_OUTPUT_DIR}/mantis-evidence.json` with `comparison.pass: true`, no
artifacts, and a summary that starts with
`Mantis did not generate before/after GIFs because`. Include the concrete
reason in the summary. Use this manifest shape and do not create worktrees
or start Crabbox for this case:
`Mantis did not generate before/after GIFs because`. Include a short
public reason, such as `the PR changes internal session bookkeeping rather
than Telegram-visible behavior`. Use this manifest shape and do not create
worktrees or start Crabbox for this case:
```json
{
@@ -73,6 +77,14 @@ Required workflow:
}
```
If the PR appears visual but proof is blocked by Telegram Desktop session
state, authorization, credentials, Crabbox, or another capture-infrastructure
issue, do not describe it as a no-visual PR. Write a manifest with
`comparison.pass: false`, skipped lanes, no artifacts, and a summary that
starts with `Mantis could not capture Telegram Desktop proof because`. The
publisher will keep that out of PR comments so the failure stays in the
workflow logs and artifacts.
4. Decide what Telegram message, mock model response, command, callback, button,
media, or sequence best proves the PR. Use `MANTIS_INSTRUCTIONS` as extra
maintainer guidance, not as a replacement for reading the PR.
@@ -134,4 +146,6 @@ Expected final state:
`Main` and `This PR`.
- No-visual-proof manifests contain no artifacts and have `comparison.pass:
true`.
- Capture-infrastructure failure manifests contain no artifacts and have
`comparison.pass: false`.
- The worktree can be dirty only under `.artifacts/`.

View File

@@ -308,6 +308,47 @@ function laneLine(label, lane) {
return pieces.join("");
}
/**
 * Reports whether the manifest lists at least one artifact whose `kind` is a
 * visual proof type (screenshot, video, motion clip/preview, or timeline).
 *
 * @param manifest Evidence manifest; `manifest.artifacts` must be an array.
 * @returns `true` if any artifact has a visual kind, otherwise `false`.
 */
function hasVisibleProofArtifacts(manifest) {
  const visibleKinds = new Set([
    "desktopScreenshot",
    "fullVideo",
    "motionClip",
    "motionPreview",
    "timeline",
  ]);
  return manifest.artifacts.some((entry) => visibleKinds.has(entry.kind));
}
/**
 * Identifies a manifest that carries no visual proof: it has no visible
 * artifacts and both the baseline and candidate lanes report status
 * "skipped".
 *
 * @param manifest Evidence manifest; `comparison` may be missing.
 * @returns `true` only when no visual artifact exists and both lanes are skipped.
 */
function isSkippedNoVisualProof(manifest) {
  const lanes = manifest.comparison ?? {};
  if (hasVisibleProofArtifacts(manifest)) {
    return false;
  }
  // Baseline is checked first; a non-skipped lane short-circuits the rest.
  return ["baseline", "candidate"].every((lane) => lanes[lane]?.status === "skipped");
}
/**
 * Chooses the summary line shown in the PR comment.
 *
 * Skipped no-visual-proof manifests always get a fixed generic sentence in
 * place of the manifest's own summary. Every other manifest uses its own
 * summary, falling back to a default when the summary is null or undefined.
 *
 * @param manifest Evidence manifest.
 * @returns The summary text to render.
 */
function publicSummary(manifest) {
  const defaultSummary = "Mantis captured QA evidence for this scenario.";
  return isSkippedNoVisualProof(manifest)
    ? "Mantis did not generate before/after GIFs because this PR does not have a clean Telegram-visible before/after proof in the standard Mantis run."
    : (manifest.summary ?? defaultSummary);
}
/**
 * Produces the text for the "Overall" line of the PR comment.
 *
 * @param manifest Evidence manifest.
 * @returns "skipped" for skipped no-visual-proof manifests; "true" or
 *   "false" when `comparison.pass` is a boolean; otherwise an empty string,
 *   which callers treat as "omit the line".
 */
function overallStatus(manifest) {
  if (isSkippedNoVisualProof(manifest)) {
    return "skipped";
  }
  const verdict = manifest.comparison?.pass;
  if (typeof verdict !== "boolean") {
    return "";
  }
  return verdict ? "true" : "false";
}
/**
 * Decides whether the rendered evidence should be posted as a PR comment.
 *
 * Manifests that are not skipped no-visual-proof runs are always published.
 * A skipped manifest is withheld when it describes a capture-infrastructure
 * failure rather than a genuinely non-visual PR. Such a failure is recognised
 * by any of the following:
 *   - `comparison.pass` is exactly `false` (a non-visual skip sets it to `true`);
 *   - the summary starts with
 *     "Mantis could not capture Telegram Desktop proof because";
 *   - the summary mentions a known Telegram session or credential failure
 *     phrase (logged-out, login/welcome screen, authorization error, ...).
 *
 * @param manifest Evidence manifest; `comparison` and `summary` may be missing.
 * @returns `true` when the PR comment should be created or updated, `false`
 *   when the outcome should stay in workflow logs and artifacts only.
 */
export function shouldPublishPrComment(manifest) {
  if (!isSkippedNoVisualProof(manifest)) {
    return true;
  }
  // A skipped, artifact-free manifest that explicitly failed is an
  // infrastructure failure regardless of how its summary is worded.
  if (manifest.comparison?.pass === false) {
    return false;
  }
  const summary = manifest.summary ?? "";
  // Summary prefix reserved for capture-infrastructure failures.
  if (/^\s*Mantis could not capture Telegram Desktop proof because/iu.test(summary)) {
    return false;
  }
  // Phrase list kept for manifests that predate or ignore the prefix and
  // pass-flag conventions.
  return !/(authorization[- ]?error|credential infrastructure|logged[- ]out|login screen|welcome screen|bad telegram session)/iu.test(
    summary,
  );
}
export function renderEvidenceComment({
artifactUrl: actionsArtifactUrl,
manifest,
@@ -333,7 +374,7 @@ export function renderEvidenceComment({
marker,
`## ${manifest.title}`,
"",
`Summary: ${manifest.summary ?? "Mantis captured QA evidence for this scenario."}`,
`Summary: ${publicSummary(manifest)}`,
"",
`- Scenario: \`${manifest.scenario}\``,
];
@@ -354,8 +395,9 @@ export function renderEvidenceComment({
if (candidateLine) {
lines.push(candidateLine);
}
if (typeof comparison.pass === "boolean") {
lines.push(`- Overall: \`${comparison.pass}\``);
const overall = overallStatus(manifest);
if (overall) {
lines.push(`- Overall: \`${overall}\``);
}
lines.push("");
@@ -551,6 +593,10 @@ export async function publishEvidence(rawArgs = process.argv.slice(2)) {
runUrl: args.run_url,
treeUrl: published.treeUrl,
});
if (!shouldPublishPrComment(manifest)) {
console.log("Skipped Mantis QA evidence PR comment because the run did not capture proof.");
return;
}
upsertPrComment({
body,
marker: args.marker,

View File

@@ -6,6 +6,7 @@ import {
loadEvidenceManifest,
publishArtifactFiles,
renderEvidenceComment,
shouldPublishPrComment,
} from "../../scripts/mantis/publish-pr-evidence.mjs";
const tempDirs: string[] = [];
@@ -276,13 +277,58 @@ describe("scripts/mantis/publish-pr-evidence", () => {
"mantis-evidence.json",
]);
expect(body).toContain(
"Summary: Mantis did not generate before/after GIFs because this PR changes CI wiring only.",
"Summary: Mantis did not generate before/after GIFs because this PR does not have a clean Telegram-visible before/after proof in the standard Mantis run.",
);
expect(body).toContain("- Overall: `true`");
expect(body).toContain("- Overall: `skipped`");
expect(body).not.toContain("<table");
expect(body).not.toContain("<img ");
});
// Covers a capture-infrastructure failure: both lanes skipped, no artifacts,
// `comparison.pass: false`, and a summary naming the logged-out welcome screen.
it("does not publish PR comments for Telegram capture infrastructure failures", () => {
const dir = mkdtempSync(path.join(tmpdir(), "mantis-evidence-test-"));
// Registered in tempDirs, presumably so suite teardown removes it.
tempDirs.push(dir);
const manifestPath = path.join(dir, "mantis-evidence.json");
writeFileSync(
manifestPath,
JSON.stringify({
artifacts: [],
comparison: {
baseline: {
expected: "no acceptable native Telegram Desktop visual artifact",
status: "skipped",
},
candidate: {
expected: "no acceptable native Telegram Desktop visual artifact",
status: "skipped",
},
pass: false,
},
id: "telegram-desktop-proof",
scenario: "telegram-desktop-proof",
schemaVersion: 1,
summary:
"Mantis could not capture Telegram Desktop proof because native Telegram Desktop opened to the logged-out welcome screen.",
title: "Mantis Telegram Desktop Proof",
}),
);
const manifest = loadEvidenceManifest(manifestPath);
// The comment body is still rendered even though it is expected not to be posted.
const body = renderEvidenceComment({
manifest,
marker: "<!-- mantis-telegram-desktop-proof -->",
rawBase: "https://artifacts.openclaw.ai/mantis/telegram-desktop/pr-1/run-1",
requestSource: "pull_request_target",
runUrl: "https://github.com/openclaw/openclaw/actions/runs/1",
treeUrl: "https://artifacts.openclaw.ai/mantis/telegram-desktop/pr-1/run-1/index.json",
});
// The rendered body uses the generic public summary, not the manifest's own
// infrastructure-specific summary text.
expect(body).toContain(
"Summary: Mantis did not generate before/after GIFs because this PR does not have a clean Telegram-visible before/after proof in the standard Mantis run.",
);
expect(body).toContain("- Overall: `skipped`");
// The publish gate must reject this manifest.
expect(shouldPublishPrComment(manifest)).toBe(false);
});
it("rejects artifact paths that escape the manifest directory", () => {
const dir = mkdtempSync(path.join(tmpdir(), "mantis-evidence-test-"));
tempDirs.push(dir);