test: improve full-suite failure summaries

2026-06-06 05:51:15 +08:00 · 2026-05-27 00:21:01 +01:00
parent 0028c2f793
commit 8f1fb675aa
7 changed files with 188 additions and 8 deletions
--- a/.agents/skills/crabbox/SKILL.md
+++ b/.agents/skills/crabbox/SKILL.md
@@ -160,9 +160,14 @@ pnpm crabbox:run -- \
  --ttl 240m \
  --timing-json \
  --shell -- \
-  "pnpm test"
+  "pnpm verify"
 ```

+Use `pnpm verify` when you need check plus full Vitest proof. It emits
+`CRABBOX_PHASE:check` and `CRABBOX_PHASE:test`, making Crabbox summaries show
+which stage failed. Use plain `pnpm test` only when check proof is already
+covered or intentionally skipped.
+
 Focused rerun:

 ```sh
--- a/.agents/skills/openclaw-testing/SKILL.md
+++ b/.agents/skills/openclaw-testing/SKILL.md
@@ -68,6 +68,7 @@ scripts/crabbox-wrapper.mjs` for Testbox, and `git commit --no-verify` only
 pnpm changed:lanes --json
 pnpm check:changed       # changed typecheck/lint/guards; no Vitest
 pnpm test:changed        # cheap smart changed Vitest targets
+pnpm verify              # full check, then full Vitest
 OPENCLAW_TEST_CHANGED_BROAD=1 pnpm test:changed
 pnpm test <path-or-filter> -- --reporter=verbose
 OPENCLAW_VITEST_MAX_WORKERS=1 pnpm test <path-or-filter>
@@ -89,6 +90,8 @@ status checks or install reconciliation in a linked worktree.
 - `pnpm check` and `pnpm check:changed` do not run Vitest tests. They are for
  typecheck, lint, and guard proof.
 - `pnpm test` and `pnpm test:changed` run Vitest tests.
+- `pnpm verify` runs `pnpm check`, then `pnpm test`, with Crabbox phase markers
+  so remote summaries show which half failed.
 - `pnpm test:changed` is intentionally cheap by default: direct test edits,
  sibling tests, explicit source mappings, and import-graph dependents.
 - `OPENCLAW_TEST_CHANGED_BROAD=1 pnpm test:changed` is the explicit broad
--- a/package.json
+++ b/package.json
@@ -1802,7 +1802,8 @@
    "ui:i18n:check": "node --import tsx scripts/control-ui-i18n.ts check",
    "ui:i18n:report": "node --import tsx scripts/control-ui-i18n-report.ts",
    "ui:i18n:sync": "node --import tsx scripts/control-ui-i18n.ts sync --write",
-    "ui:install": "node scripts/ui.js install"
+    "ui:install": "node scripts/ui.js install",
+    "verify": "node scripts/verify.mjs"
  },
  "dependencies": {
    "@agentclientprotocol/sdk": "0.22.1",
--- a/scripts/test-projects.mjs
+++ b/scripts/test-projects.mjs
@@ -25,6 +25,7 @@ import {
  buildFullSuiteVitestRunPlans,
  createVitestRunSpecs,
  findUnmatchedExplicitTestTargets,
+  formatFailedShardDigest,
  listFullExtensionVitestProjectConfigs,
  orderFullSuiteSpecsForParallelRun,
  parseTestProjectsArgs,
@@ -152,6 +153,7 @@ function isFullExtensionsProjectRun(specs) {
 async function runVitestSpecsParallel(specs, concurrency) {
  let nextIndex = 0;
  let exitCode = 0;
+  const failures = [];
  const timings = [];

  const runWorker = async () => {
@@ -168,6 +170,14 @@ async function runVitestSpecsParallel(specs, concurrency) {
      }
      if (result.code !== 0) {
        exitCode = exitCode || result.code;
+        failures.push({
+          code: result.code,
+          config: spec.config,
+          includePatterns: spec.includePatterns,
+          noOutputTimedOut: result.noOutputTimedOut,
+          order: index,
+          signal: result.signal,
+        });
      }
      if (result.timing) {
        timings.push(result.timing);
@@ -176,7 +186,7 @@ async function runVitestSpecsParallel(specs, concurrency) {
  };

  await Promise.all(Array.from({ length: concurrency }, () => runWorker()));
-  return { exitCode, timings };
+  return { exitCode, failures, timings };
 }

 async function main() {
@@ -188,7 +198,9 @@ async function main() {
  if (unmatchedExplicitTargets.length > 0) {
    for (const unmatched of unmatchedExplicitTargets) {
      const suffix = unmatched.includePattern ? ` (${unmatched.includePattern})` : "";
-      console.error(`[test] explicit test target matched no test files: ${unmatched.target}${suffix}`);
+      console.error(
+        `[test] explicit test target matched no test files: ${unmatched.target}${suffix}`,
+      );
    }
    printTestSummary("failed", 1, performance.now() - suiteStartedAt);
    process.exitCode = 1;
@@ -276,10 +288,11 @@ async function main() {
      console.error(
        `[test] running ${parallelSpecs.length} Vitest shards with parallelism ${concurrency}`,
      );
-      const { exitCode: parallelExitCode, timings } = await runVitestSpecsParallel(
-        parallelSpecs,
-        concurrency,
-      );
+      const {
+        exitCode: parallelExitCode,
+        failures,
+        timings,
+      } = await runVitestSpecsParallel(parallelSpecs, concurrency);
      writeShardTimings(timings, process.cwd(), baseEnv);
      printTestSummary(
        parallelExitCode === 0 ? "passed" : "failed",
@@ -287,6 +300,9 @@ async function main() {
        performance.now() - suiteStartedAt,
        "Vitest summaries above are per-shard, not aggregate totals.",
      );
+      for (const line of formatFailedShardDigest(failures)) {
+        console.error(line);
+      }
      releaseLockOnce();
      if (parallelExitCode !== 0) {
        process.exit(parallelExitCode);
--- a/scripts/test-projects.test-support.mjs
+++ b/scripts/test-projects.test-support.mjs
@@ -243,6 +243,7 @@ const UTILS_VITEST_CONFIG = "test/vitest/vitest.utils.config.ts";
 const WIZARD_VITEST_CONFIG = "test/vitest/vitest.wizard.config.ts";
 const INCLUDE_FILE_ENV_KEY = "OPENCLAW_VITEST_INCLUDE_FILE";
 const FS_MODULE_CACHE_PATH_ENV_KEY = "OPENCLAW_VITEST_FS_MODULE_CACHE_PATH";
+const FAILED_SHARD_DIGEST_LIMIT = 12;
 const CHANGED_ARGS_PATTERN = /^--changed(?:=(.+))?$/u;
 const VITEST_CONFIG_BY_KIND = {
  acp: ACP_VITEST_CONFIG,
@@ -2091,6 +2092,75 @@ export function writeVitestIncludeFile(filePath, includePatterns) {
  fs.writeFileSync(filePath, `${JSON.stringify(includePatterns, null, 2)}\n`);
 }

+function shellQuote(value) {
+  const text = `${value}`;
+  if (text === "") {
+    return "''";
+  }
+  if (/^[A-Za-z0-9_./:=@%+-]+$/u.test(text)) {
+    return text;
+  }
+  return `'${text.replaceAll("'", "'\\''")}'`;
+}
+
+function formatFailedShardRerunCommand(failure) {
+  const includePatterns = failure.includePatterns ?? [];
+  if (includePatterns.length > 0) {
+    return ["pnpm", "test", ...includePatterns.map(shellQuote), "--", "--reporter=verbose"].join(
+      " ",
+    );
+  }
+  return [
+    "node",
+    "scripts/run-vitest.mjs",
+    "run",
+    "--config",
+    shellQuote(failure.config),
+    "--reporter=verbose",
+  ].join(" ");
+}
+
+function formatFailedShardStatus(failure) {
+  const details = [];
+  if (failure.code !== undefined && failure.code !== null) {
+    details.push(`exit ${failure.code}`);
+  }
+  if (failure.signal) {
+    details.push(`signal ${failure.signal}`);
+  }
+  if (failure.noOutputTimedOut) {
+    details.push("no-output timeout");
+  }
+  return details.length > 0 ? ` (${details.join(", ")})` : "";
+}
+
+export function formatFailedShardDigest(failures, options = {}) {
+  if (failures.length === 0) {
+    return [];
+  }
+
+  const limit = options.limit ?? FAILED_SHARD_DIGEST_LIMIT;
+  const orderedFailures = failures.toSorted((left, right) => {
+    const leftOrder = typeof left.order === "number" ? left.order : Number.MAX_SAFE_INTEGER;
+    const rightOrder = typeof right.order === "number" ? right.order : Number.MAX_SAFE_INTEGER;
+    return leftOrder - rightOrder || left.config.localeCompare(right.config);
+  });
+  const shown = orderedFailures.slice(0, limit);
+  const lines = [`[test] failed shard digest (${failures.length}):`];
+  for (const failure of shown) {
+    const includes =
+      failure.includePatterns?.length > 0
+        ? ` includes=${failure.includePatterns.map(shellQuote).join(",")}`
+        : "";
+    lines.push(`[test] - ${failure.config}${formatFailedShardStatus(failure)}${includes}`);
+    lines.push(`[test]   rerun: ${formatFailedShardRerunCommand(failure)}`);
+  }
+  if (shown.length < failures.length) {
+    lines.push(`[test] - ... ${failures.length - shown.length} more failed shard(s) omitted`);
+  }
+  return lines;
+}
+
 export function buildVitestArgs(args, cwd = process.cwd()) {
  const [plan] = buildVitestRunPlans(args, cwd);
  if (!plan) {
--- a/scripts/verify.mjs
+++ b/scripts/verify.mjs
@@ -0,0 +1,46 @@
+import { performance } from "node:perf_hooks";
+import { formatMs, printTimingSummary } from "./lib/check-timing-summary.mjs";
+import { runManagedCommand } from "./lib/managed-child-process.mjs";
+
+const stages = [
+  { name: "check", args: ["check"] }, // executed as `pnpm check`
+  { name: "test", args: ["test"] }, // executed as `pnpm test`
+];
+
+async function runStage(stage) {
+  console.error(`CRABBOX_PHASE:${stage.name}`);
+  console.error(`[verify] ${stage.name}`);
+  const startedAt = performance.now();
+  const status = await runManagedCommand({
+    args: stage.args,
+    bin: "pnpm",
+  });
+  return {
+    durationMs: performance.now() - startedAt,
+    name: stage.name,
+    status,
+  };
+}
+
+export async function main() {
+  const timings = [];
+  for (const stage of stages) {
+    const result = await runStage(stage);
+    timings.push(result);
+    if (result.status !== 0) {
+      printTimingSummary("verify", timings);
+      console.error(
+        `[verify] failed during ${stage.name} after ${formatMs(result.durationMs)}; later stages were not run`,
+      );
+      process.exitCode = result.status;
+      return;
+    }
+  }
+
+  printTimingSummary("verify", timings);
+  console.error("[verify] passed");
+}
+
+if (import.meta.main) { // run the stages only when this module is the entry point
+  await main();
+}
--- a/test/scripts/test-projects.test.ts
+++ b/test/scripts/test-projects.test.ts
@@ -12,6 +12,7 @@ import {
  buildVitestArgs,
  buildVitestRunPlans,
  findUnmatchedExplicitTestTargets,
+  formatFailedShardDigest,
  listFullExtensionVitestProjectConfigs,
  orderFullSuiteSpecsForParallelRun,
  shouldAcquireLocalHeavyCheckLock,
@@ -1536,6 +1537,44 @@ describe("scripts/test-projects parallel cache paths", () => {
  });
 });

+describe("scripts/test-projects failed shard digest", () => {
+  it("prints failed configs with focused rerun commands", () => {
+    expect(
+      formatFailedShardDigest([
+        {
+          code: 1,
+          config: "test/vitest/vitest.extension-codex.config.ts",
+          includePatterns: null, // no patterns, so the rerun targets the config
+          noOutputTimedOut: false,
+          signal: null,
+        },
+      ]),
+    ).toEqual([
+      "[test] failed shard digest (1):",
+      "[test] - test/vitest/vitest.extension-codex.config.ts (exit 1)",
+      "[test]   rerun: node scripts/run-vitest.mjs run --config test/vitest/vitest.extension-codex.config.ts --reporter=verbose",
+    ]);
+  });
+
+  it("prints target-based reruns when a shard used include patterns", () => {
+    expect(
+      formatFailedShardDigest([
+        {
+          code: 143,
+          config: "test/vitest/vitest.unit.config.ts",
+          includePatterns: ["src/foo bar.test.ts"], // the space forces shell quoting
+          noOutputTimedOut: true,
+          signal: "SIGTERM",
+        },
+      ]),
+    ).toEqual([
+      "[test] failed shard digest (1):",
+      "[test] - test/vitest/vitest.unit.config.ts (exit 143, signal SIGTERM, no-output timeout) includes='src/foo bar.test.ts'",
+      "[test]   rerun: pnpm test 'src/foo bar.test.ts' -- --reporter=verbose",
+    ]);
+  });
+});
+
 describe("scripts/test-projects Vitest stall watchdog", () => {
  it("adds a default no-output timeout to non-watch specs", () => {
    const [spec] = applyDefaultVitestNoOutputTimeout(