test: improve full-suite failure summaries

This commit is contained in:
Peter Steinberger
2026-05-27 00:21:01 +01:00
parent 0028c2f793
commit 8f1fb675aa
7 changed files with 188 additions and 8 deletions

View File

@@ -160,9 +160,14 @@ pnpm crabbox:run -- \
--ttl 240m \
--timing-json \
--shell -- \
"pnpm test"
"pnpm verify"
```
Use `pnpm verify` when you need check plus full Vitest proof. It emits
`CRABBOX_PHASE:check` and `CRABBOX_PHASE:test`, making Crabbox summaries show
which stage failed. Use plain `pnpm test` only when check proof is already
covered or intentionally skipped.
Focused rerun:
```sh

View File

@@ -68,6 +68,7 @@ scripts/crabbox-wrapper.mjs` for Testbox, and `git commit --no-verify` only
pnpm changed:lanes --json
pnpm check:changed # changed typecheck/lint/guards; no Vitest
pnpm test:changed # cheap smart changed Vitest targets
pnpm verify # full check, then full Vitest
OPENCLAW_TEST_CHANGED_BROAD=1 pnpm test:changed
pnpm test <path-or-filter> -- --reporter=verbose
OPENCLAW_VITEST_MAX_WORKERS=1 pnpm test <path-or-filter>
@@ -89,6 +90,8 @@ status checks or install reconciliation in a linked worktree.
- `pnpm check` and `pnpm check:changed` do not run Vitest tests. They are for
typecheck, lint, and guard proof.
- `pnpm test` and `pnpm test:changed` run Vitest tests.
- `pnpm verify` runs `pnpm check`, then `pnpm test`, with Crabbox phase markers
so remote summaries show which half failed.
- `pnpm test:changed` is intentionally cheap by default: direct test edits,
sibling tests, explicit source mappings, and import-graph dependents.
- `OPENCLAW_TEST_CHANGED_BROAD=1 pnpm test:changed` is the explicit broad

View File

@@ -1802,7 +1802,8 @@
"ui:i18n:check": "node --import tsx scripts/control-ui-i18n.ts check",
"ui:i18n:report": "node --import tsx scripts/control-ui-i18n-report.ts",
"ui:i18n:sync": "node --import tsx scripts/control-ui-i18n.ts sync --write",
"ui:install": "node scripts/ui.js install"
"ui:install": "node scripts/ui.js install",
"verify": "node scripts/verify.mjs"
},
"dependencies": {
"@agentclientprotocol/sdk": "0.22.1",

View File

@@ -25,6 +25,7 @@ import {
buildFullSuiteVitestRunPlans,
createVitestRunSpecs,
findUnmatchedExplicitTestTargets,
formatFailedShardDigest,
listFullExtensionVitestProjectConfigs,
orderFullSuiteSpecsForParallelRun,
parseTestProjectsArgs,
@@ -152,6 +153,7 @@ function isFullExtensionsProjectRun(specs) {
async function runVitestSpecsParallel(specs, concurrency) {
let nextIndex = 0;
let exitCode = 0;
const failures = [];
const timings = [];
const runWorker = async () => {
@@ -168,6 +170,14 @@ async function runVitestSpecsParallel(specs, concurrency) {
}
if (result.code !== 0) {
exitCode = exitCode || result.code;
failures.push({
code: result.code,
config: spec.config,
includePatterns: spec.includePatterns,
noOutputTimedOut: result.noOutputTimedOut,
order: index,
signal: result.signal,
});
}
if (result.timing) {
timings.push(result.timing);
@@ -176,7 +186,7 @@ async function runVitestSpecsParallel(specs, concurrency) {
};
await Promise.all(Array.from({ length: concurrency }, () => runWorker()));
return { exitCode, timings };
return { exitCode, failures, timings };
}
async function main() {
@@ -188,7 +198,9 @@ async function main() {
if (unmatchedExplicitTargets.length > 0) {
for (const unmatched of unmatchedExplicitTargets) {
const suffix = unmatched.includePattern ? ` (${unmatched.includePattern})` : "";
console.error(`[test] explicit test target matched no test files: ${unmatched.target}${suffix}`);
console.error(
`[test] explicit test target matched no test files: ${unmatched.target}${suffix}`,
);
}
printTestSummary("failed", 1, performance.now() - suiteStartedAt);
process.exitCode = 1;
@@ -276,10 +288,11 @@ async function main() {
console.error(
`[test] running ${parallelSpecs.length} Vitest shards with parallelism ${concurrency}`,
);
const { exitCode: parallelExitCode, timings } = await runVitestSpecsParallel(
parallelSpecs,
concurrency,
);
const {
exitCode: parallelExitCode,
failures,
timings,
} = await runVitestSpecsParallel(parallelSpecs, concurrency);
writeShardTimings(timings, process.cwd(), baseEnv);
printTestSummary(
parallelExitCode === 0 ? "passed" : "failed",
@@ -287,6 +300,9 @@ async function main() {
performance.now() - suiteStartedAt,
"Vitest summaries above are per-shard, not aggregate totals.",
);
for (const line of formatFailedShardDigest(failures)) {
console.error(line);
}
releaseLockOnce();
if (parallelExitCode !== 0) {
process.exit(parallelExitCode);

View File

@@ -243,6 +243,7 @@ const UTILS_VITEST_CONFIG = "test/vitest/vitest.utils.config.ts";
const WIZARD_VITEST_CONFIG = "test/vitest/vitest.wizard.config.ts";
const INCLUDE_FILE_ENV_KEY = "OPENCLAW_VITEST_INCLUDE_FILE";
const FS_MODULE_CACHE_PATH_ENV_KEY = "OPENCLAW_VITEST_FS_MODULE_CACHE_PATH";
const FAILED_SHARD_DIGEST_LIMIT = 12;
const CHANGED_ARGS_PATTERN = /^--changed(?:=(.+))?$/u;
const VITEST_CONFIG_BY_KIND = {
acp: ACP_VITEST_CONFIG,
@@ -2091,6 +2092,75 @@ export function writeVitestIncludeFile(filePath, includePatterns) {
fs.writeFileSync(filePath, `${JSON.stringify(includePatterns, null, 2)}\n`);
}
/**
 * Render a value as a single POSIX-shell word.
 *
 * The value is stringified first. An empty string becomes `''`, text made up
 * solely of characters from the safe set is returned untouched, and anything
 * else is wrapped in single quotes with embedded single quotes escaped.
 *
 * @param {unknown} value - Value to render; coerced with template-literal semantics.
 * @returns {string} Text that can be pasted into a shell command line.
 */
function shellQuote(value) {
  const raw = `${value}`;
  if (raw.length === 0) {
    return "''";
  }
  const needsNoQuoting = /^[A-Za-z0-9_./:=@%+-]+$/u.test(raw);
  if (needsNoQuoting) {
    return raw;
  }
  // Close the quoted run, emit an escaped quote, then reopen: ' -> '\''
  const escaped = raw.split("'").join("'\\''");
  return `'${escaped}'`;
}
/**
 * Build a copy-pasteable command that reruns one failed shard.
 *
 * When the shard carried include patterns, the rerun targets those patterns
 * through `pnpm test`; otherwise it reruns the shard's Vitest config through
 * `scripts/run-vitest.mjs`. Patterns and the config path are shell-quoted.
 *
 * @param {{ config: string, includePatterns?: string[] | null }} failure - Failed shard record.
 * @returns {string} Space-joined command line.
 */
function formatFailedShardRerunCommand(failure) {
  const patterns = failure.includePatterns ?? [];
  const argv =
    patterns.length > 0
      ? ["pnpm", "test", ...patterns.map(shellQuote), "--", "--reporter=verbose"]
      : [
          "node",
          "scripts/run-vitest.mjs",
          "run",
          "--config",
          shellQuote(failure.config),
          "--reporter=verbose",
        ];
  return argv.join(" ");
}
/**
 * Summarize how a shard failed as a parenthesized suffix.
 *
 * Includes, in this order and only when present: the exit code (any value
 * other than null/undefined, so `0` is reported), the terminating signal, and
 * a "no-output timeout" marker.
 *
 * @param {{ code?: number | null, signal?: string | null, noOutputTimedOut?: boolean }} failure - Failed shard record.
 * @returns {string} ` (detail, detail)` with a leading space, or `""` when there is nothing to report.
 */
function formatFailedShardStatus(failure) {
  const { code, noOutputTimedOut, signal } = failure;
  const hasExitCode = code !== undefined && code !== null;
  const parts = [
    hasExitCode ? `exit ${code}` : "",
    signal ? `signal ${signal}` : "",
    noOutputTimedOut ? "no-output timeout" : "",
  ].filter((part) => part !== "");
  if (parts.length === 0) {
    return "";
  }
  return ` (${parts.join(", ")})`;
}
/**
 * Build the "[test] failed shard digest" lines printed after a parallel run.
 *
 * Failures are listed in ascending `order`; records without a numeric `order`
 * sort last, and ties fall back to comparing `config`. Each listed shard gets
 * a status line followed by a rerun command. When more shards failed than the
 * limit allows, a final line reports how many were omitted.
 *
 * @param {Array<object>} failures - Failed shard records; each has `config` and
 *   may have `code`, `signal`, `noOutputTimedOut`, `includePatterns`, `order`.
 * @param {{ limit?: number }} [options] - `limit` caps how many shards are
 *   listed (defaults to FAILED_SHARD_DIGEST_LIMIT). Negative limits are
 *   treated as 0.
 * @returns {string[]} Digest lines, or an empty array when nothing failed.
 */
export function formatFailedShardDigest(failures, options = {}) {
  if (failures.length === 0) {
    return [];
  }
  // slice() interprets a negative end as an offset from the tail, which would
  // list an arbitrary subset of the failures; clamp so "negative" means "none".
  const limit = Math.max(0, options.limit ?? FAILED_SHARD_DIGEST_LIMIT);
  const orderOf = (failure) =>
    typeof failure.order === "number" ? failure.order : Number.MAX_SAFE_INTEGER;
  // Sort a copy so the caller's array keeps its completion order.
  const orderedFailures = [...failures].sort(
    (left, right) => orderOf(left) - orderOf(right) || left.config.localeCompare(right.config),
  );
  const shown = orderedFailures.slice(0, limit);
  const lines = [`[test] failed shard digest (${failures.length}):`];
  for (const failure of shown) {
    const includes =
      failure.includePatterns?.length > 0
        ? ` includes=${failure.includePatterns.map(shellQuote).join(",")}`
        : "";
    lines.push(`[test] - ${failure.config}${formatFailedShardStatus(failure)}${includes}`);
    lines.push(`[test] rerun: ${formatFailedShardRerunCommand(failure)}`);
  }
  const omittedCount = failures.length - shown.length;
  if (omittedCount > 0) {
    lines.push(`[test] - ... ${omittedCount} more failed shard(s) omitted`);
  }
  return lines;
}
export function buildVitestArgs(args, cwd = process.cwd()) {
const [plan] = buildVitestRunPlans(args, cwd);
if (!plan) {

46
scripts/verify.mjs Normal file
View File

@@ -0,0 +1,46 @@
import { performance } from "node:perf_hooks";
import { formatMs, printTimingSummary } from "./lib/check-timing-summary.mjs";
import { runManagedCommand } from "./lib/managed-child-process.mjs";
// Verification stages, run in array order. Each entry's `name` is used for the
// phase marker and log lines; `args` are the arguments passed to `pnpm`.
const stages = [
{ name: "check", args: ["check"] },
{ name: "test", args: ["test"] },
];
/**
 * Run one verify stage through `pnpm` and time it.
 *
 * Writes a `CRABBOX_PHASE:<name>` marker and a `[verify] <name>` line to
 * stderr before launching the command.
 *
 * @param {{ name: string, args: string[] }} stage - Stage to run.
 * @returns {Promise<{ durationMs: number, name: string, status: unknown }>}
 *   Elapsed time in milliseconds, the stage name, and whatever
 *   `runManagedCommand` resolved to.
 */
async function runStage(stage) {
  const { args, name } = stage;
  console.error(`CRABBOX_PHASE:${name}`);
  console.error(`[verify] ${name}`);
  const startedAt = performance.now();
  const status = await runManagedCommand({ args, bin: "pnpm" });
  const durationMs = performance.now() - startedAt;
  return { durationMs, name, status };
}
/**
 * Run every verify stage in order, stopping at the first failure.
 *
 * A stage counts as failed when its status is anything other than `0`. On
 * failure the timing summary for the stages run so far is printed, a failure
 * line is written to stderr, `process.exitCode` is set, and later stages are
 * skipped. On success the full timing summary and `[verify] passed` are
 * printed.
 *
 * @returns {Promise<void>}
 */
export async function main() {
  const timings = [];
  for (const stage of stages) {
    const result = await runStage(stage);
    timings.push(result);
    if (result.status === 0) {
      continue;
    }
    printTimingSummary("verify", timings);
    console.error(
      `[verify] failed during ${stage.name} after ${formatMs(result.durationMs)}; later stages were not run`,
    );
    // Assigning null/undefined to process.exitCode clears it, which would let
    // a failed stage exit 0. Fall back to a generic failure code when the
    // status is not an integer.
    process.exitCode = Number.isInteger(result.status) ? result.status : 1;
    return;
  }
  printTimingSummary("verify", timings);
  console.error("[verify] passed");
}
// Run the stages only when this module is the process entry point, so that
// importing `main` from another module does not start a verify run.
if (import.meta.main) {
await main();
}

View File

@@ -12,6 +12,7 @@ import {
buildVitestArgs,
buildVitestRunPlans,
findUnmatchedExplicitTestTargets,
formatFailedShardDigest,
listFullExtensionVitestProjectConfigs,
orderFullSuiteSpecsForParallelRun,
shouldAcquireLocalHeavyCheckLock,
@@ -1536,6 +1537,44 @@ describe("scripts/test-projects parallel cache paths", () => {
});
});
// Pins the exact digest lines produced by formatFailedShardDigest for the two
// rerun shapes: config-based and include-pattern-based.
describe("scripts/test-projects failed shard digest", () => {
// No include patterns: the rerun command targets the shard's Vitest config.
it("prints failed configs with focused rerun commands", () => {
expect(
formatFailedShardDigest([
{
code: 1,
config: "test/vitest/vitest.extension-codex.config.ts",
includePatterns: null,
noOutputTimedOut: false,
signal: null,
},
]),
).toEqual([
"[test] failed shard digest (1):",
"[test] - test/vitest/vitest.extension-codex.config.ts (exit 1)",
"[test] rerun: node scripts/run-vitest.mjs run --config test/vitest/vitest.extension-codex.config.ts --reporter=verbose",
]);
});
// With include patterns: the rerun goes through `pnpm test`, and a pattern
// containing a space is single-quoted in both the status and rerun lines.
it("prints target-based reruns when a shard used include patterns", () => {
expect(
formatFailedShardDigest([
{
code: 143,
config: "test/vitest/vitest.unit.config.ts",
includePatterns: ["src/foo bar.test.ts"],
noOutputTimedOut: true,
signal: "SIGTERM",
},
]),
).toEqual([
"[test] failed shard digest (1):",
"[test] - test/vitest/vitest.unit.config.ts (exit 143, signal SIGTERM, no-output timeout) includes='src/foo bar.test.ts'",
"[test] rerun: pnpm test 'src/foo bar.test.ts' -- --reporter=verbose",
]);
});
});
describe("scripts/test-projects Vitest stall watchdog", () => {
it("adds a default no-output timeout to non-watch specs", () => {
const [spec] = applyDefaultVitestNoOutputTimeout(