From 633e4b8a7c5e834c576c14d083fce143c77534db Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Mon, 25 May 2026 17:18:36 +0200 Subject: [PATCH] fix(test): clean plugin gauntlet temp roots --- CHANGELOG.md | 1 + scripts/check-plugin-gateway-gauntlet.mjs | 380 +++++++++++-------- test/scripts/plugin-gateway-gauntlet.test.ts | 69 ++++ 3 files changed, 301 insertions(+), 149 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e364ad3efba..eda8792cccd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Sessions/doctor: load large session stores without clone amplification during read-only doctor checks and reclaim stale `sessions.json.*.tmp` sidecars. Fixes #56827. Thanks @openperf. +- Tests: clean successful plugin gateway gauntlet isolated temp roots while keeping an explicit preservation switch for failed/debug runs. - Gateway: keep session-only Control UI tool-start mirrors flowing during diagnostic queue pressure instead of silently dropping non-terminal tool updates. - Agents/memory: return optional not-found context for missing date-only daily memory reads instead of logging benign first-run `ENOENT` failures. Fixes #82928. Thanks @galiniliev. - Discord: merge streamed text captions into following media block replies so captions and attachments send as one message. (#86487) Thanks @neeravmakwana. diff --git a/scripts/check-plugin-gateway-gauntlet.mjs b/scripts/check-plugin-gateway-gauntlet.mjs index f96f310a0fde..5598ae3c976a 100644 --- a/scripts/check-plugin-gateway-gauntlet.mjs +++ b/scripts/check-plugin-gateway-gauntlet.mjs @@ -5,6 +5,7 @@ import fs from "node:fs"; import os from "node:os"; import path from "node:path"; import process from "node:process"; +import { fileURLToPath } from "node:url"; import { buildGauntletPrebuildEnv, collectGatewayCpuObservations, @@ -57,6 +58,7 @@ function parseArgs(argv) { commandTimeoutMs: 120_000, buildTimeoutMs: 600_000, qaTimeoutMs: 900_000, + keepRunRoot: process.env.OPENCLAW_PLUGIN_GATEWAY_GAUNTLET_KEEP_RUN_ROOT === "1", }; const envIds = normalizeCsv(process.env.OPENCLAW_PLUGIN_GATEWAY_GAUNTLET_IDS); options.pluginIds.push(...envIds); @@ -151,6 +153,9 @@ function parseArgs(argv) { case "--skip-slash-help": options.skipSlashHelp = true; break; + case "--keep-run-root": + options.keepRunRoot = true; + break; case "--help": printHelp(); process.exit(0); @@ -186,6 +191,7 @@ Options: --skip-lifecycle Skip plugin install/inspect/disable/enable/doctor/uninstall --skip-qa Skip QA Lab RPC conversation runs --skip-slash-help Skip CLI help probes for plugin-declared command aliases + --keep-run-root Preserve isolated HOME/state/log temp root after success `); } @@ -263,7 +269,7 @@ function chunkArray(values, chunkSize) { return chunks; } -function toRepoRelativePath(repoRoot, absolutePath) { +export function toRepoRelativePath(repoRoot, absolutePath) { const relativePath = path.relative(repoRoot, absolutePath); if (!relativePath || relativePath.startsWith("..") || path.isAbsolute(relativePath)) { throw new Error(`Output path must stay inside repo root: ${absolutePath}`); @@ -271,6 +277,12 @@ function toRepoRelativePath(repoRoot, absolutePath) { return relativePath; } +function validateOutputDir(options, repoRoot) { + if (!options.skipQa) { + toRepoRelativePath(repoRoot, path.join(options.outputDir, "qa-suite")); + } +} + function createIsolatedEnv(repoRoot, runRoot) { const home = path.join(runRoot, "home"); const stateDir = path.join(runRoot, "state"); @@ -305,19 +317,26 @@ function timeWrapperArgs(command, args) { return { command: "/usr/bin/time", args: ["-v", command, ...args], mode: "gnu" }; } -function parseTimedMetrics(stderr, wallMs, mode) { +export function parseTimedMetrics(stderr, wallMs, mode) { let userSeconds = null; let systemSeconds = null; let maxRssMb = null; if (mode === "gnu") { - userSeconds = parseFirstFloat(stderr, /User time \(seconds\):\s*([0-9.]+)/u); - systemSeconds = parseFirstFloat(stderr, /System time \(seconds\):\s*([0-9.]+)/u); - const maxRssKb = parseFirstFloat(stderr, /Maximum resident set size \(kbytes\):\s*([0-9.]+)/u); + userSeconds = parseLastFloat(stderr, /^\s*User time \(seconds\):\s*([0-9.]+)\s*$/gmu); + systemSeconds = parseLastFloat(stderr, /^\s*System time \(seconds\):\s*([0-9.]+)\s*$/gmu); + const maxRssKb = parseLastFloat( + stderr, + /^\s*Maximum resident set size \(kbytes\):\s*([0-9.]+)\s*$/gmu, + ); maxRssMb = maxRssKb == null ? null : maxRssKb / 1024; } else if (mode === "bsd") { - userSeconds = parseFirstFloat(stderr, /[0-9.]+\s+real\s+([0-9.]+)\s+user/u); - systemSeconds = parseFirstFloat(stderr, /([0-9.]+)\s+sys/u); - const maxRssBytes = parseFirstFloat(stderr, /([0-9]+)\s+maximum resident set size/u); + const cpuLine = parseLastMatch( + stderr, + /^\s*[0-9.]+\s+real\s+([0-9.]+)\s+user\s+([0-9.]+)\s+sys\s*$/gmu, + ); + userSeconds = parseMatchFloat(cpuLine, 1); + systemSeconds = parseMatchFloat(cpuLine, 2); + const maxRssBytes = parseLastFloat(stderr, /^\s*([0-9]+)\s+maximum resident set size\s*$/gmu); maxRssMb = maxRssBytes == null ? null : maxRssBytes / 1024 / 1024; } const cpuMs = @@ -332,15 +351,26 @@ function parseTimedMetrics(stderr, wallMs, mode) { }; } -function parseFirstFloat(value, pattern) { - const match = value.match(pattern); +function parseLastMatch(value, pattern) { + let lastMatch = null; + for (const match of value.matchAll(pattern)) { + lastMatch = match; + } + return lastMatch; +} + +function parseMatchFloat(match, index) { if (!match) { return null; } - const parsed = Number(match[1]); + const parsed = Number(match[index]); return Number.isFinite(parsed) ? parsed : null; } +function parseLastFloat(value, pattern) { + return parseMatchFloat(parseLastMatch(value, pattern), 1); +} + function stripAnsi(value) { return value.replace(ANSI_PATTERN, ""); } @@ -358,8 +388,11 @@ function writeCommandLog(params) { return logPath; } -function runMeasuredCommand(params) { - const { command, args, mode } = timeWrapperArgs(params.command, params.args); +export function runMeasuredCommand(params) { + const { command, args, mode } = + params.timeMode === "none" + ? { command: params.command, args: params.args, mode: "none" } + : timeWrapperArgs(params.command, params.args); const started = performance.now(); const result = spawnSync(command, args, { cwd: params.cwd, @@ -370,9 +403,20 @@ function runMeasuredCommand(params) { ...(mode === "none" ? (params.spawnOptions ?? {}) : {}), }); const wallMs = performance.now() - started; - const status = result.status ?? (result.signal ? 1 : 0); + const spawnError = result.error + ? { + code: typeof result.error.code === "string" ? result.error.code : null, + message: result.error.message, + } + : null; + const status = result.status ?? (result.signal || spawnError ? 1 : 0); const stdout = result.stdout ?? ""; - const stderr = result.stderr ?? ""; + const stderr = [ + result.stderr ?? "", + spawnError ? `[spawn error] ${spawnError.code ?? "unknown"} ${spawnError.message}` : "", + ] + .filter(Boolean) + .join("\n"); const diagnosticFailure = detectCommandDiagnosticFailure(stdout, stderr); const logPath = writeCommandLog({ logDir: params.logDir, @@ -388,7 +432,8 @@ function runMeasuredCommand(params) { status, diagnosticFailure, signal: result.signal ?? null, - timedOut: result.error?.code === "ETIMEDOUT", + timedOut: spawnError?.code === "ETIMEDOUT", + spawnError, logPath, ...parseTimedMetrics(stderr, wallMs, mode), }; @@ -503,147 +548,184 @@ function runQaChunks(params) { async function main() { const options = parseArgs(process.argv.slice(2)); const repoRoot = path.resolve(options.repoRoot); + validateOutputDir(options, repoRoot); fs.mkdirSync(options.outputDir, { recursive: true }); const runRoot = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-plugin-gauntlet-")); + let preserveRunRoot = options.keepRunRoot; const env = createIsolatedEnv(repoRoot, runRoot); - const matrix = discoverBundledPluginManifests(repoRoot); - const selectedPlugins = selectPluginEntries(matrix, { - ids: options.pluginIds, - shardTotal: options.shardTotal, - shardIndex: options.shardIndex, - limit: options.limit, - }); - const rows = []; - if (!options.skipPrebuild && (selectedPlugins.length > 0 || !options.skipQa)) { - process.stderr.write("[plugin-gauntlet] prebuild\n"); - const prebuildEnv = buildGauntletPrebuildEnv(env, { includePrivateQa: !options.skipQa }); - const prebuildCommand = pnpmCommand(["build"], { cwd: repoRoot, env: prebuildEnv }); - rows.push( - runMeasuredCommand({ - cwd: repoRoot, - env: prebuildEnv, - logDir: path.join(options.outputDir, "logs", "prebuild"), - command: prebuildCommand.command, - args: prebuildCommand.args, - spawnOptions: prebuildCommand.options, - label: "prebuild", - phase: "prebuild", - timeoutMs: options.buildTimeoutMs, - }), - ); - } - const prebuildFailed = rows.some( - (row) => row.phase === "prebuild" && (row.status !== 0 || row.timedOut), - ); - if (!prebuildFailed && !options.skipLifecycle) { - runPluginLifecycle({ - repoRoot, - outputDir: options.outputDir, - env, - plugins: selectedPlugins, - rows, - commandTimeoutMs: options.commandTimeoutMs, - }); - } - if (!prebuildFailed && !options.skipSlashHelp) { - runSlashHelpProbes({ - repoRoot, - outputDir: options.outputDir, - env, - plugins: selectedPlugins, - rows, - commandTimeoutMs: options.commandTimeoutMs, - }); - } - const qaSummaries = - options.skipQa || prebuildFailed - ? [] - : runQaChunks({ - repoRoot, - outputDir: options.outputDir, - env, - plugins: selectedPlugins, - qaBaseline: options.qaBaseline, - rows, - qaScenarios: options.qaScenarios, - qaPluginChunkSize: options.qaPluginChunkSize, - qaTimeoutMs: options.qaTimeoutMs, - }); - const metricObservations = collectMetricObservations(rows, { - cpuCoreWarn: options.cpuCoreWarn, - hotWallWarnMs: options.hotWallWarnMs, - maxRssWarnMb: options.maxRssWarnMb, - wallAnomalyMultiplier: options.wallAnomalyMultiplier, - rssAnomalyMultiplier: options.rssAnomalyMultiplier, - }); - const qaBaselineObservations = collectQaBaselineRegressionObservations(rows, { - cpuRegressionMultiplier: options.qaCpuRegressionMultiplier, - wallRegressionMultiplier: options.qaWallRegressionMultiplier, - }); - const gatewayObservations = qaSummaries.flatMap((qa) => - collectGatewayCpuObservations({ - startup: null, - qa, - cpuCoreWarn: options.cpuCoreWarn, - hotWallWarnMs: options.hotWallWarnMs, - }), - ); - const failures = rows.filter((row) => row.status !== 0 || row.timedOut || row.diagnosticFailure); - const summary = { - generatedAt: new Date().toISOString(), - repoRoot, - outputDir: options.outputDir, - isolatedRunRoot: runRoot, - selectedPluginCount: selectedPlugins.length, - totalPluginCount: matrix.length, - options: { - pluginIds: options.pluginIds, + try { + const matrix = discoverBundledPluginManifests(repoRoot); + const selectedPlugins = selectPluginEntries(matrix, { + ids: options.pluginIds, shardTotal: options.shardTotal, shardIndex: options.shardIndex, - limit: options.limit ?? null, - qaScenarios: options.qaScenarios, - qaPluginChunkSize: options.qaPluginChunkSize, - qaBaseline: options.qaBaseline, - skipLifecycle: options.skipLifecycle, - skipQa: options.skipQa, - skipSlashHelp: options.skipSlashHelp, - skipPrebuild: options.skipPrebuild, - thresholds: { + limit: options.limit, + }); + const rows = []; + if (!options.skipPrebuild && (selectedPlugins.length > 0 || !options.skipQa)) { + process.stderr.write("[plugin-gauntlet] prebuild\n"); + const prebuildEnv = buildGauntletPrebuildEnv(env, { includePrivateQa: !options.skipQa }); + const prebuildCommand = pnpmCommand(["build"], { cwd: repoRoot, env: prebuildEnv }); + rows.push( + runMeasuredCommand({ + cwd: repoRoot, + env: prebuildEnv, + logDir: path.join(options.outputDir, "logs", "prebuild"), + command: prebuildCommand.command, + args: prebuildCommand.args, + spawnOptions: prebuildCommand.options, + label: "prebuild", + phase: "prebuild", + timeoutMs: options.buildTimeoutMs, + }), + ); + } + const prebuildFailed = rows.some( + (row) => row.phase === "prebuild" && (row.status !== 0 || row.timedOut), + ); + if (!prebuildFailed && !options.skipLifecycle) { + runPluginLifecycle({ + repoRoot, + outputDir: options.outputDir, + env, + plugins: selectedPlugins, + rows, + commandTimeoutMs: options.commandTimeoutMs, + }); + } + if (!prebuildFailed && !options.skipSlashHelp) { + runSlashHelpProbes({ + repoRoot, + outputDir: options.outputDir, + env, + plugins: selectedPlugins, + rows, + commandTimeoutMs: options.commandTimeoutMs, + }); + } + const qaSummaries = + options.skipQa || prebuildFailed + ? [] + : runQaChunks({ + repoRoot, + outputDir: options.outputDir, + env, + plugins: selectedPlugins, + qaBaseline: options.qaBaseline, + rows, + qaScenarios: options.qaScenarios, + qaPluginChunkSize: options.qaPluginChunkSize, + qaTimeoutMs: options.qaTimeoutMs, + }); + const metricObservations = collectMetricObservations(rows, { + cpuCoreWarn: options.cpuCoreWarn, + hotWallWarnMs: options.hotWallWarnMs, + maxRssWarnMb: options.maxRssWarnMb, + wallAnomalyMultiplier: options.wallAnomalyMultiplier, + rssAnomalyMultiplier: options.rssAnomalyMultiplier, + }); + const qaBaselineObservations = collectQaBaselineRegressionObservations(rows, { + cpuRegressionMultiplier: options.qaCpuRegressionMultiplier, + wallRegressionMultiplier: options.qaWallRegressionMultiplier, + }); + const gatewayObservations = qaSummaries.flatMap((qa) => + collectGatewayCpuObservations({ + startup: null, + qa, cpuCoreWarn: options.cpuCoreWarn, hotWallWarnMs: options.hotWallWarnMs, - maxRssWarnMb: options.maxRssWarnMb, - wallAnomalyMultiplier: options.wallAnomalyMultiplier, - rssAnomalyMultiplier: options.rssAnomalyMultiplier, - qaCpuRegressionMultiplier: options.qaCpuRegressionMultiplier, - qaWallRegressionMultiplier: options.qaWallRegressionMultiplier, - }, - }, - matrix, - selectedPlugins, - rows, - observations: [...metricObservations, ...qaBaselineObservations, ...gatewayObservations], - failures, - }; - const summaryPath = path.join(options.outputDir, "plugin-gateway-gauntlet-summary.json"); - fs.writeFileSync(summaryPath, `${JSON.stringify(summary, null, 2)}\n`, "utf8"); - process.stdout.write(`[plugin-gauntlet] summary: ${summaryPath}\n`); - process.stdout.write( - `[plugin-gauntlet] plugins=${selectedPlugins.length}/${matrix.length} rows=${rows.length} failures=${failures.length} observations=${summary.observations.length}\n`, - ); - for (const failure of failures) { - process.stdout.write( - `[plugin-gauntlet] failure phase=${failure.phase} plugin=${failure.pluginId ?? ""} status=${failure.status} timedOut=${failure.timedOut} diagnostic=${failure.diagnosticFailure ?? ""} wallMs=${Math.round(failure.wallMs)} log=${failure.logPath}\n`, + }), ); - } - for (const observation of summary.observations.slice(0, 20)) { - process.stdout.write(`[plugin-gauntlet] observation ${JSON.stringify(observation)}\n`); - } - if (failures.length > 0) { - process.exitCode = 1; + const failures = rows.filter( + (row) => row.status !== 0 || row.timedOut || row.diagnosticFailure, + ); + preserveRunRoot = preserveRunRoot || failures.length > 0; + let cleanupError = null; + if (!preserveRunRoot) { + try { + fs.rmSync(runRoot, { recursive: true, force: true }); + } catch (error) { + cleanupError = error instanceof Error ? error.message : String(error); + preserveRunRoot = true; + } + } + const summary = { + generatedAt: new Date().toISOString(), + repoRoot, + outputDir: options.outputDir, + isolatedRunRoot: runRoot, + isolatedRunRootPreserved: preserveRunRoot, + isolatedRunRootCleanupError: cleanupError, + selectedPluginCount: selectedPlugins.length, + totalPluginCount: matrix.length, + options: { + pluginIds: options.pluginIds, + shardTotal: options.shardTotal, + shardIndex: options.shardIndex, + limit: options.limit ?? null, + qaScenarios: options.qaScenarios, + qaPluginChunkSize: options.qaPluginChunkSize, + qaBaseline: options.qaBaseline, + keepRunRoot: options.keepRunRoot, + skipLifecycle: options.skipLifecycle, + skipQa: options.skipQa, + skipSlashHelp: options.skipSlashHelp, + skipPrebuild: options.skipPrebuild, + thresholds: { + cpuCoreWarn: options.cpuCoreWarn, + hotWallWarnMs: options.hotWallWarnMs, + maxRssWarnMb: options.maxRssWarnMb, + wallAnomalyMultiplier: options.wallAnomalyMultiplier, + rssAnomalyMultiplier: options.rssAnomalyMultiplier, + qaCpuRegressionMultiplier: options.qaCpuRegressionMultiplier, + qaWallRegressionMultiplier: options.qaWallRegressionMultiplier, + }, + }, + matrix, + selectedPlugins, + rows, + observations: [...metricObservations, ...qaBaselineObservations, ...gatewayObservations], + failures, + }; + const summaryPath = path.join(options.outputDir, "plugin-gateway-gauntlet-summary.json"); + fs.writeFileSync(summaryPath, `${JSON.stringify(summary, null, 2)}\n`, "utf8"); + process.stdout.write(`[plugin-gauntlet] summary: ${summaryPath}\n`); + process.stdout.write( + `[plugin-gauntlet] plugins=${selectedPlugins.length}/${matrix.length} rows=${rows.length} failures=${failures.length} observations=${summary.observations.length}\n`, + ); + if (preserveRunRoot) { + process.stdout.write(`[plugin-gauntlet] isolated run root preserved: ${runRoot}\n`); + } + for (const failure of failures) { + process.stdout.write( + `[plugin-gauntlet] failure phase=${failure.phase} plugin=${failure.pluginId ?? ""} status=${failure.status} timedOut=${failure.timedOut} diagnostic=${failure.diagnosticFailure ?? ""} wallMs=${Math.round(failure.wallMs)} log=${failure.logPath}\n`, + ); + } + for (const observation of summary.observations.slice(0, 20)) { + process.stdout.write(`[plugin-gauntlet] observation ${JSON.stringify(observation)}\n`); + } + if (failures.length > 0) { + process.exitCode = 1; + } + } catch (error) { + if (!options.keepRunRoot) { + try { + fs.rmSync(runRoot, { recursive: true, force: true }); + } catch (cleanupError) { + process.stderr.write( + `[plugin-gauntlet] failed to clean isolated run root ${runRoot}: ${ + cleanupError instanceof Error ? cleanupError.message : String(cleanupError) + }\n`, + ); + } + } + throw error; } } -main().catch((error) => { - console.error(error instanceof Error ? error.message : String(error)); - process.exitCode = 1; -}); +if (process.argv[1] && fileURLToPath(import.meta.url) === path.resolve(process.argv[1])) { + main().catch((error) => { + console.error(error instanceof Error ? error.message : String(error)); + process.exitCode = 1; + }); +} diff --git a/test/scripts/plugin-gateway-gauntlet.test.ts b/test/scripts/plugin-gateway-gauntlet.test.ts index 29192eb838cd..a17169e4fc79 100644 --- a/test/scripts/plugin-gateway-gauntlet.test.ts +++ b/test/scripts/plugin-gateway-gauntlet.test.ts @@ -1,7 +1,12 @@ +import { spawnSync } from "node:child_process"; import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + parseTimedMetrics, + runMeasuredCommand, +} from "../../scripts/check-plugin-gateway-gauntlet.mjs"; import { buildGauntletPrebuildEnv, collectGatewayCpuObservations, @@ -261,4 +266,68 @@ describe("plugin gateway gauntlet helpers", () => { const env = { EXISTING: "1" }; expect(buildGauntletPrebuildEnv(env, { includePrivateQa: false })).toBe(env); }); + + it("parses macOS time -l metrics from strict trailing lines", () => { + const metrics = parseTimedMetrics( + [ + "plugin stderr: 99.00 real 99.00 user 99.00 sys nope", + " 0.25 real 0.06 user 0.02 sys", + " 2097152 maximum resident set size", + ].join("\n"), + 250, + "bsd", + ); + + expect(metrics.cpuMs).toBe(80); + expect(metrics.cpuCoreRatio).toBeCloseTo(0.32); + expect(metrics.maxRssMb).toBe(2); + }); + + it("marks spawn errors as failed measured rows", async () => { + const logDir = path.join(repoRoot, "logs"); + const row = runMeasuredCommand({ + cwd: repoRoot, + env: process.env, + logDir, + command: path.join(repoRoot, "missing-command"), + args: [], + label: "missing", + phase: "probe", + timeoutMs: 1000, + timeMode: "none", + }); + + expect(row.status).toBe(1); + expect(row.spawnError?.code).toBe("ENOENT"); + await expect(fs.readFile(row.logPath, "utf8")).resolves.toContain("[spawn error] ENOENT"); + }); + + it("cleans the isolated run root after a successful dry run", async () => { + const outputDir = path.join(repoRoot, "artifacts"); + const result = spawnSync( + process.execPath, + [ + path.resolve("scripts/check-plugin-gateway-gauntlet.mjs"), + "--repo-root", + repoRoot, + "--output-dir", + outputDir, + "--skip-prebuild", + "--skip-lifecycle", + "--skip-slash-help", + "--skip-qa", + ], + { + cwd: path.resolve("."), + encoding: "utf8", + }, + ); + + expect(result.status, result.stderr).toBe(0); + const summary = JSON.parse( + await fs.readFile(path.join(outputDir, "plugin-gateway-gauntlet-summary.json"), "utf8"), + ); + expect(summary.isolatedRunRootPreserved).toBe(false); + await expect(fs.stat(summary.isolatedRunRoot)).rejects.toHaveProperty("code", "ENOENT"); + }); });