fix(e2e): fail fast when secret provider startup exits

This commit is contained in:
Vincent Koc
2026-06-04 04:21:42 +02:00
parent ad958fd97a
commit 50c3995894
3 changed files with 115 additions and 16 deletions

View File

@@ -34,6 +34,7 @@ Docs: https://docs.openclaw.ai
- Security/config/tooling: reject corrupt shell snapshots, suspicious gateway startup configs, malformed release/test/tooling/Docker/perf numeric limits, oversized audit responses, unsafe exec precheck env, and invalid pending-agent SQLite scaffold denials. (#89701, #89705, #89480, #81488) Thanks @RomneyDa and @mmaps.
- Release/CI/E2E: restore package changelog extraction after the post-2026.6.1 version bump, keep hydrated pnpm modules under `node_modules` for ARM/Linux package lifecycle scripts, keep OpenAI live-cache prerequisites advisory while Anthropic prerequisites stay blocking, retry Windows Parallels background log appends on transient file-lock errors, bound candidate GitHub and cross-OS Discord fetches, harden ARM smoke/browser checks, show Docker build heartbeats, reset Crabbox pnpm hydrate state, and isolate Testbox/Docker/release journey artifacts.
- Release/CI/E2E: keep Crabbox hydrate pnpm stores on the persistent cache volume while still resetting volatile modules, reducing cold installs and runner memory churn.
- Release/CI/E2E: fail secret-provider proof startup immediately when the gateway exits by signal instead of waiting for the readiness timeout.
## 2026.6.1

View File

@@ -239,13 +239,29 @@ function runCommand(command, args, options = {}) {
const stdout = createOutputCapture("stdout");
const stderr = createOutputCapture("stderr");
let timedOut = false;
let aborted = false;
let killTimer;
const abort = () => {
if (childHasExited(child)) {
return;
}
aborted = true;
terminateProcessTree(child, "SIGTERM");
killTimer ??= setTimeout(() => terminateProcessTree(child, "SIGKILL"), 1000);
killTimer.unref();
};
const timer = setTimeout(() => {
timedOut = true;
terminateProcessTree(child, "SIGTERM");
killTimer = setTimeout(() => terminateProcessTree(child, "SIGKILL"), 1000);
killTimer.unref();
}, timeoutMs);
const abortSignal = options.signal;
if (abortSignal?.aborted) {
abort();
} else {
abortSignal?.addEventListener("abort", abort, { once: true });
}
child.stdout?.on("data", (chunk) => {
stdout.append(chunk);
});
@@ -275,6 +291,7 @@ function runCommand(command, args, options = {}) {
if (killTimer) {
clearTimeout(killTimer);
}
abortSignal?.removeEventListener("abort", abort);
removeParentSignalHandlers();
reject(error instanceof Error ? error : new Error(formatErrorMessage(error)));
});
@@ -283,8 +300,13 @@ function runCommand(command, args, options = {}) {
if (killTimer) {
clearTimeout(killTimer);
}
abortSignal?.removeEventListener("abort", abort);
removeParentSignalHandlers();
const result = { code: code ?? 0, signal, stdout: stdout.text(), stderr: stderr.text() };
if (aborted) {
reject(new Error(scrub(`command aborted: ${command} ${args.join(" ")}`)));
return;
}
if (timedOut) {
terminateProcessTree(child, "SIGKILL");
reject(new Error(scrub(`command timed out: ${command} ${args.join(" ")}`)));
@@ -665,30 +687,65 @@ async function startGateway(envCtx, port, token = TOKEN_V1) {
child.stderr.on("data", (chunk) => {
stderr.append(chunk);
});
const gatewayExit = new Promise((resolve) => {
child.once("error", (error) => {
resolve({
kind: "gateway-error",
error: error instanceof Error ? error : new Error(formatErrorMessage(error)),
});
});
child.once("exit", (code, signal) => {
resolve({ kind: "gateway-exit", code, signal });
});
});
const started = Date.now();
let lastHealthResult;
let lastHealthError;
while (Date.now() - started < READY_TIMEOUT_MS) {
if (child.exitCode !== null) {
if (childHasExited(child)) {
const exit = child.signalCode ? `signal ${child.signalCode}` : `code ${child.exitCode}`;
throw new Error(
scrub(
`gateway exited during startup (${child.exitCode})\n${stderr.text() || stdout.text()}`,
),
scrub(`gateway exited during startup (${exit})\n${stderr.text() || stdout.text()}`),
);
}
const remainingMs = remainingDeadlineMs(started, READY_TIMEOUT_MS);
try {
const health = await gatewayCall(
envCtx.env,
port,
token,
"health",
{},
{
allowFailure: true,
timeoutMs: Math.min(RPC_TIMEOUT_MS + 10000, remainingMs),
},
const healthAbort = new AbortController();
const healthProbe = (async () => {
try {
const health = await gatewayCall(
envCtx.env,
port,
token,
"health",
{},
{
allowFailure: true,
signal: healthAbort.signal,
timeoutMs: Math.min(RPC_TIMEOUT_MS + 10000, remainingMs),
},
);
return { kind: "health", health };
} catch (error) {
return { kind: "health-error", error };
}
})();
const outcome = await Promise.race([healthProbe, gatewayExit]);
if (outcome.kind === "gateway-error") {
healthAbort.abort();
throw new Error(scrub(`gateway failed to start: ${outcome.error.message}`));
}
if (outcome.kind === "gateway-exit") {
healthAbort.abort();
const exit = outcome.signal ? `signal ${outcome.signal}` : `code ${outcome.code}`;
throw new Error(
scrub(`gateway exited during startup (${exit})\n${stderr.text() || stdout.text()}`),
);
}
try {
if (outcome.kind === "health-error") {
throw outcome.error;
}
const health = outcome.health;
lastHealthResult = health;
if (health.code === 0) {
return {
@@ -813,7 +870,11 @@ async function gatewayCall(env, port, token, method, params = {}, options = {})
OPENCLAW_STATE_DIR: clientStateDir,
OPENCLAW_HOME: clientStateDir,
},
{ timeoutMs: options.timeoutMs ?? RPC_TIMEOUT_MS + 10000, allowFailure: options.allowFailure },
{
timeoutMs: options.timeoutMs ?? RPC_TIMEOUT_MS + 10000,
allowFailure: options.allowFailure,
signal: options.signal,
},
);
}

View File

@@ -90,6 +90,29 @@ function writeLeakingStartupOpenClaw(root: string): string {
return scriptPath;
}
/**
 * Writes a fake `openclaw` CLI script into `root` whose gateway dies by signal.
 *
 * Behavior of the generated script, by arguments:
 * - `gateway run`: schedules a SIGTERM to its own pid after 50ms and then
 *   awaits a promise that never settles, so the process ends by signal.
 * - `gateway call` / `gateway status`: waits 60 seconds, then exits with code 2.
 * - anything else: exits with code 2 right away.
 *
 * @param root Directory the script file is created in.
 * @returns Path of the written script (`fake-signaled-openclaw.mjs`, mode 0o755).
 */
function writeSignaledStartupOpenClaw(root: string): string {
  const fakeCliPath = path.join(root, "fake-signaled-openclaw.mjs");

  // `gateway run`: self-terminate with SIGTERM shortly after launch.
  const gatewayRunBranch = [
    "if (args[0] === 'gateway' && args[1] === 'run') {",
    " setTimeout(() => process.kill(process.pid, 'SIGTERM'), 50);",
    " await new Promise(() => {});",
    "}",
  ];

  // `gateway call|status`: stall for a minute before falling through to exit.
  const gatewayRpcBranch = [
    "if (args[0] === 'gateway' && (args[1] === 'call' || args[1] === 'status')) {",
    " await delay(60_000);",
    "}",
  ];

  // Every line, including the last, is newline-terminated.
  const scriptSource = [
    "#!/usr/bin/env node",
    "import { setTimeout as delay } from 'node:timers/promises';",
    "const args = process.argv.slice(2);",
    ...gatewayRunBranch,
    ...gatewayRpcBranch,
    "process.exit(2);",
  ]
    .map((line) => `${line}\n`)
    .join("");

  fs.writeFileSync(fakeCliPath, scriptSource, { mode: 0o755 });
  return fakeCliPath;
}
function runProofHarness(
root: string,
fakeOpenClaw: string,
@@ -151,6 +174,20 @@ describe("secret provider integration proof harness", () => {
expect(payload.elapsedMs).toBeLessThan(750);
});
// The fake gateway written by writeSignaledStartupOpenClaw SIGTERMs itself
// about 50ms after launch, so the harness should report a signal exit well
// before the 2000 passed as OPENCLAW_SECRET_PROOF_READY_MS
// (presumably the readiness timeout in milliseconds — confirm in the harness).
it("fails fast when startup exits by signal", () => {
const tempRoot = makeTempDir();
const harnessRun = runProofHarness(tempRoot, writeSignaledStartupOpenClaw(tempRoot), "start", {
OPENCLAW_SECRET_PROOF_READY_MS: "2000",
});

// The harness process itself must run to completion and exit cleanly.
expect(harnessRun.error).toBeUndefined();
expect(harnessRun.status).toBe(0);

// The failure is reported through the JSON payload on stdout.
const { message, elapsedMs } = JSON.parse(harnessRun.stdout);
expect(message).toContain("gateway exited during startup (signal SIGTERM)");
expect(elapsedMs).toBeLessThan(750);
});
it("kills a stalled startup gateway before returning a readiness failure", async () => {
const root = makeTempDir();
const markerPath = path.join(root, "gateway-marker.txt");