From b12114e45ca8f6ffb005979bd63996e752b72d07 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Thu, 4 Jun 2026 04:42:29 +0200 Subject: [PATCH] fix(e2e): abort kitchen sink readiness on gateway exit --- CHANGELOG.md | 1 + scripts/e2e/kitchen-sink-rpc-walk.mjs | 69 +++++++++++++++++++--- test/scripts/kitchen-sink-rpc-walk.test.ts | 46 ++++++++++++++- 3 files changed, 105 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e0a212ecea2..7ea4234db188 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ Docs: https://docs.openclaw.ai - Release/CI/E2E: keep Crabbox hydrate pnpm stores on the persistent cache volume while still resetting volatile modules, reducing cold installs and runner memory churn. - Release/CI/E2E: fail secret-provider proof startup immediately when the gateway exits by signal instead of waiting for the readiness timeout. - Release/CI/E2E: report plugin gateway gauntlet command-log write failures as failed rows instead of crashing the harness from child-process callbacks. +- Release/CI/E2E: abort stalled Kitchen Sink RPC readiness probes as soon as the gateway exits so proof failures return promptly. ## 2026.6.1 diff --git a/scripts/e2e/kitchen-sink-rpc-walk.mjs b/scripts/e2e/kitchen-sink-rpc-walk.mjs index 68028d53b4c2..d683548684e9 100644 --- a/scripts/e2e/kitchen-sink-rpc-walk.mjs +++ b/scripts/e2e/kitchen-sink-rpc-walk.mjs @@ -583,6 +583,7 @@ export async function fetchJson(url, options = {}) { const attempts = Math.max(1, options.attempts ?? 3); const timeoutMs = Math.max(1, options.timeoutMs ?? FETCH_TIMEOUT_MS); const maxBodyBytes = Math.max(1, options.maxBodyBytes ?? FETCH_BODY_MAX_BYTES); + const externalSignal = options.signal; let lastError; for (let attempt = 1; attempt <= attempts; attempt += 1) { const controller = new AbortController(); @@ -590,6 +591,26 @@ export async function fetchJson(url, options = {}) { code: "ETIMEDOUT", }); let timeout; + let removeExternalAbort = () => {}; + const abortPromise = externalSignal + ? new Promise((_, reject) => { + const abortError = () => + externalSignal.reason instanceof Error + ? externalSignal.reason + : new Error("fetch aborted"); + const onAbort = () => { + const error = abortError(); + controller.abort(error); + reject(new Error(error.message, { cause: error })); + }; + if (externalSignal.aborted) { + onAbort(); + return; + } + externalSignal.addEventListener("abort", onAbort, { once: true }); + removeExternalAbort = () => externalSignal.removeEventListener("abort", onAbort); + }) + : null; const timeoutPromise = new Promise((_, reject) => { timeout = setTimeout(() => { controller.abort(timeoutError); @@ -601,10 +622,12 @@ export async function fetchJson(url, options = {}) { const response = await Promise.race([ (options.fetchImpl ?? fetch)(url, { signal: controller.signal }), timeoutPromise, + ...(abortPromise ? [abortPromise] : []), ]); const text = await Promise.race([ readBoundedResponseText(response, maxBodyBytes), timeoutPromise, + ...(abortPromise ? [abortPromise] : []), ]); let body = null; try { @@ -620,6 +643,7 @@ export async function fetchJson(url, options = {}) { } await delay(options.retryDelayMs ?? 250); } finally { + removeExternalAbort(); if (timeout) { clearTimeout(timeout); } @@ -780,6 +804,15 @@ export function hasChildExited(child) { return child.exitCode !== null || child.signalCode !== null; } +function createChildExitPromise(child) { + if (!child || typeof child.once !== "function") { + return null; + } + return new Promise((resolve) => { + child.once("exit", () => resolve()); + }); +} + function releaseUnsettledGatewayChild(child) { child.stdin?.destroy?.(); child.stdout?.destroy?.(); @@ -865,6 +898,7 @@ export async function waitForGatewayReady(child, port, logPath, options = {}) { const timeoutMs = Math.max(1, options.timeoutMs ?? READY_TIMEOUT_MS); const pollDelayMs = Math.max(1, options.pollDelayMs ?? 250); const logReportedReady = createGatewayReadyLogScanner(logPath); + const childExit = createChildExitPromise(child); const exitedBeforeReadyError = () => new Error(`gateway exited before ready\n${tailFile(logPath)}`); if (hasChildExited(child)) { @@ -875,12 +909,33 @@ export async function waitForGatewayReady(child, port, logPath, options = {}) { if (hasChildExited(child)) { throw exitedBeforeReadyError(); } + const probeAbort = new AbortController(); + const readyzProbe = (async () => { + try { + const readyz = await fetchJson(`http://127.0.0.1:${port}/readyz`, { + attempts: 1, + fetchImpl: options.fetchImpl, + signal: probeAbort.signal, + timeoutMs: Math.min(FETCH_TIMEOUT_MS, remainingMs), + }); + return { kind: "readyz", readyz }; + } catch (error) { + return { kind: "error", error }; + } + })(); + const outcome = await Promise.race([ + readyzProbe, + ...(childExit ? [childExit.then(() => ({ kind: "child-exit" }))] : []), + ]); + if (outcome.kind === "child-exit") { + probeAbort.abort(exitedBeforeReadyError()); + throw exitedBeforeReadyError(); + } try { - const readyz = await fetchJson(`http://127.0.0.1:${port}/readyz`, { - attempts: 1, - fetchImpl: options.fetchImpl, - timeoutMs: Math.min(FETCH_TIMEOUT_MS, remainingMs), - }); + if (outcome.kind === "error") { + throw outcome.error; + } + const readyz = outcome.readyz; if (readyz.ok) { return; } @@ -976,9 +1031,7 @@ export function assertExpectedKitchenSinkToolEntries( source: entry?.source, })); if (wrongProvenance.length > 0) { - throw new Error( - `${label} plugin provenance mismatch: ${JSON.stringify(wrongProvenance)}`, - ); + throw new Error(`${label} plugin provenance mismatch: ${JSON.stringify(wrongProvenance)}`); } } return ids; diff --git a/test/scripts/kitchen-sink-rpc-walk.test.ts b/test/scripts/kitchen-sink-rpc-walk.test.ts index e96690ca01a0..1a21131ef188 100644 --- a/test/scripts/kitchen-sink-rpc-walk.test.ts +++ b/test/scripts/kitchen-sink-rpc-walk.test.ts @@ -211,6 +211,48 @@ describe("kitchen-sink RPC gateway teardown", () => { } }); + it("aborts stalled readiness probes when the gateway exits mid-probe", async () => { + const root = mkdtempSync(path.join(tmpdir(), "openclaw-kitchen-rpc-exit-during-ready-")); + try { + const logPath = path.join(root, "gateway.log"); + writeFileSync(logPath, "gateway died during readiness\n"); + const child = Object.assign(new EventEmitter(), { + exitCode: null, + signalCode: null as NodeJS.Signals | null, + }); + const fetchImpl = vi.fn((_url: string, init?: RequestInit) => { + return new Promise((_resolve, reject) => { + init?.signal?.addEventListener( + "abort", + () => { + const reason = init.signal?.reason; + reject(reason instanceof Error ? reason : new Error("fetch aborted")); + }, + { once: true }, + ); + }); + }); + const startedAt = Date.now(); + setTimeout(() => { + child.signalCode = "SIGTERM"; + child.emit("exit", null, "SIGTERM"); + }, 25); + + await expect( + waitForGatewayReady(child, 9, logPath, { + fetchImpl, + pollDelayMs: 5_000, + timeoutMs: 2_000, + }), + ).rejects.toThrow("gateway exited before ready"); + + expect(fetchImpl).toHaveBeenCalledOnce(); + expect(Date.now() - startedAt).toBeLessThan(500); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }); + it("keeps stalled readiness probes inside the caller deadline", async () => { const root = mkdtempSync(path.join(tmpdir(), "openclaw-kitchen-rpc-stalled-ready-")); try { @@ -553,9 +595,7 @@ describe("kitchen-sink RPC command catalog assertions", () => { it("requires every expected Kitchen Sink plugin tool", () => { expect(() => assertExpectedKitchenSinkToolEntries( - [ - { id: "kitchen_sink_text", source: "plugin", pluginId: "openclaw-kitchen-sink-fixture" }, - ], + [{ id: "kitchen_sink_text", source: "plugin", pluginId: "openclaw-kitchen-sink-fixture" }], "tools.catalog plugin tools", { requirePluginProvenance: true }, ),