fix(e2e): abort kitchen sink readiness on gateway exit

This commit is contained in:
Vincent Koc
2026-06-04 04:42:29 +02:00
parent 32e51f250f
commit b12114e45c
3 changed files with 105 additions and 11 deletions

View File

@@ -36,6 +36,7 @@ Docs: https://docs.openclaw.ai
- Release/CI/E2E: keep Crabbox hydrate pnpm stores on the persistent cache volume while still resetting volatile modules, reducing cold installs and runner memory churn.
- Release/CI/E2E: fail secret-provider proof startup immediately when the gateway exits by signal instead of waiting for the readiness timeout.
- Release/CI/E2E: report plugin gateway gauntlet command-log write failures as failed rows instead of crashing the harness from child-process callbacks.
- Release/CI/E2E: abort stalled Kitchen Sink RPC readiness probes as soon as the gateway exits so proof failures return promptly.
## 2026.6.1

View File

@@ -583,6 +583,7 @@ export async function fetchJson(url, options = {}) {
const attempts = Math.max(1, options.attempts ?? 3);
const timeoutMs = Math.max(1, options.timeoutMs ?? FETCH_TIMEOUT_MS);
const maxBodyBytes = Math.max(1, options.maxBodyBytes ?? FETCH_BODY_MAX_BYTES);
const externalSignal = options.signal;
let lastError;
for (let attempt = 1; attempt <= attempts; attempt += 1) {
const controller = new AbortController();
@@ -590,6 +591,26 @@ export async function fetchJson(url, options = {}) {
code: "ETIMEDOUT",
});
let timeout;
let removeExternalAbort = () => {};
const abortPromise = externalSignal
? new Promise((_, reject) => {
const abortError = () =>
externalSignal.reason instanceof Error
? externalSignal.reason
: new Error("fetch aborted");
const onAbort = () => {
const error = abortError();
controller.abort(error);
reject(new Error(error.message, { cause: error }));
};
if (externalSignal.aborted) {
onAbort();
return;
}
externalSignal.addEventListener("abort", onAbort, { once: true });
removeExternalAbort = () => externalSignal.removeEventListener("abort", onAbort);
})
: null;
const timeoutPromise = new Promise((_, reject) => {
timeout = setTimeout(() => {
controller.abort(timeoutError);
@@ -601,10 +622,12 @@ export async function fetchJson(url, options = {}) {
const response = await Promise.race([
(options.fetchImpl ?? fetch)(url, { signal: controller.signal }),
timeoutPromise,
...(abortPromise ? [abortPromise] : []),
]);
const text = await Promise.race([
readBoundedResponseText(response, maxBodyBytes),
timeoutPromise,
...(abortPromise ? [abortPromise] : []),
]);
let body = null;
try {
@@ -620,6 +643,7 @@ export async function fetchJson(url, options = {}) {
}
await delay(options.retryDelayMs ?? 250);
} finally {
removeExternalAbort();
if (timeout) {
clearTimeout(timeout);
}
@@ -780,6 +804,15 @@ export function hasChildExited(child) {
return child.exitCode !== null || child.signalCode !== null;
}
function createChildExitPromise(child) {
if (!child || typeof child.once !== "function") {
return null;
}
return new Promise((resolve) => {
child.once("exit", () => resolve());
});
}
function releaseUnsettledGatewayChild(child) {
child.stdin?.destroy?.();
child.stdout?.destroy?.();
@@ -865,6 +898,7 @@ export async function waitForGatewayReady(child, port, logPath, options = {}) {
const timeoutMs = Math.max(1, options.timeoutMs ?? READY_TIMEOUT_MS);
const pollDelayMs = Math.max(1, options.pollDelayMs ?? 250);
const logReportedReady = createGatewayReadyLogScanner(logPath);
const childExit = createChildExitPromise(child);
const exitedBeforeReadyError = () =>
new Error(`gateway exited before ready\n${tailFile(logPath)}`);
if (hasChildExited(child)) {
@@ -875,12 +909,33 @@ export async function waitForGatewayReady(child, port, logPath, options = {}) {
if (hasChildExited(child)) {
throw exitedBeforeReadyError();
}
const probeAbort = new AbortController();
const readyzProbe = (async () => {
try {
const readyz = await fetchJson(`http://127.0.0.1:${port}/readyz`, {
attempts: 1,
fetchImpl: options.fetchImpl,
signal: probeAbort.signal,
timeoutMs: Math.min(FETCH_TIMEOUT_MS, remainingMs),
});
return { kind: "readyz", readyz };
} catch (error) {
return { kind: "error", error };
}
})();
const outcome = await Promise.race([
readyzProbe,
...(childExit ? [childExit.then(() => ({ kind: "child-exit" }))] : []),
]);
if (outcome.kind === "child-exit") {
probeAbort.abort(exitedBeforeReadyError());
throw exitedBeforeReadyError();
}
try {
const readyz = await fetchJson(`http://127.0.0.1:${port}/readyz`, {
attempts: 1,
fetchImpl: options.fetchImpl,
timeoutMs: Math.min(FETCH_TIMEOUT_MS, remainingMs),
});
if (outcome.kind === "error") {
throw outcome.error;
}
const readyz = outcome.readyz;
if (readyz.ok) {
return;
}
@@ -976,9 +1031,7 @@ export function assertExpectedKitchenSinkToolEntries(
source: entry?.source,
}));
if (wrongProvenance.length > 0) {
throw new Error(
`${label} plugin provenance mismatch: ${JSON.stringify(wrongProvenance)}`,
);
throw new Error(`${label} plugin provenance mismatch: ${JSON.stringify(wrongProvenance)}`);
}
}
return ids;

View File

@@ -211,6 +211,48 @@ describe("kitchen-sink RPC gateway teardown", () => {
}
});
// Regression test: when the gateway child emits "exit" while the readiness
// fetch is still pending, waitForGatewayReady must reject with the
// "gateway exited before ready" error well before its own timeout.
it("aborts stalled readiness probes when the gateway exits mid-probe", async () => {
const root = mkdtempSync(path.join(tmpdir(), "openclaw-kitchen-rpc-exit-during-ready-"));
try {
const logPath = path.join(root, "gateway.log");
writeFileSync(logPath, "gateway died during readiness\n");
// Fake child: an EventEmitter carrying the two fields that start as null,
// i.e. a process that has not exited yet.
const child = Object.assign(new EventEmitter(), {
exitCode: null,
signalCode: null as NodeJS.Signals | null,
});
// Stub fetch that never resolves. It rejects only when the signal it was
// given aborts, forwarding the abort reason when that reason is an Error.
const fetchImpl = vi.fn((_url: string, init?: RequestInit) => {
return new Promise((_resolve, reject) => {
init?.signal?.addEventListener(
"abort",
() => {
const reason = init.signal?.reason;
reject(reason instanceof Error ? reason : new Error("fetch aborted"));
},
{ once: true },
);
});
});
const startedAt = Date.now();
// Simulate the gateway being killed shortly after the probe has started.
setTimeout(() => {
child.signalCode = "SIGTERM";
child.emit("exit", null, "SIGTERM");
}, 25);
await expect(
waitForGatewayReady(child, 9, logPath, {
fetchImpl,
pollDelayMs: 5_000,
timeoutMs: 2_000,
}),
).rejects.toThrow("gateway exited before ready");
// Only one probe should have been issued, and the rejection must arrive well
// under both the 2_000 ms timeout and the 5_000 ms poll delay passed above.
expect(fetchImpl).toHaveBeenCalledOnce();
expect(Date.now() - startedAt).toBeLessThan(500);
} finally {
// Always remove the temporary directory, even when an assertion fails.
rmSync(root, { recursive: true, force: true });
}
});
it("keeps stalled readiness probes inside the caller deadline", async () => {
const root = mkdtempSync(path.join(tmpdir(), "openclaw-kitchen-rpc-stalled-ready-"));
try {
@@ -553,9 +595,7 @@ describe("kitchen-sink RPC command catalog assertions", () => {
it("requires every expected Kitchen Sink plugin tool", () => {
expect(() =>
assertExpectedKitchenSinkToolEntries(
[
{ id: "kitchen_sink_text", source: "plugin", pluginId: "openclaw-kitchen-sink-fixture" },
],
[{ id: "kitchen_sink_text", source: "plugin", pluginId: "openclaw-kitchen-sink-fixture" }],
"tools.catalog plugin tools",
{ requirePluginProvenance: true },
),