mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-06 05:51:15 +08:00
fix(perf): fail startup bench on early gateway exit
This commit is contained in:
@@ -12,6 +12,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- Install/update: bypass npm `min-release-age` policies with `--min-release-age=0` instead of `--before` so hosted installers keep working on npm versions that reject the combined config. (#84749) Thanks @TeodoroRodrigo.
|
||||
- WebChat: keep message-tool replies visible in the chat while still summarizing internal tool results for the model. Fixes #86347. Thanks @shakkernerd.
|
||||
- Gateway/perf: fail startup benchmark samples when the Gateway process exits before benchmark teardown, including signal deaths after readiness probes.
|
||||
- Agents/commitments: serialize commitment store load-modify-save writes so concurrent heartbeat and CLI updates no longer lose dismissal, sent, or attempt state. (#81153) Thanks @ai-hpc.
|
||||
- xAI/LM Studio: promote plain-text tool-call fallbacks into structured tool calls and strip leaked internal tool syntax before user-facing delivery. (#86222) Thanks @fuller-stack-dev.
|
||||
- CLI: suppress benign self-update version-skew warnings during package post-update finalization.
|
||||
|
||||
@@ -34,6 +34,7 @@ type ProbeTransition = {
|
||||
type GatewaySample = {
|
||||
cpuCoreRatio: number | null;
|
||||
cpuMs: number | null;
|
||||
exitedBeforeTeardown?: boolean;
|
||||
exitCode: number | null;
|
||||
firstOutputMs: number | null;
|
||||
gatewayReadyLogLine: string | null;
|
||||
@@ -79,6 +80,15 @@ type BenchmarkFailure = {
|
||||
sampleIndex: number;
|
||||
};
|
||||
|
||||
type ChildExit = {
|
||||
exitCode: number | null;
|
||||
signal: string | null;
|
||||
};
|
||||
|
||||
type StopChildResult = ChildExit & {
|
||||
exitedBeforeTeardown: boolean;
|
||||
};
|
||||
|
||||
type PluginFixtureResult = {
|
||||
pluginIds: string[];
|
||||
pluginsDir: string;
|
||||
@@ -407,6 +417,17 @@ function collectResultFailures(
|
||||
reason: `missing ${missing.join(", ")}`,
|
||||
sampleIndex: index + 1,
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (sample.exitedBeforeTeardown === true) {
|
||||
failures.push({
|
||||
id: result.id,
|
||||
reason:
|
||||
sample.signal == null
|
||||
? `child exited ${sample.exitCode ?? "before teardown"}`
|
||||
: `child exited by ${sample.signal}`,
|
||||
sampleIndex: index + 1,
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -687,36 +708,52 @@ function sanitizedEnv(
|
||||
return env;
|
||||
}
|
||||
|
||||
async function stopChild(child: ChildProcessWithoutNullStreams): Promise<{
|
||||
exitCode: number | null;
|
||||
signal: string | null;
|
||||
}> {
|
||||
if (child.exitCode != null || child.signalCode != null) {
|
||||
return { exitCode: child.exitCode, signal: child.signalCode };
|
||||
async function stopChild(child: ChildProcessWithoutNullStreams): Promise<StopChildResult> {
|
||||
const currentExit = (): ChildExit | null =>
|
||||
child.exitCode != null || child.signalCode != null
|
||||
? { exitCode: child.exitCode, signal: child.signalCode }
|
||||
: null;
|
||||
|
||||
const existingExit = currentExit();
|
||||
if (existingExit != null) {
|
||||
return { ...existingExit, exitedBeforeTeardown: true };
|
||||
}
|
||||
const exited = new Promise<{ exitCode: number | null; signal: string | null }>((resolve) => {
|
||||
child.once("exit", (exitCode, signal) => resolve({ exitCode, signal }));
|
||||
|
||||
let observedExit: ChildExit | null = null;
|
||||
const exited = new Promise<ChildExit>((resolve) => {
|
||||
child.once("exit", (exitCode, signal) => {
|
||||
observedExit = { exitCode, signal };
|
||||
resolve(observedExit);
|
||||
});
|
||||
});
|
||||
killProcessTree(child, "SIGTERM");
|
||||
|
||||
await new Promise<void>((resolve) => setImmediate(resolve));
|
||||
const queuedExit = observedExit ?? currentExit();
|
||||
if (queuedExit != null) {
|
||||
return { ...queuedExit, exitedBeforeTeardown: true };
|
||||
}
|
||||
|
||||
const sentTeardownSignal = killProcessTree(child, "SIGTERM");
|
||||
const timeout = delay(2000).then(() => {
|
||||
if (child.exitCode == null && child.signalCode == null) {
|
||||
killProcessTree(child, "SIGKILL");
|
||||
}
|
||||
return exited;
|
||||
});
|
||||
return Promise.race([exited, timeout]);
|
||||
const exit = await Promise.race([exited, timeout]);
|
||||
return { ...exit, exitedBeforeTeardown: !sentTeardownSignal };
|
||||
}
|
||||
|
||||
function killProcessTree(child: ChildProcessWithoutNullStreams, signal: NodeJS.Signals): void {
|
||||
function killProcessTree(child: ChildProcessWithoutNullStreams, signal: NodeJS.Signals): boolean {
|
||||
if (process.platform !== "win32" && child.pid !== undefined) {
|
||||
try {
|
||||
process.kill(-child.pid, signal);
|
||||
return;
|
||||
return true;
|
||||
} catch {
|
||||
// Fall back to the direct child below.
|
||||
}
|
||||
}
|
||||
child.kill(signal);
|
||||
return child.kill(signal);
|
||||
}
|
||||
|
||||
function collectStartupTrace(line: string, startupTrace: Record<string, number>): void {
|
||||
@@ -975,6 +1012,7 @@ async function runGatewaySample(options: {
|
||||
return {
|
||||
cpuCoreRatio,
|
||||
cpuMs,
|
||||
exitedBeforeTeardown: exit.exitedBeforeTeardown,
|
||||
exitCode: exit.exitCode,
|
||||
firstOutputMs,
|
||||
gatewayReadyLogLine,
|
||||
@@ -1109,6 +1147,7 @@ export const testing = {
|
||||
parsePositiveInt,
|
||||
resolveEntry,
|
||||
sanitizedEnv,
|
||||
stopChild,
|
||||
summarizeCase,
|
||||
waitForProbe,
|
||||
writeConfig,
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
import { spawnSync } from "node:child_process";
|
||||
import { EventEmitter } from "node:events";
|
||||
import fs from "node:fs";
|
||||
import { createServer } from "node:http";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { performance } from "node:perf_hooks";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { testing } from "../../scripts/bench-gateway-startup.ts";
|
||||
|
||||
async function listenOnLoopback(handler: Parameters<typeof createServer>[0]) {
|
||||
@@ -154,6 +155,176 @@ describe("gateway startup benchmark script", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it("flags samples that become ready and then exit nonzero", () => {
|
||||
const result = testing.summarizeCase({ config: {}, id: "demo", name: "demo" }, [
|
||||
{
|
||||
cpuCoreRatio: 0.5,
|
||||
cpuMs: 100,
|
||||
exitedBeforeTeardown: true,
|
||||
exitCode: 1,
|
||||
firstOutputMs: 1,
|
||||
gatewayReadyLogLine: "[gateway] ready",
|
||||
gatewayReadyLogMs: 20,
|
||||
healthz: {
|
||||
firstErrorKind: "econnrefused",
|
||||
firstRecoveryMs: 10,
|
||||
ms: 10,
|
||||
status: 200,
|
||||
transitions: [],
|
||||
},
|
||||
httpListenLogLine: "[gateway] http server listening (0 plugins)",
|
||||
httpListenLogMs: 5,
|
||||
maxRssMb: 120,
|
||||
outputTail: "ready\\nError: startup sidecar crashed",
|
||||
readyz: {
|
||||
firstErrorKind: "http-503",
|
||||
firstRecoveryMs: 18,
|
||||
ms: 18,
|
||||
status: 200,
|
||||
transitions: [],
|
||||
},
|
||||
signal: null,
|
||||
startupTrace: {},
|
||||
},
|
||||
]);
|
||||
|
||||
expect(testing.collectResultFailures([result], { processMetricsRequired: true })).toEqual([
|
||||
{
|
||||
id: "demo",
|
||||
reason: "child exited 1",
|
||||
sampleIndex: 1,
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it("does not flag nonzero exits from intentional teardown", () => {
|
||||
const result = testing.summarizeCase({ config: {}, id: "demo", name: "demo" }, [
|
||||
{
|
||||
cpuCoreRatio: 0.5,
|
||||
cpuMs: 100,
|
||||
exitedBeforeTeardown: false,
|
||||
exitCode: 1,
|
||||
firstOutputMs: 1,
|
||||
gatewayReadyLogLine: "[gateway] ready",
|
||||
gatewayReadyLogMs: 20,
|
||||
healthz: {
|
||||
firstErrorKind: "econnrefused",
|
||||
firstRecoveryMs: 10,
|
||||
ms: 10,
|
||||
status: 200,
|
||||
transitions: [],
|
||||
},
|
||||
httpListenLogLine: "[gateway] http server listening (0 plugins)",
|
||||
httpListenLogMs: 5,
|
||||
maxRssMb: 120,
|
||||
outputTail: "",
|
||||
readyz: {
|
||||
firstErrorKind: "http-503",
|
||||
firstRecoveryMs: 18,
|
||||
ms: 18,
|
||||
status: 200,
|
||||
transitions: [],
|
||||
},
|
||||
signal: null,
|
||||
startupTrace: {},
|
||||
},
|
||||
]);
|
||||
|
||||
expect(testing.collectResultFailures([result], { processMetricsRequired: true })).toEqual([]);
|
||||
});
|
||||
|
||||
it("flags samples that become ready and then die from a signal", () => {
|
||||
const result = testing.summarizeCase({ config: {}, id: "demo", name: "demo" }, [
|
||||
{
|
||||
cpuCoreRatio: 0.5,
|
||||
cpuMs: 100,
|
||||
exitedBeforeTeardown: true,
|
||||
exitCode: null,
|
||||
firstOutputMs: 1,
|
||||
gatewayReadyLogLine: "[gateway] ready",
|
||||
gatewayReadyLogMs: 20,
|
||||
healthz: {
|
||||
firstErrorKind: "econnrefused",
|
||||
firstRecoveryMs: 10,
|
||||
ms: 10,
|
||||
status: 200,
|
||||
transitions: [],
|
||||
},
|
||||
httpListenLogLine: "[gateway] http server listening (0 plugins)",
|
||||
httpListenLogMs: 5,
|
||||
maxRssMb: 120,
|
||||
outputTail: "ready\\nsegmentation fault",
|
||||
readyz: {
|
||||
firstErrorKind: "http-503",
|
||||
firstRecoveryMs: 18,
|
||||
ms: 18,
|
||||
status: 200,
|
||||
transitions: [],
|
||||
},
|
||||
signal: "SIGSEGV",
|
||||
startupTrace: {},
|
||||
},
|
||||
]);
|
||||
|
||||
expect(testing.collectResultFailures([result], { processMetricsRequired: true })).toEqual([
|
||||
{
|
||||
id: "demo",
|
||||
reason: "child exited by SIGSEGV",
|
||||
sampleIndex: 1,
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it("classifies queued child exits before sending teardown signals", async () => {
|
||||
const child = new EventEmitter() as EventEmitter & {
|
||||
exitCode: number | null;
|
||||
kill: ReturnType<typeof vi.fn>;
|
||||
signalCode: NodeJS.Signals | null;
|
||||
};
|
||||
child.exitCode = null;
|
||||
child.signalCode = null;
|
||||
child.kill = vi.fn(() => true);
|
||||
|
||||
const stopped = testing.stopChild(child as unknown as Parameters<typeof testing.stopChild>[0]);
|
||||
queueMicrotask(() => {
|
||||
child.exitCode = 7;
|
||||
child.emit("exit", 7, null);
|
||||
});
|
||||
|
||||
await expect(stopped).resolves.toEqual({
|
||||
exitedBeforeTeardown: true,
|
||||
exitCode: 7,
|
||||
signal: null,
|
||||
});
|
||||
expect(child.kill).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("classifies failed teardown signaling as a pre-teardown child exit", async () => {
|
||||
const child = new EventEmitter() as EventEmitter & {
|
||||
exitCode: number | null;
|
||||
kill: ReturnType<typeof vi.fn>;
|
||||
signalCode: NodeJS.Signals | null;
|
||||
};
|
||||
child.exitCode = null;
|
||||
child.signalCode = null;
|
||||
child.kill = vi.fn(() => {
|
||||
setImmediate(() => {
|
||||
child.exitCode = 8;
|
||||
child.emit("exit", 8, null);
|
||||
});
|
||||
return false;
|
||||
});
|
||||
|
||||
await expect(
|
||||
testing.stopChild(child as unknown as Parameters<typeof testing.stopChild>[0]),
|
||||
).resolves.toEqual({
|
||||
exitedBeforeTeardown: true,
|
||||
exitCode: 8,
|
||||
signal: null,
|
||||
});
|
||||
expect(child.kill).toHaveBeenCalledWith("SIGTERM");
|
||||
});
|
||||
|
||||
it("collects Count-suffixed startup trace metrics", () => {
|
||||
const startupTrace: Record<string, number> = {};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user