Compare commits

...

2 Commits

Author SHA1 Message Date
Peter Steinberger
8d2280c746 fix: harden legacy command queue migration (#61933) (thanks @openperf) 2026-04-06 15:40:55 +01:00
openperf
00dbf815e9 fix(process): migrate legacy command-queue singleton missing activeTaskWaiters
After a SIGUSR1 in-process restart following an npm upgrade from v2026.4.2
to v2026.4.5, the globalThis singleton created by the old code version
lacks the activeTaskWaiters field added in v2026.4.5.  resolveGlobalSingleton
returns the stale object as-is, causing notifyActiveTaskWaiters() to call
Array.from(undefined) and crash the gateway in a loop.

Add a schema migration step in getQueueState() that patches the missing
field on legacy singleton objects.  Add a regression test that plants a
v2026.4.2-shaped state object and verifies resetAllLanes() and
waitForActiveTasks() succeed without throwing.

Fixes #61905
2026-04-06 15:39:51 +01:00
3 changed files with 59 additions and 1 deletions

View File

@@ -34,6 +34,7 @@ Docs: https://docs.openclaw.ai
- TUI/command messages: strip inbound envelope metadata before rendering command/system messages so async completion notices stop leaking raw wrappers into the operator terminal. (#59985) Thanks @MoerAI.
- TUI/terminal: restore Kitty keyboard protocol and `modifyOtherKeys` state on TUI exit and fatal CLI crashes so parent shells stop inheriting broken keyboard input after `openclaw tui` exits. (#49130) Thanks @biefan.
- Docs/i18n: relocalize final localized-page links after translation so generated locale pages stop keeping stale English-root links when targets appear later in the same run. (#61796) Thanks @hxy91819.
- Gateway/command queue: migrate legacy global queue state after in-process SIGUSR1 restarts so pre-4.5 hot-upgrade singletons missing `activeTaskWaiters` stop crashing restart recovery. (#61933) Thanks @openperf.
## 2026.4.5

View File

@@ -378,6 +378,42 @@ describe("command queue", () => {
await expect(enqueueCommand(async () => "ok")).resolves.toBe("ok");
});
it("migrates legacy queue state missing activeTaskWaiters without crashing", async () => {
  // Regression scenario: after a SIGUSR1 in-process restart, the globalThis
  // singleton may still be the object built by an older release (e.g.
  // v2026.4.2), which predates the `activeTaskWaiters` field. getQueueState()
  // is expected to patch that field in so that neither resetAllLanes() nor
  // notifyActiveTaskWaiters() throws.
  const stateKey = Symbol.for("openclaw.commandQueueState");
  const globals = globalThis as Record<PropertyKey, unknown>;
  const previousState = globals[stateKey];

  // Legacy-shaped state: every field except activeTaskWaiters.
  const legacyState = {
    gatewayDraining: false,
    lanes: new Map(),
    nextTaskId: 1,
  };

  try {
    globals[stateKey] = legacyState;

    // resetAllLanes reaches notifyActiveTaskWaiters, which runs
    // Array.from(state.activeTaskWaiters). Unmigrated, that fails with
    //   TypeError: undefined is not iterable
    expect(() => resetAllLanes()).not.toThrow();

    // waitForActiveTasks reads activeTaskWaiters as well.
    await expect(waitForActiveTasks(0)).resolves.toEqual({ drained: true });
  } finally {
    // Put back whatever was installed before so later tests are unaffected.
    if (previousState === undefined) {
      delete globals[stateKey];
    } else {
      globals[stateKey] = previousState;
    }
    resetCommandQueueStateForTest();
  }
});
it("shares lane state across distinct module instances", async () => {
const commandQueueA = await importFreshModule<typeof import("./command-queue.js")>(
import.meta.url,

View File

@@ -53,6 +53,17 @@ type ActiveTaskWaiter = {
timeout?: ReturnType<typeof setTimeout>;
};
/**
 * Shape of the command-queue state object that getQueueState() hands out.
 * The object lives on globalThis, so it can outlive the module version that
 * created it.
 */
type QueueState = {
// NOTE(review): presumably set while the gateway is shutting down/draining — confirm against callers.
gatewayDraining: boolean;
// Per-lane state; NOTE(review): keys look like lane names — confirm.
lanes: Map<string, LaneState>;
// Not present on singletons created by older versions (e.g. v2026.4.2);
// getQueueState() patches it in before returning.
activeTaskWaiters: Set<ActiveTaskWaiter>;
// Initialised to 1 when getQueueState() builds a fresh state object.
nextTaskId: number;
};
/**
 * Queue state as it may actually be found on globalThis: the same as
 * `QueueState`, except that `activeTaskWaiters` may be absent because the
 * object was created by a version that predates the field.
 */
type LegacyQueueState = Omit<QueueState, "activeTaskWaiters"> &
  Partial<Pick<QueueState, "activeTaskWaiters">>;
/**
 * Reports whether `err` is an `Error` instance whose `name` is exactly
 * "LiveSessionModelSwitchError".
 *
 * @param err - Any thrown value.
 * @returns `true` only for an `Error` carrying that name; `false` for every
 *   other value, including non-Error objects that happen to have the same
 *   `name` property.
 */
function isExpectedNonErrorLaneFailure(err: unknown): boolean {
  if (!(err instanceof Error)) {
    return false;
  }
  return err.name === "LiveSessionModelSwitchError";
}
@@ -64,12 +75,22 @@ function isExpectedNonErrorLaneFailure(err: unknown): boolean {
// Key handed to resolveGlobalSingleton for the shared queue state. Symbol.for()
// looks the symbol up in the global symbol registry, so every caller using this
// string gets the same key.
const COMMAND_QUEUE_STATE_KEY = Symbol.for("openclaw.commandQueueState");
/**
 * Returns the shared command-queue state stored under
 * `COMMAND_QUEUE_STATE_KEY`, migrating it to the current schema if needed.
 *
 * The factory passed to `resolveGlobalSingleton` describes the shape of a
 * freshly created state object. When a state object already exists it is
 * reused as-is, which is why the migration step below is required.
 *
 * @returns The singleton state, guaranteed to carry an `activeTaskWaiters` Set.
 */
function getQueueState(): QueueState {
  const state = resolveGlobalSingleton<LegacyQueueState>(COMMAND_QUEUE_STATE_KEY, () => ({
    gatewayDraining: false,
    lanes: new Map<string, LaneState>(),
    activeTaskWaiters: new Set<ActiveTaskWaiter>(),
    nextTaskId: 1,
  }));
  // Schema migration: the singleton may have been created by an older code
  // version (e.g. v2026.4.2) that did not include `activeTaskWaiters`. After
  // a SIGUSR1 in-process restart the new code inherits the stale object via
  // `resolveGlobalSingleton` because the Symbol key already exists on
  // globalThis. Check the value rather than mere key presence so that a
  // property holding `undefined` (or any other non-Set) is repaired too, and
  // all downstream consumers see a valid Set.
  if (!(state.activeTaskWaiters instanceof Set)) {
    state.activeTaskWaiters = new Set<ActiveTaskWaiter>();
  }
  // The field is now present, so the legacy shape satisfies QueueState; the
  // compiler cannot follow the assignment above, hence the assertion.
  return state as QueueState;
}
function normalizeLane(lane: string): string {