From adc6adccd8256cd5d48a0811556fd8768fc529ee Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Sat, 23 May 2026 00:00:38 +0800 Subject: [PATCH] fix(update): detect nested macOS gateway ancestry (#85391) * fix(update): detect nested macOS gateway ancestry * fix(release): refresh shrinkwrap for CI npm * fix(update): inherit gateway runtime pid for update guard --- CHANGELOG.md | 1 + extensions/acpx/npm-shrinkwrap.json | 2 + .../amazon-bedrock-mantle/npm-shrinkwrap.json | 2 + extensions/amazon-bedrock/npm-shrinkwrap.json | 2 + .../anthropic-vertex/npm-shrinkwrap.json | 2 + extensions/codex/npm-shrinkwrap.json | 2 + .../diagnostics-otel/npm-shrinkwrap.json | 2 + extensions/discord/npm-shrinkwrap.json | 3 +- extensions/memory-lancedb/npm-shrinkwrap.json | 1 + extensions/twitch/npm-shrinkwrap.json | 2 + extensions/whatsapp/npm-shrinkwrap.json | 3 + npm-shrinkwrap.json | 6 + .../gateway-cli/run.option-collisions.test.ts | 21 +++ src/cli/gateway-cli/run.ts | 4 + src/cli/update-cli.test.ts | 34 +++++ src/cli/update-cli/update-command.ts | 43 +++++- src/daemon/constants.ts | 1 + src/infra/restart-stale-pids.test.ts | 127 +++++++++++++++++- src/infra/restart-stale-pids.ts | 49 +++++-- src/infra/restart.test.ts | 3 +- 20 files changed, 286 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e08b9a2839a..e2b9fc16480c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ Docs: https://docs.openclaw.ai - Codex app-server: disable native Code Mode when the effective exec host is `node` and keep OpenClaw `exec`/`process` available, so `/exec host=node` routes shell commands through the selected node instead of the gateway. Fixes #85012. (#85090) Thanks @sahilsatralkar. - Gateway: defer provider auth-state prewarm until after startup readiness so early gateway tool/session requests are not blocked by provider auth discovery. (#85272) Thanks @dutifulbob. - Agents/Codex: show the first plan update as a transient chat status notice without counting it as final assistant content. +- CLI/update: walk the macOS process ancestry and honor the inherited Gateway runtime PID before package updates stop the managed Gateway service, so nested in-band updater children can refuse instead of killing the LaunchAgent-supervised Gateway that owns them. Fixes #85120. - Gateway/LaunchAgent: wait for launchd reload bootout to finish and fall back to kickstart when bootstrap races, so reload handoff does not leave the service deregistered. Fixes #84630. (#84641) Thanks @NianJiuZst. - Gateway/LaunchAgent: treat a concurrent launchd bootstrap as a successful restart when the service is already loaded, avoiding false macOS Gateway restart failures. Fixes #84721. (#84722) Thanks @googlerest. - Gateway/service: include the active `openclaw` command bin directory in managed service PATH generation and doctor audit expectations for npm-global macOS installs. Fixes #84201. (#84475) Thanks @jbetala7. diff --git a/extensions/acpx/npm-shrinkwrap.json b/extensions/acpx/npm-shrinkwrap.json index 70fa68b081a8..d83935499341 100644 --- a/extensions/acpx/npm-shrinkwrap.json +++ b/extensions/acpx/npm-shrinkwrap.json @@ -903,6 +903,7 @@ "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.8.3.tgz", "integrity": "sha512-HdUm8EMQBLaJvGUdidNNbqpA1kYkwNcb+MYxkxCLAPJGQzlv9J0C24h8V65Z4c5GLd/JEALDvpFCQgpLJqc0zw==", "license": "Apache-2.0", + "peer": true, "peerDependencies": { "bare-abort-controller": "*" }, @@ -2238,6 +2239,7 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-4.4.3.tgz", "integrity": "sha512-ytENFjIJFl2UwYglde2jchW2Hwm4GJFLDiSXWdTrJQBIN9Fcyp7n4DhxJEiWNAJMV1/BqWfW/kkg71UDcHJyTQ==", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/extensions/amazon-bedrock-mantle/npm-shrinkwrap.json b/extensions/amazon-bedrock-mantle/npm-shrinkwrap.json index f2c30ea5afcb..339737cceaaa 100644 --- a/extensions/amazon-bedrock-mantle/npm-shrinkwrap.json +++ b/extensions/amazon-bedrock-mantle/npm-shrinkwrap.json @@ -1314,6 +1314,7 @@ "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.1.tgz", "integrity": "sha512-It4dO0K5v//JtTXuPkfEOaI3uUN87iYPnqo/ZzqCoG3g8uhA66QUMs/SrM0YK7/NAu+r4LMh/9dq2A7k+rHs+w==", "license": "MIT", + "peer": true, "engines": { "node": ">=10.0.0" }, @@ -1350,6 +1351,7 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-4.4.3.tgz", "integrity": "sha512-ytENFjIJFl2UwYglde2jchW2Hwm4GJFLDiSXWdTrJQBIN9Fcyp7n4DhxJEiWNAJMV1/BqWfW/kkg71UDcHJyTQ==", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/extensions/amazon-bedrock/npm-shrinkwrap.json b/extensions/amazon-bedrock/npm-shrinkwrap.json index 66fdca19db82..76491ddaeec2 100644 --- a/extensions/amazon-bedrock/npm-shrinkwrap.json +++ b/extensions/amazon-bedrock/npm-shrinkwrap.json @@ -1188,6 +1188,7 @@ "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.1.tgz", "integrity": "sha512-It4dO0K5v//JtTXuPkfEOaI3uUN87iYPnqo/ZzqCoG3g8uhA66QUMs/SrM0YK7/NAu+r4LMh/9dq2A7k+rHs+w==", "license": "MIT", + "peer": true, "engines": { "node": ">=10.0.0" }, @@ -1224,6 +1225,7 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-4.4.3.tgz", "integrity": "sha512-ytENFjIJFl2UwYglde2jchW2Hwm4GJFLDiSXWdTrJQBIN9Fcyp7n4DhxJEiWNAJMV1/BqWfW/kkg71UDcHJyTQ==", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/extensions/anthropic-vertex/npm-shrinkwrap.json b/extensions/anthropic-vertex/npm-shrinkwrap.json index dcfdb16e2c53..87118a470789 100644 --- a/extensions/anthropic-vertex/npm-shrinkwrap.json +++ b/extensions/anthropic-vertex/npm-shrinkwrap.json @@ -1321,6 +1321,7 @@ "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.1.tgz", "integrity": "sha512-It4dO0K5v//JtTXuPkfEOaI3uUN87iYPnqo/ZzqCoG3g8uhA66QUMs/SrM0YK7/NAu+r4LMh/9dq2A7k+rHs+w==", "license": "MIT", + "peer": true, "engines": { "node": ">=10.0.0" }, @@ -1372,6 +1373,7 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-4.4.3.tgz", "integrity": "sha512-ytENFjIJFl2UwYglde2jchW2Hwm4GJFLDiSXWdTrJQBIN9Fcyp7n4DhxJEiWNAJMV1/BqWfW/kkg71UDcHJyTQ==", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/extensions/codex/npm-shrinkwrap.json b/extensions/codex/npm-shrinkwrap.json index 1dff52b7ca81..b4d69435251d 100644 --- a/extensions/codex/npm-shrinkwrap.json +++ b/extensions/codex/npm-shrinkwrap.json @@ -1866,6 +1866,7 @@ "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.1.tgz", "integrity": "sha512-It4dO0K5v//JtTXuPkfEOaI3uUN87iYPnqo/ZzqCoG3g8uhA66QUMs/SrM0YK7/NAu+r4LMh/9dq2A7k+rHs+w==", "license": "MIT", + "peer": true, "engines": { "node": ">=10.0.0" }, @@ -1917,6 +1918,7 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-4.4.3.tgz", "integrity": "sha512-ytENFjIJFl2UwYglde2jchW2Hwm4GJFLDiSXWdTrJQBIN9Fcyp7n4DhxJEiWNAJMV1/BqWfW/kkg71UDcHJyTQ==", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/extensions/diagnostics-otel/npm-shrinkwrap.json b/extensions/diagnostics-otel/npm-shrinkwrap.json index e0e5bd240931..fbf215bf1762 100644 --- a/extensions/diagnostics-otel/npm-shrinkwrap.json +++ b/extensions/diagnostics-otel/npm-shrinkwrap.json @@ -67,6 +67,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.1.tgz", "integrity": "sha512-gLyJlPHPZYdAk1JENA9LeHejZe1Ti77/pTeFm/nMXmQH/HFZlcS/O2XJB+L8fkbrNSqhdtlvjBVjxwUYanNH5Q==", "license": "Apache-2.0", + "peer": true, "engines": { "node": ">=8.0.0" } @@ -580,6 +581,7 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.16.0.tgz", "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==", "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, diff --git a/extensions/discord/npm-shrinkwrap.json b/extensions/discord/npm-shrinkwrap.json index 923991b3a03a..ac3c203eb372 100644 --- a/extensions/discord/npm-shrinkwrap.json +++ b/extensions/discord/npm-shrinkwrap.json @@ -432,7 +432,8 @@ "version": "0.1.1", "resolved": "https://registry.npmjs.org/opusscript/-/opusscript-0.1.1.tgz", "integrity": "sha512-mL0fZZOUnXdZ78woRXp18lApwpp0lF5tozJOD1Wut0dgrA9WuQTgSels/CSmFleaAZrJi/nci5KOVtbuxeWoQA==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/prism-media": { "version": "1.3.5", diff --git a/extensions/memory-lancedb/npm-shrinkwrap.json b/extensions/memory-lancedb/npm-shrinkwrap.json index 5c4cf3a1d5f2..c96fd1bb54a8 100644 --- a/extensions/memory-lancedb/npm-shrinkwrap.json +++ b/extensions/memory-lancedb/npm-shrinkwrap.json @@ -209,6 +209,7 @@ "resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-18.1.0.tgz", "integrity": "sha512-v/ShMp57iBnBp4lDgV8Jx3d3Q5/Hac25FWmQ98eMahUiHPXcvwIMKJD0hBIgclm/FCG+LwPkAKtkRO1O/W0YGg==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@swc/helpers": "^0.5.11", "@types/command-line-args": "^5.2.3", diff --git a/extensions/twitch/npm-shrinkwrap.json b/extensions/twitch/npm-shrinkwrap.json index 942b33ef499c..19dfcd82d38d 100644 --- a/extensions/twitch/npm-shrinkwrap.json +++ b/extensions/twitch/npm-shrinkwrap.json @@ -168,6 +168,7 @@ "resolved": "https://registry.npmjs.org/@twurple/auth/-/auth-8.1.4.tgz", "integrity": "sha512-ylsJoPInCw9BwOqxKcx+1k2ce9QG3vJpKFzPdIyHh49HvM/ulQZ0CAGysydugDYXF0iO/TGryh7PluSwx5fIwA==", "license": "MIT", + "peer": true, "dependencies": { "@d-fischer/logger": "^4.2.1", "@d-fischer/shared-utils": "^3.6.1", @@ -288,6 +289,7 @@ "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.1.tgz", "integrity": "sha512-It4dO0K5v//JtTXuPkfEOaI3uUN87iYPnqo/ZzqCoG3g8uhA66QUMs/SrM0YK7/NAu+r4LMh/9dq2A7k+rHs+w==", "license": "MIT", + "peer": true, "engines": { "node": ">=10.0.0" }, diff --git a/extensions/whatsapp/npm-shrinkwrap.json b/extensions/whatsapp/npm-shrinkwrap.json index 9568a120d0cf..b009acf8e363 100644 --- a/extensions/whatsapp/npm-shrinkwrap.json +++ b/extensions/whatsapp/npm-shrinkwrap.json @@ -1178,6 +1178,7 @@ "resolved": "https://registry.npmjs.org/audio-decode/-/audio-decode-2.2.3.tgz", "integrity": "sha512-Z0lHvMayR/Pad9+O9ddzaBJE0DrhZkQlStrC1RwcAHF3AhQAsdwKHeLGK8fYKyp2DDU6xHxzGb4CLMui12yVrg==", "license": "MIT", + "peer": true, "dependencies": { "@wasm-audio-decoders/flac": "^0.2.4", "@wasm-audio-decoders/ogg-vorbis": "^0.1.15", @@ -1417,6 +1418,7 @@ "resolved": "https://registry.npmjs.org/jimp/-/jimp-1.6.1.tgz", "integrity": "sha512-hNQh6rZtWfSVWSNVmvq87N5BPJsNH7k7I7qyrXf9DOma9xATQk3fsyHazCQe51nCjdkoWdTmh0vD7bjVSLoxxw==", "license": "MIT", + "peer": true, "dependencies": { "@jimp/core": "1.6.1", "@jimp/diff": "1.6.1", @@ -1461,6 +1463,7 @@ "resolved": "https://registry.npmjs.org/keyv/-/keyv-5.6.0.tgz", "integrity": "sha512-CYDD3SOtsHtyXeEORYRx2qBtpDJFjRTGXUtmNEMGyzYOKj1TE3tycdlho7kA1Ufx9OYWZzg52QFBGALTirzDSw==", "license": "MIT", + "peer": true, "dependencies": { "@keyv/serialize": "^1.1.1" } diff --git a/npm-shrinkwrap.json b/npm-shrinkwrap.json index 9b240637fcb5..9c5d4bfced4c 100644 --- a/npm-shrinkwrap.json +++ b/npm-shrinkwrap.json @@ -1567,6 +1567,7 @@ "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.29.0.tgz", "integrity": "sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ==", "license": "MIT", + "peer": true, "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", @@ -2813,6 +2814,7 @@ "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", "license": "MIT", + "peer": true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", @@ -3230,6 +3232,7 @@ "resolved": "https://registry.npmjs.org/grammy/-/grammy-1.43.0.tgz", "integrity": "sha512-7dYm06A945mXuIk/5HUlSjeyIYChW8vCEiU2dkOKKqJJzwAWxTkCc91Eqbz7TgODh2rtFFKWI/fekowWHOkmjQ==", "license": "MIT", + "peer": true, "dependencies": { "@grammyjs/types": "3.27.3", "abort-controller": "^3.0.0", @@ -3298,6 +3301,7 @@ "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.18.tgz", "integrity": "sha512-RWzP96k/yv0PQfyXnWjs6zot20TqfpfsNXhOnev8d1InAxubW93L11/oNUc3tQqn2G0bSdAOBpX+2uDFHV7kdQ==", "license": "MIT", + "peer": true, "engines": { "node": ">=16.9.0" } @@ -5064,6 +5068,7 @@ "resolved": "https://registry.npmjs.org/undici/-/undici-8.3.0.tgz", "integrity": "sha512-TkUDgb6tl7KOGZ+7e8E3d2FYgUQgF6z5YypqjWmixVQSQERFcVrVg0ySADm2LVLRh5ljAaHTCR5Fmz3Q34rB7Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=22.19.0" } @@ -5287,6 +5292,7 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-4.4.3.tgz", "integrity": "sha512-ytENFjIJFl2UwYglde2jchW2Hwm4GJFLDiSXWdTrJQBIN9Fcyp7n4DhxJEiWNAJMV1/BqWfW/kkg71UDcHJyTQ==", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/src/cli/gateway-cli/run.option-collisions.test.ts b/src/cli/gateway-cli/run.option-collisions.test.ts index b573dcacf501..8fe68930cfab 100644 --- a/src/cli/gateway-cli/run.option-collisions.test.ts +++ b/src/cli/gateway-cli/run.option-collisions.test.ts @@ -1,6 +1,7 @@ import path from "node:path"; import { Command } from "commander"; import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; +import { GATEWAY_SERVICE_RUNTIME_PID_ENV } from "../../daemon/constants.js"; import { SUPERVISOR_HINT_ENV_VARS } from "../../infra/supervisor-markers.js"; import { withEnvAsync } from "../../test-utils/env.js"; import { withTempSecretFiles } from "../../test-utils/secret-file-fixture.js"; @@ -17,6 +18,7 @@ const forceFreePortAndWait = vi.fn(async (_port: number, _opts: unknown) => ({ waitedMs: 0, escalatedToSigkill: false, })); +const cleanStaleGatewayProcessesSync = vi.fn((_port?: number) => []); const waitForPortBindable = vi.fn(async (_port: number, _opts?: unknown) => 0); const ensureDevGatewayConfig = vi.fn(async (_opts?: unknown) => {}); type GatewayLoopStart = (params?: { startupStartedAt?: number }) => Promise; @@ -122,6 +124,10 @@ vi.mock("../../gateway/net.js", async (importOriginal) => { }; }); +vi.mock("../../infra/restart-stale-pids.js", () => ({ + cleanStaleGatewayProcessesSync: (port?: number) => cleanStaleGatewayProcessesSync(port), +})); + vi.mock("../../gateway/server.js", () => ({ startGatewayServer: (port: number, opts?: unknown) => startGatewayServer(port, opts), })); @@ -226,6 +232,7 @@ describe("gateway run option collisions", () => { setVerbose.mockClear(); setConsoleSubsystemFilter.mockClear(); forceFreePortAndWait.mockClear(); + cleanStaleGatewayProcessesSync.mockClear(); waitForPortBindable.mockClear(); ensureDevGatewayConfig.mockClear(); runGatewayLoop.mockClear(); @@ -278,6 +285,20 @@ describe("gateway run option collisions", () => { expect(gatewayStartOptions().auth?.token).toBe("tok_run"); }); + it("marks service-mode gateway descendants with the live gateway pid", async () => { + await withEnvAsync( + { + OPENCLAW_SERVICE_MARKER: "openclaw", + [GATEWAY_SERVICE_RUNTIME_PID_ENV]: undefined, + }, + async () => { + await runGatewayCli(["gateway", "run", "--allow-unconfigured"]); + + expect(process.env[GATEWAY_SERVICE_RUNTIME_PID_ENV]).toBe(String(process.pid)); + }, + ); + }); + it("blocks --force port cleanup from an older binary with newer config", async () => { configState.snapshot = { exists: true, diff --git a/src/cli/gateway-cli/run.ts b/src/cli/gateway-cli/run.ts index 8f864e1c066b..0265ba22f297 100644 --- a/src/cli/gateway-cli/run.ts +++ b/src/cli/gateway-cli/run.ts @@ -12,6 +12,7 @@ import type { import { CONFIG_PATH, resolveGatewayPort, resolveStateDir } from "../../config/paths.js"; import type { OpenClawConfig } from "../../config/types.openclaw.js"; import { hasConfiguredSecretInput } from "../../config/types.secrets.js"; +import { GATEWAY_SERVICE_RUNTIME_PID_ENV } from "../../daemon/constants.js"; import { defaultGatewayBindMode, isContainerEnvironment, @@ -465,6 +466,9 @@ async function maybeWriteGatewayStartupFailureBundle(err: unknown): Promise { { OPENCLAW_SERVICE_MARKER: "openclaw", OPENCLAW_SERVICE_KIND: "gateway", + [GATEWAY_SERVICE_RUNTIME_PID_ENV]: "7777", }, async () => { await updateCommand({ yes: true }); @@ -938,6 +940,7 @@ describe("update-cli", () => { const spawnEnv = spawnCall()?.[2]?.env; expect(spawnEnv?.OPENCLAW_SERVICE_MARKER).toBeUndefined(); expect(spawnEnv?.OPENCLAW_SERVICE_KIND).toBeUndefined(); + expect(spawnEnv?.[GATEWAY_SERVICE_RUNTIME_PID_ENV]).toBeUndefined(); }); it("passes pre-update plugin install records into the post-core update process", async () => { @@ -2044,6 +2047,37 @@ describe("update-cli", () => { expect(packageInstallCommandCall()).toBeUndefined(); }); + it("refuses package updates from inherited gateway runtime pid when process ancestry is truncated", async () => { + mockPackageInstallStatus(createCaseDir("openclaw-update")); + serviceLoaded.mockResolvedValue(true); + serviceReadRuntime.mockResolvedValue({ + status: "running", + pid: 4242, + state: "running", + }); + mockGetSelfAndAncestorPidsSync.mockReturnValue(new Set([process.pid])); + + await withEnvAsync( + { + OPENCLAW_SERVICE_MARKER: "openclaw", + OPENCLAW_SERVICE_KIND: "gateway", + [GATEWAY_SERVICE_RUNTIME_PID_ENV]: "4242", + }, + async () => { + await updateCommand({ yes: true }); + }, + ); + + const errors = vi.mocked(defaultRuntime.error).mock.calls.map((call) => String(call[0])); + expect(errors.join("\n")).toContain( + "openclaw update detected it is running inside the gateway process tree.", + ); + expect(errors.join("\n")).toContain("Gateway PID 4242 is an ancestor"); + expect(defaultRuntime.exit).toHaveBeenCalledWith(1); + expect(serviceStop).not.toHaveBeenCalled(); + expect(packageInstallCommandCall()).toBeUndefined(); + }); + it("blocks package updates when the target requires a newer Node runtime", async () => { mockPackageInstallStatus(createCaseDir("openclaw-update")); vi.mocked(fetchNpmPackageTargetStatus).mockResolvedValue({ diff --git a/src/cli/update-cli/update-command.ts b/src/cli/update-cli/update-command.ts index e6880c22bcbc..f1636b192381 100644 --- a/src/cli/update-cli/update-command.ts +++ b/src/cli/update-cli/update-command.ts @@ -29,7 +29,11 @@ import { asResolvedSourceConfig, asRuntimeConfig } from "../../config/materializ import { CONFIG_PATH, resolveIncludeRoots } from "../../config/paths.js"; import type { OpenClawConfig } from "../../config/types.openclaw.js"; import type { PluginInstallRecord } from "../../config/types.plugins.js"; -import { GATEWAY_SERVICE_KIND, GATEWAY_SERVICE_MARKER } from "../../daemon/constants.js"; +import { + GATEWAY_SERVICE_KIND, + GATEWAY_SERVICE_MARKER, + GATEWAY_SERVICE_RUNTIME_PID_ENV, +} from "../../daemon/constants.js"; import { resolveGatewayInstallEntrypoint } from "../../daemon/gateway-entrypoint.js"; import { disableCurrentOpenClawUpdateLaunchdJob } from "../../daemon/launchd.js"; import { resolveGatewayRestartLogPath } from "../../daemon/restart-logs.js"; @@ -770,8 +774,40 @@ Gateway PID ${pid} is an ancestor of this process, so this updater cannot safely Run \`${replaceCliName(formatCliCommand("openclaw update"), CLI_NAME)}\` from a shell outside the gateway service, or stop the gateway service first and then update.`; } -function isGatewayAncestorPid(pid: unknown): pid is number { - return typeof pid === "number" && pid > 0 && getSelfAndAncestorPidsSync().has(pid); +function parsePositivePid(value: unknown): number | null { + if (typeof value === "number" && Number.isFinite(value) && value > 0) { + return Math.floor(value); + } + if (typeof value !== "string") { + return null; + } + const trimmed = value.trim(); + if (!/^\d+$/u.test(trimmed)) { + return null; + } + const parsed = Number.parseInt(trimmed, 10); + return Number.isFinite(parsed) && parsed > 0 ? parsed : null; +} + +function isInheritedGatewayRuntimePid( + pid: number, + env: Record = process.env, +): boolean { + if (!isRunningInsideGatewayService(env)) { + return false; + } + return parsePositivePid(env[GATEWAY_SERVICE_RUNTIME_PID_ENV]) === pid; +} + +function isGatewayAncestorPid( + pid: unknown, + env: Record = process.env, +): pid is number { + const parsed = parsePositivePid(pid); + if (parsed === null) { + return false; + } + return isInheritedGatewayRuntimePid(parsed, env) || getSelfAndAncestorPidsSync().has(parsed); } function gatewayAncestryBlockMessage(pid: unknown): string | undefined { @@ -1048,6 +1084,7 @@ function stripGatewayServiceMarkerEnv(env: NodeJS.ProcessEnv): NodeJS.ProcessEnv const resolvedEnv = { ...env }; delete resolvedEnv.OPENCLAW_SERVICE_MARKER; delete resolvedEnv.OPENCLAW_SERVICE_KIND; + delete resolvedEnv[GATEWAY_SERVICE_RUNTIME_PID_ENV]; return resolvedEnv; } diff --git a/src/daemon/constants.ts b/src/daemon/constants.ts index 43a66f6a2c2b..b954c18e8141 100644 --- a/src/daemon/constants.ts +++ b/src/daemon/constants.ts @@ -6,6 +6,7 @@ export const GATEWAY_SYSTEMD_SERVICE_NAME = "openclaw-gateway"; export const GATEWAY_WINDOWS_TASK_NAME = "OpenClaw Gateway"; export const GATEWAY_SERVICE_MARKER = "openclaw"; export const GATEWAY_SERVICE_KIND = "gateway"; +export const GATEWAY_SERVICE_RUNTIME_PID_ENV = "OPENCLAW_GATEWAY_SERVICE_PID"; const NODE_LAUNCH_AGENT_LABEL = "ai.openclaw.node"; const NODE_SYSTEMD_SERVICE_NAME = "openclaw-node"; const NODE_WINDOWS_TASK_NAME = "OpenClaw Node"; diff --git a/src/infra/restart-stale-pids.test.ts b/src/infra/restart-stale-pids.test.ts index 367dbbee0034..8640fc58ccb8 100644 --- a/src/infra/restart-stale-pids.test.ts +++ b/src/infra/restart-stale-pids.test.ts @@ -151,7 +151,10 @@ function installInitialBusyPoll( resolvePoll: (call: number) => MockLsofResult, ): () => number { let call = 0; - mockSpawnSync.mockImplementation(() => { + mockSpawnSync.mockImplementation((command: unknown) => { + if (command !== "lsof") { + return createLsofResult(); + } call += 1; if (call === 1) { return createOpenClawBusyResult(stalePid); @@ -301,7 +304,9 @@ describe.skipIf(isWindows)("restart-stale-pids", () => { }); expect(findGatewayPidsOnPortSync(18789)).toEqual([stalePid]); - const psCall = mockSpawnSync.mock.calls.find((call) => call[0] === "ps"); + const psCall = mockSpawnSync.mock.calls.find( + (call) => call[0] === "ps" && Array.isArray(call[1]) && (call[1] as unknown[])[0] === "-ww", + ); expect(psCall?.[1]).toEqual(["-ww", "-p", String(stalePid), "-o", "command="]); expect(psCall?.[2]).toEqual({ timeout: 2000, encoding: "utf8" }); }); @@ -442,6 +447,47 @@ describe.skipIf(isWindows)("restart-stale-pids", () => { }, ); + it("excludes the full ancestor chain on macOS via ps - nested in-band updater regression for #85120", () => { + const origDescriptor = Object.getOwnPropertyDescriptor(process, "platform"); + const toolHostPid = process.pid + 3101; + const gatewayGrandparentPid = process.pid + 3102; + const benignStalePid = process.pid + 3103; + Object.defineProperty(process, "platform", { value: "darwin", configurable: true }); + try { + mockSpawnSync.mockImplementation((command: unknown, args: unknown) => { + if (command === "ps" && Array.isArray(args) && args[0] === "-o") { + const targetPid = args[3]; + if (targetPid === String(toolHostPid)) { + return { error: null, status: 0, stdout: `${gatewayGrandparentPid}\n`, stderr: "" }; + } + if (targetPid === String(gatewayGrandparentPid)) { + return { error: null, status: 0, stdout: "1\n", stderr: "" }; + } + return { error: null, status: 0, stdout: "0\n", stderr: "" }; + } + return { + error: null, + status: 0, + stdout: lsofOutput([ + { pid: toolHostPid, cmd: "openclaw-gateway" }, + { pid: gatewayGrandparentPid, cmd: "openclaw-gateway" }, + { pid: benignStalePid, cmd: "openclaw-gateway" }, + ]), + stderr: "", + }; + }); + + const pids = withStubbedPpid(toolHostPid, () => findGatewayPidsOnPortSync(18789)); + expect(pids).not.toContain(toolHostPid); + expect(pids).not.toContain(gatewayGrandparentPid); + expect(pids).toContain(benignStalePid); + } finally { + if (origDescriptor) { + Object.defineProperty(process, "platform", origDescriptor); + } + } + }); + it("excludes pids whose command does not include 'openclaw'", () => { const otherPid = process.pid + 2; mockSpawnSync.mockReturnValue({ @@ -462,6 +508,36 @@ describe.skipIf(isWindows)("restart-stale-pids", () => { expect(mockCallRecordArg(mockSpawnSync, 0, 2, "lsof options").timeout).toBe(400); }); + it("uses the caller timeout for macOS ancestor ps probes", () => { + const origDescriptor = Object.getOwnPropertyDescriptor(process, "platform"); + const gatewayParentPid = process.pid + 3151; + Object.defineProperty(process, "platform", { value: "darwin", configurable: true }); + try { + mockSpawnSync.mockImplementation((command: unknown, args: unknown) => { + if (command === "ps" && Array.isArray(args) && args[0] === "-o") { + return { error: null, status: 0, stdout: "1\n", stderr: "" }; + } + return { + error: null, + status: 0, + stdout: lsofOutput([{ pid: process.pid + 3152, cmd: "openclaw-gateway" }]), + stderr: "", + }; + }); + + withStubbedPpid(gatewayParentPid, () => findGatewayPidsOnPortSync(18789, 400)); + const ancestorPsCall = mockSpawnSync.mock.calls.find( + (call) => + call[0] === "ps" && Array.isArray(call[1]) && (call[1] as unknown[])[0] === "-o", + ); + expect(ancestorPsCall?.[2]).toEqual({ timeout: 400, encoding: "utf8" }); + } finally { + if (origDescriptor) { + Object.defineProperty(process, "platform", origDescriptor); + } + } + }); + it("deduplicates pids from dual-stack listeners (IPv4+IPv6 emit same pid twice)", () => { // Dual-stack listeners cause lsof to emit the same PID twice in -Fpc output // (once for the IPv4 socket, once for IPv6). Without dedup, terminateStaleProcessesSync @@ -1213,9 +1289,50 @@ describe.skipIf(isWindows)("restart-stale-pids", () => { vi.spyOn(process, "kill").mockReturnValue(true); // No openclaw pids in status-1 output means the port is free for this cleanup. expect(cleanStaleGatewayProcessesSync()).toContain(stalePid); - // Completed with one argv verification after the status-1 poll output: - // initial lsof + poll lsof + ps argv check. - expect(getCallCount()).toBe(3); + // Completed with one initial lsof and one status-1 poll lsof. The + // separate `ps` argv verification is intentionally not counted here. + expect(getCallCount()).toBe(2); + }); + + it("uses the short poll timeout for macOS ancestor ps probes", () => { + const origDescriptor = Object.getOwnPropertyDescriptor(process, "platform"); + const stalePid = process.pid + 810; + const gatewayParentPid = process.pid + 811; + let lsofCall = 0; + Object.defineProperty(process, "platform", { value: "darwin", configurable: true }); + try { + mockSpawnSync.mockImplementation((command: unknown, args: unknown) => { + if (command === "ps" && Array.isArray(args) && args[0] === "-o") { + return { error: null, status: 0, stdout: "1\n", stderr: "" }; + } + if (command === "lsof") { + lsofCall += 1; + if (lsofCall === 1) { + return createOpenClawBusyResult(stalePid); + } + return createLsofResult({ + stdout: lsofOutput([{ pid: gatewayParentPid, cmd: "openclaw-gateway" }]), + }); + } + return createLsofResult(); + }); + + vi.spyOn(process, "kill").mockReturnValue(true); + const killed = withStubbedPpid(gatewayParentPid, () => cleanStaleGatewayProcessesSync()); + expect(killed).toContain(stalePid); + const ancestorPsTimeouts = mockSpawnSync.mock.calls + .filter( + (call) => + call[0] === "ps" && Array.isArray(call[1]) && (call[1] as unknown[])[0] === "-o", + ) + .map((call) => (call[2] as { timeout?: number } | undefined)?.timeout); + expect(ancestorPsTimeouts).toContain(2000); + expect(ancestorPsTimeouts).toContain(400); + } finally { + if (origDescriptor) { + Object.defineProperty(process, "platform", origDescriptor); + } + } }); }); diff --git a/src/infra/restart-stale-pids.ts b/src/infra/restart-stale-pids.ts index 27f0ed6783df..252a6d863cf7 100644 --- a/src/infra/restart-stale-pids.ts +++ b/src/infra/restart-stale-pids.ts @@ -104,6 +104,22 @@ function readParentPidFromProc(pid: number): number | null { } } +function readParentPidFromPs(pid: number, spawnTimeoutMs: number): number | null { + try { + const res = spawnSync("ps", ["-o", "ppid=", "-p", String(pid)], { + encoding: "utf8", + timeout: spawnTimeoutMs, + }); + if (res.error || res.status !== 0 || !res.stdout.trim()) { + return null; + } + const parsed = Number.parseInt(res.stdout.trim(), 10); + return Number.isFinite(parsed) && parsed > 0 ? parsed : null; + } catch { + return null; + } +} + /** * Collect the set of PIDs whose termination would cascade-kill the caller: * the current process, its direct parent, and — where the platform permits @@ -129,24 +145,28 @@ function readParentPidFromProc(pid: number): number | null { * install where the gateway spawns a direct-child sidecar. * * The walk is best-effort. `process.ppid` is provided by Node via a direct - * syscall and is always available; transitive ancestors are only read on - * Linux via `/proc`. macOS/Windows stop at ppid, which is sufficient for - * the direct-child sidecar topology this bug describes; extending those - * platforms can be done without touching the call sites. + * syscall and is always available; transitive ancestors are read on Linux via + * `/proc` and on macOS via `ps`. Windows stops at ppid. * - * The function takes no parameters and exposes no hooks. Tests exercise - * the real walk by stubbing `process.ppid` (and, on Linux, by mocking - * `node:fs` to inject `/proc//status` payloads) — there is no - * reachable override for runtime callers to mutate. + * The function exposes no runtime hooks. Tests exercise the real walk by + * stubbing `process.ppid` (and, on Linux, by mocking `node:fs` to inject + * `/proc//status` payloads) — there is no reachable override for + * runtime callers to mutate. */ -export function getSelfAndAncestorPidsSync(): Set { +export function getSelfAndAncestorPidsSync(spawnTimeoutMs = SPAWN_TIMEOUT_MS): Set { const pids = new Set([process.pid]); const immediateParent = getParentPid(); if (!Number.isFinite(immediateParent) || immediateParent <= 0) { return pids; } pids.add(immediateParent); - if (process.platform !== "linux") { + const readTransitiveParent = + process.platform === "linux" + ? readParentPidFromProc + : process.platform === "darwin" + ? (pid: number) => readParentPidFromPs(pid, spawnTimeoutMs) + : null; + if (!readTransitiveParent) { return pids; } // Transitive ancestor walk. Each hop's validity (positive pid, not already @@ -155,7 +175,7 @@ export function getSelfAndAncestorPidsSync(): Set { // parent` after the same check, so no separate top-of-loop guard is needed. let current = immediateParent; for (let depth = 0; depth < MAX_ANCESTOR_WALK_DEPTH; depth++) { - const parent = readParentPidFromProc(current); + const parent = readTransitiveParent(current); if (parent == null || parent <= 0 || pids.has(parent)) { break; } @@ -171,8 +191,9 @@ export function getSelfAndAncestorPidsSync(): Set { * rationale). On Linux the ancestor lookup reads up to * `MAX_ANCESTOR_WALK_DEPTH` entries from `/proc//status`; each read is * a virtual-filesystem access (no disk I/O, no external process), wrapped - * in try/catch and degrades silently. On macOS/Windows the lookup is - * in-memory via `process.ppid` only. + * in try/catch and degrades silently. On macOS the lookup shells out to `ps` + * with the caller's spawn timeout. Windows only uses the in-memory direct + * parent from `process.ppid`. */ function parseLsofEntries(stdout: string): Array<{ pid: number; cmd?: string }> { const entries: Array<{ pid: number; cmd?: string }> = []; @@ -239,7 +260,7 @@ function parsePidsFromLsofOutput(stdout: string, spawnTimeoutMs: number): number // same PID twice. Return each PID at most once to avoid double-killing. // Exclude self and ancestors — terminating any ancestor cascade-kills the // caller via the supervisor, recreating the #68451 restart loop. - const excluded = getSelfAndAncestorPidsSync(); + const excluded = getSelfAndAncestorPidsSync(spawnTimeoutMs); const pids: number[] = []; for (const entry of parseLsofEntries(stdout)) { if (excluded.has(entry.pid)) { diff --git a/src/infra/restart.test.ts b/src/infra/restart.test.ts index f7e240aef8c1..c4d34c076f5a 100644 --- a/src/infra/restart.test.ts +++ b/src/infra/restart.test.ts @@ -166,7 +166,8 @@ describe.runIf(process.platform !== "win32")("cleanStaleGatewayProcessesSync", ( expect(killed).toEqual([stalePid]); expect(resolveGatewayPortMock).not.toHaveBeenCalled(); - expect(spawnSyncMock).toHaveBeenCalledTimes(2); + const lsofCalls = spawnSyncMock.mock.calls.filter((call) => call[0] === "/usr/sbin/lsof"); + expect(lsofCalls).toHaveLength(2); const [command, args, options] = requireFirstSpawnSyncCall(); expect(command).toBe("/usr/sbin/lsof"); expect(args).toEqual(["-nP", "-iTCP:19999", "-sTCP:LISTEN", "-Fpc"]);