diff --git a/docs/diagnostics/flags.md b/docs/diagnostics/flags.md index 6b6a54c6a4cb..c64ea9c3daef 100644 --- a/docs/diagnostics/flags.md +++ b/docs/diagnostics/flags.md @@ -50,6 +50,50 @@ Disable all flags: OPENCLAW_DIAGNOSTICS=0 ``` +`OPENCLAW_DIAGNOSTICS=0` is a process-level disable override: it disables +flags from both env and config for that process. + +## Profiling flags + +Profiler flags enable targeted timing spans without raising global logging +levels. They are disabled by default. + +Enable all profiler-gated spans for one gateway run: + +```bash +OPENCLAW_DIAGNOSTICS=profiler openclaw gateway run +``` + +Enable only reply-dispatch profiler spans: + +```bash +OPENCLAW_DIAGNOSTICS=reply.profiler openclaw gateway run +``` + +Enable only Codex app-server startup/tool/thread profiler spans: + +```bash +OPENCLAW_DIAGNOSTICS=codex.profiler openclaw gateway run +``` + +Enable profiler flags from config: + +```json +{ + "diagnostics": { + "flags": ["reply.profiler", "codex.profiler"] + } +} +``` + +Restart the gateway after changing config flags. To disable a profiler flag, +remove it from `diagnostics.flags` and restart. To temporarily disable every +diagnostics flag even when config enables profiler flags, start the process with: + +```bash +OPENCLAW_DIAGNOSTICS=0 openclaw gateway run +``` + ## Timeline artifacts The `timeline` flag writes structured startup and runtime timing events for diff --git a/extensions/codex/src/app-server/profiler-flag.test.ts b/extensions/codex/src/app-server/profiler-flag.test.ts new file mode 100644 index 000000000000..a0f40e74b1c7 --- /dev/null +++ b/extensions/codex/src/app-server/profiler-flag.test.ts @@ -0,0 +1,30 @@ +import { describe, expect, it } from "vitest"; +import { isCodexAppServerProfilerEnabled } from "./profiler-flag.js"; + +describe("isCodexAppServerProfilerEnabled", () => { + it("is disabled by default", () => { + expect(isCodexAppServerProfilerEnabled(undefined, {} as NodeJS.ProcessEnv)).toBe(false); + }); + + it("matches global and Codex profiler flags", () => { + expect( + isCodexAppServerProfilerEnabled( + { diagnostics: { flags: ["codex.profiler"] } }, + {} as NodeJS.ProcessEnv, + ), + ).toBe(true); + expect( + isCodexAppServerProfilerEnabled(undefined, { + OPENCLAW_DIAGNOSTICS: "profiler", + } as NodeJS.ProcessEnv), + ).toBe(true); + }); + + it("uses the documented diagnostics env disable override", () => { + expect( + isCodexAppServerProfilerEnabled({ diagnostics: { flags: ["codex.profiler"] } }, { + OPENCLAW_DIAGNOSTICS: "0", + } as NodeJS.ProcessEnv), + ).toBe(false); + }); +}); diff --git a/extensions/codex/src/app-server/profiler-flag.ts b/extensions/codex/src/app-server/profiler-flag.ts new file mode 100644 index 000000000000..62fd464d81f7 --- /dev/null +++ b/extensions/codex/src/app-server/profiler-flag.ts @@ -0,0 +1,11 @@ +import type { OpenClawConfig } from "openclaw/plugin-sdk/config-contracts"; +import { isDiagnosticFlagEnabled } from "openclaw/plugin-sdk/diagnostic-runtime"; + +const PROFILER_FLAGS = ["profiler", "codex.profiler"] as const; + +export function isCodexAppServerProfilerEnabled( + config?: OpenClawConfig, + env: NodeJS.ProcessEnv = process.env, +): boolean { + return PROFILER_FLAGS.some((flag) => isDiagnosticFlagEnabled(flag, config, env)); +} diff --git a/extensions/codex/src/app-server/run-attempt.test.ts b/extensions/codex/src/app-server/run-attempt.test.ts index 1d42ba244845..218324aca87d 100644 --- a/extensions/codex/src/app-server/run-attempt.test.ts +++ b/extensions/codex/src/app-server/run-attempt.test.ts @@ -887,6 +887,7 @@ describe("runCodexAppServerAttempt", () => { await closeCodexSandboxExecServersForTests(); resetCodexAppServerClientFactoryForTest(); testing.resetOpenClawCodingToolsFactoryForTests(); + testing.resetEnsuredCodexWorkspaceDirsForTests(); testing.clearPendingCodexNativeHookRelayUnregistersForTests(); resetCodexRateLimitCacheForTests(); nativeHookRelayTesting.clearNativeHookRelaysForTests(); @@ -903,6 +904,16 @@ describe("runCodexAppServerAttempt", () => { await fs.rm(tempDir, { recursive: true, force: true }); }); + it("recreates cached Codex workspace directories after cleanup removes them", async () => { + const workspaceDir = path.join(tempDir, "cached-workspace"); + + await testing.ensureCodexWorkspaceDirOnceForTests(workspaceDir); + await fs.rm(workspaceDir, { recursive: true, force: true }); + await testing.ensureCodexWorkspaceDirOnceForTests(workspaceDir); + + expect((await fs.stat(workspaceDir)).isDirectory()).toBe(true); + }); + it("filters Codex-native dynamic tools from app-server tool exposure", () => { const tools = [ "read", diff --git a/extensions/codex/src/app-server/run-attempt.ts b/extensions/codex/src/app-server/run-attempt.ts index 2266d1bc472e..867f8bb7ad20 100644 --- a/extensions/codex/src/app-server/run-attempt.ts +++ b/extensions/codex/src/app-server/run-attempt.ts @@ -134,6 +134,7 @@ import { mergeCodexThreadConfigs, shouldBuildCodexPluginThreadConfig, } from "./plugin-thread-config.js"; +import { isCodexAppServerProfilerEnabled } from "./profiler-flag.js"; import { assertCodexTurnStartResponse, readCodexDynamicToolCallParams, @@ -278,6 +279,7 @@ type CodexWorkspaceBootstrapContext = CodexBootstrapContext & { }; let openClawCodingToolsFactoryForTests: OpenClawCodingToolsFactory | undefined; +const ensuredCodexWorkspaceDirs = new Set(); type PendingCodexNativeHookRelayUnregister = { timeout: ReturnType; @@ -340,6 +342,30 @@ function clearPendingCodexNativeHookRelayUnregistersForTests(): void { pendingCodexNativeHookRelayUnregisters.clear(); } +async function ensureCodexWorkspaceDirOnce(workspaceDir: string): Promise { + const normalized = path.resolve(workspaceDir); + if (ensuredCodexWorkspaceDirs.has(normalized)) { + try { + const stat = await fs.stat(normalized); + if (stat.isDirectory()) { + return; + } + } catch (error) { + const code = + typeof error === "object" && error ? (error as { code?: unknown }).code : undefined; + if (code !== "ENOENT") { + throw error; + } + } + ensuredCodexWorkspaceDirs.delete(normalized); + } + // Codex attempts re-enter the same workspace repeatedly; caching successful + // mkdirs avoids repeated fs work while still recovering if cleanup prunes + // the directory between attempts. + await fs.mkdir(normalized, { recursive: true }); + ensuredCodexWorkspaceDirs.add(normalized); +} + function emitCodexAppServerEvent( params: EmbeddedRunAttemptParams, event: Parameters>[0], @@ -1013,6 +1039,7 @@ export async function runCodexAppServerAttempt( } = {}, ): Promise { const attemptStartedAt = Date.now(); + const profilerEnabled = isCodexAppServerProfilerEnabled(params.config); const codexModelCallTrace = freezeDiagnosticTraceContext( createDiagnosticTraceContextFromActiveScope(), ); @@ -1022,13 +1049,20 @@ export async function runCodexAppServerAttempt( let codexModelCallStarted = false; let codexModelCallTerminalEmitted = false; let codexModelCallRequestPayloadBytes: number | undefined; + // Startup phase timings are profiler-gated because this function runs before + // every Codex turn; normal production should not do timing bookkeeping here. + const preDynamicStartupStages = createCodexDynamicToolBuildStageTracker({ + enabled: profilerEnabled, + }); const attemptClientFactory = options.clientFactory ?? defaultCodexAppServerClientFactory; const pluginConfig = readCodexPluginConfig(options.pluginConfig); const computerUseConfig = resolveCodexComputerUseConfig({ pluginConfig }); const configuredAppServer = resolveCodexAppServerRuntimeOptions({ pluginConfig }); const beforeToolCallPolicy = getBeforeToolCallPolicyDiagnosticState(); + preDynamicStartupStages.mark("config"); const resolvedWorkspace = resolveUserPath(params.workspaceDir); - await fs.mkdir(resolvedWorkspace, { recursive: true }); + await ensureCodexWorkspaceDirOnce(resolvedWorkspace); + preDynamicStartupStages.mark("workspace"); const sandboxSessionKey = params.sandboxSessionKey?.trim() || params.sessionKey?.trim() || params.sessionId; const contextSessionKey = params.sessionKey?.trim() || sandboxSessionKey; @@ -1037,12 +1071,14 @@ export async function runCodexAppServerAttempt( sessionKey: sandboxSessionKey, workspaceDir: resolvedWorkspace, }); + preDynamicStartupStages.mark("sandbox"); const effectiveWorkspace = sandbox?.enabled ? sandbox.workspaceAccess === "rw" ? resolvedWorkspace : sandbox.workspaceDir : resolvedWorkspace; - await fs.mkdir(effectiveWorkspace, { recursive: true }); + await ensureCodexWorkspaceDirOnce(effectiveWorkspace); + preDynamicStartupStages.mark("effective-workspace"); const appServer = resolveCodexAppServerForOpenClawToolPolicy({ appServer: configuredAppServer, pluginConfig, @@ -1062,11 +1098,13 @@ export async function runCodexAppServerAttempt( trustedToolPolicies: beforeToolCallPolicy.trustedToolPolicies, }); } + preDynamicStartupStages.mark("app-server-policy"); let pluginAppServer: CodexAppServerRuntimeOptions = appServer; const nativeHookRelayEvents = resolveCodexNativeHookRelayEvents({ configuredEvents: options.nativeHookRelay?.events, appServer, }); + preDynamicStartupStages.mark("native-hook-relay"); const runAbortController = new AbortController(); const abortFromUpstream = () => { @@ -1084,7 +1122,9 @@ export async function runCodexAppServerAttempt( agentId: params.agentId, }); const agentDir = params.agentDir ?? resolveAgentDir(params.config ?? {}, sessionAgentId); + preDynamicStartupStages.mark("session-agent"); let startupBinding = await readCodexAppServerBinding(params.sessionFile); + preDynamicStartupStages.mark("read-binding"); const startupBindingAuthProfileId = startupBinding?.authProfileId; startupBinding = await rotateOversizedCodexAppServerStartupBinding({ binding: startupBinding, @@ -1094,6 +1134,7 @@ export async function runCodexAppServerAttempt( config: params.config, contextEngineActive: isActiveHarnessContextEngine(params.contextEngine), }); + preDynamicStartupStages.mark("rotate-binding"); const startupAuthProfileCandidate = params.runtimePlan?.auth.forwardedAuthProfileId ?? params.authProfileId ?? @@ -1110,6 +1151,7 @@ export async function runCodexAppServerAttempt( agentDir, config: params.config, }); + preDynamicStartupStages.mark("auth-profile"); const runtimeParams = { ...params, sessionKey: contextSessionKey, @@ -1133,11 +1175,13 @@ export async function runCodexAppServerAttempt( : resolveCodexAppServerFallbackApiKeyCacheKey({ startOptions: appServer.start, }); + preDynamicStartupStages.mark("auth-cache"); const nodeExecBlocksNativeExecution = isCodexNativeExecutionBlockedByNodeExecHost(params, { agentId: sessionAgentId, runtimeSessionKey: sandboxSessionKey, sandbox, }); + preDynamicStartupStages.mark("native-exec-policy"); const bundleMcpThreadConfig = await loadCodexBundleMcpThreadConfig({ workspaceDir: effectiveWorkspace, cfg: params.config, @@ -1145,12 +1189,14 @@ export async function runCodexAppServerAttempt( disableTools: params.disableTools, toolsAllow: nodeExecBlocksNativeExecution ? [] : params.toolsAllow, }); + preDynamicStartupStages.mark("bundle-mcp"); const sandboxExecServerEnabled = isCodexSandboxExecServerEnabled(pluginConfig); const nativeToolSurfaceEnabled = shouldEnableCodexAppServerNativeToolSurface(params, sandbox, { agentId: sessionAgentId, runtimeSessionKey: sandboxSessionKey, sandboxExecServerEnabled, }); + preDynamicStartupStages.mark("native-tool-surface"); for (const diagnostic of bundleMcpThreadConfig.diagnostics) { embeddedAgentLog.warn(`bundle-mcp: ${diagnostic.pluginId}: ${diagnostic.message}`); } @@ -1165,6 +1211,23 @@ export async function runCodexAppServerAttempt( }); } const hookChannelId = resolveCodexAppServerHookChannelId(params, sandboxSessionKey); + preDynamicStartupStages.mark("context-engine-support"); + const preDynamicSummary = preDynamicStartupStages.snapshot(); + if (shouldWarnCodexDynamicToolBuildStageSummary(preDynamicSummary)) { + embeddedAgentLog.warn( + `codex app-server pre-dynamic startup timings runId=${params.runId} sessionId=${params.sessionId} totalMs=${preDynamicSummary.totalMs} stages=${formatCodexDynamicToolBuildStageSummary(preDynamicSummary)}`, + { + runId: params.runId, + sessionId: params.sessionId, + totalMs: preDynamicSummary.totalMs, + stages: preDynamicSummary.stages, + hasStartupBinding: Boolean(startupBinding?.threadId), + startupAuthProfileId: startupAuthProfileId ?? null, + bundleMcpDiagnosticCount: bundleMcpThreadConfig.diagnostics.length, + nativeToolSurfaceEnabled, + }, + ); + } let yieldDetected = false; const tools = await buildDynamicTools({ params, @@ -1176,6 +1239,7 @@ export async function runCodexAppServerAttempt( runAbortController, sessionAgentId, pluginConfig, + profilerEnabled, onYieldDetected: () => { yieldDetected = true; }, @@ -1190,6 +1254,7 @@ export async function runCodexAppServerAttempt( runAbortController, sessionAgentId, pluginConfig, + profilerEnabled, forceHeartbeatTool: true, ignoreRuntimePlan: true, onYieldDetected: () => { @@ -3853,6 +3918,9 @@ function createCodexNativeHookRelay(params: { }), signal: params.signal, command: { + // Hook relay subprocesses are observational for most tool events; keep + // them lower priority so they do not compete with the active reply turn. + nice: 10, timeoutMs: params.options?.gatewayTimeoutMs, }, }); @@ -4022,6 +4090,7 @@ type DynamicToolBuildParams = { runAbortController: AbortController; sessionAgentId: string; pluginConfig: CodexPluginConfig; + profilerEnabled?: boolean; forceHeartbeatTool?: boolean; ignoreRuntimePlan?: boolean; onYieldDetected: () => void; @@ -4051,16 +4120,91 @@ function resolveCodexAppServerHookChannelId( }).channelId; } +type CodexDynamicToolBuildStageTiming = { + name: string; + durationMs: number; + elapsedMs: number; +}; + +type CodexDynamicToolBuildStageSummary = { + totalMs: number; + stages: CodexDynamicToolBuildStageTiming[]; +}; + +const CODEX_DYNAMIC_TOOL_BUILD_WARN_TOTAL_MS = 1_000; +const CODEX_DYNAMIC_TOOL_BUILD_WARN_STAGE_MS = 500; + +function createCodexDynamicToolBuildStageTracker(options: { enabled?: boolean } = {}): { + mark: (name: string) => void; + snapshot: () => CodexDynamicToolBuildStageSummary; +} { + if (!options.enabled) { + return { + mark() {}, + snapshot() { + return { totalMs: 0, stages: [] }; + }, + }; + } + + const startedAt = Date.now(); + let previousAt = startedAt; + const stages: CodexDynamicToolBuildStageTiming[] = []; + const toMs = (value: number) => Math.max(0, Math.round(value)); + return { + mark(name) { + const currentAt = Date.now(); + stages.push({ + name, + durationMs: toMs(currentAt - previousAt), + elapsedMs: toMs(currentAt - startedAt), + }); + previousAt = currentAt; + }, + snapshot() { + return { + totalMs: toMs(Date.now() - startedAt), + stages: stages.slice(), + }; + }, + }; +} + +function shouldWarnCodexDynamicToolBuildStageSummary( + summary: CodexDynamicToolBuildStageSummary, +): boolean { + return ( + summary.totalMs >= CODEX_DYNAMIC_TOOL_BUILD_WARN_TOTAL_MS || + summary.stages.some((stage) => stage.durationMs >= CODEX_DYNAMIC_TOOL_BUILD_WARN_STAGE_MS) + ); +} + +function formatCodexDynamicToolBuildStageSummary( + summary: CodexDynamicToolBuildStageSummary, +): string { + return summary.stages.length > 0 + ? summary.stages + .map((stage) => `${stage.name}:${stage.durationMs}ms@${stage.elapsedMs}ms`) + .join(",") + : "none"; +} + async function buildDynamicTools(input: DynamicToolBuildParams) { const { params } = input; if (params.disableTools || !supportsModelTools(params.model)) { return []; } + // Dynamic tool construction is on the reply hot path, so per-stage + // Date.now/span bookkeeping runs only when the Codex profiler flag is set. + const toolBuildStages = createCodexDynamicToolBuildStageTracker({ + enabled: input.profilerEnabled, + }); const modelHasVision = params.model.input?.includes("image") ?? false; const agentDir = params.agentDir ?? resolveAgentDir(params.config ?? {}, input.sessionAgentId); const createOpenClawCodingTools = openClawCodingToolsFactoryForTests ?? (await import("openclaw/plugin-sdk/agent-harness")).createOpenClawCodingTools; + toolBuildStages.mark("load-agent-harness-tools"); const sessionKeys = resolveOpenClawCodingToolsSessionKeys(params, input.sandboxSessionKey); const allTools = createOpenClawCodingTools({ agentId: input.sessionAgentId, @@ -4130,7 +4274,11 @@ async function buildDynamicTools(input: DynamicToolBuildParams) { data: { name: "sessions_yield", message }, }); }, + recordToolPrepStage: (name) => { + toolBuildStages.mark(name); + }, }); + toolBuildStages.mark("create-openclaw-coding-tools"); const codexFilteredTools = addNodeShellDynamicToolsIfNeeded( addSandboxShellDynamicToolsIfAvailable( isCodexMemoryFlushRun(params) @@ -4142,13 +4290,16 @@ async function buildDynamicTools(input: DynamicToolBuildParams) { allTools, input, ); + toolBuildStages.mark("codex-filtering"); const visionFilteredTools = filterToolsForVisionInputs(codexFilteredTools, { modelHasVision, hasInboundImages: (params.images?.length ?? 0) > 0, }); + toolBuildStages.mark("vision-filtering"); const toolsAllow = includeForcedCodexDynamicToolAllow(params.toolsAllow, params); const filteredTools = filterCodexDynamicToolsForAllowlist(visionFilteredTools, toolsAllow); - return normalizeAgentRuntimeTools({ + toolBuildStages.mark("allowlist-filter"); + const normalizedTools = normalizeAgentRuntimeTools({ runtimePlan: input.ignoreRuntimePlan ? undefined : params.runtimePlan, tools: filteredTools, provider: params.provider, @@ -4159,6 +4310,30 @@ async function buildDynamicTools(input: DynamicToolBuildParams) { modelApi: params.model.api, model: params.model, }); + toolBuildStages.mark("runtime-normalization"); + const summary = toolBuildStages.snapshot(); + if (shouldWarnCodexDynamicToolBuildStageSummary(summary)) { + const phase = input.forceHeartbeatTool ? "registered-tools" : "runtime-tools"; + embeddedAgentLog.warn( + `codex app-server dynamic tool build timings runId=${params.runId} sessionId=${params.sessionId} phase=${phase} totalMs=${summary.totalMs} stages=${formatCodexDynamicToolBuildStageSummary(summary)}`, + { + runId: params.runId, + sessionId: params.sessionId, + phase, + totalMs: summary.totalMs, + stages: summary.stages, + allToolCount: allTools.length, + codexFilteredToolCount: codexFilteredTools.length, + visionFilteredToolCount: visionFilteredTools.length, + filteredToolCount: filteredTools.length, + normalizedToolCount: normalizedTools.length, + forceHeartbeatTool: input.forceHeartbeatTool === true, + ignoreRuntimePlan: input.ignoreRuntimePlan === true, + nativeToolSurfaceEnabled: input.nativeToolSurfaceEnabled === true, + }, + ); + } + return normalizedTools; } function includeForcedCodexDynamicToolAllow( @@ -5994,6 +6169,12 @@ export const testing = { resetOpenClawCodingToolsFactoryForTests(): void { openClawCodingToolsFactoryForTests = undefined; }, + async ensureCodexWorkspaceDirOnceForTests(workspaceDir: string): Promise { + await ensureCodexWorkspaceDirOnce(workspaceDir); + }, + resetEnsuredCodexWorkspaceDirsForTests(): void { + ensuredCodexWorkspaceDirs.clear(); + }, flushPendingCodexNativeHookRelayUnregistersForTests, clearPendingCodexNativeHookRelayUnregistersForTests, resolveCodexNativeHookRelayUnregisterGraceMs, diff --git a/extensions/codex/src/app-server/thread-lifecycle.ts b/extensions/codex/src/app-server/thread-lifecycle.ts index 54fbc941d306..f893b5605541 100644 --- a/extensions/codex/src/app-server/thread-lifecycle.ts +++ b/extensions/codex/src/app-server/thread-lifecycle.ts @@ -19,6 +19,7 @@ import { mergeCodexThreadConfigs, type CodexPluginThreadConfig, } from "./plugin-thread-config.js"; +import { isCodexAppServerProfilerEnabled } from "./profiler-flag.js"; import { assertCodexThreadResumeResponse, assertCodexThreadStartResponse, @@ -84,6 +85,113 @@ const CODEX_LIGHTWEIGHT_CONTEXT_THREAD_CONFIG: JsonObject = { project_doc_max_bytes: 0, }; +type CodexThreadLifecycleTimingSpan = { + name: string; + durationMs: number; + elapsedMs: number; +}; + +type CodexThreadLifecycleTimingSummary = { + totalMs: number; + spans: CodexThreadLifecycleTimingSpan[]; +}; + +const CODEX_THREAD_LIFECYCLE_TIMING_WARN_TOTAL_MS = 1_000; +const CODEX_THREAD_LIFECYCLE_TIMING_WARN_STAGE_MS = 500; + +function createCodexThreadLifecycleTimingTracker(options: { enabled?: boolean } = {}): { + measure: (name: string, run: () => Promise | T) => Promise; + measureSync: (name: string, run: () => T) => T; + logIfSlow: (params: { + runId: string; + sessionId: string; + sessionKey?: string; + action: "started" | "resumed" | "rotated"; + threadId?: string; + }) => void; +} { + if (!options.enabled) { + return { + async measure(_name, run) { + return await run(); + }, + measureSync(_name, run) { + return run(); + }, + logIfSlow() {}, + }; + } + + const startedAt = Date.now(); + let didLog = false; + const spans: CodexThreadLifecycleTimingSpan[] = []; + const toMs = (value: number) => Math.max(0, Math.round(value)); + const record = (name: string, spanStartedAt: number) => { + spans.push({ + name, + durationMs: toMs(Date.now() - spanStartedAt), + elapsedMs: toMs(Date.now() - startedAt), + }); + }; + const snapshot = (): CodexThreadLifecycleTimingSummary => ({ + totalMs: toMs(Date.now() - startedAt), + spans: spans.slice(), + }); + const shouldLog = (summary: CodexThreadLifecycleTimingSummary) => + summary.totalMs >= CODEX_THREAD_LIFECYCLE_TIMING_WARN_TOTAL_MS || + summary.spans.some((span) => span.durationMs >= CODEX_THREAD_LIFECYCLE_TIMING_WARN_STAGE_MS); + const formatSpans = (summary: CodexThreadLifecycleTimingSummary) => + summary.spans.length > 0 + ? summary.spans + .map((span) => `${span.name}:${span.durationMs}ms@${span.elapsedMs}ms`) + .join(",") + : "none"; + return { + async measure(name, run) { + const spanStartedAt = Date.now(); + try { + return await run(); + } finally { + record(name, spanStartedAt); + } + }, + measureSync(name, run) { + const spanStartedAt = Date.now(); + try { + return run(); + } finally { + record(name, spanStartedAt); + } + }, + logIfSlow(params) { + if (didLog) { + return; + } + const summary = snapshot(); + if (!shouldLog(summary)) { + return; + } + didLog = true; + embeddedAgentLog.warn( + `codex app-server thread lifecycle timings runId=${params.runId} sessionId=${ + params.sessionId + } sessionKey=${params.sessionKey ?? "unknown"} action=${params.action} totalMs=${ + summary.totalMs + } stages=${formatSpans(summary)}`, + { + runId: params.runId, + sessionId: params.sessionId, + sessionKey: params.sessionKey, + action: params.action, + threadId: params.threadId, + totalMs: summary.totalMs, + spans: summary.spans, + }, + ); + }, + }; +} + export async function startOrResumeThread(params: { client: CodexAppServerClient; params: EmbeddedRunAttemptParams; @@ -103,10 +211,16 @@ export async function startOrResumeThread(params: { pluginThreadConfig?: CodexPluginThreadConfigProvider; contextEngineProjection?: CodexContextEngineThreadBootstrapProjection; }): Promise { - const dynamicToolsFingerprint = fingerprintDynamicTools(params.dynamicTools); - const contextEngineBinding = buildContextEngineBinding( - params.params, - params.contextEngineProjection, + // Thread lifecycle spans are useful when profiling startup churn, but normal + // turns should not pay Date.now/span-array overhead while resuming threads. + const lifecycleTiming = createCodexThreadLifecycleTimingTracker({ + enabled: isCodexAppServerProfilerEnabled(params.params.config), + }); + const dynamicToolsFingerprint = lifecycleTiming.measureSync("fingerprint_dynamic_tools", () => + fingerprintDynamicTools(params.dynamicTools), + ); + const contextEngineBinding = lifecycleTiming.measureSync("context_engine_binding", () => + buildContextEngineBinding(params.params, params.contextEngineProjection), ); const userMcpServersConfigPatch = params.userMcpServersEnabled === false @@ -118,11 +232,13 @@ export async function startOrResumeThread(params: { const environmentSelectionFingerprint = fingerprintEnvironmentSelection( params.environmentSelection, ); - let binding = await readCodexAppServerBinding(params.params.sessionFile, { - authProfileStore: params.params.authProfileStore, - agentDir: params.params.agentDir, - config: params.params.config, - }); + let binding = await lifecycleTiming.measure("read_binding", () => + readCodexAppServerBinding(params.params.sessionFile, { + authProfileStore: params.params.authProfileStore, + agentDir: params.params.agentDir, + config: params.params.config, + }), + ); let preserveExistingBinding = false; let rotatedContextEngineBinding = false; let prebuiltPluginThreadConfig: CodexPluginThreadConfig | undefined; @@ -207,7 +323,9 @@ export async function startOrResumeThread(params: { }) ) { try { - prebuiltPluginThreadConfig = await params.pluginThreadConfig?.build(); + prebuiltPluginThreadConfig = await lifecycleTiming.measure("plugin_config_recovery", () => + params.pluginThreadConfig?.build(), + ); pluginBindingStale = prebuiltPluginThreadConfig?.fingerprint !== binding.pluginAppsFingerprint; } catch (error) { @@ -274,19 +392,21 @@ export async function startOrResumeThread(params: { userMcpServersConfigPatch, params.finalConfigPatch, ); + const resumeParams = lifecycleTiming.measureSync("thread_resume_params", () => + buildThreadResumeParams(params.params, { + threadId: binding.threadId, + authProfileId, + appServer: params.appServer, + dynamicTools: params.dynamicTools, + developerInstructions: params.developerInstructions, + config: resumeConfig, + nativeCodeModeEnabled: params.nativeCodeModeEnabled, + nativeCodeModeOnlyEnabled: params.nativeCodeModeOnlyEnabled, + }), + ); const response = assertCodexThreadResumeResponse( - await params.client.request( - "thread/resume", - buildThreadResumeParams(params.params, { - threadId: binding.threadId, - authProfileId, - appServer: params.appServer, - dynamicTools: params.dynamicTools, - developerInstructions: params.developerInstructions, - config: resumeConfig, - nativeCodeModeEnabled: params.nativeCodeModeEnabled, - nativeCodeModeOnlyEnabled: params.nativeCodeModeOnlyEnabled, - }), + await lifecycleTiming.measure("thread_resume_request", () => + params.client.request("thread/resume", resumeParams), ), ); const boundAuthProfileId = authProfileId; @@ -301,29 +421,31 @@ export async function startOrResumeThread(params: { params.mcpServersFingerprintEvaluated === true ? params.mcpServersFingerprint : binding.mcpServersFingerprint; - await writeCodexAppServerBinding( - params.params.sessionFile, - { - threadId: response.thread.id, - cwd: params.cwd, - authProfileId: boundAuthProfileId, - model: params.params.modelId, - modelProvider: response.modelProvider ?? fallbackModelProvider, - dynamicToolsFingerprint, - userMcpServersFingerprint, - mcpServersFingerprint: nextMcpServersFingerprint, - pluginAppsFingerprint: binding.pluginAppsFingerprint, - pluginAppsInputFingerprint: binding.pluginAppsInputFingerprint, - pluginAppPolicyContext: binding.pluginAppPolicyContext, - contextEngine: contextEngineBinding, - environmentSelectionFingerprint, - createdAt: binding.createdAt, - }, - { - authProfileStore: params.params.authProfileStore, - agentDir: params.params.agentDir, - config: params.params.config, - }, + await lifecycleTiming.measure("thread_resume_write_binding", () => + writeCodexAppServerBinding( + params.params.sessionFile, + { + threadId: response.thread.id, + cwd: params.cwd, + authProfileId: boundAuthProfileId, + model: params.params.modelId, + modelProvider: response.modelProvider ?? fallbackModelProvider, + dynamicToolsFingerprint, + userMcpServersFingerprint, + mcpServersFingerprint: nextMcpServersFingerprint, + pluginAppsFingerprint: binding.pluginAppsFingerprint, + pluginAppsInputFingerprint: binding.pluginAppsInputFingerprint, + pluginAppPolicyContext: binding.pluginAppPolicyContext, + contextEngine: contextEngineBinding, + environmentSelectionFingerprint, + createdAt: binding.createdAt, + }, + { + authProfileStore: params.params.authProfileStore, + agentDir: params.params.agentDir, + config: params.params.config, + }, + ), ); if (contextEngineBinding) { embeddedAgentLog.info("codex app-server wrote context-engine thread binding", { @@ -336,6 +458,13 @@ export async function startOrResumeThread(params: { action: "resumed", }); } + lifecycleTiming.logIfSlow({ + runId: params.params.runId, + sessionId: params.params.sessionId, + sessionKey: params.params.sessionKey, + threadId: response.thread.id, + action: "resumed", + }); return { ...binding, threadId: response.thread.id, @@ -366,27 +495,34 @@ export async function startOrResumeThread(params: { } const pluginThreadConfig = params.pluginThreadConfig?.enabled - ? (prebuiltPluginThreadConfig ?? (await params.pluginThreadConfig.build())) + ? (prebuiltPluginThreadConfig ?? + (await lifecycleTiming.measure("plugin_config_build", () => + params.pluginThreadConfig?.build(), + ))) : undefined; - const config = mergeCodexThreadConfigs( - params.config, - userMcpServersConfigPatch, - pluginThreadConfig?.configPatch, - params.finalConfigPatch, + const config = lifecycleTiming.measureSync("merge_thread_config", () => + mergeCodexThreadConfigs( + params.config, + userMcpServersConfigPatch, + pluginThreadConfig?.configPatch, + params.finalConfigPatch, + ), + ); + const startParams = lifecycleTiming.measureSync("thread_start_params", () => + buildThreadStartParams(params.params, { + cwd: params.cwd, + dynamicTools: params.dynamicTools, + appServer: params.appServer, + developerInstructions: params.developerInstructions, + config, + nativeCodeModeEnabled: params.nativeCodeModeEnabled, + nativeCodeModeOnlyEnabled: params.nativeCodeModeOnlyEnabled, + environmentSelection: params.environmentSelection, + }), ); const response = assertCodexThreadStartResponse( - await params.client.request( - "thread/start", - buildThreadStartParams(params.params, { - cwd: params.cwd, - dynamicTools: params.dynamicTools, - appServer: params.appServer, - developerInstructions: params.developerInstructions, - config, - nativeCodeModeEnabled: params.nativeCodeModeEnabled, - nativeCodeModeOnlyEnabled: params.nativeCodeModeOnlyEnabled, - environmentSelection: params.environmentSelection, - }), + await lifecycleTiming.measure("thread_start_request", () => + params.client.request("thread/start", startParams), ), ); const modelProvider = resolveCodexAppServerModelProvider({ @@ -400,29 +536,31 @@ export async function startOrResumeThread(params: { const nextMcpServersFingerprint = params.mcpServersFingerprintEvaluated === true ? params.mcpServersFingerprint : undefined; if (!preserveExistingBinding) { - await writeCodexAppServerBinding( - params.params.sessionFile, - { - threadId: response.thread.id, - cwd: params.cwd, - authProfileId: params.params.authProfileId, - model: response.model ?? params.params.modelId, - modelProvider: response.modelProvider ?? modelProvider, - dynamicToolsFingerprint, - userMcpServersFingerprint, - mcpServersFingerprint: nextMcpServersFingerprint, - pluginAppsFingerprint: pluginThreadConfig?.fingerprint, - pluginAppsInputFingerprint: pluginThreadConfig?.inputFingerprint, - pluginAppPolicyContext: pluginThreadConfig?.policyContext, - contextEngine: contextEngineBinding, - environmentSelectionFingerprint, - createdAt, - }, - { - authProfileStore: params.params.authProfileStore, - agentDir: params.params.agentDir, - config: params.params.config, - }, + await lifecycleTiming.measure("thread_start_write_binding", () => + writeCodexAppServerBinding( + params.params.sessionFile, + { + threadId: response.thread.id, + cwd: params.cwd, + authProfileId: params.params.authProfileId, + model: response.model ?? params.params.modelId, + modelProvider: response.modelProvider ?? modelProvider, + dynamicToolsFingerprint, + userMcpServersFingerprint, + mcpServersFingerprint: nextMcpServersFingerprint, + pluginAppsFingerprint: pluginThreadConfig?.fingerprint, + pluginAppsInputFingerprint: pluginThreadConfig?.inputFingerprint, + pluginAppPolicyContext: pluginThreadConfig?.policyContext, + contextEngine: contextEngineBinding, + environmentSelectionFingerprint, + createdAt, + }, + { + authProfileStore: params.params.authProfileStore, + agentDir: params.params.agentDir, + config: params.params.config, + }, + ), ); if (contextEngineBinding) { embeddedAgentLog.info("codex app-server wrote context-engine thread binding", { @@ -436,6 +574,13 @@ export async function startOrResumeThread(params: { }); } } + lifecycleTiming.logIfSlow({ + runId: params.params.runId, + sessionId: params.params.sessionId, + sessionKey: params.params.sessionKey, + threadId: response.thread.id, + action: rotatedContextEngineBinding ? "rotated" : "started", + }); return { schemaVersion: 1, threadId: response.thread.id, diff --git a/src/agents/harness/native-hook-relay.test.ts b/src/agents/harness/native-hook-relay.test.ts index f04c8c91eb29..40ae74463a84 100644 --- a/src/agents/harness/native-hook-relay.test.ts +++ b/src/agents/harness/native-hook-relay.test.ts @@ -115,7 +115,28 @@ describe("native hook relay registry", () => { ); }); - it("preserves safety relays while marking hook-only events without handlers inactive", () => { + it("preserves permission relays while marking hook-only events without handlers inactive", () => { + const relay = registerNativeHookRelay({ + provider: "codex", + sessionId: "session-1", + runId: "run-1", + command: { + executable: "/opt/Open Claw/openclaw.mjs", + nodeExecutable: "/usr/local/bin/node", + timeoutMs: 1234, + }, + }); + + expect(relay.shouldRelayEvent("pre_tool_use")).toBe(false); + expect(relay.shouldRelayEvent("post_tool_use")).toBe(false); + expect(relay.shouldRelayEvent("before_agent_finalize")).toBe(false); + expect(relay.shouldRelayEvent("permission_request")).toBe(true); + }); + + it("builds pre-tool relay commands only when before-tool policy is active", () => { + initializeGlobalHookRunner( + createMockPluginRegistry([{ hookName: "before_tool_call", handler: vi.fn() }]), + ); const relay = registerNativeHookRelay({ provider: "codex", sessionId: "session-1", @@ -128,9 +149,42 @@ describe("native hook relay registry", () => { }); expect(relay.shouldRelayEvent("pre_tool_use")).toBe(true); - expect(relay.shouldRelayEvent("post_tool_use")).toBe(false); - expect(relay.shouldRelayEvent("before_agent_finalize")).toBe(false); - expect(relay.shouldRelayEvent("permission_request")).toBe(true); + expect(relay.commandForEvent("pre_tool_use")).toBe( + "/usr/local/bin/node '/opt/Open Claw/openclaw.mjs' hooks relay --provider codex --relay-id " + + `${relay.relayId} --event pre_tool_use --timeout 1234`, + ); + }); + + it("keeps pre-tool relays active when native loop detection is not disabled", () => { + const relay = registerNativeHookRelay({ + provider: "codex", + sessionId: "session-1", + sessionKey: "agent:main:session-1", + runId: "run-1", + command: { + executable: "/opt/Open Claw/openclaw.mjs", + nodeExecutable: "/usr/local/bin/node", + timeoutMs: 1234, + }, + }); + + expect(relay.shouldRelayEvent("pre_tool_use")).toBe(true); + expect(relay.commandForEvent("pre_tool_use")).toBe( + "/usr/local/bin/node '/opt/Open Claw/openclaw.mjs' hooks relay --provider codex --relay-id " + + `${relay.relayId} --event pre_tool_use --timeout 1234`, + ); + }); + + it("omits pre-tool relays when native loop detection is explicitly disabled", () => { + const relay = registerNativeHookRelay({ + provider: "codex", + sessionId: "session-1", + sessionKey: "agent:main:session-1", + runId: "run-1", + config: { tools: { loopDetection: { enabled: false } } } as never, + }); + + expect(relay.shouldRelayEvent("pre_tool_use")).toBe(false); }); it("builds relay commands only for native events with matching local hooks", () => { @@ -148,7 +202,7 @@ describe("native hook relay registry", () => { }, }); - expect(relay.shouldRelayEvent("pre_tool_use")).toBe(true); + expect(relay.shouldRelayEvent("pre_tool_use")).toBe(false); expect(relay.shouldRelayEvent("post_tool_use")).toBe(true); expect(relay.shouldRelayEvent("before_agent_finalize")).toBe(false); expect(relay.commandForEvent("post_tool_use")).toBe( @@ -2284,4 +2338,19 @@ describe("native hook relay command builder", () => { "openclaw hooks relay --provider codex --relay-id relay-1 --event permission_request --timeout 5000", ); }); + + it("can lower native hook relay process priority", () => { + const prefix = process.platform === "win32" ? "" : "nice -n 10 "; + expect( + buildNativeHookRelayCommand({ + provider: "codex", + relayId: "relay-1", + event: "pre_tool_use", + executable: "openclaw", + nice: 10, + }), + ).toBe( + `${prefix}openclaw hooks relay --provider codex --relay-id relay-1 --event pre_tool_use --timeout 5000`, + ); + }); }); diff --git a/src/agents/harness/native-hook-relay.ts b/src/agents/harness/native-hook-relay.ts index 1f3a77c8c188..fcb97856a840 100644 --- a/src/agents/harness/native-hook-relay.ts +++ b/src/agents/harness/native-hook-relay.ts @@ -17,8 +17,9 @@ import { hasGlobalHooks } from "../../plugins/hook-runner-global.js"; import { PluginApprovalResolutions } from "../../plugins/types.js"; import { uniqueValues } from "../../shared/string-normalization.js"; import { asBoolean } from "../../utils/boolean.js"; -import { runBeforeToolCallHook } from "../pi-tools.before-tool-call.js"; +import { hasBeforeToolCallPolicy, runBeforeToolCallHook } from "../pi-tools.before-tool-call.js"; import { stableStringify } from "../stable-stringify.js"; +import { resolveToolLoopDetectionConfig } from "../tool-loop-detection-config.js"; import { normalizeToolName } from "../tool-policy.js"; import { callGatewayTool } from "../tools/gateway.js"; import { runAgentHarnessAfterToolCallHook } from "./hook-helpers.js"; @@ -110,6 +111,7 @@ export type RegisterNativeHookRelayParams = { export type NativeHookRelayCommandOptions = { executable?: string; + nice?: number | false; nodeExecutable?: string; timeoutMs?: number; }; @@ -324,12 +326,13 @@ export function registerNativeHookRelay( registerNativeHookRelayBridge(registration); const handle: NativeHookRelayRegistrationHandle = { ...registration, - shouldRelayEvent: nativeHookRelayEventHasLocalWork, + shouldRelayEvent: (event) => nativeHookRelayEventHasLocalWork(registration, event), commandForEvent: (event) => buildNativeHookRelayCommand({ provider: params.provider, relayId, event, + nice: params.command?.nice, timeoutMs: params.command?.timeoutMs, executable: params.command?.executable, nodeExecutable: params.command?.nodeExecutable, @@ -376,12 +379,24 @@ function normalizeRelayId(value: string | undefined): string | undefined { return trimmed; } +function resolveNativeHookRelayNicePrefix(value: number | false | undefined): string[] { + if (process.platform === "win32" || value === false || value === undefined) { + return []; + } + const nice = normalizePositiveInteger(value, 0); + if (nice <= 0) { + return []; + } + return ["nice", "-n", String(nice)]; +} + export function buildNativeHookRelayCommand(params: { provider: NativeHookRelayProvider; relayId: string; event: NativeHookRelayEvent; timeoutMs?: number; executable?: string; + nice?: number | false; nodeExecutable?: string; }): string { const timeoutMs = normalizePositiveInteger(params.timeoutMs, DEFAULT_RELAY_TIMEOUT_MS); @@ -390,7 +405,9 @@ export function buildNativeHookRelayCommand(params: { executable === "openclaw" ? ["openclaw"] : [params.nodeExecutable ?? process.execPath, executable]; + const nicePrefix = resolveNativeHookRelayNicePrefix(params.nice); return shellQuoteArgs([ + ...nicePrefix, ...argv, "hooks", "relay", @@ -405,9 +422,25 @@ export function buildNativeHookRelayCommand(params: { ]); } -function nativeHookRelayEventHasLocalWork(event: NativeHookRelayEvent): boolean { +function nativePreToolUseMayRunLoopDetection(registration: NativeHookRelayRegistration): boolean { + if (!registration.sessionKey) { + return false; + } + const loopDetection = resolveToolLoopDetectionConfig({ + cfg: registration.config, + agentId: registration.agentId, + }); + return loopDetection?.enabled !== false; +} + +function nativeHookRelayEventHasLocalWork( + registration: NativeHookRelayRegistration, + event: NativeHookRelayEvent, +): boolean { if (event === "pre_tool_use") { - return true; + // Avoid spawning a native hook relay for every Codex tool call when there + // is no before_tool_call hook, trusted-tool policy, or loop detector work. + return hasBeforeToolCallPolicy() || nativePreToolUseMayRunLoopDetection(registration); } if (event === "post_tool_use") { return hasGlobalHooks("after_tool_call");