From d8198c8c0e8f2d9cc26fc8edaa7b4943962e8b60 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 17 May 2026 11:30:45 +0100 Subject: [PATCH] fix: use Codex runtime context budget for compaction --- CHANGELOG.md | 1 + .../reply/agent-runner-memory.test.ts | 93 +++++++++++++++++++ src/auto-reply/reply/agent-runner-memory.ts | 62 ++++++++++++- 3 files changed, 154 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6daa0fae2202..63ac08dd41c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai - Gateway/secrets: split the lightweight secrets runtime state and auth-store cache from the full secrets runtime and take a startup fast path when the gateway startup config has no SecretRef values, speeding up secrets startup while preserving cleanup and refresh semantics. - Codex app-server: rotate oversized native Codex threads before resume and cap dynamic tool-result text entering native Codex sessions, preventing stale oversized context from surviving OpenClaw compaction. (#82981) Thanks @hansolo949. - Gateway/restart: drain pending replies and active chat runs during restart shutdown before sockets and channels close, aborting timed-out chat runs through the normal cleanup path. (#69121) Thanks @alexlomt. +- Agents/Codex: use the Codex runtime context window for OpenAI-model preflight compaction and memory flush checks, so GPT-5.5 Codex sessions compact before hitting the smaller native context limit. Fixes #82982. Thanks @vliuyt. - QA-Lab: wake qa-bus long polls that arrive with stale future cursors after a bus restart, preserving reconnect readiness for harness clients. (#67142) Thanks @hxy91819. - QA-Lab: stage Multipass transfer scripts under OpenClaw's preferred temp root instead of raw OS temp paths, keeping the VM runner inside temp-path guardrails. (#64098) Thanks @ImLukeF. 
- Agents/replies: keep surviving reply media and append a warning when other media references fail, so partial media normalization no longer drops failures silently. Thanks @Jerry-Xin. diff --git a/src/auto-reply/reply/agent-runner-memory.test.ts b/src/auto-reply/reply/agent-runner-memory.test.ts index 7791803805d7..327d813a62b8 100644 --- a/src/auto-reply/reply/agent-runner-memory.test.ts +++ b/src/auto-reply/reply/agent-runner-memory.test.ts @@ -734,6 +734,99 @@ describe("runMemoryFlushIfNeeded", () => { expect(compactCall.currentTokenCount).toBeGreaterThanOrEqual(100_000); }); + it("uses the persisted Codex runtime context window for OpenAI preflight compaction", async () => { + registerMemoryFlushPlanResolverForTest(() => ({ + softThresholdTokens: 4_000, + forceFlushTranscriptBytes: 1_000_000_000, + reserveTokensFloor: 0, + prompt: "Pre-compaction memory flush.\nNO_REPLY", + systemPrompt: "Write memory to memory/YYYY-MM-DD.md.", + relativePath: "memory/2023-11-14.md", + })); + const sessionEntry: SessionEntry = { + sessionId: "session", + updatedAt: Date.now(), + totalTokens: 347_000, + totalTokensFresh: false, + agentHarnessId: "codex", + }; + + await runPreflightCompactionIfNeeded({ + cfg: { + models: { + providers: { + openai: { models: [{ id: "gpt-5.5", contextWindow: 1_000_000 }] }, + "openai-codex": { models: [{ id: "gpt-5.5", contextWindow: 350_000 }] }, + }, + }, + agents: { defaults: { compaction: { memoryFlush: {} } } }, + } as never, + followupRun: createTestFollowupRun({ + provider: "openai", + model: "gpt-5.5", + sessionId: "session", + sessionKey: "main", + }), + defaultModel: "gpt-5.5", + sessionEntry, + sessionStore: { main: sessionEntry }, + sessionKey: "main", + storePath: path.join(rootDir, "sessions.json"), + isHeartbeat: false, + replyOperation: createReplyOperation(), + }); + + expect(compactEmbeddedPiSessionMock).toHaveBeenCalledTimes(1); + const compactCall = requireCompactEmbeddedPiSessionCall(); + 
expect(compactCall.currentTokenCount).toBe(347_000); + }); + + it("keeps the OpenAI API context window for persisted PI runtime overrides", async () => { + registerMemoryFlushPlanResolverForTest(() => ({ + softThresholdTokens: 4_000, + forceFlushTranscriptBytes: 1_000_000_000, + reserveTokensFloor: 0, + prompt: "Pre-compaction memory flush.\nNO_REPLY", + systemPrompt: "Write memory to memory/YYYY-MM-DD.md.", + relativePath: "memory/2023-11-14.md", + })); + const sessionEntry: SessionEntry = { + sessionId: "session", + updatedAt: Date.now(), + totalTokens: 347_000, + totalTokensFresh: false, + agentRuntimeOverride: "pi", + }; + + const entry = await runPreflightCompactionIfNeeded({ + cfg: { + models: { + providers: { + openai: { models: [{ id: "gpt-5.5", contextWindow: 1_000_000 }] }, + "openai-codex": { models: [{ id: "gpt-5.5", contextWindow: 350_000 }] }, + }, + }, + agents: { defaults: { compaction: { memoryFlush: {} } } }, + } as never, + followupRun: createTestFollowupRun({ + provider: "openai", + model: "gpt-5.5", + sessionId: "session", + sessionKey: "main", + }), + defaultModel: "gpt-5.5", + sessionEntry, + sessionStore: { main: sessionEntry }, + sessionKey: "main", + storePath: path.join(rootDir, "sessions.json"), + isHeartbeat: false, + replyOperation: createReplyOperation(), + }); + + expect(entry).toBe(sessionEntry); + expect(compactEmbeddedPiSessionMock).not.toHaveBeenCalled(); + }); + it("uses the active run sessionFile when the session entry has no transcript path", async () => { const sessionFile = path.join(rootDir, "active-run-session.jsonl"); await fs.writeFile( diff --git a/src/auto-reply/reply/agent-runner-memory.ts b/src/auto-reply/reply/agent-runner-memory.ts index 63d009ba10ce..7ab139c4ae5d 100644 --- a/src/auto-reply/reply/agent-runner-memory.ts +++ b/src/auto-reply/reply/agent-runner-memory.ts @@ -3,8 +3,10 @@ import fs from "node:fs"; import type { AgentMessage } from "@earendil-works/pi-agent-core"; import { 
/**
 * Chooses the provider key used to look up context-window configuration for a
 * follow-up run.
 *
 * Resolution order for the runtime id:
 *  1. `agentRuntimeOverride` persisted on the session entry, unless it is
 *     empty, "auto" or "default".
 *  2. `agentHarnessId` persisted on the session entry.
 *  3. The `runtime` reported by `resolveAgentHarnessPolicy` for the run.
 *
 * Persisted values are only consulted when the entry's `sessionId` equals the
 * run's `sessionId`; a mismatching entry is ignored.
 *
 * @param params.cfg - Config forwarded to the harness policy lookup.
 * @param params.followupRun - Run providing provider, model, agent id and session identifiers.
 * @param params.sessionEntry - Optional persisted session entry.
 * @param params.sessionKey - Optional session key; second choice for the policy lookup.
 * @param params.runtimePolicySessionKey - Optional key; first choice for the policy lookup.
 * @returns The result of `resolveContextConfigProviderForRuntime` for the chosen runtime id.
 */
function resolveFollowupContextConfigProvider(params: {
  cfg: OpenClawConfig;
  followupRun: FollowupRun;
  sessionEntry?: SessionEntry;
  sessionKey?: string;
  runtimePolicySessionKey?: string;
}): string {
  const {
    cfg,
    followupRun: { run },
    sessionEntry,
  } = params;
  const provider = run.provider;

  // Only trust persisted runtime data that belongs to the same session as the run.
  const entryForRun = sessionEntry?.sessionId === run.sessionId ? sessionEntry : undefined;

  const override = normalizeOptionalString(entryForRun?.agentRuntimeOverride);
  // "auto" / "default" (or no override at all) defer to the recorded harness id.
  const persistedRuntimeId =
    !override || override === "auto" || override === "default"
      ? entryForRun?.agentHarnessId
      : override;

  // The harness policy is resolved lazily, only when nothing usable was persisted.
  const runtimeId = persistedRuntimeId
    ? persistedRuntimeId
    : resolveAgentHarnessPolicy({
        provider,
        modelId: run.model,
        config: cfg,
        agentId: run.agentId,
        sessionKey:
          params.runtimePolicySessionKey ??
          params.sessionKey ??
          run.runtimePolicySessionKey ??
          run.sessionKey,
      }).runtime;

  return resolveContextConfigProviderForRuntime({ provider, runtimeId });
}