fix(gateway): stabilize webchat prompt cache affinity

Keep WebChat run/idempotency ids per message while threading a stable hashed promptCacheKey through chat.send into embedded runs. Fixes #89139.
This commit is contained in:
Vincent Koc
2026-06-03 00:33:02 -07:00
committed by GitHub
parent 85e5d486df
commit 175cfe4846
7 changed files with 70 additions and 0 deletions

View File

@@ -49,6 +49,8 @@ export type PartialReplyPayload = Pick<ReplyPayload, "text" | "mediaUrls"> & {
export type GetReplyOptions = {
/** Override run id for agent events (defaults to random UUID). */
runId?: string;
/** Stable provider prompt-cache affinity key; distinct from run id/idempotency. */
promptCacheKey?: string;
/** Abort signal for the underlying agent run. */
abortSignal?: AbortSignal;
/** Optional inbound images (used for webchat attachments). */

View File

@@ -2231,6 +2231,7 @@ export async function runAgentTurnWithFallback(params: {
hasRepliedRef: params.opts?.hasRepliedRef,
provider,
runId,
promptCacheKey: params.opts?.promptCacheKey,
allowTransientCooldownProbe: runOptions?.allowTransientCooldownProbe,
model,
});

View File

@@ -55,6 +55,7 @@ export function buildEmbeddedRunBaseParams(params: {
provider: string;
model: string;
runId: string;
promptCacheKey?: string;
authProfile: ReturnType<typeof resolveProviderScopedAuthProfile>;
allowTransientCooldownProbe?: boolean;
isReasoningTagProvider?: ReasoningTagProviderResolver;
@@ -99,6 +100,7 @@ export function buildEmbeddedRunBaseParams(params: {
bashElevated: params.run.bashElevated,
timeoutMs: params.run.timeoutMs,
runId: params.runId,
promptCacheKey: params.promptCacheKey,
allowTransientCooldownProbe: params.allowTransientCooldownProbe,
};
}

View File

@@ -25,6 +25,7 @@ const {
buildThreadingToolContext,
buildEmbeddedRunBaseParams,
buildEmbeddedRunContexts,
buildEmbeddedRunExecutionParams,
resolveModelFallbackOptions,
resolveEnforceFinalTag,
resolveProviderScopedAuthProfile,
@@ -138,6 +139,7 @@ describe("agent-runner-utils", () => {
provider: "openai",
model: "gpt-4.1-mini",
runId: "run-1",
promptCacheKey: "webchat-cache-key",
authProfile,
});
@@ -160,6 +162,24 @@ describe("agent-runner-utils", () => {
expect(resolved.bashElevated).toBe(run.bashElevated);
expect(resolved.timeoutMs).toBe(run.timeoutMs);
expect(resolved.runId).toBe("run-1");
expect(resolved.promptCacheKey).toBe("webchat-cache-key");
});
// Checks that a caller-supplied prompt-cache key is surfaced on `runBaseParams`
// next to the run id, with neither value altered on the way through.
it("threads prompt cache affinity through embedded execution params", () => {
  const expectedRunId = "run-1";
  const expectedCacheKey = "stable-session-cache-key";

  const { runBaseParams } = buildEmbeddedRunExecutionParams({
    run: makeRun(),
    sessionCtx: { Provider: "webchat" },
    hasRepliedRef: undefined,
    provider: "openai",
    model: "gpt-4.1-mini",
    runId: expectedRunId,
    promptCacheKey: expectedCacheKey,
  });

  expect(runBaseParams.runId).toBe(expectedRunId);
  expect(runBaseParams.promptCacheKey).toBe(expectedCacheKey);
});
it("passes through recovered auto fallback provenance for embedded run params", () => {

View File

@@ -273,6 +273,7 @@ export function buildEmbeddedRunExecutionParams(params: {
provider: string;
model: string;
runId: string;
promptCacheKey?: string;
allowTransientCooldownProbe?: boolean;
}) {
const { authProfile, embeddedContext, senderContext } = buildEmbeddedRunContexts(params);
@@ -281,6 +282,7 @@ export function buildEmbeddedRunExecutionParams(params: {
provider: params.provider,
model: params.model,
runId: params.runId,
promptCacheKey: params.promptCacheKey,
authProfile,
allowTransientCooldownProbe: params.allowTransientCooldownProbe,
});

View File

@@ -333,6 +333,28 @@ function buildMediaOnlyTtsSupplementTranscriptMarker(
return buildTtsSupplementTranscriptMarker(payload);
}
/**
 * Derives a deterministic prompt-cache key from the provider, model, agent and
 * session key of a WebChat conversation.
 *
 * The inputs are hashed with SHA-256 and only the first 32 hex characters of
 * the digest are kept, so none of the raw values appear in the returned key.
 *
 * @param params.agentId - Agent id; passed through `normalizeAgentId` before hashing.
 * @param params.model - Model id; surrounding whitespace is trimmed, case is preserved.
 * @param params.provider - Provider id; trimmed and lower-cased before hashing.
 * @param params.sessionKey - Session key; hashed exactly as given.
 * @returns A string of the form `openclaw-webchat-<32 hex chars>`.
 */
function resolveWebchatPromptCacheKey(params: {
  agentId: string;
  model: string;
  provider: string;
  sessionKey: string;
}): string {
  const { agentId, model, provider, sessionKey } = params;

  // The leading version tag and the field order are part of the hashed
  // material: changing either one changes every key this function produces.
  const fingerprintParts = [
    "v1",
    provider.trim().toLowerCase(),
    model.trim(),
    normalizeAgentId(agentId),
    sessionKey,
  ];

  // NUL-separated so that adjacent fields cannot run together into the same
  // hashed string.
  const hasher = createHash("sha256");
  hasher.update(fingerprintParts.join("\0"), "utf8");
  const shortDigest = hasher.digest("hex").slice(0, 32);

  return `openclaw-webchat-${shortDigest}`;
}
async function buildWebchatAssistantMediaMessage(
payloads: ReplyPayload[],
options?: {
@@ -3633,6 +3655,16 @@ export const chatHandlers: GatewayRequestHandlers = {
dispatcher,
replyOptions: {
runId: clientRunId,
...(isOperatorUiClient(clientInfo)
? {
promptCacheKey: resolveWebchatPromptCacheKey({
agentId,
provider: resolvedSessionModel.provider,
model: resolvedSessionModel.model,
sessionKey: activeRunScopeKey,
}),
}
: {}),
abortSignal: activeRunAbort.controller.signal,
images: replyOptionImages,
imageOrder: imageOrder.length > 0 ? imageOrder : undefined,

View File

@@ -1004,6 +1004,17 @@ describe("gateway server chat", () => {
},
]);
expect(dispatchInboundMessageMock).toHaveBeenCalledTimes(2);
const dispatchOptions = dispatchInboundMessageMock.mock.calls.map(([params]) => {
return (params as { replyOptions?: GetReplyOptions }).replyOptions;
});
expect(dispatchOptions[0]?.runId).toBe("idem-sequential-a");
expect(dispatchOptions[1]?.runId).toBe("idem-sequential-b");
expect(dispatchOptions[0]?.promptCacheKey).toEqual(
expect.stringMatching(/^openclaw-webchat-[a-f0-9]{32}$/u),
);
expect(dispatchOptions[1]?.promptCacheKey).toBe(dispatchOptions[0]?.promptCacheKey);
expect(dispatchOptions[0]?.promptCacheKey).not.toContain("main");
expect(dispatchOptions[0]?.promptCacheKey).not.toContain("sess-main");
expect(context.addChatRun).toHaveBeenCalledTimes(2);
} finally {
dispatchInboundMessageMock.mockReset();