fix: share signed thinking replay policy

2026-06-06 05:51:15 +08:00 · 2026-05-24 05:56:28 +01:00
parent 906476af0c
commit d6c9387c0f
6 changed files with 78 additions and 13 deletions
--- a/src/agents/pi-embedded-runner/replay-history.ts
+++ b/src/agents/pi-embedded-runner/replay-history.ts
@@ -35,6 +35,7 @@ import { STREAM_ERROR_FALLBACK_TEXT } from "../stream-message-shared.js";
 import { sanitizeToolCallIdsForCloudCodeAssist } from "../tool-call-id.js";
 import type { TranscriptPolicy } from "../transcript-policy.js";
 import {
+  providerRequiresSignedThinking,
  resolveTranscriptPolicy,
  shouldAllowProviderOwnedThinkingReplay,
 } from "../transcript-policy.js";
@@ -658,12 +659,6 @@ function isSameModelSnapshot(a: ModelSnapshotEntry, b: ModelSnapshotEntry): bool
  );
 }

-const SIGNED_THINKING_PROVIDERS = new Set(["anthropic", "amazon-bedrock", "anthropic-vertex"]);
-
-function providerRequiresSignedThinking(provider?: string | null): boolean {
-  return SIGNED_THINKING_PROVIDERS.has(provider ?? "");
-}
-
 /**
 * Applies the generic replay-history cleanup pipeline before provider-owned
 * replay hooks run.
@@ -697,12 +692,11 @@ export async function sanitizeSessionHistory(params: {
    });
  const withInterSessionMarkers = annotateInterSessionUserMessages(params.messages);
  const signedThinkingProvider = providerRequiresSignedThinking(params.provider);
-  const allowProviderOwnedThinkingReplay =
-    shouldAllowProviderOwnedThinkingReplay({
-      modelApi: params.modelApi,
-      policy,
-    }) ||
-    (signedThinkingProvider && !policy.dropThinkingBlocks);
+  const allowProviderOwnedThinkingReplay = shouldAllowProviderOwnedThinkingReplay({
+    modelApi: params.modelApi,
+    provider: params.provider,
+    policy,
+  });
  const isOpenAIResponsesApi =
    params.modelApi === "openai-responses" ||
    params.modelApi === "openai-codex-responses" ||
--- a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.test.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.test.ts
@@ -130,6 +130,48 @@ describe("sanitizeReplayToolCallIdsForStream", () => {
    });
  });

+  it("preserves signed-thinking replay ids when requested by provider policy", () => {
+    // An assistant turn pairing a signed thinking block with a tool call, followed by its result.
+    // With `preserveReplaySafeThinkingToolCallIds` set, both sides must keep `rawId` as-is.
+    const rawId = "call_1";
+    const assistantTurn = {
+      role: "assistant",
+      content: [
+        { type: "thinking", thinking: "internal", thinkingSignature: "sig_1" },
+        { type: "toolUse", id: rawId, name: "read", input: { path: "." } },
+      ],
+    } as never;
+    const toolResultTurn = {
+      role: "toolResult",
+      toolCallId: rawId,
+      toolUseId: rawId,
+      toolName: "read",
+      content: [{ type: "text", text: "ok" }],
+      isError: false,
+    } as never;
+
+    const out = sanitizeReplayToolCallIdsForStream({
+      messages: [assistantTurn, toolResultTurn],
+      mode: "strict",
+      preserveReplaySafeThinkingToolCallIds: true,
+      repairToolUseResultPairing: true,
+    });
+
+    expect(out.map((message) => message.role)).toEqual(["assistant", "toolResult"]);
+    expect(requireAssistantMessage(out[0]).content[1]).toMatchObject({
+      type: "toolUse",
+      id: rawId,
+      name: "read",
+    });
+    expect(toolResultSummary(out[1])).toEqual({
+      role: "toolResult",
+      toolCallId: rawId,
+      toolUseId: rawId,
+      toolName: "read",
+      isError: false,
+    });
+  });
+
  it("synthesizes missing tool results after strict id sanitization", () => {
    const rawId = "call_function_av7cbkigmk7x1";
    const out = sanitizeReplayToolCallIdsForStream({
--- a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts
@@ -919,6 +919,7 @@ export function wrapStreamFnSanitizeMalformedToolCalls(
    TranscriptPolicy,
    "validateGeminiTurns" | "validateAnthropicTurns" | "preserveSignatures" | "dropThinkingBlocks"
  >,
+  provider?: string | null,
 ): StreamFn {
  return (model, context, options) => {
    const ctx = context as unknown as { messages?: unknown };
@@ -928,6 +929,7 @@ export function wrapStreamFnSanitizeMalformedToolCalls(
    }
    const allowProviderOwnedThinkingReplay = shouldAllowProviderOwnedThinkingReplay({
      modelApi: (model as { api?: unknown })?.api as string | null | undefined,
+      provider,
      policy: {
        validateAnthropicTurns: transcriptPolicy?.validateAnthropicTurns === true,
        preserveSignatures: transcriptPolicy?.preserveSignatures === true,
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -2855,6 +2855,7 @@ export async function runEmbeddedAttempt(
            preserveNativeAnthropicToolUseIds: transcriptPolicy.preserveNativeAnthropicToolUseIds,
            preserveReplaySafeThinkingToolCallIds: shouldAllowProviderOwnedThinkingReplay({
              modelApi: (model as { api?: unknown })?.api as string | null | undefined,
+              provider: params.provider,
              policy: transcriptPolicy,
            }),
            repairToolUseResultPairing: transcriptPolicy.repairToolUseResultPairing,
@@ -2911,6 +2912,7 @@ export async function runEmbeddedAttempt(
        activeSession.agent.streamFn,
        allowedToolNames,
        transcriptPolicy,
+        params.provider,
      );
      activeSession.agent.streamFn = wrapStreamFnTrimToolCallNames(
        activeSession.agent.streamFn,
--- a/src/agents/transcript-policy.test.ts
+++ b/src/agents/transcript-policy.test.ts
@@ -617,6 +617,26 @@ describe("resolveTranscriptPolicy", () => {
    ).toBe(true);
  });

+  // One case per SIGNED_THINKING_PROVIDERS entry. `preserveSignatures` is off, so only the
+  // provider check can satisfy the signed-thinking condition.
+  it.each(["anthropic", "amazon-bedrock", "anthropic-vertex"] as const)(
+    "allows provider-owned thinking replay for signed-thinking %s recovery policies",
+    (provider) => {
+      expect(
+        shouldAllowProviderOwnedThinkingReplay({
+          provider,
+          modelApi:
+            provider === "amazon-bedrock" ? "bedrock-converse-stream" : "anthropic-messages",
+          policy: {
+            validateAnthropicTurns: true,
+            preserveSignatures: false,
+            dropThinkingBlocks: false,
+          },
+        }),
+      ).toBe(true);
+    },
+  );
+
  it("does not allow immutable provider-owned thinking replay for github-copilot claude models", () => {
    const policy = resolveTranscriptPolicy({
      provider: "github-copilot",
--- a/src/agents/transcript-policy.ts
+++ b/src/agents/transcript-policy.ts
@@ -32,17 +32,49 @@ export type TranscriptPolicy = {
  allowSyntheticToolResults: boolean;
 };

+/**
+ * Provider ids that `providerRequiresSignedThinking` accepts.
+ * Lookups use the output of `normalizeProviderId`, so entries must be in normalized form.
+ */
+const SIGNED_THINKING_PROVIDERS: ReadonlySet<string> = new Set([
+  "anthropic",
+  "amazon-bedrock",
+  "anthropic-vertex",
+]);
+
+/**
+ * Reports whether `provider` is listed in `SIGNED_THINKING_PROVIDERS`.
+ *
+ * @param provider - Provider id, normalized with `normalizeProviderId` before the lookup;
+ *   `null` and `undefined` are normalized as the empty string.
+ * @returns `true` when the normalized id is in the set, otherwise `false`.
+ */
+export function providerRequiresSignedThinking(provider?: string | null): boolean {
+  return SIGNED_THINKING_PROVIDERS.has(normalizeProviderId(provider ?? ""));
+}
+
+/**
+ * Gate for replaying provider-owned thinking blocks. Returns `true` only when all of these hold:
+ * - `modelApi` passes `isAnthropicApi`;
+ * - `policy.validateAnthropicTurns` is set;
+ * - `policy.preserveSignatures` is set, or `provider` passes
+ *   `providerRequiresSignedThinking`;
+ * - `policy.dropThinkingBlocks` is not set.
+ */
 export function shouldAllowProviderOwnedThinkingReplay(params: {
  modelApi?: string | null;
+  provider?: string | null;
  policy: Pick<
    TranscriptPolicy,
    "validateAnthropicTurns" | "preserveSignatures" | "dropThinkingBlocks"
  >;
 }): boolean {
+  const hasProviderOwnedSignedThinking =
+    params.policy.preserveSignatures || providerRequiresSignedThinking(params.provider);
  return (
    isAnthropicApi(params.modelApi) &&
    params.policy.validateAnthropicTurns &&
-    params.policy.preserveSignatures &&
+    hasProviderOwnedSignedThinking &&
    !params.policy.dropThinkingBlocks
  );
 }