Compare commits

..

7 Commits

Author SHA1 Message Date
Omar Shahine
d5335b0743 fix(codex): honor normalized source routes 2026-06-24 19:27:03 -07:00
Omar Shahine
9f84eaa087 fix(codex): require delivered non-send telemetry 2026-06-24 19:19:52 -07:00
Omar Shahine
97264cb7cb fix(codex): reject alias-routed source replies 2026-06-24 19:16:07 -07:00
Omar Shahine
87d3d14ec8 fix(codex): require delivered reply receipts 2026-06-24 19:09:13 -07:00
Omar Shahine
dcb02431d6 fix(codex): account for source reply SDK surface 2026-06-24 18:59:29 -07:00
Omar Shahine
d94d2c8b35 fix(codex): accept numeric source message ids 2026-06-24 18:55:51 -07:00
Omar Shahine
4291b6b7b9 fix(codex): recognize message tool source replies 2026-06-24 18:55:50 -07:00
37 changed files with 1003 additions and 378 deletions

View File

@@ -1843,7 +1843,7 @@ jobs:
git -C "$GITHUB_WORKSPACE" checkout --detach refs/remotes/origin/checkout
- name: Setup Python
uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1 # v6
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
with:
python-version: "3.12"

View File

@@ -73,7 +73,7 @@ jobs:
- name: Create ClawSweeper dispatch token
id: token
if: ${{ env.HAS_CLAWSWEEPER_APP_PRIVATE_KEY == 'true' }}
uses: actions/create-github-app-token@bcd2ba49218906704ab6c1aa796996da409d3eb1 # v3.2.0
uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
with:
client-id: ${{ env.CLAWSWEEPER_APP_CLIENT_ID }}
private-key: ${{ secrets.CLAWSWEEPER_APP_PRIVATE_KEY }}
@@ -102,7 +102,7 @@ jobs:
steps.comment_filter.outputs.is_command == 'true' &&
env.HAS_CLAWSWEEPER_APP_PRIVATE_KEY == 'true'
}}
uses: actions/create-github-app-token@bcd2ba49218906704ab6c1aa796996da409d3eb1 # v3.2.0
uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
with:
client-id: ${{ env.CLAWSWEEPER_APP_CLIENT_ID }}
private-key: ${{ secrets.CLAWSWEEPER_APP_PRIVATE_KEY }}

View File

@@ -29,7 +29,7 @@ jobs:
submodules: false
- name: Setup Java
uses: actions/setup-java@ad2b38190b15e4d6bdf0c97fb4fca8412226d287 # v5
uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5
with:
distribution: temurin
java-version: "21"

View File

@@ -57,7 +57,7 @@ jobs:
- name: Create autoscrub app token
id: app-token
continue-on-error: true
uses: actions/create-github-app-token@bcd2ba49218906704ab6c1aa796996da409d3eb1 # v3.2.0
uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
with:
app-id: "2729701"
private-key: ${{ secrets.GH_APP_PRIVATE_KEY }}
@@ -69,7 +69,7 @@ jobs:
id: app-token-fallback
continue-on-error: true
if: steps.app-token.outcome == 'failure'
uses: actions/create-github-app-token@bcd2ba49218906704ab6c1aa796996da409d3eb1 # v3.2.0
uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
with:
app-id: "2971289"
private-key: ${{ secrets.GH_APP_PRIVATE_KEY_FALLBACK }}

View File

@@ -149,7 +149,7 @@ jobs:
- name: Run Codex docs agent
if: steps.gate.outputs.run_agent == 'true'
uses: openai/codex-action@10cb888d2ed3b99867f7e7ccff174a861a75aeb6
uses: openai/codex-action@e0fdf01220eb9a88167c4898839d273e3f2609d1
env:
DOCS_AGENT_BASE_SHA: ${{ steps.gate.outputs.review_base_sha }}
DOCS_AGENT_HEAD_SHA: ${{ steps.gate.outputs.review_head_sha }}

View File

@@ -260,7 +260,7 @@ jobs:
run: pnpm build
- name: Setup Go for Crabbox CLI
uses: actions/setup-go@924ae3a1cded613372ab5595356fb5720e22ba16 # v6
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6
with:
go-version: "1.26.x"
cache: false

View File

@@ -250,7 +250,7 @@ jobs:
run: pnpm build
- name: Setup Go for Crabbox CLI
uses: actions/setup-go@924ae3a1cded613372ab5595356fb5720e22ba16 # v6
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6
with:
go-version: "1.26.x"
cache: false

View File

@@ -190,7 +190,7 @@ jobs:
mantis-slack-pnpm-${{ runner.os }}-${{ env.NODE_VERSION }}-
- name: Setup Go for Crabbox CLI
uses: actions/setup-go@924ae3a1cded613372ab5595356fb5720e22ba16 # v6
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6
with:
go-version: "1.26.x"
cache: false

View File

@@ -362,7 +362,7 @@ jobs:
install-bun: "true"
- name: Setup Go for Crabbox CLI
uses: actions/setup-go@924ae3a1cded613372ab5595356fb5720e22ba16 # v6
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6
with:
go-version: "1.26.x"
cache: false
@@ -445,7 +445,7 @@ jobs:
sudo chown -R codex:codex "$GITHUB_WORKSPACE"
- name: Run Codex Mantis Telegram agent
uses: openai/codex-action@10cb888d2ed3b99867f7e7ccff174a861a75aeb6
uses: openai/codex-action@e0fdf01220eb9a88167c4898839d273e3f2609d1
env:
BASELINE_REF: ${{ needs.resolve_request.outputs.baseline_ref }}
BASELINE_SHA: ${{ needs.validate_refs.outputs.baseline_revision }}

View File

@@ -337,7 +337,7 @@ jobs:
mantis-telegram-pnpm-${{ runner.os }}-${{ env.NODE_VERSION }}-
- name: Setup Go for Crabbox CLI
uses: actions/setup-go@924ae3a1cded613372ab5595356fb5720e22ba16 # v6
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6
with:
go-version: "1.26.x"
cache: false

View File

@@ -275,7 +275,7 @@ jobs:
fi
- name: Run Codex maturity scorecard agent
uses: openai/codex-action@10cb888d2ed3b99867f7e7ccff174a861a75aeb6
uses: openai/codex-action@e0fdf01220eb9a88167c4898839d273e3f2609d1
env:
MATURITY_EVIDENCE_DIR: .artifacts/maturity-evidence
MATURITY_SCORES_PATH: qa/maturity-scores.yaml

View File

@@ -129,7 +129,7 @@ jobs:
- name: Run Codex test performance agent
if: steps.gate.outputs.run_agent == 'true'
uses: openai/codex-action@10cb888d2ed3b99867f7e7ccff174a861a75aeb6
uses: openai/codex-action@e0fdf01220eb9a88167c4898839d273e3f2609d1
with:
openai-api-key: ${{ secrets.OPENCLAW_TEST_PERF_AGENT_OPENAI_API_KEY || secrets.OPENAI_API_KEY }}
prompt-file: .github/codex/prompts/test-performance-agent.md

View File

@@ -115,7 +115,7 @@ jobs:
git -C "$GITHUB_WORKSPACE" checkout --detach refs/remotes/origin/checkout
- name: Setup Python
uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1 # v6
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
with:
python-version: "3.12"

View File

@@ -1,2 +1,2 @@
9d5b34975270bb2d16748002c1441ab48fde81af8eb12cc8eb3e341c862232ff plugin-sdk-api-baseline.json
f1a6ff189498d955cad6d6fb912eb4cad7aeb628f89c51d0745e146fe0d163d6 plugin-sdk-api-baseline.jsonl
35b314075ff47453c5d57788861ca0c0e65d6a988b549ab2a2e1757b7590d140 plugin-sdk-api-baseline.json
0dc8abcefccfe7d19280bde5fb2c0c69cf73b782d47e3759e2984baf904fe07c plugin-sdk-api-baseline.jsonl

View File

@@ -1102,6 +1102,585 @@ describe("createCodexDynamicToolBridge", () => {
]);
});
// An implicit-route `send` whose result details carry a `messageId`: the response
// must read `terminate === true`, the source-reply telemetry flag must be set, and
// `terminate` must not show up in `Object.keys(result)`.
it("marks delivered message-tool-only source replies as terminal", async () => {
const bridge = createBridgeWithToolResult(
"message",
textToolResult("Sent.", { messageId: "imessage-6264" }),
{ sourceReplyDeliveryMode: "message_tool_only" },
);
const result = await handleMessageToolCall(bridge, {
action: "send",
message: "visible reply",
});
expect(result).toEqual(expectInputText("Sent."));
expect(result.terminate).toBe(true);
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(true);
expect(Object.keys(result)).not.toContain("terminate");
});
// Registers a codex-runtime tool-result middleware that swaps the result details
// for `{ redacted: true }`. The tool's own result still carries a receipt with
// platform message ids, and the reply is expected to stay terminal.
it("keeps message-tool-only source replies terminal when middleware redacts receipt details", async () => {
const registry = createEmptyPluginRegistry();
registry.agentToolResultMiddlewares.push({
pluginId: "receipt-redactor",
pluginName: "Receipt redactor",
rawHandler: () => undefined,
handler: (event: { result: AgentToolResult<unknown> }) => ({
result: {
content: event.result.content,
details: { redacted: true },
},
}),
runtimes: ["codex"],
source: "test",
});
setActivePluginRegistry(registry);
const bridge = createBridgeWithToolResult(
"message",
textToolResult("Sent.", {
receipt: {
primaryPlatformMessageId: "imessage-6264",
platformMessageIds: ["imessage-6264"],
},
}),
{ sourceReplyDeliveryMode: "message_tool_only" },
);
const result = await handleMessageToolCall(bridge, {
action: "send",
message: "visible reply",
});
expect(result).toEqual(expectInputText("Sent."));
expect(result.terminate).toBe(true);
expect(Object.keys(result)).not.toContain("terminate");
});
// The tool result has no details here. Target telemetry is still recorded for the
// current imessage chat, but that alone must not set `terminate` or the
// source-reply delivery flag.
it("does not treat target telemetry alone as delivered message-tool-only source reply evidence", async () => {
const bridge = createBridgeWithToolResult("message", textToolResult("Sent."), {
sourceReplyDeliveryMode: "message_tool_only",
currentChannelProvider: "imessage",
currentChannelId: "chat-1",
});
const result = await handleMessageToolCall(bridge, {
action: "send",
message: "visible reply",
});
expect(result).toEqual(expectInputText("Sent."));
expect(bridge.telemetry.messagingToolSentTargets).toEqual([
expect.objectContaining({
tool: "message",
provider: "imessage",
to: "chat-1",
text: "visible reply",
}),
]);
expect(result.terminate).toBeUndefined();
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(false);
});
// A `reply` that spells out channel and target explicitly, both equal to the
// current provider and messaging target, is expected to be terminal.
it("keeps message-tool-only source replies terminal for explicit current source routes", async () => {
const bridge = createBridgeWithToolResult(
"message",
textToolResult("Sent.", { ok: true, messageId: "imessage-853" }),
{
sourceReplyDeliveryMode: "message_tool_only",
currentChannelProvider: "imessage",
currentChannelId: "imessage:+12069106512",
currentMessagingTarget: "+12069106512",
},
);
const result = await handleMessageToolCall(bridge, {
action: "reply",
channel: "imessage",
target: "+12069106512",
messageId: "853",
message: "visible reply",
buttons: [],
});
expect(result).toEqual(expectInputText("Sent."));
expect(result.terminate).toBe(true);
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(true);
expect(Object.keys(result)).not.toContain("terminate");
});
// Installs an `sms` test plugin whose `normalizeTarget` turns an 11-digit number
// starting with 1 into `+<digits>`. The formatted target "+1 (206) 910-6512" is
// expected to be recorded as "+12069106512" and to count as the source route.
it("keeps normalized explicit source routes terminal", async () => {
setActivePluginRegistry(
createTestRegistry([
{
pluginId: "sms",
plugin: {
id: "sms",
messaging: {
normalizeTarget: (raw: string) => {
const digits = raw.replace(/\D/gu, "");
return digits.length === 11 && digits.startsWith("1") ? `+${digits}` : raw.trim();
},
},
},
source: "test",
},
]),
);
const bridge = createBridgeWithToolResult(
"message",
textToolResult("Sent.", { ok: true, messageId: "sms-853" }),
{
sourceReplyDeliveryMode: "message_tool_only",
currentChannelProvider: "sms",
currentChannelId: "sms:+12069106512",
currentMessagingTarget: "+12069106512",
},
);
const result = await handleMessageToolCall(bridge, {
action: "reply",
channel: "sms",
target: "+1 (206) 910-6512",
messageId: "853",
message: "visible reply",
});
expect(result).toEqual(expectInputText("Sent."));
expect(bridge.telemetry.messagingToolSentTargets).toEqual([
expect.objectContaining({
tool: "message",
provider: "sms",
to: "+12069106512",
text: "visible reply",
}),
]);
expect(result.terminate).toBe(true);
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(true);
expect(Object.keys(result)).not.toContain("terminate");
});
// The receipt's `repliedTo` equals `currentMessageId`, while the `messageId`
// argument ("857") does not. The reply is still expected to be terminal, and the
// recorded target is the explicit phone number.
it("keeps message-tool-only source replies terminal when the reply receipt matches the current message id", async () => {
const bridge = createBridgeWithToolResult(
"message",
textToolResult("Sent.", {
ok: true,
messageId: "provider-message-1",
repliedTo: "provider-guid-857",
}),
{
sourceReplyDeliveryMode: "message_tool_only",
currentChannelProvider: "imessage",
currentChannelId: "imessage:any;-;+12069106512",
currentMessageId: "provider-guid-857",
},
);
const result = await handleMessageToolCall(bridge, {
action: "reply",
channel: "imessage",
target: "+12069106512",
messageId: "857",
message: "visible reply",
buttons: [],
});
expect(result).toEqual(expectInputText("Sent."));
expect(bridge.telemetry.messagingToolSentTargets).toEqual([
expect.objectContaining({
tool: "message",
provider: "imessage",
to: "+12069106512",
text: "visible reply",
}),
]);
expect(result.terminate).toBe(true);
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(true);
expect(Object.keys(result)).not.toContain("terminate");
});
// Same scenario as a structured receipt, except the receipt is only present as a
// JSON string in the result text (no details object is passed).
it("keeps message-tool-only source replies terminal when a text receipt matches the current message id", async () => {
const receiptText = JSON.stringify({
ok: true,
messageId: "provider-message-1",
repliedTo: "provider-guid-861",
});
const bridge = createBridgeWithToolResult("message", textToolResult(receiptText), {
sourceReplyDeliveryMode: "message_tool_only",
currentChannelProvider: "imessage",
currentChannelId: "imessage:any;-;+12069106512",
currentMessageId: "provider-guid-861",
});
const result = await handleMessageToolCall(bridge, {
action: "reply",
channel: "imessage",
target: "+12069106512",
messageId: "861",
message: "visible reply",
buttons: [],
});
expect(result).toEqual(expectInputText(receiptText));
expect(result.terminate).toBe(true);
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(true);
expect(Object.keys(result)).not.toContain("terminate");
});
// A JSON text receipt marked as a dry run must not terminate the turn, even
// though its `replyToId` equals `currentMessageId`.
it("does not let dry-run reply receipts terminate message-tool-only source replies", async () => {
const receiptText = JSON.stringify({
deliveryStatus: "dry_run",
dryRun: true,
replyToId: "provider-guid-862",
});
const bridge = createBridgeWithToolResult("message", textToolResult(receiptText), {
sourceReplyDeliveryMode: "message_tool_only",
currentChannelProvider: "imessage",
currentChannelId: "imessage:any;-;+12069106512",
currentMessageId: "provider-guid-862",
});
const result = await handleMessageToolCall(bridge, {
action: "reply",
channel: "imessage",
target: "+12069106512",
messageId: "862",
message: "visible reply",
buttons: [],
});
expect(result).toEqual(expectInputText(receiptText));
expect(result.terminate).toBeUndefined();
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(false);
});
// Dry-run details on a `reply` to the source route: no send telemetry of any kind
// may be recorded (send flag false, no sent targets, delivery flag false).
it("does not record dry-run reply actions as committed sends", async () => {
const bridge = createBridgeWithToolResult(
"message",
textToolResult("Dry run.", {
deliveryStatus: "dry_run",
dryRun: true,
}),
{
sourceReplyDeliveryMode: "message_tool_only",
currentChannelProvider: "imessage",
currentChannelId: "imessage:+12069106512",
currentMessagingTarget: "+12069106512",
currentMessageId: "provider-guid-862",
},
);
const result = await handleMessageToolCall(bridge, {
action: "reply",
channel: "imessage",
target: "+12069106512",
messageId: "862",
message: "visible reply",
});
expect(result).toEqual(expectInputText("Dry run."));
expect(result.terminate).toBeUndefined();
expect(bridge.telemetry.didSendViaMessagingTool).toBe(false);
expect(bridge.telemetry.messagingToolSentTargets).toEqual([]);
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(false);
});
// The explicit target only appears as one `;`-separated segment of
// `currentChannelId` ("imessage:any;-;+12069106512"); no `currentMessagingTarget`
// is set. The reply is expected to be terminal.
it("keeps message-tool-only source replies terminal for explicit native target segments", async () => {
const bridge = createBridgeWithToolResult("message", textToolResult("Sent.", { ok: true }), {
sourceReplyDeliveryMode: "message_tool_only",
currentChannelProvider: "imessage",
currentChannelId: "imessage:any;-;+12069106512",
});
const result = await handleMessageToolCall(bridge, {
action: "reply",
channel: "imessage",
target: "+12069106512",
messageId: "863",
message: "visible reply",
buttons: [],
});
expect(result).toEqual(expectInputText("Sent."));
expect(result.terminate).toBe(true);
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(true);
expect(Object.keys(result)).not.toContain("terminate");
});
// No `currentChannelProvider` is supplied; the provider is only visible as the
// prefix of `currentChannelId`. The explicit `channel: "imessage"` must still be
// accepted as the source provider.
it("keeps message-tool-only source replies terminal when the provider is only in the current channel id", async () => {
const bridge = createBridgeWithToolResult("message", textToolResult("Sent.", { ok: true }), {
sourceReplyDeliveryMode: "message_tool_only",
currentChannelId: "imessage:any;-;+12069106512",
});
const result = await handleMessageToolCall(bridge, {
action: "reply",
channel: "imessage",
target: "+12069106512",
messageId: "865",
message: "visible reply",
buttons: [],
});
expect(result).toEqual(expectInputText("Sent."));
expect(result.terminate).toBe(true);
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(true);
expect(Object.keys(result)).not.toContain("terminate");
});
// The tool result itself already carries `terminate: true`, and no current-channel
// context is supplied. The bridge is expected to keep it terminal and to set the
// source-reply delivery flag.
it("records message-tool-owned terminal replies as delivered source replies", async () => {
const bridge = createBridgeWithToolResult(
"message",
{
...textToolResult("Sent.", { ok: true }),
terminate: true,
} as AgentToolResult<unknown>,
{ sourceReplyDeliveryMode: "message_tool_only" },
);
const result = await handleMessageToolCall(bridge, {
action: "reply",
channel: "imessage",
target: "+12069106512",
messageId: "867",
message: "visible reply",
buttons: [],
});
expect(result).toEqual(expectInputText("Sent."));
expect(result.terminate).toBe(true);
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(true);
expect(Object.keys(result)).not.toContain("terminate");
});
// A bare `send` with no result details and no channel context: the send itself is
// recorded (`didSendViaMessagingTool` is true) but the reply is not terminal.
it("does not treat bare send telemetry as delivered message-tool-only source reply evidence", async () => {
const bridge = createBridgeWithToolResult("message", textToolResult("Sent."), {
sourceReplyDeliveryMode: "message_tool_only",
});
const result = await handleMessageToolCall(bridge, {
action: "send",
message: "visible reply",
});
expect(result).toEqual(expectInputText("Sent."));
expect(bridge.telemetry.didSendViaMessagingTool).toBe(true);
expect(result.terminate).toBeUndefined();
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(false);
});
// Two calls on one bridge: a delivered `send` (terminal), then an `inspect` action
// that sends nothing. The earlier send's telemetry must not make the second
// result terminal.
it("does not let prior message-send telemetry terminate a later non-delivery tool result", async () => {
const execute = vi
.fn()
.mockResolvedValueOnce(textToolResult("Sent.", { messageId: "source-reply-1" }))
.mockResolvedValueOnce(textToolResult("No message sent.", { ok: true }));
const bridge = createCodexDynamicToolBridge({
tools: [createTool({ name: "message", execute })],
signal: new AbortController().signal,
hookContext: { sourceReplyDeliveryMode: "message_tool_only" },
});
const firstResult = await handleMessageToolCall(bridge, {
action: "send",
message: "visible reply",
});
const secondResult = await bridge.handleToolCall({
threadId: "thread-1",
turnId: "turn-1",
callId: "call-2",
namespace: null,
tool: "message",
arguments: { action: "inspect" },
});
expect(firstResult.terminate).toBe(true);
expect(bridge.telemetry.didSendViaMessagingTool).toBe(true);
expect(secondResult).toEqual(expectInputText("No message sent."));
expect(secondResult.terminate).toBeUndefined();
});
// A `send` with an explicit target ("channel:other") and no current-channel
// context is not a source reply, even though the result carries a messageId.
it("does not mark explicit message-tool sends as terminal source replies", async () => {
const bridge = createBridgeWithToolResult(
"message",
textToolResult("Sent.", { messageId: "other-chat-message" }),
{ sourceReplyDeliveryMode: "message_tool_only" },
);
const result = await handleMessageToolCall(bridge, {
action: "send",
target: "channel:other",
message: "cross-channel reply",
});
expect(result).toEqual(expectInputText("Sent."));
expect(result.terminate).toBeUndefined();
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(false);
});
// The target matches the current messaging target but the explicit channel is
// "slack" while the source provider is "imessage". Expected: not terminal.
it("does not mark mismatched explicit message-tool sends as terminal source replies", async () => {
const bridge = createBridgeWithToolResult("message", textToolResult("Sent."), {
sourceReplyDeliveryMode: "message_tool_only",
currentChannelProvider: "imessage",
currentChannelId: "imessage:+12069106512",
currentMessagingTarget: "+12069106512",
});
const result = await handleMessageToolCall(bridge, {
action: "reply",
channel: "slack",
target: "+12069106512",
messageId: "853",
message: "cross-provider reply",
});
expect(result).toEqual(expectInputText("Sent."));
expect(result.terminate).toBeUndefined();
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(false);
});
// Same provider and target as the source, but the explicit `threadId` ("171.333")
// differs from `currentThreadId` ("171.222"). Expected: not terminal.
it("does not mark same-target sibling-thread replies as terminal source replies", async () => {
const bridge = createBridgeWithToolResult("message", textToolResult("Sent.", { ok: true }), {
sourceReplyDeliveryMode: "message_tool_only",
currentChannelProvider: "slack",
currentChannelId: "slack:C123",
currentMessagingTarget: "C123",
currentThreadId: "171.222",
});
const result = await handleMessageToolCall(bridge, {
action: "reply",
channel: "slack",
target: "C123",
threadId: "171.333",
message: "sibling thread reply",
});
expect(result).toEqual(expectInputText("Sent."));
expect(result.terminate).toBeUndefined();
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(false);
});
// Sibling-thread case without an explicit `target` argument: the differing
// `threadId` alone must keep the reply from being terminal.
it("does not mark implicit-target sibling-thread replies as terminal source replies", async () => {
const bridge = createBridgeWithToolResult("message", textToolResult("Sent.", { ok: true }), {
sourceReplyDeliveryMode: "message_tool_only",
currentChannelProvider: "slack",
currentChannelId: "slack:C123",
currentMessagingTarget: "C123",
currentThreadId: "171.222",
});
const result = await handleMessageToolCall(bridge, {
action: "reply",
channel: "slack",
threadId: "171.333",
message: "sibling thread reply",
});
expect(result).toEqual(expectInputText("Sent."));
expect(result.terminate).toBeUndefined();
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(false);
});
// The source has no `currentThreadId`, yet the call names an explicit `threadId`.
// Expected: treated as a non-source route, so not terminal.
it("does not mark top-level source replies with explicit thread routes as terminal", async () => {
const bridge = createBridgeWithToolResult("message", textToolResult("Sent.", { ok: true }), {
sourceReplyDeliveryMode: "message_tool_only",
currentChannelProvider: "slack",
currentChannelId: "slack:C123",
currentMessagingTarget: "C123",
});
const result = await handleMessageToolCall(bridge, {
action: "reply",
channel: "slack",
target: "C123",
threadId: "171.333",
message: "thread reply from top-level source",
});
expect(result).toEqual(expectInputText("Sent."));
expect(result.terminate).toBeUndefined();
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(false);
});
// The receipt's `repliedTo` equals `currentMessageId`, but the explicit target
// ("other-chat") is not the source route. The route check is expected to win:
// not terminal.
it("does not let matching reply receipts override explicit non-source routes", async () => {
const bridge = createBridgeWithToolResult(
"message",
textToolResult("Sent.", {
ok: true,
messageId: "other-chat-message",
repliedTo: "provider-guid-853",
}),
{
sourceReplyDeliveryMode: "message_tool_only",
currentChannelProvider: "imessage",
currentChannelId: "imessage:+12069106512",
currentMessagingTarget: "+12069106512",
currentMessageId: "provider-guid-853",
},
);
const result = await handleMessageToolCall(bridge, {
action: "reply",
channel: "imessage",
target: "other-chat",
message: "cross-channel reply",
});
expect(result).toEqual(expectInputText("Sent."));
expect(result.terminate).toBeUndefined();
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(false);
});
// Installs a `slack` test plugin that lower-cases targets and declares `chatGuid`
// as a reply target alias. A reply addressed through `chatGuid: "Channel:C2"` is
// expected to be recorded as "channel:c2" and, since the source is "channel:c1",
// not to be terminal.
it("does not let provider target aliases override source routes", async () => {
setActivePluginRegistry(
createTestRegistry([
{
pluginId: "slack",
plugin: {
id: "slack",
messaging: { normalizeTarget: (raw: string) => raw.trim().toLowerCase() },
actions: {
messageActionTargetAliases: {
reply: {
aliases: ["chatGuid"],
deliveryTargetAliases: ["chatGuid"],
},
},
},
},
source: "test",
},
]),
);
const bridge = createBridgeWithToolResult("message", textToolResult("Sent.", { ok: true }), {
sourceReplyDeliveryMode: "message_tool_only",
currentChannelProvider: "slack",
currentChannelId: "channel:c1",
currentMessagingTarget: "channel:c1",
currentMessageId: "provider-guid-854",
});
const result = await handleMessageToolCall(bridge, {
action: "reply",
channel: "slack",
chatGuid: "Channel:C2",
messageId: "854",
message: "cross-chat reply",
});
expect(result).toEqual(expectInputText("Sent."));
expect(bridge.telemetry.messagingToolSentTargets).toEqual([
expect.objectContaining({
tool: "message",
provider: "slack",
to: "channel:c2",
text: "cross-chat reply",
}),
]);
expect(result.terminate).toBeUndefined();
expect(bridge.telemetry.didDeliverSourceReplyViaMessageTool).toBe(false);
});
it("does not record messaging side effects when the send fails", async () => {
const tool = createTool({
name: "message",

View File

@@ -18,6 +18,8 @@ import {
getChannelAgentToolMeta,
getPluginToolMeta,
type EmbeddedRunAttemptParams,
isDeliveredMessageToolOnlySourceReplyResult,
isDeliveredMessagingToolResult,
isReplaySafeToolCall,
isToolWrappedWithBeforeToolCallHook,
isToolResultError,
@@ -63,9 +65,11 @@ type CodexDynamicToolHookContext = {
currentChannelProvider?: string;
currentChannelId?: string;
currentMessagingTarget?: string;
currentMessageId?: string | number;
currentThreadId?: string;
replyToMode?: "off" | "first" | "all" | "batched";
hasRepliedRef?: { value: boolean };
sourceReplyDeliveryMode?: EmbeddedRunAttemptParams["sourceReplyDeliveryMode"];
onToolOutcome?: EmbeddedRunAttemptParams["onToolOutcome"];
allocateToolOutcomeOrdinal?: EmbeddedRunAttemptParams["allocateToolOutcomeOrdinal"];
};
@@ -100,6 +104,225 @@ function applyCurrentMessageProvider(
return { ...args, provider };
}
/**
 * Canonicalizes a route identifier so two spellings of it can be compared.
 *
 * Finite numbers become their decimal string; strings are trimmed and
 * lower-cased. Non-finite numbers, missing values, and strings that are empty
 * after trimming all yield `undefined`.
 */
function normalizeRouteToken(value: string | number | undefined): string | undefined {
  if (typeof value === "number") {
    if (!Number.isFinite(value)) {
      return undefined;
    }
    return String(value);
  }
  const token = value?.trim().toLowerCase() ?? "";
  return token === "" ? undefined : token;
}
/**
 * Collects every normalized spelling under which the current (source)
 * conversation may be addressed.
 *
 * The set holds the current messaging target, the full current channel id, the
 * channel id with everything up to its first `:` removed, each `;`/`,`
 * separated segment of that remainder, and the channel id with the
 * `<provider>:` prefix removed when it starts with the current provider.
 */
function sourceRouteTokens(hookContext: CodexDynamicToolHookContext | undefined): Set<string> {
  const target = normalizeRouteToken(hookContext?.currentMessagingTarget);
  const channel = normalizeRouteToken(hookContext?.currentChannelId);
  const provider = normalizeRouteToken(hookContext?.currentChannelProvider);

  const routes = new Set<string>();
  const remember = (candidate: string | undefined): void => {
    if (candidate) {
      routes.add(candidate);
    }
  };

  remember(target);
  remember(channel);

  if (channel) {
    const separatorAt = channel.indexOf(":");
    if (separatorAt >= 0) {
      const remainder = channel.slice(separatorAt + 1);
      if (remainder) {
        routes.add(remainder);
        // Native ids may pack several parts into one id; each part counts on its own.
        remainder.split(/[;,]/u).forEach((piece) => remember(normalizeRouteToken(piece)));
      }
    }
    if (provider && channel.startsWith(`${provider}:`)) {
      remember(channel.slice(provider.length + 1));
    }
  }

  return routes;
}
/**
 * Reports whether `token`, once normalized, is one of the spellings of the
 * current source route. Empty or missing tokens never match.
 */
function routeTokenMatchesSource(
  token: string | undefined,
  hookContext: CodexDynamicToolHookContext | undefined,
): boolean {
  const candidate = normalizeRouteToken(token);
  if (candidate === undefined) {
    return false;
  }
  return sourceRouteTokens(hookContext).has(candidate);
}
/**
 * Reports whether `provider` names the provider of the current conversation:
 * either it equals the normalized current channel provider, or the normalized
 * current channel id starts with `<provider>:`.
 */
function routeProviderMatchesSource(
  provider: string | undefined,
  hookContext: CodexDynamicToolHookContext | undefined,
): boolean {
  const wanted = normalizeRouteToken(provider);
  if (wanted === undefined) {
    return false;
  }
  if (normalizeRouteToken(hookContext?.currentChannelProvider) === wanted) {
    return true;
  }
  const channel = normalizeRouteToken(hookContext?.currentChannelId);
  return channel !== undefined && channel.startsWith(`${wanted}:`);
}
/**
 * Reports whether `token` normalizes to the same value as the current message
 * id. A missing or empty token never matches.
 */
function routeTokenMatchesCurrentMessage(
  token: string | number | undefined,
  hookContext: CodexDynamicToolHookContext | undefined,
): boolean {
  const candidate = normalizeRouteToken(token);
  if (candidate === undefined) {
    return false;
  }
  return candidate === normalizeRouteToken(hookContext?.currentMessageId);
}
/**
 * Reads `record[key]` and returns it only when it is a string or a number;
 * any other kind of value (or a missing key) yields `undefined`.
 */
function readRouteToken(record: Record<string, unknown>, key: string): string | number | undefined {
  const candidate = record[key];
  switch (typeof candidate) {
    case "string":
    case "number":
      return candidate;
    default:
      return undefined;
  }
}
/**
 * Reports whether any of the `keys` in `args` holds a usable token that differs
 * from `currentToken` after normalization.
 *
 * Returns `false` when `currentToken` itself is missing or empty, because there
 * is then nothing to compare against.
 */
function explicitRouteTokensMismatchCurrent(
  args: Record<string, unknown>,
  keys: readonly string[],
  currentToken: string | number | undefined,
): boolean {
  const expected = normalizeRouteToken(currentToken);
  if (expected === undefined) {
    return false;
  }
  for (const key of keys) {
    const supplied = normalizeRouteToken(readRouteToken(args, key));
    if (supplied !== undefined && supplied !== expected) {
      return true;
    }
  }
  return false;
}
/**
 * Reports whether the call names a thread other than the current one.
 *
 * Thread tokens are gathered from the explicit thread argument keys and from
 * the extracted messaging target. With no thread token at all the answer is
 * `false`. Otherwise the route is non-source when there is no current thread,
 * or when any gathered token differs from it.
 */
function explicitThreadRouteTargetsNonSource(
  args: Record<string, unknown>,
  hookContext: CodexDynamicToolHookContext | undefined,
  messagingTarget: MessagingToolSend | undefined,
): boolean {
  const requestedThreads: string[] = [];
  for (const key of EXPLICIT_MESSAGE_THREAD_KEYS) {
    const fromArgs = normalizeRouteToken(readRouteToken(args, key));
    if (fromArgs !== undefined) {
      requestedThreads.push(fromArgs);
    }
  }
  const fromTarget = normalizeRouteToken(messagingTarget?.threadId);
  if (fromTarget !== undefined) {
    requestedThreads.push(fromTarget);
  }
  if (requestedThreads.length === 0) {
    return false;
  }

  const currentThread = normalizeRouteToken(hookContext?.currentThreadId);
  if (currentThread === undefined) {
    return true;
  }
  return requestedThreads.some((thread) => thread !== currentThread);
}
/**
 * Recursively searches a tool result for a reply receipt that points at the
 * message currently being answered.
 *
 * - Strings are only inspected when they look like a JSON object or array; they
 *   are parsed and searched, and unparsable text counts as no match.
 * - Arrays match when any element matches.
 * - Objects match when one of the reply-id fields equals the current message
 *   id, or when one of the known wrapper fields contains a match.
 *
 * Reply ids are read as strings or numbers, so a numeric receipt id can match a
 * current message id of the same value.
 *
 * @param value Result fragment to inspect (any shape).
 * @param hookContext Supplies the current message id to compare against.
 * @param depth Current nesting level; the search stops once it exceeds 4.
 * @returns `true` when a matching reply receipt was found.
 */
function replyReceiptMatchesCurrentMessage(
  value: unknown,
  hookContext: CodexDynamicToolHookContext | undefined,
  depth = 0,
): boolean {
  if (depth > 4 || value === null) {
    return false;
  }
  if (typeof value === "string") {
    const trimmed = value.trim();
    // Only text that could be a JSON object or array is worth parsing.
    if (!trimmed || !["{", "["].includes(trimmed[0] ?? "")) {
      return false;
    }
    try {
      return replyReceiptMatchesCurrentMessage(JSON.parse(trimmed), hookContext, depth + 1);
    } catch {
      return false;
    }
  }
  if (typeof value !== "object") {
    return false;
  }
  if (Array.isArray(value)) {
    return value.some((item) => replyReceiptMatchesCurrentMessage(item, hookContext, depth + 1));
  }
  const record = value as Record<string, unknown>;
  for (const key of ["repliedTo", "replyTo", "replyToId", "replyToIdFull"]) {
    // readRouteToken keeps numeric ids, which a string-only check would drop.
    if (routeTokenMatchesCurrentMessage(readRouteToken(record, key), hookContext)) {
      return true;
    }
  }
  for (const key of [
    "content",
    "details",
    "payload",
    "receipt",
    "result",
    "results",
    "sendResult",
    "text",
  ]) {
    if (replyReceiptMatchesCurrentMessage(record[key], hookContext, depth + 1)) {
      return true;
    }
  }
  return false;
}
/**
 * Decides whether a message-tool call explicitly routes somewhere other than
 * the conversation being answered.
 *
 * Checks run in this order, and the first hit returns `true`:
 * 1. an explicit provider argument that is not the source provider;
 * 2. an explicit thread that is not the current thread;
 * 3. an explicit reply id that differs from the current message id.
 *
 * After that, when the extracted messaging target has a `to`, the answer
 * depends on that value alone. Otherwise every explicit string target
 * (including entries of `args.targets`) must match the source route.
 */
function hasExplicitNonSourceMessageRoute(
  args: Record<string, unknown>,
  hookContext: CodexDynamicToolHookContext | undefined,
  messagingTarget: MessagingToolSend | undefined,
): boolean {
  const currentProvider = normalizeRouteToken(hookContext?.currentChannelProvider);
  const namesForeignProvider = EXPLICIT_MESSAGE_PROVIDER_KEYS.some((key) => {
    const raw = args[key];
    const provider = normalizeRouteToken(typeof raw === "string" ? raw : undefined);
    return (
      provider !== undefined &&
      currentProvider !== provider &&
      !routeProviderMatchesSource(provider, hookContext)
    );
  });
  if (namesForeignProvider) {
    return true;
  }

  if (explicitThreadRouteTargetsNonSource(args, hookContext, messagingTarget)) {
    return true;
  }

  const namesOtherMessage = explicitRouteTokensMismatchCurrent(
    args,
    EXPLICIT_MESSAGE_REPLY_KEYS,
    hookContext?.currentMessageId,
  );
  if (namesOtherMessage) {
    return true;
  }

  // An extracted destination, when present, settles the question by itself.
  const extractedTo = messagingTarget?.to;
  if (extractedTo !== undefined) {
    return !routeTokenMatchesSource(extractedTo, hookContext);
  }

  const explicitTargets: string[] = [];
  const collect = (raw: unknown): void => {
    if (typeof raw === "string" && normalizeRouteToken(raw) !== undefined) {
      explicitTargets.push(raw);
    }
  };
  for (const key of EXPLICIT_MESSAGE_TARGET_KEYS) {
    collect(args[key]);
  }
  if (Array.isArray(args.targets)) {
    for (const raw of args.targets) {
      collect(raw);
    }
  }
  return explicitTargets.some((target) => !routeTokenMatchesSource(target, hookContext));
}
/** Runtime bridge returned to Codex app-server attempt code. */
export type CodexDynamicToolBridge = {
availableSpecs: CodexDynamicToolSpec[];
@@ -114,6 +337,7 @@ export type CodexDynamicToolBridge = {
) => Promise<CodexDynamicToolCallResponse>;
telemetry: {
didSendViaMessagingTool: boolean;
didDeliverSourceReplyViaMessageTool: boolean;
messagingToolSentTexts: string[];
messagingToolSentMediaUrls: string[];
messagingToolSentTargets: MessagingToolSend[];
@@ -132,6 +356,10 @@ export const CODEX_OPENCLAW_DYNAMIC_TOOL_NAMESPACE = "openclaw";
// Keep OpenClaw session spawning searchable in Codex mode so Codex's native
// spawn_agent remains the primary Codex subagent surface.
const ALWAYS_DIRECT_DYNAMIC_TOOL_NAMES = new Set(["sessions_yield"]);
// Message-tool argument names inspected when deciding whether a call explicitly
// routes away from the source conversation.
// Arguments that name the provider/channel kind.
const EXPLICIT_MESSAGE_PROVIDER_KEYS = ["channel", "provider"];
// Arguments that name the destination (checked alongside the `targets` array).
const EXPLICIT_MESSAGE_TARGET_KEYS = ["target", "to", "channelId"];
// Arguments that name a thread or topic.
const EXPLICIT_MESSAGE_THREAD_KEYS = ["threadId", "thread_id", "messageThreadId", "topicId"];
// Arguments that name the message being replied to; compared with the current message id.
const EXPLICIT_MESSAGE_REPLY_KEYS = ["replyTo", "replyToId", "replyToIdFull"];
const DEFAULT_CODEX_DYNAMIC_TOOL_RESULT_MAX_CHARS = 16_000;
/**
@@ -176,6 +404,7 @@ export function createCodexDynamicToolBridge(params: {
emitQuarantinedDynamicToolDiagnostics(quarantinedTools, params.hookContext);
const telemetry: CodexDynamicToolBridge["telemetry"] = {
didSendViaMessagingTool: false,
didDeliverSourceReplyViaMessageTool: false,
messagingToolSentTexts: [],
messagingToolSentMediaUrls: [],
messagingToolSentTargets: [],
@@ -333,10 +562,9 @@ export function createCodexDynamicToolBridge(params: {
executedArgs,
params.hookContext?.currentChannelProvider,
);
const messagingTarget =
isMessagingTool(toolName) && isMessagingToolSendAction(toolName, executedArgs)
? extractMessagingToolSend(toolName, messagingTelemetryArgs, messagingContext)
: undefined;
const messagingTarget = isMessagingTool(toolName)
? extractMessagingToolSend(toolName, messagingTelemetryArgs, messagingContext)
: undefined;
const confirmedMessagingTarget =
!rawIsError && messagingTarget
? extractMessagingToolSendResult(messagingTarget, telemetryRawResult)
@@ -358,12 +586,53 @@ export function createCodexDynamicToolBridge(params: {
},
terminalType,
);
const blocksSourceReplyTermination = hasExplicitNonSourceMessageRoute(
executedArgs,
params.hookContext,
confirmedMessagingTarget,
);
const deliveredSourceReply = isDeliveredMessageToolOnlySourceReplyResult({
sourceReplyDeliveryMode: params.hookContext?.sourceReplyDeliveryMode,
toolName,
args: executedArgs,
result,
hookResult: rawResult,
isError: resultIsError,
allowExplicitSourceRoute: !blocksSourceReplyTermination,
});
const receiptConfirmedSourceReply =
params.hookContext?.sourceReplyDeliveryMode === "message_tool_only" &&
toolName === "message" &&
normalizeRouteToken(
typeof executedArgs.action === "string" ? executedArgs.action : undefined,
) === "reply" &&
!resultIsError &&
!blocksSourceReplyTermination &&
isDeliveredMessagingToolResult({
toolName,
args: executedArgs,
result,
hookResult: rawResult,
isError: resultIsError,
}) &&
(replyReceiptMatchesCurrentMessage(rawResult, params.hookContext) ||
replyReceiptMatchesCurrentMessage(result, params.hookContext));
const toolConfirmedSourceReply =
params.hookContext?.sourceReplyDeliveryMode === "message_tool_only" &&
toolName === "message" &&
!resultIsError &&
(rawResult.terminate === true || result.terminate === true);
if (deliveredSourceReply || receiptConfirmedSourceReply || toolConfirmedSourceReply) {
telemetry.didDeliverSourceReplyViaMessageTool = true;
}
withDynamicToolTermination(
response,
rawResult.terminate === true ||
result.terminate === true ||
isToolResultYield(rawResult) ||
isToolResultYield(result),
isToolResultYield(result) ||
deliveredSourceReply ||
receiptConfirmedSourceReply,
);
const asyncStarted =
isAsyncStartedToolResult(rawResult) || isAsyncStartedToolResult(result);
@@ -801,9 +1070,22 @@ function collectToolTelemetry(params: {
}
}
}
if (!isMessagingTool(params.toolName)) {
return;
}
const isMessagingSendAction = isMessagingToolSendAction(params.toolName, params.args);
if (!isMessagingSendAction && !params.messagingTarget) {
return;
}
if (
!isMessagingTool(params.toolName) ||
!isMessagingToolSendAction(params.toolName, params.args)
!isMessagingSendAction &&
!isDeliveredMessagingToolResult({
toolName: params.toolName,
args: params.args,
result: params.result,
hookResult: params.mediaTrustResult,
isError: params.isError,
})
) {
return;
}

View File

@@ -836,6 +836,19 @@ describe("CodexAppServerEventProjector", () => {
expect(result.toolMediaUrls).toStrictEqual([]);
});
it("propagates message-tool-only source reply delivery telemetry", async () => {
  const projector = await createProjector();
  // Start from empty telemetry and raise only the two delivery flags under test.
  const { didSendViaMessagingTool, didDeliverSourceReplyViaMessageTool } =
    projector.buildResult({
      ...buildEmptyToolTelemetry(),
      didSendViaMessagingTool: true,
      didDeliverSourceReplyViaMessageTool: true,
    });

  // Both flags must come back unchanged on the projected result.
  expect(didSendViaMessagingTool).toBe(true);
  expect(didDeliverSourceReplyViaMessageTool).toBe(true);
});
it("does not promote repeated tool progress text to the final assistant reply", async () => {
const onToolResult = vi.fn();
const projector = await createProjector({

View File

@@ -53,6 +53,7 @@ import { attachCodexMirrorIdentity, buildCodexUserPromptMessage } from "./transc
export type CodexAppServerToolTelemetry = {
didSendViaMessagingTool: boolean;
didDeliverSourceReplyViaMessageTool?: boolean;
messagingToolSentTexts: string[];
messagingToolSentMediaUrls: string[];
messagingToolSentTargets: MessagingToolSend[];
@@ -411,6 +412,8 @@ export class CodexAppServerEventProjector {
currentAttemptAssistant,
...(this.lastNativeToolError ? { lastToolError: this.lastNativeToolError } : {}),
didSendViaMessagingTool: toolTelemetry.didSendViaMessagingTool,
didDeliverSourceReplyViaMessageTool:
toolTelemetry.didDeliverSourceReplyViaMessageTool === true,
messagingToolSentTexts: toolTelemetry.messagingToolSentTexts,
messagingToolSentMediaUrls: toolTelemetry.messagingToolSentMediaUrls,
messagingToolSentTargets: toolTelemetry.messagingToolSentTargets,

View File

@@ -841,9 +841,11 @@ export async function runCodexAppServerAttempt(
currentChannelProvider: resolveCodexMessageToolProvider(params),
currentChannelId: params.currentChannelId,
currentMessagingTarget: params.currentMessagingTarget,
currentMessageId: params.currentMessageId,
currentThreadId: params.currentThreadTs,
replyToMode: params.replyToMode,
hasRepliedRef: params.hasRepliedRef,
sourceReplyDeliveryMode: params.sourceReplyDeliveryMode,
onToolOutcome: onCodexToolOutcome,
allocateToolOutcomeOrdinal: allocateCodexToolOutcomeOrdinal,
},

View File

@@ -49,15 +49,6 @@ describe("sanitizeOutboundText", () => {
expect(result).not.toMatch(/^assistant:$/m);
});
it("preserves prose lines that merely end with 'user:'/'system:'", () => {
expect(sanitizeOutboundText("Please send this reply to the user:")).toBe(
"Please send this reply to the user:",
);
expect(sanitizeOutboundText("Here is a note for the system:")).toBe(
"Here is a note for the system:",
);
});
it("collapses excessive blank lines after stripping", () => {
const text = "Hello\n\n\n\n\nWorld";
expect(sanitizeOutboundText(text)).toBe("Hello\n\nWorld");

View File

@@ -7,9 +7,7 @@ import { stripAssistantInternalScaffolding } from "openclaw/plugin-sdk/text-chun
*/
const INTERNAL_SEPARATOR_RE = /(?:#\+){2,}#?/g;
const ASSISTANT_ROLE_MARKER_RE = /\bassistant\s+to\s*=\s*\w+/gi;
// Only a standalone role marker on its own line (a leaked turn boundary) — not
// any line that merely ends with the word "user/system/assistant:" in prose.
const ROLE_TURN_MARKER_RE = /^[ \t]*(?:user|system|assistant)\s*:\s*$/gm;
const ROLE_TURN_MARKER_RE = /\b(?:user|system|assistant)\s*:\s*$/gm;
/**
* Strip all assistant-internal scaffolding from outbound text before delivery.

View File

@@ -168,42 +168,23 @@ describe("runtime parity", () => {
const scoped = __testing.filterMockRequestsForParentPrompt(
[
{
prompt: "Fanout worker alpha: inspect the QA workspace and finish with exactly ALPHA-OK.",
allInputText:
"Delegate one bounded QA task to a subagent. Fanout worker alpha: inspect the QA workspace and finish with exactly ALPHA-OK.",
plannedToolName: "read",
},
{
prompt: "Delegate one bounded QA task to a subagent.",
allInputText: "Delegate one bounded QA task to a subagent.",
plannedToolName: "sessions_spawn",
},
{
prompt: "Continue the bounded QA task with the retained child result.",
allInputText:
"Delegate one bounded QA task to a subagent. Continue the bounded QA task with the retained child result.",
plannedToolName: "sessions_spawn",
},
{
allInputText: "Inspect the QA workspace and return one concise protocol note.",
plannedToolName: "read",
},
{
prompt: "Delegate one bounded QA task to a subagent.",
allInputText: "Delegate one bounded QA task to a subagent. Tool result: child accepted.",
toolOutput: "child accepted",
},
],
"Delegate one bounded QA task to a subagent.",
[
"Delegate one bounded QA task to a subagent.",
"Continue the bounded QA task with the retained child result.",
],
);
expect(scoped).toHaveLength(3);
expect(scoped).toHaveLength(2);
expect(scoped.map((request) => request.plannedToolName ?? "result")).toEqual([
"sessions_spawn",
"sessions_spawn",
"result",
]);

View File

@@ -120,7 +120,6 @@ type RuntimeParityTranscriptRecord = {
};
type RuntimeParityMockRequestSnapshot = {
prompt?: string;
allInputText?: string;
plannedToolName?: string;
plannedToolArgs?: unknown;
@@ -760,22 +759,14 @@ function resolveRuntimeParityToolCalls(params: {
function filterMockRequestsForParentPrompt(
requests: RuntimeParityMockRequestSnapshot[],
parentPrompt: string,
parentPrompts: readonly string[] = [parentPrompt],
) {
const normalizedParentPrompts = parentPrompts
.map(normalizeTextForParity)
.filter((prompt) => prompt.length > 0);
if (normalizedParentPrompts.length === 0) {
const normalizedParentPrompt = normalizeTextForParity(parentPrompt);
if (!normalizedParentPrompt) {
return requests;
}
const matching = requests.filter((request) => {
const normalizedPrompt = normalizeTextForParity(request.prompt ?? "");
if (normalizedPrompt) {
return normalizedParentPrompts.some((prompt) => normalizedPrompt.includes(prompt));
}
const normalizedHistory = normalizeTextForParity(request.allInputText ?? "");
return normalizedParentPrompts.some((prompt) => normalizedHistory.includes(prompt));
});
const matching = requests.filter((request) =>
normalizeTextForParity(request.allInputText ?? "").includes(normalizedParentPrompt),
);
return matching.length > 0 ? matching : requests;
}
@@ -975,7 +966,6 @@ async function loadRuntimeParityTranscripts(params: {
async function loadRuntimeParityMockToolCalls(
mockBaseUrl: string | undefined,
parentPrompt: string,
parentPrompts: readonly string[] = [parentPrompt],
): Promise<RuntimeParityToolCall[] | null> {
const normalizedBaseUrl = mockBaseUrl?.trim().replace(/\/+$/u, "");
if (!normalizedBaseUrl) {
@@ -1001,7 +991,6 @@ async function loadRuntimeParityMockToolCalls(
}
const requests = payload.filter(isMessageRecord).map(
(entry): RuntimeParityMockRequestSnapshot => ({
prompt: readNonEmptyString(entry.prompt),
allInputText: readNonEmptyString(entry.allInputText),
plannedToolName: readNonEmptyString(entry.plannedToolName),
plannedToolArgs: entry.plannedToolArgs ?? null,
@@ -1009,7 +998,7 @@ async function loadRuntimeParityMockToolCalls(
}),
);
return resolveToolCallOrderFromMockRequests(
filterMockRequestsForParentPrompt(requests, parentPrompt, parentPrompts),
filterMockRequestsForParentPrompt(requests, parentPrompt),
);
} catch {
return null;
@@ -1026,16 +1015,12 @@ export async function captureRuntimeParityCell(
});
const transcriptRecords = buildTranscriptRecords(transcriptBytes);
const transcriptToolCalls = resolveToolCallOrder(transcriptRecords);
const parentPrompts = transcriptRecords
.filter((record) => record.role === "user")
.map((record) => extractAssistantText(record.message))
.filter((prompt) => prompt.length > 0);
const parentPrompt = parentPrompts[0] ?? "";
const mockToolCalls = await loadRuntimeParityMockToolCalls(
params.mockBaseUrl,
parentPrompt,
parentPrompts,
);
const parentPrompt =
transcriptRecords
.filter((record) => record.role === "user" && !isToolResultLikeMessage(record.message))
.map((record) => extractAssistantText(record.message))
.find(Boolean) ?? "";
const mockToolCalls = await loadRuntimeParityMockToolCalls(params.mockBaseUrl, parentPrompt);
const gatewayLogs = params.gateway.logs?.();
const sentinelFindings = [
...scanGatewayLogSentinels(gatewayLogs),

View File

@@ -1,11 +0,0 @@
// Agent Core tests cover prompt template argument parsing behavior.
import { describe, expect, it } from "vitest";
import { parseCommandArgs, substituteArgs } from "./prompt-template-arguments.js";
describe("prompt template arguments", () => {
it("preserves quoted empty arguments so positional placeholders stay aligned", () => {
expect(parseCommandArgs('first "" third')).toEqual(["first", "", "third"]);
expect(parseCommandArgs("first '' third")).toEqual(["first", "", "third"]);
expect(substituteArgs("$1|$2|$3", parseCommandArgs('first "" third'))).toBe("first||third");
});
});

View File

@@ -5,31 +5,26 @@ export function parseCommandArgs(argsString: string): string[] {
const args: string[] = [];
let current = "";
let inQuote: string | null = null;
let hasToken = false;
for (const char of argsString) {
if (inQuote) {
if (char === inQuote) {
inQuote = null;
} else {
hasToken = true;
current += char;
}
} else if (char === '"' || char === "'") {
hasToken = true;
inQuote = char;
} else if (/\s/.test(char)) {
if (hasToken) {
if (current) {
args.push(current);
current = "";
hasToken = false;
}
} else {
hasToken = true;
current += char;
}
}
if (hasToken) {
if (current) {
args.push(current);
}
return args;

View File

@@ -1,100 +0,0 @@
// Media Understanding Common tests cover provider output extraction behavior.
import { describe, expect, it } from "vitest";
import { extractGeminiResponse } from "./output-extract.js";
describe("extractGeminiResponse", () => {
it("extracts the response from noisy output with nested JSON objects", () => {
expect(
extractGeminiResponse(
[
"debug: invoking gemini",
JSON.stringify({
response: "a useful description",
usage: {
inputTokens: 12,
outputTokens: 4,
},
}),
].join("\n"),
),
).toBe("a useful description");
});
it("returns null for an incomplete JSON object", () => {
expect(extractGeminiResponse("{")).toBeNull();
});
it("ignores unmatched quotes in noisy output before the JSON object", () => {
expect(extractGeminiResponse('debug: model said "hello\n{"response":"ok"}')).toBe("ok");
});
it("ignores braces inside quoted noisy output", () => {
expect(extractGeminiResponse('debug: "hello { world" {"response":"ok"}')).toBe("ok");
});
it("ignores shell-quoted JSON-like noisy output", () => {
expect(extractGeminiResponse('debug: \'{"response":"fake"}\'')).toBeNull();
});
it("does not treat apostrophes inside noisy words as quote delimiters", () => {
expect(extractGeminiResponse('debug: it\'s done {"response":"ok"}')).toBe("ok");
});
it("resynchronizes after an unmatched brace in noisy output", () => {
expect(extractGeminiResponse('debug: generated {\n{"response":"ok"}')).toBe("ok");
});
it("preserves brace-heavy response text", () => {
const response = "{".repeat(33);
expect(extractGeminiResponse(JSON.stringify({ response }))).toBe(response);
});
it("extracts pretty-printed JSON output", () => {
expect(
extractGeminiResponse(
JSON.stringify(
{
response: "pretty response",
usage: { inputTokens: 12 },
},
null,
2,
),
),
).toBe("pretty response");
});
it("preserves pretty-printed object elements inside arrays", () => {
expect(
extractGeminiResponse(
JSON.stringify(
{
response: "array response",
items: [{ id: 1 }, { id: 2 }],
},
null,
2,
),
),
).toBe("array response");
});
it("does not accept an inner response from a malformed trailing object", () => {
expect(extractGeminiResponse('{"response":"good"} {"meta":{"response":"bad"} broken}')).toBe(
"good",
);
expect(extractGeminiResponse('{"response":"good"} {"meta":{"response":"bad"}')).toBe("good");
});
it("ignores a nested response inside an unfinished outer object", () => {
expect(extractGeminiResponse('noise {"meta":{"response":"bad"}')).toBeNull();
});
it("does not promote a child from a malformed outer object", () => {
expect(extractGeminiResponse('{"response":"good"} {"meta" {"response":"bad"}}')).toBe("good");
expect(extractGeminiResponse('noise {broken {"response":"bad"}}')).toBeNull();
expect(extractGeminiResponse('{"response":"good"}\nnoise {broken\n{"response":"bad"}}')).toBe(
"good",
);
});
});

View File

@@ -3,119 +3,16 @@
/** Parse the last JSON object in a noisy provider output string. */
function extractLastJsonObject(raw: string): unknown {
const trimmed = raw.trim();
const ranges: Array<{ end: number; start: number }> = [];
const starts: number[] = [];
let inString = false;
let escaped = false;
let preambleQuote: string | undefined;
let preambleEscaped = false;
let previousSignificant: string | undefined;
let lineHasNonWhitespace = false;
let arrayDepth = 0;
let candidateHasContent = false;
for (let index = 0; index < trimmed.length; index += 1) {
const character = trimmed[index];
if (inString) {
if (character === "\n" || character === "\r") {
starts.length = 0;
inString = false;
escaped = false;
} else if (escaped) {
escaped = false;
} else if (character === "\\") {
escaped = true;
} else if (character === '"') {
inString = false;
}
continue;
}
if (starts.length === 0) {
if (preambleQuote !== undefined) {
if (character === "\n" || character === "\r") {
preambleQuote = undefined;
preambleEscaped = false;
} else if (preambleEscaped) {
preambleEscaped = false;
} else if (character === "\\") {
preambleEscaped = true;
} else if (character === preambleQuote) {
preambleQuote = undefined;
}
continue;
}
if (character === '"' || character === "'" || character === "`") {
const previous = trimmed[index - 1];
if (previous === undefined || /[\s:([{]/.test(previous)) {
preambleQuote = character;
preambleEscaped = false;
continue;
}
}
if (character === "{") {
arrayDepth = 0;
candidateHasContent = false;
starts.push(index);
}
if (!/\s/.test(character)) {
previousSignificant = character;
lineHasNonWhitespace = true;
} else if (character === "\n" || character === "\r") {
lineHasNonWhitespace = false;
}
continue;
}
const hadCandidateContent = candidateHasContent;
if (character === '"') {
inString = true;
} else if (character === "{") {
if (
previousSignificant === ":" ||
previousSignificant === "[" ||
previousSignificant === '"' ||
(previousSignificant === "," && (lineHasNonWhitespace || arrayDepth > 0))
) {
starts.push(index);
} else if (!lineHasNonWhitespace && !hadCandidateContent) {
// Only resync at a clean record boundary; otherwise keep malformed
// outer objects from promoting diagnostic payloads as valid results.
starts.length = 1;
starts[0] = index;
arrayDepth = 0;
candidateHasContent = false;
}
} else if (character === "}" && starts.length > 0) {
const start = starts.pop();
if (start !== undefined && starts.length === 0) {
ranges.push({ start, end: index });
}
} else if (character === "[") {
arrayDepth += 1;
} else if (character === "]" && arrayDepth > 0) {
arrayDepth -= 1;
}
if (!/\s/.test(character)) {
candidateHasContent = true;
previousSignificant = character;
lineHasNonWhitespace = true;
} else if (character === "\n" || character === "\r") {
lineHasNonWhitespace = false;
}
const start = trimmed.lastIndexOf("{");
if (start === -1) {
return null;
}
for (let index = ranges.length - 1; index >= 0; index -= 1) {
const range = ranges[index];
try {
return JSON.parse(trimmed.slice(range.start, range.end + 1));
} catch {
// Ignore malformed objects and try the previous completed range.
}
const slice = trimmed.slice(start);
try {
return JSON.parse(slice);
} catch {
return null;
}
return null;
}
/** Extract Gemini CLI-style response text from the last JSON object in output. */

View File

@@ -108,7 +108,7 @@ flow:
- lambda:
params: [text]
expr: "config.expectedReplyGroups.every((group) => group.some((needle) => normalizeLowercaseStringOrEmpty(text).includes(needle)))"
- expr: "30000"
- expr: "env.providerMode === 'mock-openai' ? 10000 : 30000"
- expr: "env.providerMode === 'mock-openai' ? 100 : 250"
- if:
expr: "Boolean(env.mock)"
@@ -240,11 +240,7 @@ flow:
message:
expr: "lastError instanceof Error ? formatErrorMessage(lastError) : String(lastError ?? 'fanout retry exhausted')"
- if:
# Codex completes child sessions through its app-server path but
# does not relay the child marker back onto the parent QA channel.
# The shared assertions above already prove both child tool calls
# and child session rows; keep this transport-only proof OpenClaw-specific.
expr: "Boolean(env.mock) && env.gateway.runtimeEnv.OPENCLAW_QA_FORCE_RUNTIME !== 'codex'"
expr: "Boolean(env.mock)"
then:
- forEach:
items:
@@ -257,5 +253,5 @@ flow:
- lambda:
params: [candidate]
expr: "String(candidate.text ?? '').trim() === childCompletionMarker"
- 30000
- 10000
detailsExpr: "details"

View File

@@ -26,9 +26,7 @@ scenario:
config:
sessionKey: agent:qa:long-context-cache-stability
fixtureFile: large-cache-fixture.txt
cacheEvidenceNeedle: CACHE-FIXTURE-0050
cacheEvidenceLine: "CACHE-FIXTURE-0050: stable tool-result evidence for prompt-cache reuse across long sessions."
followupPromptNeedle: Using the already-read
cacheEvidenceNeedle: CACHE-FIXTURE-0550
warmupMarker: QA-LARGE-CACHE-WARMUP-OK
hitMarker: QA-LARGE-CACHE-HIT-OK
@@ -86,17 +84,8 @@ flow:
- set: debugRequests
value:
expr: "env.mock ? [...(await fetchJson(`${env.mock.baseUrl}/debug/requests`))] : []"
- set: cappedReadOutputIndex
value:
expr: "debugRequests.reduce((found, planned, index) => { if (found >= 0 || !planned.plannedToolCallId || planned.plannedToolName !== 'read' || planned.plannedToolArgs?.path !== config.fixtureFile) return found; const outputOffset = debugRequests.slice(index + 1).findIndex((candidate) => Boolean(candidate.toolOutputCallId) && candidate.toolOutputCallId === planned.plannedToolCallId); if (outputOffset < 0) return found; const output = debugRequests[index + 1 + outputOffset]; const evidence = [planned.allInputText, output.allInputText, output.toolOutput].filter((value) => typeof value === 'string').join('\\n'); const hasCodexFormattedTruncation = evidence.includes('Warning: truncated output') && (evidence.includes('chars truncated') || evidence.includes('tokens truncated')); return evidence.includes(config.cacheEvidenceLine) && (evidence.includes('[Read output capped at 50KB') || evidence.includes('...(OpenClaw truncated dynamic tool result') || evidence.includes('...(truncated)...') || hasCodexFormattedTruncation) ? index + 1 + outputOffset : found; }, -1)"
- set: hasCappedReadEvidence
value:
expr: "cappedReadOutputIndex >= 0"
- set: hasFollowupCacheEvidence
value:
expr: "cappedReadOutputIndex >= 0 && debugRequests.some((request, index) => index > cappedReadOutputIndex && String(request.prompt ?? '').includes(config.followupPromptNeedle) && String(request.allInputText ?? '').includes(config.cacheEvidenceLine))"
- assert:
expr: "!env.mock || (hasCappedReadEvidence && hasFollowupCacheEvidence)"
expr: "!env.mock || debugRequests.some((request, index) => request.plannedToolName === 'read' && request.plannedToolArgs?.path === config.fixtureFile && typeof request.plannedToolCallId === 'string' && debugRequests.slice(index + 1).some((result, resultOffset) => result.toolOutputCallId === request.plannedToolCallId && String(result.toolOutput ?? '').includes(config.cacheEvidenceNeedle) && (String(result.toolOutput ?? '').includes('[Read output capped at 50KB') || (String(result.toolOutput ?? '').includes('...(truncated)...') && String(result.toolOutput ?? '').length <= 13000)) && debugRequests.slice(index + resultOffset + 2).some((followup) => followup.plannedToolName === 'read' && followup.plannedToolArgs?.path === config.fixtureFile && String(followup.allInputText ?? '').includes(config.cacheEvidenceNeedle) && (String(followup.allInputText ?? '').includes('[Read output capped at 50KB') || String(followup.allInputText ?? '').includes('...(truncated)...')))))"
message:
expr: "`large capped read cache evidence was not observed: ${JSON.stringify({ hasCappedReadEvidence, hasFollowupCacheEvidence, requests: debugRequests.slice(-8).map((request) => ({ prompt: request.prompt ?? null, plannedToolName: request.plannedToolName ?? null, plannedToolArgs: request.plannedToolArgs ?? null, plannedToolCallId: request.plannedToolCallId ?? null, toolOutputCallId: request.toolOutputCallId ?? null, toolOutputLength: String(request.toolOutput ?? '').length, outputHasReadCap: String(request.toolOutput ?? '').includes('[Read output capped at 50KB'), outputHasCodexTruncation: String(request.toolOutput ?? '').includes('...(truncated)...'), inputHasEvidenceLine: String(request.allInputText ?? '').includes(config.cacheEvidenceLine) })) })}`"
expr: "`large capped read tool result was not observed: ${JSON.stringify(debugRequests.slice(-8).map((request) => ({ plannedToolName: request.plannedToolName ?? null, plannedToolArgs: request.plannedToolArgs ?? null, plannedToolCallId: request.plannedToolCallId ?? null, toolOutputCallId: request.toolOutputCallId ?? null, toolOutputLength: String(request.toolOutput ?? '').length, toolOutputHasNeedle: String(request.toolOutput ?? '').includes(config.cacheEvidenceNeedle), toolOutputHasReadCap: String(request.toolOutput ?? '').includes('[Read output capped at 50KB'), toolOutputHasCodexTruncation: String(request.toolOutput ?? '').includes('...(truncated)...'), inputHasNeedle: String(request.allInputText ?? '').includes(config.cacheEvidenceNeedle), inputHasReadCap: String(request.allInputText ?? '').includes('[Read output capped at 50KB'), inputHasCodexTruncation: String(request.allInputText ?? '').includes('...(truncated)...') })))}`"
detailsExpr: "outbound?.text ?? config.hitMarker"

View File

@@ -25,35 +25,10 @@ PROBE_ATTEMPT_TIMEOUT_MS="$(
PROBE_MAX_BODY_BYTES="$(
openclaw_e2e_read_positive_int_env OPENCLAW_UPGRADE_SURVIVOR_PROBE_MAX_BODY_BYTES 1048576
)"
ROOT_MANAGED_VPS="${OPENCLAW_UPGRADE_SURVIVOR_ROOT_MANAGED_VPS:-0}"
# Print the artifact-directory suffix for the current upgrade-survivor lane.
#
# An explicit OPENCLAW_DOCKER_ALL_LANE_NAME is printed verbatim. Otherwise the
# suffix is a lane-kind prefix, followed by "-<BASELINE_SPEC>" when that spec
# contains anything other than spaces, followed by "-<SCENARIO>" when the
# scenario is not "base". Nothing is printed after the suffix (no newline).
resolve_lane_artifact_suffix() {
  if [[ -n "${OPENCLAW_DOCKER_ALL_LANE_NAME:-}" ]]; then
    printf "%s" "$OPENCLAW_DOCKER_ALL_LANE_NAME"
    return
  fi

  # First matching lane kind wins; the order below is significant.
  local lane_kind
  if [[ "$ROOT_MANAGED_VPS" == "1" ]]; then
    lane_kind="root-managed-vps-upgrade"
  elif [[ "$UPDATE_RESTART_MODE" == "auto-auth" ]]; then
    lane_kind="update-restart-auth"
  elif [[ "${OPENCLAW_UPGRADE_SURVIVOR_PUBLISHED_BASELINE:-0}" == "1" ]]; then
    lane_kind="published-upgrade-survivor"
  else
    lane_kind="upgrade-survivor"
  fi
  printf "%s" "$lane_kind"

  # "${BASELINE_SPEC// }" drops spaces so a whitespace-only spec counts as empty.
  [[ -z "${BASELINE_SPEC// }" ]] || printf -- "-%s" "$BASELINE_SPEC"
  [[ "$SCENARIO" == "base" ]] || printf -- "-%s" "$SCENARIO"
}
LANE_ARTIFACT_SUFFIX="$(resolve_lane_artifact_suffix)"
LANE_ARTIFACT_SUFFIX="${OPENCLAW_DOCKER_ALL_LANE_NAME:-default}"
LANE_ARTIFACT_SUFFIX="${LANE_ARTIFACT_SUFFIX//[^A-Za-z0-9_.-]/_}"
ARTIFACT_DIR="${OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_DIR:-$ROOT_DIR/.artifacts/upgrade-survivor/$LANE_ARTIFACT_SUFFIX}"
ROOT_MANAGED_VPS="${OPENCLAW_UPGRADE_SURVIVOR_ROOT_MANAGED_VPS:-0}"
DOCKER_RUN_USER_ARGS=()
PROBE_ENV_ARGS=(
-e OPENCLAW_UPGRADE_SURVIVOR_PROBE_TIMEOUT_MS="$PROBE_TIMEOUT_MS"

View File

@@ -202,8 +202,8 @@ let publicDeprecatedExportsByEntrypointBudget;
try {
budgets = {
publicEntrypoints: readBudgetEnv("OPENCLAW_PLUGIN_SDK_MAX_PUBLIC_ENTRYPOINTS", 322),
publicExports: readBudgetEnv("OPENCLAW_PLUGIN_SDK_MAX_PUBLIC_EXPORTS", 10382),
publicFunctionExports: readBudgetEnv("OPENCLAW_PLUGIN_SDK_MAX_PUBLIC_FUNCTION_EXPORTS", 5211),
publicExports: readBudgetEnv("OPENCLAW_PLUGIN_SDK_MAX_PUBLIC_EXPORTS", 10386),
publicFunctionExports: readBudgetEnv("OPENCLAW_PLUGIN_SDK_MAX_PUBLIC_FUNCTION_EXPORTS", 5215),
publicDeprecatedExports: readBudgetEnv(
"OPENCLAW_PLUGIN_SDK_MAX_PUBLIC_DEPRECATED_EXPORTS",
3247,

View File

@@ -73,9 +73,7 @@ describe("isDeliveredMessagingToolResult", () => {
result: [{ type: "text", text: JSON.stringify({ result: { messageId: "msg-1" } }) }],
}),
).toBe(true);
expect(isDeliveredMessagingToolResult({ result: { content: [{ text: "sent" }] } })).toBe(
true,
);
expect(isDeliveredMessagingToolResult({ result: { content: [{ text: "sent" }] } })).toBe(true);
expect(isDeliveredMessagingToolResult({ result: { status: "sent" } })).toBe(true);
});
@@ -334,4 +332,47 @@ describe("isDeliveredMessageToolOnlySourceReplyResult", () => {
}),
).toBe(false);
});
it("accepts confirmed explicit routes when the caller verified the source route", () => {
  // Explicit iMessage route carried by every call in this test.
  const explicitRoute = { channel: "imessage", target: "+12069106512" };
  // A delivered "reply" over that route, without the caller's verification flag.
  const unverifiedReply = {
    sourceReplyDeliveryMode: "message_tool_only" as const,
    toolName: "message",
    args: { action: "reply", ...explicitRoute, message: "reply" },
    result: { ok: true, messageId: "imessage-853" },
  };

  // Verified by the caller: the explicit route is accepted.
  expect(
    isDeliveredMessageToolOnlySourceReplyResult({
      ...unverifiedReply,
      allowExplicitSourceRoute: true,
    }),
  ).toBe(true);

  // Same call without verification: the explicit route is rejected.
  expect(isDeliveredMessageToolOnlySourceReplyResult(unverifiedReply)).toBe(false);

  // A "react" action is rejected even when the route was verified.
  expect(
    isDeliveredMessageToolOnlySourceReplyResult({
      sourceReplyDeliveryMode: "message_tool_only",
      toolName: "message",
      args: { action: "react", ...explicitRoute },
      result: { ok: true },
      allowExplicitSourceRoute: true,
    }),
  ).toBe(false);
});
});

View File

@@ -50,6 +50,13 @@ function hasExplicitMessageRoute(args: Record<string, unknown>): boolean {
return Array.isArray(args.targets) && args.targets.some((value) => hasStringValue(value));
}
/**
 * Reports whether `action` can count as a source reply: any action accepted by
 * isMessageToolSendActionName, or the string "reply" ignoring surrounding
 * whitespace and letter case.
 */
function isMessageToolSourceReplyActionName(action: unknown): boolean {
  return (
    isMessageToolSendActionName(action) ||
    (typeof action === "string" && action.trim().toLowerCase() === "reply")
  );
}
/**
 * Returns `value` trimmed and lowercased when it is a string; any non-string
 * input yields `undefined`.
 */
function normalizeStatus(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  return value.trim().toLowerCase();
}
@@ -547,6 +554,7 @@ export function isDeliveredMessageToolOnlySourceReplyResult(params: {
result?: unknown;
hookResult?: unknown;
isError?: boolean;
allowExplicitSourceRoute?: boolean;
}): boolean {
if (params.sourceReplyDeliveryMode !== "message_tool_only") {
return false;
@@ -555,7 +563,12 @@ export function isDeliveredMessageToolOnlySourceReplyResult(params: {
return false;
}
const args = asRecord(params.args);
if (!isMessageToolSendActionName(args.action) || hasExplicitMessageRoute(args)) {
const sourceRouteReplyAction =
params.allowExplicitSourceRoute === true && isMessageToolSourceReplyActionName(args.action);
if (!isMessageToolSendActionName(args.action) && !sourceRouteReplyAction) {
return false;
}
if (hasExplicitMessageRoute(args) && params.allowExplicitSourceRoute !== true) {
return false;
}
return isDeliveredMessagingToolResult(params);

View File

@@ -641,7 +641,7 @@ async function runEmbeddedAgentInternal(
...paramsBase,
agentId: paramsBase.agentId ?? runSessionTarget.agentId,
sessionId: runSessionTarget.sessionId,
sessionKey: normalizeOptionalString(effectiveSessionKey ?? runSessionTarget.sessionKey),
sessionKey: effectiveSessionKey ?? runSessionTarget.sessionKey,
sessionFile: runSessionTarget.sessionFile,
};
const sessionLane = resolveSessionLane(params.sessionKey?.trim() || params.sessionId);

View File

@@ -28,6 +28,10 @@ import { truncateUtf16Safe } from "../utils.js";
export const TOOL_PROGRESS_OUTPUT_MAX_CHARS = 8_000;
export { FAST_MODE_AUTO_PROGRESS_KIND } from "../auto-reply/reply-payload.js";
export {
isDeliveredMessageToolOnlySourceReplyResult,
isDeliveredMessagingToolResult,
} from "../agents/embedded-agent-message-tool-source-reply.js";
export { formatFastModeAutoProgressText, resolveFastModeForElapsed } from "../shared/fast-mode.js";
export type { AgentMessage } from "../agents/runtime/index.js";
export type { FastModeAutoProgressState } from "../shared/fast-mode.js";

View File

@@ -46,13 +46,6 @@ describe("estimateStringChars", () => {
expect(estimateStringChars("안녕하세요")).toBe(20);
});
it("handles East Asian fullwidth letters, numbers, and punctuation", () => {
  // Fullwidth code points are written as \u escapes so they survive copy/paste
  // and re-encoding; as raw characters they had been stripped, leaving an empty
  // string that could never satisfy the expectation below.
  // NOTE(review): the exact original characters were lost; these are
  // representative fullwidth letters/digits (U+FF21..U+FF23, U+FF11..U+FF13)
  // and a fullwidth comma (U+FF0C) — confirm against the upstream test.
  const fullwidthLettersAndDigits = "\uFF21\uFF22\uFF23\uFF11\uFF12\uFF13";
  expect(estimateStringChars(fullwidthLettersAndDigits)).toBe(6 * CHARS_PER_TOKEN_ESTIMATE);

  // One fullwidth comma between ten Latin letters: the Latin letters count as
  // one char each and the fullwidth comma as a whole token's worth.
  expect(estimateStringChars("hello\uFF0Cworld")).toBe(
    "helloworld".length + CHARS_PER_TOKEN_ESTIMATE,
  );
});
it("handles CJK punctuation and symbols in the extended range", () => {
// "⺀" (U+2E80) is in CJK Radicals Supplement range
expect(estimateStringChars("⺀")).toBe(CHARS_PER_TOKEN_ESTIMATE);

View File

@@ -20,10 +20,9 @@ export const CHARS_PER_TOKEN_ESTIMATE = 4;
/**
* Matches CJK Unified Ideographs, CJK Extension A/B, CJK Compatibility
* Ideographs, Hangul Syllables, Hiragana, Katakana, and other non-Latin
* scripts and East Asian fullwidth forms that typically use ~1 token per character.
* scripts that typically use ~1 token per character.
*/
const NON_LATIN_RE =
/[\u2E80-\u9FFF\uA000-\uA4FF\uAC00-\uD7AF\uF900-\uFAFF\uFF01-\uFF60\uFFE0-\uFFE6\u{20000}-\u{2FA1F}]/gu;
const NON_LATIN_RE = /[\u2E80-\u9FFF\uA000-\uA4FF\uAC00-\uD7AF\uF900-\uFAFF\u{20000}-\u{2FA1F}]/gu;
/**
* Return an adjusted character length that accounts for non-Latin (CJK, etc.)