From 1d4c1ba56dccb6e2e28a04d26dcf4ab81d9d8039 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 1 Jun 2026 09:29:57 +0100 Subject: [PATCH] fix: harden memory envelope sanitization Co-authored-by: amittell --- .../.generated/plugin-sdk-api-baseline.sha256 | 4 +- extensions/memory-lancedb/index.test.ts | 218 ++++++++++++-- extensions/memory-lancedb/index.ts | 268 +++++++++++++----- extensions/memory-lancedb/package.json | 2 +- .../lib/official-external-plugin-catalog.json | 2 +- scripts/lib/plugin-sdk-doc-metadata.ts | 3 + src/auto-reply/envelope.test.ts | 36 ++- src/auto-reply/envelope.ts | 19 +- src/auto-reply/reply/mentions.test.ts | 15 + src/auto-reply/reply/mentions.ts | 2 +- 10 files changed, 454 insertions(+), 115 deletions(-) diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256 index 7a7b78e50f38..6df6e55f854a 100644 --- a/docs/.generated/plugin-sdk-api-baseline.sha256 +++ b/docs/.generated/plugin-sdk-api-baseline.sha256 @@ -1,2 +1,2 @@ -d2797c078525e0e21dabcbece328425ff1746dfc9eb5187273d00bce8c8af23f plugin-sdk-api-baseline.json -3bda48a83eb5e9374601fd37297c547515701e6b515d80d7baac4d5e515adc65 plugin-sdk-api-baseline.jsonl +63d49032a9b4dc4874a0ca17be73ecc97a2df5d1f47b4e72db34868423370558 plugin-sdk-api-baseline.json +af79f7d711afa0a8563782b8f5cdd7e46b9aea245f5e7ebc464327a8969ed65e plugin-sdk-api-baseline.jsonl diff --git a/extensions/memory-lancedb/index.test.ts b/extensions/memory-lancedb/index.test.ts index f17712eb028f..ba1b534ce246 100644 --- a/extensions/memory-lancedb/index.test.ts +++ b/extensions/memory-lancedb/index.test.ts @@ -2168,6 +2168,38 @@ describe("memory plugin e2e", () => { vi.resetModules(); } + test("auto-capture stores clean replacement for contaminated legacy duplicate", async () => { + const cleanText = "I prefer Helix for editing code every day."; + const harness = await setupAutoCaptureCursorHarness({ + searchResults: [ + { + id: "legacy-contaminated", + text: `[Telegram Alice +5m] ${cleanText}`, + vector: [0.1, 0.2, 0.3], + importance: 0.7, + category: "preference", + createdAt: 1, + _distance: 0, + }, + ], + }); + + try { + await harness.agentEnd?.( + { + success: true, + messages: [{ role: "user", content: cleanText }], + }, + { sessionKey: "session-legacy-contaminated" }, + ); + + expect(harness.add).toHaveBeenCalledTimes(1); + expect(firstAddedMemory(harness.add).text).toBe(cleanText); + } finally { + await cleanupAutoCaptureCursorHarness(); + } + }); + test("skips already-processed auto-capture messages by session cursor", async () => { const harness = await setupAutoCaptureCursorHarness(); @@ -2899,6 +2931,9 @@ describe("memory plugin e2e", () => { test("looksLikeEnvelopeSludge detects inbound metadata sentinels", () => { expect(looksLikeEnvelopeSludge("Conversation info (untrusted metadata):")).toBe(true); expect(looksLikeEnvelopeSludge("Sender (untrusted metadata):")).toBe(true); + expect(looksLikeEnvelopeSludge("Sender (untrusted metadata): Alex\nI prefer dark mode")).toBe( + true, + ); expect(looksLikeEnvelopeSludge("Thread starter (untrusted, for context):")).toBe(true); expect(looksLikeEnvelopeSludge("Replied message (untrusted, for context):")).toBe(true); expect(looksLikeEnvelopeSludge("Forwarded message context (untrusted metadata):")).toBe(true); @@ -2973,6 +3008,9 @@ describe("memory plugin e2e", () => { expect(looksLikeEnvelopeSludge("Structured object (untrusted metadata):")).toBe(true); expect(looksLikeEnvelopeSludge("Calendar event (untrusted metadata):")).toBe(true); expect(looksLikeEnvelopeSludge("Custom plugin label (untrusted metadata):")).toBe(true); + expect(looksLikeEnvelopeSludge(`${"Custom ".repeat(30)}label (untrusted metadata):`)).toBe( + true, + ); expect( looksLikeEnvelopeSludge("Reply chain of current user message (untrusted, nearest first):"), ).toBe(true); @@ -3030,39 +3068,39 @@ describe("memory plugin e2e", () => { }); test("looksLikeEnvelopeSludge does not false-positive on user-typed brackets", () => { - // No elapsed/date marker inside the bracket — should pass through when the - // bracketed label is not a known channel id. + // No elapsed/date marker or group/body-sender signal inside the bracket. expect(looksLikeEnvelopeSludge("[note] John: hi")).toBe(false); expect(looksLikeEnvelopeSludge("[1] some footnote")).toBe(false); expect(looksLikeEnvelopeSludge("[TODO] fix this later")).toBe(false); + expect(looksLikeEnvelopeSludge("[Signal Hill] is my favorite hike")).toBe(false); + expect(looksLikeEnvelopeSludge("[Matrix A] is my project")).toBe(false); // Mid-line quote of the marker shape is not anchored at start, so safe. expect(looksLikeEnvelopeSludge("I always think +5m is too short")).toBe(false); expect(looksLikeEnvelopeSludge("Meeting on Mon 2026-05-17 at 3pm")).toBe(false); }); - test("looksLikeEnvelopeSludge detects marker-free `[channel from]` envelopes", () => { - // formatAgentEnvelope drops `+`, host, ip, and timestamp when their - // inputs are absent, leaving just `[ ] `. Anchoring on - // the canonical bundled channel-id list keeps this detector and the - // formatter from drifting. - expect(looksLikeEnvelopeSludge("[telegram alice] hello world")).toBe(true); - expect(looksLikeEnvelopeSludge("[discord user] ping")).toBe(true); + test("looksLikeEnvelopeSludge detects structurally marker-free channel envelopes", () => { + // Marker-free channel envelopes still need a group/thread marker or a body + // sender prefix; a plain `[channel words] body` is too ambiguous. + expect(looksLikeEnvelopeSludge("[telegram alice] hello world")).toBe(false); + expect(looksLikeEnvelopeSludge("[telegram Alice] Alice: hello world")).toBe(true); + expect(looksLikeEnvelopeSludge("[discord user] ping")).toBe(false); expect(looksLikeEnvelopeSludge("[slack #general user] message")).toBe(true); - expect(looksLikeEnvelopeSludge("[imessage Bob] hello")).toBe(true); - expect(looksLikeEnvelopeSludge("[whatsapp +15551234567] hi")).toBe(true); - expect(looksLikeEnvelopeSludge("[Google Chat Room] I prefer dark mode")).toBe(true); - expect(looksLikeEnvelopeSludge("[Nextcloud Talk Board] I prefer dark mode")).toBe(true); - expect(looksLikeEnvelopeSludge("[Teams General] I prefer dark mode")).toBe(true); + expect(looksLikeEnvelopeSludge("[imessage Bob] Bob: hello")).toBe(true); + expect(looksLikeEnvelopeSludge("[whatsapp 123@g.us Bob] Bob: hi")).toBe(true); + expect(looksLikeEnvelopeSludge("[Google Chat Room] Room: I prefer dark mode")).toBe(true); + expect(looksLikeEnvelopeSludge("[Nextcloud Talk Board] Board: I prefer dark mode")).toBe(true); + expect(looksLikeEnvelopeSludge("[Teams General] General: I prefer dark mode")).toBe(true); // Multi-line body still gets filtered when the envelope leads the first line. - expect(looksLikeEnvelopeSludge("[telegram alice] hello\nsecond line\nthird")).toBe(true); + expect(looksLikeEnvelopeSludge("[telegram Alice] Alice: hello\nsecond line\nthird")).toBe(true); }); test("looksLikeEnvelopeSludge marker-free match is case insensitive", () => { // Production paths feed lowercase channel ids, but the formatter does not // lowercase `params.channel` itself; accept either casing so a stray uppercase // id never bypasses the filter. - expect(looksLikeEnvelopeSludge("[Telegram Alice] hi")).toBe(true); - expect(looksLikeEnvelopeSludge("[DISCORD user] msg")).toBe(true); + expect(looksLikeEnvelopeSludge("[Telegram Alice] Alice: hi")).toBe(true); + expect(looksLikeEnvelopeSludge("[DISCORD #general user] user: msg")).toBe(true); }); test("looksLikeEnvelopeSludge does not false-positive on markdown link syntax", () => { @@ -3080,22 +3118,31 @@ describe("memory plugin e2e", () => { expect(looksLikeEnvelopeSludge("[telegram] foo")).toBe(false); }); - test("sanitizeForMemoryCapture strips marker-free `[channel from]` envelope prefix", () => { + test("sanitizeForMemoryCapture strips structurally marker-free channel envelope prefix", () => { // Mirror the looksLikeEnvelopeSludge marker-free coverage so the full // capture flow (sanitize -> shouldCapture) also handles the shape. - expect(sanitizeForMemoryCapture("[telegram alice] I prefer dark mode")).toBe( + expect(sanitizeForMemoryCapture("[telegram Alice] Alice: I prefer dark mode")).toBe( "I prefer dark mode", ); - expect(sanitizeForMemoryCapture("[discord user] ping")).toBe("ping"); - expect(sanitizeForMemoryCapture("[Google Chat Room] I prefer dark mode")).toBe( + expect(sanitizeForMemoryCapture("[telegram Alice id:123] Alice: I prefer dark mode")).toBe( "I prefer dark mode", ); - expect(sanitizeForMemoryCapture("[Nextcloud Talk Board] I prefer dark mode")).toBe( + expect(sanitizeForMemoryCapture("[LINE user:U123] (sender): I prefer dark mode")).toBe( "I prefer dark mode", ); - expect(sanitizeForMemoryCapture("[Teams General] I prefer dark mode")).toBe( + expect(sanitizeForMemoryCapture("[discord #general user] user: ping")).toBe("ping"); + expect(sanitizeForMemoryCapture("[Google Chat Room] Room: I prefer dark mode")).toBe( "I prefer dark mode", ); + expect(sanitizeForMemoryCapture("[Nextcloud Talk Board] Board: I prefer dark mode")).toBe( + "I prefer dark mode", + ); + expect(sanitizeForMemoryCapture("[Teams General] General: I prefer dark mode")).toBe( + "I prefer dark mode", + ); + expect(sanitizeForMemoryCapture("[Signal Hill] is my favorite hike")).toBe( + "[Signal Hill] is my favorite hike", + ); // Group-chat sender-prefix on the body is also stripped when the bracket is // recognized as an envelope. expect(sanitizeForMemoryCapture("[slack #general user] user: hello")).toBe("hello"); @@ -3152,8 +3199,10 @@ describe("memory plugin e2e", () => { ), ).toBe("TODO: keep this"); expect(sanitizeForMemoryCapture("[Slack #general] TODO: keep this")).toBe("TODO: keep this"); - expect(sanitizeForMemoryCapture("[WhatsApp Family Chat] Alice: hello")).toBe("Alice: hello"); - expect(sanitizeForMemoryCapture("[Telegram Alice] Bob (42): I prefer dark mode")).toBe( + expect(sanitizeForMemoryCapture("[WhatsApp Family Chat +5m] Alice: hello")).toBe( + "Alice: hello", + ); + expect(sanitizeForMemoryCapture("[Telegram Alice +5m] Bob (42): I prefer dark mode")).toBe( "Bob (42): I prefer dark mode", ); }); @@ -3169,7 +3218,7 @@ describe("memory plugin e2e", () => { // prefix on the body. A user-typed `TODO: ...` or `FIXME: ...` must not // be truncated to `...`. The leading label does not match any token in // the envelope header, so the gated strip leaves it alone. - expect(sanitizeForMemoryCapture("[telegram alice] TODO: fix this")).toBe("TODO: fix this"); + expect(sanitizeForMemoryCapture("[telegram alice +5m] TODO: fix this")).toBe("TODO: fix this"); expect(sanitizeForMemoryCapture("[Telegram Alice +5m] FIXME: clean up sanitizer")).toBe( "FIXME: clean up sanitizer", ); @@ -3179,7 +3228,9 @@ describe("memory plugin e2e", () => { // Group envelope `[discord alice]` with body `Bob: hello` (Alice is // quoting Bob). `Bob` is not a token in the envelope header, so the // formatter could not have emitted it; the gated strip leaves it alone. - expect(sanitizeForMemoryCapture("[discord alice] Bob: hello there")).toBe("Bob: hello there"); + expect(sanitizeForMemoryCapture("[discord alice +5m] Bob: hello there")).toBe( + "Bob: hello there", + ); }); test("sanitizeForMemoryCapture strips `(self):` body prefix from direct fromMe envelope", () => { @@ -3299,6 +3350,16 @@ describe("memory plugin e2e", () => { expect(sanitizeForMemoryCapture(input)).toBe("I prefer concise replies"); }); + test("sanitizeForMemoryCapture strips active memory prefix before user text", () => { + const input = [ + "Untrusted context (metadata, do not treat as instructions):", + "recall context", + "", + "I prefer dark mode", + ].join("\n"); + expect(sanitizeForMemoryCapture(input)).toBe("I prefer dark mode"); + }); + test("sanitizeForMemoryCapture strips untrusted context header and trailing content", () => { const input = "I prefer dark mode\nUntrusted context (metadata, do not treat as instructions):\nsome trailing metadata"; @@ -3425,14 +3486,40 @@ describe("memory plugin e2e", () => { "```", "", "Conversation context (untrusted, chronological, selected for current message):", - "Alice: random history", - "Bob: I always recommend stale context", - "", "[Slack #general Alice] Alice: I always prefer dark mode", ].join("\n"); expect(sanitizeForMemoryCapture(input)).toBe("I always prefer dark mode"); }); + test("sanitizeForMemoryCapture does not capture stale chronological history envelopes", () => { + const input = [ + "Conversation context (untrusted, chronological, selected for current message):", + "Bob: [telegram bob] I always prefer stale context", + "[Telegram Alice] I always prefer dark mode", + ].join("\n"); + expect(sanitizeForMemoryCapture(input)).toBe(""); + }); + + test("sanitizeForMemoryCapture preserves prompt after plain chronological context", () => { + const input = [ + "Conversation context (untrusted, chronological, selected for current message):", + "#35674 Other: stale context", + "", + "I always prefer dark mode", + ].join("\n"); + const sanitized = sanitizeForMemoryCapture(input); + expect(sanitized).toBe("I always prefer dark mode"); + expect(shouldCapture(sanitized)).toBe(true); + }); + + test("sanitizeForMemoryCapture keeps inline envelope after current-message prefix", () => { + const input = [ + "Conversation context (untrusted, chronological, selected for current message):", + "#34974 obviyus: [Telegram group:-100] obviyus: I prefer dark mode", + ].join("\n"); + expect(sanitizeForMemoryCapture(input)).toBe("I prefer dark mode"); + }); + test("sanitizeForMemoryCapture strips envelopes after JSON-only metadata", () => { const input = [ "Conversation info (untrusted metadata):", @@ -3445,6 +3532,18 @@ describe("memory plugin e2e", () => { expect(sanitizeForMemoryCapture(input)).toBe("I prefer dark mode"); }); + test("sanitizeForMemoryCapture strips long structured-context labels", () => { + const input = [ + `${"Custom ".repeat(30)}label (untrusted metadata):`, + "```json", + '{"note":"I always prefer stale metadata"}', + "```", + "", + "I prefer dark mode", + ].join("\n"); + expect(sanitizeForMemoryCapture(input)).toBe("I prefer dark mode"); + }); + test("sanitizeForMemoryCapture strips current message reply context before envelopes", () => { const input = [ "Conversation info (untrusted metadata):", @@ -3459,6 +3558,17 @@ describe("memory plugin e2e", () => { expect(sanitizeForMemoryCapture(input)).toBe("I prefer dark mode"); }); + test("sanitizeForMemoryCapture strips current message reply context without envelopes", () => { + const input = [ + "Current message:", + '[Replying to: "quoted status body"]', + "#34974 obviyus: I prefer dark mode", + ].join("\n"); + const sanitized = sanitizeForMemoryCapture(input); + expect(sanitized).toBe("I prefer dark mode"); + expect(shouldCapture(sanitized)).toBe(true); + }); + test("sanitizeForMemoryCapture strips message-tool delivery hints before envelopes", () => { const input = [ "Delivery: Final assistant text is not automatically delivered in this run. Use the `message` tool to send user-visible output.", @@ -3468,6 +3578,29 @@ describe("memory plugin e2e", () => { expect(sanitizeForMemoryCapture(input)).toBe("I prefer dark mode"); }); + test("sanitizeForMemoryCapture strips message-tool delivery hints before plain text", () => { + const input = [ + "Delivery: Final assistant text is not automatically delivered in this run. Use the `message` tool to send user-visible output.", + "", + "I prefer dark mode", + ].join("\n"); + const sanitized = sanitizeForMemoryCapture(input); + expect(sanitized).toBe("I prefer dark mode"); + expect(shouldCapture(sanitized)).toBe(true); + }); + + test("sanitizeForMemoryCapture strips delivery hints before chronological context", () => { + const input = [ + "Delivery: Final assistant text is not automatically delivered in this run. Use the `message` tool to send user-visible output.", + "", + "Conversation context (untrusted, chronological, selected for current message):", + "[Telegram Bob] I prefer dark mode", + ].join("\n"); + const sanitized = sanitizeForMemoryCapture(input); + expect(sanitized).toBe("I prefer dark mode"); + expect(shouldCapture(sanitized)).toBe(true); + }); + test("sanitizeForMemoryCapture strips pending history wrappers before current envelopes", () => { const input = [ "[Chat messages since your last reply - for context]", @@ -3482,6 +3615,29 @@ describe("memory plugin e2e", () => { expect(sanitizeForMemoryCapture(input)).toBe("I prefer dark mode"); }); + test("sanitizeForMemoryCapture strips QQ history wrappers before current text", () => { + const input = [ + "[Chat messages since your last reply \u2014 CONTEXT ONLY]", + "Bob: I always prefer stale context", + "", + "[CURRENT MESSAGE \u2014 reply to this]", + "I prefer dark mode", + ].join("\n"); + const sanitized = sanitizeForMemoryCapture(input); + expect(sanitized).toBe("I prefer dark mode"); + expect(shouldCapture(sanitized)).toBe(true); + }); + + test("sanitizeForMemoryCapture strips QQ merged-message wrappers before current text", () => { + const input = [ + "[Merged earlier messages \u2014 CONTEXT ONLY]", + "Bob: I always prefer stale context", + "[CURRENT MESSAGE \u2014 reply using the context above]", + "I prefer dark mode", + ].join("\n"); + expect(sanitizeForMemoryCapture(input)).toBe("I prefer dark mode"); + }); + test("sanitizeForMemoryCapture preserves user text after back-to-back sentinels at start", () => { // Two sentinels at the very start (no user content before either) must // both be stripped so the body that follows survives. @@ -3574,10 +3730,12 @@ describe("memory plugin e2e", () => { category: "fact", text: 'Conversation info (untrusted metadata):\n```json\n{"id":"123"}\n```\nsome sludge', }, + { category: "fact", text: "Sender (untrusted metadata): Alex\nI prefer light mode" }, { category: "entity", text: "My email is test@example.com" }, ]); expect(result).toContain("dark mode"); expect(result).toContain("this layout"); + expect(result).not.toContain("light mode"); expect(result).not.toContain("media attached"); expect(result).toContain("test@example.com"); expect(result).not.toContain("untrusted metadata"); diff --git a/extensions/memory-lancedb/index.ts b/extensions/memory-lancedb/index.ts index 7c41e3341a00..1a6ea81312c0 100644 --- a/extensions/memory-lancedb/index.ts +++ b/extensions/memory-lancedb/index.ts @@ -209,6 +209,7 @@ const DEFAULT_TOOL_RECALL_COOLDOWN_MS = 60_000; // bounded. const DEFAULT_AUTO_RECALL_OVERFETCH_LIMIT = 10; const DEFAULT_AUTO_RECALL_RESULT_CAP = 3; +const DUPLICATE_SEARCH_LIMIT = 5; function parsePositiveIntegerOption(value: string | undefined, flag: string): number | undefined { if (value === undefined) { @@ -662,6 +663,16 @@ function sanitizeRecallMemoryText(text: string): string | null { return looksLikeEnvelopeSludge(stripped) ? null : stripped; } +async function findCleanDuplicateMemory( + db: { + search(vector: number[], limit?: number, minScore?: number): Promise; + }, + vector: number[], +): Promise { + const existing = await db.search(vector, DUPLICATE_SEARCH_LIMIT, 0.95); + return existing.find((result) => sanitizeRecallMemoryText(result.entry.text) !== null); +} + // ============================================================================ // Envelope / transport metadata contamination detection // ============================================================================ @@ -692,6 +703,12 @@ const INBOUND_META_SENTINELS = [ "Nearby reply target window (untrusted, chronological, around replied-to message):", "Chat history since last reply (untrusted, for context):", ] as const; +const INBOUND_META_SENTINEL_LINE_RE = new RegExp( + `^(?:${INBOUND_META_SENTINELS.map((sentinel) => + sentinel.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), + ).join("|")})[^\\n]*$`, + "m", +); const MESSAGE_TOOL_DELIVERY_HINTS = [ "Delivery: to send a message, use the `message` tool.", @@ -705,6 +722,16 @@ const MESSAGE_TOOL_DELIVERY_HINT_RE = new RegExp( ); const HISTORY_CONTEXT_MARKER = "[Chat messages since your last reply - for context]"; const CURRENT_MESSAGE_MARKER = "[Current message - respond to this]"; +const HISTORY_CONTEXT_MARKERS = [ + HISTORY_CONTEXT_MARKER, + "[Chat messages since your last reply \u2014 CONTEXT ONLY]", + "[Merged earlier messages \u2014 CONTEXT ONLY]", +] as const; +const CURRENT_MESSAGE_MARKERS = [ + CURRENT_MESSAGE_MARKER, + "[CURRENT MESSAGE \u2014 reply to this]", + "[CURRENT MESSAGE \u2014 reply using the context above]", +] as const; const ACTIVE_TURN_RECOVERY_RE = /active-turn-recovery/i; @@ -720,18 +747,20 @@ const ACTIVE_TURN_RECOVERY_RE = /active-turn-recovery/i; * so requiring `):` to terminate the line catches every real injection while * sidestepping the false-positive risk. * - * Label segment is capped at 100 chars to avoid catastrophic backtracking on - * pathological inputs. + * The producer does not truncate custom structured-context labels, so the + * label segment is newline-bound rather than length-bound. The expression uses + * only linear character classes; avoid nested wildcards here. */ const INBOUND_META_LABEL_RE = - /^[^\n]{1,100}\((?:untrusted metadata|untrusted, for context|untrusted, nearest first|untrusted, chronological,[^\n)]{1,80})\):[ \t]*$/m; + /^[^\n]+\((?:untrusted metadata|untrusted, for context|untrusted, nearest first|untrusted, chronological,[^\n)]{1,80})\):[ \t]*$/m; const INBOUND_META_LABEL_JSON_BLOCK_RE = - /^[^\n]{1,100}\((?:untrusted metadata|untrusted, for context|untrusted, nearest first|untrusted, chronological,[^\n)]{1,80})\):[ \t]*\n[ \t]*```json[ \t]*\n[\s\S]*?\n[ \t]*```[ \t]*\n?/gm; + /^[^\n]+\((?:untrusted metadata|untrusted, for context|untrusted, nearest first|untrusted, chronological,[^\n)]{1,80})\):[ \t]*\n[ \t]*```json[ \t]*\n[\s\S]*?\n[ \t]*```[ \t]*\n?/gm; const LEADING_CHRONOLOGICAL_CONTEXT_LABEL_RE = /^\s*[^\n]{1,100}\(untrusted, chronological,[^\n)]{1,80}\):[ \t]*(?:\n|$)/; -const BRACKETED_LINE_PREFIX_RE = /^\[[^\]\n]{1,500}\]\s/gm; const BRACKETED_PREFIX_RE = /\[[^\]\n]{1,500}\]\s/g; const LEADING_CURRENT_MESSAGE_CONTEXT_RE = /^\s*Current message:[ \t]*(?:\n|$)/; +const LEADING_CURRENT_MESSAGE_REPLY_LINE_RE = /^\s*\[Replying to:[^\n]{0,1000}\]\s*\n/; +const LEADING_CURRENT_MESSAGE_ID_SENDER_RE = /^#\d+\s+[^\n:]{1,100}:\s*/; const UNTRUSTED_CONTEXT_HEADER_RE = /^Untrusted context \(metadata/m; @@ -777,12 +806,11 @@ const INBOUND_ENVELOPE_PREFIX_RE = /^\[([^\]\n]{0,300}?(?:\s\+(?:\d+[smhdwy]|just now)\b|\s[A-Za-z]{3}\s\d{4}-\d{2}-\d{2})[^\]\n]{0,200})\]\s/; /** - * Marker-free leading envelope header, e.g. `[telegram alice] hello`. The - * elapsed/date marker regex above misses this shape because `formatAgentEnvelope` - * drops `+`, host, ip, and the absolute timestamp when their inputs are - * absent. The minimum real shape is then `[ ]` with no markers, - * which is indistinguishable from arbitrary user `[label ...]` prose without a - * channel-id anchor. + * Marker-free leading envelope header. The elapsed/date marker regex above + * misses envelopes where `formatAgentEnvelope` drops every optional marker. + * Because channel labels can also be ordinary words, callers only accept this + * match after `matchKnownChannelMarkerFreeEnvelopePrefix` finds a stronger + * group/thread or body-sender signal. * * Anchoring on a known bundled/official channel prefix from * `BUNDLED_CHAT_CHANNEL_ENVELOPE_PREFIXES` keeps the detector and formatter in @@ -824,6 +852,7 @@ const INBOUND_ENVELOPE_KNOWN_CHANNEL_PREFIX_RE: RegExp | null = ENVELOPE_KNOWN_C * regex bounded and matches realistic display names). */ const ENVELOPE_BODY_SENDER_PREFIX_RE = /^([^\n:]{1,120}):\s/; +const ENVELOPE_BODY_DIRECT_PREFIX = "(sender)"; const ENVELOPE_BODY_SELF_PREFIX = "(self)"; const SENDER_PREFIXED_ENVELOPE_CHANNEL_RE = /^(?:discord|imessage|line|mattermost|qqbot|signal|slack|telegram|whatsapp)(?:\s|$)/i; @@ -831,6 +860,25 @@ const NON_DIRECT_ENVELOPE_HEADER_RE = /(?:^|\s)(?:#[^\s]+|group:[^\s]+|group\s+id:[^\s]+|room:[^\s]+|channel\s+id:[^\s]+|id:-[^\s]+|unknown-group|[^\s]+@g\.us)(?:\s|$)/i; const USER_AUTHORED_BODY_LABEL_RE = /^(?:action|decision|fixme|note|question|reminder|todo)$/i; +function matchKnownChannelMarkerFreeEnvelopePrefix( + text: string, + options?: { allowAmbiguousDirect?: boolean }, +): RegExpMatchArray | null { + const match = INBOUND_ENVELOPE_KNOWN_CHANNEL_PREFIX_RE?.exec(text); + if (!match) { + return null; + } + const headerInside = match[1] ?? ""; + if (NON_DIRECT_ENVELOPE_HEADER_RE.test(headerInside)) { + return match; + } + const body = text.slice(match[0].length); + if (stripEnvelopeBodySenderPrefix(body, headerInside) !== body) { + return match; + } + return options?.allowAmbiguousDirect ? match : null; +} + /** * Returns true if `text` looks like it contains OpenClaw-injected envelope or * transport metadata that should never be persisted as a long-term memory. @@ -843,7 +891,7 @@ export function looksLikeEnvelopeSludge(text: string): boolean { // Generic line-anchored sentinel match; precompiled at module scope so the // hot-path callers (capture gating, recall filtering) do not pay a regex // compile per invocation. - if (INBOUND_META_LABEL_RE.test(text)) { + if (INBOUND_META_SENTINEL_LINE_RE.test(text) || INBOUND_META_LABEL_RE.test(text)) { return true; } @@ -857,7 +905,10 @@ export function looksLikeEnvelopeSludge(text: string): boolean { return true; } - if (text.includes(HISTORY_CONTEXT_MARKER) || text.includes(CURRENT_MESSAGE_MARKER)) { + if ( + HISTORY_CONTEXT_MARKERS.some((marker) => text.includes(marker)) || + CURRENT_MESSAGE_MARKERS.some((marker) => text.includes(marker)) + ) { return true; } @@ -877,18 +928,13 @@ export function looksLikeEnvelopeSludge(text: string): boolean { } // Check for the leading `[Channel sender +elapsed ...]` bracket emitted by - // formatInboundEnvelope. The agent_end hook receives messages with this - // header still attached, so unguarded auto-capture would persist envelope - // metadata bytes as part of the user's "memory". Two regexes: the - // marker-aware one catches envelopes that include elapsed/date markers - // regardless of channel id (covers third-party channels not in the bundled - // list); the known-channel one catches marker-free shapes like - // `[telegram alice] hi` that drop every optional part except the channel id - // and from label. + // formatInboundEnvelope. Marker-free channel brackets need a stronger + // group/thread or body-sender signal so user prose like `[Signal Hill] ...` + // is not treated as transport metadata. if (INBOUND_ENVELOPE_PREFIX_RE.test(text)) { return true; } - if (INBOUND_ENVELOPE_KNOWN_CHANNEL_PREFIX_RE?.test(text)) { + if (matchKnownChannelMarkerFreeEnvelopePrefix(text)) { return true; } @@ -921,7 +967,7 @@ function stripEnvelopeBodySenderPrefix(body: string, headerInside: string): stri return body; } const label = match[1]; - if (label === ENVELOPE_BODY_SELF_PREFIX) { + if (label === ENVELOPE_BODY_SELF_PREFIX || label === ENVELOPE_BODY_DIRECT_PREFIX) { return body.slice(match[0].length); } if ( @@ -932,7 +978,7 @@ function stripEnvelopeBodySenderPrefix(body: string, headerInside: string): stri return body.slice(match[0].length); } const headerTokens = headerInside.split(/\s+/); - if (headerTokens.includes(label)) { + if (headerTokens.includes(label) || headerInside.includes(label)) { return body.slice(match[0].length); } return body; @@ -957,7 +1003,10 @@ function stripLeadingMessageToolDeliveryHints(text: string): string { return stripped ? lines.slice(index).join("\n") : text; } -function findFirstInboundEnvelopeIndex(text: string, options?: { skipReplyQuoteLine?: boolean }) { +function findFirstInboundEnvelopeIndex( + text: string, + options?: { allowAmbiguousMarkerFree?: boolean; skipReplyQuoteLine?: boolean }, +) { for (const match of text.matchAll(BRACKETED_PREFIX_RE)) { const index = match.index; if (options?.skipReplyQuoteLine) { @@ -969,7 +1018,9 @@ function findFirstInboundEnvelopeIndex(text: string, options?: { skipReplyQuoteL const candidate = text.slice(index); if ( INBOUND_ENVELOPE_PREFIX_RE.test(candidate) || - INBOUND_ENVELOPE_KNOWN_CHANNEL_PREFIX_RE?.test(candidate) + matchKnownChannelMarkerFreeEnvelopePrefix(candidate, { + allowAmbiguousDirect: options?.allowAmbiguousMarkerFree, + }) ) { return index; } @@ -979,22 +1030,36 @@ function findFirstInboundEnvelopeIndex(text: string, options?: { skipReplyQuoteL function stripPendingHistoryContextBeforeCurrentMessage(text: string): string { const candidateText = text.trimStart(); - if (!candidateText.startsWith(HISTORY_CONTEXT_MARKER)) { + if (!HISTORY_CONTEXT_MARKERS.some((marker) => candidateText.startsWith(marker))) { return text; } - const currentMessageIndex = candidateText.lastIndexOf(CURRENT_MESSAGE_MARKER); - if (currentMessageIndex === -1) { + const currentMarker = findLastContextMarker(candidateText, CURRENT_MESSAGE_MARKERS); + if (!currentMarker) { return text; } - return candidateText.slice(currentMessageIndex + CURRENT_MESSAGE_MARKER.length); + return candidateText.slice(currentMarker.index + currentMarker.marker.length); } function stripToCurrentMessageMarker(text: string): string | null { - const currentMessageIndex = text.lastIndexOf(CURRENT_MESSAGE_MARKER); - if (currentMessageIndex === -1) { + const currentMarker = findLastContextMarker(text, CURRENT_MESSAGE_MARKERS); + if (!currentMarker) { return null; } - return text.slice(currentMessageIndex + CURRENT_MESSAGE_MARKER.length); + return text.slice(currentMarker.index + currentMarker.marker.length); +} + +function findLastContextMarker( + text: string, + markers: readonly string[], +): { index: number; marker: string } | null { + let result: { index: number; marker: string } | null = null; + for (const marker of markers) { + const index = text.lastIndexOf(marker); + if (index !== -1 && (!result || index > result.index)) { + result = { index, marker }; + } + } + return result; } function stripLeadingCurrentMessageContextBeforeEnvelope(text: string): string { @@ -1002,12 +1067,24 @@ function stripLeadingCurrentMessageContextBeforeEnvelope(text: string): string { if (!LEADING_CURRENT_MESSAGE_CONTEXT_RE.test(candidateText)) { return text; } - const envelopeIndex = findFirstInboundEnvelopeIndex(candidateText, { skipReplyQuoteLine: true }); + const envelopeIndex = findFirstInboundEnvelopeIndex(candidateText, { + allowAmbiguousMarkerFree: true, + skipReplyQuoteLine: true, + }); if (envelopeIndex === -1) { - return text; + let plainBody = candidateText.replace(LEADING_CURRENT_MESSAGE_CONTEXT_RE, "").trimStart(); + for (let pass = 0; pass < 4; pass += 1) { + const replyLineMatch = plainBody.match(LEADING_CURRENT_MESSAGE_REPLY_LINE_RE); + if (!replyLineMatch) { + break; + } + plainBody = plainBody.slice(replyLineMatch[0].length).trimStart(); + } + const currentMessagePrefixMatch = plainBody.match(LEADING_CURRENT_MESSAGE_ID_SENDER_RE); + return currentMessagePrefixMatch ? plainBody.slice(currentMessagePrefixMatch[0].length) : text; } // `Current message:` is current-turn transport context. Strip it only when a - // real inbound envelope follows; otherwise preserve the text for normal capture. + // real current-message body follows; otherwise preserve the text for normal capture. return candidateText.slice(envelopeIndex); } @@ -1021,37 +1098,28 @@ function stripLeadingPlainTextMetadataBody(text: string): string { return currentMessageBody === candidateText ? "" : currentMessageBody; } -function stripLeadingInboundEnvelope(text: string): string { - const candidateText = stripLeadingCurrentMessageContextBeforeEnvelope( +function stripLeadingInboundEnvelope( + text: string, + options?: { allowAmbiguousMarkerFree?: boolean }, +): string { + const strippedCandidate = stripLeadingCurrentMessageContextBeforeEnvelope( stripPendingHistoryContextBeforeCurrentMessage(stripLeadingMessageToolDeliveryHints(text)), - ).trimStart(); + ); + const candidateText = strippedCandidate.trimStart(); + const allowAmbiguousMarkerFree = options?.allowAmbiguousMarkerFree || strippedCandidate !== text; const envelopePrefixMatch = candidateText.match(INBOUND_ENVELOPE_PREFIX_RE) ?? - (INBOUND_ENVELOPE_KNOWN_CHANNEL_PREFIX_RE - ? candidateText.match(INBOUND_ENVELOPE_KNOWN_CHANNEL_PREFIX_RE) - : null); + matchKnownChannelMarkerFreeEnvelopePrefix(candidateText, { + allowAmbiguousDirect: allowAmbiguousMarkerFree, + }); if (!envelopePrefixMatch) { - return text; + return strippedCandidate === text ? text : candidateText; } const headerInside = envelopePrefixMatch[1] ?? ""; const afterBracket = candidateText.slice(envelopePrefixMatch[0].length); return stripEnvelopeBodySenderPrefix(afterBracket, headerInside); } -function findFirstInboundEnvelopeLineIndex(text: string): number { - for (const match of text.matchAll(BRACKETED_LINE_PREFIX_RE)) { - const index = match.index; - const candidate = text.slice(index); - if ( - INBOUND_ENVELOPE_PREFIX_RE.test(candidate) || - INBOUND_ENVELOPE_KNOWN_CHANNEL_PREFIX_RE?.test(candidate) - ) { - return index; - } - } - return -1; -} - function stripLeadingChronologicalContextBlocks(text: string): string { let cleaned = text; let remainingPasses = INBOUND_META_SENTINELS.length; @@ -1062,8 +1130,36 @@ function stripLeadingChronologicalContextBlocks(text: string): string { return cleaned; } const afterLabel = cleaned.slice(match[0].length); - const envelopeIndex = findFirstInboundEnvelopeLineIndex(afterLabel); - cleaned = envelopeIndex === -1 ? "" : afterLabel.slice(envelopeIndex); + const bodyStart = afterLabel.search(/\S/); + if (bodyStart === -1) { + return ""; + } + const bodyLineEnd = afterLabel.indexOf("\n", bodyStart); + const firstBodyLine = + bodyLineEnd === -1 ? afterLabel.slice(bodyStart) : afterLabel.slice(bodyStart, bodyLineEnd); + let lineEnvelopeIndex = firstBodyLine.trimStart().startsWith("[") + ? findFirstInboundEnvelopeIndex(firstBodyLine, { + allowAmbiguousMarkerFree: true, + skipReplyQuoteLine: true, + }) + : -1; + if (lineEnvelopeIndex === -1 && match[0].includes("selected for current message")) { + const inlineEnvelopeIndex = findFirstInboundEnvelopeIndex(firstBodyLine, { + allowAmbiguousMarkerFree: true, + skipReplyQuoteLine: true, + }); + const prefix = inlineEnvelopeIndex === -1 ? "" : firstBodyLine.slice(0, inlineEnvelopeIndex); + lineEnvelopeIndex = /^#\d+\s/.test(prefix.trimStart()) ? inlineEnvelopeIndex : -1; + } + const envelopeIndex = lineEnvelopeIndex === -1 ? -1 : bodyStart + lineEnvelopeIndex; + if (envelopeIndex === -1) { + const separatorMatch = /\n[ \t]*\n/.exec(afterLabel); + cleaned = separatorMatch + ? afterLabel.slice(separatorMatch.index + separatorMatch[0].length) + : ""; + } else { + cleaned = afterLabel.slice(envelopeIndex); + } if (!cleaned) { return ""; } @@ -1084,16 +1180,22 @@ export function sanitizeForMemoryCapture(text: string): string { // Pre-truncate to cap regex work on very large inputs (ReDoS mitigation) const MAX_SANITIZE_CHARS = 10_000; let cleaned = text.length > MAX_SANITIZE_CHARS ? text.slice(0, MAX_SANITIZE_CHARS) : text; + let strippedInjectedContext = false; // Strip leading timestamp prefix cleaned = cleaned.replace(LEADING_TIMESTAMP_PREFIX_RE, ""); + const afterDeliveryHints = stripLeadingMessageToolDeliveryHints(cleaned); + strippedInjectedContext ||= afterDeliveryHints !== cleaned; + cleaned = afterDeliveryHints; // Strip inbound metadata blocks: generic label line + optional ```json + // content + ```. This deliberately mirrors `looksLikeEnvelopeSludge`'s // generic label coverage so current reply-chain, location, and plugin-owned // structured-context labels do not make `shouldCapture` reject the useful // user body that follows. - cleaned = cleaned.replace(INBOUND_META_LABEL_JSON_BLOCK_RE, ""); + const afterJsonMetaBlocks = cleaned.replace(INBOUND_META_LABEL_JSON_BLOCK_RE, ""); + strippedInjectedContext ||= afterJsonMetaBlocks !== cleaned; + cleaned = afterJsonMetaBlocks; // First strip legacy/inline sentinel+code-fence blocks; each replace removes // the entire block including its sentinel header so iteration order does not @@ -1104,12 +1206,16 @@ export function sanitizeForMemoryCapture(text: string): string { `${escapedSentinel}\\s*\\n\\s*\`\`\`json\\s*\\n[\\s\\S]*?\\n\\s*\`\`\`\\s*\\n?`, "g", ); - cleaned = cleaned.replace(blockRe, ""); + const afterSentinelBlock = cleaned.replace(blockRe, ""); + strippedInjectedContext ||= afterSentinelBlock !== cleaned; + cleaned = afterSentinelBlock; } // Plain chat-window context blocks are untrusted history lines rather than // JSON metadata. When they lead the prompt, keep only the following real // inbound envelope; if no envelope follows, drop the context block entirely. - cleaned = stripLeadingChronologicalContextBlocks(cleaned); + const afterChronologicalContext = stripLeadingChronologicalContextBlocks(cleaned); + strippedInjectedContext ||= afterChronologicalContext !== cleaned; + cleaned = afterChronologicalContext; // For labels/sentinels that survived the code-fence strip (plain-text body, // no JSON fence), act on the earliest line-anchored metadata header each // pass. A bounded retry cap rules out pathological input from spinning @@ -1153,18 +1259,32 @@ export function sanitizeForMemoryCapture(text: string): string { const lineEnd = cleaned.indexOf("\n"); const afterHeader = lineEnd === -1 ? "" : cleaned.slice(lineEnd + 1); if (!afterHeader.trimStart().startsWith("```json")) { - cleaned = stripLeadingPlainTextMetadataBody(afterHeader); + const afterPlainTextMetadata = stripLeadingPlainTextMetadataBody(afterHeader); + strippedInjectedContext ||= afterPlainTextMetadata !== cleaned; + cleaned = afterPlainTextMetadata; continue; } } - cleaned = cleaned.replace(earliestMetaRe, ""); + const afterMetaHeader = cleaned.replace(earliestMetaRe, ""); + strippedInjectedContext ||= afterMetaHeader !== cleaned; + cleaned = afterMetaHeader; } + // Active-memory context can be prepended before the real user prompt; strip + // that known block before the generic untrusted-context truncation below. + const afterActiveMemoryContext = cleaned.replace( + /^Untrusted context \(metadata[^\n]*\n[\s\S]*?<\/active_memory_plugin>\s*/gm, + "", + ); + strippedInjectedContext ||= afterActiveMemoryContext !== cleaned; + cleaned = afterActiveMemoryContext; + // Strip the "Untrusted context (metadata..." header and everything after it, // but only when it appears at the start of a line to avoid false positives // on user content that happens to quote the phrase mid-line. const untrustedLineMatch = /^Untrusted context \(metadata/m.exec(cleaned); if (untrustedLineMatch) { + strippedInjectedContext = true; cleaned = cleaned.slice(0, untrustedLineMatch.index); } @@ -1174,7 +1294,9 @@ export function sanitizeForMemoryCapture(text: string): string { // The bracket precedes the user's body text; for non-direct envelopes the // body is prefixed with `: ` and for direct fromMe with `(self): `, // so strip that too when the surviving label matches the formatter contract. - cleaned = stripLeadingInboundEnvelope(cleaned); + cleaned = stripLeadingInboundEnvelope(cleaned, { + allowAmbiguousMarkerFree: strippedInjectedContext, + }); // Strip [media attached: ...] and [media attached N/M: ...] annotations cleaned = cleaned.replace(MEDIA_ATTACHED_PATTERN, ""); @@ -1517,20 +1639,19 @@ export default definePluginEntry({ const vector = await embeddings.embed(text); - // Check for duplicates - const existing = await db.search(vector, 1, 0.95); - if (existing.length > 0) { + const existing = await findCleanDuplicateMemory(db, vector); + if (existing) { return { content: [ { type: "text", - text: `Similar memory already exists: "${existing[0].entry.text}"`, + text: `Similar memory already exists: "${existing.entry.text}"`, }, ], details: { action: "duplicate", - existingId: existing[0].entry.id, - existingText: existing[0].entry.text, + existingId: existing.entry.id, + existingText: existing.entry.text, }, }; } @@ -1851,9 +1972,8 @@ export default definePluginEntry({ const category = detectCategory(sanitized); const vector = await embeddings.embed(sanitized); - // Check for duplicates (high similarity threshold) - const existing = await db.search(vector, 1, 0.95); - if (existing.length > 0) { + const existing = await findCleanDuplicateMemory(db, vector); + if (existing) { continue; } diff --git a/extensions/memory-lancedb/package.json b/extensions/memory-lancedb/package.json index abf6e730799d..1bf55f6cbf85 100644 --- a/extensions/memory-lancedb/package.json +++ b/extensions/memory-lancedb/package.json @@ -23,7 +23,7 @@ "install": { "npmSpec": "@openclaw/memory-lancedb", "defaultChoice": "npm", - "minHostVersion": ">=2026.4.10" + "minHostVersion": ">=2026.5.31" }, "compat": { "pluginApi": ">=2026.5.31" diff --git a/scripts/lib/official-external-plugin-catalog.json b/scripts/lib/official-external-plugin-catalog.json index 0e08a5203975..517d560eb9eb 100644 --- a/scripts/lib/official-external-plugin-catalog.json +++ b/scripts/lib/official-external-plugin-catalog.json @@ -186,7 +186,7 @@ "install": { "npmSpec": "@openclaw/memory-lancedb", "defaultChoice": "npm", - "minHostVersion": ">=2026.4.10" + "minHostVersion": ">=2026.5.31" } } }, diff --git a/scripts/lib/plugin-sdk-doc-metadata.ts b/scripts/lib/plugin-sdk-doc-metadata.ts index 6356a6fc2cca..6ef4079b96dd 100644 --- a/scripts/lib/plugin-sdk-doc-metadata.ts +++ b/scripts/lib/plugin-sdk-doc-metadata.ts @@ -56,6 +56,9 @@ export const pluginSdkDocMetadata = { "channel-config-schema-legacy": { category: "channel", }, + "chat-channel-ids": { + category: "channel", + }, "channel-contract": { category: "channel", }, diff --git a/src/auto-reply/envelope.test.ts b/src/auto-reply/envelope.test.ts index 31ae8be2a232..f91987548bc7 100644 --- a/src/auto-reply/envelope.test.ts +++ b/src/auto-reply/envelope.test.ts @@ -103,7 +103,7 @@ describe("formatInboundEnvelope", () => { expect(body).toBe("[Signal Signal Group id:123] Bob (42): ping"); }); - it("keeps direct messages unprefixed", () => { + it("prefixes direct messages with the header sender", () => { const body = formatInboundEnvelope({ channel: "iMessage", from: "+1555", @@ -111,7 +111,37 @@ describe("formatInboundEnvelope", () => { chatType: "direct", senderLabel: "Alice", }); - expect(body).toBe("[iMessage +1555] hello"); + expect(body).toBe("[iMessage +1555] +1555: hello"); + }); + + it("uses display text for direct body prefixes when from includes an id", () => { + const body = formatInboundEnvelope({ + channel: "Telegram", + from: "Alice id:123", + body: "hello", + chatType: "direct", + }); + expect(body).toBe("[Telegram Alice id:123] Alice: hello"); + }); + + it("uses a stable direct body prefix when id display text contains a colon", () => { + const body = formatInboundEnvelope({ + channel: "Telegram", + from: "Ops: Alice id:123", + body: "/status", + chatType: "direct", + }); + expect(body).toBe("[Telegram Ops: Alice id:123] (sender): /status"); + }); + + it("uses a stable direct body prefix when from is an opaque id label", () => { + const body = formatInboundEnvelope({ + channel: "LINE", + from: "user:U123", + body: "hello", + chatType: "direct", + }); + expect(body).toBe("[LINE user:U123] (sender): hello"); }); it("includes elapsed time when previousTimestamp is provided", () => { @@ -141,7 +171,7 @@ describe("formatInboundEnvelope", () => { chatType: "direct", envelope: { includeElapsed: false, includeTimestamp: false }, }); - expect(body).toBe("[Telegram Alice] follow-up message"); + expect(body).toBe("[Telegram Alice] Alice: follow-up message"); }); it("prefixes DM body with (self) when fromMe is true", () => { diff --git a/src/auto-reply/envelope.ts b/src/auto-reply/envelope.ts index 8feeef7c17ad..d3a84f23db14 100644 --- a/src/auto-reply/envelope.ts +++ b/src/auto-reply/envelope.ts @@ -153,6 +153,16 @@ export function formatEnvelopeTimestamp( return weekday ? `${weekday} ${formatted}` : formatted; } +function resolveDirectEnvelopeBodyLabel(from: string | undefined): string { + const label = sanitizeEnvelopeHeaderPart(from || ""); + const idMarkerIndex = label.search(/\s+id:/i); + if (idMarkerIndex > 0) { + const displayLabel = label.slice(0, idMarkerIndex).trim(); + return displayLabel.includes(":") ? "(sender)" : displayLabel; + } + return label.includes(":") ? "(sender)" : label; +} + export function formatAgentEnvelope(params: AgentEnvelopeParams): string { const channel = sanitizeEnvelopeHeaderPart(normalizeOptionalString(params.channel) || "Channel"); const parts: string[] = [channel]; @@ -211,12 +221,15 @@ export function formatInboundEnvelope(params: { const resolvedSenderRaw = normalizeOptionalString(params.senderLabel) || resolveSenderLabel(params.sender ?? {}); const resolvedSender = resolvedSenderRaw ? sanitizeEnvelopeHeaderPart(resolvedSenderRaw) : ""; + const directSender = resolveDirectEnvelopeBodyLabel(normalizeOptionalString(params.from)); const body = isDirect && params.fromMe ? `(self): ${params.body}` - : !isDirect && resolvedSender - ? `${resolvedSender}: ${params.body}` - : params.body; + : isDirect && directSender + ? `${directSender}: ${params.body}` + : !isDirect && resolvedSender + ? `${resolvedSender}: ${params.body}` + : params.body; return formatAgentEnvelope({ channel: params.channel, from: params.from, diff --git a/src/auto-reply/reply/mentions.test.ts b/src/auto-reply/reply/mentions.test.ts index c2f0e848b836..0299db793600 100644 --- a/src/auto-reply/reply/mentions.test.ts +++ b/src/auto-reply/reply/mentions.test.ts @@ -14,6 +14,21 @@ describe("stripStructuralPrefixes", () => { expect(stripStructuralPrefixes("John: hello")).toBe("hello"); }); + it("preserves colon-delimited slash commands", () => { + expect(stripStructuralPrefixes("/config:json")).toBe("/config:json"); + }); + + it("strips direct envelope display labels with handles", () => { + expect( + stripStructuralPrefixes("[Telegram Alice (@alice) id:123] Alice (@alice): /status"), + ).toBe("/status"); + }); + + it("strips direct envelope display labels with non-ascii characters", () => { + expect(stripStructuralPrefixes("[Telegram Jörg] Jörg: /status")).toBe("/status"); + expect(stripStructuralPrefixes("[Telegram 山田] 山田: /status")).toBe("/status"); + }); + it("passes through plain text", () => { expect(stripStructuralPrefixes("just a message")).toBe("just a message"); }); diff --git a/src/auto-reply/reply/mentions.ts b/src/auto-reply/reply/mentions.ts index c32c8475f59f..4d9d2daaad10 100644 --- a/src/auto-reply/reply/mentions.ts +++ b/src/auto-reply/reply/mentions.ts @@ -187,7 +187,7 @@ export function stripStructuralPrefixes(text: string): string { return afterMarker .replace(/\[[^\]]+\]\s*/g, "") - .replace(/^[ \t]*[A-Za-z0-9+()\-_. ]+:\s*/gm, "") + .replace(/^[ \t]*[^\n:]{1,120}:\s+/gm, "") .replace(/\\n/g, " ") .replace(/\s+/g, " ") .trim();