mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-06 05:51:15 +08:00
fix: accept leading fuzzy Discord voice wake names (#86484)
This commit is contained in:
committed by
GitHub
parent
5d018034f6
commit
8fe4f34af2
@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- Discord/OpenAI voice: accept leading fuzzy wake-name transcripts such as "Monty" or "Moti" for a Molty agent while keeping ambient speech gated.
|
||||
- Discord/OpenAI voice: rotate Realtime sessions at provider max duration without logging the expected session-expiry event as an error.
|
||||
- Memory/local embeddings: run local GGUF embeddings in an isolated worker sidecar and degrade to configured fallback or keyword search on worker failure so native embedding crashes do not take down the Gateway. (#85348) Thanks @osolmaz.
|
||||
- Gateway: clear the runtime config snapshot before `SIGUSR1` in-process restarts so config changes survive the next gateway loop. (#86388) Thanks @XuZehan-iCenter.
|
||||
|
||||
@@ -2966,6 +2966,133 @@ describe("DiscordVoiceManager", () => {
|
||||
expectUserMessageIncludes("openclaw wake answer");
|
||||
});
|
||||
|
||||
it("accepts leading fuzzy wake names before realtime agent-proxy consults", async () => {
  // Agent is named "Molty"; the wake-name gate is on for the OpenAI realtime provider.
  const manager = createManager(
    {
      groupPolicy: "open",
      voice: {
        enabled: true,
        mode: "agent-proxy",
        realtime: { provider: "openai", consultPolicy: "auto", requireWakeName: true },
      },
    },
    undefined,
    {
      agents: {
        list: [{ id: "agent-1", identity: { name: "Molty" } }],
      },
    },
  );

  await manager.join({ guildId: "g1", channelId: "1001" });
  const entry = getSessionEntry(manager) as {
    realtime?: {
      beginSpeakerTurn: (
        context: { extraSystemPrompt?: string; senderIsOwner: boolean; speakerLabel: string },
        userId: string,
      ) => { close: () => void; sendInputAudio: (audio: Buffer) => void };
    };
  };
  const bridgeParams = lastRealtimeBridgeParams() as
    | {
        onTranscript?: (role: "user" | "assistant", text: string, isFinal: boolean) => void;
      }
    | undefined;

  // One owner turn: open the turn, push a little audio, deliver the final
  // transcript, then wait 260 ms before inspecting the agent command mock.
  const speakAsOwner = async (transcript: string): Promise<void> => {
    const turn = entry.realtime?.beginSpeakerTurn(
      { extraSystemPrompt: undefined, senderIsOwner: true, speakerLabel: "Owner" },
      "u-owner",
    );
    turn?.sendInputAudio(Buffer.alloc(8));
    bridgeParams?.onTranscript?.("user", transcript, true);
    await new Promise((resolve) => setTimeout(resolve, 260));
  };

  // Each accepted transcript must reach the agent with the heard name stripped.
  const acceptedTurns = [
    { transcript: "Monty, are you with us?", forwarded: "are you with us?", heard: "Monty" },
    { transcript: "Moti, what's going on today?", forwarded: "what's going on today?", heard: "Moti" },
    {
      transcript: "Open claw can you still hear me?",
      forwarded: "can you still hear me?",
      heard: "Open claw",
    },
  ];

  for (const [callIndex, turn] of acceptedTurns.entries()) {
    await speakAsOwner(turn.transcript);

    const forwardedMessage = agentCommandArgsAt(callIndex).message;
    expect(forwardedMessage).toContain(turn.forwarded);
    expect(forwardedMessage).not.toContain(turn.heard);
  }
});
|
||||
|
||||
it("rejects non-wake fuzzy leading phrases before realtime agent-proxy consults", async () => {
  const manager = createManager(
    {
      groupPolicy: "open",
      voice: {
        enabled: true,
        mode: "agent-proxy",
        realtime: { provider: "openai", consultPolicy: "auto", requireWakeName: true },
      },
    },
    undefined,
    {
      agents: {
        list: [{ id: "agent-1", identity: { name: "Molty" } }],
      },
    },
  );

  await manager.join({ guildId: "g1", channelId: "1001" });
  const entry = getSessionEntry(manager) as {
    realtime?: {
      beginSpeakerTurn: (
        context: { extraSystemPrompt?: string; senderIsOwner: boolean; speakerLabel: string },
        userId: string,
      ) => { close: () => void; sendInputAudio: (audio: Buffer) => void };
    };
  };
  const bridgeParams = lastRealtimeBridgeParams() as
    | {
        onTranscript?: (role: "user" | "assistant", text: string, isFinal: boolean) => void;
      }
    | undefined;

  // Every interaction below is optional-chained and the only assertion is a
  // negative one, so without these guards the test would pass vacuously if the
  // realtime session or the transcript callback had never been wired up.
  expect(entry.realtime).toBeDefined();
  expect(bridgeParams?.onTranscript).toBeDefined();

  const rejectedTranscripts = [
    // "multi" is two edits from "molty" at the same length, which the fuzzy matcher refuses.
    "Multi, step through the maintainer queue.",
    // Ambient speech: the near-miss word is not at the start of the transcript.
    "This is a multi-step maintainer problem.",
    // "open law" is one edit from "openclaw" but is followed by more words, not punctuation.
    "Open law is not the wake phrase.",
  ];

  for (const transcript of rejectedTranscripts) {
    const turn = entry.realtime?.beginSpeakerTurn(
      { extraSystemPrompt: undefined, senderIsOwner: true, speakerLabel: "Owner" },
      "u-owner",
    );
    turn?.sendInputAudio(Buffer.alloc(8));
    bridgeParams?.onTranscript?.("user", transcript, true);
    // Same settle time the accepting test uses before it inspects the mock.
    await new Promise((resolve) => setTimeout(resolve, 260));
  }

  expect(agentCommandMock).not.toHaveBeenCalled();
});
|
||||
|
||||
it("leaves non-OpenAI agent-proxy realtime auto-response enabled when wake names are requested", async () => {
|
||||
resolveConfiguredRealtimeVoiceProviderMock.mockReturnValueOnce({
|
||||
provider: { id: "google" },
|
||||
|
||||
@@ -64,6 +64,7 @@ const DISCORD_REALTIME_FORCED_CONSULT_FALLBACK_DELAY_MS = 200;
|
||||
// NOTE(review): the consumers of most of these constants are outside this
// excerpt; the descriptions below are inferred from the names — confirm at the use sites.

// Presumably the window, in milliseconds, for suppressing repeated realtime errors.
const DISCORD_REALTIME_DUPLICATE_ERROR_SUPPRESS_MS = 60 * 1_000;
// Presumably the window, in milliseconds, for de-duplicating control speech.
const DISCORD_REALTIME_CONTROL_SPEECH_DEDUPE_MS = 5 * 1_000;
// Presumably extra slack, in milliseconds, granted to the output playback watchdog.
const DISCORD_REALTIME_OUTPUT_PLAYBACK_WATCHDOG_MARGIN_MS = 1_500;
// Maximum number of leading transcript words combined into wake-name candidates.
const DISCORD_REALTIME_WAKE_NAME_FUZZY_PREFIX_WORDS = 3;
// Bytes per PCM16 sample.
const REALTIME_PCM16_BYTES_PER_SAMPLE = 2;
// Size in bytes of one raw Discord PCM frame (frame duration/layout not shown here — TODO confirm).
const DISCORD_RAW_PCM_FRAME_BYTES = 3_840;
// Presumably the number of frames buffered before realtime output playback starts.
const DISCORD_REALTIME_OUTPUT_PREROLL_FRAMES = 25;
|
||||
@@ -353,6 +354,19 @@ function normalizeWakeName(value: string): string | undefined {
|
||||
return normalized || undefined;
|
||||
}
|
||||
|
||||
/**
 * Reduces free-form text to lower-case ASCII alphanumeric words joined by
 * single spaces.
 *
 * @param value Raw wake name or transcript prefix.
 * @returns The normalized words, or `undefined` when `value` contains no
 *   `a-z` / `0-9` characters after lower-casing.
 */
function normalizeWakeNameCandidate(value: string): string | undefined {
  // Splitting on runs of non-alphanumerics leaves empty strings at the edges
  // when the text starts or ends with punctuation/whitespace; drop those.
  const words = value
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    .filter((word) => word.length > 0);
  if (words.length === 0) {
    return undefined;
  }
  return words.join(" ");
}
|
||||
|
||||
/**
 * Squashes a wake name down to its ASCII letters and digits so that spacing
 * and punctuation differences ("open claw" vs "openclaw") compare equal.
 *
 * @param value Wake name or heard name; normally already normalized.
 * @returns The lower-cased value with every non `a-z` / `0-9` character removed.
 */
function compactWakeName(value: string): string {
  // Lower-case first: the character class below only knows lower-case ASCII,
  // so un-normalized input would otherwise silently lose its capital letters.
  return value.toLowerCase().replace(/[^a-z0-9]+/g, "");
}
|
||||
|
||||
/**
 * Escapes regular-expression metacharacters so the text can be embedded in a
 * pattern and matched literally.
 *
 * @param value Text to embed in a regular expression.
 * @returns `value` with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(value: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacters, (metaCharacter) => "\\" + metaCharacter);
}
|
||||
@@ -384,6 +398,136 @@ function stripLeadingWakeName(text: string, wakeName: string): string {
|
||||
.trim();
|
||||
}
|
||||
|
||||
/** A run of leading transcript words that might be a (mis)heard wake name. */
interface LeadingWakeNameCandidate {
  /** Normalized (lower-case, space-separated) words that were heard. */
  heardName: string;
  /** Index in the original transcript just past the last word of the candidate. */
  endIndex: number;
  /** True when the candidate is directly followed by punctuation or the end of the text. */
  strongBoundary: boolean;
}
|
||||
|
||||
/** Outcome of the wake-name gate for a transcript that is let through. */
type AllowedWakeNameTranscriptResult = {
  allowed: true;
  /** Transcript text to forward. */
  text: string;
  /** Configured wake name that matched. */
  wakeName: string;
  /** What was heard in place of the wake name. */
  heardName: string;
  /** Whether the heard name matched exactly or only approximately. */
  match: "exact" | "fuzzy";
};

/** Outcome of the wake-name gate: either allowed (with match details) or rejected. */
type WakeNameTranscriptResult =
  | AllowedWakeNameTranscriptResult
  | { allowed: false; text: string };
|
||||
|
||||
/**
 * Collects the wake-name candidates found at the very start of a transcript.
 *
 * After an optional opener ("hey", "ok", "okay"), the first one, two, ... up to
 * `DISCORD_REALTIME_WAKE_NAME_FUZZY_PREFIX_WORDS` words are each offered as a
 * cumulative candidate ("open", then "open claw", ...). Words only chain while
 * they are separated by whitespace, apostrophes or hyphens.
 *
 * @param text Raw transcript text.
 * @returns Candidates in order of increasing word count; empty when the
 *   transcript does not start with an ASCII alphanumeric word.
 */
function leadingWakeNameCandidates(text: string): LeadingWakeNameCandidate[] {
  // The opener group is optional, so this always matches (possibly just
  // leading whitespace); its length is where the name is expected to start.
  const opener = /^\s*(?:(?:hey|ok|okay)(?:\s*[-,:;]+\s*|\s+))?/i.exec(text);
  const nameStart = opener?.[0].length ?? 0;
  const candidates: LeadingWakeNameCandidate[] = [];
  // Created per call on purpose: a /g regex carries `lastIndex` state across exec() calls.
  const tokenPattern = /[a-z0-9]+/gi;
  tokenPattern.lastIndex = nameStart;
  let previousEnd = nameStart;

  while (candidates.length < DISCORD_REALTIME_WAKE_NAME_FUZZY_PREFIX_WORDS) {
    const token = tokenPattern.exec(text);
    if (!token) {
      break;
    }
    const between = text.slice(previousEnd, token.index);
    if (candidates.length === 0) {
      // The token pattern is ASCII-only, so it skips straight over words in
      // other scripts. If any letter or digit precedes the first token, that
      // token is not the leading word ("Да, Monty, ..." must be gated exactly
      // like "Yes, Monty, ..."). Leading punctuation and symbols remain fine.
      if (/[\p{L}\p{N}]/u.test(between)) {
        break;
      }
    } else if (!/^[\s'-]+$/.test(between)) {
      // Anything other than whitespace/apostrophe/hyphen ends the name phrase.
      break;
    }
    const endIndex = token.index + token[0].length;
    const heardName = normalizeWakeNameCandidate(text.slice(nameStart, endIndex));
    if (!heardName) {
      break;
    }
    candidates.push({
      heardName,
      endIndex,
      // "Strong" means the candidate is followed by punctuation or the end of the text.
      strongBoundary: /^\s*([,.:;!?-]|$)/.test(text.slice(endIndex)),
    });
    previousEnd = endIndex;
  }

  return candidates;
}
|
||||
|
||||
/**
 * Computes the Levenshtein edit distance (insertions, deletions and
 * substitutions, each costing 1) between two strings, compared per UTF-16
 * code unit.
 *
 * @param left First string.
 * @param right Second string.
 * @returns The minimum number of single-unit edits turning `left` into `right`.
 */
function levenshteinDistance(left: string, right: string): number {
  if (left === right) {
    return 0;
  }
  if (left.length === 0) {
    return right.length;
  }
  if (right.length === 0) {
    return left.length;
  }

  const width = right.length + 1;
  // Row for the empty prefix of `left`: distance to each prefix of `right`.
  let above: number[] = [];
  for (let column = 0; column < width; column += 1) {
    above.push(column);
  }

  for (let row = 1; row <= left.length; row += 1) {
    const below = new Array<number>(width);
    below[0] = row;
    const leftUnit = left.charCodeAt(row - 1);
    for (let column = 1; column < width; column += 1) {
      const substitution = above[column - 1] + (leftUnit === right.charCodeAt(column - 1) ? 0 : 1);
      const insertion = below[column - 1] + 1;
      const deletion = above[column] + 1;
      below[column] = Math.min(insertion, deletion, substitution);
    }
    above = below;
  }

  return above[width - 1] ?? Math.max(left.length, right.length);
}
|
||||
|
||||
/**
 * Decides whether a leading candidate is close enough to a wake name to count
 * as a mis-transcription of it.
 *
 * A match requires all of:
 * - the candidate is followed by punctuation or the end of the text;
 * - the compacted wake name has at least five characters;
 * - the compacted strings are at most one edit apart, or exactly two edits
 *   apart while differing in length.
 *
 * @param candidate Leading words heard in the transcript.
 * @param wakeName Configured wake name to compare against.
 * @returns `true` when the candidate should be treated as the wake name.
 */
function isFuzzyWakeNameMatch(candidate: LeadingWakeNameCandidate, wakeName: string): boolean {
  if (!candidate.strongBoundary) {
    return false;
  }

  const canonical = normalizeWakeNameCandidate(wakeName);
  if (!canonical) {
    return false;
  }

  const heard = compactWakeName(candidate.heardName);
  const target = compactWakeName(canonical);
  // Short wake names are excluded from fuzzy matching altogether.
  if (heard.length === 0 || target.length < 5) {
    return false;
  }

  switch (levenshteinDistance(heard, target)) {
    case 0:
    case 1:
      return true;
    case 2:
      // Two edits are only tolerated when a character was added or dropped,
      // not when two characters were swapped for others at the same length.
      return heard.length !== target.length;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Returns the transcript with the heard wake name and the separator that
 * follows it removed.
 *
 * @param text Original transcript.
 * @param candidate Candidate whose `endIndex` marks the end of the heard name.
 * @returns The trimmed remainder; empty when nothing follows the name.
 */
function stripLeadingWakeNameCandidate(text: string, candidate: LeadingWakeNameCandidate): string {
  const remainder = text.slice(candidate.endIndex);
  // Optional whitespace, then an optional run of punctuation with trailing
  // whitespace. Every part is optional, so the pattern always matches.
  const separator = /^\s*(?:[-,:;.!?]+\s*)?/.exec(remainder);
  const separatorLength = separator?.[0].length ?? 0;
  return remainder.slice(separatorLength).trim();
}
|
||||
|
||||
/**
 * Tries to recognise one of the configured wake names at the start of a
 * transcript, tolerating spacing differences and small mis-transcriptions.
 *
 * Candidates are tried shortest first and, for each candidate, wake names in
 * the order given; the first hit wins.
 *
 * @param text Raw transcript text.
 * @param wakeNames Configured wake names.
 * @returns An allowed result with the name stripped from the text, or
 *   `undefined` when no leading candidate matches.
 */
function matchLeadingFuzzyWakeName(
  text: string,
  wakeNames: string[],
): AllowedWakeNameTranscriptResult | undefined {
  // Wake names that normalize to nothing can never match; drop them once up front.
  const comparableWakeNames = wakeNames.flatMap((wakeName) => {
    const normalized = normalizeWakeNameCandidate(wakeName);
    return normalized ? [{ wakeName, compact: compactWakeName(normalized) }] : [];
  });

  for (const candidate of leadingWakeNameCandidates(text)) {
    const heardCompact = compactWakeName(candidate.heardName);
    for (const { wakeName, compact } of comparableWakeNames) {
      // Identical once spacing/punctuation is removed, e.g. "open claw" for "OpenClaw".
      const isExact = heardCompact === compact;
      if (!isExact && !isFuzzyWakeNameMatch(candidate, wakeName)) {
        continue;
      }
      return {
        allowed: true,
        text: stripLeadingWakeNameCandidate(text, candidate),
        wakeName,
        heardName: candidate.heardName,
        match: isExact ? "exact" : "fuzzy",
      };
    }
  }

  return undefined;
}
|
||||
|
||||
function resolveDiscordRealtimeWakeNames(params: {
|
||||
config: DiscordRealtimeVoiceConfig;
|
||||
cfg: OpenClawConfig;
|
||||
@@ -1273,13 +1417,26 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
|
||||
this.talkback.enqueue(acceptedText, this.consumePendingSpeakerContext());
|
||||
}
|
||||
|
||||
/**
 * Applies the wake-name gate to a transcript.
 *
 * - Gate disabled: the transcript passes through unchanged.
 * - A configured wake name is found by `includesWakeName`: allowed as an
 *   "exact" match, with the leading name stripped.
 * - Otherwise a leading fuzzy match is attempted; a hit is logged and allowed.
 * - Otherwise the transcript is rejected.
 *
 * @param text Transcript text to check.
 * @returns Whether the transcript may proceed, the text to forward and, when
 *   allowed, which wake name matched and how.
 */
private resolveWakeNameTranscript(text: string): WakeNameTranscriptResult {
  if (!this.requireWakeName) {
    // No name was heard or required, so the name fields are left empty.
    return { allowed: true, text, wakeName: "", heardName: "", match: "exact" };
  }
  const wakeName = this.wakeNames.find((name) => includesWakeName(text, name));
  if (wakeName) {
    return {
      allowed: true,
      text: stripLeadingWakeName(text, wakeName),
      wakeName,
      heardName: wakeName,
      match: "exact",
    };
  }
  const fuzzyWakeName = matchLeadingFuzzyWakeName(text, this.wakeNames);
  if (fuzzyWakeName) {
    // Log what was heard versus the canonical name so fuzzy acceptances can be audited.
    logger.info(
      `discord voice: realtime wake-name gate matched canonical=${fuzzyWakeName.wakeName} heard=${fuzzyWakeName.heardName} match=${fuzzyWakeName.match} voiceSession=${this.params.entry.voiceSessionKey} agent=${this.params.entry.route.agentId}`,
    );
    return fuzzyWakeName;
  }
  return { allowed: false, text };
}
|
||||
|
||||
Reference in New Issue
Block a user