mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-06 05:51:15 +08:00
docs: document voice call helper APIs
This commit is contained in:
@@ -1,6 +1,9 @@
|
||||
import type { OpenClawPluginApi } from "../api.js";
|
||||
import type { VoiceCallTtsConfig } from "./config.js";
|
||||
|
||||
// Narrow core runtime/config contracts consumed by the voice-call plugin.
|
||||
|
||||
/** Core config subset read by voice-call helpers. */
|
||||
export type CoreConfig = {
|
||||
session?: {
|
||||
store?: string;
|
||||
@@ -11,4 +14,5 @@ export type CoreConfig = {
|
||||
[key: string]: unknown;
|
||||
};
|
||||
|
||||
/** Agent runtime API subset exposed through the plugin SDK. */
|
||||
// Indexed-access alias: resolves to whatever type OpenClawPluginApi declares for `runtime.agent`,
// so this file never redeclares the agent runtime shape itself.
export type CoreAgentDeps = OpenClawPluginApi["runtime"]["agent"];
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import { isRecord as isPlainObject } from "openclaw/plugin-sdk/string-coerce-runtime";
|
||||
|
||||
// Prototype-safe deep merge for config overrides that ignores undefined values.
|
||||
|
||||
/** Property names the merge must never copy, because writing them can reach an object's prototype. */
const BLOCKED_MERGE_KEYS = new Set<string>([
  "__proto__",
  "prototype",
  "constructor",
]);
|
||||
|
||||
/** Deep-merge plain objects, keeping base values when overrides are undefined. */
|
||||
export function deepMergeDefined(base: unknown, override: unknown): unknown {
|
||||
if (!isPlainObject(base) || !isPlainObject(override)) {
|
||||
return override === undefined ? base : override;
|
||||
@@ -13,6 +16,7 @@ export function deepMergeDefined(base: unknown, override: unknown): unknown {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Blocked keys above prevent prototype pollution while preserving normal nested overrides.
|
||||
const existing = result[key];
|
||||
result[key] = key in result ? deepMergeDefined(existing, value) : value;
|
||||
}
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/string-coerce-runtime";
|
||||
|
||||
// Case-insensitive HTTP header lookup for provider webhook handlers.
|
||||
|
||||
/** Header bag accepted by getHeader: each header may be absent, a single value, or a list of values. */
type HttpHeaderMap = Record<string, string | string[] | undefined>;
|
||||
|
||||
/** Return the first value for a header name regardless of caller casing. */
|
||||
export function getHeader(headers: HttpHeaderMap, name: string): string | undefined {
|
||||
const target = normalizeLowercaseStringOrEmpty(name);
|
||||
const direct = headers[target];
|
||||
|
||||
@@ -5,6 +5,9 @@ import { normalizeOptionalString as normalizeString } from "openclaw/plugin-sdk/
|
||||
import type { VoiceCallConfig } from "./config.js";
|
||||
import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js";
|
||||
|
||||
// Builds compact agent context injected into realtime voice sessions.
|
||||
|
||||
/** Agent identity subset used by voice instructions. */
|
||||
type VoiceIdentityLike = {
|
||||
name?: unknown;
|
||||
emoji?: unknown;
|
||||
@@ -13,6 +16,7 @@ type VoiceIdentityLike = {
|
||||
vibe?: unknown;
|
||||
};
|
||||
|
||||
/** Limit injected context while preserving an explicit truncation marker. */
|
||||
function limitText(text: string, maxChars: number): string {
|
||||
if (text.length <= maxChars) {
|
||||
return text;
|
||||
@@ -20,6 +24,7 @@ function limitText(text: string, maxChars: number): string {
|
||||
return `${text.slice(0, Math.max(0, maxChars - 32)).trimEnd()}\n[truncated]`;
|
||||
}
|
||||
|
||||
/** Read configured workspace context files through the safe workspace root. */
|
||||
async function readWorkspaceVoiceContextFiles(params: {
|
||||
workspaceDir: string;
|
||||
files: readonly string[];
|
||||
@@ -48,6 +53,7 @@ async function readWorkspaceVoiceContextFiles(params: {
|
||||
return sections;
|
||||
}
|
||||
|
||||
/** Build final realtime instructions from base instructions, consult policy, and fast context. */
|
||||
export async function buildRealtimeVoiceInstructions(params: {
|
||||
baseInstructions: string;
|
||||
config: VoiceCallConfig;
|
||||
@@ -98,6 +104,7 @@ export async function buildRealtimeVoiceInstructions(params: {
|
||||
params.coreConfig as OpenClawConfig,
|
||||
agentId,
|
||||
);
|
||||
// Workspace reads stay under the agent root; missing or unreadable context files are omitted.
|
||||
const fileSections = await readWorkspaceVoiceContextFiles({
|
||||
workspaceDir,
|
||||
files: contextConfig.files,
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME } from "openclaw/plugin-sdk/realtime-voice";
|
||||
|
||||
// Default realtime instructions for the voice-call plugin's phone interface.
|
||||
|
||||
/** Baseline instructions that keep realtime calls brief and route deep work to agent consult. */
|
||||
export const DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS = [
  // Role statement for the realtime model.
  "You are OpenClaw's phone-call realtime voice interface.",
  // Spoken-reply style.
  "Keep spoken replies brief and natural.",
  // Escalation rule: names the consult tool the model should call first.
  `When a question needs deeper reasoning, current information, or tools, call ${REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME} before answering.`,
].join(" "); // sentences are separated by exactly one space
|
||||
|
||||
@@ -9,6 +9,9 @@ type Logger = {
|
||||
debug?: (message: string) => void;
|
||||
};
|
||||
|
||||
// Voice-call labels for the SDK realtime fast-context resolver.
|
||||
|
||||
/** Resolve fast-context consult data using caller-oriented labels. */
|
||||
export async function resolveRealtimeFastContextConsult(params: {
|
||||
cfg: OpenClawConfig;
|
||||
agentId: string;
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
import type { VoiceCallConfig } from "./config.js";
|
||||
import type { CoreAgentDeps } from "./core-bridge.js";
|
||||
|
||||
// Resolves the model used for voice-call text response generation.
|
||||
|
||||
/** Resolve provider/model fields from explicit voice config or agent defaults. */
|
||||
export function resolveVoiceResponseModel(params: {
|
||||
voiceConfig: VoiceCallConfig;
|
||||
agentRuntime: CoreAgentDeps;
|
||||
|
||||
@@ -10,6 +10,9 @@ import type { CoreConfig } from "./core-bridge.js";
|
||||
import { deepMergeDefined } from "./deep-merge.js";
|
||||
import { convertPcmToMulaw8k } from "./telephony-audio.js";
|
||||
|
||||
// Telephony TTS adapter that applies voice-call overrides and emits 8kHz mulaw audio.
|
||||
|
||||
/** Core runtime TTS API used by the telephony adapter. */
|
||||
export type TelephonyTtsRuntime = {
|
||||
textToSpeechTelephony: (params: {
|
||||
text: string;
|
||||
@@ -27,13 +30,16 @@ export type TelephonyTtsRuntime = {
|
||||
}>;
|
||||
};
|
||||
|
||||
/**
 * Provider facade used by Twilio/webhook code for telephony synthesis.
 *
 * Callers depend only on this shape, not on the core TTS runtime behind it.
 */
export type TelephonyTtsProvider = {
  /** Time budget for one synthesis request (milliseconds, per the `Ms` suffix). */
  synthesisTimeoutMs: number;
  /**
   * Synthesize `text` and resolve with the audio bytes.
   * NOTE(review): presumably 8kHz mulaw, as the adapter's header comment describes; confirm
   * against the provider factory.
   */
  synthesizeForTelephony: (text: string) => Promise<Buffer>;
};
|
||||
|
||||
/** Default timeout for one telephony synthesis request. */
|
||||
export const TELEPHONY_DEFAULT_TTS_TIMEOUT_MS = 8_000; // milliseconds, per the `_MS` suffix
|
||||
|
||||
/** Voice-call override policy for inline TTS model directives. */
|
||||
type TelephonyModelOverrideConfig = {
|
||||
enabled?: boolean;
|
||||
allowText?: boolean;
|
||||
@@ -45,6 +51,7 @@ type TelephonyModelOverrideConfig = {
|
||||
allowSeed?: boolean;
|
||||
};
|
||||
|
||||
/** Create a TTS provider that honors voice-call overrides and converts PCM to mulaw. */
|
||||
export function createTelephonyTtsProvider(params: {
|
||||
coreConfig: CoreConfig;
|
||||
ttsOverride?: VoiceCallTtsConfig;
|
||||
@@ -107,6 +114,7 @@ export function createTelephonyTtsProvider(params: {
|
||||
};
|
||||
}
|
||||
|
||||
/** Apply voice-call TTS overrides to core config without mutating the original object. */
|
||||
function applyTtsOverride(coreConfig: CoreConfig, override?: VoiceCallTtsConfig): CoreConfig {
|
||||
if (!override) {
|
||||
return coreConfig;
|
||||
@@ -127,6 +135,7 @@ function applyTtsOverride(coreConfig: CoreConfig, override?: VoiceCallTtsConfig)
|
||||
};
|
||||
}
|
||||
|
||||
/** Merge core and voice-call TTS config, keeping undefined override fields out. */
|
||||
function mergeTtsConfig(
|
||||
base?: VoiceCallTtsConfig,
|
||||
override?: VoiceCallTtsConfig,
|
||||
@@ -143,6 +152,7 @@ function mergeTtsConfig(
|
||||
return deepMergeDefined(base, override) as VoiceCallTtsConfig;
|
||||
}
|
||||
|
||||
/** Resolve directive override policy for telephony synthesis. */
|
||||
function resolveTelephonyModelOverridePolicy(
|
||||
overrides: TelephonyModelOverrideConfig | undefined,
|
||||
): SpeechModelOverridePolicy {
|
||||
@@ -172,6 +182,7 @@ function resolveTelephonyModelOverridePolicy(
|
||||
};
|
||||
}
|
||||
|
||||
/** Read model override policy from TTS config when present. */
|
||||
function readTelephonyModelOverrides(
|
||||
ttsConfig: VoiceCallTtsConfig | undefined,
|
||||
): TelephonyModelOverrideConfig | undefined {
|
||||
@@ -181,16 +192,19 @@ function readTelephonyModelOverrides(
|
||||
: undefined;
|
||||
}
|
||||
|
||||
/**
 * Normalize provider ids for config lookup.
 *
 * @param value - Candidate id of any type; only strings are accepted.
 * @returns The trimmed, lowercased id, or `undefined` when `value` is not a string
 *   or is empty/whitespace-only.
 */
function normalizeProviderId(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const normalized = value.trim().toLowerCase();
  // An empty result means "no provider", not an id that could match a config key.
  return normalized === "" ? undefined : normalized;
}
|
||||
|
||||
/**
 * Coerce provider config objects while rejecting arrays and primitives.
 *
 * @param value - Raw config value of unknown shape.
 * @returns `value` itself (same reference) when it is a non-null, non-array object;
 *   otherwise a fresh empty object.
 */
function asProviderConfig(value: unknown): SpeechProviderConfig {
  // `typeof null` is "object", so the truthiness check is what excludes null.
  const isObjectLike = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (!isObjectLike) {
    return {};
  }
  return value as SpeechProviderConfig;
}
|
||||
|
||||
/** Collect named provider configs from canonical and legacy TTS config shapes. */
|
||||
function collectTelephonyProviderConfigs(
|
||||
ttsConfig: VoiceCallTtsConfig | undefined,
|
||||
): Record<string, SpeechProviderConfig> {
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
// Shared webhook response contract for voice-call providers.
|
||||
|
||||
/** HTTP response payload returned by provider webhook handlers. */
|
||||
export type WebhookResponsePayload = {
|
||||
statusCode: number;
|
||||
body: string;
|
||||
|
||||
Reference in New Issue
Block a user