mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-12 01:01:37 +08:00
Compare commits
18 Commits
fix/ui-sav
...
fix-models
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d0ee03bd20 | ||
|
|
9589e02362 | ||
|
|
4ab568cede | ||
|
|
9520b6d970 | ||
|
|
83f92e34af | ||
|
|
9366cbc7db | ||
|
|
d4f895d8f2 | ||
|
|
f08c34a73f | ||
|
|
6a9301c27d | ||
|
|
653401774d | ||
|
|
c6cdbb630c | ||
|
|
da2439f2cc | ||
|
|
92ab3f22dc | ||
|
|
43a6c5b77f | ||
|
|
495616d13e | ||
|
|
bac80f0886 | ||
|
|
ef078fec70 | ||
|
|
8e3ac01db6 |
@@ -19,14 +19,16 @@ Docs: https://docs.clawd.bot
|
||||
- Docs: add Bedrock EC2 instance role setup + IAM steps. (#1625) Thanks @sergical. https://docs.clawd.bot/bedrock
|
||||
- Exec approvals: forward approval prompts to chat with `/approve` for all channels (including plugins). (#1621) Thanks @czekaj. https://docs.clawd.bot/tools/exec-approvals https://docs.clawd.bot/tools/slash-commands
|
||||
- Gateway: expose config.patch in the gateway tool with safe partial updates + restart sentinel. (#1653) Thanks @Glucksberg.
|
||||
- Telegram: add `channels.telegram.linkPreview` to toggle outbound link previews. (#1700) Thanks @zerone0x. https://docs.clawd.bot/channels/telegram
|
||||
- Telegram: treat DM topics as separate sessions and keep DM history limits stable with thread suffixes. (#1597) Thanks @rohannagpal.
|
||||
- Telegram: add verbose raw-update logging for inbound Telegram updates. (#1597) Thanks @rohannagpal.
|
||||
|
||||
### Fixes
|
||||
- BlueBubbles: keep part-index GUIDs in reply tags when short IDs are missing.
|
||||
- Web UI: hide internal `message_id` hints in chat bubbles.
|
||||
- Web UI: allow form saves with unsupported schema paths while blocking missing schema; clear stale disconnect banners on reconnect. (#1678) Thanks @Glucksberg.
|
||||
- Web UI: show Stop button during active runs, swap back to New session when idle. (#1664) Thanks @ndbroadbent.
|
||||
- Web UI: clear stale disconnect banners on reconnect; allow form saves with unsupported schema paths but block missing schema. (#1707) Thanks @Glucksberg.
|
||||
- Auto-reply: don't treat `/models` as a `/model` directive. (#1753) Thanks @uos-status.
|
||||
- Heartbeat: normalize target identifiers for consistent routing.
|
||||
- TUI: reload history after gateway reconnect to restore session state. (#1663)
|
||||
- Telegram: use wrapped fetch for long-polling on Node to normalize AbortSignal handling. (#1639)
|
||||
@@ -40,6 +42,8 @@ Docs: https://docs.clawd.bot
|
||||
- Gateway: skip Tailscale DNS probing when tailscale.mode is off. (#1671)
|
||||
- Gateway: reduce log noise for late invokes + remote node probes; debounce skills refresh. (#1607) Thanks @petter-b.
|
||||
- Gateway: clarify Control UI/WebChat auth error hints for missing tokens. (#1690)
|
||||
- Gateway: listen on IPv6 loopback when bound to 127.0.0.1 so localhost webhooks work.
|
||||
- Gateway: store lock files in the temp directory to avoid stale locks on persistent volumes. (#1676)
|
||||
- macOS: default direct-transport `ws://` URLs to port 18789; document `gateway.remote.transport`. (#1603) Thanks @ngutman.
|
||||
- Voice Call: return stream TwiML for outbound conversation calls on initial Twilio webhook. (#1634)
|
||||
- Google Chat: tighten email allowlist matching, typing cleanup, media caps, and onboarding/docs/tests. (#1635) Thanks @iHildy.
|
||||
|
||||
@@ -17,7 +17,7 @@ read_when:
|
||||
- **Proxy:** optional `channels.telegram.proxy` uses `undici.ProxyAgent` through grammY’s `client.baseFetch`.
|
||||
- **Webhook support:** `webhook-set.ts` wraps `setWebhook/deleteWebhook`; `webhook.ts` hosts the callback with health + graceful shutdown. Gateway enables webhook mode when `channels.telegram.webhookUrl` is set (otherwise it long-polls).
|
||||
- **Sessions:** direct chats collapse into the agent main session (`agent:<agentId>:<mainKey>`); groups use `agent:<agentId>:telegram:group:<chatId>`; replies route back to the same channel.
|
||||
- **Config knobs:** `channels.telegram.botToken`, `channels.telegram.dmPolicy`, `channels.telegram.groups` (allowlist + mention defaults), `channels.telegram.allowFrom`, `channels.telegram.groupAllowFrom`, `channels.telegram.groupPolicy`, `channels.telegram.mediaMaxMb`, `channels.telegram.proxy`, `channels.telegram.webhookSecret`, `channels.telegram.webhookUrl`.
|
||||
- **Config knobs:** `channels.telegram.botToken`, `channels.telegram.dmPolicy`, `channels.telegram.groups` (allowlist + mention defaults), `channels.telegram.allowFrom`, `channels.telegram.groupAllowFrom`, `channels.telegram.groupPolicy`, `channels.telegram.mediaMaxMb`, `channels.telegram.linkPreview`, `channels.telegram.proxy`, `channels.telegram.webhookSecret`, `channels.telegram.webhookUrl`.
|
||||
- **Draft streaming:** optional `channels.telegram.streamMode` uses `sendMessageDraft` in private topic chats (Bot API 9.3+). This is separate from channel block streaming.
|
||||
- **Tests:** grammy mocks cover DM + group mention gating and outbound send; more media/webhook fixtures still welcome.
|
||||
|
||||
|
||||
@@ -525,6 +525,7 @@ Provider options:
|
||||
- `channels.telegram.replyToMode`: `off | first | all` (default: `first`).
|
||||
- `channels.telegram.textChunkLimit`: outbound chunk size (chars).
|
||||
- `channels.telegram.chunkMode`: `length` (default) or `newline` to split on newlines before length chunking.
|
||||
- `channels.telegram.linkPreview`: toggle link previews for outbound messages (default: true).
|
||||
- `channels.telegram.streamMode`: `off | partial | block` (draft streaming).
|
||||
- `channels.telegram.mediaMaxMb`: inbound/outbound media cap (MB).
|
||||
- `channels.telegram.retry`: retry policy for outbound Telegram API calls (attempts, minDelayMs, maxDelayMs, jitter).
|
||||
|
||||
@@ -89,6 +89,8 @@ Clawdbot ships with the pi‑ai catalog. These providers require **no**
|
||||
- Gemini CLI OAuth is shipped as a bundled plugin (`google-gemini-cli-auth`, disabled by default).
|
||||
- Enable: `clawdbot plugins enable google-gemini-cli-auth`
|
||||
- Login: `clawdbot models auth login --provider google-gemini-cli --set-default`
|
||||
- Note: you do **not** paste a client id or secret into `clawdbot.json`. The CLI login flow stores
|
||||
tokens in auth profiles on the gateway host.
|
||||
|
||||
### Z.AI (GLM)
|
||||
|
||||
|
||||
@@ -1021,6 +1021,7 @@ Set `channels.telegram.configWrites: false` to block Telegram-initiated config w
|
||||
],
|
||||
historyLimit: 50, // include last N group messages as context (0 disables)
|
||||
replyToMode: "first", // off | first | all
|
||||
linkPreview: true, // toggle outbound link previews
|
||||
streamMode: "partial", // off | partial | block (draft streaming; separate from block streaming)
|
||||
draftChunk: { // optional; only for streamMode=block
|
||||
minChars: 200,
|
||||
|
||||
@@ -24,6 +24,7 @@ Quick answers plus deeper troubleshooting for real-world setups (local dev, VPS,
|
||||
- [How do I try the latest bits?](#how-do-i-try-the-latest-bits)
|
||||
- [How long does install and onboarding usually take?](#how-long-does-install-and-onboarding-usually-take)
|
||||
- [Installer stuck? How do I get more feedback?](#installer-stuck-how-do-i-get-more-feedback)
|
||||
- [Windows install says git not found or clawdbot not recognized](#windows-install-says-git-not-found-or-clawdbot-not-recognized)
|
||||
- [The docs didn’t answer my question - how do I get a better answer?](#the-docs-didnt-answer-my-question-how-do-i-get-a-better-answer)
|
||||
- [How do I install Clawdbot on Linux?](#how-do-i-install-clawdbot-on-linux)
|
||||
- [How do I install Clawdbot on a VPS?](#how-do-i-install-clawdbot-on-a-vps)
|
||||
@@ -39,6 +40,7 @@ Quick answers plus deeper troubleshooting for real-world setups (local dev, VPS,
|
||||
- [Is AWS Bedrock supported?](#is-aws-bedrock-supported)
|
||||
- [How does Codex auth work?](#how-does-codex-auth-work)
|
||||
- [Do you support OpenAI subscription auth (Codex OAuth)?](#do-you-support-openai-subscription-auth-codex-oauth)
|
||||
- [How do I set up Gemini CLI OAuth](#how-do-i-set-up-gemini-cli-oauth)
|
||||
- [Is a local model OK for casual chats?](#is-a-local-model-ok-for-casual-chats)
|
||||
- [How do I keep hosted model traffic in a specific region?](#how-do-i-keep-hosted-model-traffic-in-a-specific-region)
|
||||
- [Do I have to buy a Mac Mini to install this?](#do-i-have-to-buy-a-mac-mini-to-install-this)
|
||||
@@ -511,6 +513,26 @@ curl -fsSL https://clawd.bot/install.sh | bash -s -- --install-method git --verb
|
||||
|
||||
More options: [Installer flags](/install/installer).
|
||||
|
||||
### Windows install says git not found or clawdbot not recognized
|
||||
|
||||
Two common Windows issues:
|
||||
|
||||
**1) npm error spawn git / git not found**
|
||||
- Install **Git for Windows** and make sure `git` is on your PATH.
|
||||
- Close and reopen PowerShell, then re-run the installer.
|
||||
|
||||
**2) clawdbot is not recognized after install**
|
||||
- Your npm global bin folder is not on PATH.
|
||||
- Check the path:
|
||||
```powershell
|
||||
npm config get prefix
|
||||
```
|
||||
- Ensure `<prefix>\\bin` is on PATH (on most systems it is `%AppData%\\npm`).
|
||||
- Close and reopen PowerShell after updating PATH.
|
||||
|
||||
If you want the smoothest Windows setup, use **WSL2** instead of native Windows.
|
||||
Docs: [Windows](/platforms/windows).
|
||||
|
||||
### The docs didnt answer my question how do I get a better answer
|
||||
|
||||
Use the **hackable (git) install** so you have the full source and docs locally, then ask
|
||||
@@ -610,9 +632,10 @@ Docs: [Anthropic](/providers/anthropic), [OpenAI](/providers/openai),
|
||||
Yes. You can authenticate with **Claude Code CLI OAuth** or a **setup-token**
|
||||
instead of an API key. This is the subscription path.
|
||||
|
||||
Important: you must verify with Anthropic that this usage is allowed under
|
||||
their subscription policy and terms. If you want the most explicit, supported
|
||||
path, use an Anthropic API key.
|
||||
Claude Pro/Max subscriptions **do not include an API key**, so this is the
|
||||
correct approach for subscription accounts. Important: you must verify with
|
||||
Anthropic that this usage is allowed under their subscription policy and terms.
|
||||
If you want the most explicit, supported path, use an Anthropic API key.
|
||||
|
||||
### How does Anthropic setuptoken auth work
|
||||
|
||||
@@ -664,6 +687,16 @@ can import the CLI login or run the OAuth flow for you.
|
||||
|
||||
See [OAuth](/concepts/oauth), [Model providers](/concepts/model-providers), and [Wizard](/start/wizard).
|
||||
|
||||
### How do I set up Gemini CLI OAuth
|
||||
|
||||
Gemini CLI uses a **plugin auth flow**, not a client id or secret in `clawdbot.json`.
|
||||
|
||||
Steps:
|
||||
1) Enable the plugin: `clawdbot plugins enable google-gemini-cli-auth`
|
||||
2) Login: `clawdbot models auth login --provider google-gemini-cli --set-default`
|
||||
|
||||
This stores OAuth tokens in auth profiles on the gateway host. Details: [Model providers](/concepts/model-providers).
|
||||
|
||||
### Is a local model OK for casual chats
|
||||
|
||||
Usually no. Clawdbot needs large context + strong safety; small cards truncate and leak. If you must, run the **largest** MiniMax M2.1 build you can locally (LM Studio) and see [/gateway/local-models](/gateway/local-models). Smaller/quantized models increase prompt-injection risk - see [Security](/gateway/security).
|
||||
|
||||
@@ -114,3 +114,9 @@ Git requirement:
|
||||
|
||||
If you choose `-InstallMethod git` and Git is missing, the installer will print the
|
||||
Git for Windows link (`https://git-scm.com/download/win`) and exit.
|
||||
|
||||
Common Windows issues:
|
||||
|
||||
- **npm error spawn git / ENOENT**: install Git for Windows and reopen PowerShell, then rerun the installer.
|
||||
- **"clawdbot" is not recognized**: your npm global bin folder is not on PATH. Most systems use
|
||||
`%AppData%\\npm`. You can also run `npm config get prefix` and add `\\bin` to PATH, then reopen PowerShell.
|
||||
|
||||
@@ -67,6 +67,22 @@ Plugins can register:
|
||||
Plugins run **in‑process** with the Gateway, so treat them as trusted code.
|
||||
Tool authoring guide: [Plugin agent tools](/plugins/agent-tools).
|
||||
|
||||
## Runtime helpers
|
||||
|
||||
Plugins can access selected core helpers via `api.runtime`. For telephony TTS:
|
||||
|
||||
```ts
|
||||
const result = await api.runtime.tts.textToSpeechTelephony({
|
||||
text: "Hello from Clawdbot",
|
||||
cfg: api.config,
|
||||
});
|
||||
```
|
||||
|
||||
Notes:
|
||||
- Uses core `messages.tts` configuration (OpenAI or ElevenLabs).
|
||||
- Returns PCM audio buffer + sample rate. Plugins must resample/encode for providers.
|
||||
- Edge TTS is not supported for telephony.
|
||||
|
||||
## Discovery & precedence
|
||||
|
||||
Clawdbot scans, in order:
|
||||
|
||||
@@ -104,6 +104,87 @@ Notes:
|
||||
- `mock` is a local dev provider (no network calls).
|
||||
- `skipSignatureVerification` is for local testing only.
|
||||
|
||||
## TTS for calls
|
||||
|
||||
Voice Call uses the core `messages.tts` configuration (OpenAI or ElevenLabs) for
|
||||
streaming speech on calls. You can override it under the plugin config with the
|
||||
**same shape** — it deep‑merges with `messages.tts`.
|
||||
|
||||
```json5
|
||||
{
|
||||
tts: {
|
||||
provider: "elevenlabs",
|
||||
elevenlabs: {
|
||||
voiceId: "pMsXgVXv3BLzUgSXRplE",
|
||||
modelId: "eleven_multilingual_v2"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Notes:
|
||||
- **Edge TTS is ignored for voice calls** (telephony audio needs PCM; Edge output is unreliable).
|
||||
- Core TTS is used when Twilio media streaming is enabled; otherwise calls fall back to provider native voices.
|
||||
|
||||
### More examples
|
||||
|
||||
Use core TTS only (no override):
|
||||
|
||||
```json5
|
||||
{
|
||||
messages: {
|
||||
tts: {
|
||||
provider: "openai",
|
||||
openai: { voice: "alloy" }
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Override to ElevenLabs just for calls (keep core default elsewhere):
|
||||
|
||||
```json5
|
||||
{
|
||||
plugins: {
|
||||
entries: {
|
||||
"voice-call": {
|
||||
config: {
|
||||
tts: {
|
||||
provider: "elevenlabs",
|
||||
elevenlabs: {
|
||||
apiKey: "elevenlabs_key",
|
||||
voiceId: "pMsXgVXv3BLzUgSXRplE",
|
||||
modelId: "eleven_multilingual_v2"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Override only the OpenAI model for calls (deep‑merge example):
|
||||
|
||||
```json5
|
||||
{
|
||||
plugins: {
|
||||
entries: {
|
||||
"voice-call": {
|
||||
config: {
|
||||
tts: {
|
||||
openai: {
|
||||
model: "gpt-4o-mini-tts",
|
||||
voice: "marin"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Inbound calls
|
||||
|
||||
Inbound policy defaults to `disabled`. To enable inbound calls, set:
|
||||
|
||||
@@ -16,7 +16,7 @@ and you configure everything via the `/setup` web wizard.
|
||||
|
||||
## One-click deploy
|
||||
|
||||
<a href="https://railway.com/deploy/clawdbot-railway-template" target="_blank" rel="noreferrer">Deploy on Railway</a>
|
||||
<a href="https://railway.app/new/template?template=https://github.com/vignesh07/clawdbot-railway-template" target="_blank" rel="noreferrer">Deploy on Railway</a>
|
||||
|
||||
After deploy, find your public URL in **Railway → your service → Settings → Domains**.
|
||||
|
||||
@@ -55,6 +55,7 @@ Attach a volume mounted at:
|
||||
Set these variables on the service:
|
||||
|
||||
- `SETUP_PASSWORD` (required)
|
||||
- `PORT=8080` (required — must match the port in Public Networking)
|
||||
- `CLAWDBOT_STATE_DIR=/data/.clawdbot` (recommended)
|
||||
- `CLAWDBOT_WORKSPACE_DIR=/data/workspace` (recommended)
|
||||
- `CLAWDBOT_GATEWAY_TOKEN` (recommended; treat as an admin secret)
|
||||
@@ -82,8 +83,9 @@ If Telegram DMs are set to pairing, the setup wizard can approve the pairing cod
|
||||
1) Go to https://discord.com/developers/applications
|
||||
2) **New Application** → choose a name
|
||||
3) **Bot** → **Add Bot**
|
||||
4) Copy the **Bot Token** and paste into `/setup`
|
||||
5) Invite the bot to your server (OAuth2 URL Generator; scopes: `bot`, `applications.commands`)
|
||||
4) **Enable MESSAGE CONTENT INTENT** under Bot → Privileged Gateway Intents (required or the bot will crash on startup)
|
||||
5) Copy the **Bot Token** and paste into `/setup`
|
||||
6) Invite the bot to your server (OAuth2 URL Generator; scopes: `bot`, `applications.commands`)
|
||||
|
||||
## Backups & migration
|
||||
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
# Changelog
|
||||
|
||||
## 2026.1.24
|
||||
|
||||
### Changes
|
||||
- Breaking: voice-call TTS now uses core `messages.tts` (plugin TTS config deep‑merges with core).
|
||||
- Telephony TTS supports OpenAI + ElevenLabs; Edge TTS is ignored for calls.
|
||||
- Removed legacy `tts.model`/`tts.voice`/`tts.instructions` plugin fields.
|
||||
|
||||
## 2026.1.23
|
||||
|
||||
### Changes
|
||||
|
||||
@@ -75,6 +75,27 @@ Notes:
|
||||
- Twilio/Telnyx/Plivo require a **publicly reachable** webhook URL.
|
||||
- `mock` is a local dev provider (no network calls).
|
||||
|
||||
## TTS for calls
|
||||
|
||||
Voice Call uses the core `messages.tts` configuration (OpenAI or ElevenLabs) for
|
||||
streaming speech on calls. You can override it under the plugin config with the
|
||||
same shape — overrides deep-merge with `messages.tts`.
|
||||
|
||||
```json5
|
||||
{
|
||||
tts: {
|
||||
provider: "openai",
|
||||
openai: {
|
||||
voice: "alloy"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Notes:
|
||||
- Edge TTS is ignored for voice calls (telephony audio needs PCM; Edge output is unreliable).
|
||||
- Core TTS is used when Twilio media streaming is enabled; otherwise calls fall back to provider native voices.
|
||||
|
||||
## CLI
|
||||
|
||||
```bash
|
||||
|
||||
@@ -99,16 +99,39 @@
|
||||
"label": "Media Stream Path",
|
||||
"advanced": true
|
||||
},
|
||||
"tts.model": {
|
||||
"label": "TTS Model",
|
||||
"tts.provider": {
|
||||
"label": "TTS Provider Override",
|
||||
"help": "Deep-merges with messages.tts (Edge is ignored for calls).",
|
||||
"advanced": true
|
||||
},
|
||||
"tts.voice": {
|
||||
"label": "TTS Voice",
|
||||
"tts.openai.model": {
|
||||
"label": "OpenAI TTS Model",
|
||||
"advanced": true
|
||||
},
|
||||
"tts.instructions": {
|
||||
"label": "TTS Instructions",
|
||||
"tts.openai.voice": {
|
||||
"label": "OpenAI TTS Voice",
|
||||
"advanced": true
|
||||
},
|
||||
"tts.openai.apiKey": {
|
||||
"label": "OpenAI API Key",
|
||||
"sensitive": true,
|
||||
"advanced": true
|
||||
},
|
||||
"tts.elevenlabs.modelId": {
|
||||
"label": "ElevenLabs Model ID",
|
||||
"advanced": true
|
||||
},
|
||||
"tts.elevenlabs.voiceId": {
|
||||
"label": "ElevenLabs Voice ID",
|
||||
"advanced": true
|
||||
},
|
||||
"tts.elevenlabs.apiKey": {
|
||||
"label": "ElevenLabs API Key",
|
||||
"sensitive": true,
|
||||
"advanced": true
|
||||
},
|
||||
"tts.elevenlabs.baseUrl": {
|
||||
"label": "ElevenLabs Base URL",
|
||||
"advanced": true
|
||||
},
|
||||
"publicUrl": {
|
||||
@@ -370,20 +393,193 @@
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"auto": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"off",
|
||||
"always",
|
||||
"inbound",
|
||||
"tagged"
|
||||
]
|
||||
},
|
||||
"enabled": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"mode": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"final",
|
||||
"all"
|
||||
]
|
||||
},
|
||||
"provider": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"openai"
|
||||
"openai",
|
||||
"elevenlabs",
|
||||
"edge"
|
||||
]
|
||||
},
|
||||
"model": {
|
||||
"summaryModel": {
|
||||
"type": "string"
|
||||
},
|
||||
"voice": {
|
||||
"modelOverrides": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"allowText": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"allowProvider": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"allowVoice": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"allowModelId": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"allowVoiceSettings": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"allowNormalization": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"allowSeed": {
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
},
|
||||
"elevenlabs": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"apiKey": {
|
||||
"type": "string"
|
||||
},
|
||||
"baseUrl": {
|
||||
"type": "string"
|
||||
},
|
||||
"voiceId": {
|
||||
"type": "string"
|
||||
},
|
||||
"modelId": {
|
||||
"type": "string"
|
||||
},
|
||||
"seed": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"maximum": 4294967295
|
||||
},
|
||||
"applyTextNormalization": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"auto",
|
||||
"on",
|
||||
"off"
|
||||
]
|
||||
},
|
||||
"languageCode": {
|
||||
"type": "string"
|
||||
},
|
||||
"voiceSettings": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"stability": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1
|
||||
},
|
||||
"similarityBoost": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1
|
||||
},
|
||||
"style": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1
|
||||
},
|
||||
"useSpeakerBoost": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"speed": {
|
||||
"type": "number",
|
||||
"minimum": 0.5,
|
||||
"maximum": 2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"openai": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"apiKey": {
|
||||
"type": "string"
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"voice": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"edge": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"voice": {
|
||||
"type": "string"
|
||||
},
|
||||
"lang": {
|
||||
"type": "string"
|
||||
},
|
||||
"outputFormat": {
|
||||
"type": "string"
|
||||
},
|
||||
"pitch": {
|
||||
"type": "string"
|
||||
},
|
||||
"rate": {
|
||||
"type": "string"
|
||||
},
|
||||
"volume": {
|
||||
"type": "string"
|
||||
},
|
||||
"saveSubtitles": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"proxy": {
|
||||
"type": "string"
|
||||
},
|
||||
"timeoutMs": {
|
||||
"type": "integer",
|
||||
"minimum": 1000,
|
||||
"maximum": 120000
|
||||
}
|
||||
}
|
||||
},
|
||||
"prefsPath": {
|
||||
"type": "string"
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string"
|
||||
"maxTextLength": {
|
||||
"type": "integer",
|
||||
"minimum": 1
|
||||
},
|
||||
"timeoutMs": {
|
||||
"type": "integer",
|
||||
"minimum": 1000,
|
||||
"maximum": 120000
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
@@ -74,9 +74,26 @@ const voiceCallConfigSchema = {
|
||||
},
|
||||
"streaming.sttModel": { label: "Realtime STT Model", advanced: true },
|
||||
"streaming.streamPath": { label: "Media Stream Path", advanced: true },
|
||||
"tts.model": { label: "TTS Model", advanced: true },
|
||||
"tts.voice": { label: "TTS Voice", advanced: true },
|
||||
"tts.instructions": { label: "TTS Instructions", advanced: true },
|
||||
"tts.provider": {
|
||||
label: "TTS Provider Override",
|
||||
help: "Deep-merges with messages.tts (Edge is ignored for calls).",
|
||||
advanced: true,
|
||||
},
|
||||
"tts.openai.model": { label: "OpenAI TTS Model", advanced: true },
|
||||
"tts.openai.voice": { label: "OpenAI TTS Voice", advanced: true },
|
||||
"tts.openai.apiKey": {
|
||||
label: "OpenAI API Key",
|
||||
sensitive: true,
|
||||
advanced: true,
|
||||
},
|
||||
"tts.elevenlabs.modelId": { label: "ElevenLabs Model ID", advanced: true },
|
||||
"tts.elevenlabs.voiceId": { label: "ElevenLabs Voice ID", advanced: true },
|
||||
"tts.elevenlabs.apiKey": {
|
||||
label: "ElevenLabs API Key",
|
||||
sensitive: true,
|
||||
advanced: true,
|
||||
},
|
||||
"tts.elevenlabs.baseUrl": { label: "ElevenLabs Base URL", advanced: true },
|
||||
publicUrl: { label: "Public Webhook URL", advanced: true },
|
||||
skipSignatureVerification: {
|
||||
label: "Skip Signature Verification",
|
||||
@@ -161,6 +178,7 @@ const voiceCallPlugin = {
|
||||
runtimePromise = createVoiceCallRuntime({
|
||||
config: cfg,
|
||||
coreConfig: api.config as CoreConfig,
|
||||
ttsRuntime: api.runtime.tts,
|
||||
logger: api.logger,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -82,31 +82,82 @@ export const SttConfigSchema = z
|
||||
.default({ provider: "openai", model: "whisper-1" });
|
||||
export type SttConfig = z.infer<typeof SttConfigSchema>;
|
||||
|
||||
export const TtsProviderSchema = z.enum(["openai", "elevenlabs", "edge"]);
|
||||
export const TtsModeSchema = z.enum(["final", "all"]);
|
||||
export const TtsAutoSchema = z.enum(["off", "always", "inbound", "tagged"]);
|
||||
|
||||
export const TtsConfigSchema = z
|
||||
.object({
|
||||
/** TTS provider (currently only OpenAI supported) */
|
||||
provider: z.literal("openai").default("openai"),
|
||||
/**
|
||||
* TTS model to use:
|
||||
* - gpt-4o-mini-tts: newest, supports instructions for tone/style control (recommended)
|
||||
* - tts-1: lower latency
|
||||
* - tts-1-hd: higher quality
|
||||
*/
|
||||
model: z.string().min(1).default("gpt-4o-mini-tts"),
|
||||
/**
|
||||
* Voice ID. For best quality, use marin or cedar.
|
||||
* All voices: alloy, ash, ballad, coral, echo, fable, nova, onyx, sage, shimmer, verse, marin, cedar
|
||||
*/
|
||||
voice: z.string().min(1).default("coral"),
|
||||
/**
|
||||
* Instructions for speech style (only works with gpt-4o-mini-tts).
|
||||
* Examples: "Speak in a cheerful tone", "Talk like a sympathetic customer service agent"
|
||||
*/
|
||||
instructions: z.string().optional(),
|
||||
auto: TtsAutoSchema.optional(),
|
||||
enabled: z.boolean().optional(),
|
||||
mode: TtsModeSchema.optional(),
|
||||
provider: TtsProviderSchema.optional(),
|
||||
summaryModel: z.string().optional(),
|
||||
modelOverrides: z
|
||||
.object({
|
||||
enabled: z.boolean().optional(),
|
||||
allowText: z.boolean().optional(),
|
||||
allowProvider: z.boolean().optional(),
|
||||
allowVoice: z.boolean().optional(),
|
||||
allowModelId: z.boolean().optional(),
|
||||
allowVoiceSettings: z.boolean().optional(),
|
||||
allowNormalization: z.boolean().optional(),
|
||||
allowSeed: z.boolean().optional(),
|
||||
})
|
||||
.strict()
|
||||
.optional(),
|
||||
elevenlabs: z
|
||||
.object({
|
||||
apiKey: z.string().optional(),
|
||||
baseUrl: z.string().optional(),
|
||||
voiceId: z.string().optional(),
|
||||
modelId: z.string().optional(),
|
||||
seed: z.number().int().min(0).max(4294967295).optional(),
|
||||
applyTextNormalization: z.enum(["auto", "on", "off"]).optional(),
|
||||
languageCode: z.string().optional(),
|
||||
voiceSettings: z
|
||||
.object({
|
||||
stability: z.number().min(0).max(1).optional(),
|
||||
similarityBoost: z.number().min(0).max(1).optional(),
|
||||
style: z.number().min(0).max(1).optional(),
|
||||
useSpeakerBoost: z.boolean().optional(),
|
||||
speed: z.number().min(0.5).max(2).optional(),
|
||||
})
|
||||
.strict()
|
||||
.optional(),
|
||||
})
|
||||
.strict()
|
||||
.optional(),
|
||||
openai: z
|
||||
.object({
|
||||
apiKey: z.string().optional(),
|
||||
model: z.string().optional(),
|
||||
voice: z.string().optional(),
|
||||
})
|
||||
.strict()
|
||||
.optional(),
|
||||
edge: z
|
||||
.object({
|
||||
enabled: z.boolean().optional(),
|
||||
voice: z.string().optional(),
|
||||
lang: z.string().optional(),
|
||||
outputFormat: z.string().optional(),
|
||||
pitch: z.string().optional(),
|
||||
rate: z.string().optional(),
|
||||
volume: z.string().optional(),
|
||||
saveSubtitles: z.boolean().optional(),
|
||||
proxy: z.string().optional(),
|
||||
timeoutMs: z.number().int().min(1000).max(120000).optional(),
|
||||
})
|
||||
.strict()
|
||||
.optional(),
|
||||
prefsPath: z.string().optional(),
|
||||
maxTextLength: z.number().int().min(1).optional(),
|
||||
timeoutMs: z.number().int().min(1000).max(120000).optional(),
|
||||
})
|
||||
.strict()
|
||||
.default({ provider: "openai", model: "gpt-4o-mini-tts", voice: "coral" });
|
||||
export type TtsConfig = z.infer<typeof TtsConfigSchema>;
|
||||
.optional();
|
||||
export type VoiceCallTtsConfig = z.infer<typeof TtsConfigSchema>;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Webhook Server Configuration
|
||||
@@ -307,7 +358,7 @@ export const VoiceCallConfigSchema = z
|
||||
/** STT configuration */
|
||||
stt: SttConfigSchema,
|
||||
|
||||
/** TTS configuration */
|
||||
/** TTS override (deep-merges with core messages.tts) */
|
||||
tts: TtsConfigSchema,
|
||||
|
||||
/** Store path for call logs */
|
||||
|
||||
@@ -2,10 +2,16 @@ import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath, pathToFileURL } from "node:url";
|
||||
|
||||
import type { VoiceCallTtsConfig } from "./config.js";
|
||||
|
||||
export type CoreConfig = {
|
||||
session?: {
|
||||
store?: string;
|
||||
};
|
||||
messages?: {
|
||||
tts?: VoiceCallTtsConfig;
|
||||
};
|
||||
[key: string]: unknown;
|
||||
};
|
||||
|
||||
type CoreAgentDeps = {
|
||||
|
||||
@@ -143,7 +143,7 @@ export class CallManager {
|
||||
// For notify mode with a message, use inline TwiML with <Say>
|
||||
let inlineTwiml: string | undefined;
|
||||
if (mode === "notify" && initialMessage) {
|
||||
const pollyVoice = mapVoiceToPolly(this.config.tts.voice);
|
||||
const pollyVoice = mapVoiceToPolly(this.config.tts?.openai?.voice);
|
||||
inlineTwiml = this.generateNotifyTwiml(initialMessage, pollyVoice);
|
||||
console.log(
|
||||
`[voice-call] Using inline TwiML for notify mode (voice: ${pollyVoice})`,
|
||||
@@ -210,11 +210,13 @@ export class CallManager {
|
||||
this.addTranscriptEntry(call, "bot", text);
|
||||
|
||||
// Play TTS
|
||||
const voice =
|
||||
this.provider?.name === "twilio" ? this.config.tts?.openai?.voice : undefined;
|
||||
await this.provider.playTts({
|
||||
callId,
|
||||
providerCallId: call.providerCallId,
|
||||
text,
|
||||
voice: this.config.tts.voice,
|
||||
voice,
|
||||
});
|
||||
|
||||
return { success: true };
|
||||
|
||||
@@ -68,7 +68,7 @@ export async function initiateCall(
|
||||
// For notify mode with a message, use inline TwiML with <Say>.
|
||||
let inlineTwiml: string | undefined;
|
||||
if (mode === "notify" && initialMessage) {
|
||||
const pollyVoice = mapVoiceToPolly(ctx.config.tts.voice);
|
||||
const pollyVoice = mapVoiceToPolly(ctx.config.tts?.openai?.voice);
|
||||
inlineTwiml = generateNotifyTwiml(initialMessage, pollyVoice);
|
||||
console.log(`[voice-call] Using inline TwiML for notify mode (voice: ${pollyVoice})`);
|
||||
}
|
||||
@@ -120,11 +120,13 @@ export async function speak(
|
||||
|
||||
addTranscriptEntry(call, "bot", text);
|
||||
|
||||
const voice =
|
||||
ctx.provider?.name === "twilio" ? ctx.config.tts?.openai?.voice : undefined;
|
||||
await ctx.provider.playTts({
|
||||
callId,
|
||||
providerCallId: call.providerCallId,
|
||||
text,
|
||||
voice: ctx.config.tts.voice,
|
||||
voice,
|
||||
});
|
||||
|
||||
return { success: true };
|
||||
@@ -244,4 +246,3 @@ export async function endCall(
|
||||
return { success: false, error: err instanceof Error ? err.message : String(err) };
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -15,9 +15,9 @@ import type {
|
||||
WebhookVerificationResult,
|
||||
} from "../types.js";
|
||||
import { escapeXml, mapVoiceToPolly } from "../voice-mapping.js";
|
||||
import { chunkAudio } from "../telephony-audio.js";
|
||||
import type { TelephonyTtsProvider } from "../telephony-tts.js";
|
||||
import type { VoiceCallProvider } from "./base.js";
|
||||
import type { OpenAITTSProvider } from "./tts-openai.js";
|
||||
import { chunkAudio } from "./tts-openai.js";
|
||||
import { twilioApiRequest } from "./twilio/api.js";
|
||||
import { verifyTwilioProviderWebhook } from "./twilio/webhook.js";
|
||||
|
||||
@@ -53,8 +53,8 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
/** Current public webhook URL (set when tunnel starts or from config) */
|
||||
private currentPublicUrl: string | null = null;
|
||||
|
||||
/** Optional OpenAI TTS provider for streaming TTS */
|
||||
private ttsProvider: OpenAITTSProvider | null = null;
|
||||
/** Optional telephony TTS provider for streaming TTS */
|
||||
private ttsProvider: TelephonyTtsProvider | null = null;
|
||||
|
||||
/** Optional media stream handler for sending audio */
|
||||
private mediaStreamHandler: MediaStreamHandler | null = null;
|
||||
@@ -119,7 +119,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
return this.currentPublicUrl;
|
||||
}
|
||||
|
||||
setTTSProvider(provider: OpenAITTSProvider): void {
|
||||
setTTSProvider(provider: TelephonyTtsProvider): void {
|
||||
this.ttsProvider = provider;
|
||||
}
|
||||
|
||||
@@ -454,13 +454,13 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
* Play TTS audio via Twilio.
|
||||
*
|
||||
* Two modes:
|
||||
* 1. OpenAI TTS + Media Streams: If TTS provider and media stream are available,
|
||||
* generates audio via OpenAI and streams it through WebSocket (preferred).
|
||||
* 1. Core TTS + Media Streams: If TTS provider and media stream are available,
|
||||
* generates audio via core TTS and streams it through WebSocket (preferred).
|
||||
* 2. TwiML <Say>: Falls back to Twilio's native TTS with Polly voices.
|
||||
* Note: This may not work on all Twilio accounts.
|
||||
*/
|
||||
async playTts(input: PlayTtsInput): Promise<void> {
|
||||
// Try OpenAI TTS via media stream first (if configured)
|
||||
// Try telephony TTS via media stream first (if configured)
|
||||
const streamSid = this.callStreamMap.get(input.providerCallId);
|
||||
if (this.ttsProvider && this.mediaStreamHandler && streamSid) {
|
||||
try {
|
||||
@@ -468,7 +468,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
return;
|
||||
} catch (err) {
|
||||
console.warn(
|
||||
`[voice-call] OpenAI TTS failed, falling back to Twilio <Say>:`,
|
||||
`[voice-call] Telephony TTS failed, falling back to Twilio <Say>:`,
|
||||
err instanceof Error ? err.message : err,
|
||||
);
|
||||
// Fall through to TwiML <Say> fallback
|
||||
@@ -484,7 +484,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
}
|
||||
|
||||
console.warn(
|
||||
"[voice-call] Using TwiML <Say> fallback - OpenAI TTS not configured or media stream not active",
|
||||
"[voice-call] Using TwiML <Say> fallback - telephony TTS not configured or media stream not active",
|
||||
);
|
||||
|
||||
const pollyVoice = mapVoiceToPolly(input.voice);
|
||||
@@ -502,8 +502,8 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
}
|
||||
|
||||
/**
|
||||
* Play TTS via OpenAI and Twilio Media Streams.
|
||||
* Generates audio with OpenAI TTS, converts to mu-law, and streams via WebSocket.
|
||||
* Play TTS via core TTS and Twilio Media Streams.
|
||||
* Generates audio with core TTS, converts to mu-law, and streams via WebSocket.
|
||||
* Uses a jitter buffer to smooth out timing variations.
|
||||
*/
|
||||
private async playTtsViaStream(
|
||||
@@ -514,8 +514,8 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
throw new Error("TTS provider and media stream handler required");
|
||||
}
|
||||
|
||||
// Generate audio with OpenAI TTS (returns mu-law at 8kHz)
|
||||
const muLawAudio = await this.ttsProvider.synthesizeForTwilio(text);
|
||||
// Generate audio with core TTS (returns mu-law at 8kHz)
|
||||
const muLawAudio = await this.ttsProvider.synthesizeForTelephony(text);
|
||||
|
||||
// Stream audio in 20ms chunks (160 bytes at 8kHz mu-law)
|
||||
const CHUNK_SIZE = 160;
|
||||
|
||||
@@ -6,8 +6,9 @@ import type { VoiceCallProvider } from "./providers/base.js";
|
||||
import { MockProvider } from "./providers/mock.js";
|
||||
import { PlivoProvider } from "./providers/plivo.js";
|
||||
import { TelnyxProvider } from "./providers/telnyx.js";
|
||||
import { OpenAITTSProvider } from "./providers/tts-openai.js";
|
||||
import { TwilioProvider } from "./providers/twilio.js";
|
||||
import type { TelephonyTtsRuntime } from "./telephony-tts.js";
|
||||
import { createTelephonyTtsProvider } from "./telephony-tts.js";
|
||||
import { startTunnel, type TunnelResult } from "./tunnel.js";
|
||||
import {
|
||||
cleanupTailscaleExposure,
|
||||
@@ -81,9 +82,10 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
|
||||
export async function createVoiceCallRuntime(params: {
|
||||
config: VoiceCallConfig;
|
||||
coreConfig: CoreConfig;
|
||||
ttsRuntime?: TelephonyTtsRuntime;
|
||||
logger?: Logger;
|
||||
}): Promise<VoiceCallRuntime> {
|
||||
const { config, coreConfig, logger } = params;
|
||||
const { config, coreConfig, ttsRuntime, logger } = params;
|
||||
const log = logger ?? {
|
||||
info: console.log,
|
||||
warn: console.warn,
|
||||
@@ -149,27 +151,24 @@ export async function createVoiceCallRuntime(params: {
|
||||
|
||||
if (provider.name === "twilio" && config.streaming?.enabled) {
|
||||
const twilioProvider = provider as TwilioProvider;
|
||||
const openaiApiKey =
|
||||
config.streaming.openaiApiKey || process.env.OPENAI_API_KEY;
|
||||
if (openaiApiKey) {
|
||||
if (ttsRuntime?.textToSpeechTelephony) {
|
||||
try {
|
||||
const ttsProvider = new OpenAITTSProvider({
|
||||
apiKey: openaiApiKey,
|
||||
voice: config.tts.voice,
|
||||
model: config.tts.model,
|
||||
instructions: config.tts.instructions,
|
||||
const ttsProvider = createTelephonyTtsProvider({
|
||||
coreConfig,
|
||||
ttsOverride: config.tts,
|
||||
runtime: ttsRuntime,
|
||||
});
|
||||
twilioProvider.setTTSProvider(ttsProvider);
|
||||
log.info("[voice-call] OpenAI TTS provider configured");
|
||||
log.info("[voice-call] Telephony TTS provider configured");
|
||||
} catch (err) {
|
||||
log.warn(
|
||||
`[voice-call] Failed to initialize OpenAI TTS: ${
|
||||
`[voice-call] Failed to initialize telephony TTS: ${
|
||||
err instanceof Error ? err.message : String(err)
|
||||
}`,
|
||||
);
|
||||
}
|
||||
} else {
|
||||
log.warn("[voice-call] OpenAI TTS key missing; streaming TTS disabled");
|
||||
log.warn("[voice-call] Telephony TTS unavailable; streaming TTS disabled");
|
||||
}
|
||||
|
||||
const mediaHandler = webhookServer.getMediaStreamHandler();
|
||||
|
||||
88
extensions/voice-call/src/telephony-audio.ts
Normal file
88
extensions/voice-call/src/telephony-audio.ts
Normal file
@@ -0,0 +1,88 @@
|
||||
const TELEPHONY_SAMPLE_RATE = 8000;
|
||||
|
||||
function clamp16(value: number): number {
|
||||
return Math.max(-32768, Math.min(32767, value));
|
||||
}
|
||||
|
||||
/**
|
||||
* Resample 16-bit PCM (little-endian mono) to 8kHz using linear interpolation.
|
||||
*/
|
||||
export function resamplePcmTo8k(input: Buffer, inputSampleRate: number): Buffer {
|
||||
if (inputSampleRate === TELEPHONY_SAMPLE_RATE) return input;
|
||||
const inputSamples = Math.floor(input.length / 2);
|
||||
if (inputSamples === 0) return Buffer.alloc(0);
|
||||
|
||||
const ratio = inputSampleRate / TELEPHONY_SAMPLE_RATE;
|
||||
const outputSamples = Math.floor(inputSamples / ratio);
|
||||
const output = Buffer.alloc(outputSamples * 2);
|
||||
|
||||
for (let i = 0; i < outputSamples; i++) {
|
||||
const srcPos = i * ratio;
|
||||
const srcIndex = Math.floor(srcPos);
|
||||
const frac = srcPos - srcIndex;
|
||||
|
||||
const s0 = input.readInt16LE(srcIndex * 2);
|
||||
const s1Index = Math.min(srcIndex + 1, inputSamples - 1);
|
||||
const s1 = input.readInt16LE(s1Index * 2);
|
||||
|
||||
const sample = Math.round(s0 + frac * (s1 - s0));
|
||||
output.writeInt16LE(clamp16(sample), i * 2);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert 16-bit PCM to 8-bit mu-law (G.711).
|
||||
*/
|
||||
export function pcmToMulaw(pcm: Buffer): Buffer {
|
||||
const samples = Math.floor(pcm.length / 2);
|
||||
const mulaw = Buffer.alloc(samples);
|
||||
|
||||
for (let i = 0; i < samples; i++) {
|
||||
const sample = pcm.readInt16LE(i * 2);
|
||||
mulaw[i] = linearToMulaw(sample);
|
||||
}
|
||||
|
||||
return mulaw;
|
||||
}
|
||||
|
||||
export function convertPcmToMulaw8k(
|
||||
pcm: Buffer,
|
||||
inputSampleRate: number,
|
||||
): Buffer {
|
||||
const pcm8k = resamplePcmTo8k(pcm, inputSampleRate);
|
||||
return pcmToMulaw(pcm8k);
|
||||
}
|
||||
|
||||
/**
|
||||
* Chunk audio buffer into 20ms frames for streaming (8kHz mono mu-law).
|
||||
*/
|
||||
export function chunkAudio(
|
||||
audio: Buffer,
|
||||
chunkSize = 160,
|
||||
): Generator<Buffer, void, unknown> {
|
||||
return (function* () {
|
||||
for (let i = 0; i < audio.length; i += chunkSize) {
|
||||
yield audio.subarray(i, Math.min(i + chunkSize, audio.length));
|
||||
}
|
||||
})();
|
||||
}
|
||||
|
||||
function linearToMulaw(sample: number): number {
|
||||
const BIAS = 132;
|
||||
const CLIP = 32635;
|
||||
|
||||
const sign = sample < 0 ? 0x80 : 0;
|
||||
if (sample < 0) sample = -sample;
|
||||
if (sample > CLIP) sample = CLIP;
|
||||
|
||||
sample += BIAS;
|
||||
let exponent = 7;
|
||||
for (let expMask = 0x4000; (sample & expMask) === 0 && exponent > 0; exponent--) {
|
||||
expMask >>= 1;
|
||||
}
|
||||
|
||||
const mantissa = (sample >> (exponent + 3)) & 0x0f;
|
||||
return ~(sign | (exponent << 4) | mantissa) & 0xff;
|
||||
}
|
||||
95
extensions/voice-call/src/telephony-tts.ts
Normal file
95
extensions/voice-call/src/telephony-tts.ts
Normal file
@@ -0,0 +1,95 @@
|
||||
import type { CoreConfig } from "./core-bridge.js";
|
||||
import type { VoiceCallTtsConfig } from "./config.js";
|
||||
import { convertPcmToMulaw8k } from "./telephony-audio.js";
|
||||
|
||||
export type TelephonyTtsRuntime = {
|
||||
textToSpeechTelephony: (params: {
|
||||
text: string;
|
||||
cfg: CoreConfig;
|
||||
prefsPath?: string;
|
||||
}) => Promise<{
|
||||
success: boolean;
|
||||
audioBuffer?: Buffer;
|
||||
sampleRate?: number;
|
||||
provider?: string;
|
||||
error?: string;
|
||||
}>;
|
||||
};
|
||||
|
||||
export type TelephonyTtsProvider = {
|
||||
synthesizeForTelephony: (text: string) => Promise<Buffer>;
|
||||
};
|
||||
|
||||
export function createTelephonyTtsProvider(params: {
|
||||
coreConfig: CoreConfig;
|
||||
ttsOverride?: VoiceCallTtsConfig;
|
||||
runtime: TelephonyTtsRuntime;
|
||||
}): TelephonyTtsProvider {
|
||||
const { coreConfig, ttsOverride, runtime } = params;
|
||||
const mergedConfig = applyTtsOverride(coreConfig, ttsOverride);
|
||||
|
||||
return {
|
||||
synthesizeForTelephony: async (text: string) => {
|
||||
const result = await runtime.textToSpeechTelephony({
|
||||
text,
|
||||
cfg: mergedConfig,
|
||||
});
|
||||
|
||||
if (!result.success || !result.audioBuffer || !result.sampleRate) {
|
||||
throw new Error(result.error ?? "TTS conversion failed");
|
||||
}
|
||||
|
||||
return convertPcmToMulaw8k(result.audioBuffer, result.sampleRate);
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function applyTtsOverride(
|
||||
coreConfig: CoreConfig,
|
||||
override?: VoiceCallTtsConfig,
|
||||
): CoreConfig {
|
||||
if (!override) return coreConfig;
|
||||
|
||||
const base = coreConfig.messages?.tts;
|
||||
const merged = mergeTtsConfig(base, override);
|
||||
if (!merged) return coreConfig;
|
||||
|
||||
return {
|
||||
...coreConfig,
|
||||
messages: {
|
||||
...(coreConfig.messages ?? {}),
|
||||
tts: merged,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function mergeTtsConfig(
|
||||
base?: VoiceCallTtsConfig,
|
||||
override?: VoiceCallTtsConfig,
|
||||
): VoiceCallTtsConfig | undefined {
|
||||
if (!base && !override) return undefined;
|
||||
if (!override) return base;
|
||||
if (!base) return override;
|
||||
return deepMerge(base, override);
|
||||
}
|
||||
|
||||
function deepMerge<T>(base: T, override: T): T {
|
||||
if (!isPlainObject(base) || !isPlainObject(override)) {
|
||||
return override;
|
||||
}
|
||||
const result: Record<string, unknown> = { ...base };
|
||||
for (const [key, value] of Object.entries(override)) {
|
||||
if (value === undefined) continue;
|
||||
const existing = (base as Record<string, unknown>)[key];
|
||||
if (isPlainObject(existing) && isPlainObject(value)) {
|
||||
result[key] = deepMerge(existing, value);
|
||||
} else {
|
||||
result[key] = value;
|
||||
}
|
||||
}
|
||||
return result as T;
|
||||
}
|
||||
|
||||
function isPlainObject(value: unknown): value is Record<string, unknown> {
|
||||
return Boolean(value) && typeof value === "object" && !Array.isArray(value);
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
import crypto from "node:crypto";
|
||||
import { spawn, type ChildProcessWithoutNullStreams } from "node:child_process";
|
||||
import type { ChildProcessWithoutNullStreams } from "node:child_process";
|
||||
import path from "node:path";
|
||||
import type { AgentTool, AgentToolResult } from "@mariozechner/pi-agent-core";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
@@ -27,6 +27,7 @@ import {
|
||||
} from "../infra/shell-env.js";
|
||||
import { enqueueSystemEvent } from "../infra/system-events.js";
|
||||
import { logInfo, logWarn } from "../logger.js";
|
||||
import { formatSpawnError, spawnWithFallback } from "../process/spawn-utils.js";
|
||||
import {
|
||||
type ProcessSession,
|
||||
type SessionStdin,
|
||||
@@ -362,23 +363,38 @@ async function runExecProcess(opts: {
|
||||
let stdin: SessionStdin | undefined;
|
||||
|
||||
if (opts.sandbox) {
|
||||
child = spawn(
|
||||
"docker",
|
||||
buildDockerExecArgs({
|
||||
containerName: opts.sandbox.containerName,
|
||||
command: opts.command,
|
||||
workdir: opts.containerWorkdir ?? opts.sandbox.containerWorkdir,
|
||||
env: opts.env,
|
||||
tty: opts.usePty,
|
||||
}),
|
||||
{
|
||||
const { child: spawned } = await spawnWithFallback({
|
||||
argv: [
|
||||
"docker",
|
||||
...buildDockerExecArgs({
|
||||
containerName: opts.sandbox.containerName,
|
||||
command: opts.command,
|
||||
workdir: opts.containerWorkdir ?? opts.sandbox.containerWorkdir,
|
||||
env: opts.env,
|
||||
tty: opts.usePty,
|
||||
}),
|
||||
],
|
||||
options: {
|
||||
cwd: opts.workdir,
|
||||
env: process.env,
|
||||
detached: process.platform !== "win32",
|
||||
stdio: ["pipe", "pipe", "pipe"],
|
||||
windowsHide: true,
|
||||
},
|
||||
) as ChildProcessWithoutNullStreams;
|
||||
fallbacks: [
|
||||
{
|
||||
label: "no-detach",
|
||||
options: { detached: false },
|
||||
},
|
||||
],
|
||||
onFallback: (err, fallback) => {
|
||||
const errText = formatSpawnError(err);
|
||||
const warning = `Warning: spawn failed (${errText}); retrying with ${fallback.label}.`;
|
||||
logWarn(`exec: spawn failed (${errText}); retrying with ${fallback.label}.`);
|
||||
opts.warnings.push(warning);
|
||||
},
|
||||
});
|
||||
child = spawned as ChildProcessWithoutNullStreams;
|
||||
stdin = child.stdin;
|
||||
} else if (opts.usePty) {
|
||||
const { shell, args: shellArgs } = getShellConfig();
|
||||
@@ -422,24 +438,56 @@ async function runExecProcess(opts: {
|
||||
const warning = `Warning: PTY spawn failed (${errText}); retrying without PTY for \`${opts.command}\`.`;
|
||||
logWarn(`exec: PTY spawn failed (${errText}); retrying without PTY for "${opts.command}".`);
|
||||
opts.warnings.push(warning);
|
||||
child = spawn(shell, [...shellArgs, opts.command], {
|
||||
const { child: spawned } = await spawnWithFallback({
|
||||
argv: [shell, ...shellArgs, opts.command],
|
||||
options: {
|
||||
cwd: opts.workdir,
|
||||
env: opts.env,
|
||||
detached: process.platform !== "win32",
|
||||
stdio: ["pipe", "pipe", "pipe"],
|
||||
windowsHide: true,
|
||||
},
|
||||
fallbacks: [
|
||||
{
|
||||
label: "no-detach",
|
||||
options: { detached: false },
|
||||
},
|
||||
],
|
||||
onFallback: (fallbackErr, fallback) => {
|
||||
const fallbackText = formatSpawnError(fallbackErr);
|
||||
const fallbackWarning = `Warning: spawn failed (${fallbackText}); retrying with ${fallback.label}.`;
|
||||
logWarn(`exec: spawn failed (${fallbackText}); retrying with ${fallback.label}.`);
|
||||
opts.warnings.push(fallbackWarning);
|
||||
},
|
||||
});
|
||||
child = spawned as ChildProcessWithoutNullStreams;
|
||||
stdin = child.stdin;
|
||||
}
|
||||
} else {
|
||||
const { shell, args: shellArgs } = getShellConfig();
|
||||
const { child: spawned } = await spawnWithFallback({
|
||||
argv: [shell, ...shellArgs, opts.command],
|
||||
options: {
|
||||
cwd: opts.workdir,
|
||||
env: opts.env,
|
||||
detached: process.platform !== "win32",
|
||||
stdio: ["pipe", "pipe", "pipe"],
|
||||
windowsHide: true,
|
||||
}) as ChildProcessWithoutNullStreams;
|
||||
stdin = child.stdin;
|
||||
}
|
||||
} else {
|
||||
const { shell, args: shellArgs } = getShellConfig();
|
||||
child = spawn(shell, [...shellArgs, opts.command], {
|
||||
cwd: opts.workdir,
|
||||
env: opts.env,
|
||||
detached: process.platform !== "win32",
|
||||
stdio: ["pipe", "pipe", "pipe"],
|
||||
windowsHide: true,
|
||||
}) as ChildProcessWithoutNullStreams;
|
||||
},
|
||||
fallbacks: [
|
||||
{
|
||||
label: "no-detach",
|
||||
options: { detached: false },
|
||||
},
|
||||
],
|
||||
onFallback: (err, fallback) => {
|
||||
const errText = formatSpawnError(err);
|
||||
const warning = `Warning: spawn failed (${errText}); retrying with ${fallback.label}.`;
|
||||
logWarn(`exec: spawn failed (${errText}); retrying with ${fallback.label}.`);
|
||||
opts.warnings.push(warning);
|
||||
},
|
||||
});
|
||||
child = spawned as ChildProcessWithoutNullStreams;
|
||||
stdin = child.stdin;
|
||||
}
|
||||
|
||||
|
||||
@@ -10,11 +10,17 @@ describe("extractModelDirective", () => {
|
||||
expect(result.cleaned).toBe("");
|
||||
});
|
||||
|
||||
it("extracts /models with argument", () => {
|
||||
it("does not treat /models as a /model directive", () => {
|
||||
const result = extractModelDirective("/models gpt-5");
|
||||
expect(result.hasDirective).toBe(true);
|
||||
expect(result.rawModel).toBe("gpt-5");
|
||||
expect(result.cleaned).toBe("");
|
||||
expect(result.hasDirective).toBe(false);
|
||||
expect(result.rawModel).toBeUndefined();
|
||||
expect(result.cleaned).toBe("/models gpt-5");
|
||||
});
|
||||
|
||||
it("does not parse /models as a /model directive (no args)", () => {
|
||||
const result = extractModelDirective("/models");
|
||||
expect(result.hasDirective).toBe(false);
|
||||
expect(result.cleaned).toBe("/models");
|
||||
});
|
||||
|
||||
it("extracts /model with provider/model format", () => {
|
||||
|
||||
@@ -14,7 +14,7 @@ export function extractModelDirective(
|
||||
if (!body) return { cleaned: "", hasDirective: false };
|
||||
|
||||
const modelMatch = body.match(
|
||||
/(?:^|\s)\/models?(?=$|\s|:)\s*:?\s*([A-Za-z0-9_.:@-]+(?:\/[A-Za-z0-9_.:@-]+)*)?/i,
|
||||
/(?:^|\s)\/model(?=$|\s|:)\s*:?\s*([A-Za-z0-9_.:@-]+(?:\/[A-Za-z0-9_.:@-]+)*)?/i,
|
||||
);
|
||||
|
||||
const aliases = (options?.aliases ?? []).map((alias) => alias.trim()).filter(Boolean);
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import type { ReasoningLevel } from "../thinking.js";
|
||||
import type { NoticeLevel, ReasoningLevel } from "../thinking.js";
|
||||
import {
|
||||
type ElevatedLevel,
|
||||
normalizeElevatedLevel,
|
||||
normalizeNoticeLevel,
|
||||
normalizeReasoningLevel,
|
||||
normalizeThinkLevel,
|
||||
normalizeVerboseLevel,
|
||||
@@ -112,6 +113,22 @@ export function extractVerboseDirective(body?: string): {
|
||||
};
|
||||
}
|
||||
|
||||
export function extractNoticeDirective(body?: string): {
|
||||
cleaned: string;
|
||||
noticeLevel?: NoticeLevel;
|
||||
rawLevel?: string;
|
||||
hasDirective: boolean;
|
||||
} {
|
||||
if (!body) return { cleaned: "", hasDirective: false };
|
||||
const extracted = extractLevelDirective(body, ["notice", "notices"], normalizeNoticeLevel);
|
||||
return {
|
||||
cleaned: extracted.cleaned,
|
||||
noticeLevel: extracted.level,
|
||||
rawLevel: extracted.rawLevel,
|
||||
hasDirective: extracted.hasDirective,
|
||||
};
|
||||
}
|
||||
|
||||
export function extractElevatedDirective(body?: string): {
|
||||
cleaned: string;
|
||||
elevatedLevel?: ElevatedLevel;
|
||||
@@ -152,5 +169,5 @@ export function extractStatusDirective(body?: string): {
|
||||
return extractSimpleDirective(body, ["status"]);
|
||||
}
|
||||
|
||||
export type { ElevatedLevel, ReasoningLevel, ThinkLevel, VerboseLevel };
|
||||
export type { ElevatedLevel, NoticeLevel, ReasoningLevel, ThinkLevel, VerboseLevel };
|
||||
export { extractExecDirective } from "./exec/directive.js";
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
export type ThinkLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh";
|
||||
export type VerboseLevel = "off" | "on" | "full";
|
||||
export type NoticeLevel = "off" | "on" | "full";
|
||||
export type ElevatedLevel = "off" | "on" | "ask" | "full";
|
||||
export type ElevatedMode = "off" | "ask" | "full";
|
||||
export type ReasoningLevel = "off" | "on" | "stream";
|
||||
@@ -93,6 +94,16 @@ export function normalizeVerboseLevel(raw?: string | null): VerboseLevel | undef
|
||||
return undefined;
|
||||
}
|
||||
|
||||
// Normalize system notice flags used to toggle system notifications.
|
||||
export function normalizeNoticeLevel(raw?: string | null): NoticeLevel | undefined {
|
||||
if (!raw) return undefined;
|
||||
const key = raw.toLowerCase();
|
||||
if (["off", "false", "no", "0"].includes(key)) return "off";
|
||||
if (["full", "all", "everything"].includes(key)) return "full";
|
||||
if (["on", "minimal", "true", "yes", "1"].includes(key)) return "on";
|
||||
return undefined;
|
||||
}
|
||||
|
||||
// Normalize response-usage display modes used to toggle per-response usage footers.
|
||||
export function normalizeUsageDisplay(raw?: string | null): UsageDisplayLevel | undefined {
|
||||
if (!raw) return undefined;
|
||||
|
||||
@@ -59,6 +59,17 @@ export const CONFIG_PATH_CLAWDBOT = resolveConfigPath();
|
||||
|
||||
export const DEFAULT_GATEWAY_PORT = 18789;
|
||||
|
||||
/**
|
||||
* Gateway lock directory (ephemeral).
|
||||
* Default: os.tmpdir()/clawdbot-<uid> (uid suffix when available).
|
||||
*/
|
||||
export function resolveGatewayLockDir(tmpdir: () => string = os.tmpdir): string {
|
||||
const base = tmpdir();
|
||||
const uid = typeof process.getuid === "function" ? process.getuid() : undefined;
|
||||
const suffix = uid != null ? `clawdbot-${uid}` : "clawdbot";
|
||||
return path.join(base, suffix);
|
||||
}
|
||||
|
||||
const OAUTH_FILENAME = "oauth.json";
|
||||
|
||||
/**
|
||||
|
||||
@@ -118,6 +118,8 @@ export type TelegramAccountConfig = {
|
||||
reactionLevel?: "off" | "ack" | "minimal" | "extensive";
|
||||
/** Heartbeat visibility settings for this channel. */
|
||||
heartbeat?: ChannelHeartbeatVisibilityConfig;
|
||||
/** Controls whether link previews are shown in outbound messages. Default: true. */
|
||||
linkPreview?: boolean;
|
||||
};
|
||||
|
||||
export type TelegramTopicConfig = {
|
||||
|
||||
@@ -125,6 +125,7 @@ export const TelegramAccountSchemaBase = z
|
||||
reactionNotifications: z.enum(["off", "own", "all"]).optional(),
|
||||
reactionLevel: z.enum(["off", "ack", "minimal", "extensive"]).optional(),
|
||||
heartbeat: ChannelHeartbeatVisibilitySchema,
|
||||
linkPreview: z.boolean().optional(),
|
||||
})
|
||||
.strict();
|
||||
|
||||
|
||||
@@ -1,77 +1,28 @@
|
||||
import { beforeEach, describe, expect, test, vi } from "vitest";
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
const testTailnetIPv4 = { value: undefined as string | undefined };
|
||||
const testTailnetIPv6 = { value: undefined as string | undefined };
|
||||
import { resolveGatewayListenHosts } from "./net.js";
|
||||
|
||||
vi.mock("../infra/tailnet.js", () => ({
|
||||
pickPrimaryTailnetIPv4: () => testTailnetIPv4.value,
|
||||
pickPrimaryTailnetIPv6: () => testTailnetIPv6.value,
|
||||
}));
|
||||
|
||||
import { isLocalGatewayAddress, resolveGatewayClientIp } from "./net.js";
|
||||
|
||||
describe("gateway net", () => {
|
||||
beforeEach(() => {
|
||||
testTailnetIPv4.value = undefined;
|
||||
testTailnetIPv6.value = undefined;
|
||||
});
|
||||
|
||||
test("treats loopback as local", () => {
|
||||
expect(isLocalGatewayAddress("127.0.0.1")).toBe(true);
|
||||
expect(isLocalGatewayAddress("127.0.1.1")).toBe(true);
|
||||
expect(isLocalGatewayAddress("::1")).toBe(true);
|
||||
expect(isLocalGatewayAddress("::ffff:127.0.0.1")).toBe(true);
|
||||
});
|
||||
|
||||
test("treats local tailnet IPv4 as local", () => {
|
||||
testTailnetIPv4.value = "100.64.0.1";
|
||||
expect(isLocalGatewayAddress("100.64.0.1")).toBe(true);
|
||||
expect(isLocalGatewayAddress("::ffff:100.64.0.1")).toBe(true);
|
||||
});
|
||||
|
||||
test("ignores non-matching tailnet IPv4", () => {
|
||||
testTailnetIPv4.value = "100.64.0.1";
|
||||
expect(isLocalGatewayAddress("100.64.0.2")).toBe(false);
|
||||
});
|
||||
|
||||
test("treats local tailnet IPv6 as local", () => {
|
||||
testTailnetIPv6.value = "fd7a:115c:a1e0::123";
|
||||
expect(isLocalGatewayAddress("fd7a:115c:a1e0::123")).toBe(true);
|
||||
});
|
||||
|
||||
test("uses forwarded-for when remote is a trusted proxy", () => {
|
||||
const clientIp = resolveGatewayClientIp({
|
||||
remoteAddr: "10.0.0.2",
|
||||
forwardedFor: "203.0.113.9, 10.0.0.2",
|
||||
trustedProxies: ["10.0.0.2"],
|
||||
describe("resolveGatewayListenHosts", () => {
|
||||
it("returns the input host when not loopback", async () => {
|
||||
const hosts = await resolveGatewayListenHosts("0.0.0.0", {
|
||||
canBindToHost: async () => {
|
||||
throw new Error("should not be called");
|
||||
},
|
||||
});
|
||||
expect(clientIp).toBe("203.0.113.9");
|
||||
expect(hosts).toEqual(["0.0.0.0"]);
|
||||
});
|
||||
|
||||
test("ignores forwarded-for from untrusted proxies", () => {
|
||||
const clientIp = resolveGatewayClientIp({
|
||||
remoteAddr: "10.0.0.3",
|
||||
forwardedFor: "203.0.113.9",
|
||||
trustedProxies: ["10.0.0.2"],
|
||||
it("adds ::1 when IPv6 loopback is available", async () => {
|
||||
const hosts = await resolveGatewayListenHosts("127.0.0.1", {
|
||||
canBindToHost: async () => true,
|
||||
});
|
||||
expect(clientIp).toBe("10.0.0.3");
|
||||
expect(hosts).toEqual(["127.0.0.1", "::1"]);
|
||||
});
|
||||
|
||||
test("normalizes trusted proxy IPs and strips forwarded ports", () => {
|
||||
const clientIp = resolveGatewayClientIp({
|
||||
remoteAddr: "::ffff:10.0.0.2",
|
||||
forwardedFor: "203.0.113.9:1234",
|
||||
trustedProxies: ["10.0.0.2"],
|
||||
it("keeps only IPv4 loopback when IPv6 is unavailable", async () => {
|
||||
const hosts = await resolveGatewayListenHosts("127.0.0.1", {
|
||||
canBindToHost: async () => false,
|
||||
});
|
||||
expect(clientIp).toBe("203.0.113.9");
|
||||
});
|
||||
|
||||
test("falls back to x-real-ip when forwarded-for is missing", () => {
|
||||
const clientIp = resolveGatewayClientIp({
|
||||
remoteAddr: "10.0.0.2",
|
||||
realIp: "203.0.113.10",
|
||||
trustedProxies: ["10.0.0.2"],
|
||||
});
|
||||
expect(clientIp).toBe("203.0.113.10");
|
||||
expect(hosts).toEqual(["127.0.0.1"]);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -97,14 +97,14 @@ export async function resolveGatewayBindHost(
|
||||
|
||||
if (mode === "loopback") {
|
||||
// 127.0.0.1 rarely fails, but handle gracefully
|
||||
if (await canBindTo("127.0.0.1")) return "127.0.0.1";
|
||||
if (await canBindToHost("127.0.0.1")) return "127.0.0.1";
|
||||
return "0.0.0.0"; // extreme fallback
|
||||
}
|
||||
|
||||
if (mode === "tailnet") {
|
||||
const tailnetIP = pickPrimaryTailnetIPv4();
|
||||
if (tailnetIP && (await canBindTo(tailnetIP))) return tailnetIP;
|
||||
if (await canBindTo("127.0.0.1")) return "127.0.0.1";
|
||||
if (tailnetIP && (await canBindToHost(tailnetIP))) return tailnetIP;
|
||||
if (await canBindToHost("127.0.0.1")) return "127.0.0.1";
|
||||
return "0.0.0.0";
|
||||
}
|
||||
|
||||
@@ -116,13 +116,13 @@ export async function resolveGatewayBindHost(
|
||||
const host = customHost?.trim();
|
||||
if (!host) return "0.0.0.0"; // invalid config → fall back to all
|
||||
|
||||
if (isValidIPv4(host) && (await canBindTo(host))) return host;
|
||||
if (isValidIPv4(host) && (await canBindToHost(host))) return host;
|
||||
// Custom IP failed → fall back to LAN
|
||||
return "0.0.0.0";
|
||||
}
|
||||
|
||||
if (mode === "auto") {
|
||||
if (await canBindTo("127.0.0.1")) return "127.0.0.1";
|
||||
if (await canBindToHost("127.0.0.1")) return "127.0.0.1";
|
||||
return "0.0.0.0";
|
||||
}
|
||||
|
||||
@@ -136,7 +136,7 @@ export async function resolveGatewayBindHost(
|
||||
* @param host - The host address to test
|
||||
* @returns True if we can successfully bind to this address
|
||||
*/
|
||||
async function canBindTo(host: string): Promise<boolean> {
|
||||
export async function canBindToHost(host: string): Promise<boolean> {
|
||||
return new Promise((resolve) => {
|
||||
const testServer = net.createServer();
|
||||
testServer.once("error", () => {
|
||||
@@ -151,6 +151,16 @@ async function canBindTo(host: string): Promise<boolean> {
|
||||
});
|
||||
}
|
||||
|
||||
export async function resolveGatewayListenHosts(
|
||||
bindHost: string,
|
||||
opts?: { canBindToHost?: (host: string) => Promise<boolean> },
|
||||
): Promise<string[]> {
|
||||
if (bindHost !== "127.0.0.1") return [bindHost];
|
||||
const canBind = opts?.canBindToHost ?? canBindToHost;
|
||||
if (await canBind("::1")) return [bindHost, "::1"];
|
||||
return [bindHost];
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate if a string is a valid IPv4 address.
|
||||
*
|
||||
|
||||
@@ -28,6 +28,7 @@ export function createGatewayCloseHandler(params: {
|
||||
browserControl: { stop: () => Promise<void> } | null;
|
||||
wss: WebSocketServer;
|
||||
httpServer: HttpServer;
|
||||
httpServers?: HttpServer[];
|
||||
}) {
|
||||
return async (opts?: { reason?: string; restartExpectedMs?: number | null }) => {
|
||||
const reasonRaw = typeof opts?.reason === "string" ? opts.reason.trim() : "";
|
||||
@@ -108,14 +109,20 @@ export function createGatewayCloseHandler(params: {
|
||||
await params.browserControl.stop().catch(() => {});
|
||||
}
|
||||
await new Promise<void>((resolve) => params.wss.close(() => resolve()));
|
||||
const httpServer = params.httpServer as HttpServer & {
|
||||
closeIdleConnections?: () => void;
|
||||
};
|
||||
if (typeof httpServer.closeIdleConnections === "function") {
|
||||
httpServer.closeIdleConnections();
|
||||
const servers =
|
||||
params.httpServers && params.httpServers.length > 0
|
||||
? params.httpServers
|
||||
: [params.httpServer];
|
||||
for (const server of servers) {
|
||||
const httpServer = server as HttpServer & {
|
||||
closeIdleConnections?: () => void;
|
||||
};
|
||||
if (typeof httpServer.closeIdleConnections === "function") {
|
||||
httpServer.closeIdleConnections();
|
||||
}
|
||||
await new Promise<void>((resolve, reject) =>
|
||||
httpServer.close((err) => (err ? reject(err) : resolve())),
|
||||
);
|
||||
}
|
||||
await new Promise<void>((resolve, reject) =>
|
||||
params.httpServer.close((err) => (err ? reject(err) : resolve())),
|
||||
);
|
||||
};
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ import type { ChatAbortControllerEntry } from "./chat-abort.js";
|
||||
import type { HooksConfigResolved } from "./hooks.js";
|
||||
import { createGatewayHooksRequestHandler } from "./server/hooks.js";
|
||||
import { listenGatewayHttpServer } from "./server/http-listen.js";
|
||||
import { resolveGatewayListenHosts } from "./net.js";
|
||||
import { createGatewayPluginRequestHandler } from "./server/plugins-http.js";
|
||||
import type { GatewayWsClient } from "./server/ws-types.js";
|
||||
import { createGatewayBroadcaster } from "./server-broadcast.js";
|
||||
@@ -38,11 +39,14 @@ export async function createGatewayRuntimeState(params: {
|
||||
canvasHostEnabled: boolean;
|
||||
allowCanvasHostInTests?: boolean;
|
||||
logCanvas: { info: (msg: string) => void; warn: (msg: string) => void };
|
||||
log: { info: (msg: string) => void; warn: (msg: string) => void };
|
||||
logHooks: ReturnType<typeof createSubsystemLogger>;
|
||||
logPlugins: ReturnType<typeof createSubsystemLogger>;
|
||||
}): Promise<{
|
||||
canvasHost: CanvasHostHandler | null;
|
||||
httpServer: HttpServer;
|
||||
httpServers: HttpServer[];
|
||||
httpBindHosts: string[];
|
||||
wss: WebSocketServer;
|
||||
clients: Set<GatewayWsClient>;
|
||||
broadcast: (
|
||||
@@ -100,30 +104,49 @@ export async function createGatewayRuntimeState(params: {
|
||||
log: params.logPlugins,
|
||||
});
|
||||
|
||||
const httpServer = createGatewayHttpServer({
|
||||
canvasHost,
|
||||
controlUiEnabled: params.controlUiEnabled,
|
||||
controlUiBasePath: params.controlUiBasePath,
|
||||
openAiChatCompletionsEnabled: params.openAiChatCompletionsEnabled,
|
||||
openResponsesEnabled: params.openResponsesEnabled,
|
||||
openResponsesConfig: params.openResponsesConfig,
|
||||
handleHooksRequest,
|
||||
handlePluginRequest,
|
||||
resolvedAuth: params.resolvedAuth,
|
||||
tlsOptions: params.gatewayTls?.enabled ? params.gatewayTls.tlsOptions : undefined,
|
||||
});
|
||||
|
||||
await listenGatewayHttpServer({
|
||||
httpServer,
|
||||
bindHost: params.bindHost,
|
||||
port: params.port,
|
||||
});
|
||||
const bindHosts = await resolveGatewayListenHosts(params.bindHost);
|
||||
const httpServers: HttpServer[] = [];
|
||||
const httpBindHosts: string[] = [];
|
||||
for (const host of bindHosts) {
|
||||
const httpServer = createGatewayHttpServer({
|
||||
canvasHost,
|
||||
controlUiEnabled: params.controlUiEnabled,
|
||||
controlUiBasePath: params.controlUiBasePath,
|
||||
openAiChatCompletionsEnabled: params.openAiChatCompletionsEnabled,
|
||||
openResponsesEnabled: params.openResponsesEnabled,
|
||||
openResponsesConfig: params.openResponsesConfig,
|
||||
handleHooksRequest,
|
||||
handlePluginRequest,
|
||||
resolvedAuth: params.resolvedAuth,
|
||||
tlsOptions: params.gatewayTls?.enabled ? params.gatewayTls.tlsOptions : undefined,
|
||||
});
|
||||
try {
|
||||
await listenGatewayHttpServer({
|
||||
httpServer,
|
||||
bindHost: host,
|
||||
port: params.port,
|
||||
});
|
||||
httpServers.push(httpServer);
|
||||
httpBindHosts.push(host);
|
||||
} catch (err) {
|
||||
if (host === bindHosts[0]) throw err;
|
||||
params.log.warn(
|
||||
`gateway: failed to bind loopback alias ${host}:${params.port} (${String(err)})`,
|
||||
);
|
||||
}
|
||||
}
|
||||
const httpServer = httpServers[0];
|
||||
if (!httpServer) {
|
||||
throw new Error("Gateway HTTP server failed to start");
|
||||
}
|
||||
|
||||
const wss = new WebSocketServer({
|
||||
noServer: true,
|
||||
maxPayload: MAX_PAYLOAD_BYTES,
|
||||
});
|
||||
attachGatewayUpgradeHandler({ httpServer, wss, canvasHost });
|
||||
for (const server of httpServers) {
|
||||
attachGatewayUpgradeHandler({ httpServer: server, wss, canvasHost });
|
||||
}
|
||||
|
||||
const clients = new Set<GatewayWsClient>();
|
||||
const { broadcast } = createGatewayBroadcaster({ clients });
|
||||
@@ -140,6 +163,8 @@ export async function createGatewayRuntimeState(params: {
|
||||
return {
|
||||
canvasHost,
|
||||
httpServer,
|
||||
httpServers,
|
||||
httpBindHosts,
|
||||
wss,
|
||||
clients,
|
||||
broadcast,
|
||||
|
||||
@@ -7,6 +7,7 @@ import { getResolvedLoggerSettings } from "../logging.js";
|
||||
export function logGatewayStartup(params: {
|
||||
cfg: ReturnType<typeof loadConfig>;
|
||||
bindHost: string;
|
||||
bindHosts?: string[];
|
||||
port: number;
|
||||
tlsEnabled?: boolean;
|
||||
log: { info: (msg: string, meta?: Record<string, unknown>) => void };
|
||||
@@ -22,9 +23,16 @@ export function logGatewayStartup(params: {
|
||||
consoleMessage: `agent model: ${chalk.whiteBright(modelRef)}`,
|
||||
});
|
||||
const scheme = params.tlsEnabled ? "wss" : "ws";
|
||||
const formatHost = (host: string) => (host.includes(":") ? `[${host}]` : host);
|
||||
const hosts =
|
||||
params.bindHosts && params.bindHosts.length > 0 ? params.bindHosts : [params.bindHost];
|
||||
const primaryHost = hosts[0] ?? params.bindHost;
|
||||
params.log.info(
|
||||
`listening on ${scheme}://${params.bindHost}:${params.port} (PID ${process.pid})`,
|
||||
`listening on ${scheme}://${formatHost(primaryHost)}:${params.port} (PID ${process.pid})`,
|
||||
);
|
||||
for (const host of hosts.slice(1)) {
|
||||
params.log.info(`listening on ${scheme}://${formatHost(host)}:${params.port}`);
|
||||
}
|
||||
params.log.info(`log file: ${getResolvedLoggerSettings().file}`);
|
||||
if (params.isNixMode) {
|
||||
params.log.info("gateway: running in Nix mode (config managed externally)");
|
||||
|
||||
@@ -263,6 +263,8 @@ export async function startGatewayServer(
|
||||
const {
|
||||
canvasHost,
|
||||
httpServer,
|
||||
httpServers,
|
||||
httpBindHosts,
|
||||
wss,
|
||||
clients,
|
||||
broadcast,
|
||||
@@ -292,6 +294,7 @@ export async function startGatewayServer(
|
||||
canvasHostEnabled,
|
||||
allowCanvasHostInTests: opts.allowCanvasHostInTests,
|
||||
logCanvas,
|
||||
log,
|
||||
logHooks,
|
||||
logPlugins,
|
||||
});
|
||||
@@ -464,6 +467,7 @@ export async function startGatewayServer(
|
||||
logGatewayStartup({
|
||||
cfg: cfgAtStart,
|
||||
bindHost,
|
||||
bindHosts: httpBindHosts,
|
||||
port,
|
||||
tlsEnabled: gatewayTls.enabled,
|
||||
log,
|
||||
@@ -552,6 +556,7 @@ export async function startGatewayServer(
|
||||
browserControl,
|
||||
wss,
|
||||
httpServer,
|
||||
httpServers,
|
||||
});
|
||||
|
||||
return {
|
||||
|
||||
@@ -7,12 +7,13 @@ import path from "node:path";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
|
||||
import { acquireGatewayLock, GatewayLockError } from "./gateway-lock.js";
|
||||
import { resolveConfigPath, resolveStateDir } from "../config/paths.js";
|
||||
import { resolveConfigPath, resolveGatewayLockDir, resolveStateDir } from "../config/paths.js";
|
||||
|
||||
async function makeEnv() {
|
||||
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-gateway-lock-"));
|
||||
const configPath = path.join(dir, "clawdbot.json");
|
||||
await fs.writeFile(configPath, "{}", "utf8");
|
||||
await fs.mkdir(resolveGatewayLockDir(), { recursive: true });
|
||||
return {
|
||||
env: {
|
||||
...process.env,
|
||||
@@ -29,7 +30,8 @@ function resolveLockPath(env: NodeJS.ProcessEnv) {
|
||||
const stateDir = resolveStateDir(env);
|
||||
const configPath = resolveConfigPath(env, stateDir);
|
||||
const hash = createHash("sha1").update(configPath).digest("hex").slice(0, 8);
|
||||
return { lockPath: path.join(stateDir, `gateway.${hash}.lock`), configPath };
|
||||
const lockDir = resolveGatewayLockDir();
|
||||
return { lockPath: path.join(lockDir, `gateway.${hash}.lock`), configPath };
|
||||
}
|
||||
|
||||
function makeProcStat(pid: number, startTime: number) {
|
||||
|
||||
@@ -3,7 +3,7 @@ import fs from "node:fs/promises";
|
||||
import fsSync from "node:fs";
|
||||
import path from "node:path";
|
||||
|
||||
import { resolveConfigPath, resolveStateDir } from "../config/paths.js";
|
||||
import { resolveConfigPath, resolveGatewayLockDir, resolveStateDir } from "../config/paths.js";
|
||||
|
||||
const DEFAULT_TIMEOUT_MS = 5000;
|
||||
const DEFAULT_POLL_INTERVAL_MS = 100;
|
||||
@@ -150,7 +150,8 @@ function resolveGatewayLockPath(env: NodeJS.ProcessEnv) {
|
||||
const stateDir = resolveStateDir(env);
|
||||
const configPath = resolveConfigPath(env, stateDir);
|
||||
const hash = createHash("sha1").update(configPath).digest("hex").slice(0, 8);
|
||||
const lockPath = path.join(stateDir, `gateway.${hash}.lock`);
|
||||
const lockDir = resolveGatewayLockDir();
|
||||
const lockPath = path.join(lockDir, `gateway.${hash}.lock`);
|
||||
return { lockPath, configPath };
|
||||
}
|
||||
|
||||
|
||||
@@ -124,6 +124,7 @@ import { startWebLoginWithQr, waitForWebLogin } from "../../web/login-qr.js";
|
||||
import { sendMessageWhatsApp, sendPollWhatsApp } from "../../web/outbound.js";
|
||||
import { registerMemoryCli } from "../../cli/memory-cli.js";
|
||||
import { formatNativeDependencyHint } from "./native-deps.js";
|
||||
import { textToSpeechTelephony } from "../../tts/tts.js";
|
||||
|
||||
import type { PluginRuntime } from "./types.js";
|
||||
|
||||
@@ -162,6 +163,9 @@ export function createPluginRuntime(): PluginRuntime {
|
||||
getImageMetadata,
|
||||
resizeToJpeg,
|
||||
},
|
||||
tts: {
|
||||
textToSpeechTelephony,
|
||||
},
|
||||
tools: {
|
||||
createMemoryGetTool,
|
||||
createMemorySearchTool,
|
||||
|
||||
@@ -16,6 +16,7 @@ type UpsertChannelPairingRequest =
|
||||
typeof import("../../pairing/pairing-store.js").upsertChannelPairingRequest;
|
||||
type FetchRemoteMedia = typeof import("../../media/fetch.js").fetchRemoteMedia;
|
||||
type SaveMediaBuffer = typeof import("../../media/store.js").saveMediaBuffer;
|
||||
type TextToSpeechTelephony = typeof import("../../tts/tts.js").textToSpeechTelephony;
|
||||
type BuildMentionRegexes = typeof import("../../auto-reply/reply/mentions.js").buildMentionRegexes;
|
||||
type MatchesMentionPatterns =
|
||||
typeof import("../../auto-reply/reply/mentions.js").matchesMentionPatterns;
|
||||
@@ -173,6 +174,9 @@ export type PluginRuntime = {
|
||||
getImageMetadata: GetImageMetadata;
|
||||
resizeToJpeg: ResizeToJpeg;
|
||||
};
|
||||
tts: {
|
||||
textToSpeechTelephony: TextToSpeechTelephony;
|
||||
};
|
||||
tools: {
|
||||
createMemoryGetTool: CreateMemoryGetTool;
|
||||
createMemorySearchTool: CreateMemorySearchTool;
|
||||
|
||||
@@ -43,6 +43,7 @@ function setup(config: Record<string, unknown>): Registered {
|
||||
source: "test",
|
||||
config: {},
|
||||
pluginConfig: config,
|
||||
runtime: { tts: { textToSpeechTelephony: vi.fn() } },
|
||||
logger: noopLogger,
|
||||
registerGatewayMethod: (method, handler) => methods.set(method, handler),
|
||||
registerTool: (tool) => tools.push(tool),
|
||||
@@ -142,6 +143,7 @@ describe("voice-call plugin", () => {
|
||||
source: "test",
|
||||
config: {},
|
||||
pluginConfig: { provider: "mock" },
|
||||
runtime: { tts: { textToSpeechTelephony: vi.fn() } },
|
||||
logger: noopLogger,
|
||||
registerGatewayMethod: () => {},
|
||||
registerTool: () => {},
|
||||
|
||||
@@ -4,6 +4,7 @@ import { promisify } from "node:util";
|
||||
|
||||
import { danger, shouldLogVerbose } from "../globals.js";
|
||||
import { logDebug, logError } from "../logger.js";
|
||||
import { resolveCommandStdio } from "./spawn-utils.js";
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
@@ -78,19 +79,22 @@ export async function runCommandWithTimeout(
|
||||
if (resolvedEnv.npm_config_fund == null) resolvedEnv.npm_config_fund = "false";
|
||||
}
|
||||
|
||||
const stdio = resolveCommandStdio({ hasInput, preferInherit: true });
|
||||
const child = spawn(argv[0], argv.slice(1), {
|
||||
stdio,
|
||||
cwd,
|
||||
env: resolvedEnv,
|
||||
windowsVerbatimArguments,
|
||||
});
|
||||
// Spawn with inherited stdin (TTY) so tools like `pi` stay interactive when needed.
|
||||
return await new Promise((resolve, reject) => {
|
||||
const child = spawn(argv[0], argv.slice(1), {
|
||||
stdio: [hasInput ? "pipe" : "inherit", "pipe", "pipe"],
|
||||
cwd,
|
||||
env: resolvedEnv,
|
||||
windowsVerbatimArguments,
|
||||
});
|
||||
let stdout = "";
|
||||
let stderr = "";
|
||||
let settled = false;
|
||||
const timer = setTimeout(() => {
|
||||
child.kill("SIGKILL");
|
||||
if (typeof child.kill === "function") {
|
||||
child.kill("SIGKILL");
|
||||
}
|
||||
}, timeoutMs);
|
||||
|
||||
if (hasInput && child.stdin) {
|
||||
|
||||
64
src/process/spawn-utils.test.ts
Normal file
64
src/process/spawn-utils.test.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
import { EventEmitter } from "node:events";
|
||||
import { PassThrough } from "node:stream";
|
||||
import type { ChildProcess } from "node:child_process";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
|
||||
import { spawnWithFallback } from "./spawn-utils.js";
|
||||
|
||||
function createStubChild() {
|
||||
const child = new EventEmitter() as ChildProcess;
|
||||
child.stdin = new PassThrough() as ChildProcess["stdin"];
|
||||
child.stdout = new PassThrough() as ChildProcess["stdout"];
|
||||
child.stderr = new PassThrough() as ChildProcess["stderr"];
|
||||
child.pid = 1234;
|
||||
child.killed = false;
|
||||
child.kill = vi.fn(() => true) as ChildProcess["kill"];
|
||||
queueMicrotask(() => {
|
||||
child.emit("spawn");
|
||||
});
|
||||
return child;
|
||||
}
|
||||
|
||||
describe("spawnWithFallback", () => {
|
||||
it("retries on EBADF using fallback options", async () => {
|
||||
const spawnMock = vi
|
||||
.fn()
|
||||
.mockImplementationOnce(() => {
|
||||
const err = new Error("spawn EBADF");
|
||||
(err as NodeJS.ErrnoException).code = "EBADF";
|
||||
throw err;
|
||||
})
|
||||
.mockImplementationOnce(() => createStubChild());
|
||||
|
||||
const result = await spawnWithFallback({
|
||||
argv: ["echo", "ok"],
|
||||
options: { stdio: ["pipe", "pipe", "pipe"] },
|
||||
fallbacks: [{ label: "safe-stdin", options: { stdio: ["ignore", "pipe", "pipe"] } }],
|
||||
spawnImpl: spawnMock,
|
||||
});
|
||||
|
||||
expect(result.usedFallback).toBe(true);
|
||||
expect(result.fallbackLabel).toBe("safe-stdin");
|
||||
expect(spawnMock).toHaveBeenCalledTimes(2);
|
||||
expect(spawnMock.mock.calls[0]?.[2]?.stdio).toEqual(["pipe", "pipe", "pipe"]);
|
||||
expect(spawnMock.mock.calls[1]?.[2]?.stdio).toEqual(["ignore", "pipe", "pipe"]);
|
||||
});
|
||||
|
||||
it("does not retry on non-EBADF errors", async () => {
|
||||
const spawnMock = vi.fn().mockImplementationOnce(() => {
|
||||
const err = new Error("spawn ENOENT");
|
||||
(err as NodeJS.ErrnoException).code = "ENOENT";
|
||||
throw err;
|
||||
});
|
||||
|
||||
await expect(
|
||||
spawnWithFallback({
|
||||
argv: ["missing"],
|
||||
options: { stdio: ["pipe", "pipe", "pipe"] },
|
||||
fallbacks: [{ label: "safe-stdin", options: { stdio: ["ignore", "pipe", "pipe"] } }],
|
||||
spawnImpl: spawnMock,
|
||||
}),
|
||||
).rejects.toThrow(/ENOENT/);
|
||||
expect(spawnMock).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
127
src/process/spawn-utils.ts
Normal file
127
src/process/spawn-utils.ts
Normal file
@@ -0,0 +1,127 @@
|
||||
import type { ChildProcess, SpawnOptions } from "node:child_process";
|
||||
import { spawn } from "node:child_process";
|
||||
|
||||
export type SpawnFallback = {
|
||||
label: string;
|
||||
options: SpawnOptions;
|
||||
};
|
||||
|
||||
export type SpawnWithFallbackResult = {
|
||||
child: ChildProcess;
|
||||
usedFallback: boolean;
|
||||
fallbackLabel?: string;
|
||||
};
|
||||
|
||||
type SpawnWithFallbackParams = {
|
||||
argv: string[];
|
||||
options: SpawnOptions;
|
||||
fallbacks?: SpawnFallback[];
|
||||
spawnImpl?: typeof spawn;
|
||||
retryCodes?: string[];
|
||||
onFallback?: (err: unknown, fallback: SpawnFallback) => void;
|
||||
};
|
||||
|
||||
const DEFAULT_RETRY_CODES = ["EBADF"];
|
||||
|
||||
export function resolveCommandStdio(params: {
|
||||
hasInput: boolean;
|
||||
preferInherit: boolean;
|
||||
}): ["pipe" | "inherit" | "ignore", "pipe", "pipe"] {
|
||||
const stdin = params.hasInput ? "pipe" : params.preferInherit ? "inherit" : "pipe";
|
||||
return [stdin, "pipe", "pipe"];
|
||||
}
|
||||
|
||||
export function formatSpawnError(err: unknown): string {
|
||||
if (!(err instanceof Error)) return String(err);
|
||||
const details = err as NodeJS.ErrnoException;
|
||||
const parts: string[] = [];
|
||||
const message = err.message?.trim();
|
||||
if (message) parts.push(message);
|
||||
if (details.code && !message?.includes(details.code)) parts.push(details.code);
|
||||
if (details.syscall) parts.push(`syscall=${details.syscall}`);
|
||||
if (typeof details.errno === "number") parts.push(`errno=${details.errno}`);
|
||||
return parts.join(" ");
|
||||
}
|
||||
|
||||
function shouldRetry(err: unknown, codes: string[]): boolean {
|
||||
const code =
|
||||
err && typeof err === "object" && "code" in err ? String((err as { code?: unknown }).code) : "";
|
||||
return code.length > 0 && codes.includes(code);
|
||||
}
|
||||
|
||||
async function spawnAndWaitForSpawn(
|
||||
spawnImpl: typeof spawn,
|
||||
argv: string[],
|
||||
options: SpawnOptions,
|
||||
): Promise<ChildProcess> {
|
||||
const child = spawnImpl(argv[0], argv.slice(1), options);
|
||||
|
||||
return await new Promise((resolve, reject) => {
|
||||
let settled = false;
|
||||
const cleanup = () => {
|
||||
child.removeListener("error", onError);
|
||||
child.removeListener("spawn", onSpawn);
|
||||
};
|
||||
const finishResolve = () => {
|
||||
if (settled) return;
|
||||
settled = true;
|
||||
cleanup();
|
||||
resolve(child);
|
||||
};
|
||||
const onError = (err: unknown) => {
|
||||
if (settled) return;
|
||||
settled = true;
|
||||
cleanup();
|
||||
reject(err);
|
||||
};
|
||||
const onSpawn = () => {
|
||||
finishResolve();
|
||||
};
|
||||
child.once("error", onError);
|
||||
child.once("spawn", onSpawn);
|
||||
// Ensure mocked spawns that never emit "spawn" don't stall.
|
||||
process.nextTick(() => {
|
||||
if (typeof child.pid === "number") {
|
||||
finishResolve();
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
export async function spawnWithFallback(
|
||||
params: SpawnWithFallbackParams,
|
||||
): Promise<SpawnWithFallbackResult> {
|
||||
const spawnImpl = params.spawnImpl ?? spawn;
|
||||
const retryCodes = params.retryCodes ?? DEFAULT_RETRY_CODES;
|
||||
const baseOptions = { ...params.options };
|
||||
const fallbacks = params.fallbacks ?? [];
|
||||
const attempts: Array<{ label?: string; options: SpawnOptions }> = [
|
||||
{ options: baseOptions },
|
||||
...fallbacks.map((fallback) => ({
|
||||
label: fallback.label,
|
||||
options: { ...baseOptions, ...fallback.options },
|
||||
})),
|
||||
];
|
||||
|
||||
let lastError: unknown;
|
||||
for (let index = 0; index < attempts.length; index += 1) {
|
||||
const attempt = attempts[index];
|
||||
try {
|
||||
const child = await spawnAndWaitForSpawn(spawnImpl, params.argv, attempt.options);
|
||||
return {
|
||||
child,
|
||||
usedFallback: index > 0,
|
||||
fallbackLabel: attempt.label,
|
||||
};
|
||||
} catch (err) {
|
||||
lastError = err;
|
||||
const nextFallback = fallbacks[index];
|
||||
if (!nextFallback || !shouldRetry(err, retryCodes)) {
|
||||
throw err;
|
||||
}
|
||||
params.onFallback?.(err, nextFallback);
|
||||
}
|
||||
}
|
||||
|
||||
throw lastError;
|
||||
}
|
||||
@@ -151,6 +151,7 @@ export const dispatchTelegramMessage = async ({
|
||||
tableMode,
|
||||
chunkMode,
|
||||
onVoiceRecording: sendRecordVoice,
|
||||
linkPreview: telegramCfg.linkPreview,
|
||||
});
|
||||
},
|
||||
onError: (err, info) => {
|
||||
|
||||
@@ -348,6 +348,7 @@ export const registerTelegramNativeCommands = ({
|
||||
messageThreadId: resolvedThreadId,
|
||||
tableMode,
|
||||
chunkMode,
|
||||
linkPreview: telegramCfg.linkPreview,
|
||||
});
|
||||
},
|
||||
onError: (err, info) => {
|
||||
|
||||
@@ -108,4 +108,60 @@ describe("deliverReplies", () => {
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("includes link_preview_options when linkPreview is false", async () => {
|
||||
const runtime = { error: vi.fn(), log: vi.fn() };
|
||||
const sendMessage = vi.fn().mockResolvedValue({
|
||||
message_id: 3,
|
||||
chat: { id: "123" },
|
||||
});
|
||||
const bot = { api: { sendMessage } } as unknown as Bot;
|
||||
|
||||
await deliverReplies({
|
||||
replies: [{ text: "Check https://example.com" }],
|
||||
chatId: "123",
|
||||
token: "tok",
|
||||
runtime,
|
||||
bot,
|
||||
replyToMode: "off",
|
||||
textLimit: 4000,
|
||||
linkPreview: false,
|
||||
});
|
||||
|
||||
expect(sendMessage).toHaveBeenCalledWith(
|
||||
"123",
|
||||
expect.any(String),
|
||||
expect.objectContaining({
|
||||
link_preview_options: { is_disabled: true },
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("does not include link_preview_options when linkPreview is true", async () => {
|
||||
const runtime = { error: vi.fn(), log: vi.fn() };
|
||||
const sendMessage = vi.fn().mockResolvedValue({
|
||||
message_id: 4,
|
||||
chat: { id: "123" },
|
||||
});
|
||||
const bot = { api: { sendMessage } } as unknown as Bot;
|
||||
|
||||
await deliverReplies({
|
||||
replies: [{ text: "Check https://example.com" }],
|
||||
chatId: "123",
|
||||
token: "tok",
|
||||
runtime,
|
||||
bot,
|
||||
replyToMode: "off",
|
||||
textLimit: 4000,
|
||||
linkPreview: true,
|
||||
});
|
||||
|
||||
expect(sendMessage).toHaveBeenCalledWith(
|
||||
"123",
|
||||
expect.any(String),
|
||||
expect.not.objectContaining({
|
||||
link_preview_options: expect.anything(),
|
||||
}),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -36,8 +36,11 @@ export async function deliverReplies(params: {
|
||||
chunkMode?: ChunkMode;
|
||||
/** Callback invoked before sending a voice message to switch typing indicator. */
|
||||
onVoiceRecording?: () => Promise<void> | void;
|
||||
/** Controls whether link previews are shown. Default: true (previews enabled). */
|
||||
linkPreview?: boolean;
|
||||
}) {
|
||||
const { replies, chatId, runtime, bot, replyToMode, textLimit, messageThreadId } = params;
|
||||
const { replies, chatId, runtime, bot, replyToMode, textLimit, messageThreadId, linkPreview } =
|
||||
params;
|
||||
const chunkMode = params.chunkMode ?? "length";
|
||||
const threadParams = buildTelegramThreadParams(messageThreadId);
|
||||
let hasReplied = false;
|
||||
@@ -85,6 +88,7 @@ export async function deliverReplies(params: {
|
||||
messageThreadId,
|
||||
textMode: "html",
|
||||
plainText: chunk.text,
|
||||
linkPreview,
|
||||
});
|
||||
if (replyToId && !hasReplied) {
|
||||
hasReplied = true;
|
||||
@@ -180,6 +184,7 @@ export async function deliverReplies(params: {
|
||||
messageThreadId,
|
||||
textMode: "html",
|
||||
plainText: chunk.text,
|
||||
linkPreview,
|
||||
});
|
||||
if (replyToId && !hasReplied) {
|
||||
hasReplied = true;
|
||||
@@ -248,17 +253,22 @@ async function sendTelegramText(
|
||||
messageThreadId?: number;
|
||||
textMode?: "markdown" | "html";
|
||||
plainText?: string;
|
||||
linkPreview?: boolean;
|
||||
},
|
||||
): Promise<number | undefined> {
|
||||
const baseParams = buildTelegramSendParams({
|
||||
replyToMessageId: opts?.replyToMessageId,
|
||||
messageThreadId: opts?.messageThreadId,
|
||||
});
|
||||
// Add link_preview_options when link preview is disabled.
|
||||
const linkPreviewEnabled = opts?.linkPreview ?? true;
|
||||
const linkPreviewOptions = linkPreviewEnabled ? undefined : { is_disabled: true };
|
||||
const textMode = opts?.textMode ?? "markdown";
|
||||
const htmlText = textMode === "html" ? text : markdownToTelegramHtml(text);
|
||||
try {
|
||||
const res = await bot.api.sendMessage(chatId, htmlText, {
|
||||
parse_mode: "HTML",
|
||||
...(linkPreviewOptions ? { link_preview_options: linkPreviewOptions } : {}),
|
||||
...baseParams,
|
||||
});
|
||||
return res.message_id;
|
||||
@@ -268,6 +278,7 @@ async function sendTelegramText(
|
||||
runtime.log?.(`telegram HTML parse failed; retrying without formatting: ${errText}`);
|
||||
const fallbackText = opts?.plainText ?? text;
|
||||
const res = await bot.api.sendMessage(chatId, fallbackText, {
|
||||
...(linkPreviewOptions ? { link_preview_options: linkPreviewOptions } : {}),
|
||||
...baseParams,
|
||||
});
|
||||
return res.message_id;
|
||||
|
||||
@@ -152,6 +152,62 @@ describe("sendMessageTelegram", () => {
|
||||
expect(res.messageId).toBe("42");
|
||||
});
|
||||
|
||||
it("adds link_preview_options when previews are disabled in config", async () => {
|
||||
const chatId = "123";
|
||||
const sendMessage = vi.fn().mockResolvedValue({
|
||||
message_id: 7,
|
||||
chat: { id: chatId },
|
||||
});
|
||||
const api = { sendMessage } as unknown as {
|
||||
sendMessage: typeof sendMessage;
|
||||
};
|
||||
|
||||
loadConfig.mockReturnValue({
|
||||
channels: { telegram: { linkPreview: false } },
|
||||
});
|
||||
|
||||
await sendMessageTelegram(chatId, "hi", { token: "tok", api });
|
||||
|
||||
expect(sendMessage).toHaveBeenCalledWith(chatId, "hi", {
|
||||
parse_mode: "HTML",
|
||||
link_preview_options: { is_disabled: true },
|
||||
});
|
||||
});
|
||||
|
||||
it("keeps link_preview_options on plain-text fallback when disabled", async () => {
|
||||
const chatId = "123";
|
||||
const parseErr = new Error(
|
||||
"400: Bad Request: can't parse entities: Can't find end of the entity starting at byte offset 9",
|
||||
);
|
||||
const sendMessage = vi
|
||||
.fn()
|
||||
.mockRejectedValueOnce(parseErr)
|
||||
.mockResolvedValueOnce({
|
||||
message_id: 42,
|
||||
chat: { id: chatId },
|
||||
});
|
||||
const api = { sendMessage } as unknown as {
|
||||
sendMessage: typeof sendMessage;
|
||||
};
|
||||
|
||||
loadConfig.mockReturnValue({
|
||||
channels: { telegram: { linkPreview: false } },
|
||||
});
|
||||
|
||||
await sendMessageTelegram(chatId, "_oops_", {
|
||||
token: "tok",
|
||||
api,
|
||||
});
|
||||
|
||||
expect(sendMessage).toHaveBeenNthCalledWith(1, chatId, "<i>oops</i>", {
|
||||
parse_mode: "HTML",
|
||||
link_preview_options: { is_disabled: true },
|
||||
});
|
||||
expect(sendMessage).toHaveBeenNthCalledWith(2, chatId, "_oops_", {
|
||||
link_preview_options: { is_disabled: true },
|
||||
});
|
||||
});
|
||||
|
||||
it("uses native fetch for BAN compatibility when api is omitted", async () => {
|
||||
const originalFetch = globalThis.fetch;
|
||||
const originalBun = (globalThis as { Bun?: unknown }).Bun;
|
||||
|
||||
@@ -198,20 +198,25 @@ export async function sendMessageTelegram(
|
||||
});
|
||||
const renderHtmlText = (value: string) => renderTelegramHtmlText(value, { textMode, tableMode });
|
||||
|
||||
// Resolve link preview setting from config (default: enabled).
|
||||
const linkPreviewEnabled = account.config.linkPreview ?? true;
|
||||
const linkPreviewOptions = linkPreviewEnabled ? undefined : { is_disabled: true };
|
||||
|
||||
const sendTelegramText = async (
|
||||
rawText: string,
|
||||
params?: Record<string, unknown>,
|
||||
fallbackText?: string,
|
||||
) => {
|
||||
const htmlText = renderHtmlText(rawText);
|
||||
const sendParams = params
|
||||
? {
|
||||
parse_mode: "HTML" as const,
|
||||
...params,
|
||||
}
|
||||
: {
|
||||
parse_mode: "HTML" as const,
|
||||
};
|
||||
const baseParams = params ? { ...params } : {};
|
||||
if (linkPreviewOptions) {
|
||||
baseParams.link_preview_options = linkPreviewOptions;
|
||||
}
|
||||
const hasBaseParams = Object.keys(baseParams).length > 0;
|
||||
const sendParams = {
|
||||
parse_mode: "HTML" as const,
|
||||
...baseParams,
|
||||
};
|
||||
const res = await request(() => api.sendMessage(chatId, htmlText, sendParams), "message").catch(
|
||||
async (err) => {
|
||||
// Telegram rejects malformed HTML (e.g., unsupported tags or entities).
|
||||
@@ -222,7 +227,7 @@ export async function sendMessageTelegram(
|
||||
console.warn(`telegram HTML parse failed, retrying as plain text: ${errText}`);
|
||||
}
|
||||
const fallback = fallbackText ?? rawText;
|
||||
const plainParams = params && Object.keys(params).length > 0 ? { ...params } : undefined;
|
||||
const plainParams = hasBaseParams ? baseParams : undefined;
|
||||
return await request(
|
||||
() =>
|
||||
plainParams
|
||||
|
||||
@@ -109,13 +109,13 @@ describe("tts", () => {
|
||||
});
|
||||
|
||||
describe("isValidOpenAIModel", () => {
|
||||
it("accepts gpt-4o-mini-tts model", () => {
|
||||
it("accepts supported models", () => {
|
||||
expect(isValidOpenAIModel("gpt-4o-mini-tts")).toBe(true);
|
||||
expect(isValidOpenAIModel("tts-1")).toBe(true);
|
||||
expect(isValidOpenAIModel("tts-1-hd")).toBe(true);
|
||||
});
|
||||
|
||||
it("rejects other models", () => {
|
||||
expect(isValidOpenAIModel("tts-1")).toBe(false);
|
||||
expect(isValidOpenAIModel("tts-1-hd")).toBe(false);
|
||||
it("rejects unsupported models", () => {
|
||||
expect(isValidOpenAIModel("invalid")).toBe(false);
|
||||
expect(isValidOpenAIModel("")).toBe(false);
|
||||
expect(isValidOpenAIModel("gpt-4")).toBe(false);
|
||||
@@ -123,9 +123,11 @@ describe("tts", () => {
|
||||
});
|
||||
|
||||
describe("OPENAI_TTS_MODELS", () => {
|
||||
it("contains only gpt-4o-mini-tts", () => {
|
||||
it("contains supported models", () => {
|
||||
expect(OPENAI_TTS_MODELS).toContain("gpt-4o-mini-tts");
|
||||
expect(OPENAI_TTS_MODELS).toHaveLength(1);
|
||||
expect(OPENAI_TTS_MODELS).toContain("tts-1");
|
||||
expect(OPENAI_TTS_MODELS).toContain("tts-1-hd");
|
||||
expect(OPENAI_TTS_MODELS).toHaveLength(3);
|
||||
});
|
||||
|
||||
it("is a non-empty array", () => {
|
||||
|
||||
129
src/tts/tts.ts
129
src/tts/tts.ts
@@ -76,6 +76,11 @@ const DEFAULT_OUTPUT = {
|
||||
voiceCompatible: false,
|
||||
};
|
||||
|
||||
const TELEPHONY_OUTPUT = {
|
||||
openai: { format: "pcm" as const, sampleRate: 24000 },
|
||||
elevenlabs: { format: "pcm_22050", sampleRate: 22050 },
|
||||
};
|
||||
|
||||
const TTS_AUTO_MODES = new Set<TtsAutoMode>(["off", "always", "inbound", "tagged"]);
|
||||
|
||||
export type ResolvedTtsConfig = {
|
||||
@@ -180,6 +185,16 @@ export type TtsResult = {
|
||||
voiceCompatible?: boolean;
|
||||
};
|
||||
|
||||
export type TtsTelephonyResult = {
|
||||
success: boolean;
|
||||
audioBuffer?: Buffer;
|
||||
error?: string;
|
||||
latencyMs?: number;
|
||||
provider?: string;
|
||||
outputFormat?: string;
|
||||
sampleRate?: number;
|
||||
};
|
||||
|
||||
type TtsStatusEntry = {
|
||||
timestamp: number;
|
||||
success: boolean;
|
||||
@@ -736,7 +751,17 @@ function parseTtsDirectives(
|
||||
};
|
||||
}
|
||||
|
||||
export const OPENAI_TTS_MODELS = ["gpt-4o-mini-tts"] as const;
|
||||
export const OPENAI_TTS_MODELS = ["gpt-4o-mini-tts", "tts-1", "tts-1-hd"] as const;
|
||||
|
||||
/**
|
||||
* Custom OpenAI-compatible TTS endpoint.
|
||||
* When set, model/voice validation is relaxed to allow non-OpenAI models.
|
||||
* Example: OPENAI_TTS_BASE_URL=http://localhost:8880/v1
|
||||
*/
|
||||
const OPENAI_TTS_BASE_URL = (
|
||||
process.env.OPENAI_TTS_BASE_URL?.trim() || "https://api.openai.com/v1"
|
||||
).replace(/\/+$/, "");
|
||||
const isCustomOpenAIEndpoint = OPENAI_TTS_BASE_URL !== "https://api.openai.com/v1";
|
||||
export const OPENAI_TTS_VOICES = [
|
||||
"alloy",
|
||||
"ash",
|
||||
@@ -752,10 +777,14 @@ export const OPENAI_TTS_VOICES = [
|
||||
type OpenAiTtsVoice = (typeof OPENAI_TTS_VOICES)[number];
|
||||
|
||||
function isValidOpenAIModel(model: string): boolean {
|
||||
// Allow any model when using custom endpoint (e.g., Kokoro, LocalAI)
|
||||
if (isCustomOpenAIEndpoint) return true;
|
||||
return OPENAI_TTS_MODELS.includes(model as (typeof OPENAI_TTS_MODELS)[number]);
|
||||
}
|
||||
|
||||
function isValidOpenAIVoice(voice: string): voice is OpenAiTtsVoice {
|
||||
// Allow any voice when using custom endpoint (e.g., Kokoro Chinese voices)
|
||||
if (isCustomOpenAIEndpoint) return true;
|
||||
return OPENAI_TTS_VOICES.includes(voice as OpenAiTtsVoice);
|
||||
}
|
||||
|
||||
@@ -966,7 +995,7 @@ async function openaiTTS(params: {
|
||||
apiKey: string;
|
||||
model: string;
|
||||
voice: string;
|
||||
responseFormat: "mp3" | "opus";
|
||||
responseFormat: "mp3" | "opus" | "pcm";
|
||||
timeoutMs: number;
|
||||
}): Promise<Buffer> {
|
||||
const { text, apiKey, model, voice, responseFormat, timeoutMs } = params;
|
||||
@@ -982,7 +1011,7 @@ async function openaiTTS(params: {
|
||||
const timeout = setTimeout(() => controller.abort(), timeoutMs);
|
||||
|
||||
try {
|
||||
const response = await fetch("https://api.openai.com/v1/audio/speech", {
|
||||
const response = await fetch(`${OPENAI_TTS_BASE_URL}/audio/speech`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
@@ -1210,6 +1239,100 @@ export async function textToSpeech(params: {
|
||||
};
|
||||
}
|
||||
|
||||
export async function textToSpeechTelephony(params: {
|
||||
text: string;
|
||||
cfg: ClawdbotConfig;
|
||||
prefsPath?: string;
|
||||
}): Promise<TtsTelephonyResult> {
|
||||
const config = resolveTtsConfig(params.cfg);
|
||||
const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config);
|
||||
|
||||
if (params.text.length > config.maxTextLength) {
|
||||
return {
|
||||
success: false,
|
||||
error: `Text too long (${params.text.length} chars, max ${config.maxTextLength})`,
|
||||
};
|
||||
}
|
||||
|
||||
const userProvider = getTtsProvider(config, prefsPath);
|
||||
const providers = resolveTtsProviderOrder(userProvider);
|
||||
|
||||
let lastError: string | undefined;
|
||||
|
||||
for (const provider of providers) {
|
||||
const providerStart = Date.now();
|
||||
try {
|
||||
if (provider === "edge") {
|
||||
lastError = "edge: unsupported for telephony";
|
||||
continue;
|
||||
}
|
||||
|
||||
const apiKey = resolveTtsApiKey(config, provider);
|
||||
if (!apiKey) {
|
||||
lastError = `No API key for ${provider}`;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (provider === "elevenlabs") {
|
||||
const output = TELEPHONY_OUTPUT.elevenlabs;
|
||||
const audioBuffer = await elevenLabsTTS({
|
||||
text: params.text,
|
||||
apiKey,
|
||||
baseUrl: config.elevenlabs.baseUrl,
|
||||
voiceId: config.elevenlabs.voiceId,
|
||||
modelId: config.elevenlabs.modelId,
|
||||
outputFormat: output.format,
|
||||
seed: config.elevenlabs.seed,
|
||||
applyTextNormalization: config.elevenlabs.applyTextNormalization,
|
||||
languageCode: config.elevenlabs.languageCode,
|
||||
voiceSettings: config.elevenlabs.voiceSettings,
|
||||
timeoutMs: config.timeoutMs,
|
||||
});
|
||||
|
||||
return {
|
||||
success: true,
|
||||
audioBuffer,
|
||||
latencyMs: Date.now() - providerStart,
|
||||
provider,
|
||||
outputFormat: output.format,
|
||||
sampleRate: output.sampleRate,
|
||||
};
|
||||
}
|
||||
|
||||
const output = TELEPHONY_OUTPUT.openai;
|
||||
const audioBuffer = await openaiTTS({
|
||||
text: params.text,
|
||||
apiKey,
|
||||
model: config.openai.model,
|
||||
voice: config.openai.voice,
|
||||
responseFormat: output.format,
|
||||
timeoutMs: config.timeoutMs,
|
||||
});
|
||||
|
||||
return {
|
||||
success: true,
|
||||
audioBuffer,
|
||||
latencyMs: Date.now() - providerStart,
|
||||
provider,
|
||||
outputFormat: output.format,
|
||||
sampleRate: output.sampleRate,
|
||||
};
|
||||
} catch (err) {
|
||||
const error = err as Error;
|
||||
if (error.name === "AbortError") {
|
||||
lastError = `${provider}: request timed out`;
|
||||
} else {
|
||||
lastError = `${provider}: ${error.message}`;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
success: false,
|
||||
error: `TTS conversion failed: ${lastError || "no providers available"}`,
|
||||
};
|
||||
}
|
||||
|
||||
export async function maybeApplyTtsToPayload(params: {
|
||||
payload: ReplyPayload;
|
||||
cfg: ClawdbotConfig;
|
||||
|
||||
Reference in New Issue
Block a user