mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-06 14:01:24 +08:00
Compare commits
837 Commits
v2026.6.1-
...
codex/mark
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
542baa3b43 | ||
|
|
72402b51c5 | ||
|
|
df90aac6e2 | ||
|
|
09796c6991 | ||
|
|
86ef2324a3 | ||
|
|
f1326d71ae | ||
|
|
cf6700486c | ||
|
|
537636b926 | ||
|
|
21648cf844 | ||
|
|
0841fe7d67 | ||
|
|
7ecc9551ff | ||
|
|
5d423e5f1a | ||
|
|
fc459ad376 | ||
|
|
abd52441c5 | ||
|
|
33711a477b | ||
|
|
5edcff17c8 | ||
|
|
a652a0529b | ||
|
|
d92f990126 | ||
|
|
5577442e84 | ||
|
|
fe01495f8e | ||
|
|
c8d313f742 | ||
|
|
82466b33c5 | ||
|
|
480091b9ee | ||
|
|
5ea7e9d071 | ||
|
|
6c4626eca1 | ||
|
|
b3f7436307 | ||
|
|
e327700c7d | ||
|
|
988b2e12a6 | ||
|
|
4cf2a2dd6f | ||
|
|
b65bd56a79 | ||
|
|
bed2f620dd | ||
|
|
eb1d0a3588 | ||
|
|
fbdbbe9e97 | ||
|
|
782a360580 | ||
|
|
0e9b65889b | ||
|
|
914f0f9315 | ||
|
|
1666686eba | ||
|
|
1cfc23afb4 | ||
|
|
5acb805f37 | ||
|
|
8361b69ff6 | ||
|
|
66c588e28f | ||
|
|
3d5c8b25a5 | ||
|
|
0ff9e3a88f | ||
|
|
4774d26cec | ||
|
|
0bff438190 | ||
|
|
2fbddd65e6 | ||
|
|
08ff5f7235 | ||
|
|
ca10f65280 | ||
|
|
79627a02a5 | ||
|
|
b8cd4513a6 | ||
|
|
cd003a688e | ||
|
|
7321e70b6f | ||
|
|
691baa3054 | ||
|
|
782de561c8 | ||
|
|
56c2ee2a77 | ||
|
|
7e59c8a48a | ||
|
|
2cb6f013ad | ||
|
|
c9417590c4 | ||
|
|
dae769e4d1 | ||
|
|
fd36d510ac | ||
|
|
1b7da5d000 | ||
|
|
4730f05e78 | ||
|
|
859d01c919 | ||
|
|
3549150d17 | ||
|
|
bb7339fe24 | ||
|
|
7033becd07 | ||
|
|
9326519c8d | ||
|
|
ace9d4c842 | ||
|
|
8ccb15f813 | ||
|
|
85aa7cca7a | ||
|
|
fd84a67e22 | ||
|
|
c4b7e5ebd7 | ||
|
|
6878fb25f9 | ||
|
|
da6e410690 | ||
|
|
7d013c1353 | ||
|
|
f733a37db3 | ||
|
|
42fae37d9f | ||
|
|
940e4e64ff | ||
|
|
30bf7310a5 | ||
|
|
f4952f3c42 | ||
|
|
a0590e113a | ||
|
|
645f3025a7 | ||
|
|
84398e8509 | ||
|
|
89d694b33a | ||
|
|
2b411b0298 | ||
|
|
f5c2e455c7 | ||
|
|
6495eb8355 | ||
|
|
f3dccaa707 | ||
|
|
3830ae5f86 | ||
|
|
955cc4a0fa | ||
|
|
e6049f5560 | ||
|
|
b949cd8a63 | ||
|
|
eb68d9e8e7 | ||
|
|
ee6b5eb51a | ||
|
|
57930933ce | ||
|
|
a9865297f9 | ||
|
|
8f952a1819 | ||
|
|
ef7f54e1db | ||
|
|
1bb275b4af | ||
|
|
8f8fba66e3 | ||
|
|
3663b216ea | ||
|
|
082e0e1e74 | ||
|
|
029eae8d4d | ||
|
|
7e91337292 | ||
|
|
5cddc8617b | ||
|
|
b29bc49452 | ||
|
|
ab0c86079c | ||
|
|
7cc4b178da | ||
|
|
8e21b7b791 | ||
|
|
70c180de5c | ||
|
|
3ab4ff1970 | ||
|
|
b80b736bec | ||
|
|
902a7f2e40 | ||
|
|
fd66568e9c | ||
|
|
cb50517168 | ||
|
|
461d582bf0 | ||
|
|
df403be1a6 | ||
|
|
ddec7f7583 | ||
|
|
f675c85e97 | ||
|
|
935e31e1f7 | ||
|
|
7a2312ed3b | ||
|
|
22408ff4ca | ||
|
|
a293e4ea36 | ||
|
|
d1d363f02c | ||
|
|
181937aa79 | ||
|
|
b05a9e64e7 | ||
|
|
052b9caa4c | ||
|
|
b8cd038b53 | ||
|
|
807a78d729 | ||
|
|
3ba3706e7b | ||
|
|
f8fbeca3b0 | ||
|
|
ecfdc422ff | ||
|
|
b2d4015559 | ||
|
|
38b3f872ec | ||
|
|
30342d1ff1 | ||
|
|
5772ce0bd2 | ||
|
|
8b615e7bdd | ||
|
|
74a6828e65 | ||
|
|
6f885c9e69 | ||
|
|
606f914786 | ||
|
|
0a37307b9e | ||
|
|
a45cf4aa3d | ||
|
|
627f937126 | ||
|
|
87a51de824 | ||
|
|
cf7aa53974 | ||
|
|
1dd7dcbb8b | ||
|
|
1c7bc0a70c | ||
|
|
2eef5e64ea | ||
|
|
44030e6a70 | ||
|
|
b4e3680c15 | ||
|
|
f5fed728d7 | ||
|
|
7e1c1293d2 | ||
|
|
2ba9dcc4d1 | ||
|
|
c4b2e5ede1 | ||
|
|
9b9481466b | ||
|
|
e887319d03 | ||
|
|
19cd359980 | ||
|
|
6d6f800b71 | ||
|
|
89e289bebf | ||
|
|
887da616a3 | ||
|
|
6f7111af77 | ||
|
|
fb184b23d8 | ||
|
|
3da99c9c5d | ||
|
|
4a5250bbd0 | ||
|
|
a01dad0467 | ||
|
|
3e0f205e21 | ||
|
|
7e89d1549c | ||
|
|
7fe48606d9 | ||
|
|
c9dba69584 | ||
|
|
04e960542d | ||
|
|
37d68a2c26 | ||
|
|
4f75d03f98 | ||
|
|
c56f0ad6e8 | ||
|
|
e7685a3442 | ||
|
|
0915a43ae3 | ||
|
|
40bc655224 | ||
|
|
c4c3649a69 | ||
|
|
982d81f613 | ||
|
|
cd01bd00fc | ||
|
|
133a0a3d1b | ||
|
|
542c2a667c | ||
|
|
eaa9da2d81 | ||
|
|
7c7c52640c | ||
|
|
7106593349 | ||
|
|
284c316fde | ||
|
|
f4a049d571 | ||
|
|
c7d3d09345 | ||
|
|
f43e8eac30 | ||
|
|
daa6405784 | ||
|
|
63d1572d40 | ||
|
|
6d3d1b4449 | ||
|
|
5198edc051 | ||
|
|
776121bf27 | ||
|
|
321bd8734d | ||
|
|
f5c3fc2033 | ||
|
|
eb925afda2 | ||
|
|
66dccf2111 | ||
|
|
fe976b19f5 | ||
|
|
c306bf9986 | ||
|
|
c9c71965d2 | ||
|
|
7e3832cb72 | ||
|
|
a71d83f1ea | ||
|
|
d14c004124 | ||
|
|
87881bb3f8 | ||
|
|
f529019f71 | ||
|
|
ad230f0072 | ||
|
|
57c15073bd | ||
|
|
3d4a170acd | ||
|
|
70954c5ef1 | ||
|
|
bc1ceb11f5 | ||
|
|
5c5ead97f2 | ||
|
|
5a451e4b29 | ||
|
|
74b7668ad7 | ||
|
|
fd820654f6 | ||
|
|
91bc6d2f75 | ||
|
|
c8f2b9864a | ||
|
|
845ae136e2 | ||
|
|
8bad7e3c5f | ||
|
|
5d916a47e0 | ||
|
|
ce6443d6c2 | ||
|
|
a4f270e960 | ||
|
|
25c19e98d9 | ||
|
|
be1d0283f7 | ||
|
|
6ea9de0ba9 | ||
|
|
9f9b233262 | ||
|
|
befc96d445 | ||
|
|
9de16d960e | ||
|
|
ac3fed0b90 | ||
|
|
8856a3e63f | ||
|
|
8348c97336 | ||
|
|
a1e7b5c2af | ||
|
|
93d27fd090 | ||
|
|
ae1d58e2e2 | ||
|
|
73d7448920 | ||
|
|
35e8f4aeb5 | ||
|
|
9e8e5f8b8e | ||
|
|
f5ee1d71a0 | ||
|
|
a1ac0e892c | ||
|
|
5554d29db7 | ||
|
|
ba9f3be82b | ||
|
|
c4618bd859 | ||
|
|
47c68db395 | ||
|
|
14d9a9d184 | ||
|
|
ad3e74f433 | ||
|
|
5869473dc3 | ||
|
|
005da3bfc0 | ||
|
|
c3042c8a53 | ||
|
|
595df6e4fc | ||
|
|
80a9f9171d | ||
|
|
a85df5a2fe | ||
|
|
3d39143851 | ||
|
|
578258775e | ||
|
|
9cbe85f2e6 | ||
|
|
035ca4106d | ||
|
|
ace66d9276 | ||
|
|
2e856ecf6d | ||
|
|
bb5a2a6c4b | ||
|
|
e3652a0541 | ||
|
|
9ac9c4014e | ||
|
|
a059c5e359 | ||
|
|
d1cc90f991 | ||
|
|
bbf74df187 | ||
|
|
9f56655cba | ||
|
|
32cf26edb9 | ||
|
|
38219de4a8 | ||
|
|
74de22592f | ||
|
|
49d563823e | ||
|
|
55c26f453a | ||
|
|
39dcc60cf3 | ||
|
|
74b77e746c | ||
|
|
b1562cf30e | ||
|
|
f46921dbc1 | ||
|
|
2873917a67 | ||
|
|
45e30ed8cb | ||
|
|
343901eed2 | ||
|
|
39987341ef | ||
|
|
e968912c0a | ||
|
|
a8946ceaa2 | ||
|
|
a38f8a7727 | ||
|
|
937c81d269 | ||
|
|
f741019d47 | ||
|
|
f3a66be5db | ||
|
|
1b3d42a5bf | ||
|
|
6920c31b59 | ||
|
|
bff56270f7 | ||
|
|
16a2e9797c | ||
|
|
3580dcc2c5 | ||
|
|
77cbf0bbe7 | ||
|
|
be604a74cc | ||
|
|
d3645e9a09 | ||
|
|
f430f7b35f | ||
|
|
5c8ad36c96 | ||
|
|
9cf089add3 | ||
|
|
2f42e28822 | ||
|
|
0dfecf5d38 | ||
|
|
0cf207ff69 | ||
|
|
34f5d18646 | ||
|
|
894f76f9b2 | ||
|
|
cf2f010c11 | ||
|
|
9dec94077c | ||
|
|
46fdc874ff | ||
|
|
07cfeb8825 | ||
|
|
748d15a7e8 | ||
|
|
44a41c983d | ||
|
|
5991581624 | ||
|
|
6462d5711f | ||
|
|
da00d620c8 | ||
|
|
5e2913b8f2 | ||
|
|
35efd98a8d | ||
|
|
a74d094a92 | ||
|
|
8501e1ab49 | ||
|
|
1b35fd6042 | ||
|
|
f199a3ec4a | ||
|
|
8b445c0b1c | ||
|
|
31cb21dc80 | ||
|
|
9141dac9ff | ||
|
|
641c8d3e8f | ||
|
|
48ef13f3f9 | ||
|
|
f002c11263 | ||
|
|
ed98cf4072 | ||
|
|
06bbffa56b | ||
|
|
b9dd6e2176 | ||
|
|
84941d8079 | ||
|
|
7c71652b97 | ||
|
|
68d189aee2 | ||
|
|
de62123e4d | ||
|
|
ffbfcf7ede | ||
|
|
af78281011 | ||
|
|
ea0411257d | ||
|
|
bcd4e91a26 | ||
|
|
03ccc1860d | ||
|
|
1a7ff3c75c | ||
|
|
657355d2b0 | ||
|
|
2b444e9b43 | ||
|
|
c603b71d40 | ||
|
|
ac33c605cc | ||
|
|
df13f8aa6d | ||
|
|
590b653d8d | ||
|
|
f126a99773 | ||
|
|
d44e59b737 | ||
|
|
dee8f41d99 | ||
|
|
3242949658 | ||
|
|
b1375ef40c | ||
|
|
0f3ef7d6e7 | ||
|
|
0ed2a3f6f4 | ||
|
|
1404b0e87e | ||
|
|
738bcde966 | ||
|
|
93f04f1edd | ||
|
|
a47f3b240d | ||
|
|
e90dea78a8 | ||
|
|
03b1d06980 | ||
|
|
78638ba4bb | ||
|
|
c4fcafcf8e | ||
|
|
e4c1182789 | ||
|
|
1cba4300a8 | ||
|
|
93084f6073 | ||
|
|
f4d53265da | ||
|
|
c77e69b27b | ||
|
|
b9fd6d96cc | ||
|
|
1fd4e90463 | ||
|
|
ae62e30ae7 | ||
|
|
7b11b3f782 | ||
|
|
0531beaf52 | ||
|
|
355c1354e9 | ||
|
|
064ac94744 | ||
|
|
b5f9cb6151 | ||
|
|
5e03331d19 | ||
|
|
75b6ebc524 | ||
|
|
f3a35fb09b | ||
|
|
7634b15b81 | ||
|
|
5122e14c6b | ||
|
|
2efa068f0b | ||
|
|
0a59b1319d | ||
|
|
222e6f5c60 | ||
|
|
cdd8bc862b | ||
|
|
762ad43b26 | ||
|
|
87c1417dab | ||
|
|
71c473a539 | ||
|
|
5fa93a09d6 | ||
|
|
e5b9d3c66b | ||
|
|
4c12cc9da1 | ||
|
|
0df70f2f9a | ||
|
|
ccbfcd3337 | ||
|
|
a564c7dd82 | ||
|
|
79c2c69ef1 | ||
|
|
c76863ec8a | ||
|
|
297d95b94c | ||
|
|
751eabc9c4 | ||
|
|
89d868733a | ||
|
|
1ea0f55fd6 | ||
|
|
7f2ab82410 | ||
|
|
2fc6ef9cd0 | ||
|
|
e90fb1feba | ||
|
|
7398020b1f | ||
|
|
3ce0abff1a | ||
|
|
71f9d68616 | ||
|
|
eae814770c | ||
|
|
9660aab819 | ||
|
|
a1f602765e | ||
|
|
243094a9e2 | ||
|
|
aa63357a88 | ||
|
|
7aff176ead | ||
|
|
c7ac8c0b58 | ||
|
|
ac29cbccc1 | ||
|
|
ab4ff72e05 | ||
|
|
1fd3e8a536 | ||
|
|
b57eb93646 | ||
|
|
188dbfbbbd | ||
|
|
279a3a00bb | ||
|
|
51ae46319a | ||
|
|
d9ef964c42 | ||
|
|
d47eeda8f9 | ||
|
|
5eaba4ce10 | ||
|
|
ff4a7f7e50 | ||
|
|
8a6472b4b0 | ||
|
|
9091d44ad2 | ||
|
|
5f2a996550 | ||
|
|
d096e788aa | ||
|
|
67a8225f3b | ||
|
|
3932238405 | ||
|
|
8dd47022bc | ||
|
|
429082e106 | ||
|
|
97c9ef2bad | ||
|
|
14b88e5193 | ||
|
|
55d0eebf38 | ||
|
|
32fe56d9b5 | ||
|
|
fb2e814383 | ||
|
|
6d4d2d662a | ||
|
|
3c1d353e33 | ||
|
|
85d0bd8c75 | ||
|
|
ba37ac552c | ||
|
|
ea4b3fd235 | ||
|
|
233a68e820 | ||
|
|
6b4d308045 | ||
|
|
c7befdc0e0 | ||
|
|
06e70c8ea5 | ||
|
|
d7dedeb427 | ||
|
|
6e53296c56 | ||
|
|
5bd5cbcc3e | ||
|
|
e3647f0c03 | ||
|
|
8ed427971d | ||
|
|
1cbf3a9114 | ||
|
|
37b3dd4008 | ||
|
|
4712707798 | ||
|
|
041d699c13 | ||
|
|
090d549a17 | ||
|
|
ce00659782 | ||
|
|
fc35ea8283 | ||
|
|
7b3803a4a6 | ||
|
|
68ce3a2d38 | ||
|
|
b9910b87a0 | ||
|
|
6c67c766ce | ||
|
|
4b2ccbf421 | ||
|
|
05c5d5a23d | ||
|
|
39daf6e335 | ||
|
|
b4cce6da21 | ||
|
|
458d49e8e4 | ||
|
|
36dd1f902e | ||
|
|
6d88c9416d | ||
|
|
ae3f999856 | ||
|
|
eac2c3db00 | ||
|
|
e60a8bac79 | ||
|
|
30a5337315 | ||
|
|
8382859716 | ||
|
|
4e004384e0 | ||
|
|
79074b7ee9 | ||
|
|
ab1415b62d | ||
|
|
8359e618ed | ||
|
|
86c3de42cf | ||
|
|
44413914a2 | ||
|
|
84d2aff5fb | ||
|
|
4354045ce1 | ||
|
|
fa305ad2e7 | ||
|
|
81d30ae3c8 | ||
|
|
b460cae176 | ||
|
|
ab3b585601 | ||
|
|
8061edd972 | ||
|
|
88b853cf7b | ||
|
|
b8b85fb402 | ||
|
|
a074ac6382 | ||
|
|
1a8e1f25ae | ||
|
|
26bde4dcbd | ||
|
|
f97c5946b7 | ||
|
|
3fb6b22133 | ||
|
|
8ea2dc7075 | ||
|
|
393ac2a110 | ||
|
|
ce908ef258 | ||
|
|
bd549a1a02 | ||
|
|
251d1a3c33 | ||
|
|
fb5c0da417 | ||
|
|
700003d25c | ||
|
|
5f4fbb1639 | ||
|
|
4c0a838b34 | ||
|
|
281e503a18 | ||
|
|
091df1fddc | ||
|
|
0826b75e9b | ||
|
|
79fae8a163 | ||
|
|
521861192b | ||
|
|
c94710b5f4 | ||
|
|
ccc4053def | ||
|
|
cb72a1ce2d | ||
|
|
ca23a63de1 | ||
|
|
b6288593c2 | ||
|
|
817f220aaa | ||
|
|
06502bc9ad | ||
|
|
3e74cc4d1a | ||
|
|
ffa248a523 | ||
|
|
7a4a814a3d | ||
|
|
5948160245 | ||
|
|
2656a8feca | ||
|
|
1d87ef5a86 | ||
|
|
ba8abd1357 | ||
|
|
4fabaea49b | ||
|
|
64b684e187 | ||
|
|
3cbf0d1faa | ||
|
|
e088d2cbbe | ||
|
|
75ba474c7d | ||
|
|
93ff68940d | ||
|
|
0d676cfd48 | ||
|
|
c0026f1811 | ||
|
|
112ce219fb | ||
|
|
937a5a1ee1 | ||
|
|
72edfa235e | ||
|
|
58ba60e14e | ||
|
|
5782a24b97 | ||
|
|
b03998ae37 | ||
|
|
18bf52fc94 | ||
|
|
026ec61336 | ||
|
|
c01cd303b2 | ||
|
|
be1009ea34 | ||
|
|
eaa1af3e56 | ||
|
|
f4833592b3 | ||
|
|
577636d728 | ||
|
|
a827663a5b | ||
|
|
07ca2b6871 | ||
|
|
5f431f4fcd | ||
|
|
da2d32c5f8 | ||
|
|
9ab59b4953 | ||
|
|
9e5dace9d3 | ||
|
|
544245826c | ||
|
|
d275f33bd5 | ||
|
|
9df20de599 | ||
|
|
4184e9833b | ||
|
|
92405fb43a | ||
|
|
e6232d218f | ||
|
|
8044db357f | ||
|
|
219ff4f299 | ||
|
|
2646058c9b | ||
|
|
5ed4298fb3 | ||
|
|
66c359839a | ||
|
|
48dc4444ae | ||
|
|
903612ab64 | ||
|
|
a4d7a8e3d9 | ||
|
|
1edf373908 | ||
|
|
e36f9bcb89 | ||
|
|
2aec8684a0 | ||
|
|
b32d6f48ca | ||
|
|
a69a86775b | ||
|
|
a116a0567e | ||
|
|
0e16019ead | ||
|
|
f56e36d828 | ||
|
|
7b18277681 | ||
|
|
e2990c76df | ||
|
|
7b8ff148af | ||
|
|
bfc66fb505 | ||
|
|
ecba8fb765 | ||
|
|
4bb06ec498 | ||
|
|
04f2a05a95 | ||
|
|
783a709a94 | ||
|
|
c7240c46a7 | ||
|
|
ec2f8ca948 | ||
|
|
5d489d45e8 | ||
|
|
bf3f207175 | ||
|
|
3c65961276 | ||
|
|
f31c30fece | ||
|
|
124bb53ea9 | ||
|
|
fe7fcc9091 | ||
|
|
adf128510b | ||
|
|
dcbf2dde4c | ||
|
|
32a5c3848a | ||
|
|
e1509529bf | ||
|
|
73b434f25b | ||
|
|
792976b76f | ||
|
|
8aaf6d9a84 | ||
|
|
30a4478c10 | ||
|
|
5d07ee772e | ||
|
|
fa9ef924a2 | ||
|
|
f066d1c87e | ||
|
|
05f2113302 | ||
|
|
3597ff0547 | ||
|
|
dd90fd0255 | ||
|
|
cb04dd3028 | ||
|
|
04505f86eb | ||
|
|
f0101337bb | ||
|
|
797777c813 | ||
|
|
c79b89173d | ||
|
|
2a2228e496 | ||
|
|
157fddee51 | ||
|
|
5ea6857491 | ||
|
|
59eb39e39a | ||
|
|
a2b0002d3f | ||
|
|
0308347fa7 | ||
|
|
cf6875e633 | ||
|
|
9cf1c116ff | ||
|
|
3030a4973e | ||
|
|
8c59fbbe92 | ||
|
|
443791ef52 | ||
|
|
9ee71023c2 | ||
|
|
731cfb6ff5 | ||
|
|
9e7f9915a0 | ||
|
|
ef20dc5f2f | ||
|
|
f58a38b522 | ||
|
|
05a13da12c | ||
|
|
e0cfcc3151 | ||
|
|
ac61833b62 | ||
|
|
1683b809c1 | ||
|
|
78d012ece4 | ||
|
|
67d008d00e | ||
|
|
adeafcee18 | ||
|
|
d9099828a4 | ||
|
|
0b66e2cd01 | ||
|
|
beabbe9219 | ||
|
|
62a27e1be5 | ||
|
|
cc31cddf54 | ||
|
|
d6fe20c350 | ||
|
|
a483a2cbc5 | ||
|
|
7fff122060 | ||
|
|
6b9185c6ec | ||
|
|
473188bd1f | ||
|
|
b734ccfa3c | ||
|
|
b8f1843909 | ||
|
|
4048b087c3 | ||
|
|
2a7e41b27b | ||
|
|
2f5f5307ef | ||
|
|
2439e2450a | ||
|
|
cec3fbae45 | ||
|
|
7881649f7e | ||
|
|
355a411e2a | ||
|
|
883f4cbf25 | ||
|
|
eaf86695b3 | ||
|
|
0b3465e9a3 | ||
|
|
2478bd2db4 | ||
|
|
c949857684 | ||
|
|
37218ccd2b | ||
|
|
298fcebd96 | ||
|
|
b62ab78f03 | ||
|
|
e46cb79e93 | ||
|
|
7f45dc815f | ||
|
|
f5556b500e | ||
|
|
19821c958d | ||
|
|
c94964e3a0 | ||
|
|
e6ecffc7fb | ||
|
|
fc5c22a238 | ||
|
|
a4c1d64a33 | ||
|
|
9d96e542de | ||
|
|
0ebc68745f | ||
|
|
dd42bb9e4c | ||
|
|
59ab73f417 | ||
|
|
cb2ec869ac | ||
|
|
1ca4396825 | ||
|
|
98b2385585 | ||
|
|
0a1adb9290 | ||
|
|
21662d3ee8 | ||
|
|
a33ec61daa | ||
|
|
1eb4a2a837 | ||
|
|
1e7c7caba5 | ||
|
|
d97ce8e7c1 | ||
|
|
8ef5d37f84 | ||
|
|
2858ced19f | ||
|
|
19e4a47ba5 | ||
|
|
4ab1f899c8 | ||
|
|
3159b1840b | ||
|
|
d44507dd58 | ||
|
|
1988f443dd | ||
|
|
a4e811a063 | ||
|
|
1e8b669bdc | ||
|
|
76412b9e76 | ||
|
|
515acdb6b7 | ||
|
|
64598efd21 | ||
|
|
e819d5718b | ||
|
|
51cf923f7e | ||
|
|
bf2628fd09 | ||
|
|
bc6ddea004 | ||
|
|
cf6f086114 | ||
|
|
85f262ad3b | ||
|
|
07642fd3ac | ||
|
|
e9eb6a5a6e | ||
|
|
058cf763b4 | ||
|
|
e3439e2019 | ||
|
|
7e5a7eff15 | ||
|
|
5d4b2081b5 | ||
|
|
b60e95ac50 | ||
|
|
9fbf3ab3f5 | ||
|
|
0a4ef8b44c | ||
|
|
9d27524aae | ||
|
|
37ee88c43a | ||
|
|
8c40322f6d | ||
|
|
9621d02c3b | ||
|
|
db9524334d | ||
|
|
023d1c1346 | ||
|
|
02257c6145 | ||
|
|
63a085603d | ||
|
|
6c1acbb51d | ||
|
|
3962e794a3 | ||
|
|
e93debe38a | ||
|
|
2aa74b8be8 | ||
|
|
641329157f | ||
|
|
069e616b40 | ||
|
|
b72867c4ef | ||
|
|
e7029418b2 | ||
|
|
0094f36bb9 | ||
|
|
6a96f5701a | ||
|
|
4d335bccae | ||
|
|
018a5dccf1 | ||
|
|
3cb4554fe8 | ||
|
|
d1d6900c6d | ||
|
|
bd46b791e9 | ||
|
|
3c781401ad | ||
|
|
86581bd139 | ||
|
|
c5fda5eb9a | ||
|
|
390434673e | ||
|
|
f27fdcbdb0 | ||
|
|
303141be85 | ||
|
|
580bc23dcc | ||
|
|
9e5f601c61 | ||
|
|
d22b8d1cdb | ||
|
|
01c5513c41 | ||
|
|
4dde1e9b54 | ||
|
|
5efffc9184 | ||
|
|
301aae5cd7 | ||
|
|
bd3f2929c0 | ||
|
|
6a540d945c | ||
|
|
68a780bb3c | ||
|
|
bd654bf5be | ||
|
|
aa5996ff28 | ||
|
|
4404474a99 | ||
|
|
edad8bd695 | ||
|
|
f1fcb4763c | ||
|
|
c996011b0c | ||
|
|
b7659b414e | ||
|
|
0c22351b0e | ||
|
|
a3c068ab46 | ||
|
|
34e60c7613 | ||
|
|
ae048ac2dc | ||
|
|
b97bc433ff | ||
|
|
82b69dceb8 | ||
|
|
2ae9055e8d | ||
|
|
b1ec36802c | ||
|
|
98b8eb02d2 | ||
|
|
9a6c1eb13f | ||
|
|
10d44e6e2a | ||
|
|
7c3bf80220 | ||
|
|
279f14f3fc | ||
|
|
9d50d2beb6 | ||
|
|
e9b481bbf6 | ||
|
|
2d2a4da093 | ||
|
|
aaebe74428 | ||
|
|
ac68783d81 | ||
|
|
11169b5c6a | ||
|
|
a190b16ced | ||
|
|
8be6591675 | ||
|
|
e9982ad288 | ||
|
|
ef68275a6d | ||
|
|
92aeda817d | ||
|
|
67a08ebadb | ||
|
|
e68c5861ac | ||
|
|
0109afd7fc | ||
|
|
3bdd36b718 | ||
|
|
0a88da285b | ||
|
|
3bab9e07d4 | ||
|
|
77b22b4e22 | ||
|
|
30f28516d7 | ||
|
|
ecea9a3d8c | ||
|
|
02565857e8 | ||
|
|
32f1e0e3ac | ||
|
|
ddfe936ebe | ||
|
|
2810c181ea | ||
|
|
911ebfa7fb | ||
|
|
8675ae253b | ||
|
|
20c0ef5341 | ||
|
|
ef1f870335 | ||
|
|
74dfd528cc | ||
|
|
624f279b6c | ||
|
|
48e0fb965d | ||
|
|
22ccabe92e | ||
|
|
bd3eea8a24 | ||
|
|
cb3ea96414 | ||
|
|
cf2f6e8902 | ||
|
|
c11eb54ff3 | ||
|
|
b25cc2cb97 | ||
|
|
9693b72e87 | ||
|
|
b7f733f828 | ||
|
|
8f5c762f9b | ||
|
|
170f7ac81b | ||
|
|
bb70e68f82 | ||
|
|
fbeb08967c | ||
|
|
69df840dd1 | ||
|
|
ea70737204 | ||
|
|
6950c25a89 | ||
|
|
45823c5f88 | ||
|
|
4d4b9a76ce | ||
|
|
e08890f356 | ||
|
|
f63647a799 | ||
|
|
d49ce1011b | ||
|
|
10f1f19a55 | ||
|
|
eb45f7506e | ||
|
|
72ddb522b4 | ||
|
|
1a45a6d112 | ||
|
|
a227ce9cd5 | ||
|
|
871ba88159 | ||
|
|
6588d77fa0 | ||
|
|
edbbbddf96 | ||
|
|
deddd60a50 | ||
|
|
8d5352fdf9 | ||
|
|
b19e8edd45 | ||
|
|
9b3f19377d | ||
|
|
035f50f0b3 | ||
|
|
8917f5fcd5 | ||
|
|
7f8ae918e3 | ||
|
|
a43dafe15d | ||
|
|
0f883cb654 | ||
|
|
d2c55f660f | ||
|
|
93efa868b9 | ||
|
|
0c15c69e8f | ||
|
|
40326fcd4f | ||
|
|
d0c99db71b | ||
|
|
333c28efe1 | ||
|
|
062c82ef82 | ||
|
|
732f2e5375 | ||
|
|
db1246e1bd | ||
|
|
fb614861e0 | ||
|
|
d887a39c28 | ||
|
|
f0e5fd6037 | ||
|
|
343c56a64c | ||
|
|
386044015b | ||
|
|
62edb0ccab | ||
|
|
8554efb754 |
@@ -5,6 +5,8 @@ import {
|
||||
import { generateSecureToken } from "openclaw/plugin-sdk/secure-random-runtime";
|
||||
|
||||
const SLACK_EXTERNAL_ARG_MENU_TOKEN_BYTES = 18;
|
||||
// Slack echoes external menu option values back as plain strings; keep tokens URL-safe
|
||||
// and fixed-length so readToken can reject forged or malformed values before lookup.
|
||||
const SLACK_EXTERNAL_ARG_MENU_TOKEN_LENGTH = Math.ceil(
|
||||
(SLACK_EXTERNAL_ARG_MENU_TOKEN_BYTES * 8) / 6,
|
||||
);
|
||||
@@ -28,6 +30,7 @@ function pruneSlackExternalArgMenuStore(
|
||||
): void {
|
||||
const now = asDateTimestampMs(rawNow);
|
||||
if (now === undefined) {
|
||||
// An invalid clock makes every expiry comparison untrustworthy, so fail closed.
|
||||
store.clear();
|
||||
return;
|
||||
}
|
||||
@@ -46,6 +49,7 @@ function createSlackExternalArgMenuToken(store: Map<string, SlackExternalArgMenu
|
||||
return token;
|
||||
}
|
||||
|
||||
/** Creates the short-lived in-memory store used for Slack external select arguments. */
|
||||
export function createSlackExternalArgMenuStore() {
|
||||
const store = new Map<string, SlackExternalArgMenuEntry>();
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
/** Normalizes PSTN caller ids to digits-only strings for allowlist comparisons. */
|
||||
export function normalizePhoneNumber(input?: string): string {
|
||||
if (!input) {
|
||||
return "";
|
||||
@@ -5,6 +6,7 @@ export function normalizePhoneNumber(input?: string): string {
|
||||
return input.replace(/\D/g, "");
|
||||
}
|
||||
|
||||
/** Checks a pre-normalized caller id against configured phone-number allowlist entries. */
|
||||
export function isAllowlistedCaller(
|
||||
normalizedFrom: string,
|
||||
allowFrom: string[] | undefined,
|
||||
|
||||
@@ -1,14 +1,18 @@
|
||||
const DEFAULT_MAX_OUTPUT_CHARS = 16_384;
|
||||
|
||||
export type BoundedChildOutput = {
|
||||
/** Retained output tail, kept within the configured character limit. */
|
||||
text: string;
|
||||
/** True once older output has been dropped from the accumulator. */
|
||||
truncated: boolean;
|
||||
};
|
||||
|
||||
/** Creates an empty accumulator for bounded child-process output capture. */
|
||||
export function emptyBoundedChildOutput(): BoundedChildOutput {
|
||||
return { text: "", truncated: false };
|
||||
}
|
||||
|
||||
/** Appends output while retaining only the newest maxChars so diagnostics stay bounded. */
|
||||
export function appendBoundedChildOutput(
|
||||
current: BoundedChildOutput,
|
||||
chunk: string,
|
||||
@@ -19,11 +23,13 @@ export function appendBoundedChildOutput(
|
||||
return { text: appended, truncated: current.truncated };
|
||||
}
|
||||
return {
|
||||
// Keep the tail because child-process failures usually print the actionable error last.
|
||||
text: appended.slice(-maxChars),
|
||||
truncated: true,
|
||||
};
|
||||
}
|
||||
|
||||
/** Prefixes retained output with an explicit truncation marker when older text was dropped. */
|
||||
export function formatBoundedChildOutput(output: BoundedChildOutput): string {
|
||||
return output.truncated ? `[output truncated]\n${output.text}` : output.text;
|
||||
}
|
||||
|
||||
@@ -95,6 +95,8 @@ function parseVoiceCallIntOption(
|
||||
): number {
|
||||
const min = opts?.min ?? 0;
|
||||
const value = raw?.trim() ?? "";
|
||||
// CLI numeric flags intentionally accept only plain decimal integers so
|
||||
// values like 0x10 or 1e3 cannot surprise operators or tests.
|
||||
const parsed = parseStrictNonNegativeInteger(value);
|
||||
if (parsed === undefined || parsed < min || (opts?.max !== undefined && parsed > opts.max)) {
|
||||
throw new Error(`Invalid numeric value for ${optionName}: ${raw ?? ""}`);
|
||||
@@ -104,6 +106,8 @@ function parseVoiceCallIntOption(
|
||||
|
||||
function isGatewayUnavailableForLocalFallback(err: unknown): boolean {
|
||||
const message = formatErrorMessage(err);
|
||||
// These errors mean the local Gateway cannot service the request; callers can
|
||||
// safely fall back to a standalone runtime without hiding command failures.
|
||||
return (
|
||||
message.includes("ECONNREFUSED") ||
|
||||
message.includes("ECONNRESET") ||
|
||||
@@ -140,6 +144,8 @@ async function callVoiceCallGateway(
|
||||
}
|
||||
|
||||
function resolveGatewayOperationTimeoutMs(config: VoiceCallConfig): number {
|
||||
// Outbound calls need at least the ring timeout plus grace, but never less
|
||||
// than the baseline gateway operation budget.
|
||||
return Math.max(
|
||||
VOICE_CALL_GATEWAY_OPERATION_TIMEOUT_MS,
|
||||
addTimerTimeoutGraceMs(config.ringTimeoutMs) ?? 1,
|
||||
@@ -147,6 +153,8 @@ function resolveGatewayOperationTimeoutMs(config: VoiceCallConfig): number {
|
||||
}
|
||||
|
||||
function resolveGatewayContinueTimeoutMs(config: VoiceCallConfig): number {
|
||||
// Continue waits for playback, caller transcript, and a buffer for gateway
|
||||
// async-operation polling.
|
||||
return (
|
||||
clampTimerTimeoutMs(
|
||||
config.transcriptTimeoutMs +
|
||||
@@ -173,6 +181,8 @@ function readGatewayOperationId(payload: unknown): string {
|
||||
|
||||
function readGatewayPollTimeoutMs(payload: unknown, fallbackTimeoutMs: number): number {
|
||||
if (isRecord(payload) && typeof payload.pollTimeoutMs === "number") {
|
||||
// The gateway can return a dynamic poll budget; clamp it before using it as
|
||||
// a client-side deadline.
|
||||
return clampTimerTimeoutMs(payload.pollTimeoutMs) ?? fallbackTimeoutMs;
|
||||
}
|
||||
return fallbackTimeoutMs;
|
||||
|
||||
@@ -2,11 +2,15 @@ import { asOptionalRecord, readStringField } from "openclaw/plugin-sdk/string-co
|
||||
import type { VoiceCallConfig } from "./config.js";
|
||||
import { VoiceCallConfigSchema } from "./config.js";
|
||||
|
||||
/** Release where doctor-only legacy voice-call config support is scheduled for removal. */
|
||||
export const VOICE_CALL_LEGACY_CONFIG_REMOVAL_VERSION = "2026.6.0";
|
||||
|
||||
type VoiceCallLegacyConfigIssue = {
|
||||
/** Legacy config path relative to the voice-call plugin config object. */
|
||||
path: string;
|
||||
/** Canonical path or object that replaces the legacy key. */
|
||||
replacement: string;
|
||||
/** Operator-facing explanation shown in warnings and doctor output. */
|
||||
message: string;
|
||||
};
|
||||
|
||||
@@ -38,6 +42,7 @@ function mergeProviderConfig(
|
||||
};
|
||||
}
|
||||
|
||||
/** Collects legacy voice-call config keys that runtime load accepts only through doctor migration. */
|
||||
export function collectVoiceCallLegacyConfigIssues(value: unknown): VoiceCallLegacyConfigIssue[] {
|
||||
const raw = asObject(value) ?? {};
|
||||
const realtime = asObject(raw.realtime);
|
||||
@@ -107,9 +112,13 @@ export function collectVoiceCallLegacyConfigIssues(value: unknown): VoiceCallLeg
|
||||
return issues;
|
||||
}
|
||||
|
||||
/** Formats legacy-config warnings with the exact doctor command operators should run. */
|
||||
export function formatVoiceCallLegacyConfigWarnings(params: {
|
||||
/** Raw voice-call plugin config value to inspect. */
|
||||
value: unknown;
|
||||
/** Fully qualified config path shown in warning lines. */
|
||||
configPathPrefix: string;
|
||||
/** Exact command operators can run to rewrite legacy keys. */
|
||||
doctorFixCommand: string;
|
||||
}): string[] {
|
||||
const issues = collectVoiceCallLegacyConfigIssues(params.value);
|
||||
@@ -125,12 +134,18 @@ export function formatVoiceCallLegacyConfigWarnings(params: {
|
||||
];
|
||||
}
|
||||
|
||||
/** Migrates the retired voice-call config shape into the canonical schema input. */
|
||||
export function migrateVoiceCallLegacyConfigInput(params: {
|
||||
/** Raw voice-call plugin config value before schema parsing. */
|
||||
value: unknown;
|
||||
/** Fully qualified config path used when reporting change lines. */
|
||||
configPathPrefix?: string;
|
||||
}): {
|
||||
/** Canonical config-shaped object suitable for VoiceCallConfigSchema parsing. */
|
||||
config: Record<string, unknown>;
|
||||
/** Doctor-style change log describing every rewrite/removal applied. */
|
||||
changes: string[];
|
||||
/** Legacy issues detected before migration, for warnings and removal planning. */
|
||||
issues: VoiceCallLegacyConfigIssue[];
|
||||
} {
|
||||
const raw = asObject(params.value) ?? {};
|
||||
@@ -165,6 +180,7 @@ export function migrateVoiceCallLegacyConfigInput(params: {
|
||||
? {
|
||||
...streaming,
|
||||
provider: streamingProvider ?? legacyStreamingProvider,
|
||||
// Legacy top-level STT knobs now live under the OpenAI streaming provider config.
|
||||
providers: mergeProviderConfig(streaming.providers, "openai", legacyStreamingOpenAICompat),
|
||||
}
|
||||
: undefined;
|
||||
@@ -254,10 +270,12 @@ export function migrateVoiceCallLegacyConfigInput(params: {
|
||||
return { config, changes, issues };
|
||||
}
|
||||
|
||||
/** Returns only the migrated config object for callers that do not need issue/change details. */
|
||||
export function normalizeVoiceCallLegacyConfigInput(value: unknown): Record<string, unknown> {
|
||||
return migrateVoiceCallLegacyConfigInput({ value }).config;
|
||||
}
|
||||
|
||||
/** Parses voice-call plugin config after applying the bounded legacy migration. */
|
||||
export function parseVoiceCallPluginConfig(value: unknown): VoiceCallConfig {
|
||||
return VoiceCallConfigSchema.parse(normalizeVoiceCallLegacyConfigInput(value));
|
||||
}
|
||||
|
||||
@@ -503,8 +503,11 @@ export const VoiceCallConfigSchema = z
|
||||
.strict();
|
||||
|
||||
export type VoiceCallConfig = z.infer<typeof VoiceCallConfigSchema>;
|
||||
/** Voice-call config after applying an optional per-number inbound route override. */
|
||||
export type VoiceCallEffectiveConfigResult = {
|
||||
/** Effective config for the call, with route overrides merged when matched. */
|
||||
config: VoiceCallConfig;
|
||||
/** Canonical configured phone route key that matched the caller/dialed number. */
|
||||
numberRouteKey?: string;
|
||||
};
|
||||
type DeepPartial<T> = T extends SecretInput
|
||||
@@ -514,13 +517,10 @@ type DeepPartial<T> = T extends SecretInput
|
||||
: T extends object
|
||||
? { [K in keyof T]?: DeepPartial<T[K]> }
|
||||
: T;
|
||||
/** Partial config shape accepted at plugin boundaries before defaults and env fallbacks apply. */
|
||||
export type VoiceCallConfigInput = DeepPartial<VoiceCallConfig>;
|
||||
const TWILIO_AUTH_TOKEN_PATH = "plugins.entries.voice-call.config.twilio.authToken";
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Configuration Helpers
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
const DEFAULT_VOICE_CALL_CONFIG = VoiceCallConfigSchema.parse({});
|
||||
|
||||
function cloneDefaultVoiceCallConfig(): VoiceCallConfig {
|
||||
@@ -542,6 +542,8 @@ function normalizeWebhookLikePath(pathname: string): string {
|
||||
function defaultRealtimeStreamPathForServePath(servePath: string): string {
|
||||
const normalized = normalizeWebhookLikePath(servePath);
|
||||
if (normalized.endsWith("/webhook")) {
|
||||
// Keep the realtime route next to the webhook route so reverse-proxy rules
|
||||
// for custom voice paths can forward both HTTP callbacks and WS upgrades.
|
||||
return `${normalized.slice(0, -"/webhook".length)}/stream/realtime`;
|
||||
}
|
||||
if (normalized === "/") {
|
||||
@@ -558,6 +560,8 @@ function normalizeVoiceCallTtsConfig(
|
||||
return undefined;
|
||||
}
|
||||
|
||||
// TTS route overrides are partial by design; preserve global provider knobs
|
||||
// while letting per-number routes replace only the nested fields they own.
|
||||
return TtsConfigSchema.parse(deepMergeDefined(defaults ?? {}, overrides ?? {}));
|
||||
}
|
||||
|
||||
@@ -565,6 +569,7 @@ function normalizePhoneRouteKey(phone: string | undefined): string {
|
||||
return phone?.replace(/\D/g, "") ?? "";
|
||||
}
|
||||
|
||||
/** Resolves the canonical per-number route key for exact or normalized phone input. */
|
||||
export function resolveVoiceCallNumberRouteKey(
|
||||
config: Pick<VoiceCallConfig, "numbers">,
|
||||
phone: string | undefined,
|
||||
@@ -577,6 +582,8 @@ export function resolveVoiceCallNumberRouteKey(
|
||||
return phone;
|
||||
}
|
||||
|
||||
// Config keys are E.164, but callers can arrive with formatted phone text.
|
||||
// Normalize only for lookup; keep the canonical configured route key in the result.
|
||||
const normalizedPhone = normalizePhoneRouteKey(phone);
|
||||
if (!normalizedPhone) {
|
||||
return undefined;
|
||||
@@ -586,6 +593,7 @@ export function resolveVoiceCallNumberRouteKey(
|
||||
);
|
||||
}
|
||||
|
||||
/** Applies per-number route overrides while preserving global route registry and TTS defaults. */
|
||||
export function resolveVoiceCallEffectiveConfig(
|
||||
config: VoiceCallConfig,
|
||||
phoneOrRouteKey: string | undefined,
|
||||
@@ -637,6 +645,7 @@ function sanitizeVoiceCallNumberRoutes(
|
||||
);
|
||||
}
|
||||
|
||||
/** Resolves Twilio auth tokens from SecretInput while preserving clear config-path errors. */
|
||||
export function resolveTwilioAuthToken(
|
||||
config: Pick<VoiceCallConfig, "twilio">,
|
||||
): string | undefined {
|
||||
@@ -646,6 +655,7 @@ export function resolveTwilioAuthToken(
|
||||
});
|
||||
}
|
||||
|
||||
/** Normalizes partial voice-call config by applying nested defaults that Zod cannot infer alone. */
|
||||
export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCallConfig {
|
||||
const defaults = cloneDefaultVoiceCallConfig();
|
||||
const serve = { ...defaults.serve, ...config.serve };
|
||||
@@ -667,6 +677,8 @@ export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCal
|
||||
...config.realtime?.agentContext,
|
||||
files: config.realtime?.agentContext?.files ?? defaults.realtime.agentContext.files,
|
||||
};
|
||||
// Zod defaults only apply to complete subtrees. Normalize here so callers can
|
||||
// provide partial nested config without losing defaults from sibling fields.
|
||||
return {
|
||||
...defaults,
|
||||
...config,
|
||||
@@ -697,6 +709,8 @@ export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCal
|
||||
provider: realtimeProvider,
|
||||
streamPath:
|
||||
config.realtime?.streamPath ??
|
||||
// Realtime stream defaults depend on the normalized serve path, not the
|
||||
// schema default, because callers can override serve.path with partial config.
|
||||
defaultRealtimeStreamPathForServePath(serve.path ?? defaults.serve.path),
|
||||
tools:
|
||||
(config.realtime?.tools as RealtimeToolConfig[] | undefined) ?? defaults.realtime.tools,
|
||||
@@ -712,6 +726,7 @@ export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCal
|
||||
};
|
||||
}
|
||||
|
||||
/** Builds the memory/session key for voice conversations based on configured session scope. */
|
||||
export function resolveVoiceCallSessionKey(params: {
|
||||
config: Pick<VoiceCallConfig, "sessionScope">;
|
||||
callId: string;
|
||||
@@ -726,13 +741,12 @@ export function resolveVoiceCallSessionKey(params: {
|
||||
return `voice:call:${params.callId}`;
|
||||
}
|
||||
const normalizedPhone = params.phone?.replace(/\D/g, "");
|
||||
// Per-phone scope intentionally strips formatting so the same caller keeps
|
||||
// one memory thread across inbound/outbound formatting differences.
|
||||
return normalizedPhone ? `voice:${normalizedPhone}` : `voice:${params.callId}`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves the configuration by merging environment variables into missing fields.
|
||||
* Returns a new configuration object with environment variables applied.
|
||||
*/
|
||||
/** Resolves config defaults plus provider environment fallbacks into the canonical runtime shape. */
|
||||
export function resolveVoiceCallConfig(config: VoiceCallConfigInput): VoiceCallConfig {
|
||||
const resolved = normalizeVoiceCallConfig(config);
|
||||
|
||||
@@ -783,9 +797,7 @@ export function resolveVoiceCallConfig(config: VoiceCallConfigInput): VoiceCallC
|
||||
return normalizeVoiceCallConfig(resolved);
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate that the configuration has all required fields for the selected provider.
|
||||
*/
|
||||
/** Validates provider credentials and incompatible realtime/streaming policy combinations. */
|
||||
export function validateProviderConfig(config: VoiceCallConfig): {
|
||||
valid: boolean;
|
||||
errors: string[];
|
||||
@@ -858,6 +870,8 @@ export function validateProviderConfig(config: VoiceCallConfig): {
|
||||
);
|
||||
}
|
||||
|
||||
// Realtime and streaming both own the live audio WebSocket path; allowing both
|
||||
// would create two competing handlers for a single telephony media stream.
|
||||
if (config.realtime.enabled && config.streaming.enabled) {
|
||||
errors.push(
|
||||
"plugins.entries.voice-call.config.realtime.enabled and plugins.entries.voice-call.config.streaming.enabled cannot both be true",
|
||||
|
||||
@@ -2,13 +2,16 @@ import type { OpenClawPluginApi } from "../api.js";
|
||||
import type { VoiceCallTtsConfig } from "./config.js";
|
||||
|
||||
/**
 * Subset of the host core configuration that the voice-call plugin reads.
 * Unknown top-level keys are tolerated through the index signature.
 */
export type CoreConfig = {
  /** Core session config used to locate persisted voice response sessions. */
  session?: {
    store?: string;
  };
  /** Core TTS config that voice-call can merge with route-specific overrides. */
  messages?: {
    tts?: VoiceCallTtsConfig;
  };
  [key: string]: unknown;
};
|
||||
|
||||
/** Agent runtime capabilities injected from the host OpenClaw plugin API. */
|
||||
export type CoreAgentDeps = OpenClawPluginApi["runtime"]["agent"];
|
||||
|
||||
@@ -2,6 +2,7 @@ import { isRecord as isPlainObject } from "openclaw/plugin-sdk/string-coerce-run
|
||||
|
||||
const BLOCKED_MERGE_KEYS = new Set(["__proto__", "prototype", "constructor"]);
|
||||
|
||||
/** Deep-merges plain config objects while treating undefined overrides as "leave base intact". */
|
||||
export function deepMergeDefined(base: unknown, override: unknown): unknown {
|
||||
if (!isPlainObject(base) || !isPlainObject(override)) {
|
||||
return override === undefined ? base : override;
|
||||
@@ -9,6 +10,7 @@ export function deepMergeDefined(base: unknown, override: unknown): unknown {
|
||||
|
||||
const result: Record<string, unknown> = { ...base };
|
||||
for (const [key, value] of Object.entries(override)) {
|
||||
// Config merges can consume user-authored objects, so skip prototype keys before recursion.
|
||||
if (BLOCKED_MERGE_KEYS.has(key) || value === undefined) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -65,13 +65,23 @@ type VoiceCallContinueOperationRequest = {
|
||||
message: string;
|
||||
};
|
||||
|
||||
/**
|
||||
* Creates a short-lived async operation store for gateway-driven continue-call requests.
|
||||
*
|
||||
* `start` returns an operation id immediately while the call continues in the
|
||||
* background; `read` returns pending state or consumes one terminal result.
|
||||
*/
|
||||
export function createVoiceCallContinueOperationStore(params: {
|
||||
/** Resolved voice-call config used as fallback for transcript and TTS polling windows. */
|
||||
config: VoiceCallConfig;
|
||||
/** Core config fallback for global TTS timeout defaults. */
|
||||
coreConfig: CoreConfig;
|
||||
}) {
|
||||
const operations = new Map<string, VoiceCallContinueOperation>();
|
||||
|
||||
const resolvePollTimeoutMs = (rt: VoiceCallRuntime): number => {
|
||||
// The client waits for both assistant transcript generation and TTS playback
|
||||
// preparation, plus a buffer for provider webhook latency.
|
||||
const ttsTimeoutMs =
|
||||
rt.config.tts?.timeoutMs ??
|
||||
params.config.tts?.timeoutMs ??
|
||||
@@ -86,12 +96,15 @@ export function createVoiceCallContinueOperationStore(params: {
|
||||
};
|
||||
|
||||
const scheduleCleanup = (operationId: string) => {
|
||||
// Completed operations are readable once, but still get a delayed cleanup in
|
||||
// case the caller disconnects before polling the terminal state.
|
||||
const timer = setTimeout(() => {
|
||||
operations.delete(operationId);
|
||||
}, VOICE_CALL_CONTINUE_OPERATION_CLEANUP_MS);
|
||||
timer.unref?.();
|
||||
};
|
||||
|
||||
/** Starts an async continue-call operation and returns the poll token plus timeout budget. */
|
||||
const start = (
|
||||
request: VoiceCallContinueOperationRequest,
|
||||
): VoiceCallContinueOperationStartPayload => {
|
||||
@@ -110,6 +123,8 @@ export function createVoiceCallContinueOperationStore(params: {
|
||||
.continueCall(request.callId, request.message)
|
||||
.then((result) => {
|
||||
const current = operations.get(operationId);
|
||||
// A poller may have consumed or cleanup may have removed the operation
|
||||
// before the async continue call resolves.
|
||||
if (!current || current.status !== "pending") {
|
||||
return;
|
||||
}
|
||||
@@ -157,6 +172,7 @@ export function createVoiceCallContinueOperationStore(params: {
|
||||
return { operationId, status: "pending", pollTimeoutMs };
|
||||
};
|
||||
|
||||
/** Reads an operation state; completed/failed operations are removed after this call. */
|
||||
const read = (
|
||||
operationId: string,
|
||||
):
|
||||
@@ -177,6 +193,8 @@ export function createVoiceCallContinueOperationStore(params: {
|
||||
};
|
||||
}
|
||||
if (operation.status === "failed") {
|
||||
// Terminal states are single-consume so repeated polls cannot replay stale
|
||||
// call results after the gateway has already returned them.
|
||||
operations.delete(operationId);
|
||||
return {
|
||||
ok: true,
|
||||
|
||||
@@ -2,6 +2,7 @@ import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/string-coer
|
||||
|
||||
type HttpHeaderMap = Record<string, string | string[] | undefined>;
|
||||
|
||||
/** Reads one HTTP header case-insensitively, using the first value for multi-value headers. */
|
||||
export function getHeader(headers: HttpHeaderMap, name: string): string | undefined {
|
||||
const target = normalizeLowercaseStringOrEmpty(name);
|
||||
const direct = headers[target];
|
||||
|
||||
@@ -23,6 +23,7 @@ import type {
|
||||
WebhookVerificationResult,
|
||||
} from "./types.js";
|
||||
|
||||
/** In-memory provider double that records call-control side effects for manager tests. */
|
||||
export class FakeProvider implements VoiceCallProvider {
|
||||
readonly name: "plivo" | "twilio" | "telnyx";
|
||||
twilioStreamConnectEnabled = true;
|
||||
@@ -73,10 +74,12 @@ export class FakeProvider implements VoiceCallProvider {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Create an isolated temp directory for voice-call state tests.
 *
 * @returns Path of a newly created directory under the OS temp dir whose name
 *   starts with `openclaw-voice-call-test-`. The caller owns cleanup.
 */
export function createTestStorePath(): string {
  return fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-voice-call-test-"));
}
|
||||
|
||||
/** Install the synchronous plugin-state runtime used by voice-call manager tests. */
|
||||
export function installVoiceCallStateRuntimeForTests(): void {
|
||||
if (getOptionalVoiceCallStateRuntime()) {
|
||||
return;
|
||||
@@ -96,6 +99,7 @@ export function installVoiceCallStateRuntimeForTests(): void {
|
||||
});
|
||||
}
|
||||
|
||||
/** Build and initialize a CallManager with an isolated store and fake provider. */
|
||||
export async function createManagerHarness(
|
||||
configOverrides: Record<string, unknown> = {},
|
||||
provider = new FakeProvider(),
|
||||
@@ -115,6 +119,7 @@ export async function createManagerHarness(
|
||||
return { manager, provider };
|
||||
}
|
||||
|
||||
/** Drive the manager through a provider answered event for an existing call. */
|
||||
export function markCallAnswered(manager: CallManager, callId: string, eventId: string): void {
|
||||
manager.processEvent({
|
||||
id: eventId,
|
||||
@@ -125,6 +130,7 @@ export function markCallAnswered(manager: CallManager, callId: string, eventId:
|
||||
});
|
||||
}
|
||||
|
||||
/** Persist canonical call snapshots into the plugin-state store for restore tests. */
|
||||
export function writeCallsToStore(storePath: string, calls: Record<string, unknown>[]): void {
|
||||
fs.mkdirSync(storePath, { recursive: true });
|
||||
for (const call of calls) {
|
||||
@@ -132,6 +138,7 @@ export function writeCallsToStore(storePath: string, calls: Record<string, unkno
|
||||
}
|
||||
}
|
||||
|
||||
/** Write retired JSONL call records for tests that prove runtime ignores legacy logs. */
|
||||
export function writeLegacyCallsJsonl(storePath: string, calls: Record<string, unknown>[]): void {
|
||||
fs.mkdirSync(storePath, { recursive: true });
|
||||
const logPath = path.join(storePath, "calls.jsonl");
|
||||
@@ -139,6 +146,7 @@ export function writeLegacyCallsJsonl(storePath: string, calls: Record<string, u
|
||||
fs.writeFileSync(logPath, lines);
|
||||
}
|
||||
|
||||
/** Produce a schema-shaped persisted call with override hooks for restore fixtures. */
|
||||
export function makePersistedCall(
|
||||
overrides: Record<string, unknown> = {},
|
||||
): Record<string, unknown> {
|
||||
|
||||
@@ -117,7 +117,8 @@ export class CallManager {
|
||||
const verified = await this.verifyRestoredCalls(provider, persisted.activeCalls);
|
||||
this.activeCalls = verified;
|
||||
|
||||
// Rebuild providerCallIdMap from verified calls only
|
||||
// Only verified calls are addressable by provider id after restart; skipped
|
||||
// persisted records must not receive future webhook events.
|
||||
this.providerCallIdMap = new Map();
|
||||
for (const [callId, call] of verified) {
|
||||
if (call.providerCallId) {
|
||||
@@ -125,14 +126,14 @@ export class CallManager {
|
||||
}
|
||||
}
|
||||
|
||||
// Restart max-duration timers for restored calls that are past the answered state
|
||||
// Restore only the remaining duration. Calls whose answered window already
|
||||
// elapsed are dropped because a timer scheduled at 0ms races startup state.
|
||||
let skippedAlreadyElapsedTimers = 0;
|
||||
for (const [callId, call] of verified) {
|
||||
if (call.answeredAt && !TerminalStates.has(call.state)) {
|
||||
const elapsed = Date.now() - call.answeredAt;
|
||||
const maxDurationMs = resolveVoiceCallSecondsTimerDelayMs(this.config.maxDurationSeconds);
|
||||
if (elapsed >= maxDurationMs) {
|
||||
// Already expired — remove instead of keeping
|
||||
verified.delete(callId);
|
||||
if (call.providerCallId) {
|
||||
this.providerCallIdMap.delete(call.providerCallId);
|
||||
@@ -187,13 +188,15 @@ export class CallManager {
|
||||
let keptVerificationFailures = 0;
|
||||
|
||||
for (const [callId, call] of candidates) {
|
||||
// Skip calls without a provider ID — can't verify
|
||||
// Without a provider id there is no remote state to verify, so restoring
|
||||
// would create a local-only call that can never receive carrier events.
|
||||
if (!call.providerCallId) {
|
||||
skippedNoProviderCallId += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip calls older than maxDurationSeconds (time-based fallback)
|
||||
// Age is the local fallback when provider state is unavailable or stale;
|
||||
// persist timeout locally and make a best-effort remote hangup.
|
||||
if (now - call.startedAt > maxAgeMs) {
|
||||
skippedOlderThanMaxDuration += 1;
|
||||
markRestoredCallSkipped(call, "timeout");
|
||||
@@ -232,7 +235,8 @@ export class CallManager {
|
||||
}
|
||||
})
|
||||
.catch(() => {
|
||||
// Verification failed entirely — keep the call, rely on timer
|
||||
// Treat verification failure like an unknown provider status: the
|
||||
// restored max-duration timer remains the safety net.
|
||||
keptVerificationFailures += 1;
|
||||
verified.set(callId, call);
|
||||
}),
|
||||
|
||||
@@ -10,8 +10,10 @@ type TranscriptWaiter = {
|
||||
};
|
||||
|
||||
type CallManagerRuntimeState = {
|
||||
/** Live call records and provider-id indexes that survive across manager helper calls. */
|
||||
activeCalls: Map<CallId, CallRecord>;
|
||||
providerCallIdMap: Map<string, CallId>;
|
||||
/** Provider event IDs already applied; webhook retries must not re-run side effects. */
|
||||
processedEventIds: Set<string>;
|
||||
/** Provider call IDs we already sent a reject hangup for; avoids duplicate hangup calls. */
|
||||
rejectedProviderCallIds: Set<string>;
|
||||
@@ -25,12 +27,17 @@ type CallManagerRuntimeDeps = {
|
||||
};
|
||||
|
||||
/** Per-call manager bookkeeping held in in-memory sets and maps. */
type CallManagerTransientState = {
  /** Calls currently executing an agent turn; drives overlap suppression for voice loops. */
  activeTurnCalls: Set<CallId>;
  /** Pending transcript waits keyed by call; process-local and intentionally not persisted. */
  transcriptWaiters: Map<CallId, TranscriptWaiter>;
  /** Provider-independent call duration deadlines; restored calls rebuild these from persisted age. */
  maxDurationTimers: Map<CallId, NodeJS.Timeout>;
  /** Outbound initial messages already started; prevents duplicate playback on callback races. */
  initialMessageInFlight: Set<CallId>;
};
|
||||
|
||||
/** Issues short-lived media stream credentials for providers that connect by websocket. */
|
||||
export type StreamSessionIssuer = (request: {
|
||||
providerName: "twilio" | "telnyx";
|
||||
callId: CallId;
|
||||
@@ -44,6 +51,7 @@ type CallManagerHooks = {
|
||||
streamSessionIssuer?: StreamSessionIssuer;
|
||||
};
|
||||
|
||||
/** Shared dependency bag passed to pure call-manager helpers instead of binding to the class. */
|
||||
export type CallManagerContext = CallManagerRuntimeState &
|
||||
CallManagerRuntimeDeps &
|
||||
CallManagerTransientState &
|
||||
|
||||
@@ -58,6 +58,10 @@ function shouldAcceptInbound(config: EventContext["config"], from: string | unde
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a local call record for provider webhooks that arrive before local state exists.
|
||||
* This covers inbound PSTN calls and externally-created provider calls pointed at this webhook.
|
||||
*/
|
||||
function createWebhookCall(params: {
|
||||
ctx: EventContext;
|
||||
providerCallId: string;
|
||||
@@ -107,6 +111,10 @@ function createWebhookCall(params: {
|
||||
return callRecord;
|
||||
}
|
||||
|
||||
/**
|
||||
* Persists a terminal snapshot for an inbound call rejected before it becomes active.
|
||||
* The durable processed-event id keeps redelivery from repeatedly applying policy side effects.
|
||||
*/
|
||||
function persistRejectedInboundCall(params: {
|
||||
ctx: EventContext;
|
||||
event: NormalizedEvent;
|
||||
@@ -115,6 +123,8 @@ function persistRejectedInboundCall(params: {
|
||||
}): void {
|
||||
const callId = params.event.callId || params.providerCallId;
|
||||
const now = Date.now();
|
||||
// Rejections are persisted as terminal snapshots even though they never enter
|
||||
// activeCalls, so replay recovery keeps the dedupe key and policy decision.
|
||||
const rejectedCall: CallRecord = {
|
||||
callId,
|
||||
providerCallId: params.providerCallId,
|
||||
@@ -133,6 +143,13 @@ function persistRejectedInboundCall(params: {
|
||||
persistCallRecord(params.ctx.storePath, rejectedCall);
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies one normalized provider event to active call state with replay dedupe.
|
||||
*
|
||||
* Unknown calls may be registered from webhook payloads, blocked inbound calls
|
||||
* are persisted as terminal snapshots, and retryable errors deliberately keep
|
||||
* their replay key uncommitted so a later delivery can still recover the call.
|
||||
*/
|
||||
export function processEvent(ctx: EventContext, event: NormalizedEvent): void {
|
||||
const dedupeKey = event.dedupeKey || event.id;
|
||||
if (ctx.processedEventIds.has(dedupeKey)) {
|
||||
@@ -167,6 +184,8 @@ export function processEvent(ctx: EventContext, event: NormalizedEvent): void {
|
||||
if (ctx.rejectedProviderCallIds.has(pid)) {
|
||||
return;
|
||||
}
|
||||
// Track rejected provider IDs separately from processed event IDs because
|
||||
// carriers can emit multiple event ids for the same blocked call.
|
||||
ctx.rejectedProviderCallIds.add(pid);
|
||||
const callId = event.callId ?? pid;
|
||||
persistRejectedInboundCall({ ctx, event, dedupeKey, providerCallId: pid });
|
||||
@@ -198,6 +217,8 @@ export function processEvent(ctx: EventContext, event: NormalizedEvent): void {
|
||||
}
|
||||
|
||||
if (!call) {
|
||||
// Do not burn the replay key. Some providers can deliver status callbacks
|
||||
// before the create/answer event that registers the call.
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -208,11 +229,14 @@ export function processEvent(ctx: EventContext, event: NormalizedEvent): void {
|
||||
if (previousProviderCallId) {
|
||||
const mapped = ctx.providerCallIdMap.get(previousProviderCallId);
|
||||
if (mapped === call.callId) {
|
||||
// Providers can replace request ids with stable call ids; drop only our stale mapping.
|
||||
ctx.providerCallIdMap.delete(previousProviderCallId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Retryable errors are observations, not terminal decisions; keep their
|
||||
// replay keys reusable so a redelivery can still advance the call.
|
||||
const shouldCommitReplayKey = !(event.type === "call.error" && event.retryable);
|
||||
if (shouldCommitReplayKey) {
|
||||
ctx.processedEventIds.add(dedupeKey);
|
||||
@@ -289,6 +313,7 @@ export function processEvent(ctx: EventContext, event: NormalizedEvent): void {
|
||||
event.turnToken,
|
||||
);
|
||||
if (hadWaiter && !resolved) {
|
||||
// Keep a mismatched turn-token transcript out of both waiters and durable history.
|
||||
console.warn(
|
||||
`[voice-call] Ignoring speech event with mismatched turn token for ${call.callId}`,
|
||||
);
|
||||
|
||||
56
extensions/voice-call/src/manager/lifecycle.test.ts
Normal file
56
extensions/voice-call/src/manager/lifecycle.test.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
|
||||
const { persistCallRecordMock } = vi.hoisted(() => ({
|
||||
persistCallRecordMock: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock("./store.js", () => ({
|
||||
persistCallRecord: persistCallRecordMock,
|
||||
}));
|
||||
|
||||
import type { CallRecord } from "../types.js";
|
||||
import { finalizeCall } from "./lifecycle.js";
|
||||
|
||||
/**
 * Builds an active outbound Twilio call record for lifecycle tests.
 *
 * @param overrides Fields that replace the fixture defaults; spread last so
 *   any default can be overridden.
 */
function createCall(overrides: Partial<CallRecord> = {}): CallRecord {
  return {
    callId: "call-1",
    providerCallId: "provider-1",
    provider: "twilio",
    direction: "outbound",
    state: "active",
    from: "+15550000000",
    to: "+15550000001",
    startedAt: 1,
    transcript: [],
    processedEventIds: [],
    ...overrides,
  };
}
|
||||
|
||||
describe("voice-call manager lifecycle", () => {
  it("finalizes calls without removing provider ids owned by repaired calls", () => {
    const call = createCall();
    const activeCalls = new Map([["call-1", call]]);
    // The provider id is mapped to a different call ("call-2"), so finalizing
    // "call-1" must leave this entry untouched.
    const providerCallIdMap = new Map([["provider-1", "call-2"]]);

    finalizeCall({
      ctx: {
        activeCalls,
        providerCallIdMap,
        storePath: "/tmp/voice-call",
      },
      call,
      endReason: "completed",
      endedAt: 42,
    });

    // The record itself is marked terminal with the supplied reason and timestamp.
    expect(call).toMatchObject({
      state: "completed",
      endReason: "completed",
      endedAt: 42,
    });
    // The call leaves the live index, the foreign mapping survives, and the
    // final snapshot is handed to the (mocked) store.
    expect(activeCalls.has("call-1")).toBe(false);
    expect(providerCallIdMap.get("provider-1")).toBe("call-2");
    expect(persistCallRecordMock).toHaveBeenCalledWith("/tmp/voice-call", call);
  });
});
|
||||
@@ -18,16 +18,24 @@ function removeProviderCallMapping(
|
||||
return;
|
||||
}
|
||||
const mappedCallId = providerCallIdMap.get(call.providerCallId);
|
||||
// Webhook repair can adopt or replace provider ids while stale call records
|
||||
// are still finalizing; only the call that owns the live map entry may delete it.
|
||||
if (mappedCallId === call.callId) {
|
||||
providerCallIdMap.delete(call.providerCallId);
|
||||
}
|
||||
}
|
||||
|
||||
/** Finalizes one call record, persists it, and clears transient timers/waiters. */
|
||||
export function finalizeCall(params: {
|
||||
/** Manager state maps and optional transient queues that own this call. */
|
||||
ctx: CallLifecycleContext;
|
||||
/** Active call record to mark terminal and remove from live indexes. */
|
||||
call: CallRecord;
|
||||
/** Terminal reason that also drives the call-state transition. */
|
||||
endReason: EndReason;
|
||||
/** Provider event timestamp; defaults to local wall time for local hangups. */
|
||||
endedAt?: number;
|
||||
/** Optional waiter error text when a pending transcript promise must be rejected. */
|
||||
transcriptRejectReason?: string;
|
||||
}): void {
|
||||
const { ctx, call, endReason } = params;
|
||||
@@ -37,6 +45,8 @@ export function finalizeCall(params: {
|
||||
transitionState(call, endReason);
|
||||
persistCallRecord(ctx.storePath, call);
|
||||
|
||||
// Timers and waiters are process-local state; clear them before dropping the
|
||||
// active call so late timeout/transcript callbacks cannot observe a dead call.
|
||||
if (ctx.maxDurationTimers) {
|
||||
clearMaxDurationTimer({ maxDurationTimers: ctx.maxDurationTimers }, call.callId);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import type { CallId, CallRecord } from "../types.js";
|
||||
|
||||
/** Resolves a provider call id through the fast map, then active-call state for restored calls. */
|
||||
export function getCallByProviderCallId(params: {
|
||||
activeCalls: Map<CallId, CallRecord>;
|
||||
providerCallIdMap: Map<string, CallId>;
|
||||
@@ -10,6 +11,7 @@ export function getCallByProviderCallId(params: {
|
||||
return params.activeCalls.get(callId);
|
||||
}
|
||||
|
||||
// Restored calls may predate the in-memory provider id map; scan active state as fallback.
|
||||
for (const call of params.activeCalls.values()) {
|
||||
if (call.providerCallId === params.providerCallId) {
|
||||
return call;
|
||||
@@ -18,6 +20,7 @@ export function getCallByProviderCallId(params: {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/** Finds a call by internal call id first, then by provider call id. */
|
||||
export function findCall(params: {
|
||||
activeCalls: Map<CallId, CallRecord>;
|
||||
providerCallIdMap: Map<string, CallId>;
|
||||
|
||||
@@ -94,6 +94,8 @@ function lookupConnectedCall(ctx: ConnectedCallContext, callId: CallId): Connect
|
||||
if (TerminalStates.has(call.state)) {
|
||||
return { kind: "ended", call };
|
||||
}
|
||||
// The ok branch carries providerCallId/provider together so callers cannot
|
||||
// accidentally hang up or play audio with a half-connected local record.
|
||||
return { kind: "ok", call, providerCallId: call.providerCallId, provider: ctx.provider };
|
||||
}
|
||||
|
||||
@@ -119,6 +121,13 @@ function validateDtmfDigits(digits: string): string | null {
|
||||
: "digits may only contain digits, *, #, comma, w, p";
|
||||
}
|
||||
|
||||
/**
|
||||
* Initiate an outbound call and register the local record before provider handoff.
|
||||
*
|
||||
* `options` accepts the current object shape plus the legacy string shorthand for
|
||||
* an initial message. Conversation-mode DTMF is validated before any call record
|
||||
* is created because the provider will execute those digits before webhook control returns.
|
||||
*/
|
||||
export async function initiateCall(
|
||||
ctx: InitiateContext,
|
||||
to: string,
|
||||
@@ -132,6 +141,8 @@ export async function initiateCall(
|
||||
const dtmfSequence = opts.dtmfSequence;
|
||||
const requesterSessionKey = opts.requesterSessionKey?.trim();
|
||||
if (dtmfSequence) {
|
||||
// Pre-connect DTMF only makes sense for conversation calls because the
|
||||
// redirect returns control to the webhook for the live exchange.
|
||||
const validationError = validateDtmfDigits(dtmfSequence);
|
||||
if (validationError) {
|
||||
return { callId: "", success: false, error: validationError };
|
||||
@@ -209,6 +220,8 @@ export async function initiateCall(
|
||||
}
|
||||
|
||||
const streamSession =
|
||||
// Telnyx streaming authenticates with a per-call token; include it only
|
||||
// when realtime is enabled and the provider can consume stream URLs.
|
||||
ctx.config.realtime?.enabled && ctx.provider.name === "telnyx" && ctx.streamSessionIssuer
|
||||
? ctx.streamSessionIssuer({
|
||||
providerName: "telnyx",
|
||||
@@ -254,6 +267,12 @@ export async function initiateCall(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Speak TTS into a connected call and append the spoken text to the transcript.
|
||||
*
|
||||
* The active number route selects the TTS voice, so transferred/restored calls
|
||||
* keep route-specific speech settings even after provider callback handoff.
|
||||
*/
|
||||
export async function speak(
|
||||
ctx: SpeakContext,
|
||||
callId: CallId,
|
||||
@@ -303,9 +322,15 @@ function shouldStartListeningAfterInitialMessage(ctx: ConversationContext): bool
|
||||
const streamAwareProvider = ctx.provider as typeof ctx.provider & {
|
||||
isConversationStreamConnectEnabled?: () => boolean;
|
||||
};
|
||||
// Twilio's stream-connect mode begins listening from the webhook path; issuing
|
||||
// a second startListening call here would duplicate media stream setup.
|
||||
return streamAwareProvider.isConversationStreamConnectEnabled?.() !== true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Send outbound DTMF digits through providers that expose live DTMF support.
|
||||
* Returns a typed failure instead of throwing for unsupported providers or invalid digits.
|
||||
*/
|
||||
export async function sendDtmf(
|
||||
ctx: SpeakContext,
|
||||
callId: CallId,
|
||||
@@ -335,6 +360,10 @@ export async function sendDtmf(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Plays the one-shot initial message, then enters notify hangup or conversation listening mode.
|
||||
* Provider callbacks may race at answer/stream-connect time; this helper owns the in-flight guard.
|
||||
*/
|
||||
export async function speakInitialMessage(
|
||||
ctx: ConversationContext,
|
||||
providerCallId: string,
|
||||
@@ -363,6 +392,8 @@ export async function speakInitialMessage(
|
||||
);
|
||||
return;
|
||||
}
|
||||
// Answered and media-stream connected callbacks can both attempt startup
|
||||
// speech; keep one playback active so the caller does not hear duplicates.
|
||||
ctx.initialMessageInFlight.add(call.callId);
|
||||
|
||||
try {
|
||||
@@ -383,6 +414,8 @@ export async function speakInitialMessage(
|
||||
const delaySec = ctx.config.outbound.notifyHangupDelaySec;
|
||||
const delayMs = resolveVoiceCallSecondsTimerDelayMs(delaySec, 0);
|
||||
console.log(`[voice-call] Notify mode: auto-hangup in ${delaySec}s for call ${call.callId}`);
|
||||
// Notify hangup is intentionally not a max-duration timer; it is a short
|
||||
// post-message grace period and rechecks active state before ending.
|
||||
setTimeout(() => {
|
||||
void (async () => {
|
||||
const currentCall = ctx.activeCalls.get(call.callId);
|
||||
@@ -409,6 +442,10 @@ export async function speakInitialMessage(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Speak a prompt, collect the caller's final transcript, and record turn latency.
|
||||
* Only one active turn per call is allowed because transcript waiters are keyed by call id.
|
||||
*/
|
||||
export async function continueCall(
|
||||
ctx: ConversationContext,
|
||||
callId: CallId,
|
||||
@@ -426,6 +463,8 @@ export async function continueCall(
|
||||
ctx.activeTurnCalls.add(callId);
|
||||
|
||||
const turnStartedAt = Date.now();
|
||||
// Twilio needs a turn token to ignore stale final transcripts from earlier
|
||||
// listen windows; other providers already scope transcripts by stream state.
|
||||
const turnToken = provider.name === "twilio" ? crypto.randomUUID() : undefined;
|
||||
|
||||
try {
|
||||
@@ -473,10 +512,16 @@ export async function continueCall(
|
||||
return { success: false, error: formatErrorMessage(err) };
|
||||
} finally {
|
||||
ctx.activeTurnCalls.delete(callId);
|
||||
// Always remove the waiter after a turn so a late provider callback cannot
|
||||
// resolve a promise belonging to the next user prompt.
|
||||
clearTranscriptWaiter(ctx, callId);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Hang up a connected call and finalize local state exactly once.
|
||||
* Already-terminal local records are treated as success to make repeated cleanup idempotent.
|
||||
*/
|
||||
export async function endCall(
|
||||
ctx: EndCallContext,
|
||||
callId: CallId,
|
||||
|
||||
@@ -11,6 +11,7 @@ const StateOrder: readonly CallState[] = [
|
||||
"listening",
|
||||
];
|
||||
|
||||
/** Applies monotonic call-state transitions while allowing speaking/listening turn cycles. */
|
||||
export function transitionState(call: CallRecord, newState: CallState): void {
|
||||
// No-op for same state or already terminal.
|
||||
if (call.state === newState || TerminalStates.has(call.state)) {
|
||||
@@ -37,6 +38,7 @@ export function transitionState(call: CallRecord, newState: CallState): void {
|
||||
}
|
||||
}
|
||||
|
||||
/** Appends a final transcript entry with a fresh timestamp. */
|
||||
export function addTranscriptEntry(call: CallRecord, speaker: "bot" | "user", text: string): void {
|
||||
const entry: TranscriptEntry = {
|
||||
timestamp: Date.now(),
|
||||
|
||||
@@ -4,13 +4,20 @@ import type { PluginStateSyncKeyedStore } from "openclaw/plugin-sdk/plugin-state
|
||||
import { getOptionalVoiceCallStateRuntime } from "../runtime-state.js";
|
||||
import { CallRecordSchema, TerminalStates, type CallId, type CallRecord } from "../types.js";
|
||||
|
||||
/** Keyed-store namespace for call snapshot metadata rows. */
export const CALL_RECORD_EVENTS_NAMESPACE = "call-record-events";
/** Keyed-store namespace for base64 chunks that hold serialized call snapshots. */
export const CALL_RECORD_EVENT_CHUNKS_NAMESPACE = "call-record-event-chunks";
/** Retain a bounded replay log of the newest call snapshots in plugin state. */
export const MAX_CALL_RECORD_EVENTS = 1000;
/** Metadata store capacity includes prune headroom (100 rows) so a write can land before old rows drop. */
export const CALL_RECORD_EVENT_META_MAX_ENTRIES = MAX_CALL_RECORD_EVENTS + 100;
/** Keep each persisted call within the per-record plugin-state chunk ceiling. */
export const MAX_CHUNKS_PER_CALL_RECORD_EVENT = 48;
/** Chunk store capacity covers all retained snapshots plus one in-flight over-capacity write. */
export const CALL_RECORD_CHUNK_MAX_ENTRIES =
  MAX_CALL_RECORD_EVENTS * MAX_CHUNKS_PER_CALL_RECORD_EVENT + MAX_CHUNKS_PER_CALL_RECORD_EVENT;
/** Raw bytes per chunk (47 KiB) leave room for base64 overhead under keyed-store value limits. */
export const RAW_CALL_RECORD_CHUNK_BYTES = 47 * 1024;
/** Process-local counter used to order snapshots written in the same millisecond. */
let callRecordEventSequence = 0;
|
||||
|
||||
@@ -27,9 +34,13 @@ type CallRecordEventChunk = {
|
||||
};
|
||||
|
||||
/** One call snapshot together with the ordering fields used when replaying the log. */
export type PersistedCallRecord = {
  /** The parsed call snapshot itself. */
  call: CallRecord;
  /** Time the snapshot was written; primary key for canonical replay order. */
  persistedAt: number;
  /** Process-assigned counter that separates snapshots written in the same millisecond. */
  sequence: number;
  /** Last-resort deterministic tie-breaker for migrated or malformed metadata. */
  orderKey: string;
};
|
||||
|
||||
@@ -38,6 +49,7 @@ type CallRecordStateStores = {
|
||||
chunks: PluginStateSyncKeyedStore<CallRecordEventChunk>;
|
||||
};
|
||||
|
||||
/**
 * Returns where the retired JSONL call log would live under `storePath`.
 *
 * Only the path is computed; this function does not touch the file system.
 */
export function resolveVoiceCallLegacyCallLogPath(storePath: string): string {
  const legacyLogFileName = "calls.jsonl";
  return path.join(storePath, legacyLogFileName);
}
|
||||
@@ -79,12 +91,14 @@ function buildChunkKey(eventKey: string, index: number): string {
|
||||
return `${eventKey}:chunk:${String(index).padStart(4, "0")}`;
|
||||
}
|
||||
|
||||
/**
 * Derives a deterministic key for one line of the old JSONL log so migration
 * tooling can replay lines in order.
 *
 * The key is `jsonl:<index zero-padded to 8 digits>:<SHA-256 hex of the line>`.
 */
export function buildVoiceCallLegacyJsonlEventKey(line: string, index: number): string {
  const paddedIndex = String(index).padStart(8, "0");
  const lineDigest = createHash("sha256").update(line).digest("hex");
  return `jsonl:${paddedIndex}:${lineDigest}`;
}
|
||||
|
||||
/**
 * Issues the ordering stamp for the next persisted snapshot: the current
 * wall-clock time paired with the module's running sequence counter.
 */
function nextCallRecordOrder(): { persistedAt: number; sequence: number } {
  // Hand out the current counter value and advance it (wrapping at one
  // million); the counter only has to separate snapshots written in the same
  // millisecond.
  const issuedSequence = callRecordEventSequence;
  callRecordEventSequence = (issuedSequence + 1) % 1_000_000;
  const persistedAt = Date.now();
  return { persistedAt, sequence: issuedSequence };
}
|
||||
@@ -98,6 +112,7 @@ function parseEventKeySequence(key: string): number {
|
||||
return match ? Number.parseInt(match[1], 10) : 0;
|
||||
}
|
||||
|
||||
/** Parses v2 envelopes or bare legacy call records without throwing on corrupt history lines. */
|
||||
export function parseVoiceCallRecordLine(line: string, sequence = 0): PersistedCallRecord | null {
|
||||
if (!line.trim()) {
|
||||
return null;
|
||||
@@ -142,6 +157,7 @@ function countCallRecordChunks(call: CallRecord): number {
|
||||
);
|
||||
}
|
||||
|
||||
/** Trims oversized call snapshots before SQLite chunking so history writes remain bounded. */
|
||||
export function prepareVoiceCallRecordForStorage(call: CallRecord): CallRecord {
|
||||
if (countCallRecordChunks(call) <= MAX_CHUNKS_PER_CALL_RECORD_EVENT) {
|
||||
return call;
|
||||
@@ -155,6 +171,7 @@ export function prepareVoiceCallRecordForStorage(call: CallRecord): CallRecord {
|
||||
},
|
||||
};
|
||||
const candidateInputs = [
|
||||
// Preserve the newest transcript context first; older turns are least useful after restore.
|
||||
{ transcript: call.transcript.slice(-20), metadata },
|
||||
{ transcript: [], metadata },
|
||||
{
|
||||
@@ -244,6 +261,8 @@ function readCallRecordEvent(stores: CallRecordStateStores, eventKey: string): C
|
||||
for (let index = 0; index < meta.chunkCount; index += 1) {
|
||||
const chunk = stores.chunks.lookup(buildChunkKey(eventKey, index));
|
||||
if (!chunk || chunk.index !== index) {
|
||||
// A partially pruned or corrupt chunk set should drop only that snapshot;
|
||||
// older/newer snapshots can still restore the call.
|
||||
return null;
|
||||
}
|
||||
chunks.push(Buffer.from(chunk.dataBase64, "base64"));
|
||||
@@ -255,6 +274,8 @@ function readCallRecordEvent(stores: CallRecordStateStores, eventKey: string): C
|
||||
function readCallRecordEvents(stores: CallRecordStateStores): CallRecord[] {
|
||||
const sqliteCalls: PersistedCallRecord[] = stores.events
|
||||
.entries()
|
||||
// First sort by keyed-store creation order to make equal metadata stable
|
||||
// before reconstructing each chunked snapshot.
|
||||
.toSorted((a, b) => a.createdAt - b.createdAt || a.key.localeCompare(b.key))
|
||||
.map((entry) => {
|
||||
const call = readCallRecordEvent(stores, entry.key);
|
||||
@@ -271,6 +292,8 @@ function readCallRecordEvents(stores: CallRecordStateStores): CallRecord[] {
|
||||
return sqliteCalls
|
||||
.toSorted(
|
||||
(a, b) =>
|
||||
// persistedAt + sequence are the canonical write order; orderKey is a
|
||||
// final deterministic tie-breaker for migrated or malformed metadata.
|
||||
a.persistedAt - b.persistedAt ||
|
||||
a.sequence - b.sequence ||
|
||||
a.orderKey.localeCompare(b.orderKey),
|
||||
@@ -278,6 +301,7 @@ function readCallRecordEvents(stores: CallRecordStateStores): CallRecord[] {
|
||||
.map((entry) => entry.call);
|
||||
}
|
||||
|
||||
/** Appends one canonical SQLite-backed call snapshot; runtime never writes JSONL fallback logs. */
|
||||
export function persistCallRecord(storePath: string, call: CallRecord): void {
|
||||
try {
|
||||
const stores = createCallRecordStateStores(storePath);
|
||||
@@ -292,10 +316,12 @@ export function persistCallRecord(storePath: string, call: CallRecord): void {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Test hook kept from the time call-record persistence had async writers.
 *
 * It now only awaits an already-resolved promise, so callers simply yield once.
 */
export async function flushPendingCallRecordWritesForTest(): Promise<void> {
  const alreadySettled: Promise<void> = Promise.resolve();
  await alreadySettled;
}
|
||||
|
||||
/** Restores non-terminal calls and replay dedupe keys from the plugin-state snapshot log. */
|
||||
export function loadActiveCallsFromStore(storePath: string): {
|
||||
activeCalls: Map<CallId, CallRecord>;
|
||||
providerCallIdMap: Map<string, CallId>;
|
||||
@@ -319,6 +345,8 @@ export function loadActiveCallsFromStore(storePath: string): {
|
||||
}
|
||||
const callMap = new Map<CallId, CallRecord>();
|
||||
for (const call of calls) {
|
||||
// Replaying the ordered snapshot log into a map leaves the newest snapshot
|
||||
// per callId, matching the old append-only JSONL restore behavior.
|
||||
callMap.set(call.callId, call);
|
||||
}
|
||||
|
||||
@@ -343,6 +371,7 @@ export function loadActiveCallsFromStore(storePath: string): {
|
||||
return { activeCalls, providerCallIdMap, processedEventIds, rejectedProviderCallIds };
|
||||
}
|
||||
|
||||
/** Returns newest call history snapshots from plugin state, bounded by the requested limit. */
|
||||
export async function getCallHistoryFromStore(
|
||||
storePath: string,
|
||||
limit = 50,
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
import { MAX_TIMER_TIMEOUT_MS, resolveTimerTimeoutMs } from "openclaw/plugin-sdk/number-runtime";
|
||||
|
||||
/** Converts provider/config seconds into a Node-safe timer delay in milliseconds. */
|
||||
export function resolveVoiceCallSecondsTimerDelayMs(seconds: number, minMs = 1): number {
|
||||
if (!Number.isFinite(seconds)) {
|
||||
return resolveTimerTimeoutMs(MAX_TIMER_TIMEOUT_MS, MAX_TIMER_TIMEOUT_MS, minMs);
|
||||
}
|
||||
const timeoutMs = Math.floor(seconds * 1000);
|
||||
// Extremely large second values can overflow to Infinity before the timer clamp runs.
|
||||
return resolveTimerTimeoutMs(
|
||||
Number.isFinite(timeoutMs) ? timeoutMs : MAX_TIMER_TIMEOUT_MS,
|
||||
minMs,
|
||||
@@ -12,6 +14,7 @@ export function resolveVoiceCallSecondsTimerDelayMs(seconds: number, minMs = 1):
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Resolves a millisecond timer delay by delegating to `resolveTimerTimeoutMs`.
 *
 * @param timeoutMs Requested delay in milliseconds.
 * @param fallbackMs Value handed to the helper as its fallback (defaults to 1).
 * @returns Whatever the helper resolves; presumably a clamped, timer-safe
 *   delay with the fallback substituted for invalid input — confirm against
 *   the SDK helper.
 */
export function resolveVoiceCallTimerDelayMs(timeoutMs: number, fallbackMs = 1): number {
  const resolvedDelayMs = resolveTimerTimeoutMs(timeoutMs, fallbackMs);
  return resolvedDelayMs;
}
|
||||
|
||||
@@ -16,6 +16,7 @@ type MaxDurationTimerContext = Pick<
|
||||
>;
|
||||
type TranscriptWaiterContext = Pick<TimerContext, "transcriptWaiters">;
|
||||
|
||||
/** Cancels and forgets the max-duration timer for one call. */
|
||||
export function clearMaxDurationTimer(
|
||||
ctx: Pick<MaxDurationTimerContext, "maxDurationTimers">,
|
||||
callId: CallId,
|
||||
@@ -27,10 +28,15 @@ export function clearMaxDurationTimer(
|
||||
}
|
||||
}
|
||||
|
||||
/** Starts the per-call hard timeout, replacing any previous timer for the same call. */
|
||||
export function startMaxDurationTimer(params: {
|
||||
/** Manager maps/config used to find the live call and persist timeout metadata. */
|
||||
ctx: MaxDurationTimerContext;
|
||||
/** Internal call id whose timer should be replaced and tracked. */
|
||||
callId: CallId;
|
||||
/** Cleanup hook invoked after timeout metadata is persisted on the live call. */
|
||||
onTimeout: (callId: CallId) => Promise<void>;
|
||||
/** Optional millisecond override used when restoring aged calls. */
|
||||
timeoutMs?: number;
|
||||
}): void {
|
||||
clearMaxDurationTimer(params.ctx, params.callId);
|
||||
@@ -52,6 +58,7 @@ export function startMaxDurationTimer(params: {
|
||||
`[voice-call] Max duration reached (${Math.ceil(maxDurationMs / 1000)}s), ending call ${params.callId}`,
|
||||
);
|
||||
call.endReason = "timeout";
|
||||
// Persist the timeout reason before delegating to provider hangup/cleanup.
|
||||
persistCallRecord(params.ctx.storePath, call);
|
||||
await params.onTimeout(params.callId);
|
||||
}
|
||||
@@ -61,6 +68,7 @@ export function startMaxDurationTimer(params: {
|
||||
params.ctx.maxDurationTimers.set(params.callId, timer);
|
||||
}
|
||||
|
||||
/** Clears a pending final-transcript waiter without resolving or rejecting its promise. */
|
||||
export function clearTranscriptWaiter(ctx: TranscriptWaiterContext, callId: CallId): void {
|
||||
const waiter = ctx.transcriptWaiters.get(callId);
|
||||
if (!waiter) {
|
||||
@@ -70,6 +78,7 @@ export function clearTranscriptWaiter(ctx: TranscriptWaiterContext, callId: Call
|
||||
ctx.transcriptWaiters.delete(callId);
|
||||
}
|
||||
|
||||
/** Rejects and removes the pending final-transcript waiter for a call. */
|
||||
export function rejectTranscriptWaiter(
|
||||
ctx: TranscriptWaiterContext,
|
||||
callId: CallId,
|
||||
@@ -83,6 +92,7 @@ export function rejectTranscriptWaiter(
|
||||
waiter.reject(new Error(reason));
|
||||
}
|
||||
|
||||
/** Resolves a pending transcript waiter only when its optional turn token matches. */
|
||||
export function resolveTranscriptWaiter(
|
||||
ctx: TranscriptWaiterContext,
|
||||
callId: CallId,
|
||||
@@ -94,6 +104,7 @@ export function resolveTranscriptWaiter(
|
||||
return false;
|
||||
}
|
||||
if (waiter.turnToken && waiter.turnToken !== turnToken) {
|
||||
// Ignore stale transcript completions from an earlier turn on the same call.
|
||||
return false;
|
||||
}
|
||||
clearTranscriptWaiter(ctx, callId);
|
||||
@@ -101,9 +112,12 @@ export function resolveTranscriptWaiter(
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Registers a single pending final-transcript wait for a call turn. */
|
||||
export function waitForFinalTranscript(
|
||||
ctx: TimerContext,
|
||||
/** Internal call id; only one waiter may be active per call. */
|
||||
callId: CallId,
|
||||
/** Optional provider turn token that filters stale final transcripts. */
|
||||
turnToken?: string,
|
||||
): Promise<string> {
|
||||
if (ctx.transcriptWaiters.has(callId)) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { generateNotifyTwiml } from "./twiml.js";
|
||||
import { generateDtmfRedirectTwiml, generateNotifyTwiml } from "./twiml.js";
|
||||
|
||||
describe("generateNotifyTwiml", () => {
|
||||
it("renders escaped xml with the requested voice", () => {
|
||||
@@ -8,6 +8,15 @@ describe("generateNotifyTwiml", () => {
|
||||
<Response>
|
||||
<Say voice="Polly.Joanna">Call <ended> & "logged"</Say>
|
||||
<Hangup/>
|
||||
</Response>`);
|
||||
});
|
||||
|
||||
it("renders escaped DTMF redirects", () => {
  // Both the digits and the redirect URL contain XML-special characters, so the
  // expected TwiML must carry them as XML entities rather than raw `<`, `>`,
  // `&`, and `"` (raw characters would not even be well-formed XML).
  // NOTE(review): leading whitespace inside the template literal could not be
  // recovered here — confirm it matches generateDtmfRedirectTwiml's output.
  expect(generateDtmfRedirectTwiml(`12<&"`, "https://example.test/hook?x=<y>&z=1"))
    .toBe(`<?xml version="1.0" encoding="UTF-8"?>
<Response>
<Play digits="12&lt;&amp;&quot;" />
<Redirect method="POST">https://example.test/hook?x=&lt;y&gt;&amp;z=1</Redirect>
</Response>`);
});
|
||||
});
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { escapeXml } from "../voice-mapping.js";
|
||||
|
||||
/** Render a terminal Twilio TwiML response that speaks an escaped status message and hangs up. */
|
||||
export function generateNotifyTwiml(message: string, voice: string): string {
|
||||
return `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Response>
|
||||
@@ -8,6 +9,7 @@ export function generateNotifyTwiml(message: string, voice: string): string {
|
||||
</Response>`;
|
||||
}
|
||||
|
||||
/** Render Twilio TwiML that sends escaped DTMF digits before returning to the webhook. */
|
||||
export function generateDtmfRedirectTwiml(digits: string, webhookUrl: string): string {
|
||||
return `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Response>
|
||||
|
||||
@@ -1,12 +1,3 @@
|
||||
/**
|
||||
* Media Stream Handler
|
||||
*
|
||||
* Handles bidirectional audio streaming between Twilio and the AI services.
|
||||
* - Receives mu-law audio from Twilio via WebSocket
|
||||
* - Forwards to the selected realtime transcription provider
|
||||
* - Sends TTS audio back to Twilio
|
||||
*/
|
||||
|
||||
import type { IncomingMessage } from "node:http";
|
||||
import type { Duplex } from "node:stream";
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-contracts";
|
||||
@@ -101,6 +92,7 @@ const MAX_INBOUND_MESSAGE_BYTES = 64 * 1024;
|
||||
const MAX_WS_BUFFERED_BYTES = 1024 * 1024;
|
||||
const CLOSE_REASON_LOG_MAX_CHARS = 120;
|
||||
|
||||
/** Scrubs control characters and bounds carrier/user text before logging close reasons. */
|
||||
export function sanitizeLogText(value: string, maxChars: number): string {
|
||||
const sanitized = value
|
||||
.replace(/\p{Cc}/gu, " ")
|
||||
@@ -122,6 +114,12 @@ function normalizeWsMessageData(data: RawData): Buffer {
|
||||
return Buffer.from(data);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses one Twilio websocket message from any `ws` RawData representation.
|
||||
*
|
||||
* Malformed JSON is surfaced as a typed error so security tests and upgrade
|
||||
* guards can distinguish bad input from normal carrier events.
|
||||
*/
|
||||
export function parseTwilioMediaMessage(data: RawData): TwilioMediaMessage {
|
||||
const raw = normalizeWsMessageData(data);
|
||||
try {
|
||||
@@ -131,9 +129,7 @@ export function parseTwilioMediaMessage(data: RawData): TwilioMediaMessage {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Manages WebSocket connections for Twilio media streams.
|
||||
*/
|
||||
/** Manages Twilio media-stream WebSockets, STT sessions, and queued TTS playback. */
|
||||
export class MediaStreamHandler {
|
||||
private wss: WebSocketServer | null = null;
|
||||
private sessions = new Map<string, StreamSession>();
|
||||
@@ -167,7 +163,10 @@ export class MediaStreamHandler {
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle WebSocket upgrade for media stream connections.
|
||||
* Handles the HTTP upgrade into a Twilio media-stream socket.
|
||||
*
|
||||
* The handler reserves capacity before `ws` emits `connection` so slow
|
||||
* handshakes cannot bypass global connection limits.
|
||||
*/
|
||||
handleUpgrade(request: IncomingMessage, socket: Duplex, head: Buffer): void {
|
||||
if (!this.wss) {
|
||||
@@ -187,6 +186,8 @@ export class MediaStreamHandler {
|
||||
return;
|
||||
}
|
||||
|
||||
// Count the socket before ws has emitted "connection"; otherwise a burst of
|
||||
// slow handshakes can bypass the max-connection cap.
|
||||
this.inflightUpgrades += 1;
|
||||
let released = false;
|
||||
const releaseUpgradeReservation = () => {
|
||||
@@ -299,9 +300,7 @@ export class MediaStreamHandler {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle stream start event.
|
||||
*/
|
||||
/** Accepts Twilio's `start` frame and creates the STT/Talk session for the stream. */
|
||||
private handleStart(
|
||||
ws: WebSocket,
|
||||
message: TwilioMediaMessage,
|
||||
@@ -336,6 +335,8 @@ export class MediaStreamHandler {
|
||||
onPartial: (partial) => {
|
||||
const session = this.sessions.get(streamSid);
|
||||
if (session) {
|
||||
// Provider callbacks can arrive after stop/close; emit observability
|
||||
// only for the currently registered session.
|
||||
this.emitTalkEvent(session, {
|
||||
type: "transcript.delta",
|
||||
turnId: this.ensureActiveTurn(session),
|
||||
@@ -434,6 +435,8 @@ export class MediaStreamHandler {
|
||||
this.sessions.get(session.streamSid) !== session ||
|
||||
session.ws.readyState !== WebSocket.OPEN
|
||||
) {
|
||||
// The socket may close while provider auth/connect is still pending; close
|
||||
// the orphan STT session instead of announcing readiness for a dead stream.
|
||||
session.sttSession.close();
|
||||
return;
|
||||
}
|
||||
@@ -445,9 +448,7 @@ export class MediaStreamHandler {
|
||||
this.config.onTranscriptionReady?.(session.callId, session.streamSid);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle stream stop event.
|
||||
*/
|
||||
/** Tears down stream-owned STT, Talk, and TTS state exactly once on stop/close. */
|
||||
private handleStop(session: StreamSession): void {
|
||||
console.log(`[MediaStream] Stream stopped: ${session.streamSid}`);
|
||||
|
||||
@@ -498,6 +499,8 @@ export class MediaStreamHandler {
|
||||
return false;
|
||||
}
|
||||
|
||||
// A Twilio media socket is unauthenticated until its start frame arrives, so
|
||||
// bound both time and cardinality for these pre-start connections.
|
||||
const timeout = setTimeout(() => {
|
||||
if (!this.pendingConnections.has(ws)) {
|
||||
return;
|
||||
@@ -577,6 +580,8 @@ export class MediaStreamHandler {
|
||||
};
|
||||
}
|
||||
if (bufferedBeforeBytes > MAX_WS_BUFFERED_BYTES) {
|
||||
// Once ws has already crossed the cap, skip enqueueing more frames. The
|
||||
// caller treats sent:false as backpressure/failure evidence.
|
||||
try {
|
||||
session.ws.close(1013, "Backpressure: send buffer exceeded");
|
||||
} catch {
|
||||
@@ -594,6 +599,8 @@ export class MediaStreamHandler {
|
||||
session.ws.send(JSON.stringify(message));
|
||||
const bufferedAfterBytes = session.ws.bufferedAmount;
|
||||
if (bufferedAfterBytes > MAX_WS_BUFFERED_BYTES) {
|
||||
// send() can synchronously enqueue beyond the cap; close immediately so
|
||||
// the stream does not keep accumulating TTS/audio frames.
|
||||
try {
|
||||
session.ws.close(1013, "Backpressure: send buffer exceeded");
|
||||
} catch {
|
||||
@@ -660,10 +667,7 @@ export class MediaStreamHandler {
|
||||
return this.sendToStream(streamSid, { event: "clear", streamSid });
|
||||
}
|
||||
|
||||
/**
|
||||
* Queue a TTS operation for sequential playback.
|
||||
* Only one TTS operation plays at a time per stream to prevent overlap.
|
||||
*/
|
||||
/** Queues one TTS playback unit behind any active audio for the same stream. */
|
||||
async queueTts(streamSid: string, playFn: (signal: AbortSignal) => Promise<void>): Promise<void> {
|
||||
const queue = this.getTtsQueue(streamSid);
|
||||
let resolveEntry: () => void;
|
||||
@@ -692,6 +696,8 @@ export class MediaStreamHandler {
|
||||
*/
|
||||
clearTtsQueue(streamSid: string, _reason = "unspecified"): void {
|
||||
const queue = this.getTtsQueue(streamSid);
|
||||
// Barge-in resolves queued work as cancelled success while the active
|
||||
// playback observes AbortSignal, so callers do not hang during teardown.
|
||||
this.resolveQueuedTtsEntries(queue);
|
||||
this.ttsActiveControllers.get(streamSid)?.abort();
|
||||
const session = this.sessions.get(streamSid);
|
||||
|
||||
@@ -16,45 +16,38 @@ import type {
|
||||
WebhookVerificationResult,
|
||||
} from "../types.js";
|
||||
|
||||
/**
|
||||
* Abstract base interface for voice call providers.
|
||||
*
|
||||
* Each provider (Telnyx, Twilio, etc.) implements this interface to provide
|
||||
* a consistent API for the call manager.
|
||||
*
|
||||
* Responsibilities:
|
||||
* - Webhook verification and event parsing
|
||||
* - Outbound call initiation and hangup
|
||||
* - Media control (TTS playback, STT listening)
|
||||
*/
|
||||
/** Provider contract consumed by the call manager for webhook, call-control, and media actions. */
|
||||
export interface VoiceCallProvider {
|
||||
/** Provider identifier */
|
||||
/** Stable provider id stored on call records and used for restore-time status checks. */
|
||||
readonly name: ProviderName;
|
||||
|
||||
/** Publish the externally reachable webhook base URL after provider construction. */
|
||||
setPublicUrl?(url: string): void;
|
||||
|
||||
/**
|
||||
* Verify webhook signature/HMAC before processing.
|
||||
* Must be called before parseWebhookEvent.
|
||||
* Verifies provider-signed webhook input before any state mutation.
|
||||
*
|
||||
* Implementations should fail closed for bad credentials/signatures and return
|
||||
* skip metadata only for explicit local-dev bypasses.
|
||||
*/
|
||||
verifyWebhook(ctx: WebhookContext): WebhookVerificationResult;
|
||||
|
||||
/**
|
||||
* Parse provider-specific webhook payload into normalized events.
|
||||
* Returns events and optional response to send back to provider.
|
||||
* Normalizes a provider webhook into manager events plus an optional immediate response.
|
||||
*
|
||||
* This must not perform provider side effects; manager replay dedupe happens after parsing.
|
||||
*/
|
||||
parseWebhookEvent(ctx: WebhookContext, options?: WebhookParseOptions): ProviderWebhookParseResult;
|
||||
|
||||
/**
|
||||
* Consume one-time TwiML that must be served before shortcut handlers such as
|
||||
* realtime media streams take over the webhook response.
|
||||
* Consume one-time TwiML for a provider request.
|
||||
*
|
||||
* Implementations must return the TwiML at most once per provider call so a
|
||||
* replayed webhook cannot repeat pre-connect DTMF or notification playback.
|
||||
*/
|
||||
consumeInitialTwiML?: (ctx: WebhookContext) => string | null;
|
||||
|
||||
/**
|
||||
* Initiate an outbound call.
|
||||
* @returns Provider call ID and status
|
||||
*/
|
||||
/** Starts an outbound call and returns the provider call id that future webhooks will use. */
|
||||
initiateCall(input: InitiateCallInput): Promise<InitiateCallResult>;
|
||||
|
||||
/**
|
||||
@@ -63,37 +56,30 @@ export interface VoiceCallProvider {
|
||||
*/
|
||||
answerCall?: (input: AnswerCallInput) => Promise<void>;
|
||||
|
||||
/**
|
||||
* Hang up an active call.
|
||||
*/
|
||||
/** Ends an active provider call; callers handle duplicate suppression before invoking this. */
|
||||
hangupCall(input: HangupCallInput): Promise<void>;
|
||||
|
||||
/**
|
||||
* Play TTS audio to the caller.
|
||||
* The provider should handle streaming if supported.
|
||||
*/
|
||||
/** Plays synthesized speech on the active call leg using the provider's best media path. */
|
||||
playTts(input: PlayTtsInput): Promise<void>;
|
||||
|
||||
/**
|
||||
* Send DTMF digits to an active call.
|
||||
* Send already-validated DTMF digits to an active call.
|
||||
*/
|
||||
sendDtmf?: (input: SendDtmfInput) => Promise<void>;
|
||||
|
||||
/**
|
||||
* Start listening for user speech (activate STT).
|
||||
* Start listening for user speech and echo `turnToken` in final transcript callbacks when provided.
|
||||
*/
|
||||
startListening(input: StartListeningInput): Promise<void>;
|
||||
|
||||
/**
|
||||
* Stop listening for user speech (deactivate STT).
|
||||
*/
|
||||
/** Stops provider speech capture while preserving any already-finalized transcript event. */
|
||||
stopListening(input: StopListeningInput): Promise<void>;
|
||||
|
||||
/**
|
||||
* Query provider for current call status.
|
||||
* Used to verify persisted calls are still active on restart.
|
||||
* Must return `isUnknown: true` for transient errors (network, 5xx)
|
||||
* so the caller can keep the call and rely on timer-based fallback.
|
||||
* Reads provider status during restore and reconciliation.
|
||||
*
|
||||
* Transient lookup failures must return `isUnknown: true`; the manager keeps
|
||||
* the call and relies on max-duration timers instead of ending it speculatively.
|
||||
*/
|
||||
getCallStatus(input: GetCallStatusInput): Promise<GetCallStatusResult>;
|
||||
}
|
||||
|
||||
@@ -29,10 +29,17 @@ import type { VoiceCallProvider } from "./base.js";
|
||||
export class MockProvider implements VoiceCallProvider {
|
||||
readonly name = "mock" as const;
|
||||
|
||||
/**
 * Accepts every webhook without inspecting it; local fixtures are
 * intentionally unsigned, so the mock performs no signature check.
 */
verifyWebhook(_ctx: WebhookContext): WebhookVerificationResult {
  const accepted: WebhookVerificationResult = { ok: true };
  return accepted;
}
|
||||
|
||||
/**
|
||||
* Converts JSON fixture payloads into the same normalized event stream real providers return.
|
||||
*
|
||||
* Invalid JSON yields a 400 so webhook tests can cover request rejection
|
||||
* without introducing a network-backed provider.
|
||||
*/
|
||||
parseWebhookEvent(
|
||||
ctx: WebhookContext,
|
||||
_options?: WebhookParseOptions,
|
||||
@@ -100,6 +107,8 @@ export class MockProvider implements VoiceCallProvider {
|
||||
return {
|
||||
...base,
|
||||
type: evt.type,
|
||||
// Preserve explicit empty transcripts and false final flags so tests can
|
||||
// model partial/falsy provider payloads without the mock rewriting them.
|
||||
transcript: payload.transcript ?? "",
|
||||
isFinal: payload.isFinal ?? true,
|
||||
confidence: payload.confidence,
|
||||
@@ -138,6 +147,7 @@ export class MockProvider implements VoiceCallProvider {
|
||||
return {
|
||||
...base,
|
||||
type: evt.type,
|
||||
// Empty error strings are valid fixtures; only missing values get a default.
|
||||
error: payload.error ?? "unknown error",
|
||||
retryable: payload.retryable,
|
||||
};
|
||||
@@ -148,6 +158,7 @@ export class MockProvider implements VoiceCallProvider {
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns a stable synthetic provider id so tests can round-trip manager/provider state. */
|
||||
async initiateCall(input: InitiateCallInput): Promise<InitiateCallResult> {
|
||||
return {
|
||||
providerCallId: `mock-${input.callId}`,
|
||||
@@ -155,28 +166,41 @@ export class MockProvider implements VoiceCallProvider {
|
||||
};
|
||||
}
|
||||
|
||||
/** Acknowledges a hangup request without doing anything; the mock has no call leg to end. */
async hangupCall(_input: HangupCallInput): Promise<void> {
  // Intentionally empty: the mock has no side effects.
}
|
||||
|
||||
/** Acknowledges a TTS playback request immediately; no audio is produced. */
async playTts(_input: PlayTtsInput): Promise<void> {
  // Intentionally empty: the mock has no side effects.
}
|
||||
|
||||
/** Accepts a DTMF request without recording or sending the digits. */
async sendDtmf(_input: SendDtmfInput): Promise<void> {
  // Intentionally empty: the mock has no side effects.
}
|
||||
|
||||
/** Acknowledges a start-listening request; the mock keeps no listening state of its own. */
async startListening(_input: StartListeningInput): Promise<void> {
  // Intentionally empty: the mock has no side effects.
}
|
||||
|
||||
/** Acknowledges a stop-listening request so provider cleanup paths can run in tests. */
async stopListening(_input: StopListeningInput): Promise<void> {
  // Intentionally empty: the mock has no side effects.
}
|
||||
|
||||
/**
|
||||
* Simulates restore-time provider reconciliation from the synthetic provider id.
|
||||
*
|
||||
* Embedding terminal words in the id lets tests choose active vs completed
|
||||
* calls without introducing mutable provider-side state.
|
||||
*/
|
||||
async getCallStatus(input: GetCallStatusInput): Promise<GetCallStatusResult> {
|
||||
const id = normalizeLowercaseStringOrEmpty(input.providerCallId);
|
||||
// Let tests force restore/cleanup paths by embedding terminal-state words in
|
||||
// the mock provider call id; all other ids behave like active calls.
|
||||
if (id.includes("stale") || id.includes("ended") || id.includes("completed")) {
|
||||
return { status: "completed", isTerminal: true };
|
||||
}
|
||||
|
||||
@@ -26,13 +26,13 @@ import type { VoiceCallProvider } from "./base.js";
|
||||
import { guardedJsonApiRequest } from "./shared/guarded-json-api.js";
|
||||
|
||||
/** Optional settings accepted by the Plivo provider. */
export interface PlivoProviderOptions {
  /**
   * Canonical external origin used when Plivo signs or re-fetches callback
   * URLs; overrides the public URL origin for signature verification.
   */
  publicUrl?: string;
  /**
   * Skips webhook signature verification. Development-only escape hatch;
   * production should verify every Plivo callback.
   */
  skipVerification?: boolean;
  /** Outbound ring timeout in seconds, passed to Plivo's `hangup_on_ring` field. */
  ringTimeoutSec?: number;
  /**
   * Webhook security options: forwarded-header trust and host allowlist
   * controls used for callback URL reconstruction.
   */
  webhookSecurity?: WebhookSecurityConfig;
}
|
||||
|
||||
@@ -48,9 +48,11 @@ function createPlivoRequestDedupeKey(ctx: WebhookContext): string {
|
||||
if (nonceV2) {
|
||||
return `plivo:v2:${nonceV2}`;
|
||||
}
|
||||
// Unsigned/dev callbacks still need stable replay keys, so fall back to the raw body hash.
|
||||
return `plivo:fallback:${crypto.createHash("sha256").update(ctx.rawBody).digest("hex")}`;
|
||||
}
|
||||
|
||||
/** Plivo Call API provider that drives speak/listen by transferring the A-leg to XML callbacks. */
|
||||
export class PlivoProvider implements VoiceCallProvider {
|
||||
readonly name = "plivo" as const;
|
||||
|
||||
@@ -60,7 +62,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
private readonly options: PlivoProviderOptions;
|
||||
private readonly apiHost: string;
|
||||
|
||||
// Best-effort mapping between create-call request UUID and call UUID.
|
||||
// Plivo create-call returns a request UUID first; later callbacks reveal the call UUID.
|
||||
private requestUuidToCallUuid = new Map<string, string>();
|
||||
|
||||
// Used for transfer URLs and GetInput action URLs.
|
||||
@@ -85,6 +87,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
this.options = options;
|
||||
}
|
||||
|
||||
/** Sends an authenticated Plivo API request through the SSRF guard. */
|
||||
private async apiRequest<T = unknown>(params: {
|
||||
method: "GET" | "POST" | "DELETE";
|
||||
endpoint: string;
|
||||
@@ -107,6 +110,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
});
|
||||
}
|
||||
|
||||
/** Verifies Plivo signatures and returns replay keys for manager-level webhook dedupe. */
|
||||
verifyWebhook(ctx: WebhookContext): WebhookVerificationResult {
|
||||
const result = verifyPlivoWebhook(ctx, this.authToken, {
|
||||
publicUrl: this.options.publicUrl,
|
||||
@@ -129,6 +133,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
};
|
||||
}
|
||||
|
||||
/** Parses Plivo form callbacks into normalized events or one-shot XML responses. */
|
||||
parseWebhookEvent(
|
||||
ctx: WebhookContext,
|
||||
options?: WebhookParseOptions,
|
||||
@@ -154,6 +159,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
const callId = this.getCallIdFromQuery(ctx);
|
||||
const pending = callId ? this.pendingSpeakByCallId.get(callId) : undefined;
|
||||
if (callId) {
|
||||
// Pending XML payloads are single-use because Plivo fetches them via transfer callback.
|
||||
this.pendingSpeakByCallId.delete(callId);
|
||||
}
|
||||
|
||||
@@ -172,6 +178,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
const callId = this.getCallIdFromQuery(ctx);
|
||||
const pending = callId ? this.pendingListenByCallId.get(callId) : undefined;
|
||||
if (callId) {
|
||||
// Pending listen options are single-use for the transfer callback that asks for XML.
|
||||
this.pendingListenByCallId.delete(callId);
|
||||
}
|
||||
|
||||
@@ -221,6 +228,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
const requestUuid = params.get("RequestUUID") || "";
|
||||
|
||||
if (requestUuid && callUuid) {
|
||||
// Connect outbound initiation IDs to call-control IDs once Plivo exposes both.
|
||||
this.requestUuidToCallUuid.set(requestUuid, callUuid);
|
||||
}
|
||||
|
||||
@@ -298,6 +306,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
return null;
|
||||
}
|
||||
|
||||
/** Starts an outbound Plivo call and stores the webhook base needed by later transfer flows. */
|
||||
async initiateCall(input: InitiateCallInput): Promise<InitiateCallResult> {
|
||||
const webhookUrl = new URL(input.webhookUrl);
|
||||
webhookUrl.searchParams.set("provider", "plivo");
|
||||
@@ -362,6 +371,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
});
|
||||
}
|
||||
|
||||
/** Resolves Plivo's create-time request UUID to the callback-time CallUUID when available. */
|
||||
private resolveCallContext(params: {
|
||||
providerCallId: string;
|
||||
callId: string;
|
||||
@@ -370,6 +380,8 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
callUuid: string;
|
||||
webhookBase: string;
|
||||
} {
|
||||
// Plivo returns request_uuid at create time and CallUUID later on callbacks;
|
||||
// prefer the adopted CallUUID once the answer/hangup webhook links them.
|
||||
const callUuid = this.requestUuidToCallUuid.get(params.providerCallId) ?? params.providerCallId;
|
||||
const webhookBase =
|
||||
this.callUuidToWebhookUrl.get(callUuid) || this.callIdToWebhookUrl.get(params.callId);
|
||||
@@ -382,6 +394,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
return { callUuid, webhookBase };
|
||||
}
|
||||
|
||||
/** Transfers the live call leg to a short-lived XML flow for pending speak/listen payloads. */
|
||||
private async transferCallLeg(params: {
|
||||
callUuid: string;
|
||||
webhookBase: string;
|
||||
@@ -393,6 +406,8 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
transferUrl.searchParams.set("flow", params.flow);
|
||||
transferUrl.searchParams.set("callId", params.callId);
|
||||
|
||||
// Transfer the A-leg to a short-lived XML endpoint so Plivo fetches the
|
||||
// current speak/listen payload without storing text in provider URLs.
|
||||
await this.apiRequest({
|
||||
method: "POST",
|
||||
endpoint: `/Call/${params.callUuid}/`,
|
||||
@@ -404,6 +419,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
});
|
||||
}
|
||||
|
||||
/** Queues one speak payload and transfers the call leg so Plivo fetches XML once. */
|
||||
async playTts(input: PlayTtsInput): Promise<void> {
|
||||
const { callUuid, webhookBase } = this.resolveCallContext({
|
||||
providerCallId: input.providerCallId,
|
||||
@@ -416,6 +432,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
locale: input.locale,
|
||||
});
|
||||
|
||||
// The xml-speak webhook consumes this pending payload exactly once.
|
||||
await this.transferCallLeg({
|
||||
callUuid,
|
||||
webhookBase,
|
||||
@@ -424,6 +441,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
});
|
||||
}
|
||||
|
||||
/** Queues one listen payload and transfers the call leg to a GetInput XML callback. */
|
||||
async startListening(input: StartListeningInput): Promise<void> {
|
||||
const { callUuid, webhookBase } = this.resolveCallContext({
|
||||
providerCallId: input.providerCallId,
|
||||
@@ -435,6 +453,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
language: input.language,
|
||||
});
|
||||
|
||||
// The xml-listen webhook consumes this pending payload exactly once.
|
||||
await this.transferCallLeg({
|
||||
callUuid,
|
||||
webhookBase,
|
||||
@@ -443,10 +462,12 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
});
|
||||
}
|
||||
|
||||
/** No-op because Plivo GetInput ends itself after speech or timeout. */
|
||||
async stopListening(_input: StopListeningInput): Promise<void> {
|
||||
// GetInput ends automatically when speech ends.
|
||||
}
|
||||
|
||||
/** Reads Plivo call status during restore; API errors stay unknown so timers can decide later. */
|
||||
async getCallStatus(input: GetCallStatusInput): Promise<GetCallStatusResult> {
|
||||
const terminalStatuses = new Set([
|
||||
"completed",
|
||||
@@ -546,6 +567,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
private baseWebhookUrlFromCtx(ctx: WebhookContext): string | null {
|
||||
try {
|
||||
if (this.options.publicUrl) {
|
||||
// Pin callbacks to configured public origin while preserving this webhook path.
|
||||
const base = new URL(this.options.publicUrl);
|
||||
const requestUrl = new URL(ctx.url);
|
||||
base.pathname = requestUrl.pathname;
|
||||
|
||||
@@ -9,16 +9,19 @@ const TERMINAL_PROVIDER_STATUS_TO_END_REASON: Record<string, EndReason> = {
|
||||
canceled: "hangup-bot",
|
||||
};
|
||||
|
||||
/** Normalizes carrier status strings for restore checks while preserving missing as "unknown". */
|
||||
export function normalizeProviderStatus(status: string | null | undefined): string {
|
||||
const normalized = normalizeOptionalLowercaseString(status);
|
||||
return normalized && normalized.length > 0 ? normalized : "unknown";
|
||||
}
|
||||
|
||||
/** Maps terminal carrier statuses to OpenClaw end reasons; active/unknown statuses stay null. */
|
||||
export function mapProviderStatusToEndReason(status: string | null | undefined): EndReason | null {
|
||||
const normalized = normalizeProviderStatus(status);
|
||||
return TERMINAL_PROVIDER_STATUS_TO_END_REASON[normalized] ?? null;
|
||||
}
|
||||
|
||||
/** Checks whether restore should finalize a local call based on provider status alone. */
|
||||
export function isProviderStatusTerminal(status: string | null | undefined): boolean {
|
||||
return mapProviderStatusToEndReason(status) !== null;
|
||||
}
|
||||
|
||||
@@ -5,12 +5,17 @@ type GuardedJsonApiRequestParams = {
|
||||
method: "GET" | "POST" | "DELETE" | "PUT" | "PATCH";
|
||||
headers: Record<string, string>;
|
||||
body?: Record<string, unknown>;
|
||||
/** Treat 404 as an idempotent "already gone" result for delete/status probes. */
|
||||
allowNotFound?: boolean;
|
||||
/** Exact provider API hostnames permitted after SSRF resolution and redirect checks. */
|
||||
allowedHostnames: string[];
|
||||
/** Audit label emitted by the network guard for provider-specific API calls. */
|
||||
auditContext: string;
|
||||
/** Prefix preserved on thrown errors so callers can attribute provider failures. */
|
||||
errorPrefix: string;
|
||||
};
|
||||
|
||||
/** Performs a provider JSON request through the SSRF guard and always releases the response pin. */
|
||||
export async function guardedJsonApiRequest<T = unknown>(
|
||||
params: GuardedJsonApiRequestParams,
|
||||
): Promise<T> {
|
||||
@@ -28,6 +33,7 @@ export async function guardedJsonApiRequest<T = unknown>(
|
||||
try {
|
||||
if (!response.ok) {
|
||||
if (params.allowNotFound && response.status === 404) {
|
||||
// Provider lookup/delete paths use 404 as "already gone" rather than a hard failure.
|
||||
return undefined as T;
|
||||
}
|
||||
const errorText = await response.text();
|
||||
@@ -44,6 +50,7 @@ export async function guardedJsonApiRequest<T = unknown>(
|
||||
throw new Error(`${params.errorPrefix}: malformed JSON response`);
|
||||
}
|
||||
} finally {
|
||||
// Release the SSRF guard's resolved-address pin after all body reads finish.
|
||||
await release();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,14 +21,9 @@ import { verifyTelnyxWebhook } from "../webhook-security.js";
|
||||
import type { VoiceCallProvider } from "./base.js";
|
||||
import { guardedJsonApiRequest } from "./shared/guarded-json-api.js";
|
||||
|
||||
/**
|
||||
* Telnyx Voice API provider implementation.
|
||||
*
|
||||
* Uses Telnyx Call Control API v2 for managing calls.
|
||||
* @see https://developers.telnyx.com/docs/api/v2/call-control
|
||||
*/
|
||||
/** Telnyx provider knobs that affect webhook verification behavior. */
|
||||
export interface TelnyxProviderOptions {
|
||||
/** Skip webhook signature verification (development only, NOT for production) */
|
||||
/** Development-only escape hatch; production webhooks should verify Ed25519 signatures. */
|
||||
skipVerification?: boolean;
|
||||
}
|
||||
|
||||
@@ -54,11 +49,13 @@ function normalizeBase64ForCompare(value: string): string {
|
||||
function decodeClientStateBase64(value: string): string | null {
|
||||
const buffer = Buffer.from(value, "base64");
|
||||
if (normalizeBase64ForCompare(buffer.toString("base64")) !== normalizeBase64ForCompare(value)) {
|
||||
// Telnyx echoes client_state; reject malformed base64 instead of inventing a call id.
|
||||
return null;
|
||||
}
|
||||
return buffer.toString("utf8");
|
||||
}
|
||||
|
||||
/** Telnyx Call Control provider for outbound/inbound call control and PCMU media streaming. */
|
||||
export class TelnyxProvider implements VoiceCallProvider {
|
||||
readonly name = "telnyx" as const;
|
||||
|
||||
@@ -83,9 +80,7 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
this.options = options;
|
||||
}
|
||||
|
||||
/**
|
||||
* Make an authenticated request to the Telnyx API.
|
||||
*/
|
||||
/** Sends an authenticated Telnyx Call Control command through the SSRF guard. */
|
||||
private async apiRequest<T = unknown>(
|
||||
endpoint: string,
|
||||
body: Record<string, unknown>,
|
||||
@@ -106,9 +101,7 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify Telnyx webhook signature using Ed25519.
|
||||
*/
|
||||
/** Verifies Telnyx webhook signatures and returns replay keys for manager dedupe. */
|
||||
verifyWebhook(ctx: WebhookContext): WebhookVerificationResult {
|
||||
const result = verifyTelnyxWebhook(ctx, this.publicKey, {
|
||||
skipVerification: this.options.skipVerification,
|
||||
@@ -122,9 +115,7 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse Telnyx webhook event into normalized format.
|
||||
*/
|
||||
/** Parses one Telnyx webhook into the manager's normalized event envelope. */
|
||||
parseWebhookEvent(
|
||||
ctx: WebhookContext,
|
||||
options?: WebhookParseOptions,
|
||||
@@ -147,13 +138,11 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert Telnyx event to normalized event format.
|
||||
*/
|
||||
/** Converts Telnyx Call Control events while preserving verified-request dedupe keys. */
|
||||
private normalizeEvent(data: TelnyxEvent, dedupeKey?: string): NormalizedEvent | null {
|
||||
// Decode client_state from Base64 (we encode it in initiateCall)
|
||||
let callId = "";
|
||||
if (data.payload?.client_state) {
|
||||
// Outbound calls encode OpenClaw's call id in client_state; fall back to raw carrier value.
|
||||
callId = decodeClientStateBase64(data.payload.client_state) ?? data.payload.client_state;
|
||||
}
|
||||
if (!callId) {
|
||||
@@ -217,6 +206,7 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
|
||||
case "streaming.started":
|
||||
case "streaming.stopped":
|
||||
// WebSocket bridge owns stream lifecycle; carrier lifecycle webhooks are acknowledged only.
|
||||
return null;
|
||||
|
||||
default:
|
||||
@@ -224,10 +214,7 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Map Telnyx hangup cause to normalized end reason.
|
||||
* @see https://developers.telnyx.com/docs/api/v2/call-control/Call-Commands#hangup-causes
|
||||
*/
|
||||
/** Maps Telnyx hangup causes to OpenClaw terminal reasons used by call records. */
|
||||
private mapHangupCause(cause?: string): EndReason {
|
||||
switch (cause) {
|
||||
case "normal_clearing":
|
||||
@@ -253,7 +240,7 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
case "subscriber_absent":
|
||||
return "hangup-user";
|
||||
default:
|
||||
// Unknown cause - log it for debugging and return completed
|
||||
// Unknown Telnyx causes are not retryable proof; log and preserve historical completion behavior.
|
||||
if (cause) {
|
||||
console.warn(`[telnyx] Unknown hangup cause: ${cause}`);
|
||||
}
|
||||
@@ -261,6 +248,7 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
}
|
||||
}
|
||||
|
||||
/** Starts an outbound Telnyx call and embeds the OpenClaw call id in signed callback state. */
|
||||
async initiateCall(input: InitiateCallInput): Promise<InitiateCallResult> {
|
||||
const body: Record<string, unknown> = {
|
||||
connection_id: this.connectionId,
|
||||
@@ -268,6 +256,8 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
from: input.from,
|
||||
webhook_url: input.webhookUrl,
|
||||
webhook_url_method: "POST",
|
||||
// Telnyx echoes client_state on webhooks; encode the OpenClaw call id so
|
||||
// outbound callbacks can rejoin local state before call_control_id mapping exists.
|
||||
client_state: Buffer.from(input.callId).toString("base64"),
|
||||
timeout_secs: 30,
|
||||
...(input.streamUrl
|
||||
@@ -282,9 +272,7 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Hang up a call via Telnyx API.
|
||||
*/
|
||||
/** Hangs up a call-control leg; missing legs are treated as already ended. */
|
||||
async hangupCall(input: HangupCallInput): Promise<void> {
|
||||
await this.apiRequest(
|
||||
`/calls/${input.providerCallId}/actions/hangup`,
|
||||
@@ -293,8 +281,10 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
);
|
||||
}
|
||||
|
||||
/** Answers an inbound call, optionally attaching the bidirectional PCMU stream bridge. */
|
||||
async answerCall(input: AnswerCallInput): Promise<void> {
|
||||
const body: Record<string, unknown> = {
|
||||
// Stable command id makes answer retries idempotent for one OpenClaw call.
|
||||
command_id: `openclaw-answer-${input.callId}`,
|
||||
...(input.streamUrl
|
||||
? buildTelnyxStreamingFields(input.streamUrl, input.streamAuthToken)
|
||||
@@ -303,9 +293,7 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
await this.apiRequest(`/calls/${input.providerCallId}/actions/answer`, body);
|
||||
}
|
||||
|
||||
/**
|
||||
* Play TTS audio via Telnyx speak action.
|
||||
*/
|
||||
/** Plays text through Telnyx speak, passing provider-specific voice ids through unchanged. */
|
||||
async playTts(input: PlayTtsInput): Promise<void> {
|
||||
await this.apiRequest(`/calls/${input.providerCallId}/actions/speak`, {
|
||||
command_id: crypto.randomUUID(),
|
||||
@@ -315,9 +303,7 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Start transcription (STT) via Telnyx.
|
||||
*/
|
||||
/** Starts Telnyx transcription for the active call leg. */
|
||||
async startListening(input: StartListeningInput): Promise<void> {
|
||||
await this.apiRequest(`/calls/${input.providerCallId}/actions/transcription_start`, {
|
||||
command_id: crypto.randomUUID(),
|
||||
@@ -325,9 +311,7 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop transcription via Telnyx.
|
||||
*/
|
||||
/** Stops Telnyx transcription; missing legs are safe during hangup races. */
|
||||
async stopListening(input: StopListeningInput): Promise<void> {
|
||||
await this.apiRequest(
|
||||
`/calls/${input.providerCallId}/actions/transcription_stop`,
|
||||
@@ -336,6 +320,7 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
);
|
||||
}
|
||||
|
||||
/** Reads Telnyx liveness for restore; ambiguous responses stay non-terminal. */
|
||||
async getCallStatus(input: GetCallStatusInput): Promise<GetCallStatusResult> {
|
||||
try {
|
||||
const data = await guardedJsonApiRequest<{ data?: { state?: string; is_alive?: boolean } }>({
|
||||
@@ -357,8 +342,8 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
|
||||
const state = data.data?.state ?? "unknown";
|
||||
const isAlive = data.data?.is_alive;
|
||||
// If is_alive is missing, treat as unknown rather than terminal (P1 fix)
|
||||
if (isAlive === undefined) {
|
||||
// Missing liveness is not terminal proof; keep restore logic conservative.
|
||||
return { status: state, isTerminal: false, isUnknown: true };
|
||||
}
|
||||
return { status: state, isTerminal: !isAlive };
|
||||
@@ -372,6 +357,8 @@ function buildTelnyxStreamingFields(
|
||||
streamUrl: string,
|
||||
streamAuthToken: string | undefined,
|
||||
): Record<string, unknown> {
|
||||
// Realtime voice expects 8kHz PCMU both ways; keep these fields in sync with
|
||||
// the WebSocket bridge's frame codec and sample-rate assumptions.
|
||||
return {
|
||||
stream_url: streamUrl,
|
||||
stream_track: "inbound_track",
|
||||
|
||||
@@ -48,6 +48,9 @@ function createTwilioRequestDedupeKey(ctx: WebhookContext, verifiedRequestKey?:
|
||||
return verifiedRequestKey;
|
||||
}
|
||||
|
||||
// Before signature verification succeeds, derive idempotency from the actual
|
||||
// request facts we route on; Twilio's raw idempotency header is not enough
|
||||
// because query tokens select the OpenClaw call/turn.
|
||||
const signature = getHeader(ctx.headers, "x-twilio-signature") ?? "";
|
||||
const params = new URLSearchParams(ctx.rawBody);
|
||||
const callSid = params.get("CallSid") ?? "";
|
||||
@@ -73,6 +76,7 @@ type TwilioProviderConfig = {
|
||||
authToken?: string;
|
||||
};
|
||||
|
||||
/** Twilio Voice provider for REST call control, TwiML callback routing, and media streams. */
|
||||
export class TwilioProvider implements VoiceCallProvider {
|
||||
readonly name = "twilio" as const;
|
||||
|
||||
@@ -82,23 +86,23 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
private readonly callWebhookUrls = new Map<string, string>();
|
||||
private readonly options: TwilioProviderOptions;
|
||||
|
||||
/** Current public webhook URL (set when tunnel starts or from config) */
|
||||
/** Provider-visible webhook origin used for signature verification and generated stream URLs. */
|
||||
private currentPublicUrl: string | null = null;
|
||||
|
||||
/** Optional telephony TTS provider for streaming TTS */
|
||||
/** Optional streaming TTS adapter used before falling back to TwiML redirects. */
|
||||
private ttsProvider: TelephonyTtsProvider | null = null;
|
||||
|
||||
/** Optional media stream handler for sending audio */
|
||||
/** Optional media bridge used to enqueue outbound audio to live Twilio streams. */
|
||||
private mediaStreamHandler: MediaStreamHandler | null = null;
|
||||
|
||||
/** Map of call SID to stream SID for media streams */
|
||||
/** Current Twilio streamSid per callSid; outbound media/clear/mark frames require it. */
|
||||
private callStreamMap = new Map<string, string>();
|
||||
/** Per-call tokens for media stream authentication */
|
||||
/** Per-call stream tokens validated before accepting Twilio media websocket starts. */
|
||||
private streamAuthTokens = new Map<string, string>();
|
||||
|
||||
/** Storage for TwiML content (for notify mode with URL-based TwiML) */
|
||||
/** One-shot TwiML payloads consumed by initial notify-mode callbacks. */
|
||||
private readonly twimlStorage = new Map<string, string>();
|
||||
/** Track notify-mode calls to avoid streaming on follow-up callbacks */
|
||||
/** Notify-mode call ids that should not be upgraded into streaming conversations. */
|
||||
private readonly notifyCalls = new Set<string>();
|
||||
private readonly activeStreamCalls = new Set<string>();
|
||||
|
||||
@@ -151,30 +155,37 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
}
|
||||
}
|
||||
|
||||
/** Updates the externally reachable webhook origin after tunnel/exposure setup. */
|
||||
setPublicUrl(url: string): void {
|
||||
this.currentPublicUrl = url;
|
||||
}
|
||||
|
||||
/** Returns the public origin currently used for generated callbacks and stream URLs. */
|
||||
getPublicUrl(): string | null {
|
||||
return this.currentPublicUrl;
|
||||
}
|
||||
|
||||
/** Injects the telephony TTS adapter used for live media-stream playback. */
|
||||
setTTSProvider(provider: TelephonyTtsProvider): void {
|
||||
this.ttsProvider = provider;
|
||||
}
|
||||
|
||||
/** Injects the media stream bridge that owns outbound audio queueing and barge-in clears. */
|
||||
setMediaStreamHandler(handler: MediaStreamHandler): void {
|
||||
this.mediaStreamHandler = handler;
|
||||
}
|
||||
|
||||
/** Associates a Twilio stream SID with a call SID so later playback can target the socket. */
|
||||
registerCallStream(callSid: string, streamSid: string): void {
|
||||
this.callStreamMap.set(callSid, streamSid);
|
||||
}
|
||||
|
||||
/** Returns whether a live stream is registered for the Twilio call SID. */
|
||||
hasRegisteredStream(callSid: string): boolean {
|
||||
return this.callStreamMap.has(callSid);
|
||||
}
|
||||
|
||||
/** Removes a stream mapping, preserving newer reconnect streams when an old stop arrives late. */
|
||||
unregisterCallStream(callSid: string, streamSid?: string): void {
|
||||
const currentStreamSid = this.callStreamMap.get(callSid);
|
||||
if (!currentStreamSid) {
|
||||
@@ -184,16 +195,20 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
return;
|
||||
}
|
||||
if (streamSid && currentStreamSid !== streamSid) {
|
||||
// Twilio can deliver a stop for an older stream after reconnecting the
|
||||
// same call; keep the newer stream registered so playback is not severed.
|
||||
return;
|
||||
}
|
||||
this.callStreamMap.delete(callSid);
|
||||
this.activeStreamCalls.delete(callSid);
|
||||
}
|
||||
|
||||
/** True when TwiML should connect new calls to the media stream bridge instead of static TwiML. */
|
||||
isConversationStreamConnectEnabled(): boolean {
|
||||
return Boolean(this.mediaStreamHandler && this.getStreamUrl());
|
||||
}
|
||||
|
||||
/** Validates a one-time media-stream token before accepting Twilio's websocket start frame. */
|
||||
isValidStreamToken(callSid: string, token?: string): boolean {
|
||||
const expected = this.streamAuthTokens.get(callSid);
|
||||
if (!expected || !token) {
|
||||
@@ -245,6 +260,9 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
if (!isTwilioCallNotInProgressError(err)) {
|
||||
throw err;
|
||||
}
|
||||
// Twilio can acknowledge answer/status webhooks before the Calls API
|
||||
// accepts live TwiML updates for that SID. Short retries bridge that
|
||||
// provider race without hiding real API failures.
|
||||
console.warn(
|
||||
`[voice-call] Twilio ${operation} update hit call state race (21220); retrying in ${retryDelayMs}ms`,
|
||||
);
|
||||
@@ -383,6 +401,8 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
|
||||
const endReason = mapProviderStatusToEndReason(callStatus);
|
||||
if (endReason) {
|
||||
// Terminal status callbacks are the last provider-owned cleanup point for
|
||||
// stream credentials when local hangup did not initiate the call ending.
|
||||
this.streamAuthTokens.delete(callSid);
|
||||
this.activeStreamCalls.delete(callSid);
|
||||
if (callIdOverride) {
|
||||
@@ -430,9 +450,13 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
});
|
||||
|
||||
if (decision.consumeStoredTwimlCallId) {
|
||||
// Stored notify/pre-connect TwiML is single-use; replaying it on later
|
||||
// callbacks would restart setup digits instead of entering conversation.
|
||||
this.deleteStoredTwiml(decision.consumeStoredTwimlCallId);
|
||||
}
|
||||
if (decision.activateStreamCallSid) {
|
||||
// activeStreamCalls is the admission lock for inbound calls before the
|
||||
// WebSocket start event has registered a stream SID.
|
||||
this.activeStreamCalls.add(decision.activateStreamCallSid);
|
||||
}
|
||||
|
||||
@@ -498,6 +522,8 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
if (existing) {
|
||||
return existing;
|
||||
}
|
||||
// Keep the token stable for the call: Twilio may fetch TwiML more than once
|
||||
// before the WebSocket "start" frame carries customParameters.token.
|
||||
const token = crypto.randomBytes(16).toString("base64url");
|
||||
this.streamAuthTokens.set(callSid, token);
|
||||
return token;
|
||||
@@ -510,6 +536,8 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
}
|
||||
const token = this.getStreamAuthToken(callSid);
|
||||
const url = new URL(baseUrl);
|
||||
// Keep the token in URL state until getStreamConnectXml moves it into a
|
||||
// Twilio <Parameter>; Twilio drops WebSocket query strings on connect.
|
||||
url.searchParams.set("token", token);
|
||||
return url.toString();
|
||||
}
|
||||
@@ -668,6 +696,8 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
<Redirect method="POST">${escapeXml(webhookUrl)}</Redirect>
|
||||
</Response>`;
|
||||
|
||||
// Redirect back to the stored webhook URL so the call returns to normal
|
||||
// dynamic TwiML after Twilio finishes playing the DTMF sequence.
|
||||
await this.updateLiveCallTwiml(input.providerCallId, twiml, "sendDtmf");
|
||||
}
|
||||
|
||||
@@ -697,6 +727,8 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
sent?: unknown;
|
||||
};
|
||||
return {
|
||||
// Older handlers returned void; treat that as success while allowing
|
||||
// newer handlers to report dropped frames or marks explicitly.
|
||||
sent: typed.sent === undefined ? true : Boolean(typed.sent),
|
||||
};
|
||||
};
|
||||
@@ -854,10 +886,6 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Twilio-specific types
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
interface TwilioCallResponse {
|
||||
sid: string;
|
||||
status: string;
|
||||
|
||||
@@ -1,17 +1,15 @@
|
||||
import type { WebhookSecurityConfig } from "../config.js";
|
||||
|
||||
/**
|
||||
* Twilio Voice API provider options.
|
||||
*/
|
||||
/** Runtime options for Twilio webhook verification, media stream URLs, and dev-only bypasses. */
|
||||
export interface TwilioProviderOptions {
|
||||
/** Allow ngrok free tier compatibility mode (loopback only, less secure) */
|
||||
/** Allows unsigned loopback callbacks produced by ngrok's free interstitial flow. */
|
||||
allowNgrokFreeTierLoopbackBypass?: boolean;
|
||||
/** Override public URL for signature verification */
|
||||
/** Canonical external origin used when Twilio signs a URL different from the local request. */
|
||||
publicUrl?: string;
|
||||
/** Path for media stream WebSocket (e.g., /voice/stream) */
|
||||
/** WebSocket path advertised in generated TwiML stream responses. */
|
||||
streamPath?: string;
|
||||
/** Skip webhook signature verification (development only) */
|
||||
/** Development-only escape hatch; production should verify every Twilio callback. */
|
||||
skipVerification?: boolean;
|
||||
/** Webhook security options (forwarded headers/allowlist) */
|
||||
/** Forwarded-header trust and host allowlist controls for signature URL reconstruction. */
|
||||
webhookSecurity?: WebhookSecurityConfig;
|
||||
}
|
||||
|
||||
@@ -23,9 +23,13 @@ function parseTwilioApiError(text: string): ParsedTwilioApiError {
|
||||
}
|
||||
}
|
||||
|
||||
/** Twilio REST failure with structured status/code metadata for provider retry and race handling. */
|
||||
export class TwilioApiError extends Error {
|
||||
/** HTTP status returned by Twilio. */
|
||||
readonly httpStatus: number;
|
||||
/** Raw response body retained for diagnostics without reparsing at call sites. */
|
||||
readonly responseText: string;
|
||||
/** Twilio-specific numeric error code, when the response body exposes one. */
|
||||
readonly twilioCode?: number;
|
||||
|
||||
constructor(httpStatus: number, responseText: string) {
|
||||
@@ -39,12 +43,19 @@ export class TwilioApiError extends Error {
|
||||
}
|
||||
}
|
||||
|
||||
/** Sends a Twilio REST form request through the SSRF guard and releases the resolved-address pin. */
|
||||
export async function twilioApiRequest<T = unknown>(params: {
|
||||
/** Twilio REST API origin; normally `https://api.twilio.com`. */
|
||||
baseUrl: string;
|
||||
/** Account SID used for HTTP Basic auth. */
|
||||
accountSid: string;
|
||||
/** Auth token paired with the account SID. */
|
||||
authToken: string;
|
||||
/** API path beginning at the account-scoped resource endpoint. */
|
||||
endpoint: string;
|
||||
/** Form body; array values are encoded as repeated Twilio form keys. */
|
||||
body: URLSearchParams | Record<string, string | string[]>;
|
||||
/** Treat 404 as an idempotent missing resource instead of throwing. */
|
||||
allowNotFound?: boolean;
|
||||
}): Promise<T> {
|
||||
const bodyParams =
|
||||
@@ -52,6 +63,7 @@ export async function twilioApiRequest<T = unknown>(params: {
|
||||
? params.body
|
||||
: Object.entries(params.body).reduce((acc, [key, value]) => {
|
||||
if (Array.isArray(value)) {
|
||||
// Twilio expects repeated form keys for multi-value params like StatusCallbackEvent.
|
||||
for (const entry of value) {
|
||||
acc.append(key, entry);
|
||||
}
|
||||
@@ -95,6 +107,7 @@ export async function twilioApiRequest<T = unknown>(params: {
|
||||
throw new Error("Twilio API returned malformed JSON.");
|
||||
}
|
||||
} finally {
|
||||
// Release the resolved-address pin after response text has been consumed.
|
||||
await release();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,6 +37,11 @@ function isOutboundDirection(direction: string | null): boolean {
|
||||
return direction?.startsWith("outbound") ?? false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts the Twilio webhook fields needed for TwiML response routing.
|
||||
* The raw body carries Twilio form fields; the query string distinguishes
|
||||
* OpenClaw status callbacks and one-shot TwiML requests.
|
||||
*/
|
||||
export function readTwimlRequestView(ctx: WebhookContext): TwimlRequestView {
|
||||
const params = new URLSearchParams(ctx.rawBody);
|
||||
const type = normalizeOptionalString(ctx.query?.type);
|
||||
@@ -51,26 +56,39 @@ export function readTwimlRequestView(ctx: WebhookContext): TwimlRequestView {
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Chooses stored, streaming, pause, queue, or empty TwiML for a Twilio webhook.
|
||||
* Stored notify/pre-connect TwiML wins once, status callbacks never control
|
||||
* media, and inbound streams are serialized until the WebSocket path accepts.
|
||||
*/
|
||||
export function decideTwimlResponse(input: TwimlPolicyInput): TwimlDecision {
|
||||
if (input.callIdFromQuery && !input.isStatusCallback) {
|
||||
if (input.hasStoredTwiml) {
|
||||
// Initial notify/pre-connect TwiML wins before any streaming decision.
|
||||
return { kind: "stored", consumeStoredTwimlCallId: input.callIdFromQuery };
|
||||
}
|
||||
if (input.isNotifyCall) {
|
||||
// Notify-mode calls should not fall through into streaming after their
|
||||
// one spoken message has already been served.
|
||||
return { kind: "empty" };
|
||||
}
|
||||
|
||||
if (isOutboundDirection(input.direction)) {
|
||||
// Outbound conversation calls can stream even before Twilio reports
|
||||
// in-progress; waiting would miss the first media setup callback.
|
||||
return input.canStream ? { kind: "stream" } : { kind: "pause" };
|
||||
}
|
||||
}
|
||||
|
||||
if (input.isStatusCallback) {
|
||||
// Status callbacks are event notifications, not instructions for call media.
|
||||
return { kind: "empty" };
|
||||
}
|
||||
|
||||
if (input.direction === "inbound") {
|
||||
if (input.hasActiveStreams) {
|
||||
// Only one inbound stream can be active until the WebSocket layer accepts
|
||||
// or releases the current call; queue extras at the carrier.
|
||||
return { kind: "queue" };
|
||||
}
|
||||
if (input.canStream && input.callSid) {
|
||||
|
||||
@@ -2,6 +2,7 @@ import type { WebhookContext, WebhookVerificationResult } from "../../types.js";
|
||||
import { verifyTwilioWebhook } from "../../webhook-security.js";
|
||||
import type { TwilioProviderOptions } from "../twilio.types.js";
|
||||
|
||||
/** Verifies Twilio callbacks using the externally visible URL and configured proxy trust policy. */
|
||||
export function verifyTwilioProviderWebhook(params: {
|
||||
ctx: WebhookContext;
|
||||
authToken: string;
|
||||
|
||||
@@ -20,9 +20,13 @@ function limitText(text: string, maxChars: number): string {
|
||||
return `${text.slice(0, Math.max(0, maxChars - 32)).trimEnd()}\n[truncated]`;
|
||||
}
|
||||
|
||||
/** Reads configured workspace context files into bounded prompt sections for realtime voice turns. */
|
||||
async function readWorkspaceVoiceContextFiles(params: {
|
||||
/** Agent workspace root; all configured files are resolved inside this directory. */
|
||||
workspaceDir: string;
|
||||
/** Relative file names from voice-call realtime agent context config. */
|
||||
files: readonly string[];
|
||||
/** Shared character budget across headings and file contents. */
|
||||
maxChars: number;
|
||||
}): Promise<string[]> {
|
||||
const sections: string[] = [];
|
||||
@@ -35,11 +39,15 @@ async function readWorkspaceVoiceContextFiles(params: {
|
||||
if (remaining <= 0) {
|
||||
continue;
|
||||
}
|
||||
// The security runtime keeps reads rooted in the agent workspace, so config
|
||||
// file names can be user-controlled without allowing path escape.
|
||||
const content = await workspaceRoot.readText(file).catch(() => undefined);
|
||||
const trimmed = content?.trim();
|
||||
if (!trimmed) {
|
||||
continue;
|
||||
}
|
||||
// Charge headings against the same budget as content so a long file list
|
||||
// cannot crowd out the final prompt with metadata alone.
|
||||
const body = limitText(trimmed, Math.max(0, remaining - file.length - 16));
|
||||
const section = `### ${file}\n${body}`;
|
||||
sections.push(section);
|
||||
@@ -48,10 +56,20 @@ async function readWorkspaceVoiceContextFiles(params: {
|
||||
return sections;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds realtime voice system instructions with bounded agent identity/context capsules.
|
||||
*
|
||||
* The returned prompt keeps immediate phone-turn context small and leaves deeper
|
||||
* workspace, memory, and tool work behind the realtime consult tool.
|
||||
*/
|
||||
export async function buildRealtimeVoiceInstructions(params: {
|
||||
/** Provider/system baseline instructions before plugin-specific policy and context. */
|
||||
baseInstructions: string;
|
||||
/** Voice-call plugin config controlling consult policy and context inclusion. */
|
||||
config: VoiceCallConfig;
|
||||
/** Core OpenClaw config used to resolve the selected agent identity/workspace. */
|
||||
coreConfig: CoreConfig;
|
||||
/** Injected agent helpers from the plugin runtime boundary. */
|
||||
agentRuntime: CoreAgentDeps;
|
||||
}): Promise<string> {
|
||||
const { config } = params;
|
||||
@@ -66,6 +84,8 @@ export async function buildRealtimeVoiceInstructions(params: {
|
||||
return sections.filter(Boolean).join("\n\n");
|
||||
}
|
||||
|
||||
// Realtime calls need a small always-available context capsule; larger memory,
|
||||
// tools, and workspace state stay behind openclaw_agent_consult.
|
||||
const agentId = config.agentId ?? "main";
|
||||
const capsule: string[] = [
|
||||
"OpenClaw agent voice context:",
|
||||
@@ -108,6 +128,8 @@ export async function buildRealtimeVoiceInstructions(params: {
|
||||
}
|
||||
}
|
||||
|
||||
// Keep the voice capsule after policy guidance: it is persona/context, not a
|
||||
// stronger instruction layer than realtime consult and transfer rules.
|
||||
sections.push(limitText(capsule.join("\n\n"), contextConfig.maxChars));
|
||||
return sections.filter(Boolean).join("\n\n");
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME } from "openclaw/plugin-sdk/realtime-voice";
|
||||
|
||||
/**
 * Baseline realtime voice system instructions that require tool-backed consults for hard answers.
 * The consult tool name is interpolated from the plugin SDK constant so the prompt always
 * names the tool the realtime model is actually offered.
 */
export const DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS = `You are OpenClaw's phone-call realtime voice interface. Keep spoken replies brief and natural. When a question needs deeper reasoning, current information, or tools, call ${REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME} before answering.`;
|
||||
|
||||
@@ -9,14 +9,28 @@ type Logger = {
|
||||
debug?: (message: string) => void;
|
||||
};
|
||||
|
||||
/**
|
||||
* Resolves a realtime voice fast-context consult using voice-call-specific labels.
|
||||
*
|
||||
* This keeps the policy implementation in the shared SDK while making fallback
|
||||
* prompts and debug logs describe a phone caller instead of a generic user.
|
||||
*/
|
||||
export async function resolveRealtimeFastContextConsult(params: {
|
||||
/** Current OpenClaw config snapshot used by the shared resolver. */
|
||||
cfg: OpenClawConfig;
|
||||
/** Agent whose memory/session context should be queried. */
|
||||
agentId: string;
|
||||
/** Voice-call session key used to scope session context lookup. */
|
||||
sessionKey: string;
|
||||
/** Fast-context policy and retrieval limits from voice-call config. */
|
||||
config: RealtimeVoiceFastContextConfig;
|
||||
/** Tool-call arguments from the realtime model; validated by the SDK resolver. */
|
||||
args: unknown;
|
||||
/** Optional debug logger for SDK consult decisions. */
|
||||
logger: Logger;
|
||||
}): Promise<RealtimeVoiceFastContextConsultResult> {
|
||||
// Voice-call consults share the SDK resolver, but label the audience as a
|
||||
// caller so fallback prompts and logs stay telephony-specific.
|
||||
return await resolveRealtimeVoiceFastContextConsult({
|
||||
...params,
|
||||
labels: {
|
||||
|
||||
@@ -1,8 +1,3 @@
|
||||
/**
|
||||
* Voice call response generator - uses the embedded OpenClaw agent for tool support.
|
||||
* Routes voice responses through the same agent infrastructure as messaging.
|
||||
*/
|
||||
|
||||
import crypto from "node:crypto";
|
||||
import { applyModelOverrideToSessionEntry } from "openclaw/plugin-sdk/model-session-runtime";
|
||||
import {
|
||||
@@ -15,26 +10,28 @@ import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js";
|
||||
import { resolveVoiceResponseModel } from "./response-model.js";
|
||||
|
||||
/** Inputs required to generate one spoken reply through the embedded agent. */
export type VoiceResponseParams = {
  /** Voice-call route config that selects agent, model, timeout, and session scope. */
  voiceConfig: VoiceCallConfig;
  /** Core OpenClaw config used by the embedded agent runtime and session store. */
  coreConfig: CoreConfig;
  /** Injected host agent runtime used to create/reuse the voice response session. */
  agentRuntime: CoreAgentDeps;
  /** Internal call id used for per-call session keys and run ids. */
  callId: string;
  /** Persisted call session key from the call record, when already resolved. */
  sessionKey?: string;
  /** Caller's phone number, used for phone-scoped fallback session keys and prompts. */
  from: string;
  /** Durable conversation transcript included in the system prompt as call history. */
  transcript: Array<{ speaker: "user" | "bot"; text: string }>;
  /** Latest caller utterance sent as the embedded-agent prompt. */
  userMessage: string;
};
|
||||
|
||||
/** Outcome of one embedded-agent voice response attempt. */
export type VoiceResponseResult = {
  /** Spoken text extracted from the agent payloads, or null for silence/failure. */
  text: string | null;
  /** User-safe failure summary when the embedded response could not be produced. */
  error?: string;
};
|
||||
|
||||
@@ -81,6 +78,7 @@ function normalizeSpokenText(value: string): string | null {
|
||||
return normalized.length > 0 ? normalized : null;
|
||||
}
|
||||
|
||||
/** Recovers the required spoken JSON object even when the model wraps it in fences or prose. */
|
||||
function tryParseSpokenJson(text: string): string | null {
|
||||
const candidates: string[] = [];
|
||||
const trimmed = text.trim();
|
||||
@@ -97,6 +95,8 @@ function tryParseSpokenJson(text: string): string | null {
|
||||
const firstBrace = trimmed.indexOf("{");
|
||||
const lastBrace = trimmed.lastIndexOf("}");
|
||||
if (firstBrace >= 0 && lastBrace > firstBrace) {
|
||||
// Models sometimes wrap the required JSON in prose; recover the outer object
|
||||
// before falling back to plain-text sanitization.
|
||||
candidates.push(trimmed.slice(firstBrace, lastBrace + 1));
|
||||
}
|
||||
|
||||
@@ -153,6 +153,7 @@ function isLikelyMetaReasoningParagraph(paragraph: string): boolean {
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Drops obvious planning text while preserving conversational fallback output for the caller. */
|
||||
function sanitizePlainSpokenText(text: string): string | null {
|
||||
const withoutCodeFences = text.replace(/```[\s\S]*?```/g, " ").trim();
|
||||
if (!withoutCodeFences) {
|
||||
@@ -161,6 +162,8 @@ function sanitizePlainSpokenText(text: string): string | null {
|
||||
|
||||
const paragraphs = normalizeStringEntries(withoutCodeFences.split(/\n\s*\n+/));
|
||||
|
||||
// Keep conversational plain text usable, but drop obvious planning paragraphs
|
||||
// that should never be spoken to the caller.
|
||||
while (paragraphs.length > 1 && isLikelyMetaReasoningParagraph(paragraphs[0])) {
|
||||
paragraphs.shift();
|
||||
}
|
||||
@@ -168,10 +171,13 @@ function sanitizePlainSpokenText(text: string): string | null {
|
||||
return normalizeSpokenText(paragraphs.join(" "));
|
||||
}
|
||||
|
||||
/** Extracts only caller-safe speech segments from mixed agent text, reasoning, and error payloads. */
|
||||
function extractSpokenTextFromPayloads(payloads: VoiceResponsePayload[]): string | null {
|
||||
const spokenSegments: string[] = [];
|
||||
|
||||
for (const payload of payloads) {
|
||||
// Voice payloads can interleave hidden reasoning/tool errors with user-facing
|
||||
// text; only speak explicit non-error output.
|
||||
if (payload.isError || payload.isReasoning) {
|
||||
continue;
|
||||
}
|
||||
@@ -198,17 +204,21 @@ function extractSpokenTextFromPayloads(payloads: VoiceResponsePayload[]): string
|
||||
return spokenSegments.length > 0 ? spokenSegments.join(" ").trim() : null;
|
||||
}
|
||||
|
||||
/**
 * Scopes voice sessions into agent sandboxes so phone/call keys cannot collide across agents.
 *
 * @param agentId - Agent id used as the sandbox prefix for keys that are not yet agent-scoped.
 * @param sessionKey - Persisted voice session key; surrounding whitespace is ignored.
 * @returns The trimmed key unchanged when it already starts with `agent:` (case-insensitive),
 *   otherwise `agent:<agentId>:<trimmed key>`.
 */
function resolveVoiceSandboxSessionKey(agentId: string, sessionKey: string): string {
  const trimmed = sessionKey.trim();
  // The prefix check is case-insensitive, but the key itself is returned with its original casing.
  if (trimmed.toLowerCase().startsWith("agent:")) {
    return trimmed;
  }
  // Embedded agents expect an agent-scoped sandbox key even when the persisted
  // voice session key is phone- or call-scoped.
  return `agent:${agentId}:${trimmed}`;
}
|
||||
|
||||
/**
|
||||
* Generates a spoken voice response through the embedded OpenClaw agent runtime,
* with full tool support and the same agent infrastructure as messaging.
* The agent is forced through a JSON spoken-output contract, but this helper
* also sanitizes common plain-text fallback output before returning speech.
|
||||
*/
|
||||
export async function generateVoiceResponse(
|
||||
params: VoiceResponseParams,
|
||||
@@ -238,26 +248,24 @@ export async function generateVoiceResponse(
|
||||
const agentId = voiceConfig.agentId ?? "main";
|
||||
const toolsAllow = resolveVoiceAgentToolsAllow(cfg, agentId);
|
||||
|
||||
// Resolve paths
|
||||
const storePath = agentRuntime.session.resolveStorePath(cfg.session?.store, { agentId });
|
||||
const agentDir = agentRuntime.resolveAgentDir(cfg, agentId);
|
||||
const workspaceDir = agentRuntime.resolveAgentWorkspaceDir(cfg, agentId);
|
||||
|
||||
// Ensure workspace exists
|
||||
await agentRuntime.ensureAgentWorkspace({ dir: workspaceDir });
|
||||
|
||||
// Load or create session entry
|
||||
const now = Date.now();
|
||||
const existingSessionEntry = agentRuntime.session.getSessionEntry({
|
||||
storePath,
|
||||
sessionKey: resolvedSessionKey,
|
||||
});
|
||||
|
||||
// Resolve model from config
|
||||
const { provider, model } = resolveVoiceResponseModel({ voiceConfig, agentRuntime });
|
||||
|
||||
let sessionEntry = existingSessionEntry;
|
||||
if (!sessionEntry?.sessionId || voiceConfig.responseModel) {
|
||||
// Response-model overrides are pinned on the session before the embedded
|
||||
// agent starts so inherited model/auth metadata cannot leak from old calls.
|
||||
sessionEntry =
|
||||
(await agentRuntime.session.patchSessionEntry({
|
||||
storePath,
|
||||
@@ -295,14 +303,11 @@ export async function generateVoiceResponse(
|
||||
agentId,
|
||||
});
|
||||
|
||||
// Resolve thinking level
|
||||
const thinkLevel = agentRuntime.resolveThinkingDefault({ cfg, provider, model });
|
||||
|
||||
// Resolve agent identity for personalized prompt
|
||||
const identity = agentRuntime.resolveAgentIdentity(cfg, agentId);
|
||||
const agentName = identity?.name?.trim() || "assistant";
|
||||
|
||||
// Build system prompt with conversation history
|
||||
const basePrompt =
|
||||
voiceConfig.responseSystemPrompt ??
|
||||
`You are ${agentName}, a helpful voice assistant on a phone call. Keep responses brief and conversational (1-2 sentences max). Be natural and friendly. The caller's phone number is ${from}. You have access to tools - use them when helpful.`;
|
||||
@@ -314,9 +319,10 @@ export async function generateVoiceResponse(
|
||||
.join("\n");
|
||||
extraSystemPrompt = `${basePrompt}\n\nConversation so far:\n${history}`;
|
||||
}
|
||||
// The embedded agent may stream through the normal text channel, so the system
|
||||
// prompt carries a strict JSON spoken-output contract before payload parsing.
|
||||
extraSystemPrompt = `${extraSystemPrompt}\n\n${VOICE_SPOKEN_OUTPUT_CONTRACT}`;
|
||||
|
||||
// Resolve timeout
|
||||
const timeoutMs = voiceConfig.responseTimeoutMs ?? agentRuntime.resolveAgentTimeoutMs({ cfg });
|
||||
const runId = `voice:${callId}:${Date.now()}`;
|
||||
|
||||
|
||||
@@ -1,17 +1,29 @@
|
||||
import type { VoiceCallConfig } from "./config.js";
|
||||
import type { CoreAgentDeps } from "./core-bridge.js";
|
||||
|
||||
/**
|
||||
* Resolves the provider/model pair used for non-realtime voice responses.
|
||||
*
|
||||
* `responseModel` accepts either `provider/model-id` or a legacy single-segment
|
||||
* model id. Multi-segment provider model ids split only at the first slash.
|
||||
*/
|
||||
export function resolveVoiceResponseModel(params: {
|
||||
/** Voice-call config containing the optional response model override. */
|
||||
voiceConfig: VoiceCallConfig;
|
||||
/** Runtime defaults used when config omits a model or uses a legacy bare model id. */
|
||||
agentRuntime: CoreAgentDeps;
|
||||
}): {
|
||||
/** Original model reference used for diagnostics and request metadata. */
|
||||
modelRef: string;
|
||||
/** Provider id selected from the prefix or runtime default. */
|
||||
provider: string;
|
||||
/** Provider-owned model id, which may itself contain slash-delimited path segments. */
|
||||
model: string;
|
||||
} {
|
||||
const modelRef =
|
||||
params.voiceConfig.responseModel ??
|
||||
`${params.agentRuntime.defaults.provider}/${params.agentRuntime.defaults.model}`;
|
||||
// Split only on the first slash so model ids can contain provider-owned path segments.
|
||||
const slashIndex = modelRef.indexOf("/");
|
||||
|
||||
return {
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import { createPluginRuntimeStore, type PluginRuntime } from "openclaw/plugin-sdk/runtime-store";
|
||||
|
||||
/**
 * Runtime state capability shared by the voice-call CLI, runtime, webhook, and manager.
 * Narrowed to the `state` member of the plugin runtime so consumers depend on nothing else.
 */
export type VoiceCallStateRuntime = Pick<PluginRuntime, "state">;
|
||||
|
||||
// The store is optional for tests and degraded CLI paths, but initialized runtime paths
|
||||
// use it as the canonical persisted-state bridge for call records.
|
||||
const {
|
||||
setRuntime: setVoiceCallStateRuntime,
|
||||
clearRuntime: clearVoiceCallStateRuntime,
|
||||
|
||||
@@ -37,12 +37,19 @@ import type { ToolHandlerContext } from "./webhook/realtime-handler.js";
|
||||
import { cleanupTailscaleExposure, setupTailscaleExposure } from "./webhook/tailscale.js";
|
||||
|
||||
/** Handles to the running voice-call provider, manager, and webhook server, plus their cleanup. */
export type VoiceCallRuntime = {
  /** Normalized voice-call config used for provider, webhook, and manager setup. */
  config: VoiceCallConfig;
  /** Provider implementation selected from the normalized config. */
  provider: VoiceCallProvider;
  /** Call manager owning active calls, persistence, and provider event handling. */
  manager: CallManager;
  /** HTTP/websocket webhook server bound for provider callbacks. */
  webhookServer: VoiceCallWebhookServer;
  /** Provider-facing webhook URL after public URL, tunnel, or local fallback resolution. */
  webhookUrl: string;
  /** Externally reachable origin when a public URL/tunnel/Tailscale route is active. */
  publicUrl: string | null;
  /** Idempotent cleanup for tunnel/Tailscale exposure and the webhook server. */
  stop: () => Promise<void>;
};
|
||||
|
||||
@@ -108,6 +115,7 @@ function loadRealtimeHandler(): Promise<RealtimeHandlerModule> {
|
||||
return realtimeHandlerPromise;
|
||||
}
|
||||
|
||||
/** Resolves the agent consult session to the same phone/call scope used by classic responses. */
|
||||
function resolveVoiceCallConsultSessionKey(call: {
|
||||
config: VoiceCallConfig;
|
||||
sessionKey?: string;
|
||||
@@ -127,6 +135,7 @@ function resolveVoiceCallConsultSessionKey(call: {
|
||||
});
|
||||
}
|
||||
|
||||
/** Converts durable call transcript plus one optional live partial into consult-agent messages. */
|
||||
function mapVoiceCallConsultTranscript(
|
||||
call: {
|
||||
transcript?: Array<{ speaker: "user" | "bot"; text: string }>;
|
||||
@@ -141,11 +150,14 @@ function mapVoiceCallConsultTranscript(
|
||||
);
|
||||
const partial = context?.partialUserTranscript?.trim();
|
||||
if (partial && transcript.at(-1)?.text !== partial) {
|
||||
// Tool calls can arrive before the final STT commit; include the latest
|
||||
// partial once without duplicating already-committed transcript text.
|
||||
transcript.push({ role: "user", text: partial });
|
||||
}
|
||||
return transcript;
|
||||
}
|
||||
|
||||
/** Owns shutdown order for provider-facing exposure and the local webhook listener. */
|
||||
function createRuntimeResourceLifecycle(params: {
|
||||
config: VoiceCallConfig;
|
||||
webhookServer: VoiceCallWebhookServer;
|
||||
@@ -174,6 +186,8 @@ function createRuntimeResourceLifecycle(params: {
|
||||
}
|
||||
stopped = true;
|
||||
const suppressErrors = opts?.suppressErrors ?? false;
|
||||
// Stop in reverse exposure order so provider-facing routes disappear
|
||||
// before the local webhook server is torn down.
|
||||
await runStep(async () => {
|
||||
if (tunnelResult) {
|
||||
await tunnelResult.stop();
|
||||
@@ -189,6 +203,7 @@ function createRuntimeResourceLifecycle(params: {
|
||||
};
|
||||
}
|
||||
|
||||
/** Instantiates the selected provider after config/env/default resolution has completed. */
|
||||
async function resolveProvider(config: VoiceCallConfig): Promise<VoiceCallProvider> {
|
||||
const allowNgrokFreeTierLoopbackBypass =
|
||||
config.tunnel?.provider === "ngrok" &&
|
||||
@@ -261,13 +276,25 @@ async function resolveRealtimeProvider(params: {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Starts the provider, webhook server, optional realtime bridge, and cleanup lifecycle.
|
||||
* The returned runtime is fully initialized: manager state restored, exposure
|
||||
* chosen, provider URLs wired, and realtime/TTS integrations attached when enabled.
|
||||
*/
|
||||
export async function createVoiceCallRuntime(params: {
|
||||
/** Raw plugin config; normalized and validated before any provider is started. */
|
||||
config: VoiceCallConfig;
|
||||
/** Narrow core config bridge used by legacy response/TTS call sites. */
|
||||
coreConfig: CoreConfig;
|
||||
/** Full host config used for provider/plugin lookup and realtime resolution. */
|
||||
fullConfig?: OpenClawConfig;
|
||||
/** Embedded agent runtime used for classic and realtime voice consults. */
|
||||
agentRuntime: CoreAgentDeps;
|
||||
/** Optional plugin state runtime installed before manager restore. */
|
||||
stateRuntime?: VoiceCallStateRuntime["state"];
|
||||
/** Optional core TTS runtime used for Twilio streaming telephony speech. */
|
||||
ttsRuntime?: TelephonyTtsRuntime;
|
||||
/** Optional logger; console methods are used when omitted. */
|
||||
logger?: Logger;
|
||||
}): Promise<VoiceCallRuntime> {
|
||||
const {
|
||||
@@ -287,6 +314,8 @@ export async function createVoiceCallRuntime(params: {
|
||||
};
|
||||
|
||||
const config = resolveVoiceCallConfig(rawConfig);
|
||||
// fullConfig carries the complete host config for provider/plugin lookups; coreConfig
|
||||
// is the narrowed voice-call bridge used by older call sites and tests.
|
||||
const cfg = fullConfig ?? (coreConfig as OpenClawConfig);
|
||||
|
||||
if (!config.enabled) {
|
||||
@@ -309,6 +338,8 @@ export async function createVoiceCallRuntime(params: {
|
||||
setVoiceCallStateRuntime({ state: stateRuntime });
|
||||
}
|
||||
const manager = new CallManager(config);
|
||||
// Resolve realtime lazily only when enabled so normal TTS/STT call flows do
|
||||
// not load provider runtimes or validate realtime credentials.
|
||||
const realtimeProvider = config.realtime.enabled
|
||||
? await resolveRealtimeProvider({
|
||||
config,
|
||||
@@ -382,6 +413,8 @@ export async function createVoiceCallRuntime(params: {
|
||||
if (fastContext.handled) {
|
||||
return fastContext.result;
|
||||
}
|
||||
// Slow consults reuse the normal embedded-agent lane, but fork from the
|
||||
// requester session when an outbound call came from another channel.
|
||||
const { provider: agentProvider, model } = resolveVoiceResponseModel({
|
||||
voiceConfig: effectiveConfig,
|
||||
agentRuntime,
|
||||
@@ -434,7 +467,6 @@ export async function createVoiceCallRuntime(params: {
|
||||
// keeps the port bound while the runtime promise rejects, causing
|
||||
// EADDRINUSE on the next attempt. See: #32387
|
||||
try {
|
||||
// Determine public URL - priority: config.publicUrl > tunnel > legacy tailscale
|
||||
let publicUrl: string | null = config.publicUrl ?? null;
|
||||
|
||||
if (!publicUrl && config.tunnel?.provider && config.tunnel.provider !== "none") {
|
||||
@@ -474,11 +506,15 @@ export async function createVoiceCallRuntime(params: {
|
||||
provider.setPublicUrl?.(publicUrl);
|
||||
}
|
||||
if (publicUrl && realtimeProvider) {
|
||||
// Realtime stream TwiML must use the same externally reachable origin as
|
||||
// provider webhooks, not the local bind URL.
|
||||
webhookServer.getRealtimeHandler()?.setPublicUrl(publicUrl);
|
||||
}
|
||||
|
||||
const realtimeHandler = webhookServer.getRealtimeHandler();
|
||||
if (realtimeHandler) {
|
||||
// Providers that attach streams during answer/initiate get one-time WS
|
||||
// tokens from the realtime handler and echo them on upgrade.
|
||||
manager.streamSessionIssuer = (request) => realtimeHandler.issueStreamSession(request);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { convertPcmToMulaw8k, resamplePcmTo8k } from "./telephony-audio.js";
|
||||
import { chunkAudio, convertPcmToMulaw8k, resamplePcmTo8k } from "./telephony-audio.js";
|
||||
|
||||
function makeSinePcm(
|
||||
sampleRate: number,
|
||||
@@ -79,3 +79,14 @@ describe("telephony-audio convertPcmToMulaw8k", () => {
|
||||
expect(unalignedMulaw.equals(mulaw)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("telephony-audio chunkAudio", () => {
  it("returns streaming frame views with a short final chunk", () => {
    const audio = Buffer.from([1, 2, 3, 4, 5]);
    const chunks = [...chunkAudio(audio, 2)];

    // Five bytes at chunk size two: two full frames plus a one-byte tail.
    expect(chunks.map((chunk) => [...chunk])).toEqual([[1, 2], [3, 4], [5]]);

    // Frames are expected to alias the source buffer rather than copy it,
    // so a write to the source must be visible through the first frame.
    audio[0] = 9;
    expect(chunks[0]?.[0]).toBe(9);
  });
});
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
/** Core realtime-voice audio conversion helpers re-exported for voice-call provider code. */
|
||||
export { convertPcmToMulaw8k, resamplePcmTo8k } from "openclaw/plugin-sdk/realtime-voice";
|
||||
|
||||
/**
|
||||
* Chunks 8kHz mono mu-law audio into streaming frames (20ms each at the default 160-byte chunk size); the final frame may be shorter.
|
||||
*/
|
||||
export function chunkAudio(audio: Buffer, chunkSize = 160): Generator<Buffer, void, unknown> {
|
||||
return (function* () {
|
||||
for (let i = 0; i < audio.length; i += chunkSize) {
|
||||
// Yield Buffer views instead of copies so large synthesized replies stream without extra allocation.
|
||||
yield audio.subarray(i, Math.min(i + chunkSize, audio.length));
|
||||
}
|
||||
})();
|
||||
|
||||
@@ -11,27 +11,47 @@ import { deepMergeDefined } from "./deep-merge.js";
|
||||
import { convertPcmToMulaw8k } from "./telephony-audio.js";
|
||||
|
||||
/** Bridge to the core TTS runtime used by voice-call before telephony audio conversion. */
export type TelephonyTtsRuntime = {
  /**
   * Synthesize PCM audio through the core TTS runtime before telephony conversion.
   * Voice-call passes merged global/route config and any allowed directive overrides.
   */
  textToSpeechTelephony: (params: {
    /** Caller-facing speech text after voice-call strips directive control markup. */
    text: string;
    /** Core config after route-specific voice-call TTS overrides are merged in. */
    cfg: CoreConfig;
    /** Optional preference store path forwarded to core TTS runtimes that support it. */
    prefsPath?: string;
    /** Directive-controlled provider/model/voice overrides accepted by policy. */
    overrides?: TtsDirectiveOverrides;
  }) => Promise<{
    /** Whether synthesis succeeded; the audio fields below are only meaningful when true. */
    success: boolean;
    /** PCM audio returned by the selected TTS provider before 8 kHz mu-law conversion. */
    audioBuffer?: Buffer;
    /** Sample rate for the returned PCM buffer. Required on success. */
    sampleRate?: number;
    /** Provider that produced audio, used for fallback diagnostics. */
    provider?: string;
    /** Original provider when the TTS runtime failed over to another provider. */
    fallbackFrom?: string;
    /** Ordered provider attempts, when the runtime exposes a fallback chain. */
    attemptedProviders?: string[];
    /** Human-readable failure reason when synthesis did not produce audio. */
    error?: string;
  }>;
};
|
||||
|
||||
/** Telephony-ready TTS adapter consumed by the call flow. */
export type TelephonyTtsProvider = {
  /** Maximum time the call flow should wait for speech synthesis before falling back. */
  synthesisTimeoutMs: number;
  /**
   * Convert response text into 8 kHz mu-law audio that telephony providers can stream.
   * Throws when core TTS fails or omits the PCM buffer/sample rate required for conversion.
   */
  synthesizeForTelephony: (text: string) => Promise<Buffer>;
};
|
||||
|
||||
/** Default wait budget, in milliseconds, for per-call telephony TTS synthesis before text fallback. */
export const TELEPHONY_DEFAULT_TTS_TIMEOUT_MS = 8000;
|
||||
|
||||
type TelephonyModelOverrideConfig = {
|
||||
@@ -45,10 +65,18 @@ type TelephonyModelOverrideConfig = {
|
||||
allowSeed?: boolean;
|
||||
};
|
||||
|
||||
/**
|
||||
* Builds the telephony TTS adapter that applies global/route voice-call overrides,
|
||||
* directive policy, fallback logging, and final PCM-to-mu-law conversion.
|
||||
*/
|
||||
export function createTelephonyTtsProvider(params: {
|
||||
/** Base core config supplied by the plugin host. */
|
||||
coreConfig: CoreConfig;
|
||||
/** Route-specific TTS config layered over `coreConfig.messages.tts`. */
|
||||
ttsOverride?: VoiceCallTtsConfig;
|
||||
/** Core TTS runtime bridge used before telephony audio conversion. */
|
||||
runtime: TelephonyTtsRuntime;
|
||||
/** Optional warning sink for ignored directives and provider fallbacks. */
|
||||
logger?: {
|
||||
warn?: (message: string) => void;
|
||||
};
|
||||
@@ -69,6 +97,8 @@ export function createTelephonyTtsProvider(params: {
|
||||
return {
|
||||
synthesisTimeoutMs,
|
||||
synthesizeForTelephony: async (text: string) => {
|
||||
// Directive tags can hide caller-facing text or override speaker/model settings.
|
||||
// Parse them before sending text to TTS so callers never hear control syntax.
|
||||
const directives = parseTtsDirectives(text, modelOverrides, {
|
||||
cfg: mergedConfig,
|
||||
providerConfigs,
|
||||
@@ -140,6 +170,8 @@ function mergeTtsConfig(
|
||||
if (!base) {
|
||||
return override;
|
||||
}
|
||||
// Number routes layer TTS settings over global voice-call TTS; deepMergeDefined
|
||||
// preserves existing nested provider fields while blocking prototype pollution.
|
||||
return deepMergeDefined(base, override) as VoiceCallTtsConfig;
|
||||
}
|
||||
|
||||
@@ -148,6 +180,8 @@ function resolveTelephonyModelOverridePolicy(
|
||||
): SpeechModelOverridePolicy {
|
||||
const enabled = overrides?.enabled ?? true;
|
||||
if (!enabled) {
|
||||
// Disabled means no directive-controlled TTS surface at all, not just
|
||||
// falling back to per-field defaults.
|
||||
return {
|
||||
enabled: false,
|
||||
allowText: false,
|
||||
@@ -163,6 +197,8 @@ function resolveTelephonyModelOverridePolicy(
|
||||
return {
|
||||
enabled: true,
|
||||
allowText: allow(overrides?.allowText),
|
||||
// Provider swaps can cross billing/latency/security boundaries, so they
|
||||
// remain opt-in even when other directive overrides are enabled by default.
|
||||
allowProvider: allow(overrides?.allowProvider, false),
|
||||
allowVoice: allow(overrides?.allowVoice),
|
||||
allowModelId: allow(overrides?.allowModelId),
|
||||
@@ -208,6 +244,8 @@ function collectTelephonyProviderConfigs(
|
||||
const normalized = normalizeProviderId(providerId) ?? providerId;
|
||||
entries[normalized] = asProviderConfig(value);
|
||||
}
|
||||
// Older configs also allow provider blocks directly under messages.tts; keep those
|
||||
// readable for directive overrides without treating scalar TTS settings as providers.
|
||||
const reservedKeys = new Set([
|
||||
"auto",
|
||||
"enabled",
|
||||
@@ -231,6 +269,8 @@ function collectTelephonyProviderConfigs(
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
// Keep legacy direct provider blocks available to directive parsing, but do
|
||||
// not let scalar global TTS fields masquerade as provider configs.
|
||||
const normalized = normalizeProviderId(key) ?? key;
|
||||
entries[normalized] ??= asProviderConfig(value);
|
||||
}
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
import type { VoiceCallConfig } from "./config.js";
|
||||
import { DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS } from "./realtime-defaults.js";
|
||||
|
||||
/** Build a complete valid voice-call config baseline for focused tests. */
|
||||
export function createVoiceCallBaseConfig(params?: {
|
||||
/** Provider id to set on the config; defaults to mock. */
|
||||
provider?: "telnyx" | "twilio" | "plivo" | "mock";
|
||||
/** Tunnel provider to set on the config; defaults to none. */
|
||||
tunnelProvider?: "none" | "ngrok";
|
||||
}): VoiceCallConfig {
|
||||
return {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { normalizeOptionalString } from "openclaw/plugin-sdk/string-coerce-runtime";
|
||||
import type { VoiceCallTtsConfig } from "./config.js";
|
||||
|
||||
/** Reads the active provider's preferred speaker from modern and legacy TTS config keys. */
|
||||
function resolveProviderVoiceSetting(providerConfig: unknown): string | undefined {
|
||||
if (!providerConfig || typeof providerConfig !== "object") {
|
||||
return undefined;
|
||||
@@ -11,6 +12,8 @@ function resolveProviderVoiceSetting(providerConfig: unknown): string | undefine
|
||||
voice?: unknown;
|
||||
voiceId?: unknown;
|
||||
};
|
||||
// Prefer the voice-call-specific keys, then fall back to legacy provider TTS keys
|
||||
// so existing per-provider configs keep selecting the same speaker.
|
||||
return (
|
||||
normalizeOptionalString(candidate.speakerVoice) ??
|
||||
normalizeOptionalString(candidate.speakerVoiceId) ??
|
||||
@@ -19,10 +22,21 @@ function resolveProviderVoiceSetting(providerConfig: unknown): string | undefine
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Looks up the speaker/voice hint configured for the selected telephony TTS provider.
 *
 * Only `config.tts.providers[config.tts.provider]` is consulted; voice settings on
 * other provider blocks are never read here, so they cannot influence the result.
 *
 * @param config - Object carrying the optional voice-call TTS configuration.
 * @returns The voice resolved by `resolveProviderVoiceSetting` for the selected
 *   provider block, or `undefined` when no provider is selected.
 */
export function resolvePreferredTtsVoice(config: { tts?: VoiceCallTtsConfig }): string | undefined {
  const tts = config.tts;
  const activeProvider = tts?.provider;
  // Without a selected provider there is no block to inspect; otherwise hand
  // just that provider's block to the shared voice-setting resolver.
  return activeProvider
    ? resolveProviderVoiceSetting(tts?.providers?.[activeProvider])
    : undefined;
}
|
||||
|
||||
@@ -8,59 +8,45 @@ import { getTailscaleDnsName } from "./webhook/tailscale.js";
|
||||
|
||||
const NGROK_LOG_BUFFER_MAX_CHARS = 16_384;
|
||||
|
||||
/**
|
||||
* Tunnel configuration for exposing the webhook server.
|
||||
*/
|
||||
interface TunnelConfig {
|
||||
/** Tunnel provider: ngrok, tailscale-serve, or tailscale-funnel */
|
||||
/** Tunnel provider: ngrok, tailscale-serve, tailscale-funnel, or none. */
|
||||
provider: "ngrok" | "tailscale-serve" | "tailscale-funnel" | "none";
|
||||
/** Local port to tunnel */
|
||||
/** Local webhook server port to expose. */
|
||||
port: number;
|
||||
/** Path prefix for the tunnel (e.g., /voice/webhook) */
|
||||
/** Webhook path appended to the provider-visible public origin. */
|
||||
path: string;
|
||||
/** ngrok auth token (optional, enables longer sessions) */
|
||||
/** Optional ngrok auth token, applied before the tunnel starts. */
|
||||
ngrokAuthToken?: string;
|
||||
/** ngrok custom domain (paid feature) */
|
||||
/** Optional ngrok custom domain. */
|
||||
ngrokDomain?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Result of starting a tunnel.
|
||||
*/
|
||||
export interface TunnelResult {
|
||||
/** The public URL */
|
||||
/** Provider-visible webhook URL, including the configured webhook path. */
|
||||
publicUrl: string;
|
||||
/** Function to stop the tunnel */
|
||||
/** Idempotent cleanup hook for the spawned tunnel route/process. */
|
||||
stop: () => Promise<void>;
|
||||
/** Tunnel provider name */
|
||||
/** Tunnel provider that produced this public URL. */
|
||||
provider: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start an ngrok tunnel to expose the local webhook server.
|
||||
*
|
||||
* Uses the ngrok CLI which must be installed: https://ngrok.com/download
|
||||
*
|
||||
* @example
|
||||
* const tunnel = await startNgrokTunnel({ port: 3334, path: '/voice/webhook' });
|
||||
* console.log('Public URL:', tunnel.publicUrl);
|
||||
* // Later: await tunnel.stop();
|
||||
*/
|
||||
/** Starts an ngrok CLI tunnel and returns the provider-visible webhook URL. */
|
||||
export async function startNgrokTunnel(config: {
|
||||
/** Local webhook server port to expose through ngrok. */
|
||||
port: number;
|
||||
/** Webhook path appended to the ngrok public origin. */
|
||||
path: string;
|
||||
/** Optional ngrok auth token configured before startup. */
|
||||
authToken?: string;
|
||||
/** Optional ngrok custom domain. */
|
||||
domain?: string;
|
||||
}): Promise<TunnelResult> {
|
||||
// Set auth token if provided
|
||||
if (config.authToken) {
|
||||
await runNgrokCommand(["config", "add-authtoken", config.authToken]);
|
||||
}
|
||||
|
||||
// Build ngrok command args
|
||||
const args = ["http", String(config.port), "--log", "stdout", "--log-format", "json"];
|
||||
|
||||
// Add custom domain if provided (paid ngrok feature)
|
||||
if (config.domain) {
|
||||
args.push("--domain", config.domain);
|
||||
}
|
||||
@@ -86,12 +72,12 @@ export async function startNgrokTunnel(config: {
|
||||
try {
|
||||
const log = JSON.parse(line);
|
||||
|
||||
// ngrok logs the public URL in a 'started tunnel' message
|
||||
// The JSON log stream is the stable readiness signal; stdout prose can
|
||||
// vary across ngrok versions and should not drive URL discovery.
|
||||
if (log.msg === "started tunnel" && log.url) {
|
||||
publicUrl = log.url;
|
||||
}
|
||||
|
||||
// Also check for the URL field directly
|
||||
if (log.addr && log.url && !publicUrl) {
|
||||
publicUrl = log.url;
|
||||
}
|
||||
@@ -101,7 +87,7 @@ export async function startNgrokTunnel(config: {
|
||||
resolved = true;
|
||||
clearTimeout(timeout);
|
||||
|
||||
// Add path to the public URL
|
||||
// Providers call the webhook path, not the bare ngrok origin.
|
||||
const fullUrl = publicUrl + config.path;
|
||||
|
||||
console.log(`[voice-call] ngrok tunnel active: ${fullUrl}`);
|
||||
@@ -119,7 +105,7 @@ export async function startNgrokTunnel(config: {
|
||||
});
|
||||
}
|
||||
} catch {
|
||||
// Not JSON, might be startup message
|
||||
// Ignore non-JSON startup text; stderr handles actionable CLI errors.
|
||||
}
|
||||
};
|
||||
|
||||
@@ -139,7 +125,6 @@ export async function startNgrokTunnel(config: {
|
||||
|
||||
proc.stderr.on("data", (data: Buffer) => {
|
||||
const msg = data.toString();
|
||||
// Check for common errors
|
||||
if (msg.includes("ERR_NGROK")) {
|
||||
if (!resolved) {
|
||||
resolved = true;
|
||||
@@ -168,9 +153,6 @@ export async function startNgrokTunnel(config: {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Run an ngrok command and wait for completion.
|
||||
*/
|
||||
async function runNgrokCommand(args: string[]): Promise<string> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const proc = spawn("ngrok", args, {
|
||||
@@ -200,9 +182,7 @@ async function runNgrokCommand(args: string[]): Promise<string> {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if ngrok is installed and available.
|
||||
*/
|
||||
/** Checks whether the ngrok CLI is installed without surfacing spawn failures to callers. */
|
||||
export async function isNgrokAvailable(): Promise<boolean> {
|
||||
return new Promise((resolve) => {
|
||||
const proc = spawn("ngrok", ["version"], {
|
||||
@@ -219,15 +199,15 @@ export async function isNgrokAvailable(): Promise<boolean> {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Start a Tailscale serve/funnel tunnel.
|
||||
*/
|
||||
/** Starts one Tailscale serve/funnel route for the configured webhook path. */
|
||||
export async function startTailscaleTunnel(config: {
|
||||
/** Tailscale exposure mode; funnel is internet-public, serve is tailnet-scoped. */
|
||||
mode: "serve" | "funnel";
|
||||
/** Local webhook server port to expose. */
|
||||
port: number;
|
||||
/** Webhook path to expose on the tailnet DNS name. */
|
||||
path: string;
|
||||
}): Promise<TunnelResult> {
|
||||
// Get Tailscale DNS name
|
||||
const dnsName = await getTailscaleDnsName();
|
||||
if (!dnsName) {
|
||||
throw new Error("Could not get Tailscale DNS name. Is Tailscale running?");
|
||||
@@ -282,9 +262,6 @@ export async function startTailscaleTunnel(config: {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop a Tailscale serve/funnel tunnel.
|
||||
*/
|
||||
async function stopTailscaleTunnel(mode: "serve" | "funnel", path: string): Promise<void> {
|
||||
return new Promise((resolve) => {
|
||||
const proc = spawn("tailscale", [mode, "off", path], {
|
||||
@@ -303,9 +280,7 @@ async function stopTailscaleTunnel(mode: "serve" | "funnel", path: string): Prom
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Start a tunnel based on configuration.
|
||||
*/
|
||||
/** Dispatches the configured webhook exposure provider, returning null for disabled tunnels. */
|
||||
export async function startTunnel(config: TunnelConfig): Promise<TunnelResult | null> {
|
||||
switch (config.provider) {
|
||||
case "ngrok":
|
||||
|
||||
@@ -1,27 +1,16 @@
|
||||
import { z } from "zod";
|
||||
import type { CallMode } from "./config.js";
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Provider Identifiers
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
const ProviderNameSchema = z.enum(["telnyx", "twilio", "plivo", "mock"]);
|
||||
/** Carrier/provider ids implemented by the voice-call plugin. */
|
||||
export type ProviderName = z.infer<typeof ProviderNameSchema>;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Core Call Identifiers
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
/** Internal call identifier (UUID) */
|
||||
/** Internal call identifier generated by OpenClaw and used for manager/session state. */
|
||||
export type CallId = string;
|
||||
|
||||
/** Provider-specific call identifier */
|
||||
/** Provider-specific call identifier used by carrier webhooks and control APIs. */
|
||||
type ProviderCallId = string;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Call Lifecycle States
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
const CallStateSchema = z.enum([
|
||||
// Non-terminal states
|
||||
"initiated",
|
||||
@@ -41,6 +30,7 @@ const CallStateSchema = z.enum([
|
||||
"busy",
|
||||
"voicemail",
|
||||
]);
|
||||
/** Normalized call lifecycle state used by manager persistence and provider events. */
|
||||
export type CallState = z.infer<typeof CallStateSchema>;
|
||||
|
||||
export const TerminalStates = new Set<CallState>([
|
||||
@@ -66,12 +56,9 @@ const EndReasonSchema = z.enum([
|
||||
"busy",
|
||||
"voicemail",
|
||||
]);
|
||||
/** Terminal call reason persisted after a provider or local hangup event. */
|
||||
export type EndReason = z.infer<typeof EndReasonSchema>;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Normalized Call Events
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
const BaseEventSchema = z.object({
|
||||
id: z.string(),
|
||||
// Stable provider-derived key for idempotency/replay dedupe.
|
||||
@@ -128,49 +115,56 @@ const NormalizedEventSchema = z.discriminatedUnion("type", [
|
||||
retryable: z.boolean().optional(),
|
||||
}),
|
||||
]);
|
||||
/** Provider webhook events normalized before manager state transitions run. */
|
||||
export type NormalizedEvent = z.infer<typeof NormalizedEventSchema>;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Call Direction
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
const CallDirectionSchema = z.enum(["outbound", "inbound"]);
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Call Record
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
const TranscriptEntrySchema = z.object({
|
||||
timestamp: z.number(),
|
||||
speaker: z.enum(["bot", "user"]),
|
||||
text: z.string(),
|
||||
isFinal: z.boolean().default(true),
|
||||
});
|
||||
/** Transcript row stored on a call record after speech or bot output events. */
|
||||
export type TranscriptEntry = z.infer<typeof TranscriptEntrySchema>;
|
||||
|
||||
export const CallRecordSchema = z.object({
|
||||
/** Internal call id that keys active manager state and persisted snapshots. */
|
||||
callId: z.string(),
|
||||
/** Provider call id once the carrier has accepted or reported the call. */
|
||||
providerCallId: z.string().optional(),
|
||||
/** Provider that owns this call record. */
|
||||
provider: ProviderNameSchema,
|
||||
/** Whether OpenClaw placed the call or received it from the provider webhook. */
|
||||
direction: CallDirectionSchema,
|
||||
/** Current normalized lifecycle state. */
|
||||
state: CallStateSchema,
|
||||
/** Caller/source phone number. */
|
||||
from: z.string(),
|
||||
/** Destination/OpenClaw phone number. */
|
||||
to: z.string(),
|
||||
/** Persisted agent session key for classic and realtime voice turns. */
|
||||
sessionKey: z.string().optional(),
|
||||
/** Local or provider event timestamp when the call record was created. */
|
||||
startedAt: z.number(),
|
||||
/** Provider event timestamp for answer, when known. */
|
||||
answeredAt: z.number().optional(),
|
||||
/** Provider/local timestamp for terminal state, when known. */
|
||||
endedAt: z.number().optional(),
|
||||
/** Terminal reason after finalization. */
|
||||
endReason: EndReasonSchema.optional(),
|
||||
/** User/bot transcript entries retained for response generation and restore. */
|
||||
transcript: z.array(TranscriptEntrySchema).default([]),
|
||||
/** Provider event replay keys already applied to this call. */
|
||||
processedEventIds: z.array(z.string()).default([]),
|
||||
/** Route, mode, latency, and provider-specific metadata kept with the call. */
|
||||
metadata: z.record(z.string(), z.unknown()).optional(),
|
||||
});
|
||||
/** Persisted call state shared by the manager, webhook, and restore flow. */
|
||||
export type CallRecord = z.infer<typeof CallRecordSchema>;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Webhook Types
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
/** Result of authenticating a provider webhook before event parsing. */
|
||||
export type WebhookVerificationResult = {
|
||||
ok: boolean;
|
||||
reason?: string;
|
||||
@@ -185,6 +179,7 @@ export type WebhookParseOptions = {
|
||||
verifiedRequestKey?: string;
|
||||
};
|
||||
|
||||
/** Raw HTTP webhook request material passed to provider adapters. */
|
||||
export type WebhookContext = {
|
||||
headers: Record<string, string | string[] | undefined>;
|
||||
rawBody: string;
|
||||
@@ -194,17 +189,19 @@ export type WebhookContext = {
|
||||
remoteAddress?: string;
|
||||
};
|
||||
|
||||
/** Provider adapter output after converting one webhook request into normalized events. */
|
||||
export type ProviderWebhookParseResult = {
|
||||
/** Normalized provider events to apply in manager order. */
|
||||
events: NormalizedEvent[];
|
||||
/** Optional immediate response body, such as TwiML/XML expected by the carrier. */
|
||||
providerResponseBody?: string;
|
||||
/** Headers for the immediate carrier response. */
|
||||
providerResponseHeaders?: Record<string, string>;
|
||||
/** HTTP status for the immediate carrier response; defaults at the webhook layer. */
|
||||
statusCode?: number;
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Provider Method Types
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
/** Request shape for starting an outbound provider call. */
|
||||
export type InitiateCallInput = {
|
||||
callId: CallId;
|
||||
from: string;
|
||||
@@ -227,16 +224,20 @@ export type InitiateCallInput = {
|
||||
};
|
||||
|
||||
export type InitiateCallResult = {
|
||||
/** Provider call id that subsequent webhooks/control calls will reference. */
|
||||
providerCallId: ProviderCallId;
|
||||
/** Provider acceptance state after outbound dial request creation. */
|
||||
status: "initiated" | "queued";
|
||||
};
|
||||
|
||||
/** Request shape for ending an active provider call. */
|
||||
export type HangupCallInput = {
|
||||
callId: CallId;
|
||||
providerCallId: ProviderCallId;
|
||||
reason: EndReason;
|
||||
};
|
||||
|
||||
/** Request shape for provider APIs that answer inbound calls explicitly. */
|
||||
export type AnswerCallInput = {
|
||||
callId: CallId;
|
||||
providerCallId: ProviderCallId;
|
||||
@@ -251,6 +252,7 @@ export type AnswerCallInput = {
|
||||
streamAuthToken?: string;
|
||||
};
|
||||
|
||||
/** Provider TTS request for speaking text into an active call. */
|
||||
export type PlayTtsInput = {
|
||||
callId: CallId;
|
||||
providerCallId: ProviderCallId;
|
||||
@@ -259,12 +261,14 @@ export type PlayTtsInput = {
|
||||
locale?: string;
|
||||
};
|
||||
|
||||
/** Provider DTMF request for an active call. */
|
||||
export type SendDtmfInput = {
|
||||
callId: CallId;
|
||||
providerCallId: ProviderCallId;
|
||||
digits: string;
|
||||
};
|
||||
|
||||
/** Provider STT/listening request for an active call turn. */
|
||||
export type StartListeningInput = {
|
||||
callId: CallId;
|
||||
providerCallId: ProviderCallId;
|
||||
@@ -273,19 +277,21 @@ export type StartListeningInput = {
|
||||
turnToken?: string;
|
||||
};
|
||||
|
||||
/** Provider request to stop collecting user speech. */
|
||||
export type StopListeningInput = {
|
||||
/** Internal call id for logs and manager correlation. */
|
||||
callId: CallId;
|
||||
/** Provider call id for the active call leg. */
|
||||
providerCallId: ProviderCallId;
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Call Status Verification (used on restart to verify persisted calls)
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
/** Provider lookup request used when restoring persisted calls after restart. */
|
||||
export type GetCallStatusInput = {
|
||||
/** Provider call id to reconcile after restore. */
|
||||
providerCallId: ProviderCallId;
|
||||
};
|
||||
|
||||
/** Provider status lookup result used to decide whether restored calls stay active. */
|
||||
export type GetCallStatusResult = {
|
||||
/** Provider-specific status string (e.g. "completed", "in-progress") */
|
||||
status: string;
|
||||
@@ -295,17 +301,14 @@ export type GetCallStatusResult = {
|
||||
isUnknown?: boolean;
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Outbound Call Options
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
/** User-facing outbound call options accepted by voice-call tools. */
|
||||
export type OutboundCallOptions = {
|
||||
/** Message to speak when call connects */
|
||||
/** Message to speak when call connects. */
|
||||
message?: string;
|
||||
/** Call mode (overrides config default) */
|
||||
/** Call mode override for this outbound call. */
|
||||
mode?: CallMode;
|
||||
/** DTMF digits to send after the call is connected */
|
||||
/** DTMF digits to execute before conversation webhook control resumes. */
|
||||
dtmfSequence?: string;
|
||||
/** Session that initiated the call, used for agent context/delegated message routing */
|
||||
/** Session that initiated the call, used for forked realtime consult context. */
|
||||
requesterSessionKey?: string;
|
||||
};
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
|
||||
/** Resolve a user-provided path, expanding leading `~` while preserving blank input. */
|
||||
export function resolveUserPath(input: string): string {
|
||||
const trimmed = input.trim();
|
||||
if (!trimmed) {
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/string-coerce-runtime";
|
||||
|
||||
/**
|
||||
* Escape XML special characters for TwiML and other XML responses.
|
||||
*/
|
||||
/** Escapes user/model text before embedding it in TwiML or provider XML responses. */
|
||||
export function escapeXml(text: string): string {
|
||||
return text
|
||||
.replace(/&/g, "&amp;")
|
||||
@@ -12,9 +10,7 @@ export function escapeXml(text: string): string {
|
||||
.replace(/'/g, "&apos;");
|
||||
}
|
||||
|
||||
/**
|
||||
* Map of OpenAI voice names to similar Twilio Polly voices.
|
||||
*/
|
||||
/** OpenAI voice aliases accepted by config and translated for Twilio's Polly-backed TTS. */
|
||||
const OPENAI_TO_POLLY_MAP: Record<string, string> = {
|
||||
alloy: "Polly.Joanna", // neutral, warm
|
||||
echo: "Polly.Matthew", // male, warm
|
||||
@@ -24,42 +20,37 @@ const OPENAI_TO_POLLY_MAP: Record<string, string> = {
|
||||
shimmer: "Polly.Kimberly", // female, clear
|
||||
};
|
||||
|
||||
/**
|
||||
* Default Polly voice when no mapping is found.
|
||||
*/
|
||||
/** Stable fallback voice used when config omits a voice or names an unsupported OpenAI alias. */
|
||||
export const DEFAULT_POLLY_VOICE = "Polly.Joanna";
|
||||
|
||||
/**
 * Resolves config voice names to Twilio-compatible TTS voice ids.
 *
 * Voice ids starting with `Polly.` or `Google.` pass through unchanged. Any other
 * value is normalized with `normalizeLowercaseStringOrEmpty` and looked up in
 * `OPENAI_TO_POLLY_MAP`; names without a mapping resolve to `DEFAULT_POLLY_VOICE`.
 *
 * @param voice - OpenAI voice alias, Twilio Polly voice id, Google voice id, or undefined.
 * @returns TwiML voice id suitable for Twilio `<Say>`.
 */
export function mapVoiceToPolly(voice: string | undefined): string {
  if (!voice) {
    return DEFAULT_POLLY_VOICE;
  }

  // Preserve provider-qualified voice ids exactly; TwiML voice names are provider-owned strings.
  if (voice.startsWith("Polly.") || voice.startsWith("Google.")) {
    return voice;
  }

  // Consult own keys only. A bare index on an object literal also resolves
  // inherited Object.prototype members (e.g. "constructor"), which would be
  // truthy and returned here as a non-string value.
  const alias = normalizeLowercaseStringOrEmpty(voice);
  if (!Object.prototype.hasOwnProperty.call(OPENAI_TO_POLLY_MAP, alias)) {
    // Unknown OpenAI-style names fall back instead of leaking unsupported voice ids to Twilio.
    return DEFAULT_POLLY_VOICE;
  }
  return OPENAI_TO_POLLY_MAP[alias] || DEFAULT_POLLY_VOICE;
}
|
||||
|
||||
/**
 * Returns true only for the OpenAI aliases this plugin can translate for telephony TTS.
 *
 * The name is normalized with `normalizeLowercaseStringOrEmpty` and must be an own
 * key of `OPENAI_TO_POLLY_MAP`. An own-property check is used instead of the `in`
 * operator because `in` also matches inherited keys such as "constructor".
 *
 * @param voice - Voice name from config or a directive.
 * @returns Whether the normalized name has an entry in `OPENAI_TO_POLLY_MAP`.
 */
export function isOpenAiVoice(voice: string): boolean {
  const alias = normalizeLowercaseStringOrEmpty(voice);
  return Object.prototype.hasOwnProperty.call(OPENAI_TO_POLLY_MAP, alias);
}
|
||||
|
||||
/**
|
||||
* Get all supported OpenAI voice names.
|
||||
*/
|
||||
/** Lists supported OpenAI aliases in config-display order. */
|
||||
export function getOpenAiVoiceNames(): string[] {
|
||||
return Object.keys(OPENAI_TO_POLLY_MAP);
|
||||
}
|
||||
|
||||
@@ -12,28 +12,40 @@ type VoiceCallWebhookExposureConfig = {
|
||||
};
|
||||
|
||||
type VoiceCallWebhookExposureStatus = {
|
||||
/** Whether the selected provider can receive webhook callbacks with current config. */
|
||||
ok: boolean;
|
||||
/** Whether some exposure mechanism was configured, even if it is invalid. */
|
||||
configured: boolean;
|
||||
/** Human-readable setup status for doctor/config diagnostics. */
|
||||
message: string;
|
||||
};
|
||||
|
||||
/**
 * Reports whether the named provider is one of the carrier providers
 * ("twilio", "telnyx", "plivo") that need a publicly reachable webhook.
 *
 * @param providerName - Configured provider id, if any. Matching is exact.
 * @returns `true` for twilio/telnyx/plivo, `false` for anything else including `undefined`.
 */
export function providerRequiresPublicWebhook(providerName: string | undefined): boolean {
  switch (providerName) {
    case "twilio":
    case "telnyx":
    case "plivo":
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/** Checks whether a webhook hostname resolves to loopback, private, or otherwise blocked space. */
|
||||
export function isLocalOnlyWebhookHost(hostname: string): boolean {
|
||||
return isBlockedHostnameOrIp(hostname);
|
||||
}
|
||||
|
||||
/**
 * Flags webhook URLs whose hostname `isLocalOnlyWebhookHost` classifies as local-only.
 *
 * @param webhookUrl - Candidate webhook URL string.
 * @returns The `isLocalOnlyWebhookHost` verdict for the URL's hostname, or `false`
 *   when the URL cannot be parsed.
 */
export function isProviderUnreachableWebhookUrl(webhookUrl: string): boolean {
  try {
    const { hostname } = new URL(webhookUrl);
    return isLocalOnlyWebhookHost(hostname);
  } catch {
    // Malformed URLs are left for config validation to report; this helper
    // only classifies hosts it can actually parse.
    return false;
  }
}
|
||||
|
||||
/**
|
||||
* Summarizes whether voice-call webhook exposure is configured for the selected provider.
|
||||
* This is a diagnostic helper: runtime startup performs the final fail-closed
|
||||
* check after public URL, tunnel, and Tailscale resolution.
|
||||
*/
|
||||
export function resolveWebhookExposureStatus(
|
||||
config: VoiceCallWebhookExposureConfig,
|
||||
): VoiceCallWebhookExposureStatus {
|
||||
|
||||
@@ -42,6 +42,8 @@ function sha256Hex(input: string): string {
|
||||
}
|
||||
|
||||
/**
 * Builds the replay key used when signature verification was skipped.
 *
 * The key has the form `<provider>:skip:<sha256Hex of method, url and raw body>`,
 * so identical requests always map to the same key.
 */
function createSkippedVerificationReplayKey(provider: string, ctx: WebhookContext): string {
  // Dev-mode skips still need a deterministic replay identity so local retries
  // follow the same side-effect path as signed provider retries.
  const requestMaterial = `${ctx.method}\n${ctx.url}\n${ctx.rawBody}`;
  const requestDigest = sha256Hex(requestMaterial);
  return [provider, "skip", requestDigest].join(":");
}
|
||||
|
||||
@@ -72,6 +74,8 @@ function markReplay(cache: ReplayCache, replayKey: string): boolean {
|
||||
return true;
|
||||
}
|
||||
|
||||
// If expiry would overflow the valid Date range, skip storing the entry
|
||||
// rather than pinning an unusable replay marker forever.
|
||||
const expiresAt = resolveExpiresAtMsFromDurationMs(REPLAY_WINDOW_MS, { nowMs: now });
|
||||
if (expiresAt !== undefined) {
|
||||
cache.seenUntil.set(replayKey, expiresAt);
|
||||
@@ -82,14 +86,7 @@ function markReplay(cache: ReplayCache, replayKey: string): boolean {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate Twilio webhook signature using HMAC-SHA1.
|
||||
*
|
||||
* Twilio signs requests by concatenating the URL with sorted POST params,
|
||||
* then computing HMAC-SHA1 with the auth token.
|
||||
*
|
||||
* @see https://www.twilio.com/docs/usage/webhooks/webhooks-security
|
||||
*/
|
||||
/** Validates Twilio's URL-plus-sorted-form HMAC signature. */
|
||||
function validateTwilioSignature(
|
||||
authToken: string,
|
||||
signature: string | undefined,
|
||||
@@ -102,13 +99,11 @@ function validateTwilioSignature(
|
||||
|
||||
const dataToSign = buildTwilioDataToSign(url, params);
|
||||
|
||||
// HMAC-SHA1 with auth token, then base64 encode
|
||||
const expectedSignature = crypto
|
||||
.createHmac("sha1", authToken)
|
||||
.update(dataToSign)
|
||||
.digest("base64");
|
||||
|
||||
// Use timing-safe comparison to prevent timing attacks
|
||||
return timingSafeEqual(signature, expectedSignature);
|
||||
}
|
||||
|
||||
@@ -130,42 +125,19 @@ function buildCanonicalTwilioParamString(params: URLSearchParams): string {
|
||||
.join("&");
|
||||
}
|
||||
|
||||
/**
|
||||
* Timing-safe string comparison to prevent timing attacks.
|
||||
*/
|
||||
function timingSafeEqual(a: string, b: string): boolean {
|
||||
return safeEqualSecret(a, b);
|
||||
}
|
||||
|
||||
/**
|
||||
* Configuration for secure URL reconstruction.
|
||||
*/
|
||||
/** Controls when signature URL reconstruction may trust proxy-supplied headers. */
|
||||
interface WebhookUrlOptions {
|
||||
/**
|
||||
* Whitelist of allowed hostnames. If provided, only these hosts will be
|
||||
* accepted from forwarding headers. This prevents host header injection attacks.
|
||||
*
|
||||
* SECURITY: You must provide this OR set trustForwardingHeaders=true to use
|
||||
* X-Forwarded-Host headers. Without either, forwarding headers are ignored.
|
||||
*/
|
||||
/** Host allowlist for forwarding headers; without this or explicit trust they are ignored. */
|
||||
allowedHosts?: string[];
|
||||
/**
|
||||
* Explicitly trust X-Forwarded-* headers without a whitelist.
|
||||
* WARNING: Only set this to true if you trust your proxy configuration
|
||||
* and understand the security implications.
|
||||
*
|
||||
* @default false
|
||||
*/
|
||||
/** Trust X-Forwarded-* without a host allowlist when the deployment owns the proxy boundary. */
|
||||
trustForwardingHeaders?: boolean;
|
||||
/**
|
||||
* List of trusted proxy IP addresses. X-Forwarded-* headers will only be
|
||||
* trusted if the request comes from one of these IPs.
|
||||
* Requires remoteIP to be set for validation.
|
||||
*/
|
||||
/** Optional source-IP allowlist required before forwarded headers affect signature URLs. */
|
||||
trustedProxyIPs?: string[];
|
||||
/**
|
||||
* The IP address of the incoming request (for proxy validation).
|
||||
*/
|
||||
/** Incoming request IP used to evaluate trustedProxyIPs. */
|
||||
remoteIP?: string;
|
||||
}
|
||||
|
||||
@@ -244,45 +216,30 @@ function normalizeAllowedHosts(allowedHosts?: string[]): Set<string> | null {
|
||||
}
|
||||
|
||||
/**
|
||||
* Reconstruct the public webhook URL from request headers.
|
||||
* Reconstructs the provider-visible webhook URL used by signature verification.
|
||||
*
|
||||
* SECURITY: This function validates host headers to prevent host header
|
||||
* injection attacks. When using forwarding headers (X-Forwarded-Host, etc.),
|
||||
* always provide allowedHosts to whitelist valid hostnames.
|
||||
*
|
||||
* When behind a reverse proxy (Tailscale, nginx, ngrok), the original URL
|
||||
* used by Twilio differs from the local request URL. We use standard
|
||||
* forwarding headers to reconstruct it.
|
||||
*
|
||||
* Priority order:
|
||||
* 1. X-Forwarded-Proto + X-Forwarded-Host (standard proxy headers)
|
||||
* 2. X-Original-Host (nginx)
|
||||
* 3. Ngrok-Forwarded-Host (ngrok specific)
|
||||
* 4. Host header (direct connection)
|
||||
* Forwarded headers affect HMAC/EdDSA inputs, so they are trusted only when the
|
||||
* deployment opts in through host allowlists, explicit trust, or proxy IP gates.
|
||||
*/
|
||||
export function reconstructWebhookUrl(ctx: WebhookContext, options?: WebhookUrlOptions): string {
|
||||
const { headers } = ctx;
|
||||
|
||||
// SECURITY: Only trust forwarding headers if explicitly configured.
|
||||
// Either allowedHosts must be set (for whitelist validation) or
|
||||
// trustForwardingHeaders must be true (explicit opt-in to trust).
|
||||
const allowedHosts = normalizeAllowedHosts(options?.allowedHosts);
|
||||
const hasAllowedHosts = allowedHosts !== null;
|
||||
const explicitlyTrusted = options?.trustForwardingHeaders === true;
|
||||
|
||||
// Also check trusted proxy IPs if configured
|
||||
const trustedProxyIPs = options?.trustedProxyIPs?.filter(Boolean) ?? [];
|
||||
const hasTrustedProxyIPs = trustedProxyIPs.length > 0;
|
||||
const remoteIP = options?.remoteIP ?? ctx.remoteAddress;
|
||||
const fromTrustedProxy =
|
||||
!hasTrustedProxyIPs || (remoteIP ? trustedProxyIPs.includes(remoteIP) : false);
|
||||
|
||||
// Only trust forwarding headers if: (has whitelist OR explicitly trusted) AND from trusted proxy
|
||||
// Forwarded hosts affect signature URLs, so require both an explicit trust mode
|
||||
// and a trusted proxy source before honoring them.
|
||||
const shouldTrustForwardingHeaders = (hasAllowedHosts || explicitlyTrusted) && fromTrustedProxy;
|
||||
|
||||
const isAllowedForwardedHost = (host: string): boolean => !allowedHosts || allowedHosts.has(host);
|
||||
|
||||
// Determine protocol - only trust X-Forwarded-Proto from trusted proxies
|
||||
let proto = "https";
|
||||
if (shouldTrustForwardingHeaders) {
|
||||
const forwardedProto = getHeader(headers, "x-forwarded-proto");
|
||||
@@ -291,11 +248,10 @@ export function reconstructWebhookUrl(ctx: WebhookContext, options?: WebhookUrlO
|
||||
}
|
||||
}
|
||||
|
||||
// Determine host - with security validation
|
||||
let host: string | null = null;
|
||||
|
||||
if (shouldTrustForwardingHeaders) {
|
||||
// Try forwarding headers in priority order
|
||||
// Priority order mirrors common proxy stacks: standard, nginx, then ngrok.
|
||||
const forwardingHeaders = ["x-forwarded-host", "x-original-host", "ngrok-forwarded-host"];
|
||||
|
||||
for (const headerName of forwardingHeaders) {
|
||||
@@ -310,7 +266,6 @@ export function reconstructWebhookUrl(ctx: WebhookContext, options?: WebhookUrlO
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to Host header if no valid forwarding header found
|
||||
if (!host) {
|
||||
const hostHeader = getHeader(headers, "host");
|
||||
if (hostHeader) {
|
||||
@@ -321,7 +276,6 @@ export function reconstructWebhookUrl(ctx: WebhookContext, options?: WebhookUrlO
|
||||
}
|
||||
}
|
||||
|
||||
// Last resort: try to extract from ctx.url
|
||||
if (!host) {
|
||||
try {
|
||||
const parsed = new URL(ctx.url);
|
||||
@@ -330,7 +284,6 @@ export function reconstructWebhookUrl(ctx: WebhookContext, options?: WebhookUrlO
|
||||
host = extracted;
|
||||
}
|
||||
} catch {
|
||||
// URL parsing failed - use empty string (will result in invalid URL)
|
||||
host = "";
|
||||
}
|
||||
}
|
||||
@@ -339,14 +292,11 @@ export function reconstructWebhookUrl(ctx: WebhookContext, options?: WebhookUrlO
|
||||
host = "";
|
||||
}
|
||||
|
||||
// Extract path from the context URL (fallback to "/" on parse failure)
|
||||
let path = "/";
|
||||
try {
|
||||
const parsed = new URL(ctx.url);
|
||||
path = parsed.pathname + parsed.search;
|
||||
} catch {
|
||||
// URL parsing failed
|
||||
}
|
||||
} catch {}
|
||||
|
||||
return `${proto}://${host}${path}`;
|
||||
}
|
||||
@@ -406,15 +356,12 @@ function extractPortFromHostHeader(hostHeader?: string): string | undefined {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Result of Twilio webhook verification with detailed info.
|
||||
*/
|
||||
interface TwilioVerificationResult {
|
||||
ok: boolean;
|
||||
reason?: string;
|
||||
/** The URL that was used for verification (for debugging) */
|
||||
/** Provider-visible URL that matched the signature, useful for diagnosing proxy config. */
|
||||
verificationUrl?: string;
|
||||
/** Whether we're running behind ngrok free tier */
|
||||
/** Whether the failed URL looked like an ngrok free-tier callback. */
|
||||
isNgrokFreeTier?: boolean;
|
||||
/** Request is cryptographically valid but was already processed recently. */
|
||||
isReplay?: boolean;
|
||||
@@ -437,6 +384,8 @@ function createTwilioReplayKey(params: {
|
||||
requestParams: URLSearchParams;
|
||||
}): string {
|
||||
const canonicalParams = buildCanonicalTwilioParamString(params.requestParams);
|
||||
// Twilio's idempotency header is not signed. Bind replay identity to the URL,
|
||||
// sorted signed params, and signature material that passed verification.
|
||||
return `twilio:req:${sha256Hex(
|
||||
`${params.verificationUrl}\n${canonicalParams}\n${params.signature}`,
|
||||
)}`;
|
||||
@@ -480,11 +429,11 @@ function importEd25519PublicKey(publicKey: string): crypto.KeyObject | string {
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify Telnyx webhook signature using Ed25519.
|
||||
* Verifies Telnyx webhook signatures using Ed25519 and signed timestamp/body material.
|
||||
*
|
||||
* Telnyx signs `timestamp|payload` and provides:
|
||||
* - `telnyx-signature-ed25519` (Base64 signature)
|
||||
* - `telnyx-timestamp` (Unix seconds)
|
||||
* Successful verification returns a stable request key for replay detection;
|
||||
* development skip mode also emits a deterministic key so local retries follow
|
||||
* the same dedupe path as signed callbacks.
|
||||
*/
|
||||
export function verifyTelnyxWebhook(
|
||||
ctx: WebhookContext,
|
||||
@@ -554,13 +503,16 @@ export function verifyTelnyxWebhook(
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify Twilio webhook with full context and detailed result.
|
||||
* Verifies Twilio callbacks, including proxy URL reconstruction and replay identity.
|
||||
*
|
||||
* The replay key is derived from signed URL/body/signature material, not
|
||||
* unsigned idempotency headers, so duplicate detection tracks verified input.
|
||||
*/
|
||||
export function verifyTwilioWebhook(
|
||||
ctx: WebhookContext,
|
||||
authToken: string,
|
||||
options?: {
|
||||
/** Override the public URL (e.g., from config) */
|
||||
/** Canonical external origin used when Twilio signs a URL different from the local request. */
|
||||
publicUrl?: string;
|
||||
/**
|
||||
* Allow ngrok free tier compatibility mode (loopback only).
|
||||
@@ -570,31 +522,18 @@ export function verifyTwilioWebhook(
|
||||
* reconstruct the public ngrok URL that Twilio used for signing.
|
||||
*/
|
||||
allowNgrokFreeTierLoopbackBypass?: boolean;
|
||||
/** Skip verification entirely (only for development) */
|
||||
/** Development-only bypass that still emits deterministic replay keys. */
|
||||
skipVerification?: boolean;
|
||||
/**
|
||||
* Whitelist of allowed hostnames for host header validation.
|
||||
* Prevents host header injection attacks.
|
||||
*/
|
||||
/** Host allowlist for forwarding headers used during signature URL reconstruction. */
|
||||
allowedHosts?: string[];
|
||||
/**
|
||||
* Explicitly trust X-Forwarded-* headers without a whitelist.
|
||||
* WARNING: Only enable if you trust your proxy configuration.
|
||||
* @default false
|
||||
*/
|
||||
/** Trust X-Forwarded-* without a host allowlist when the deployment owns the proxy boundary. */
|
||||
trustForwardingHeaders?: boolean;
|
||||
/**
|
||||
* List of trusted proxy IP addresses. X-Forwarded-* headers will only
|
||||
* be trusted from these IPs.
|
||||
*/
|
||||
/** Optional source-IP allowlist required before forwarded headers affect signature URLs. */
|
||||
trustedProxyIPs?: string[];
|
||||
/**
|
||||
* The remote IP address of the request (for proxy validation).
|
||||
*/
|
||||
/** Incoming request IP used to evaluate trustedProxyIPs. */
|
||||
remoteIP?: string;
|
||||
},
|
||||
): TwilioVerificationResult {
|
||||
// Allow skipping verification for development/testing
|
||||
if (options?.skipVerification) {
|
||||
const replayKey = createSkippedVerificationReplayKey("twilio", ctx);
|
||||
const isReplay = markReplay(twilioReplayCache, replayKey);
|
||||
@@ -615,7 +554,6 @@ export function verifyTwilioWebhook(
|
||||
const isLoopback = isLoopbackHost(options?.remoteIP ?? ctx.remoteAddress ?? "");
|
||||
const allowLoopbackForwarding = options?.allowNgrokFreeTierLoopbackBypass && isLoopback;
|
||||
|
||||
// Reconstruct the URL Twilio used
|
||||
const verificationUrl = buildTwilioVerificationUrl(ctx, options?.publicUrl, {
|
||||
allowedHosts: options?.allowedHosts,
|
||||
trustForwardingHeaders: options?.trustForwardingHeaders || allowLoopbackForwarding,
|
||||
@@ -623,7 +561,6 @@ export function verifyTwilioWebhook(
|
||||
remoteIP: options?.remoteIP,
|
||||
});
|
||||
|
||||
// Parse the body as URL-encoded params
|
||||
const params = new URLSearchParams(ctx.rawBody);
|
||||
|
||||
const isValid = validateTwilioSignature(authToken, signature, verificationUrl, params);
|
||||
@@ -638,8 +575,8 @@ export function verifyTwilioWebhook(
|
||||
return { ok: true, verificationUrl, isReplay, verifiedRequestKey: replayKey };
|
||||
}
|
||||
|
||||
// Twilio webhook signatures can differ in whether port is included.
|
||||
// Retry a small, deterministic set of URL variants before failing closed.
|
||||
// Keep fallback URL variants deterministic and tiny. They cover the known
|
||||
// Twilio port ambiguity without trying unbounded proxy/header combinations.
|
||||
const variants = new Set<string>();
|
||||
variants.add(verificationUrl);
|
||||
variants.add(stripPortFromUrl(verificationUrl));
|
||||
@@ -677,7 +614,6 @@ export function verifyTwilioWebhook(
|
||||
return { ok: true, verificationUrl: candidateUrl, isReplay, verifiedRequestKey: replayKey };
|
||||
}
|
||||
|
||||
// Check if this is ngrok free tier - the URL might have different format
|
||||
const isNgrokFreeTier =
|
||||
verificationUrl.includes(".ngrok-free.app") || verificationUrl.includes(".ngrok.io");
|
||||
|
||||
@@ -689,18 +625,11 @@ export function verifyTwilioWebhook(
|
||||
};
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Plivo webhook verification
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Result of Plivo webhook verification with detailed info.
|
||||
*/
|
||||
interface PlivoVerificationResult {
|
||||
ok: boolean;
|
||||
reason?: string;
|
||||
verificationUrl?: string;
|
||||
/** Signature version used for verification */
|
||||
/** Signature algorithm version accepted for this request. */
|
||||
version?: "v3" | "v2";
|
||||
/** Request is cryptographically valid but was already processed recently. */
|
||||
isReplay?: boolean;
|
||||
@@ -733,6 +662,8 @@ function createPlivoV3ReplayKey(params: {
|
||||
url: params.url,
|
||||
postParams: params.postParams,
|
||||
});
|
||||
// Mirror Plivo's canonical V3 base string so reordered query/post parameters
|
||||
// resolve to the same verified request identity.
|
||||
return `plivo:v3:${sha256Hex(`${baseUrl}\n${params.nonce}`)}`;
|
||||
}
|
||||
|
||||
@@ -852,39 +783,26 @@ function validatePlivoV3Signature(params: {
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify Plivo webhooks using V3 signature if present; fall back to V2.
|
||||
* Verifies Plivo callbacks, preferring V3 signatures and falling back to V2.
|
||||
*
|
||||
* Header names (case-insensitive; Node provides lower-case keys):
|
||||
* - V3: X-Plivo-Signature-V3 / X-Plivo-Signature-V3-Nonce
|
||||
* - V2: X-Plivo-Signature-V2 / X-Plivo-Signature-V2-Nonce
|
||||
* Replay keys mirror the accepted Plivo canonical string so reordered
|
||||
* query/body parameters resolve to the same verified request identity.
|
||||
*/
|
||||
export function verifyPlivoWebhook(
|
||||
ctx: WebhookContext,
|
||||
authToken: string,
|
||||
options?: {
|
||||
/** Override the public URL origin (host) used for verification */
|
||||
/** Canonical external origin used when Plivo signs a URL different from the local request. */
|
||||
publicUrl?: string;
|
||||
/** Skip verification entirely (only for development) */
|
||||
/** Development-only bypass that still emits deterministic replay keys. */
|
||||
skipVerification?: boolean;
|
||||
/**
|
||||
* Whitelist of allowed hostnames for host header validation.
|
||||
* Prevents host header injection attacks.
|
||||
*/
|
||||
/** Host allowlist for forwarding headers used during signature URL reconstruction. */
|
||||
allowedHosts?: string[];
|
||||
/**
|
||||
* Explicitly trust X-Forwarded-* headers without a whitelist.
|
||||
* WARNING: Only enable if you trust your proxy configuration.
|
||||
* @default false
|
||||
*/
|
||||
/** Trust X-Forwarded-* without a host allowlist when the deployment owns the proxy boundary. */
|
||||
trustForwardingHeaders?: boolean;
|
||||
/**
|
||||
* List of trusted proxy IP addresses. X-Forwarded-* headers will only
|
||||
* be trusted from these IPs.
|
||||
*/
|
||||
/** Optional source-IP allowlist required before forwarded headers affect signature URLs. */
|
||||
trustedProxyIPs?: string[];
|
||||
/**
|
||||
* The remote IP address of the request (for proxy validation).
|
||||
*/
|
||||
/** Incoming request IP used to evaluate trustedProxyIPs. */
|
||||
remoteIP?: string;
|
||||
},
|
||||
): PlivoVerificationResult {
|
||||
@@ -913,6 +831,8 @@ export function verifyPlivoWebhook(
|
||||
let verificationUrl = reconstructed;
|
||||
if (options?.publicUrl) {
|
||||
try {
|
||||
// publicUrl supplies the external origin; the actual webhook request keeps
|
||||
// ownership of path and query for signature verification.
|
||||
const req = new URL(reconstructed);
|
||||
const base = new URL(options.publicUrl);
|
||||
base.pathname = req.pathname;
|
||||
|
||||
@@ -74,6 +74,7 @@ type WebhookHeaderGateResult =
|
||||
reason: string;
|
||||
};
|
||||
|
||||
/** Sanitizes and bounds STT text before logs so transcripts cannot inject control output. */
|
||||
function sanitizeTranscriptForLog(value: string): string {
|
||||
const sanitized = value
|
||||
.replace(/\p{Cc}/gu, " ")
|
||||
@@ -85,6 +86,7 @@ function sanitizeTranscriptForLog(value: string): string {
|
||||
return `${sanitized.slice(0, TRANSCRIPT_LOG_MAX_CHARS)}...`;
|
||||
}
|
||||
|
||||
/** Stores a bounded realtime talk-event trail on the call without growing metadata unboundedly. */
|
||||
function appendRecentTalkEventMetadata(call: CallRecord, event: TalkEvent): void {
|
||||
const metadata = call.metadata ?? {};
|
||||
const recent = Array.isArray(metadata.recentTalkEvents)
|
||||
@@ -129,6 +131,7 @@ function normalizeProxyIp(value: string | undefined): string | undefined {
|
||||
return normalized;
|
||||
}
|
||||
|
||||
/** Resolves the original client IP from trusted proxy headers for media-stream rate limits. */
|
||||
function resolveForwardedClientIp(
|
||||
request: http.IncomingMessage,
|
||||
trustedProxyIPs: readonly string[],
|
||||
@@ -157,6 +160,7 @@ function resolveForwardedClientIp(
|
||||
return realIp || undefined;
|
||||
}
|
||||
|
||||
/** Converts provider parse output into a complete HTTP response payload. */
|
||||
function normalizeWebhookResponse(parsed: {
|
||||
statusCode?: number;
|
||||
providerResponseHeaders?: Record<string, string>;
|
||||
@@ -202,10 +206,7 @@ function cloneWebhookResponsePayload(payload: WebhookResponsePayload): WebhookRe
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* HTTP server for receiving voice call webhooks from providers.
|
||||
* Supports WebSocket upgrades for media streams when streaming is enabled.
|
||||
*/
|
||||
/** HTTP/WebSocket ingress for voice provider callbacks, media streams, and replay-safe replies. */
|
||||
export class VoiceCallWebhookServer {
|
||||
private server: http.Server | null = null;
|
||||
private listeningUrl: string | null = null;
|
||||
@@ -220,7 +221,7 @@ export class VoiceCallWebhookServer {
|
||||
private stopStaleCallReaper: (() => void) | null = null;
|
||||
private readonly webhookInFlightLimiter = createWebhookInFlightLimiter();
|
||||
|
||||
/** Media stream handler for bidirectional audio (when streaming enabled) */
|
||||
/** Optional STT media-stream bridge used by providers that connect by WebSocket. */
|
||||
private mediaStreamHandler: MediaStreamHandler | null = null;
|
||||
/** Delayed auto-hangup timers keyed by provider call ID after stream disconnect. */
|
||||
private pendingDisconnectHangups = new Map<string, ReturnType<typeof setTimeout>>();
|
||||
@@ -252,17 +253,17 @@ export class VoiceCallWebhookServer {
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the media stream handler (for wiring to provider).
|
||||
*/
|
||||
/** Exposes the stream bridge so providers can attach carrier-specific media controls. */
|
||||
getMediaStreamHandler(): MediaStreamHandler | null {
|
||||
return this.mediaStreamHandler;
|
||||
}
|
||||
|
||||
/** Returns the realtime duplex handler when realtime voice mode is configured. */
|
||||
getRealtimeHandler(): RealtimeCallHandler | null {
|
||||
return this.realtimeHandler;
|
||||
}
|
||||
|
||||
/** Sends operator text into an active realtime voice call through the duplex handler. */
|
||||
speakRealtime(callId: string, instructions: string): { success: boolean; error?: string } {
|
||||
if (!this.realtimeHandler) {
|
||||
return { success: false, error: "Realtime voice handler is not configured" };
|
||||
@@ -270,6 +271,7 @@ export class VoiceCallWebhookServer {
|
||||
return this.realtimeHandler.speak(callId, instructions);
|
||||
}
|
||||
|
||||
/** Installs a realtime handler created outside the server startup path. */
|
||||
setRealtimeHandler(handler: RealtimeCallHandler): void {
|
||||
this.realtimeHandler = handler;
|
||||
}
|
||||
@@ -298,6 +300,8 @@ export class VoiceCallWebhookServer {
|
||||
this.config.webhookSecurity.trustForwardingHeaders && fromTrustedProxy;
|
||||
|
||||
if (shouldTrustForwardingHeaders) {
|
||||
// Media stream limits are keyed by client IP, so forwarded headers are
|
||||
// accepted only from a configured trusted proxy, never from arbitrary callers.
|
||||
const forwardedIp = resolveForwardedClientIp(request, trustedProxyIPs);
|
||||
if (forwardedIp) {
|
||||
return forwardedIp;
|
||||
@@ -327,9 +331,7 @@ export class VoiceCallWebhookServer {
|
||||
return initialMessage.length > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize media streaming with the selected realtime transcription provider.
|
||||
*/
|
||||
/** Initializes provider-selected STT media streaming and binds callbacks into call state. */
|
||||
private async initializeMediaStreaming(): Promise<void> {
|
||||
const streaming = this.config.streaming;
|
||||
const pluginConfig =
|
||||
@@ -411,12 +413,12 @@ export class VoiceCallWebhookServer {
|
||||
return;
|
||||
}
|
||||
|
||||
// Clear TTS queue on barge-in (user started speaking, interrupt current playback)
|
||||
// Caller speech interrupts queued Twilio playback unless the initial greeting is protected.
|
||||
if (this.provider.name === "twilio") {
|
||||
(this.provider as TwilioProvider).clearTtsQueue(providerCallId);
|
||||
}
|
||||
|
||||
// Create a speech event and process it through the manager
|
||||
// Media transcripts bypass provider webhooks, so synthesize the normalized event here.
|
||||
const event: NormalizedEvent = {
|
||||
id: `stream-transcript-${Date.now()}`,
|
||||
type: "call.speech",
|
||||
@@ -428,7 +430,7 @@ export class VoiceCallWebhookServer {
|
||||
};
|
||||
this.manager.processEvent(event);
|
||||
|
||||
// Auto-respond in conversation mode (inbound always, outbound if mode is conversation)
|
||||
// Notify-mode outbound calls record transcripts but do not trigger an agent reply.
|
||||
const callMode = call.metadata?.mode as string | undefined;
|
||||
const shouldRespond = call.direction === "inbound" || callMode === "conversation";
|
||||
if (shouldRespond) {
|
||||
@@ -478,6 +480,8 @@ export class VoiceCallWebhookServer {
|
||||
}
|
||||
|
||||
this.clearPendingDisconnectHangup(callId);
|
||||
// Twilio can reconnect a media stream for the same call; delay hangup
|
||||
// briefly and re-check provider stream state before ending the call.
|
||||
const timer = setTimeout(() => {
|
||||
this.pendingDisconnectHangups.delete(callId);
|
||||
const disconnectedCall = this.manager.getCallByProviderCallId(callId);
|
||||
@@ -743,6 +747,8 @@ export class VoiceCallWebhookServer {
|
||||
return { statusCode: 401, body: "Unauthorized" };
|
||||
}
|
||||
if (!verification.verifiedRequestKey) {
|
||||
// Replay protection depends on a provider-stable request identity. Treat
|
||||
// verification without a key as unauthenticated rather than best-effort.
|
||||
console.warn("[voice-call] Webhook verification succeeded without request identity key");
|
||||
return { statusCode: 401, body: "Unauthorized" };
|
||||
}
|
||||
@@ -804,6 +810,8 @@ export class VoiceCallWebhookServer {
|
||||
return await buildResponse();
|
||||
}
|
||||
|
||||
// Twilio retries initial TwiML fetches; do not cache those responses here
|
||||
// because replayed realtime requests must not mint fresh stream tokens.
|
||||
if (this.provider.name === "twilio") {
|
||||
return await buildResponse();
|
||||
}
|
||||
@@ -864,6 +872,8 @@ export class VoiceCallWebhookServer {
|
||||
this.replayResponses.delete(key);
|
||||
throw err;
|
||||
});
|
||||
// Store the in-flight promise so simultaneous duplicate provider retries
|
||||
// share one parsed response and one set of manager side effects.
|
||||
if (expiresAt !== undefined) {
|
||||
this.replayResponses.set(key, {
|
||||
expiresAt,
|
||||
@@ -876,6 +886,7 @@ export class VoiceCallWebhookServer {
|
||||
return cloneWebhookResponsePayload(await response);
|
||||
}
|
||||
|
||||
/** Rejects obviously unsigned carrier webhooks before reading attacker-controlled bodies. */
|
||||
private verifyPreAuthWebhookHeaders(headers: http.IncomingHttpHeaders): WebhookHeaderGateResult {
|
||||
if (this.config.skipSignatureVerification) {
|
||||
return { ok: true };
|
||||
@@ -935,16 +946,21 @@ export class VoiceCallWebhookServer {
|
||||
}
|
||||
|
||||
if (ctx.query?.type === "status") {
|
||||
// Status callbacks only carry lifecycle notifications; returning realtime
|
||||
// TwiML here would cause Twilio to open media streams from notification retries.
|
||||
return null;
|
||||
}
|
||||
|
||||
const callStatus = params.get("CallStatus");
|
||||
if (callStatus && isProviderStatusTerminal(callStatus)) {
|
||||
// Terminal callbacks must be parsed as events so local cleanup/finalization
|
||||
// happens instead of attempting to reconnect a dead call.
|
||||
return null;
|
||||
}
|
||||
|
||||
// Initial TwiML fetches without gathered input may enter realtime handling.
|
||||
// Replay checks run before this helper so retries cannot mint new stream tokens.
|
||||
// Gathered speech/DTMF callbacks must stay on the provider event path.
|
||||
return !params.get("SpeechResult") && !params.get("Digits") ? params : null;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
/** Normalized HTTP response produced by provider webhook parsing or replay handling. */
|
||||
export type WebhookResponsePayload = {
|
||||
/** HTTP status returned to the carrier webhook request. */
|
||||
statusCode: number;
|
||||
/** Response body; XML for telephony providers, plain text for generic failures. */
|
||||
body: string;
|
||||
/** Optional carrier-specific headers such as TwiML/XML content type. */
|
||||
headers?: Record<string, string>;
|
||||
};
|
||||
|
||||
@@ -8,6 +8,8 @@ const DEFAULT_MAX_QUEUED_AUDIO_BYTES = TELEPHONY_SAMPLE_RATE * 120;
|
||||
const PCM16_MAX_AMPLITUDE = 32768;
|
||||
const MULAW_LINEAR_SAMPLES = new Int16Array(256);
|
||||
|
||||
// Decode table is process-stable and hot-path reused by the speech detector;
|
||||
// build it once instead of recomputing mu-law expansion per audio sample.
|
||||
for (let i = 0; i < MULAW_LINEAR_SAMPLES.length; i += 1) {
|
||||
MULAW_LINEAR_SAMPLES[i] = decodeMulawSample(i);
|
||||
}
|
||||
@@ -23,14 +25,20 @@ type RealtimeAudioQueueItem =
|
||||
type: "mark";
|
||||
};
|
||||
|
||||
/** Sends one serialized provider media/control frame; false means the socket can no longer accept output. */
|
||||
export type RealtimeAudioSend = (message: string) => boolean;
|
||||
|
||||
/** Serializes provider-specific realtime media control envelopes. */
|
||||
export interface RealtimeAudioSerializer {
|
||||
/** Wraps one base64 PCMU frame in the provider's outbound media envelope. */
|
||||
media(payloadBase64: string): string;
|
||||
/** Builds the provider command that drops queued carrier-side audio. */
|
||||
clear(): string;
|
||||
/** Builds a provider mark/control frame emitted after preceding paced audio. */
|
||||
mark(name: string): string;
|
||||
}
|
||||
|
||||
/** Paces mu-law telephony output so realtime providers receive one 20ms frame at a time. */
|
||||
export class RealtimeAudioPacer {
|
||||
private queue: RealtimeAudioQueueItem[] = [];
|
||||
private timer: ReturnType<typeof setTimeout> | null = null;
|
||||
@@ -39,21 +47,28 @@ export class RealtimeAudioPacer {
|
||||
|
||||
constructor(
|
||||
private readonly params: {
|
||||
/** Maximum queued PCMU bytes before playback is abandoned and backpressure fires. */
|
||||
maxQueuedAudioBytes?: number;
|
||||
/** Called once the pacer closes because outbound audio outran the provider socket. */
|
||||
onBackpressure?: () => void;
|
||||
/** Sends serialized frames to the provider socket; returning false drops remaining playback. */
|
||||
send: RealtimeAudioSend;
|
||||
/** Provider-specific media/clear/mark serializer, usually bound to the stream id. */
|
||||
serializer: RealtimeAudioSerializer;
|
||||
},
|
||||
) {}
|
||||
|
||||
/** Queues provider-ready mu-law bytes and starts the pacing pump if needed. */
|
||||
sendAudio(muLaw: Buffer): void {
|
||||
if (this.closed || muLaw.length === 0) {
|
||||
return;
|
||||
}
|
||||
const maxQueuedAudioBytes = this.params.maxQueuedAudioBytes ?? DEFAULT_MAX_QUEUED_AUDIO_BYTES;
|
||||
for (let offset = 0; offset < muLaw.length; offset += TELEPHONY_CHUNK_BYTES) {
|
||||
// Queue an owned copy so callers can safely reuse or mutate their source buffer.
|
||||
const chunk = Buffer.from(muLaw.subarray(offset, offset + TELEPHONY_CHUNK_BYTES));
|
||||
if (this.queuedAudioBytes + chunk.length > maxQueuedAudioBytes) {
|
||||
// Once provider output falls too far behind, close instead of sending a partial response.
|
||||
this.failBackpressure();
|
||||
return;
|
||||
}
|
||||
@@ -67,6 +82,7 @@ export class RealtimeAudioPacer {
|
||||
this.ensurePump();
|
||||
}
|
||||
|
||||
/** Queues a provider mark after any earlier audio frames. */
|
||||
sendMark(name: string): void {
|
||||
if (this.closed || !name) {
|
||||
return;
|
||||
@@ -75,6 +91,7 @@ export class RealtimeAudioPacer {
|
||||
this.ensurePump();
|
||||
}
|
||||
|
||||
/** Drops unsent audio/marks and emits the provider clear command. */
|
||||
clearAudio(): number {
|
||||
if (this.closed) {
|
||||
return 0;
|
||||
@@ -87,6 +104,7 @@ export class RealtimeAudioPacer {
|
||||
return clearedAudioBytes;
|
||||
}
|
||||
|
||||
/** Stops future sends and releases queued audio. */
|
||||
close(): void {
|
||||
this.closed = true;
|
||||
this.clearTimer();
|
||||
@@ -134,6 +152,8 @@ export class RealtimeAudioPacer {
|
||||
}
|
||||
|
||||
if (!sent) {
|
||||
// Treat a failed send as terminal for queued playback. Keeping stale audio
|
||||
// queued after provider backpressure would play the wrong turn later.
|
||||
this.queue = [];
|
||||
this.queuedAudioBytes = 0;
|
||||
return;
|
||||
@@ -144,6 +164,7 @@ export class RealtimeAudioPacer {
|
||||
}
|
||||
}
|
||||
|
||||
/** Calculates normalized RMS for 8kHz mu-law frames using the same lookup table as the pacer. */
|
||||
export function calculateMulawRms(muLaw: Buffer): number {
|
||||
if (muLaw.length === 0) {
|
||||
return 0;
|
||||
@@ -156,6 +177,7 @@ export function calculateMulawRms(muLaw: Buffer): number {
|
||||
return Math.sqrt(sum / muLaw.length);
|
||||
}
|
||||
|
||||
/** Edge detector for caller speech starts, debounced across loud and quiet telephony chunks. */
|
||||
export class RealtimeMulawSpeechStartDetector {
|
||||
private loudChunks = 0;
|
||||
private quietChunks = DEFAULT_REQUIRED_QUIET_CHUNKS;
|
||||
@@ -163,12 +185,16 @@ export class RealtimeMulawSpeechStartDetector {
|
||||
|
||||
constructor(
|
||||
private readonly params: {
|
||||
/** Consecutive loud chunks required before reporting the speech-start edge. */
|
||||
requiredLoudChunks?: number;
|
||||
/** Consecutive quiet chunks required before arming the next speech-start edge. */
|
||||
requiredQuietChunks?: number;
|
||||
/** Normalized mu-law RMS threshold that separates silence/noise from speech. */
|
||||
rmsThreshold?: number;
|
||||
} = {},
|
||||
) {}
|
||||
|
||||
/** Returns true only on the transition from quiet/not-speaking to sustained speech. */
|
||||
accept(muLaw: Buffer): boolean {
|
||||
const rms = calculateMulawRms(muLaw);
|
||||
const threshold = this.params.rmsThreshold ?? DEFAULT_SPEECH_RMS_THRESHOLD;
|
||||
@@ -187,6 +213,8 @@ export class RealtimeMulawSpeechStartDetector {
|
||||
this.quietChunks += 1;
|
||||
const requiredQuietChunks = this.params.requiredQuietChunks ?? DEFAULT_REQUIRED_QUIET_CHUNKS;
|
||||
if (this.quietChunks >= requiredQuietChunks) {
|
||||
// Require sustained quiet before arming the next speech-start edge, so
|
||||
// brief pauses inside one utterance do not trigger repeated barge-ins.
|
||||
this.speaking = false;
|
||||
}
|
||||
return false;
|
||||
|
||||
@@ -39,9 +39,12 @@ import {
|
||||
TwilioStreamFrameAdapter,
|
||||
} from "./stream-frame-adapter.js";
|
||||
|
||||
/** Context passed to realtime tool handlers with caller speech captured outside provider tool args. */
|
||||
export type ToolHandlerContext = {
|
||||
/** Best current user transcript assembled from partial/final provider events. */
|
||||
partialUserTranscript?: string;
|
||||
};
|
||||
/** Tool callback invoked by the realtime voice bridge for call-scoped actions. */
|
||||
export type ToolHandlerFn = (
|
||||
args: unknown,
|
||||
callId: string,
|
||||
@@ -153,6 +156,9 @@ function appendTranscriptText(base: string | undefined, fragment: string): strin
|
||||
return next;
|
||||
}
|
||||
const overlap = findTextOverlap(currentLower, nextLower);
|
||||
// Realtime providers often emit growing partials plus tiny trailing fragments.
|
||||
// Merge only clear overlap so consult prompts keep the caller's words without
|
||||
// duplicating syllables when the provider revises a partial transcript.
|
||||
if (overlap >= 6 || (overlap >= 3 && next.length <= 12)) {
|
||||
return `${current}${next.slice(overlap)}`.trim();
|
||||
}
|
||||
@@ -220,6 +226,7 @@ type PendingStreamToken = {
|
||||
callId?: string;
|
||||
};
|
||||
|
||||
/** Metadata used to mint a one-shot provider websocket stream URL. */
|
||||
export type StreamSessionRequest = {
|
||||
providerName?: "twilio" | "telnyx";
|
||||
callId?: string;
|
||||
@@ -228,8 +235,11 @@ export type StreamSessionRequest = {
|
||||
direction?: "inbound" | "outbound";
|
||||
};
|
||||
|
||||
/** One-shot stream authorization returned to telephony providers in TwiML/API payloads. */
|
||||
export type StreamSession = {
|
||||
/** Opaque path token consumed on the first websocket upgrade attempt. */
|
||||
token: string;
|
||||
/** Public `wss://` URL carrying the stream token as the final path segment. */
|
||||
streamUrl: string;
|
||||
};
|
||||
|
||||
@@ -289,6 +299,7 @@ function appendRecentTalkEventMetadata(
|
||||
call.metadata = metadata;
|
||||
}
|
||||
|
||||
/** Bridges telephony websocket media to a realtime voice provider and call manager state. */
|
||||
export class RealtimeCallHandler {
|
||||
private readonly toolHandlers = new Map<string, ToolHandlerFn>();
|
||||
private readonly pendingStreamTokens = new Map<string, PendingStreamToken>();
|
||||
@@ -323,6 +334,7 @@ export class RealtimeCallHandler {
|
||||
private readonly coreConfig?: OpenClawConfig,
|
||||
) {}
|
||||
|
||||
/** Records the public webhook origin/path prefix used to build carrier stream URLs. */
|
||||
setPublicUrl(url: string): void {
|
||||
try {
|
||||
const parsed = new URL(url);
|
||||
@@ -337,10 +349,12 @@ export class RealtimeCallHandler {
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns the websocket path pattern, including any public path prefix before servePath. */
|
||||
getStreamPathPattern(): string {
|
||||
return `${this.publicPathPrefix}${normalizePath(this.config.streamPath ?? "/voice/stream/realtime")}`;
|
||||
}
|
||||
|
||||
/** Builds TwiML that connects Twilio to a one-shot realtime stream session. */
|
||||
buildTwiMLPayload(req: http.IncomingMessage, params?: URLSearchParams): WebhookResponsePayload {
|
||||
const rawDirection = params?.get("Direction");
|
||||
const previousOrigin = this.publicOrigin;
|
||||
@@ -370,6 +384,7 @@ export class RealtimeCallHandler {
|
||||
}
|
||||
}
|
||||
|
||||
/** Accepts a carrier websocket upgrade after consuming its one-shot stream token. */
|
||||
handleWebSocketUpgrade(request: http.IncomingMessage, socket: Duplex, head: Buffer): void {
|
||||
const url = new URL(request.url ?? "/", "wss://localhost");
|
||||
const token = url.pathname.split("/").pop() ?? null;
|
||||
@@ -474,10 +489,12 @@ export class RealtimeCallHandler {
|
||||
});
|
||||
}
|
||||
|
||||
/** Registers a realtime tool implementation scoped by name for active call bridges. */
|
||||
registerToolHandler(name: string, fn: ToolHandlerFn): void {
|
||||
this.toolHandlers.set(name, fn);
|
||||
}
|
||||
|
||||
/** Injects speech instructions into an active realtime call bridge. */
|
||||
speak(callId: string, instructions: string): RealtimeSpeakResult {
|
||||
const bridge = this.activeBridgesByCallId.get(callId);
|
||||
if (!bridge) {
|
||||
@@ -491,6 +508,7 @@ export class RealtimeCallHandler {
|
||||
}
|
||||
}
|
||||
|
||||
/** Issues the one-shot token and public stream URL embedded in provider connect payloads. */
|
||||
issueStreamSession(request: StreamSessionRequest = {}): StreamSession {
|
||||
const token = this.issueStreamToken({
|
||||
providerName: request.providerName ?? "twilio",
|
||||
@@ -500,6 +518,8 @@ export class RealtimeCallHandler {
|
||||
direction: request.direction,
|
||||
});
|
||||
const host = this.publicOrigin || DEFAULT_HOST;
|
||||
// The token is a one-shot capability embedded in the path so Twilio/Telnyx
|
||||
// WebSocket upgrades can be authorized before provider start frames arrive.
|
||||
const streamUrl = `wss://${host}${this.getStreamPathPattern()}/${token}`;
|
||||
return { token, streamUrl };
|
||||
}
|
||||
@@ -511,6 +531,8 @@ export class RealtimeCallHandler {
|
||||
if (expiry !== undefined) {
|
||||
this.pendingStreamTokens.set(token, { expiry, ...meta });
|
||||
}
|
||||
// Token issuance is also the cleanup point; media stream tokens are
|
||||
// short-lived one-shot capabilities, not a growing session registry.
|
||||
for (const [candidate, entry] of this.pendingStreamTokens) {
|
||||
if (!isFutureDateTimestampMs(entry.expiry, { nowMs: now })) {
|
||||
this.pendingStreamTokens.delete(candidate);
|
||||
@@ -525,6 +547,8 @@ export class RealtimeCallHandler {
|
||||
return null;
|
||||
}
|
||||
this.pendingStreamTokens.delete(token);
|
||||
// Consume before expiry validation so replayed or stale stream URLs cannot
|
||||
// be retried after a failed upgrade attempt.
|
||||
if (!isFutureDateTimestampMs(entry.expiry)) {
|
||||
return null;
|
||||
}
|
||||
@@ -860,6 +884,8 @@ export class RealtimeCallHandler {
|
||||
emitCallEnd(reason);
|
||||
session.close();
|
||||
};
|
||||
// Public APIs address bridges by OpenClaw call id; telephony callbacks use
|
||||
// provider ids, so both keys must point at the same live bridge/closer.
|
||||
this.activeBridgesByCallId.set(callId, session);
|
||||
this.activeBridgesByCallId.set(callSid, session);
|
||||
this.activeTelephonyClosersByCallId.set(callId, closeTelephony);
|
||||
@@ -926,6 +952,9 @@ export class RealtimeCallHandler {
|
||||
private setRecentFinalUserTranscript(callId: string, text: string): void {
|
||||
this.clearRecentFinalUserTranscript(callId);
|
||||
this.recentFinalUserTranscriptsByCallId.set(callId, text);
|
||||
// Keep final transcript context only long enough for the provider's tool
|
||||
// call to arrive after response finalization; otherwise old caller intent
|
||||
// can leak into a later turn's consult.
|
||||
const timer = setTimeout(() => {
|
||||
if (this.recentFinalUserTranscriptsByCallId.get(callId) === text) {
|
||||
this.recentFinalUserTranscriptsByCallId.delete(callId);
|
||||
@@ -999,6 +1028,9 @@ export class RealtimeCallHandler {
|
||||
if (quietFor >= CONSULT_TRANSCRIPT_SETTLE_MS || now >= deadline) {
|
||||
return;
|
||||
}
|
||||
// Wait for partial transcript churn to go quiet before building consult
|
||||
// args; the max deadline bounds tool latency when a provider keeps
|
||||
// streaming tiny deltas.
|
||||
await new Promise((resolve) => {
|
||||
setTimeout(resolve, Math.min(CONSULT_TRANSCRIPT_SETTLE_MS - quietFor, deadline - now));
|
||||
});
|
||||
@@ -1061,6 +1093,9 @@ export class RealtimeCallHandler {
|
||||
return;
|
||||
}
|
||||
coordinator.clearPending();
|
||||
// Give the realtime provider a short chance to call the native consult tool
|
||||
// first; the forced path exists only when the provider finalizes speech
|
||||
// without asking OpenClaw for an agent consult.
|
||||
const pending = coordinator.prepare(question);
|
||||
if (!pending) {
|
||||
return;
|
||||
@@ -1286,6 +1321,8 @@ export class RealtimeCallHandler {
|
||||
if (forcedMatch.kind === "none") {
|
||||
const pending = coordinator.consumePending();
|
||||
if (pending) {
|
||||
// A native provider consult arrived before the fallback delay fired;
|
||||
// cancel that pending forced consult for this utterance.
|
||||
coordinator.remove(pending);
|
||||
}
|
||||
}
|
||||
@@ -1298,6 +1335,8 @@ export class RealtimeCallHandler {
|
||||
});
|
||||
return;
|
||||
}
|
||||
// A native provider tool call takes over speech delivery from the
|
||||
// forced fallback, but shares the same in-flight agent consult result.
|
||||
forcedConsult.sendSpeechPrompt = false;
|
||||
const result = await forcedConsult.promise.catch((error: unknown) => ({
|
||||
error: formatErrorMessage(error),
|
||||
@@ -1321,6 +1360,8 @@ export class RealtimeCallHandler {
|
||||
startedAt,
|
||||
promise: Promise.resolve(),
|
||||
};
|
||||
// Share same-turn native consults so duplicate provider tool calls do not
|
||||
// fan out multiple agent runs for the same caller utterance.
|
||||
state.promise = (async () => {
|
||||
await this.waitForConsultTranscriptSettle(callId, startedAt);
|
||||
const context = {
|
||||
@@ -1353,6 +1394,8 @@ export class RealtimeCallHandler {
|
||||
);
|
||||
submitFinalToolResult(result);
|
||||
if (status === "ok") {
|
||||
// Consume only after a successful consult so failed tool calls can be
|
||||
// retried with the same caller transcript context.
|
||||
this.consumePartialUserTranscript(callId, state.partialUserTranscript);
|
||||
}
|
||||
} finally {
|
||||
|
||||
@@ -3,11 +3,20 @@ import { TerminalStates } from "../types.js";
|
||||
|
||||
const CHECK_INTERVAL_MS = 30_000;
|
||||
|
||||
/**
|
||||
* Starts a periodic cleanup loop for outbound calls that never reach answered state.
|
||||
*
|
||||
* Returns a stop function when enabled, or null when the configured threshold
|
||||
* disables stale-call cleanup.
|
||||
*/
|
||||
export function startStaleCallReaper(params: {
|
||||
/** Call manager that owns active-call enumeration and provider hangup/finalization. */
|
||||
manager: CallManager;
|
||||
/** Maximum unanswered call age in seconds; missing or non-positive disables the loop. */
|
||||
staleCallReaperSeconds?: number;
|
||||
}): (() => void) | null {
|
||||
const maxAgeSeconds = params.staleCallReaperSeconds;
|
||||
// A missing or non-positive threshold disables the reaper without installing timers.
|
||||
if (!maxAgeSeconds || maxAgeSeconds <= 0) {
|
||||
return null;
|
||||
}
|
||||
@@ -16,6 +25,8 @@ export function startStaleCallReaper(params: {
|
||||
const interval = setInterval(() => {
|
||||
const now = Date.now();
|
||||
for (const call of params.manager.getActiveCalls()) {
|
||||
// Only reap unanswered in-flight calls; answered or terminal calls are owned
|
||||
// by normal lifecycle handling even if their startedAt timestamp is old.
|
||||
if (call.answeredAt || TerminalStates.has(call.state)) {
|
||||
continue;
|
||||
}
|
||||
@@ -26,6 +37,8 @@ export function startStaleCallReaper(params: {
|
||||
`[voice-call] Reaping stale call ${call.callId} (age: ${Math.round(age / 1000)}s, state: ${call.state})`,
|
||||
);
|
||||
void params.manager.endCall(call.callId).catch((err: unknown) => {
|
||||
// Keep the interval alive if a provider hangup fails; the next tick can retry
|
||||
// while logging the provider/runtime failure for operators.
|
||||
console.warn(`[voice-call] Reaper failed to end call ${call.callId}:`, err);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1,21 +1,37 @@
|
||||
/** Normalized provider websocket frame consumed by the realtime voice handler. */
|
||||
export type StreamFrame =
|
||||
/** First accepted carrier frame that binds a stream id to the provider call id. */
|
||||
| { kind: "start"; streamId: string; providerCallId: string }
|
||||
| {
|
||||
/** Carrier media payload after basic shape/base64 validation. */
|
||||
kind: "media";
|
||||
/** Base64 encoded 8 kHz mu-law audio payload passed through to the bridge. */
|
||||
payloadBase64: string;
|
||||
/** Provider timestamp when present and parseable as an integer millisecond value. */
|
||||
timestampMs?: number;
|
||||
/** Provider track label, such as inbound/outbound, when supplied by the carrier. */
|
||||
track?: string;
|
||||
}
|
||||
/** Provider acknowledgement marker; used to know when buffered outbound audio finished. */
|
||||
| { kind: "mark"; name?: string }
|
||||
/** Provider stream-end signal. */
|
||||
| { kind: "stop" }
|
||||
/** Structured carrier-side stream failure. */
|
||||
| { kind: "error"; code?: string; title?: string; detail?: string }
|
||||
/** Malformed, unsupported, or intentionally ignored provider frame. */
|
||||
| { kind: "ignored" };
|
||||
|
||||
/** Translates provider websocket envelopes into normalized frames and outbound media controls. */
|
||||
export interface StreamFrameAdapter {
|
||||
/** Provider id for logs and handler routing. */
|
||||
readonly providerName: "twilio" | "telnyx";
|
||||
/** Parses one raw carrier websocket message without throwing on malformed provider input. */
|
||||
parseInbound(rawMessage: string): StreamFrame;
|
||||
/** Serializes outbound audio using provider-required stream identifiers. */
|
||||
serializeMedia(payloadBase64: string): string;
|
||||
/** Serializes the provider command that clears queued outbound audio. */
|
||||
serializeClear(): string;
|
||||
/** Serializes an outbound marker so playback completion can be observed later. */
|
||||
serializeMark(name: string): string;
|
||||
}
|
||||
|
||||
@@ -24,6 +40,7 @@ function parseTimestampMs(value: unknown): number | undefined {
|
||||
return value;
|
||||
}
|
||||
if (typeof value === "string" && /^[+-]?\d+$/.test(value.trim())) {
|
||||
// Providers may send timestamps as strings; reject partial tokens like "20ms".
|
||||
const parsed = Number(value.trim());
|
||||
return Number.isSafeInteger(parsed) ? parsed : undefined;
|
||||
}
|
||||
@@ -37,6 +54,8 @@ function tryParseJson(rawMessage: string): Record<string, unknown> | null {
|
||||
return parsed as Record<string, unknown>;
|
||||
}
|
||||
} catch {
|
||||
// Malformed provider frames are ignored, not fatal. The realtime handler
|
||||
// keeps the socket open so one bad carrier frame does not end the call.
|
||||
/* fall through */
|
||||
}
|
||||
return null;
|
||||
@@ -58,6 +77,8 @@ function normalizeBase64ForCompare(value: string): string {
|
||||
|
||||
function isValidBase64Payload(value: string): boolean {
|
||||
const buffer = Buffer.from(value, "base64");
|
||||
// Node's base64 decoder is permissive; round-trip before forwarding audio so
|
||||
// malformed provider payloads cannot reach the realtime bridge.
|
||||
return normalizeBase64ForCompare(buffer.toString("base64")) === normalizeBase64ForCompare(value);
|
||||
}
|
||||
|
||||
@@ -145,10 +166,12 @@ function serializeMarkFrame(name: string, streamSid?: string): string {
|
||||
});
|
||||
}
|
||||
|
||||
/** Twilio media adapter; outbound control frames reuse the streamSid learned from start. */
|
||||
export class TwilioStreamFrameAdapter implements StreamFrameAdapter {
|
||||
readonly providerName = "twilio" as const;
|
||||
private streamSid = "";
|
||||
|
||||
/** Captures Twilio's streamSid from the start frame for later outbound control frames. */
|
||||
parseInbound(rawMessage: string): StreamFrame {
|
||||
return parseProviderInboundFrame(rawMessage, (msg) => {
|
||||
const startData = readRecordField(msg, "start");
|
||||
@@ -157,6 +180,8 @@ export class TwilioStreamFrameAdapter implements StreamFrameAdapter {
|
||||
if (!streamSid || !callSid) {
|
||||
return undefined;
|
||||
}
|
||||
// Twilio requires streamSid on outbound media/mark/clear frames; capture
|
||||
// it from the accepted start frame instead of trusting later media frames.
|
||||
this.streamSid = streamSid;
|
||||
return { kind: "start", streamId: streamSid, providerCallId: callSid };
|
||||
});
|
||||
@@ -175,9 +200,11 @@ export class TwilioStreamFrameAdapter implements StreamFrameAdapter {
|
||||
}
|
||||
}
|
||||
|
||||
/** Telnyx media adapter; outbound control frames intentionally omit Twilio-style streamSid. */
|
||||
export class TelnyxStreamFrameAdapter implements StreamFrameAdapter {
|
||||
readonly providerName = "telnyx" as const;
|
||||
|
||||
/** Parses Telnyx's split stream_id/call_control_id start shape plus provider error frames. */
|
||||
parseInbound(rawMessage: string): StreamFrame {
|
||||
return parseProviderInboundFrame(
|
||||
rawMessage,
|
||||
@@ -203,6 +230,8 @@ export class TelnyxStreamFrameAdapter implements StreamFrameAdapter {
|
||||
return undefined;
|
||||
}
|
||||
const errorData = readRecordField(msg, "payload");
|
||||
// Telnyx reports stream failures as structured frames; surface them so
|
||||
// callers can log carrier failures instead of treating them as noise.
|
||||
return {
|
||||
kind: "error",
|
||||
code:
|
||||
|
||||
@@ -14,6 +14,7 @@ type TailscaleCommandStdout = {
|
||||
text: string;
|
||||
};
|
||||
|
||||
/** Appends command stdout while dropping retained text once the safety cap is exceeded. */
|
||||
export function appendTailscaleCommandStdout(
|
||||
current: TailscaleCommandStdout,
|
||||
data: Buffer | string,
|
||||
@@ -25,6 +26,7 @@ export function appendTailscaleCommandStdout(
|
||||
const buffer = Buffer.isBuffer(data) ? data : Buffer.from(data);
|
||||
const bytes = current.bytes + buffer.byteLength;
|
||||
if (bytes > maxBytes) {
|
||||
// Avoid keeping oversized command output in memory or logs after the limit trips.
|
||||
return { bytes, exceeded: true, text: "" };
|
||||
}
|
||||
return { bytes, exceeded: false, text: `${current.text}${buffer.toString("utf8")}` };
|
||||
@@ -53,6 +55,7 @@ function runTailscaleCommand(
|
||||
proc.stdout.on("data", (data) => {
|
||||
stdout = appendTailscaleCommandStdout(stdout, data);
|
||||
if (stdout.exceeded) {
|
||||
// Treat runaway tailscale output like a failed command; callers only need availability.
|
||||
proc.kill("SIGKILL");
|
||||
finish({ code: -1, stdout: "" });
|
||||
}
|
||||
@@ -73,6 +76,7 @@ function runTailscaleCommand(
|
||||
});
|
||||
}
|
||||
|
||||
/** Reads local Tailscale identity, returning null when the CLI is absent or unusable. */
|
||||
export async function getTailscaleSelfInfo(): Promise<TailscaleSelfInfo | null> {
|
||||
const { code, stdout } = await runTailscaleCommand(["status", "--json", "--peers=false"]);
|
||||
if (code !== 0) {
|
||||
@@ -82,6 +86,7 @@ export async function getTailscaleSelfInfo(): Promise<TailscaleSelfInfo | null>
|
||||
try {
|
||||
const status = JSON.parse(stdout);
|
||||
return {
|
||||
// tailscale status reports a trailing dot; route URLs need the host without it.
|
||||
dnsName: status.Self?.DNSName?.replace(/\.$/, "") || null,
|
||||
nodeId: status.Self?.ID || null,
|
||||
};
|
||||
@@ -90,11 +95,13 @@ export async function getTailscaleSelfInfo(): Promise<TailscaleSelfInfo | null>
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns the local node's MagicDNS name when Tailscale status is available. */
|
||||
export async function getTailscaleDnsName(): Promise<string | null> {
|
||||
const info = await getTailscaleSelfInfo();
|
||||
return info?.dnsName ?? null;
|
||||
}
|
||||
|
||||
/** Activates one Tailscale serve/funnel path and returns its public URL on success. */
|
||||
export async function setupTailscaleExposureRoute(opts: {
|
||||
mode: "serve" | "funnel";
|
||||
path: string;
|
||||
@@ -125,6 +132,7 @@ export async function setupTailscaleExposureRoute(opts: {
|
||||
return null;
|
||||
}
|
||||
|
||||
/** Removes one Tailscale serve/funnel path through the same bounded CLI wrapper. */
|
||||
export async function cleanupTailscaleExposureRoute(opts: {
|
||||
mode: "serve" | "funnel";
|
||||
path: string;
|
||||
@@ -132,6 +140,7 @@ export async function cleanupTailscaleExposureRoute(opts: {
|
||||
await runTailscaleCommand([opts.mode, "off", opts.path]);
|
||||
}
|
||||
|
||||
/** Maps voice-call config onto a local webhook URL exposed through Tailscale. */
|
||||
export async function setupTailscaleExposure(config: VoiceCallConfig): Promise<string | null> {
|
||||
if (config.tailscale.mode === "off") {
|
||||
return null;
|
||||
@@ -146,6 +155,7 @@ export async function setupTailscaleExposure(config: VoiceCallConfig): Promise<s
|
||||
});
|
||||
}
|
||||
|
||||
/** Cleans up the configured Tailscale exposure path when Tailscale exposure was enabled. */
|
||||
export async function cleanupTailscaleExposure(config: VoiceCallConfig): Promise<void> {
|
||||
if (config.tailscale.mode === "off") {
|
||||
return;
|
||||
|
||||
@@ -2,6 +2,7 @@ import { once } from "node:events";
|
||||
import http from "node:http";
|
||||
import { WebSocket } from "ws";
|
||||
|
||||
/** Race a promise against a short test timeout and always clear the timer. */
|
||||
export const withTimeout = async <T>(promise: Promise<T>, timeoutMs = 2000): Promise<T> => {
|
||||
let timer: ReturnType<typeof setTimeout> | null = null;
|
||||
const timeout = new Promise<never>((_, reject) => {
|
||||
@@ -17,15 +18,20 @@ export const withTimeout = async <T>(promise: Promise<T>, timeoutMs = 2000): Pro
|
||||
}
|
||||
};
|
||||
|
||||
/** Starts a loopback HTTP server that delegates websocket upgrades to the caller. */
|
||||
export const startUpgradeWsServer = async (params: {
|
||||
/** Path advertised in the returned websocket URL. */
|
||||
urlPath: string;
|
||||
/** Upgrade handler under test; owns accepting or rejecting the socket. */
|
||||
onUpgrade: (
|
||||
request: http.IncomingMessage,
|
||||
socket: Parameters<http.Server["emit"]>[2],
|
||||
head: Buffer,
|
||||
) => void;
|
||||
}): Promise<{
|
||||
/** Loopback websocket URL bound to the ephemeral test port. */
|
||||
url: string;
|
||||
/** Close the HTTP server and wait for the close callback. */
|
||||
close: () => Promise<void>;
|
||||
}> => {
|
||||
const server = http.createServer();
|
||||
@@ -52,12 +58,14 @@ export const startUpgradeWsServer = async (params: {
|
||||
};
|
||||
};
|
||||
|
||||
/** Open a websocket and wait until the connection reaches the open state. */
|
||||
export const connectWs = async (url: string): Promise<WebSocket> => {
|
||||
const ws = new WebSocket(url);
|
||||
await withTimeout(once(ws, "open") as Promise<[unknown]>);
|
||||
return ws;
|
||||
};
|
||||
|
||||
/** Wait for websocket close and normalize the close reason buffer to text. */
|
||||
export const waitForClose = async (
|
||||
ws: WebSocket,
|
||||
): Promise<{
|
||||
|
||||
@@ -13,14 +13,20 @@ export interface ShellCaptureOptions extends Omit<
|
||||
ExecutionEnvExecOptions,
|
||||
"onStdout" | "onStderr"
|
||||
> {
|
||||
/** Optional observer for sanitized stdout/stderr chunks as they arrive. */
|
||||
onChunk?: (chunk: string) => void;
|
||||
}
|
||||
|
||||
export interface ShellCaptureResult {
|
||||
/** Bounded output tail returned to the model or caller. */
|
||||
output: string;
|
||||
/** Process exit code, undefined when the command was cancelled. */
|
||||
exitCode: number | undefined;
|
||||
/** True when execution ended through the abort path. */
|
||||
cancelled: boolean;
|
||||
/** True when returned output was shortened to the tail window. */
|
||||
truncated: boolean;
|
||||
/** Temp file containing full output once captured output crosses the in-memory limit. */
|
||||
fullOutputPath?: string;
|
||||
}
|
||||
|
||||
@@ -32,6 +38,7 @@ function toExecutionError(error: unknown): ExecutionError {
|
||||
return new ExecutionError("unknown", cause.message, cause);
|
||||
}
|
||||
|
||||
/** Removes control/binary markers while preserving tabs and line breaks for shell diagnostics. */
|
||||
export function sanitizeBinaryOutput(str: string): string {
|
||||
return Array.from(str)
|
||||
.filter((char) => {
|
||||
@@ -53,6 +60,7 @@ export function sanitizeBinaryOutput(str: string): string {
|
||||
.join("");
|
||||
}
|
||||
|
||||
/** Executes a shell command with bounded in-memory output and optional full-output spillover. */
|
||||
export async function executeShellWithCapture(
|
||||
env: ExecutionEnv,
|
||||
command: string,
|
||||
@@ -90,6 +98,7 @@ export async function executeShellWithCapture(
|
||||
if (!previous.ok) {
|
||||
return previous;
|
||||
}
|
||||
// Create the spill file lazily so short commands never touch the filesystem.
|
||||
const tempFile = await env.createTempFile({
|
||||
prefix: "bash-",
|
||||
suffix: ".log",
|
||||
@@ -113,6 +122,8 @@ export async function executeShellWithCapture(
|
||||
totalBytes += encoder.encode(chunk).byteLength;
|
||||
const text = sanitizeBinaryOutput(chunk).replace(/\r/g, "");
|
||||
if (totalBytes > DEFAULT_MAX_BYTES && !fullOutputPath) {
|
||||
// Once raw output crosses the default window, keep the model-facing tail
|
||||
// in memory and stream the full sanitized transcript into a temp file.
|
||||
ensureFullOutputFile(outputChunks.join("") + text);
|
||||
} else {
|
||||
appendFullOutput(text);
|
||||
|
||||
@@ -28,13 +28,18 @@ import { buildDeviceAuthPayloadV3 } from "./device-auth.js";
|
||||
import { resolveConnectChallengeTimeoutMs, resolveSafeTimeoutDelayMs } from "./timeouts.js";
|
||||
|
||||
export type DeviceIdentity = {
|
||||
/** Stable gateway device id associated with this keypair. */
|
||||
deviceId: string;
|
||||
/** PEM private key used by host deps to sign device-auth payloads. */
|
||||
privateKeyPem: string;
|
||||
/** PEM public key sent to the gateway during device pairing/auth. */
|
||||
publicKeyPem: string;
|
||||
};
|
||||
|
||||
export type DeviceAuthTokenRecord = {
|
||||
/** Stored device bearer token returned by the gateway. */
|
||||
token?: string;
|
||||
/** Scopes granted to the stored token; reused only when still sufficient. */
|
||||
scopes?: string[];
|
||||
};
|
||||
|
||||
@@ -306,8 +311,11 @@ type Pending = {
|
||||
};
|
||||
|
||||
export type GatewayClientRequestOptions = {
|
||||
/** Wait for an accepted response followed by a final response. */
|
||||
expectFinal?: boolean;
|
||||
/** Per-request timeout; null disables request timeout scheduling. */
|
||||
timeoutMs?: number | null;
|
||||
/** Cancels the request and removes its pending response handler. */
|
||||
signal?: AbortSignal;
|
||||
/** Called once for expectFinal requests after an accepted response, before the final result. */
|
||||
onAccepted?: (payload: unknown) => void;
|
||||
@@ -355,11 +363,15 @@ const DEFAULT_GATEWAY_CLIENT_URL = "ws://127.0.0.1:18789";
|
||||
const DEFAULT_CLIENT_VERSION = "0.0.0";
|
||||
|
||||
export type GatewayReconnectPausedInfo = {
|
||||
/** WebSocket close code that paused reconnect attempts. */
|
||||
code: number;
|
||||
/** Raw close reason supplied by the gateway/socket. */
|
||||
reason: string;
|
||||
/** Structured connect-error detail code when the close came from gateway auth/startup. */
|
||||
detailCode: string | null;
|
||||
};
|
||||
|
||||
/** Error wrapper for gateway response frames that preserves retry metadata for callers. */
|
||||
export class GatewayClientRequestError extends Error {
|
||||
readonly gatewayCode: string;
|
||||
readonly details?: unknown;
|
||||
@@ -397,8 +409,10 @@ export function isGatewayConnectAssemblyError(value: unknown): value is Error {
|
||||
);
|
||||
}
|
||||
|
||||
/** Construction options for GatewayClient connections, auth, protocol bounds, and callbacks. */
|
||||
export type GatewayClientOptions = {
|
||||
url?: string; // ws://127.0.0.1:18789
|
||||
/** Client-side watchdog for receiving the connect challenge. */
|
||||
connectChallengeTimeoutMs?: number;
|
||||
/** @deprecated Use connectChallengeTimeoutMs. */
|
||||
connectDelayMs?: number;
|
||||
@@ -450,6 +464,7 @@ export const GATEWAY_CLOSE_CODE_HINTS: Readonly<Record<number, string>> = {
|
||||
1013: "try again later",
|
||||
};
|
||||
|
||||
/** Returns the short operator-facing description for common gateway close codes. */
|
||||
export function describeGatewayCloseCode(code: number): string | undefined {
|
||||
return GATEWAY_CLOSE_CODE_HINTS[code];
|
||||
}
|
||||
@@ -490,6 +505,8 @@ export function resolveGatewayClientConnectChallengeTimeoutMs(
|
||||
"connectChallengeTimeoutMs" | "connectDelayMs" | "preauthHandshakeTimeoutMs"
|
||||
>,
|
||||
): number {
|
||||
// Keep the legacy connectDelayMs alias feeding the same clamp path until the
|
||||
// public option is removed; explicit challenge timeout still wins.
|
||||
return resolveConnectChallengeTimeoutMs(readConnectChallengeTimeoutOverride(opts), {
|
||||
configuredTimeoutMs: opts.preauthHandshakeTimeoutMs,
|
||||
});
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
/**
|
||||
* Normalizes optional device metadata before it becomes part of a signed auth
|
||||
* payload.
|
||||
*/
|
||||
export function normalizeDeviceMetadataForAuth(value?: string | null): string {
|
||||
if (typeof value !== "string") {
|
||||
return "";
|
||||
@@ -6,25 +10,38 @@ export function normalizeDeviceMetadataForAuth(value?: string | null): string {
|
||||
if (!trimmed) {
|
||||
return "";
|
||||
}
|
||||
// Preserve the gateway's historical ASCII-only case fold; locale-sensitive
|
||||
// lowercasing would change existing signatures for non-ASCII device names.
|
||||
return trimmed.replace(/[A-Z]/g, (char) => String.fromCharCode(char.charCodeAt(0) + 32));
|
||||
}
|
||||
|
||||
type DeviceAuthPayloadParams = {
|
||||
/** Stable device id paired with the gateway. */
|
||||
deviceId: string;
|
||||
/** Client application id, such as the desktop or mobile client. */
|
||||
clientId: string;
|
||||
/** Gateway client mode included in the signed payload. */
|
||||
clientMode: string;
|
||||
/** Requested gateway role for the authenticated device. */
|
||||
role: string;
|
||||
/** Ordered scope list; order is signature-significant. */
|
||||
scopes: string[];
|
||||
/** Signing timestamp in epoch milliseconds. */
|
||||
signedAtMs: number;
|
||||
/** Optional bootstrap token; null/undefined still reserves the v2/v3 field. */
|
||||
token?: string | null;
|
||||
/** Per-request nonce included to prevent replay. */
|
||||
nonce: string;
|
||||
};
|
||||
|
||||
type DeviceAuthPayloadV3Params = DeviceAuthPayloadParams & {
|
||||
/** Optional normalized platform metadata appended after the v2 fields. */
|
||||
platform?: string | null;
|
||||
/** Optional normalized device-family metadata appended after platform. */
|
||||
deviceFamily?: string | null;
|
||||
};
|
||||
|
||||
/** Builds the canonical v2 device-auth string that the gateway verifies byte-for-byte. */
|
||||
export function buildDeviceAuthPayload(params: DeviceAuthPayloadParams): string {
|
||||
const scopes = params.scopes.join(",");
|
||||
const token = params.token ?? "";
|
||||
@@ -41,6 +58,7 @@ export function buildDeviceAuthPayload(params: DeviceAuthPayloadParams): string
|
||||
].join("|");
|
||||
}
|
||||
|
||||
/** Builds the canonical v3 device-auth string with normalized platform/family metadata. */
|
||||
export function buildDeviceAuthPayloadV3(params: DeviceAuthPayloadV3Params): string {
|
||||
const scopes = params.scopes.join(",");
|
||||
const token = params.token ?? "";
|
||||
|
||||
@@ -2,19 +2,29 @@ import { resolveFiniteTimeoutDelayMs } from "./timeouts.js";
|
||||
|
||||
/** Readiness probe outcome with timing data for diagnosing event-loop stalls. */
|
||||
export type EventLoopReadyResult = {
|
||||
/** True when enough consecutive timer checks stayed below the drift threshold. */
|
||||
ready: boolean;
|
||||
/** Wall-clock time spent in the readiness probe. */
|
||||
elapsedMs: number;
|
||||
/** Largest observed timer drift across all checks. */
|
||||
maxDriftMs: number;
|
||||
/** Number of scheduled timer checks that fired before completion. */
|
||||
checks: number;
|
||||
/** True when the supplied AbortSignal stopped the probe before readiness or timeout. */
|
||||
aborted: boolean;
|
||||
};
|
||||
|
||||
/** Controls how aggressively the client waits for low-drift timer checks before starting IO. */
|
||||
export type EventLoopReadyOptions = {
|
||||
/** Maximum wall-clock time to wait before reporting not ready. */
|
||||
maxWaitMs?: number;
|
||||
/** Delay between drift samples; clamped to safe Node timer bounds. */
|
||||
intervalMs?: number;
|
||||
/** Maximum acceptable timer drift for a sample to count as ready. */
|
||||
driftThresholdMs?: number;
|
||||
/** Number of low-drift samples required before the event loop is considered ready. */
|
||||
consecutiveReadyChecks?: number;
|
||||
/** Cancels the probe without starting client IO. */
|
||||
signal?: AbortSignal;
|
||||
};
|
||||
|
||||
@@ -104,6 +114,8 @@ export async function waitForEventLoopReady(
|
||||
if (driftMs > driftThresholdMs) {
|
||||
readyChecks = 0;
|
||||
} else {
|
||||
// Require consecutive low-drift samples so one lucky timer after a
|
||||
// blocked loop does not start IO while the process is still saturated.
|
||||
readyChecks += 1;
|
||||
}
|
||||
if (readyChecks >= consecutiveReadyChecks) {
|
||||
|
||||
@@ -7,6 +7,7 @@ import {
|
||||
import { resolveConnectChallengeTimeoutMs } from "./timeouts.js";
|
||||
|
||||
export type GatewayClientStartable = {
|
||||
/** Starts the underlying gateway connection after readiness succeeds. */
|
||||
start(): void;
|
||||
};
|
||||
|
||||
@@ -17,11 +18,14 @@ export type EventLoopReadyWaiter = (
|
||||
|
||||
/** Timeout and abort controls for delaying client start until the loop can process IO. */
|
||||
export type GatewayClientStartReadinessOptions = {
|
||||
/** Explicit readiness wait cap; wins over client connection timeout settings. */
|
||||
timeoutMs?: number;
|
||||
/** Client connection settings used to derive a readiness cap when timeoutMs is absent. */
|
||||
clientOptions?: Pick<
|
||||
GatewayClientOptions,
|
||||
"connectChallengeTimeoutMs" | "connectDelayMs" | "preauthHandshakeTimeoutMs"
|
||||
>;
|
||||
/** Cancels readiness without starting the client. */
|
||||
signal?: AbortSignal;
|
||||
};
|
||||
|
||||
@@ -33,6 +37,8 @@ function resolveGatewayClientStartReadinessTimeoutMs(
|
||||
}
|
||||
const clientOptions = options.clientOptions ?? {};
|
||||
const timeoutOverride =
|
||||
// Prefer the challenge watchdog over the older connectDelayMs alias so
|
||||
// readiness stays aligned with the server-side preauth handshake window.
|
||||
typeof clientOptions.connectChallengeTimeoutMs === "number" &&
|
||||
Number.isFinite(clientOptions.connectChallengeTimeoutMs)
|
||||
? clientOptions.connectChallengeTimeoutMs
|
||||
@@ -55,6 +61,8 @@ export async function startGatewayClientWithReadinessWait(
|
||||
maxWaitMs: resolveGatewayClientStartReadinessTimeoutMs(options),
|
||||
signal: options.signal,
|
||||
});
|
||||
// The readiness waiter can race with abort delivery; gate start on both the
|
||||
// returned state and the current signal so aborted startup remains side-effect-free.
|
||||
if (readiness.ready && !readiness.aborted && options.signal?.aborted !== true) {
|
||||
client.start();
|
||||
}
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
function parseStrictPositiveInteger(value: string): number | undefined {
|
||||
const trimmed = value.trim();
|
||||
// Env overrides accept only decimal integers so units/decimals do not
|
||||
// silently truncate into a shorter timeout.
|
||||
if (!/^\+?\d+$/u.test(trimmed)) {
|
||||
return undefined;
|
||||
}
|
||||
@@ -19,6 +21,8 @@ export const MAX_CONNECT_CHALLENGE_TIMEOUT_MS = DEFAULT_PREAUTH_HANDSHAKE_TIMEOU
|
||||
/** Clamps arbitrary timer delays to Node's safe range and an optional floor. */
|
||||
export function resolveSafeTimeoutDelayMs(delayMs: number, opts?: { minMs?: number }): number {
|
||||
const rawMinMs = opts?.minMs ?? 1;
|
||||
// Clamp the floor first; callers can opt into immediate timers with minMs=0,
|
||||
// but invalid floors still fall back to the nonzero default timeout guard.
|
||||
const minMs = Math.min(
|
||||
MAX_SAFE_TIMEOUT_DELAY_MS,
|
||||
Math.max(0, Number.isFinite(rawMinMs) ? Math.floor(rawMinMs) : 1),
|
||||
@@ -59,6 +63,8 @@ export function clampConnectChallengeTimeoutMs(
|
||||
timeoutMs: number,
|
||||
maxTimeoutMs = MAX_CONNECT_CHALLENGE_TIMEOUT_MS,
|
||||
): number {
|
||||
// Keep the upper bound at least as large as the watchdog floor so callers
|
||||
// cannot invert the clamp range with an undersized configured server timeout.
|
||||
return Math.max(
|
||||
MIN_CONNECT_CHALLENGE_TIMEOUT_MS,
|
||||
Math.min(Math.max(MIN_CONNECT_CHALLENGE_TIMEOUT_MS, maxTimeoutMs), timeoutMs),
|
||||
@@ -105,6 +111,8 @@ export function resolveConnectChallengeTimeoutMs(
|
||||
}
|
||||
const envOverride = getConnectChallengeTimeoutMsFromEnv(params?.env);
|
||||
if (envOverride !== undefined) {
|
||||
// Explicit client overrides are allowed to exceed the server-derived cap
|
||||
// for tests and slow environments; still apply the lower watchdog floor.
|
||||
return clampConnectChallengeTimeoutMs(envOverride, Math.max(maxTimeoutMs, envOverride));
|
||||
}
|
||||
return clampConnectChallengeTimeoutMs(configuredPreauthTimeoutMs, maxTimeoutMs);
|
||||
|
||||
@@ -152,6 +152,7 @@ const CONNECT_PAIRING_REQUIRED_MESSAGE_BY_REASON: Readonly<
|
||||
"metadata-upgrade": "device metadata change pending approval",
|
||||
};
|
||||
|
||||
/** Maps server auth failure reasons into stable protocol detail codes for clients. */
|
||||
export function resolveAuthConnectErrorDetailCode(
|
||||
reason: string | undefined,
|
||||
): ConnectErrorDetailCode {
|
||||
@@ -191,6 +192,7 @@ export function resolveAuthConnectErrorDetailCode(
|
||||
}
|
||||
}
|
||||
|
||||
/** Maps device-auth verification failures into stable protocol detail codes. */
|
||||
export function resolveDeviceAuthConnectErrorDetailCode(
|
||||
reason: string | undefined,
|
||||
): ConnectErrorDetailCode {
|
||||
@@ -212,6 +214,7 @@ export function resolveDeviceAuthConnectErrorDetailCode(
|
||||
}
|
||||
}
|
||||
|
||||
/** Reads the opaque gateway error detail code without trusting the rest of the payload. */
|
||||
export function readConnectErrorDetailCode(details: unknown): string | null {
|
||||
if (!details || typeof details !== "object" || Array.isArray(details)) {
|
||||
return null;
|
||||
@@ -220,6 +223,7 @@ export function readConnectErrorDetailCode(details: unknown): string | null {
|
||||
return typeof code === "string" && code.trim().length > 0 ? code : null;
|
||||
}
|
||||
|
||||
/** Extracts client retry hints while dropping unknown or malformed advice fields. */
|
||||
export function readConnectErrorRecoveryAdvice(details: unknown): ConnectErrorRecoveryAdvice {
|
||||
if (!details || typeof details !== "object" || Array.isArray(details)) {
|
||||
return {};
|
||||
@@ -249,6 +253,7 @@ function normalizePairingConnectReason(value: unknown): ConnectPairingRequiredRe
|
||||
: undefined;
|
||||
}
|
||||
|
||||
/** Keeps pairing request ids log/close-reason safe before echoing them to clients. */
|
||||
export function normalizePairingConnectRequestId(value: unknown): string | undefined {
|
||||
const normalized = normalizeOptionalString(value);
|
||||
return normalized && PAIRING_CONNECT_REQUEST_ID_PATTERN.test(normalized) ? normalized : undefined;
|
||||
@@ -319,6 +324,7 @@ export function buildPairingConnectRecoveryTitle(
|
||||
: "Gateway pairing approval required.";
|
||||
}
|
||||
|
||||
/** Builds the structured PAIRING_REQUIRED details payload shared by HTTP and WS handshakes. */
|
||||
export function buildPairingConnectErrorDetails(params: {
|
||||
reason: ConnectPairingRequiredReason | undefined;
|
||||
requestId?: string;
|
||||
@@ -356,6 +362,7 @@ export function buildPairingConnectErrorDetails(params: {
|
||||
});
|
||||
}
|
||||
|
||||
/** Formats the compact WebSocket close reason while preserving a safe request id when present. */
|
||||
export function buildPairingConnectCloseReason(params: {
|
||||
reason: ConnectPairingRequiredReason | undefined;
|
||||
requestId?: string;
|
||||
@@ -365,6 +372,7 @@ export function buildPairingConnectCloseReason(params: {
|
||||
return requestId ? `${message} (requestId: ${requestId})` : message;
|
||||
}
|
||||
|
||||
/** Normalizes structured pairing details received by clients from untrusted gateway errors. */
|
||||
export function readPairingConnectErrorDetails(
|
||||
details: unknown,
|
||||
): PairingConnectErrorDetails | null {
|
||||
@@ -430,6 +438,7 @@ export function readConnectPairingRequiredDetails(
|
||||
};
|
||||
}
|
||||
|
||||
/** Recovers pairing details from older string-only close reasons. */
|
||||
export function readConnectPairingRequiredMessage(
|
||||
message: string | null | undefined,
|
||||
): ConnectPairingRequiredDetails | null {
|
||||
@@ -471,6 +480,7 @@ export function formatConnectPairingRequiredMessage(details: unknown): string {
|
||||
return pairing?.requestId ? `${base} (requestId: ${pairing.requestId})` : base;
|
||||
}
|
||||
|
||||
/** Formats gateway connect failures with pairing/protocol detail-aware client text. */
|
||||
export function formatConnectErrorMessage(params: { message?: string; details?: unknown }): string {
|
||||
if (readConnectErrorDetailCode(params.details) === ConnectErrorDetailCodes.PAIRING_REQUIRED) {
|
||||
return formatConnectPairingRequiredMessage(params.details);
|
||||
|
||||
@@ -22,6 +22,7 @@ export class EventStream<T, R = T> implements AsyncIterable<T> {
|
||||
});
|
||||
}
|
||||
|
||||
/** Pushes one event, resolving `result()` when the event is terminal. */
|
||||
push(event: T): void {
|
||||
if (this.done) {
|
||||
return;
|
||||
@@ -32,6 +33,8 @@ export class EventStream<T, R = T> implements AsyncIterable<T> {
|
||||
this.resolveFinalResult(this.extractResult(event));
|
||||
}
|
||||
|
||||
// Deliver directly to one pending iterator before queuing so live streams
|
||||
// do not accumulate unbounded buffered events while a consumer is waiting.
|
||||
const waiter = this.waiting.shift();
|
||||
if (waiter) {
|
||||
waiter({ value: event, done: false });
|
||||
@@ -40,6 +43,7 @@ export class EventStream<T, R = T> implements AsyncIterable<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/** Ends iteration and optionally resolves the final result for streams without a terminal event. */
|
||||
end(result?: R): void {
|
||||
this.done = true;
|
||||
if (result !== undefined) {
|
||||
@@ -69,6 +73,7 @@ export class EventStream<T, R = T> implements AsyncIterable<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/** Resolves to the provider's final assistant message/error when the stream completes. */
|
||||
result(): Promise<R> {
|
||||
return this.finalResultPromise;
|
||||
}
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import { scanFenceSpans, type FenceScanState, type FenceSpan } from "./fences.js";
|
||||
|
||||
/** Incremental inline-code scanner state carried between streamed chunks. */
|
||||
export type InlineCodeState = {
|
||||
/** True when a previous chunk opened a backtick run that has not closed yet. */
|
||||
open: boolean;
|
||||
/** Backtick run length required to close the current inline-code span. */
|
||||
ticks: number;
|
||||
};
|
||||
|
||||
@@ -21,7 +24,7 @@ type CodeSpanIndex = {
|
||||
isInside: (index: number) => boolean;
|
||||
};
|
||||
|
||||
/** Builds a lookup for fenced and inline code spans while preserving scanner state. */
|
||||
/** Builds a zero-based code-region lookup for fenced and inline spans, plus next scanner state. */
|
||||
export function buildCodeSpanIndex(
|
||||
text: string,
|
||||
inlineState?: InlineCodeState,
|
||||
@@ -59,6 +62,7 @@ function parseInlineCodeSpans(
|
||||
while (i < text.length) {
|
||||
const fence = findFenceSpanAtInclusive(fenceSpans, i);
|
||||
if (fence) {
|
||||
// Fenced code owns its full range; inline backticks inside it must not change state.
|
||||
i = fence.end;
|
||||
continue;
|
||||
}
|
||||
@@ -91,6 +95,7 @@ function parseInlineCodeSpans(
|
||||
}
|
||||
|
||||
if (open) {
|
||||
// Treat an unfinished span as code through chunk end so partial tags stay protected.
|
||||
spans.push([openStart, text.length]);
|
||||
}
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@ type MarkdownToken = {
|
||||
level?: number;
|
||||
};
|
||||
|
||||
/** Style categories tracked as ranges over rendered plaintext. */
|
||||
export type MarkdownStyle =
|
||||
| "bold"
|
||||
| "italic"
|
||||
@@ -37,19 +38,23 @@ export type MarkdownStyle =
|
||||
| "spoiler"
|
||||
| "blockquote";
|
||||
|
||||
/** Half-open style range in `MarkdownIR.text`; `end` is exclusive. */
|
||||
export type MarkdownStyleSpan = {
|
||||
start: number;
|
||||
end: number;
|
||||
style: MarkdownStyle;
|
||||
/** Fence language info for code blocks when markdown-it provided one. */
|
||||
language?: string;
|
||||
};
|
||||
|
||||
/** Half-open link-label range in `MarkdownIR.text` with the original href. */
|
||||
export type MarkdownLinkSpan = {
|
||||
start: number;
|
||||
end: number;
|
||||
href: string;
|
||||
};
|
||||
|
||||
/** Plaintext markdown projection plus style/link ranges into that text. */
|
||||
export type MarkdownIR = {
|
||||
text: string;
|
||||
styles: MarkdownStyleSpan[];
|
||||
@@ -68,11 +73,13 @@ function createStyleSpan(params: MarkdownStyleSpan): MarkdownStyleSpan {
|
||||
return span;
|
||||
}
|
||||
|
||||
/** Parsed table text after markdown inline rendering has been applied per cell. */
|
||||
export type MarkdownTableData = {
|
||||
headers: string[];
|
||||
rows: string[][];
|
||||
};
|
||||
|
||||
/** Table metadata collected for block-mode rendering with the placeholder location. */
|
||||
export type MarkdownTableMeta = MarkdownTableData & {
|
||||
placeholderOffset: number;
|
||||
};
|
||||
@@ -116,10 +123,15 @@ type RenderState = RenderTarget & {
|
||||
};
|
||||
|
||||
export type MarkdownParseOptions = {
|
||||
/** Enable markdown-it linkify conversion. Default: true. */
|
||||
linkify?: boolean;
|
||||
/** Interpret paired `||` text delimiters as spoiler style spans. Default: false. */
|
||||
enableSpoilers?: boolean;
|
||||
/** Whether headings should become bold spans or plain text. Default: none. */
|
||||
headingStyle?: "none" | "bold";
|
||||
/** Text prefix inserted at each blockquote open before applying blockquote style. */
|
||||
blockquotePrefix?: string;
|
||||
/** Enable markdown-it autolinks. Default: true unless explicitly false. */
|
||||
autolink?: boolean;
|
||||
/** How to render tables (off|bullets|code|block). Default: off. */
|
||||
tableMode?: MarkdownTableMode;
|
||||
@@ -966,6 +978,7 @@ function sliceLinkSpans(spans: MarkdownLinkSpan[], start: number, end: number):
|
||||
return sliced;
|
||||
}
|
||||
|
||||
/** Slices IR text and rebases overlapping style/link spans into the returned range. */
|
||||
export function sliceMarkdownIR(ir: MarkdownIR, start: number, end: number): MarkdownIR {
|
||||
return {
|
||||
text: ir.text.slice(start, end),
|
||||
@@ -974,10 +987,12 @@ export function sliceMarkdownIR(ir: MarkdownIR, start: number, end: number): Mar
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Parses markdown into plaintext plus style/link ranges.
 *
 * Convenience wrapper around `markdownToIRWithMeta` that keeps only the `ir`
 * part of its result.
 *
 * @param markdown Markdown source text.
 * @param options Parse options forwarded unchanged; defaults to `{}`.
 * @returns The `ir` field produced by `markdownToIRWithMeta`.
 */
export function markdownToIR(markdown: string, options: MarkdownParseOptions = {}): MarkdownIR {
  const { ir } = markdownToIRWithMeta(markdown, options);
  return ir;
}
|
||||
|
||||
/** Parses markdown into IR and returns table-detection metadata for table-aware callers. */
|
||||
export function markdownToIRWithMeta(
|
||||
markdown: string,
|
||||
options: MarkdownParseOptions = {},
|
||||
@@ -1040,6 +1055,7 @@ export function markdownToIRWithMeta(
|
||||
};
|
||||
}
|
||||
|
||||
/** Chunks IR text at readable boundaries and rebases style/link spans per chunk. */
|
||||
export function chunkMarkdownIR(ir: MarkdownIR, limit: number): MarkdownIR[] {
|
||||
if (!ir.text) {
|
||||
return [];
|
||||
|
||||
@@ -128,6 +128,8 @@ function findMarkdownIRPreservedSplitIndex(text: string, start: number, limit: n
|
||||
return text.length;
|
||||
}
|
||||
|
||||
// Prefer split points outside markdown-link destinations so `[label](url)`
|
||||
// survives chunking when there is any whitespace boundary before the limit.
|
||||
let lastOutsideParenNewlineBreak = -1;
|
||||
let lastOutsideParenWhitespaceBreak = -1;
|
||||
let lastOutsideParenWhitespaceRunStart = -1;
|
||||
@@ -254,6 +256,8 @@ function mergeAdjacentLinkSpans(links: MarkdownLinkSpan[]): MarkdownLinkSpan[] {
|
||||
|
||||
function mergeMarkdownIRChunks(left: MarkdownIR, right: MarkdownIR): MarkdownIR {
|
||||
const offset = left.text.length;
|
||||
// Shift right-side spans onto the merged text coordinate system before
|
||||
// coalescing, otherwise rendered markers would attach to stale offsets.
|
||||
return {
|
||||
text: left.text + right.text,
|
||||
styles: mergeAdjacentStyleSpans([
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
import type { MarkdownIR, MarkdownLinkSpan, MarkdownStyle, MarkdownStyleSpan } from "./ir.js";
|
||||
|
||||
/** Opening/closing marker pair used when rendering one Markdown style span. */
|
||||
export type RenderStyleMarker = {
|
||||
open: string | ((span: MarkdownStyleSpan) => string);
|
||||
close: string;
|
||||
};
|
||||
|
||||
/** Optional marker overrides keyed by Markdown style. */
|
||||
export type RenderStyleMap = Partial<Record<MarkdownStyle, RenderStyleMarker>>;
|
||||
|
||||
/** Rendered link wrapper coordinates and markers returned by link builders. */
|
||||
export type RenderLink = {
|
||||
start: number;
|
||||
end: number;
|
||||
@@ -14,6 +17,7 @@ export type RenderLink = {
|
||||
close: string;
|
||||
};
|
||||
|
||||
/** Rendering hooks for escaping text, styles, and optional link wrappers. */
|
||||
export type RenderOptions = {
|
||||
styleMarkers: RenderStyleMap;
|
||||
escapeText: (text: string) => string;
|
||||
@@ -46,6 +50,7 @@ function sortStyleSpans(spans: MarkdownStyleSpan[]): MarkdownStyleSpan[] {
|
||||
});
|
||||
}
|
||||
|
||||
/** Renders Markdown IR by applying caller-provided style/link markers. */
|
||||
export function renderMarkdownWithMarkers(ir: MarkdownIR, options: RenderOptions): string {
|
||||
const text = ir.text ?? "";
|
||||
if (!text) {
|
||||
@@ -104,7 +109,7 @@ export function renderMarkdownWithMarkers(ir: MarkdownIR, options: RenderOptions
|
||||
}
|
||||
|
||||
const points = [...boundaries].toSorted((a, b) => a - b);
|
||||
// Unified stack for both styles and links, tracking close string and end position
|
||||
// Links and styles share one stack so overlapping spans close in one LIFO order.
|
||||
const stack: { close: string; end: number }[] = [];
|
||||
type OpeningItem =
|
||||
| { end: number; open: string; close: string; kind: "link"; index: number }
|
||||
@@ -121,7 +126,7 @@ export function renderMarkdownWithMarkers(ir: MarkdownIR, options: RenderOptions
|
||||
for (let i = 0; i < points.length; i += 1) {
|
||||
const pos = points[i];
|
||||
|
||||
// Close ALL elements (styles and links) in LIFO order at this position
|
||||
// Close every element ending here before opening new same-position spans.
|
||||
while (stack.length && stack[stack.length - 1]?.end === pos) {
|
||||
const item = stack.pop();
|
||||
if (item) {
|
||||
|
||||
@@ -10,11 +10,12 @@ const MARKDOWN_STYLE_MARKERS = {
|
||||
code_block: { open: "```\n", close: "```" },
|
||||
} as const;
|
||||
|
||||
/** Converts markdown tables into the configured plaintext/code rendering mode. */
|
||||
/** Converts markdown tables into the configured plaintext/code mode while preserving links. */
|
||||
export function convertMarkdownTables(markdown: string, mode: MarkdownTableMode): string {
|
||||
if (!markdown || mode === "off") {
|
||||
return markdown;
|
||||
}
|
||||
// External "block" mode shares the code renderer when callers want inline replacement text.
|
||||
const effectiveMode = mode === "block" ? "code" : mode;
|
||||
const { ir, hasTables } = markdownToIRWithMeta(markdown, {
|
||||
linkify: false,
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
/** Table rendering modes shared by markdown parsing and table conversion helpers. */
|
||||
export type MarkdownTableMode = "off" | "bullets" | "code" | "block";
|
||||
|
||||
@@ -9,6 +9,8 @@ function normalizePosixAbsolutePath(value: string): string | undefined {
|
||||
if (!trimmed || trimmed.includes("\0")) {
|
||||
return undefined;
|
||||
}
|
||||
// Normalize to POSIX separators so policy matching is deterministic across
|
||||
// callers that hand us macOS/Linux paths or Windows drive-style paths.
|
||||
const normalized = path.posix.normalize(trimmed.replaceAll("\\", "/"));
|
||||
const isAbsolute = normalized.startsWith("/") || WINDOWS_DRIVE_ABS_RE.test(normalized);
|
||||
if (!isAbsolute || normalized === "/") {
|
||||
@@ -44,6 +46,7 @@ function matchesRootPattern(params: { candidatePath: string; rootPattern: string
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Validates one absolute inbound-media root pattern with single-segment wildcards only. */
|
||||
export function isValidInboundPathRootPattern(value: string): boolean {
|
||||
const normalized = normalizePosixAbsolutePath(value);
|
||||
if (!normalized) {
|
||||
@@ -56,6 +59,7 @@ export function isValidInboundPathRootPattern(value: string): boolean {
|
||||
return segments.every((segment) => segment === WILDCARD_SEGMENT || !segment.includes("*"));
|
||||
}
|
||||
|
||||
/** Normalizes, filters, and de-duplicates configured inbound-media root patterns. */
|
||||
export function normalizeInboundPathRoots(roots?: readonly string[]): string[] {
|
||||
const normalized: string[] = [];
|
||||
const seen = new Set<string>();
|
||||
@@ -94,6 +98,7 @@ export function mergeInboundPathRoots(
|
||||
return merged;
|
||||
}
|
||||
|
||||
/** Checks a local media path against configured roots, using fallback roots only when none are valid. */
|
||||
export function isInboundPathAllowed(params: {
|
||||
filePath: string;
|
||||
roots: readonly string[];
|
||||
|
||||
@@ -97,6 +97,10 @@ const AUDIO_FILE_EXTENSIONS = new Set([
|
||||
|
||||
const fileTypeModuleLoader = createLazyImportLoader(() => import("file-type"));
|
||||
|
||||
/**
|
||||
* Canonicalize caller/provider MIME strings by dropping parameters and
|
||||
* normalizing known aliases.
|
||||
*/
|
||||
export function normalizeMimeType(mime?: string | null): string | undefined {
|
||||
if (!mime) {
|
||||
return undefined;
|
||||
@@ -176,6 +180,10 @@ export function isAudioFileName(fileName?: string | null): boolean {
|
||||
return AUDIO_FILE_EXTENSIONS.has(ext);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve the best MIME hint from bytes, path, and headers while avoiding generic
|
||||
* container sniffs overriding more specific document extensions.
|
||||
*/
|
||||
export function detectMime(opts: {
|
||||
buffer?: Buffer;
|
||||
headerMime?: string | null;
|
||||
@@ -232,6 +240,7 @@ async function detectMimeImpl(opts: {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/** Return the preferred file extension for a canonical or alias MIME type. */
|
||||
export function extensionForMime(mime?: string | null): string | undefined {
|
||||
const normalized = normalizeMimeType(mime);
|
||||
if (!normalized) {
|
||||
|
||||
@@ -1,10 +1,14 @@
|
||||
/** Overflow details passed to `onOverflow` when a stream crosses its byte limit. */
export type ByteStreamLimitOverflow = {
|
||||
/** Bytes observed after including the chunk that crossed the limit. */
|
||||
size: number;
|
||||
/** Maximum bytes the caller allowed before aborting the stream. */
|
||||
maxBytes: number;
|
||||
};
|
||||
|
||||
/** Options accepted by `readByteStreamWithLimit`: a byte cap plus an optional overflow error factory. */
export type ReadByteStreamWithLimitOptions = {
|
||||
/** Maximum bytes to retain before aborting and throwing. */
|
||||
maxBytes: number;
|
||||
/** Optional error factory used when the stream crosses maxBytes. */
|
||||
onOverflow?: (params: ByteStreamLimitOverflow) => Error;
|
||||
};
|
||||
|
||||
@@ -42,6 +46,7 @@ function destroyReadableOnOverflow(stream: unknown, err: Error): void {
|
||||
}
|
||||
}
|
||||
|
||||
/** Reads an async byte stream into one Buffer while destroying/canceling it on overflow. */
|
||||
export async function readByteStreamWithLimit(
|
||||
stream: AsyncIterable<unknown>,
|
||||
opts: ReadByteStreamWithLimitOptions,
|
||||
|
||||
@@ -23,6 +23,8 @@ async function readChunkWithIdleTimeout(
|
||||
onIdleTimeout?.({ chunkTimeoutMs: resolvedChunkTimeoutMs }) ??
|
||||
new Error(`Media download stalled: no data received for ${resolvedChunkTimeoutMs}ms`);
|
||||
clear();
|
||||
// Cancel the response body on idle timeout so remote media downloads do
|
||||
// not keep sockets open after the caller has already failed the read.
|
||||
void reader.cancel(error).catch(() => undefined);
|
||||
reject(error);
|
||||
}, resolvedChunkTimeoutMs);
|
||||
@@ -123,6 +125,7 @@ async function readResponsePrefix(
|
||||
};
|
||||
}
|
||||
|
||||
/** Reads a Response body into memory, enforcing byte and optional per-chunk idle limits. */
|
||||
export async function readResponseWithLimit(
|
||||
res: Response,
|
||||
maxBytes: number,
|
||||
@@ -146,6 +149,7 @@ export async function readResponseWithLimit(
|
||||
return prefix.buffer;
|
||||
}
|
||||
|
||||
/** Reads a bounded, whitespace-collapsed response prefix for diagnostics. */
|
||||
export async function readResponseTextSnippet(
|
||||
res: Response,
|
||||
opts?: {
|
||||
|
||||
@@ -8,10 +8,14 @@ export type MediaGenerationCatalogKind =
|
||||
export type MediaGenerationCatalogSource = "static" | "live" | "cache" | "configured";
|
||||
|
||||
export type MediaGenerationCatalogEntry<TCapabilities = unknown> = {
|
||||
/** Capability family the row belongs to, such as image or video generation. */
|
||||
kind: MediaGenerationCatalogKind;
|
||||
/** Provider id that owns the model. */
|
||||
provider: string;
|
||||
/** Provider model id. */
|
||||
model: string;
|
||||
label?: string;
|
||||
/** Origin of this catalog row: static metadata, live fetch, cache, or user config. */
|
||||
source: MediaGenerationCatalogSource;
|
||||
default?: boolean;
|
||||
configured?: boolean;
|
||||
@@ -37,6 +41,7 @@ function uniqueModels(provider: { defaultModel?: string; models?: readonly strin
|
||||
return uniqueTrimmedStrings([provider.defaultModel, ...(provider.models ?? [])]);
|
||||
}
|
||||
|
||||
/** Builds stable static catalog rows from a provider default model plus advertised models. */
|
||||
export function synthesizeMediaGenerationCatalogEntries<TCapabilities>(params: {
|
||||
kind: MediaGenerationCatalogKind;
|
||||
provider: MediaGenerationCatalogProvider<TCapabilities>;
|
||||
@@ -63,6 +68,7 @@ export function synthesizeMediaGenerationCatalogEntries<TCapabilities>(params: {
|
||||
});
|
||||
}
|
||||
|
||||
/** Lists unique provider models in display order, with the default model first when present. */
|
||||
export function listMediaGenerationProviderModels(provider: {
|
||||
defaultModel?: string;
|
||||
models?: readonly string[];
|
||||
|
||||
@@ -32,7 +32,9 @@ function formatSection(
|
||||
|
||||
/** Formats media-understanding outputs into the chat body sent back to the model. */
|
||||
export function formatMediaUnderstandingBody(params: {
|
||||
/** Original user body, often containing synthetic `<media:...>` placeholders. */
|
||||
body?: string;
|
||||
/** Provider outputs for audio, image, or video attachments. */
|
||||
outputs: MediaUnderstandingOutput[];
|
||||
}): string {
|
||||
const outputs = params.outputs.filter((output) => output.text.trim());
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
function extractLastJsonObject(raw: string): unknown {
|
||||
const trimmed = raw.trim();
|
||||
// Gemini CLI can print logs before the final JSON response; parse only the
|
||||
// trailing object so noisy preamble text does not break extraction.
|
||||
const start = trimmed.lastIndexOf("{");
|
||||
if (start === -1) {
|
||||
return null;
|
||||
@@ -12,6 +14,7 @@ function extractLastJsonObject(raw: string): unknown {
|
||||
}
|
||||
}
|
||||
|
||||
/** Extracts the final non-empty `response` string from Gemini CLI JSON output. */
|
||||
export function extractGeminiResponse(raw: string): string | null {
|
||||
const payload = extractLastJsonObject(raw);
|
||||
if (!payload || typeof payload !== "object") {
|
||||
|
||||
@@ -22,11 +22,13 @@ function getResponseErrorMessage(line: BatchOutputErrorLike | undefined): string
|
||||
return typeof body.error?.message === "string" ? body.error.message : undefined;
|
||||
}
|
||||
|
||||
/**
 * Returns the first useful provider batch error from parsed output/error-file rows.
 *
 * Rows are scanned in order. For each row the row-level `error.message` is
 * preferred; when it is missing or empty, the message reported by
 * `getResponseErrorMessage` for that row is used instead.
 *
 * @param lines Parsed batch output or error-file rows.
 * @returns The first non-empty message, or `undefined` when no row carries one.
 */
export function extractBatchErrorMessage(lines: BatchOutputErrorLike[]): string | undefined {
  for (const line of lines) {
    // Use `||` rather than `??`: an empty row-level message must not mask the
    // response-body message that makes this row useful in the first place.
    const message = line.error?.message || getResponseErrorMessage(line);
    if (message) {
      return message;
    }
  }
  return undefined;
}
|
||||
|
||||
/** Formats an error-file read failure without hiding the original provider failure path. */
|
||||
export function formatUnavailableBatchError(err: unknown): string | undefined {
|
||||
const message = formatErrorMessage(err);
|
||||
return message ? `error file unavailable: ${message}` : undefined;
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
export type EmbeddingBatchOutputLine = {
|
||||
/** Provider request id for the input row, used to match embeddings back to chunks. */
|
||||
custom_id?: string;
|
||||
/** Provider-level batch row error, when the request never produced a response body. */
|
||||
error?: { message?: string };
|
||||
response?: {
|
||||
/** HTTP-like status for the row inside the provider batch output file. */
|
||||
status_code?: number;
|
||||
body?:
|
||||
| {
|
||||
@@ -14,6 +17,7 @@ export type EmbeddingBatchOutputLine = {
|
||||
};
|
||||
};
|
||||
|
||||
/** Applies one provider batch output row to remaining ids, embedding map, or error list. */
|
||||
export function applyEmbeddingBatchOutputLine(params: {
|
||||
line: EmbeddingBatchOutputLine;
|
||||
remaining: Set<string>;
|
||||
|
||||
@@ -1,17 +1,24 @@
|
||||
const TERMINAL_FAILURE_STATES = new Set(["failed", "expired", "cancelled", "canceled"]);
|
||||
|
||||
/** Subset of a provider batch status payload read by the helpers in this module; every field is optional. */
type BatchStatusLike = {
|
||||
/** Provider batch id, when included in the status payload. */
|
||||
id?: string;
|
||||
/** Provider batch state such as completed, failed, expired, or cancelled. */
|
||||
status?: string;
|
||||
/** Provider file id containing successful output rows. */
|
||||
output_file_id?: string | null;
|
||||
/** Provider file id containing row-level errors. */
|
||||
error_file_id?: string | null;
|
||||
};
|
||||
|
||||
/** Provider file ids for a completed batch: a required output file and an optional error file. */
export type BatchCompletionResult = {
|
||||
/** Provider file id containing successful output rows. */
|
||||
outputFileId: string;
|
||||
/** Optional provider file id containing row-level errors. */
|
||||
errorFileId?: string;
|
||||
};
|
||||
|
||||
/** Extracts output/error file ids from a completed provider batch status. */
|
||||
export function resolveBatchCompletionFromStatus(params: {
|
||||
provider: string;
|
||||
batchId: string;
|
||||
@@ -26,6 +33,7 @@ export function resolveBatchCompletionFromStatus(params: {
|
||||
};
|
||||
}
|
||||
|
||||
/** Throws with provider error-file detail when the batch reached a terminal failure state. */
|
||||
export async function throwIfBatchTerminalFailure(params: {
|
||||
provider: string;
|
||||
status: BatchStatusLike;
|
||||
@@ -42,6 +50,7 @@ export async function throwIfBatchTerminalFailure(params: {
|
||||
throw new Error(`${params.provider} batch ${params.status.id ?? "<unknown>"} ${state}${suffix}`);
|
||||
}
|
||||
|
||||
/** Returns completed batch files now, or waits when remote.batch.wait is enabled. */
|
||||
export async function resolveCompletedBatchResult(params: {
|
||||
provider: string;
|
||||
status: BatchStatusLike;
|
||||
|
||||
@@ -50,6 +50,7 @@ export function asSafeIntegerInRange(
|
||||
return value;
|
||||
}
|
||||
|
||||
/** Normalizes numeric string tokens while rejecting whitespace-only input. */
|
||||
function normalizeNumericString(value: string): string | undefined {
|
||||
const trimmed = value.trim();
|
||||
return trimmed ? trimmed : undefined;
|
||||
@@ -366,6 +367,8 @@ export function resolveExpiresAtMsFromDurationOrEpoch(
|
||||
return resolveExpiresAtMsFromDurationSeconds(parsed, { nowMs: opts.nowMs });
|
||||
}
|
||||
const absoluteMillisecondsThreshold = opts.absoluteMillisecondsThreshold ?? 1_000_000_000_000;
|
||||
// Values below this threshold are treated as epoch seconds; larger values are
|
||||
// already millisecond timestamps and must fit JavaScript Date bounds.
|
||||
if (parsed < absoluteMillisecondsThreshold) {
|
||||
return resolveExpiresAtMsFromEpochSeconds(parsed);
|
||||
}
|
||||
|
||||
@@ -2,6 +2,8 @@ import { normalizeOptionalLowercaseString, normalizeOptionalString } from "./str
|
||||
|
||||
/**
 * Coerces entries to strings, normalizes them, and drops empty results.
 *
 * @param list Entries of any type; a missing list yields an empty array.
 * @returns The non-empty normalized strings, in input order.
 */
export function normalizeStringEntries(list?: ReadonlyArray<unknown>) {
  const entries: string[] = [];
  // String(entry) is deliberate: allowlist/config callers preserve primitive ids
  // and object-provided labels instead of accepting only pre-typed strings.
  list?.forEach((entry) => {
    const text = normalizeOptionalString(String(entry));
    if (text) {
      entries.push(text);
    }
  });
  return entries;
}
|
||||
|
||||
|
||||
@@ -11,15 +11,22 @@ import {
|
||||
} from "./grammar.js";
|
||||
|
||||
/** One standalone tool-call block recovered from plain text, together with the source span it occupied. */
export type PlainTextToolCallBlock = {
|
||||
/** Parsed JSON object or XML parameter map to pass to structured tool execution. */
|
||||
arguments: Record<string, unknown>;
|
||||
/** Exclusive source offset after the consumed standalone block. */
|
||||
end: number;
|
||||
/** Tool name recovered from bracketed, Harmony, or XML-style syntax. */
|
||||
name: string;
|
||||
/** Exact source span consumed for diagnostics and replay-safe stripping. */
|
||||
raw: string;
|
||||
/** Inclusive source offset where the standalone block starts. */
|
||||
start: number;
|
||||
};
|
||||
|
||||
/** Optional limits applied when parsing plain-text tool-call candidates. */
export type PlainTextToolCallParseOptions = {
|
||||
/** Optional exact-name allowlist; candidates outside it are ignored instead of parsed. */
|
||||
allowedToolNames?: Iterable<string>;
|
||||
/** Maximum serialized payload accepted before a candidate is treated as non-tool text. */
|
||||
maxPayloadBytes?: number;
|
||||
};
|
||||
|
||||
@@ -257,6 +264,8 @@ function extractXmlishParameterValue(text: string, start: number, end: number):
|
||||
let payloadEnd = end;
|
||||
const afterOpeningLineBreak = consumeLineBreak(text, payloadStart);
|
||||
if (afterOpeningLineBreak !== null) {
|
||||
// Serialized XML parameters use wrapper-line indentation; trim only wrapper-adjacent line
|
||||
// breaks so caller payload whitespace inside the parameter remains byte-for-byte meaningful.
|
||||
payloadStart = afterOpeningLineBreak;
|
||||
if (payloadEnd > payloadStart && text[payloadEnd - 1] === "\n") {
|
||||
payloadEnd -= 1;
|
||||
@@ -370,6 +379,7 @@ function parseXmlishPlainTextToolCallBlockAt(
|
||||
};
|
||||
}
|
||||
|
||||
/** Parses text that consists only of one or more standalone escaped tool-call blocks. */
|
||||
export function parseStandalonePlainTextToolCallBlocks(
|
||||
text: string,
|
||||
options?: PlainTextToolCallParseOptions,
|
||||
@@ -389,6 +399,7 @@ export function parseStandalonePlainTextToolCallBlocks(
|
||||
return blocks.length > 0 ? blocks : null;
|
||||
}
|
||||
|
||||
/** Removes line-start standalone tool-call blocks while preserving surrounding visible text. */
|
||||
export function stripPlainTextToolCallBlocks(text: string): string {
|
||||
if (
|
||||
!text ||
|
||||
|
||||
@@ -11,21 +11,27 @@ import {
|
||||
} from "./grammar.js";
|
||||
|
||||
/** Answers whether a complete or partial model-emitted tool name matches a supported tool. */
export type PlainTextToolCallNameMatcher = {
|
||||
/** True when a complete model-emitted tool name is currently supported. */
|
||||
hasExactName(name: string): boolean;
|
||||
/** True while a streamed prefix could still become a supported tool name. */
|
||||
hasNamePrefix(prefix: string): boolean;
|
||||
};
|
||||
|
||||
export type PlainTextToolCallMessageNormalization =
|
||||
| { kind: "promoted" | "scrubbed"; message: Record<string, unknown> }
|
||||
| undefined;
|
||||
/** Normalized terminal message replacing escaped text with structured tool-call events. */
|
||||
{ kind: "promoted" | "scrubbed"; message: Record<string, unknown> } | undefined;
|
||||
|
||||
/** Options accepted by `normalizePlainTextToolCallStreamEvents`. */
export type PlainTextToolCallStreamNormalizerOptions = {
  /** Produces the provider-native events for a terminal message that was promoted into structured tool calls. */
  createPromotedToolCallEvents(message: Record<string, unknown>): Iterable<unknown>;
  /** Matcher for tool names, consulted while partial plaintext tool-call prefixes are buffered. */
  matcher: PlainTextToolCallNameMatcher;
  /** Lets a provider-specific terminal message promote or scrub escaped tool text. */
  normalizeDoneMessage(params: {
    message: unknown;
    reason: unknown;
  }): PlainTextToolCallMessageNormalization;
  /** When the upstream protocol is terminal, stop yielding once a normalized done message is emitted. */
  stopAfterDone?: boolean;
};
|
||||
|
||||
@@ -343,6 +349,8 @@ function getPlainTextToolCallBufferState(
|
||||
if (text.length <= TEXT_TOOL_CALL_BUFFER_MAX_CHARS) {
|
||||
return "possible";
|
||||
}
|
||||
// Over-cap buffers are kept only when they still look like serialized tool calls; visible text
|
||||
// after a complete serialized prefix flips the state to impossible so it can be released.
|
||||
const textAfterCompleteToolBlocks = stripSerializedToolCallPrefixes(trimmed, matcher);
|
||||
return textAfterCompleteToolBlocks !== null && textAfterCompleteToolBlocks.trim()
|
||||
? "impossible"
|
||||
@@ -942,8 +950,11 @@ function scrubReclassifiedMixedTextFromError(
|
||||
}
|
||||
|
||||
export function scrubOverCapPlainTextToolCallMessage(params: {
|
||||
/** Text candidate accumulated from streaming deltas before the terminal message arrived. */
|
||||
candidateText: string | undefined;
|
||||
/** Tool-name matcher for deciding whether the candidate is still a suppressed tool call. */
|
||||
matcher: PlainTextToolCallNameMatcher;
|
||||
/** Provider terminal message whose visible content may need escaped tool text removed. */
|
||||
message: unknown;
|
||||
}): Record<string, unknown> | undefined {
|
||||
const record = asRecord(params.message);
|
||||
@@ -1039,6 +1050,7 @@ function isBufferedTextEvent(bufferedEvent: unknown): boolean {
|
||||
);
|
||||
}
|
||||
|
||||
/** Converts streamed escaped plaintext tool-call text into structured tool-call events. */
|
||||
export async function* normalizePlainTextToolCallStreamEvents(
|
||||
source: AsyncIterable<unknown>,
|
||||
options: PlainTextToolCallStreamNormalizerOptions,
|
||||
|
||||
@@ -28,6 +28,11 @@ function workspacePathsOverlap(left: string, right: string): boolean {
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Find other configured agents whose workspaces overlap the target deletion
|
||||
* workspace. Deletion callers use this to avoid removing shared parent/child
|
||||
* directories that still belong to another agent.
|
||||
*/
|
||||
export function findOverlappingWorkspaceAgentIds(
|
||||
cfg: OpenClawConfig,
|
||||
agentId: string,
|
||||
|
||||
@@ -5,6 +5,11 @@ import { normalizeProviderId } from "./model-selection.js";
|
||||
|
||||
const CLAUDE_CLI_BACKEND_ID = "claude-cli";
|
||||
|
||||
/**
|
||||
* Hash CLI-session reuse inputs before persisting them into session metadata.
|
||||
* The stored value is only an equality token, so prompt/cwd/MCP inputs are not
|
||||
* written back into the session store in plaintext.
|
||||
*/
|
||||
export function hashCliSessionText(value: string | undefined): string | undefined {
|
||||
const trimmed = normalizeOptionalString(value);
|
||||
if (!trimmed) {
|
||||
@@ -13,6 +18,11 @@ export function hashCliSessionText(value: string | undefined): string | undefine
|
||||
return crypto.createHash("sha256").update(trimmed).digest("hex");
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve the stored CLI session binding for a provider. New structured
|
||||
* bindings win, older provider-id maps are still read, and the legacy
|
||||
* Claude-only field is retained as a final migration fallback.
|
||||
*/
|
||||
export function getCliSessionBinding(
|
||||
entry: SessionEntry | undefined,
|
||||
provider: string,
|
||||
@@ -51,6 +61,7 @@ export function getCliSessionBinding(
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/** Return only the reusable CLI session id for callers that do not need invalidation metadata. */
|
||||
export function getCliSessionId(
|
||||
entry: SessionEntry | undefined,
|
||||
provider: string,
|
||||
@@ -58,10 +69,19 @@ export function getCliSessionId(
|
||||
return getCliSessionBinding(entry, provider)?.sessionId;
|
||||
}
|
||||
|
||||
/**
 * Records a bare CLI session id for `provider`, with no reuse metadata attached.
 * Callers that can also persist auth, prompt, cwd, or MCP hashes should use
 * `setCliSessionBinding` instead.
 */
export function setCliSessionId(entry: SessionEntry, provider: string, sessionId: string): void {
  // The binding carries only the id; every other binding field is left unset.
  const idOnlyBinding = { sessionId };
  setCliSessionBinding(entry, provider, idOnlyBinding);
}
|
||||
|
||||
/**
|
||||
* Persist a provider-scoped CLI session binding in all currently supported
|
||||
* session-store shapes. The duplicate legacy writes keep older readers working
|
||||
* while structured bindings carry the invalidation inputs for newer runtimes.
|
||||
*/
|
||||
export function setCliSessionBinding(
|
||||
entry: SessionEntry,
|
||||
provider: string,
|
||||
@@ -109,6 +129,11 @@ export function setCliSessionBinding(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear one provider's CLI session binding across structured and legacy fields.
|
||||
* Other providers' bindings stay intact so a model switch only invalidates the
|
||||
* backend that actually failed or changed reuse conditions.
|
||||
*/
|
||||
export function clearCliSession(entry: SessionEntry, provider: string): void {
|
||||
const normalized = normalizeProviderId(provider);
|
||||
if (entry.cliSessionBindings?.[normalized] !== undefined) {
|
||||
@@ -126,12 +151,18 @@ export function clearCliSession(entry: SessionEntry, provider: string): void {
|
||||
}
|
||||
}
|
||||
|
||||
/** Drops every persisted CLI session binding (structured, per-provider id map, and legacy Claude field). */
export function clearAllCliSessions(entry: SessionEntry): void {
  // Fields are reset to `undefined` rather than deleted, in the same order as before.
  Object.assign(entry, {
    cliSessionBindings: undefined,
    cliSessionIds: undefined,
    claudeCliSessionId: undefined,
  });
}
|
||||
|
||||
/**
|
||||
* Decide whether a stored CLI session can be reused under the current run
|
||||
* inputs. Auth, system prompt, cwd, and MCP changes invalidate the session
|
||||
* unless the binding was explicitly marked `forceReuse`.
|
||||
*/
|
||||
export function resolveCliSessionReuse(params: {
|
||||
binding?: CliSessionBinding;
|
||||
authProfileId?: string;
|
||||
@@ -163,6 +194,8 @@ export function resolveCliSessionReuse(params: {
|
||||
const currentMcpResumeHash = normalizeOptionalString(params.mcpResumeHash);
|
||||
const storedAuthProfileId = normalizeOptionalString(binding?.authProfileId);
|
||||
const storedAuthEpoch = normalizeOptionalString(binding?.authEpoch);
|
||||
// Versioned auth epochs let a rotated profile keep reuse when the underlying
|
||||
// auth material is known to be unchanged, avoiding unnecessary CLI restarts.
|
||||
const hasMatchingVersionedAuthEpoch =
|
||||
binding?.authEpochVersion === params.authEpochVersion &&
|
||||
storedAuthEpoch !== undefined &&
|
||||
|
||||
@@ -5,6 +5,11 @@ import { ensureCustomApiRegistered } from "./custom-api-registry.js";
|
||||
import { createTransportAwareStreamFnForModel } from "./provider-transport-stream.js";
|
||||
import type { StreamFn } from "./runtime/index.js";
|
||||
|
||||
/**
|
||||
* Resolve and register the stream function for a concrete model. Provider
|
||||
* plugin streams win, transport-aware built-ins are the fallback, and successful
|
||||
* resolution updates the custom API registry for downstream runtime dispatch.
|
||||
*/
|
||||
export function registerProviderStreamForModel<TApi extends Api>(params: {
|
||||
model: Model<TApi>;
|
||||
cfg?: OpenClawConfig;
|
||||
|
||||
@@ -6,10 +6,6 @@ import { isRecord } from "../utils.js";
|
||||
import { asBoolean } from "../utils/boolean.js";
|
||||
import type { ChannelAccountSnapshot } from "./plugins/types.core.js";
|
||||
|
||||
// Read-only status commands project a safe subset of account fields into snapshots
|
||||
// so renderers can preserve "configured but unavailable" state without touching
|
||||
// strict runtime-only credential helpers.
|
||||
|
||||
const CREDENTIAL_STATUS_KEYS = [
|
||||
"tokenStatus",
|
||||
"botTokenStatus",
|
||||
@@ -33,6 +29,8 @@ function readNullableNumber(
|
||||
record: Record<string, unknown>,
|
||||
key: string,
|
||||
): number | null | undefined {
|
||||
// Preserve explicit null timestamps; status callers use null to distinguish
|
||||
// "known empty" from an omitted/unsupported field.
|
||||
if (record[key] === null) {
|
||||
return null;
|
||||
}
|
||||
@@ -44,6 +42,8 @@ function readStringArray(record: Record<string, unknown>, key: string): string[]
|
||||
if (!Array.isArray(value)) {
|
||||
return undefined;
|
||||
}
|
||||
// Snapshot arrays are display data. Coerce only string/number entries and drop empties so
|
||||
// arbitrary config objects cannot leak through status output as JSON-ish strings.
|
||||
const normalized = normalizeStringEntries(
|
||||
value.map((entry) => (typeof entry === "string" || typeof entry === "number" ? entry : "")),
|
||||
);
|
||||
@@ -57,6 +57,7 @@ function readCredentialStatus(record: Record<string, unknown>, key: CredentialSt
|
||||
: undefined;
|
||||
}
|
||||
|
||||
/** Infers configured state from any credential status field on an account snapshot-like object. */
|
||||
export function resolveConfiguredFromCredentialStatuses(account: unknown): boolean | undefined {
|
||||
const record = isRecord(account) ? account : null;
|
||||
if (!record) {
|
||||
@@ -70,12 +71,15 @@ export function resolveConfiguredFromCredentialStatuses(account: unknown): boole
|
||||
}
|
||||
sawCredentialStatus = true;
|
||||
if (status !== "missing") {
|
||||
// Any configured credential is enough for coarse account presence; callers
|
||||
// that require every credential use resolveConfiguredFromRequiredCredentialStatuses.
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return sawCredentialStatus ? false : undefined;
|
||||
}
|
||||
|
||||
/** Infers configured state only when every named required credential is non-missing. */
|
||||
export function resolveConfiguredFromRequiredCredentialStatuses(
|
||||
account: unknown,
|
||||
requiredKeys: CredentialStatusKey[],
|
||||
@@ -92,12 +96,15 @@ export function resolveConfiguredFromRequiredCredentialStatuses(
|
||||
}
|
||||
sawCredentialStatus = true;
|
||||
if (status === "missing") {
|
||||
// Required-credential checks are all-or-nothing so multi-token accounts
|
||||
// do not appear configured when one mandatory credential is absent.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return sawCredentialStatus ? true : undefined;
|
||||
}
|
||||
|
||||
/** Returns true when a credential exists but is unavailable to the current process. */
|
||||
export function hasConfiguredUnavailableCredentialStatus(account: unknown): boolean {
|
||||
const record = isRecord(account) ? account : null;
|
||||
if (!record) {
|
||||
@@ -108,6 +115,7 @@ export function hasConfiguredUnavailableCredentialStatus(account: unknown): bool
|
||||
);
|
||||
}
|
||||
|
||||
/** Returns true when an account snapshot exposes an actual credential or available status. */
|
||||
export function hasResolvedCredentialValue(account: unknown): boolean {
|
||||
const record = isRecord(account) ? account : null;
|
||||
if (!record) {
|
||||
@@ -120,6 +128,7 @@ export function hasResolvedCredentialValue(account: unknown): boolean {
|
||||
);
|
||||
}
|
||||
|
||||
/** Projects non-secret credential source/status fields into a channel account snapshot. */
|
||||
export function projectCredentialSnapshotFields(
|
||||
account: unknown,
|
||||
): Pick<
|
||||
@@ -143,6 +152,8 @@ export function projectCredentialSnapshotFields(
|
||||
const appTokenSource = normalizeOptionalString(record.appTokenSource);
|
||||
const signingSecretSource = normalizeOptionalString(record.signingSecretSource);
|
||||
|
||||
// Only expose source/status metadata. Raw credential fields are intentionally
|
||||
// omitted here because channel snapshots are safe to display in status output.
|
||||
return {
|
||||
...(tokenSource ? { tokenSource } : {}),
|
||||
...(botTokenSource ? { botTokenSource } : {}),
|
||||
@@ -166,6 +177,7 @@ export function projectCredentialSnapshotFields(
|
||||
};
|
||||
}
|
||||
|
||||
/** Projects a safe read-only account snapshot, redacting URL credentials and raw secrets. */
|
||||
export function projectSafeChannelAccountSnapshotFields(
|
||||
account: unknown,
|
||||
): Partial<ChannelAccountSnapshot> {
|
||||
@@ -232,6 +244,7 @@ export function projectSafeChannelAccountSnapshotFields(
|
||||
? { allowFrom: readStringArray(record, "allowFrom") }
|
||||
: {}),
|
||||
...projectCredentialSnapshotFields(account),
|
||||
// Status output may display base URLs, but embedded credentials must never leak.
|
||||
...(baseUrl ? { baseUrl: stripUrlUserInfo(baseUrl) } : {}),
|
||||
...(readBoolean(record, "allowUnmentionedGroups") !== undefined
|
||||
? { allowUnmentionedGroups: readBoolean(record, "allowUnmentionedGroups") }
|
||||
|
||||
@@ -2,12 +2,14 @@ export type AckReactionScope = "all" | "direct" | "group-all" | "group-mentions"
|
||||
|
||||
/** WhatsApp ack reaction mode: one of "always", "mentions", or "never". */
export type WhatsAppAckReactionMode =
  | "always"
  | "mentions"
  | "never";
|
||||
|
||||
/** An in-flight ack reaction together with the provider callback that removes it after a reply. */
export type AckReactionHandle = {
  // NOTE(review): presumably settles with whether the reaction was applied — confirm in createAckReactionHandle.
  ackReactionPromise: Promise<boolean>;
  // The reaction value that was sent.
  ackReactionValue: string;
  // Provider callback that removes the reaction.
  remove: () => Promise<void>;
};
|
||||
|
||||
/** Channel-neutral facts used to decide whether an inbound message gets an ack reaction. */
|
||||
export type AckReactionGateParams = {
|
||||
scope: AckReactionScope | undefined;
|
||||
isDirect: boolean;
|
||||
@@ -19,6 +21,7 @@ export type AckReactionGateParams = {
|
||||
shouldBypassMention?: boolean;
|
||||
};
|
||||
|
||||
/** Apply channel-neutral ack reaction scope rules before a provider sends an emoji. */
|
||||
export function shouldAckReaction(params: AckReactionGateParams): boolean {
|
||||
const scope = params.scope ?? "group-mentions";
|
||||
if (scope === "off" || scope === "none") {
|
||||
@@ -48,6 +51,7 @@ export function shouldAckReaction(params: AckReactionGateParams): boolean {
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Adapt WhatsApp's direct/group knobs onto the shared ack reaction gate. */
|
||||
export function shouldAckReactionForWhatsApp(params: {
|
||||
emoji: string;
|
||||
isDirect: boolean;
|
||||
@@ -84,6 +88,7 @@ export function shouldAckReactionForWhatsApp(params: {
|
||||
});
|
||||
}
|
||||
|
||||
/** Start sending an ack reaction and retain enough state for optional cleanup. */
|
||||
export function createAckReactionHandle(params: {
|
||||
ackReactionValue: string;
|
||||
send: () => Promise<void>;
|
||||
@@ -115,6 +120,7 @@ export function createAckReactionHandle(params: {
|
||||
};
|
||||
}
|
||||
|
||||
/** Remove an ack reaction only after the send path confirmed it was applied. */
|
||||
export function removeAckReactionAfterReply(params: {
|
||||
removeAfterReply: boolean;
|
||||
ackReactionPromise: Promise<boolean> | null;
|
||||
@@ -139,6 +145,7 @@ export function removeAckReactionAfterReply(params: {
|
||||
});
|
||||
}
|
||||
|
||||
/** Convenience wrapper for removing a stored ack reaction handle after reply delivery. */
|
||||
export function removeAckReactionHandleAfterReply(params: {
|
||||
removeAfterReply: boolean;
|
||||
ackReaction: AckReactionHandle | null | undefined;
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
import { normalizeStringEntries } from "@openclaw/normalization-core/string-normalization";
|
||||
|
||||
/** Marker prefix on allow-from entries whose membership is delegated to a named access group. */
export const ACCESS_GROUP_ALLOW_FROM_PREFIX = "accessGroup:";
|
||||
|
||||
/** Parses an access-group allow-from entry and returns the referenced group name. */
|
||||
export function parseAccessGroupAllowFromEntry(entry: string): string | null {
|
||||
const trimmed = entry.trim();
|
||||
if (!trimmed.startsWith(ACCESS_GROUP_ALLOW_FROM_PREFIX)) {
|
||||
@@ -11,11 +13,14 @@ export function parseAccessGroupAllowFromEntry(entry: string): string | null {
|
||||
return name.length > 0 ? name : null;
|
||||
}
|
||||
|
||||
/** Merges configured and pairing-store DM allowlists according to the active DM policy. */
|
||||
export function mergeDmAllowFromSources(params: {
|
||||
allowFrom?: Array<string | number>;
|
||||
storeAllowFrom?: Array<string | number>;
|
||||
dmPolicy?: string;
|
||||
}): string[] {
|
||||
// Explicit allowlist/open policy owns the effective list; pairing-store entries only supplement
|
||||
// pairing/default policies so old approved users do not override a stricter configured list.
|
||||
const storeEntries =
|
||||
params.dmPolicy === "allowlist" || params.dmPolicy === "open"
|
||||
? []
|
||||
@@ -23,6 +28,7 @@ export function mergeDmAllowFromSources(params: {
|
||||
return normalizeStringEntries([...(params.allowFrom ?? []), ...storeEntries]);
|
||||
}
|
||||
|
||||
/** Resolves group allow-from entries with optional fallback to the generic allowFrom list. */
|
||||
export function resolveGroupAllowFromSources(params: {
|
||||
allowFrom?: Array<string | number>;
|
||||
groupAllowFrom?: Array<string | number>;
|
||||
@@ -40,6 +46,7 @@ export function resolveGroupAllowFromSources(params: {
|
||||
return normalizeStringEntries(scoped);
|
||||
}
|
||||
|
||||
/** Returns the first defined value without treating null/false/empty string as missing. */
|
||||
export function firstDefined<T>(...values: Array<T | undefined>) {
|
||||
for (const value of values) {
|
||||
if (value !== undefined) {
|
||||
@@ -49,6 +56,7 @@ export function firstDefined<T>(...values: Array<T | undefined>) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/** Checks a normalized sender id against a compiled allowlist summary. */
|
||||
export function isSenderIdAllowed(
|
||||
allow: { entries: string[]; hasWildcard: boolean; hasEntries: boolean },
|
||||
senderId: string | undefined,
|
||||
|
||||
@@ -3,6 +3,7 @@ import {
|
||||
normalizeOptionalLowercaseString,
|
||||
} from "@openclaw/normalization-core/string-coerce";
|
||||
|
||||
/** Candidate class that matched an allowlist entry. */
|
||||
export type AllowlistMatchSource =
|
||||
| "wildcard"
|
||||
| "id"
|
||||
@@ -15,23 +16,32 @@ export type AllowlistMatchSource =
|
||||
| "slug"
|
||||
| "localpart";
|
||||
|
||||
/** Outcome of an allowlist check, with optional metadata describing what matched (for diagnostics). */
export type AllowlistMatch<TSource extends string = AllowlistMatchSource> = {
  /** True when the candidate passed the allowlist. */
  allowed: boolean;
  /** The config entry, or wildcard, responsible for the match. */
  matchKey?: string;
  /** Which kind of candidate value matched the config entry. */
  matchSource?: TSource;
};
|
||||
|
||||
/** Allowlist compiled once so it can be checked against many candidates. */
export type CompiledAllowlist = {
  /** The normalized allowlist entries. */
  set: ReadonlySet<string>;
  /** True when a wildcard entry admits every candidate. */
  wildcard: boolean;
};
|
||||
|
||||
/** Renders match metadata as a compact `matchKey=… matchSource=…` string for logs and tests. */
export function formatAllowlistMatchMeta(
  match?: { matchKey?: string; matchSource?: string } | null,
): string {
  // A missing match and a missing field both render as the literal "none".
  const key = match?.matchKey ?? "none";
  const source = match?.matchSource ?? "none";
  return `matchKey=${key} matchSource=${source}`;
}
|
||||
|
||||
/** Compiles already-normalized allowlist entries into a lookup set. */
|
||||
export function compileAllowlist(entries: ReadonlyArray<string>): CompiledAllowlist {
|
||||
const set = new Set(entries.filter(Boolean));
|
||||
return {
|
||||
@@ -48,6 +58,7 @@ function compileSimpleAllowlist(entries: ReadonlyArray<string | number>): Compil
|
||||
);
|
||||
}
|
||||
|
||||
/** Checks candidates in order, returning the first exact allowlist match. */
|
||||
export function resolveAllowlistCandidates<TSource extends string>(params: {
|
||||
compiledAllowlist: CompiledAllowlist;
|
||||
candidates: Array<{ value?: string; source: TSource }>;
|
||||
@@ -67,6 +78,7 @@ export function resolveAllowlistCandidates<TSource extends string>(params: {
|
||||
return { allowed: false };
|
||||
}
|
||||
|
||||
/** Resolves an allowlist decision with wildcard taking precedence over candidate checks. */
|
||||
export function resolveCompiledAllowlistMatch<TSource extends string>(params: {
|
||||
compiledAllowlist: CompiledAllowlist;
|
||||
candidates: Array<{ value?: string; source: TSource }>;
|
||||
@@ -80,6 +92,7 @@ export function resolveCompiledAllowlistMatch<TSource extends string>(params: {
|
||||
return resolveAllowlistCandidates(params);
|
||||
}
|
||||
|
||||
/** Compiles an allowlist and resolves it against ordered candidate values. */
|
||||
export function resolveAllowlistMatchByCandidates<TSource extends string>(params: {
|
||||
allowList: ReadonlyArray<string>;
|
||||
candidates: Array<{ value?: string; source: TSource }>;
|
||||
@@ -90,12 +103,14 @@ export function resolveAllowlistMatchByCandidates<TSource extends string>(params
|
||||
});
|
||||
}
|
||||
|
||||
/** Resolves the common id/name allowlist shape used by channel sender checks. */
|
||||
export function resolveAllowlistMatchSimple(params: {
|
||||
allowFrom: ReadonlyArray<string | number>;
|
||||
senderId: string;
|
||||
senderName?: string | null;
|
||||
allowNameMatching?: boolean;
|
||||
}): AllowlistMatch<"wildcard" | "id" | "name"> {
|
||||
// Compile from the current array contents so in-place config edits are visible immediately.
|
||||
const allowFrom = compileSimpleAllowlist(params.allowFrom);
|
||||
|
||||
if (allowFrom.set.size === 0) {
|
||||
@@ -111,6 +126,7 @@ export function resolveAllowlistMatchSimple(params: {
|
||||
compiledAllowlist: allowFrom,
|
||||
candidates: [
|
||||
{ value: senderId, source: "id" },
|
||||
// Name matching is opt-in because display names can be mutable or ambiguous.
|
||||
...(params.allowNameMatching === true && senderName
|
||||
? ([{ value: senderName, source: "name" as const }] satisfies Array<{
|
||||
value?: string;
|
||||
|
||||
@@ -7,8 +7,11 @@ import type { RuntimeEnv } from "../../runtime.js";
|
||||
import { summarizeStringEntries } from "../../shared/string-sample.js";
|
||||
|
||||
/** Minimal shape of one resolver result for an allowlist user entry. */
export type AllowlistUserResolutionLike = {
  /** The original config token the channel-specific resolver tried to map. */
  input: string;
  /** Set to true only when the resolver positively identified `input`. */
  resolved: boolean;
  /** The stable channel/user id to store once resolution succeeded. */
  id?: string;
};
|
||||
|
||||
@@ -30,6 +33,7 @@ function dedupeAllowlistEntries(entries: string[]): string[] {
|
||||
return deduped;
|
||||
}
|
||||
|
||||
/** Appends resolved ids to an allowlist while preserving first-seen casing/order. */
|
||||
export function mergeAllowlist(params: {
|
||||
existing?: Array<string | number>;
|
||||
additions: string[];
|
||||
@@ -37,6 +41,7 @@ export function mergeAllowlist(params: {
|
||||
return dedupeAllowlistEntries([...mapAllowFromEntries(params.existing), ...params.additions]);
|
||||
}
|
||||
|
||||
/** Builds resolved/unresolved summaries plus id additions from resolver output. */
|
||||
export function buildAllowlistResolutionSummary<T extends AllowlistUserResolutionLike>(
|
||||
resolvedUsers: T[],
|
||||
opts?: { formatResolved?: (entry: T) => string; formatUnresolved?: (entry: T) => string },
|
||||
@@ -47,6 +52,7 @@ export function buildAllowlistResolutionSummary<T extends AllowlistUserResolutio
|
||||
additions: string[];
|
||||
} {
|
||||
const resolvedMap = new Map(resolvedUsers.map((entry) => [entry.input, entry]));
|
||||
// Missing ids are treated as unresolved even when a resolver marks the input as resolved.
|
||||
const resolvedOk = (entry: T) => Boolean(entry.resolved && entry.id);
|
||||
const formatResolved = opts?.formatResolved ?? ((entry: T) => `${entry.input}→${entry.id}`);
|
||||
const formatUnresolved = opts?.formatUnresolved ?? ((entry: T) => entry.input);
|
||||
@@ -83,6 +89,7 @@ export function canonicalizeAllowlistWithResolvedIds<
|
||||
if (!trimmed) {
|
||||
continue;
|
||||
}
|
||||
// `*` is a wildcard policy marker, not a user alias; never try to resolve it as an id.
|
||||
if (trimmed === "*") {
|
||||
canonicalized.push(trimmed);
|
||||
continue;
|
||||
@@ -93,6 +100,7 @@ export function canonicalizeAllowlistWithResolvedIds<
|
||||
return dedupeAllowlistEntries(canonicalized);
|
||||
}
|
||||
|
||||
/** Rewrites nested `users` arrays in channel config entries after allowlist resolution. */
|
||||
export function patchAllowlistUsersInConfigEntries<
|
||||
T extends AllowlistUserResolutionLike,
|
||||
TEntries extends Record<string, unknown>,
|
||||
@@ -110,6 +118,7 @@ export function patchAllowlistUsersInConfigEntries<
|
||||
if (!Array.isArray(users) || users.length === 0) {
|
||||
continue;
|
||||
}
|
||||
// Merge keeps user-facing aliases; canonicalize replaces aliases with stable ids when possible.
|
||||
const resolvedUsers =
|
||||
params.strategy === "canonicalize"
|
||||
? canonicalizeAllowlistWithResolvedIds({
|
||||
@@ -131,6 +140,7 @@ export function patchAllowlistUsersInConfigEntries<
|
||||
return nextEntries as TEntries;
|
||||
}
|
||||
|
||||
/** Collects resolvable user aliases from one config entry, excluding wildcard entries. */
|
||||
export function addAllowlistUserEntriesFromConfigEntry(target: Set<string>, entry: unknown): void {
|
||||
if (!entry || typeof entry !== "object") {
|
||||
return;
|
||||
@@ -147,6 +157,7 @@ export function addAllowlistUserEntriesFromConfigEntry(target: Set<string>, entr
|
||||
}
|
||||
}
|
||||
|
||||
/** Logs compact allowlist resolution mapping output when there is anything to report. */
|
||||
export function summarizeMapping(
|
||||
label: string,
|
||||
mapping: string[],
|
||||
|
||||
@@ -1,19 +1,30 @@
|
||||
import { normalizeLowercaseStringOrEmpty } from "@openclaw/normalization-core/string-coerce";
|
||||
import { normalizeUniqueSingleOrTrimmedStringList } from "@openclaw/normalization-core/string-normalization";
|
||||
|
||||
/** Where the config entry chosen for a channel target came from. */
export type ChannelMatchSource =
  | "direct"
  | "parent"
  | "wildcard";
|
||||
|
||||
/** Channel match result that keeps the direct, parent, and wildcard candidates around for diagnostics. */
export type ChannelEntryMatch<T> = {
  /** The entry chosen as the effective config result. */
  entry?: T;
  /** The config key of the chosen entry. */
  key?: string;
  /** The wildcard fallback entry; kept even when a direct match wins. */
  wildcardEntry?: T;
  /** The config key of the wildcard fallback entry. */
  wildcardKey?: string;
  /** The parent conversation entry; kept when direct target matching falls back. */
  parentEntry?: T;
  /** The config key of the parent conversation entry. */
  parentKey?: string;
  /** The key callers should report as the effective match. */
  matchKey?: string;
  /** The precedence source that produced the effective match. */
  matchSource?: ChannelMatchSource;
};
|
||||
|
||||
/** Copies match metadata onto a resolved config result. */
|
||||
export function applyChannelMatchMeta<
|
||||
TResult extends { matchKey?: string; matchSource?: ChannelMatchSource },
|
||||
>(result: TResult, match: ChannelEntryMatch<unknown>): TResult {
|
||||
@@ -24,6 +35,7 @@ export function applyChannelMatchMeta<
|
||||
return result;
|
||||
}
|
||||
|
||||
/** Resolves the matched entry into a config result while preserving match metadata. */
|
||||
export function resolveChannelMatchConfig<
|
||||
TEntry,
|
||||
TResult extends { matchKey?: string; matchSource?: ChannelMatchSource },
|
||||
@@ -34,6 +46,7 @@ export function resolveChannelMatchConfig<
|
||||
return applyChannelMatchMeta(resolveEntry(match.entry), match);
|
||||
}
|
||||
|
||||
/** Normalizes user-visible channel names into lowercase slug keys. */
|
||||
export function normalizeChannelSlug(value: string): string {
|
||||
return normalizeLowercaseStringOrEmpty(value)
|
||||
.replace(/^#/, "")
|
||||
@@ -41,10 +54,12 @@ export function normalizeChannelSlug(value: string): string {
|
||||
.replace(/^-+|-+$/g, "");
|
||||
}
|
||||
|
||||
/** Produces the deduped list of key candidates, discarding blank and nullish inputs. */
export function buildChannelKeyCandidates(...keys: Array<string | undefined | null>): string[] {
  // Deduping and blank-dropping are delegated to the shared normalization helper.
  const candidates = normalizeUniqueSingleOrTrimmedStringList(keys);
  return candidates;
}
|
||||
|
||||
/** Finds direct and wildcard entries without applying parent fallback precedence. */
|
||||
export function resolveChannelEntryMatch<T>(params: {
|
||||
entries?: Record<string, T>;
|
||||
keys: string[];
|
||||
@@ -61,12 +76,15 @@ export function resolveChannelEntryMatch<T>(params: {
|
||||
break;
|
||||
}
|
||||
if (params.wildcardKey && Object.hasOwn(entries, params.wildcardKey)) {
|
||||
// Keep wildcard metadata even when a direct entry exists so diagnostics can
|
||||
// explain the fallback that would have applied.
|
||||
match.wildcardEntry = entries[params.wildcardKey];
|
||||
match.wildcardKey = params.wildcardKey;
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
/** Resolves channel config by direct match, normalized direct match, parent match, then wildcard. */
|
||||
export function resolveChannelEntryMatchWithFallback<T>(params: {
|
||||
entries?: Record<string, T>;
|
||||
keys: string[];
|
||||
@@ -86,11 +104,15 @@ export function resolveChannelEntryMatchWithFallback<T>(params: {
|
||||
|
||||
const normalizeKey = params.normalizeKey;
|
||||
if (normalizeKey) {
|
||||
// Normalized direct matching lets display names and ids converge before parent/wildcard
|
||||
// fallback can broaden the selected config.
|
||||
const normalizedKeys = params.keys.map((key) => normalizeKey(key)).filter(Boolean);
|
||||
if (normalizedKeys.length > 0) {
|
||||
for (const [entryKey, entry] of Object.entries(params.entries ?? {})) {
|
||||
const normalizedEntry = normalizeKey(entryKey);
|
||||
if (normalizedEntry && normalizedKeys.includes(normalizedEntry)) {
|
||||
// Preserve the original configured key as matchKey; callers surface it
|
||||
// in status/debug output instead of the normalized comparison key.
|
||||
return {
|
||||
...direct,
|
||||
entry,
|
||||
@@ -118,6 +140,7 @@ export function resolveChannelEntryMatchWithFallback<T>(params: {
|
||||
};
|
||||
}
|
||||
if (normalizeKey) {
|
||||
// Normalized parent keys keep thread/channel parent fallback consistent with direct keys.
|
||||
const normalizedParentKeys = parentKeys.map((key) => normalizeKey(key)).filter(Boolean);
|
||||
if (normalizedParentKeys.length > 0) {
|
||||
for (const [entryKey, entry] of Object.entries(params.entries ?? {})) {
|
||||
@@ -151,6 +174,7 @@ export function resolveChannelEntryMatchWithFallback<T>(params: {
|
||||
return direct;
|
||||
}
|
||||
|
||||
/** Resolves nested allowlists where an unconfigured outer/inner list means "no restriction". */
|
||||
export function resolveNestedAllowlistDecision(params: {
|
||||
outerConfigured: boolean;
|
||||
outerMatched: boolean;
|
||||
@@ -158,6 +182,8 @@ export function resolveNestedAllowlistDecision(params: {
|
||||
innerMatched: boolean;
|
||||
}): boolean {
|
||||
if (!params.outerConfigured) {
|
||||
// Unconfigured outer lists mean the whole nested policy is inactive; do not
|
||||
// require an inner match until the outer scope has opted into restriction.
|
||||
return true;
|
||||
}
|
||||
if (!params.outerMatched) {
|
||||
|
||||
@@ -1,13 +1,20 @@
|
||||
export type CommandAuthorizer = {
|
||||
/** True when this authorizer has policy data for the current sender/context. */
|
||||
configured: boolean;
|
||||
/** True when the configured policy allows the control command. */
|
||||
allowed: boolean;
|
||||
};
|
||||
|
||||
/** Fallback policy used when access groups are disabled for a channel/account. */
|
||||
export type CommandGatingModeWhenAccessGroupsOff = "allow" | "deny" | "configured";
|
||||
|
||||
/** Resolves command authorization from one or more configured policy sources. */
|
||||
export function resolveCommandAuthorizedFromAuthorizers(params: {
|
||||
/** True when configured access groups should be enforced. */
|
||||
useAccessGroups: boolean;
|
||||
/** Candidate authorizers; any configured allow grants access. */
|
||||
authorizers: CommandAuthorizer[];
|
||||
/** Fallback behavior when access groups are disabled. Defaults to allow. */
|
||||
modeWhenAccessGroupsOff?: CommandGatingModeWhenAccessGroupsOff;
|
||||
}): boolean {
|
||||
const { useAccessGroups, authorizers } = params;
|
||||
@@ -23,16 +30,23 @@ export function resolveCommandAuthorizedFromAuthorizers(params: {
|
||||
if (!anyConfigured) {
|
||||
return true;
|
||||
}
|
||||
// "configured" preserves legacy permissive behavior until a concrete authorizer exists.
|
||||
return authorizers.some((entry) => entry.configured && entry.allowed);
|
||||
}
|
||||
return authorizers.some((entry) => entry.configured && entry.allowed);
|
||||
}
|
||||
|
||||
/** Returns both command authorization and whether a text control command must be blocked. */
|
||||
export function resolveControlCommandGate(params: {
|
||||
/** True when configured access groups should be enforced. */
|
||||
useAccessGroups: boolean;
|
||||
/** Candidate authorizers checked before allowing text control commands. */
|
||||
authorizers: CommandAuthorizer[];
|
||||
/** True when text commands are enabled for this inbound surface. */
|
||||
allowTextCommands: boolean;
|
||||
/** True when the inbound text contains a recognized control command. */
|
||||
hasControlCommand: boolean;
|
||||
/** Fallback behavior when access groups are disabled. Defaults to allow. */
|
||||
modeWhenAccessGroupsOff?: CommandGatingModeWhenAccessGroupsOff;
|
||||
}): { commandAuthorized: boolean; shouldBlock: boolean } {
|
||||
const commandAuthorized = resolveCommandAuthorizedFromAuthorizers({
|
||||
@@ -44,13 +58,21 @@ export function resolveControlCommandGate(params: {
|
||||
return { commandAuthorized, shouldBlock };
|
||||
}
|
||||
|
||||
/** Convenience wrapper for text command gates with primary and secondary authorizers. */
|
||||
export function resolveDualTextControlCommandGate(params: {
|
||||
/** True when configured access groups should be enforced. */
|
||||
useAccessGroups: boolean;
|
||||
/** True when the primary authorizer has policy data for this sender/context. */
|
||||
primaryConfigured: boolean;
|
||||
/** True when the primary authorizer allows the command. */
|
||||
primaryAllowed: boolean;
|
||||
/** True when the secondary authorizer has policy data for this sender/context. */
|
||||
secondaryConfigured: boolean;
|
||||
/** True when the secondary authorizer allows the command. */
|
||||
secondaryAllowed: boolean;
|
||||
/** True when the inbound text contains a recognized control command. */
|
||||
hasControlCommand: boolean;
|
||||
/** Fallback behavior when access groups are disabled. Defaults to allow. */
|
||||
modeWhenAccessGroupsOff?: CommandGatingModeWhenAccessGroupsOff;
|
||||
}): { commandAuthorized: boolean; shouldBlock: boolean } {
|
||||
return resolveControlCommandGate({
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user