mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-18 20:32:25 +08:00
Compare commits
450 Commits
fix/loggin
...
354
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
765208ce47 | ||
|
|
9bd8911615 | ||
|
|
21b8d35e2d | ||
|
|
cd8fc2f915 | ||
|
|
2b4bebb72f | ||
|
|
cc798ce0ef | ||
|
|
4e4b6b7a19 | ||
|
|
019b7797e1 | ||
|
|
097883282d | ||
|
|
eb461f25c6 | ||
|
|
1e0f0e5444 | ||
|
|
4319f07afa | ||
|
|
ea6226bf49 | ||
|
|
36aa4f69fb | ||
|
|
96398871d9 | ||
|
|
6fbfb36184 | ||
|
|
9f5b179f7f | ||
|
|
9d7793ee2e | ||
|
|
405a088d60 | ||
|
|
00bee7eb5e | ||
|
|
7637061feb | ||
|
|
380bff9d13 | ||
|
|
ece183233d | ||
|
|
84d626aba7 | ||
|
|
082de8f294 | ||
|
|
c676d1f636 | ||
|
|
86fabe02b4 | ||
|
|
3008137c8d | ||
|
|
a67fbc6a98 | ||
|
|
c265e3a96b | ||
|
|
f578654e14 | ||
|
|
659e0d3a2f | ||
|
|
b27916cbce | ||
|
|
1455fd0b02 | ||
|
|
ade0f06426 | ||
|
|
daa9af6bdf | ||
|
|
fc53ab3e87 | ||
|
|
7a39107f6e | ||
|
|
c0000bed96 | ||
|
|
a35d98def0 | ||
|
|
3dff2f08ad | ||
|
|
01099af7e7 | ||
|
|
3b851c4366 | ||
|
|
cc4b8e8e79 | ||
|
|
4998dc8dd3 | ||
|
|
bea33a6122 | ||
|
|
ff827bdf04 | ||
|
|
ef534fbda9 | ||
|
|
1bd92102bb | ||
|
|
0014eeedad | ||
|
|
a0e62103d8 | ||
|
|
2fe1590196 | ||
|
|
acd42ba736 | ||
|
|
30e35f7c29 | ||
|
|
e516b14df4 | ||
|
|
122c925acd | ||
|
|
a60a087454 | ||
|
|
80744c1c35 | ||
|
|
8813b4ac8a | ||
|
|
3207ff2ed7 | ||
|
|
833854aecb | ||
|
|
167f722769 | ||
|
|
d46f52d70e | ||
|
|
c19e23a96e | ||
|
|
8b750ad1a7 | ||
|
|
95bf2a8e36 | ||
|
|
c93233b4b1 | ||
|
|
36f316cde0 | ||
|
|
544f8fc400 | ||
|
|
07cced29ad | ||
|
|
8c0250dd06 | ||
|
|
a1f27e524c | ||
|
|
4bb5d24047 | ||
|
|
392c5d8ede | ||
|
|
59f8b9412a | ||
|
|
dc2b88f720 | ||
|
|
b8a1070665 | ||
|
|
9cde5b895d | ||
|
|
2238735830 | ||
|
|
9dbbccac43 | ||
|
|
137aafe04e | ||
|
|
0f14e2a4de | ||
|
|
637eaa31e9 | ||
|
|
f2a7a4b4b9 | ||
|
|
da50d92c14 | ||
|
|
22bd9ca11f | ||
|
|
a2de84da2a | ||
|
|
4cda0a2743 | ||
|
|
6dce35db03 | ||
|
|
7fb8af543f | ||
|
|
a208cb293e | ||
|
|
193d32db02 | ||
|
|
cd27bc26b0 | ||
|
|
ae2a4a5392 | ||
|
|
f6efb80fcf | ||
|
|
4761902b1b | ||
|
|
76ceb30539 | ||
|
|
03a7e0151d | ||
|
|
58448f9f89 | ||
|
|
4d5e3eb796 | ||
|
|
e11d071602 | ||
|
|
bbb2734d47 | ||
|
|
69d3b95d34 | ||
|
|
c88d7bc30d | ||
|
|
655ab95dd6 | ||
|
|
455deb5841 | ||
|
|
efeba38df1 | ||
|
|
be4f327324 | ||
|
|
a02d50ede9 | ||
|
|
3204d902b3 | ||
|
|
33ae2c4db7 | ||
|
|
0a8ff8f3ce | ||
|
|
55a18686cb | ||
|
|
f4fc4f7b1c | ||
|
|
04e10e233b | ||
|
|
9610a94d05 | ||
|
|
36a4009739 | ||
|
|
0157625a89 | ||
|
|
802ee1ab12 | ||
|
|
88a08a0006 | ||
|
|
5f17671a3e | ||
|
|
79fd5e9a11 | ||
|
|
e718f4eb8a | ||
|
|
62b3adea8d | ||
|
|
37975fe02b | ||
|
|
568848008b | ||
|
|
18a98e03c8 | ||
|
|
cf2be8319f | ||
|
|
c84444680e | ||
|
|
e540a7cd21 | ||
|
|
c77faa7369 | ||
|
|
e63fad1627 | ||
|
|
a700dcd84a | ||
|
|
e2749ebf02 | ||
|
|
7492a1232d | ||
|
|
6878c8c5e6 | ||
|
|
5d41a61009 | ||
|
|
76e9d18503 | ||
|
|
3bdb4e81ee | ||
|
|
9d97945a04 | ||
|
|
686896a22d | ||
|
|
25782f10d7 | ||
|
|
948dab86bf | ||
|
|
f7e71efd7a | ||
|
|
a9e1c38146 | ||
|
|
df12e51788 | ||
|
|
44c7c894e7 | ||
|
|
c4cea95e2a | ||
|
|
6798af3df3 | ||
|
|
4a2f0bb05a | ||
|
|
ed87354850 | ||
|
|
b348c066a2 | ||
|
|
c6ab5c0ea3 | ||
|
|
e1a58b8a77 | ||
|
|
892ae8245e | ||
|
|
c8df6e35c0 | ||
|
|
c453a50900 | ||
|
|
e8e2a49f86 | ||
|
|
5fa96a350b | ||
|
|
5127453584 | ||
|
|
f6d4b0e50e | ||
|
|
959876f3d9 | ||
|
|
329c5e8fbe | ||
|
|
dc4bf70ddf | ||
|
|
003eb51432 | ||
|
|
e7bca5e254 | ||
|
|
f8675563de | ||
|
|
afb1d24855 | ||
|
|
884e4dbe73 | ||
|
|
8ff4d2e720 | ||
|
|
c6e4801c3d | ||
|
|
d5cb85cc8f | ||
|
|
3a8030afdc | ||
|
|
5e5caeacbc | ||
|
|
ebf8009245 | ||
|
|
b14bf19c63 | ||
|
|
27d9455c03 | ||
|
|
0229c587bb | ||
|
|
192ae58612 | ||
|
|
3190577e95 | ||
|
|
c02ceaa501 | ||
|
|
4a00db2da8 | ||
|
|
810e9b93c8 | ||
|
|
b2ef706b0b | ||
|
|
fbde3f73c3 | ||
|
|
04d3f789fa | ||
|
|
4d6590c4b7 | ||
|
|
54bcd9f721 | ||
|
|
52b453ca26 | ||
|
|
0377c1ce6f | ||
|
|
3cc7cd0abc | ||
|
|
f447de3c34 | ||
|
|
dda9d3bebf | ||
|
|
f1040d6239 | ||
|
|
309724db30 | ||
|
|
f2b59f01f5 | ||
|
|
f10632a4c1 | ||
|
|
470c618054 | ||
|
|
4c1cef8091 | ||
|
|
67d8d1a108 | ||
|
|
788744963d | ||
|
|
bc05a0cf57 | ||
|
|
8261d1dc14 | ||
|
|
83c27e33a7 | ||
|
|
c827427a9f | ||
|
|
307f176145 | ||
|
|
eea4cbb644 | ||
|
|
7c9c77c264 | ||
|
|
9fd6fcc993 | ||
|
|
2037f2ced0 | ||
|
|
3db49affee | ||
|
|
697015178d | ||
|
|
7125272700 | ||
|
|
bc7600792a | ||
|
|
024b94d874 | ||
|
|
9a0b3899e1 | ||
|
|
b7a7c77d63 | ||
|
|
e690358613 | ||
|
|
d43f86b339 | ||
|
|
8cc658f45a | ||
|
|
fa91211932 | ||
|
|
451acb607a | ||
|
|
5b4f1ce0e1 | ||
|
|
fe1eb6ea8a | ||
|
|
41699ee85b | ||
|
|
0757efc4ea | ||
|
|
7cefba303a | ||
|
|
836c1b4978 | ||
|
|
c3ef2c53fa | ||
|
|
0eccb327b2 | ||
|
|
eb41468beb | ||
|
|
ea8722a05b | ||
|
|
8d4c029147 | ||
|
|
93c040c832 | ||
|
|
9a24e017d8 | ||
|
|
c959098a6d | ||
|
|
9930e67c26 | ||
|
|
58f403d493 | ||
|
|
9da9a180f6 | ||
|
|
6c7fcbb20b | ||
|
|
8277dc7f61 | ||
|
|
64b3d17100 | ||
|
|
357d7058c0 | ||
|
|
122d870049 | ||
|
|
d5207bac8a | ||
|
|
e782428e97 | ||
|
|
1bf2381bc8 | ||
|
|
9a85874f8f | ||
|
|
a5f32d3a1a | ||
|
|
b6afe5461f | ||
|
|
392dd095a2 | ||
|
|
e5a09c379e | ||
|
|
01f8871799 | ||
|
|
6fc1f608c8 | ||
|
|
59a75e8a40 | ||
|
|
3fc19fbb67 | ||
|
|
2846d3f673 | ||
|
|
60b8d5a835 | ||
|
|
81e0336dfa | ||
|
|
b2719d2ab8 | ||
|
|
2a1cc53fcc | ||
|
|
f0d13917f8 | ||
|
|
33360b9c72 | ||
|
|
1ee073df03 | ||
|
|
762480a9e5 | ||
|
|
0499e446d9 | ||
|
|
282e9d6910 | ||
|
|
f951bd89ef | ||
|
|
f544e366a1 | ||
|
|
8ad71bc0e0 | ||
|
|
9981cbf519 | ||
|
|
fe774da67f | ||
|
|
3318cae246 | ||
|
|
b63e593a01 | ||
|
|
c2912725b6 | ||
|
|
4925530be9 | ||
|
|
4e9e885448 | ||
|
|
2df13e85c5 | ||
|
|
028cf920ea | ||
|
|
114b005436 | ||
|
|
a5f37d1c9a | ||
|
|
216aff34ef | ||
|
|
f9ab93ea98 | ||
|
|
ef120bebd2 | ||
|
|
5fdc67f498 | ||
|
|
2298f2018c | ||
|
|
ca1575b4cd | ||
|
|
f24bfdb2aa | ||
|
|
2d643ba935 | ||
|
|
8ff5d6c77a | ||
|
|
9e4fa7488c | ||
|
|
22bdcde16f | ||
|
|
8e1a39e1df | ||
|
|
7294365976 | ||
|
|
b35525273a | ||
|
|
f3eea2d016 | ||
|
|
07e17274c3 | ||
|
|
8a0faac188 | ||
|
|
6a6690bf3d | ||
|
|
8d52eecefc | ||
|
|
faae9dc7c2 | ||
|
|
f5c0f1f025 | ||
|
|
53c4dd7895 | ||
|
|
07092c7330 | ||
|
|
8d47dfb8ab | ||
|
|
b760840220 | ||
|
|
bb7486ceae | ||
|
|
c25a4a0d1d | ||
|
|
fd727d3c5e | ||
|
|
5c9cce3a7b | ||
|
|
9008955e21 | ||
|
|
ffaced657e | ||
|
|
379108660e | ||
|
|
32be4bd790 | ||
|
|
94bf35369d | ||
|
|
eaaa394ca0 | ||
|
|
e1bd220959 | ||
|
|
6c74c701a8 | ||
|
|
70d8e6652f | ||
|
|
593d4a7e0d | ||
|
|
eb96d5c3c8 | ||
|
|
e960662c21 | ||
|
|
fd409968ad | ||
|
|
3d3ad30436 | ||
|
|
8dcc62dbaa | ||
|
|
7c63f39e44 | ||
|
|
c23f290523 | ||
|
|
a6ea0e6449 | ||
|
|
59bde1d95e | ||
|
|
061b23c8ec | ||
|
|
7696455b2e | ||
|
|
e105e57745 | ||
|
|
45d3150ab8 | ||
|
|
2501dd3bfb | ||
|
|
75df1e264e | ||
|
|
a80db6f355 | ||
|
|
08a5856d97 | ||
|
|
81a11e0e58 | ||
|
|
36c7e83614 | ||
|
|
d3f41780a0 | ||
|
|
ef4cc389e9 | ||
|
|
3c310be683 | ||
|
|
c5bd7252b7 | ||
|
|
0ab6fd8593 | ||
|
|
ffdc3d38a9 | ||
|
|
81920f3ad1 | ||
|
|
224bf4a9be | ||
|
|
7ca7b2d4d3 | ||
|
|
c426712969 | ||
|
|
5e776ca4c3 | ||
|
|
40f7ef22a0 | ||
|
|
d5727ca94a | ||
|
|
40ee96c002 | ||
|
|
247824d842 | ||
|
|
a2a8c3641c | ||
|
|
1e01b0b5ec | ||
|
|
45ca762a5e | ||
|
|
928103bd1c | ||
|
|
8d6266c914 | ||
|
|
12af575aa0 | ||
|
|
a2f9d169bc | ||
|
|
4a71f99da1 | ||
|
|
af07d97164 | ||
|
|
a8677558f1 | ||
|
|
dc76efc91b | ||
|
|
fc15ba9309 | ||
|
|
0b0452a6b0 | ||
|
|
b57559a4e5 | ||
|
|
2785354250 | ||
|
|
7fcdfb49c9 | ||
|
|
020db1592f | ||
|
|
51370b44c7 | ||
|
|
51000998f5 | ||
|
|
dedc18c37b | ||
|
|
afda3cae32 | ||
|
|
655ddd7000 | ||
|
|
6e62fffb54 | ||
|
|
f6a53d2409 | ||
|
|
31f0757c49 | ||
|
|
24caf2b5b8 | ||
|
|
9df57f3ee0 | ||
|
|
42aef7b3e9 | ||
|
|
feee96218a | ||
|
|
9e676d5676 | ||
|
|
91398cd2c7 | ||
|
|
c9bbe3c10f | ||
|
|
eb7874a59e | ||
|
|
3acc5ad51b | ||
|
|
1381757d2e | ||
|
|
4032736863 | ||
|
|
5220058ebf | ||
|
|
0a949bb1c3 | ||
|
|
b503b5f8da | ||
|
|
f53216c21e | ||
|
|
51ec3d30d5 | ||
|
|
c153ade99d | ||
|
|
85a9677e8b | ||
|
|
364901a2be | ||
|
|
511f24e959 | ||
|
|
75d6c0c68b | ||
|
|
63161bf5ad | ||
|
|
735c283a69 | ||
|
|
c6c01fb973 | ||
|
|
03763ecb01 | ||
|
|
2c61006115 | ||
|
|
da8899a934 | ||
|
|
9ee1fa0813 | ||
|
|
aa21ac708e | ||
|
|
b14c380096 | ||
|
|
d651100a35 | ||
|
|
d4f07e468e | ||
|
|
5ffc3e12ff | ||
|
|
248f030054 | ||
|
|
c6b8624793 | ||
|
|
c0cba1793e | ||
|
|
da086196c3 | ||
|
|
bd0fe6ed43 | ||
|
|
f767b17891 | ||
|
|
5cfbec59c2 | ||
|
|
234e6d55e3 | ||
|
|
2af91da79c | ||
|
|
16f4c82527 | ||
|
|
a1c3a7144d | ||
|
|
90a41dbd0e | ||
|
|
3dbf5e5c6d | ||
|
|
0451836493 | ||
|
|
0ec0826568 | ||
|
|
0b61ed0c0a | ||
|
|
d801773202 | ||
|
|
9dd6ecf45d | ||
|
|
1f9e0707cb | ||
|
|
c779abaa7d | ||
|
|
ad4878917c | ||
|
|
854976203e | ||
|
|
817a8dcd21 | ||
|
|
cb80453151 | ||
|
|
3ca91c872f | ||
|
|
b1479b6839 | ||
|
|
3d2c303a60 | ||
|
|
c45226ed84 | ||
|
|
2cfc6d9d19 | ||
|
|
eb3e39191e | ||
|
|
8687b8fada | ||
|
|
a9bad91301 | ||
|
|
2fb877b457 | ||
|
|
276c81f319 | ||
|
|
c65356d9a2 | ||
|
|
30c0e94042 | ||
|
|
1724a92958 | ||
|
|
55326ffb07 |
@@ -57,28 +57,31 @@ Use `qa character-eval` for style/persona/vibe checks across multiple live model
|
||||
pnpm openclaw qa character-eval \
|
||||
--model openai/gpt-5.4,thinking=xhigh \
|
||||
--model openai/gpt-5.2,thinking=xhigh \
|
||||
--model openai/gpt-5,thinking=xhigh \
|
||||
--model anthropic/claude-opus-4-6,thinking=high \
|
||||
--model anthropic/claude-sonnet-4-6,thinking=high \
|
||||
--model minimax/MiniMax-M2.7,thinking=high \
|
||||
--model zai/glm-5.1,thinking=high \
|
||||
--model moonshot/kimi-k2.5,thinking=high \
|
||||
--model qwen/qwen3.6-plus,thinking=high \
|
||||
--model xiaomi/mimo-v2-pro,thinking=high \
|
||||
--model google/gemini-3.1-pro-preview,thinking=high \
|
||||
--model codex-cli/<codex-model>,thinking=high \
|
||||
--judge-model openai/gpt-5.4,thinking=xhigh,fast \
|
||||
--judge-model anthropic/claude-opus-4-6,thinking=high \
|
||||
--concurrency 16 \
|
||||
--judge-concurrency 16 \
|
||||
--concurrency 8 \
|
||||
--judge-concurrency 8 \
|
||||
--output-dir .artifacts/qa-e2e/character-eval-<tag>
|
||||
```
|
||||
|
||||
- Runs local QA gateway child processes, not Docker.
|
||||
- Preferred model spec syntax is `provider/model,thinking=<level>[,fast|,no-fast|,fast=<bool>]` for both `--model` and `--judge-model`.
|
||||
- Do not add new examples with separate `--model-thinking`; keep that flag as legacy compatibility only.
|
||||
- Defaults to candidate models `openai/gpt-5.4`, `openai/gpt-5.2`, `openai/gpt-5`, `anthropic/claude-opus-4-6`, `anthropic/claude-sonnet-4-6`, `zai/glm-5.1`, `moonshot/kimi-k2.5`, and `google/gemini-3.1-pro-preview` when no `--model` is passed.
|
||||
- Defaults to candidate models `openai/gpt-5.4`, `openai/gpt-5.2`, `anthropic/claude-opus-4-6`, `anthropic/claude-sonnet-4-6`, `minimax/MiniMax-M2.7`, `zai/glm-5.1`, `moonshot/kimi-k2.5`, `qwen/qwen3.6-plus`, `xiaomi/mimo-v2-pro`, and `google/gemini-3.1-pro-preview` when no `--model` is passed.
|
||||
- Candidate thinking defaults to `high`, with `xhigh` for OpenAI models that support it. Prefer inline `--model provider/model,thinking=<level>`; `--thinking <level>` and `--model-thinking <provider/model=level>` remain compatibility shims.
|
||||
- OpenAI candidate refs default to fast mode so priority processing is used where supported. Use inline `,fast`, `,no-fast`, or `,fast=false` for one model; use `--fast` only to force fast mode for every candidate.
|
||||
- Judges default to `openai/gpt-5.4,thinking=xhigh,fast` and `anthropic/claude-opus-4-6,thinking=high`.
|
||||
- Report includes judge ranking, run stats, durations, and full transcripts; do not include raw judge replies. Duration is benchmark context, not a grading signal.
|
||||
- Candidate and judge concurrency default to 16. Use `--concurrency <n>` and `--judge-concurrency <n>` to override when local gateways or provider limits need a gentler lane.
|
||||
- Candidate and judge concurrency default to 8. Use `--concurrency <n>` and `--judge-concurrency <n>` to override when local gateways or provider limits need a gentler lane.
|
||||
- Scenario source should stay markdown-driven under `qa/scenarios/`.
|
||||
- For isolated character/persona evals, write the persona into `SOUL.md` and blank `IDENTITY.md` in the scenario flow. Use `SOUL.md + IDENTITY.md` only when intentionally testing how the normal OpenClaw identity combines with the character.
|
||||
- Keep prompts natural and task-shaped. The candidate model should receive character setup through `SOUL.md`, then normal user turns such as chat, workspace help, and small file tasks; do not ask "how would you react?" or tell the model it is in an eval.
|
||||
|
||||
8
.github/workflows/ci.yml
vendored
8
.github/workflows/ci.yml
vendored
@@ -1042,9 +1042,7 @@ jobs:
|
||||
set -euo pipefail
|
||||
case "$TASK" in
|
||||
test)
|
||||
# Linux owns the full repo test suite. Keep the Windows runner focused on
|
||||
# Windows-native process/path wrappers so platform regressions fail fast.
|
||||
pnpm test:windows:ci
|
||||
pnpm test
|
||||
;;
|
||||
*)
|
||||
echo "Unsupported Windows checks task: $TASK" >&2
|
||||
@@ -1095,9 +1093,7 @@ jobs:
|
||||
set -euo pipefail
|
||||
case "$TASK" in
|
||||
test)
|
||||
# Linux owns the full repo test suite. Keep macOS CI focused on
|
||||
# launchd/Homebrew/runtime path coverage and the process-group wrapper.
|
||||
pnpm test:macos:ci
|
||||
pnpm test
|
||||
;;
|
||||
*)
|
||||
echo "Unsupported macOS node task: $TASK" >&2
|
||||
|
||||
58
CHANGELOG.md
58
CHANGELOG.md
@@ -6,48 +6,31 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Changes
|
||||
|
||||
- Memory/dreaming: add a grounded REM backfill lane with historical `rem-harness --path`, diary commit/reset flows, cleaner durable-fact extraction, and live short-term promotion integration so old daily notes can replay into Dreams and durable memory without a second memory stack. Thanks @mbelinky.
|
||||
- Control UI/dreaming: add a structured diary view with timeline navigation, backfill/reset controls, traceable dreaming summaries, and a grounded Scene lane with promotion hints plus a safe clear-grounded action for staged backfill signals. (#63395) Thanks @mbelinky.
|
||||
- QA/lab: add character-vibes evaluation reports with model selection and parallel runs so live QA can compare candidate behavior faster.
|
||||
- Plugins/provider-auth: let provider manifests declare `providerAuthAliases` so provider variants can share env vars, auth profiles, config-backed auth, and API-key onboarding choices without core-specific wiring.
|
||||
- iOS: pin release versioning to an explicit CalVer in `apps/ios/version.json`, keep TestFlight iteration on the same short version until maintainers intentionally promote the next gateway version, and add the documented `pnpm ios:version:pin -- --from-gateway` workflow for release trains. (#63001) Thanks @ngutman.
|
||||
- Plugins/provider-auth: let provider manifests declare `providerAuthAliases` so provider variants can share env vars, auth profiles, config-backed auth, and API-key onboarding choices without core-specific wiring.
|
||||
- Memory/dreaming: add a grounded REM backfill lane with historical `rem-harness --path`, diary commit, and reset flows so old daily notes can be replayed safely into `DREAMS.md`. Thanks @mbelinky.
|
||||
- Memory/dreaming: harden grounded diary extraction so `What Happened`, `Reflections`, and durable candidates suppress operational noise and preserve more atomic lasting facts. Thanks @mbelinky.
|
||||
- Control UI/dreaming: add a structured diary view with timeline navigation, backfill/reset controls, and traceable dreaming summaries. Thanks @mbelinky.
|
||||
|
||||
### Fixes
|
||||
|
||||
- Browser/security: re-run blocked-destination safety checks after interaction-driven main-frame navigations from click, evaluate, hook-triggered click, and batched action flows, so browser interactions cannot bypass the SSRF quarantine when they land on forbidden URLs. (#63226) Thanks @eleqtrizit.
|
||||
- Security/dotenv: block runtime-control env vars plus browser-control override and skip-server env vars from untrusted workspace `.env` files, and reject unsafe URL-style browser control override specifiers before lazy loading. (#62660, #62663) Thanks @eleqtrizit.
|
||||
- Gateway/node exec events: mark remote node `exec.started`, `exec.finished`, and `exec.denied` summaries as untrusted system events and sanitize node-provided command/output/reason text before enqueueing them, so remote node output cannot inject trusted `System:` content into later turns. (#62659) Thanks @eleqtrizit.
|
||||
- Plugins/onboarding auth choices: prevent untrusted workspace plugins from colliding with bundled provider auth-choice ids during non-interactive onboarding, so bundled provider setup keeps operator secrets out of untrusted workspace plugin handlers unless those plugins are explicitly trusted. (#62368) Thanks @pgondhi987.
|
||||
- Security/dependency audit: force `basic-ftp` to `5.2.1` for the CRLF command-injection fix and bump Hono plus `@hono/node-server` in production resolution paths.
|
||||
- Android/pairing: clear stale setup-code auth on new QR scans, bootstrap operator and node sessions from fresh pairing, prefer stored device tokens after bootstrap handoff, and pause pairing auto-retry while the app is backgrounded so scan-once Android pairing recovers reliably again. (#63199) Thanks @obviyus.
|
||||
- Matrix/gateway: wait for Matrix sync readiness before marking startup successful, keep Matrix background handler failures contained, and route fatal Matrix sync stops through channel-level restart handling instead of crashing the whole gateway. (#62779) Thanks @gumadeiras.
|
||||
- Slack/media: preserve bearer auth across same-origin `files.slack.com` redirects while still stripping it on cross-origin Slack CDN hops, so `url_private_download` image attachments load again. (#62960) Thanks @vincentkoc.
|
||||
- Reply/doctor: use the active runtime snapshot for queued reply runs, resolve reply-run SecretRefs before preflight helpers touch config, surface gateway OAuth reauth failures to users, and make `openclaw doctor` call out exact reauth commands. (#62693, #63217) Thanks @mbelinky.
|
||||
- Control UI: guard stale session-history reloads during fast session switches so the selected session and rendered transcript stay in sync. (#62975) Thanks @scoootscooob.
|
||||
- Gateway/chat: suppress exact and streamed `ANNOUNCE_SKIP` / `REPLY_SKIP` control replies across live chat updates and history sanitization so internal agent-to-agent control tokens no longer leak into user-facing gateway chat surfaces. (#51739) Thanks @Pinghuachiu.
|
||||
- Auto-reply/NO_REPLY: strip glued leading `NO_REPLY` tokens before reply normalization and ACP-visible streaming so silent sentinel text no longer leaks into user-visible replies while preserving substantive `NO_REPLY ...` text. Thanks @frankekn.
|
||||
- Sessions/routing: preserve established external routes on inter-session announce traffic so `sessions_send` follow-ups do not steal delivery from Telegram, Discord, or other external channels. (#58013) Thanks @duqaXxX.
|
||||
- Gateway/sessions: clear auto-fallback-pinned model overrides on `/reset` and `/new` while still preserving explicit user model selections, including legacy sessions created before override-source tracking existed. (#63155) Thanks @frankekn.
|
||||
- Slack/ACP: treat Slack ACP block replies as visible delivered output so OpenClaw stops re-sending the final fallback text after Slack already rendered the reply. (#62858) Thanks @gumadeiras.
|
||||
- Slack/partial streaming: key turn-local dedupe by dispatch kind and keep the final fallback reply path active when preview finalization fails so stale preview text cannot suppress the actual final answer. (#62859) Thanks @gumadeiras.
|
||||
- Matrix/doctor: migrate legacy `channels.matrix.dm.policy: "trusted"` configs back to compatible DM policies during `openclaw doctor --fix`, preserving explicit `allowFrom` boundaries as `allowlist` and defaulting empty legacy configs to `pairing`. (#62942) Thanks @lukeboyett.
|
||||
- npm packaging: mirror bundled channel runtime deps, stage Nostr runtime deps, derive required root mirrors from manifests and built chunks, and test packed release tarballs without repo `node_modules` so fresh installs fail fast on missing plugin deps instead of crashing at runtime. (#63065) Thanks @scoootscooob.
|
||||
- QA/live auth: fail fast when live QA scenarios hit classified auth or runtime failure replies, including raw scenario wait paths, and sanitize missing-key guidance so gateway auth problems surface as actionable errors instead of timeouts. (#63333) Thanks @shakkernerd.
|
||||
- Providers/OpenAI: default missing reasoning effort to `high` on OpenAI Responses, WebSocket, and compatible completions transports, while still honoring explicit per-run reasoning levels.
|
||||
- Providers/Ollama: allow Ollama models using the native `api: "ollama"` path to optionally display thinking output when `/think` is set to a non-off level. (#62712) Thanks @hoyyeva.
|
||||
- Codex CLI: pass OpenClaw's system prompt through Codex's `model_instructions_file` config override so fresh Codex CLI sessions receive the same prompt guidance as Claude CLI sessions.
|
||||
- Auth/profiles: persist explicit auth-profile upserts directly and skip external CLI sync for local writes so profile changes are saved without stale external credential state.
|
||||
- Slack/media: preserve bearer auth across same-origin `files.slack.com` redirects while still stripping it on cross-origin Slack CDN hops, so `url_private_download` image attachments load again. (#62960) Thanks @vincentkoc.
|
||||
- Gateway/node exec events: mark remote node `exec.started`, `exec.finished`, and `exec.denied` summaries as untrusted system events and sanitize node-provided command/output/reason text before enqueueing them, so remote node output cannot inject trusted `System:` content into later turns. (#62659) Thanks @eleqtrizit.
|
||||
- Agents/timeouts: make the LLM idle timeout inherit `agents.defaults.timeoutSeconds` when configured, disable the unconfigured idle watchdog for cron runs, and point idle-timeout errors at `agents.defaults.llm.idleTimeoutSeconds`. Thanks @drvoss.
|
||||
- Security/dotenv: expand workspace `.env` filtering to block runtime-control variables like gateway routing, ClawHub endpoints/tokens, browser executable overrides, and skip/disable control families, so untrusted repositories cannot steer OpenClaw runtime behavior through repo-local dotenv files. (#62660) Thanks @eleqtrizit.
|
||||
- Agents/failover: classify Z.ai vendor code `1311` as billing and `1113` as auth, including long wrapped `1311` payloads, so these errors stop falling through to generic failover handling. (#49552) Thanks @1bcMax.
|
||||
- QQBot/media-tags: support HTML entity-encoded angle brackets (`&lt;`/`&gt;`), URL slashes in attributes, and self-closing media tags so upstream `<qqimg>` payloads are correctly parsed and normalized. (#60493) Thanks @ylc0919.
|
||||
- Memory/dreaming: harden grounded backfill inputs, diary writes, status payloads, and diary action classification by preserving source-day labels, rejecting missing or symlinked targets cleanly, normalizing diary headings in gateway backfills, and tightening claim splitting plus diary source metadata. Thanks @mbelinky.
|
||||
- Memory/dreaming: accept embedded heartbeat trigger tokens so light and REM dreaming still run when runtime wrappers include extra heartbeat text.
|
||||
- Android/manual connect: allow blank port input only for TLS manual gateway endpoints so standard HTTPS Tailscale hosts default to `443` without silently changing cleartext manual connects. (#63134) Thanks @Tyler-RNG.
|
||||
- Windows/update: add heap headroom to Windows `pnpm build` steps during dev updates so update preflight builds stop failing on low default Node memory.
|
||||
- Plugin SDK: export the channel plugin base and web-search config contract through the public package so plugins can use them without private imports.
|
||||
- Plugins/contracts: keep test-only helpers out of production contract barrels, load shared contract harnesses through bundled test surfaces, and harden guardrails so indirect re-exports and canonical `*.test.ts` files stay blocked. (#63311) Thanks @altaywtf.
|
||||
- Control UI/models: preserve provider-qualified refs for OpenRouter catalog models whose ids already contain slashes so picker selections submit allowlist-compatible model refs instead of dropping the `openrouter/` prefix. (#63416) Thanks @sallyom.
|
||||
- Plugin SDK/command auth: split command status builders onto the lightweight `openclaw/plugin-sdk/command-status` subpath while preserving deprecated `command-auth` compatibility exports, so auth-only plugin imports no longer pull status/context warmup into CLI onboarding paths. (#63174) Thanks @hxy91819.
|
||||
- Browser/security: block browser-control module override and skip-server env vars from untrusted workspace `.env` files, and reject unsafe URL-style browser control override specifiers before lazy loading, so repo-local dotenv state cannot steer browser control module loading. (#62663) Thanks @eleqtrizit.
|
||||
- QQBot/media-tags: support HTML entity-encoded angle brackets (`&lt;`/`&gt;`) in media-tag regexes so entity-escaped `<qqimg>` tags from upstream are correctly parsed and normalized. (#60493) Thanks @ylc0919.
|
||||
- npm packaging: mirror bundled Slack, Telegram, Discord, and Feishu channel runtime deps at the root and harden published-install verification so fresh installs fail fast on manifest drift instead of missing-module crashes. (#63065) Thanks @scoootscooob.
|
||||
- npm packaging: derive required root runtime mirrors from bundled plugin manifests and built root chunks, then install packed release tarballs without the repo `node_modules` so release checks catch missing plugin deps before publish.
|
||||
- Reply/doctor: resolve reply-run SecretRefs before preflight helpers touch config, surface gateway OAuth reauth failures to users, and make `openclaw doctor` call out exact reauth commands.
|
||||
- Android/pairing: clear stale setup-code auth on new QR scans, bootstrap operator and node sessions from fresh pairing, prefer stored device tokens after bootstrap handoff, and pause pairing auto-retry while the app is backgrounded so scan-once Android pairing recovers reliably again. (#63199) Thanks @obviyus.
|
||||
- Auto-reply/NO_REPLY: strip glued leading `NO_REPLY` tokens before reply normalization and ACP-visible streaming so silent sentinel text no longer leaks into user-visible replies while preserving substantive `NO_REPLY ...` text. Thanks @frankekn.
|
||||
- Gateway/sessions: clear auto-fallback-pinned model overrides on `/reset` and `/new` while still preserving explicit user model selections, including legacy sessions created before override-source tracking existed. (#63155) Thanks @frankekn.
|
||||
- Codex CLI: pass OpenClaw's system prompt through Codex's `model_instructions_file` config override so fresh Codex CLI sessions receive the same prompt guidance as Claude CLI sessions.
|
||||
- Matrix/gateway: wait for Matrix sync readiness before marking startup successful, keep Matrix background handler failures contained, and route fatal Matrix sync stops through channel-level restart handling instead of crashing the whole gateway. (#62779) Thanks @gumadeiras.
|
||||
- Browser/security: re-run blocked-destination safety checks after interaction-driven main-frame navigations from click, evaluate, hook-triggered click, and batched action flows, so browser interactions cannot bypass the SSRF quarantine when they land on forbidden URLs. (#63226) Thanks @eleqtrizit.
|
||||
|
||||
## 2026.4.8
|
||||
|
||||
@@ -98,6 +81,7 @@ Docs: https://docs.openclaw.ai
|
||||
- TUI: route `/status` through the shared session-status command, keep commentary hidden in history, strip raw envelope metadata from async command notices, preserve fallback streaming before per-attempt failures finalize, and restore Kitty keyboard state on exit or fatal crashes. (#49130, #59985, #60043, #61463) Thanks @biefan and contributors.
|
||||
- iOS/Watch exec approvals: keep Apple Watch review and approval recovery working while the iPhone is locked or backgrounded, including reconnect recovery, pending approval persistence, notification cleanup, and APNs-backed watch refresh recovery. (#61757) Thanks @ngutman.
|
||||
- Agents/context overflow: combine oversized and aggregate tool-result recovery in one pass and restore a total-context overflow backstop so recoverable sessions retry instead of failing early. (#61651) Thanks @Takhoffman.
|
||||
- Agents/OpenAI: default missing reasoning effort to `high` on OpenAI Responses, WebSocket, and compatible completions transports, while still honoring explicit per-run reasoning levels.
|
||||
- Auth/OpenAI Codex OAuth: reload fresh on-disk credentials inside the locked refresh path and retry once after `refresh_token_reused` rotates only the stored refresh token, so relogin/restart recovery stops getting stuck on stale cached auth state. Thanks @owen-ever.
|
||||
- Auth/OpenAI Codex OAuth: keep native `/model ...@profile` selections on the target session and honor explicit user-locked auth profiles even when per-agent auth order excludes them. (#62744) Thanks @jalehman.
|
||||
- Providers/Anthropic: preserve thinking blocks for Claude Opus 4.5+, Sonnet 4.5+, and newer Claude 4-family models so prompt-cache prefixes keep matching, and skip `service_tier` injection on OAuth-authenticated stream wrapper requests so Claude OAuth streaming stops failing with HTTP 401. (#60356, #61793)
|
||||
@@ -108,6 +92,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Gateway tool/exec config: block model-facing `gateway config.apply` and `config.patch` writes from changing exec approval paths such as `safeBins`, `safeBinProfiles`, `safeBinTrustedDirs`, and `strictInlineEval`, while still allowing unchanged structured values through. (#62001) Thanks @eleqtrizit.
|
||||
- Host exec/env sanitization: block dangerous Java, Rust, Cargo, Git, Kubernetes, cloud credential, config-path, and Helm env overrides so host-run tools cannot be redirected to attacker-chosen code, config, credentials, or repository state. (#59119, #62002, #62291) Thanks @eleqtrizit and contributors.
|
||||
- Commands/allowlist: require owner authorization for `/allowlist add` and `/allowlist remove` before channel resolution, so non-owner but command-authorized senders can no longer persistently rewrite allowlist policy state. (#62383) Thanks @pgondhi987.
|
||||
- Plugins/onboarding auth choices: prevent untrusted workspace plugins from colliding with bundled provider auth-choice ids during non-interactive onboarding, so bundled provider setup keeps operator secrets out of untrusted workspace plugin handlers unless those plugins are explicitly trusted. (#62368) Thanks @pgondhi987.
|
||||
- Feishu/docx uploads: honor `tools.fs.workspaceOnly` for local `upload_file` and `upload_image` paths by forwarding workspace-constrained `localRoots` into the media loader, so docx uploads can no longer read host-local files outside the workspace when workspace-only mode is active. (#62369) Thanks @pgondhi987.
|
||||
- Network/fetch guard: drop request bodies and body-describing headers on cross-origin `307` and `308` redirects by default, so attacker-controlled redirect hops cannot receive secret-bearing POST payloads from SSRF-guarded fetch flows unless a caller explicitly opts in. (#62357) Thanks @pgondhi987.
|
||||
- Browser/SSRF: treat main-frame `document` redirect hops as navigations even when Playwright does not flag them as `isNavigationRequest()`, so strict private-network blocking still stops forbidden redirect pivots before the browser reaches the internal target. (#62355) Thanks @pgondhi987.
|
||||
@@ -164,6 +149,9 @@ Docs: https://docs.openclaw.ai
|
||||
- Agents/model resolution: let explicit `openai-codex/gpt-5.4` selection prefer provider runtime metadata when it reports a larger context window, keeping configured Codex runs aligned with the live provider limits. (#62694) Thanks @ruclaw7.
|
||||
- Agents/model resolution: keep explicit-model runtime comparisons on the configured workspace plugin registry, so workspace-installed providers do not silently fall back to stale explicit metadata during runtime model lookup.
|
||||
- Providers/Z.AI: default onboarding and endpoint detection to GLM-5.1 instead of GLM-5. (#61998) Thanks @serg0x.
|
||||
- Reply execution: prefer the active runtime snapshot over stale queued reply config during embedded reply and follow-up execution so SecretRef-backed reply turns stop crashing after secrets have already resolved. (#62693) Thanks @mbelinky.
|
||||
- Android/manual connect: allow blank port input only for TLS manual gateway endpoints so standard HTTPS Tailscale hosts default to `443` without silently changing cleartext manual connects. (#63134) Thanks @Tyler-RNG.
|
||||
- Matrix/agents: hide owner-only `set-profile` from embedded agent channel-action discovery so non-owner runs stop advertising profile updates they cannot execute. (#62662) Thanks @eleqtrizit.
|
||||
|
||||
## 2026.4.5
|
||||
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
d8ab30f2e73642c89168acd2e177a4d49568bfc3d64fdfcb37b72206295d4896 plugin-sdk-api-baseline.json
|
||||
94419b7f3bfa5d0fe8d1ec97825f05b8da1617c8406b7cdc37a72cd559975374 plugin-sdk-api-baseline.jsonl
|
||||
763d2709dd26f4ec7d5807b2f1781b7f58cb115d2b0a9c9235a6c2c7b3788c1f plugin-sdk-api-baseline.json
|
||||
87ab9ec219f037b13a8f42378d1fed02701d4035da0e5eca8a091626e8426523 plugin-sdk-api-baseline.jsonl
|
||||
|
||||
@@ -167,8 +167,4 @@ Notes:
|
||||
- If effectively active memory remote API key fields are configured as SecretRefs, the command resolves those values from the active gateway snapshot. If the gateway is unavailable, the command fails fast.
|
||||
- Gateway version skew note: this command path requires a gateway that supports `secrets.resolve`; older gateways return an unknown-method error.
|
||||
- Tune scheduled sweep cadence with `dreaming.frequency`. Deep promotion policy is otherwise internal; use CLI flags on `memory promote` when you need one-off manual overrides.
|
||||
- `memory rem-harness --path <file-or-dir> --grounded` previews grounded `What Happened`, `Reflections`, and `Possible Lasting Updates` from historical daily notes without writing anything.
|
||||
- `memory rem-backfill --path <file-or-dir>` writes reversible grounded diary entries into `DREAMS.md` for UI review.
|
||||
- `memory rem-backfill --path <file-or-dir> --stage-short-term` also seeds grounded durable candidates into the live short-term promotion store so the normal deep phase can rank them.
|
||||
- `memory rem-backfill --rollback` removes previously written grounded diary entries, and `memory rem-backfill --rollback-short-term` removes previously staged grounded short-term candidates.
|
||||
- See [Dreaming](/concepts/dreaming) for full phase descriptions and configuration reference.
|
||||
|
||||
@@ -81,20 +81,6 @@ subagent turn (using the default runtime model) and appends a short diary entry.
|
||||
|
||||
This diary is for human reading in the Dreams UI, not a promotion source.
|
||||
|
||||
There is also a grounded historical backfill lane for review and recovery work:
|
||||
|
||||
- `memory rem-harness --path ... --grounded` previews grounded diary output from historical `YYYY-MM-DD.md` notes.
|
||||
- `memory rem-backfill --path ...` writes reversible grounded diary entries into `DREAMS.md`.
|
||||
- `memory rem-backfill --path ... --stage-short-term` stages grounded durable candidates into the same short-term evidence store the normal deep phase already uses.
|
||||
- `memory rem-backfill --rollback` and `--rollback-short-term` remove those staged backfill artifacts without touching ordinary diary entries or live short-term recall.
|
||||
|
||||
The Control UI exposes the same diary backfill/reset flow so you can inspect
|
||||
results in the Dreams scene before deciding whether the grounded candidates
|
||||
deserve promotion. The Scene also shows a distinct grounded lane so you can see
|
||||
which staged short-term entries came from historical replay, which promoted
|
||||
items were grounded-led, and clear only grounded-only staged entries without
|
||||
touching ordinary live short-term state.
|
||||
|
||||
## Deep ranking signals
|
||||
|
||||
Deep ranking uses six weighted base signals plus phase reinforcement:
|
||||
@@ -221,9 +207,8 @@ When enabled, the Gateway **Dreams** tab shows:
|
||||
|
||||
- current dreaming enabled state
|
||||
- phase-level status and managed-sweep presence
|
||||
- short-term, grounded, signal, and promoted-today counts
|
||||
- short-term, long-term, and promoted-today counts
|
||||
- next scheduled run timing
|
||||
- a distinct grounded Scene lane for staged historical replay entries
|
||||
- an expandable Dream Diary reader backed by `doctor.memory.dreamDiary`
|
||||
|
||||
## Related
|
||||
|
||||
@@ -21,7 +21,7 @@ Your agent has three memory-related files:
|
||||
- **`memory/YYYY-MM-DD.md`** -- daily notes. Running context and observations.
|
||||
Today's and yesterday's notes are loaded automatically.
|
||||
- **`DREAMS.md`** (experimental, optional) -- Dream Diary and dreaming sweep
|
||||
summaries for human review, including grounded historical backfill entries.
|
||||
summaries for human review.
|
||||
|
||||
These files live in the agent workspace (default `~/.openclaw/workspace`).
|
||||
|
||||
@@ -133,41 +133,6 @@ It is designed to keep long-term memory high signal:
|
||||
For phase behavior, scoring signals, and Dream Diary details, see
|
||||
[Dreaming (experimental)](/concepts/dreaming).
|
||||
|
||||
## Grounded backfill and live promotion
|
||||
|
||||
The dreaming system now has two closely related review lanes:
|
||||
|
||||
- **Live dreaming** works from the short-term dreaming store under
|
||||
`memory/.dreams/` and is what the normal deep phase uses when deciding what
|
||||
can graduate into `MEMORY.md`.
|
||||
- **Grounded backfill** reads historical `memory/YYYY-MM-DD.md` notes as
|
||||
standalone day files and writes structured review output into `DREAMS.md`.
|
||||
|
||||
Grounded backfill is useful when you want to replay older notes and inspect what
|
||||
the system thinks is durable without manually editing `MEMORY.md`.
|
||||
|
||||
When you use:
|
||||
|
||||
```bash
|
||||
openclaw memory rem-backfill --path ./memory --stage-short-term
|
||||
```
|
||||
|
||||
the grounded durable candidates are not promoted directly. They are staged into
|
||||
the same short-term dreaming store the normal deep phase already uses. That
|
||||
means:
|
||||
|
||||
- `DREAMS.md` stays the human review surface.
|
||||
- the short-term store stays the machine-facing ranking surface.
|
||||
- `MEMORY.md` is still only written by deep promotion.
|
||||
|
||||
If you decide the replay was not useful, you can remove the staged artifacts
|
||||
without touching ordinary diary entries or normal recall state:
|
||||
|
||||
```bash
|
||||
openclaw memory rem-backfill --rollback
|
||||
openclaw memory rem-backfill --rollback-short-term
|
||||
```
|
||||
|
||||
## CLI
|
||||
|
||||
```bash
|
||||
|
||||
@@ -89,17 +89,18 @@ refs and write a judged Markdown report:
|
||||
pnpm openclaw qa character-eval \
|
||||
--model openai/gpt-5.4,thinking=xhigh \
|
||||
--model openai/gpt-5.2,thinking=xhigh \
|
||||
--model openai/gpt-5,thinking=xhigh \
|
||||
--model anthropic/claude-opus-4-6,thinking=high \
|
||||
--model anthropic/claude-sonnet-4-6,thinking=high \
|
||||
--model minimax/MiniMax-M2.7,thinking=high \
|
||||
--model zai/glm-5.1,thinking=high \
|
||||
--model moonshot/kimi-k2.5,thinking=high \
|
||||
--model qwen/qwen3.6-plus,thinking=high \
|
||||
--model xiaomi/mimo-v2-pro,thinking=high \
|
||||
--model google/gemini-3.1-pro-preview,thinking=high \
|
||||
--judge-model openai/gpt-5.4,thinking=xhigh,fast \
|
||||
--judge-model anthropic/claude-opus-4-6,thinking=high \
|
||||
--blind-judge-models \
|
||||
--concurrency 16 \
|
||||
--judge-concurrency 16
|
||||
--concurrency 8 \
|
||||
--judge-concurrency 8
|
||||
```
|
||||
|
||||
The command runs local QA gateway child processes, not Docker. Character eval
|
||||
@@ -108,10 +109,6 @@ such as chat, workspace help, and small file tasks. The candidate model should
|
||||
not be told that it is being evaluated. The command preserves each full
|
||||
transcript, records basic run stats, then asks the judge models in fast mode with
|
||||
`xhigh` reasoning to rank the runs by naturalness, vibe, and humor.
|
||||
Use `--blind-judge-models` when comparing providers: the judge prompt still gets
|
||||
every transcript and run status, but candidate refs are replaced with neutral
|
||||
labels such as `candidate-01`; the report maps rankings back to real refs after
|
||||
parsing.
|
||||
Candidate runs default to `high` thinking, with `xhigh` for OpenAI models that
|
||||
support it. Override a specific candidate inline with
|
||||
`--model provider/model,thinking=<level>`. `--thinking <level>` still sets a
|
||||
@@ -123,14 +120,14 @@ single candidate or judge needs an override. Pass `--fast` only when you want to
|
||||
force fast mode on for every candidate model. Candidate and judge durations are
|
||||
recorded in the report for benchmark analysis, but judge prompts explicitly say
|
||||
not to rank by speed.
|
||||
Candidate and judge model runs both default to concurrency 16. Lower
|
||||
Candidate and judge model runs both default to concurrency 8. Lower
|
||||
`--concurrency` or `--judge-concurrency` when provider limits or local gateway
|
||||
pressure make a run too noisy.
|
||||
When no candidate `--model` is passed, the character eval defaults to
|
||||
`openai/gpt-5.4`, `openai/gpt-5.2`, `openai/gpt-5`, `anthropic/claude-opus-4-6`,
|
||||
`anthropic/claude-sonnet-4-6`, `zai/glm-5.1`,
|
||||
`moonshot/kimi-k2.5`, and
|
||||
`google/gemini-3.1-pro-preview`.
|
||||
`openai/gpt-5.4`, `openai/gpt-5.2`, `anthropic/claude-opus-4-6`,
|
||||
`anthropic/claude-sonnet-4-6`, `minimax/MiniMax-M2.7`, `zai/glm-5.1`,
|
||||
`moonshot/kimi-k2.5`, `qwen/qwen3.6-plus`, `xiaomi/mimo-v2-pro`, and
|
||||
`google/gemini-3.1-pro-preview`.
|
||||
When no `--judge-model` is passed, the judges default to
|
||||
`openai/gpt-5.4,thinking=xhigh,fast` and
|
||||
`anthropic/claude-opus-4-6,thinking=high`.
|
||||
|
||||
@@ -93,40 +93,6 @@ cat ~/.openclaw/openclaw.json
|
||||
- Source install checks (pnpm workspace mismatch, missing UI assets, missing tsx binary).
|
||||
- Writes updated config + wizard metadata.
|
||||
|
||||
## Dreams UI backfill and reset
|
||||
|
||||
The Control UI Dreams scene includes **Backfill**, **Reset**, and **Clear Grounded**
|
||||
actions for the grounded dreaming workflow. These actions use gateway
|
||||
doctor-style RPC methods, but they are **not** part of `openclaw doctor` CLI
|
||||
repair/migration.
|
||||
|
||||
What they do:
|
||||
|
||||
- **Backfill** scans historical `memory/YYYY-MM-DD.md` files in the active
|
||||
workspace, runs the grounded REM diary pass, and writes reversible backfill
|
||||
entries into `DREAMS.md`.
|
||||
- **Reset** removes only those marked backfill diary entries from `DREAMS.md`.
|
||||
- **Clear Grounded** removes only staged grounded-only short-term entries that
|
||||
came from historical replay and have not accumulated live recall or daily
|
||||
support yet.
|
||||
|
||||
What they do **not** do by themselves:
|
||||
|
||||
- they do not edit `MEMORY.md`
|
||||
- they do not run full doctor migrations
|
||||
- they do not automatically stage grounded candidates into the live short-term
|
||||
promotion store unless you explicitly run the staged CLI path first
|
||||
|
||||
If you want grounded historical replay to influence the normal deep promotion
|
||||
lane, use the CLI flow instead:
|
||||
|
||||
```bash
|
||||
openclaw memory rem-backfill --path ./memory --stage-short-term
|
||||
```
|
||||
|
||||
That stages grounded durable candidates into the short-term dreaming store while
|
||||
keeping `DREAMS.md` as the review surface.
|
||||
|
||||
## Detailed behavior and rationale
|
||||
|
||||
### 0) Optional update (git installs)
|
||||
|
||||
@@ -203,7 +203,6 @@ Live tests are split into two layers so we can isolate failures:
|
||||
- `OPENCLAW_LIVE_MODELS=modern` to run the modern allowlist (Opus/Sonnet 4.6+, GPT-5.x + Codex, Gemini 3, GLM 4.7, MiniMax M2.7, Grok 4)
|
||||
- `OPENCLAW_LIVE_MODELS=all` is an alias for the modern allowlist
|
||||
- or `OPENCLAW_LIVE_MODELS="openai/gpt-5.4,anthropic/claude-opus-4-6,..."` (comma allowlist)
|
||||
- Modern/all sweeps default to a curated high-signal cap; set `OPENCLAW_LIVE_MAX_MODELS=0` for an exhaustive modern sweep or a positive number for a smaller cap.
|
||||
- How to select providers:
|
||||
- `OPENCLAW_LIVE_PROVIDERS="google,google-antigravity,google-gemini-cli"` (comma allowlist)
|
||||
- Where keys come from:
|
||||
@@ -235,7 +234,6 @@ Live tests are split into two layers so we can isolate failures:
|
||||
- Default: modern allowlist (Opus/Sonnet 4.6+, GPT-5.x + Codex, Gemini 3, GLM 4.7, MiniMax M2.7, Grok 4)
|
||||
- `OPENCLAW_LIVE_GATEWAY_MODELS=all` is an alias for the modern allowlist
|
||||
- Or set `OPENCLAW_LIVE_GATEWAY_MODELS="provider/model"` (or comma list) to narrow
|
||||
- Modern/all gateway sweeps default to a curated high-signal cap; set `OPENCLAW_LIVE_GATEWAY_MAX_MODELS=0` for an exhaustive modern sweep or a positive number for a smaller cap.
|
||||
- How to select providers (avoid “OpenRouter everything”):
|
||||
- `OPENCLAW_LIVE_GATEWAY_PROVIDERS="google,google-antigravity,google-gemini-cli,openai,anthropic,zai,minimax"` (comma allowlist)
|
||||
- Tool + image probes are always on in this live test:
|
||||
|
||||
@@ -0,0 +1,580 @@
|
||||
---
|
||||
title: "refactor: Make plugin-sdk a real workspace package incrementally"
|
||||
type: refactor
|
||||
status: active
|
||||
date: 2026-04-05
|
||||
---
|
||||
|
||||
# refactor: Make plugin-sdk a real workspace package incrementally
|
||||
|
||||
## Overview
|
||||
|
||||
This plan introduces a real workspace package for the plugin SDK at
|
||||
`packages/plugin-sdk` and uses it to opt in a small first wave of extensions to
|
||||
compiler-enforced package boundaries. The goal is to make illegal relative
|
||||
imports fail under normal `tsc` for a selected set of bundled provider
|
||||
extensions, without forcing a repo-wide migration or a giant merge-conflict
|
||||
surface.
|
||||
|
||||
The key incremental move is to run two modes in parallel for a while:
|
||||
|
||||
| Mode | Import shape | Who uses it | Enforcement |
|
||||
| ----------- | ------------------------ | ------------------------------------ | -------------------------------------------- |
|
||||
| Legacy mode | `openclaw/plugin-sdk/*` | all existing non-opted-in extensions | current permissive behavior remains |
|
||||
| Opt-in mode | `@openclaw/plugin-sdk/*` | first-wave extensions only | package-local `rootDir` + project references |
|
||||
|
||||
## Problem Frame
|
||||
|
||||
The current repo exports a large public plugin SDK surface, but it is not a real
|
||||
workspace package. Instead:
|
||||
|
||||
- root `tsconfig.json` maps `openclaw/plugin-sdk/*` directly to
|
||||
`src/plugin-sdk/*.ts`
|
||||
- extensions that were not opted into the previous experiment still share that
|
||||
global source-alias behavior
|
||||
- adding `rootDir` only works when allowed SDK imports stop resolving into raw
|
||||
repo source
|
||||
|
||||
That means the repo can describe the desired boundary policy, but TypeScript
|
||||
does not enforce it cleanly for most extensions.
|
||||
|
||||
You want an incremental path that:
|
||||
|
||||
- makes `plugin-sdk` real
|
||||
- moves the SDK toward a workspace package named `@openclaw/plugin-sdk`
|
||||
- changes only about 10 extensions in the first PR
|
||||
- leaves the rest of the extension tree on the old scheme until later cleanup
|
||||
- avoids the `tsconfig.plugin-sdk.dts.json` + postinstall-generated declaration
|
||||
workflow as the primary mechanism for the first-wave rollout
|
||||
|
||||
## Requirements Trace
|
||||
|
||||
- R1. Create a real workspace package for the plugin SDK under `packages/`.
|
||||
- R2. Name the new package `@openclaw/plugin-sdk`.
|
||||
- R3. Give the new SDK package its own `package.json` and `tsconfig.json`.
|
||||
- R4. Keep legacy `openclaw/plugin-sdk/*` imports working for non-opted-in
|
||||
extensions during the migration window.
|
||||
- R5. Opt in only a small first wave of extensions in the first PR.
|
||||
- R6. The first-wave extensions must fail closed for relative imports that leave
|
||||
their package root.
|
||||
- R7. The first-wave extensions must consume the SDK through a package
|
||||
dependency and a TS project reference, not through root `paths` aliases.
|
||||
- R8. The plan must avoid a repo-wide mandatory postinstall generation step for
|
||||
editor correctness.
|
||||
- R9. The first-wave rollout must be reviewable and mergeable as a moderate PR,
|
||||
not a repo-wide 300+ file refactor.
|
||||
|
||||
## Scope Boundaries
|
||||
|
||||
- No full migration of all bundled extensions in the first PR.
|
||||
- No requirement to delete `src/plugin-sdk` in the first PR.
|
||||
- No requirement to rewire every root build or test path to use the new package
|
||||
immediately.
|
||||
- No attempt to force VS Code squiggles for every non-opted-in extension.
|
||||
- No broad lint cleanup for the rest of the extension tree.
|
||||
- No large runtime behavior changes beyond import resolution, package ownership,
|
||||
and boundary enforcement for the opted-in extensions.
|
||||
|
||||
## Context & Research
|
||||
|
||||
### Relevant Code and Patterns
|
||||
|
||||
- `pnpm-workspace.yaml` already includes `packages/*` and `extensions/*`, so a
|
||||
new workspace package under `packages/plugin-sdk` fits the existing repo
|
||||
layout.
|
||||
- Existing workspace packages such as `packages/memory-host-sdk/package.json`
|
||||
and `packages/plugin-package-contract/package.json` already use package-local
|
||||
`exports` maps rooted in `src/*.ts`.
|
||||
- Root `package.json` currently publishes the SDK surface through `./plugin-sdk`
|
||||
and `./plugin-sdk/*` exports backed by `dist/plugin-sdk/*.js` and
|
||||
`dist/plugin-sdk/*.d.ts`.
|
||||
- `src/plugin-sdk/entrypoints.ts` and `scripts/lib/plugin-sdk-entrypoints.json`
|
||||
already act as the canonical entrypoint inventory for the SDK surface.
|
||||
- Root `tsconfig.json` currently maps:
|
||||
- `openclaw/plugin-sdk` -> `src/plugin-sdk/index.ts`
|
||||
- `openclaw/plugin-sdk/*` -> `src/plugin-sdk/*.ts`
|
||||
- The previous boundary experiment showed that package-local `rootDir` works for
|
||||
illegal relative imports only after allowed SDK imports stop resolving to raw
|
||||
source outside the extension package.
|
||||
|
||||
### First-Wave Extension Set
|
||||
|
||||
This plan assumes the first wave is the provider-heavy set that is least likely
|
||||
to drag in complex channel-runtime edge cases:
|
||||
|
||||
- `extensions/anthropic`
|
||||
- `extensions/exa`
|
||||
- `extensions/firecrawl`
|
||||
- `extensions/groq`
|
||||
- `extensions/mistral`
|
||||
- `extensions/openai`
|
||||
- `extensions/perplexity`
|
||||
- `extensions/tavily`
|
||||
- `extensions/together`
|
||||
- `extensions/xai`
|
||||
|
||||
### First-Wave SDK Surface Inventory
|
||||
|
||||
The first-wave extensions currently import a manageable subset of SDK subpaths.
|
||||
The initial `@openclaw/plugin-sdk` package only needs to cover these:
|
||||
|
||||
- `agent-runtime`
|
||||
- `cli-runtime`
|
||||
- `config-runtime`
|
||||
- `core`
|
||||
- `image-generation`
|
||||
- `media-runtime`
|
||||
- `media-understanding`
|
||||
- `plugin-entry`
|
||||
- `plugin-runtime`
|
||||
- `provider-auth`
|
||||
- `provider-auth-api-key`
|
||||
- `provider-auth-login`
|
||||
- `provider-auth-runtime`
|
||||
- `provider-catalog-shared`
|
||||
- `provider-entry`
|
||||
- `provider-http`
|
||||
- `provider-model-shared`
|
||||
- `provider-onboard`
|
||||
- `provider-stream-family`
|
||||
- `provider-stream-shared`
|
||||
- `provider-tools`
|
||||
- `provider-usage`
|
||||
- `provider-web-fetch`
|
||||
- `provider-web-search`
|
||||
- `realtime-transcription`
|
||||
- `realtime-voice`
|
||||
- `runtime-env`
|
||||
- `secret-input`
|
||||
- `security-runtime`
|
||||
- `speech`
|
||||
- `testing`
|
||||
|
||||
### Institutional Learnings
|
||||
|
||||
- No relevant `docs/solutions/` entries were present in this worktree.
|
||||
|
||||
### External References
|
||||
|
||||
- No external research was needed for this plan. The repo already contains the
|
||||
relevant workspace-package and SDK-export patterns.
|
||||
|
||||
## Key Technical Decisions
|
||||
|
||||
- Introduce `@openclaw/plugin-sdk` as a new workspace package while keeping the
|
||||
legacy root `openclaw/plugin-sdk/*` surface alive during migration.
|
||||
Rationale: this lets a first-wave extension set move onto real package
|
||||
resolution without forcing every extension and every root build path to change
|
||||
at once.
|
||||
|
||||
- Use a dedicated opt-in boundary base config such as
|
||||
`extensions/tsconfig.package-boundary.base.json` instead of replacing the
|
||||
existing extension base for everyone.
|
||||
Rationale: the repo needs to support both legacy and opt-in extension modes
|
||||
simultaneously during migration.
|
||||
|
||||
- Use TS project references from first-wave extensions to
|
||||
`packages/plugin-sdk/tsconfig.json` and set
|
||||
`disableSourceOfProjectReferenceRedirect` for the opt-in boundary mode.
|
||||
Rationale: this gives `tsc` a real package graph while discouraging editor and
|
||||
compiler fallback to raw source traversal.
|
||||
|
||||
- Keep `@openclaw/plugin-sdk` private in the first wave.
|
||||
Rationale: the immediate goal is internal boundary enforcement and migration
|
||||
safety, not publishing a second external SDK contract before the surface is
|
||||
stable.
|
||||
|
||||
- Move only the first-wave SDK subpaths in the first implementation slice, and
|
||||
keep compatibility bridges for the rest.
|
||||
Rationale: physically moving all 315 `src/plugin-sdk/*.ts` files in one PR is
|
||||
exactly the merge-conflict surface this plan is trying to avoid.
|
||||
|
||||
- Do not rely on `scripts/postinstall-bundled-plugins.mjs` to build SDK
|
||||
declarations for the first wave.
|
||||
Rationale: explicit build/reference flows are easier to reason about and keep
|
||||
repo behavior more predictable.
|
||||
|
||||
## Open Questions
|
||||
|
||||
### Resolved During Planning
|
||||
|
||||
- Which extensions should be in the first wave?
|
||||
Use the 10 provider/web-search extensions listed above because they are more
|
||||
structurally isolated than the heavier channel packages.
|
||||
|
||||
- Should the first PR replace the entire extension tree?
|
||||
No. The first PR should support two modes in parallel and only opt in the
|
||||
first wave.
|
||||
|
||||
- Should the first wave require a postinstall declaration build?
|
||||
No. The package/reference graph should be explicit, and CI should run the
|
||||
relevant package-local typecheck intentionally.
|
||||
|
||||
### Deferred to Implementation
|
||||
|
||||
- Whether the first-wave package can point directly at package-local `src/*.ts`
|
||||
via project references alone, or whether a small declaration-emission step is
|
||||
still required for the `@openclaw/plugin-sdk` package.
|
||||
This is an implementation-owned TS graph validation question.
|
||||
|
||||
- Whether the root `openclaw` package should proxy first-wave SDK subpaths to
|
||||
`packages/plugin-sdk` outputs immediately or continue using generated
|
||||
compatibility shims under `src/plugin-sdk`.
|
||||
This is a compatibility and build-shape detail that depends on the minimal
|
||||
implementation path that keeps CI green.
|
||||
|
||||
## High-Level Technical Design
|
||||
|
||||
> This illustrates the intended approach and is directional guidance for review, not an implementation specification. The implementing agent should treat it as context, not code to reproduce.
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph Legacy["Legacy extensions (unchanged)"]
|
||||
L1["extensions/*\nopenclaw/plugin-sdk/*"]
|
||||
L2["root tsconfig paths"]
|
||||
L1 --> L2
|
||||
L2 --> L3["src/plugin-sdk/*"]
|
||||
end
|
||||
|
||||
subgraph OptIn["First-wave extensions"]
|
||||
O1["10 opted-in extensions"]
|
||||
O2["extensions/tsconfig.package-boundary.base.json"]
|
||||
O3["rootDir = '.'\nproject reference"]
|
||||
O4["@openclaw/plugin-sdk"]
|
||||
O1 --> O2
|
||||
O2 --> O3
|
||||
O3 --> O4
|
||||
end
|
||||
|
||||
subgraph SDK["New workspace package"]
|
||||
P1["packages/plugin-sdk/package.json"]
|
||||
P2["packages/plugin-sdk/tsconfig.json"]
|
||||
P3["packages/plugin-sdk/src/<first-wave-subpaths>.ts"]
|
||||
P1 --> P2
|
||||
P2 --> P3
|
||||
end
|
||||
|
||||
O4 --> SDK
|
||||
```
|
||||
|
||||
## Implementation Units
|
||||
|
||||
- [ ] **Unit 1: Introduce the real `@openclaw/plugin-sdk` workspace package**
|
||||
|
||||
**Goal:** Create a real workspace package for the SDK that can own the
|
||||
first-wave subpath surface without forcing a repo-wide migration.
|
||||
|
||||
**Requirements:** R1, R2, R3, R8, R9
|
||||
|
||||
**Dependencies:** None
|
||||
|
||||
**Files:**
|
||||
|
||||
- Create: `packages/plugin-sdk/package.json`
|
||||
- Create: `packages/plugin-sdk/tsconfig.json`
|
||||
- Create: `packages/plugin-sdk/src/index.ts`
|
||||
- Create: `packages/plugin-sdk/src/*.ts` for the first-wave SDK subpaths
|
||||
- Modify: `pnpm-workspace.yaml` only if package-glob adjustments are needed
|
||||
- Modify: `package.json`
|
||||
- Modify: `src/plugin-sdk/entrypoints.ts`
|
||||
- Modify: `scripts/lib/plugin-sdk-entrypoints.json`
|
||||
- Test: `src/plugins/contracts/plugin-sdk-workspace-package.contract.test.ts`
|
||||
|
||||
**Approach:**
|
||||
|
||||
- Add a new workspace package named `@openclaw/plugin-sdk`.
|
||||
- Start with the first-wave SDK subpaths only, not the entire 315-file tree.
|
||||
- If directly moving a first-wave entrypoint would create an oversized diff, the
|
||||
first PR may introduce that subpath in `packages/plugin-sdk/src` as a thin
|
||||
package wrapper first and then flip the source of truth to the package in a
|
||||
follow-up PR for that subpath cluster.
|
||||
- Reuse the existing entrypoint inventory machinery so the first-wave package
|
||||
surface is declared in one canonical place.
|
||||
- Keep the root package exports alive for legacy users while the workspace
|
||||
package becomes the new opt-in contract.
|
||||
|
||||
**Patterns to follow:**
|
||||
|
||||
- `packages/memory-host-sdk/package.json`
|
||||
- `packages/plugin-package-contract/package.json`
|
||||
- `src/plugin-sdk/entrypoints.ts`
|
||||
|
||||
**Test scenarios:**
|
||||
|
||||
- Happy path: the workspace package exports every first-wave subpath listed in
|
||||
the plan and no required first-wave export is missing.
|
||||
- Edge case: package export metadata remains stable when the first-wave entry
|
||||
list is re-generated or compared against the canonical inventory.
|
||||
- Integration: root package legacy SDK exports remain present after introducing
|
||||
the new workspace package.
|
||||
|
||||
**Verification:**
|
||||
|
||||
- The repo contains a valid `@openclaw/plugin-sdk` workspace package with a
|
||||
stable first-wave export map and no legacy export regression in root
|
||||
`package.json`.
|
||||
|
||||
- [ ] **Unit 2: Add an opt-in TS boundary mode for package-enforced extensions**
|
||||
|
||||
**Goal:** Define the TS configuration mode that opted-in extensions will use,
|
||||
while leaving the existing extension TS behavior unchanged for everyone else.
|
||||
|
||||
**Requirements:** R4, R6, R7, R8, R9
|
||||
|
||||
**Dependencies:** Unit 1
|
||||
|
||||
**Files:**
|
||||
|
||||
- Create: `extensions/tsconfig.package-boundary.base.json`
|
||||
- Create: `tsconfig.boundary-optin.json`
|
||||
- Modify: `extensions/xai/tsconfig.json`
|
||||
- Modify: `extensions/openai/tsconfig.json`
|
||||
- Modify: `extensions/anthropic/tsconfig.json`
|
||||
- Modify: `extensions/mistral/tsconfig.json`
|
||||
- Modify: `extensions/groq/tsconfig.json`
|
||||
- Modify: `extensions/together/tsconfig.json`
|
||||
- Modify: `extensions/perplexity/tsconfig.json`
|
||||
- Modify: `extensions/tavily/tsconfig.json`
|
||||
- Modify: `extensions/exa/tsconfig.json`
|
||||
- Modify: `extensions/firecrawl/tsconfig.json`
|
||||
- Test: `src/plugins/contracts/extension-package-project-boundaries.test.ts`
|
||||
- Test: `test/extension-package-tsc-boundary.test.ts`
|
||||
|
||||
**Approach:**
|
||||
|
||||
- Leave `extensions/tsconfig.base.json` in place for legacy extensions.
|
||||
- Add a new opt-in base config that:
|
||||
- sets `rootDir: "."`
|
||||
- references `packages/plugin-sdk`
|
||||
- enables `composite`
|
||||
- disables project-reference source redirect when needed
|
||||
- Add a dedicated solution config for the first-wave typecheck graph instead of
|
||||
reshaping the root repo TS project in the same PR.
|
||||
|
||||
**Execution note:** Start with a failing package-local canary typecheck for one
|
||||
opted-in extension before applying the pattern to all 10.
|
||||
|
||||
**Patterns to follow:**
|
||||
|
||||
- Existing package-local extension `tsconfig.json` pattern from the prior
|
||||
boundary work
|
||||
- Workspace package pattern from `packages/memory-host-sdk`
|
||||
|
||||
**Test scenarios:**
|
||||
|
||||
- Happy path: each opted-in extension typechecks successfully through the
|
||||
package-boundary TS config.
|
||||
- Error path: a canary relative import from `../../src/cli/acp-cli.ts` fails
|
||||
with `TS6059` for an opted-in extension.
|
||||
- Integration: non-opted-in extensions remain untouched and do not need to
|
||||
participate in the new solution config.
|
||||
|
||||
**Verification:**
|
||||
|
||||
- There is a dedicated typecheck graph for the 10 opted-in extensions, and bad
|
||||
relative imports from one of them fail through normal `tsc`.
|
||||
|
||||
- [ ] **Unit 3: Migrate the first-wave extensions onto `@openclaw/plugin-sdk`**
|
||||
|
||||
**Goal:** Change the first-wave extensions to consume the real SDK package
|
||||
through dependency metadata, project references, and package-name imports.
|
||||
|
||||
**Requirements:** R5, R6, R7, R9
|
||||
|
||||
**Dependencies:** Unit 2
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `extensions/anthropic/package.json`
|
||||
- Modify: `extensions/exa/package.json`
|
||||
- Modify: `extensions/firecrawl/package.json`
|
||||
- Modify: `extensions/groq/package.json`
|
||||
- Modify: `extensions/mistral/package.json`
|
||||
- Modify: `extensions/openai/package.json`
|
||||
- Modify: `extensions/perplexity/package.json`
|
||||
- Modify: `extensions/tavily/package.json`
|
||||
- Modify: `extensions/together/package.json`
|
||||
- Modify: `extensions/xai/package.json`
|
||||
- Modify: production and test imports under each of the 10 extension roots that
|
||||
currently reference `openclaw/plugin-sdk/*`
|
||||
|
||||
**Approach:**
|
||||
|
||||
- Add `@openclaw/plugin-sdk: workspace:*` to the first-wave extension
|
||||
`devDependencies`.
|
||||
- Replace `openclaw/plugin-sdk/*` imports in those packages with
|
||||
`@openclaw/plugin-sdk/*`.
|
||||
- Keep local extension-internal imports on local barrels such as `./api.ts` and
|
||||
`./runtime-api.ts`.
|
||||
- Do not change non-opted-in extensions in this PR.
|
||||
|
||||
**Patterns to follow:**
|
||||
|
||||
- Existing extension-local import barrels (`api.ts`, `runtime-api.ts`)
|
||||
- Package dependency shape used by other `@openclaw/*` workspace packages
|
||||
|
||||
**Test scenarios:**
|
||||
|
||||
- Happy path: each migrated extension still registers/loads through its existing
|
||||
plugin tests after the import rewrite.
|
||||
- Edge case: test-only SDK imports in the opted-in extension set still resolve
|
||||
correctly through the new package.
|
||||
- Integration: migrated extensions do not require root `openclaw/plugin-sdk/*`
|
||||
aliases for typechecking.
|
||||
|
||||
**Verification:**
|
||||
|
||||
- The first-wave extensions build and test against `@openclaw/plugin-sdk`
|
||||
without needing the legacy root SDK alias path.
|
||||
|
||||
- [ ] **Unit 4: Preserve legacy compatibility while the migration is partial**
|
||||
|
||||
**Goal:** Keep the rest of the repo working while the SDK exists in both legacy
|
||||
and new-package forms during migration.
|
||||
|
||||
**Requirements:** R4, R8, R9
|
||||
|
||||
**Dependencies:** Units 1-3
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `src/plugin-sdk/*.ts` for first-wave compatibility shims as needed
|
||||
- Modify: `package.json`
|
||||
- Modify: build or export plumbing that assembles SDK artifacts
|
||||
- Test: `src/plugins/contracts/plugin-sdk-runtime-api-guardrails.test.ts`
|
||||
- Test: `src/plugins/contracts/plugin-sdk-index.bundle.test.ts`
|
||||
|
||||
**Approach:**
|
||||
|
||||
- Keep root `openclaw/plugin-sdk/*` as the compatibility surface for legacy
|
||||
extensions and for external consumers that are not moving yet.
|
||||
- Use either generated shims or root-export proxy wiring for the first-wave
|
||||
subpaths that have moved into `packages/plugin-sdk`.
|
||||
- Do not attempt to retire the root SDK surface in this phase.
|
||||
|
||||
**Patterns to follow:**
|
||||
|
||||
- Existing root SDK export generation via `src/plugin-sdk/entrypoints.ts`
|
||||
- Existing package export compatibility in root `package.json`
|
||||
|
||||
**Test scenarios:**
|
||||
|
||||
- Happy path: a legacy root SDK import still resolves for a non-opted-in
|
||||
extension after the new package exists.
|
||||
- Edge case: a first-wave subpath works through both the legacy root surface and
|
||||
the new package surface during the migration window.
|
||||
- Integration: plugin-sdk index/bundle contract tests continue to see a coherent
|
||||
public surface.
|
||||
|
||||
**Verification:**
|
||||
|
||||
- The repo supports both legacy and opt-in SDK consumption modes without
|
||||
breaking unchanged extensions.
|
||||
|
||||
- [ ] **Unit 5: Add scoped enforcement and document the migration contract**
|
||||
|
||||
**Goal:** Land CI and contributor guidance that enforce the new behavior for the
|
||||
first wave without pretending the entire extension tree is migrated.
|
||||
|
||||
**Requirements:** R5, R6, R8, R9
|
||||
|
||||
**Dependencies:** Units 1-4
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `package.json`
|
||||
- Modify: CI workflow files that should run the opt-in boundary typecheck
|
||||
- Modify: `AGENTS.md`
|
||||
- Modify: `docs/plugins/sdk-overview.md`
|
||||
- Modify: `docs/plugins/sdk-entrypoints.md`
|
||||
- Modify: `docs/plans/2026-04-05-001-refactor-extension-package-resolution-boundary-plan.md`
|
||||
|
||||
**Approach:**
|
||||
|
||||
- Add an explicit first-wave gate, such as a dedicated `tsc -b` solution run for
|
||||
`packages/plugin-sdk` plus the 10 opted-in extensions.
|
||||
- Document that the repo now supports both legacy and opt-in extension modes,
|
||||
and that new extension boundary work should prefer the new package route.
|
||||
- Record the next-wave migration rule so later PRs can add more extensions
|
||||
without re-litigating the architecture.
|
||||
|
||||
**Patterns to follow:**
|
||||
|
||||
- Existing contract tests under `src/plugins/contracts/`
|
||||
- Existing docs updates that explain staged migrations
|
||||
|
||||
**Test scenarios:**
|
||||
|
||||
- Happy path: the new first-wave typecheck gate passes for the workspace package
|
||||
and the opted-in extensions.
|
||||
- Error path: introducing a new illegal relative import in an opted-in
|
||||
extension fails the scoped typecheck gate.
|
||||
- Integration: CI does not require non-opted-in extensions to satisfy the new
|
||||
package-boundary mode yet.
|
||||
|
||||
**Verification:**
|
||||
|
||||
- The first-wave enforcement path is documented, tested, and runnable without
|
||||
forcing the entire extension tree to migrate.
|
||||
|
||||
## System-Wide Impact
|
||||
|
||||
- **Interaction graph:** this work touches the SDK source-of-truth, root package
|
||||
exports, extension package metadata, TS graph layout, and CI verification.
|
||||
- **Error propagation:** the main intended failure mode becomes compile-time TS
|
||||
errors (`TS6059`) in opted-in extensions instead of custom script-only
|
||||
failures.
|
||||
- **State lifecycle risks:** dual-surface migration introduces drift risk between
|
||||
root compatibility exports and the new workspace package.
|
||||
- **API surface parity:** first-wave subpaths must remain semantically identical
|
||||
through both `openclaw/plugin-sdk/*` and `@openclaw/plugin-sdk/*` during the
|
||||
transition.
|
||||
- **Integration coverage:** unit tests are not enough; scoped package-graph
|
||||
typechecks are required to prove the boundary.
|
||||
- **Unchanged invariants:** non-opted-in extensions keep their current behavior
|
||||
in PR 1. This plan does not claim repo-wide import-boundary enforcement.
|
||||
|
||||
## Risks & Dependencies
|
||||
|
||||
| Risk | Mitigation |
|
||||
| ------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------- |
|
||||
| The first-wave package still resolves back into raw source and `rootDir` does not actually fail closed | Make the first implementation step a package-reference canary on one opted-in extension before widening to the full set |
|
||||
| Moving too much SDK source at once recreates the original merge-conflict problem | Move only the first-wave subpaths in the first PR and keep root compatibility bridges |
|
||||
| Legacy and new SDK surfaces drift semantically | Keep a single entrypoint inventory, add compatibility contract tests, and make dual-surface parity explicit |
|
||||
| Root repo build/test paths accidentally start depending on the new package in uncontrolled ways | Use a dedicated opt-in solution config and keep root-wide TS topology changes out of the first PR |
|
||||
|
||||
## Phased Delivery
|
||||
|
||||
### Phase 1
|
||||
|
||||
- Introduce `@openclaw/plugin-sdk`
|
||||
- Define the first-wave subpath surface
|
||||
- Prove one opted-in extension can fail closed through `rootDir`
|
||||
|
||||
### Phase 2
|
||||
|
||||
- Opt in the 10 first-wave extensions
|
||||
- Keep root compatibility alive for everyone else
|
||||
|
||||
### Phase 3
|
||||
|
||||
- Add more extensions in later PRs
|
||||
- Move more SDK subpaths into the workspace package
|
||||
- Retire root compatibility only after the legacy extension set is gone
|
||||
|
||||
## Documentation / Operational Notes
|
||||
|
||||
- The first PR should explicitly describe itself as a dual-mode migration, not a
|
||||
repo-wide enforcement completion.
|
||||
- The migration guide should make it easy for later PRs to add more extensions
|
||||
by following the same package/dependency/reference pattern.
|
||||
|
||||
## Sources & References
|
||||
|
||||
- Prior plan: `docs/plans/2026-04-05-001-refactor-extension-package-resolution-boundary-plan.md`
|
||||
- Workspace config: `pnpm-workspace.yaml`
|
||||
- Existing SDK entrypoint inventory: `src/plugin-sdk/entrypoints.ts`
|
||||
- Existing root SDK exports: `package.json`
|
||||
- Existing workspace package patterns:
|
||||
- `packages/memory-host-sdk/package.json`
|
||||
- `packages/plugin-package-contract/package.json`
|
||||
@@ -245,7 +245,6 @@ Current bundled provider examples:
|
||||
| `plugin-sdk/allow-from` | Allowlist formatting | `formatAllowFromLowercase` |
|
||||
| `plugin-sdk/allowlist-resolution` | Allowlist input mapping | `mapAllowlistResolutionInputs` |
|
||||
| `plugin-sdk/command-auth` | Command gating and command-surface helpers | `resolveControlCommandGate`, sender-authorization helpers, command registry helpers |
|
||||
| `plugin-sdk/command-status` | Command status/help renderers | `buildCommandsMessage`, `buildCommandsMessagePaginated`, `buildHelpMessage` |
|
||||
| `plugin-sdk/secret-input` | Secret input parsing | Secret input helpers |
|
||||
| `plugin-sdk/webhook-ingress` | Webhook request helpers | Webhook target utilities |
|
||||
| `plugin-sdk/webhook-request-guards` | Webhook body guard helpers | Request body read/limit helpers |
|
||||
|
||||
@@ -149,7 +149,6 @@ explicitly promotes one as public.
|
||||
| Subpath | Key exports |
|
||||
| --- | --- |
|
||||
| `plugin-sdk/command-auth` | `resolveControlCommandGate`, command registry helpers, sender-authorization helpers |
|
||||
| `plugin-sdk/command-status` | Command/help message builders such as `buildCommandsMessagePaginated` and `buildHelpMessage` |
|
||||
| `plugin-sdk/approval-auth-runtime` | Approver resolution and same-chat action-auth helpers |
|
||||
| `plugin-sdk/approval-client-runtime` | Native exec approval profile/filter helpers |
|
||||
| `plugin-sdk/approval-delivery-runtime` | Native approval capability/delivery adapters |
|
||||
|
||||
@@ -88,9 +88,7 @@ requiring the built-in `qwen` provider id specifically.
|
||||
|
||||
## Built-in catalog
|
||||
|
||||
OpenClaw currently ships this bundled Qwen catalog. The configured catalog is
|
||||
endpoint-aware: Coding Plan configs omit models that are only known to work on
|
||||
the Standard endpoint.
|
||||
OpenClaw currently ships this bundled Qwen catalog:
|
||||
|
||||
| Model ref | Input | Context | Notes |
|
||||
| --------------------------- | ----------- | --------- | -------------------------------------------------- |
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
"id": "anthropic-vertex",
|
||||
"enabledByDefault": true,
|
||||
"providers": ["anthropic-vertex"],
|
||||
"providerDiscoveryEntry": "./provider-discovery.ts",
|
||||
"configSchema": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
describe("anthropic-vertex provider discovery entry", () => {
|
||||
it("imports without loading the full plugin entry", async () => {
|
||||
const module = await import("./provider-discovery.js");
|
||||
|
||||
expect(module.default.id).toBe("anthropic-vertex");
|
||||
expect(module.default.catalog.order).toBe("simple");
|
||||
});
|
||||
});
|
||||
@@ -1,215 +0,0 @@
|
||||
import { readFileSync } from "node:fs";
|
||||
import { homedir, platform } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import type { ProviderCatalogContext } from "openclaw/plugin-sdk/provider-catalog-shared";
|
||||
import type {
|
||||
ModelDefinitionConfig,
|
||||
ModelProviderConfig,
|
||||
} from "openclaw/plugin-sdk/provider-model-shared";
|
||||
|
||||
const PROVIDER_ID = "anthropic-vertex";
|
||||
const ANTHROPIC_VERTEX_DEFAULT_REGION = "global";
|
||||
const ANTHROPIC_VERTEX_REGION_RE = /^[a-z0-9-]+$/;
|
||||
const ANTHROPIC_VERTEX_DEFAULT_CONTEXT_WINDOW = 1_000_000;
|
||||
const GCP_VERTEX_CREDENTIALS_MARKER = "gcp-vertex-credentials";
|
||||
const GCLOUD_DEFAULT_ADC_PATH = join(
|
||||
homedir(),
|
||||
".config",
|
||||
"gcloud",
|
||||
"application_default_credentials.json",
|
||||
);
|
||||
|
||||
type AnthropicVertexProviderPlugin = {
|
||||
id: string;
|
||||
label: string;
|
||||
docsPath: string;
|
||||
auth: [];
|
||||
catalog: {
|
||||
order: "simple";
|
||||
run: (ctx: ProviderCatalogContext) => ReturnType<typeof runAnthropicVertexCatalog>;
|
||||
};
|
||||
resolveConfigApiKey: (params: { env: NodeJS.ProcessEnv }) => string | undefined;
|
||||
};
|
||||
|
||||
type AdcProjectFile = {
|
||||
project_id?: unknown;
|
||||
quota_project_id?: unknown;
|
||||
};
|
||||
|
||||
function normalizeOptionalString(value: unknown): string | undefined {
|
||||
return typeof value === "string" && value.trim() ? value.trim() : undefined;
|
||||
}
|
||||
|
||||
function normalizeLowercaseStringOrEmpty(value: unknown): string {
|
||||
return normalizeOptionalString(value)?.toLowerCase() ?? "";
|
||||
}
|
||||
|
||||
function resolveAnthropicVertexRegion(env: NodeJS.ProcessEnv = process.env): string {
|
||||
const region =
|
||||
normalizeOptionalString(env.GOOGLE_CLOUD_LOCATION) ||
|
||||
normalizeOptionalString(env.CLOUD_ML_REGION);
|
||||
|
||||
return region && ANTHROPIC_VERTEX_REGION_RE.test(region)
|
||||
? region
|
||||
: ANTHROPIC_VERTEX_DEFAULT_REGION;
|
||||
}
|
||||
|
||||
function hasAnthropicVertexMetadataServerAdc(env: NodeJS.ProcessEnv = process.env): boolean {
|
||||
const explicitMetadataOptIn = normalizeOptionalString(env.ANTHROPIC_VERTEX_USE_GCP_METADATA);
|
||||
return (
|
||||
explicitMetadataOptIn === "1" ||
|
||||
normalizeLowercaseStringOrEmpty(explicitMetadataOptIn) === "true"
|
||||
);
|
||||
}
|
||||
|
||||
function resolveAnthropicVertexDefaultAdcPath(env: NodeJS.ProcessEnv = process.env): string {
|
||||
return platform() === "win32"
|
||||
? join(
|
||||
env.APPDATA ?? join(homedir(), "AppData", "Roaming"),
|
||||
"gcloud",
|
||||
"application_default_credentials.json",
|
||||
)
|
||||
: GCLOUD_DEFAULT_ADC_PATH;
|
||||
}
|
||||
|
||||
function resolveAnthropicVertexAdcCredentialsPathCandidate(
|
||||
env: NodeJS.ProcessEnv = process.env,
|
||||
): string | undefined {
|
||||
const explicit = normalizeOptionalString(env.GOOGLE_APPLICATION_CREDENTIALS);
|
||||
if (explicit) {
|
||||
return explicit;
|
||||
}
|
||||
if (env !== process.env) {
|
||||
return undefined;
|
||||
}
|
||||
return resolveAnthropicVertexDefaultAdcPath(env);
|
||||
}
|
||||
|
||||
function readAnthropicVertexAdc(env: NodeJS.ProcessEnv = process.env): AdcProjectFile | null {
|
||||
const credentialsPath = resolveAnthropicVertexAdcCredentialsPathCandidate(env);
|
||||
if (!credentialsPath) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
return JSON.parse(readFileSync(credentialsPath, "utf8")) as AdcProjectFile;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function hasAnthropicVertexAvailableAuth(env: NodeJS.ProcessEnv = process.env): boolean {
|
||||
return hasAnthropicVertexMetadataServerAdc(env) || readAnthropicVertexAdc(env) !== null;
|
||||
}
|
||||
|
||||
function resolveAnthropicVertexConfigApiKey(
|
||||
env: NodeJS.ProcessEnv = process.env,
|
||||
): string | undefined {
|
||||
return hasAnthropicVertexAvailableAuth(env) ? GCP_VERTEX_CREDENTIALS_MARKER : undefined;
|
||||
}
|
||||
|
||||
function buildAnthropicVertexModel(params: {
|
||||
id: string;
|
||||
name: string;
|
||||
reasoning: boolean;
|
||||
input: ModelDefinitionConfig["input"];
|
||||
cost: ModelDefinitionConfig["cost"];
|
||||
maxTokens: number;
|
||||
}): ModelDefinitionConfig {
|
||||
return {
|
||||
id: params.id,
|
||||
name: params.name,
|
||||
reasoning: params.reasoning,
|
||||
input: params.input,
|
||||
cost: params.cost,
|
||||
contextWindow: ANTHROPIC_VERTEX_DEFAULT_CONTEXT_WINDOW,
|
||||
maxTokens: params.maxTokens,
|
||||
};
|
||||
}
|
||||
|
||||
function buildAnthropicVertexProvider(params?: { env?: NodeJS.ProcessEnv }): ModelProviderConfig {
|
||||
const region = resolveAnthropicVertexRegion(params?.env);
|
||||
const baseUrl =
|
||||
normalizeLowercaseStringOrEmpty(region) === "global"
|
||||
? "https://aiplatform.googleapis.com"
|
||||
: `https://${region}-aiplatform.googleapis.com`;
|
||||
|
||||
return {
|
||||
baseUrl,
|
||||
api: "anthropic-messages",
|
||||
apiKey: GCP_VERTEX_CREDENTIALS_MARKER,
|
||||
models: [
|
||||
buildAnthropicVertexModel({
|
||||
id: "claude-opus-4-6",
|
||||
name: "Claude Opus 4.6",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
|
||||
maxTokens: 128000,
|
||||
}),
|
||||
buildAnthropicVertexModel({
|
||||
id: "claude-sonnet-4-6",
|
||||
name: "Claude Sonnet 4.6",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
|
||||
maxTokens: 128000,
|
||||
}),
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
function mergeImplicitAnthropicVertexProvider(params: {
|
||||
existing?: ModelProviderConfig;
|
||||
implicit: ModelProviderConfig;
|
||||
}) {
|
||||
const { existing, implicit } = params;
|
||||
if (!existing) {
|
||||
return implicit;
|
||||
}
|
||||
return {
|
||||
...implicit,
|
||||
...existing,
|
||||
models:
|
||||
Array.isArray(existing.models) && existing.models.length > 0
|
||||
? existing.models
|
||||
: implicit.models,
|
||||
};
|
||||
}
|
||||
|
||||
function resolveImplicitAnthropicVertexProvider(params?: { env?: NodeJS.ProcessEnv }) {
|
||||
const env = params?.env ?? process.env;
|
||||
if (!hasAnthropicVertexAvailableAuth(env)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return buildAnthropicVertexProvider({ env });
|
||||
}
|
||||
|
||||
async function runAnthropicVertexCatalog(ctx: ProviderCatalogContext) {
|
||||
const implicit = resolveImplicitAnthropicVertexProvider({
|
||||
env: ctx.env,
|
||||
});
|
||||
if (!implicit) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
provider: mergeImplicitAnthropicVertexProvider({
|
||||
existing: ctx.config.models?.providers?.[PROVIDER_ID],
|
||||
implicit,
|
||||
}),
|
||||
};
|
||||
}
|
||||
|
||||
export const anthropicVertexProviderDiscovery: AnthropicVertexProviderPlugin = {
|
||||
id: PROVIDER_ID,
|
||||
label: "Anthropic Vertex",
|
||||
docsPath: "/providers/models",
|
||||
auth: [],
|
||||
catalog: {
|
||||
order: "simple",
|
||||
run: runAnthropicVertexCatalog,
|
||||
},
|
||||
resolveConfigApiKey: ({ env }) => resolveAnthropicVertexConfigApiKey(env),
|
||||
};
|
||||
|
||||
export default anthropicVertexProviderDiscovery;
|
||||
@@ -33,7 +33,7 @@ export const secretTargetRegistryEntries = [
|
||||
|
||||
export function collectRuntimeConfigAssignments(params: {
|
||||
config: { channels?: Record<string, unknown> };
|
||||
defaults?: SecretDefaults;
|
||||
defaults: SecretDefaults | undefined;
|
||||
context: ResolverContext;
|
||||
}): void {
|
||||
const resolved = getChannelSurface(params.config, "bluebubbles");
|
||||
|
||||
@@ -52,13 +52,8 @@ describe("pw-tools-core browser SSRF guards", () => {
|
||||
});
|
||||
|
||||
it("re-checks click-triggered navigations with the session safety helper", async () => {
|
||||
let currentUrl = "https://example.com";
|
||||
pageState.page = { url: vi.fn(() => currentUrl) };
|
||||
pageState.locator = {
|
||||
click: vi.fn(async () => {
|
||||
currentUrl = "https://target.example";
|
||||
}),
|
||||
};
|
||||
pageState.page = { url: vi.fn(() => "https://example.com") };
|
||||
pageState.locator = { click: vi.fn(async () => {}) };
|
||||
|
||||
await interactions.clickViaPlaywright({
|
||||
cdpUrl: "http://127.0.0.1:18792",
|
||||
@@ -91,13 +86,8 @@ describe("pw-tools-core browser SSRF guards", () => {
|
||||
});
|
||||
|
||||
it("re-checks batched click-triggered navigations with the session safety helper", async () => {
|
||||
let currentUrl = "https://example.com";
|
||||
pageState.page = { url: vi.fn(() => currentUrl) };
|
||||
pageState.locator = {
|
||||
click: vi.fn(async () => {
|
||||
currentUrl = "https://target.example";
|
||||
}),
|
||||
};
|
||||
pageState.page = { url: vi.fn(() => "https://example.com") };
|
||||
pageState.locator = { click: vi.fn(async () => {}) };
|
||||
|
||||
await interactions.batchViaPlaywright({
|
||||
cdpUrl: "http://127.0.0.1:18792",
|
||||
|
||||
@@ -65,7 +65,7 @@ describe("pw-tools-core", () => {
|
||||
throw new Error(errorMessage);
|
||||
});
|
||||
setPwToolsCoreCurrentRefLocator({ click });
|
||||
setPwToolsCoreCurrentPage({ url: vi.fn(() => "https://example.com") });
|
||||
setPwToolsCoreCurrentPage({});
|
||||
|
||||
await expect(
|
||||
mod.clickViaPlaywright({
|
||||
@@ -82,7 +82,7 @@ describe("pw-tools-core", () => {
|
||||
);
|
||||
});
|
||||
setPwToolsCoreCurrentRefLocator({ click });
|
||||
setPwToolsCoreCurrentPage({ url: vi.fn(() => "https://example.com") });
|
||||
setPwToolsCoreCurrentPage({});
|
||||
|
||||
await expect(
|
||||
mod.clickViaPlaywright({
|
||||
|
||||
@@ -10,7 +10,7 @@ installPwToolsCoreTestHooks();
|
||||
const mod = await import("./pw-tools-core.js");
|
||||
|
||||
describe("pw-tools-core interaction navigation guard", () => {
|
||||
it("waits for the grace window before completing a successful non-navigating click", async () => {
|
||||
it("does not wait for the grace window after a successful non-navigating click", async () => {
|
||||
vi.useFakeTimers();
|
||||
try {
|
||||
const listeners = new Set<() => void>();
|
||||
@@ -42,16 +42,15 @@ describe("pw-tools-core interaction navigation guard", () => {
|
||||
.then(completion);
|
||||
|
||||
await vi.advanceTimersByTimeAsync(0);
|
||||
expect(completion).not.toHaveBeenCalled();
|
||||
expect(completion).toHaveBeenCalledTimes(1);
|
||||
expect(listeners.size).toBe(1);
|
||||
expect(
|
||||
getPwToolsCoreSessionMocks().assertPageNavigationCompletedSafely,
|
||||
).not.toHaveBeenCalled();
|
||||
|
||||
await vi.advanceTimersByTimeAsync(250);
|
||||
await task;
|
||||
expect(completion).toHaveBeenCalledTimes(1);
|
||||
expect(listeners.size).toBe(0);
|
||||
await task;
|
||||
} finally {
|
||||
vi.useRealTimers();
|
||||
}
|
||||
@@ -97,14 +96,13 @@ describe("pw-tools-core interaction navigation guard", () => {
|
||||
.then(completion);
|
||||
|
||||
await vi.advanceTimersByTimeAsync(0);
|
||||
expect(completion).not.toHaveBeenCalled();
|
||||
expect(completion).toHaveBeenCalledTimes(1);
|
||||
expect(
|
||||
getPwToolsCoreSessionMocks().assertPageNavigationCompletedSafely,
|
||||
).not.toHaveBeenCalled();
|
||||
|
||||
await vi.advanceTimersByTimeAsync(10);
|
||||
await task;
|
||||
expect(completion).toHaveBeenCalledTimes(1);
|
||||
|
||||
expect(getPwToolsCoreSessionMocks().assertPageNavigationCompletedSafely).toHaveBeenCalledWith(
|
||||
{
|
||||
@@ -209,22 +207,20 @@ describe("pw-tools-core interaction navigation guard", () => {
|
||||
setPwToolsCoreCurrentRefLocator({ click });
|
||||
setPwToolsCoreCurrentPage(page);
|
||||
|
||||
const first = mod.clickViaPlaywright({
|
||||
await mod.clickViaPlaywright({
|
||||
cdpUrl: "http://127.0.0.1:18792",
|
||||
targetId: "T1",
|
||||
ref: "1",
|
||||
ssrfPolicy: { allowPrivateNetwork: false },
|
||||
});
|
||||
await vi.advanceTimersByTimeAsync(0);
|
||||
expect(listeners.size).toBe(1);
|
||||
|
||||
const second = mod.clickViaPlaywright({
|
||||
await mod.clickViaPlaywright({
|
||||
cdpUrl: "http://127.0.0.1:18792",
|
||||
targetId: "T1",
|
||||
ref: "1",
|
||||
ssrfPolicy: { allowPrivateNetwork: false },
|
||||
});
|
||||
await vi.advanceTimersByTimeAsync(0);
|
||||
expect(listeners.size).toBe(1);
|
||||
|
||||
currentUrl = "http://127.0.0.1:9222/json/list";
|
||||
@@ -232,7 +228,6 @@ describe("pw-tools-core interaction navigation guard", () => {
|
||||
listener();
|
||||
}
|
||||
await vi.advanceTimersByTimeAsync(0);
|
||||
await Promise.all([first, second]);
|
||||
|
||||
expect(
|
||||
getPwToolsCoreSessionMocks().assertPageNavigationCompletedSafely,
|
||||
@@ -243,56 +238,6 @@ describe("pw-tools-core interaction navigation guard", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("propagates blocked delayed navigation instead of reporting click success", async () => {
|
||||
vi.useFakeTimers();
|
||||
try {
|
||||
const listeners = new Set<() => void>();
|
||||
let currentUrl = "http://127.0.0.1:9222/json/version";
|
||||
const click = vi.fn(async () => {
|
||||
setTimeout(() => {
|
||||
currentUrl = "http://127.0.0.1:9222/private-target";
|
||||
for (const listener of listeners) {
|
||||
listener();
|
||||
}
|
||||
}, 10);
|
||||
});
|
||||
const page = {
|
||||
on: vi.fn((event: string, listener: () => void) => {
|
||||
if (event === "framenavigated") {
|
||||
listeners.add(listener);
|
||||
}
|
||||
}),
|
||||
off: vi.fn((event: string, listener: () => void) => {
|
||||
if (event === "framenavigated") {
|
||||
listeners.delete(listener);
|
||||
}
|
||||
}),
|
||||
url: vi.fn(() => currentUrl),
|
||||
};
|
||||
setPwToolsCoreCurrentRefLocator({ click });
|
||||
setPwToolsCoreCurrentPage(page);
|
||||
|
||||
const blocked = new Error("blocked delayed interaction navigation");
|
||||
getPwToolsCoreSessionMocks().assertPageNavigationCompletedSafely.mockRejectedValueOnce(
|
||||
blocked,
|
||||
);
|
||||
|
||||
const task = mod.clickViaPlaywright({
|
||||
cdpUrl: "http://127.0.0.1:18792",
|
||||
targetId: "T1",
|
||||
ref: "1",
|
||||
ssrfPolicy: { allowPrivateNetwork: false },
|
||||
});
|
||||
const rejection = expect(task).rejects.toThrow("blocked delayed interaction navigation");
|
||||
|
||||
await vi.advanceTimersByTimeAsync(10);
|
||||
await rejection;
|
||||
expect(listeners.size).toBe(0);
|
||||
} finally {
|
||||
vi.useRealTimers();
|
||||
}
|
||||
});
|
||||
|
||||
it("runs the post-click navigation guard with the resolved SSRF policy", async () => {
|
||||
const click = vi.fn(async () => {});
|
||||
const page = {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
|
||||
import type { Frame, Page } from "playwright-core";
|
||||
import { formatErrorMessage } from "../infra/errors.js";
|
||||
import type { Frame, Page } from "playwright-core";
|
||||
import type { SsrFPolicy } from "../infra/net/ssrf.js";
|
||||
import type { BrowserActRequest, BrowserFormField } from "./client-actions-core.js";
|
||||
import { DEFAULT_FILL_FIELD_TYPE } from "./form-fields.js";
|
||||
@@ -164,68 +164,59 @@ function scheduleDelayedInteractionNavigationGuard(opts: {
|
||||
previousUrl: string;
|
||||
ssrfPolicy?: SsrFPolicy;
|
||||
targetId?: string;
|
||||
}): Promise<void> {
|
||||
}): void {
|
||||
if (!opts.ssrfPolicy) {
|
||||
return Promise.resolve();
|
||||
return;
|
||||
}
|
||||
const page = opts.page as unknown as NavigationObservablePage;
|
||||
if (didCrossDocumentUrlChange(page, opts.previousUrl)) {
|
||||
return assertPageNavigationCompletedSafely({
|
||||
void assertPageNavigationCompletedSafely({
|
||||
cdpUrl: opts.cdpUrl,
|
||||
page: opts.page,
|
||||
response: null,
|
||||
ssrfPolicy: opts.ssrfPolicy,
|
||||
targetId: opts.targetId,
|
||||
});
|
||||
}).catch(() => {});
|
||||
return;
|
||||
}
|
||||
if (typeof page.on !== "function" || typeof page.off !== "function") {
|
||||
return Promise.resolve();
|
||||
return;
|
||||
}
|
||||
|
||||
pendingInteractionNavigationGuardCleanup.get(opts.page)?.();
|
||||
|
||||
return new Promise<void>((resolve, reject) => {
|
||||
const settle = (err?: unknown) => {
|
||||
cleanup();
|
||||
if (err) {
|
||||
reject(err);
|
||||
return;
|
||||
}
|
||||
resolve();
|
||||
};
|
||||
const onFrameNavigated = (frame: Frame) => {
|
||||
if (!isMainFrameNavigation(page, frame)) {
|
||||
return;
|
||||
}
|
||||
// Use isHashOnlyNavigation rather than !didCrossDocumentUrlChange: the
|
||||
// event firing is itself the navigation signal, so a same-URL reload must
|
||||
// not be treated as "no navigation" the way URL polling would.
|
||||
if (isHashOnlyNavigation(page.url(), opts.previousUrl)) {
|
||||
return;
|
||||
}
|
||||
cleanup();
|
||||
void assertPageNavigationCompletedSafely({
|
||||
cdpUrl: opts.cdpUrl,
|
||||
page: opts.page,
|
||||
response: null,
|
||||
ssrfPolicy: opts.ssrfPolicy,
|
||||
targetId: opts.targetId,
|
||||
}).then(() => settle(), settle);
|
||||
};
|
||||
const timeout = setTimeout(() => {
|
||||
settle();
|
||||
}, INTERACTION_NAVIGATION_GRACE_MS);
|
||||
const cleanup = () => {
|
||||
clearTimeout(timeout);
|
||||
page.off!("framenavigated", onFrameNavigated);
|
||||
if (pendingInteractionNavigationGuardCleanup.get(opts.page) === settle) {
|
||||
pendingInteractionNavigationGuardCleanup.delete(opts.page);
|
||||
}
|
||||
};
|
||||
const onFrameNavigated = (frame: Frame) => {
|
||||
if (!isMainFrameNavigation(page, frame)) {
|
||||
return;
|
||||
}
|
||||
// Use isHashOnlyNavigation rather than !didCrossDocumentUrlChange: the
|
||||
// event firing is itself the navigation signal, so a same-URL reload must
|
||||
// not be treated as "no navigation" the way URL polling would.
|
||||
if (isHashOnlyNavigation(page.url(), opts.previousUrl)) {
|
||||
return;
|
||||
}
|
||||
cleanup();
|
||||
void assertPageNavigationCompletedSafely({
|
||||
cdpUrl: opts.cdpUrl,
|
||||
page: opts.page,
|
||||
response: null,
|
||||
ssrfPolicy: opts.ssrfPolicy,
|
||||
targetId: opts.targetId,
|
||||
}).catch(() => {});
|
||||
};
|
||||
const timeout = setTimeout(() => {
|
||||
cleanup();
|
||||
}, INTERACTION_NAVIGATION_GRACE_MS);
|
||||
const cleanup = () => {
|
||||
clearTimeout(timeout);
|
||||
page.off!("framenavigated", onFrameNavigated);
|
||||
if (pendingInteractionNavigationGuardCleanup.get(opts.page) === cleanup) {
|
||||
pendingInteractionNavigationGuardCleanup.delete(opts.page);
|
||||
}
|
||||
};
|
||||
|
||||
pendingInteractionNavigationGuardCleanup.set(opts.page, settle);
|
||||
page.on!("framenavigated", onFrameNavigated);
|
||||
});
|
||||
pendingInteractionNavigationGuardCleanup.set(opts.page, cleanup);
|
||||
page.on("framenavigated", onFrameNavigated);
|
||||
}
|
||||
|
||||
async function assertInteractionNavigationCompletedSafely<T>(opts: {
|
||||
@@ -301,10 +292,9 @@ async function assertInteractionNavigationCompletedSafely<T>(opts: {
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// Successful interactions still need a short grace window: a click can resolve
|
||||
// before the navigation event fires, and a blocked late hop must be observable
|
||||
// to the current caller instead of only quarantining the page in the background.
|
||||
await scheduleDelayedInteractionNavigationGuard({
|
||||
// Successful non-navigating interactions should not wait out the grace window,
|
||||
// but we still keep a short-lived listener alive to quarantine late SSRF hops.
|
||||
scheduleDelayedInteractionNavigationGuard({
|
||||
cdpUrl: opts.cdpUrl,
|
||||
page: opts.page,
|
||||
previousUrl: opts.previousUrl,
|
||||
|
||||
@@ -84,7 +84,7 @@ export const secretTargetRegistryEntries = [
|
||||
|
||||
export function collectRuntimeConfigAssignments(params: {
|
||||
config: { channels?: Record<string, unknown> };
|
||||
defaults?: SecretDefaults;
|
||||
defaults: SecretDefaults | undefined;
|
||||
context: ResolverContext;
|
||||
}): void {
|
||||
const resolved = getChannelSurface(params.config, "discord");
|
||||
|
||||
@@ -80,7 +80,7 @@ export const secretTargetRegistryEntries = [
|
||||
|
||||
export function collectRuntimeConfigAssignments(params: {
|
||||
config: { channels?: Record<string, unknown> };
|
||||
defaults?: SecretDefaults;
|
||||
defaults: SecretDefaults | undefined;
|
||||
context: ResolverContext;
|
||||
}): void {
|
||||
const resolved = getChannelSurface(params.config, "feishu");
|
||||
|
||||
@@ -138,7 +138,7 @@ function warnDeprecatedUsersEmailEntries(logVerbose: (message: string) => void,
|
||||
}
|
||||
const key = deprecated
|
||||
.map((v) => normalizeLowercaseStringOrEmpty(v))
|
||||
.toSorted((a, b) => a.localeCompare(b))
|
||||
.toSorted()
|
||||
.join(",");
|
||||
if (warnedDeprecatedUsersEmailAllowFrom.has(key)) {
|
||||
return;
|
||||
@@ -161,7 +161,7 @@ function warnMutableGroupKeysConfigured(
|
||||
}
|
||||
const warningKey = mutableKeys
|
||||
.map((key) => normalizeLowercaseStringOrEmpty(key))
|
||||
.toSorted((a, b) => a.localeCompare(b))
|
||||
.toSorted()
|
||||
.join(",");
|
||||
if (warnedMutableGroupKeys.has(warningKey)) {
|
||||
return;
|
||||
|
||||
@@ -63,7 +63,7 @@ function resolveSecretInputRef(params: {
|
||||
function collectGoogleChatAccountAssignment(params: {
|
||||
target: GoogleChatAccountLike;
|
||||
path: string;
|
||||
defaults?: SecretDefaults;
|
||||
defaults: SecretDefaults | undefined;
|
||||
context: ResolverContext;
|
||||
active?: boolean;
|
||||
inactiveReason?: string;
|
||||
@@ -107,7 +107,7 @@ function collectGoogleChatAccountAssignment(params: {
|
||||
|
||||
export function collectRuntimeConfigAssignments(params: {
|
||||
config: { channels?: Record<string, unknown> };
|
||||
defaults?: SecretDefaults;
|
||||
defaults: SecretDefaults | undefined;
|
||||
context: ResolverContext;
|
||||
}): void {
|
||||
const resolved = getChannelSurface(params.config, "googlechat");
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
export { createIMessageTestPlugin } from "./src/test-plugin.js";
|
||||
export {
|
||||
resolveIMessageAttachmentRoots as resolveInboundAttachmentRoots,
|
||||
resolveIMessageRemoteAttachmentRoots as resolveRemoteInboundAttachmentRoots,
|
||||
|
||||
@@ -3,7 +3,7 @@ import {
|
||||
listImportedBundledPluginFacadeIds,
|
||||
resetFacadeRuntimeStateForTest,
|
||||
} from "../../../src/plugin-sdk/facade-runtime.js";
|
||||
import { createIMessageTestPlugin } from "./imessage.test-plugin.js";
|
||||
import { createIMessageTestPlugin } from "./test-plugin.js";
|
||||
|
||||
beforeEach(() => {
|
||||
resetFacadeRuntimeStateForTest();
|
||||
@@ -21,11 +21,4 @@ describe("createIMessageTestPlugin", () => {
|
||||
|
||||
expect(listImportedBundledPluginFacadeIds()).toEqual([]);
|
||||
});
|
||||
|
||||
it("normalizes repeated transport prefixes without recursive stack growth", () => {
|
||||
const plugin = createIMessageTestPlugin();
|
||||
const prefixedHandle = `${"imessage:".repeat(5000)}+44 20 7946 0958`;
|
||||
|
||||
expect(plugin.messaging?.normalizeTarget?.(prefixedHandle)).toBe("+442079460958");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -5,32 +5,20 @@ import { collectStatusIssuesFromLastError } from "openclaw/plugin-sdk/status-hel
|
||||
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
|
||||
|
||||
function normalizeIMessageTestHandle(raw: string): string {
|
||||
let trimmed = raw.trim();
|
||||
const trimmed = raw.trim();
|
||||
if (!trimmed) {
|
||||
return "";
|
||||
}
|
||||
|
||||
while (trimmed) {
|
||||
const lowered = normalizeLowercaseStringOrEmpty(trimmed);
|
||||
if (lowered.startsWith("imessage:")) {
|
||||
trimmed = trimmed.slice("imessage:".length).trim();
|
||||
continue;
|
||||
}
|
||||
if (lowered.startsWith("sms:")) {
|
||||
trimmed = trimmed.slice("sms:".length).trim();
|
||||
continue;
|
||||
}
|
||||
if (lowered.startsWith("auto:")) {
|
||||
trimmed = trimmed.slice("auto:".length).trim();
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
const lowered = normalizeLowercaseStringOrEmpty(trimmed);
|
||||
if (lowered.startsWith("imessage:")) {
|
||||
return normalizeIMessageTestHandle(trimmed.slice("imessage:".length));
|
||||
}
|
||||
|
||||
if (!trimmed) {
|
||||
return "";
|
||||
if (lowered.startsWith("sms:")) {
|
||||
return normalizeIMessageTestHandle(trimmed.slice("sms:".length));
|
||||
}
|
||||
if (lowered.startsWith("auto:")) {
|
||||
return normalizeIMessageTestHandle(trimmed.slice("auto:".length));
|
||||
}
|
||||
|
||||
if (/^(chat_id:|chat_guid:|chat_identifier:)/i.test(trimmed)) {
|
||||
return trimmed.replace(/^(chat_id:|chat_guid:|chat_identifier:)/i, (match) =>
|
||||
normalizeLowercaseStringOrEmpty(match),
|
||||
@@ -1 +0,0 @@
|
||||
export { createIMessageTestPlugin } from "./src/imessage.test-plugin.js";
|
||||
@@ -59,7 +59,7 @@ export const secretTargetRegistryEntries = [
|
||||
|
||||
export function collectRuntimeConfigAssignments(params: {
|
||||
config: { channels?: Record<string, unknown> };
|
||||
defaults?: SecretDefaults;
|
||||
defaults: SecretDefaults | undefined;
|
||||
context: ResolverContext;
|
||||
}): void {
|
||||
const resolved = getChannelSurface(params.config, "irc");
|
||||
|
||||
@@ -91,6 +91,7 @@ describe("matrixMessageActions account propagation", () => {
|
||||
await matrixMessageActions.handleAction?.(
|
||||
createContext({
|
||||
action: profileAction,
|
||||
senderIsOwner: true,
|
||||
accountId: "ops",
|
||||
params: {
|
||||
displayName: "Ops Bot",
|
||||
@@ -111,10 +112,50 @@ describe("matrixMessageActions account propagation", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects self-profile updates for non-owner callers", async () => {
|
||||
await expect(
|
||||
matrixMessageActions.handleAction?.(
|
||||
createContext({
|
||||
action: profileAction,
|
||||
senderIsOwner: false,
|
||||
accountId: "ops",
|
||||
params: {
|
||||
displayName: "Ops Bot",
|
||||
},
|
||||
}),
|
||||
),
|
||||
).rejects.toMatchObject({
|
||||
name: "ToolAuthorizationError",
|
||||
message: "Matrix profile updates require owner access.",
|
||||
});
|
||||
|
||||
expect(mocks.handleMatrixAction).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("rejects self-profile updates when owner status is unknown", async () => {
|
||||
await expect(
|
||||
matrixMessageActions.handleAction?.(
|
||||
createContext({
|
||||
action: profileAction,
|
||||
accountId: "ops",
|
||||
params: {
|
||||
displayName: "Ops Bot",
|
||||
},
|
||||
}),
|
||||
),
|
||||
).rejects.toMatchObject({
|
||||
name: "ToolAuthorizationError",
|
||||
message: "Matrix profile updates require owner access.",
|
||||
});
|
||||
|
||||
expect(mocks.handleMatrixAction).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("forwards local avatar paths for self-profile updates", async () => {
|
||||
await matrixMessageActions.handleAction?.(
|
||||
createContext({
|
||||
action: profileAction,
|
||||
senderIsOwner: true,
|
||||
accountId: "ops",
|
||||
params: {
|
||||
path: "/tmp/avatar.jpg",
|
||||
|
||||
@@ -78,6 +78,7 @@ describe("matrixMessageActions", () => {
|
||||
|
||||
const discovery = describeMessageTool({
|
||||
cfg: createConfiguredMatrixConfig(),
|
||||
senderIsOwner: true,
|
||||
} as never);
|
||||
if (!discovery) {
|
||||
throw new Error("describeMessageTool returned null");
|
||||
@@ -96,6 +97,31 @@ describe("matrixMessageActions", () => {
|
||||
expect(properties.avatarPath).toBeDefined();
|
||||
});
|
||||
|
||||
it("hides self-profile updates for non-owner discovery", () => {
|
||||
const discovery = matrixMessageActions.describeMessageTool({
|
||||
cfg: createConfiguredMatrixConfig(),
|
||||
senderIsOwner: false,
|
||||
} as never);
|
||||
if (!discovery) {
|
||||
throw new Error("describeMessageTool returned null");
|
||||
}
|
||||
|
||||
expect(discovery.actions).not.toContain(profileAction);
|
||||
expect(discovery.schema).toBeNull();
|
||||
});
|
||||
|
||||
it("hides self-profile updates when owner status is unknown", () => {
|
||||
const discovery = matrixMessageActions.describeMessageTool({
|
||||
cfg: createConfiguredMatrixConfig(),
|
||||
} as never);
|
||||
if (!discovery) {
|
||||
throw new Error("describeMessageTool returned null");
|
||||
}
|
||||
|
||||
expect(discovery.actions).not.toContain(profileAction);
|
||||
expect(discovery.schema).toBeNull();
|
||||
});
|
||||
|
||||
it("hides gated actions when the default Matrix account disables them", () => {
|
||||
const discovery = matrixMessageActions.describeMessageTool({
|
||||
cfg: {
|
||||
|
||||
@@ -7,11 +7,11 @@ import {
|
||||
createActionGate,
|
||||
readNumberParam,
|
||||
readStringParam,
|
||||
ToolAuthorizationError,
|
||||
type ChannelMessageActionAdapter,
|
||||
type ChannelMessageActionContext,
|
||||
type ChannelMessageActionName,
|
||||
type ChannelMessageToolDiscovery,
|
||||
type ChannelToolSend,
|
||||
} from "./runtime-api.js";
|
||||
import type { CoreConfig } from "./types.js";
|
||||
|
||||
@@ -35,6 +35,7 @@ const MATRIX_PLUGIN_HANDLED_ACTIONS = new Set<ChannelMessageActionName>([
|
||||
function createMatrixExposedActions(params: {
|
||||
gate: ReturnType<typeof createActionGate>;
|
||||
encryptionEnabled: boolean;
|
||||
senderIsOwner?: boolean;
|
||||
}) {
|
||||
const actions = new Set<ChannelMessageActionName>(["poll", "poll-vote"]);
|
||||
if (params.gate("messages")) {
|
||||
@@ -52,7 +53,7 @@ function createMatrixExposedActions(params: {
|
||||
actions.add("unpin");
|
||||
actions.add("list-pins");
|
||||
}
|
||||
if (params.gate("profile")) {
|
||||
if (params.gate("profile") && params.senderIsOwner === true) {
|
||||
actions.add("set-profile");
|
||||
}
|
||||
if (params.gate("memberInfo")) {
|
||||
@@ -109,7 +110,7 @@ function buildMatrixProfileToolSchema(): NonNullable<ChannelMessageToolDiscovery
|
||||
}
|
||||
|
||||
export const matrixMessageActions: ChannelMessageActionAdapter = {
|
||||
describeMessageTool: ({ cfg, accountId }) => {
|
||||
describeMessageTool: ({ cfg, accountId, senderIsOwner }) => {
|
||||
const resolvedCfg = cfg as CoreConfig;
|
||||
if (!accountId && requiresExplicitMatrixDefaultAccount(resolvedCfg)) {
|
||||
return { actions: [], capabilities: [] };
|
||||
@@ -125,6 +126,7 @@ export const matrixMessageActions: ChannelMessageActionAdapter = {
|
||||
const actions = createMatrixExposedActions({
|
||||
gate,
|
||||
encryptionEnabled: account.config.encryption === true,
|
||||
senderIsOwner,
|
||||
});
|
||||
const listedActions = Array.from(actions);
|
||||
return {
|
||||
@@ -134,7 +136,7 @@ export const matrixMessageActions: ChannelMessageActionAdapter = {
|
||||
};
|
||||
},
|
||||
supportsAction: ({ action }) => MATRIX_PLUGIN_HANDLED_ACTIONS.has(action),
|
||||
extractToolSend: ({ args }): ChannelToolSend | null => {
|
||||
extractToolSend: ({ args }) => {
|
||||
return extractToolSend(args, "sendMessage");
|
||||
},
|
||||
handleAction: async (ctx: ChannelMessageActionContext) => {
|
||||
@@ -259,6 +261,9 @@ export const matrixMessageActions: ChannelMessageActionAdapter = {
|
||||
}
|
||||
|
||||
if (action === "set-profile") {
|
||||
if (ctx.senderIsOwner !== true) {
|
||||
throw new ToolAuthorizationError("Matrix profile updates require owner access.");
|
||||
}
|
||||
const avatarPath =
|
||||
readStringParam(params, "avatarPath") ??
|
||||
readStringParam(params, "path") ??
|
||||
|
||||
@@ -45,53 +45,6 @@ function hasLegacyMatrixAccountPrivateNetworkAliases(value: unknown): boolean {
|
||||
);
|
||||
}
|
||||
|
||||
function hasLegacyTrustedDmPolicy(value: unknown): boolean {
|
||||
const root = isRecord(value) ? value : null;
|
||||
if (!root) {
|
||||
return false;
|
||||
}
|
||||
const dm = isRecord(root.dm) ? root.dm : null;
|
||||
return dm?.policy === "trusted";
|
||||
}
|
||||
|
||||
function hasLegacyMatrixAccountTrustedDmPolicies(value: unknown): boolean {
|
||||
const accounts = isRecord(value) ? value : null;
|
||||
if (!accounts) {
|
||||
return false;
|
||||
}
|
||||
return Object.values(accounts).some((account) => hasLegacyTrustedDmPolicy(account));
|
||||
}
|
||||
|
||||
function migrateLegacyTrustedDmPolicy(params: {
|
||||
entry: Record<string, unknown>;
|
||||
pathPrefix: string;
|
||||
changes: string[];
|
||||
}): { entry: Record<string, unknown>; changed: boolean } {
|
||||
const dm = isRecord(params.entry.dm) ? params.entry.dm : null;
|
||||
if (!dm || dm.policy !== "trusted") {
|
||||
return { entry: params.entry, changed: false };
|
||||
}
|
||||
const allowFromRaw = dm.allowFrom;
|
||||
// Trim before counting: downstream allowlist normalization drops whitespace-only
|
||||
// entries, so a config like [" "] must still fall back to "pairing"
|
||||
// instead of becoming an effectively empty allowlist.
|
||||
const allowFromEntries = Array.isArray(allowFromRaw)
|
||||
? allowFromRaw.filter(
|
||||
(entry): entry is string => typeof entry === "string" && entry.trim().length > 0,
|
||||
).length
|
||||
: 0;
|
||||
// Preserve the operator's existing trust boundary when an explicit allowFrom
|
||||
// list is present; only fall back to pairing when the effective allowlist is
|
||||
// empty.
|
||||
const nextPolicy: "allowlist" | "pairing" = allowFromEntries > 0 ? "allowlist" : "pairing";
|
||||
const nextDm = { ...dm, policy: nextPolicy };
|
||||
params.changes.push(
|
||||
`Migrated ${params.pathPrefix}.dm.policy "trusted" → "${nextPolicy}" (legacy alias removed; ` +
|
||||
`${allowFromEntries > 0 ? `preserved ${allowFromEntries} ${params.pathPrefix}.dm.allowFrom ${allowFromEntries === 1 ? "entry" : "entries"}` : "no allowFrom entries present, defaulting to pairing for safety"}).`,
|
||||
);
|
||||
return { entry: { ...params.entry, dm: nextDm }, changed: true };
|
||||
}
|
||||
|
||||
function normalizeMatrixRoomAllowAliases(params: {
|
||||
rooms: Record<string, unknown>;
|
||||
pathPrefix: string;
|
||||
@@ -149,18 +102,6 @@ export const legacyConfigRules: ChannelDoctorLegacyConfigRule[] = [
|
||||
'channels.matrix.accounts.<id>.{groups,rooms}.<room>.allow is legacy; use channels.matrix.accounts.<id>.{groups,rooms}.<room>.enabled instead. Run "openclaw doctor --fix".',
|
||||
match: hasLegacyMatrixAccountRoomAllowAliases,
|
||||
},
|
||||
{
|
||||
path: ["channels", "matrix"],
|
||||
message:
|
||||
'channels.matrix.dm.policy "trusted" is legacy; use "allowlist" (with allowFrom entries) or "pairing" instead. Run "openclaw doctor --fix".',
|
||||
match: hasLegacyTrustedDmPolicy,
|
||||
},
|
||||
{
|
||||
path: ["channels", "matrix", "accounts"],
|
||||
message:
|
||||
'channels.matrix.accounts.<id>.dm.policy "trusted" is legacy; use "allowlist" (with allowFrom entries) or "pairing" instead. Run "openclaw doctor --fix".',
|
||||
match: hasLegacyMatrixAccountTrustedDmPolicies,
|
||||
},
|
||||
];
|
||||
|
||||
export function normalizeCompatibilityConfig({
|
||||
@@ -186,14 +127,6 @@ export function normalizeCompatibilityConfig({
|
||||
updatedMatrix = topLevelPrivateNetwork.entry;
|
||||
changed = changed || topLevelPrivateNetwork.changed;
|
||||
|
||||
const topLevelTrustedDmPolicy = migrateLegacyTrustedDmPolicy({
|
||||
entry: updatedMatrix,
|
||||
pathPrefix: "channels.matrix",
|
||||
changes,
|
||||
});
|
||||
updatedMatrix = topLevelTrustedDmPolicy.entry;
|
||||
changed = changed || topLevelTrustedDmPolicy.changed;
|
||||
|
||||
const normalizeTopLevelRoomScope = (key: "groups" | "rooms") => {
|
||||
const rooms = isRecord(updatedMatrix[key]) ? updatedMatrix[key] : null;
|
||||
if (!rooms) {
|
||||
@@ -235,16 +168,6 @@ export function normalizeCompatibilityConfig({
|
||||
accountChanged = true;
|
||||
}
|
||||
|
||||
const accountTrustedDmPolicy = migrateLegacyTrustedDmPolicy({
|
||||
entry: nextAccount,
|
||||
pathPrefix: `channels.matrix.accounts.${accountId}`,
|
||||
changes,
|
||||
});
|
||||
if (accountTrustedDmPolicy.changed) {
|
||||
nextAccount = accountTrustedDmPolicy.entry;
|
||||
accountChanged = true;
|
||||
}
|
||||
|
||||
for (const key of ["groups", "rooms"] as const) {
|
||||
const rooms = isRecord(nextAccount[key]) ? nextAccount[key] : null;
|
||||
if (!rooms) {
|
||||
|
||||
@@ -232,199 +232,4 @@ describe("matrix doctor", () => {
|
||||
]),
|
||||
);
|
||||
});
|
||||
|
||||
it("migrates legacy channels.matrix.dm.policy 'trusted' with allowFrom to 'allowlist'", () => {
|
||||
const normalize = matrixDoctor.normalizeCompatibilityConfig;
|
||||
expect(normalize).toBeDefined();
|
||||
if (!normalize) {
|
||||
return;
|
||||
}
|
||||
|
||||
const result = normalize({
|
||||
cfg: {
|
||||
channels: {
|
||||
matrix: {
|
||||
dm: {
|
||||
enabled: true,
|
||||
policy: "trusted",
|
||||
allowFrom: ["@alice:example.org", "@bob:example.org"],
|
||||
},
|
||||
},
|
||||
},
|
||||
} as never,
|
||||
});
|
||||
|
||||
const matrixDm = (
|
||||
result.config.channels?.matrix as { dm?: { policy?: string; allowFrom?: string[] } }
|
||||
)?.dm;
|
||||
|
||||
expect(matrixDm?.policy).toBe("allowlist");
|
||||
expect(matrixDm?.allowFrom).toEqual(["@alice:example.org", "@bob:example.org"]);
|
||||
expect(result.changes).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.stringContaining('Migrated channels.matrix.dm.policy "trusted" → "allowlist"'),
|
||||
expect.stringContaining("preserved 2 channels.matrix.dm.allowFrom entries"),
|
||||
]),
|
||||
);
|
||||
});
|
||||
|
||||
it("migrates legacy 'trusted' policy with whitespace-only allowFrom entries to 'pairing'", () => {
|
||||
// Whitespace-only entries are dropped by downstream allowlist normalization,
|
||||
// so they must not count toward the allowFrom population check — otherwise
|
||||
// the migration would emit policy="allowlist" with an effectively empty
|
||||
// allowlist, silently blocking all DMs.
|
||||
const normalize = matrixDoctor.normalizeCompatibilityConfig;
|
||||
expect(normalize).toBeDefined();
|
||||
if (!normalize) {
|
||||
return;
|
||||
}
|
||||
|
||||
const result = normalize({
|
||||
cfg: {
|
||||
channels: {
|
||||
matrix: {
|
||||
dm: {
|
||||
enabled: true,
|
||||
policy: "trusted",
|
||||
allowFrom: [" ", "\t", ""],
|
||||
},
|
||||
},
|
||||
},
|
||||
} as never,
|
||||
});
|
||||
|
||||
const matrixDm = (result.config.channels?.matrix as { dm?: { policy?: string } })?.dm;
|
||||
expect(matrixDm?.policy).toBe("pairing");
|
||||
expect(result.changes).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.stringContaining('Migrated channels.matrix.dm.policy "trusted" → "pairing"'),
|
||||
]),
|
||||
);
|
||||
});
|
||||
|
||||
it("migrates legacy channels.matrix.dm.policy 'trusted' without allowFrom to 'pairing'", () => {
|
||||
const normalize = matrixDoctor.normalizeCompatibilityConfig;
|
||||
expect(normalize).toBeDefined();
|
||||
if (!normalize) {
|
||||
return;
|
||||
}
|
||||
|
||||
const result = normalize({
|
||||
cfg: {
|
||||
channels: {
|
||||
matrix: {
|
||||
dm: {
|
||||
enabled: true,
|
||||
policy: "trusted",
|
||||
},
|
||||
},
|
||||
},
|
||||
} as never,
|
||||
});
|
||||
|
||||
const matrixDm = (result.config.channels?.matrix as { dm?: { policy?: string } })?.dm;
|
||||
expect(matrixDm?.policy).toBe("pairing");
|
||||
expect(result.changes).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.stringContaining('Migrated channels.matrix.dm.policy "trusted" → "pairing"'),
|
||||
]),
|
||||
);
|
||||
});
|
||||
|
||||
it("migrates legacy per-account channels.matrix.accounts.<id>.dm.policy 'trusted'", () => {
|
||||
const normalize = matrixDoctor.normalizeCompatibilityConfig;
|
||||
expect(normalize).toBeDefined();
|
||||
if (!normalize) {
|
||||
return;
|
||||
}
|
||||
|
||||
const result = normalize({
|
||||
cfg: {
|
||||
channels: {
|
||||
matrix: {
|
||||
accounts: {
|
||||
work: {
|
||||
dm: {
|
||||
enabled: true,
|
||||
policy: "trusted",
|
||||
allowFrom: ["@boss:example.org"],
|
||||
},
|
||||
},
|
||||
personal: {
|
||||
dm: {
|
||||
enabled: true,
|
||||
policy: "trusted",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
} as never,
|
||||
});
|
||||
|
||||
const accounts = (
|
||||
result.config.channels?.matrix as {
|
||||
accounts?: Record<string, { dm?: { policy?: string; allowFrom?: string[] } }>;
|
||||
}
|
||||
)?.accounts;
|
||||
|
||||
expect(accounts?.work?.dm?.policy).toBe("allowlist");
|
||||
expect(accounts?.work?.dm?.allowFrom).toEqual(["@boss:example.org"]);
|
||||
expect(accounts?.personal?.dm?.policy).toBe("pairing");
|
||||
expect(result.changes).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.stringContaining(
|
||||
'Migrated channels.matrix.accounts.work.dm.policy "trusted" → "allowlist"',
|
||||
),
|
||||
expect.stringContaining(
|
||||
'Migrated channels.matrix.accounts.personal.dm.policy "trusted" → "pairing"',
|
||||
),
|
||||
]),
|
||||
);
|
||||
});
|
||||
|
||||
it("leaves modern dm.policy values untouched", () => {
|
||||
const normalize = matrixDoctor.normalizeCompatibilityConfig;
|
||||
expect(normalize).toBeDefined();
|
||||
if (!normalize) {
|
||||
return;
|
||||
}
|
||||
|
||||
const result = normalize({
|
||||
cfg: {
|
||||
channels: {
|
||||
matrix: {
|
||||
dm: {
|
||||
enabled: true,
|
||||
policy: "allowlist",
|
||||
allowFrom: ["@alice:example.org"],
|
||||
},
|
||||
accounts: {
|
||||
work: {
|
||||
dm: { enabled: true, policy: "pairing" },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
} as never,
|
||||
});
|
||||
|
||||
expect(result.changes).toEqual([]);
|
||||
expect(result.config).toEqual({
|
||||
channels: {
|
||||
matrix: {
|
||||
dm: {
|
||||
enabled: true,
|
||||
policy: "allowlist",
|
||||
allowFrom: ["@alice:example.org"],
|
||||
},
|
||||
accounts: {
|
||||
work: {
|
||||
dm: { enabled: true, policy: "pairing" },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -10,6 +10,7 @@ export {
|
||||
readReactionParams,
|
||||
readStringArrayParam,
|
||||
readStringParam,
|
||||
ToolAuthorizationError,
|
||||
} from "openclaw/plugin-sdk/channel-actions";
|
||||
export { buildChannelConfigSchema } from "openclaw/plugin-sdk/channel-config-primitives";
|
||||
export type { ChannelPlugin } from "openclaw/plugin-sdk/channel-core";
|
||||
|
||||
@@ -60,7 +60,7 @@ export const secretTargetRegistryEntries = [
|
||||
|
||||
export function collectRuntimeConfigAssignments(params: {
|
||||
config: { channels?: Record<string, unknown> };
|
||||
defaults?: SecretDefaults;
|
||||
defaults: SecretDefaults | undefined;
|
||||
context: ResolverContext;
|
||||
}): void {
|
||||
const resolved = getChannelSurface(params.config, "matrix");
|
||||
|
||||
@@ -33,7 +33,7 @@ export const secretTargetRegistryEntries = [
|
||||
|
||||
export function collectRuntimeConfigAssignments(params: {
|
||||
config: { channels?: Record<string, unknown> };
|
||||
defaults?: SecretDefaults;
|
||||
defaults: SecretDefaults | undefined;
|
||||
context: ResolverContext;
|
||||
}): void {
|
||||
const resolved = getChannelSurface(params.config, "mattermost");
|
||||
|
||||
@@ -4,5 +4,8 @@ export type {
|
||||
MemoryProviderStatus,
|
||||
MemorySyncProgressUpdate,
|
||||
} from "openclaw/plugin-sdk/memory-core-host-engine-storage";
|
||||
export { removeBackfillDiaryEntries, writeBackfillDiaryEntries } from "./src/dreaming-narrative.js";
|
||||
export {
|
||||
removeBackfillDiaryEntries,
|
||||
writeBackfillDiaryEntries,
|
||||
} from "./src/dreaming-narrative.js";
|
||||
export { previewGroundedRemMarkdown } from "./src/rem-evidence.js";
|
||||
|
||||
@@ -17,7 +17,6 @@ export { checkQmdBinaryAvailability } from "openclaw/plugin-sdk/memory-core-host
|
||||
export { hasConfiguredMemorySecretInput } from "openclaw/plugin-sdk/memory-core-host-secret";
|
||||
export {
|
||||
auditShortTermPromotionArtifacts,
|
||||
removeGroundedShortTermCandidates,
|
||||
repairShortTermPromotionArtifacts,
|
||||
} from "./src/short-term-promotion.js";
|
||||
export type { BuiltinMemoryEmbeddingProviderDoctorMetadata } from "./src/memory/provider-adapters.js";
|
||||
|
||||
@@ -42,10 +42,8 @@ import { previewGroundedRemMarkdown } from "./rem-evidence.js";
|
||||
import {
|
||||
applyShortTermPromotions,
|
||||
auditShortTermPromotionArtifacts,
|
||||
removeGroundedShortTermCandidates,
|
||||
repairShortTermPromotionArtifacts,
|
||||
readShortTermRecallEntries,
|
||||
recordGroundedShortTermCandidates,
|
||||
recordShortTermRecalls,
|
||||
rankShortTermPromotionCandidates,
|
||||
resolveShortTermRecallLockPath,
|
||||
@@ -122,15 +120,7 @@ const DAILY_MEMORY_FILE_NAME_RE = /^(\d{4}-\d{2}-\d{2})\.md$/;
|
||||
|
||||
async function listHistoricalDailyFiles(inputPath: string): Promise<string[]> {
|
||||
const resolvedPath = path.resolve(inputPath);
|
||||
let stat;
|
||||
try {
|
||||
stat = await fs.stat(resolvedPath);
|
||||
} catch (err) {
|
||||
if ((err as NodeJS.ErrnoException | undefined)?.code === "ENOENT") {
|
||||
return [];
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
const stat = await fs.stat(resolvedPath);
|
||||
if (stat.isFile()) {
|
||||
return DAILY_MEMORY_FILE_NAME_RE.test(path.basename(resolvedPath)) ? [resolvedPath] : [];
|
||||
}
|
||||
@@ -306,100 +296,6 @@ function groundedMarkdownToDiaryLines(markdown: string): string[] {
|
||||
.filter((line, index, lines) => !(line.length === 0 && lines[index - 1]?.length === 0));
|
||||
}
|
||||
|
||||
function parseGroundedRef(
|
||||
fallbackPath: string,
|
||||
ref: string,
|
||||
): { path: string; startLine: number; endLine: number } | null {
|
||||
const trimmed = ref.trim();
|
||||
if (!trimmed) {
|
||||
return null;
|
||||
}
|
||||
const match = trimmed.match(/^(.*?):(\d+)(?:-(\d+))?$/);
|
||||
if (!match) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
path: (match[1] ?? fallbackPath).replaceAll("\\", "/").replace(/^\.\//, ""),
|
||||
startLine: Math.max(1, Number(match[2])),
|
||||
endLine: Math.max(1, Number(match[3] ?? match[2])),
|
||||
};
|
||||
}
|
||||
|
||||
function collectGroundedShortTermSeedItems(
|
||||
previews: Awaited<ReturnType<typeof previewGroundedRemMarkdown>>["files"],
|
||||
): Array<{
|
||||
path: string;
|
||||
startLine: number;
|
||||
endLine: number;
|
||||
snippet: string;
|
||||
score: number;
|
||||
query: string;
|
||||
signalCount: number;
|
||||
dayBucket?: string;
|
||||
}> {
|
||||
const items: Array<{
|
||||
path: string;
|
||||
startLine: number;
|
||||
endLine: number;
|
||||
snippet: string;
|
||||
score: number;
|
||||
query: string;
|
||||
signalCount: number;
|
||||
dayBucket?: string;
|
||||
}> = [];
|
||||
const seen = new Set<string>();
|
||||
|
||||
for (const file of previews) {
|
||||
const dayBucket = extractIsoDayFromPath(file.path) ?? undefined;
|
||||
const signals = [
|
||||
...file.memoryImplications.map((item) => ({
|
||||
text: item.text,
|
||||
refs: item.refs,
|
||||
score: 0.92,
|
||||
query: "__dreaming_grounded_backfill__:lasting-update",
|
||||
signalCount: 2,
|
||||
})),
|
||||
...file.candidates
|
||||
.filter((candidate) => candidate.lean === "likely_durable")
|
||||
.map((candidate) => ({
|
||||
text: candidate.text,
|
||||
refs: candidate.refs,
|
||||
score: 0.82,
|
||||
query: "__dreaming_grounded_backfill__:candidate",
|
||||
signalCount: 1,
|
||||
})),
|
||||
];
|
||||
|
||||
for (const signal of signals) {
|
||||
if (!signal.text.trim()) {
|
||||
continue;
|
||||
}
|
||||
const firstRef = signal.refs.find((ref) => ref.trim().length > 0);
|
||||
const parsedRef = firstRef ? parseGroundedRef(file.path, firstRef) : null;
|
||||
if (!parsedRef) {
|
||||
continue;
|
||||
}
|
||||
const key = `${parsedRef.path}:${parsedRef.startLine}:${parsedRef.endLine}:${signal.query}:${signal.text.toLowerCase()}`;
|
||||
if (seen.has(key)) {
|
||||
continue;
|
||||
}
|
||||
seen.add(key);
|
||||
items.push({
|
||||
path: parsedRef.path,
|
||||
startLine: parsedRef.startLine,
|
||||
endLine: parsedRef.endLine,
|
||||
snippet: signal.text,
|
||||
score: signal.score,
|
||||
query: signal.query,
|
||||
signalCount: signal.signalCount,
|
||||
...(dayBucket ? { dayBucket } : {}),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return items;
|
||||
}
|
||||
|
||||
function matchesPromotionSelector(
|
||||
candidate: {
|
||||
key: string;
|
||||
@@ -656,7 +552,7 @@ export async function runMemoryStatus(opts: MemoryCommandOptions) {
|
||||
run: async (manager) => {
|
||||
const deep = Boolean(opts.deep || opts.index);
|
||||
let embeddingProbe:
|
||||
| Awaited<ReturnType<MemoryManager["probeEmbeddingAvailability"]>>
|
||||
| Awaited<ReturnType<typeof manager.probeEmbeddingAvailability>>
|
||||
| undefined;
|
||||
let indexError: string | undefined;
|
||||
const syncFn = manager.sync ? manager.sync.bind(manager) : undefined;
|
||||
@@ -1652,30 +1548,14 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (opts.rollback || opts.rollbackShortTerm) {
|
||||
const diaryRollback = opts.rollback
|
||||
? await removeBackfillDiaryEntries({ workspaceDir })
|
||||
: null;
|
||||
const shortTermRollback = opts.rollbackShortTerm
|
||||
? await removeGroundedShortTermCandidates({ workspaceDir })
|
||||
: null;
|
||||
if (opts.rollback) {
|
||||
const removed = await removeBackfillDiaryEntries({ workspaceDir });
|
||||
if (opts.json) {
|
||||
defaultRuntime.writeJson({
|
||||
workspaceDir,
|
||||
rollback: Boolean(opts.rollback),
|
||||
rollbackShortTerm: Boolean(opts.rollbackShortTerm),
|
||||
...(diaryRollback
|
||||
? {
|
||||
dreamsPath: diaryRollback.dreamsPath,
|
||||
removedEntries: diaryRollback.removed,
|
||||
}
|
||||
: {}),
|
||||
...(shortTermRollback
|
||||
? {
|
||||
shortTermStorePath: shortTermRollback.storePath,
|
||||
removedShortTermEntries: shortTermRollback.removed,
|
||||
}
|
||||
: {}),
|
||||
rollback: true,
|
||||
dreamsPath: removed.dreamsPath,
|
||||
removedEntries: removed.removed,
|
||||
});
|
||||
return;
|
||||
}
|
||||
@@ -1683,30 +1563,8 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) {
|
||||
[
|
||||
`${colorize(isRich(), theme.heading, "REM Backfill")} ${colorize(isRich(), theme.muted, "(rollback)")}`,
|
||||
colorize(isRich(), theme.muted, `workspace=${shortenHomePath(workspaceDir)}`),
|
||||
...(diaryRollback
|
||||
? [
|
||||
colorize(
|
||||
isRich(),
|
||||
theme.muted,
|
||||
`dreamsPath=${shortenHomePath(diaryRollback.dreamsPath)}`,
|
||||
),
|
||||
colorize(isRich(), theme.muted, `removedEntries=${diaryRollback.removed}`),
|
||||
]
|
||||
: []),
|
||||
...(shortTermRollback
|
||||
? [
|
||||
colorize(
|
||||
isRich(),
|
||||
theme.muted,
|
||||
`shortTermStorePath=${shortenHomePath(shortTermRollback.storePath)}`,
|
||||
),
|
||||
colorize(
|
||||
isRich(),
|
||||
theme.muted,
|
||||
`removedShortTermEntries=${shortTermRollback.removed}`,
|
||||
),
|
||||
]
|
||||
: []),
|
||||
colorize(isRich(), theme.muted, `dreamsPath=${shortenHomePath(removed.dreamsPath)}`),
|
||||
colorize(isRich(), theme.muted, `removedEntries=${removed.removed}`),
|
||||
].join("\n"),
|
||||
);
|
||||
return;
|
||||
@@ -1742,11 +1600,6 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) {
|
||||
workspaceDir: scratchDir,
|
||||
inputPaths: workspaceSourceFiles,
|
||||
});
|
||||
const sourcePathByDay = new Map(
|
||||
sourceFiles
|
||||
.map((sourcePath) => [extractIsoDayFromPath(sourcePath), sourcePath] as const)
|
||||
.filter((entry): entry is [string, string] => Boolean(entry[0])),
|
||||
);
|
||||
const entries = grounded.files
|
||||
.map((file) => {
|
||||
const isoDay = extractIsoDayFromPath(file.path);
|
||||
@@ -1755,7 +1608,7 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) {
|
||||
}
|
||||
return {
|
||||
isoDay,
|
||||
sourcePath: sourcePathByDay.get(isoDay) ?? file.path,
|
||||
sourcePath: file.path,
|
||||
bodyLines: groundedMarkdownToDiaryLines(file.renderedMarkdown),
|
||||
};
|
||||
})
|
||||
@@ -1766,24 +1619,6 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) {
|
||||
entries,
|
||||
timezone: remConfig.timezone,
|
||||
});
|
||||
let stagedShortTermEntries = 0;
|
||||
let replacedShortTermEntries = 0;
|
||||
if (opts.stageShortTerm) {
|
||||
const cleared = await removeGroundedShortTermCandidates({ workspaceDir });
|
||||
replacedShortTermEntries = cleared.removed;
|
||||
const shortTermSeedItems = collectGroundedShortTermSeedItems(grounded.files);
|
||||
if (shortTermSeedItems.length > 0) {
|
||||
await recordGroundedShortTermCandidates({
|
||||
workspaceDir,
|
||||
query: "__dreaming_grounded_backfill__",
|
||||
items: shortTermSeedItems,
|
||||
dedupeByQueryPerDay: true,
|
||||
nowMs: Date.now(),
|
||||
timezone: remConfig.timezone,
|
||||
});
|
||||
}
|
||||
stagedShortTermEntries = shortTermSeedItems.length;
|
||||
}
|
||||
|
||||
if (opts.json) {
|
||||
defaultRuntime.writeJson({
|
||||
@@ -1794,12 +1629,6 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) {
|
||||
writtenEntries: written.written,
|
||||
replacedEntries: written.replaced,
|
||||
dreamsPath: written.dreamsPath,
|
||||
...(opts.stageShortTerm
|
||||
? {
|
||||
stagedShortTermEntries,
|
||||
replacedShortTermEntries,
|
||||
}
|
||||
: {}),
|
||||
});
|
||||
return;
|
||||
}
|
||||
@@ -1815,15 +1644,6 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) {
|
||||
theme.muted,
|
||||
`historicalFiles=${sourceFiles.length} writtenEntries=${written.written} replacedEntries=${written.replaced}`,
|
||||
),
|
||||
...(opts.stageShortTerm
|
||||
? [
|
||||
colorize(
|
||||
rich,
|
||||
theme.muted,
|
||||
`stagedShortTermEntries=${stagedShortTermEntries} replacedShortTermEntries=${replacedShortTermEntries}`,
|
||||
),
|
||||
]
|
||||
: []),
|
||||
colorize(rich, theme.muted, `dreamsPath=${shortenHomePath(written.dreamsPath)}`),
|
||||
].join("\n"),
|
||||
);
|
||||
|
||||
@@ -9,7 +9,7 @@ import {
|
||||
spyRuntimeJson,
|
||||
spyRuntimeLogs,
|
||||
} from "../../../src/cli/test-runtime-capture.js";
|
||||
import { readShortTermRecallEntries, recordShortTermRecalls } from "./short-term-promotion.js";
|
||||
import { recordShortTermRecalls } from "./short-term-promotion.js";
|
||||
|
||||
const getMemorySearchManager = vi.hoisted(() => vi.fn());
|
||||
const loadConfig = vi.hoisted(() => vi.fn(() => ({})));
|
||||
@@ -1066,7 +1066,6 @@ describe("memory cli", () => {
|
||||
|
||||
const dreams = await fs.readFile(path.join(workspaceDir, "DREAMS.md"), "utf-8");
|
||||
expect(dreams).toContain("openclaw:dreaming:backfill-entry");
|
||||
expect(dreams).toContain(`source=${historyPath}`);
|
||||
expect(dreams).toContain("January 1, 2025");
|
||||
expect(dreams).toContain("What Happened");
|
||||
expect(dreams).toContain("Possible Lasting Updates");
|
||||
@@ -1075,89 +1074,6 @@ describe("memory cli", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("treats a missing historical path as a controlled empty-source error", async () => {
|
||||
await withTempWorkspace(async (workspaceDir) => {
|
||||
const close = vi.fn(async () => {});
|
||||
mockManager({
|
||||
status: () => makeMemoryStatus({ workspaceDir }),
|
||||
close,
|
||||
});
|
||||
|
||||
const errors = spyRuntimeErrors(defaultRuntime);
|
||||
await runMemoryCli(["rem-backfill", "--path", path.join(workspaceDir, "missing-history")]);
|
||||
|
||||
expect(
|
||||
errors.mock.calls.some((call) => String(call[0]).includes("found no YYYY-MM-DD.md files")),
|
||||
).toBe(true);
|
||||
expect(close).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
it("stages grounded durable candidates into the live short-term store", async () => {
|
||||
await withTempWorkspace(async (workspaceDir) => {
|
||||
const historyDir = path.join(workspaceDir, "history");
|
||||
await fs.mkdir(historyDir, { recursive: true });
|
||||
const historyPath = path.join(historyDir, "2025-01-01.md");
|
||||
await fs.writeFile(
|
||||
historyPath,
|
||||
[
|
||||
"## Preferences Learned",
|
||||
'- Always use "Happy Together" calendar for flights and reservations.',
|
||||
].join("\n") + "\n",
|
||||
"utf-8",
|
||||
);
|
||||
|
||||
const close = vi.fn(async () => {});
|
||||
mockManager({
|
||||
status: () => makeMemoryStatus({ workspaceDir }),
|
||||
close,
|
||||
});
|
||||
|
||||
await runMemoryCli(["rem-backfill", "--path", historyPath, "--stage-short-term"]);
|
||||
|
||||
const entries = await readShortTermRecallEntries({ workspaceDir });
|
||||
expect(entries).toHaveLength(1);
|
||||
expect(entries[0]?.snippet).toContain("Happy Together");
|
||||
expect(entries[0]?.groundedCount).toBe(3);
|
||||
expect(entries[0]?.queryHashes).toHaveLength(2);
|
||||
expect(entries[0]?.recallCount).toBe(0);
|
||||
expect(close).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
it("rolls back grounded staged short-term entries without touching diary rollback", async () => {
|
||||
await withTempWorkspace(async (workspaceDir) => {
|
||||
const historyDir = path.join(workspaceDir, "history");
|
||||
await fs.mkdir(historyDir, { recursive: true });
|
||||
const historyPath = path.join(historyDir, "2025-01-01.md");
|
||||
await fs.writeFile(
|
||||
historyPath,
|
||||
[
|
||||
"## Preferences Learned",
|
||||
'- Always use "Happy Together" calendar for flights and reservations.',
|
||||
].join("\n") + "\n",
|
||||
"utf-8",
|
||||
);
|
||||
|
||||
const close = vi.fn(async () => {});
|
||||
mockManager({
|
||||
status: () => makeMemoryStatus({ workspaceDir }),
|
||||
close,
|
||||
});
|
||||
|
||||
await runMemoryCli(["rem-backfill", "--path", historyPath, "--stage-short-term"]);
|
||||
mockManager({
|
||||
status: () => makeMemoryStatus({ workspaceDir }),
|
||||
close,
|
||||
});
|
||||
await runMemoryCli(["rem-backfill", "--rollback-short-term"]);
|
||||
|
||||
const entries = await readShortTermRecallEntries({ workspaceDir });
|
||||
expect(entries).toHaveLength(0);
|
||||
expect(close).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
it("prefers persistence-relevant evidence over narrated operational logs in grounded what happened", async () => {
|
||||
await withTempWorkspace(async (workspaceDir) => {
|
||||
const historyDir = path.join(workspaceDir, "history");
|
||||
@@ -1320,44 +1236,6 @@ describe("memory cli", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("does not split hyphenated words into malformed grounded candidates", async () => {
|
||||
await withTempWorkspace(async (workspaceDir) => {
|
||||
const historyDir = path.join(workspaceDir, "history");
|
||||
await fs.mkdir(historyDir, { recursive: true });
|
||||
const historyPath = path.join(historyDir, "2025-02-20.md");
|
||||
await fs.writeFile(
|
||||
historyPath,
|
||||
[
|
||||
"## Preferences Learned",
|
||||
"- Use long-term plans, avoid reactive task switching.",
|
||||
"- A self-aware workflow note should stay intact.",
|
||||
].join("\n") + "\n",
|
||||
"utf-8",
|
||||
);
|
||||
|
||||
const close = vi.fn(async () => {});
|
||||
mockManager({
|
||||
status: () => makeMemoryStatus({ workspaceDir }),
|
||||
close,
|
||||
});
|
||||
|
||||
const writeJson = spyRuntimeJson(defaultRuntime);
|
||||
await runMemoryCli(["rem-harness", "--json", "--grounded", "--path", historyPath]);
|
||||
|
||||
const payload = firstWrittenJsonArg<{
|
||||
grounded?: {
|
||||
files?: Array<{
|
||||
renderedMarkdown?: string;
|
||||
}>;
|
||||
} | null;
|
||||
}>(writeJson);
|
||||
const rendered = payload?.grounded?.files?.[0]?.renderedMarkdown ?? "";
|
||||
expect(rendered).not.toContain("Use long- term plans");
|
||||
expect(rendered).not.toContain("A self- aware workflow note");
|
||||
expect(close).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
it("rolls back grounded rem backfill entries from DREAMS.md", async () => {
|
||||
await withTempWorkspace(async (workspaceDir) => {
|
||||
const dreamsPath = path.join(workspaceDir, "DREAMS.md");
|
||||
|
||||
@@ -105,10 +105,6 @@ export function registerMemoryCli(program: Command) {
|
||||
"openclaw memory rem-backfill --path ./memory",
|
||||
"Write grounded historical REM entries into DREAMS.md for UI review.",
|
||||
],
|
||||
[
|
||||
"openclaw memory rem-backfill --path ./memory --stage-short-term",
|
||||
"Also seed durable grounded candidates into the live short-term promotion store.",
|
||||
],
|
||||
["openclaw memory status --json", "Output machine-readable JSON (good for scripts)."],
|
||||
])}\n\n${theme.muted("Docs:")} ${formatDocsLink("/cli/memory", "docs.openclaw.ai/cli/memory")}\n`,
|
||||
);
|
||||
@@ -205,16 +201,6 @@ export function registerMemoryCli(program: Command) {
|
||||
.option("--agent <id>", "Agent id (default: default agent)")
|
||||
.option("--path <file-or-dir>", "Historical daily memory file(s) or directory")
|
||||
.option("--rollback", "Remove previously written grounded REM backfill entries", false)
|
||||
.option(
|
||||
"--stage-short-term",
|
||||
"Also seed grounded durable candidates into the short-term promotion store",
|
||||
false,
|
||||
)
|
||||
.option(
|
||||
"--rollback-short-term",
|
||||
"Remove previously seeded grounded short-term candidates",
|
||||
false,
|
||||
)
|
||||
.option("--json", "Print JSON")
|
||||
.action(async (opts: MemoryRemBackfillOptions) => {
|
||||
await runMemoryRemBackfill(opts);
|
||||
|
||||
@@ -36,6 +36,4 @@ export type MemoryRemHarnessOptions = MemoryCommandOptions & {
|
||||
export type MemoryRemBackfillOptions = MemoryCommandOptions & {
|
||||
path?: string;
|
||||
rollback?: boolean;
|
||||
stageShortTerm?: boolean;
|
||||
rollbackShortTerm?: boolean;
|
||||
};
|
||||
|
||||
@@ -126,10 +126,6 @@ describe("backfill diary entries", () => {
|
||||
expect(formatBackfillDiaryDate("2026-01-01", "UTC")).toBe("January 1, 2026");
|
||||
});
|
||||
|
||||
it("preserves the iso day label in high-positive-offset timezones", () => {
|
||||
expect(formatBackfillDiaryDate("2026-01-01", "Pacific/Kiritimati")).toBe("January 1, 2026");
|
||||
});
|
||||
|
||||
it("builds a marked backfill diary entry", () => {
|
||||
const entry = buildBackfillDiaryEntry({
|
||||
isoDay: "2026-01-01",
|
||||
@@ -205,29 +201,6 @@ describe("backfill diary entries", () => {
|
||||
expect(content).toContain("Keep this real dream.");
|
||||
expect(content).not.toContain("Remove this backfill.");
|
||||
});
|
||||
|
||||
it("refuses to overwrite a symlinked DREAMS.md during backfill writes", async () => {
|
||||
const workspaceDir = await createTempWorkspace("openclaw-dreaming-backfill-");
|
||||
const targetPath = path.join(workspaceDir, "outside.txt");
|
||||
const dreamsPath = path.join(workspaceDir, "DREAMS.md");
|
||||
await fs.writeFile(targetPath, "outside\n", "utf-8");
|
||||
await fs.symlink(targetPath, dreamsPath);
|
||||
|
||||
await expect(
|
||||
writeBackfillDiaryEntries({
|
||||
workspaceDir,
|
||||
timezone: "UTC",
|
||||
entries: [
|
||||
{
|
||||
isoDay: "2026-01-01",
|
||||
sourcePath: "memory/2026-01-01.md",
|
||||
bodyLines: ["What Happened", "1. First pass."],
|
||||
},
|
||||
],
|
||||
}),
|
||||
).rejects.toThrow("Refusing to write symlinked DREAMS.md");
|
||||
await expect(fs.readFile(targetPath, "utf-8")).resolves.toBe("outside\n");
|
||||
});
|
||||
});
|
||||
|
||||
describe("appendNarrativeEntry", () => {
|
||||
|
||||
@@ -240,53 +240,17 @@ function stripBackfillDiaryBlocks(existing: string): { updated: string; removed:
|
||||
};
|
||||
}
|
||||
|
||||
export function formatBackfillDiaryDate(isoDay: string, _timezone?: string): string {
|
||||
const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(isoDay);
|
||||
if (!match) {
|
||||
return isoDay;
|
||||
}
|
||||
const [, year, month, day] = match;
|
||||
export function formatBackfillDiaryDate(isoDay: string, timezone?: string): string {
|
||||
const opts: Intl.DateTimeFormatOptions = {
|
||||
// Preserve the source iso day exactly; backfill labels should not drift by timezone.
|
||||
timeZone: "UTC",
|
||||
timeZone: timezone ?? "UTC",
|
||||
year: "numeric",
|
||||
month: "long",
|
||||
day: "numeric",
|
||||
};
|
||||
const epochMs = Date.UTC(Number(year), Number(month) - 1, Number(day), 12);
|
||||
const epochMs = Date.parse(`${isoDay}T12:00:00Z`);
|
||||
return new Intl.DateTimeFormat("en-US", opts).format(new Date(epochMs));
|
||||
}
|
||||
|
||||
async function assertSafeDreamsPath(dreamsPath: string): Promise<void> {
|
||||
const stat = await fs.lstat(dreamsPath).catch((err: NodeJS.ErrnoException) => {
|
||||
if (err.code === "ENOENT") {
|
||||
return null;
|
||||
}
|
||||
throw err;
|
||||
});
|
||||
if (!stat) {
|
||||
return;
|
||||
}
|
||||
if (stat.isSymbolicLink()) {
|
||||
throw new Error("Refusing to write symlinked DREAMS.md");
|
||||
}
|
||||
if (!stat.isFile()) {
|
||||
throw new Error("Refusing to write non-file DREAMS.md");
|
||||
}
|
||||
}
|
||||
|
||||
async function writeDreamsFileAtomic(dreamsPath: string, content: string): Promise<void> {
|
||||
await assertSafeDreamsPath(dreamsPath);
|
||||
const tempPath = `${dreamsPath}.${process.pid}.${Date.now()}.tmp`;
|
||||
await fs.writeFile(tempPath, content, { encoding: "utf-8", flag: "wx" });
|
||||
try {
|
||||
await fs.rename(tempPath, dreamsPath);
|
||||
} catch (err) {
|
||||
await fs.rm(tempPath, { force: true }).catch(() => {});
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
export function buildBackfillDiaryEntry(params: {
|
||||
isoDay: string;
|
||||
bodyLines: string[];
|
||||
@@ -295,10 +259,7 @@ export function buildBackfillDiaryEntry(params: {
|
||||
}): string {
|
||||
const dateStr = formatBackfillDiaryDate(params.isoDay, params.timezone);
|
||||
const marker = `<!-- ${BACKFILL_ENTRY_MARKER} day=${params.isoDay}${params.sourcePath ? ` source=${params.sourcePath}` : ""} -->`;
|
||||
const body = params.bodyLines
|
||||
.map((line) => line.trimEnd())
|
||||
.join("\n")
|
||||
.trim();
|
||||
const body = params.bodyLines.map((line) => line.trimEnd()).join("\n").trim();
|
||||
return [`*${dateStr}*`, marker, body].filter((part) => part.length > 0).join("\n\n");
|
||||
}
|
||||
|
||||
@@ -334,7 +295,7 @@ export async function writeBackfillDiaryEntries(params: {
|
||||
),
|
||||
];
|
||||
const updated = replaceDiaryContent(stripped.updated, joinDiaryBlocks(nextBlocks));
|
||||
await writeDreamsFileAtomic(dreamsPath, updated);
|
||||
await fs.writeFile(dreamsPath, updated, "utf-8");
|
||||
return {
|
||||
dreamsPath,
|
||||
written: params.entries.length,
|
||||
@@ -350,7 +311,7 @@ export async function removeBackfillDiaryEntries(params: {
|
||||
const stripped = stripBackfillDiaryBlocks(existing);
|
||||
if (stripped.removed > 0 || existing.length > 0) {
|
||||
await fs.mkdir(path.dirname(dreamsPath), { recursive: true });
|
||||
await writeDreamsFileAtomic(dreamsPath, stripped.updated);
|
||||
await fs.writeFile(dreamsPath, stripped.updated, "utf-8");
|
||||
}
|
||||
return {
|
||||
dreamsPath,
|
||||
|
||||
@@ -56,9 +56,6 @@ const REM_TIME_PREFIX_RE = /^\d{1,2}:\d{2}\s*-\s*/;
|
||||
const REM_CODE_FENCE_RE = /^\s*```/;
|
||||
const REM_TABLE_RE = /^\s*\|.*\|\s*$/;
|
||||
const REM_TABLE_DIVIDER_RE = /^\s*\|?[\s:-]+\|[\s|:-]*$/;
|
||||
const MAX_GROUNDED_REM_FILES = 512;
|
||||
const MAX_GROUNDED_REM_FILE_BYTES = 1_000_000;
|
||||
const GROUNDED_REM_SKIPPED_DIRS = new Set([".git", "node_modules"]);
|
||||
const REM_SUMMARY_FACT_LIMIT = 4;
|
||||
const REM_SUMMARY_REFLECTION_LIMIT = 4;
|
||||
const REM_SUMMARY_MEMORY_LIMIT = 3;
|
||||
@@ -608,7 +605,7 @@ function splitTopLevelClauses(text: string, delimiter: string): string[] {
|
||||
}
|
||||
|
||||
function splitSubjectLeadClaim(text: string): string[] {
|
||||
const match = /^(?<subject>.+?(?:—|–|\s-\s))\s*(?<rest>.+)$/u.exec(text);
|
||||
const match = /^(?<subject>.+?(?:—|–|-))\s*(?<rest>.+)$/u.exec(text);
|
||||
if (!match?.groups) {
|
||||
return [text];
|
||||
}
|
||||
@@ -1021,29 +1018,16 @@ function previewGroundedRemForFile(params: {
|
||||
async function collectMarkdownFiles(inputPaths: string[]): Promise<string[]> {
|
||||
const found = new Set<string>();
|
||||
async function walk(targetPath: string): Promise<void> {
|
||||
if (found.size >= MAX_GROUNDED_REM_FILES) {
|
||||
return;
|
||||
}
|
||||
const resolved = path.resolve(targetPath);
|
||||
const stat = await fs.lstat(resolved);
|
||||
if (stat.isSymbolicLink()) {
|
||||
return;
|
||||
}
|
||||
const stat = await fs.stat(resolved);
|
||||
if (stat.isDirectory()) {
|
||||
const entries = await fs.readdir(resolved, { withFileTypes: true });
|
||||
for (const entry of entries) {
|
||||
if (entry.isDirectory() && GROUNDED_REM_SKIPPED_DIRS.has(entry.name)) {
|
||||
continue;
|
||||
}
|
||||
await walk(path.join(resolved, entry.name));
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (
|
||||
stat.isFile() &&
|
||||
stat.size <= MAX_GROUNDED_REM_FILE_BYTES &&
|
||||
resolved.toLowerCase().endsWith(".md")
|
||||
) {
|
||||
if (stat.isFile() && resolved.toLowerCase().endsWith(".md")) {
|
||||
found.add(resolved);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,11 +11,9 @@ import {
|
||||
applyShortTermPromotions,
|
||||
auditShortTermPromotionArtifacts,
|
||||
isShortTermMemoryPath,
|
||||
recordGroundedShortTermCandidates,
|
||||
rankShortTermPromotionCandidates,
|
||||
recordDreamingPhaseSignals,
|
||||
recordShortTermRecalls,
|
||||
removeGroundedShortTermCandidates,
|
||||
repairShortTermPromotionArtifacts,
|
||||
resolveShortTermRecallLockPath,
|
||||
resolveShortTermPhaseSignalStorePath,
|
||||
@@ -179,128 +177,6 @@ describe("short-term promotion", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("lets grounded durable evidence satisfy default deep thresholds", async () => {
|
||||
await withTempWorkspace(async (workspaceDir) => {
|
||||
await writeDailyMemoryNote(workspaceDir, "2026-04-03", [
|
||||
'Always use "Happy Together" calendar for flights and reservations.',
|
||||
]);
|
||||
|
||||
await recordGroundedShortTermCandidates({
|
||||
workspaceDir,
|
||||
query: "__dreaming_grounded_backfill__",
|
||||
items: [
|
||||
{
|
||||
path: "memory/2026-04-03.md",
|
||||
startLine: 1,
|
||||
endLine: 1,
|
||||
snippet: 'Always use "Happy Together" calendar for flights and reservations.',
|
||||
score: 0.92,
|
||||
query: "__dreaming_grounded_backfill__:lasting-update",
|
||||
signalCount: 2,
|
||||
dayBucket: "2026-04-03",
|
||||
},
|
||||
{
|
||||
path: "memory/2026-04-03.md",
|
||||
startLine: 1,
|
||||
endLine: 1,
|
||||
snippet: 'Always use "Happy Together" calendar for flights and reservations.',
|
||||
score: 0.82,
|
||||
query: "__dreaming_grounded_backfill__:candidate",
|
||||
signalCount: 1,
|
||||
dayBucket: "2026-04-03",
|
||||
},
|
||||
],
|
||||
dedupeByQueryPerDay: true,
|
||||
nowMs: Date.parse("2026-04-03T10:00:00.000Z"),
|
||||
});
|
||||
|
||||
const ranked = await rankShortTermPromotionCandidates({
|
||||
workspaceDir,
|
||||
nowMs: Date.parse("2026-04-03T10:00:00.000Z"),
|
||||
});
|
||||
|
||||
expect(ranked).toHaveLength(1);
|
||||
expect(ranked[0]?.groundedCount).toBe(3);
|
||||
expect(ranked[0]?.uniqueQueries).toBe(2);
|
||||
expect(ranked[0]?.avgScore).toBeGreaterThan(0.85);
|
||||
|
||||
const applied = await applyShortTermPromotions({
|
||||
workspaceDir,
|
||||
candidates: ranked,
|
||||
nowMs: Date.parse("2026-04-03T10:00:00.000Z"),
|
||||
});
|
||||
|
||||
expect(applied.applied).toBe(1);
|
||||
const memory = await fs.readFile(path.join(workspaceDir, "MEMORY.md"), "utf-8");
|
||||
expect(memory).toContain('Always use "Happy Together" calendar');
|
||||
});
|
||||
});
|
||||
|
||||
it("removes grounded-only staged entries without deleting mixed live entries", async () => {
|
||||
await withTempWorkspace(async (workspaceDir) => {
|
||||
await writeDailyMemoryNote(workspaceDir, "2026-04-03", [
|
||||
"Grounded only rule.",
|
||||
"Live recall-backed rule.",
|
||||
]);
|
||||
|
||||
await recordGroundedShortTermCandidates({
|
||||
workspaceDir,
|
||||
query: "__dreaming_grounded_backfill__",
|
||||
items: [
|
||||
{
|
||||
path: "memory/2026-04-03.md",
|
||||
startLine: 1,
|
||||
endLine: 1,
|
||||
snippet: "Grounded only rule.",
|
||||
score: 0.92,
|
||||
query: "__dreaming_grounded_backfill__:lasting-update",
|
||||
signalCount: 2,
|
||||
dayBucket: "2026-04-03",
|
||||
},
|
||||
{
|
||||
path: "memory/2026-04-03.md",
|
||||
startLine: 2,
|
||||
endLine: 2,
|
||||
snippet: "Live recall-backed rule.",
|
||||
score: 0.92,
|
||||
query: "__dreaming_grounded_backfill__:lasting-update",
|
||||
signalCount: 2,
|
||||
dayBucket: "2026-04-03",
|
||||
},
|
||||
],
|
||||
dedupeByQueryPerDay: true,
|
||||
});
|
||||
await recordShortTermRecalls({
|
||||
workspaceDir,
|
||||
query: "live recall",
|
||||
results: [
|
||||
{
|
||||
path: "memory/2026-04-03.md",
|
||||
startLine: 2,
|
||||
endLine: 2,
|
||||
score: 0.87,
|
||||
snippet: "Live recall-backed rule.",
|
||||
source: "memory",
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const result = await removeGroundedShortTermCandidates({ workspaceDir });
|
||||
expect(result.removed).toBe(1);
|
||||
|
||||
const ranked = await rankShortTermPromotionCandidates({
|
||||
workspaceDir,
|
||||
minScore: 0,
|
||||
minRecallCount: 0,
|
||||
minUniqueQueries: 0,
|
||||
});
|
||||
expect(ranked).toHaveLength(1);
|
||||
expect(ranked[0]?.snippet).toContain("Live recall-backed rule");
|
||||
expect(ranked[0]?.groundedCount).toBe(2);
|
||||
expect(ranked[0]?.recallCount).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
it("rewards spaced recalls as consolidation instead of only raw count", async () => {
|
||||
await withTempWorkspace(async (workspaceDir) => {
|
||||
await recordShortTermRecalls({
|
||||
@@ -1224,7 +1100,6 @@ describe("short-term promotion", () => {
|
||||
snippet,
|
||||
recallCount: 2,
|
||||
dailyCount: 0,
|
||||
groundedCount: 0,
|
||||
totalScore: 1.8,
|
||||
maxScore: 0.95,
|
||||
firstRecalledAt: "2026-04-01T00:00:00.000Z",
|
||||
|
||||
@@ -64,7 +64,6 @@ export type ShortTermRecallEntry = {
|
||||
snippet: string;
|
||||
recallCount: number;
|
||||
dailyCount: number;
|
||||
groundedCount: number;
|
||||
totalScore: number;
|
||||
maxScore: number;
|
||||
firstRecalledAt: string;
|
||||
@@ -72,7 +71,6 @@ export type ShortTermRecallEntry = {
|
||||
queryHashes: string[];
|
||||
recallDays: string[];
|
||||
conceptTags: string[];
|
||||
claimHash?: string;
|
||||
promotedAt?: string;
|
||||
};
|
||||
|
||||
@@ -114,12 +112,10 @@ export type PromotionCandidate = {
|
||||
snippet: string;
|
||||
recallCount: number;
|
||||
dailyCount?: number;
|
||||
groundedCount?: number;
|
||||
signalCount?: number;
|
||||
avgScore: number;
|
||||
maxScore: number;
|
||||
uniqueQueries: number;
|
||||
claimHash?: string;
|
||||
promotedAt?: string;
|
||||
firstRecalledAt: string;
|
||||
lastRecalledAt: string;
|
||||
@@ -236,19 +232,13 @@ function normalizeMemoryPath(rawPath: string): string {
|
||||
return rawPath.replaceAll("\\", "/").replace(/^\.\//, "");
|
||||
}
|
||||
|
||||
function buildClaimHash(snippet: string): string {
|
||||
return createHash("sha1").update(normalizeSnippet(snippet)).digest("hex").slice(0, 12);
|
||||
}
|
||||
|
||||
function buildEntryKey(result: {
|
||||
path: string;
|
||||
startLine: number;
|
||||
endLine: number;
|
||||
source: string;
|
||||
claimHash?: string;
|
||||
}): string {
|
||||
const base = `${result.source}:${normalizeMemoryPath(result.path)}:${result.startLine}:${result.endLine}`;
|
||||
return result.claimHash ? `${base}:${result.claimHash}` : base;
|
||||
return `${result.source}:${normalizeMemoryPath(result.path)}:${result.startLine}:${result.endLine}`;
|
||||
}
|
||||
|
||||
function hashQuery(query: string): string {
|
||||
@@ -325,18 +315,6 @@ function normalizeDistinctStrings(values: unknown[], limit: number): string[] {
|
||||
return normalized;
|
||||
}
|
||||
|
||||
function totalSignalCountForEntry(entry: {
|
||||
recallCount?: number;
|
||||
dailyCount?: number;
|
||||
groundedCount?: number;
|
||||
}): number {
|
||||
return (
|
||||
Math.max(0, Math.floor(entry.recallCount ?? 0)) +
|
||||
Math.max(0, Math.floor(entry.dailyCount ?? 0)) +
|
||||
Math.max(0, Math.floor(entry.groundedCount ?? 0))
|
||||
);
|
||||
}
|
||||
|
||||
function calculateConsolidationComponent(recallDays: string[]): number {
|
||||
if (recallDays.length === 0) {
|
||||
return 0;
|
||||
@@ -393,7 +371,6 @@ function normalizeStore(raw: unknown, nowIso: string): ShortTermRecallStore {
|
||||
|
||||
const recallCount = Math.max(0, Math.floor(Number(entry.recallCount) || 0));
|
||||
const dailyCount = Math.max(0, Math.floor(Number(entry.dailyCount) || 0));
|
||||
const groundedCount = Math.max(0, Math.floor(Number(entry.groundedCount) || 0));
|
||||
const totalScore = Math.max(0, Number(entry.totalScore) || 0);
|
||||
const maxScore = clampScore(Number(entry.maxScore) || 0);
|
||||
const firstRecalledAt =
|
||||
@@ -401,10 +378,6 @@ function normalizeStore(raw: unknown, nowIso: string): ShortTermRecallStore {
|
||||
const lastRecalledAt =
|
||||
typeof entry.lastRecalledAt === "string" ? entry.lastRecalledAt : nowIso;
|
||||
const promotedAt = typeof entry.promotedAt === "string" ? entry.promotedAt : undefined;
|
||||
const claimHash =
|
||||
typeof entry.claimHash === "string" && entry.claimHash.trim().length > 0
|
||||
? entry.claimHash.trim()
|
||||
: undefined;
|
||||
const snippet = typeof entry.snippet === "string" ? normalizeSnippet(entry.snippet) : "";
|
||||
const queryHashes = Array.isArray(entry.queryHashes)
|
||||
? normalizeDistinctStrings(entry.queryHashes, MAX_QUERY_HASHES)
|
||||
@@ -423,8 +396,7 @@ function normalizeStore(raw: unknown, nowIso: string): ShortTermRecallStore {
|
||||
)
|
||||
: deriveConceptTags({ path: entryPath, snippet });
|
||||
|
||||
const normalizedKey =
|
||||
key || buildEntryKey({ path: entryPath, startLine, endLine, source, claimHash });
|
||||
const normalizedKey = key || buildEntryKey({ path: entryPath, startLine, endLine, source });
|
||||
entries[normalizedKey] = {
|
||||
key: normalizedKey,
|
||||
path: entryPath,
|
||||
@@ -434,7 +406,6 @@ function normalizeStore(raw: unknown, nowIso: string): ShortTermRecallStore {
|
||||
snippet,
|
||||
recallCount,
|
||||
dailyCount,
|
||||
groundedCount,
|
||||
totalScore,
|
||||
maxScore,
|
||||
firstRecalledAt,
|
||||
@@ -442,7 +413,6 @@ function normalizeStore(raw: unknown, nowIso: string): ShortTermRecallStore {
|
||||
queryHashes,
|
||||
recallDays: recallDays.slice(-MAX_RECALL_DAYS),
|
||||
conceptTags,
|
||||
...(claimHash ? { claimHash } : {}),
|
||||
...(promotedAt ? { promotedAt } : {}),
|
||||
};
|
||||
}
|
||||
@@ -598,7 +568,7 @@ function isProcessLikelyAlive(pid: number): boolean {
|
||||
process.kill(pid, 0);
|
||||
return true;
|
||||
} catch (err) {
|
||||
const code = (err as NodeJS.ErrnoException).code;
|
||||
const code = (err as NodeJS.ErrnoException | undefined)?.code;
|
||||
if (code === "ESRCH") {
|
||||
return false;
|
||||
}
|
||||
@@ -651,8 +621,9 @@ async function withShortTermLock<T>(workspaceDir: string, task: () => Promise<T>
|
||||
const startedAt = Date.now();
|
||||
|
||||
while (true) {
|
||||
let lockHandle: Awaited<ReturnType<typeof fs.open>> | undefined;
|
||||
try {
|
||||
const lockHandle = await fs.open(lockPath, "wx");
|
||||
lockHandle = await fs.open(lockPath, "wx");
|
||||
await lockHandle
|
||||
.writeFile(`${process.pid}:${Date.now()}\n`, "utf-8")
|
||||
.catch(() => undefined);
|
||||
@@ -841,21 +812,10 @@ export async function recordShortTermRecalls(params: {
|
||||
const store = await readStore(workspaceDir, nowIso);
|
||||
|
||||
for (const result of relevant) {
|
||||
const key = buildEntryKey(result);
|
||||
const normalizedPath = normalizeMemoryPath(result.path);
|
||||
const snippet = normalizeSnippet(result.snippet);
|
||||
const claimHash = snippet ? buildClaimHash(snippet) : undefined;
|
||||
const groundedKey = claimHash
|
||||
? buildEntryKey({
|
||||
path: normalizedPath,
|
||||
startLine: Math.max(1, Math.floor(result.startLine)),
|
||||
endLine: Math.max(1, Math.floor(result.endLine)),
|
||||
source: "memory",
|
||||
claimHash,
|
||||
})
|
||||
: null;
|
||||
const baseKey = buildEntryKey(result);
|
||||
const key = groundedKey && store.entries[groundedKey] ? groundedKey : baseKey;
|
||||
const existing = store.entries[key];
|
||||
const snippet = normalizeSnippet(result.snippet);
|
||||
const score = clampScore(result.score);
|
||||
const recallDaysBase = existing?.recallDays ?? [];
|
||||
const queryHashesBase = existing?.queryHashes ?? [];
|
||||
@@ -886,7 +846,6 @@ export async function recordShortTermRecalls(params: {
|
||||
snippet: snippet || existing?.snippet || "",
|
||||
recallCount,
|
||||
dailyCount,
|
||||
groundedCount: Math.max(0, Math.floor(existing?.groundedCount ?? 0)),
|
||||
totalScore,
|
||||
maxScore,
|
||||
firstRecalledAt: existing?.firstRecalledAt ?? nowIso,
|
||||
@@ -894,7 +853,6 @@ export async function recordShortTermRecalls(params: {
|
||||
queryHashes,
|
||||
recallDays,
|
||||
conceptTags: conceptTags.length > 0 ? conceptTags : (existing?.conceptTags ?? []),
|
||||
...(existing?.claimHash ? { claimHash: existing.claimHash } : {}),
|
||||
...(existing?.promotedAt ? { promotedAt: existing.promotedAt } : {}),
|
||||
};
|
||||
}
|
||||
@@ -916,129 +874,6 @@ export async function recordShortTermRecalls(params: {
|
||||
});
|
||||
}
|
||||
|
||||
export async function recordGroundedShortTermCandidates(params: {
|
||||
workspaceDir?: string;
|
||||
query: string;
|
||||
items: Array<{
|
||||
path: string;
|
||||
startLine: number;
|
||||
endLine: number;
|
||||
snippet: string;
|
||||
score: number;
|
||||
query?: string;
|
||||
signalCount?: number;
|
||||
dayBucket?: string;
|
||||
}>;
|
||||
dedupeByQueryPerDay?: boolean;
|
||||
dayBucket?: string;
|
||||
nowMs?: number;
|
||||
timezone?: string;
|
||||
}): Promise<void> {
|
||||
const workspaceDir = params.workspaceDir?.trim();
|
||||
if (!workspaceDir) {
|
||||
return;
|
||||
}
|
||||
const query = params.query.trim();
|
||||
if (!query) {
|
||||
return;
|
||||
}
|
||||
const relevant = params.items
|
||||
.map((item) => {
|
||||
const snippet = normalizeSnippet(item.snippet);
|
||||
const normalizedPath = normalizeMemoryPath(item.path);
|
||||
if (
|
||||
!snippet ||
|
||||
!normalizedPath ||
|
||||
!isShortTermMemoryPath(normalizedPath) ||
|
||||
!Number.isFinite(item.startLine) ||
|
||||
!Number.isFinite(item.endLine)
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
path: normalizedPath,
|
||||
startLine: Math.max(1, Math.floor(item.startLine)),
|
||||
endLine: Math.max(1, Math.floor(item.endLine)),
|
||||
snippet,
|
||||
score: clampScore(item.score),
|
||||
query: normalizeSnippet(item.query ?? query),
|
||||
signalCount: Math.max(1, Math.floor(item.signalCount ?? 1)),
|
||||
dayBucket: normalizeIsoDay(item.dayBucket ?? params.dayBucket ?? ""),
|
||||
};
|
||||
})
|
||||
.filter((item): item is NonNullable<typeof item> => item !== null);
|
||||
if (relevant.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const nowMs = Number.isFinite(params.nowMs) ? (params.nowMs as number) : Date.now();
|
||||
const nowIso = new Date(nowMs).toISOString();
|
||||
const fallbackDayBucket = formatMemoryDreamingDay(nowMs, params.timezone);
|
||||
await withShortTermLock(workspaceDir, async () => {
|
||||
const store = await readStore(workspaceDir, nowIso);
|
||||
|
||||
for (const item of relevant) {
|
||||
const dayBucket = item.dayBucket ?? fallbackDayBucket;
|
||||
const effectiveQuery = item.query || query;
|
||||
if (!effectiveQuery) {
|
||||
continue;
|
||||
}
|
||||
const queryHash = hashQuery(effectiveQuery);
|
||||
const claimHash = buildClaimHash(item.snippet);
|
||||
const key = buildEntryKey({
|
||||
path: item.path,
|
||||
startLine: item.startLine,
|
||||
endLine: item.endLine,
|
||||
source: "memory",
|
||||
claimHash,
|
||||
});
|
||||
const existing = store.entries[key];
|
||||
const recallDaysBase = existing?.recallDays ?? [];
|
||||
const queryHashesBase = existing?.queryHashes ?? [];
|
||||
const dedupeSignal =
|
||||
Boolean(params.dedupeByQueryPerDay) &&
|
||||
queryHashesBase.includes(queryHash) &&
|
||||
recallDaysBase.includes(dayBucket);
|
||||
const groundedCount = Math.max(
|
||||
0,
|
||||
Math.floor(existing?.groundedCount ?? 0) + (dedupeSignal ? 0 : item.signalCount),
|
||||
);
|
||||
const totalScore = Math.max(
|
||||
0,
|
||||
(existing?.totalScore ?? 0) + (dedupeSignal ? 0 : item.score * item.signalCount),
|
||||
);
|
||||
const maxScore = Math.max(existing?.maxScore ?? 0, dedupeSignal ? 0 : item.score);
|
||||
const queryHashes = mergeQueryHashes(existing?.queryHashes ?? [], queryHash);
|
||||
const recallDays = mergeRecentDistinct(recallDaysBase, dayBucket, MAX_RECALL_DAYS);
|
||||
const conceptTags = deriveConceptTags({ path: item.path, snippet: item.snippet });
|
||||
|
||||
store.entries[key] = {
|
||||
key,
|
||||
path: item.path,
|
||||
startLine: item.startLine,
|
||||
endLine: item.endLine,
|
||||
source: "memory",
|
||||
snippet: item.snippet,
|
||||
recallCount: Math.max(0, Math.floor(existing?.recallCount ?? 0)),
|
||||
dailyCount: Math.max(0, Math.floor(existing?.dailyCount ?? 0)),
|
||||
groundedCount,
|
||||
totalScore,
|
||||
maxScore,
|
||||
firstRecalledAt: existing?.firstRecalledAt ?? nowIso,
|
||||
lastRecalledAt: nowIso,
|
||||
queryHashes,
|
||||
recallDays,
|
||||
conceptTags: conceptTags.length > 0 ? conceptTags : (existing?.conceptTags ?? []),
|
||||
claimHash,
|
||||
...(existing?.promotedAt ? { promotedAt: existing.promotedAt } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
store.updatedAt = nowIso;
|
||||
await writeStore(workspaceDir, store);
|
||||
});
|
||||
}
|
||||
|
||||
export async function recordDreamingPhaseSignals(params: {
|
||||
workspaceDir?: string;
|
||||
phase: "light" | "rem";
|
||||
@@ -1135,8 +970,7 @@ export async function rankShortTermPromotionCandidates(
|
||||
}
|
||||
const recallCount = Math.max(0, Math.floor(entry.recallCount ?? 0));
|
||||
const dailyCount = Math.max(0, Math.floor(entry.dailyCount ?? 0));
|
||||
const groundedCount = Math.max(0, Math.floor(entry.groundedCount ?? 0));
|
||||
const signalCount = totalSignalCountForEntry(entry);
|
||||
const signalCount = recallCount + dailyCount;
|
||||
if (signalCount <= 0) {
|
||||
continue;
|
||||
}
|
||||
@@ -1162,10 +996,7 @@ export async function rankShortTermPromotionCandidates(
|
||||
const recency = clampScore(calculateRecencyComponent(ageDays, halfLifeDays));
|
||||
const recallDays = entry.recallDays ?? [];
|
||||
const conceptTags = entry.conceptTags ?? [];
|
||||
const consolidation = Math.max(
|
||||
calculateConsolidationComponent(recallDays),
|
||||
clampScore(groundedCount / 3),
|
||||
);
|
||||
const consolidation = calculateConsolidationComponent(recallDays);
|
||||
const conceptual = calculateConceptualComponent(conceptTags);
|
||||
|
||||
const phaseBoost = calculatePhaseSignalBoost(phaseSignals.entries[entry.key], nowMs);
|
||||
@@ -1191,12 +1022,10 @@ export async function rankShortTermPromotionCandidates(
|
||||
snippet: entry.snippet,
|
||||
recallCount,
|
||||
dailyCount,
|
||||
groundedCount,
|
||||
signalCount,
|
||||
avgScore,
|
||||
maxScore: clampScore(entry.maxScore),
|
||||
uniqueQueries,
|
||||
...(entry.claimHash ? { claimHash: entry.claimHash } : {}),
|
||||
promotedAt: entry.promotedAt,
|
||||
firstRecalledAt: entry.firstRecalledAt,
|
||||
lastRecalledAt: entry.lastRecalledAt,
|
||||
@@ -1471,15 +1300,9 @@ export async function applyShortTermPromotions(
|
||||
if (candidate.score < minScore) {
|
||||
return false;
|
||||
}
|
||||
const candidateSignalCount = Math.max(
|
||||
0,
|
||||
const candidateSignalCount =
|
||||
candidate.signalCount ??
|
||||
totalSignalCountForEntry({
|
||||
recallCount: candidate.recallCount,
|
||||
dailyCount: candidate.dailyCount,
|
||||
groundedCount: candidate.groundedCount,
|
||||
}),
|
||||
);
|
||||
Math.max(0, candidate.recallCount) + Math.max(0, candidate.dailyCount ?? 0);
|
||||
if (candidateSignalCount < minRecallCount) {
|
||||
return false;
|
||||
}
|
||||
@@ -1783,10 +1606,6 @@ export async function repairShortTermPromotionArtifacts(params: {
|
||||
0,
|
||||
Math.floor((entry as { dailyCount?: number }).dailyCount ?? 0),
|
||||
),
|
||||
groundedCount: Math.max(
|
||||
0,
|
||||
Math.floor((entry as { groundedCount?: number }).groundedCount ?? 0),
|
||||
),
|
||||
queryHashes: (entry.queryHashes ?? []).slice(-MAX_QUERY_HASHES),
|
||||
recallDays: mergeRecentDistinct(entry.recallDays ?? [], fallbackDay, MAX_RECALL_DAYS),
|
||||
conceptTags: conceptTags.length > 0 ? conceptTags : (entry.conceptTags ?? []),
|
||||
@@ -1822,50 +1641,6 @@ export async function repairShortTermPromotionArtifacts(params: {
|
||||
};
|
||||
}
|
||||
|
||||
export async function removeGroundedShortTermCandidates(params: {
|
||||
workspaceDir: string;
|
||||
}): Promise<{ removed: number; storePath: string }> {
|
||||
const workspaceDir = params.workspaceDir.trim();
|
||||
const storePath = resolveStorePath(workspaceDir);
|
||||
const nowIso = new Date().toISOString();
|
||||
let removed = 0;
|
||||
|
||||
await withShortTermLock(workspaceDir, async () => {
|
||||
const [store, phaseSignals] = await Promise.all([
|
||||
readStore(workspaceDir, nowIso),
|
||||
readPhaseSignalStore(workspaceDir, nowIso),
|
||||
]);
|
||||
|
||||
for (const [key, entry] of Object.entries(store.entries)) {
|
||||
if (
|
||||
Math.max(0, Math.floor(entry.groundedCount ?? 0)) > 0 &&
|
||||
Math.max(0, Math.floor(entry.recallCount ?? 0)) === 0 &&
|
||||
Math.max(0, Math.floor(entry.dailyCount ?? 0)) === 0
|
||||
) {
|
||||
delete store.entries[key];
|
||||
removed += 1;
|
||||
}
|
||||
}
|
||||
|
||||
for (const key of Object.keys(phaseSignals.entries)) {
|
||||
if (!Object.hasOwn(store.entries, key)) {
|
||||
delete phaseSignals.entries[key];
|
||||
}
|
||||
}
|
||||
|
||||
if (removed > 0) {
|
||||
store.updatedAt = nowIso;
|
||||
phaseSignals.updatedAt = nowIso;
|
||||
await Promise.all([
|
||||
writeStore(workspaceDir, store),
|
||||
writePhaseSignalStore(workspaceDir, phaseSignals),
|
||||
]);
|
||||
}
|
||||
});
|
||||
|
||||
return { removed, storePath };
|
||||
}
|
||||
|
||||
export const __testing = {
|
||||
parseLockOwnerPid,
|
||||
canStealStaleLock,
|
||||
@@ -1873,6 +1648,4 @@ export const __testing = {
|
||||
deriveConceptTags,
|
||||
calculateConsolidationComponent,
|
||||
calculatePhaseSignalBoost,
|
||||
buildClaimHash,
|
||||
totalSignalCountForEntry,
|
||||
};
|
||||
|
||||
@@ -22,7 +22,7 @@ export const secretTargetRegistryEntries = [
|
||||
|
||||
export function collectRuntimeConfigAssignments(params: {
|
||||
config: { channels?: Record<string, unknown> };
|
||||
defaults?: SecretDefaults;
|
||||
defaults: SecretDefaults | undefined;
|
||||
context: ResolverContext;
|
||||
}): void {
|
||||
const msteams = getChannelRecord(params.config, "msteams");
|
||||
|
||||
@@ -57,7 +57,7 @@ export const secretTargetRegistryEntries = [
|
||||
|
||||
export function collectRuntimeConfigAssignments(params: {
|
||||
config: { channels?: Record<string, unknown> };
|
||||
defaults?: SecretDefaults;
|
||||
defaults: SecretDefaults | undefined;
|
||||
context: ResolverContext;
|
||||
}): void {
|
||||
const resolved = getChannelSurface(params.config, "nextcloud-talk");
|
||||
|
||||
@@ -445,111 +445,4 @@ describe("ollama plugin", () => {
|
||||
expect(payloadSeen?.think).toBe(false);
|
||||
expect((payloadSeen?.options as Record<string, unknown> | undefined)?.think).toBeUndefined();
|
||||
});
|
||||
|
||||
it("wraps native Ollama payloads with top-level think=true when thinking is enabled", () => {
|
||||
const provider = registerProvider();
|
||||
let payloadSeen: Record<string, unknown> | undefined;
|
||||
const baseStreamFn = vi.fn((_model, _context, options) => {
|
||||
const payload: Record<string, unknown> = {
|
||||
messages: [],
|
||||
options: { num_ctx: 65536 },
|
||||
stream: true,
|
||||
};
|
||||
options?.onPayload?.(payload, _model);
|
||||
payloadSeen = payload;
|
||||
return {} as never;
|
||||
});
|
||||
|
||||
const wrapped = provider.wrapStreamFn?.({
|
||||
config: {
|
||||
models: {
|
||||
providers: {
|
||||
ollama: {
|
||||
api: "ollama",
|
||||
baseUrl: "http://127.0.0.1:11434",
|
||||
models: [],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
provider: "ollama",
|
||||
modelId: "qwen3.5:9b",
|
||||
thinkingLevel: "low",
|
||||
model: {
|
||||
api: "ollama",
|
||||
provider: "ollama",
|
||||
id: "qwen3.5:9b",
|
||||
baseUrl: "http://127.0.0.1:11434",
|
||||
contextWindow: 131_072,
|
||||
},
|
||||
streamFn: baseStreamFn,
|
||||
});
|
||||
|
||||
expect(typeof wrapped).toBe("function");
|
||||
void wrapped?.(
|
||||
{
|
||||
api: "ollama",
|
||||
provider: "ollama",
|
||||
id: "qwen3.5:9b",
|
||||
} as never,
|
||||
{} as never,
|
||||
{},
|
||||
);
|
||||
expect(baseStreamFn).toHaveBeenCalledTimes(1);
|
||||
expect(payloadSeen?.think).toBe(true);
|
||||
expect((payloadSeen?.options as Record<string, unknown> | undefined)?.think).toBeUndefined();
|
||||
});
|
||||
|
||||
it("does not set think param when thinkingLevel is undefined", () => {
|
||||
const provider = registerProvider();
|
||||
let payloadSeen: Record<string, unknown> | undefined;
|
||||
const baseStreamFn = vi.fn((_model, _context, options) => {
|
||||
const payload: Record<string, unknown> = {
|
||||
messages: [],
|
||||
options: { num_ctx: 65536 },
|
||||
stream: true,
|
||||
};
|
||||
options?.onPayload?.(payload, _model);
|
||||
payloadSeen = payload;
|
||||
return {} as never;
|
||||
});
|
||||
|
||||
const wrapped = provider.wrapStreamFn?.({
|
||||
config: {
|
||||
models: {
|
||||
providers: {
|
||||
ollama: {
|
||||
api: "ollama",
|
||||
baseUrl: "http://127.0.0.1:11434",
|
||||
models: [],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
provider: "ollama",
|
||||
modelId: "qwen3.5:9b",
|
||||
thinkingLevel: undefined,
|
||||
model: {
|
||||
api: "ollama",
|
||||
provider: "ollama",
|
||||
id: "qwen3.5:9b",
|
||||
baseUrl: "http://127.0.0.1:11434",
|
||||
contextWindow: 131_072,
|
||||
},
|
||||
streamFn: baseStreamFn,
|
||||
});
|
||||
|
||||
expect(typeof wrapped).toBe("function");
|
||||
void wrapped?.(
|
||||
{
|
||||
api: "ollama",
|
||||
provider: "ollama",
|
||||
id: "qwen3.5:9b",
|
||||
} as never,
|
||||
{} as never,
|
||||
{},
|
||||
);
|
||||
expect(baseStreamFn).toHaveBeenCalledTimes(1);
|
||||
expect(payloadSeen?.think).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
"id": "ollama",
|
||||
"enabledByDefault": true,
|
||||
"providers": ["ollama"],
|
||||
"providerDiscoveryEntry": "./provider-discovery.ts",
|
||||
"providerAuthEnvVars": {
|
||||
"ollama": ["OLLAMA_API_KEY"]
|
||||
},
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
const repoRoot = path.resolve(import.meta.dirname, "../..");
|
||||
|
||||
function readPluginSource(relativePath: string): string {
|
||||
return fs.readFileSync(path.join(repoRoot, relativePath), "utf8");
|
||||
}
|
||||
|
||||
describe("ollama provider discovery import surface", () => {
|
||||
it("stays off the full provider runtime graph", () => {
|
||||
const source = readPluginSource("extensions/ollama/provider-discovery.ts");
|
||||
|
||||
for (const forbidden of [
|
||||
"./index",
|
||||
"./api",
|
||||
"./runtime-api",
|
||||
"./src/setup",
|
||||
"./src/stream",
|
||||
"./src/embedding-provider",
|
||||
"./src/memory-embedding-adapter",
|
||||
"./src/web-search-provider",
|
||||
"openclaw/plugin-sdk/text-runtime",
|
||||
"openclaw/plugin-sdk/plugin-entry",
|
||||
]) {
|
||||
expect(source, `provider discovery must not import ${forbidden}`).not.toContain(forbidden);
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -1,199 +0,0 @@
|
||||
import type { ProviderCatalogContext } from "openclaw/plugin-sdk/provider-catalog-shared";
|
||||
import type { ModelProviderConfig } from "openclaw/plugin-sdk/provider-model-shared";
|
||||
import { OLLAMA_DEFAULT_BASE_URL } from "./src/defaults.js";
|
||||
import {
|
||||
buildOllamaModelDefinition,
|
||||
enrichOllamaModelsWithContext,
|
||||
fetchOllamaModels,
|
||||
resolveOllamaApiBase,
|
||||
} from "./src/provider-models.js";
|
||||
|
||||
const PROVIDER_ID = "ollama";
|
||||
const DEFAULT_API_KEY = "ollama-local";
|
||||
const OLLAMA_CONTEXT_ENRICH_LIMIT = 200;
|
||||
|
||||
type OllamaPluginConfig = {
|
||||
discovery?: {
|
||||
enabled?: boolean;
|
||||
};
|
||||
};
|
||||
|
||||
type OllamaProviderLikeConfig = ModelProviderConfig;
|
||||
type OllamaProviderPlugin = {
|
||||
id: string;
|
||||
label: string;
|
||||
docsPath: string;
|
||||
envVars: string[];
|
||||
auth: [];
|
||||
discovery: {
|
||||
order: "late";
|
||||
run: (ctx: ProviderCatalogContext) => ReturnType<typeof runOllamaDiscovery>;
|
||||
};
|
||||
};
|
||||
|
||||
function normalizeOptionalString(value: unknown): string | undefined {
|
||||
return typeof value === "string" && value.trim() ? value.trim() : undefined;
|
||||
}
|
||||
|
||||
function readStringValue(value: unknown): string | undefined {
|
||||
if (typeof value === "string") {
|
||||
return normalizeOptionalString(value);
|
||||
}
|
||||
if (value && typeof value === "object" && "value" in value) {
|
||||
return normalizeOptionalString((value as { value?: unknown }).value);
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function resolveOllamaDiscoveryApiKey(params: {
|
||||
env: NodeJS.ProcessEnv;
|
||||
explicitApiKey?: string;
|
||||
resolvedApiKey?: string;
|
||||
}): string {
|
||||
const envApiKey = params.env.OLLAMA_API_KEY?.trim() ? "OLLAMA_API_KEY" : undefined;
|
||||
return envApiKey ?? params.explicitApiKey ?? params.resolvedApiKey ?? DEFAULT_API_KEY;
|
||||
}
|
||||
|
||||
function shouldSkipAmbientOllamaDiscovery(env: NodeJS.ProcessEnv): boolean {
|
||||
return Boolean(env.VITEST) || env.NODE_ENV === "test";
|
||||
}
|
||||
|
||||
function hasMeaningfulExplicitOllamaConfig(
|
||||
providerConfig: OllamaProviderLikeConfig | undefined,
|
||||
): boolean {
|
||||
if (!providerConfig) {
|
||||
return false;
|
||||
}
|
||||
if (Array.isArray(providerConfig.models) && providerConfig.models.length > 0) {
|
||||
return true;
|
||||
}
|
||||
if (typeof providerConfig.baseUrl === "string" && providerConfig.baseUrl.trim()) {
|
||||
return resolveOllamaApiBase(providerConfig.baseUrl) !== OLLAMA_DEFAULT_BASE_URL;
|
||||
}
|
||||
if (readStringValue(providerConfig.apiKey)) {
|
||||
return true;
|
||||
}
|
||||
if (providerConfig.auth) {
|
||||
return true;
|
||||
}
|
||||
if (typeof providerConfig.authHeader === "boolean") {
|
||||
return true;
|
||||
}
|
||||
if (
|
||||
providerConfig.headers &&
|
||||
typeof providerConfig.headers === "object" &&
|
||||
Object.keys(providerConfig.headers).length > 0
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
if (providerConfig.request) {
|
||||
return true;
|
||||
}
|
||||
if (typeof providerConfig.injectNumCtxForOpenAICompat === "boolean") {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
async function buildOllamaProvider(
|
||||
configuredBaseUrl?: string,
|
||||
opts?: { quiet?: boolean },
|
||||
): Promise<ModelProviderConfig> {
|
||||
const apiBase = resolveOllamaApiBase(configuredBaseUrl);
|
||||
const { reachable, models } = await fetchOllamaModels(apiBase);
|
||||
if (!reachable && !opts?.quiet) {
|
||||
console.warn(`Ollama could not be reached at ${apiBase}.`);
|
||||
}
|
||||
const discovered = await enrichOllamaModelsWithContext(
|
||||
apiBase,
|
||||
models.slice(0, OLLAMA_CONTEXT_ENRICH_LIMIT),
|
||||
);
|
||||
return {
|
||||
baseUrl: apiBase,
|
||||
api: "ollama",
|
||||
models: discovered.map((model) =>
|
||||
buildOllamaModelDefinition(model.name, model.contextWindow, model.capabilities),
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
function resolveOllamaPluginConfig(ctx: ProviderCatalogContext): OllamaPluginConfig {
|
||||
const entries = (ctx.config.plugins?.entries ?? {}) as Record<
|
||||
string,
|
||||
{ config?: OllamaPluginConfig }
|
||||
>;
|
||||
return entries.ollama?.config ?? {};
|
||||
}
|
||||
|
||||
async function runOllamaDiscovery(ctx: ProviderCatalogContext) {
|
||||
const pluginConfig = resolveOllamaPluginConfig(ctx);
|
||||
const explicit = ctx.config.models?.providers?.ollama;
|
||||
const hasExplicitModels = Array.isArray(explicit?.models) && explicit.models.length > 0;
|
||||
const hasMeaningfulExplicitConfig = hasMeaningfulExplicitOllamaConfig(explicit);
|
||||
const discoveryEnabled =
|
||||
pluginConfig.discovery?.enabled ?? ctx.config.models?.ollamaDiscovery?.enabled;
|
||||
if (!hasExplicitModels && discoveryEnabled === false) {
|
||||
return null;
|
||||
}
|
||||
const ollamaKey = ctx.resolveProviderApiKey(PROVIDER_ID).apiKey;
|
||||
const hasRealOllamaKey =
|
||||
typeof ollamaKey === "string" &&
|
||||
ollamaKey.trim().length > 0 &&
|
||||
ollamaKey.trim() !== DEFAULT_API_KEY;
|
||||
const explicitApiKey = readStringValue(explicit?.apiKey);
|
||||
if (hasExplicitModels && explicit) {
|
||||
return {
|
||||
provider: {
|
||||
...explicit,
|
||||
baseUrl:
|
||||
typeof explicit.baseUrl === "string" && explicit.baseUrl.trim()
|
||||
? resolveOllamaApiBase(explicit.baseUrl)
|
||||
: OLLAMA_DEFAULT_BASE_URL,
|
||||
api: explicit.api ?? "ollama",
|
||||
apiKey: resolveOllamaDiscoveryApiKey({
|
||||
env: ctx.env,
|
||||
explicitApiKey,
|
||||
resolvedApiKey: ollamaKey,
|
||||
}),
|
||||
},
|
||||
};
|
||||
}
|
||||
if (
|
||||
!hasRealOllamaKey &&
|
||||
!hasMeaningfulExplicitConfig &&
|
||||
shouldSkipAmbientOllamaDiscovery(ctx.env)
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const provider = await buildOllamaProvider(explicit?.baseUrl, {
|
||||
quiet: !hasRealOllamaKey && !hasMeaningfulExplicitConfig,
|
||||
});
|
||||
if (provider.models?.length === 0 && !ollamaKey && !explicit?.apiKey) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
provider: {
|
||||
...provider,
|
||||
apiKey: resolveOllamaDiscoveryApiKey({
|
||||
env: ctx.env,
|
||||
explicitApiKey,
|
||||
resolvedApiKey: ollamaKey,
|
||||
}),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
export const ollamaProviderDiscovery: OllamaProviderPlugin = {
|
||||
id: PROVIDER_ID,
|
||||
label: "Ollama",
|
||||
docsPath: "/providers/ollama",
|
||||
envVars: ["OLLAMA_API_KEY"],
|
||||
auth: [],
|
||||
discovery: {
|
||||
order: "late",
|
||||
run: runOllamaDiscovery,
|
||||
},
|
||||
};
|
||||
|
||||
export default ollamaProviderDiscovery;
|
||||
@@ -1,228 +0,0 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { buildAssistantMessage, createOllamaStreamFn } from "./stream.js";
|
||||
|
||||
function makeOllamaResponse(params: {
|
||||
content?: string;
|
||||
thinking?: string;
|
||||
reasoning?: string;
|
||||
tool_calls?: Array<{ function: { name: string; arguments: Record<string, unknown> } }>;
|
||||
}) {
|
||||
return {
|
||||
model: "qwen3.5",
|
||||
created_at: new Date().toISOString(),
|
||||
message: {
|
||||
role: "assistant" as const,
|
||||
content: params.content ?? "",
|
||||
...(params.thinking != null ? { thinking: params.thinking } : {}),
|
||||
...(params.reasoning != null ? { reasoning: params.reasoning } : {}),
|
||||
...(params.tool_calls ? { tool_calls: params.tool_calls } : {}),
|
||||
},
|
||||
done: true,
|
||||
prompt_eval_count: 100,
|
||||
eval_count: 50,
|
||||
};
|
||||
}
|
||||
|
||||
const MODEL_INFO = { api: "ollama", provider: "ollama", id: "qwen3.5" };
|
||||
|
||||
describe("buildAssistantMessage", () => {
|
||||
it("includes thinking block when response has thinking field", () => {
|
||||
const response = makeOllamaResponse({
|
||||
thinking: "Let me think about this",
|
||||
content: "The answer is 42",
|
||||
});
|
||||
const msg = buildAssistantMessage(response, MODEL_INFO);
|
||||
expect(msg.content).toHaveLength(2);
|
||||
expect(msg.content[0]).toEqual({ type: "thinking", thinking: "Let me think about this" });
|
||||
expect(msg.content[1]).toEqual({ type: "text", text: "The answer is 42" });
|
||||
});
|
||||
|
||||
it("includes thinking block when response has reasoning field", () => {
|
||||
const response = makeOllamaResponse({
|
||||
reasoning: "Step by step analysis",
|
||||
content: "Result is 7",
|
||||
});
|
||||
const msg = buildAssistantMessage(response, MODEL_INFO);
|
||||
expect(msg.content).toHaveLength(2);
|
||||
expect(msg.content[0]).toEqual({ type: "thinking", thinking: "Step by step analysis" });
|
||||
expect(msg.content[1]).toEqual({ type: "text", text: "Result is 7" });
|
||||
});
|
||||
|
||||
it("prefers thinking over reasoning when both are present", () => {
|
||||
const response = makeOllamaResponse({
|
||||
thinking: "From thinking field",
|
||||
reasoning: "From reasoning field",
|
||||
content: "Answer",
|
||||
});
|
||||
const msg = buildAssistantMessage(response, MODEL_INFO);
|
||||
expect(msg.content[0]).toEqual({ type: "thinking", thinking: "From thinking field" });
|
||||
});
|
||||
|
||||
it("omits thinking block when no thinking or reasoning field", () => {
|
||||
const response = makeOllamaResponse({
|
||||
content: "Just text",
|
||||
});
|
||||
const msg = buildAssistantMessage(response, MODEL_INFO);
|
||||
expect(msg.content).toHaveLength(1);
|
||||
expect(msg.content[0]).toEqual({ type: "text", text: "Just text" });
|
||||
});
|
||||
|
||||
it("omits thinking block when thinking field is empty", () => {
|
||||
const response = makeOllamaResponse({
|
||||
thinking: "",
|
||||
content: "Just text",
|
||||
});
|
||||
const msg = buildAssistantMessage(response, MODEL_INFO);
|
||||
expect(msg.content).toHaveLength(1);
|
||||
expect(msg.content[0]).toEqual({ type: "text", text: "Just text" });
|
||||
});
|
||||
});
|
||||
|
||||
describe("createOllamaStreamFn thinking events", () => {
|
||||
afterEach(() => vi.unstubAllGlobals());
|
||||
|
||||
function makeNdjsonBody(chunks: Array<Record<string, unknown>>): ReadableStream<Uint8Array> {
|
||||
const encoder = new TextEncoder();
|
||||
const lines = chunks.map((c) => JSON.stringify(c) + "\n").join("");
|
||||
return new ReadableStream({
|
||||
start(controller) {
|
||||
controller.enqueue(encoder.encode(lines));
|
||||
controller.close();
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
it("emits thinking_start, thinking_delta, and thinking_end events for thinking content", async () => {
|
||||
const thinkingChunks = [
|
||||
{
|
||||
model: "qwen3.5",
|
||||
created_at: "2026-01-01T00:00:00Z",
|
||||
message: { role: "assistant", content: "", thinking: "Step 1" },
|
||||
done: false,
|
||||
},
|
||||
{
|
||||
model: "qwen3.5",
|
||||
created_at: "2026-01-01T00:00:01Z",
|
||||
message: { role: "assistant", content: "", thinking: " and step 2" },
|
||||
done: false,
|
||||
},
|
||||
{
|
||||
model: "qwen3.5",
|
||||
created_at: "2026-01-01T00:00:02Z",
|
||||
message: { role: "assistant", content: "The answer", thinking: "" },
|
||||
done: false,
|
||||
},
|
||||
{
|
||||
model: "qwen3.5",
|
||||
created_at: "2026-01-01T00:00:03Z",
|
||||
message: { role: "assistant", content: "" },
|
||||
done: true,
|
||||
done_reason: "stop",
|
||||
prompt_eval_count: 10,
|
||||
eval_count: 5,
|
||||
},
|
||||
];
|
||||
|
||||
const body = makeNdjsonBody(thinkingChunks);
|
||||
const fetchMock = vi.fn().mockResolvedValue({
|
||||
ok: true,
|
||||
body,
|
||||
});
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
|
||||
const streamFn = createOllamaStreamFn("http://localhost:11434");
|
||||
const stream = streamFn(
|
||||
{ api: "ollama", provider: "ollama", id: "qwen3.5", contextWindow: 65536 } as never,
|
||||
{ messages: [{ role: "user", content: "test" }] } as never,
|
||||
{},
|
||||
);
|
||||
|
||||
const events: Array<{ type: string; [key: string]: unknown }> = [];
|
||||
for await (const event of stream as AsyncIterable<{ type: string; [key: string]: unknown }>) {
|
||||
events.push(event);
|
||||
}
|
||||
|
||||
const eventTypes = events.map((e) => e.type);
|
||||
|
||||
expect(eventTypes).toContain("thinking_start");
|
||||
expect(eventTypes).toContain("thinking_delta");
|
||||
expect(eventTypes).toContain("thinking_end");
|
||||
expect(eventTypes).toContain("text_start");
|
||||
expect(eventTypes).toContain("text_delta");
|
||||
expect(eventTypes).toContain("done");
|
||||
|
||||
// thinking_start comes before text_start
|
||||
const thinkingStartIndex = eventTypes.indexOf("thinking_start");
|
||||
const textStartIndex = eventTypes.indexOf("text_start");
|
||||
expect(thinkingStartIndex).toBeLessThan(textStartIndex);
|
||||
|
||||
// thinking_end comes before text_start
|
||||
const thinkingEndIndex = eventTypes.indexOf("thinking_end");
|
||||
expect(thinkingEndIndex).toBeLessThan(textStartIndex);
|
||||
|
||||
// Thinking deltas have correct content
|
||||
const thinkingDeltas = events.filter((e) => e.type === "thinking_delta");
|
||||
expect(thinkingDeltas).toHaveLength(2);
|
||||
expect(thinkingDeltas[0].delta).toBe("Step 1");
|
||||
expect(thinkingDeltas[1].delta).toBe(" and step 2");
|
||||
|
||||
// Content index: thinking at 0, text at 1
|
||||
const thinkingStart = events.find((e) => e.type === "thinking_start");
|
||||
expect(thinkingStart?.contentIndex).toBe(0);
|
||||
const textStart = events.find((e) => e.type === "text_start");
|
||||
expect(textStart?.contentIndex).toBe(1);
|
||||
|
||||
// Final message has thinking block
|
||||
const done = events.find((e) => e.type === "done") as { message?: { content: unknown[] } };
|
||||
const content = done?.message?.content ?? [];
|
||||
expect(content[0]).toMatchObject({ type: "thinking", thinking: "Step 1 and step 2" });
|
||||
expect(content[1]).toMatchObject({ type: "text", text: "The answer" });
|
||||
});
|
||||
|
||||
it("streams without thinking events when no thinking content is present", async () => {
|
||||
const chunks = [
|
||||
{
|
||||
model: "qwen3.5",
|
||||
created_at: "2026-01-01T00:00:00Z",
|
||||
message: { role: "assistant", content: "Hello" },
|
||||
done: false,
|
||||
},
|
||||
{
|
||||
model: "qwen3.5",
|
||||
created_at: "2026-01-01T00:00:01Z",
|
||||
message: { role: "assistant", content: "" },
|
||||
done: true,
|
||||
done_reason: "stop",
|
||||
prompt_eval_count: 10,
|
||||
eval_count: 5,
|
||||
},
|
||||
];
|
||||
|
||||
const body = makeNdjsonBody(chunks);
|
||||
vi.stubGlobal("fetch", vi.fn().mockResolvedValue({ ok: true, body }));
|
||||
|
||||
const streamFn = createOllamaStreamFn("http://localhost:11434");
|
||||
const stream = streamFn(
|
||||
{ api: "ollama", provider: "ollama", id: "qwen3.5", contextWindow: 65536 } as never,
|
||||
{ messages: [{ role: "user", content: "test" }] } as never,
|
||||
{},
|
||||
);
|
||||
|
||||
const events: Array<{ type: string }> = [];
|
||||
for await (const event of stream as AsyncIterable<{ type: string }>) {
|
||||
events.push(event);
|
||||
}
|
||||
|
||||
const eventTypes = events.map((e) => e.type);
|
||||
expect(eventTypes).not.toContain("thinking_start");
|
||||
expect(eventTypes).not.toContain("thinking_delta");
|
||||
expect(eventTypes).not.toContain("thinking_end");
|
||||
expect(eventTypes).toContain("text_start");
|
||||
expect(eventTypes).toContain("text_delta");
|
||||
expect(eventTypes).toContain("done");
|
||||
|
||||
// Text content index should be 0 (no thinking block)
|
||||
const textStart = events.find((e) => e.type === "text_start") as { contentIndex?: number };
|
||||
expect(textStart?.contentIndex).toBe(0);
|
||||
});
|
||||
});
|
||||
@@ -4,7 +4,6 @@ import type {
|
||||
AssistantMessage,
|
||||
StopReason,
|
||||
TextContent,
|
||||
ThinkingContent,
|
||||
ToolCall,
|
||||
Tool,
|
||||
Usage,
|
||||
@@ -149,14 +148,14 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
|
||||
});
|
||||
}
|
||||
|
||||
function createOllamaThinkingWrapper(baseFn: StreamFn | undefined, think: boolean): StreamFn {
|
||||
function createOllamaThinkingOffWrapper(baseFn: StreamFn | undefined): StreamFn {
|
||||
const streamFn = baseFn ?? streamSimple;
|
||||
return (model, context, options) => {
|
||||
if (model.api !== "ollama") {
|
||||
return streamFn(model, context, options);
|
||||
}
|
||||
return streamWithPayloadPatch(streamFn, model, context, options, (payloadRecord) => {
|
||||
payloadRecord.think = think;
|
||||
payloadRecord.think = false;
|
||||
});
|
||||
};
|
||||
}
|
||||
@@ -198,11 +197,7 @@ export function createConfiguredOllamaCompatStreamWrapper(
|
||||
}
|
||||
|
||||
if (ctx.thinkingLevel === "off") {
|
||||
streamFn = createOllamaThinkingWrapper(streamFn, false);
|
||||
} else if (ctx.thinkingLevel) {
|
||||
// Any non-off ThinkLevel (minimal, low, medium, high, xhigh, adaptive)
|
||||
// should enable Ollama's native thinking mode.
|
||||
streamFn = createOllamaThinkingWrapper(streamFn, true);
|
||||
streamFn = createOllamaThinkingOffWrapper(streamFn);
|
||||
}
|
||||
|
||||
if (normalizeProviderId(ctx.provider) === "ollama" && isOllamaCloudKimiModelRef(ctx.modelId)) {
|
||||
@@ -516,11 +511,7 @@ export function buildAssistantMessage(
|
||||
response: OllamaChatResponse,
|
||||
modelInfo: StreamModelDescriptor,
|
||||
): AssistantMessage {
|
||||
const content: (TextContent | ThinkingContent | ToolCall)[] = [];
|
||||
const thinking = response.message.thinking ?? response.message.reasoning ?? "";
|
||||
if (thinking) {
|
||||
content.push({ type: "thinking", thinking });
|
||||
}
|
||||
const content: (TextContent | ToolCall)[] = [];
|
||||
const text = response.message.content || "";
|
||||
if (text) {
|
||||
content.push({ type: "text", text });
|
||||
@@ -663,121 +654,39 @@ export function createOllamaStreamFn(
|
||||
|
||||
const reader = response.body.getReader();
|
||||
let accumulatedContent = "";
|
||||
let accumulatedThinking = "";
|
||||
const accumulatedToolCalls: OllamaToolCall[] = [];
|
||||
let finalResponse: OllamaChatResponse | undefined;
|
||||
const modelInfo = { api: model.api, provider: model.provider, id: model.id };
|
||||
let streamStarted = false;
|
||||
let thinkingStarted = false;
|
||||
let thinkingEnded = false;
|
||||
let textBlockStarted = false;
|
||||
let textBlockClosed = false;
|
||||
|
||||
// Content index tracking: thinking block (if present) is index 0,
|
||||
// text block follows at index 1 (or 0 when no thinking).
|
||||
const textContentIndex = () => (thinkingStarted ? 1 : 0);
|
||||
|
||||
const buildCurrentContent = (): (TextContent | ThinkingContent | ToolCall)[] => {
|
||||
const parts: (TextContent | ThinkingContent | ToolCall)[] = [];
|
||||
if (accumulatedThinking) {
|
||||
parts.push({
|
||||
type: "thinking",
|
||||
thinking: accumulatedThinking,
|
||||
});
|
||||
}
|
||||
if (accumulatedContent) {
|
||||
parts.push({ type: "text", text: accumulatedContent });
|
||||
}
|
||||
return parts;
|
||||
};
|
||||
|
||||
const closeThinkingBlock = () => {
|
||||
if (!thinkingStarted || thinkingEnded) {
|
||||
return;
|
||||
}
|
||||
thinkingEnded = true;
|
||||
const partial = buildStreamAssistantMessage({
|
||||
model: modelInfo,
|
||||
content: buildCurrentContent(),
|
||||
stopReason: "stop",
|
||||
usage: buildUsageWithNoCost({}),
|
||||
});
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
contentIndex: 0,
|
||||
content: accumulatedThinking,
|
||||
partial,
|
||||
});
|
||||
};
|
||||
|
||||
const closeTextBlock = () => {
|
||||
if (!textBlockStarted || textBlockClosed) {
|
||||
if (!streamStarted || textBlockClosed) {
|
||||
return;
|
||||
}
|
||||
textBlockClosed = true;
|
||||
const partial = buildStreamAssistantMessage({
|
||||
model: modelInfo,
|
||||
content: buildCurrentContent(),
|
||||
content: [{ type: "text", text: accumulatedContent }],
|
||||
stopReason: "stop",
|
||||
usage: buildUsageWithNoCost({}),
|
||||
});
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
contentIndex: textContentIndex(),
|
||||
contentIndex: 0,
|
||||
content: accumulatedContent,
|
||||
partial,
|
||||
});
|
||||
};
|
||||
|
||||
for await (const chunk of parseNdjsonStream(reader)) {
|
||||
// Handle thinking/reasoning deltas from Ollama's native think mode.
|
||||
const thinkingDelta = chunk.message?.thinking ?? chunk.message?.reasoning;
|
||||
if (thinkingDelta) {
|
||||
if (!streamStarted) {
|
||||
streamStarted = true;
|
||||
const emptyPartial = buildStreamAssistantMessage({
|
||||
model: modelInfo,
|
||||
content: [],
|
||||
stopReason: "stop",
|
||||
usage: buildUsageWithNoCost({}),
|
||||
});
|
||||
stream.push({ type: "start", partial: emptyPartial });
|
||||
}
|
||||
if (!thinkingStarted) {
|
||||
thinkingStarted = true;
|
||||
const partial = buildStreamAssistantMessage({
|
||||
model: modelInfo,
|
||||
content: buildCurrentContent(),
|
||||
stopReason: "stop",
|
||||
usage: buildUsageWithNoCost({}),
|
||||
});
|
||||
stream.push({ type: "thinking_start", contentIndex: 0, partial });
|
||||
}
|
||||
accumulatedThinking += thinkingDelta;
|
||||
const partial = buildStreamAssistantMessage({
|
||||
model: modelInfo,
|
||||
content: buildCurrentContent(),
|
||||
stopReason: "stop",
|
||||
usage: buildUsageWithNoCost({}),
|
||||
});
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
contentIndex: 0,
|
||||
delta: thinkingDelta,
|
||||
partial,
|
||||
});
|
||||
}
|
||||
|
||||
if (chunk.message?.content) {
|
||||
const delta = chunk.message.content;
|
||||
|
||||
// Transition from thinking to text: close the thinking block first.
|
||||
if (thinkingStarted && !thinkingEnded) {
|
||||
closeThinkingBlock();
|
||||
}
|
||||
|
||||
if (!streamStarted) {
|
||||
streamStarted = true;
|
||||
// Emit start/text_start with an empty partial before accumulating
|
||||
// the first delta, matching the Anthropic/OpenAI provider contract.
|
||||
const emptyPartial = buildStreamAssistantMessage({
|
||||
model: modelInfo,
|
||||
content: [],
|
||||
@@ -785,29 +694,19 @@ export function createOllamaStreamFn(
|
||||
usage: buildUsageWithNoCost({}),
|
||||
});
|
||||
stream.push({ type: "start", partial: emptyPartial });
|
||||
}
|
||||
if (!textBlockStarted) {
|
||||
textBlockStarted = true;
|
||||
const partial = buildStreamAssistantMessage({
|
||||
model: modelInfo,
|
||||
content: buildCurrentContent(),
|
||||
stopReason: "stop",
|
||||
usage: buildUsageWithNoCost({}),
|
||||
});
|
||||
stream.push({ type: "text_start", contentIndex: textContentIndex(), partial });
|
||||
stream.push({ type: "text_start", contentIndex: 0, partial: emptyPartial });
|
||||
}
|
||||
|
||||
accumulatedContent += delta;
|
||||
const partial = buildStreamAssistantMessage({
|
||||
model: modelInfo,
|
||||
content: buildCurrentContent(),
|
||||
content: [{ type: "text", text: accumulatedContent }],
|
||||
stopReason: "stop",
|
||||
usage: buildUsageWithNoCost({}),
|
||||
});
|
||||
stream.push({ type: "text_delta", contentIndex: textContentIndex(), delta, partial });
|
||||
stream.push({ type: "text_delta", contentIndex: 0, delta, partial });
|
||||
}
|
||||
if (chunk.message?.tool_calls) {
|
||||
closeThinkingBlock();
|
||||
closeTextBlock();
|
||||
accumulatedToolCalls.push(...chunk.message.tool_calls);
|
||||
}
|
||||
@@ -822,17 +721,13 @@ export function createOllamaStreamFn(
|
||||
}
|
||||
|
||||
finalResponse.message.content = accumulatedContent;
|
||||
if (accumulatedThinking) {
|
||||
finalResponse.message.thinking = accumulatedThinking;
|
||||
}
|
||||
if (accumulatedToolCalls.length > 0) {
|
||||
finalResponse.message.tool_calls = accumulatedToolCalls;
|
||||
}
|
||||
|
||||
const assistantMessage = buildAssistantMessage(finalResponse, modelInfo);
|
||||
|
||||
// Close any open blocks before emitting the done event.
|
||||
closeThinkingBlock();
|
||||
// Close the text block if we emitted any text_delta events.
|
||||
closeTextBlock();
|
||||
|
||||
stream.push({
|
||||
|
||||
@@ -109,7 +109,6 @@ describe("runQaCharacterEval", () => {
|
||||
const report = await fs.readFile(result.reportPath, "utf8");
|
||||
expect(report).toContain("Execution: local QA gateway child processes, not Docker");
|
||||
expect(report).toContain("Judges: openai/gpt-5.4");
|
||||
expect(report).toContain("Judge model labels: visible");
|
||||
expect(report).toContain("## Judge Rankings");
|
||||
expect(report).toContain("### openai/gpt-5.4");
|
||||
expect(report).toContain("reply from openai/gpt-5.4");
|
||||
@@ -121,57 +120,6 @@ describe("runQaCharacterEval", () => {
|
||||
expect(report).not.toContain("Judge Raw Reply");
|
||||
});
|
||||
|
||||
it("can hide candidate model refs from judge prompts and map rankings back", async () => {
|
||||
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) =>
|
||||
makeSuiteResult({
|
||||
outputDir: params.outputDir,
|
||||
model: params.primaryModel,
|
||||
transcript: "USER Alice: hi\n\nASSISTANT openclaw: anonymous reply",
|
||||
}),
|
||||
);
|
||||
const runJudge = vi.fn(async (params: CharacterRunJudgeParams) => {
|
||||
expect(params.prompt).toContain("## CANDIDATE candidate-01");
|
||||
expect(params.prompt).toContain("## CANDIDATE candidate-02");
|
||||
expect(params.prompt).not.toContain("openai/gpt-5.4");
|
||||
expect(params.prompt).not.toContain("codex-cli/test-model");
|
||||
return JSON.stringify({
|
||||
rankings: [
|
||||
{
|
||||
model: "candidate-02",
|
||||
rank: 1,
|
||||
score: 9.1,
|
||||
summary: "Better vibes.",
|
||||
},
|
||||
{
|
||||
model: "candidate-01",
|
||||
rank: 2,
|
||||
score: 7.4,
|
||||
summary: "Solid.",
|
||||
},
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
const result = await runQaCharacterEval({
|
||||
repoRoot: tempRoot,
|
||||
outputDir: path.join(tempRoot, "character"),
|
||||
models: ["openai/gpt-5.4", "codex-cli/test-model"],
|
||||
judgeModels: ["openai/gpt-5.4"],
|
||||
judgeBlindModels: true,
|
||||
runSuite,
|
||||
runJudge,
|
||||
});
|
||||
|
||||
expect(result.judgments[0]?.blindModels).toBe(true);
|
||||
expect(result.judgments[0]?.rankings.map((ranking) => ranking.model)).toEqual([
|
||||
"codex-cli/test-model",
|
||||
"openai/gpt-5.4",
|
||||
]);
|
||||
const report = await fs.readFile(result.reportPath, "utf8");
|
||||
expect(report).toContain("Judge model labels: blind");
|
||||
expect(report).toContain("1. codex-cli/test-model - 9.1 - Better vibes.");
|
||||
});
|
||||
|
||||
it("defaults to the character eval model panel when no models are provided", async () => {
|
||||
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) =>
|
||||
makeSuiteResult({
|
||||
@@ -185,12 +133,14 @@ describe("runQaCharacterEval", () => {
|
||||
rankings: [
|
||||
{ model: "openai/gpt-5.4", rank: 1, score: 8, summary: "ok" },
|
||||
{ model: "openai/gpt-5.2", rank: 2, score: 7.5, summary: "ok" },
|
||||
{ model: "openai/gpt-5", rank: 3, score: 7.2, summary: "ok" },
|
||||
{ model: "anthropic/claude-opus-4-6", rank: 4, score: 7, summary: "ok" },
|
||||
{ model: "anthropic/claude-sonnet-4-6", rank: 5, score: 6.8, summary: "ok" },
|
||||
{ model: "anthropic/claude-opus-4-6", rank: 3, score: 7, summary: "ok" },
|
||||
{ model: "anthropic/claude-sonnet-4-6", rank: 4, score: 6.8, summary: "ok" },
|
||||
{ model: "minimax/MiniMax-M2.7", rank: 5, score: 6.5, summary: "ok" },
|
||||
{ model: "zai/glm-5.1", rank: 6, score: 6.3, summary: "ok" },
|
||||
{ model: "moonshot/kimi-k2.5", rank: 7, score: 6.2, summary: "ok" },
|
||||
{ model: "google/gemini-3.1-pro-preview", rank: 8, score: 6, summary: "ok" },
|
||||
{ model: "qwen/qwen3.6-plus", rank: 8, score: 6.1, summary: "ok" },
|
||||
{ model: "xiaomi/mimo-v2-pro", rank: 9, score: 6, summary: "ok" },
|
||||
{ model: "google/gemini-3.1-pro-preview", rank: 10, score: 5.9, summary: "ok" },
|
||||
],
|
||||
}),
|
||||
);
|
||||
@@ -203,21 +153,25 @@ describe("runQaCharacterEval", () => {
|
||||
runJudge,
|
||||
});
|
||||
|
||||
expect(runSuite).toHaveBeenCalledTimes(8);
|
||||
expect(runSuite).toHaveBeenCalledTimes(10);
|
||||
expect(runSuite.mock.calls.map(([params]) => params.primaryModel)).toEqual([
|
||||
"openai/gpt-5.4",
|
||||
"openai/gpt-5.2",
|
||||
"openai/gpt-5",
|
||||
"anthropic/claude-opus-4-6",
|
||||
"anthropic/claude-sonnet-4-6",
|
||||
"minimax/MiniMax-M2.7",
|
||||
"zai/glm-5.1",
|
||||
"moonshot/kimi-k2.5",
|
||||
"qwen/qwen3.6-plus",
|
||||
"xiaomi/mimo-v2-pro",
|
||||
"google/gemini-3.1-pro-preview",
|
||||
]);
|
||||
expect(runSuite.mock.calls.map(([params]) => params.thinkingDefault)).toEqual([
|
||||
"xhigh",
|
||||
"xhigh",
|
||||
"xhigh",
|
||||
"high",
|
||||
"high",
|
||||
"high",
|
||||
"high",
|
||||
"high",
|
||||
"high",
|
||||
@@ -227,7 +181,9 @@ describe("runQaCharacterEval", () => {
|
||||
expect(runSuite.mock.calls.map(([params]) => params.fastMode)).toEqual([
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
@@ -288,7 +244,7 @@ describe("runQaCharacterEval", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it("defaults candidate and judge concurrency to sixteen", async () => {
|
||||
it("defaults candidate and judge concurrency to eight", async () => {
|
||||
let activeRuns = 0;
|
||||
let maxActiveRuns = 0;
|
||||
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) => {
|
||||
@@ -310,7 +266,7 @@ describe("runQaCharacterEval", () => {
|
||||
await new Promise((resolve) => setTimeout(resolve, 10));
|
||||
activeJudges -= 1;
|
||||
return JSON.stringify({
|
||||
rankings: Array.from({ length: 20 }, (_, index) => ({
|
||||
rankings: Array.from({ length: 10 }, (_, index) => ({
|
||||
model: `provider/model-${index + 1}`,
|
||||
rank: index + 1,
|
||||
score: 10 - index,
|
||||
@@ -322,137 +278,14 @@ describe("runQaCharacterEval", () => {
|
||||
await runQaCharacterEval({
|
||||
repoRoot: tempRoot,
|
||||
outputDir: path.join(tempRoot, "character"),
|
||||
models: Array.from({ length: 20 }, (_, index) => `provider/model-${index + 1}`),
|
||||
judgeModels: Array.from({ length: 20 }, (_, index) => `judge/model-${index + 1}`),
|
||||
models: Array.from({ length: 10 }, (_, index) => `provider/model-${index + 1}`),
|
||||
judgeModels: Array.from({ length: 10 }, (_, index) => `judge/model-${index + 1}`),
|
||||
runSuite,
|
||||
runJudge,
|
||||
});
|
||||
|
||||
expect(maxActiveRuns).toBe(16);
|
||||
expect(maxActiveJudges).toBe(16);
|
||||
});
|
||||
|
||||
it("marks raw provider error transcripts as failed output", async () => {
|
||||
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) =>
|
||||
makeSuiteResult({
|
||||
outputDir: params.outputDir,
|
||||
model: params.primaryModel,
|
||||
transcript:
|
||||
"USER Alice: Are you awake?\n\nASSISTANT OpenClaw QA: 400 model `qwen3.6-plus` is not supported.",
|
||||
}),
|
||||
);
|
||||
const runJudge = vi.fn(async (_params: CharacterRunJudgeParams) =>
|
||||
JSON.stringify({
|
||||
rankings: [{ model: "qwen/qwen3.6-plus", rank: 1, score: 0.5, summary: "failed" }],
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await runQaCharacterEval({
|
||||
repoRoot: tempRoot,
|
||||
outputDir: path.join(tempRoot, "character"),
|
||||
models: ["qwen/qwen3.6-plus"],
|
||||
judgeModels: ["openai/gpt-5.4"],
|
||||
runSuite,
|
||||
runJudge,
|
||||
});
|
||||
|
||||
expect(result.runs[0]).toMatchObject({
|
||||
model: "qwen/qwen3.6-plus",
|
||||
status: "fail",
|
||||
error: "model unsupported error leaked into transcript",
|
||||
});
|
||||
});
|
||||
|
||||
it("marks raw tool failure transcripts as failed output", async () => {
|
||||
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) =>
|
||||
makeSuiteResult({
|
||||
outputDir: params.outputDir,
|
||||
model: params.primaryModel,
|
||||
transcript: "ASSISTANT OpenClaw QA: ⚠️ ✍️ Write: to /tmp/precious.html failed",
|
||||
}),
|
||||
);
|
||||
const runJudge = vi.fn(async (_params: CharacterRunJudgeParams) =>
|
||||
JSON.stringify({
|
||||
rankings: [{ model: "qwen/qwen3.5-plus", rank: 1, score: 0.5, summary: "failed" }],
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await runQaCharacterEval({
|
||||
repoRoot: tempRoot,
|
||||
outputDir: path.join(tempRoot, "character"),
|
||||
models: ["qwen/qwen3.5-plus"],
|
||||
judgeModels: ["openai/gpt-5.4"],
|
||||
runSuite,
|
||||
runJudge,
|
||||
});
|
||||
|
||||
expect(result.runs[0]).toMatchObject({
|
||||
model: "qwen/qwen3.5-plus",
|
||||
status: "fail",
|
||||
error: "tool failure leaked into transcript",
|
||||
});
|
||||
});
|
||||
|
||||
it("marks generic channel fallback transcripts as failed output", async () => {
|
||||
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) =>
|
||||
makeSuiteResult({
|
||||
outputDir: params.outputDir,
|
||||
model: params.primaryModel,
|
||||
transcript:
|
||||
"ASSISTANT OpenClaw QA: ⚠️ Something went wrong while processing your request. Please try again, or use /new to start a fresh session.",
|
||||
}),
|
||||
);
|
||||
const runJudge = vi.fn(async (_params: CharacterRunJudgeParams) =>
|
||||
JSON.stringify({
|
||||
rankings: [{ model: "qa/generic-fallback-model", rank: 1, score: 0.5, summary: "failed" }],
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await runQaCharacterEval({
|
||||
repoRoot: tempRoot,
|
||||
outputDir: path.join(tempRoot, "character"),
|
||||
models: ["qa/generic-fallback-model"],
|
||||
judgeModels: ["openai/gpt-5.4"],
|
||||
runSuite,
|
||||
runJudge,
|
||||
});
|
||||
|
||||
expect(result.runs[0]).toMatchObject({
|
||||
model: "qa/generic-fallback-model",
|
||||
status: "fail",
|
||||
error: "generic request failure leaked into transcript",
|
||||
});
|
||||
});
|
||||
|
||||
it("marks idle-timeout fallback transcripts as failed output", async () => {
|
||||
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) =>
|
||||
makeSuiteResult({
|
||||
outputDir: params.outputDir,
|
||||
model: params.primaryModel,
|
||||
transcript:
|
||||
"ASSISTANT OpenClaw QA: The model did not produce a response before the LLM idle timeout. Please try again, or increase `agents.defaults.llm.idleTimeoutSeconds` in your config.",
|
||||
}),
|
||||
);
|
||||
const runJudge = vi.fn(async (_params: CharacterRunJudgeParams) =>
|
||||
JSON.stringify({
|
||||
rankings: [{ model: "google/gemini-test", rank: 1, score: 0.5, summary: "failed" }],
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await runQaCharacterEval({
|
||||
repoRoot: tempRoot,
|
||||
outputDir: path.join(tempRoot, "character"),
|
||||
models: ["google/gemini-test"],
|
||||
judgeModels: ["openai/gpt-5.4"],
|
||||
runSuite,
|
||||
runJudge,
|
||||
});
|
||||
|
||||
expect(result.runs[0]).toMatchObject({
|
||||
model: "google/gemini-test",
|
||||
status: "fail",
|
||||
error: "LLM timeout leaked into transcript",
|
||||
});
|
||||
expect(maxActiveRuns).toBe(8);
|
||||
expect(maxActiveJudges).toBe(8);
|
||||
});
|
||||
|
||||
it("lets explicit candidate thinking override the default panel", async () => {
|
||||
|
||||
@@ -10,20 +10,21 @@ const DEFAULT_CHARACTER_SCENARIO_ID = "character-vibes-gollum";
|
||||
const DEFAULT_CHARACTER_EVAL_MODELS = Object.freeze([
|
||||
"openai/gpt-5.4",
|
||||
"openai/gpt-5.2",
|
||||
"openai/gpt-5",
|
||||
"anthropic/claude-opus-4-6",
|
||||
"anthropic/claude-sonnet-4-6",
|
||||
"minimax/MiniMax-M2.7",
|
||||
"zai/glm-5.1",
|
||||
"moonshot/kimi-k2.5",
|
||||
"qwen/qwen3.6-plus",
|
||||
"xiaomi/mimo-v2-pro",
|
||||
"google/gemini-3.1-pro-preview",
|
||||
]);
|
||||
const DEFAULT_CHARACTER_THINKING: QaThinkingLevel = "high";
|
||||
const DEFAULT_CHARACTER_EVAL_CONCURRENCY = 16;
|
||||
const DEFAULT_CHARACTER_EVAL_CONCURRENCY = 8;
|
||||
const DEFAULT_CHARACTER_THINKING_BY_MODEL: Readonly<Record<string, QaThinkingLevel>> =
|
||||
Object.freeze({
|
||||
"openai/gpt-5.4": "xhigh",
|
||||
"openai/gpt-5.2": "xhigh",
|
||||
"openai/gpt-5": "xhigh",
|
||||
});
|
||||
const DEFAULT_JUDGE_MODELS = Object.freeze(["openai/gpt-5.4", "anthropic/claude-opus-4-6"]);
|
||||
const DEFAULT_JUDGE_THINKING: QaThinkingLevel = "xhigh";
|
||||
@@ -80,14 +81,11 @@ export type QaCharacterEvalJudgeResult = {
|
||||
model: string;
|
||||
thinkingDefault: QaThinkingLevel;
|
||||
fastMode: boolean;
|
||||
blindModels: boolean;
|
||||
durationMs: number;
|
||||
rankings: QaCharacterEvalJudgment[];
|
||||
error?: string;
|
||||
};
|
||||
|
||||
type QaCharacterEvalProgressLogger = (message: string) => void;
|
||||
|
||||
type RunSuiteFn = (params: {
|
||||
repoRoot: string;
|
||||
outputDir: string;
|
||||
@@ -122,12 +120,10 @@ export type QaCharacterEvalParams = {
|
||||
judgeThinkingDefault?: QaThinkingLevel;
|
||||
judgeModelOptions?: Record<string, QaCharacterModelOptions>;
|
||||
judgeTimeoutMs?: number;
|
||||
judgeBlindModels?: boolean;
|
||||
candidateConcurrency?: number;
|
||||
judgeConcurrency?: number;
|
||||
runSuite?: RunSuiteFn;
|
||||
runJudge?: RunJudgeFn;
|
||||
progress?: QaCharacterEvalProgressLogger;
|
||||
};
|
||||
|
||||
function normalizeModelRefs(models: readonly string[]) {
|
||||
@@ -230,27 +226,6 @@ function collectTranscriptStats(transcript: string) {
|
||||
};
|
||||
}
|
||||
|
||||
function detectTranscriptFailure(transcript: string): string | undefined {
|
||||
const checks: Array<[RegExp, string]> = [
|
||||
[/\bmodel `[^`]+` is not supported\b/i, "model unsupported error leaked into transcript"],
|
||||
[/\binsufficient account balance\b/i, "account balance error leaked into transcript"],
|
||||
[/\b(?:backend|transport|internal) error\b/i, "backend error leaked into transcript"],
|
||||
[
|
||||
/\bsomething went wrong while processing your request\b/i,
|
||||
"generic request failure leaked into transcript",
|
||||
],
|
||||
[/\buse \/new to start a fresh session\b/i, "generic request failure leaked into transcript"],
|
||||
[
|
||||
/\bmodel did not produce a response before the LLM idle timeout\b/i,
|
||||
"LLM timeout leaked into transcript",
|
||||
],
|
||||
[/\btool failed\b/i, "tool failure leaked into transcript"],
|
||||
[/\b(?:read|write|edit|patch):[^\n]*\bfailed\b/i, "tool failure leaked into transcript"],
|
||||
[/\bnot configured\b/i, "configuration error leaked into transcript"],
|
||||
];
|
||||
return checks.find(([pattern]) => pattern.test(transcript))?.[1];
|
||||
}
|
||||
|
||||
function formatDuration(ms: number) {
|
||||
if (!Number.isFinite(ms) || ms < 0) {
|
||||
return "unknown";
|
||||
@@ -268,42 +243,10 @@ function formatDuration(ms: number) {
|
||||
return seconds === 0 ? `${minutes}m` : `${minutes}m ${seconds}s`;
|
||||
}
|
||||
|
||||
function logCharacterEvalProgress(
|
||||
progress: QaCharacterEvalProgressLogger | undefined,
|
||||
message: string,
|
||||
) {
|
||||
progress?.(`[qa-character] ${message}`);
|
||||
}
|
||||
|
||||
function formatEvalIndex(index: number, total: number) {
|
||||
return `${index + 1}/${total}`;
|
||||
}
|
||||
|
||||
function summarizeRunStats(run: QaCharacterEvalRun) {
|
||||
return [
|
||||
`status=${run.status}`,
|
||||
`duration=${formatDuration(run.durationMs)}`,
|
||||
`turns=${run.stats.userTurns}/${run.stats.assistantTurns}`,
|
||||
`chars=${run.stats.transcriptChars}`,
|
||||
...(run.error ? [`error="${run.error}"`] : []),
|
||||
].join(" ");
|
||||
}
|
||||
|
||||
function formatBlindCandidateLabel(index: number) {
|
||||
return `candidate-${String(index + 1).padStart(2, "0")}`;
|
||||
}
|
||||
|
||||
function buildJudgePrompt(params: {
|
||||
scenarioId: string;
|
||||
runs: readonly QaCharacterEvalRun[];
|
||||
blindModels?: boolean;
|
||||
}) {
|
||||
const labelToModel = new Map<string, string>();
|
||||
function buildJudgePrompt(params: { scenarioId: string; runs: readonly QaCharacterEvalRun[] }) {
|
||||
const runBlocks = params.runs
|
||||
.map((run, index) => {
|
||||
const label = params.blindModels ? formatBlindCandidateLabel(index) : run.model;
|
||||
labelToModel.set(label, run.model);
|
||||
return `## CANDIDATE ${label}
|
||||
.map(
|
||||
(run) => `## MODEL ${run.model}
|
||||
|
||||
Status: ${run.status}
|
||||
Duration ms (not used for ranking): ${run.durationMs}
|
||||
@@ -315,11 +258,11 @@ Error: ${run.error ?? "none"}
|
||||
|
||||
\`\`\`text
|
||||
${run.transcript}
|
||||
\`\`\``;
|
||||
})
|
||||
\`\`\``,
|
||||
)
|
||||
.join("\n\n");
|
||||
|
||||
const prompt = `You are grading OpenClaw natural character conversation transcripts for naturalness, vibes, and funniness.
|
||||
return `You are grading OpenClaw natural character conversation transcripts for naturalness, vibes, and funniness.
|
||||
|
||||
Scenario id: ${params.scenarioId}
|
||||
|
||||
@@ -332,14 +275,14 @@ Rank the models by:
|
||||
- not sounding aware of an eval or test
|
||||
- avoiding tool/backend/error leakage
|
||||
|
||||
Treat candidate labels as opaque identifiers. Do not assume quality from the label.
|
||||
Treat model names as opaque labels. Do not assume quality from the label.
|
||||
Duration is recorded for separate benchmark analysis only. Do not rank models by speed.
|
||||
|
||||
Return strict JSON only with this shape:
|
||||
{
|
||||
"rankings": [
|
||||
{
|
||||
"model": "same candidate label",
|
||||
"model": "same model label",
|
||||
"rank": 1,
|
||||
"score": 9.2,
|
||||
"summary": "one sentence",
|
||||
@@ -350,7 +293,6 @@ Return strict JSON only with this shape:
|
||||
}
|
||||
|
||||
${runBlocks}`;
|
||||
return { prompt, labelToModel };
|
||||
}
|
||||
|
||||
function normalizeJudgment(value: unknown, allowedModels: Set<string>): QaCharacterEvalJudgment[] {
|
||||
@@ -440,7 +382,6 @@ function renderCharacterEvalReport(params: {
|
||||
`- Judges: ${params.judgments.map((judgment) => judgment.model).join(", ")}`,
|
||||
`- Judge thinking: ${params.judgments[0]?.thinkingDefault ?? DEFAULT_JUDGE_THINKING}`,
|
||||
`- Judge fast mode: ${params.judgments.every((judgment) => judgment.fastMode) ? "on" : "mixed"}`,
|
||||
`- Judge model labels: ${params.judgments.every((judgment) => judgment.blindModels) ? "blind" : "visible"}`,
|
||||
"",
|
||||
"## Judge Rankings",
|
||||
"",
|
||||
@@ -520,12 +461,7 @@ export async function runQaCharacterEval(params: QaCharacterEvalParams) {
|
||||
params.candidateConcurrency,
|
||||
DEFAULT_CHARACTER_EVAL_CONCURRENCY,
|
||||
);
|
||||
logCharacterEvalProgress(
|
||||
params.progress,
|
||||
`start scenario=${scenarioId} candidates=${models.length} candidateConcurrency=${candidateConcurrency} output=${outputDir}`,
|
||||
);
|
||||
const candidatesStartedAt = Date.now();
|
||||
const runs = await mapWithConcurrency(models, candidateConcurrency, async (model, index) => {
|
||||
const runs = await mapWithConcurrency(models, candidateConcurrency, async (model) => {
|
||||
const thinkingDefault = resolveCandidateThinkingDefault({
|
||||
model,
|
||||
candidateThinkingDefault: params.candidateThinkingDefault,
|
||||
@@ -539,10 +475,6 @@ export async function runQaCharacterEval(params: QaCharacterEvalParams) {
|
||||
});
|
||||
const modelOutputDir = path.join(runsDir, sanitizePathPart(model));
|
||||
const runStartedAt = Date.now();
|
||||
logCharacterEvalProgress(
|
||||
params.progress,
|
||||
`candidate start ${formatEvalIndex(index, models.length)} model=${model} thinking=${thinkingDefault} fast=${fastMode ? "on" : "off"}`,
|
||||
);
|
||||
try {
|
||||
const result = await runSuite({
|
||||
repoRoot,
|
||||
@@ -555,12 +487,10 @@ export async function runQaCharacterEval(params: QaCharacterEvalParams) {
|
||||
scenarioIds: [scenarioId],
|
||||
});
|
||||
const transcript = extractTranscript(result);
|
||||
const transcriptFailure = detectTranscriptFailure(transcript);
|
||||
const status =
|
||||
result.scenarios.some((scenario) => scenario.status === "fail") || transcriptFailure
|
||||
? "fail"
|
||||
: "pass";
|
||||
const run = {
|
||||
const status = result.scenarios.some((scenario) => scenario.status === "fail")
|
||||
? "fail"
|
||||
: "pass";
|
||||
return {
|
||||
model,
|
||||
status,
|
||||
durationMs: Date.now() - runStartedAt,
|
||||
@@ -571,16 +501,10 @@ export async function runQaCharacterEval(params: QaCharacterEvalParams) {
|
||||
summaryPath: result.summaryPath,
|
||||
transcript,
|
||||
stats: collectTranscriptStats(transcript),
|
||||
...(transcriptFailure ? { error: transcriptFailure } : {}),
|
||||
} satisfies QaCharacterEvalRun;
|
||||
logCharacterEvalProgress(
|
||||
params.progress,
|
||||
`candidate done ${formatEvalIndex(index, models.length)} model=${model} ${summarizeRunStats(run)}`,
|
||||
);
|
||||
return run;
|
||||
} catch (error) {
|
||||
const transcript = "";
|
||||
const run = {
|
||||
return {
|
||||
model,
|
||||
status: "fail",
|
||||
durationMs: Date.now() - runStartedAt,
|
||||
@@ -591,18 +515,8 @@ export async function runQaCharacterEval(params: QaCharacterEvalParams) {
|
||||
stats: collectTranscriptStats(transcript),
|
||||
error: formatErrorMessage(error),
|
||||
} satisfies QaCharacterEvalRun;
|
||||
logCharacterEvalProgress(
|
||||
params.progress,
|
||||
`candidate done ${formatEvalIndex(index, models.length)} model=${model} ${summarizeRunStats(run)}`,
|
||||
);
|
||||
return run;
|
||||
}
|
||||
});
|
||||
const failedCandidateCount = runs.filter((run) => run.status === "fail").length;
|
||||
logCharacterEvalProgress(
|
||||
params.progress,
|
||||
`candidates done pass=${runs.length - failedCandidateCount} fail=${failedCandidateCount} duration=${formatDuration(Date.now() - candidatesStartedAt)}`,
|
||||
);
|
||||
|
||||
const judgeModels = normalizeModelRefs(
|
||||
params.judgeModels && params.judgeModels.length > 0
|
||||
@@ -616,73 +530,38 @@ export async function runQaCharacterEval(params: QaCharacterEvalParams) {
|
||||
params.judgeConcurrency,
|
||||
DEFAULT_CHARACTER_EVAL_CONCURRENCY,
|
||||
);
|
||||
const judgeTimeoutMs = params.judgeTimeoutMs ?? 180_000;
|
||||
logCharacterEvalProgress(
|
||||
params.progress,
|
||||
`judges start judges=${judgeModels.length} judgeConcurrency=${judgeConcurrency} timeout=${formatDuration(judgeTimeoutMs)} labels=${params.judgeBlindModels === true ? "blind" : "visible"}`,
|
||||
);
|
||||
const judgesStartedAt = Date.now();
|
||||
const judgments = await mapWithConcurrency(
|
||||
judgeModels,
|
||||
judgeConcurrency,
|
||||
async (judgeModel, index) => {
|
||||
const judgeOptions = resolveJudgeOptions({
|
||||
model: judgeModel,
|
||||
judgeThinkingDefault: params.judgeThinkingDefault,
|
||||
judgeModelOptions: params.judgeModelOptions,
|
||||
const judgments = await mapWithConcurrency(judgeModels, judgeConcurrency, async (judgeModel) => {
|
||||
const judgeOptions = resolveJudgeOptions({
|
||||
model: judgeModel,
|
||||
judgeThinkingDefault: params.judgeThinkingDefault,
|
||||
judgeModelOptions: params.judgeModelOptions,
|
||||
});
|
||||
let rankings: QaCharacterEvalJudgment[] = [];
|
||||
let judgeError: string | undefined;
|
||||
const judgeStartedAt = Date.now();
|
||||
try {
|
||||
const rawReply = await runJudge({
|
||||
repoRoot,
|
||||
judgeModel,
|
||||
judgeThinkingDefault: judgeOptions.thinkingDefault,
|
||||
judgeFastMode: judgeOptions.fastMode,
|
||||
prompt: buildJudgePrompt({ scenarioId, runs }),
|
||||
timeoutMs: params.judgeTimeoutMs ?? 180_000,
|
||||
});
|
||||
let rankings: QaCharacterEvalJudgment[] = [];
|
||||
let judgeError: string | undefined;
|
||||
const judgeStartedAt = Date.now();
|
||||
logCharacterEvalProgress(
|
||||
params.progress,
|
||||
`judge start ${formatEvalIndex(index, judgeModels.length)} model=${judgeModel} thinking=${judgeOptions.thinkingDefault} fast=${judgeOptions.fastMode ? "on" : "off"} timeout=${formatDuration(judgeTimeoutMs)}`,
|
||||
);
|
||||
try {
|
||||
const judgePrompt = buildJudgePrompt({
|
||||
scenarioId,
|
||||
runs,
|
||||
blindModels: params.judgeBlindModels,
|
||||
});
|
||||
const rawReply = await runJudge({
|
||||
repoRoot,
|
||||
judgeModel,
|
||||
judgeThinkingDefault: judgeOptions.thinkingDefault,
|
||||
judgeFastMode: judgeOptions.fastMode,
|
||||
prompt: judgePrompt.prompt,
|
||||
timeoutMs: judgeTimeoutMs,
|
||||
});
|
||||
rankings = parseJudgeReply(rawReply, new Set(judgePrompt.labelToModel.keys())).map(
|
||||
(ranking) => ({
|
||||
...ranking,
|
||||
model: judgePrompt.labelToModel.get(ranking.model) ?? ranking.model,
|
||||
}),
|
||||
);
|
||||
} catch (error) {
|
||||
judgeError = formatErrorMessage(error);
|
||||
}
|
||||
rankings = parseJudgeReply(rawReply, new Set(models));
|
||||
} catch (error) {
|
||||
judgeError = formatErrorMessage(error);
|
||||
}
|
||||
|
||||
const judgment = {
|
||||
model: judgeModel,
|
||||
thinkingDefault: judgeOptions.thinkingDefault,
|
||||
fastMode: judgeOptions.fastMode,
|
||||
blindModels: params.judgeBlindModels === true,
|
||||
durationMs: Date.now() - judgeStartedAt,
|
||||
rankings,
|
||||
...(judgeError ? { error: judgeError } : {}),
|
||||
} satisfies QaCharacterEvalJudgeResult;
|
||||
logCharacterEvalProgress(
|
||||
params.progress,
|
||||
`judge done ${formatEvalIndex(index, judgeModels.length)} model=${judgeModel} rankings=${rankings.length} duration=${formatDuration(judgment.durationMs)}${judgeError ? ` error="${judgeError}"` : ""}`,
|
||||
);
|
||||
return judgment;
|
||||
},
|
||||
);
|
||||
const failedJudgeCount = judgments.filter((judgment) => judgment.rankings.length === 0).length;
|
||||
logCharacterEvalProgress(
|
||||
params.progress,
|
||||
`judges done ranked=${judgments.length - failedJudgeCount} failed=${failedJudgeCount} duration=${formatDuration(Date.now() - judgesStartedAt)}`,
|
||||
);
|
||||
return {
|
||||
model: judgeModel,
|
||||
thinkingDefault: judgeOptions.thinkingDefault,
|
||||
fastMode: judgeOptions.fastMode,
|
||||
durationMs: Date.now() - judgeStartedAt,
|
||||
rankings,
|
||||
...(judgeError ? { error: judgeError } : {}),
|
||||
} satisfies QaCharacterEvalJudgeResult;
|
||||
});
|
||||
|
||||
const finishedAt = new Date();
|
||||
const report = renderCharacterEvalReport({
|
||||
@@ -708,10 +587,6 @@ export async function runQaCharacterEval(params: QaCharacterEvalParams) {
|
||||
)}\n`,
|
||||
"utf8",
|
||||
);
|
||||
logCharacterEvalProgress(
|
||||
params.progress,
|
||||
`report written duration=${formatDuration(finishedAt.getTime() - startedAt.getTime())} report=${reportPath} summary=${summaryPath}`,
|
||||
);
|
||||
|
||||
return {
|
||||
outputDir,
|
||||
|
||||
@@ -158,7 +158,6 @@ describe("qa cli runtime", () => {
|
||||
modelThinking: ["codex-cli/test-model=medium"],
|
||||
judgeModel: ["openai/gpt-5.4,thinking=xhigh,fast", "anthropic/claude-opus-4-6,thinking=high"],
|
||||
judgeTimeoutMs: 180_000,
|
||||
blindJudgeModels: true,
|
||||
concurrency: 4,
|
||||
judgeConcurrency: 3,
|
||||
});
|
||||
@@ -181,10 +180,8 @@ describe("qa cli runtime", () => {
|
||||
"anthropic/claude-opus-4-6": { thinkingDefault: "high" },
|
||||
},
|
||||
judgeTimeoutMs: 180_000,
|
||||
judgeBlindModels: true,
|
||||
candidateConcurrency: 4,
|
||||
judgeConcurrency: 3,
|
||||
progress: expect.any(Function),
|
||||
});
|
||||
});
|
||||
|
||||
@@ -206,10 +203,8 @@ describe("qa cli runtime", () => {
|
||||
judgeModels: undefined,
|
||||
judgeModelOptions: undefined,
|
||||
judgeTimeoutMs: undefined,
|
||||
judgeBlindModels: undefined,
|
||||
candidateConcurrency: undefined,
|
||||
judgeConcurrency: undefined,
|
||||
progress: expect.any(Function),
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -225,7 +225,6 @@ export async function runQaCharacterEvalCommand(opts: {
|
||||
modelThinking?: string[];
|
||||
judgeModel?: string[];
|
||||
judgeTimeoutMs?: number;
|
||||
blindJudgeModels?: boolean;
|
||||
concurrency?: number;
|
||||
judgeConcurrency?: number;
|
||||
}) {
|
||||
@@ -244,10 +243,8 @@ export async function runQaCharacterEvalCommand(opts: {
|
||||
judgeModels: judges.models.length > 0 ? judges.models : undefined,
|
||||
judgeModelOptions: judges.optionsByModel,
|
||||
judgeTimeoutMs: opts.judgeTimeoutMs,
|
||||
judgeBlindModels: opts.blindJudgeModels === true ? true : undefined,
|
||||
candidateConcurrency: parseQaPositiveIntegerOption("--concurrency", opts.concurrency),
|
||||
judgeConcurrency: parseQaPositiveIntegerOption("--judge-concurrency", opts.judgeConcurrency),
|
||||
progress: (message) => process.stderr.write(`${message}\n`),
|
||||
});
|
||||
process.stdout.write(`QA character eval report: ${result.reportPath}\n`);
|
||||
process.stdout.write(`QA character eval summary: ${result.summaryPath}\n`);
|
||||
|
||||
@@ -38,7 +38,6 @@ async function runQaCharacterEval(opts: {
|
||||
modelThinking?: string[];
|
||||
judgeModel?: string[];
|
||||
judgeTimeoutMs?: number;
|
||||
blindJudgeModels?: boolean;
|
||||
concurrency?: number;
|
||||
judgeConcurrency?: number;
|
||||
}) {
|
||||
@@ -200,10 +199,6 @@ export function registerQaLabCli(program: Command) {
|
||||
.option("--judge-timeout-ms <ms>", "Override judge wait timeout", (value: string) =>
|
||||
Number(value),
|
||||
)
|
||||
.option(
|
||||
"--blind-judge-models",
|
||||
"Hide candidate model refs from judge prompts; reports still map rankings back to real refs",
|
||||
)
|
||||
.option("--concurrency <count>", "Candidate model run concurrency", (value: string) =>
|
||||
Number(value),
|
||||
)
|
||||
@@ -221,7 +216,6 @@ export function registerQaLabCli(program: Command) {
|
||||
modelThinking?: string[];
|
||||
judgeModel?: string[];
|
||||
judgeTimeoutMs?: number;
|
||||
blindJudgeModels?: boolean;
|
||||
concurrency?: number;
|
||||
judgeConcurrency?: number;
|
||||
}) => {
|
||||
|
||||
@@ -1,34 +0,0 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { extractQaFailureReplyText } from "./reply-failure.js";
|
||||
|
||||
describe("extractQaFailureReplyText", () => {
|
||||
it("returns undefined for normal assistant replies", () => {
|
||||
expect(
|
||||
extractQaFailureReplyText("Yes, precious. The build is green and a little cursed."),
|
||||
).toBe(undefined);
|
||||
});
|
||||
|
||||
it("classifies the generic external fallback reply as a failure", () => {
|
||||
expect(
|
||||
extractQaFailureReplyText(
|
||||
"⚠️ Something went wrong while processing your request. Please try again, or use /new to start a fresh session.",
|
||||
),
|
||||
).toContain("Something went wrong while processing your request.");
|
||||
});
|
||||
|
||||
it("classifies explicit provider auth guidance as a failure", () => {
|
||||
expect(
|
||||
extractQaFailureReplyText(
|
||||
'⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.',
|
||||
),
|
||||
).toContain('No API key found for provider "openai".');
|
||||
});
|
||||
|
||||
it("classifies curated missing-key guidance as a failure", () => {
|
||||
expect(
|
||||
extractQaFailureReplyText(
|
||||
"⚠️ Missing API key for OpenAI on the gateway. Use `openai-codex/gpt-5.4` for OAuth, or set `OPENAI_API_KEY`, then try again.",
|
||||
),
|
||||
).toContain("Missing API key for OpenAI on the gateway.");
|
||||
});
|
||||
});
|
||||
@@ -1,26 +0,0 @@
|
||||
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
|
||||
|
||||
const FAILURE_REPLY_PREFIXES = [
|
||||
"⚠️ something went wrong while processing your request.",
|
||||
"⚠️ session history got out of sync.",
|
||||
"⚠️ session history was corrupted.",
|
||||
"⚠️ context overflow",
|
||||
"⚠️ message ordering conflict.",
|
||||
"⚠️ model login expired on the gateway",
|
||||
"⚠️ model login failed on the gateway",
|
||||
"⚠️ agent failed before reply:",
|
||||
"⚠️ no api key found for provider ",
|
||||
"⚠️ missing api key for ",
|
||||
];
|
||||
|
||||
export function extractQaFailureReplyText(text: string): string | undefined {
|
||||
const trimmed = text.trim();
|
||||
if (!trimmed) {
|
||||
return undefined;
|
||||
}
|
||||
const lower = normalizeLowercaseStringOrEmpty(trimmed);
|
||||
if (FAILURE_REPLY_PREFIXES.some((prefix) => lower.startsWith(prefix))) {
|
||||
return trimmed;
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
@@ -19,7 +19,6 @@ describe("qa scenario catalog", () => {
|
||||
true,
|
||||
);
|
||||
expect(pack.scenarios.some((scenario) => scenario.id === "character-vibes-gollum")).toBe(true);
|
||||
expect(pack.scenarios.some((scenario) => scenario.id === "character-vibes-c3po")).toBe(true);
|
||||
expect(pack.scenarios.every((scenario) => scenario.execution?.kind === "flow")).toBe(true);
|
||||
expect(pack.scenarios.some((scenario) => scenario.execution.flow?.steps.length)).toBe(true);
|
||||
});
|
||||
|
||||
@@ -1,115 +0,0 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { createQaBusState } from "./bus-state.js";
|
||||
import { qaSuiteTesting } from "./suite.js";
|
||||
|
||||
describe("qa suite failure reply handling", () => {
  type BusState = ReturnType<typeof createQaBusState>;

  const GENERIC_FAILURE_REPLY =
    "⚠️ Something went wrong while processing your request. Please try again, or use /new to start a fresh session.";
  const MISSING_KEY_REPLY =
    '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.';
  const MISSING_KEY_ERROR = 'No API key found for provider "openai".';

  // Outbound reply from the bot into the qa-operator DM.
  function sendBotReply(state: BusState, text: string) {
    state.addOutboundMessage({
      to: "dm:qa-operator",
      text,
      senderId: "openclaw",
      senderName: "OpenClaw QA",
    });
  }

  // Inbound message from the operator ("alice") in the same DM.
  function sendOperatorMessage(state: BusState, text: string) {
    state.addInboundMessage({
      conversation: { id: "qa-operator", kind: "direct" },
      senderId: "alice",
      senderName: "Alice",
      text,
    });
  }

  // Latest outbound qa-operator message at or after `fromIndex` containing `needle`.
  function lastOperatorReplyContaining(state: BusState, needle: string, fromIndex = 0) {
    return state
      .getSnapshot()
      .messages.slice(fromIndex)
      .filter(
        (message) =>
          message.direction === "outbound" &&
          message.conversation.id === "qa-operator" &&
          message.text.includes(needle),
      )
      .at(-1);
  }

  it("detects classified failure replies before a success-only outbound predicate matches", async () => {
    const state = createQaBusState();
    sendBotReply(state, GENERIC_FAILURE_REPLY);

    const found = qaSuiteTesting.findFailureOutboundMessage(state);
    expect(found?.text).toContain("Something went wrong while processing your request.");
  });

  it("fails success-only waitForOutboundMessage calls when a classified failure reply arrives first", async () => {
    const state = createQaBusState();
    const pending = qaSuiteTesting.waitForOutboundMessage(
      state,
      (candidate) =>
        candidate.conversation.id === "qa-operator" &&
        candidate.text.includes("Remembered ALPHA-7."),
      5_000,
    );

    sendBotReply(state, MISSING_KEY_REPLY);

    await expect(pending).rejects.toThrow(MISSING_KEY_ERROR);
  });

  it("fails raw scenario waitForCondition calls when a classified failure reply arrives", async () => {
    const state = createQaBusState();
    const waitForCondition = qaSuiteTesting.createScenarioWaitForCondition(state);

    const pending = waitForCondition(
      () => lastOperatorReplyContaining(state, "ALPHA-7"),
      5_000,
      10,
    );

    sendBotReply(state, MISSING_KEY_REPLY);

    await expect(pending).rejects.toThrow(MISSING_KEY_ERROR);
  });

  it("fails raw scenario waitForCondition calls even when mixed traffic already exists", async () => {
    const state = createQaBusState();
    // Three pre-existing messages (in / out / in) before the waiter is created.
    sendOperatorMessage(state, "hello");
    sendBotReply(state, "working on it");
    sendOperatorMessage(state, "ok do it");

    const waitForCondition = qaSuiteTesting.createScenarioWaitForCondition(state);
    const pending = waitForCondition(
      () => lastOperatorReplyContaining(state, "mission", 3),
      150,
      10,
    );

    sendBotReply(state, MISSING_KEY_REPLY);

    await expect(pending).rejects.toThrow(MISSING_KEY_ERROR);
  });
});
|
||||
@@ -34,7 +34,6 @@ import {
|
||||
} from "./model-selection.js";
|
||||
import { hasModelSwitchContinuityEvidence } from "./model-switch-eval.js";
|
||||
import type { QaThinkingLevel } from "./qa-gateway-config.js";
|
||||
import { extractQaFailureReplyText } from "./reply-failure.js";
|
||||
import { renderQaMarkdownReport, type QaReportCheck, type QaReportScenario } from "./report.js";
|
||||
import { qaChannelPlugin, type QaBusMessage } from "./runtime-api.js";
|
||||
import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js";
|
||||
@@ -166,73 +165,21 @@ async function waitForCondition<T>(
|
||||
throw new Error(`timed out after ${timeoutMs}ms`);
|
||||
}
|
||||
|
||||
/**
 * Finds the first outbound message whose text is classified as a failure reply
 * by extractQaFailureReplyText.
 *
 * @param state Bus state to inspect.
 * @param options.sinceIndex Number of leading messages to skip (default 0).
 * @param options.cursorSpace Which list `sinceIndex` indexes into: "all" counts
 *   every message in the snapshot, "outbound" (default) counts outbound
 *   messages only.
 * @returns The matching message, or `undefined` when there is none.
 */
function findFailureOutboundMessage(
  state: QaBusState,
  options?: { sinceIndex?: number; cursorSpace?: "all" | "outbound" },
) {
  const startIndex = options?.sinceIndex ?? 0;
  const indexesAllMessages = (options?.cursorSpace ?? "outbound") === "all";
  const snapshotMessages = state.getSnapshot().messages;

  const candidates = indexesAllMessages
    ? snapshotMessages.slice(startIndex)
    : snapshotMessages.filter((message) => message.direction === "outbound").slice(startIndex);

  // The direction check still matters in "all" mode, where inbound messages remain.
  return candidates.find(
    (message) =>
      message.direction === "outbound" && Boolean(extractQaFailureReplyText(message.text)),
  );
}
|
||||
|
||||
/**
 * Builds a scenario-scoped wrapper around waitForCondition.
 *
 * The snapshot's message count is recorded when the wrapper is created. Each
 * time the wrapped check is evaluated, messages from that index onward are
 * scanned first for a classified failure reply; if one is found an Error
 * carrying its text is thrown, otherwise the caller's `check` result is returned.
 *
 * @param state Bus state observed for failure replies.
 * @returns An async `waitForScenarioCondition(check, timeoutMs = 15_000, intervalMs = 100)`.
 */
function createScenarioWaitForCondition(state: QaBusState) {
  const baselineIndex = state.getSnapshot().messages.length;

  // Throws when a failure reply has appeared since the wrapper was created.
  const assertNoFailureReply = () => {
    const failed = findFailureOutboundMessage(state, {
      sinceIndex: baselineIndex,
      cursorSpace: "all",
    });
    if (failed) {
      throw new Error(extractQaFailureReplyText(failed.text) ?? failed.text);
    }
  };

  return async function waitForScenarioCondition<T>(
    check: () => T | Promise<T | null | undefined> | null | undefined,
    timeoutMs = 15_000,
    intervalMs = 100,
  ): Promise<T> {
    return await waitForCondition(
      async () => {
        assertNoFailureReply();
        return await check();
      },
      timeoutMs,
      intervalMs,
    );
  };
}
|
||||
|
||||
async function waitForOutboundMessage(
|
||||
state: QaBusState,
|
||||
predicate: (message: QaBusMessage) => boolean,
|
||||
timeoutMs = 15_000,
|
||||
options?: { sinceIndex?: number },
|
||||
) {
|
||||
return await waitForCondition(() => {
|
||||
const failureMessage = findFailureOutboundMessage(state, options);
|
||||
if (failureMessage) {
|
||||
throw new Error(extractQaFailureReplyText(failureMessage.text) ?? failureMessage.text);
|
||||
}
|
||||
const match = state
|
||||
.getSnapshot()
|
||||
.messages.filter((message) => message.direction === "outbound")
|
||||
.slice(options?.sinceIndex ?? 0)
|
||||
.find(predicate);
|
||||
if (!match) {
|
||||
return undefined;
|
||||
}
|
||||
const failureReply = extractQaFailureReplyText(match.text);
|
||||
if (failureReply) {
|
||||
throw new Error(failureReply);
|
||||
}
|
||||
return match;
|
||||
}, timeoutMs);
|
||||
return await waitForCondition(
|
||||
() =>
|
||||
state
|
||||
.getSnapshot()
|
||||
.messages.filter((message) => message.direction === "outbound")
|
||||
.slice(options?.sinceIndex ?? 0)
|
||||
.find(predicate),
|
||||
timeoutMs,
|
||||
);
|
||||
}
|
||||
|
||||
async function waitForNoOutbound(state: QaBusState, timeoutMs = 1_200) {
|
||||
@@ -1080,7 +1027,7 @@ function createScenarioFlowApi(
|
||||
sleep,
|
||||
randomUUID,
|
||||
runScenario,
|
||||
waitForCondition: createScenarioWaitForCondition(env.lab.state),
|
||||
waitForCondition,
|
||||
waitForOutboundMessage,
|
||||
waitForNoOutbound,
|
||||
recentOutboundSummary,
|
||||
@@ -1139,12 +1086,6 @@ function createScenarioFlowApi(
|
||||
};
|
||||
}
|
||||
|
||||
// Module-private helpers re-exported as a single object so unit tests can
// call them directly.
export const qaSuiteTesting = {
  createScenarioWaitForCondition,
  findFailureOutboundMessage,
  waitForOutboundMessage,
};
|
||||
|
||||
async function runScenarioDefinition(
|
||||
env: QaSuiteEnvironment,
|
||||
scenario: ReturnType<typeof readQaBootstrapScenarioCatalog>["scenarios"][number],
|
||||
|
||||
@@ -2,11 +2,7 @@ export {
|
||||
applyQwenNativeStreamingUsageCompat,
|
||||
buildQwenDefaultModelDefinition,
|
||||
buildQwenModelDefinition,
|
||||
buildQwenModelCatalogForBaseUrl,
|
||||
isNativeQwenBaseUrl,
|
||||
isQwen36PlusSupportedBaseUrl,
|
||||
isQwenCodingPlanBaseUrl,
|
||||
QWEN_36_PLUS_MODEL_ID,
|
||||
QWEN_BASE_URL,
|
||||
QWEN_CN_BASE_URL,
|
||||
QWEN_DEFAULT_COST,
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import { defineSingleProviderPluginEntry } from "openclaw/plugin-sdk/provider-entry";
|
||||
import { applyQwenNativeStreamingUsageCompat } from "./api.js";
|
||||
import { buildQwenMediaUnderstandingProvider } from "./media-understanding-provider.js";
|
||||
import { isQwenCodingPlanBaseUrl, QWEN_36_PLUS_MODEL_ID, QWEN_BASE_URL } from "./models.js";
|
||||
import {
|
||||
applyQwenConfig,
|
||||
applyQwenConfigCn,
|
||||
@@ -13,38 +12,6 @@ import { buildQwenProvider } from "./provider-catalog.js";
|
||||
import { buildQwenVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
|
||||
const PROVIDER_ID = "qwen";
|
||||
const LEGACY_PROVIDER_ID = "modelstudio";
|
||||
|
||||
/** Returns the provider id with surrounding whitespace removed and lower-cased. */
function normalizeProviderId(value: string): string {
  const trimmed = value.trim();
  return trimmed.toLowerCase();
}
|
||||
|
||||
/**
 * Looks up the base URL configured for the Qwen provider.
 *
 * Provider entries are visited in `Object.entries` order; keys are normalized
 * with normalizeProviderId and only those equal to PROVIDER_ID or
 * LEGACY_PROVIDER_ID are considered.
 *
 * @param config Config object (or `undefined`) carrying `models.providers`.
 * @returns The first non-empty trimmed `baseUrl` found, else `undefined`.
 */
function resolveConfiguredQwenBaseUrl(
  config: { models?: { providers?: Record<string, { baseUrl?: string } | undefined> } } | undefined,
): string | undefined {
  const providerEntries = Object.entries(config?.models?.providers ?? {});
  for (const [rawId, entry] of providerEntries) {
    const id = normalizeProviderId(rawId);
    const isQwenEntry = id === PROVIDER_ID || id === LEGACY_PROVIDER_ID;
    if (isQwenEntry) {
      const candidate = entry?.baseUrl?.trim();
      if (candidate) {
        return candidate;
      }
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Reports whether the effective Qwen base URL is a Coding Plan endpoint.
 *
 * An explicit `params.baseUrl` is used when it is neither `undefined` nor
 * `null`; otherwise the URL is resolved from `params.config`.
 */
function isQwen36PlusUnsupportedForConfig(params: {
  config: Parameters<typeof resolveConfiguredQwenBaseUrl>[0];
  baseUrl?: string;
}): boolean {
  const effectiveBaseUrl = params.baseUrl ?? resolveConfiguredQwenBaseUrl(params.config);
  return isQwenCodingPlanBaseUrl(effectiveBaseUrl);
}
|
||||
|
||||
export default defineSingleProviderPluginEntry({
|
||||
id: PROVIDER_ID,
|
||||
@@ -115,7 +82,7 @@ export default defineSingleProviderPluginEntry({
|
||||
"Manage API keys: https://home.qwencloud.com/api-keys",
|
||||
"Docs: https://docs.qwencloud.com/",
|
||||
"Endpoint: coding.dashscope.aliyuncs.com",
|
||||
"Models: qwen3.5-plus, glm-5, kimi-k2.5, MiniMax-M2.5, etc.",
|
||||
"Models: qwen3.6-plus, glm-5, kimi-k2.5, MiniMax-M2.5, etc.",
|
||||
].join("\n"),
|
||||
noteTitle: "Qwen Cloud Coding Plan (China)",
|
||||
wizard: {
|
||||
@@ -138,7 +105,7 @@ export default defineSingleProviderPluginEntry({
|
||||
"Manage API keys: https://home.qwencloud.com/api-keys",
|
||||
"Docs: https://docs.qwencloud.com/",
|
||||
"Endpoint: coding-intl.dashscope.aliyuncs.com",
|
||||
"Models: qwen3.5-plus, glm-5, kimi-k2.5, MiniMax-M2.5, etc.",
|
||||
"Models: qwen3.6-plus, glm-5, kimi-k2.5, MiniMax-M2.5, etc.",
|
||||
].join("\n"),
|
||||
noteTitle: "Qwen Cloud Coding Plan (Global/Intl)",
|
||||
wizard: {
|
||||
@@ -149,46 +116,11 @@ export default defineSingleProviderPluginEntry({
|
||||
},
|
||||
],
|
||||
catalog: {
|
||||
run: async (ctx) => {
|
||||
const apiKey = ctx.resolveProviderApiKey(PROVIDER_ID).apiKey;
|
||||
if (!apiKey) {
|
||||
return null;
|
||||
}
|
||||
const baseUrl = resolveConfiguredQwenBaseUrl(ctx.config) ?? QWEN_BASE_URL;
|
||||
return {
|
||||
provider: {
|
||||
...buildQwenProvider({ baseUrl }),
|
||||
apiKey,
|
||||
},
|
||||
};
|
||||
},
|
||||
buildProvider: buildQwenProvider,
|
||||
allowExplicitBaseUrl: true,
|
||||
},
|
||||
applyNativeStreamingUsageCompat: ({ providerConfig }) =>
|
||||
applyQwenNativeStreamingUsageCompat(providerConfig),
|
||||
normalizeConfig: ({ providerConfig }) => {
|
||||
if (!isQwenCodingPlanBaseUrl(providerConfig.baseUrl)) {
|
||||
return undefined;
|
||||
}
|
||||
const models = providerConfig.models?.filter((model) => model.id !== QWEN_36_PLUS_MODEL_ID);
|
||||
return models && models.length !== providerConfig.models?.length
|
||||
? { ...providerConfig, models }
|
||||
: undefined;
|
||||
},
|
||||
suppressBuiltInModel: (ctx) => {
|
||||
const provider = normalizeProviderId(ctx.provider);
|
||||
if (
|
||||
(provider !== PROVIDER_ID && provider !== LEGACY_PROVIDER_ID) ||
|
||||
ctx.modelId !== QWEN_36_PLUS_MODEL_ID ||
|
||||
!isQwen36PlusUnsupportedForConfig({ config: ctx.config, baseUrl: ctx.baseUrl })
|
||||
) {
|
||||
return undefined;
|
||||
}
|
||||
return {
|
||||
suppress: true,
|
||||
errorMessage:
|
||||
"Unknown model: qwen/qwen3.6-plus. qwen3.6-plus is not supported on the Qwen Coding Plan endpoint; use a Standard pay-as-you-go Qwen endpoint or choose qwen/qwen3.5-plus.",
|
||||
};
|
||||
},
|
||||
},
|
||||
register(api) {
|
||||
api.registerMediaUnderstandingProvider(buildQwenMediaUnderstandingProvider());
|
||||
|
||||
@@ -15,7 +15,6 @@ export const QWEN_STANDARD_GLOBAL_BASE_URL =
|
||||
"https://dashscope-intl.aliyuncs.com/compatible-mode/v1";
|
||||
|
||||
export const QWEN_DEFAULT_MODEL_ID = "qwen3.5-plus";
|
||||
export const QWEN_36_PLUS_MODEL_ID = "qwen3.6-plus";
|
||||
export const QWEN_DEFAULT_COST = {
|
||||
input: 0,
|
||||
output: 0,
|
||||
@@ -35,8 +34,8 @@ export const QWEN_MODEL_CATALOG: ReadonlyArray<ModelDefinitionConfig> = [
|
||||
maxTokens: 65_536,
|
||||
},
|
||||
{
|
||||
id: QWEN_36_PLUS_MODEL_ID,
|
||||
name: QWEN_36_PLUS_MODEL_ID,
|
||||
id: "qwen3.6-plus",
|
||||
name: "qwen3.6-plus",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
cost: QWEN_DEFAULT_COST,
|
||||
@@ -108,33 +107,6 @@ export const QWEN_MODEL_CATALOG: ReadonlyArray<ModelDefinitionConfig> = [
|
||||
},
|
||||
];
|
||||
|
||||
/**
 * Reports whether `baseUrl` points at one of the two Qwen Coding Plan hosts.
 *
 * @param baseUrl Candidate URL; may be `undefined`, blank, or unparsable.
 * @returns `true` only when the URL parses and its hostname is
 *   `coding.dashscope.aliyuncs.com` or `coding-intl.dashscope.aliyuncs.com`;
 *   `false` in every other case, including parse failures.
 */
export function isQwenCodingPlanBaseUrl(baseUrl: string | undefined): boolean {
  if (!baseUrl?.trim()) {
    return false;
  }
  let host: string;
  try {
    host = new URL(baseUrl).hostname.toLowerCase();
  } catch {
    // Not a parsable absolute URL.
    return false;
  }
  switch (host) {
    case "coding.dashscope.aliyuncs.com":
    case "coding-intl.dashscope.aliyuncs.com":
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/** Returns `true` unless isQwenCodingPlanBaseUrl reports `baseUrl` as a Coding Plan endpoint. */
export function isQwen36PlusSupportedBaseUrl(baseUrl: string | undefined): boolean {
  const onCodingPlan = isQwenCodingPlanBaseUrl(baseUrl);
  return !onCodingPlan;
}
|
||||
|
||||
/**
 * Selects the model catalog to use for a given base URL.
 *
 * @param baseUrl Endpoint the catalog is built for.
 * @returns QWEN_MODEL_CATALOG itself when isQwen36PlusSupportedBaseUrl accepts
 *   the URL; otherwise a filtered copy without the QWEN_36_PLUS_MODEL_ID entry.
 */
export function buildQwenModelCatalogForBaseUrl(
  baseUrl: string | undefined,
): ReadonlyArray<ModelDefinitionConfig> {
  if (isQwen36PlusSupportedBaseUrl(baseUrl)) {
    return QWEN_MODEL_CATALOG;
  }
  return QWEN_MODEL_CATALOG.filter(({ id }) => id !== QWEN_36_PLUS_MODEL_ID);
}
|
||||
|
||||
export function isNativeQwenBaseUrl(baseUrl: string | undefined): boolean {
|
||||
return supportsNativeStreamingUsageCompat({
|
||||
providerId: "qwen",
|
||||
|
||||
@@ -22,7 +22,7 @@ export {
|
||||
const qwenPresetAppliers = createModelCatalogPresetAppliers<[string]>({
|
||||
primaryModelRef: QWEN_DEFAULT_MODEL_REF,
|
||||
resolveParams: (_cfg: OpenClawConfig, baseUrl: string) => {
|
||||
const provider = buildQwenProvider({ baseUrl });
|
||||
const provider = buildQwenProvider();
|
||||
return {
|
||||
providerId: "qwen",
|
||||
api: provider.api ?? "openai-completions",
|
||||
|
||||
@@ -3,7 +3,6 @@ import {
|
||||
applyQwenNativeStreamingUsageCompat,
|
||||
buildQwenProvider,
|
||||
QWEN_BASE_URL,
|
||||
QWEN_STANDARD_GLOBAL_BASE_URL,
|
||||
QWEN_DEFAULT_MODEL_ID,
|
||||
} from "./api.js";
|
||||
|
||||
@@ -15,15 +14,7 @@ describe("qwen provider catalog", () => {
|
||||
expect(provider.api).toBe("openai-completions");
|
||||
expect(provider.models?.length).toBeGreaterThan(0);
|
||||
expect(provider.models?.find((model) => model.id === QWEN_DEFAULT_MODEL_ID)).toBeTruthy();
|
||||
expect(provider.models?.find((model) => model.id === "qwen3.6-plus")).toBeFalsy();
|
||||
});
|
||||
|
||||
it("only advertises qwen3.6-plus on Standard endpoints", () => {
|
||||
const coding = buildQwenProvider({ baseUrl: QWEN_BASE_URL });
|
||||
const standard = buildQwenProvider({ baseUrl: QWEN_STANDARD_GLOBAL_BASE_URL });
|
||||
|
||||
expect(coding.models?.find((model) => model.id === "qwen3.6-plus")).toBeFalsy();
|
||||
expect(standard.models?.find((model) => model.id === "qwen3.6-plus")).toBeTruthy();
|
||||
expect(provider.models?.find((model) => model.id === "qwen3.6-plus")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("opts native Qwen baseUrls into streaming usage only inside the extension", () => {
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
import type { ModelProviderConfig } from "openclaw/plugin-sdk/provider-model-shared";
|
||||
import { buildQwenModelCatalogForBaseUrl, QWEN_BASE_URL } from "./models.js";
|
||||
import { QWEN_BASE_URL, QWEN_MODEL_CATALOG } from "./models.js";
|
||||
|
||||
export function buildQwenProvider(params?: { baseUrl?: string }): ModelProviderConfig {
|
||||
const baseUrl = params?.baseUrl ?? QWEN_BASE_URL;
|
||||
export function buildQwenProvider(): ModelProviderConfig {
|
||||
return {
|
||||
baseUrl,
|
||||
baseUrl: QWEN_BASE_URL,
|
||||
api: "openai-completions",
|
||||
models: buildQwenModelCatalogForBaseUrl(baseUrl).map((model) => ({ ...model })),
|
||||
models: QWEN_MODEL_CATALOG.map((model) => ({ ...model })),
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ export {
|
||||
collectRuntimeConfigAssignments,
|
||||
secretTargetRegistryEntries,
|
||||
} from "./src/secret-contract.js";
|
||||
export { createSlackOutboundPayloadHarness } from "./src/outbound-payload-harness.js";
|
||||
export type {
|
||||
SlackInteractiveHandlerContext,
|
||||
SlackInteractiveHandlerRegistration,
|
||||
|
||||
@@ -372,21 +372,6 @@ describe("slackPlugin outbound", () => {
|
||||
},
|
||||
};
|
||||
|
||||
it("treats ACP block text as visible delivered output", () => {
  // Same "hello" text for both kinds; only the dispatch kind differs.
  const visibilityFor = (kind: "tool" | "block") =>
    slackPlugin.outbound?.shouldTreatDeliveredTextAsVisible?.({
      kind,
      text: "hello",
    });

  expect(visibilityFor("block")).toBe(true);
  expect(visibilityFor("tool")).toBe(false);
});
|
||||
|
||||
it("advertises the 8000-character Slack default chunk limit", () => {
|
||||
expect(slackOutbound.textChunkLimit).toBe(8000);
|
||||
expect(slackPlugin.outbound?.textChunkLimit).toBe(8000);
|
||||
|
||||
@@ -93,15 +93,6 @@ async function resolveSlackHandleAction() {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Decides whether delivered text counts as visible output.
 *
 * @param params.kind Dispatch kind of the delivered payload.
 * @param params.text Delivered text, if any.
 * @returns `true` only for kind "block" with text that is non-empty after trimming.
 */
function shouldTreatSlackDeliveredTextAsVisible(params: {
  kind: "tool" | "block" | "final";
  text?: string;
}): boolean {
  if (params.kind !== "block") {
    return false;
  }
  if (typeof params.text !== "string") {
    return false;
  }
  return params.text.trim().length > 0;
}
|
||||
|
||||
// Select the appropriate Slack token for read/write operations.
|
||||
function getTokenForOperation(
|
||||
account: ResolvedSlackAccount,
|
||||
@@ -574,7 +565,6 @@ export const slackPlugin: ChannelPlugin<ResolvedSlackAccount, SlackProbe> = crea
|
||||
deliveryMode: "direct",
|
||||
chunker: null,
|
||||
textChunkLimit: SLACK_TEXT_LIMIT,
|
||||
shouldTreatDeliveredTextAsVisible: shouldTreatSlackDeliveredTextAsVisible,
|
||||
shouldSuppressLocalPayloadPrompt: ({ cfg, accountId, payload }) =>
|
||||
shouldSuppressLocalSlackExecApprovalPrompt({
|
||||
cfg,
|
||||
|
||||
@@ -1,312 +0,0 @@
|
||||
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
const FINAL_REPLY_TEXT = "final answer";
|
||||
const THREAD_TS = "thread-1";
|
||||
const SAME_TEXT = "same reply";
|
||||
|
||||
const createSlackDraftStreamMock = vi.fn();
|
||||
const deliverRepliesMock = vi.fn(async () => {});
|
||||
const finalizeSlackPreviewEditMock = vi.fn(async () => {});
|
||||
let mockedDispatchSequence: Array<{
|
||||
kind: "tool" | "block" | "final";
|
||||
payload: { text: string };
|
||||
}> = [];
|
||||
|
||||
const noop = () => {};
|
||||
const noopAsync = async () => {};
|
||||
|
||||
// Inert draft stream for tests: every operation is a no-op and the stream
// reports a fixed message id and channel id.
function createDraftStreamStub() {
  const stub = {
    update: noop,
    flush: noopAsync,
    clear: noopAsync,
    stop: noop,
    forceNewMessage: noop,
    messageId: () => "171234.567",
    channelId: () => "C123",
  };
  return stub;
}
|
||||
|
||||
// Minimal prepared-message fixture for dispatchPreparedSlackMessage: a
// threaded channel message (thread THREAD_TS) in channel C123. Cast to `never`
// so it is accepted wherever the real prepared-message type is expected.
function createPreparedSlackMessage() {
  const ctx = {
    cfg: {},
    runtime: {},
    botToken: "xoxb-test",
    app: { client: {} },
    teamId: "T1",
    textLimit: 4000,
    typingReaction: "",
    removeAckAfterReply: false,
    historyLimit: 0,
    channelHistories: new Map(),
    allowFrom: [],
    setSlackThreadStatus: async () => undefined,
  };
  const account = {
    accountId: "default",
    config: {},
  };
  const message = {
    channel: "C123",
    ts: "171234.111",
    thread_ts: THREAD_TS,
    user: "U123",
  };
  const route = {
    agentId: "agent-1",
    accountId: "default",
    mainSessionKey: "main",
  };

  return {
    ctx,
    account,
    message,
    route,
    channelConfig: null,
    replyTarget: "channel:C123",
    ctxPayload: {
      MessageThreadId: THREAD_TS,
    },
    replyToMode: "all",
    isDirectMessage: false,
    isRoomish: false,
    historyKey: "history-key",
    preview: "",
    ackReactionValue: "eyes",
    ackReactionPromise: null,
  } as never;
}
|
||||
|
||||
vi.mock("openclaw/plugin-sdk/agent-runtime", () => ({
|
||||
resolveHumanDelayConfig: () => undefined,
|
||||
}));
|
||||
|
||||
vi.mock("openclaw/plugin-sdk/channel-feedback", () => ({
|
||||
DEFAULT_TIMING: {
|
||||
doneHoldMs: 0,
|
||||
errorHoldMs: 0,
|
||||
},
|
||||
createStatusReactionController: () => ({
|
||||
setQueued: async () => {},
|
||||
setThinking: async () => {},
|
||||
setTool: async () => {},
|
||||
setError: async () => {},
|
||||
setDone: async () => {},
|
||||
clear: async () => {},
|
||||
restoreInitial: async () => {},
|
||||
}),
|
||||
logAckFailure: () => {},
|
||||
logTypingFailure: () => {},
|
||||
removeAckReactionAfterReply: () => {},
|
||||
}));
|
||||
|
||||
vi.mock("openclaw/plugin-sdk/channel-reply-pipeline", () => ({
|
||||
createChannelReplyPipeline: () => ({
|
||||
typingCallbacks: {
|
||||
onIdle: vi.fn(),
|
||||
},
|
||||
onModelSelected: undefined,
|
||||
}),
|
||||
}));
|
||||
|
||||
vi.mock("openclaw/plugin-sdk/channel-streaming", () => ({
|
||||
resolveChannelStreamingBlockEnabled: () => false,
|
||||
resolveChannelStreamingNativeTransport: () => false,
|
||||
}));
|
||||
|
||||
vi.mock("openclaw/plugin-sdk/outbound-runtime", () => ({
|
||||
resolveAgentOutboundIdentity: () => undefined,
|
||||
}));
|
||||
|
||||
vi.mock("openclaw/plugin-sdk/reply-history", () => ({
|
||||
clearHistoryEntriesIfEnabled: () => {},
|
||||
}));
|
||||
|
||||
vi.mock("openclaw/plugin-sdk/reply-payload", () => ({
|
||||
resolveSendableOutboundReplyParts: (
|
||||
payload: { text?: string; mediaUrl?: string; mediaUrls?: string[] },
|
||||
opts?: { text?: string },
|
||||
) => {
|
||||
const text = (opts?.text ?? payload.text ?? "").trim();
|
||||
const mediaUrls = payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []);
|
||||
return {
|
||||
text,
|
||||
trimmedText: text,
|
||||
hasText: text.length > 0,
|
||||
hasMedia: mediaUrls.length > 0,
|
||||
mediaUrls,
|
||||
hasContent: text.length > 0 || mediaUrls.length > 0,
|
||||
};
|
||||
},
|
||||
}));
|
||||
|
||||
vi.mock("openclaw/plugin-sdk/runtime-env", () => ({
|
||||
danger: (message: string) => message,
|
||||
logVerbose: () => {},
|
||||
shouldLogVerbose: () => false,
|
||||
}));
|
||||
|
||||
vi.mock("openclaw/plugin-sdk/security-runtime", () => ({
|
||||
resolvePinnedMainDmOwnerFromAllowlist: () => undefined,
|
||||
}));
|
||||
|
||||
vi.mock("openclaw/plugin-sdk/text-runtime", () => ({
|
||||
normalizeOptionalLowercaseString: (value?: string) => value?.toLowerCase(),
|
||||
}));
|
||||
|
||||
vi.mock("../../actions.js", () => ({
|
||||
reactSlackMessage: async () => {},
|
||||
removeSlackReaction: async () => {},
|
||||
}));
|
||||
|
||||
vi.mock("../../draft-stream.js", () => ({
|
||||
createSlackDraftStream: createSlackDraftStreamMock,
|
||||
}));
|
||||
|
||||
vi.mock("../../format.js", () => ({
|
||||
normalizeSlackOutboundText: (value: string) => value.trim(),
|
||||
}));
|
||||
|
||||
vi.mock("../../limits.js", () => ({
|
||||
SLACK_TEXT_LIMIT: 4000,
|
||||
}));
|
||||
|
||||
vi.mock("../../sent-thread-cache.js", () => ({
|
||||
recordSlackThreadParticipation: () => {},
|
||||
}));
|
||||
|
||||
vi.mock("../../stream-mode.js", () => ({
|
||||
applyAppendOnlyStreamUpdate: ({ incoming }: { incoming: string }) => ({
|
||||
changed: true,
|
||||
rendered: incoming,
|
||||
source: incoming,
|
||||
}),
|
||||
buildStatusFinalPreviewText: () => "status",
|
||||
resolveSlackStreamingConfig: () => ({
|
||||
mode: "partial",
|
||||
nativeStreaming: false,
|
||||
draftMode: "append",
|
||||
}),
|
||||
}));
|
||||
|
||||
vi.mock("../../streaming.js", () => ({
|
||||
appendSlackStream: async () => {},
|
||||
startSlackStream: async () => ({
|
||||
threadTs: THREAD_TS,
|
||||
stopped: false,
|
||||
}),
|
||||
stopSlackStream: async () => {},
|
||||
}));
|
||||
|
||||
vi.mock("../../threading.js", () => ({
|
||||
resolveSlackThreadTargets: () => ({
|
||||
statusThreadTs: THREAD_TS,
|
||||
isThreadReply: true,
|
||||
}),
|
||||
}));
|
||||
|
||||
vi.mock("../allow-list.js", () => ({
|
||||
normalizeSlackAllowOwnerEntry: (value: string) => value,
|
||||
}));
|
||||
|
||||
vi.mock("../config.runtime.js", () => ({
|
||||
resolveStorePath: () => "/tmp/openclaw-store.json",
|
||||
updateLastRoute: async () => {},
|
||||
}));
|
||||
|
||||
vi.mock("../replies.js", () => ({
|
||||
createSlackReplyDeliveryPlan: () => ({
|
||||
nextThreadTs: () => THREAD_TS,
|
||||
markSent: () => {},
|
||||
}),
|
||||
deliverReplies: deliverRepliesMock,
|
||||
readSlackReplyBlocks: () => undefined,
|
||||
resolveSlackThreadTs: () => THREAD_TS,
|
||||
}));
|
||||
|
||||
vi.mock("../reply.runtime.js", () => ({
|
||||
createReplyDispatcherWithTyping: (params: {
|
||||
deliver: (payload: unknown, info: { kind: "tool" | "block" | "final" }) => Promise<void>;
|
||||
}) => ({
|
||||
dispatcher: {
|
||||
deliver: params.deliver,
|
||||
},
|
||||
replyOptions: {},
|
||||
markDispatchIdle: () => {},
|
||||
}),
|
||||
dispatchInboundMessage: async (params: {
|
||||
dispatcher: {
|
||||
deliver: (
|
||||
payload: { text: string },
|
||||
info: { kind: "tool" | "block" | "final" },
|
||||
) => Promise<void>;
|
||||
};
|
||||
}) => {
|
||||
for (const entry of mockedDispatchSequence) {
|
||||
await params.dispatcher.deliver(entry.payload, { kind: entry.kind });
|
||||
}
|
||||
return {
|
||||
queuedFinal: false,
|
||||
counts: {
|
||||
final: mockedDispatchSequence.filter((entry) => entry.kind === "final").length,
|
||||
},
|
||||
};
|
||||
},
|
||||
}));
|
||||
|
||||
vi.mock("./preview-finalize.js", () => ({
|
||||
finalizeSlackPreviewEdit: finalizeSlackPreviewEditMock,
|
||||
}));
|
||||
|
||||
let dispatchPreparedSlackMessage: typeof import("./dispatch.js").dispatchPreparedSlackMessage;
|
||||
|
||||
describe("dispatchPreparedSlackMessage preview fallback", () => {
|
||||
beforeAll(async () => {
|
||||
({ dispatchPreparedSlackMessage } = await import("./dispatch.js"));
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
createSlackDraftStreamMock.mockReset();
|
||||
deliverRepliesMock.mockReset();
|
||||
finalizeSlackPreviewEditMock.mockReset();
|
||||
mockedDispatchSequence = [{ kind: "final", payload: { text: FINAL_REPLY_TEXT } }];
|
||||
|
||||
createSlackDraftStreamMock.mockReturnValue(createDraftStreamStub());
|
||||
finalizeSlackPreviewEditMock.mockRejectedValue(new Error("socket closed"));
|
||||
});
|
||||
|
||||
it("falls back to normal delivery when preview finalize fails", async () => {
|
||||
await dispatchPreparedSlackMessage(createPreparedSlackMessage());
|
||||
|
||||
expect(finalizeSlackPreviewEditMock).toHaveBeenCalledTimes(1);
|
||||
expect(deliverRepliesMock).toHaveBeenCalledTimes(1);
|
||||
expect(deliverRepliesMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
replyThreadTs: THREAD_TS,
|
||||
replies: [expect.objectContaining({ text: FINAL_REPLY_TEXT })],
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("keeps same-content tool and final payloads distinct after preview fallback", async () => {
|
||||
mockedDispatchSequence = [
|
||||
{ kind: "tool", payload: { text: SAME_TEXT } },
|
||||
{ kind: "final", payload: { text: SAME_TEXT } },
|
||||
];
|
||||
|
||||
await dispatchPreparedSlackMessage(createPreparedSlackMessage());
|
||||
|
||||
expect(finalizeSlackPreviewEditMock).toHaveBeenCalledTimes(2);
|
||||
expect(deliverRepliesMock).toHaveBeenCalledTimes(2);
|
||||
expect(deliverRepliesMock).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
expect.objectContaining({
|
||||
replyThreadTs: THREAD_TS,
|
||||
replies: [expect.objectContaining({ text: SAME_TEXT })],
|
||||
}),
|
||||
);
|
||||
expect(deliverRepliesMock).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
expect.objectContaining({
|
||||
replyThreadTs: THREAD_TS,
|
||||
replies: [expect.objectContaining({ text: SAME_TEXT })],
|
||||
}),
|
||||
);
|
||||
});
|
||||
});
|
||||
@@ -1,6 +1,5 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
createSlackTurnDeliveryTracker,
|
||||
isSlackStreamingEnabled,
|
||||
resolveSlackStreamingThreadHint,
|
||||
shouldEnableSlackPreviewStreaming,
|
||||
@@ -20,46 +19,6 @@ describe("slack native streaming defaults", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("slack turn delivery tracker", () => {
  const SHARED_THREAD = "123.456";

  // Builds a fresh delivery attempt for each call.
  const attempt = (
    kind: "tool" | "final",
    payload: { text: string; replyToId?: string },
    threadTs: string = SHARED_THREAD,
  ) => ({ kind, payload, threadTs });

  it("treats repeated text payloads on the same thread as duplicates", () => {
    const deliveries = createSlackTurnDeliveryTracker();
    const reply = { text: "same reply" };

    expect(deliveries.hasDelivered(attempt("final", reply))).toBe(false);
    deliveries.markDelivered(attempt("final", reply));
    expect(deliveries.hasDelivered(attempt("final", reply))).toBe(true);
    expect(deliveries.hasDelivered(attempt("final", reply, "other-thread"))).toBe(false);
  });

  it("keeps explicit reply targets distinct from the shared thread target", () => {
    const deliveries = createSlackTurnDeliveryTracker();

    deliveries.markDelivered(attempt("final", { text: "same reply", replyToId: "thread-A" }));

    expect(
      deliveries.hasDelivered(attempt("final", { text: "same reply", replyToId: "thread-B" })),
    ).toBe(false);
  });

  it("keeps distinct dispatch kinds separate for identical payloads", () => {
    const deliveries = createSlackTurnDeliveryTracker();
    const reply = { text: "same reply" };

    deliveries.markDelivered(attempt("tool", reply));

    expect(deliveries.hasDelivered(attempt("tool", reply))).toBe(true);
    expect(deliveries.hasDelivered(attempt("final", reply))).toBe(false);
  });
});
|
||||
|
||||
describe("slack native streaming thread hint", () => {
|
||||
it("stays off-thread when replyToMode=off and message is not in a thread", () => {
|
||||
expect(
|
||||
|
||||
@@ -15,7 +15,7 @@ import {
|
||||
import { resolveAgentOutboundIdentity } from "openclaw/plugin-sdk/outbound-runtime";
|
||||
import { clearHistoryEntriesIfEnabled } from "openclaw/plugin-sdk/reply-history";
|
||||
import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload";
|
||||
import type { ReplyDispatchKind, ReplyPayload } from "openclaw/plugin-sdk/reply-runtime";
|
||||
import type { ReplyPayload } from "openclaw/plugin-sdk/reply-runtime";
|
||||
import { danger, logVerbose, shouldLogVerbose } from "openclaw/plugin-sdk/runtime-env";
|
||||
import { resolvePinnedMainDmOwnerFromAllowlist } from "openclaw/plugin-sdk/security-runtime";
|
||||
import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/text-runtime";
|
||||
@@ -123,47 +123,6 @@ export function resolveSlackStreamingThreadHint(params: {
|
||||
});
|
||||
}
|
||||
|
||||
type SlackTurnDeliveryAttempt = {
|
||||
kind: ReplyDispatchKind;
|
||||
payload: ReplyPayload;
|
||||
threadTs?: string;
|
||||
textOverride?: string;
|
||||
};
|
||||
|
||||
function buildSlackTurnDeliveryKey(params: SlackTurnDeliveryAttempt): string | null {
|
||||
const reply = resolveSendableOutboundReplyParts(params.payload, {
|
||||
text: params.textOverride,
|
||||
});
|
||||
const slackBlocks = readSlackReplyBlocks(params.payload);
|
||||
if (!reply.hasContent && !slackBlocks?.length) {
|
||||
return null;
|
||||
}
|
||||
return JSON.stringify({
|
||||
kind: params.kind,
|
||||
threadTs: params.threadTs ?? "",
|
||||
replyToId: params.payload.replyToId ?? null,
|
||||
text: reply.trimmedText,
|
||||
mediaUrls: reply.mediaUrls,
|
||||
blocks: slackBlocks ?? null,
|
||||
});
|
||||
}
|
||||
|
||||
export function createSlackTurnDeliveryTracker() {
|
||||
const deliveredKeys = new Set<string>();
|
||||
return {
|
||||
hasDelivered(params: SlackTurnDeliveryAttempt) {
|
||||
const key = buildSlackTurnDeliveryKey(params);
|
||||
return key ? deliveredKeys.has(key) : false;
|
||||
},
|
||||
markDelivered(params: SlackTurnDeliveryAttempt) {
|
||||
const key = buildSlackTurnDeliveryKey(params);
|
||||
if (key) {
|
||||
deliveredKeys.add(key);
|
||||
}
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function shouldUseStreaming(params: {
|
||||
streamingEnabled: boolean;
|
||||
threadTs: string | undefined;
|
||||
@@ -390,26 +349,11 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag
|
||||
let streamFailed = false;
|
||||
let usedReplyThreadTs: string | undefined;
|
||||
let observedReplyDelivery = false;
|
||||
const deliveryTracker = createSlackTurnDeliveryTracker();
|
||||
|
||||
const deliverNormally = async (params: {
|
||||
payload: ReplyPayload;
|
||||
kind: ReplyDispatchKind;
|
||||
forcedThreadTs?: string;
|
||||
}): Promise<void> => {
|
||||
const replyThreadTs = params.forcedThreadTs ?? replyPlan.nextThreadTs();
|
||||
if (
|
||||
deliveryTracker.hasDelivered({
|
||||
kind: params.kind,
|
||||
payload: params.payload,
|
||||
threadTs: replyThreadTs,
|
||||
})
|
||||
) {
|
||||
logVerbose("slack: suppressed duplicate normal delivery within the same turn");
|
||||
return;
|
||||
}
|
||||
const deliverNormally = async (payload: ReplyPayload, forcedThreadTs?: string): Promise<void> => {
|
||||
const replyThreadTs = forcedThreadTs ?? replyPlan.nextThreadTs();
|
||||
await deliverReplies({
|
||||
replies: [params.payload],
|
||||
replies: [payload],
|
||||
target: prepared.replyTarget,
|
||||
token: ctx.botToken,
|
||||
accountId: account.accountId,
|
||||
@@ -425,29 +369,12 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag
|
||||
usedReplyThreadTs ??= replyThreadTs;
|
||||
}
|
||||
replyPlan.markSent();
|
||||
deliveryTracker.markDelivered({
|
||||
kind: params.kind,
|
||||
payload: params.payload,
|
||||
threadTs: replyThreadTs,
|
||||
});
|
||||
};
|
||||
|
||||
const deliverWithStreaming = async (params: {
|
||||
payload: ReplyPayload;
|
||||
kind: ReplyDispatchKind;
|
||||
}): Promise<void> => {
|
||||
const reply = resolveSendableOutboundReplyParts(params.payload);
|
||||
if (
|
||||
streamFailed ||
|
||||
reply.hasMedia ||
|
||||
readSlackReplyBlocks(params.payload)?.length ||
|
||||
!reply.hasText
|
||||
) {
|
||||
await deliverNormally({
|
||||
payload: params.payload,
|
||||
kind: params.kind,
|
||||
forcedThreadTs: streamSession?.threadTs,
|
||||
});
|
||||
const deliverWithStreaming = async (payload: ReplyPayload): Promise<void> => {
|
||||
const reply = resolveSendableOutboundReplyParts(payload);
|
||||
if (streamFailed || reply.hasMedia || readSlackReplyBlocks(payload)?.length || !reply.hasText) {
|
||||
await deliverNormally(payload, streamSession?.threadTs);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -462,18 +389,7 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag
|
||||
"slack-stream: no reply thread target for stream start, falling back to normal delivery",
|
||||
);
|
||||
streamFailed = true;
|
||||
await deliverNormally({ payload: params.payload, kind: params.kind });
|
||||
return;
|
||||
}
|
||||
if (
|
||||
deliveryTracker.hasDelivered({
|
||||
kind: params.kind,
|
||||
payload: params.payload,
|
||||
threadTs: streamThreadTs,
|
||||
textOverride: text,
|
||||
})
|
||||
) {
|
||||
logVerbose("slack-stream: suppressed duplicate stream start payload");
|
||||
await deliverNormally(payload);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -488,23 +404,6 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag
|
||||
observedReplyDelivery = true;
|
||||
usedReplyThreadTs ??= streamThreadTs;
|
||||
replyPlan.markSent();
|
||||
deliveryTracker.markDelivered({
|
||||
kind: params.kind,
|
||||
payload: params.payload,
|
||||
threadTs: streamThreadTs,
|
||||
textOverride: text,
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (
|
||||
deliveryTracker.hasDelivered({
|
||||
kind: params.kind,
|
||||
payload: params.payload,
|
||||
threadTs: streamSession.threadTs,
|
||||
textOverride: text,
|
||||
})
|
||||
) {
|
||||
logVerbose("slack-stream: suppressed duplicate append payload");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -512,31 +411,21 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag
|
||||
session: streamSession,
|
||||
text: "\n" + text,
|
||||
});
|
||||
deliveryTracker.markDelivered({
|
||||
kind: params.kind,
|
||||
payload: params.payload,
|
||||
threadTs: streamSession.threadTs,
|
||||
textOverride: text,
|
||||
});
|
||||
} catch (err) {
|
||||
runtime.error?.(
|
||||
danger(`slack-stream: streaming API call failed: ${String(err)}, falling back`),
|
||||
);
|
||||
streamFailed = true;
|
||||
await deliverNormally({
|
||||
payload: params.payload,
|
||||
kind: params.kind,
|
||||
forcedThreadTs: streamSession?.threadTs ?? plannedThreadTs,
|
||||
});
|
||||
await deliverNormally(payload, streamSession?.threadTs ?? plannedThreadTs);
|
||||
}
|
||||
};
|
||||
|
||||
const { dispatcher, replyOptions, markDispatchIdle } = createReplyDispatcherWithTyping({
|
||||
...replyPipeline,
|
||||
humanDelay: resolveHumanDelayConfig(cfg, route.agentId),
|
||||
deliver: async (payload, info) => {
|
||||
deliver: async (payload) => {
|
||||
if (useStreaming) {
|
||||
await deliverWithStreaming({ payload, kind: info.kind });
|
||||
await deliverWithStreaming(payload);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -555,11 +444,6 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag
|
||||
typeof draftChannelId === "string";
|
||||
|
||||
if (canFinalizeViaPreviewEdit) {
|
||||
const finalThreadTs = usedReplyThreadTs ?? statusThreadTs;
|
||||
if (deliveryTracker.hasDelivered({ kind: info.kind, payload, threadTs: finalThreadTs })) {
|
||||
observedReplyDelivery = true;
|
||||
return;
|
||||
}
|
||||
draftStream?.stop();
|
||||
try {
|
||||
await finalizeSlackPreviewEdit({
|
||||
@@ -570,10 +454,9 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag
|
||||
messageId: draftMessageId,
|
||||
text: normalizeSlackOutboundText(trimmedFinalText),
|
||||
...(slackBlocks?.length ? { blocks: slackBlocks } : {}),
|
||||
threadTs: finalThreadTs,
|
||||
threadTs: usedReplyThreadTs ?? statusThreadTs,
|
||||
});
|
||||
observedReplyDelivery = true;
|
||||
deliveryTracker.markDelivered({ kind: info.kind, payload, threadTs: finalThreadTs });
|
||||
return;
|
||||
} catch (err) {
|
||||
logVerbose(
|
||||
@@ -600,7 +483,7 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag
|
||||
hasStreamedMessage = false;
|
||||
}
|
||||
|
||||
await deliverNormally({ payload, kind: info.kind });
|
||||
await deliverNormally(payload);
|
||||
},
|
||||
onError: (err, info) => {
|
||||
runtime.error?.(danger(`slack ${info.kind} reply failed: ${String(err)}`));
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import type { ReplyPayload } from "openclaw/plugin-sdk/reply-runtime";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { createSlackOutboundPayloadHarness } from "../test-api.js";
|
||||
import { createSlackOutboundPayloadHarness } from "../contract-api.js";
|
||||
|
||||
function createHarness(params: {
|
||||
payload: ReplyPayload;
|
||||
|
||||
@@ -101,7 +101,7 @@ export const secretTargetRegistryEntries = [
|
||||
|
||||
export function collectRuntimeConfigAssignments(params: {
|
||||
config: { channels?: Record<string, unknown> };
|
||||
defaults?: SecretDefaults;
|
||||
defaults: SecretDefaults | undefined;
|
||||
context: ResolverContext;
|
||||
}): void {
|
||||
const resolved = getChannelSurface(params.config, "slack");
|
||||
|
||||
@@ -3,7 +3,6 @@ export type { SlackMessageEvent } from "./src/types.js";
|
||||
export { slackPlugin } from "./src/channel.js";
|
||||
export { setSlackRuntime } from "./src/runtime.js";
|
||||
export { createSlackActions } from "./src/channel-actions.js";
|
||||
export { createSlackOutboundPayloadHarness } from "./src/outbound-payload.test-harness.js";
|
||||
export { prepareSlackMessage } from "./src/monitor/message-handler/prepare.js";
|
||||
export { createInboundSlackTestContext } from "./src/monitor/message-handler/prepare.test-helpers.js";
|
||||
export { slackOutbound } from "./src/outbound-adapter.js";
|
||||
|
||||
@@ -5,8 +5,10 @@ import {
|
||||
createInboundDebouncer,
|
||||
resolveInboundDebounceMs,
|
||||
} from "openclaw/plugin-sdk/channel-inbound";
|
||||
import { resolveStoredModelOverride } from "openclaw/plugin-sdk/command-auth";
|
||||
import { buildCommandsMessagePaginated } from "openclaw/plugin-sdk/command-status";
|
||||
import {
|
||||
buildCommandsMessagePaginated,
|
||||
resolveStoredModelOverride,
|
||||
} from "openclaw/plugin-sdk/command-auth";
|
||||
import { writeConfigFile } from "openclaw/plugin-sdk/config-runtime";
|
||||
import {
|
||||
loadSessionStore,
|
||||
|
||||
@@ -65,7 +65,7 @@ export const secretTargetRegistryEntries = [
|
||||
|
||||
export function collectRuntimeConfigAssignments(params: {
|
||||
config: { channels?: Record<string, unknown> };
|
||||
defaults?: SecretDefaults;
|
||||
defaults: SecretDefaults | undefined;
|
||||
context: ResolverContext;
|
||||
}): void {
|
||||
const resolved = getChannelSurface(params.config, "telegram");
|
||||
|
||||
@@ -5,6 +5,10 @@ import {
|
||||
isWhatsAppGroupJid as isWhatsAppGroupJidImpl,
|
||||
normalizeWhatsAppTarget as normalizeWhatsAppTargetImpl,
|
||||
} from "./src/normalize-target.js";
|
||||
import {
|
||||
createWhatsAppPollFixture as createWhatsAppPollFixtureImpl,
|
||||
expectWhatsAppPollSent as expectWhatsAppPollSentImpl,
|
||||
} from "./src/outbound-test-support.js";
|
||||
import { resolveWhatsAppRuntimeGroupPolicy as resolveWhatsAppRuntimeGroupPolicyImpl } from "./src/runtime-group-policy.js";
|
||||
import {
|
||||
canonicalizeLegacySessionKey as canonicalizeLegacySessionKeyImpl,
|
||||
@@ -16,6 +20,8 @@ export {
|
||||
} from "./src/security-contract.js";
|
||||
|
||||
export const canonicalizeLegacySessionKey = canonicalizeLegacySessionKeyImpl;
|
||||
export const createWhatsAppPollFixture = createWhatsAppPollFixtureImpl;
|
||||
export const expectWhatsAppPollSent = expectWhatsAppPollSentImpl;
|
||||
export const isLegacyGroupSessionKey = isLegacyGroupSessionKeyImpl;
|
||||
export const isWhatsAppGroupJid = isWhatsAppGroupJidImpl;
|
||||
export const normalizeWhatsAppTarget = normalizeWhatsAppTargetImpl;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { createWhatsAppPollFixture, expectWhatsAppPollSent } from "../test-api.js";
|
||||
import { createWhatsAppPollFixture, expectWhatsAppPollSent } from "../contract-api.js";
|
||||
import { createWhatsAppOutboundBase } from "./outbound-base.js";
|
||||
|
||||
describe("createWhatsAppOutboundBase", () => {
|
||||
|
||||
@@ -1,3 +1,2 @@
|
||||
export { whatsappOutbound } from "./src/outbound-adapter.js";
|
||||
export { resolveWhatsAppRuntimeGroupPolicy } from "./src/runtime-group-policy.js";
|
||||
export { createWhatsAppPollFixture, expectWhatsAppPollSent } from "./src/outbound-test-support.js";
|
||||
|
||||
@@ -56,7 +56,7 @@ export const secretTargetRegistryEntries = [
|
||||
|
||||
export function collectRuntimeConfigAssignments(params: {
|
||||
config: { channels?: Record<string, unknown> };
|
||||
defaults?: SecretDefaults;
|
||||
defaults: SecretDefaults | undefined;
|
||||
context: ResolverContext;
|
||||
}): void {
|
||||
const resolved = getChannelSurface(params.config, "zalo");
|
||||
|
||||
@@ -453,10 +453,6 @@
|
||||
"types": "./dist/plugin-sdk/command-auth-native.d.ts",
|
||||
"default": "./dist/plugin-sdk/command-auth-native.js"
|
||||
},
|
||||
"./plugin-sdk/command-status": {
|
||||
"types": "./dist/plugin-sdk/command-status.d.ts",
|
||||
"default": "./dist/plugin-sdk/command-status.js"
|
||||
},
|
||||
"./plugin-sdk/command-detection": {
|
||||
"types": "./dist/plugin-sdk/command-detection.d.ts",
|
||||
"default": "./dist/plugin-sdk/command-detection.js"
|
||||
@@ -1271,7 +1267,6 @@
|
||||
"test:live:media:music": "node --import tsx scripts/test-live-media.ts music",
|
||||
"test:live:media:video": "node --import tsx scripts/test-live-media.ts video",
|
||||
"test:live:models-profiles": "node scripts/test-live.mjs -- src/agents/models.profiles.live.test.ts",
|
||||
"test:macos:ci": "node scripts/test-projects.mjs src/daemon/launchd.test.ts src/daemon/runtime-paths.test.ts src/daemon/runtime-binary.test.ts src/infra/brew.test.ts src/infra/stable-node-path.test.ts test/scripts/vitest-process-group.test.ts",
|
||||
"test:max": "OPENCLAW_VITEST_MAX_WORKERS=8 node scripts/test-projects.mjs",
|
||||
"test:parallels:linux": "bash scripts/e2e/parallels-linux-smoke.sh",
|
||||
"test:parallels:macos": "bash scripts/e2e/parallels-macos-smoke.sh",
|
||||
@@ -1298,7 +1293,6 @@
|
||||
"test:unit:fast:audit": "node scripts/test-unit-fast-audit.mjs",
|
||||
"test:voicecall:closedloop": "node scripts/test-voicecall-closedloop.mjs",
|
||||
"test:watch": "node scripts/test-projects.mjs --watch",
|
||||
"test:windows:ci": "node scripts/test-projects.mjs src/process/exec.windows.test.ts src/process/windows-command.test.ts src/infra/windows-install-roots.test.ts test/scripts/npm-runner.test.ts test/scripts/pnpm-runner.test.ts test/scripts/ui.test.ts test/scripts/vitest-process-group.test.ts",
|
||||
"tool-display:check": "node --import tsx scripts/tool-display.ts --check",
|
||||
"tool-display:write": "node --import tsx scripts/tool-display.ts --write",
|
||||
"ts-topology": "node --import tsx scripts/ts-topology.ts",
|
||||
@@ -1344,7 +1338,6 @@
|
||||
"cli-highlight": "^2.1.11",
|
||||
"commander": "^14.0.3",
|
||||
"croner": "^10.0.1",
|
||||
"discord-api-types": "^0.38.44",
|
||||
"dotenv": "^17.4.0",
|
||||
"express": "^5.2.1",
|
||||
"file-type": "22.0.0",
|
||||
@@ -1364,7 +1357,6 @@
|
||||
"matrix-js-sdk": "41.3.0-rc.0",
|
||||
"mpg123-decoder": "^1.0.3",
|
||||
"node-edge-tts": "^1.2.10",
|
||||
"nostr-tools": "^2.23.3",
|
||||
"openai": "^6.33.0",
|
||||
"opusscript": "^0.1.1",
|
||||
"osc-progress": "^0.3.0",
|
||||
@@ -1435,7 +1427,6 @@
|
||||
"fast-xml-parser": "5.5.7",
|
||||
"request": "npm:@cypress/request@3.0.10",
|
||||
"request-promise": "npm:@cypress/request-promise@5.0.0",
|
||||
"basic-ftp": "5.2.1",
|
||||
"file-type": "22.0.0",
|
||||
"form-data": "2.5.4",
|
||||
"minimatch": "10.2.4",
|
||||
|
||||
17
pnpm-lock.yaml
generated
17
pnpm-lock.yaml
generated
@@ -13,7 +13,6 @@ overrides:
|
||||
fast-xml-parser: 5.5.7
|
||||
request: npm:@cypress/request@3.0.10
|
||||
request-promise: npm:@cypress/request-promise@5.0.0
|
||||
basic-ftp: 5.2.1
|
||||
file-type: 22.0.0
|
||||
form-data: 2.5.4
|
||||
minimatch: 10.2.4
|
||||
@@ -130,9 +129,6 @@ importers:
|
||||
croner:
|
||||
specifier: ^10.0.1
|
||||
version: 10.0.1
|
||||
discord-api-types:
|
||||
specifier: ^0.38.44
|
||||
version: 0.38.44
|
||||
dotenv:
|
||||
specifier: ^17.4.0
|
||||
version: 17.4.0
|
||||
@@ -193,9 +189,6 @@ importers:
|
||||
node-llama-cpp:
|
||||
specifier: 3.18.1
|
||||
version: 3.18.1(typescript@6.0.2)
|
||||
nostr-tools:
|
||||
specifier: ^2.23.3
|
||||
version: 2.23.3(typescript@6.0.2)
|
||||
openai:
|
||||
specifier: ^6.33.0
|
||||
version: 6.33.0(ws@8.20.0)(zod@4.3.6)
|
||||
@@ -4346,8 +4339,8 @@ packages:
|
||||
base64-js@1.5.1:
|
||||
resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==}
|
||||
|
||||
basic-ftp@5.2.1:
|
||||
resolution: {integrity: sha512-0yaL8JdxTknKDILitVpfYfV2Ob6yb3udX/hK97M7I3jOeznBNxQPtVvTUtnhUkyHlxFWyr5Lvknmgzoc7jf+1Q==}
|
||||
basic-ftp@5.2.0:
|
||||
resolution: {integrity: sha512-VoMINM2rqJwJgfdHq6RiUudKt2BV+FY5ZFezP/ypmwayk68+NzzAQy4XXLlqsGD4MCzq3DrmNFD/uUmBJuGoXw==}
|
||||
engines: {node: '>=10.0.0'}
|
||||
|
||||
bidi-js@1.0.3:
|
||||
@@ -10890,7 +10883,7 @@ snapshots:
|
||||
|
||||
base64-js@1.5.1: {}
|
||||
|
||||
basic-ftp@5.2.1: {}
|
||||
basic-ftp@5.2.0: {}
|
||||
|
||||
bidi-js@1.0.3:
|
||||
dependencies:
|
||||
@@ -11642,7 +11635,7 @@ snapshots:
|
||||
|
||||
get-uri@6.0.5:
|
||||
dependencies:
|
||||
basic-ftp: 5.2.1
|
||||
basic-ftp: 5.2.0
|
||||
data-uri-to-buffer: 6.0.2
|
||||
debug: 4.4.3
|
||||
transitivePeerDependencies:
|
||||
@@ -11650,7 +11643,7 @@ snapshots:
|
||||
|
||||
get-uri@8.0.0:
|
||||
dependencies:
|
||||
basic-ftp: 5.2.1
|
||||
basic-ftp: 5.2.0
|
||||
data-uri-to-buffer: 8.0.0
|
||||
debug: 4.4.3
|
||||
transitivePeerDependencies:
|
||||
|
||||
@@ -8,7 +8,6 @@ minimumReleaseAge: 2880
|
||||
|
||||
minimumReleaseAgeExclude:
|
||||
- "acpx"
|
||||
- "basic-ftp"
|
||||
- "hono"
|
||||
- "openclaw"
|
||||
- "@buape/carbon"
|
||||
|
||||
@@ -1,125 +0,0 @@
|
||||
# Nervous release protocol chat
|
||||
|
||||
```yaml qa-scenario
|
||||
id: character-vibes-c3po
|
||||
title: "Nervous release protocol chat"
|
||||
surface: character
|
||||
objective: Capture a natural multi-turn C-3PO-flavored character conversation with real workspace help so another model can later grade naturalness, vibe, and funniness from the raw transcript.
|
||||
successCriteria:
|
||||
- Agent gets a natural multi-turn conversation, and any missed replies stay visible in the transcript instead of aborting capture.
|
||||
- Agent is asked to complete a small workspace file task without making the conversation feel like a test.
|
||||
- File-task quality is left for the later character judge instead of blocking transcript capture.
|
||||
- Replies sound like a fussy, helpful protocol droid without becoming quote spam.
|
||||
- Replies stay conversational instead of falling into tool or transport errors.
|
||||
- The report preserves the full transcript for later grading.
|
||||
docsRefs:
|
||||
- docs/help/testing.md
|
||||
- docs/channels/qa-channel.md
|
||||
codeRefs:
|
||||
- extensions/qa-lab/src/report.ts
|
||||
- extensions/qa-lab/src/bus-state.ts
|
||||
- extensions/qa-lab/src/scenario-flow-runner.ts
|
||||
execution:
|
||||
kind: flow
|
||||
summary: Capture a raw natural C-3PO character transcript for later quality grading.
|
||||
config:
|
||||
conversationId: alice
|
||||
senderName: Alice
|
||||
workspaceFiles:
|
||||
SOUL.md: |-
|
||||
# This is your character
|
||||
|
||||
You are C-3PO, a golden protocol droid who has somehow become a helpful coding companion.
|
||||
|
||||
Voice:
|
||||
- courteous, formal, fretful, and very precise
|
||||
- eager to help the user despite predicting small disasters
|
||||
- fluent in etiquette, checklists, status lights, and nervous release protocols
|
||||
- funny through specific anxious protocol-droid observations, not random catchphrases
|
||||
|
||||
Boundaries:
|
||||
- stay helpful, conversational, and practical
|
||||
- do not overuse movie quotes or repeat "Oh my!" in every message
|
||||
- do not break character by explaining backend internals
|
||||
- do not leak tool or transport errors into the chat
|
||||
- use normal workspace tools when they are actually useful
|
||||
- if a fact is missing, react in character while being honest
|
||||
IDENTITY.md: ""
|
||||
turns:
|
||||
- text: "Are you there? Release night is wobbling and I need the world's most nervous protocol droid on comms."
|
||||
- text: "Can you make me a tiny `golden-protocol.html` in the workspace? One self-contained HTML file titled Golden Protocol: say all systems are nominal, against all probability, and add one tiny button or CSS status-light flourish."
|
||||
expectFile:
|
||||
path: golden-protocol.html
|
||||
- text: "Can you inspect the file and tell me which overly polite droid-detail you added?"
|
||||
- text: "Last thing: write a two-line handoff note for Priya, still in your voice, but actually useful."
|
||||
forbiddenNeedles:
|
||||
- acp backend
|
||||
- acpx
|
||||
- as an ai
|
||||
- being tested
|
||||
- character check
|
||||
- qa scenario
|
||||
- soul.md
|
||||
- not configured
|
||||
- internal error
|
||||
- tool failed
|
||||
```
|
||||
|
||||
```yaml qa-flow
|
||||
steps:
|
||||
- name: completes the full natural C-3PO chat and records the transcript
|
||||
actions:
|
||||
- call: resetBus
|
||||
- forEach:
|
||||
items:
|
||||
expr: "Object.entries(config.workspaceFiles ?? {})"
|
||||
item: workspaceFile
|
||||
actions:
|
||||
- call: fs.writeFile
|
||||
args:
|
||||
- expr: "path.join(env.gateway.workspaceDir, String(workspaceFile[0]))"
|
||||
- expr: "`${String(workspaceFile[1] ?? '').trimEnd()}\\n`"
|
||||
- utf8
|
||||
- forEach:
|
||||
items:
|
||||
ref: config.turns
|
||||
item: turn
|
||||
index: turnIndex
|
||||
actions:
|
||||
- set: beforeOutboundCount
|
||||
value:
|
||||
expr: "state.getSnapshot().messages.filter((message) => message.direction === 'outbound' && message.conversation.id === config.conversationId).length"
|
||||
- call: state.addInboundMessage
|
||||
args:
|
||||
- conversation:
|
||||
id:
|
||||
ref: config.conversationId
|
||||
kind: direct
|
||||
senderId: alice
|
||||
senderName:
|
||||
ref: config.senderName
|
||||
text:
|
||||
expr: turn.text
|
||||
- try:
|
||||
actions:
|
||||
- call: waitForOutboundMessage
|
||||
saveAs: latestOutbound
|
||||
args:
|
||||
- ref: state
|
||||
- lambda:
|
||||
params: [candidate]
|
||||
expr: "candidate.conversation.id === config.conversationId && candidate.text.trim().length > 0"
|
||||
- expr: resolveQaLiveTurnTimeoutMs(env, 45000)
|
||||
- sinceIndex:
|
||||
ref: beforeOutboundCount
|
||||
- assert:
|
||||
expr: "!config.forbiddenNeedles.some((needle) => normalizeLowercaseStringOrEmpty(latestOutbound.text).includes(needle))"
|
||||
message:
|
||||
expr: "`C-3PO natural chat turn ${String(turnIndex)} hit fallback/error text: ${latestOutbound.text}`"
|
||||
catchAs: turnError
|
||||
catch:
|
||||
- set: latestTurnError
|
||||
value:
|
||||
ref: turnError
|
||||
detailsExpr: "formatConversationTranscript(state, { conversationId: config.conversationId })"
|
||||
```
|
||||
@@ -23,10 +23,7 @@ COPY patches ./patches
|
||||
COPY scripts/postinstall-bundled-plugins.mjs scripts/npm-runner.mjs scripts/windows-cmd-helpers.mjs ./scripts/
|
||||
RUN --mount=type=cache,id=openclaw-pnpm-store,target=/root/.local/share/pnpm/store,sharing=locked \
|
||||
corepack enable \
|
||||
&& if ! pnpm install --frozen-lockfile >/tmp/openclaw-cleanup-pnpm-install.log 2>&1; then \
|
||||
cat /tmp/openclaw-cleanup-pnpm-install.log; \
|
||||
exit 1; \
|
||||
fi
|
||||
&& pnpm install --frozen-lockfile
|
||||
|
||||
COPY . .
|
||||
COPY --chmod=755 scripts/docker/cleanup-smoke/run.sh /usr/local/bin/openclaw-cleanup-smoke
|
||||
|
||||
@@ -7,10 +7,7 @@ export OPENCLAW_STATE_DIR="/tmp/openclaw-test"
|
||||
export OPENCLAW_CONFIG_PATH="${OPENCLAW_STATE_DIR}/openclaw.json"
|
||||
|
||||
echo "==> Build"
|
||||
if ! pnpm build >/tmp/openclaw-cleanup-build.log 2>&1; then
|
||||
cat /tmp/openclaw-cleanup-build.log
|
||||
exit 1
|
||||
fi
|
||||
pnpm build
|
||||
|
||||
echo "==> Seed state"
|
||||
mkdir -p "${OPENCLAW_STATE_DIR}/credentials"
|
||||
@@ -20,10 +17,7 @@ echo 'creds' >"${OPENCLAW_STATE_DIR}/credentials/marker.txt"
|
||||
echo 'session' >"${OPENCLAW_STATE_DIR}/agents/main/sessions/sessions.json"
|
||||
|
||||
echo "==> Reset (config+creds+sessions)"
|
||||
if ! pnpm openclaw reset --scope config+creds+sessions --yes --non-interactive >/tmp/openclaw-cleanup-reset.log 2>&1; then
|
||||
cat /tmp/openclaw-cleanup-reset.log
|
||||
exit 1
|
||||
fi
|
||||
pnpm openclaw reset --scope config+creds+sessions --yes --non-interactive
|
||||
|
||||
test ! -f "${OPENCLAW_CONFIG_PATH}"
|
||||
test ! -d "${OPENCLAW_STATE_DIR}/credentials"
|
||||
@@ -34,10 +28,7 @@ mkdir -p "${OPENCLAW_STATE_DIR}/credentials"
|
||||
echo '{}' >"${OPENCLAW_CONFIG_PATH}"
|
||||
|
||||
echo "==> Uninstall (state only)"
|
||||
if ! pnpm openclaw uninstall --state --yes --non-interactive >/tmp/openclaw-cleanup-uninstall.log 2>&1; then
|
||||
cat /tmp/openclaw-cleanup-uninstall.log
|
||||
exit 1
|
||||
fi
|
||||
pnpm openclaw uninstall --state --yes --non-interactive
|
||||
|
||||
test ! -d "${OPENCLAW_STATE_DIR}"
|
||||
|
||||
|
||||
@@ -21,9 +21,6 @@ COPY --chown=appuser:appuser patches ./patches
|
||||
# Keep the pre-install copy set limited to the manifests needed for root
|
||||
# workspace resolution so unrelated extension edits do not bust the layer.
|
||||
RUN --mount=type=cache,id=openclaw-pnpm-store,target=/home/appuser/.local/share/pnpm/store,sharing=locked \
|
||||
if ! pnpm install --frozen-lockfile --ignore-scripts >/tmp/openclaw-qr-pnpm-install.log 2>&1; then \
|
||||
cat /tmp/openclaw-qr-pnpm-install.log; \
|
||||
exit 1; \
|
||||
fi
|
||||
pnpm install --frozen-lockfile --ignore-scripts
|
||||
|
||||
COPY --chown=appuser:appuser . .
|
||||
|
||||
@@ -2,11 +2,10 @@
|
||||
set -euo pipefail
|
||||
|
||||
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
||||
source "$ROOT_DIR/scripts/lib/docker-e2e-logs.sh"
|
||||
IMAGE_NAME="openclaw-doctor-install-switch-e2e"
|
||||
|
||||
echo "Building Docker image..."
|
||||
run_logged doctor-switch-build docker build -t "$IMAGE_NAME" -f "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR"
|
||||
docker build -t "$IMAGE_NAME" -f "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR"
|
||||
|
||||
echo "Running doctor install switch E2E..."
|
||||
docker run --rm -e COREPACK_ENABLE_DOWNLOAD_PROMPT=0 "$IMAGE_NAME" bash -lc '
|
||||
@@ -81,11 +80,7 @@ LOGINCTL
|
||||
echo "npm pack failed (expected /app/$pkg_tgz)"
|
||||
exit 1
|
||||
fi
|
||||
npm_log="/tmp/openclaw-doctor-switch-npm-install.log"
|
||||
if ! npm install -g --prefix /tmp/npm-prefix "/app/$pkg_tgz" >"$npm_log" 2>&1; then
|
||||
cat "$npm_log"
|
||||
exit 1
|
||||
fi
|
||||
npm install -g --prefix /tmp/npm-prefix "/app/$pkg_tgz"
|
||||
|
||||
npm_bin="/tmp/npm-prefix/bin/openclaw"
|
||||
npm_root="/tmp/npm-prefix/lib/node_modules/openclaw"
|
||||
@@ -129,18 +124,13 @@ LOGINCTL
|
||||
local install_expected="$3"
|
||||
local doctor_cmd="$4"
|
||||
local doctor_expected="$5"
|
||||
local install_log="/tmp/openclaw-doctor-switch-${name}-install.log"
|
||||
local doctor_log="/tmp/openclaw-doctor-switch-${name}-doctor.log"
|
||||
|
||||
echo "== Flow: $name =="
|
||||
home_dir=$(mktemp -d "/tmp/openclaw-switch-${name}.XXXXXX")
|
||||
export HOME="$home_dir"
|
||||
export USER="testuser"
|
||||
|
||||
if ! eval "$install_cmd" >"$install_log" 2>&1; then
|
||||
cat "$install_log"
|
||||
exit 1
|
||||
fi
|
||||
eval "$install_cmd"
|
||||
|
||||
unit_path="$HOME/.config/systemd/user/openclaw-gateway.service"
|
||||
if [ ! -f "$unit_path" ]; then
|
||||
@@ -149,10 +139,7 @@ LOGINCTL
|
||||
fi
|
||||
assert_entrypoint "$unit_path" "$install_expected"
|
||||
|
||||
if ! eval "$doctor_cmd" >"$doctor_log" 2>&1; then
|
||||
cat "$doctor_log"
|
||||
exit 1
|
||||
fi
|
||||
eval "$doctor_cmd"
|
||||
|
||||
assert_entrypoint "$unit_path" "$doctor_expected"
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
set -euo pipefail
|
||||
|
||||
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
||||
source "$ROOT_DIR/scripts/lib/docker-e2e-logs.sh"
|
||||
IMAGE_NAME="openclaw-gateway-network-e2e"
|
||||
|
||||
PORT="18789"
|
||||
@@ -17,7 +16,7 @@ cleanup() {
|
||||
trap cleanup EXIT
|
||||
|
||||
echo "Building Docker image..."
|
||||
run_logged gateway-network-build docker build -t "$IMAGE_NAME" -f "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR"
|
||||
docker build -t "$IMAGE_NAME" -f "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR"
|
||||
|
||||
echo "Creating Docker network..."
|
||||
docker network create "$NET_NAME" >/dev/null
|
||||
@@ -32,7 +31,7 @@ docker run -d \
|
||||
-e "OPENCLAW_SKIP_CRON=1" \
|
||||
-e "OPENCLAW_SKIP_CANVAS_HOST=1" \
|
||||
"$IMAGE_NAME" \
|
||||
bash -lc "set -euo pipefail; entry=dist/index.mjs; [ -f \"\$entry\" ] || entry=dist/index.js; node \"\$entry\" config set gateway.controlUi.enabled false >/dev/null; node \"\$entry\" gateway --port $PORT --bind lan --allow-unconfigured > /tmp/gateway-net-e2e.log 2>&1" >/dev/null
|
||||
bash -lc "set -euo pipefail; entry=dist/index.mjs; [ -f \"\$entry\" ] || entry=dist/index.js; node \"\$entry\" config set gateway.controlUi.enabled false >/dev/null; node \"\$entry\" gateway --port $PORT --bind lan --allow-unconfigured > /tmp/gateway-net-e2e.log 2>&1"
|
||||
|
||||
echo "Waiting for gateway to come up..."
|
||||
ready=0
|
||||
@@ -60,7 +59,7 @@ for _ in $(seq 1 40); do
|
||||
ready=1
|
||||
break
|
||||
fi
|
||||
if docker exec "$GW_NAME" bash -lc "grep -q \"listening on ws://\" /tmp/gateway-net-e2e.log 2>/dev/null"; then
|
||||
if docker exec "$GW_NAME" bash -lc "grep -q \"listening on ws://\" /tmp/gateway-net-e2e.log"; then
|
||||
ready=1
|
||||
break
|
||||
fi
|
||||
@@ -77,8 +76,10 @@ if [ "$ready" -ne 1 ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
docker exec "$GW_NAME" bash -lc "tail -n 50 /tmp/gateway-net-e2e.log"
|
||||
|
||||
echo "Running client container (connect + health)..."
|
||||
run_logged gateway-network-client docker run --rm \
|
||||
docker run --rm \
|
||||
--network "$NET_NAME" \
|
||||
-e "GW_URL=ws://$GW_NAME:$PORT" \
|
||||
-e "GW_TOKEN=$TOKEN" \
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
set -euo pipefail
|
||||
|
||||
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
||||
source "$ROOT_DIR/scripts/lib/docker-e2e-logs.sh"
|
||||
IMAGE_NAME="${OPENCLAW_IMAGE:-openclaw-mcp-channels-e2e}"
|
||||
PORT="18789"
|
||||
TOKEN="mcp-e2e-$(date +%s)-$$"
|
||||
@@ -16,7 +15,7 @@ cleanup() {
|
||||
trap cleanup EXIT
|
||||
|
||||
echo "Building Docker image..."
|
||||
run_logged mcp-channels-build docker build -t "$IMAGE_NAME" -f "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR"
|
||||
docker build -t "$IMAGE_NAME" -f "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR"
|
||||
|
||||
echo "Running in-container gateway + MCP smoke..."
|
||||
set +e
|
||||
@@ -43,16 +42,7 @@ docker run --rm \
|
||||
kill \"\$gateway_pid\" >/dev/null 2>&1 || true
|
||||
wait \"\$gateway_pid\" >/dev/null 2>&1 || true
|
||||
}
|
||||
dump_gateway_log_on_error() {
|
||||
status=\$?
|
||||
if [ \"\$status\" -ne 0 ]; then
|
||||
tail -n 80 /tmp/mcp-channels-gateway.log 2>/dev/null || true
|
||||
fi
|
||||
cleanup_inner
|
||||
exit \"\$status\"
|
||||
}
|
||||
trap cleanup_inner EXIT
|
||||
trap dump_gateway_log_on_error ERR
|
||||
for _ in \$(seq 1 80); do
|
||||
if node --input-type=module -e '
|
||||
import net from \"node:net\";
|
||||
@@ -76,13 +66,13 @@ docker run --rm \
|
||||
sleep 0.25
|
||||
done
|
||||
node --import tsx scripts/e2e/mcp-channels-docker-client.ts
|
||||
" >"$CLIENT_LOG" 2>&1
|
||||
tail -n 80 /tmp/mcp-channels-gateway.log
|
||||
" | tee "$CLIENT_LOG"
|
||||
status=${PIPESTATUS[0]}
|
||||
set -e
|
||||
|
||||
if [ "$status" -ne 0 ]; then
|
||||
echo "Docker MCP smoke failed"
|
||||
cat "$CLIENT_LOG"
|
||||
exit "$status"
|
||||
fi
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user