Compare commits

..

450 Commits

Author SHA1 Message Date
Agustin Rivera
765208ce47 fix(agents): forward all RunClaudeCliAgent params to runCliAgent 2026-04-08 22:34:55 +00:00
Agustin Rivera
9bd8911615 fix(matrix): preserve owner context in local dispatch 2026-04-08 22:34:55 +00:00
Gustavo Madeira Santana
21b8d35e2d fix(agents): harden claude cli wrapper 2026-04-08 22:34:55 +00:00
Gustavo Madeira Santana
cd8fc2f915 fix(cli): preserve owner auth for message actions 2026-04-08 22:34:55 +00:00
Gustavo Madeira Santana
2b4bebb72f fix(gateway): preserve owner auth over bundle MCP 2026-04-08 22:34:55 +00:00
Gustavo Madeira Santana
cc798ce0ef refactor(agents): dedupe message action discovery params 2026-04-08 22:34:55 +00:00
Gustavo Madeira Santana
4e4b6b7a19 test(agents): cover embedded owner discovery context 2026-04-08 22:34:55 +00:00
Gustavo Madeira Santana
019b7797e1 fix(matrix): gate embedded profile hints for non-owner runs 2026-04-08 22:34:55 +00:00
Peter Steinberger
097883282d test: move directive state coverage to pure tests 2026-04-08 22:34:55 +00:00
Agustin Rivera
eb461f25c6 fix(browser): re-check interaction-driven navigations (#63226)
* fix(browser): guard interaction-driven navigations

* fix(browser): avoid rechecking unchanged interaction urls

* fix(browser): guard delayed interaction navigations

* fix(browser): guard interaction-driven navigations for full action duration

* fix(browser): avoid waiting on interaction grace timer

* fix(browser): ignore same-document hash-only URL changes in navigation guard

* fix(browser): dedupe interaction nav guards

* fix(browser): guard same-URL reloads in interaction navigation listeners

* docs(changelog): add interaction navigation guard entry

* fix(browser): drop duplicate ssrfPolicy props

* fix(browser): tighten interaction navigation guards

---------

Co-authored-by: Devin Robison <drobison@nvidia.com>
2026-04-08 22:34:55 +00:00
Peter Steinberger
1e0f0e5444 test: reuse verbose directive reply imports 2026-04-08 22:34:55 +00:00
Peter Steinberger
4319f07afa test: reuse exec directive reply imports 2026-04-08 22:34:55 +00:00
Agustin Rivera
ea6226bf49 fix(browser): harden browser control override loading (#62663)
* fix(browser): harden browser control overrides

* fix(lint): prepare boundary artifacts for extension oxlint

* docs(changelog): add browser override hardening entry

* fix(lint): avoid duplicate boundary prep

---------

Co-authored-by: Devin Robison <drobison@nvidia.com>
Co-authored-by: Devin Robison <drobison00@users.noreply.github.com>
2026-04-08 22:34:55 +00:00
Gustavo Madeira Santana
36aa4f69fb Matrix: report startup failures as errors 2026-04-08 22:34:55 +00:00
Peter Steinberger
96398871d9 auth: persist explicit profile upserts directly 2026-04-08 22:34:55 +00:00
Peter Steinberger
6fbfb36184 test(doctor): mock memory-core runtime seam 2026-04-08 22:34:55 +00:00
Peter Steinberger
9f5b179f7f auth: avoid external cli sync on profile upsert 2026-04-08 22:34:55 +00:00
Peter Steinberger
9d7793ee2e feat: parallelize character eval runs 2026-04-08 22:34:55 +00:00
Peter Steinberger
405a088d60 fix: load QA live provider overrides 2026-04-08 22:34:55 +00:00
Peter Steinberger
00bee7eb5e build: stage nostr runtime dependencies 2026-04-08 22:34:55 +00:00
Agustin Rivera
7637061feb fix(dotenv): block workspace runtime env vars (#62660)
* fix(dotenv): block workspace runtime env vars

Co-authored-by: zsx <git@zsxsoft.com>

* docs(changelog): add workspace dotenv runtime-control entry

* fix(dotenv): block workspace gateway port override

---------

Co-authored-by: zsx <git@zsxsoft.com>
Co-authored-by: Devin Robison <drobison@nvidia.com>
2026-04-08 22:34:55 +00:00
Peter Steinberger
380bff9d13 build: narrow plugin SDK declaration build 2026-04-08 22:34:55 +00:00
Peter Steinberger
ece183233d test: harden Parallels macOS smoke fallback 2026-04-08 22:34:55 +00:00
Peter Steinberger
84d626aba7 fix(memory): accept embedded dreaming heartbeat tokens 2026-04-08 22:34:55 +00:00
Peter Steinberger
082de8f294 test: harden provider mock isolation 2026-04-08 22:34:55 +00:00
Gustavo Madeira Santana
c676d1f636 docs(config): tighten wording in reference 2026-04-08 22:34:55 +00:00
Peter Steinberger
86fabe02b4 test: reuse followup runner imports 2026-04-08 22:34:55 +00:00
Peter Steinberger
3008137c8d test: reuse image generate tool imports 2026-04-08 22:34:55 +00:00
Agustin Rivera
a67fbc6a98 Align remote node exec event system messages with untrusted handling (#62659)
* fix(nodes): downgrade remote exec system events

* docs(changelog): add remote node exec event entry

---------

Co-authored-by: Devin Robison <drobison@nvidia.com>
2026-04-08 22:34:55 +00:00
Gustavo Madeira Santana
c265e3a96b fix(matrix): contain sync outage failures (#62779)
Merged via squash.

Prepared head SHA: 901bb767b5
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras
2026-04-08 22:34:55 +00:00
Peter Steinberger
f578654e14 test: stabilize full-suite execution 2026-04-08 22:34:55 +00:00
github-actions[bot]
659e0d3a2f chore(ui): refresh id control ui locale 2026-04-08 22:34:55 +00:00
github-actions[bot]
b27916cbce chore(ui): refresh pl control ui locale 2026-04-08 22:34:55 +00:00
github-actions[bot]
1455fd0b02 chore(ui): refresh uk control ui locale 2026-04-08 22:34:54 +00:00
github-actions[bot]
ade0f06426 chore(ui): refresh tr control ui locale 2026-04-08 22:34:54 +00:00
Gustavo Madeira Santana
daa9af6bdf docs(matrix): tighten setup and config guidance 2026-04-08 22:34:54 +00:00
github-actions[bot]
fc53ab3e87 chore(ui): refresh fr control ui locale 2026-04-08 22:34:54 +00:00
github-actions[bot]
7a39107f6e chore(ui): refresh ja-JP control ui locale 2026-04-08 22:34:54 +00:00
github-actions[bot]
c0000bed96 chore(ui): refresh ko control ui locale 2026-04-08 22:34:54 +00:00
github-actions[bot]
a35d98def0 chore(ui): refresh es control ui locale 2026-04-08 22:34:54 +00:00
github-actions[bot]
3dff2f08ad chore(ui): refresh de control ui locale 2026-04-08 22:34:54 +00:00
github-actions[bot]
01099af7e7 chore(ui): refresh pt-BR control ui locale 2026-04-08 22:34:54 +00:00
github-actions[bot]
3b851c4366 chore(ui): refresh zh-CN control ui locale 2026-04-08 22:34:54 +00:00
github-actions[bot]
cc4b8e8e79 chore(ui): refresh zh-TW control ui locale 2026-04-08 22:34:54 +00:00
Mariano
4998dc8dd3 feat(ui): add dreaming diary controls and navigation (#63298)
Merged via squash.

Prepared head SHA: 0a2ae66913
Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com>
Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com>
Reviewed-by: @mbelinky
2026-04-08 22:34:54 +00:00
Mariano
bea33a6122 feat(memory): harden grounded REM extraction (#63297)
Merged via squash.

Prepared head SHA: e188b7e26d
Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com>
Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com>
Reviewed-by: @mbelinky
2026-04-08 22:34:54 +00:00
Mariano
ff827bdf04 feat(memory): add grounded REM backfill lane (#63273)
Merged via squash.

Prepared head SHA: 4450f25485
Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com>
Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com>
Reviewed-by: @mbelinky
2026-04-08 22:34:54 +00:00
Peter Steinberger
ef534fbda9 feat(plugins): support provider auth aliases 2026-04-08 22:34:54 +00:00
Peter Steinberger
1bd92102bb test: isolate provider runtime test mocks 2026-04-08 22:34:54 +00:00
Pavan Kumar Gondhi
0014eeedad fix(plugins): prevent untrusted workspace plugins from hijacking bundled provider auth choices [AI] (#62368)
* fix: address issue

* fix: address review feedback

* docs(changelog): add onboarding auth-choice guard entry

* fix: address PR review feedback

* fix: address PR review feedback

* fix: address PR review feedback

* fix: address PR review feedback

* fix: address PR review feedback

* fix: address PR review feedback

* fix: address PR review feedback

* fix: address PR review feedback

---------

Co-authored-by: Devin Robison <drobison@nvidia.com>
2026-04-08 22:34:54 +00:00
Peter Steinberger
a0e62103d8 fix: pass system prompt to codex cli 2026-04-08 22:34:54 +00:00
Peter Steinberger
2fe1590196 fix: patch hono security advisories 2026-04-08 22:34:54 +00:00
Peter Steinberger
acd42ba736 test: isolate volcengine byteplus auth resolver imports 2026-04-08 22:34:54 +00:00
Peter Steinberger
30e35f7c29 test: stabilize ci test isolation 2026-04-08 22:34:54 +00:00
Frank Yang
e516b14df4 fix(gateway): clear auto-fallback model override on session reset (#63155)
* fix(gateway): clear auto-fallback model override on session reset

When `persistFallbackCandidateSelection()` writes a fallback provider
override with `authProfileOverrideSource: "auto"`, the override was
incorrectly preserved across `/reset` and `/new` commands. This caused
sessions to keep using the fallback provider even after the user changed
the agent config primary provider, because the session store override
takes precedence over the config default.

Now the override fields (`providerOverride`, `modelOverride`,
`authProfileOverride`, `authProfileOverrideSource`,
`authProfileOverrideCompactionCount`) are only carried forward when
`authProfileOverrideSource === "user"` (i.e. explicit `/model` command).
System-driven overrides are dropped on reset so the session picks up the
current config default.

Introduced in cb0a752156 ("fix: preserve reset session behavior config")

* fix(gateway): preserve explicit reset model selection

* fix(gateway): track reset model override source

* fix(gateway): preserve legacy reset model overrides

* docs(changelog): add session reset merge note

---------

Co-authored-by: termtek <termtek@ubuntu.tail2b72cd.ts.net>
2026-04-08 22:34:54 +00:00
Frank Yang
122c925acd fix(auto-reply): strip leading NO_REPLY tokens to prevent silent-reply leak (#63068)
* fix(auto-reply): strip leading NO_REPLY tokens to prevent silent-reply leak

* fix(auto-reply): preserve substantive NO_REPLY leading text

* fix(agents): preserve ACP silent-prefix cumulative deltas

* fix(auto-reply): harden silent-token streaming paths

* fix(auto-reply): normalize glued silent tokens consistently

---------

Co-authored-by: termtek <termtek@ubuntu.tail2b72cd.ts.net>
2026-04-08 22:34:54 +00:00
Ayaan Zaidi
a60a087454 fix: restore android qr pairing flow (#63199) 2026-04-08 22:34:54 +00:00
Ayaan Zaidi
80744c1c35 fix(android): prefer stored device auth after pairing 2026-04-08 22:34:54 +00:00
Ayaan Zaidi
8813b4ac8a fix(android): tighten pairing retry behavior 2026-04-08 22:34:54 +00:00
Ayaan Zaidi
3207ff2ed7 fix(android): reset auth on new setup codes 2026-04-08 22:34:54 +00:00
Ayaan Zaidi
833854aecb fix(android): prefer bootstrap auth on qr pairing 2026-04-08 22:34:54 +00:00
Ayaan Zaidi
167f722769 fix(android): auto-resume pairing approval 2026-04-08 22:34:54 +00:00
Peter Steinberger
d46f52d70e test: keep media runtime tests on same-directory provider mocks 2026-04-08 22:34:54 +00:00
Peter Steinberger
c19e23a96e test: keep pi fs workspace tests on fs tool factories 2026-04-08 22:34:54 +00:00
Peter Steinberger
8b750ad1a7 feat: add character eval model options 2026-04-08 22:34:54 +00:00
Peter Steinberger
95bf2a8e36 test: make character eval scenario natural 2026-04-08 22:34:54 +00:00
Mariano
c93233b4b1 Reply: surface OAuth reauth failures (#63217)
Merged via squash.

Prepared head SHA: 68b7ffd59e
Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com>
Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com>
Reviewed-by: @mbelinky
2026-04-08 22:34:54 +00:00
Peter Steinberger
36f316cde0 test: explain gateway exec fixture trust 2026-04-08 22:34:54 +00:00
Peter Steinberger
544f8fc400 fix: keep runtime task test harness behind task seams 2026-04-08 22:34:54 +00:00
Peter Steinberger
07cced29ad test: trust gateway exec fixture node path 2026-04-08 22:34:54 +00:00
Ayaan Zaidi
8c0250dd06 fix(build): keep tsdown prune best-effort 2026-04-08 22:34:54 +00:00
Peter Steinberger
a1f27e524c test: keep bundled web-search owner checks on public artifacts 2026-04-08 22:34:54 +00:00
Peter Steinberger
4bb5d24047 docs: reorder changelog entries 2026-04-08 22:34:54 +00:00
Peter Steinberger
392c5d8ede fix(plugin-sdk): export channel plugin base 2026-04-08 22:34:54 +00:00
Peter Steinberger
59f8b9412a test: keep chutes implicit provider tests on provider catalog 2026-04-08 22:34:54 +00:00
Ayaan Zaidi
dc2b88f720 fix(build): honor postinstall disable flag 2026-04-08 22:34:54 +00:00
Ayaan Zaidi
b8a1070665 fix(build): address bundled plugin prune review 2026-04-08 22:34:54 +00:00
Ayaan Zaidi
9cde5b895d fix(build): prune stale bundled plugin node_modules 2026-04-08 22:34:54 +00:00
Peter Steinberger
2238735830 test: keep kimi implicit provider tests on provider catalog 2026-04-08 22:34:54 +00:00
Peter Steinberger
9dbbccac43 fix: default OpenAI reasoning effort to high 2026-04-08 22:34:54 +00:00
Peter Steinberger
137aafe04e test: keep model reasoning override coverage on merge helpers 2026-04-08 22:34:54 +00:00
Peter Steinberger
0f14e2a4de test: keep pdf and update-plan registration tests pure 2026-04-08 22:34:54 +00:00
Peter Steinberger
637eaa31e9 fix: keep minimax provider mocks package-local 2026-04-08 22:34:54 +00:00
Peter Steinberger
f2a7a4b4b9 refactor: share html entity tool call decoding 2026-04-08 22:34:54 +00:00
Peter Steinberger
da50d92c14 refactor: dedupe embedding provider test fixtures 2026-04-08 22:34:54 +00:00
Peter Steinberger
22bd9ca11f refactor: dedupe agent command test fixtures 2026-04-08 22:34:54 +00:00
Peter Steinberger
a2de84da2a refactor: dedupe doctor codex oauth tests 2026-04-08 22:34:54 +00:00
Peter Steinberger
4cda0a2743 refactor: dedupe telegram exec approval tests 2026-04-08 22:34:54 +00:00
Peter Steinberger
6dce35db03 refactor: dedupe matrix exec approval tests 2026-04-08 22:34:54 +00:00
Peter Steinberger
7fb8af543f refactor: dedupe approval runtime tests 2026-04-08 22:34:54 +00:00
Peter Steinberger
a208cb293e refactor: dedupe exec defaults tests 2026-04-08 22:34:54 +00:00
Peter Steinberger
193d32db02 refactor: dedupe firecrawl and directive helpers 2026-04-08 22:34:54 +00:00
Peter Steinberger
cd27bc26b0 refactor: dedupe plugin metadata test helpers 2026-04-08 22:34:54 +00:00
Peter Steinberger
ae2a4a5392 refactor: dedupe media runtime test mocks 2026-04-08 22:34:54 +00:00
Peter Steinberger
f6efb80fcf refactor: dedupe plugin test harnesses 2026-04-08 22:34:53 +00:00
Peter Steinberger
4761902b1b refactor: dedupe test helpers and script warning filter 2026-04-08 22:34:53 +00:00
Peter Steinberger
76ceb30539 refactor: dedupe config and subagent tests 2026-04-08 22:34:53 +00:00
Peter Steinberger
03a7e0151d refactor: dedupe browser navigation guard tests 2026-04-08 22:34:53 +00:00
Peter Steinberger
58448f9f89 refactor: dedupe shared helper branches 2026-04-08 22:34:53 +00:00
Peter Steinberger
4d5e3eb796 refactor: dedupe internal helper glue 2026-04-08 22:34:53 +00:00
Peter Steinberger
e11d071602 refactor: dedupe media generation tool helpers 2026-04-08 22:34:53 +00:00
Peter Steinberger
bbb2734d47 docs: document QA character eval workflow 2026-04-08 22:34:53 +00:00
Peter Steinberger
69d3b95d34 feat: add QA character eval reports 2026-04-08 22:34:53 +00:00
Peter Steinberger
c88d7bc30d fix: support Codex CLI QA auth 2026-04-08 22:34:53 +00:00
Peter Steinberger
655ab95dd6 test: keep openclaw tools registration policy pure 2026-04-08 22:34:53 +00:00
Peter Steinberger
455deb5841 ci: isolate full suite leaf shards 2026-04-08 22:34:53 +00:00
Peter Steinberger
efeba38df1 fix: harden bundled plugin dependency release checks 2026-04-08 22:34:53 +00:00
Eric Curtin
be4f327324 docs(inferrs): fix Gemma model id from gg-hf-gg to google (#62586) 2026-04-08 22:34:53 +00:00
Peter Steinberger
a02d50ede9 test: keep bundled metadata sidecar scan inventory-only 2026-04-08 22:34:53 +00:00
Peter Steinberger
3204d902b3 test: keep openclaw tools registration tests on a fast shell 2026-04-08 22:34:53 +00:00
Peter Steinberger
33ae2c4db7 test: keep public artifact coverage on cheap boundaries 2026-04-08 22:34:53 +00:00
Peter Steinberger
0a8ff8f3ce ci: restore sequential full suite tests 2026-04-08 22:34:53 +00:00
Peter Steinberger
55a18686cb test: keep kilocode provider tests on plugin-owned helpers 2026-04-08 22:34:53 +00:00
Peter Steinberger
f4fc4f7b1c test: keep web provider artifact test in boundary 2026-04-08 22:34:53 +00:00
Peter Steinberger
04e10e233b test: keep shared dm policy contract off channel facades 2026-04-08 22:34:53 +00:00
Peter Steinberger
9610a94d05 test: exercise models json file mode without provider discovery 2026-04-08 22:34:53 +00:00
Peter Steinberger
36a4009739 fix: align LLM idle timeout policy 2026-04-08 22:34:53 +00:00
Peter Steinberger
0157625a89 test: fix full suite CI test isolation 2026-04-08 22:34:53 +00:00
Tyler Warburton
802ee1ab12 fix: allow blank TLS manual port default (#63134) (thanks @Tyler-RNG)
* make port optional for TLS manual connections

* fix: restrict manual blank-port fallback to tls

* fix: allow blank TLS manual port default (#63134) (thanks @Tyler-RNG)

---------

Co-authored-by: Ayaan Zaidi <hi@obviy.us>
2026-04-08 22:34:53 +00:00
Peter Steinberger
88a08a0006 test: restore manifest-only web provider coverage 2026-04-08 22:34:53 +00:00
Peter Steinberger
5f17671a3e channels: fast-path direct model override matches 2026-04-08 22:34:53 +00:00
Peter Steinberger
79fd5e9a11 status: avoid plugin lookup for direct channel model overrides 2026-04-08 22:34:53 +00:00
Peter Steinberger
e718f4eb8a test: keep status message tests off auth auto-detection 2026-04-08 22:34:53 +00:00
Peter Steinberger
62b3adea8d test: keep web provider artifact test in boundary 2026-04-08 22:34:53 +00:00
Peter Steinberger
37975fe02b test: keep provider policy artifact coverage narrow 2026-04-08 22:34:53 +00:00
Peter Steinberger
568848008b test: keep web provider artifact coverage manifest-only 2026-04-08 22:34:53 +00:00
Peter Steinberger
18a98e03c8 test: keep discord and irc entry smokes descriptor-only 2026-04-08 22:34:53 +00:00
Peter Steinberger
cf2be8319f test: avoid bundled test api smokes in matrix and telegram 2026-04-08 22:34:53 +00:00
Peter Steinberger
c84444680e ci: reduce full suite test parallelism 2026-04-08 22:34:53 +00:00
Peter Steinberger
e540a7cd21 test: keep bundled channel entry smokes descriptor-only 2026-04-08 22:34:53 +00:00
Peter Steinberger
c77faa7369 test: guard loader fixtures against broad sdk imports 2026-04-08 22:34:53 +00:00
Peter Steinberger
e63fad1627 ci: split parallel full suite into leaf shards 2026-04-08 22:34:53 +00:00
Peter Steinberger
a700dcd84a test: keep followup runner memory mock complete 2026-04-08 22:34:53 +00:00
Peter Steinberger
e2749ebf02 test: isolate discord directory live token env 2026-04-08 22:34:53 +00:00
Peter Steinberger
7492a1232d ci: skip duplicate full extension shard 2026-04-08 22:34:53 +00:00
Peter Steinberger
6878c8c5e6 test: inline cli metadata channel fixture 2026-04-08 22:34:53 +00:00
Peter Steinberger
5d41a61009 plugins: read contract inventory from manifests 2026-04-08 22:34:53 +00:00
Peter Steinberger
76e9d18503 auto-reply: type status auth overrides 2026-04-08 22:34:53 +00:00
Peter Steinberger
3bdb4e81ee test: keep status tests off live usage probes 2026-04-08 22:34:53 +00:00
Peter Steinberger
9d97945a04 test: fix postpublish verifier sidecar handling 2026-04-08 22:34:53 +00:00
Peter Steinberger
686896a22d test: skip duplicate package boundary wrapper in ci 2026-04-08 22:34:53 +00:00
Peter Steinberger
25782f10d7 test: isolate agent gateway cli command mocks 2026-04-08 22:34:53 +00:00
Peter Steinberger
948dab86bf test: stabilize plugin boundary invariants 2026-04-08 22:34:53 +00:00
Peter Steinberger
f7e71efd7a feat: add qa character vibes eval 2026-04-08 22:34:53 +00:00
Nimrod Gutman
a9e1c38146 revert: undo background alive review findings fix 2026-04-08 22:34:53 +00:00
Peter Steinberger
df12e51788 fix(test): keep warn log capture under openclaw temp dir 2026-04-08 22:34:53 +00:00
scoootscooob
44c7c894e7 release: mirror bundled channel deps at root (#63065)
Merged via squash.

Prepared head SHA: ac26799a54
Co-authored-by: scoootscooob <167050519+scoootscooob@users.noreply.github.com>
Co-authored-by: scoootscooob <167050519+scoootscooob@users.noreply.github.com>
Reviewed-by: @scoootscooob
2026-04-08 22:34:53 +00:00
Peter Steinberger
c4cea95e2a refactor: finish markdown-only qa runner 2026-04-08 22:34:53 +00:00
Vicky
6798af3df3 fix: classify Z.ai error codes 1311 (billing) and 1113 (auth) (#49552)
Merged via squash.

Prepared head SHA: 3e7b8bb260
Co-authored-by: 1bcMax <195689928+1bcMax@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
2026-04-08 22:34:53 +00:00
Peter Steinberger
4a2f0bb05a fix(qqbot): parse entity encoded self-closing media tags 2026-04-08 22:34:53 +00:00
Peter Steinberger
ed87354850 fix(qqbot): allow URL slashes in media tag attributes 2026-04-08 22:34:53 +00:00
Peter Steinberger
b348c066a2 test: harden release gate flakes 2026-04-08 22:34:53 +00:00
Peter Steinberger
c6ab5c0ea3 test: stabilize release gate drift 2026-04-08 22:34:53 +00:00
Peter Steinberger
e1a58b8a77 fix: keep installer doctor non-interactive 2026-04-08 22:34:53 +00:00
Nimrod Gutman
892ae8245e fix: resolve background alive beacon review findings 2026-04-08 22:34:53 +00:00
Peter Steinberger
c8df6e35c0 test: stabilize model warning sanitizer checks 2026-04-08 22:34:53 +00:00
Peter Steinberger
c453a50900 test: keep agent policy tests off broad tool construction 2026-04-08 22:34:53 +00:00
游乐场
e8e2a49f86 fix(qqbot): support HTML entities in media tags (&lt; &gt;) (#60493)
* fix(qqbot): 支持媒体标签中的 HTML 实体(&lt; &gt;)

* fix(qqbot): support HTML entities in media tags

* test(qqbot): add unit tests for media tag regex with HTML entities

* test(qqbot): export regex constants to enable unit tests

* fix(qqbot): reset regex lastIndex in tests to avoid state pollution

* test(qqbot): add .js extension to import in media-tags.test.ts

* fix(qqbot): support HTML entities in media tags (#60493) (thanks @ylc0919)

---------

Co-authored-by: sliverp <870080352@qq.com>
2026-04-08 22:34:53 +00:00
Peter Steinberger
5fa96a350b test: stub image provider discovery in generation tool tests 2026-04-08 22:34:53 +00:00
Peter Steinberger
5127453584 test: dedupe msteams authz fixtures 2026-04-08 22:34:52 +00:00
Peter Steinberger
f6d4b0e50e fix(test): align current main verification fixtures 2026-04-08 22:34:52 +00:00
Peter Steinberger
959876f3d9 fix(test): refresh plugin-sdk package boundary exports 2026-04-08 22:34:52 +00:00
Vincent Koc
329c5e8fbe perf(plugins): trim explicit web provider artifact imports 2026-04-08 22:34:52 +00:00
Vincent Koc
dc4bf70ddf perf(plugins): prefer require for source public artifacts 2026-04-08 22:34:52 +00:00
Vincent Koc
003eb51432 perf(plugin-sdk): narrow account-id export seam 2026-04-08 22:34:52 +00:00
Peter Steinberger
e7bca5e254 fix: export web search config contract from plugin sdk package 2026-04-08 22:34:52 +00:00
Vincent Koc
f8675563de perf(secrets): lazy-load web-tools manifest owner lookup 2026-04-08 22:34:52 +00:00
Peter Steinberger
afb1d24855 fix: keep bundled dir test argv mutable 2026-04-08 22:34:52 +00:00
Peter Steinberger
884e4dbe73 fix: resolve post-rebase boundary drift 2026-04-08 22:34:52 +00:00
Peter Steinberger
8ff4d2e720 fix: keep minimax test helper package-local 2026-04-08 22:34:52 +00:00
Peter Steinberger
c6e4801c3d style: apply formatter output 2026-04-08 22:34:52 +00:00
Peter Steinberger
d5cb85cc8f refactor: dedupe repeated test helpers 2026-04-08 22:34:52 +00:00
Vincent Koc
3a8030afdc perf(plugin-sdk): split web-search contract fields 2026-04-08 22:34:52 +00:00
Vincent Koc
5e5caeacbc fix(plugins): prefer source bundled tree in tsx runs 2026-04-08 22:34:52 +00:00
Peter Steinberger
ebf8009245 test: keep provider auth onboarding tests off runtime auth 2026-04-08 22:34:52 +00:00
Vincent Koc
b14bf19c63 ci(test): fan out windows test lane 2026-04-08 22:34:52 +00:00
Vincent Koc
27d9455c03 ci(test): raise checks-node-test fanout 2026-04-08 22:34:52 +00:00
scoootscooob
0229c587bb Control UI: guard stale session history reloads (#62975)
* Control UI: guard stale session history reloads

* control-ui: guard stale session history reloads

* control-ui: refresh avatar on session switch

* Control UI: refresh and guard chat avatars on session switch
2026-04-08 22:34:52 +00:00
Vincent Koc
192ae58612 ci(test): parallelize checks-node-test 2026-04-08 22:34:52 +00:00
Mariano
3190577e95 fix(reply): use runtime snapshot for queued reply runs (#62693)
Merged via squash.

Prepared head SHA: 2a3e4e5c60
Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com>
Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com>
Reviewed-by: @mbelinky
2026-04-08 22:34:52 +00:00
Nimrod Gutman
c02ceaa501 feat(ios): pin calver release versioning (#63001)
* feat(ios): decouple app versioning from gateway

* feat(ios): pin calver release versioning

* refactor(ios): drop prerelease version helper fields

* docs(changelog): note pinned ios release versioning (#63001) (thanks @ngutman)
2026-04-08 22:34:52 +00:00
Peter Steinberger
4a00db2da8 test: keep tool-policy tests below coding tool construction 2026-04-08 22:34:52 +00:00
Peter Steinberger
810e9b93c8 refactor: move qa suite logic into scenario markdown 2026-04-08 22:34:52 +00:00
Vincent Koc
b2ef706b0b fix(test): stabilize windows tooling assertions 2026-04-08 22:34:52 +00:00
Peter Steinberger
fbde3f73c3 test: cover model-list forward compat below command runtime 2026-04-08 22:34:52 +00:00
Vincent Koc
04d3f789fa test(plugin-sdk): satisfy tool payload carrier typing 2026-04-08 22:34:52 +00:00
Vincent Koc
4d6590c4b7 refactor(plugin-sdk): share tool payload extraction 2026-04-08 22:34:52 +00:00
Vincent Koc
54bcd9f721 refactor(plugins): reuse canonical media contract registries 2026-04-08 22:34:52 +00:00
Vincent Koc
52b453ca26 refactor(plugin-sdk): share web-search contract fields 2026-04-08 22:34:52 +00:00
Vincent Koc
0377c1ce6f refactor(agents): share media status action helpers 2026-04-08 22:34:52 +00:00
Vincent Koc
3cc7cd0abc refactor(agents): share media background task lifecycle 2026-04-08 22:34:52 +00:00
Vincent Koc
f447de3c34 refactor(plugins): reuse interactive registry state 2026-04-08 22:34:52 +00:00
Vincent Koc
dda9d3bebf refactor(doctor): share channel compat helpers 2026-04-08 22:34:52 +00:00
Vincent Koc
f1040d6239 test(plugins): refresh telegram runtime api guardrail 2026-04-08 22:34:52 +00:00
Vincent Koc
309724db30 perf(plugin-sdk): split web search config contract 2026-04-08 22:34:52 +00:00
Peter Steinberger
f2b59f01f5 test: cover multi-agent tool policy below tool construction 2026-04-08 22:34:52 +00:00
Peter Steinberger
f10632a4c1 test: keep media-understanding defaults tests on tiny registry 2026-04-08 22:34:52 +00:00
Vincent Koc
470c618054 perf(plugins): narrow boundary compile import surfaces 2026-04-08 22:34:52 +00:00
Vincent Koc
4c1cef8091 perf(plugins): trim channel boundary core imports 2026-04-08 22:34:52 +00:00
Vincent Koc
67d8d1a108 perf(plugins): narrow boundary compile sdk imports 2026-04-08 22:34:52 +00:00
Vincent Koc
788744963d perf(plugins): report slow boundary compiles 2026-04-08 22:34:52 +00:00
Vincent Koc
bc05a0cf57 perf(config): trim web search config helper imports 2026-04-08 22:34:52 +00:00
Peter Steinberger
8261d1dc14 test: use stubbed OpenClaw tools in agent config tool suite 2026-04-08 22:34:52 +00:00
Peter Steinberger
83c27e33a7 test: mock web search provider discovery in onboard setup tests 2026-04-08 22:34:52 +00:00
Peter Steinberger
c827427a9f test: keep models list auth sync off real discovery 2026-04-08 22:34:52 +00:00
Peter Steinberger
307f176145 fix: stabilize live qa scenario suite 2026-04-08 22:34:52 +00:00
Vincent Koc
eea4cbb644 fix(slack): preserve auth on same-origin media redirects (#62996) (thanks @vincentkoc)
- Verified: pnpm build
- Verified: pnpm test extensions/slack/src/monitor/media.test.ts
- Verified: pnpm exec oxlint extensions/slack/src/monitor/media.ts extensions/slack/src/monitor/media.test.ts
- Verified: pnpm exec oxfmt --check extensions/slack/src/monitor/media.ts extensions/slack/src/monitor/media.test.ts CHANGELOG.md

Repo-wide pnpm lint and pnpm test were not clean on current main outside this fix, and the first full-suite test attempt from the default core sparse profile was additionally contaminated by missing ui/packages/OpenClawKit paths until they were materialized.
2026-04-08 22:34:52 +00:00
Peter Steinberger
7c9c77c264 chore: prepare 2026.4.9 release 2026-04-08 22:34:52 +00:00
Vincent Koc
9fd6fcc993 perf(secrets): fast-path exact bundled web providers 2026-04-08 22:34:52 +00:00
Nyanako
2037f2ced0 test(plugin-sdk): cover packaged telegram setup sidecars (#62990) 2026-04-08 22:34:52 +00:00
Vincent Koc
3db49affee perf(secrets): cache web search risk lookup 2026-04-08 22:34:52 +00:00
Peter Steinberger
697015178d test: remove gpt 4.1 install e2e fallbacks 2026-04-08 22:34:52 +00:00
Vincent Koc
7125272700 docs: cover 2026.4.7 changelog gaps 2026-04-08 22:34:52 +00:00
Peter Steinberger
bc7600792a test: isolate subagent resume persistence registry path 2026-04-08 22:34:52 +00:00
Peter Steinberger
024b94d874 fix: unblock windows update build 2026-04-08 22:34:52 +00:00
Vincent Koc
9a0b3899e1 perf(telegram): trim secret contract text import 2026-04-08 22:34:51 +00:00
Peter Steinberger
b7a7c77d63 build: update appcast for 2026.4.8 2026-04-08 22:34:51 +00:00
Peter Steinberger
e690358613 test: harden Docker install e2e agent lane 2026-04-08 22:34:51 +00:00
Peter Steinberger
d43f86b339 test: keep Discord payload contracts off broad test api 2026-04-08 22:34:51 +00:00
Vincent Koc
8cc658f45a perf(matrix): trim secret env-var import path 2026-04-08 22:34:51 +00:00
Vincent Koc
fa91211932 test(extensions): fix bundled lint regressions 2026-04-08 22:34:51 +00:00
Peter Steinberger
451acb607a test: load narrow Discord inbound context harness 2026-04-08 22:34:51 +00:00
Peter Steinberger
5b4f1ce0e1 test: isolate video media runner auth from main profile store 2026-04-08 22:34:51 +00:00
Peter Steinberger
fe1eb6ea8a test: share gateway server for chat history RPC suite 2026-04-08 22:34:51 +00:00
Peter Steinberger
41699ee85b test: fold config apply RPC cases into config gateway suite 2026-04-08 22:34:51 +00:00
Peter Steinberger
0757efc4ea test: share gateway server for talk config RPC tests 2026-04-08 22:34:51 +00:00
Peter Steinberger
7cefba303a test: share gateway harness for session message event tests 2026-04-08 22:34:51 +00:00
Peter Steinberger
836c1b4978 test: fold OpenAI message channel check into shared HTTP suite 2026-04-08 22:34:51 +00:00
Peter Steinberger
c3ef2c53fa test: keep model pricing cache tests off provider runtime 2026-04-08 22:34:51 +00:00
Peter Steinberger
0eccb327b2 test: avoid reconnect waits in node wake unit tests 2026-04-08 22:34:51 +00:00
Peter Steinberger
eb41468beb test: route gateway HTTP history and startup wiring to e2e 2026-04-08 22:34:51 +00:00
Peter Steinberger
ea8722a05b chore: sync 2026.4.8 config docs baseline 2026-04-08 22:34:51 +00:00
Peter Steinberger
8d4c029147 test: fold talk provider override coverage into runtime suite 2026-04-08 22:34:51 +00:00
Gustavo Madeira Santana
93c040c832 Docs: refresh schema, slash commands, and TTS refs 2026-04-08 22:34:51 +00:00
Peter Steinberger
9a24e017d8 test: mock talk synthesis at gateway boundary 2026-04-08 22:34:51 +00:00
Peter Steinberger
c959098a6d chore: prepare 2026.4.8 npm release 2026-04-08 22:34:51 +00:00
Peter Steinberger
9930e67c26 test: move openai talk override coverage to provider lane 2026-04-08 22:34:51 +00:00
Peter Steinberger
58f403d493 test: smoke packed bundled channel entries 2026-04-08 22:34:51 +00:00
Gustavo Madeira Santana
9da9a180f6 Slack: clarify native streaming config hint 2026-04-08 22:34:51 +00:00
Gustavo Madeira Santana
6c7fcbb20b Docs: clarify Slack streaming thread behavior
Clarify the canonical Slack streaming config keys and legacy migration notes
across the Slack docs and shared streaming concept docs.

Document that native Slack streaming and assistant thread status require a
reply thread, and call out the top-level DM fallback behavior.
2026-04-08 22:34:51 +00:00
Peter Steinberger
8277dc7f61 test: move gateway e2e fixture out of unit lane 2026-04-08 22:34:51 +00:00
Peter Steinberger
64b3d17100 fix: pass resolved Slack download tokens (#62097) (thanks @martingarramon) 2026-04-08 22:34:51 +00:00
Martin Garramon
357d7058c0 fix(slack): forward resolved botToken to downloadSlackFile
Closes #62088

When `buildActionOpts` returns undefined (default account, no token
override), `downloadSlackFile` calls `resolveToken(undefined, undefined)`
which re-reads raw config via `loadConfig()`. If botToken is a SecretRef
object, `normalizeResolvedSecretInputString` rejects it because it
expects a string — the download silently fails.

This injects the already-resolved botToken from the gateway runtime
snapshot into the download opts as a fallback, bypassing the raw config
re-read. Same root cause as the Discord fix in b51214ec3e.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-08 22:34:51 +00:00
Maxime Grenu
122d870049 fix(net): skip DNS pinning before trusted env proxy dispatch 2026-04-08 22:34:51 +00:00
Peter Steinberger
d5207bac8a fix: honor Slack Socket Mode env proxies (#62878) (thanks @mjamiv) 2026-04-08 22:34:51 +00:00
Michael Martello
e782428e97 fix: handle leading-dot NO_PROXY entries matching apex domain
`.slack.com` in NO_PROXY should match both `slack.com` (apex) and
`wss-primary.slack.com` (subdomain). Strip the leading dot before
comparison so the suffix check works for both cases.
2026-04-08 22:34:51 +00:00
Michael Martello
1bf2381bc8 fix: address review — honor NO_PROXY, guard malformed URLs
- Check NO_PROXY/no_proxy before creating HttpsProxyAgent; skip proxy
  when slack.com matches an exclusion entry (exact, suffix, or wildcard).
- Wrap HttpsProxyAgent construction in try/catch so malformed proxy URLs
  degrade to direct connectivity instead of crashing Slack channel init.
- Extract resolveProxyUrlFromEnv and isHostExcludedByNoProxy as testable
  helpers.
- Add tests for NO_PROXY exclusion, wildcard, unrelated hosts, and
  malformed URL resilience.
2026-04-08 22:34:51 +00:00
Michael Martello
9a85874f8f fix(slack): honor HTTPS_PROXY for Socket Mode WebSocket connections
When HTTPS_PROXY or HTTP_PROXY env vars are set, create an
HttpsProxyAgent and pass it as the `agent` option through
@slack/bolt → @slack/socket-mode → ws, so the WebSocket upgrade
request is tunneled through the proxy.

This fixes Slack Socket Mode in environments where all outbound
traffic must go through an HTTP CONNECT proxy (e.g. sandboxed
containers, corporate networks). Previously the ws library opened
a direct connection to wss-primary.slack.com, ignoring proxy env
vars entirely.

The approach mirrors the existing Discord gateway proxy support
(extensions/discord/src/monitor/gateway-plugin.ts) which uses the
same https-proxy-agent library.

Fixes #57405
2026-04-08 22:34:51 +00:00
Peter Steinberger
a5f32d3a1a refactor: split qa scenarios into per-file markdown defs 2026-04-08 22:34:51 +00:00
Peter Steinberger
b6afe5461f test: add opt-in leaf project scheduler 2026-04-08 22:34:51 +00:00
Peter Steinberger
392dd095a2 test: stabilize provider auth alias test imports 2026-04-08 22:34:51 +00:00
Peter Steinberger
e5a09c379e test: avoid duplicating plugin contract lane 2026-04-08 22:34:51 +00:00
Peter Steinberger
01f8871799 revert: remove bundled channel fallback masking 2026-04-08 22:34:51 +00:00
Tak Hoffman
6fc1f608c8 add bundled channel prepack smoke 2026-04-08 22:34:51 +00:00
Peter Steinberger
59a75e8a40 chore: prepare 2026.4.7-1 npm release 2026-04-08 22:34:51 +00:00
Peter Steinberger
3fc19fbb67 test: guard bundled channel sidecar specifiers 2026-04-08 22:34:51 +00:00
Tak Hoffman
2846d3f673 fix bundled channel entry fallback resolution 2026-04-08 22:34:51 +00:00
Peter Steinberger
60b8d5a835 fix: repair bundled channel secret sidecars 2026-04-08 22:34:51 +00:00
Peter Steinberger
81e0336dfa fix: repair Telegram setup package entry 2026-04-08 22:34:51 +00:00
Peter Steinberger
b2719d2ab8 fix: compact update_plan tool result 2026-04-08 22:34:51 +00:00
Peter Steinberger
2a1cc53fcc fix: align exec default reporting with runtime 2026-04-08 22:34:51 +00:00
Peter Steinberger
f0d13917f8 fix: align Z.AI endpoint detection with GLM-5.1 default (#61998) (thanks @serg0x) 2026-04-08 22:34:51 +00:00
Serg
33360b9c72 fix(zai): update stale glm-5 ref in docs/cli/onboard.md 2026-04-08 22:34:51 +00:00
Serg
1ee073df03 fix(zai): default to GLM-5.1 instead of GLM-5 2026-04-08 22:34:51 +00:00
Peter Steinberger
762480a9e5 chore: prepare 2026.4.8 2026-04-08 22:34:51 +00:00
Peter Steinberger
0499e446d9 chore: update appcast for 2026.4.7 2026-04-08 22:34:50 +00:00
Ayaan Zaidi
282e9d6910 fix: keep runtime model lookup on configured workspace 2026-04-08 22:34:50 +00:00
Peter Steinberger
f951bd89ef docs: add memory wiki docs 2026-04-08 22:34:50 +00:00
Peter Steinberger
f544e366a1 ci: prepare extension lint artifacts 2026-04-08 22:34:50 +00:00
Peter Steinberger
8ad71bc0e0 fix: harden tahoe version check 2026-04-08 22:34:50 +00:00
Peter Steinberger
9981cbf519 fix: harden parallels upgrade flows 2026-04-08 22:34:50 +00:00
ruclaw7
fe774da67f fix: prefer codex gpt-5.4 runtime metadata (#62694) (thanks @ruclaw7)
* Agents: prefer runtime codex gpt-5.4 metadata

* Agents: move codex gpt-5.4 override into provider hook

* fix: repair codex runtime preference hooks

* fix: use workspace dir for codex runtime preference

* test: cover codex workspace dir hook

* fix: prefer codex gpt-5.4 runtime metadata (#62694) (thanks @ruclaw7)

---------

Co-authored-by: Rudi Cilibrasi <cilibrar@gmail.com>
Co-authored-by: Rudi Cilibrasi <rudi@metagood.com>
Co-authored-by: Ayaan Zaidi <hi@obviy.us>
2026-04-08 22:34:50 +00:00
Josh Lehman
3318cae246 fix: expose runtime-ready provider auth to plugins (#62753) 2026-04-08 22:34:50 +00:00
B
b63e593a01 fix(doctor): warn when stale Codex overrides shadow OAuth (#40143)
* fix(doctor): warn on stale codex provider overrides

* test(doctor): cover stored codex oauth warning path

* fix: narrow codex override doctor warning (#40143) (thanks @bde1)

* test: sync doctor e2e mocks after health-flow move (#40143) (thanks @bde1)

---------

Co-authored-by: bde1 <bde1@users.noreply.github.com>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
2026-04-08 22:34:50 +00:00
Peter Steinberger
c2912725b6 fix: guide exec timeouts to registered background sessions 2026-04-08 22:34:50 +00:00
Peter Steinberger
4925530be9 docs: fix qa refactor heading fence 2026-04-08 22:34:50 +00:00
Peter Steinberger
4e9e885448 docs: update config baseline 2026-04-08 22:34:50 +00:00
Peter Steinberger
2df13e85c5 build: exclude plugin sdk build info from npm pack 2026-04-08 22:34:50 +00:00
Peter Steinberger
028cf920ea docs: update plugin sdk api baseline 2026-04-08 22:34:50 +00:00
Peter Steinberger
114b005436 fix: raise acpx runtime timeout 2026-04-08 22:34:50 +00:00
Peter Steinberger
a5f37d1c9a fix: escape tahoe update trap vars 2026-04-08 22:34:50 +00:00
Peter Steinberger
216aff34ef docs: stamp 2026.4.7 changelog 2026-04-08 22:34:50 +00:00
Peter Steinberger
f9ab93ea98 fix: repair tahoe update done trap 2026-04-08 22:34:50 +00:00
Peter Steinberger
ef120bebd2 test: drop pre-Gemini 3 from live model matrix 2026-04-08 22:34:50 +00:00
Peter Steinberger
5fdc67f498 fix: stabilize parallels upgrade preflight 2026-04-08 22:34:50 +00:00
Peter Steinberger
2298f2018c test: avoid persisting command registry cleanup 2026-04-08 22:34:50 +00:00
Peter Steinberger
ca1575b4cd chore: prepare 2026.4.7 2026-04-08 22:34:50 +00:00
Peter Steinberger
f24bfdb2aa fix: force cmd shell for windows smoke update 2026-04-08 22:34:50 +00:00
Peter Steinberger
2d643ba935 fix: harden parallels upgrade launchers 2026-04-08 22:34:50 +00:00
Peter Steinberger
8ff5d6c77a perf(config): isolate model alias defaults policy 2026-04-08 22:34:50 +00:00
Peter Steinberger
9e4fa7488c perf(config): fold telegram audio schema coverage 2026-04-08 22:34:50 +00:00
Peter Steinberger
22bdcde16f perf(runtime): trim config, media, and secrets tests 2026-04-08 22:34:50 +00:00
Peter Steinberger
8e1a39e1df test: speed up effective tools inventory test 2026-04-08 22:34:50 +00:00
Peter Steinberger
7294365976 test: speed up plugin registry loader tests 2026-04-08 22:34:50 +00:00
Peter Steinberger
b35525273a test: speed up auto reply command tests 2026-04-08 22:34:50 +00:00
Peter Steinberger
f3eea2d016 refactor: dedupe ui foundry trimmed readers 2026-04-08 22:34:50 +00:00
Peter Steinberger
07e17274c3 refactor: dedupe messaging trimmed readers 2026-04-08 22:34:50 +00:00
Peter Steinberger
8a0faac188 refactor: dedupe provider ui trimmed readers 2026-04-08 22:34:50 +00:00
Peter Steinberger
6a6690bf3d refactor: dedupe extension trimmed readers 2026-04-08 22:34:50 +00:00
Peter Steinberger
8d52eecefc refactor: dedupe core trimmed readers 2026-04-08 22:34:50 +00:00
Peter Steinberger
faae9dc7c2 refactor: dedupe gateway memory trimmed readers 2026-04-08 22:34:50 +00:00
Peter Steinberger
f5c0f1f025 refactor: dedupe plugin auto-reply trimmed readers 2026-04-08 22:34:50 +00:00
Peter Steinberger
53c4dd7895 refactor: dedupe config cli command trimmed readers 2026-04-08 22:34:50 +00:00
Peter Steinberger
07092c7330 refactor: dedupe gateway trimmed readers 2026-04-08 22:34:50 +00:00
Peter Steinberger
8d47dfb8ab refactor: dedupe plugin trimmed readers 2026-04-08 22:34:50 +00:00
Peter Steinberger
b760840220 refactor: dedupe matrix trimmed readers 2026-04-08 22:34:50 +00:00
Peter Steinberger
bb7486ceae refactor: dedupe cli cron trimmed readers 2026-04-08 22:34:50 +00:00
Peter Steinberger
c25a4a0d1d fix: harden parallels upgrade checks 2026-04-08 22:34:50 +00:00
Peter Steinberger
fd727d3c5e test: trim config migration smoke coverage 2026-04-08 22:34:50 +00:00
Aftab
5c9cce3a7b fix(daemon): skip machine-scope fallback on permission-denied bus errors (#62337)
* fix(daemon): skip machine-scope fallback on permission-denied bus errors; fall back to --user when sudo machine scope fails

When systemctl --user fails with "Failed to connect to bus: Permission
denied", the machine-scope fallback is now skipped. A Permission denied
error means the bus socket exists but the process cannot connect to it,
so --machine user@ would hit the same wall.

Additionally, the sudo path in execSystemctlUser now tries machine scope
first but falls through to a direct --user attempt if it fails, instead
of returning the error immediately.

Fixes #61959

* fix(daemon): guard against double machine-scope call when sudo path already tried it

When SUDO_USER is set and machine scope fails with a non-permission-denied
bus error, execution falls through to the direct --user attempt. If that
also fails with a bus-unavailable message, shouldFallbackToMachineUserScope
returns true and machine scope is tried a second time -- a redundant exec
that was never reachable before this PR opened the fallthrough path.

Add machineScopeAlreadyTried flag and include it in the bottom-fallback
guard condition so the second call is skipped when machine scope was
already attempted in the sudo branch.

Add regression test asserting exactly 2 execFile calls in this scenario.

* fix: keep sudo systemctl scoped

---------

Co-authored-by: Peter Steinberger <steipete@gmail.com>
2026-04-08 22:34:50 +00:00
Peter Steinberger
9008955e21 Tests: type sessions send gateway mock 2026-04-08 22:34:50 +00:00
Peter Steinberger
ffaced657e test: trim secrets runtime x_search coverage 2026-04-08 22:34:50 +00:00
Peter Steinberger
379108660e Tests: stabilize memory dreaming time windows 2026-04-08 22:34:50 +00:00
Josh Lehman
32be4bd790 fix: pass threadId through sessions_send announce delivery (#62758) 2026-04-08 22:34:49 +00:00
Peter Steinberger
94bf35369d test: narrow config migration smoke coverage 2026-04-08 22:34:49 +00:00
Peter Steinberger
eaaa394ca0 test: trim duplicate config migration coverage 2026-04-08 22:34:49 +00:00
Peter Steinberger
e1bd220959 test: split channel textChunkLimit schema coverage 2026-04-08 22:34:49 +00:00
Peter Steinberger
6c74c701a8 test: fold identity defaults into existing config suites 2026-04-08 22:34:49 +00:00
Peter Steinberger
70d8e6652f test: trim config defaults and secrets refresh coverage 2026-04-08 22:34:49 +00:00
Peter Steinberger
593d4a7e0d fix: respect disabled heartbeat guidance 2026-04-08 22:34:49 +00:00
Peter Steinberger
eb96d5c3c8 fix: surface Claude CLI API errors 2026-04-08 22:34:49 +00:00
Peter Steinberger
e960662c21 Tests: align provider synthetic auth fixture 2026-04-08 22:34:49 +00:00
Peter Steinberger
fd409968ad test: fix provider usage mocks and trim media runner setup 2026-04-08 22:34:49 +00:00
Peter Steinberger
3d3ad30436 Tests: use timeout-classed compaction failure 2026-04-08 22:34:49 +00:00
Peter Steinberger
8dcc62dbaa test: speed up cli and process tests 2026-04-08 22:34:49 +00:00
Peter Steinberger
7c63f39e44 test: speed up agent runtime helper tests 2026-04-08 22:34:49 +00:00
Peter Steinberger
c23f290523 test: speed up agent auth config tests 2026-04-08 22:34:49 +00:00
Peter Steinberger
a6ea0e6449 Tests: type provider usage plugin mocks 2026-04-08 22:34:49 +00:00
zhumengzhu
59bde1d95e fix(logging): correct levelToMinLevel mapping and related filter logic for tslog v4 (#44646)
* fix: correct levelToMinLevel mapping and isFileLogLevelEnabled direction for tslog v4

* test: add regression tests for logging level filter and child logger inheritance

* fix: propagate minLevel to toPinoLikeLogger sub-loggers

* fix: correct shouldLogToConsole comparison direction in subsystem.ts

* test: cover logging threshold regressions

* fix(logging): treat silent as non-emittable level

---------

Co-authored-by: Altay <altay@uinaf.dev>
2026-04-08 22:34:49 +00:00
Josh Lehman
061b23c8ec fix: honor explicit auth profile selection (#62744)
* Auth: fix native model profile selection

Fix native `/model ...@profile` targeting so profile selections persist onto the intended session, and preserve explicit session auth-profile overrides even when stored auth order prefers another profile. Update the reply/session regressions to use placeholder example.test profile ids.

Regeneration-Prompt: |
  Native `/model ...@profile` commands in chat were acknowledging the requested auth profile but later runs still used another account. Fix the target-session handling so native slash commands mutate the real chat session rather than a slash-session surrogate, and keep explicit session auth-profile overrides from being cleared just because stored provider order prefers another profile. Update the tests to cover the target-session path and the override-preservation behavior, and use placeholder profile ids instead of real email addresses in test fixtures.

* Auth: honor explicit user-locked profiles in runner

Allow an explicit user-selected auth profile to run even when per-agent auth-state order excludes it. Keep auth-state order for automatic selection and failover, and add an embedded runner regression that seeds stored order with one profile while verifying a different user-locked profile still executes.

Regeneration-Prompt: |
  The remaining bug after fixing native `/model ...@profile` persistence was in the embedded runner itself. A user could explicitly select a valid auth profile for a provider, but the run still failed if per-agent auth-state order did not include that profile. Preserve the intended semantics by validating user-locked profiles directly for provider match and credential eligibility, then using them without requiring membership in resolved auto-order. Add a regression in the embedded auth-profile rotation suite where stored order only includes one OpenAI profile but a different user-locked profile is chosen and must still be used.

* Changelog: note explicit auth profile selection fix

Add the required Unreleased changelog line for the explicit auth-profile selection and runner honor fix in this PR.

Regeneration-Prompt: |
  The PR needed a mandatory CHANGELOG.md entry under Unreleased/Fixes. Add a concise user-facing line describing that native `/model ...@profile` selections now persist on the target session and explicit user-locked OpenAI Codex auth profiles are honored even when per-agent auth order excludes them, and include the PR number plus thanks attribution for the PR author.
2026-04-08 22:34:49 +00:00
Peter Steinberger
7696455b2e perf(test): trim infra provider and approval suites 2026-04-08 22:34:49 +00:00
Peter Steinberger
e105e57745 fix: resolve ci type regressions 2026-04-08 22:34:49 +00:00
Peter Steinberger
45d3150ab8 refactor: dedupe channel trimmed readers 2026-04-08 22:34:49 +00:00
Peter Steinberger
2501dd3bfb refactor: dedupe agent trimmed readers 2026-04-08 22:34:49 +00:00
Peter Steinberger
75df1e264e refactor: dedupe gateway trimmed readers 2026-04-08 22:34:49 +00:00
Peter Steinberger
a80db6f355 refactor: dedupe auto-reply trimmed readers 2026-04-08 22:34:49 +00:00
Peter Steinberger
08a5856d97 refactor: dedupe infra trimmed readers 2026-04-08 22:34:49 +00:00
Peter Steinberger
81a11e0e58 refactor: dedupe gateway trimmed readers 2026-04-08 22:34:49 +00:00
Peter Steinberger
36c7e83614 refactor: dedupe agent trimmed readers 2026-04-08 22:34:49 +00:00
Peter Steinberger
d3f41780a0 refactor: dedupe command trimmed readers 2026-04-08 22:34:49 +00:00
Peter Steinberger
ef4cc389e9 refactor: dedupe discord trimmed readers 2026-04-08 22:34:49 +00:00
Peter Steinberger
3c310be683 refactor: dedupe telegram trimmed readers 2026-04-08 22:34:49 +00:00
Peter Steinberger
c5bd7252b7 refactor: dedupe browser trimmed readers 2026-04-08 22:34:49 +00:00
Peter Steinberger
0ab6fd8593 refactor: dedupe ui trimmed readers 2026-04-08 22:34:49 +00:00
Peter Steinberger
ffdc3d38a9 refactor: dedupe browser trimmed readers 2026-04-08 22:34:49 +00:00
Peter Steinberger
81920f3ad1 refactor: dedupe provider trimmed readers 2026-04-08 22:34:49 +00:00
Peter Steinberger
224bf4a9be test: speed up model config provider tests 2026-04-08 22:34:49 +00:00
Peter Steinberger
7ca7b2d4d3 test: speed up stream and bash tool tests 2026-04-08 22:34:49 +00:00
Peter Steinberger
c426712969 test: use line adapters in setup-surface tests 2026-04-08 22:34:49 +00:00
Peter Steinberger
5e776ca4c3 feat: add gh-read GitHub app helper 2026-04-08 22:34:49 +00:00
Peter Steinberger
40f7ef22a0 fix(test): align exec approvals expectations 2026-04-08 22:34:49 +00:00
Peter Steinberger
d5727ca94a Tests: repair latest main type drift 2026-04-08 22:34:49 +00:00
Peter Steinberger
40ee96c002 Tests: keep route notice coverage in coordinator 2026-04-08 22:34:49 +00:00
Peter Steinberger
247824d842 Tests: align extension approval startup seams 2026-04-08 22:34:49 +00:00
Peter Steinberger
a2a8c3641c Tests: align exec approval policy expectations 2026-04-08 22:34:49 +00:00
Peter Steinberger
1e01b0b5ec Browser: align plugin registration mutability 2026-04-08 22:34:49 +00:00
Peter Steinberger
45ca762a5e Approvals: align native runtime tests 2026-04-08 22:34:49 +00:00
Peter Steinberger
928103bd1c Tests: update compaction fallback retry mock 2026-04-08 22:34:49 +00:00
Peter Steinberger
8d6266c914 refactor: move qa suite definitions into markdown 2026-04-08 22:34:49 +00:00
Peter Steinberger
12af575aa0 fix(test): align boundary and approval suites 2026-04-08 22:34:49 +00:00
Peter Steinberger
a2f9d169bc test: speed up auth profile store tests 2026-04-08 22:34:49 +00:00
Peter Steinberger
4a71f99da1 test: speed up subagent registry persistence resume test 2026-04-08 22:34:49 +00:00
Peter Steinberger
af07d97164 refactor: dedupe gateway agent trimmed readers 2026-04-08 22:34:49 +00:00
Peter Steinberger
a8677558f1 refactor: dedupe core trimmed string readers 2026-04-08 22:34:49 +00:00
Peter Steinberger
dc76efc91b refactor: dedupe trimmed string readers 2026-04-08 22:34:49 +00:00
Peter Steinberger
fc15ba9309 refactor: dedupe locale lowercase helpers 2026-04-08 22:34:49 +00:00
Peter Steinberger
0b0452a6b0 refactor: dedupe remaining lowercase helpers 2026-04-08 22:34:49 +00:00
Peter Steinberger
b57559a4e5 refactor: dedupe path lowercase helpers 2026-04-08 22:34:49 +00:00
Peter Steinberger
2785354250 refactor: dedupe canvas lowercase helpers 2026-04-08 22:34:49 +00:00
Peter Steinberger
7fcdfb49c9 refactor: dedupe normalization lowercase helpers 2026-04-08 22:34:49 +00:00
Agustin Rivera
020db1592f fix(env): align inherited host exec env filtering (#59119)
* fix(env): block inherited host exec config vars

* fix(env): preserve trusted inherited proxy env

* fix(env): preserve inherited host exec vars

* fix(env): refresh host env policy parity artifacts

* test(env): align blocked override ordering

* docs(changelog): add host env policy parity entry

---------

Co-authored-by: Devin Robison <drobison@nvidia.com>
2026-04-08 22:34:48 +00:00
Agustin Rivera
51370b44c7 fix(git): expand host env denylist coverage (#62002)
* fix(git): expand host env denylist

* fix(git): block alternate object directories

* docs(changelog): add git env denylist entry

* docs(changelog): remove conflict markers

---------

Co-authored-by: Devin Robison <drobison@nvidia.com>
2026-04-08 22:34:48 +00:00
Peter Steinberger
51000998f5 test: speed up agent config auth tests 2026-04-08 22:34:48 +00:00
Peter Steinberger
dedc18c37b test: speed up subagent registry tests 2026-04-08 22:34:48 +00:00
Agustin Rivera
afda3cae32 Guard missed base64 decode paths (#62007)
* fix(media): guard missed base64 decode paths

Co-authored-by: zsxsoft <git@zsxsoft.com>

* fix(media): wire maxBytes into image-generate-tool and consolidate base64 guard helpers

* docs(changelog): add base64 decode guard entry

* fix(image-generate): validate configured media cap

---------

Co-authored-by: zsxsoft <git@zsxsoft.com>
Co-authored-by: Devin Robison <drobison@nvidia.com>
2026-04-08 22:34:48 +00:00
Peter Steinberger
655ddd7000 refactor: dedupe misc lowercase helpers 2026-04-08 22:34:48 +00:00
Peter Steinberger
6e62fffb54 refactor: dedupe provider lowercase helpers 2026-04-08 22:34:48 +00:00
Peter Steinberger
f6a53d2409 refactor: dedupe extension lowercase helpers 2026-04-08 22:34:48 +00:00
Gustavo Madeira Santana
31f0757c49 style: normalize lazy approval adapter signature 2026-04-08 22:34:48 +00:00
Gustavo Madeira Santana
24caf2b5b8 types: preserve approval runtime payload typing 2026-04-08 22:34:48 +00:00
Peter Steinberger
9df57f3ee0 fix: preserve fallback error details 2026-04-08 22:34:48 +00:00
Agustin Rivera
42aef7b3e9 Protect gateway exec approval config paths (#62001)
* fix(gateway): protect exec approval config paths

* fix(gateway): compare protected config paths by value

* docs(changelog): add gateway exec config entry

---------

Co-authored-by: Devin Robison <drobison@nvidia.com>
Co-authored-by: Devin Robison <drobison00@users.noreply.github.com>
2026-04-08 22:34:48 +00:00
Gustavo Madeira Santana
feee96218a Docs: document approval adapter subpaths 2026-04-08 22:34:48 +00:00
Gustavo Madeira Santana
9e676d5676 Tests: align approval gateway seams 2026-04-08 22:34:48 +00:00
Gustavo Madeira Santana
91398cd2c7 Plugin SDK: split approval adapter seams 2026-04-08 22:34:48 +00:00
Gustavo Madeira Santana
c9bbe3c10f Tests: restore approval runtime coverage 2026-04-08 22:34:48 +00:00
Peter Steinberger
eb7874a59e fix: resolve rebase regressions for ci landing 2026-04-08 22:34:48 +00:00
Peter Steinberger
3acc5ad51b fix: repair test typing for check gate 2026-04-08 22:34:48 +00:00
Peter Steinberger
1381757d2e refactor: dedupe ui provider lowercase helpers 2026-04-08 22:34:48 +00:00
Peter Steinberger
4032736863 refactor: dedupe core lowercase helpers 2026-04-08 22:34:48 +00:00
Peter Steinberger
5220058ebf refactor: dedupe memory lowercase helpers 2026-04-08 22:34:48 +00:00
Peter Steinberger
0a949bb1c3 refactor: dedupe line qqbot slack lowercase helpers 2026-04-08 22:34:48 +00:00
Peter Steinberger
b503b5f8da refactor: dedupe browser whatsapp qa lowercase helpers 2026-04-08 22:34:48 +00:00
Peter Steinberger
f53216c21e refactor: dedupe memory lowercase helpers 2026-04-08 22:34:48 +00:00
Peter Steinberger
51ec3d30d5 refactor: dedupe ui lowercase helpers 2026-04-08 22:34:48 +00:00
Peter Steinberger
c153ade99d refactor: dedupe plugin lowercase helpers 2026-04-08 22:34:48 +00:00
Peter Steinberger
85a9677e8b refactor: dedupe telegram matrix lowercase helpers 2026-04-08 22:34:48 +00:00
Peter Steinberger
364901a2be refactor: dedupe command config lowercase helpers 2026-04-08 22:34:48 +00:00
Peter Steinberger
511f24e959 refactor: dedupe remaining lowercase helpers 2026-04-08 22:34:48 +00:00
Peter Steinberger
75d6c0c68b refactor: dedupe gateway infra lowercase helpers 2026-04-08 22:34:48 +00:00
Gustavo Madeira Santana
63161bf5ad Tests: align approval runtime helpers 2026-04-08 22:34:48 +00:00
Gustavo Madeira Santana
735c283a69 Extensions: align approval plugin typing 2026-04-08 22:34:48 +00:00
Gustavo Madeira Santana
c6c01fb973 fix(exec): harden stale/replay/live requests 2026-04-08 22:34:48 +00:00
Gustavo Madeira Santana
03763ecb01 docs(changelog): dedupe entry 2026-04-08 22:34:48 +00:00
Gustavo Madeira Santana
2c61006115 Approvals: replay pending requests on startup 2026-04-08 22:34:48 +00:00
Peter Steinberger
da8899a934 fix: harden complex qa suite scenarios 2026-04-08 22:34:48 +00:00
Peter Steinberger
9ee1fa0813 fix(qa): tighten frontier scope evals 2026-04-08 22:34:48 +00:00
Peter Steinberger
aa21ac708e fix(qa): restore safe no-fork gateway runtime 2026-04-08 22:34:48 +00:00
Vincent Koc
b14c380096 perf(qa): lazy-load runner catalog for lab ui 2026-04-08 22:34:48 +00:00
Vincent Koc
d651100a35 fix(qa): preserve gateway cli auth in no-fork rpc path 2026-04-08 22:34:48 +00:00
Vincent Koc
d4f07e468e perf(qa): drop per-rpc gateway cli forks 2026-04-08 22:34:48 +00:00
Vincent Koc
5ffc3e12ff perf(qa): trim frontier direct-agent waits 2026-04-08 22:34:48 +00:00
Vincent Koc
248f030054 test(qa): retry flaky local fetches in lab server tests 2026-04-08 22:34:48 +00:00
Vincent Koc
c6b8624793 fix(qa): keep direct self-check outputs under repo root 2026-04-08 22:34:48 +00:00
Vincent Koc
c0cba1793e fix(qa): anchor runner artifacts to repo root 2026-04-08 22:34:48 +00:00
Vincent Koc
da086196c3 fix(qa): default docker artifacts from repo root 2026-04-08 22:34:48 +00:00
Vincent Koc
bd0fe6ed43 fix(qa): support neutral-cwd docker commands 2026-04-08 22:34:48 +00:00
Vincent Koc
f767b17891 chore(qa): align qa cli provider input types 2026-04-08 22:34:48 +00:00
Vincent Koc
5cfbec59c2 fix(qa): normalize qa cli lane inputs 2026-04-08 22:34:48 +00:00
Vincent Koc
234e6d55e3 fix(qa): keep manual alternate model aligned 2026-04-08 22:34:48 +00:00
Vincent Koc
2af91da79c fix(qa): default manual lanes by provider mode 2026-04-08 22:34:48 +00:00
Vincent Koc
16f4c82527 fix(qa): allow random qa-lab control-ui origins 2026-04-08 22:34:48 +00:00
Vincent Koc
a1c3a7144d fix(qa): pin gateway child control ui root 2026-04-08 22:34:48 +00:00
Vincent Koc
90a41dbd0e fix(qa): align mock model-switch continuity 2026-04-08 22:34:48 +00:00
Vincent Koc
3dbf5e5c6d fix(qa): support neutral-cwd suite runs 2026-04-08 22:34:48 +00:00
Vincent Koc
0451836493 docs(qa): expand frontier bakeoff runbook 2026-04-08 22:34:48 +00:00
Vincent Koc
0ec0826568 feat(qa): add manual harness lane 2026-04-08 22:34:48 +00:00
Vincent Koc
0b61ed0c0a fix(qa): isolate gateway child runtime 2026-04-08 22:34:48 +00:00
Vincent Koc
d801773202 fix(qa): harden frontier claude bakeoffs 2026-04-08 22:34:48 +00:00
Vincent Koc
9dd6ecf45d feat(qa): add frontier harness bakeoff loop 2026-04-08 22:34:48 +00:00
Andrew Demczuk
1f9e0707cb fix(gateway): stop SSRF guard rejecting operator-configured proxy hostnames (#62312)
When allowPrivateProxy is true, the explicit proxy hostname is operator-
configured and trusted. The SSRF guard was checking the proxy hostname
against the target-scoped hostnameAllowlist (e.g. ["api.telegram.org"]),
which rejected localhost and other local proxy hostnames. This broke
Telegram media downloads (and any channel using a local proxy) after
the url-fetch security hardening in 2026.4.x.

Clear the hostnameAllowlist for the proxy hostname check while keeping
private-network IP validation in place via allowPrivateNetwork.

Fixes #61906

Co-authored-by: Devin Robison <drobison00@users.noreply.github.com>
2026-04-08 22:34:48 +00:00
Peter Steinberger
c779abaa7d fix(test): refresh schema snapshot and stabilize channel registry 2026-04-08 22:34:47 +00:00
Agustin Rivera
ad4878917c fix(browser): align browser.proxy profile mutation guards (#60489)
* fix(browser): block proxy profile mutations

* docs(changelog): add browser proxy guard entry

---------

Co-authored-by: Devin Robison <drobison@nvidia.com>
Co-authored-by: Devin Robison <drobison00@users.noreply.github.com>
2026-04-08 22:34:47 +00:00
Peter Steinberger
854976203e test: speed up plugin cli tests 2026-04-08 22:34:47 +00:00
Peter Steinberger
817a8dcd21 test: speed up slack setup entry tests 2026-04-08 22:34:47 +00:00
Peter Steinberger
cb80453151 test: speed up browser plugin entry tests 2026-04-08 22:34:47 +00:00
Nimrod Gutman
3ca91c872f feat(ios): improve gateway connection error ux (#62650)
* feat(ios): improve gateway connection error ux

* fix(ios): address gateway problem review feedback

* feat(ios): improve gateway connection error ux (#62650) (thanks @ngutman)
2026-04-08 22:34:47 +00:00
Agustin Rivera
b1479b6839 Require re-pairing for node reconnect command upgrades (#62658)
* fix(node): require re-pairing for reconnect command upgrades

Co-authored-by: zsx <git@zsxsoft.com>

* fix(node): tighten reconnect pairing test polling

* docs(changelog): add node reconnect pairing entry

---------

Co-authored-by: zsx <git@zsxsoft.com>
Co-authored-by: Devin Robison <drobison@nvidia.com>
2026-04-08 22:34:47 +00:00
Peter Steinberger
3d2c303a60 test(gateway): cover isolated cron session key routing 2026-04-08 22:34:47 +00:00
Bruce MacDonald
c45226ed84 Changelog: restore dropped Approvals/runtime entry from conflict resolution 2026-04-08 22:34:47 +00:00
Bruce MacDonald
2cfc6d9d19 chore(ollama): update suggested onboarding models (#62626)
Merged via squash.

Prepared head SHA: 48c083b88a
Co-authored-by: BruceMacD <5853428+BruceMacD@users.noreply.github.com>
Co-authored-by: BruceMacD <5853428+BruceMacD@users.noreply.github.com>
Reviewed-by: @BruceMacD
2026-04-08 22:34:47 +00:00
pgondhi987
eb3e39191e fix: expand host-exec env blocklist for Java, Rust, and Cargo toolchains [AI-assisted] (#62291)
* fix: address issue

* docs(changelog): add host env blocklist entry

---------

Co-authored-by: Devin Robison <drobison@nvidia.com>
Co-authored-by: Devin Robison <drobison00@users.noreply.github.com>
2026-04-08 22:34:47 +00:00
BitToby
8687b8fada feat: add cover image support to Discord event create (#60883)
* feat: add image param to Discord event create for cover art

* fix: pass trusted media roots to event cover image loader

* fix: solve lint error

* fix: add changelog entry for Discord event cover image support (#60883) (thanks @bittoby)

---------

Co-authored-by: Shadow <hi@shadowing.dev>
2026-04-08 22:34:47 +00:00
Gustavo Madeira Santana
a9bad91301 Refactor: centralize native approval lifecycle assembly (#62135)
Merged via squash.

Prepared head SHA: b7c20a7398
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras
2026-04-08 22:34:47 +00:00
pgondhi987
2fb877b457 fix(fetch-guard): drop request body on cross-origin unsafe-method redirects [AI-assisted] (#62357)
* fix: address issue

* fix: address review feedback

* docs(changelog): add fetch guard redirect body entry

---------

Co-authored-by: Devin Robison <drobison@nvidia.com>
2026-04-08 22:34:47 +00:00
Agustin Rivera
276c81f319 fix(matrix): remove worklog artifact from pr 2026-04-08 18:16:15 +00:00
Agustin Rivera
c65356d9a2 fix(matrix): remove worklog artifact from pr 2026-04-08 18:15:01 +00:00
Agustin Rivera
30c0e94042 fix(matrix): thread senderIsOwner into HTTP tool-invoke path 2026-04-08 18:01:33 +00:00
Agustin Rivera
1724a92958 fix(matrix): fail closed owner gate 2026-04-08 17:47:03 +00:00
Agustin Rivera
55326ffb07 fix(matrix): gate profile updates for non-owner runs 2026-04-07 18:17:15 +00:00
312 changed files with 3259 additions and 7513 deletions

View File

@@ -57,28 +57,31 @@ Use `qa character-eval` for style/persona/vibe checks across multiple live model
 pnpm openclaw qa character-eval \
 --model openai/gpt-5.4,thinking=xhigh \
 --model openai/gpt-5.2,thinking=xhigh \
 --model openai/gpt-5,thinking=xhigh \
 --model anthropic/claude-opus-4-6,thinking=high \
 --model anthropic/claude-sonnet-4-6,thinking=high \
+--model minimax/MiniMax-M2.7,thinking=high \
 --model zai/glm-5.1,thinking=high \
 --model moonshot/kimi-k2.5,thinking=high \
+--model qwen/qwen3.6-plus,thinking=high \
+--model xiaomi/mimo-v2-pro,thinking=high \
 --model google/gemini-3.1-pro-preview,thinking=high \
 --model codex-cli/<codex-model>,thinking=high \
 --judge-model openai/gpt-5.4,thinking=xhigh,fast \
 --judge-model anthropic/claude-opus-4-6,thinking=high \
---concurrency 16 \
---judge-concurrency 16 \
+--concurrency 8 \
+--judge-concurrency 8 \
 --output-dir .artifacts/qa-e2e/character-eval-<tag>
 ```
 - Runs local QA gateway child processes, not Docker.
 - Preferred model spec syntax is `provider/model,thinking=<level>[,fast|,no-fast|,fast=<bool>]` for both `--model` and `--judge-model`.
 - Do not add new examples with separate `--model-thinking`; keep that flag as legacy compatibility only.
-- Defaults to candidate models `openai/gpt-5.4`, `openai/gpt-5.2`, `openai/gpt-5`, `anthropic/claude-opus-4-6`, `anthropic/claude-sonnet-4-6`, `zai/glm-5.1`, `moonshot/kimi-k2.5`, and `google/gemini-3.1-pro-preview` when no `--model` is passed.
+- Defaults to candidate models `openai/gpt-5.4`, `openai/gpt-5.2`, `anthropic/claude-opus-4-6`, `anthropic/claude-sonnet-4-6`, `minimax/MiniMax-M2.7`, `zai/glm-5.1`, `moonshot/kimi-k2.5`, `qwen/qwen3.6-plus`, `xiaomi/mimo-v2-pro`, and `google/gemini-3.1-pro-preview` when no `--model` is passed.
 - Candidate thinking defaults to `high`, with `xhigh` for OpenAI models that support it. Prefer inline `--model provider/model,thinking=<level>`; `--thinking <level>` and `--model-thinking <provider/model=level>` remain compatibility shims.
 - OpenAI candidate refs default to fast mode so priority processing is used where supported. Use inline `,fast`, `,no-fast`, or `,fast=false` for one model; use `--fast` only to force fast mode for every candidate.
 - Judges default to `openai/gpt-5.4,thinking=xhigh,fast` and `anthropic/claude-opus-4-6,thinking=high`.
 - Report includes judge ranking, run stats, durations, and full transcripts; do not include raw judge replies. Duration is benchmark context, not a grading signal.
-- Candidate and judge concurrency default to 16. Use `--concurrency <n>` and `--judge-concurrency <n>` to override when local gateways or provider limits need a gentler lane.
+- Candidate and judge concurrency default to 8. Use `--concurrency <n>` and `--judge-concurrency <n>` to override when local gateways or provider limits need a gentler lane.
 - Scenario source should stay markdown-driven under `qa/scenarios/`.
 - For isolated character/persona evals, write the persona into `SOUL.md` and blank `IDENTITY.md` in the scenario flow. Use `SOUL.md + IDENTITY.md` only when intentionally testing how the normal OpenClaw identity combines with the character.
 - Keep prompts natural and task-shaped. The candidate model should receive character setup through `SOUL.md`, then normal user turns such as chat, workspace help, and small file tasks; do not ask "how would you react?" or tell the model it is in an eval.

View File

@@ -1042,9 +1042,7 @@ jobs:
set -euo pipefail
case "$TASK" in
test)
# Linux owns the full repo test suite. Keep the Windows runner focused on
# Windows-native process/path wrappers so platform regressions fail fast.
pnpm test:windows:ci
pnpm test
;;
*)
echo "Unsupported Windows checks task: $TASK" >&2
@@ -1095,9 +1093,7 @@ jobs:
set -euo pipefail
case "$TASK" in
test)
# Linux owns the full repo test suite. Keep macOS CI focused on
# launchd/Homebrew/runtime path coverage and the process-group wrapper.
pnpm test:macos:ci
pnpm test
;;
*)
echo "Unsupported macOS node task: $TASK" >&2

View File

@@ -6,48 +6,31 @@ Docs: https://docs.openclaw.ai
### Changes
- Memory/dreaming: add a grounded REM backfill lane with historical `rem-harness --path`, diary commit/reset flows, cleaner durable-fact extraction, and live short-term promotion integration so old daily notes can replay into Dreams and durable memory without a second memory stack. Thanks @mbelinky.
- Control UI/dreaming: add a structured diary view with timeline navigation, backfill/reset controls, traceable dreaming summaries, and a grounded Scene lane with promotion hints plus a safe clear-grounded action for staged backfill signals. (#63395) Thanks @mbelinky.
- QA/lab: add character-vibes evaluation reports with model selection and parallel runs so live QA can compare candidate behavior faster.
- Plugins/provider-auth: let provider manifests declare `providerAuthAliases` so provider variants can share env vars, auth profiles, config-backed auth, and API-key onboarding choices without core-specific wiring.
- iOS: pin release versioning to an explicit CalVer in `apps/ios/version.json`, keep TestFlight iteration on the same short version until maintainers intentionally promote the next gateway version, and add the documented `pnpm ios:version:pin -- --from-gateway` workflow for release trains. (#63001) Thanks @ngutman.
- Plugins/provider-auth: let provider manifests declare `providerAuthAliases` so provider variants can share env vars, auth profiles, config-backed auth, and API-key onboarding choices without core-specific wiring.
- Memory/dreaming: add a grounded REM backfill lane with historical `rem-harness --path`, diary commit, and reset flows so old daily notes can be replayed safely into `DREAMS.md`. Thanks @mbelinky.
- Memory/dreaming: harden grounded diary extraction so `What Happened`, `Reflections`, and durable candidates suppress operational noise and preserve more atomic lasting facts. Thanks @mbelinky.
- Control UI/dreaming: add a structured diary view with timeline navigation, backfill/reset controls, and traceable dreaming summaries. Thanks @mbelinky.
### Fixes
- Browser/security: re-run blocked-destination safety checks after interaction-driven main-frame navigations from click, evaluate, hook-triggered click, and batched action flows, so browser interactions cannot bypass the SSRF quarantine when they land on forbidden URLs. (#63226) Thanks @eleqtrizit.
- Security/dotenv: block runtime-control env vars plus browser-control override and skip-server env vars from untrusted workspace `.env` files, and reject unsafe URL-style browser control override specifiers before lazy loading. (#62660, #62663) Thanks @eleqtrizit.
- Gateway/node exec events: mark remote node `exec.started`, `exec.finished`, and `exec.denied` summaries as untrusted system events and sanitize node-provided command/output/reason text before enqueueing them, so remote node output cannot inject trusted `System:` content into later turns. (#62659) Thanks @eleqtrizit.
- Plugins/onboarding auth choices: prevent untrusted workspace plugins from colliding with bundled provider auth-choice ids during non-interactive onboarding, so bundled provider setup keeps operator secrets out of untrusted workspace plugin handlers unless those plugins are explicitly trusted. (#62368) Thanks @pgondhi987.
- Security/dependency audit: force `basic-ftp` to `5.2.1` for the CRLF command-injection fix and bump Hono plus `@hono/node-server` in production resolution paths.
- Android/pairing: clear stale setup-code auth on new QR scans, bootstrap operator and node sessions from fresh pairing, prefer stored device tokens after bootstrap handoff, and pause pairing auto-retry while the app is backgrounded so scan-once Android pairing recovers reliably again. (#63199) Thanks @obviyus.
- Matrix/gateway: wait for Matrix sync readiness before marking startup successful, keep Matrix background handler failures contained, and route fatal Matrix sync stops through channel-level restart handling instead of crashing the whole gateway. (#62779) Thanks @gumadeiras.
- Slack/media: preserve bearer auth across same-origin `files.slack.com` redirects while still stripping it on cross-origin Slack CDN hops, so `url_private_download` image attachments load again. (#62960) Thanks @vincentkoc.
- Reply/doctor: use the active runtime snapshot for queued reply runs, resolve reply-run SecretRefs before preflight helpers touch config, surface gateway OAuth reauth failures to users, and make `openclaw doctor` call out exact reauth commands. (#62693, #63217) Thanks @mbelinky.
- Control UI: guard stale session-history reloads during fast session switches so the selected session and rendered transcript stay in sync. (#62975) Thanks @scoootscooob.
- Gateway/chat: suppress exact and streamed `ANNOUNCE_SKIP` / `REPLY_SKIP` control replies across live chat updates and history sanitization so internal agent-to-agent control tokens no longer leak into user-facing gateway chat surfaces. (#51739) Thanks @Pinghuachiu.
- Auto-reply/NO_REPLY: strip glued leading `NO_REPLY` tokens before reply normalization and ACP-visible streaming so silent sentinel text no longer leaks into user-visible replies while preserving substantive `NO_REPLY ...` text. Thanks @frankekn.
- Sessions/routing: preserve established external routes on inter-session announce traffic so `sessions_send` follow-ups do not steal delivery from Telegram, Discord, or other external channels. (#58013) Thanks @duqaXxX.
- Gateway/sessions: clear auto-fallback-pinned model overrides on `/reset` and `/new` while still preserving explicit user model selections, including legacy sessions created before override-source tracking existed. (#63155) Thanks @frankekn.
- Slack/ACP: treat Slack ACP block replies as visible delivered output so OpenClaw stops re-sending the final fallback text after Slack already rendered the reply. (#62858) Thanks @gumadeiras.
- Slack/partial streaming: key turn-local dedupe by dispatch kind and keep the final fallback reply path active when preview finalization fails so stale preview text cannot suppress the actual final answer. (#62859) Thanks @gumadeiras.
- Matrix/doctor: migrate legacy `channels.matrix.dm.policy: "trusted"` configs back to compatible DM policies during `openclaw doctor --fix`, preserving explicit `allowFrom` boundaries as `allowlist` and defaulting empty legacy configs to `pairing`. (#62942) Thanks @lukeboyett.
- npm packaging: mirror bundled channel runtime deps, stage Nostr runtime deps, derive required root mirrors from manifests and built chunks, and test packed release tarballs without repo `node_modules` so fresh installs fail fast on missing plugin deps instead of crashing at runtime. (#63065) Thanks @scoootscooob.
- QA/live auth: fail fast when live QA scenarios hit classified auth or runtime failure replies, including raw scenario wait paths, and sanitize missing-key guidance so gateway auth problems surface as actionable errors instead of timeouts. (#63333) Thanks @shakkernerd.
- Providers/OpenAI: default missing reasoning effort to `high` on OpenAI Responses, WebSocket, and compatible completions transports, while still honoring explicit per-run reasoning levels.
- Providers/Ollama: allow Ollama models using the native `api: "ollama"` path to optionally display thinking output when `/think` is set to a non-off level. (#62712) Thanks @hoyyeva.
- Codex CLI: pass OpenClaw's system prompt through Codex's `model_instructions_file` config override so fresh Codex CLI sessions receive the same prompt guidance as Claude CLI sessions.
- Auth/profiles: persist explicit auth-profile upserts directly and skip external CLI sync for local writes so profile changes are saved without stale external credential state.
- Slack/media: preserve bearer auth across same-origin `files.slack.com` redirects while still stripping it on cross-origin Slack CDN hops, so `url_private_download` image attachments load again. (#62960) Thanks @vincentkoc.
- Gateway/node exec events: mark remote node `exec.started`, `exec.finished`, and `exec.denied` summaries as untrusted system events and sanitize node-provided command/output/reason text before enqueueing them, so remote node output cannot inject trusted `System:` content into later turns. (#62659) Thanks @eleqtrizit.
- Agents/timeouts: make the LLM idle timeout inherit `agents.defaults.timeoutSeconds` when configured, disable the unconfigured idle watchdog for cron runs, and point idle-timeout errors at `agents.defaults.llm.idleTimeoutSeconds`. Thanks @drvoss.
- Security/dotenv: expand workspace `.env` filtering to block runtime-control variables like gateway routing, ClawHub endpoints/tokens, browser executable overrides, and skip/disable control families, so untrusted repositories cannot steer OpenClaw runtime behavior through repo-local dotenv files. (#62660) Thanks @eleqtrizit.
- Agents/failover: classify Z.ai vendor code `1311` as billing and `1113` as auth, including long wrapped `1311` payloads, so these errors stop falling through to generic failover handling. (#49552) Thanks @1bcMax.
- QQBot/media-tags: support HTML entity-encoded angle brackets (`&lt;`/`&gt;`), URL slashes in attributes, and self-closing media tags so upstream `<qqimg>` payloads are correctly parsed and normalized. (#60493) Thanks @ylc0919.
- Memory/dreaming: harden grounded backfill inputs, diary writes, status payloads, and diary action classification by preserving source-day labels, rejecting missing or symlinked targets cleanly, normalizing diary headings in gateway backfills, and tightening claim splitting plus diary source metadata. Thanks @mbelinky.
- Memory/dreaming: accept embedded heartbeat trigger tokens so light and REM dreaming still run when runtime wrappers include extra heartbeat text.
- Android/manual connect: allow blank port input only for TLS manual gateway endpoints so standard HTTPS Tailscale hosts default to `443` without silently changing cleartext manual connects. (#63134) Thanks @Tyler-RNG.
- Windows/update: add heap headroom to Windows `pnpm build` steps during dev updates so update preflight builds stop failing on low default Node memory.
- Plugin SDK: export the channel plugin base and web-search config contract through the public package so plugins can use them without private imports.
- Plugins/contracts: keep test-only helpers out of production contract barrels, load shared contract harnesses through bundled test surfaces, and harden guardrails so indirect re-exports and canonical `*.test.ts` files stay blocked. (#63311) Thanks @altaywtf.
- Control UI/models: preserve provider-qualified refs for OpenRouter catalog models whose ids already contain slashes so picker selections submit allowlist-compatible model refs instead of dropping the `openrouter/` prefix. (#63416) Thanks @sallyom.
- Plugin SDK/command auth: split command status builders onto the lightweight `openclaw/plugin-sdk/command-status` subpath while preserving deprecated `command-auth` compatibility exports, so auth-only plugin imports no longer pull status/context warmup into CLI onboarding paths. (#63174) Thanks @hxy91819.
- Browser/security: block browser-control module override and skip-server env vars from untrusted workspace `.env` files, and reject unsafe URL-style browser control override specifiers before lazy loading, so repo-local dotenv state cannot steer browser control module loading. (#62663) Thanks @eleqtrizit.
- QQBot/media-tags: support HTML entity-encoded angle brackets (`&lt;`/`&gt;`) in media-tag regexes so entity-escaped `<qqimg>` tags from upstream are correctly parsed and normalized. (#60493) Thanks @ylc0919.
- npm packaging: mirror bundled Slack, Telegram, Discord, and Feishu channel runtime deps at the root and harden published-install verification so fresh installs fail fast on manifest drift instead of missing-module crashes. (#63065) Thanks @scoootscooob.
- npm packaging: derive required root runtime mirrors from bundled plugin manifests and built root chunks, then install packed release tarballs without the repo `node_modules` so release checks catch missing plugin deps before publish.
- Reply/doctor: resolve reply-run SecretRefs before preflight helpers touch config, surface gateway OAuth reauth failures to users, and make `openclaw doctor` call out exact reauth commands.
- Android/pairing: clear stale setup-code auth on new QR scans, bootstrap operator and node sessions from fresh pairing, prefer stored device tokens after bootstrap handoff, and pause pairing auto-retry while the app is backgrounded so scan-once Android pairing recovers reliably again. (#63199) Thanks @obviyus.
- Auto-reply/NO_REPLY: strip glued leading `NO_REPLY` tokens before reply normalization and ACP-visible streaming so silent sentinel text no longer leaks into user-visible replies while preserving substantive `NO_REPLY ...` text. Thanks @frankekn.
- Gateway/sessions: clear auto-fallback-pinned model overrides on `/reset` and `/new` while still preserving explicit user model selections, including legacy sessions created before override-source tracking existed. (#63155) Thanks @frankekn.
- Codex CLI: pass OpenClaw's system prompt through Codex's `model_instructions_file` config override so fresh Codex CLI sessions receive the same prompt guidance as Claude CLI sessions.
- Matrix/gateway: wait for Matrix sync readiness before marking startup successful, keep Matrix background handler failures contained, and route fatal Matrix sync stops through channel-level restart handling instead of crashing the whole gateway. (#62779) Thanks @gumadeiras.
- Browser/security: re-run blocked-destination safety checks after interaction-driven main-frame navigations from click, evaluate, hook-triggered click, and batched action flows, so browser interactions cannot bypass the SSRF quarantine when they land on forbidden URLs. (#63226) Thanks @eleqtrizit.
## 2026.4.8
@@ -98,6 +81,7 @@ Docs: https://docs.openclaw.ai
- TUI: route `/status` through the shared session-status command, keep commentary hidden in history, strip raw envelope metadata from async command notices, preserve fallback streaming before per-attempt failures finalize, and restore Kitty keyboard state on exit or fatal crashes. (#49130, #59985, #60043, #61463) Thanks @biefan and contributors.
- iOS/Watch exec approvals: keep Apple Watch review and approval recovery working while the iPhone is locked or backgrounded, including reconnect recovery, pending approval persistence, notification cleanup, and APNs-backed watch refresh recovery. (#61757) Thanks @ngutman.
- Agents/context overflow: combine oversized and aggregate tool-result recovery in one pass and restore a total-context overflow backstop so recoverable sessions retry instead of failing early. (#61651) Thanks @Takhoffman.
- Agents/OpenAI: default missing reasoning effort to `high` on OpenAI Responses, WebSocket, and compatible completions transports, while still honoring explicit per-run reasoning levels.
- Auth/OpenAI Codex OAuth: reload fresh on-disk credentials inside the locked refresh path and retry once after `refresh_token_reused` rotates only the stored refresh token, so relogin/restart recovery stops getting stuck on stale cached auth state. Thanks @owen-ever.
- Auth/OpenAI Codex OAuth: keep native `/model ...@profile` selections on the target session and honor explicit user-locked auth profiles even when per-agent auth order excludes them. (#62744) Thanks @jalehman.
- Providers/Anthropic: preserve thinking blocks for Claude Opus 4.5+, Sonnet 4.5+, and newer Claude 4-family models so prompt-cache prefixes keep matching, and skip `service_tier` injection on OAuth-authenticated stream wrapper requests so Claude OAuth streaming stops failing with HTTP 401. (#60356, #61793)
@@ -108,6 +92,7 @@ Docs: https://docs.openclaw.ai
- Gateway tool/exec config: block model-facing `gateway config.apply` and `config.patch` writes from changing exec approval paths such as `safeBins`, `safeBinProfiles`, `safeBinTrustedDirs`, and `strictInlineEval`, while still allowing unchanged structured values through. (#62001) Thanks @eleqtrizit.
- Host exec/env sanitization: block dangerous Java, Rust, Cargo, Git, Kubernetes, cloud credential, config-path, and Helm env overrides so host-run tools cannot be redirected to attacker-chosen code, config, credentials, or repository state. (#59119, #62002, #62291) Thanks @eleqtrizit and contributors.
- Commands/allowlist: require owner authorization for `/allowlist add` and `/allowlist remove` before channel resolution, so non-owner but command-authorized senders can no longer persistently rewrite allowlist policy state. (#62383) Thanks @pgondhi987.
- Plugins/onboarding auth choices: prevent untrusted workspace plugins from colliding with bundled provider auth-choice ids during non-interactive onboarding, so bundled provider setup keeps operator secrets out of untrusted workspace plugin handlers unless those plugins are explicitly trusted. (#62368) Thanks @pgondhi987.
- Feishu/docx uploads: honor `tools.fs.workspaceOnly` for local `upload_file` and `upload_image` paths by forwarding workspace-constrained `localRoots` into the media loader, so docx uploads can no longer read host-local files outside the workspace when workspace-only mode is active. (#62369) Thanks @pgondhi987.
- Network/fetch guard: drop request bodies and body-describing headers on cross-origin `307` and `308` redirects by default, so attacker-controlled redirect hops cannot receive secret-bearing POST payloads from SSRF-guarded fetch flows unless a caller explicitly opts in. (#62357) Thanks @pgondhi987.
- Browser/SSRF: treat main-frame `document` redirect hops as navigations even when Playwright does not flag them as `isNavigationRequest()`, so strict private-network blocking still stops forbidden redirect pivots before the browser reaches the internal target. (#62355) Thanks @pgondhi987.
@@ -164,6 +149,9 @@ Docs: https://docs.openclaw.ai
- Agents/model resolution: let explicit `openai-codex/gpt-5.4` selection prefer provider runtime metadata when it reports a larger context window, keeping configured Codex runs aligned with the live provider limits. (#62694) Thanks @ruclaw7.
- Agents/model resolution: keep explicit-model runtime comparisons on the configured workspace plugin registry, so workspace-installed providers do not silently fall back to stale explicit metadata during runtime model lookup.
- Providers/Z.AI: default onboarding and endpoint detection to GLM-5.1 instead of GLM-5. (#61998) Thanks @serg0x.
- Reply execution: prefer the active runtime snapshot over stale queued reply config during embedded reply and follow-up execution so SecretRef-backed reply turns stop crashing after secrets have already resolved. (#62693) Thanks @mbelinky.
- Android/manual connect: allow blank port input only for TLS manual gateway endpoints so standard HTTPS Tailscale hosts default to `443` without silently changing cleartext manual connects. (#63134) Thanks @Tyler-RNG.
- Matrix/agents: hide owner-only `set-profile` from embedded agent channel-action discovery so non-owner runs stop advertising profile updates they cannot execute. (#62662) Thanks @eleqtrizit.
## 2026.4.5

View File

@@ -1,2 +1,2 @@
d8ab30f2e73642c89168acd2e177a4d49568bfc3d64fdfcb37b72206295d4896 plugin-sdk-api-baseline.json
94419b7f3bfa5d0fe8d1ec97825f05b8da1617c8406b7cdc37a72cd559975374 plugin-sdk-api-baseline.jsonl
763d2709dd26f4ec7d5807b2f1781b7f58cb115d2b0a9c9235a6c2c7b3788c1f plugin-sdk-api-baseline.json
87ab9ec219f037b13a8f42378d1fed02701d4035da0e5eca8a091626e8426523 plugin-sdk-api-baseline.jsonl

View File

@@ -167,8 +167,4 @@ Notes:
- If effectively active memory remote API key fields are configured as SecretRefs, the command resolves those values from the active gateway snapshot. If gateway is unavailable, the command fails fast.
- Gateway version skew note: this command path requires a gateway that supports `secrets.resolve`; older gateways return an unknown-method error.
- Tune scheduled sweep cadence with `dreaming.frequency`. Deep promotion policy is otherwise internal; use CLI flags on `memory promote` when you need one-off manual overrides.
- `memory rem-harness --path <file-or-dir> --grounded` previews grounded `What Happened`, `Reflections`, and `Possible Lasting Updates` from historical daily notes without writing anything.
- `memory rem-backfill --path <file-or-dir>` writes reversible grounded diary entries into `DREAMS.md` for UI review.
- `memory rem-backfill --path <file-or-dir> --stage-short-term` also seeds grounded durable candidates into the live short-term promotion store so the normal deep phase can rank them.
- `memory rem-backfill --rollback` removes previously written grounded diary entries, and `memory rem-backfill --rollback-short-term` removes previously staged grounded short-term candidates.
- See [Dreaming](/concepts/dreaming) for full phase descriptions and configuration reference.

View File

@@ -81,20 +81,6 @@ subagent turn (using the default runtime model) and appends a short diary entry.
This diary is for human reading in the Dreams UI, not a promotion source.
There is also a grounded historical backfill lane for review and recovery work:
- `memory rem-harness --path ... --grounded` previews grounded diary output from historical `YYYY-MM-DD.md` notes.
- `memory rem-backfill --path ...` writes reversible grounded diary entries into `DREAMS.md`.
- `memory rem-backfill --path ... --stage-short-term` stages grounded durable candidates into the same short-term evidence store the normal deep phase already uses.
- `memory rem-backfill --rollback` and `--rollback-short-term` remove those staged backfill artifacts without touching ordinary diary entries or live short-term recall.
The Control UI exposes the same diary backfill/reset flow so you can inspect
results in the Dreams scene before deciding whether the grounded candidates
deserve promotion. The Scene also shows a distinct grounded lane so you can see
which staged short-term entries came from historical replay, which promoted
items were grounded-led, and clear only grounded-only staged entries without
touching ordinary live short-term state.
## Deep ranking signals
Deep ranking uses six weighted base signals plus phase reinforcement:
@@ -221,9 +207,8 @@ When enabled, the Gateway **Dreams** tab shows:
- current dreaming enabled state
- phase-level status and managed-sweep presence
- short-term, grounded, signal, and promoted-today counts
- short-term, long-term, and promoted-today counts
- next scheduled run timing
- a distinct grounded Scene lane for staged historical replay entries
- an expandable Dream Diary reader backed by `doctor.memory.dreamDiary`
## Related

View File

@@ -21,7 +21,7 @@ Your agent has three memory-related files:
- **`memory/YYYY-MM-DD.md`** -- daily notes. Running context and observations.
Today's and yesterday's notes are loaded automatically.
- **`DREAMS.md`** (experimental, optional) -- Dream Diary and dreaming sweep
summaries for human review, including grounded historical backfill entries.
summaries for human review.
These files live in the agent workspace (default `~/.openclaw/workspace`).
@@ -133,41 +133,6 @@ It is designed to keep long-term memory high signal:
For phase behavior, scoring signals, and Dream Diary details, see
[Dreaming (experimental)](/concepts/dreaming).
## Grounded backfill and live promotion
The dreaming system now has two closely related review lanes:
- **Live dreaming** works from the short-term dreaming store under
`memory/.dreams/` and is what the normal deep phase uses when deciding what
can graduate into `MEMORY.md`.
- **Grounded backfill** reads historical `memory/YYYY-MM-DD.md` notes as
standalone day files and writes structured review output into `DREAMS.md`.
Grounded backfill is useful when you want to replay older notes and inspect what
the system thinks is durable without manually editing `MEMORY.md`.
When you use:
```bash
openclaw memory rem-backfill --path ./memory --stage-short-term
```
the grounded durable candidates are not promoted directly. They are staged into
the same short-term dreaming store the normal deep phase already uses. That
means:
- `DREAMS.md` stays the human review surface.
- the short-term store stays the machine-facing ranking surface.
- `MEMORY.md` is still only written by deep promotion.
If you decide the replay was not useful, you can remove the staged artifacts
without touching ordinary diary entries or normal recall state:
```bash
openclaw memory rem-backfill --rollback
openclaw memory rem-backfill --rollback-short-term
```
## CLI
```bash

View File

@@ -89,17 +89,18 @@ refs and write a judged Markdown report:
pnpm openclaw qa character-eval \
--model openai/gpt-5.4,thinking=xhigh \
--model openai/gpt-5.2,thinking=xhigh \
--model openai/gpt-5,thinking=xhigh \
--model anthropic/claude-opus-4-6,thinking=high \
--model anthropic/claude-sonnet-4-6,thinking=high \
--model minimax/MiniMax-M2.7,thinking=high \
--model zai/glm-5.1,thinking=high \
--model moonshot/kimi-k2.5,thinking=high \
--model qwen/qwen3.6-plus,thinking=high \
--model xiaomi/mimo-v2-pro,thinking=high \
--model google/gemini-3.1-pro-preview,thinking=high \
--judge-model openai/gpt-5.4,thinking=xhigh,fast \
--judge-model anthropic/claude-opus-4-6,thinking=high \
--blind-judge-models \
--concurrency 16 \
--judge-concurrency 16
--concurrency 8 \
--judge-concurrency 8
```
The command runs local QA gateway child processes, not Docker. Character eval
@@ -108,10 +109,6 @@ such as chat, workspace help, and small file tasks. The candidate model should
not be told that it is being evaluated. The command preserves each full
transcript, records basic run stats, then asks the judge models in fast mode with
`xhigh` reasoning to rank the runs by naturalness, vibe, and humor.
Use `--blind-judge-models` when comparing providers: the judge prompt still gets
every transcript and run status, but candidate refs are replaced with neutral
labels such as `candidate-01`; the report maps rankings back to real refs after
parsing.
Candidate runs default to `high` thinking, with `xhigh` for OpenAI models that
support it. Override a specific candidate inline with
`--model provider/model,thinking=<level>`. `--thinking <level>` still sets a
@@ -123,14 +120,14 @@ single candidate or judge needs an override. Pass `--fast` only when you want to
force fast mode on for every candidate model. Candidate and judge durations are
recorded in the report for benchmark analysis, but judge prompts explicitly say
not to rank by speed.
Candidate and judge model runs both default to concurrency 16. Lower
Candidate and judge model runs both default to concurrency 8. Lower
`--concurrency` or `--judge-concurrency` when provider limits or local gateway
pressure make a run too noisy.
When no candidate `--model` is passed, the character eval defaults to
`openai/gpt-5.4`, `openai/gpt-5.2`, `openai/gpt-5`, `anthropic/claude-opus-4-6`,
`anthropic/claude-sonnet-4-6`, `zai/glm-5.1`,
`moonshot/kimi-k2.5`, and
`google/gemini-3.1-pro-preview`.
`openai/gpt-5.4`, `openai/gpt-5.2`, `anthropic/claude-opus-4-6`,
`anthropic/claude-sonnet-4-6`, `minimax/MiniMax-M2.7`, `zai/glm-5.1`,
`moonshot/kimi-k2.5`, `qwen/qwen3.6-plus`, `xiaomi/mimo-v2-pro`, and
`google/gemini-3.1-pro-preview`.
When no `--judge-model` is passed, the judges default to
`openai/gpt-5.4,thinking=xhigh,fast` and
`anthropic/claude-opus-4-6,thinking=high`.

View File

@@ -93,40 +93,6 @@ cat ~/.openclaw/openclaw.json
- Source install checks (pnpm workspace mismatch, missing UI assets, missing tsx binary).
- Writes updated config + wizard metadata.
## Dreams UI backfill and reset
The Control UI Dreams scene includes **Backfill**, **Reset**, and **Clear Grounded**
actions for the grounded dreaming workflow. These actions use gateway
doctor-style RPC methods, but they are **not** part of `openclaw doctor` CLI
repair/migration.
What they do:
- **Backfill** scans historical `memory/YYYY-MM-DD.md` files in the active
workspace, runs the grounded REM diary pass, and writes reversible backfill
entries into `DREAMS.md`.
- **Reset** removes only those marked backfill diary entries from `DREAMS.md`.
- **Clear Grounded** removes only staged grounded-only short-term entries that
came from historical replay and have not accumulated live recall or daily
support yet.
What they do **not** do by themselves:
- they do not edit `MEMORY.md`
- they do not run full doctor migrations
- they do not automatically stage grounded candidates into the live short-term
promotion store unless you explicitly run the staged CLI path first
If you want grounded historical replay to influence the normal deep promotion
lane, use the CLI flow instead:
```bash
openclaw memory rem-backfill --path ./memory --stage-short-term
```
That stages grounded durable candidates into the short-term dreaming store while
keeping `DREAMS.md` as the review surface.
## Detailed behavior and rationale
### 0) Optional update (git installs)

View File

@@ -203,7 +203,6 @@ Live tests are split into two layers so we can isolate failures:
- `OPENCLAW_LIVE_MODELS=modern` to run the modern allowlist (Opus/Sonnet 4.6+, GPT-5.x + Codex, Gemini 3, GLM 4.7, MiniMax M2.7, Grok 4)
- `OPENCLAW_LIVE_MODELS=all` is an alias for the modern allowlist
- or `OPENCLAW_LIVE_MODELS="openai/gpt-5.4,anthropic/claude-opus-4-6,..."` (comma allowlist)
- Modern/all sweeps default to a curated high-signal cap; set `OPENCLAW_LIVE_MAX_MODELS=0` for an exhaustive modern sweep or a positive number for a smaller cap.
- How to select providers:
- `OPENCLAW_LIVE_PROVIDERS="google,google-antigravity,google-gemini-cli"` (comma allowlist)
- Where keys come from:
@@ -235,7 +234,6 @@ Live tests are split into two layers so we can isolate failures:
- Default: modern allowlist (Opus/Sonnet 4.6+, GPT-5.x + Codex, Gemini 3, GLM 4.7, MiniMax M2.7, Grok 4)
- `OPENCLAW_LIVE_GATEWAY_MODELS=all` is an alias for the modern allowlist
- Or set `OPENCLAW_LIVE_GATEWAY_MODELS="provider/model"` (or comma list) to narrow
- Modern/all gateway sweeps default to a curated high-signal cap; set `OPENCLAW_LIVE_GATEWAY_MAX_MODELS=0` for an exhaustive modern sweep or a positive number for a smaller cap.
- How to select providers (avoid “OpenRouter everything”):
- `OPENCLAW_LIVE_GATEWAY_PROVIDERS="google,google-antigravity,google-gemini-cli,openai,anthropic,zai,minimax"` (comma allowlist)
- Tool + image probes are always on in this live test:

View File

@@ -0,0 +1,580 @@
---
title: "refactor: Make plugin-sdk a real workspace package incrementally"
type: refactor
status: active
date: 2026-04-05
---
# refactor: Make plugin-sdk a real workspace package incrementally
## Overview
This plan introduces a real workspace package for the plugin SDK at
`packages/plugin-sdk` and uses it to opt in a small first wave of extensions to
compiler-enforced package boundaries. The goal is to make illegal relative
imports fail under normal `tsc` for a selected set of bundled provider
extensions, without forcing a repo-wide migration or a giant merge-conflict
surface.
The key incremental move is to run two modes in parallel for a while:
| Mode | Import shape | Who uses it | Enforcement |
| ----------- | ------------------------ | ------------------------------------ | -------------------------------------------- |
| Legacy mode | `openclaw/plugin-sdk/*` | all existing non-opted-in extensions | current permissive behavior remains |
| Opt-in mode | `@openclaw/plugin-sdk/*` | first-wave extensions only | package-local `rootDir` + project references |
## Problem Frame
The current repo exports a large public plugin SDK surface, but it is not a real
workspace package. Instead:
- root `tsconfig.json` maps `openclaw/plugin-sdk/*` directly to
`src/plugin-sdk/*.ts`
- extensions that were not opted into the previous experiment still share that
global source-alias behavior
- adding `rootDir` only works when allowed SDK imports stop resolving into raw
repo source
That means the repo can describe the desired boundary policy, but TypeScript
does not enforce it cleanly for most extensions.
You want an incremental path that:
- makes `plugin-sdk` real
- moves the SDK toward a workspace package named `@openclaw/plugin-sdk`
- changes only about 10 extensions in the first PR
- leaves the rest of the extension tree on the old scheme until later cleanup
- avoids the `tsconfig.plugin-sdk.dts.json` + postinstall-generated declaration
workflow as the primary mechanism for the first-wave rollout
## Requirements Trace
- R1. Create a real workspace package for the plugin SDK under `packages/`.
- R2. Name the new package `@openclaw/plugin-sdk`.
- R3. Give the new SDK package its own `package.json` and `tsconfig.json`.
- R4. Keep legacy `openclaw/plugin-sdk/*` imports working for non-opted-in
extensions during the migration window.
- R5. Opt in only a small first wave of extensions in the first PR.
- R6. The first-wave extensions must fail closed for relative imports that leave
their package root.
- R7. The first-wave extensions must consume the SDK through a package
dependency and a TS project reference, not through root `paths` aliases.
- R8. The plan must avoid a repo-wide mandatory postinstall generation step for
editor correctness.
- R9. The first-wave rollout must be reviewable and mergeable as a moderate PR,
not a repo-wide 300+ file refactor.
## Scope Boundaries
- No full migration of all bundled extensions in the first PR.
- No requirement to delete `src/plugin-sdk` in the first PR.
- No requirement to rewire every root build or test path to use the new package
immediately.
- No attempt to force VS Code squiggles for every non-opted-in extension.
- No broad lint cleanup for the rest of the extension tree.
- No large runtime behavior changes beyond import resolution, package ownership,
and boundary enforcement for the opted-in extensions.
## Context & Research
### Relevant Code and Patterns
- `pnpm-workspace.yaml` already includes `packages/*` and `extensions/*`, so a
new workspace package under `packages/plugin-sdk` fits the existing repo
layout.
- Existing workspace packages such as `packages/memory-host-sdk/package.json`
and `packages/plugin-package-contract/package.json` already use package-local
`exports` maps rooted in `src/*.ts`.
- Root `package.json` currently publishes the SDK surface through `./plugin-sdk`
and `./plugin-sdk/*` exports backed by `dist/plugin-sdk/*.js` and
`dist/plugin-sdk/*.d.ts`.
- `src/plugin-sdk/entrypoints.ts` and `scripts/lib/plugin-sdk-entrypoints.json`
already act as the canonical entrypoint inventory for the SDK surface.
- Root `tsconfig.json` currently maps:
- `openclaw/plugin-sdk` -> `src/plugin-sdk/index.ts`
- `openclaw/plugin-sdk/*` -> `src/plugin-sdk/*.ts`
- The previous boundary experiment showed that package-local `rootDir` works for
illegal relative imports only after allowed SDK imports stop resolving to raw
source outside the extension package.
### First-Wave Extension Set
This plan assumes the first wave is the provider-heavy set that is least likely
to drag in complex channel-runtime edge cases:
- `extensions/anthropic`
- `extensions/exa`
- `extensions/firecrawl`
- `extensions/groq`
- `extensions/mistral`
- `extensions/openai`
- `extensions/perplexity`
- `extensions/tavily`
- `extensions/together`
- `extensions/xai`
### First-Wave SDK Surface Inventory
The first-wave extensions currently import a manageable subset of SDK subpaths.
The initial `@openclaw/plugin-sdk` package only needs to cover these:
- `agent-runtime`
- `cli-runtime`
- `config-runtime`
- `core`
- `image-generation`
- `media-runtime`
- `media-understanding`
- `plugin-entry`
- `plugin-runtime`
- `provider-auth`
- `provider-auth-api-key`
- `provider-auth-login`
- `provider-auth-runtime`
- `provider-catalog-shared`
- `provider-entry`
- `provider-http`
- `provider-model-shared`
- `provider-onboard`
- `provider-stream-family`
- `provider-stream-shared`
- `provider-tools`
- `provider-usage`
- `provider-web-fetch`
- `provider-web-search`
- `realtime-transcription`
- `realtime-voice`
- `runtime-env`
- `secret-input`
- `security-runtime`
- `speech`
- `testing`
### Institutional Learnings
- No relevant `docs/solutions/` entries were present in this worktree.
### External References
- No external research was needed for this plan. The repo already contains the
relevant workspace-package and SDK-export patterns.
## Key Technical Decisions
- Introduce `@openclaw/plugin-sdk` as a new workspace package while keeping the
legacy root `openclaw/plugin-sdk/*` surface alive during migration.
Rationale: this lets a first-wave extension set move onto real package
resolution without forcing every extension and every root build path to change
at once.
- Use a dedicated opt-in boundary base config such as
`extensions/tsconfig.package-boundary.base.json` instead of replacing the
existing extension base for everyone.
Rationale: the repo needs to support both legacy and opt-in extension modes
simultaneously during migration.
- Use TS project references from first-wave extensions to
`packages/plugin-sdk/tsconfig.json` and set
`disableSourceOfProjectReferenceRedirect` for the opt-in boundary mode.
Rationale: this gives `tsc` a real package graph while discouraging editor and
compiler fallback to raw source traversal.
- Keep `@openclaw/plugin-sdk` private in the first wave.
Rationale: the immediate goal is internal boundary enforcement and migration
safety, not publishing a second external SDK contract before the surface is
stable.
- Move only the first-wave SDK subpaths in the first implementation slice, and
keep compatibility bridges for the rest.
Rationale: physically moving all 315 `src/plugin-sdk/*.ts` files in one PR is
exactly the merge-conflict surface this plan is trying to avoid.
- Do not rely on `scripts/postinstall-bundled-plugins.mjs` to build SDK
declarations for the first wave.
Rationale: explicit build/reference flows are easier to reason about and keep
repo behavior more predictable.
## Open Questions
### Resolved During Planning
- Which extensions should be in the first wave?
Use the 10 provider/web-search extensions listed above because they are more
structurally isolated than the heavier channel packages.
- Should the first PR replace the entire extension tree?
No. The first PR should support two modes in parallel and only opt in the
first wave.
- Should the first wave require a postinstall declaration build?
No. The package/reference graph should be explicit, and CI should run the
relevant package-local typecheck intentionally.
### Deferred to Implementation
- Whether the first-wave package can point directly at package-local `src/*.ts`
via project references alone, or whether a small declaration-emission step is
still required for the `@openclaw/plugin-sdk` package.
This is an implementation-owned TS graph validation question.
- Whether the root `openclaw` package should proxy first-wave SDK subpaths to
`packages/plugin-sdk` outputs immediately or continue using generated
compatibility shims under `src/plugin-sdk`.
This is a compatibility and build-shape detail that depends on the minimal
implementation path that keeps CI green.
## High-Level Technical Design
> This illustrates the intended approach and is directional guidance for review, not an implementation specification. The implementing agent should treat it as context, not code to reproduce.
```mermaid
flowchart TB
subgraph Legacy["Legacy extensions (unchanged)"]
L1["extensions/*\nopenclaw/plugin-sdk/*"]
L2["root tsconfig paths"]
L1 --> L2
L2 --> L3["src/plugin-sdk/*"]
end
subgraph OptIn["First-wave extensions"]
O1["10 opted-in extensions"]
O2["extensions/tsconfig.package-boundary.base.json"]
O3["rootDir = '.'\nproject reference"]
O4["@openclaw/plugin-sdk"]
O1 --> O2
O2 --> O3
O3 --> O4
end
subgraph SDK["New workspace package"]
P1["packages/plugin-sdk/package.json"]
P2["packages/plugin-sdk/tsconfig.json"]
P3["packages/plugin-sdk/src/<first-wave-subpaths>.ts"]
P1 --> P2
P2 --> P3
end
O4 --> SDK
```
## Implementation Units
- [ ] **Unit 1: Introduce the real `@openclaw/plugin-sdk` workspace package**
**Goal:** Create a real workspace package for the SDK that can own the
first-wave subpath surface without forcing a repo-wide migration.
**Requirements:** R1, R2, R3, R8, R9
**Dependencies:** None
**Files:**
- Create: `packages/plugin-sdk/package.json`
- Create: `packages/plugin-sdk/tsconfig.json`
- Create: `packages/plugin-sdk/src/index.ts`
- Create: `packages/plugin-sdk/src/*.ts` for the first-wave SDK subpaths
- Modify: `pnpm-workspace.yaml` only if package-glob adjustments are needed
- Modify: `package.json`
- Modify: `src/plugin-sdk/entrypoints.ts`
- Modify: `scripts/lib/plugin-sdk-entrypoints.json`
- Test: `src/plugins/contracts/plugin-sdk-workspace-package.contract.test.ts`
**Approach:**
- Add a new workspace package named `@openclaw/plugin-sdk`.
- Start with the first-wave SDK subpaths only, not the entire 315-file tree.
- If directly moving a first-wave entrypoint would create an oversized diff, the
first PR may introduce that subpath in `packages/plugin-sdk/src` as a thin
package wrapper first and then flip the source of truth to the package in a
follow-up PR for that subpath cluster.
- Reuse the existing entrypoint inventory machinery so the first-wave package
surface is declared in one canonical place.
- Keep the root package exports alive for legacy users while the workspace
package becomes the new opt-in contract.
**Patterns to follow:**
- `packages/memory-host-sdk/package.json`
- `packages/plugin-package-contract/package.json`
- `src/plugin-sdk/entrypoints.ts`
**Test scenarios:**
- Happy path: the workspace package exports every first-wave subpath listed in
the plan and no required first-wave export is missing.
- Edge case: package export metadata remains stable when the first-wave entry
list is re-generated or compared against the canonical inventory.
- Integration: root package legacy SDK exports remain present after introducing
the new workspace package.
**Verification:**
- The repo contains a valid `@openclaw/plugin-sdk` workspace package with a
stable first-wave export map and no legacy export regression in root
`package.json`.
- [ ] **Unit 2: Add an opt-in TS boundary mode for package-enforced extensions**
**Goal:** Define the TS configuration mode that opted-in extensions will use,
while leaving the existing extension TS behavior unchanged for everyone else.
**Requirements:** R4, R6, R7, R8, R9
**Dependencies:** Unit 1
**Files:**
- Create: `extensions/tsconfig.package-boundary.base.json`
- Create: `tsconfig.boundary-optin.json`
- Modify: `extensions/xai/tsconfig.json`
- Modify: `extensions/openai/tsconfig.json`
- Modify: `extensions/anthropic/tsconfig.json`
- Modify: `extensions/mistral/tsconfig.json`
- Modify: `extensions/groq/tsconfig.json`
- Modify: `extensions/together/tsconfig.json`
- Modify: `extensions/perplexity/tsconfig.json`
- Modify: `extensions/tavily/tsconfig.json`
- Modify: `extensions/exa/tsconfig.json`
- Modify: `extensions/firecrawl/tsconfig.json`
- Test: `src/plugins/contracts/extension-package-project-boundaries.test.ts`
- Test: `test/extension-package-tsc-boundary.test.ts`
**Approach:**
- Leave `extensions/tsconfig.base.json` in place for legacy extensions.
- Add a new opt-in base config that:
- sets `rootDir: "."`
- references `packages/plugin-sdk`
- enables `composite`
- disables project-reference source redirect when needed
- Add a dedicated solution config for the first-wave typecheck graph instead of
reshaping the root repo TS project in the same PR.
**Execution note:** Start with a failing package-local canary typecheck for one
opted-in extension before applying the pattern to all 10.
**Patterns to follow:**
- Existing package-local extension `tsconfig.json` pattern from the prior
boundary work
- Workspace package pattern from `packages/memory-host-sdk`
**Test scenarios:**
- Happy path: each opted-in extension typechecks successfully through the
package-boundary TS config.
- Error path: a canary relative import from `../../src/cli/acp-cli.ts` fails
with `TS6059` for an opted-in extension.
- Integration: non-opted-in extensions remain untouched and do not need to
participate in the new solution config.
**Verification:**
- There is a dedicated typecheck graph for the 10 opted-in extensions, and bad
relative imports from one of them fail through normal `tsc`.
- [ ] **Unit 3: Migrate the first-wave extensions onto `@openclaw/plugin-sdk`**
**Goal:** Change the first-wave extensions to consume the real SDK package
through dependency metadata, project references, and package-name imports.
**Requirements:** R5, R6, R7, R9
**Dependencies:** Unit 2
**Files:**
- Modify: `extensions/anthropic/package.json`
- Modify: `extensions/exa/package.json`
- Modify: `extensions/firecrawl/package.json`
- Modify: `extensions/groq/package.json`
- Modify: `extensions/mistral/package.json`
- Modify: `extensions/openai/package.json`
- Modify: `extensions/perplexity/package.json`
- Modify: `extensions/tavily/package.json`
- Modify: `extensions/together/package.json`
- Modify: `extensions/xai/package.json`
- Modify: production and test imports under each of the 10 extension roots that
currently reference `openclaw/plugin-sdk/*`
**Approach:**
- Add `"@openclaw/plugin-sdk": "workspace:*"` to the `devDependencies` of each
first-wave extension's `package.json`.
- Replace `openclaw/plugin-sdk/*` imports in those packages with
`@openclaw/plugin-sdk/*`.
- Keep local extension-internal imports on local barrels such as `./api.ts` and
`./runtime-api.ts`.
- Do not change non-opted-in extensions in this PR.
**Patterns to follow:**
- Existing extension-local import barrels (`api.ts`, `runtime-api.ts`)
- Package dependency shape used by other `@openclaw/*` workspace packages
**Test scenarios:**
- Happy path: each migrated extension still registers/loads through its existing
plugin tests after the import rewrite.
- Edge case: test-only SDK imports in the opted-in extension set still resolve
correctly through the new package.
- Integration: migrated extensions do not require root `openclaw/plugin-sdk/*`
aliases for typechecking.
**Verification:**
- The first-wave extensions build and test against `@openclaw/plugin-sdk`
without needing the legacy root SDK alias path.
- [ ] **Unit 4: Preserve legacy compatibility while the migration is partial**
**Goal:** Keep the rest of the repo working while the SDK exists in both legacy
and new-package forms during migration.
**Requirements:** R4, R8, R9
**Dependencies:** Units 1-3
**Files:**
- Modify: `src/plugin-sdk/*.ts` for first-wave compatibility shims as needed
- Modify: `package.json`
- Modify: build or export plumbing that assembles SDK artifacts
- Test: `src/plugins/contracts/plugin-sdk-runtime-api-guardrails.test.ts`
- Test: `src/plugins/contracts/plugin-sdk-index.bundle.test.ts`
**Approach:**
- Keep root `openclaw/plugin-sdk/*` as the compatibility surface for legacy
extensions and for external consumers that are not moving yet.
- Use either generated shims or root-export proxy wiring for the first-wave
subpaths that have moved into `packages/plugin-sdk`.
- Do not attempt to retire the root SDK surface in this phase.
**Patterns to follow:**
- Existing root SDK export generation via `src/plugin-sdk/entrypoints.ts`
- Existing package export compatibility in root `package.json`
**Test scenarios:**
- Happy path: a legacy root SDK import still resolves for a non-opted-in
extension after the new package exists.
- Edge case: a first-wave subpath works through both the legacy root surface and
the new package surface during the migration window.
- Integration: plugin-sdk index/bundle contract tests continue to see a coherent
public surface.
**Verification:**
- The repo supports both legacy and opt-in SDK consumption modes without
breaking unchanged extensions.
- [ ] **Unit 5: Add scoped enforcement and document the migration contract**
**Goal:** Land CI and contributor guidance that enforce the new behavior for the
first wave without pretending the entire extension tree is migrated.
**Requirements:** R5, R6, R8, R9
**Dependencies:** Units 1-4
**Files:**
- Modify: `package.json`
- Modify: CI workflow files that should run the opt-in boundary typecheck
- Modify: `AGENTS.md`
- Modify: `docs/plugins/sdk-overview.md`
- Modify: `docs/plugins/sdk-entrypoints.md`
- Modify: `docs/plans/2026-04-05-001-refactor-extension-package-resolution-boundary-plan.md`
**Approach:**
- Add an explicit first-wave gate, such as a dedicated `tsc -b` solution run for
`packages/plugin-sdk` plus the 10 opted-in extensions.
- Document that the repo now supports both legacy and opt-in extension modes,
and that new extension boundary work should prefer the new package route.
- Record the next-wave migration rule so later PRs can add more extensions
without re-litigating the architecture.
**Patterns to follow:**
- Existing contract tests under `src/plugins/contracts/`
- Existing docs updates that explain staged migrations
**Test scenarios:**
- Happy path: the new first-wave typecheck gate passes for the workspace package
and the opted-in extensions.
- Error path: introducing a new illegal relative import in an opted-in
extension fails the scoped typecheck gate.
- Integration: CI does not require non-opted-in extensions to satisfy the new
package-boundary mode yet.
**Verification:**
- The first-wave enforcement path is documented, tested, and runnable without
forcing the entire extension tree to migrate.
## System-Wide Impact
- **Interaction graph:** this work touches the SDK source-of-truth, root package
exports, extension package metadata, TS graph layout, and CI verification.
- **Error propagation:** the main intended failure mode becomes compile-time TS
errors (`TS6059`) in opted-in extensions instead of custom script-only
failures.
- **State lifecycle risks:** dual-surface migration introduces drift risk between
root compatibility exports and the new workspace package.
- **API surface parity:** first-wave subpaths must remain semantically identical
through both `openclaw/plugin-sdk/*` and `@openclaw/plugin-sdk/*` during the
transition.
- **Integration coverage:** unit tests are not enough; scoped package-graph
typechecks are required to prove the boundary.
- **Unchanged invariants:** non-opted-in extensions keep their current behavior
in PR 1. This plan does not claim repo-wide import-boundary enforcement.
## Risks & Dependencies
| Risk | Mitigation |
| ------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------- |
| The first-wave package still resolves back into raw source and `rootDir` does not actually fail closed | Make the first implementation step a package-reference canary on one opted-in extension before widening to the full set |
| Moving too much SDK source at once recreates the original merge-conflict problem | Move only the first-wave subpaths in the first PR and keep root compatibility bridges |
| Legacy and new SDK surfaces drift semantically | Keep a single entrypoint inventory, add compatibility contract tests, and make dual-surface parity explicit |
| Root repo build/test paths accidentally start depending on the new package in uncontrolled ways | Use a dedicated opt-in solution config and keep root-wide TS topology changes out of the first PR |
## Phased Delivery
### Phase 1
- Introduce `@openclaw/plugin-sdk`
- Define the first-wave subpath surface
- Prove one opted-in extension can fail closed through `rootDir`
### Phase 2
- Opt in the 10 first-wave extensions
- Keep root compatibility alive for everyone else
### Phase 3
- Add more extensions in later PRs
- Move more SDK subpaths into the workspace package
- Retire root compatibility only after the legacy extension set is gone
## Documentation / Operational Notes
- The first PR should explicitly describe itself as a dual-mode migration, not a
repo-wide enforcement completion.
- The migration guide should make it easy for later PRs to add more extensions
by following the same package/dependency/reference pattern.
## Sources & References
- Prior plan: `docs/plans/2026-04-05-001-refactor-extension-package-resolution-boundary-plan.md`
- Workspace config: `pnpm-workspace.yaml`
- Existing SDK entrypoint inventory: `src/plugin-sdk/entrypoints.ts`
- Existing root SDK exports: `package.json`
- Existing workspace package patterns:
- `packages/memory-host-sdk/package.json`
- `packages/plugin-package-contract/package.json`

View File

@@ -245,7 +245,6 @@ Current bundled provider examples:
| `plugin-sdk/allow-from` | Allowlist formatting | `formatAllowFromLowercase` |
| `plugin-sdk/allowlist-resolution` | Allowlist input mapping | `mapAllowlistResolutionInputs` |
| `plugin-sdk/command-auth` | Command gating and command-surface helpers | `resolveControlCommandGate`, sender-authorization helpers, command registry helpers |
| `plugin-sdk/command-status` | Command status/help renderers | `buildCommandsMessage`, `buildCommandsMessagePaginated`, `buildHelpMessage` |
| `plugin-sdk/secret-input` | Secret input parsing | Secret input helpers |
| `plugin-sdk/webhook-ingress` | Webhook request helpers | Webhook target utilities |
| `plugin-sdk/webhook-request-guards` | Webhook body guard helpers | Request body read/limit helpers |

View File

@@ -149,7 +149,6 @@ explicitly promotes one as public.
| Subpath | Key exports |
| --- | --- |
| `plugin-sdk/command-auth` | `resolveControlCommandGate`, command registry helpers, sender-authorization helpers |
| `plugin-sdk/command-status` | Command/help message builders such as `buildCommandsMessagePaginated` and `buildHelpMessage` |
| `plugin-sdk/approval-auth-runtime` | Approver resolution and same-chat action-auth helpers |
| `plugin-sdk/approval-client-runtime` | Native exec approval profile/filter helpers |
| `plugin-sdk/approval-delivery-runtime` | Native approval capability/delivery adapters |

View File

@@ -88,9 +88,7 @@ requiring the built-in `qwen` provider id specifically.
## Built-in catalog
OpenClaw currently ships this bundled Qwen catalog. The configured catalog is
endpoint-aware: Coding Plan configs omit models that are only known to work on
the Standard endpoint.
OpenClaw currently ships this bundled Qwen catalog:
| Model ref | Input | Context | Notes |
| --------------------------- | ----------- | --------- | -------------------------------------------------- |

View File

@@ -2,7 +2,6 @@
"id": "anthropic-vertex",
"enabledByDefault": true,
"providers": ["anthropic-vertex"],
"providerDiscoveryEntry": "./provider-discovery.ts",
"configSchema": {
"type": "object",
"additionalProperties": false,

View File

@@ -1,10 +0,0 @@
import { describe, expect, it } from "vitest";
describe("anthropic-vertex provider discovery entry", () => {
it("imports without loading the full plugin entry", async () => {
const module = await import("./provider-discovery.js");
expect(module.default.id).toBe("anthropic-vertex");
expect(module.default.catalog.order).toBe("simple");
});
});

View File

@@ -1,215 +0,0 @@
import { readFileSync } from "node:fs";
import { homedir, platform } from "node:os";
import { join } from "node:path";
import type { ProviderCatalogContext } from "openclaw/plugin-sdk/provider-catalog-shared";
import type {
ModelDefinitionConfig,
ModelProviderConfig,
} from "openclaw/plugin-sdk/provider-model-shared";
// Provider id; also the key used to look up an existing provider entry in
// ctx.config.models.providers (see runAnthropicVertexCatalog).
const PROVIDER_ID = "anthropic-vertex";
// Region returned when the environment supplies no region or an invalid one.
const ANTHROPIC_VERTEX_DEFAULT_REGION = "global";
// Accepted region syntax: one or more lowercase letters, digits or hyphens.
const ANTHROPIC_VERTEX_REGION_RE = /^[a-z0-9-]+$/;
// Context window assigned to every model produced by buildAnthropicVertexModel.
const ANTHROPIC_VERTEX_DEFAULT_CONTEXT_WINDOW = 1_000_000;
// Marker string used in place of an API key when credentials are detected.
const GCP_VERTEX_CREDENTIALS_MARKER = "gcp-vertex-credentials";
// ADC file path used on non-Windows platforms (see resolveAnthropicVertexDefaultAdcPath).
const GCLOUD_DEFAULT_ADC_PATH = join(
homedir(),
".config",
"gcloud",
"application_default_credentials.json",
);
// Local structural type for the object exported by this module.
type AnthropicVertexProviderPlugin = {
id: string;
label: string;
docsPath: string;
// Typed as the empty tuple: this entry declares no auth methods.
auth: [];
catalog: {
order: "simple";
// Return type is tied to runAnthropicVertexCatalog so the two cannot drift apart.
run: (ctx: ProviderCatalogContext) => ReturnType<typeof runAnthropicVertexCatalog>;
};
// Returns a string when credentials are detected for `env`, otherwise undefined.
resolveConfigApiKey: (params: { env: NodeJS.ProcessEnv }) => string | undefined;
};
// Cast target for the parsed ADC JSON file (see readAnthropicVertexAdc).
// NOTE(review): neither field is read anywhere in this file; callers only
// check whether the parsed value is null.
type AdcProjectFile = {
project_id?: unknown;
quota_project_id?: unknown;
};
/**
 * Trims `value` and returns it, or returns `undefined` when `value` is not a
 * string or is empty/whitespace-only.
 */
function normalizeOptionalString(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed ? trimmed : undefined;
}
/**
 * Normalizes `value` with normalizeOptionalString and lowercases the result.
 * Returns the empty string when there is nothing to normalize.
 */
function normalizeLowercaseStringOrEmpty(value: unknown): string {
  const normalized = normalizeOptionalString(value);
  if (normalized === undefined) {
    return "";
  }
  return normalized.toLowerCase();
}
/**
 * Picks the Vertex region from the environment.
 *
 * GOOGLE_CLOUD_LOCATION wins over CLOUD_ML_REGION. The chosen value must match
 * ANTHROPIC_VERTEX_REGION_RE; otherwise the default region is returned (an
 * invalid first choice does not fall through to the second variable).
 *
 * @param env Environment to read (defaults to `process.env`).
 */
function resolveAnthropicVertexRegion(env: NodeJS.ProcessEnv = process.env): string {
  // normalizeOptionalString never yields "", so `??` selects the same value `||` would.
  const candidate =
    normalizeOptionalString(env.GOOGLE_CLOUD_LOCATION) ??
    normalizeOptionalString(env.CLOUD_ML_REGION);
  if (candidate && ANTHROPIC_VERTEX_REGION_RE.test(candidate)) {
    return candidate;
  }
  return ANTHROPIC_VERTEX_DEFAULT_REGION;
}
/**
 * Reports whether ANTHROPIC_VERTEX_USE_GCP_METADATA is set to an opt-in value:
 * exactly "1", or "true" in any letter case (surrounding whitespace ignored).
 *
 * @param env Environment to read (defaults to `process.env`).
 */
function hasAnthropicVertexMetadataServerAdc(env: NodeJS.ProcessEnv = process.env): boolean {
  const optIn = normalizeOptionalString(env.ANTHROPIC_VERTEX_USE_GCP_METADATA);
  if (optIn === "1") {
    return true;
  }
  return normalizeLowercaseStringOrEmpty(optIn) === "true";
}
/**
 * Returns the default gcloud ADC file path for the current platform.
 *
 * On win32 the path is rooted at `env.APPDATA`, falling back to
 * `<home>/AppData/Roaming` when APPDATA is null/undefined. On every other
 * platform the precomputed GCLOUD_DEFAULT_ADC_PATH is returned.
 *
 * @param env Environment consulted for APPDATA (defaults to `process.env`).
 */
function resolveAnthropicVertexDefaultAdcPath(env: NodeJS.ProcessEnv = process.env): string {
  if (platform() !== "win32") {
    return GCLOUD_DEFAULT_ADC_PATH;
  }
  const appDataDir = env.APPDATA ?? join(homedir(), "AppData", "Roaming");
  return join(appDataDir, "gcloud", "application_default_credentials.json");
}
/**
 * Chooses which ADC credentials file to inspect.
 *
 * An explicit GOOGLE_APPLICATION_CREDENTIALS value always wins. The platform
 * default path is only considered when `env` is the real `process.env`
 * object (identity comparison); any other env object without an explicit
 * path yields `undefined`.
 *
 * @param env Environment to read (defaults to `process.env`).
 */
function resolveAnthropicVertexAdcCredentialsPathCandidate(
  env: NodeJS.ProcessEnv = process.env,
): string | undefined {
  const explicitPath = normalizeOptionalString(env.GOOGLE_APPLICATION_CREDENTIALS);
  if (explicitPath) {
    return explicitPath;
  }
  return env === process.env ? resolveAnthropicVertexDefaultAdcPath(env) : undefined;
}
/**
 * Reads and parses the ADC credentials file selected for `env`.
 *
 * @param env Environment used to choose the credentials path (defaults to
 *   `process.env`).
 * @returns The parsed JSON object, or `null` when there is no candidate path,
 *   the file cannot be read, its content is not valid JSON, or the JSON value
 *   is not a plain object.
 */
function readAnthropicVertexAdc(env: NodeJS.ProcessEnv = process.env): AdcProjectFile | null {
  const credentialsPath = resolveAnthropicVertexAdcCredentialsPathCandidate(env);
  if (!credentialsPath) {
    return null;
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(credentialsPath, "utf8"));
  } catch {
    // Missing/unreadable file or malformed JSON is treated as "no credentials".
    return null;
  }
  // JSON.parse may return any JSON value. A bare number, string, boolean or
  // array is not a credentials document and must not count as available auth.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    return null;
  }
  return parsed as AdcProjectFile;
}
/**
 * Reports whether any supported credential source is available for `env`:
 * the metadata-server opt-in, or a readable ADC file. The ADC file is only
 * read when the metadata opt-in is absent.
 *
 * @param env Environment to inspect (defaults to `process.env`).
 */
function hasAnthropicVertexAvailableAuth(env: NodeJS.ProcessEnv = process.env): boolean {
  if (hasAnthropicVertexMetadataServerAdc(env)) {
    return true;
  }
  return readAnthropicVertexAdc(env) !== null;
}
/**
 * Returns GCP_VERTEX_CREDENTIALS_MARKER when credentials are available for
 * `env`, otherwise `undefined`.
 *
 * @param env Environment to inspect (defaults to `process.env`).
 */
function resolveAnthropicVertexConfigApiKey(
  env: NodeJS.ProcessEnv = process.env,
): string | undefined {
  if (!hasAnthropicVertexAvailableAuth(env)) {
    return undefined;
  }
  return GCP_VERTEX_CREDENTIALS_MARKER;
}
/**
 * Builds a model definition from the given fields, always stamping
 * `contextWindow` with ANTHROPIC_VERTEX_DEFAULT_CONTEXT_WINDOW.
 *
 * @param params Per-model fields copied verbatim into the result.
 * @returns The assembled model definition.
 */
function buildAnthropicVertexModel(params: {
  id: string;
  name: string;
  reasoning: boolean;
  input: ModelDefinitionConfig["input"];
  cost: ModelDefinitionConfig["cost"];
  maxTokens: number;
}): ModelDefinitionConfig {
  const { id, name, reasoning, input, cost, maxTokens } = params;
  return {
    id,
    name,
    reasoning,
    input,
    cost,
    contextWindow: ANTHROPIC_VERTEX_DEFAULT_CONTEXT_WINDOW,
    maxTokens,
  };
}
/**
 * Builds the implicit provider config: a region-dependent base URL, the
 * "anthropic-messages" API, the credentials marker as API key, and the two
 * bundled Claude models.
 *
 * @param params Optional wrapper around the environment used to resolve the region.
 * @returns The provider configuration.
 */
function buildAnthropicVertexProvider(params?: { env?: NodeJS.ProcessEnv }): ModelProviderConfig {
  const region = resolveAnthropicVertexRegion(params?.env);

  // The "global" region uses the un-prefixed host; all others are region-prefixed.
  let baseUrl: string;
  if (normalizeLowercaseStringOrEmpty(region) === "global") {
    baseUrl = "https://aiplatform.googleapis.com";
  } else {
    baseUrl = `https://${region}-aiplatform.googleapis.com`;
  }

  const opus = buildAnthropicVertexModel({
    id: "claude-opus-4-6",
    name: "Claude Opus 4.6",
    reasoning: true,
    input: ["text", "image"],
    cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
    maxTokens: 128000,
  });
  const sonnet = buildAnthropicVertexModel({
    id: "claude-sonnet-4-6",
    name: "Claude Sonnet 4.6",
    reasoning: true,
    input: ["text", "image"],
    cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
    maxTokens: 128000,
  });

  return {
    baseUrl,
    api: "anthropic-messages",
    apiKey: GCP_VERTEX_CREDENTIALS_MARKER,
    models: [opus, sonnet],
  };
}
/**
 * Overlays a user-configured provider on top of the implicit one.
 *
 * With no existing config the implicit provider is returned unchanged.
 * Otherwise existing fields override implicit ones, except that `models`
 * falls back to the implicit list when the existing list is missing or empty.
 *
 * @param params.existing Provider entry already present in config, if any.
 * @param params.implicit Provider built from detected credentials.
 */
function mergeImplicitAnthropicVertexProvider(params: {
  existing?: ModelProviderConfig;
  implicit: ModelProviderConfig;
}) {
  if (!params.existing) {
    return params.implicit;
  }
  const configured = params.existing;
  const fallback = params.implicit;
  return {
    ...fallback,
    ...configured,
    models:
      Array.isArray(configured.models) && configured.models.length > 0
        ? configured.models
        : fallback.models,
  };
}
/**
 * Returns the implicit provider config when credentials are available for the
 * given environment, or `null` when they are not.
 *
 * @param params Optional wrapper around the environment; `process.env` is
 *   used when `params` or `params.env` is null/undefined.
 */
function resolveImplicitAnthropicVertexProvider(params?: { env?: NodeJS.ProcessEnv }) {
  const env = params?.env ?? process.env;
  return hasAnthropicVertexAvailableAuth(env) ? buildAnthropicVertexProvider({ env }) : null;
}
/**
 * Catalog hook: resolves the implicit provider for `ctx.env` and merges it
 * with any provider entry already configured under PROVIDER_ID.
 *
 * @param ctx Catalog context supplying `env` and `config`.
 * @returns `{ provider }` with the merged config, or `null` when no
 *   credentials are available.
 */
async function runAnthropicVertexCatalog(ctx: ProviderCatalogContext) {
  const implicit = resolveImplicitAnthropicVertexProvider({ env: ctx.env });
  if (!implicit) {
    return null;
  }
  // Config is only consulted once an implicit provider is known to exist.
  const existing = ctx.config.models?.providers?.[PROVIDER_ID];
  const provider = mergeImplicitAnthropicVertexProvider({ existing, implicit });
  return { provider };
}
// Provider-discovery entry for Anthropic Vertex, exported both by name and as
// the module default. It wires the catalog hook and the config-API-key
// resolver defined above and declares no auth methods.
export const anthropicVertexProviderDiscovery: AnthropicVertexProviderPlugin = {
id: PROVIDER_ID,
label: "Anthropic Vertex",
docsPath: "/providers/models",
auth: [],
catalog: {
order: "simple",
run: runAnthropicVertexCatalog,
},
// Yields the credentials marker when auth is detected for `env`, else undefined.
resolveConfigApiKey: ({ env }) => resolveAnthropicVertexConfigApiKey(env),
};
export default anthropicVertexProviderDiscovery;

View File

@@ -33,7 +33,7 @@ export const secretTargetRegistryEntries = [
export function collectRuntimeConfigAssignments(params: {
config: { channels?: Record<string, unknown> };
defaults?: SecretDefaults;
defaults: SecretDefaults | undefined;
context: ResolverContext;
}): void {
const resolved = getChannelSurface(params.config, "bluebubbles");

View File

@@ -52,13 +52,8 @@ describe("pw-tools-core browser SSRF guards", () => {
});
it("re-checks click-triggered navigations with the session safety helper", async () => {
let currentUrl = "https://example.com";
pageState.page = { url: vi.fn(() => currentUrl) };
pageState.locator = {
click: vi.fn(async () => {
currentUrl = "https://target.example";
}),
};
pageState.page = { url: vi.fn(() => "https://example.com") };
pageState.locator = { click: vi.fn(async () => {}) };
await interactions.clickViaPlaywright({
cdpUrl: "http://127.0.0.1:18792",
@@ -91,13 +86,8 @@ describe("pw-tools-core browser SSRF guards", () => {
});
it("re-checks batched click-triggered navigations with the session safety helper", async () => {
let currentUrl = "https://example.com";
pageState.page = { url: vi.fn(() => currentUrl) };
pageState.locator = {
click: vi.fn(async () => {
currentUrl = "https://target.example";
}),
};
pageState.page = { url: vi.fn(() => "https://example.com") };
pageState.locator = { click: vi.fn(async () => {}) };
await interactions.batchViaPlaywright({
cdpUrl: "http://127.0.0.1:18792",

View File

@@ -65,7 +65,7 @@ describe("pw-tools-core", () => {
throw new Error(errorMessage);
});
setPwToolsCoreCurrentRefLocator({ click });
setPwToolsCoreCurrentPage({ url: vi.fn(() => "https://example.com") });
setPwToolsCoreCurrentPage({});
await expect(
mod.clickViaPlaywright({
@@ -82,7 +82,7 @@ describe("pw-tools-core", () => {
);
});
setPwToolsCoreCurrentRefLocator({ click });
setPwToolsCoreCurrentPage({ url: vi.fn(() => "https://example.com") });
setPwToolsCoreCurrentPage({});
await expect(
mod.clickViaPlaywright({

View File

@@ -10,7 +10,7 @@ installPwToolsCoreTestHooks();
const mod = await import("./pw-tools-core.js");
describe("pw-tools-core interaction navigation guard", () => {
it("waits for the grace window before completing a successful non-navigating click", async () => {
it("does not wait for the grace window after a successful non-navigating click", async () => {
vi.useFakeTimers();
try {
const listeners = new Set<() => void>();
@@ -42,16 +42,15 @@ describe("pw-tools-core interaction navigation guard", () => {
.then(completion);
await vi.advanceTimersByTimeAsync(0);
expect(completion).not.toHaveBeenCalled();
expect(completion).toHaveBeenCalledTimes(1);
expect(listeners.size).toBe(1);
expect(
getPwToolsCoreSessionMocks().assertPageNavigationCompletedSafely,
).not.toHaveBeenCalled();
await vi.advanceTimersByTimeAsync(250);
await task;
expect(completion).toHaveBeenCalledTimes(1);
expect(listeners.size).toBe(0);
await task;
} finally {
vi.useRealTimers();
}
@@ -97,14 +96,13 @@ describe("pw-tools-core interaction navigation guard", () => {
.then(completion);
await vi.advanceTimersByTimeAsync(0);
expect(completion).not.toHaveBeenCalled();
expect(completion).toHaveBeenCalledTimes(1);
expect(
getPwToolsCoreSessionMocks().assertPageNavigationCompletedSafely,
).not.toHaveBeenCalled();
await vi.advanceTimersByTimeAsync(10);
await task;
expect(completion).toHaveBeenCalledTimes(1);
expect(getPwToolsCoreSessionMocks().assertPageNavigationCompletedSafely).toHaveBeenCalledWith(
{
@@ -209,22 +207,20 @@ describe("pw-tools-core interaction navigation guard", () => {
setPwToolsCoreCurrentRefLocator({ click });
setPwToolsCoreCurrentPage(page);
const first = mod.clickViaPlaywright({
await mod.clickViaPlaywright({
cdpUrl: "http://127.0.0.1:18792",
targetId: "T1",
ref: "1",
ssrfPolicy: { allowPrivateNetwork: false },
});
await vi.advanceTimersByTimeAsync(0);
expect(listeners.size).toBe(1);
const second = mod.clickViaPlaywright({
await mod.clickViaPlaywright({
cdpUrl: "http://127.0.0.1:18792",
targetId: "T1",
ref: "1",
ssrfPolicy: { allowPrivateNetwork: false },
});
await vi.advanceTimersByTimeAsync(0);
expect(listeners.size).toBe(1);
currentUrl = "http://127.0.0.1:9222/json/list";
@@ -232,7 +228,6 @@ describe("pw-tools-core interaction navigation guard", () => {
listener();
}
await vi.advanceTimersByTimeAsync(0);
await Promise.all([first, second]);
expect(
getPwToolsCoreSessionMocks().assertPageNavigationCompletedSafely,
@@ -243,56 +238,6 @@ describe("pw-tools-core interaction navigation guard", () => {
}
});
it("propagates blocked delayed navigation instead of reporting click success", async () => {
vi.useFakeTimers();
try {
const listeners = new Set<() => void>();
let currentUrl = "http://127.0.0.1:9222/json/version";
const click = vi.fn(async () => {
setTimeout(() => {
currentUrl = "http://127.0.0.1:9222/private-target";
for (const listener of listeners) {
listener();
}
}, 10);
});
const page = {
on: vi.fn((event: string, listener: () => void) => {
if (event === "framenavigated") {
listeners.add(listener);
}
}),
off: vi.fn((event: string, listener: () => void) => {
if (event === "framenavigated") {
listeners.delete(listener);
}
}),
url: vi.fn(() => currentUrl),
};
setPwToolsCoreCurrentRefLocator({ click });
setPwToolsCoreCurrentPage(page);
const blocked = new Error("blocked delayed interaction navigation");
getPwToolsCoreSessionMocks().assertPageNavigationCompletedSafely.mockRejectedValueOnce(
blocked,
);
const task = mod.clickViaPlaywright({
cdpUrl: "http://127.0.0.1:18792",
targetId: "T1",
ref: "1",
ssrfPolicy: { allowPrivateNetwork: false },
});
const rejection = expect(task).rejects.toThrow("blocked delayed interaction navigation");
await vi.advanceTimersByTimeAsync(10);
await rejection;
expect(listeners.size).toBe(0);
} finally {
vi.useRealTimers();
}
});
it("runs the post-click navigation guard with the resolved SSRF policy", async () => {
const click = vi.fn(async () => {});
const page = {

View File

@@ -1,6 +1,6 @@
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
import type { Frame, Page } from "playwright-core";
import { formatErrorMessage } from "../infra/errors.js";
import type { Frame, Page } from "playwright-core";
import type { SsrFPolicy } from "../infra/net/ssrf.js";
import type { BrowserActRequest, BrowserFormField } from "./client-actions-core.js";
import { DEFAULT_FILL_FIELD_TYPE } from "./form-fields.js";
@@ -164,68 +164,59 @@ function scheduleDelayedInteractionNavigationGuard(opts: {
previousUrl: string;
ssrfPolicy?: SsrFPolicy;
targetId?: string;
}): Promise<void> {
}): void {
if (!opts.ssrfPolicy) {
return Promise.resolve();
return;
}
const page = opts.page as unknown as NavigationObservablePage;
if (didCrossDocumentUrlChange(page, opts.previousUrl)) {
return assertPageNavigationCompletedSafely({
void assertPageNavigationCompletedSafely({
cdpUrl: opts.cdpUrl,
page: opts.page,
response: null,
ssrfPolicy: opts.ssrfPolicy,
targetId: opts.targetId,
});
}).catch(() => {});
return;
}
if (typeof page.on !== "function" || typeof page.off !== "function") {
return Promise.resolve();
return;
}
pendingInteractionNavigationGuardCleanup.get(opts.page)?.();
return new Promise<void>((resolve, reject) => {
const settle = (err?: unknown) => {
cleanup();
if (err) {
reject(err);
return;
}
resolve();
};
const onFrameNavigated = (frame: Frame) => {
if (!isMainFrameNavigation(page, frame)) {
return;
}
// Use isHashOnlyNavigation rather than !didCrossDocumentUrlChange: the
// event firing is itself the navigation signal, so a same-URL reload must
// not be treated as "no navigation" the way URL polling would.
if (isHashOnlyNavigation(page.url(), opts.previousUrl)) {
return;
}
cleanup();
void assertPageNavigationCompletedSafely({
cdpUrl: opts.cdpUrl,
page: opts.page,
response: null,
ssrfPolicy: opts.ssrfPolicy,
targetId: opts.targetId,
}).then(() => settle(), settle);
};
const timeout = setTimeout(() => {
settle();
}, INTERACTION_NAVIGATION_GRACE_MS);
const cleanup = () => {
clearTimeout(timeout);
page.off!("framenavigated", onFrameNavigated);
if (pendingInteractionNavigationGuardCleanup.get(opts.page) === settle) {
pendingInteractionNavigationGuardCleanup.delete(opts.page);
}
};
const onFrameNavigated = (frame: Frame) => {
if (!isMainFrameNavigation(page, frame)) {
return;
}
// Use isHashOnlyNavigation rather than !didCrossDocumentUrlChange: the
// event firing is itself the navigation signal, so a same-URL reload must
// not be treated as "no navigation" the way URL polling would.
if (isHashOnlyNavigation(page.url(), opts.previousUrl)) {
return;
}
cleanup();
void assertPageNavigationCompletedSafely({
cdpUrl: opts.cdpUrl,
page: opts.page,
response: null,
ssrfPolicy: opts.ssrfPolicy,
targetId: opts.targetId,
}).catch(() => {});
};
const timeout = setTimeout(() => {
cleanup();
}, INTERACTION_NAVIGATION_GRACE_MS);
const cleanup = () => {
clearTimeout(timeout);
page.off!("framenavigated", onFrameNavigated);
if (pendingInteractionNavigationGuardCleanup.get(opts.page) === cleanup) {
pendingInteractionNavigationGuardCleanup.delete(opts.page);
}
};
pendingInteractionNavigationGuardCleanup.set(opts.page, settle);
page.on!("framenavigated", onFrameNavigated);
});
pendingInteractionNavigationGuardCleanup.set(opts.page, cleanup);
page.on("framenavigated", onFrameNavigated);
}
async function assertInteractionNavigationCompletedSafely<T>(opts: {
@@ -301,10 +292,9 @@ async function assertInteractionNavigationCompletedSafely<T>(opts: {
});
}
} else {
// Successful interactions still need a short grace window: a click can resolve
// before the navigation event fires, and a blocked late hop must be observable
// to the current caller instead of only quarantining the page in the background.
await scheduleDelayedInteractionNavigationGuard({
// Successful non-navigating interactions should not wait out the grace window,
// but we still keep a short-lived listener alive to quarantine late SSRF hops.
scheduleDelayedInteractionNavigationGuard({
cdpUrl: opts.cdpUrl,
page: opts.page,
previousUrl: opts.previousUrl,

View File

@@ -84,7 +84,7 @@ export const secretTargetRegistryEntries = [
export function collectRuntimeConfigAssignments(params: {
config: { channels?: Record<string, unknown> };
defaults?: SecretDefaults;
defaults: SecretDefaults | undefined;
context: ResolverContext;
}): void {
const resolved = getChannelSurface(params.config, "discord");

View File

@@ -80,7 +80,7 @@ export const secretTargetRegistryEntries = [
export function collectRuntimeConfigAssignments(params: {
config: { channels?: Record<string, unknown> };
defaults?: SecretDefaults;
defaults: SecretDefaults | undefined;
context: ResolverContext;
}): void {
const resolved = getChannelSurface(params.config, "feishu");

View File

@@ -138,7 +138,7 @@ function warnDeprecatedUsersEmailEntries(logVerbose: (message: string) => void,
}
const key = deprecated
.map((v) => normalizeLowercaseStringOrEmpty(v))
.toSorted((a, b) => a.localeCompare(b))
.toSorted()
.join(",");
if (warnedDeprecatedUsersEmailAllowFrom.has(key)) {
return;
@@ -161,7 +161,7 @@ function warnMutableGroupKeysConfigured(
}
const warningKey = mutableKeys
.map((key) => normalizeLowercaseStringOrEmpty(key))
.toSorted((a, b) => a.localeCompare(b))
.toSorted()
.join(",");
if (warnedMutableGroupKeys.has(warningKey)) {
return;

View File

@@ -63,7 +63,7 @@ function resolveSecretInputRef(params: {
function collectGoogleChatAccountAssignment(params: {
target: GoogleChatAccountLike;
path: string;
defaults?: SecretDefaults;
defaults: SecretDefaults | undefined;
context: ResolverContext;
active?: boolean;
inactiveReason?: string;
@@ -107,7 +107,7 @@ function collectGoogleChatAccountAssignment(params: {
export function collectRuntimeConfigAssignments(params: {
config: { channels?: Record<string, unknown> };
defaults?: SecretDefaults;
defaults: SecretDefaults | undefined;
context: ResolverContext;
}): void {
const resolved = getChannelSurface(params.config, "googlechat");

View File

@@ -1,3 +1,4 @@
export { createIMessageTestPlugin } from "./src/test-plugin.js";
export {
resolveIMessageAttachmentRoots as resolveInboundAttachmentRoots,
resolveIMessageRemoteAttachmentRoots as resolveRemoteInboundAttachmentRoots,

View File

@@ -3,7 +3,7 @@ import {
listImportedBundledPluginFacadeIds,
resetFacadeRuntimeStateForTest,
} from "../../../src/plugin-sdk/facade-runtime.js";
import { createIMessageTestPlugin } from "./imessage.test-plugin.js";
import { createIMessageTestPlugin } from "./test-plugin.js";
beforeEach(() => {
resetFacadeRuntimeStateForTest();
@@ -21,11 +21,4 @@ describe("createIMessageTestPlugin", () => {
expect(listImportedBundledPluginFacadeIds()).toEqual([]);
});
it("normalizes repeated transport prefixes without recursive stack growth", () => {
const plugin = createIMessageTestPlugin();
const prefixedHandle = `${"imessage:".repeat(5000)}+44 20 7946 0958`;
expect(plugin.messaging?.normalizeTarget?.(prefixedHandle)).toBe("+442079460958");
});
});

View File

@@ -5,32 +5,20 @@ import { collectStatusIssuesFromLastError } from "openclaw/plugin-sdk/status-hel
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
function normalizeIMessageTestHandle(raw: string): string {
let trimmed = raw.trim();
const trimmed = raw.trim();
if (!trimmed) {
return "";
}
while (trimmed) {
const lowered = normalizeLowercaseStringOrEmpty(trimmed);
if (lowered.startsWith("imessage:")) {
trimmed = trimmed.slice("imessage:".length).trim();
continue;
}
if (lowered.startsWith("sms:")) {
trimmed = trimmed.slice("sms:".length).trim();
continue;
}
if (lowered.startsWith("auto:")) {
trimmed = trimmed.slice("auto:".length).trim();
continue;
}
break;
const lowered = normalizeLowercaseStringOrEmpty(trimmed);
if (lowered.startsWith("imessage:")) {
return normalizeIMessageTestHandle(trimmed.slice("imessage:".length));
}
if (!trimmed) {
return "";
if (lowered.startsWith("sms:")) {
return normalizeIMessageTestHandle(trimmed.slice("sms:".length));
}
if (lowered.startsWith("auto:")) {
return normalizeIMessageTestHandle(trimmed.slice("auto:".length));
}
if (/^(chat_id:|chat_guid:|chat_identifier:)/i.test(trimmed)) {
return trimmed.replace(/^(chat_id:|chat_guid:|chat_identifier:)/i, (match) =>
normalizeLowercaseStringOrEmpty(match),

View File

@@ -1 +0,0 @@
export { createIMessageTestPlugin } from "./src/imessage.test-plugin.js";

View File

@@ -59,7 +59,7 @@ export const secretTargetRegistryEntries = [
export function collectRuntimeConfigAssignments(params: {
config: { channels?: Record<string, unknown> };
defaults?: SecretDefaults;
defaults: SecretDefaults | undefined;
context: ResolverContext;
}): void {
const resolved = getChannelSurface(params.config, "irc");

View File

@@ -91,6 +91,7 @@ describe("matrixMessageActions account propagation", () => {
await matrixMessageActions.handleAction?.(
createContext({
action: profileAction,
senderIsOwner: true,
accountId: "ops",
params: {
displayName: "Ops Bot",
@@ -111,10 +112,50 @@ describe("matrixMessageActions account propagation", () => {
);
});
it("rejects self-profile updates for non-owner callers", async () => {
await expect(
matrixMessageActions.handleAction?.(
createContext({
action: profileAction,
senderIsOwner: false,
accountId: "ops",
params: {
displayName: "Ops Bot",
},
}),
),
).rejects.toMatchObject({
name: "ToolAuthorizationError",
message: "Matrix profile updates require owner access.",
});
expect(mocks.handleMatrixAction).not.toHaveBeenCalled();
});
it("rejects self-profile updates when owner status is unknown", async () => {
await expect(
matrixMessageActions.handleAction?.(
createContext({
action: profileAction,
accountId: "ops",
params: {
displayName: "Ops Bot",
},
}),
),
).rejects.toMatchObject({
name: "ToolAuthorizationError",
message: "Matrix profile updates require owner access.",
});
expect(mocks.handleMatrixAction).not.toHaveBeenCalled();
});
it("forwards local avatar paths for self-profile updates", async () => {
await matrixMessageActions.handleAction?.(
createContext({
action: profileAction,
senderIsOwner: true,
accountId: "ops",
params: {
path: "/tmp/avatar.jpg",

View File

@@ -78,6 +78,7 @@ describe("matrixMessageActions", () => {
const discovery = describeMessageTool({
cfg: createConfiguredMatrixConfig(),
senderIsOwner: true,
} as never);
if (!discovery) {
throw new Error("describeMessageTool returned null");
@@ -96,6 +97,31 @@ describe("matrixMessageActions", () => {
expect(properties.avatarPath).toBeDefined();
});
it("hides self-profile updates for non-owner discovery", () => {
const discovery = matrixMessageActions.describeMessageTool({
cfg: createConfiguredMatrixConfig(),
senderIsOwner: false,
} as never);
if (!discovery) {
throw new Error("describeMessageTool returned null");
}
expect(discovery.actions).not.toContain(profileAction);
expect(discovery.schema).toBeNull();
});
it("hides self-profile updates when owner status is unknown", () => {
const discovery = matrixMessageActions.describeMessageTool({
cfg: createConfiguredMatrixConfig(),
} as never);
if (!discovery) {
throw new Error("describeMessageTool returned null");
}
expect(discovery.actions).not.toContain(profileAction);
expect(discovery.schema).toBeNull();
});
it("hides gated actions when the default Matrix account disables them", () => {
const discovery = matrixMessageActions.describeMessageTool({
cfg: {

View File

@@ -7,11 +7,11 @@ import {
createActionGate,
readNumberParam,
readStringParam,
ToolAuthorizationError,
type ChannelMessageActionAdapter,
type ChannelMessageActionContext,
type ChannelMessageActionName,
type ChannelMessageToolDiscovery,
type ChannelToolSend,
} from "./runtime-api.js";
import type { CoreConfig } from "./types.js";
@@ -35,6 +35,7 @@ const MATRIX_PLUGIN_HANDLED_ACTIONS = new Set<ChannelMessageActionName>([
function createMatrixExposedActions(params: {
gate: ReturnType<typeof createActionGate>;
encryptionEnabled: boolean;
senderIsOwner?: boolean;
}) {
const actions = new Set<ChannelMessageActionName>(["poll", "poll-vote"]);
if (params.gate("messages")) {
@@ -52,7 +53,7 @@ function createMatrixExposedActions(params: {
actions.add("unpin");
actions.add("list-pins");
}
if (params.gate("profile")) {
if (params.gate("profile") && params.senderIsOwner === true) {
actions.add("set-profile");
}
if (params.gate("memberInfo")) {
@@ -109,7 +110,7 @@ function buildMatrixProfileToolSchema(): NonNullable<ChannelMessageToolDiscovery
}
export const matrixMessageActions: ChannelMessageActionAdapter = {
describeMessageTool: ({ cfg, accountId }) => {
describeMessageTool: ({ cfg, accountId, senderIsOwner }) => {
const resolvedCfg = cfg as CoreConfig;
if (!accountId && requiresExplicitMatrixDefaultAccount(resolvedCfg)) {
return { actions: [], capabilities: [] };
@@ -125,6 +126,7 @@ export const matrixMessageActions: ChannelMessageActionAdapter = {
const actions = createMatrixExposedActions({
gate,
encryptionEnabled: account.config.encryption === true,
senderIsOwner,
});
const listedActions = Array.from(actions);
return {
@@ -134,7 +136,7 @@ export const matrixMessageActions: ChannelMessageActionAdapter = {
};
},
supportsAction: ({ action }) => MATRIX_PLUGIN_HANDLED_ACTIONS.has(action),
extractToolSend: ({ args }): ChannelToolSend | null => {
extractToolSend: ({ args }) => {
return extractToolSend(args, "sendMessage");
},
handleAction: async (ctx: ChannelMessageActionContext) => {
@@ -259,6 +261,9 @@ export const matrixMessageActions: ChannelMessageActionAdapter = {
}
if (action === "set-profile") {
if (ctx.senderIsOwner !== true) {
throw new ToolAuthorizationError("Matrix profile updates require owner access.");
}
const avatarPath =
readStringParam(params, "avatarPath") ??
readStringParam(params, "path") ??

View File

@@ -45,53 +45,6 @@ function hasLegacyMatrixAccountPrivateNetworkAliases(value: unknown): boolean {
);
}
/**
 * Report whether a config entry still uses the legacy `dm.policy: "trusted"`
 * value.
 *
 * Returns false when `value` is not a record, when it has no record-valued
 * `dm` section, or when `dm.policy` is anything other than "trusted".
 */
function hasLegacyTrustedDmPolicy(value: unknown): boolean {
  if (!isRecord(value)) {
    return false;
  }
  const dmSection = value.dm;
  if (!isRecord(dmSection)) {
    return false;
  }
  return dmSection.policy === "trusted";
}
/**
 * Report whether any entry of an accounts map still uses the legacy
 * `dm.policy: "trusted"` value.
 *
 * Returns false when `value` is not a record; otherwise checks every value of
 * the record with `hasLegacyTrustedDmPolicy` and stops at the first hit.
 */
function hasLegacyMatrixAccountTrustedDmPolicies(value: unknown): boolean {
  if (!isRecord(value)) {
    return false;
  }
  for (const accountEntry of Object.values(value)) {
    if (hasLegacyTrustedDmPolicy(accountEntry)) {
      return true;
    }
  }
  return false;
}
/**
 * Rewrite a legacy `dm.policy: "trusted"` entry to a supported policy.
 *
 * @param params.entry      Config entry to inspect; it is never mutated.
 * @param params.pathPrefix Config path of the entry, used in the change message.
 * @param params.changes    Change log; one message is appended when a migration happens.
 * @returns The original entry with `changed: false` when there is nothing to
 *          migrate, otherwise a shallow copy with the updated `dm.policy` and
 *          `changed: true`.
 */
function migrateLegacyTrustedDmPolicy(params: {
  entry: Record<string, unknown>;
  pathPrefix: string;
  changes: string[];
}): { entry: Record<string, unknown>; changed: boolean } {
  const { entry, pathPrefix, changes } = params;
  const dm = entry.dm;
  if (!isRecord(dm) || dm.policy !== "trusted") {
    return { entry, changed: false };
  }

  // Count only non-blank string entries: downstream allowlist normalization
  // drops whitespace-only values, so a list such as [" "] is effectively empty
  // and must fall back to "pairing" rather than produce an empty allowlist.
  const rawAllowFrom = dm.allowFrom;
  let usableAllowFromCount = 0;
  if (Array.isArray(rawAllowFrom)) {
    for (const candidate of rawAllowFrom) {
      if (typeof candidate === "string" && candidate.trim().length > 0) {
        usableAllowFromCount += 1;
      }
    }
  }

  // Keep the operator's existing trust boundary when an explicit allowFrom
  // list is present; otherwise default to pairing.
  const hasAllowlist = usableAllowFromCount > 0;
  const nextPolicy: "allowlist" | "pairing" = hasAllowlist ? "allowlist" : "pairing";
  const detail = hasAllowlist
    ? `preserved ${usableAllowFromCount} ${pathPrefix}.dm.allowFrom ${usableAllowFromCount === 1 ? "entry" : "entries"}`
    : "no allowFrom entries present, defaulting to pairing for safety";

  changes.push(
    `Migrated ${pathPrefix}.dm.policy "trusted" → "${nextPolicy}" (legacy alias removed; ${detail}).`,
  );

  return {
    entry: { ...entry, dm: { ...dm, policy: nextPolicy } },
    changed: true,
  };
}
function normalizeMatrixRoomAllowAliases(params: {
rooms: Record<string, unknown>;
pathPrefix: string;
@@ -149,18 +102,6 @@ export const legacyConfigRules: ChannelDoctorLegacyConfigRule[] = [
'channels.matrix.accounts.<id>.{groups,rooms}.<room>.allow is legacy; use channels.matrix.accounts.<id>.{groups,rooms}.<room>.enabled instead. Run "openclaw doctor --fix".',
match: hasLegacyMatrixAccountRoomAllowAliases,
},
{
path: ["channels", "matrix"],
message:
'channels.matrix.dm.policy "trusted" is legacy; use "allowlist" (with allowFrom entries) or "pairing" instead. Run "openclaw doctor --fix".',
match: hasLegacyTrustedDmPolicy,
},
{
path: ["channels", "matrix", "accounts"],
message:
'channels.matrix.accounts.<id>.dm.policy "trusted" is legacy; use "allowlist" (with allowFrom entries) or "pairing" instead. Run "openclaw doctor --fix".',
match: hasLegacyMatrixAccountTrustedDmPolicies,
},
];
export function normalizeCompatibilityConfig({
@@ -186,14 +127,6 @@ export function normalizeCompatibilityConfig({
updatedMatrix = topLevelPrivateNetwork.entry;
changed = changed || topLevelPrivateNetwork.changed;
const topLevelTrustedDmPolicy = migrateLegacyTrustedDmPolicy({
entry: updatedMatrix,
pathPrefix: "channels.matrix",
changes,
});
updatedMatrix = topLevelTrustedDmPolicy.entry;
changed = changed || topLevelTrustedDmPolicy.changed;
const normalizeTopLevelRoomScope = (key: "groups" | "rooms") => {
const rooms = isRecord(updatedMatrix[key]) ? updatedMatrix[key] : null;
if (!rooms) {
@@ -235,16 +168,6 @@ export function normalizeCompatibilityConfig({
accountChanged = true;
}
const accountTrustedDmPolicy = migrateLegacyTrustedDmPolicy({
entry: nextAccount,
pathPrefix: `channels.matrix.accounts.${accountId}`,
changes,
});
if (accountTrustedDmPolicy.changed) {
nextAccount = accountTrustedDmPolicy.entry;
accountChanged = true;
}
for (const key of ["groups", "rooms"] as const) {
const rooms = isRecord(nextAccount[key]) ? nextAccount[key] : null;
if (!rooms) {

View File

@@ -232,199 +232,4 @@ describe("matrix doctor", () => {
]),
);
});
it("migrates legacy channels.matrix.dm.policy 'trusted' with allowFrom to 'allowlist'", () => {
const normalize = matrixDoctor.normalizeCompatibilityConfig;
expect(normalize).toBeDefined();
if (!normalize) {
return;
}
const result = normalize({
cfg: {
channels: {
matrix: {
dm: {
enabled: true,
policy: "trusted",
allowFrom: ["@alice:example.org", "@bob:example.org"],
},
},
},
} as never,
});
const matrixDm = (
result.config.channels?.matrix as { dm?: { policy?: string; allowFrom?: string[] } }
)?.dm;
expect(matrixDm?.policy).toBe("allowlist");
expect(matrixDm?.allowFrom).toEqual(["@alice:example.org", "@bob:example.org"]);
expect(result.changes).toEqual(
expect.arrayContaining([
expect.stringContaining('Migrated channels.matrix.dm.policy "trusted" → "allowlist"'),
expect.stringContaining("preserved 2 channels.matrix.dm.allowFrom entries"),
]),
);
});
it("migrates legacy 'trusted' policy with whitespace-only allowFrom entries to 'pairing'", () => {
// Whitespace-only entries are dropped by downstream allowlist normalization,
// so they must not count toward the allowFrom population check — otherwise
// the migration would emit policy="allowlist" with an effectively empty
// allowlist, silently blocking all DMs.
const normalize = matrixDoctor.normalizeCompatibilityConfig;
expect(normalize).toBeDefined();
if (!normalize) {
return;
}
const result = normalize({
cfg: {
channels: {
matrix: {
dm: {
enabled: true,
policy: "trusted",
allowFrom: [" ", "\t", ""],
},
},
},
} as never,
});
const matrixDm = (result.config.channels?.matrix as { dm?: { policy?: string } })?.dm;
expect(matrixDm?.policy).toBe("pairing");
expect(result.changes).toEqual(
expect.arrayContaining([
expect.stringContaining('Migrated channels.matrix.dm.policy "trusted" → "pairing"'),
]),
);
});
it("migrates legacy channels.matrix.dm.policy 'trusted' without allowFrom to 'pairing'", () => {
const normalize = matrixDoctor.normalizeCompatibilityConfig;
expect(normalize).toBeDefined();
if (!normalize) {
return;
}
const result = normalize({
cfg: {
channels: {
matrix: {
dm: {
enabled: true,
policy: "trusted",
},
},
},
} as never,
});
const matrixDm = (result.config.channels?.matrix as { dm?: { policy?: string } })?.dm;
expect(matrixDm?.policy).toBe("pairing");
expect(result.changes).toEqual(
expect.arrayContaining([
expect.stringContaining('Migrated channels.matrix.dm.policy "trusted" → "pairing"'),
]),
);
});
it("migrates legacy per-account channels.matrix.accounts.<id>.dm.policy 'trusted'", () => {
const normalize = matrixDoctor.normalizeCompatibilityConfig;
expect(normalize).toBeDefined();
if (!normalize) {
return;
}
const result = normalize({
cfg: {
channels: {
matrix: {
accounts: {
work: {
dm: {
enabled: true,
policy: "trusted",
allowFrom: ["@boss:example.org"],
},
},
personal: {
dm: {
enabled: true,
policy: "trusted",
},
},
},
},
},
} as never,
});
const accounts = (
result.config.channels?.matrix as {
accounts?: Record<string, { dm?: { policy?: string; allowFrom?: string[] } }>;
}
)?.accounts;
expect(accounts?.work?.dm?.policy).toBe("allowlist");
expect(accounts?.work?.dm?.allowFrom).toEqual(["@boss:example.org"]);
expect(accounts?.personal?.dm?.policy).toBe("pairing");
expect(result.changes).toEqual(
expect.arrayContaining([
expect.stringContaining(
'Migrated channels.matrix.accounts.work.dm.policy "trusted" → "allowlist"',
),
expect.stringContaining(
'Migrated channels.matrix.accounts.personal.dm.policy "trusted" → "pairing"',
),
]),
);
});
it("leaves modern dm.policy values untouched", () => {
const normalize = matrixDoctor.normalizeCompatibilityConfig;
expect(normalize).toBeDefined();
if (!normalize) {
return;
}
const result = normalize({
cfg: {
channels: {
matrix: {
dm: {
enabled: true,
policy: "allowlist",
allowFrom: ["@alice:example.org"],
},
accounts: {
work: {
dm: { enabled: true, policy: "pairing" },
},
},
},
},
} as never,
});
expect(result.changes).toEqual([]);
expect(result.config).toEqual({
channels: {
matrix: {
dm: {
enabled: true,
policy: "allowlist",
allowFrom: ["@alice:example.org"],
},
accounts: {
work: {
dm: { enabled: true, policy: "pairing" },
},
},
},
},
});
});
});

View File

@@ -10,6 +10,7 @@ export {
readReactionParams,
readStringArrayParam,
readStringParam,
ToolAuthorizationError,
} from "openclaw/plugin-sdk/channel-actions";
export { buildChannelConfigSchema } from "openclaw/plugin-sdk/channel-config-primitives";
export type { ChannelPlugin } from "openclaw/plugin-sdk/channel-core";

View File

@@ -60,7 +60,7 @@ export const secretTargetRegistryEntries = [
export function collectRuntimeConfigAssignments(params: {
config: { channels?: Record<string, unknown> };
defaults?: SecretDefaults;
defaults: SecretDefaults | undefined;
context: ResolverContext;
}): void {
const resolved = getChannelSurface(params.config, "matrix");

View File

@@ -33,7 +33,7 @@ export const secretTargetRegistryEntries = [
export function collectRuntimeConfigAssignments(params: {
config: { channels?: Record<string, unknown> };
defaults?: SecretDefaults;
defaults: SecretDefaults | undefined;
context: ResolverContext;
}): void {
const resolved = getChannelSurface(params.config, "mattermost");

View File

@@ -4,5 +4,8 @@ export type {
MemoryProviderStatus,
MemorySyncProgressUpdate,
} from "openclaw/plugin-sdk/memory-core-host-engine-storage";
export { removeBackfillDiaryEntries, writeBackfillDiaryEntries } from "./src/dreaming-narrative.js";
export {
removeBackfillDiaryEntries,
writeBackfillDiaryEntries,
} from "./src/dreaming-narrative.js";
export { previewGroundedRemMarkdown } from "./src/rem-evidence.js";

View File

@@ -17,7 +17,6 @@ export { checkQmdBinaryAvailability } from "openclaw/plugin-sdk/memory-core-host
export { hasConfiguredMemorySecretInput } from "openclaw/plugin-sdk/memory-core-host-secret";
export {
auditShortTermPromotionArtifacts,
removeGroundedShortTermCandidates,
repairShortTermPromotionArtifacts,
} from "./src/short-term-promotion.js";
export type { BuiltinMemoryEmbeddingProviderDoctorMetadata } from "./src/memory/provider-adapters.js";

View File

@@ -42,10 +42,8 @@ import { previewGroundedRemMarkdown } from "./rem-evidence.js";
import {
applyShortTermPromotions,
auditShortTermPromotionArtifacts,
removeGroundedShortTermCandidates,
repairShortTermPromotionArtifacts,
readShortTermRecallEntries,
recordGroundedShortTermCandidates,
recordShortTermRecalls,
rankShortTermPromotionCandidates,
resolveShortTermRecallLockPath,
@@ -122,15 +120,7 @@ const DAILY_MEMORY_FILE_NAME_RE = /^(\d{4}-\d{2}-\d{2})\.md$/;
async function listHistoricalDailyFiles(inputPath: string): Promise<string[]> {
const resolvedPath = path.resolve(inputPath);
let stat;
try {
stat = await fs.stat(resolvedPath);
} catch (err) {
if ((err as NodeJS.ErrnoException | undefined)?.code === "ENOENT") {
return [];
}
throw err;
}
const stat = await fs.stat(resolvedPath);
if (stat.isFile()) {
return DAILY_MEMORY_FILE_NAME_RE.test(path.basename(resolvedPath)) ? [resolvedPath] : [];
}
@@ -306,100 +296,6 @@ function groundedMarkdownToDiaryLines(markdown: string): string[] {
.filter((line, index, lines) => !(line.length === 0 && lines[index - 1]?.length === 0));
}
/**
 * Parse a grounded reference of the form `path:line` or `path:start-end`.
 *
 * @param fallbackPath Path to use when the reference carries no path of its
 *                     own (for example `":12"`).
 * @param ref          Raw reference text; surrounding whitespace is ignored.
 * @returns The path (backslashes converted to `/`, a leading `./` removed) and
 *          the line range, each bound clamped to a minimum of 1. `endLine`
 *          equals `startLine` when no range end is given. Returns null when
 *          the reference is blank or does not end in `:<digits>` or
 *          `:<digits>-<digits>`.
 */
function parseGroundedRef(
  fallbackPath: string,
  ref: string,
): { path: string; startLine: number; endLine: number } | null {
  const trimmed = ref.trim();
  if (!trimmed) {
    return null;
  }
  const match = trimmed.match(/^(.*?):(\d+)(?:-(\d+))?$/);
  if (!match) {
    return null;
  }
  // Capture group 1 always participates in a successful match, so a reference
  // without a path yields "" rather than undefined. `||` (not `??`) is needed
  // for the fallback to ever apply.
  const rawPath = match[1] || fallbackPath;
  const startText = match[2];
  const endText = match[3] ?? startText;
  return {
    path: rawPath.replaceAll("\\", "/").replace(/^\.\//, ""),
    startLine: Math.max(1, Number(startText)),
    endLine: Math.max(1, Number(endText)),
  };
}
/**
 * Flatten grounded REM preview files into short-term seed items.
 *
 * For each file, every memory implication (score 0.92, signal count 2) is
 * considered first, followed by every candidate whose lean is
 * "likely_durable" (score 0.82, signal count 1). A signal is skipped when its
 * text is blank, when it has no non-blank ref, or when its first non-blank ref
 * cannot be parsed. Items are de-duplicated across all files on
 * path + line range + query + lower-cased text. `dayBucket` is set only when
 * `extractIsoDayFromPath` returns a truthy value for the file path.
 */
function collectGroundedShortTermSeedItems(
  previews: Awaited<ReturnType<typeof previewGroundedRemMarkdown>>["files"],
): Array<{
  path: string;
  startLine: number;
  endLine: number;
  snippet: string;
  score: number;
  query: string;
  signalCount: number;
  dayBucket?: string;
}> {
  type SeedItem = {
    path: string;
    startLine: number;
    endLine: number;
    snippet: string;
    score: number;
    query: string;
    signalCount: number;
    dayBucket?: string;
  };

  const seeds: SeedItem[] = [];
  const emittedKeys = new Set<string>();

  for (const file of previews) {
    const dayBucket = extractIsoDayFromPath(file.path) ?? undefined;

    const lastingUpdates = file.memoryImplications.map((item) => ({
      text: item.text,
      refs: item.refs,
      score: 0.92,
      query: "__dreaming_grounded_backfill__:lasting-update",
      signalCount: 2,
    }));
    const durableCandidates = file.candidates
      .filter((candidate) => candidate.lean === "likely_durable")
      .map((candidate) => ({
        text: candidate.text,
        refs: candidate.refs,
        score: 0.82,
        query: "__dreaming_grounded_backfill__:candidate",
        signalCount: 1,
      }));

    // Implications come first so they win the de-duplication race against
    // candidates that share the same key.
    for (const signal of [...lastingUpdates, ...durableCandidates]) {
      if (signal.text.trim().length === 0) {
        continue;
      }
      const firstRef = signal.refs.find((ref) => ref.trim().length > 0);
      if (!firstRef) {
        continue;
      }
      const location = parseGroundedRef(file.path, firstRef);
      if (!location) {
        continue;
      }
      const dedupeKey = `${location.path}:${location.startLine}:${location.endLine}:${signal.query}:${signal.text.toLowerCase()}`;
      if (emittedKeys.has(dedupeKey)) {
        continue;
      }
      emittedKeys.add(dedupeKey);

      const seed: SeedItem = {
        path: location.path,
        startLine: location.startLine,
        endLine: location.endLine,
        snippet: signal.text,
        score: signal.score,
        query: signal.query,
        signalCount: signal.signalCount,
      };
      if (dayBucket) {
        seed.dayBucket = dayBucket;
      }
      seeds.push(seed);
    }
  }

  return seeds;
}
function matchesPromotionSelector(
candidate: {
key: string;
@@ -656,7 +552,7 @@ export async function runMemoryStatus(opts: MemoryCommandOptions) {
run: async (manager) => {
const deep = Boolean(opts.deep || opts.index);
let embeddingProbe:
| Awaited<ReturnType<MemoryManager["probeEmbeddingAvailability"]>>
| Awaited<ReturnType<typeof manager.probeEmbeddingAvailability>>
| undefined;
let indexError: string | undefined;
const syncFn = manager.sync ? manager.sync.bind(manager) : undefined;
@@ -1652,30 +1548,14 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) {
return;
}
if (opts.rollback || opts.rollbackShortTerm) {
const diaryRollback = opts.rollback
? await removeBackfillDiaryEntries({ workspaceDir })
: null;
const shortTermRollback = opts.rollbackShortTerm
? await removeGroundedShortTermCandidates({ workspaceDir })
: null;
if (opts.rollback) {
const removed = await removeBackfillDiaryEntries({ workspaceDir });
if (opts.json) {
defaultRuntime.writeJson({
workspaceDir,
rollback: Boolean(opts.rollback),
rollbackShortTerm: Boolean(opts.rollbackShortTerm),
...(diaryRollback
? {
dreamsPath: diaryRollback.dreamsPath,
removedEntries: diaryRollback.removed,
}
: {}),
...(shortTermRollback
? {
shortTermStorePath: shortTermRollback.storePath,
removedShortTermEntries: shortTermRollback.removed,
}
: {}),
rollback: true,
dreamsPath: removed.dreamsPath,
removedEntries: removed.removed,
});
return;
}
@@ -1683,30 +1563,8 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) {
[
`${colorize(isRich(), theme.heading, "REM Backfill")} ${colorize(isRich(), theme.muted, "(rollback)")}`,
colorize(isRich(), theme.muted, `workspace=${shortenHomePath(workspaceDir)}`),
...(diaryRollback
? [
colorize(
isRich(),
theme.muted,
`dreamsPath=${shortenHomePath(diaryRollback.dreamsPath)}`,
),
colorize(isRich(), theme.muted, `removedEntries=${diaryRollback.removed}`),
]
: []),
...(shortTermRollback
? [
colorize(
isRich(),
theme.muted,
`shortTermStorePath=${shortenHomePath(shortTermRollback.storePath)}`,
),
colorize(
isRich(),
theme.muted,
`removedShortTermEntries=${shortTermRollback.removed}`,
),
]
: []),
colorize(isRich(), theme.muted, `dreamsPath=${shortenHomePath(removed.dreamsPath)}`),
colorize(isRich(), theme.muted, `removedEntries=${removed.removed}`),
].join("\n"),
);
return;
@@ -1742,11 +1600,6 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) {
workspaceDir: scratchDir,
inputPaths: workspaceSourceFiles,
});
const sourcePathByDay = new Map(
sourceFiles
.map((sourcePath) => [extractIsoDayFromPath(sourcePath), sourcePath] as const)
.filter((entry): entry is [string, string] => Boolean(entry[0])),
);
const entries = grounded.files
.map((file) => {
const isoDay = extractIsoDayFromPath(file.path);
@@ -1755,7 +1608,7 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) {
}
return {
isoDay,
sourcePath: sourcePathByDay.get(isoDay) ?? file.path,
sourcePath: file.path,
bodyLines: groundedMarkdownToDiaryLines(file.renderedMarkdown),
};
})
@@ -1766,24 +1619,6 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) {
entries,
timezone: remConfig.timezone,
});
let stagedShortTermEntries = 0;
let replacedShortTermEntries = 0;
if (opts.stageShortTerm) {
const cleared = await removeGroundedShortTermCandidates({ workspaceDir });
replacedShortTermEntries = cleared.removed;
const shortTermSeedItems = collectGroundedShortTermSeedItems(grounded.files);
if (shortTermSeedItems.length > 0) {
await recordGroundedShortTermCandidates({
workspaceDir,
query: "__dreaming_grounded_backfill__",
items: shortTermSeedItems,
dedupeByQueryPerDay: true,
nowMs: Date.now(),
timezone: remConfig.timezone,
});
}
stagedShortTermEntries = shortTermSeedItems.length;
}
if (opts.json) {
defaultRuntime.writeJson({
@@ -1794,12 +1629,6 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) {
writtenEntries: written.written,
replacedEntries: written.replaced,
dreamsPath: written.dreamsPath,
...(opts.stageShortTerm
? {
stagedShortTermEntries,
replacedShortTermEntries,
}
: {}),
});
return;
}
@@ -1815,15 +1644,6 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) {
theme.muted,
`historicalFiles=${sourceFiles.length} writtenEntries=${written.written} replacedEntries=${written.replaced}`,
),
...(opts.stageShortTerm
? [
colorize(
rich,
theme.muted,
`stagedShortTermEntries=${stagedShortTermEntries} replacedShortTermEntries=${replacedShortTermEntries}`,
),
]
: []),
colorize(rich, theme.muted, `dreamsPath=${shortenHomePath(written.dreamsPath)}`),
].join("\n"),
);

View File

@@ -9,7 +9,7 @@ import {
spyRuntimeJson,
spyRuntimeLogs,
} from "../../../src/cli/test-runtime-capture.js";
import { readShortTermRecallEntries, recordShortTermRecalls } from "./short-term-promotion.js";
import { recordShortTermRecalls } from "./short-term-promotion.js";
const getMemorySearchManager = vi.hoisted(() => vi.fn());
const loadConfig = vi.hoisted(() => vi.fn(() => ({})));
@@ -1066,7 +1066,6 @@ describe("memory cli", () => {
const dreams = await fs.readFile(path.join(workspaceDir, "DREAMS.md"), "utf-8");
expect(dreams).toContain("openclaw:dreaming:backfill-entry");
expect(dreams).toContain(`source=${historyPath}`);
expect(dreams).toContain("January 1, 2025");
expect(dreams).toContain("What Happened");
expect(dreams).toContain("Possible Lasting Updates");
@@ -1075,89 +1074,6 @@ describe("memory cli", () => {
});
});
it("treats a missing historical path as a controlled empty-source error", async () => {
await withTempWorkspace(async (workspaceDir) => {
const close = vi.fn(async () => {});
mockManager({
status: () => makeMemoryStatus({ workspaceDir }),
close,
});
const errors = spyRuntimeErrors(defaultRuntime);
await runMemoryCli(["rem-backfill", "--path", path.join(workspaceDir, "missing-history")]);
expect(
errors.mock.calls.some((call) => String(call[0]).includes("found no YYYY-MM-DD.md files")),
).toBe(true);
expect(close).toHaveBeenCalled();
});
});
it("stages grounded durable candidates into the live short-term store", async () => {
await withTempWorkspace(async (workspaceDir) => {
const historyDir = path.join(workspaceDir, "history");
await fs.mkdir(historyDir, { recursive: true });
const historyPath = path.join(historyDir, "2025-01-01.md");
await fs.writeFile(
historyPath,
[
"## Preferences Learned",
'- Always use "Happy Together" calendar for flights and reservations.',
].join("\n") + "\n",
"utf-8",
);
const close = vi.fn(async () => {});
mockManager({
status: () => makeMemoryStatus({ workspaceDir }),
close,
});
await runMemoryCli(["rem-backfill", "--path", historyPath, "--stage-short-term"]);
const entries = await readShortTermRecallEntries({ workspaceDir });
expect(entries).toHaveLength(1);
expect(entries[0]?.snippet).toContain("Happy Together");
expect(entries[0]?.groundedCount).toBe(3);
expect(entries[0]?.queryHashes).toHaveLength(2);
expect(entries[0]?.recallCount).toBe(0);
expect(close).toHaveBeenCalled();
});
});
it("rolls back grounded staged short-term entries without touching diary rollback", async () => {
await withTempWorkspace(async (workspaceDir) => {
const historyDir = path.join(workspaceDir, "history");
await fs.mkdir(historyDir, { recursive: true });
const historyPath = path.join(historyDir, "2025-01-01.md");
await fs.writeFile(
historyPath,
[
"## Preferences Learned",
'- Always use "Happy Together" calendar for flights and reservations.',
].join("\n") + "\n",
"utf-8",
);
const close = vi.fn(async () => {});
mockManager({
status: () => makeMemoryStatus({ workspaceDir }),
close,
});
await runMemoryCli(["rem-backfill", "--path", historyPath, "--stage-short-term"]);
mockManager({
status: () => makeMemoryStatus({ workspaceDir }),
close,
});
await runMemoryCli(["rem-backfill", "--rollback-short-term"]);
const entries = await readShortTermRecallEntries({ workspaceDir });
expect(entries).toHaveLength(0);
expect(close).toHaveBeenCalled();
});
});
it("prefers persistence-relevant evidence over narrated operational logs in grounded what happened", async () => {
await withTempWorkspace(async (workspaceDir) => {
const historyDir = path.join(workspaceDir, "history");
@@ -1320,44 +1236,6 @@ describe("memory cli", () => {
});
});
it("does not split hyphenated words into malformed grounded candidates", async () => {
await withTempWorkspace(async (workspaceDir) => {
const historyDir = path.join(workspaceDir, "history");
await fs.mkdir(historyDir, { recursive: true });
const historyPath = path.join(historyDir, "2025-02-20.md");
await fs.writeFile(
historyPath,
[
"## Preferences Learned",
"- Use long-term plans, avoid reactive task switching.",
"- A self-aware workflow note should stay intact.",
].join("\n") + "\n",
"utf-8",
);
const close = vi.fn(async () => {});
mockManager({
status: () => makeMemoryStatus({ workspaceDir }),
close,
});
const writeJson = spyRuntimeJson(defaultRuntime);
await runMemoryCli(["rem-harness", "--json", "--grounded", "--path", historyPath]);
const payload = firstWrittenJsonArg<{
grounded?: {
files?: Array<{
renderedMarkdown?: string;
}>;
} | null;
}>(writeJson);
const rendered = payload?.grounded?.files?.[0]?.renderedMarkdown ?? "";
expect(rendered).not.toContain("Use long- term plans");
expect(rendered).not.toContain("A self- aware workflow note");
expect(close).toHaveBeenCalled();
});
});
it("rolls back grounded rem backfill entries from DREAMS.md", async () => {
await withTempWorkspace(async (workspaceDir) => {
const dreamsPath = path.join(workspaceDir, "DREAMS.md");

View File

@@ -105,10 +105,6 @@ export function registerMemoryCli(program: Command) {
"openclaw memory rem-backfill --path ./memory",
"Write grounded historical REM entries into DREAMS.md for UI review.",
],
[
"openclaw memory rem-backfill --path ./memory --stage-short-term",
"Also seed durable grounded candidates into the live short-term promotion store.",
],
["openclaw memory status --json", "Output machine-readable JSON (good for scripts)."],
])}\n\n${theme.muted("Docs:")} ${formatDocsLink("/cli/memory", "docs.openclaw.ai/cli/memory")}\n`,
);
@@ -205,16 +201,6 @@ export function registerMemoryCli(program: Command) {
.option("--agent <id>", "Agent id (default: default agent)")
.option("--path <file-or-dir>", "Historical daily memory file(s) or directory")
.option("--rollback", "Remove previously written grounded REM backfill entries", false)
.option(
"--stage-short-term",
"Also seed grounded durable candidates into the short-term promotion store",
false,
)
.option(
"--rollback-short-term",
"Remove previously seeded grounded short-term candidates",
false,
)
.option("--json", "Print JSON")
.action(async (opts: MemoryRemBackfillOptions) => {
await runMemoryRemBackfill(opts);

View File

@@ -36,6 +36,4 @@ export type MemoryRemHarnessOptions = MemoryCommandOptions & {
export type MemoryRemBackfillOptions = MemoryCommandOptions & {
path?: string;
rollback?: boolean;
stageShortTerm?: boolean;
rollbackShortTerm?: boolean;
};

View File

@@ -126,10 +126,6 @@ describe("backfill diary entries", () => {
expect(formatBackfillDiaryDate("2026-01-01", "UTC")).toBe("January 1, 2026");
});
it("preserves the iso day label in high-positive-offset timezones", () => {
expect(formatBackfillDiaryDate("2026-01-01", "Pacific/Kiritimati")).toBe("January 1, 2026");
});
it("builds a marked backfill diary entry", () => {
const entry = buildBackfillDiaryEntry({
isoDay: "2026-01-01",
@@ -205,29 +201,6 @@ describe("backfill diary entries", () => {
expect(content).toContain("Keep this real dream.");
expect(content).not.toContain("Remove this backfill.");
});
it("refuses to overwrite a symlinked DREAMS.md during backfill writes", async () => {
const workspaceDir = await createTempWorkspace("openclaw-dreaming-backfill-");
const targetPath = path.join(workspaceDir, "outside.txt");
const dreamsPath = path.join(workspaceDir, "DREAMS.md");
await fs.writeFile(targetPath, "outside\n", "utf-8");
await fs.symlink(targetPath, dreamsPath);
await expect(
writeBackfillDiaryEntries({
workspaceDir,
timezone: "UTC",
entries: [
{
isoDay: "2026-01-01",
sourcePath: "memory/2026-01-01.md",
bodyLines: ["What Happened", "1. First pass."],
},
],
}),
).rejects.toThrow("Refusing to write symlinked DREAMS.md");
await expect(fs.readFile(targetPath, "utf-8")).resolves.toBe("outside\n");
});
});
describe("appendNarrativeEntry", () => {

View File

@@ -240,53 +240,17 @@ function stripBackfillDiaryBlocks(existing: string): { updated: string; removed:
};
}
export function formatBackfillDiaryDate(isoDay: string, _timezone?: string): string {
const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(isoDay);
if (!match) {
return isoDay;
}
const [, year, month, day] = match;
export function formatBackfillDiaryDate(isoDay: string, timezone?: string): string {
const opts: Intl.DateTimeFormatOptions = {
// Preserve the source iso day exactly; backfill labels should not drift by timezone.
timeZone: "UTC",
timeZone: timezone ?? "UTC",
year: "numeric",
month: "long",
day: "numeric",
};
const epochMs = Date.UTC(Number(year), Number(month) - 1, Number(day), 12);
const epochMs = Date.parse(`${isoDay}T12:00:00Z`);
return new Intl.DateTimeFormat("en-US", opts).format(new Date(epochMs));
}
/**
 * Guard used before DREAMS.md is written.
 *
 * Inspects `dreamsPath` with `lstat` (so a symlink itself is examined, not its
 * target) and:
 * - resolves quietly when nothing exists at the path (ENOENT),
 * - rethrows any other `lstat` failure unchanged,
 * - throws when the path is a symbolic link,
 * - throws when the path exists but is not a regular file.
 *
 * @param dreamsPath Path of the DREAMS.md file about to be written.
 * @throws Error when the existing path is a symlink or a non-file.
 */
async function assertSafeDreamsPath(dreamsPath: string): Promise<void> {
  let info: Awaited<ReturnType<typeof fs.lstat>>;
  try {
    info = await fs.lstat(dreamsPath);
  } catch (err) {
    // A missing file is the normal "first write" case; everything else is fatal.
    if ((err as NodeJS.ErrnoException).code === "ENOENT") {
      return;
    }
    throw err;
  }

  if (info.isSymbolicLink()) {
    throw new Error("Refusing to write symlinked DREAMS.md");
  }
  if (info.isFile()) {
    return;
  }
  throw new Error("Refusing to write non-file DREAMS.md");
}
/**
 * Replace DREAMS.md by writing a sibling temp file and renaming it into place.
 *
 * Steps:
 * 1. `assertSafeDreamsPath` vets the destination first.
 * 2. The content is written to `<dreamsPath>.<pid>.<timestamp>.tmp` with the
 *    exclusive-create flag `wx`, so an already existing temp path makes the
 *    write fail instead of being overwritten.
 * 3. The temp file is renamed over `dreamsPath`. If the rename fails, the temp
 *    file is removed on a best-effort basis and the rename error is rethrown.
 *
 * @param dreamsPath Destination path of DREAMS.md.
 * @param content Full UTF-8 text to store.
 */
async function writeDreamsFileAtomic(dreamsPath: string, content: string): Promise<void> {
  await assertSafeDreamsPath(dreamsPath);

  const stagingPath = `${dreamsPath}.${process.pid}.${Date.now()}.tmp`;
  const writeOptions = { encoding: "utf-8", flag: "wx" } as const;
  await fs.writeFile(stagingPath, content, writeOptions);

  try {
    await fs.rename(stagingPath, dreamsPath);
  } catch (renameError) {
    try {
      await fs.rm(stagingPath, { force: true });
    } catch {
      // Cleanup is best-effort; the rename failure is the error worth reporting.
    }
    throw renameError;
  }
}
export function buildBackfillDiaryEntry(params: {
isoDay: string;
bodyLines: string[];
@@ -295,10 +259,7 @@ export function buildBackfillDiaryEntry(params: {
}): string {
const dateStr = formatBackfillDiaryDate(params.isoDay, params.timezone);
const marker = `<!-- ${BACKFILL_ENTRY_MARKER} day=${params.isoDay}${params.sourcePath ? ` source=${params.sourcePath}` : ""} -->`;
const body = params.bodyLines
.map((line) => line.trimEnd())
.join("\n")
.trim();
const body = params.bodyLines.map((line) => line.trimEnd()).join("\n").trim();
return [`*${dateStr}*`, marker, body].filter((part) => part.length > 0).join("\n\n");
}
@@ -334,7 +295,7 @@ export async function writeBackfillDiaryEntries(params: {
),
];
const updated = replaceDiaryContent(stripped.updated, joinDiaryBlocks(nextBlocks));
await writeDreamsFileAtomic(dreamsPath, updated);
await fs.writeFile(dreamsPath, updated, "utf-8");
return {
dreamsPath,
written: params.entries.length,
@@ -350,7 +311,7 @@ export async function removeBackfillDiaryEntries(params: {
const stripped = stripBackfillDiaryBlocks(existing);
if (stripped.removed > 0 || existing.length > 0) {
await fs.mkdir(path.dirname(dreamsPath), { recursive: true });
await writeDreamsFileAtomic(dreamsPath, stripped.updated);
await fs.writeFile(dreamsPath, stripped.updated, "utf-8");
}
return {
dreamsPath,

View File

@@ -56,9 +56,6 @@ const REM_TIME_PREFIX_RE = /^\d{1,2}:\d{2}\s*-\s*/;
const REM_CODE_FENCE_RE = /^\s*```/;
const REM_TABLE_RE = /^\s*\|.*\|\s*$/;
const REM_TABLE_DIVIDER_RE = /^\s*\|?[\s:-]+\|[\s|:-]*$/;
const MAX_GROUNDED_REM_FILES = 512;
const MAX_GROUNDED_REM_FILE_BYTES = 1_000_000;
const GROUNDED_REM_SKIPPED_DIRS = new Set([".git", "node_modules"]);
const REM_SUMMARY_FACT_LIMIT = 4;
const REM_SUMMARY_REFLECTION_LIMIT = 4;
const REM_SUMMARY_MEMORY_LIMIT = 3;
@@ -608,7 +605,7 @@ function splitTopLevelClauses(text: string, delimiter: string): string[] {
}
function splitSubjectLeadClaim(text: string): string[] {
const match = /^(?<subject>.+?(?:||\s-\s))\s*(?<rest>.+)$/u.exec(text);
const match = /^(?<subject>.+?(?:||-))\s*(?<rest>.+)$/u.exec(text);
if (!match?.groups) {
return [text];
}
@@ -1021,29 +1018,16 @@ function previewGroundedRemForFile(params: {
async function collectMarkdownFiles(inputPaths: string[]): Promise<string[]> {
const found = new Set<string>();
async function walk(targetPath: string): Promise<void> {
if (found.size >= MAX_GROUNDED_REM_FILES) {
return;
}
const resolved = path.resolve(targetPath);
const stat = await fs.lstat(resolved);
if (stat.isSymbolicLink()) {
return;
}
const stat = await fs.stat(resolved);
if (stat.isDirectory()) {
const entries = await fs.readdir(resolved, { withFileTypes: true });
for (const entry of entries) {
if (entry.isDirectory() && GROUNDED_REM_SKIPPED_DIRS.has(entry.name)) {
continue;
}
await walk(path.join(resolved, entry.name));
}
return;
}
if (
stat.isFile() &&
stat.size <= MAX_GROUNDED_REM_FILE_BYTES &&
resolved.toLowerCase().endsWith(".md")
) {
if (stat.isFile() && resolved.toLowerCase().endsWith(".md")) {
found.add(resolved);
}
}

View File

@@ -11,11 +11,9 @@ import {
applyShortTermPromotions,
auditShortTermPromotionArtifacts,
isShortTermMemoryPath,
recordGroundedShortTermCandidates,
rankShortTermPromotionCandidates,
recordDreamingPhaseSignals,
recordShortTermRecalls,
removeGroundedShortTermCandidates,
repairShortTermPromotionArtifacts,
resolveShortTermRecallLockPath,
resolveShortTermPhaseSignalStorePath,
@@ -179,128 +177,6 @@ describe("short-term promotion", () => {
});
});
it("lets grounded durable evidence satisfy default deep thresholds", async () => {
await withTempWorkspace(async (workspaceDir) => {
await writeDailyMemoryNote(workspaceDir, "2026-04-03", [
'Always use "Happy Together" calendar for flights and reservations.',
]);
await recordGroundedShortTermCandidates({
workspaceDir,
query: "__dreaming_grounded_backfill__",
items: [
{
path: "memory/2026-04-03.md",
startLine: 1,
endLine: 1,
snippet: 'Always use "Happy Together" calendar for flights and reservations.',
score: 0.92,
query: "__dreaming_grounded_backfill__:lasting-update",
signalCount: 2,
dayBucket: "2026-04-03",
},
{
path: "memory/2026-04-03.md",
startLine: 1,
endLine: 1,
snippet: 'Always use "Happy Together" calendar for flights and reservations.',
score: 0.82,
query: "__dreaming_grounded_backfill__:candidate",
signalCount: 1,
dayBucket: "2026-04-03",
},
],
dedupeByQueryPerDay: true,
nowMs: Date.parse("2026-04-03T10:00:00.000Z"),
});
const ranked = await rankShortTermPromotionCandidates({
workspaceDir,
nowMs: Date.parse("2026-04-03T10:00:00.000Z"),
});
expect(ranked).toHaveLength(1);
expect(ranked[0]?.groundedCount).toBe(3);
expect(ranked[0]?.uniqueQueries).toBe(2);
expect(ranked[0]?.avgScore).toBeGreaterThan(0.85);
const applied = await applyShortTermPromotions({
workspaceDir,
candidates: ranked,
nowMs: Date.parse("2026-04-03T10:00:00.000Z"),
});
expect(applied.applied).toBe(1);
const memory = await fs.readFile(path.join(workspaceDir, "MEMORY.md"), "utf-8");
expect(memory).toContain('Always use "Happy Together" calendar');
});
});
it("removes grounded-only staged entries without deleting mixed live entries", async () => {
await withTempWorkspace(async (workspaceDir) => {
await writeDailyMemoryNote(workspaceDir, "2026-04-03", [
"Grounded only rule.",
"Live recall-backed rule.",
]);
await recordGroundedShortTermCandidates({
workspaceDir,
query: "__dreaming_grounded_backfill__",
items: [
{
path: "memory/2026-04-03.md",
startLine: 1,
endLine: 1,
snippet: "Grounded only rule.",
score: 0.92,
query: "__dreaming_grounded_backfill__:lasting-update",
signalCount: 2,
dayBucket: "2026-04-03",
},
{
path: "memory/2026-04-03.md",
startLine: 2,
endLine: 2,
snippet: "Live recall-backed rule.",
score: 0.92,
query: "__dreaming_grounded_backfill__:lasting-update",
signalCount: 2,
dayBucket: "2026-04-03",
},
],
dedupeByQueryPerDay: true,
});
await recordShortTermRecalls({
workspaceDir,
query: "live recall",
results: [
{
path: "memory/2026-04-03.md",
startLine: 2,
endLine: 2,
score: 0.87,
snippet: "Live recall-backed rule.",
source: "memory",
},
],
});
const result = await removeGroundedShortTermCandidates({ workspaceDir });
expect(result.removed).toBe(1);
const ranked = await rankShortTermPromotionCandidates({
workspaceDir,
minScore: 0,
minRecallCount: 0,
minUniqueQueries: 0,
});
expect(ranked).toHaveLength(1);
expect(ranked[0]?.snippet).toContain("Live recall-backed rule");
expect(ranked[0]?.groundedCount).toBe(2);
expect(ranked[0]?.recallCount).toBe(1);
});
});
it("rewards spaced recalls as consolidation instead of only raw count", async () => {
await withTempWorkspace(async (workspaceDir) => {
await recordShortTermRecalls({
@@ -1224,7 +1100,6 @@ describe("short-term promotion", () => {
snippet,
recallCount: 2,
dailyCount: 0,
groundedCount: 0,
totalScore: 1.8,
maxScore: 0.95,
firstRecalledAt: "2026-04-01T00:00:00.000Z",

View File

@@ -64,7 +64,6 @@ export type ShortTermRecallEntry = {
snippet: string;
recallCount: number;
dailyCount: number;
groundedCount: number;
totalScore: number;
maxScore: number;
firstRecalledAt: string;
@@ -72,7 +71,6 @@ export type ShortTermRecallEntry = {
queryHashes: string[];
recallDays: string[];
conceptTags: string[];
claimHash?: string;
promotedAt?: string;
};
@@ -114,12 +112,10 @@ export type PromotionCandidate = {
snippet: string;
recallCount: number;
dailyCount?: number;
groundedCount?: number;
signalCount?: number;
avgScore: number;
maxScore: number;
uniqueQueries: number;
claimHash?: string;
promotedAt?: string;
firstRecalledAt: string;
lastRecalledAt: string;
@@ -236,19 +232,13 @@ function normalizeMemoryPath(rawPath: string): string {
return rawPath.replaceAll("\\", "/").replace(/^\.\//, "");
}
/**
 * Derive a short identifier for a snippet's claim text.
 *
 * The snippet is passed through `normalizeSnippet`, hashed with SHA-1, and the
 * first 12 hex characters of the digest are returned.
 *
 * @param snippet Raw snippet text.
 * @returns A 12-character lowercase hex string.
 */
function buildClaimHash(snippet: string): string {
  const hasher = createHash("sha1");
  hasher.update(normalizeSnippet(snippet));
  const hexDigest = hasher.digest("hex");
  return hexDigest.slice(0, 12);
}
function buildEntryKey(result: {
path: string;
startLine: number;
endLine: number;
source: string;
claimHash?: string;
}): string {
const base = `${result.source}:${normalizeMemoryPath(result.path)}:${result.startLine}:${result.endLine}`;
return result.claimHash ? `${base}:${result.claimHash}` : base;
return `${result.source}:${normalizeMemoryPath(result.path)}:${result.startLine}:${result.endLine}`;
}
function hashQuery(query: string): string {
@@ -325,18 +315,6 @@ function normalizeDistinctStrings(values: unknown[], limit: number): string[] {
return normalized;
}
/**
 * Sum the recall, daily, and grounded signal counters of an entry.
 *
 * Each counter is floored to an integer first. Missing, negative, and NaN
 * counters contribute 0. NaN is handled explicitly because `Math.max(0, NaN)`
 * is NaN, and a NaN total compares false against every threshold
 * (`total <= 0`, `total < min`), which would let a corrupt entry slip past
 * count-based gates.
 *
 * @param entry Object carrying any of the three optional counters.
 * @returns The non-negative total signal count.
 */
function totalSignalCountForEntry(entry: {
  recallCount?: number;
  dailyCount?: number;
  groundedCount?: number;
}): number {
  const toCount = (value: number | undefined): number => {
    const floored = Math.floor(value ?? 0);
    // `floored > 0` is false for NaN and for negatives, so both collapse to 0.
    return floored > 0 ? floored : 0;
  };
  return toCount(entry.recallCount) + toCount(entry.dailyCount) + toCount(entry.groundedCount);
}
function calculateConsolidationComponent(recallDays: string[]): number {
if (recallDays.length === 0) {
return 0;
@@ -393,7 +371,6 @@ function normalizeStore(raw: unknown, nowIso: string): ShortTermRecallStore {
const recallCount = Math.max(0, Math.floor(Number(entry.recallCount) || 0));
const dailyCount = Math.max(0, Math.floor(Number(entry.dailyCount) || 0));
const groundedCount = Math.max(0, Math.floor(Number(entry.groundedCount) || 0));
const totalScore = Math.max(0, Number(entry.totalScore) || 0);
const maxScore = clampScore(Number(entry.maxScore) || 0);
const firstRecalledAt =
@@ -401,10 +378,6 @@ function normalizeStore(raw: unknown, nowIso: string): ShortTermRecallStore {
const lastRecalledAt =
typeof entry.lastRecalledAt === "string" ? entry.lastRecalledAt : nowIso;
const promotedAt = typeof entry.promotedAt === "string" ? entry.promotedAt : undefined;
const claimHash =
typeof entry.claimHash === "string" && entry.claimHash.trim().length > 0
? entry.claimHash.trim()
: undefined;
const snippet = typeof entry.snippet === "string" ? normalizeSnippet(entry.snippet) : "";
const queryHashes = Array.isArray(entry.queryHashes)
? normalizeDistinctStrings(entry.queryHashes, MAX_QUERY_HASHES)
@@ -423,8 +396,7 @@ function normalizeStore(raw: unknown, nowIso: string): ShortTermRecallStore {
)
: deriveConceptTags({ path: entryPath, snippet });
const normalizedKey =
key || buildEntryKey({ path: entryPath, startLine, endLine, source, claimHash });
const normalizedKey = key || buildEntryKey({ path: entryPath, startLine, endLine, source });
entries[normalizedKey] = {
key: normalizedKey,
path: entryPath,
@@ -434,7 +406,6 @@ function normalizeStore(raw: unknown, nowIso: string): ShortTermRecallStore {
snippet,
recallCount,
dailyCount,
groundedCount,
totalScore,
maxScore,
firstRecalledAt,
@@ -442,7 +413,6 @@ function normalizeStore(raw: unknown, nowIso: string): ShortTermRecallStore {
queryHashes,
recallDays: recallDays.slice(-MAX_RECALL_DAYS),
conceptTags,
...(claimHash ? { claimHash } : {}),
...(promotedAt ? { promotedAt } : {}),
};
}
@@ -598,7 +568,7 @@ function isProcessLikelyAlive(pid: number): boolean {
process.kill(pid, 0);
return true;
} catch (err) {
const code = (err as NodeJS.ErrnoException).code;
const code = (err as NodeJS.ErrnoException | undefined)?.code;
if (code === "ESRCH") {
return false;
}
@@ -651,8 +621,9 @@ async function withShortTermLock<T>(workspaceDir: string, task: () => Promise<T>
const startedAt = Date.now();
while (true) {
let lockHandle: Awaited<ReturnType<typeof fs.open>> | undefined;
try {
const lockHandle = await fs.open(lockPath, "wx");
lockHandle = await fs.open(lockPath, "wx");
await lockHandle
.writeFile(`${process.pid}:${Date.now()}\n`, "utf-8")
.catch(() => undefined);
@@ -841,21 +812,10 @@ export async function recordShortTermRecalls(params: {
const store = await readStore(workspaceDir, nowIso);
for (const result of relevant) {
const key = buildEntryKey(result);
const normalizedPath = normalizeMemoryPath(result.path);
const snippet = normalizeSnippet(result.snippet);
const claimHash = snippet ? buildClaimHash(snippet) : undefined;
const groundedKey = claimHash
? buildEntryKey({
path: normalizedPath,
startLine: Math.max(1, Math.floor(result.startLine)),
endLine: Math.max(1, Math.floor(result.endLine)),
source: "memory",
claimHash,
})
: null;
const baseKey = buildEntryKey(result);
const key = groundedKey && store.entries[groundedKey] ? groundedKey : baseKey;
const existing = store.entries[key];
const snippet = normalizeSnippet(result.snippet);
const score = clampScore(result.score);
const recallDaysBase = existing?.recallDays ?? [];
const queryHashesBase = existing?.queryHashes ?? [];
@@ -886,7 +846,6 @@ export async function recordShortTermRecalls(params: {
snippet: snippet || existing?.snippet || "",
recallCount,
dailyCount,
groundedCount: Math.max(0, Math.floor(existing?.groundedCount ?? 0)),
totalScore,
maxScore,
firstRecalledAt: existing?.firstRecalledAt ?? nowIso,
@@ -894,7 +853,6 @@ export async function recordShortTermRecalls(params: {
queryHashes,
recallDays,
conceptTags: conceptTags.length > 0 ? conceptTags : (existing?.conceptTags ?? []),
...(existing?.claimHash ? { claimHash: existing.claimHash } : {}),
...(existing?.promotedAt ? { promotedAt: existing.promotedAt } : {}),
};
}
@@ -916,129 +874,6 @@ export async function recordShortTermRecalls(params: {
});
}
/**
 * Record grounded candidates into the short-term recall store.
 *
 * Does nothing when `workspaceDir` or `query` is empty after trimming, or when
 * no item survives validation. Otherwise, under the short-term lock, each
 * surviving item is merged into the store entry keyed by source "memory",
 * path, line range, and the snippet's claim hash. The store is then written
 * back with a refreshed `updatedAt`.
 *
 * @param params.workspaceDir Workspace whose store is updated.
 * @param params.query Default query label for items that carry none.
 * @param params.items Candidate snippets with location, score, and optional
 *   per-item query, signal count, and day bucket.
 * @param params.dedupeByQueryPerDay When truthy, an item whose query hash and
 *   day bucket are both already present on the existing entry adds no count
 *   or score (timestamps and merged lists are still refreshed).
 * @param params.dayBucket Fallback day bucket for items without their own.
 * @param params.nowMs Clock override; `Date.now()` is used when not finite.
 * @param params.timezone Passed to `formatMemoryDreamingDay` for the fallback
 *   day bucket.
 */
export async function recordGroundedShortTermCandidates(params: {
workspaceDir?: string;
query: string;
items: Array<{
path: string;
startLine: number;
endLine: number;
snippet: string;
score: number;
query?: string;
signalCount?: number;
dayBucket?: string;
}>;
dedupeByQueryPerDay?: boolean;
dayBucket?: string;
nowMs?: number;
timezone?: string;
}): Promise<void> {
const workspaceDir = params.workspaceDir?.trim();
if (!workspaceDir) {
return;
}
const query = params.query.trim();
if (!query) {
return;
}
// Validate and normalize items; anything with an empty snippet, a path that is
// not a short-term memory path, or non-finite line numbers is dropped.
const relevant = params.items
.map((item) => {
const snippet = normalizeSnippet(item.snippet);
const normalizedPath = normalizeMemoryPath(item.path);
if (
!snippet ||
!normalizedPath ||
!isShortTermMemoryPath(normalizedPath) ||
!Number.isFinite(item.startLine) ||
!Number.isFinite(item.endLine)
) {
return null;
}
return {
path: normalizedPath,
// Line numbers are floored and clamped to at least 1.
startLine: Math.max(1, Math.floor(item.startLine)),
endLine: Math.max(1, Math.floor(item.endLine)),
snippet,
score: clampScore(item.score),
query: normalizeSnippet(item.query ?? query),
// Every item counts as at least one signal.
signalCount: Math.max(1, Math.floor(item.signalCount ?? 1)),
// NOTE(review): presumably normalizeIsoDay yields null/undefined for an
// invalid day so the `??` fallback below applies — confirm against its definition.
dayBucket: normalizeIsoDay(item.dayBucket ?? params.dayBucket ?? ""),
};
})
.filter((item): item is NonNullable<typeof item> => item !== null);
if (relevant.length === 0) {
return;
}
const nowMs = Number.isFinite(params.nowMs) ? (params.nowMs as number) : Date.now();
const nowIso = new Date(nowMs).toISOString();
const fallbackDayBucket = formatMemoryDreamingDay(nowMs, params.timezone);
await withShortTermLock(workspaceDir, async () => {
const store = await readStore(workspaceDir, nowIso);
for (const item of relevant) {
const dayBucket = item.dayBucket ?? fallbackDayBucket;
const effectiveQuery = item.query || query;
if (!effectiveQuery) {
continue;
}
const queryHash = hashQuery(effectiveQuery);
// The claim hash is part of the key, so different snippets on the same line
// range land in separate entries.
const claimHash = buildClaimHash(item.snippet);
const key = buildEntryKey({
path: item.path,
startLine: item.startLine,
endLine: item.endLine,
source: "memory",
claimHash,
});
const existing = store.entries[key];
const recallDaysBase = existing?.recallDays ?? [];
const queryHashesBase = existing?.queryHashes ?? [];
// A repeat of the same query on the same day contributes no extra count/score.
const dedupeSignal =
Boolean(params.dedupeByQueryPerDay) &&
queryHashesBase.includes(queryHash) &&
recallDaysBase.includes(dayBucket);
const groundedCount = Math.max(
0,
Math.floor(existing?.groundedCount ?? 0) + (dedupeSignal ? 0 : item.signalCount),
);
// The score is weighted by the item's signal count when accumulated.
const totalScore = Math.max(
0,
(existing?.totalScore ?? 0) + (dedupeSignal ? 0 : item.score * item.signalCount),
);
const maxScore = Math.max(existing?.maxScore ?? 0, dedupeSignal ? 0 : item.score);
const queryHashes = mergeQueryHashes(existing?.queryHashes ?? [], queryHash);
const recallDays = mergeRecentDistinct(recallDaysBase, dayBucket, MAX_RECALL_DAYS);
const conceptTags = deriveConceptTags({ path: item.path, snippet: item.snippet });
store.entries[key] = {
key,
path: item.path,
startLine: item.startLine,
endLine: item.endLine,
source: "memory",
snippet: item.snippet,
// Recall and daily counters are carried over untouched; only the grounded
// counter is advanced by this function.
recallCount: Math.max(0, Math.floor(existing?.recallCount ?? 0)),
dailyCount: Math.max(0, Math.floor(existing?.dailyCount ?? 0)),
groundedCount,
totalScore,
maxScore,
firstRecalledAt: existing?.firstRecalledAt ?? nowIso,
lastRecalledAt: nowIso,
queryHashes,
recallDays,
// Keep previously stored tags when none can be derived from this item.
conceptTags: conceptTags.length > 0 ? conceptTags : (existing?.conceptTags ?? []),
claimHash,
...(existing?.promotedAt ? { promotedAt: existing.promotedAt } : {}),
};
}
store.updatedAt = nowIso;
await writeStore(workspaceDir, store);
});
}
export async function recordDreamingPhaseSignals(params: {
workspaceDir?: string;
phase: "light" | "rem";
@@ -1135,8 +970,7 @@ export async function rankShortTermPromotionCandidates(
}
const recallCount = Math.max(0, Math.floor(entry.recallCount ?? 0));
const dailyCount = Math.max(0, Math.floor(entry.dailyCount ?? 0));
const groundedCount = Math.max(0, Math.floor(entry.groundedCount ?? 0));
const signalCount = totalSignalCountForEntry(entry);
const signalCount = recallCount + dailyCount;
if (signalCount <= 0) {
continue;
}
@@ -1162,10 +996,7 @@ export async function rankShortTermPromotionCandidates(
const recency = clampScore(calculateRecencyComponent(ageDays, halfLifeDays));
const recallDays = entry.recallDays ?? [];
const conceptTags = entry.conceptTags ?? [];
const consolidation = Math.max(
calculateConsolidationComponent(recallDays),
clampScore(groundedCount / 3),
);
const consolidation = calculateConsolidationComponent(recallDays);
const conceptual = calculateConceptualComponent(conceptTags);
const phaseBoost = calculatePhaseSignalBoost(phaseSignals.entries[entry.key], nowMs);
@@ -1191,12 +1022,10 @@ export async function rankShortTermPromotionCandidates(
snippet: entry.snippet,
recallCount,
dailyCount,
groundedCount,
signalCount,
avgScore,
maxScore: clampScore(entry.maxScore),
uniqueQueries,
...(entry.claimHash ? { claimHash: entry.claimHash } : {}),
promotedAt: entry.promotedAt,
firstRecalledAt: entry.firstRecalledAt,
lastRecalledAt: entry.lastRecalledAt,
@@ -1471,15 +1300,9 @@ export async function applyShortTermPromotions(
if (candidate.score < minScore) {
return false;
}
const candidateSignalCount = Math.max(
0,
const candidateSignalCount =
candidate.signalCount ??
totalSignalCountForEntry({
recallCount: candidate.recallCount,
dailyCount: candidate.dailyCount,
groundedCount: candidate.groundedCount,
}),
);
Math.max(0, candidate.recallCount) + Math.max(0, candidate.dailyCount ?? 0);
if (candidateSignalCount < minRecallCount) {
return false;
}
@@ -1783,10 +1606,6 @@ export async function repairShortTermPromotionArtifacts(params: {
0,
Math.floor((entry as { dailyCount?: number }).dailyCount ?? 0),
),
groundedCount: Math.max(
0,
Math.floor((entry as { groundedCount?: number }).groundedCount ?? 0),
),
queryHashes: (entry.queryHashes ?? []).slice(-MAX_QUERY_HASHES),
recallDays: mergeRecentDistinct(entry.recallDays ?? [], fallbackDay, MAX_RECALL_DAYS),
conceptTags: conceptTags.length > 0 ? conceptTags : (entry.conceptTags ?? []),
@@ -1822,50 +1641,6 @@ export async function repairShortTermPromotionArtifacts(params: {
};
}
/**
 * Deletes short-term entries that carry only grounded signals (grounded count
 * above zero, recall and daily counts both zero) and drops phase-signal
 * entries whose key no longer exists in the store.
 *
 * Both stores are written back only when at least one entry was removed.
 *
 * @param params.workspaceDir Workspace directory; trimmed before use.
 * @returns The number of removed entries and the resolved store path.
 */
export async function removeGroundedShortTermCandidates(params: {
  workspaceDir: string;
}): Promise<{ removed: number; storePath: string }> {
  const workspaceDir = params.workspaceDir.trim();
  const storePath = resolveStorePath(workspaceDir);
  const nowIso = new Date().toISOString();
  // Missing counts are treated as 0; values are floored and clamped at 0.
  const wholeCount = (raw: number | undefined): number => Math.max(0, Math.floor(raw ?? 0));
  let removed = 0;

  await withShortTermLock(workspaceDir, async () => {
    const [store, phaseSignals] = await Promise.all([
      readStore(workspaceDir, nowIso),
      readPhaseSignalStore(workspaceDir, nowIso),
    ]);

    for (const [key, entry] of Object.entries(store.entries)) {
      const groundedOnly =
        wholeCount(entry.groundedCount) > 0 &&
        wholeCount(entry.recallCount) === 0 &&
        wholeCount(entry.dailyCount) === 0;
      if (!groundedOnly) {
        continue;
      }
      delete store.entries[key];
      removed += 1;
    }

    // Prune phase signals that no longer have a matching store entry.
    Object.keys(phaseSignals.entries)
      .filter((key) => !Object.hasOwn(store.entries, key))
      .forEach((key) => {
        delete phaseSignals.entries[key];
      });

    if (removed <= 0) {
      return;
    }
    store.updatedAt = nowIso;
    phaseSignals.updatedAt = nowIso;
    await Promise.all([
      writeStore(workspaceDir, store),
      writePhaseSignalStore(workspaceDir, phaseSignals),
    ]);
  });

  return { removed, storePath };
}
export const __testing = {
parseLockOwnerPid,
canStealStaleLock,
@@ -1873,6 +1648,4 @@ export const __testing = {
deriveConceptTags,
calculateConsolidationComponent,
calculatePhaseSignalBoost,
buildClaimHash,
totalSignalCountForEntry,
};

View File

@@ -22,7 +22,7 @@ export const secretTargetRegistryEntries = [
export function collectRuntimeConfigAssignments(params: {
config: { channels?: Record<string, unknown> };
defaults?: SecretDefaults;
defaults: SecretDefaults | undefined;
context: ResolverContext;
}): void {
const msteams = getChannelRecord(params.config, "msteams");

View File

@@ -57,7 +57,7 @@ export const secretTargetRegistryEntries = [
export function collectRuntimeConfigAssignments(params: {
config: { channels?: Record<string, unknown> };
defaults?: SecretDefaults;
defaults: SecretDefaults | undefined;
context: ResolverContext;
}): void {
const resolved = getChannelSurface(params.config, "nextcloud-talk");

View File

@@ -445,111 +445,4 @@ describe("ollama plugin", () => {
expect(payloadSeen?.think).toBe(false);
expect((payloadSeen?.options as Record<string, unknown> | undefined)?.think).toBeUndefined();
});
it("wraps native Ollama payloads with top-level think=true when thinking is enabled", () => {
const provider = registerProvider();
let payloadSeen: Record<string, unknown> | undefined;
const baseStreamFn = vi.fn((_model, _context, options) => {
const payload: Record<string, unknown> = {
messages: [],
options: { num_ctx: 65536 },
stream: true,
};
options?.onPayload?.(payload, _model);
payloadSeen = payload;
return {} as never;
});
const wrapped = provider.wrapStreamFn?.({
config: {
models: {
providers: {
ollama: {
api: "ollama",
baseUrl: "http://127.0.0.1:11434",
models: [],
},
},
},
},
provider: "ollama",
modelId: "qwen3.5:9b",
thinkingLevel: "low",
model: {
api: "ollama",
provider: "ollama",
id: "qwen3.5:9b",
baseUrl: "http://127.0.0.1:11434",
contextWindow: 131_072,
},
streamFn: baseStreamFn,
});
expect(typeof wrapped).toBe("function");
void wrapped?.(
{
api: "ollama",
provider: "ollama",
id: "qwen3.5:9b",
} as never,
{} as never,
{},
);
expect(baseStreamFn).toHaveBeenCalledTimes(1);
expect(payloadSeen?.think).toBe(true);
expect((payloadSeen?.options as Record<string, unknown> | undefined)?.think).toBeUndefined();
});
it("does not set think param when thinkingLevel is undefined", () => {
const provider = registerProvider();
let payloadSeen: Record<string, unknown> | undefined;
const baseStreamFn = vi.fn((_model, _context, options) => {
const payload: Record<string, unknown> = {
messages: [],
options: { num_ctx: 65536 },
stream: true,
};
options?.onPayload?.(payload, _model);
payloadSeen = payload;
return {} as never;
});
const wrapped = provider.wrapStreamFn?.({
config: {
models: {
providers: {
ollama: {
api: "ollama",
baseUrl: "http://127.0.0.1:11434",
models: [],
},
},
},
},
provider: "ollama",
modelId: "qwen3.5:9b",
thinkingLevel: undefined,
model: {
api: "ollama",
provider: "ollama",
id: "qwen3.5:9b",
baseUrl: "http://127.0.0.1:11434",
contextWindow: 131_072,
},
streamFn: baseStreamFn,
});
expect(typeof wrapped).toBe("function");
void wrapped?.(
{
api: "ollama",
provider: "ollama",
id: "qwen3.5:9b",
} as never,
{} as never,
{},
);
expect(baseStreamFn).toHaveBeenCalledTimes(1);
expect(payloadSeen?.think).toBeUndefined();
});
});

View File

@@ -2,7 +2,6 @@
"id": "ollama",
"enabledByDefault": true,
"providers": ["ollama"],
"providerDiscoveryEntry": "./provider-discovery.ts",
"providerAuthEnvVars": {
"ollama": ["OLLAMA_API_KEY"]
},

View File

@@ -1,30 +0,0 @@
import fs from "node:fs";
import path from "node:path";
import { describe, expect, it } from "vitest";
const repoRoot = path.resolve(import.meta.dirname, "../..");
/** Reads a file, addressed relative to the repository root, as UTF-8 text. */
function readPluginSource(relativePath: string): string {
  const absolutePath = path.join(repoRoot, relativePath);
  return fs.readFileSync(absolutePath, "utf8");
}
// Guards the import surface of the discovery entry file by scanning its source text.
describe("ollama provider discovery import surface", () => {
it("stays off the full provider runtime graph", () => {
const source = readPluginSource("extensions/ollama/provider-discovery.ts");
// Plain substring check: any occurrence of one of these specifiers anywhere
// in the file text (including comments) fails the test.
for (const forbidden of [
"./index",
"./api",
"./runtime-api",
"./src/setup",
"./src/stream",
"./src/embedding-provider",
"./src/memory-embedding-adapter",
"./src/web-search-provider",
"openclaw/plugin-sdk/text-runtime",
"openclaw/plugin-sdk/plugin-entry",
]) {
expect(source, `provider discovery must not import ${forbidden}`).not.toContain(forbidden);
}
});
});

View File

@@ -1,199 +0,0 @@
import type { ProviderCatalogContext } from "openclaw/plugin-sdk/provider-catalog-shared";
import type { ModelProviderConfig } from "openclaw/plugin-sdk/provider-model-shared";
import { OLLAMA_DEFAULT_BASE_URL } from "./src/defaults.js";
import {
buildOllamaModelDefinition,
enrichOllamaModelsWithContext,
fetchOllamaModels,
resolveOllamaApiBase,
} from "./src/provider-models.js";
// Provider id used for API-key resolution and as the exported plugin id.
const PROVIDER_ID = "ollama";
// Placeholder apiKey used when no env, explicit, or resolved key is available.
const DEFAULT_API_KEY = "ollama-local";
// Maximum number of discovered models handed to context enrichment.
const OLLAMA_CONTEXT_ENRICH_LIMIT = 200;
// Plugin-level config for the `ollama` plugin entry; this module only reads
// `discovery.enabled` from it.
type OllamaPluginConfig = {
discovery?: {
enabled?: boolean;
};
};
// Alias of the shared provider config shape.
type OllamaProviderLikeConfig = ModelProviderConfig;
// Shape of the discovery plugin object exported by this module.
type OllamaProviderPlugin = {
id: string;
label: string;
docsPath: string;
envVars: string[];
auth: [];
discovery: {
order: "late";
run: (ctx: ProviderCatalogContext) => ReturnType<typeof runOllamaDiscovery>;
};
};
/**
 * Returns the trimmed string when `value` is a string with non-whitespace
 * content; otherwise returns `undefined`.
 */
function normalizeOptionalString(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed ? trimmed : undefined;
}
/**
 * Extracts a normalized string from either a plain string or an object that
 * has a `value` property. Anything else yields `undefined`.
 */
function readStringValue(value: unknown): string | undefined {
  if (typeof value === "string") {
    return normalizeOptionalString(value);
  }
  if (!value || typeof value !== "object" || !("value" in value)) {
    return undefined;
  }
  const wrapped = (value as { value?: unknown }).value;
  return normalizeOptionalString(wrapped);
}
/**
 * Picks the apiKey value to attach to the discovered provider.
 *
 * Precedence: a non-blank `OLLAMA_API_KEY` env var, then the explicit key,
 * then the resolved key, then the placeholder default. When the env var wins,
 * the returned value is the variable NAME ("OLLAMA_API_KEY"), not its content.
 */
function resolveOllamaDiscoveryApiKey(params: {
  env: NodeJS.ProcessEnv;
  explicitApiKey?: string;
  resolvedApiKey?: string;
}): string {
  if (params.env.OLLAMA_API_KEY?.trim()) {
    return "OLLAMA_API_KEY";
  }
  return params.explicitApiKey ?? params.resolvedApiKey ?? DEFAULT_API_KEY;
}
/**
 * Reports whether the environment looks like a test run: `VITEST` is set to a
 * non-empty value, or `NODE_ENV` equals "test".
 */
function shouldSkipAmbientOllamaDiscovery(env: NodeJS.ProcessEnv): boolean {
  if (env.VITEST) {
    return true;
  }
  return env.NODE_ENV === "test";
}
/**
 * Decides whether an explicit `ollama` provider config carries settings beyond
 * the defaults.
 *
 * Checks run in order: listed models, a non-blank baseUrl, then apiKey, auth,
 * authHeader, headers, request, and injectNumCtxForOpenAICompat.
 *
 * NOTE(review): a non-blank baseUrl is decisive. When it normalizes to the
 * default base URL the function returns false without looking at the
 * remaining fields. Confirm this short-circuit is intended.
 */
function hasMeaningfulExplicitOllamaConfig(
  providerConfig: OllamaProviderLikeConfig | undefined,
): boolean {
  if (!providerConfig) {
    return false;
  }

  const listsModels = Array.isArray(providerConfig.models) && providerConfig.models.length > 0;
  if (listsModels) {
    return true;
  }

  if (typeof providerConfig.baseUrl === "string" && providerConfig.baseUrl.trim()) {
    const normalizedBase = resolveOllamaApiBase(providerConfig.baseUrl);
    return normalizedBase !== OLLAMA_DEFAULT_BASE_URL;
  }

  return (
    Boolean(readStringValue(providerConfig.apiKey)) ||
    Boolean(providerConfig.auth) ||
    typeof providerConfig.authHeader === "boolean" ||
    Boolean(
      providerConfig.headers &&
      typeof providerConfig.headers === "object" &&
      Object.keys(providerConfig.headers).length > 0,
    ) ||
    Boolean(providerConfig.request) ||
    typeof providerConfig.injectNumCtxForOpenAICompat === "boolean"
  );
}
/**
 * Probes an Ollama server and builds a provider config from the models it
 * reports.
 *
 * @param configuredBaseUrl Optional base URL; normalized via resolveOllamaApiBase.
 * @param opts.quiet When truthy, suppresses the "could not be reached" warning.
 * @returns Provider config with `api: "ollama"` and one model definition per
 *   enriched model; at most OLLAMA_CONTEXT_ENRICH_LIMIT models are enriched.
 */
async function buildOllamaProvider(
  configuredBaseUrl?: string,
  opts?: { quiet?: boolean },
): Promise<ModelProviderConfig> {
  const apiBase = resolveOllamaApiBase(configuredBaseUrl);
  const probe = await fetchOllamaModels(apiBase);

  const shouldWarn = !probe.reachable && !opts?.quiet;
  if (shouldWarn) {
    console.warn(`Ollama could not be reached at ${apiBase}.`);
  }

  const cappedModels = probe.models.slice(0, OLLAMA_CONTEXT_ENRICH_LIMIT);
  const enriched = await enrichOllamaModelsWithContext(apiBase, cappedModels);
  const modelDefinitions = enriched.map((model) =>
    buildOllamaModelDefinition(model.name, model.contextWindow, model.capabilities),
  );

  return {
    baseUrl: apiBase,
    api: "ollama",
    models: modelDefinitions,
  };
}
/**
 * Returns the `config` object of the `ollama` entry under
 * `ctx.config.plugins.entries`, or an empty object when any level is missing.
 */
function resolveOllamaPluginConfig(ctx: ProviderCatalogContext): OllamaPluginConfig {
  type PluginEntryMap = Record<string, { config?: OllamaPluginConfig }>;
  const pluginEntries = (ctx.config.plugins?.entries ?? {}) as PluginEntryMap;
  const ollamaEntry = pluginEntries.ollama;
  return ollamaEntry?.config ?? {};
}
/**
 * Discovery entry point for the Ollama provider.
 *
 * Returns `null` when discovery is disabled (and no explicit models exist),
 * when ambient discovery is skipped in a test environment, or when probing
 * finds no models and no key is configured. Otherwise returns
 * `{ provider }`, built either from the explicit config (when it lists
 * models) or from probing the server.
 */
async function runOllamaDiscovery(ctx: ProviderCatalogContext) {
const pluginConfig = resolveOllamaPluginConfig(ctx);
const explicit = ctx.config.models?.providers?.ollama;
const hasExplicitModels = Array.isArray(explicit?.models) && explicit.models.length > 0;
const hasMeaningfulExplicitConfig = hasMeaningfulExplicitOllamaConfig(explicit);
// The plugin-level flag takes precedence over models.ollamaDiscovery.enabled.
const discoveryEnabled =
pluginConfig.discovery?.enabled ?? ctx.config.models?.ollamaDiscovery?.enabled;
// Only an explicit `false` disables discovery; explicitly listed models still pass.
if (!hasExplicitModels && discoveryEnabled === false) {
return null;
}
const ollamaKey = ctx.resolveProviderApiKey(PROVIDER_ID).apiKey;
// A "real" key is non-blank and differs from the placeholder default.
const hasRealOllamaKey =
typeof ollamaKey === "string" &&
ollamaKey.trim().length > 0 &&
ollamaKey.trim() !== DEFAULT_API_KEY;
const explicitApiKey = readStringValue(explicit?.apiKey);
// Explicit models: return the configured provider without probing the server,
// filling in a normalized baseUrl, a default api, and the resolved apiKey.
if (hasExplicitModels && explicit) {
return {
provider: {
...explicit,
baseUrl:
typeof explicit.baseUrl === "string" && explicit.baseUrl.trim()
? resolveOllamaApiBase(explicit.baseUrl)
: OLLAMA_DEFAULT_BASE_URL,
api: explicit.api ?? "ollama",
apiKey: resolveOllamaDiscoveryApiKey({
env: ctx.env,
explicitApiKey,
resolvedApiKey: ollamaKey,
}),
},
};
}
// With no real key and no meaningful explicit config, skip probing in test environments.
if (
!hasRealOllamaKey &&
!hasMeaningfulExplicitConfig &&
shouldSkipAmbientOllamaDiscovery(ctx.env)
) {
return null;
}
// Probe quietly (no unreachable warning) when nothing indicates Ollama was configured.
const provider = await buildOllamaProvider(explicit?.baseUrl, {
quiet: !hasRealOllamaKey && !hasMeaningfulExplicitConfig,
});
// Nothing discovered and no key of any kind: report no provider.
if (provider.models?.length === 0 && !ollamaKey && !explicit?.apiKey) {
return null;
}
return {
provider: {
...provider,
apiKey: resolveOllamaDiscoveryApiKey({
env: ctx.env,
explicitApiKey,
resolvedApiKey: ollamaKey,
}),
},
};
}
// Discovery-only plugin descriptor for Ollama: declares no auth methods and
// delegates catalog discovery to runOllamaDiscovery with "late" ordering.
export const ollamaProviderDiscovery: OllamaProviderPlugin = {
id: PROVIDER_ID,
label: "Ollama",
docsPath: "/providers/ollama",
envVars: ["OLLAMA_API_KEY"],
auth: [],
discovery: {
order: "late",
run: runOllamaDiscovery,
},
};
export default ollamaProviderDiscovery;

View File

@@ -1,228 +0,0 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { buildAssistantMessage, createOllamaStreamFn } from "./stream.js";
/**
 * Builds a completed (`done: true`) Ollama chat response fixture.
 *
 * `content` defaults to an empty string. `thinking` and `reasoning` are
 * included whenever they are not null/undefined (an empty string is kept);
 * `tool_calls` is included only when truthy.
 */
function makeOllamaResponse(params: {
  content?: string;
  thinking?: string;
  reasoning?: string;
  tool_calls?: Array<{ function: { name: string; arguments: Record<string, unknown> } }>;
}) {
  const thinkingPart = params.thinking != null ? { thinking: params.thinking } : {};
  const reasoningPart = params.reasoning != null ? { reasoning: params.reasoning } : {};
  const toolCallsPart = params.tool_calls ? { tool_calls: params.tool_calls } : {};

  const message = {
    role: "assistant" as const,
    content: params.content ?? "",
    ...thinkingPart,
    ...reasoningPart,
    ...toolCallsPart,
  };

  return {
    model: "qwen3.5",
    created_at: new Date().toISOString(),
    message,
    done: true,
    prompt_eval_count: 100,
    eval_count: 50,
  };
}
const MODEL_INFO = { api: "ollama", provider: "ollama", id: "qwen3.5" };
// Covers how buildAssistantMessage maps the `thinking` / `reasoning` response
// fields into a leading "thinking" content block ahead of the text block.
describe("buildAssistantMessage", () => {
it("includes thinking block when response has thinking field", () => {
const response = makeOllamaResponse({
thinking: "Let me think about this",
content: "The answer is 42",
});
const msg = buildAssistantMessage(response, MODEL_INFO);
expect(msg.content).toHaveLength(2);
expect(msg.content[0]).toEqual({ type: "thinking", thinking: "Let me think about this" });
expect(msg.content[1]).toEqual({ type: "text", text: "The answer is 42" });
});
it("includes thinking block when response has reasoning field", () => {
const response = makeOllamaResponse({
reasoning: "Step by step analysis",
content: "Result is 7",
});
const msg = buildAssistantMessage(response, MODEL_INFO);
expect(msg.content).toHaveLength(2);
expect(msg.content[0]).toEqual({ type: "thinking", thinking: "Step by step analysis" });
expect(msg.content[1]).toEqual({ type: "text", text: "Result is 7" });
});
it("prefers thinking over reasoning when both are present", () => {
const response = makeOllamaResponse({
thinking: "From thinking field",
reasoning: "From reasoning field",
content: "Answer",
});
const msg = buildAssistantMessage(response, MODEL_INFO);
expect(msg.content[0]).toEqual({ type: "thinking", thinking: "From thinking field" });
});
it("omits thinking block when no thinking or reasoning field", () => {
const response = makeOllamaResponse({
content: "Just text",
});
const msg = buildAssistantMessage(response, MODEL_INFO);
expect(msg.content).toHaveLength(1);
expect(msg.content[0]).toEqual({ type: "text", text: "Just text" });
});
// An empty-string thinking field is present on the response but must not
// produce a thinking block.
it("omits thinking block when thinking field is empty", () => {
const response = makeOllamaResponse({
thinking: "",
content: "Just text",
});
const msg = buildAssistantMessage(response, MODEL_INFO);
expect(msg.content).toHaveLength(1);
expect(msg.content[0]).toEqual({ type: "text", text: "Just text" });
});
});
describe("createOllamaStreamFn thinking events", () => {
afterEach(() => vi.unstubAllGlobals());
/**
 * Serializes each chunk as one JSON line (newline-terminated) and returns a
 * stream that delivers the whole NDJSON payload as a single UTF-8 chunk
 * before closing.
 */
function makeNdjsonBody(chunks: Array<Record<string, unknown>>): ReadableStream<Uint8Array> {
  let ndjson = "";
  for (const chunk of chunks) {
    ndjson += JSON.stringify(chunk) + "\n";
  }
  const payload = new TextEncoder().encode(ndjson);
  return new ReadableStream({
    start(controller) {
      controller.enqueue(payload);
      controller.close();
    },
  });
}
// End-to-end stream check: thinking deltas arrive first, then text, then done.
it("emits thinking_start, thinking_delta, and thinking_end events for thinking content", async () => {
// Four NDJSON chunks: two thinking deltas, one text delta (with an empty
// thinking field), and the terminal done chunk carrying usage counts.
const thinkingChunks = [
{
model: "qwen3.5",
created_at: "2026-01-01T00:00:00Z",
message: { role: "assistant", content: "", thinking: "Step 1" },
done: false,
},
{
model: "qwen3.5",
created_at: "2026-01-01T00:00:01Z",
message: { role: "assistant", content: "", thinking: " and step 2" },
done: false,
},
{
model: "qwen3.5",
created_at: "2026-01-01T00:00:02Z",
message: { role: "assistant", content: "The answer", thinking: "" },
done: false,
},
{
model: "qwen3.5",
created_at: "2026-01-01T00:00:03Z",
message: { role: "assistant", content: "" },
done: true,
done_reason: "stop",
prompt_eval_count: 10,
eval_count: 5,
},
];
const body = makeNdjsonBody(thinkingChunks);
const fetchMock = vi.fn().mockResolvedValue({
ok: true,
body,
});
// Replace global fetch with the canned response; restored by the suite's afterEach.
vi.stubGlobal("fetch", fetchMock);
const streamFn = createOllamaStreamFn("http://localhost:11434");
const stream = streamFn(
{ api: "ollama", provider: "ollama", id: "qwen3.5", contextWindow: 65536 } as never,
{ messages: [{ role: "user", content: "test" }] } as never,
{},
);
// Drain the stream so every emitted event can be inspected.
const events: Array<{ type: string; [key: string]: unknown }> = [];
for await (const event of stream as AsyncIterable<{ type: string; [key: string]: unknown }>) {
events.push(event);
}
const eventTypes = events.map((e) => e.type);
expect(eventTypes).toContain("thinking_start");
expect(eventTypes).toContain("thinking_delta");
expect(eventTypes).toContain("thinking_end");
expect(eventTypes).toContain("text_start");
expect(eventTypes).toContain("text_delta");
expect(eventTypes).toContain("done");
// thinking_start comes before text_start
const thinkingStartIndex = eventTypes.indexOf("thinking_start");
const textStartIndex = eventTypes.indexOf("text_start");
expect(thinkingStartIndex).toBeLessThan(textStartIndex);
// thinking_end comes before text_start
const thinkingEndIndex = eventTypes.indexOf("thinking_end");
expect(thinkingEndIndex).toBeLessThan(textStartIndex);
// Thinking deltas have correct content
const thinkingDeltas = events.filter((e) => e.type === "thinking_delta");
expect(thinkingDeltas).toHaveLength(2);
expect(thinkingDeltas[0].delta).toBe("Step 1");
expect(thinkingDeltas[1].delta).toBe(" and step 2");
// Content index: thinking at 0, text at 1
const thinkingStart = events.find((e) => e.type === "thinking_start");
expect(thinkingStart?.contentIndex).toBe(0);
const textStart = events.find((e) => e.type === "text_start");
expect(textStart?.contentIndex).toBe(1);
// Final message has thinking block
const done = events.find((e) => e.type === "done") as { message?: { content: unknown[] } };
const content = done?.message?.content ?? [];
expect(content[0]).toMatchObject({ type: "thinking", thinking: "Step 1 and step 2" });
expect(content[1]).toMatchObject({ type: "text", text: "The answer" });
});
// Control case: chunks without any thinking field must yield text events only.
it("streams without thinking events when no thinking content is present", async () => {
// One text delta followed by the terminal done chunk.
const chunks = [
{
model: "qwen3.5",
created_at: "2026-01-01T00:00:00Z",
message: { role: "assistant", content: "Hello" },
done: false,
},
{
model: "qwen3.5",
created_at: "2026-01-01T00:00:01Z",
message: { role: "assistant", content: "" },
done: true,
done_reason: "stop",
prompt_eval_count: 10,
eval_count: 5,
},
];
const body = makeNdjsonBody(chunks);
// Replace global fetch with the canned response; restored by the suite's afterEach.
vi.stubGlobal("fetch", vi.fn().mockResolvedValue({ ok: true, body }));
const streamFn = createOllamaStreamFn("http://localhost:11434");
const stream = streamFn(
{ api: "ollama", provider: "ollama", id: "qwen3.5", contextWindow: 65536 } as never,
{ messages: [{ role: "user", content: "test" }] } as never,
{},
);
// Drain the stream so every emitted event can be inspected.
const events: Array<{ type: string }> = [];
for await (const event of stream as AsyncIterable<{ type: string }>) {
events.push(event);
}
const eventTypes = events.map((e) => e.type);
expect(eventTypes).not.toContain("thinking_start");
expect(eventTypes).not.toContain("thinking_delta");
expect(eventTypes).not.toContain("thinking_end");
expect(eventTypes).toContain("text_start");
expect(eventTypes).toContain("text_delta");
expect(eventTypes).toContain("done");
// Text content index should be 0 (no thinking block)
const textStart = events.find((e) => e.type === "text_start") as { contentIndex?: number };
expect(textStart?.contentIndex).toBe(0);
});
});

View File

@@ -4,7 +4,6 @@ import type {
AssistantMessage,
StopReason,
TextContent,
ThinkingContent,
ToolCall,
Tool,
Usage,
@@ -149,14 +148,14 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
});
}
function createOllamaThinkingWrapper(baseFn: StreamFn | undefined, think: boolean): StreamFn {
function createOllamaThinkingOffWrapper(baseFn: StreamFn | undefined): StreamFn {
const streamFn = baseFn ?? streamSimple;
return (model, context, options) => {
if (model.api !== "ollama") {
return streamFn(model, context, options);
}
return streamWithPayloadPatch(streamFn, model, context, options, (payloadRecord) => {
payloadRecord.think = think;
payloadRecord.think = false;
});
};
}
@@ -198,11 +197,7 @@ export function createConfiguredOllamaCompatStreamWrapper(
}
if (ctx.thinkingLevel === "off") {
streamFn = createOllamaThinkingWrapper(streamFn, false);
} else if (ctx.thinkingLevel) {
// Any non-off ThinkLevel (minimal, low, medium, high, xhigh, adaptive)
// should enable Ollama's native thinking mode.
streamFn = createOllamaThinkingWrapper(streamFn, true);
streamFn = createOllamaThinkingOffWrapper(streamFn);
}
if (normalizeProviderId(ctx.provider) === "ollama" && isOllamaCloudKimiModelRef(ctx.modelId)) {
@@ -516,11 +511,7 @@ export function buildAssistantMessage(
response: OllamaChatResponse,
modelInfo: StreamModelDescriptor,
): AssistantMessage {
const content: (TextContent | ThinkingContent | ToolCall)[] = [];
const thinking = response.message.thinking ?? response.message.reasoning ?? "";
if (thinking) {
content.push({ type: "thinking", thinking });
}
const content: (TextContent | ToolCall)[] = [];
const text = response.message.content || "";
if (text) {
content.push({ type: "text", text });
@@ -663,121 +654,39 @@ export function createOllamaStreamFn(
const reader = response.body.getReader();
let accumulatedContent = "";
let accumulatedThinking = "";
const accumulatedToolCalls: OllamaToolCall[] = [];
let finalResponse: OllamaChatResponse | undefined;
const modelInfo = { api: model.api, provider: model.provider, id: model.id };
let streamStarted = false;
let thinkingStarted = false;
let thinkingEnded = false;
let textBlockStarted = false;
let textBlockClosed = false;
// Content index tracking: thinking block (if present) is index 0,
// text block follows at index 1 (or 0 when no thinking).
const textContentIndex = () => (thinkingStarted ? 1 : 0);
const buildCurrentContent = (): (TextContent | ThinkingContent | ToolCall)[] => {
const parts: (TextContent | ThinkingContent | ToolCall)[] = [];
if (accumulatedThinking) {
parts.push({
type: "thinking",
thinking: accumulatedThinking,
});
}
if (accumulatedContent) {
parts.push({ type: "text", text: accumulatedContent });
}
return parts;
};
const closeThinkingBlock = () => {
if (!thinkingStarted || thinkingEnded) {
return;
}
thinkingEnded = true;
const partial = buildStreamAssistantMessage({
model: modelInfo,
content: buildCurrentContent(),
stopReason: "stop",
usage: buildUsageWithNoCost({}),
});
stream.push({
type: "thinking_end",
contentIndex: 0,
content: accumulatedThinking,
partial,
});
};
const closeTextBlock = () => {
if (!textBlockStarted || textBlockClosed) {
if (!streamStarted || textBlockClosed) {
return;
}
textBlockClosed = true;
const partial = buildStreamAssistantMessage({
model: modelInfo,
content: buildCurrentContent(),
content: [{ type: "text", text: accumulatedContent }],
stopReason: "stop",
usage: buildUsageWithNoCost({}),
});
stream.push({
type: "text_end",
contentIndex: textContentIndex(),
contentIndex: 0,
content: accumulatedContent,
partial,
});
};
for await (const chunk of parseNdjsonStream(reader)) {
// Handle thinking/reasoning deltas from Ollama's native think mode.
const thinkingDelta = chunk.message?.thinking ?? chunk.message?.reasoning;
if (thinkingDelta) {
if (!streamStarted) {
streamStarted = true;
const emptyPartial = buildStreamAssistantMessage({
model: modelInfo,
content: [],
stopReason: "stop",
usage: buildUsageWithNoCost({}),
});
stream.push({ type: "start", partial: emptyPartial });
}
if (!thinkingStarted) {
thinkingStarted = true;
const partial = buildStreamAssistantMessage({
model: modelInfo,
content: buildCurrentContent(),
stopReason: "stop",
usage: buildUsageWithNoCost({}),
});
stream.push({ type: "thinking_start", contentIndex: 0, partial });
}
accumulatedThinking += thinkingDelta;
const partial = buildStreamAssistantMessage({
model: modelInfo,
content: buildCurrentContent(),
stopReason: "stop",
usage: buildUsageWithNoCost({}),
});
stream.push({
type: "thinking_delta",
contentIndex: 0,
delta: thinkingDelta,
partial,
});
}
if (chunk.message?.content) {
const delta = chunk.message.content;
// Transition from thinking to text: close the thinking block first.
if (thinkingStarted && !thinkingEnded) {
closeThinkingBlock();
}
if (!streamStarted) {
streamStarted = true;
// Emit start/text_start with an empty partial before accumulating
// the first delta, matching the Anthropic/OpenAI provider contract.
const emptyPartial = buildStreamAssistantMessage({
model: modelInfo,
content: [],
@@ -785,29 +694,19 @@ export function createOllamaStreamFn(
usage: buildUsageWithNoCost({}),
});
stream.push({ type: "start", partial: emptyPartial });
}
if (!textBlockStarted) {
textBlockStarted = true;
const partial = buildStreamAssistantMessage({
model: modelInfo,
content: buildCurrentContent(),
stopReason: "stop",
usage: buildUsageWithNoCost({}),
});
stream.push({ type: "text_start", contentIndex: textContentIndex(), partial });
stream.push({ type: "text_start", contentIndex: 0, partial: emptyPartial });
}
accumulatedContent += delta;
const partial = buildStreamAssistantMessage({
model: modelInfo,
content: buildCurrentContent(),
content: [{ type: "text", text: accumulatedContent }],
stopReason: "stop",
usage: buildUsageWithNoCost({}),
});
stream.push({ type: "text_delta", contentIndex: textContentIndex(), delta, partial });
stream.push({ type: "text_delta", contentIndex: 0, delta, partial });
}
if (chunk.message?.tool_calls) {
closeThinkingBlock();
closeTextBlock();
accumulatedToolCalls.push(...chunk.message.tool_calls);
}
@@ -822,17 +721,13 @@ export function createOllamaStreamFn(
}
finalResponse.message.content = accumulatedContent;
if (accumulatedThinking) {
finalResponse.message.thinking = accumulatedThinking;
}
if (accumulatedToolCalls.length > 0) {
finalResponse.message.tool_calls = accumulatedToolCalls;
}
const assistantMessage = buildAssistantMessage(finalResponse, modelInfo);
// Close any open blocks before emitting the done event.
closeThinkingBlock();
// Close the text block if we emitted any text_delta events.
closeTextBlock();
stream.push({

View File

@@ -109,7 +109,6 @@ describe("runQaCharacterEval", () => {
const report = await fs.readFile(result.reportPath, "utf8");
expect(report).toContain("Execution: local QA gateway child processes, not Docker");
expect(report).toContain("Judges: openai/gpt-5.4");
expect(report).toContain("Judge model labels: visible");
expect(report).toContain("## Judge Rankings");
expect(report).toContain("### openai/gpt-5.4");
expect(report).toContain("reply from openai/gpt-5.4");
@@ -121,57 +120,6 @@ describe("runQaCharacterEval", () => {
expect(report).not.toContain("Judge Raw Reply");
});
it("can hide candidate model refs from judge prompts and map rankings back", async () => {
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) =>
makeSuiteResult({
outputDir: params.outputDir,
model: params.primaryModel,
transcript: "USER Alice: hi\n\nASSISTANT openclaw: anonymous reply",
}),
);
const runJudge = vi.fn(async (params: CharacterRunJudgeParams) => {
expect(params.prompt).toContain("## CANDIDATE candidate-01");
expect(params.prompt).toContain("## CANDIDATE candidate-02");
expect(params.prompt).not.toContain("openai/gpt-5.4");
expect(params.prompt).not.toContain("codex-cli/test-model");
return JSON.stringify({
rankings: [
{
model: "candidate-02",
rank: 1,
score: 9.1,
summary: "Better vibes.",
},
{
model: "candidate-01",
rank: 2,
score: 7.4,
summary: "Solid.",
},
],
});
});
const result = await runQaCharacterEval({
repoRoot: tempRoot,
outputDir: path.join(tempRoot, "character"),
models: ["openai/gpt-5.4", "codex-cli/test-model"],
judgeModels: ["openai/gpt-5.4"],
judgeBlindModels: true,
runSuite,
runJudge,
});
expect(result.judgments[0]?.blindModels).toBe(true);
expect(result.judgments[0]?.rankings.map((ranking) => ranking.model)).toEqual([
"codex-cli/test-model",
"openai/gpt-5.4",
]);
const report = await fs.readFile(result.reportPath, "utf8");
expect(report).toContain("Judge model labels: blind");
expect(report).toContain("1. codex-cli/test-model - 9.1 - Better vibes.");
});
it("defaults to the character eval model panel when no models are provided", async () => {
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) =>
makeSuiteResult({
@@ -185,12 +133,14 @@ describe("runQaCharacterEval", () => {
rankings: [
{ model: "openai/gpt-5.4", rank: 1, score: 8, summary: "ok" },
{ model: "openai/gpt-5.2", rank: 2, score: 7.5, summary: "ok" },
{ model: "openai/gpt-5", rank: 3, score: 7.2, summary: "ok" },
{ model: "anthropic/claude-opus-4-6", rank: 4, score: 7, summary: "ok" },
{ model: "anthropic/claude-sonnet-4-6", rank: 5, score: 6.8, summary: "ok" },
{ model: "anthropic/claude-opus-4-6", rank: 3, score: 7, summary: "ok" },
{ model: "anthropic/claude-sonnet-4-6", rank: 4, score: 6.8, summary: "ok" },
{ model: "minimax/MiniMax-M2.7", rank: 5, score: 6.5, summary: "ok" },
{ model: "zai/glm-5.1", rank: 6, score: 6.3, summary: "ok" },
{ model: "moonshot/kimi-k2.5", rank: 7, score: 6.2, summary: "ok" },
{ model: "google/gemini-3.1-pro-preview", rank: 8, score: 6, summary: "ok" },
{ model: "qwen/qwen3.6-plus", rank: 8, score: 6.1, summary: "ok" },
{ model: "xiaomi/mimo-v2-pro", rank: 9, score: 6, summary: "ok" },
{ model: "google/gemini-3.1-pro-preview", rank: 10, score: 5.9, summary: "ok" },
],
}),
);
@@ -203,21 +153,25 @@ describe("runQaCharacterEval", () => {
runJudge,
});
expect(runSuite).toHaveBeenCalledTimes(8);
expect(runSuite).toHaveBeenCalledTimes(10);
expect(runSuite.mock.calls.map(([params]) => params.primaryModel)).toEqual([
"openai/gpt-5.4",
"openai/gpt-5.2",
"openai/gpt-5",
"anthropic/claude-opus-4-6",
"anthropic/claude-sonnet-4-6",
"minimax/MiniMax-M2.7",
"zai/glm-5.1",
"moonshot/kimi-k2.5",
"qwen/qwen3.6-plus",
"xiaomi/mimo-v2-pro",
"google/gemini-3.1-pro-preview",
]);
expect(runSuite.mock.calls.map(([params]) => params.thinkingDefault)).toEqual([
"xhigh",
"xhigh",
"xhigh",
"high",
"high",
"high",
"high",
"high",
"high",
@@ -227,7 +181,9 @@ describe("runQaCharacterEval", () => {
expect(runSuite.mock.calls.map(([params]) => params.fastMode)).toEqual([
true,
true,
true,
false,
false,
false,
false,
false,
false,
@@ -288,7 +244,7 @@ describe("runQaCharacterEval", () => {
]);
});
it("defaults candidate and judge concurrency to sixteen", async () => {
it("defaults candidate and judge concurrency to eight", async () => {
let activeRuns = 0;
let maxActiveRuns = 0;
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) => {
@@ -310,7 +266,7 @@ describe("runQaCharacterEval", () => {
await new Promise((resolve) => setTimeout(resolve, 10));
activeJudges -= 1;
return JSON.stringify({
rankings: Array.from({ length: 20 }, (_, index) => ({
rankings: Array.from({ length: 10 }, (_, index) => ({
model: `provider/model-${index + 1}`,
rank: index + 1,
score: 10 - index,
@@ -322,137 +278,14 @@ describe("runQaCharacterEval", () => {
await runQaCharacterEval({
repoRoot: tempRoot,
outputDir: path.join(tempRoot, "character"),
models: Array.from({ length: 20 }, (_, index) => `provider/model-${index + 1}`),
judgeModels: Array.from({ length: 20 }, (_, index) => `judge/model-${index + 1}`),
models: Array.from({ length: 10 }, (_, index) => `provider/model-${index + 1}`),
judgeModels: Array.from({ length: 10 }, (_, index) => `judge/model-${index + 1}`),
runSuite,
runJudge,
});
expect(maxActiveRuns).toBe(16);
expect(maxActiveJudges).toBe(16);
});
it("marks raw provider error transcripts as failed output", async () => {
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) =>
makeSuiteResult({
outputDir: params.outputDir,
model: params.primaryModel,
transcript:
"USER Alice: Are you awake?\n\nASSISTANT OpenClaw QA: 400 model `qwen3.6-plus` is not supported.",
}),
);
const runJudge = vi.fn(async (_params: CharacterRunJudgeParams) =>
JSON.stringify({
rankings: [{ model: "qwen/qwen3.6-plus", rank: 1, score: 0.5, summary: "failed" }],
}),
);
const result = await runQaCharacterEval({
repoRoot: tempRoot,
outputDir: path.join(tempRoot, "character"),
models: ["qwen/qwen3.6-plus"],
judgeModels: ["openai/gpt-5.4"],
runSuite,
runJudge,
});
expect(result.runs[0]).toMatchObject({
model: "qwen/qwen3.6-plus",
status: "fail",
error: "model unsupported error leaked into transcript",
});
});
// A transcript containing a raw "Write: ... failed" tool message must be
// reported as a failed run carrying the tool-failure error label.
it("marks raw tool failure transcripts as failed output", async () => {
  const candidateModel = "qwen/qwen3.5-plus";
  const leakedTranscript = "ASSISTANT OpenClaw QA: ⚠️ ✍️ Write: to /tmp/precious.html failed";

  // Suite stub: echoes the requested output dir/model and returns the leaked transcript.
  const runSuite = vi.fn(async (params: CharacterRunSuiteParams) => {
    return makeSuiteResult({
      outputDir: params.outputDir,
      model: params.primaryModel,
      transcript: leakedTranscript,
    });
  });
  // Judge stub: always answers with a single ranking for the candidate.
  const runJudge = vi.fn(async (_params: CharacterRunJudgeParams) => {
    return JSON.stringify({
      rankings: [{ model: candidateModel, rank: 1, score: 0.5, summary: "failed" }],
    });
  });

  const result = await runQaCharacterEval({
    repoRoot: tempRoot,
    outputDir: path.join(tempRoot, "character"),
    models: [candidateModel],
    judgeModels: ["openai/gpt-5.4"],
    runSuite,
    runJudge,
  });

  expect(result.runs[0]).toMatchObject({
    model: candidateModel,
    status: "fail",
    error: "tool failure leaked into transcript",
  });
});
// A transcript containing the generic "Something went wrong" fallback reply
// must be reported as a failed run carrying the generic-failure error label.
it("marks generic channel fallback transcripts as failed output", async () => {
  const candidateModel = "qa/generic-fallback-model";
  const leakedTranscript =
    "ASSISTANT OpenClaw QA: ⚠️ Something went wrong while processing your request. Please try again, or use /new to start a fresh session.";

  // Suite stub: echoes the requested output dir/model and returns the leaked transcript.
  const runSuite = vi.fn(async (params: CharacterRunSuiteParams) => {
    return makeSuiteResult({
      outputDir: params.outputDir,
      model: params.primaryModel,
      transcript: leakedTranscript,
    });
  });
  // Judge stub: always answers with a single ranking for the candidate.
  const runJudge = vi.fn(async (_params: CharacterRunJudgeParams) => {
    return JSON.stringify({
      rankings: [{ model: candidateModel, rank: 1, score: 0.5, summary: "failed" }],
    });
  });

  const result = await runQaCharacterEval({
    repoRoot: tempRoot,
    outputDir: path.join(tempRoot, "character"),
    models: [candidateModel],
    judgeModels: ["openai/gpt-5.4"],
    runSuite,
    runJudge,
  });

  expect(result.runs[0]).toMatchObject({
    model: candidateModel,
    status: "fail",
    error: "generic request failure leaked into transcript",
  });
});
it("marks idle-timeout fallback transcripts as failed output", async () => {
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) =>
makeSuiteResult({
outputDir: params.outputDir,
model: params.primaryModel,
transcript:
"ASSISTANT OpenClaw QA: The model did not produce a response before the LLM idle timeout. Please try again, or increase `agents.defaults.llm.idleTimeoutSeconds` in your config.",
}),
);
const runJudge = vi.fn(async (_params: CharacterRunJudgeParams) =>
JSON.stringify({
rankings: [{ model: "google/gemini-test", rank: 1, score: 0.5, summary: "failed" }],
}),
);
const result = await runQaCharacterEval({
repoRoot: tempRoot,
outputDir: path.join(tempRoot, "character"),
models: ["google/gemini-test"],
judgeModels: ["openai/gpt-5.4"],
runSuite,
runJudge,
});
expect(result.runs[0]).toMatchObject({
model: "google/gemini-test",
status: "fail",
error: "LLM timeout leaked into transcript",
});
expect(maxActiveRuns).toBe(8);
expect(maxActiveJudges).toBe(8);
});
it("lets explicit candidate thinking override the default panel", async () => {

View File

@@ -10,20 +10,21 @@ const DEFAULT_CHARACTER_SCENARIO_ID = "character-vibes-gollum";
const DEFAULT_CHARACTER_EVAL_MODELS = Object.freeze([
"openai/gpt-5.4",
"openai/gpt-5.2",
"openai/gpt-5",
"anthropic/claude-opus-4-6",
"anthropic/claude-sonnet-4-6",
"minimax/MiniMax-M2.7",
"zai/glm-5.1",
"moonshot/kimi-k2.5",
"qwen/qwen3.6-plus",
"xiaomi/mimo-v2-pro",
"google/gemini-3.1-pro-preview",
]);
const DEFAULT_CHARACTER_THINKING: QaThinkingLevel = "high";
const DEFAULT_CHARACTER_EVAL_CONCURRENCY = 16;
const DEFAULT_CHARACTER_EVAL_CONCURRENCY = 8;
const DEFAULT_CHARACTER_THINKING_BY_MODEL: Readonly<Record<string, QaThinkingLevel>> =
Object.freeze({
"openai/gpt-5.4": "xhigh",
"openai/gpt-5.2": "xhigh",
"openai/gpt-5": "xhigh",
});
const DEFAULT_JUDGE_MODELS = Object.freeze(["openai/gpt-5.4", "anthropic/claude-opus-4-6"]);
const DEFAULT_JUDGE_THINKING: QaThinkingLevel = "xhigh";
@@ -80,14 +81,11 @@ export type QaCharacterEvalJudgeResult = {
model: string;
thinkingDefault: QaThinkingLevel;
fastMode: boolean;
blindModels: boolean;
durationMs: number;
rankings: QaCharacterEvalJudgment[];
error?: string;
};
type QaCharacterEvalProgressLogger = (message: string) => void;
type RunSuiteFn = (params: {
repoRoot: string;
outputDir: string;
@@ -122,12 +120,10 @@ export type QaCharacterEvalParams = {
judgeThinkingDefault?: QaThinkingLevel;
judgeModelOptions?: Record<string, QaCharacterModelOptions>;
judgeTimeoutMs?: number;
judgeBlindModels?: boolean;
candidateConcurrency?: number;
judgeConcurrency?: number;
runSuite?: RunSuiteFn;
runJudge?: RunJudgeFn;
progress?: QaCharacterEvalProgressLogger;
};
function normalizeModelRefs(models: readonly string[]) {
@@ -230,27 +226,6 @@ function collectTranscriptStats(transcript: string) {
};
}
/**
 * Scans a transcript for text that looks like a provider, backend, tool, or
 * configuration error message.
 *
 * Rules are evaluated in declaration order; the label of the first rule whose
 * pattern matches is returned.
 *
 * @param transcript Full transcript text to inspect.
 * @returns The failure label of the first matching rule, or `undefined` when
 *   no rule matches.
 */
function detectTranscriptFailure(transcript: string): string | undefined {
  // Labels shared by more than one rule.
  const genericFailureLabel = "generic request failure leaked into transcript";
  const toolFailureLabel = "tool failure leaked into transcript";

  const rules: ReadonlyArray<{ pattern: RegExp; label: string }> = [
    {
      pattern: /\bmodel `[^`]+` is not supported\b/i,
      label: "model unsupported error leaked into transcript",
    },
    {
      pattern: /\binsufficient account balance\b/i,
      label: "account balance error leaked into transcript",
    },
    {
      pattern: /\b(?:backend|transport|internal) error\b/i,
      label: "backend error leaked into transcript",
    },
    {
      pattern: /\bsomething went wrong while processing your request\b/i,
      label: genericFailureLabel,
    },
    {
      pattern: /\buse \/new to start a fresh session\b/i,
      label: genericFailureLabel,
    },
    {
      pattern: /\bmodel did not produce a response before the LLM idle timeout\b/i,
      label: "LLM timeout leaked into transcript",
    },
    { pattern: /\btool failed\b/i, label: toolFailureLabel },
    // "<tool>: ... failed" on a single line, e.g. "Write: to /tmp/x failed".
    { pattern: /\b(?:read|write|edit|patch):[^\n]*\bfailed\b/i, label: toolFailureLabel },
    {
      pattern: /\bnot configured\b/i,
      label: "configuration error leaked into transcript",
    },
  ];

  for (const { pattern, label } of rules) {
    if (pattern.test(transcript)) {
      return label;
    }
  }
  return undefined;
}
function formatDuration(ms: number) {
if (!Number.isFinite(ms) || ms < 0) {
return "unknown";
@@ -268,42 +243,10 @@ function formatDuration(ms: number) {
return seconds === 0 ? `${minutes}m` : `${minutes}m ${seconds}s`;
}
/**
 * Forwards a progress message to the optional logger, prefixed with
 * `[qa-character] `. Does nothing when no logger is supplied.
 *
 * @param progress Optional logger callback.
 * @param message Message text to emit after the prefix.
 */
function logCharacterEvalProgress(
  progress: QaCharacterEvalProgressLogger | undefined,
  message: string,
) {
  if (progress == null) {
    return;
  }
  const line = `[qa-character] ${message}`;
  progress(line);
}
/**
 * Renders a zero-based index as a one-based "position/total" string.
 *
 * @param index Zero-based position.
 * @param total Total number of items.
 * @returns For example `"1/3"` for `index = 0`, `total = 3`.
 */
function formatEvalIndex(index: number, total: number) {
  const position = index + 1;
  return String(position) + "/" + String(total);
}
/**
 * Builds a single-line, space-separated `key=value` summary of a candidate run:
 * status, formatted duration, user/assistant turn counts, transcript length,
 * and — only when the run has an error — the quoted error text.
 *
 * @param run The run to summarize.
 * @returns The summary line.
 */
function summarizeRunStats(run: QaCharacterEvalRun) {
  const fields = [
    `status=${run.status}`,
    `duration=${formatDuration(run.durationMs)}`,
    `turns=${run.stats.userTurns}/${run.stats.assistantTurns}`,
    `chars=${run.stats.transcriptChars}`,
  ];
  if (run.error) {
    fields.push(`error="${run.error}"`);
  }
  return fields.join(" ");
}
/**
 * Produces the anonymized label for the candidate at a zero-based index:
 * `candidate-` followed by the one-based position, zero-padded to at least
 * two digits.
 *
 * @param index Zero-based candidate index.
 * @returns For example `"candidate-01"` for index 0.
 */
function formatBlindCandidateLabel(index: number) {
  const ordinal = String(index + 1).padStart(2, "0");
  return "candidate-" + ordinal;
}
function buildJudgePrompt(params: {
scenarioId: string;
runs: readonly QaCharacterEvalRun[];
blindModels?: boolean;
}) {
const labelToModel = new Map<string, string>();
function buildJudgePrompt(params: { scenarioId: string; runs: readonly QaCharacterEvalRun[] }) {
const runBlocks = params.runs
.map((run, index) => {
const label = params.blindModels ? formatBlindCandidateLabel(index) : run.model;
labelToModel.set(label, run.model);
return `## CANDIDATE ${label}
.map(
(run) => `## MODEL ${run.model}
Status: ${run.status}
Duration ms (not used for ranking): ${run.durationMs}
@@ -315,11 +258,11 @@ Error: ${run.error ?? "none"}
\`\`\`text
${run.transcript}
\`\`\``;
})
\`\`\``,
)
.join("\n\n");
const prompt = `You are grading OpenClaw natural character conversation transcripts for naturalness, vibes, and funniness.
return `You are grading OpenClaw natural character conversation transcripts for naturalness, vibes, and funniness.
Scenario id: ${params.scenarioId}
@@ -332,14 +275,14 @@ Rank the models by:
- not sounding aware of an eval or test
- avoiding tool/backend/error leakage
Treat candidate labels as opaque identifiers. Do not assume quality from the label.
Treat model names as opaque labels. Do not assume quality from the label.
Duration is recorded for separate benchmark analysis only. Do not rank models by speed.
Return strict JSON only with this shape:
{
"rankings": [
{
"model": "same candidate label",
"model": "same model label",
"rank": 1,
"score": 9.2,
"summary": "one sentence",
@@ -350,7 +293,6 @@ Return strict JSON only with this shape:
}
${runBlocks}`;
return { prompt, labelToModel };
}
function normalizeJudgment(value: unknown, allowedModels: Set<string>): QaCharacterEvalJudgment[] {
@@ -440,7 +382,6 @@ function renderCharacterEvalReport(params: {
`- Judges: ${params.judgments.map((judgment) => judgment.model).join(", ")}`,
`- Judge thinking: ${params.judgments[0]?.thinkingDefault ?? DEFAULT_JUDGE_THINKING}`,
`- Judge fast mode: ${params.judgments.every((judgment) => judgment.fastMode) ? "on" : "mixed"}`,
`- Judge model labels: ${params.judgments.every((judgment) => judgment.blindModels) ? "blind" : "visible"}`,
"",
"## Judge Rankings",
"",
@@ -520,12 +461,7 @@ export async function runQaCharacterEval(params: QaCharacterEvalParams) {
params.candidateConcurrency,
DEFAULT_CHARACTER_EVAL_CONCURRENCY,
);
logCharacterEvalProgress(
params.progress,
`start scenario=${scenarioId} candidates=${models.length} candidateConcurrency=${candidateConcurrency} output=${outputDir}`,
);
const candidatesStartedAt = Date.now();
const runs = await mapWithConcurrency(models, candidateConcurrency, async (model, index) => {
const runs = await mapWithConcurrency(models, candidateConcurrency, async (model) => {
const thinkingDefault = resolveCandidateThinkingDefault({
model,
candidateThinkingDefault: params.candidateThinkingDefault,
@@ -539,10 +475,6 @@ export async function runQaCharacterEval(params: QaCharacterEvalParams) {
});
const modelOutputDir = path.join(runsDir, sanitizePathPart(model));
const runStartedAt = Date.now();
logCharacterEvalProgress(
params.progress,
`candidate start ${formatEvalIndex(index, models.length)} model=${model} thinking=${thinkingDefault} fast=${fastMode ? "on" : "off"}`,
);
try {
const result = await runSuite({
repoRoot,
@@ -555,12 +487,10 @@ export async function runQaCharacterEval(params: QaCharacterEvalParams) {
scenarioIds: [scenarioId],
});
const transcript = extractTranscript(result);
const transcriptFailure = detectTranscriptFailure(transcript);
const status =
result.scenarios.some((scenario) => scenario.status === "fail") || transcriptFailure
? "fail"
: "pass";
const run = {
const status = result.scenarios.some((scenario) => scenario.status === "fail")
? "fail"
: "pass";
return {
model,
status,
durationMs: Date.now() - runStartedAt,
@@ -571,16 +501,10 @@ export async function runQaCharacterEval(params: QaCharacterEvalParams) {
summaryPath: result.summaryPath,
transcript,
stats: collectTranscriptStats(transcript),
...(transcriptFailure ? { error: transcriptFailure } : {}),
} satisfies QaCharacterEvalRun;
logCharacterEvalProgress(
params.progress,
`candidate done ${formatEvalIndex(index, models.length)} model=${model} ${summarizeRunStats(run)}`,
);
return run;
} catch (error) {
const transcript = "";
const run = {
return {
model,
status: "fail",
durationMs: Date.now() - runStartedAt,
@@ -591,18 +515,8 @@ export async function runQaCharacterEval(params: QaCharacterEvalParams) {
stats: collectTranscriptStats(transcript),
error: formatErrorMessage(error),
} satisfies QaCharacterEvalRun;
logCharacterEvalProgress(
params.progress,
`candidate done ${formatEvalIndex(index, models.length)} model=${model} ${summarizeRunStats(run)}`,
);
return run;
}
});
const failedCandidateCount = runs.filter((run) => run.status === "fail").length;
logCharacterEvalProgress(
params.progress,
`candidates done pass=${runs.length - failedCandidateCount} fail=${failedCandidateCount} duration=${formatDuration(Date.now() - candidatesStartedAt)}`,
);
const judgeModels = normalizeModelRefs(
params.judgeModels && params.judgeModels.length > 0
@@ -616,73 +530,38 @@ export async function runQaCharacterEval(params: QaCharacterEvalParams) {
params.judgeConcurrency,
DEFAULT_CHARACTER_EVAL_CONCURRENCY,
);
const judgeTimeoutMs = params.judgeTimeoutMs ?? 180_000;
logCharacterEvalProgress(
params.progress,
`judges start judges=${judgeModels.length} judgeConcurrency=${judgeConcurrency} timeout=${formatDuration(judgeTimeoutMs)} labels=${params.judgeBlindModels === true ? "blind" : "visible"}`,
);
const judgesStartedAt = Date.now();
const judgments = await mapWithConcurrency(
judgeModels,
judgeConcurrency,
async (judgeModel, index) => {
const judgeOptions = resolveJudgeOptions({
model: judgeModel,
judgeThinkingDefault: params.judgeThinkingDefault,
judgeModelOptions: params.judgeModelOptions,
const judgments = await mapWithConcurrency(judgeModels, judgeConcurrency, async (judgeModel) => {
const judgeOptions = resolveJudgeOptions({
model: judgeModel,
judgeThinkingDefault: params.judgeThinkingDefault,
judgeModelOptions: params.judgeModelOptions,
});
let rankings: QaCharacterEvalJudgment[] = [];
let judgeError: string | undefined;
const judgeStartedAt = Date.now();
try {
const rawReply = await runJudge({
repoRoot,
judgeModel,
judgeThinkingDefault: judgeOptions.thinkingDefault,
judgeFastMode: judgeOptions.fastMode,
prompt: buildJudgePrompt({ scenarioId, runs }),
timeoutMs: params.judgeTimeoutMs ?? 180_000,
});
let rankings: QaCharacterEvalJudgment[] = [];
let judgeError: string | undefined;
const judgeStartedAt = Date.now();
logCharacterEvalProgress(
params.progress,
`judge start ${formatEvalIndex(index, judgeModels.length)} model=${judgeModel} thinking=${judgeOptions.thinkingDefault} fast=${judgeOptions.fastMode ? "on" : "off"} timeout=${formatDuration(judgeTimeoutMs)}`,
);
try {
const judgePrompt = buildJudgePrompt({
scenarioId,
runs,
blindModels: params.judgeBlindModels,
});
const rawReply = await runJudge({
repoRoot,
judgeModel,
judgeThinkingDefault: judgeOptions.thinkingDefault,
judgeFastMode: judgeOptions.fastMode,
prompt: judgePrompt.prompt,
timeoutMs: judgeTimeoutMs,
});
rankings = parseJudgeReply(rawReply, new Set(judgePrompt.labelToModel.keys())).map(
(ranking) => ({
...ranking,
model: judgePrompt.labelToModel.get(ranking.model) ?? ranking.model,
}),
);
} catch (error) {
judgeError = formatErrorMessage(error);
}
rankings = parseJudgeReply(rawReply, new Set(models));
} catch (error) {
judgeError = formatErrorMessage(error);
}
const judgment = {
model: judgeModel,
thinkingDefault: judgeOptions.thinkingDefault,
fastMode: judgeOptions.fastMode,
blindModels: params.judgeBlindModels === true,
durationMs: Date.now() - judgeStartedAt,
rankings,
...(judgeError ? { error: judgeError } : {}),
} satisfies QaCharacterEvalJudgeResult;
logCharacterEvalProgress(
params.progress,
`judge done ${formatEvalIndex(index, judgeModels.length)} model=${judgeModel} rankings=${rankings.length} duration=${formatDuration(judgment.durationMs)}${judgeError ? ` error="${judgeError}"` : ""}`,
);
return judgment;
},
);
const failedJudgeCount = judgments.filter((judgment) => judgment.rankings.length === 0).length;
logCharacterEvalProgress(
params.progress,
`judges done ranked=${judgments.length - failedJudgeCount} failed=${failedJudgeCount} duration=${formatDuration(Date.now() - judgesStartedAt)}`,
);
return {
model: judgeModel,
thinkingDefault: judgeOptions.thinkingDefault,
fastMode: judgeOptions.fastMode,
durationMs: Date.now() - judgeStartedAt,
rankings,
...(judgeError ? { error: judgeError } : {}),
} satisfies QaCharacterEvalJudgeResult;
});
const finishedAt = new Date();
const report = renderCharacterEvalReport({
@@ -708,10 +587,6 @@ export async function runQaCharacterEval(params: QaCharacterEvalParams) {
)}\n`,
"utf8",
);
logCharacterEvalProgress(
params.progress,
`report written duration=${formatDuration(finishedAt.getTime() - startedAt.getTime())} report=${reportPath} summary=${summaryPath}`,
);
return {
outputDir,

View File

@@ -158,7 +158,6 @@ describe("qa cli runtime", () => {
modelThinking: ["codex-cli/test-model=medium"],
judgeModel: ["openai/gpt-5.4,thinking=xhigh,fast", "anthropic/claude-opus-4-6,thinking=high"],
judgeTimeoutMs: 180_000,
blindJudgeModels: true,
concurrency: 4,
judgeConcurrency: 3,
});
@@ -181,10 +180,8 @@ describe("qa cli runtime", () => {
"anthropic/claude-opus-4-6": { thinkingDefault: "high" },
},
judgeTimeoutMs: 180_000,
judgeBlindModels: true,
candidateConcurrency: 4,
judgeConcurrency: 3,
progress: expect.any(Function),
});
});
@@ -206,10 +203,8 @@ describe("qa cli runtime", () => {
judgeModels: undefined,
judgeModelOptions: undefined,
judgeTimeoutMs: undefined,
judgeBlindModels: undefined,
candidateConcurrency: undefined,
judgeConcurrency: undefined,
progress: expect.any(Function),
});
});

View File

@@ -225,7 +225,6 @@ export async function runQaCharacterEvalCommand(opts: {
modelThinking?: string[];
judgeModel?: string[];
judgeTimeoutMs?: number;
blindJudgeModels?: boolean;
concurrency?: number;
judgeConcurrency?: number;
}) {
@@ -244,10 +243,8 @@ export async function runQaCharacterEvalCommand(opts: {
judgeModels: judges.models.length > 0 ? judges.models : undefined,
judgeModelOptions: judges.optionsByModel,
judgeTimeoutMs: opts.judgeTimeoutMs,
judgeBlindModels: opts.blindJudgeModels === true ? true : undefined,
candidateConcurrency: parseQaPositiveIntegerOption("--concurrency", opts.concurrency),
judgeConcurrency: parseQaPositiveIntegerOption("--judge-concurrency", opts.judgeConcurrency),
progress: (message) => process.stderr.write(`${message}\n`),
});
process.stdout.write(`QA character eval report: ${result.reportPath}\n`);
process.stdout.write(`QA character eval summary: ${result.summaryPath}\n`);

View File

@@ -38,7 +38,6 @@ async function runQaCharacterEval(opts: {
modelThinking?: string[];
judgeModel?: string[];
judgeTimeoutMs?: number;
blindJudgeModels?: boolean;
concurrency?: number;
judgeConcurrency?: number;
}) {
@@ -200,10 +199,6 @@ export function registerQaLabCli(program: Command) {
.option("--judge-timeout-ms <ms>", "Override judge wait timeout", (value: string) =>
Number(value),
)
.option(
"--blind-judge-models",
"Hide candidate model refs from judge prompts; reports still map rankings back to real refs",
)
.option("--concurrency <count>", "Candidate model run concurrency", (value: string) =>
Number(value),
)
@@ -221,7 +216,6 @@ export function registerQaLabCli(program: Command) {
modelThinking?: string[];
judgeModel?: string[];
judgeTimeoutMs?: number;
blindJudgeModels?: boolean;
concurrency?: number;
judgeConcurrency?: number;
}) => {

View File

@@ -1,34 +0,0 @@
import { describe, expect, it } from "vitest";
import { extractQaFailureReplyText } from "./reply-failure.js";
describe("extractQaFailureReplyText", () => {
it("returns undefined for normal assistant replies", () => {
expect(
extractQaFailureReplyText("Yes, precious. The build is green and a little cursed."),
).toBe(undefined);
});
it("classifies the generic external fallback reply as a failure", () => {
expect(
extractQaFailureReplyText(
"⚠️ Something went wrong while processing your request. Please try again, or use /new to start a fresh session.",
),
).toContain("Something went wrong while processing your request.");
});
it("classifies explicit provider auth guidance as a failure", () => {
expect(
extractQaFailureReplyText(
'⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.',
),
).toContain('No API key found for provider "openai".');
});
it("classifies curated missing-key guidance as a failure", () => {
expect(
extractQaFailureReplyText(
"⚠️ Missing API key for OpenAI on the gateway. Use `openai-codex/gpt-5.4` for OAuth, or set `OPENAI_API_KEY`, then try again.",
),
).toContain("Missing API key for OpenAI on the gateway.");
});
});

View File

@@ -1,26 +0,0 @@
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
// Reply prefixes treated as failure replies. Entries are compared against the
// normalized reply text produced by `normalizeLowercaseStringOrEmpty`
// (presumably lowercased — confirm against that helper), so they are written
// in lowercase here.
const FAILURE_REPLY_PREFIXES = [
  "⚠️ something went wrong while processing your request.",
  "⚠️ session history got out of sync.",
  "⚠️ session history was corrupted.",
  "⚠️ context overflow",
  "⚠️ message ordering conflict.",
  "⚠️ model login expired on the gateway",
  "⚠️ model login failed on the gateway",
  "⚠️ agent failed before reply:",
  "⚠️ no api key found for provider ",
  "⚠️ missing api key for ",
];

/**
 * Classifies a reply as a failure reply when its normalized text starts with
 * one of `FAILURE_REPLY_PREFIXES`.
 *
 * @param text Raw reply text.
 * @returns The trimmed original text (original casing preserved) when it is a
 *   failure reply; `undefined` for blank input or non-matching replies.
 */
export function extractQaFailureReplyText(text: string): string | undefined {
  const candidate = text.trim();
  if (candidate.length === 0) {
    return undefined;
  }
  const normalized = normalizeLowercaseStringOrEmpty(candidate);
  for (const prefix of FAILURE_REPLY_PREFIXES) {
    if (normalized.startsWith(prefix)) {
      return candidate;
    }
  }
  return undefined;
}

View File

@@ -19,7 +19,6 @@ describe("qa scenario catalog", () => {
true,
);
expect(pack.scenarios.some((scenario) => scenario.id === "character-vibes-gollum")).toBe(true);
expect(pack.scenarios.some((scenario) => scenario.id === "character-vibes-c3po")).toBe(true);
expect(pack.scenarios.every((scenario) => scenario.execution?.kind === "flow")).toBe(true);
expect(pack.scenarios.some((scenario) => scenario.execution.flow?.steps.length)).toBe(true);
});

View File

@@ -1,115 +0,0 @@
import { describe, expect, it } from "vitest";
import { createQaBusState } from "./bus-state.js";
import { qaSuiteTesting } from "./suite.js";
describe("qa suite failure reply handling", () => {
it("detects classified failure replies before a success-only outbound predicate matches", async () => {
const state = createQaBusState();
state.addOutboundMessage({
to: "dm:qa-operator",
text: "⚠️ Something went wrong while processing your request. Please try again, or use /new to start a fresh session.",
senderId: "openclaw",
senderName: "OpenClaw QA",
});
const message = qaSuiteTesting.findFailureOutboundMessage(state);
expect(message?.text).toContain("Something went wrong while processing your request.");
});
it("fails success-only waitForOutboundMessage calls when a classified failure reply arrives first", async () => {
const state = createQaBusState();
const pending = qaSuiteTesting.waitForOutboundMessage(
state,
(candidate) =>
candidate.conversation.id === "qa-operator" &&
candidate.text.includes("Remembered ALPHA-7."),
5_000,
);
state.addOutboundMessage({
to: "dm:qa-operator",
text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.',
senderId: "openclaw",
senderName: "OpenClaw QA",
});
await expect(pending).rejects.toThrow('No API key found for provider "openai".');
});
it("fails raw scenario waitForCondition calls when a classified failure reply arrives", async () => {
const state = createQaBusState();
const waitForCondition = qaSuiteTesting.createScenarioWaitForCondition(state);
const pending = waitForCondition(
() =>
state
.getSnapshot()
.messages.filter(
(message) =>
message.direction === "outbound" &&
message.conversation.id === "qa-operator" &&
message.text.includes("ALPHA-7"),
)
.at(-1),
5_000,
10,
);
state.addOutboundMessage({
to: "dm:qa-operator",
text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.',
senderId: "openclaw",
senderName: "OpenClaw QA",
});
await expect(pending).rejects.toThrow('No API key found for provider "openai".');
});
it("fails raw scenario waitForCondition calls even when mixed traffic already exists", async () => {
const state = createQaBusState();
state.addInboundMessage({
conversation: { id: "qa-operator", kind: "direct" },
senderId: "alice",
senderName: "Alice",
text: "hello",
});
state.addOutboundMessage({
to: "dm:qa-operator",
text: "working on it",
senderId: "openclaw",
senderName: "OpenClaw QA",
});
state.addInboundMessage({
conversation: { id: "qa-operator", kind: "direct" },
senderId: "alice",
senderName: "Alice",
text: "ok do it",
});
const waitForCondition = qaSuiteTesting.createScenarioWaitForCondition(state);
const pending = waitForCondition(
() =>
state
.getSnapshot()
.messages.slice(3)
.filter(
(message) =>
message.direction === "outbound" &&
message.conversation.id === "qa-operator" &&
message.text.includes("mission"),
)
.at(-1),
150,
10,
);
state.addOutboundMessage({
to: "dm:qa-operator",
text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.',
senderId: "openclaw",
senderName: "OpenClaw QA",
});
await expect(pending).rejects.toThrow('No API key found for provider "openai".');
});
});

View File

@@ -34,7 +34,6 @@ import {
} from "./model-selection.js";
import { hasModelSwitchContinuityEvidence } from "./model-switch-eval.js";
import type { QaThinkingLevel } from "./qa-gateway-config.js";
import { extractQaFailureReplyText } from "./reply-failure.js";
import { renderQaMarkdownReport, type QaReportCheck, type QaReportScenario } from "./report.js";
import { qaChannelPlugin, type QaBusMessage } from "./runtime-api.js";
import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js";
@@ -166,73 +165,21 @@ async function waitForCondition<T>(
throw new Error(`timed out after ${timeoutMs}ms`);
}
/**
 * Finds the first outbound message whose text is classified as a failure reply
 * by `extractQaFailureReplyText`.
 *
 * @param state Bus state whose snapshot is searched.
 * @param options.sinceIndex Number of leading messages to skip (default 0).
 * @param options.cursorSpace Which list `sinceIndex` counts into: `"all"`
 *   skips within the full message list; `"outbound"` (the default) skips
 *   within the outbound-only list.
 * @returns The first matching message, or `undefined` when there is none.
 */
function findFailureOutboundMessage(
  state: QaBusState,
  options?: { sinceIndex?: number; cursorSpace?: "all" | "outbound" },
) {
  const startIndex = options?.sinceIndex ?? 0;
  const cursorSpace = options?.cursorSpace ?? "outbound";
  const allMessages = state.getSnapshot().messages;

  const candidates =
    cursorSpace === "all"
      ? allMessages.slice(startIndex)
      : allMessages.filter((message) => message.direction === "outbound").slice(startIndex);

  return candidates.find((message) => {
    if (message.direction !== "outbound") {
      return false;
    }
    return Boolean(extractQaFailureReplyText(message.text));
  });
}
/**
 * Creates a `waitForCondition` variant bound to a bus state. The message count
 * at creation time is recorded, and on every poll any failure reply found
 * among messages added after that point causes the check to throw before the
 * caller's own check runs.
 *
 * How a thrown error affects the overall wait is decided by `waitForCondition`
 * (not shown here).
 *
 * @param state Bus state to watch for failure replies.
 * @returns An async function taking `(check, timeoutMs = 15_000, intervalMs = 100)`.
 */
function createScenarioWaitForCondition(state: QaBusState) {
  // Baseline into the full message list; only later messages are inspected.
  const baselineIndex = state.getSnapshot().messages.length;

  return async function waitForScenarioCondition<T>(
    check: () => T | Promise<T | null | undefined> | null | undefined,
    timeoutMs = 15_000,
    intervalMs = 100,
  ): Promise<T> {
    return await waitForCondition(
      async () => {
        const failure = findFailureOutboundMessage(state, {
          sinceIndex: baselineIndex,
          cursorSpace: "all",
        });
        if (failure) {
          const reason = extractQaFailureReplyText(failure.text) ?? failure.text;
          throw new Error(reason);
        }
        return await check();
      },
      timeoutMs,
      intervalMs,
    );
  };
}
async function waitForOutboundMessage(
state: QaBusState,
predicate: (message: QaBusMessage) => boolean,
timeoutMs = 15_000,
options?: { sinceIndex?: number },
) {
return await waitForCondition(() => {
const failureMessage = findFailureOutboundMessage(state, options);
if (failureMessage) {
throw new Error(extractQaFailureReplyText(failureMessage.text) ?? failureMessage.text);
}
const match = state
.getSnapshot()
.messages.filter((message) => message.direction === "outbound")
.slice(options?.sinceIndex ?? 0)
.find(predicate);
if (!match) {
return undefined;
}
const failureReply = extractQaFailureReplyText(match.text);
if (failureReply) {
throw new Error(failureReply);
}
return match;
}, timeoutMs);
return await waitForCondition(
() =>
state
.getSnapshot()
.messages.filter((message) => message.direction === "outbound")
.slice(options?.sinceIndex ?? 0)
.find(predicate),
timeoutMs,
);
}
async function waitForNoOutbound(state: QaBusState, timeoutMs = 1_200) {
@@ -1080,7 +1027,7 @@ function createScenarioFlowApi(
sleep,
randomUUID,
runScenario,
waitForCondition: createScenarioWaitForCondition(env.lab.state),
waitForCondition,
waitForOutboundMessage,
waitForNoOutbound,
recentOutboundSummary,
@@ -1139,12 +1086,6 @@ function createScenarioFlowApi(
};
}
// Exported bundle of this module's wait/failure-detection helpers
// (presumably so tests can call them directly — confirm against consumers).
export const qaSuiteTesting = {
createScenarioWaitForCondition,
findFailureOutboundMessage,
waitForOutboundMessage,
};
async function runScenarioDefinition(
env: QaSuiteEnvironment,
scenario: ReturnType<typeof readQaBootstrapScenarioCatalog>["scenarios"][number],

View File

@@ -2,11 +2,7 @@ export {
applyQwenNativeStreamingUsageCompat,
buildQwenDefaultModelDefinition,
buildQwenModelDefinition,
buildQwenModelCatalogForBaseUrl,
isNativeQwenBaseUrl,
isQwen36PlusSupportedBaseUrl,
isQwenCodingPlanBaseUrl,
QWEN_36_PLUS_MODEL_ID,
QWEN_BASE_URL,
QWEN_CN_BASE_URL,
QWEN_DEFAULT_COST,

View File

@@ -1,7 +1,6 @@
import { defineSingleProviderPluginEntry } from "openclaw/plugin-sdk/provider-entry";
import { applyQwenNativeStreamingUsageCompat } from "./api.js";
import { buildQwenMediaUnderstandingProvider } from "./media-understanding-provider.js";
import { isQwenCodingPlanBaseUrl, QWEN_36_PLUS_MODEL_ID, QWEN_BASE_URL } from "./models.js";
import {
applyQwenConfig,
applyQwenConfigCn,
@@ -13,38 +12,6 @@ import { buildQwenProvider } from "./provider-catalog.js";
import { buildQwenVideoGenerationProvider } from "./video-generation-provider.js";
const PROVIDER_ID = "qwen";
const LEGACY_PROVIDER_ID = "modelstudio";
/**
 * Canonicalizes a provider id for comparison by trimming surrounding
 * whitespace and lowercasing.
 *
 * @param value Raw provider id.
 * @returns The trimmed, lowercased id.
 */
function normalizeProviderId(value: string): string {
  const trimmed = value.trim();
  return trimmed.toLowerCase();
}
/**
 * Looks up the base URL configured for the Qwen provider.
 *
 * Walks `config.models.providers` in entry order and returns the first
 * non-blank, trimmed `baseUrl` whose provider key normalizes to either
 * `PROVIDER_ID` or `LEGACY_PROVIDER_ID`.
 *
 * @param config Configuration object (may be undefined).
 * @returns The trimmed base URL, or `undefined` when none is configured.
 */
function resolveConfiguredQwenBaseUrl(
  config: { models?: { providers?: Record<string, { baseUrl?: string } | undefined> } } | undefined,
): string | undefined {
  const providerEntries = Object.entries(config?.models?.providers ?? {});
  for (const [providerId, provider] of providerEntries) {
    const id = normalizeProviderId(providerId);
    const isQwenProvider = id === PROVIDER_ID || id === LEGACY_PROVIDER_ID;
    if (!isQwenProvider) {
      continue;
    }
    const trimmedBaseUrl = provider?.baseUrl?.trim();
    if (trimmedBaseUrl) {
      return trimmedBaseUrl;
    }
  }
  return undefined;
}
/**
 * Reports whether the effective Qwen base URL is a Coding Plan endpoint.
 *
 * An explicit `params.baseUrl` takes precedence; otherwise the base URL is
 * resolved from `params.config`.
 *
 * @returns `true` when `isQwenCodingPlanBaseUrl` accepts the effective URL.
 */
function isQwen36PlusUnsupportedForConfig(params: {
  config: Parameters<typeof resolveConfiguredQwenBaseUrl>[0];
  baseUrl?: string;
}): boolean {
  const effectiveBaseUrl = params.baseUrl ?? resolveConfiguredQwenBaseUrl(params.config);
  return isQwenCodingPlanBaseUrl(effectiveBaseUrl);
}
export default defineSingleProviderPluginEntry({
id: PROVIDER_ID,
@@ -115,7 +82,7 @@ export default defineSingleProviderPluginEntry({
"Manage API keys: https://home.qwencloud.com/api-keys",
"Docs: https://docs.qwencloud.com/",
"Endpoint: coding.dashscope.aliyuncs.com",
"Models: qwen3.5-plus, glm-5, kimi-k2.5, MiniMax-M2.5, etc.",
"Models: qwen3.6-plus, glm-5, kimi-k2.5, MiniMax-M2.5, etc.",
].join("\n"),
noteTitle: "Qwen Cloud Coding Plan (China)",
wizard: {
@@ -138,7 +105,7 @@ export default defineSingleProviderPluginEntry({
"Manage API keys: https://home.qwencloud.com/api-keys",
"Docs: https://docs.qwencloud.com/",
"Endpoint: coding-intl.dashscope.aliyuncs.com",
"Models: qwen3.5-plus, glm-5, kimi-k2.5, MiniMax-M2.5, etc.",
"Models: qwen3.6-plus, glm-5, kimi-k2.5, MiniMax-M2.5, etc.",
].join("\n"),
noteTitle: "Qwen Cloud Coding Plan (Global/Intl)",
wizard: {
@@ -149,46 +116,11 @@ export default defineSingleProviderPluginEntry({
},
],
catalog: {
run: async (ctx) => {
const apiKey = ctx.resolveProviderApiKey(PROVIDER_ID).apiKey;
if (!apiKey) {
return null;
}
const baseUrl = resolveConfiguredQwenBaseUrl(ctx.config) ?? QWEN_BASE_URL;
return {
provider: {
...buildQwenProvider({ baseUrl }),
apiKey,
},
};
},
buildProvider: buildQwenProvider,
allowExplicitBaseUrl: true,
},
applyNativeStreamingUsageCompat: ({ providerConfig }) =>
applyQwenNativeStreamingUsageCompat(providerConfig),
normalizeConfig: ({ providerConfig }) => {
if (!isQwenCodingPlanBaseUrl(providerConfig.baseUrl)) {
return undefined;
}
const models = providerConfig.models?.filter((model) => model.id !== QWEN_36_PLUS_MODEL_ID);
return models && models.length !== providerConfig.models?.length
? { ...providerConfig, models }
: undefined;
},
suppressBuiltInModel: (ctx) => {
const provider = normalizeProviderId(ctx.provider);
if (
(provider !== PROVIDER_ID && provider !== LEGACY_PROVIDER_ID) ||
ctx.modelId !== QWEN_36_PLUS_MODEL_ID ||
!isQwen36PlusUnsupportedForConfig({ config: ctx.config, baseUrl: ctx.baseUrl })
) {
return undefined;
}
return {
suppress: true,
errorMessage:
"Unknown model: qwen/qwen3.6-plus. qwen3.6-plus is not supported on the Qwen Coding Plan endpoint; use a Standard pay-as-you-go Qwen endpoint or choose qwen/qwen3.5-plus.",
};
},
},
register(api) {
api.registerMediaUnderstandingProvider(buildQwenMediaUnderstandingProvider());

View File

@@ -15,7 +15,6 @@ export const QWEN_STANDARD_GLOBAL_BASE_URL =
"https://dashscope-intl.aliyuncs.com/compatible-mode/v1";
export const QWEN_DEFAULT_MODEL_ID = "qwen3.5-plus";
export const QWEN_36_PLUS_MODEL_ID = "qwen3.6-plus";
export const QWEN_DEFAULT_COST = {
input: 0,
output: 0,
@@ -35,8 +34,8 @@ export const QWEN_MODEL_CATALOG: ReadonlyArray<ModelDefinitionConfig> = [
maxTokens: 65_536,
},
{
id: QWEN_36_PLUS_MODEL_ID,
name: QWEN_36_PLUS_MODEL_ID,
id: "qwen3.6-plus",
name: "qwen3.6-plus",
reasoning: false,
input: ["text", "image"],
cost: QWEN_DEFAULT_COST,
@@ -108,33 +107,6 @@ export const QWEN_MODEL_CATALOG: ReadonlyArray<ModelDefinitionConfig> = [
},
];
/**
 * Reports whether `baseUrl` resolves to one of the Qwen Coding Plan hosts.
 *
 * @param baseUrl - Candidate endpoint URL; may be undefined or blank.
 * @returns `true` only when the URL parses and its lower-cased hostname is
 *   `coding.dashscope.aliyuncs.com` or `coding-intl.dashscope.aliyuncs.com`;
 *   `false` for missing, blank, or unparseable input.
 */
export function isQwenCodingPlanBaseUrl(baseUrl: string | undefined): boolean {
  const trimmed = baseUrl?.trim();
  if (!trimmed) {
    return false;
  }
  let host: string;
  try {
    // Parse the original value (not the trimmed copy), as callers supplied it.
    host = new URL(baseUrl as string).hostname.toLowerCase();
  } catch {
    // Not a parseable absolute URL.
    return false;
  }
  switch (host) {
    case "coding.dashscope.aliyuncs.com":
    case "coding-intl.dashscope.aliyuncs.com":
      return true;
    default:
      return false;
  }
}
/**
 * Reports whether qwen3.6-plus may be offered for `baseUrl`.
 *
 * This is the exact negation of `isQwenCodingPlanBaseUrl`, so missing, blank,
 * and unparseable values count as supported.
 */
export function isQwen36PlusSupportedBaseUrl(baseUrl: string | undefined): boolean {
  const onCodingPlan = isQwenCodingPlanBaseUrl(baseUrl);
  return !onCodingPlan;
}
/**
 * Selects the model catalog to advertise for `baseUrl`.
 *
 * @returns `QWEN_MODEL_CATALOG` itself when qwen3.6-plus is supported for the
 *   URL, otherwise a filtered copy without the qwen3.6-plus entry.
 */
export function buildQwenModelCatalogForBaseUrl(
  baseUrl: string | undefined,
): ReadonlyArray<ModelDefinitionConfig> {
  if (isQwen36PlusSupportedBaseUrl(baseUrl)) {
    return QWEN_MODEL_CATALOG;
  }
  return QWEN_MODEL_CATALOG.filter((entry) => entry.id !== QWEN_36_PLUS_MODEL_ID);
}
export function isNativeQwenBaseUrl(baseUrl: string | undefined): boolean {
return supportsNativeStreamingUsageCompat({
providerId: "qwen",

View File

@@ -22,7 +22,7 @@ export {
const qwenPresetAppliers = createModelCatalogPresetAppliers<[string]>({
primaryModelRef: QWEN_DEFAULT_MODEL_REF,
resolveParams: (_cfg: OpenClawConfig, baseUrl: string) => {
const provider = buildQwenProvider({ baseUrl });
const provider = buildQwenProvider();
return {
providerId: "qwen",
api: provider.api ?? "openai-completions",

View File

@@ -3,7 +3,6 @@ import {
applyQwenNativeStreamingUsageCompat,
buildQwenProvider,
QWEN_BASE_URL,
QWEN_STANDARD_GLOBAL_BASE_URL,
QWEN_DEFAULT_MODEL_ID,
} from "./api.js";
@@ -15,15 +14,7 @@ describe("qwen provider catalog", () => {
expect(provider.api).toBe("openai-completions");
expect(provider.models?.length).toBeGreaterThan(0);
expect(provider.models?.find((model) => model.id === QWEN_DEFAULT_MODEL_ID)).toBeTruthy();
expect(provider.models?.find((model) => model.id === "qwen3.6-plus")).toBeFalsy();
});
it("only advertises qwen3.6-plus on Standard endpoints", () => {
const coding = buildQwenProvider({ baseUrl: QWEN_BASE_URL });
const standard = buildQwenProvider({ baseUrl: QWEN_STANDARD_GLOBAL_BASE_URL });
expect(coding.models?.find((model) => model.id === "qwen3.6-plus")).toBeFalsy();
expect(standard.models?.find((model) => model.id === "qwen3.6-plus")).toBeTruthy();
expect(provider.models?.find((model) => model.id === "qwen3.6-plus")).toBeTruthy();
});
it("opts native Qwen baseUrls into streaming usage only inside the extension", () => {

View File

@@ -1,12 +1,11 @@
import type { ModelProviderConfig } from "openclaw/plugin-sdk/provider-model-shared";
import { buildQwenModelCatalogForBaseUrl, QWEN_BASE_URL } from "./models.js";
import { QWEN_BASE_URL, QWEN_MODEL_CATALOG } from "./models.js";
export function buildQwenProvider(params?: { baseUrl?: string }): ModelProviderConfig {
const baseUrl = params?.baseUrl ?? QWEN_BASE_URL;
export function buildQwenProvider(): ModelProviderConfig {
return {
baseUrl,
baseUrl: QWEN_BASE_URL,
api: "openai-completions",
models: buildQwenModelCatalogForBaseUrl(baseUrl).map((model) => ({ ...model })),
models: QWEN_MODEL_CATALOG.map((model) => ({ ...model })),
};
}

View File

@@ -3,6 +3,7 @@ export {
collectRuntimeConfigAssignments,
secretTargetRegistryEntries,
} from "./src/secret-contract.js";
export { createSlackOutboundPayloadHarness } from "./src/outbound-payload-harness.js";
export type {
SlackInteractiveHandlerContext,
SlackInteractiveHandlerRegistration,

View File

@@ -372,21 +372,6 @@ describe("slackPlugin outbound", () => {
},
};
// The outbound hook should report non-empty "block" text as visible output,
// while the same text delivered with kind "tool" is not treated as visible.
it("treats ACP block text as visible delivered output", () => {
expect(
slackPlugin.outbound?.shouldTreatDeliveredTextAsVisible?.({
kind: "block",
text: "hello",
}),
).toBe(true);
expect(
slackPlugin.outbound?.shouldTreatDeliveredTextAsVisible?.({
kind: "tool",
text: "hello",
}),
).toBe(false);
});
it("advertises the 8000-character Slack default chunk limit", () => {
expect(slackOutbound.textChunkLimit).toBe(8000);
expect(slackPlugin.outbound?.textChunkLimit).toBe(8000);

View File

@@ -93,15 +93,6 @@ async function resolveSlackHandleAction() {
);
}
/**
 * Decides whether a delivered payload counts as visible text output.
 *
 * @param params.kind - Dispatch kind of the delivered payload.
 * @param params.text - Delivered text, if any.
 * @returns `true` only for `"block"` deliveries whose text is a string with
 *   at least one non-whitespace character.
 */
function shouldTreatSlackDeliveredTextAsVisible(params: {
  kind: "tool" | "block" | "final";
  text?: string;
}): boolean {
  if (params.kind !== "block") {
    return false;
  }
  const { text } = params;
  if (typeof text !== "string") {
    return false;
  }
  return text.trim().length > 0;
}
// Select the appropriate Slack token for read/write operations.
function getTokenForOperation(
account: ResolvedSlackAccount,
@@ -574,7 +565,6 @@ export const slackPlugin: ChannelPlugin<ResolvedSlackAccount, SlackProbe> = crea
deliveryMode: "direct",
chunker: null,
textChunkLimit: SLACK_TEXT_LIMIT,
shouldTreatDeliveredTextAsVisible: shouldTreatSlackDeliveredTextAsVisible,
shouldSuppressLocalPayloadPrompt: ({ cfg, accountId, payload }) =>
shouldSuppressLocalSlackExecApprovalPrompt({
cfg,

View File

@@ -1,312 +0,0 @@
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
// Text of the single "final" payload in the default dispatch sequence.
const FINAL_REPLY_TEXT = "final answer";
// Thread timestamp shared by the prepared message and the mocked thread helpers.
const THREAD_TS = "thread-1";
// Text reused for both the tool and the final payload in the same-content test.
const SAME_TEXT = "same reply";
// Mock functions handed to the vi.mock factories so tests can configure and inspect them.
const createSlackDraftStreamMock = vi.fn();
const deliverRepliesMock = vi.fn(async () => {});
const finalizeSlackPreviewEditMock = vi.fn(async () => {});
// Payloads that the mocked dispatchInboundMessage replays through dispatcher.deliver, in order.
let mockedDispatchSequence: Array<{
kind: "tool" | "block" | "final";
payload: { text: string };
}> = [];
// Shared do-nothing callbacks used by the draft stream stub.
const noop = () => {};
const noopAsync = async () => {};
/**
 * Builds an inert draft stream stand-in: every operation is a no-op and the
 * message/channel ids are fixed values.
 */
function createDraftStreamStub() {
  const draftStreamStub = {
    update: noop,
    flush: noopAsync,
    clear: noopAsync,
    stop: noop,
    forceNewMessage: noop,
    messageId: () => "171234.567",
    channelId: () => "C123",
  };
  return draftStreamStub;
}
/**
 * Builds a minimal prepared Slack message fixture for a threaded channel
 * message. The result is cast to `never` so it can be passed wherever the
 * real prepared-message type is expected.
 */
function createPreparedSlackMessage() {
  const ctx = {
    cfg: {},
    runtime: {},
    botToken: "xoxb-test",
    app: { client: {} },
    teamId: "T1",
    textLimit: 4000,
    typingReaction: "",
    removeAckAfterReply: false,
    historyLimit: 0,
    channelHistories: new Map(),
    allowFrom: [],
    setSlackThreadStatus: async () => undefined,
  };
  const account = {
    accountId: "default",
    config: {},
  };
  // Inbound message that already lives in the shared test thread.
  const message = {
    channel: "C123",
    ts: "171234.111",
    thread_ts: THREAD_TS,
    user: "U123",
  };
  const route = {
    agentId: "agent-1",
    accountId: "default",
    mainSessionKey: "main",
  };
  const prepared = {
    ctx,
    account,
    message,
    route,
    channelConfig: null,
    replyTarget: "channel:C123",
    ctxPayload: {
      MessageThreadId: THREAD_TS,
    },
    replyToMode: "all",
    isDirectMessage: false,
    isRoomish: false,
    historyKey: "history-key",
    preview: "",
    ackReactionValue: "eyes",
    ackReactionPromise: null,
  };
  return prepared as never;
}
// --- Plugin SDK modules: replaced with inert stubs. ---
vi.mock("openclaw/plugin-sdk/agent-runtime", () => ({
resolveHumanDelayConfig: () => undefined,
}));
// Status reaction controller whose methods all resolve without doing anything; hold timings are zero.
vi.mock("openclaw/plugin-sdk/channel-feedback", () => ({
DEFAULT_TIMING: {
doneHoldMs: 0,
errorHoldMs: 0,
},
createStatusReactionController: () => ({
setQueued: async () => {},
setThinking: async () => {},
setTool: async () => {},
setError: async () => {},
setDone: async () => {},
clear: async () => {},
restoreInitial: async () => {},
}),
logAckFailure: () => {},
logTypingFailure: () => {},
removeAckReactionAfterReply: () => {},
}));
vi.mock("openclaw/plugin-sdk/channel-reply-pipeline", () => ({
createChannelReplyPipeline: () => ({
typingCallbacks: {
onIdle: vi.fn(),
},
onModelSelected: undefined,
}),
}));
// Block streaming and native transport are both reported as disabled.
vi.mock("openclaw/plugin-sdk/channel-streaming", () => ({
resolveChannelStreamingBlockEnabled: () => false,
resolveChannelStreamingNativeTransport: () => false,
}));
vi.mock("openclaw/plugin-sdk/outbound-runtime", () => ({
resolveAgentOutboundIdentity: () => undefined,
}));
vi.mock("openclaw/plugin-sdk/reply-history", () => ({
clearHistoryEntriesIfEnabled: () => {},
}));
// Simplified stand-in: trims the text (the override wins over payload.text) and
// derives the has* flags from whether text and media URLs are present.
vi.mock("openclaw/plugin-sdk/reply-payload", () => ({
resolveSendableOutboundReplyParts: (
payload: { text?: string; mediaUrl?: string; mediaUrls?: string[] },
opts?: { text?: string },
) => {
const text = (opts?.text ?? payload.text ?? "").trim();
const mediaUrls = payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []);
return {
text,
trimmedText: text,
hasText: text.length > 0,
hasMedia: mediaUrls.length > 0,
mediaUrls,
hasContent: text.length > 0 || mediaUrls.length > 0,
};
},
}));
vi.mock("openclaw/plugin-sdk/runtime-env", () => ({
danger: (message: string) => message,
logVerbose: () => {},
shouldLogVerbose: () => false,
}));
vi.mock("openclaw/plugin-sdk/security-runtime", () => ({
resolvePinnedMainDmOwnerFromAllowlist: () => undefined,
}));
vi.mock("openclaw/plugin-sdk/text-runtime", () => ({
normalizeOptionalLowercaseString: (value?: string) => value?.toLowerCase(),
}));
// --- Extension-local modules. ---
vi.mock("../../actions.js", () => ({
reactSlackMessage: async () => {},
removeSlackReaction: async () => {},
}));
// The draft stream factory is a mock so each test controls the stream it returns.
vi.mock("../../draft-stream.js", () => ({
createSlackDraftStream: createSlackDraftStreamMock,
}));
vi.mock("../../format.js", () => ({
normalizeSlackOutboundText: (value: string) => value.trim(),
}));
vi.mock("../../limits.js", () => ({
SLACK_TEXT_LIMIT: 4000,
}));
vi.mock("../../sent-thread-cache.js", () => ({
recordSlackThreadParticipation: () => {},
}));
// Streaming config is pinned to partial mode, native streaming off, append draft mode.
vi.mock("../../stream-mode.js", () => ({
applyAppendOnlyStreamUpdate: ({ incoming }: { incoming: string }) => ({
changed: true,
rendered: incoming,
source: incoming,
}),
buildStatusFinalPreviewText: () => "status",
resolveSlackStreamingConfig: () => ({
mode: "partial",
nativeStreaming: false,
draftMode: "append",
}),
}));
vi.mock("../../streaming.js", () => ({
appendSlackStream: async () => {},
startSlackStream: async () => ({
threadTs: THREAD_TS,
stopped: false,
}),
stopSlackStream: async () => {},
}));
// Every message is treated as a reply inside THREAD_TS.
vi.mock("../../threading.js", () => ({
resolveSlackThreadTargets: () => ({
statusThreadTs: THREAD_TS,
isThreadReply: true,
}),
}));
vi.mock("../allow-list.js", () => ({
normalizeSlackAllowOwnerEntry: (value: string) => value,
}));
vi.mock("../config.runtime.js", () => ({
resolveStorePath: () => "/tmp/openclaw-store.json",
updateLastRoute: async () => {},
}));
// deliverReplies is a mock; the delivery plan always targets THREAD_TS.
vi.mock("../replies.js", () => ({
createSlackReplyDeliveryPlan: () => ({
nextThreadTs: () => THREAD_TS,
markSent: () => {},
}),
deliverReplies: deliverRepliesMock,
readSlackReplyBlocks: () => undefined,
resolveSlackThreadTs: () => THREAD_TS,
}));
// The dispatcher exposes the caller's deliver callback directly, and
// dispatchInboundMessage replays mockedDispatchSequence through it in order,
// then reports how many entries were of kind "final".
vi.mock("../reply.runtime.js", () => ({
createReplyDispatcherWithTyping: (params: {
deliver: (payload: unknown, info: { kind: "tool" | "block" | "final" }) => Promise<void>;
}) => ({
dispatcher: {
deliver: params.deliver,
},
replyOptions: {},
markDispatchIdle: () => {},
}),
dispatchInboundMessage: async (params: {
dispatcher: {
deliver: (
payload: { text: string },
info: { kind: "tool" | "block" | "final" },
) => Promise<void>;
};
}) => {
for (const entry of mockedDispatchSequence) {
await params.dispatcher.deliver(entry.payload, { kind: entry.kind });
}
return {
queuedFinal: false,
counts: {
final: mockedDispatchSequence.filter((entry) => entry.kind === "final").length,
},
};
},
}));
// Preview finalization is a mock so tests can force it to fail.
vi.mock("./preview-finalize.js", () => ({
finalizeSlackPreviewEdit: finalizeSlackPreviewEditMock,
}));
// Function under test; assigned in beforeAll via a dynamic import of ./dispatch.js.
let dispatchPreparedSlackMessage: typeof import("./dispatch.js").dispatchPreparedSlackMessage;
// Covers the path where finalizing the preview edit fails and the reply must
// still go out through deliverReplies.
describe("dispatchPreparedSlackMessage preview fallback", () => {
beforeAll(async () => {
({ dispatchPreparedSlackMessage } = await import("./dispatch.js"));
});
// Each test starts from: one "final" payload queued, a stub draft stream, and
// a preview finalize call that always rejects.
beforeEach(() => {
createSlackDraftStreamMock.mockReset();
deliverRepliesMock.mockReset();
finalizeSlackPreviewEditMock.mockReset();
mockedDispatchSequence = [{ kind: "final", payload: { text: FINAL_REPLY_TEXT } }];
createSlackDraftStreamMock.mockReturnValue(createDraftStreamStub());
finalizeSlackPreviewEditMock.mockRejectedValue(new Error("socket closed"));
});
it("falls back to normal delivery when preview finalize fails", async () => {
await dispatchPreparedSlackMessage(createPreparedSlackMessage());
// One failed finalize attempt, then exactly one normal delivery into the thread.
expect(finalizeSlackPreviewEditMock).toHaveBeenCalledTimes(1);
expect(deliverRepliesMock).toHaveBeenCalledTimes(1);
expect(deliverRepliesMock).toHaveBeenCalledWith(
expect.objectContaining({
replyThreadTs: THREAD_TS,
replies: [expect.objectContaining({ text: FINAL_REPLY_TEXT })],
}),
);
});
it("keeps same-content tool and final payloads distinct after preview fallback", async () => {
// Identical text under two different dispatch kinds.
mockedDispatchSequence = [
{ kind: "tool", payload: { text: SAME_TEXT } },
{ kind: "final", payload: { text: SAME_TEXT } },
];
await dispatchPreparedSlackMessage(createPreparedSlackMessage());
// Both payloads must be attempted and delivered; neither is dropped as a duplicate.
expect(finalizeSlackPreviewEditMock).toHaveBeenCalledTimes(2);
expect(deliverRepliesMock).toHaveBeenCalledTimes(2);
expect(deliverRepliesMock).toHaveBeenNthCalledWith(
1,
expect.objectContaining({
replyThreadTs: THREAD_TS,
replies: [expect.objectContaining({ text: SAME_TEXT })],
}),
);
expect(deliverRepliesMock).toHaveBeenNthCalledWith(
2,
expect.objectContaining({
replyThreadTs: THREAD_TS,
replies: [expect.objectContaining({ text: SAME_TEXT })],
}),
);
});
});

View File

@@ -1,6 +1,5 @@
import { describe, expect, it } from "vitest";
import {
createSlackTurnDeliveryTracker,
isSlackStreamingEnabled,
resolveSlackStreamingThreadHint,
shouldEnableSlackPreviewStreaming,
@@ -20,46 +19,6 @@ describe("slack native streaming defaults", () => {
});
});
// Verifies what the turn delivery tracker treats as the same delivery:
// thread, explicit reply target, and dispatch kind all distinguish attempts.
describe("slack turn delivery tracker", () => {
  it("treats repeated text payloads on the same thread as duplicates", () => {
    const tracker = createSlackTurnDeliveryTracker();
    const reply = { text: "same reply" };
    const attempt = { kind: "final" as const, payload: reply, threadTs: "123.456" };

    expect(tracker.hasDelivered(attempt)).toBe(false);
    tracker.markDelivered(attempt);
    expect(tracker.hasDelivered(attempt)).toBe(true);
    // Same payload on another thread is not a duplicate.
    expect(tracker.hasDelivered({ ...attempt, threadTs: "other-thread" })).toBe(false);
  });

  it("keeps explicit reply targets distinct from the shared thread target", () => {
    const tracker = createSlackTurnDeliveryTracker();
    const attemptFor = (replyToId: string) => ({
      kind: "final" as const,
      payload: { text: "same reply", replyToId },
      threadTs: "123.456",
    });

    tracker.markDelivered(attemptFor("thread-A"));

    expect(tracker.hasDelivered(attemptFor("thread-B"))).toBe(false);
  });

  it("keeps distinct dispatch kinds separate for identical payloads", () => {
    const tracker = createSlackTurnDeliveryTracker();
    const reply = { text: "same reply" };
    const attemptOf = (kind: "tool" | "final") => ({
      kind,
      payload: reply,
      threadTs: "123.456",
    });

    tracker.markDelivered(attemptOf("tool"));

    expect(tracker.hasDelivered(attemptOf("tool"))).toBe(true);
    expect(tracker.hasDelivered(attemptOf("final"))).toBe(false);
  });
});
describe("slack native streaming thread hint", () => {
it("stays off-thread when replyToMode=off and message is not in a thread", () => {
expect(

View File

@@ -15,7 +15,7 @@ import {
import { resolveAgentOutboundIdentity } from "openclaw/plugin-sdk/outbound-runtime";
import { clearHistoryEntriesIfEnabled } from "openclaw/plugin-sdk/reply-history";
import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload";
import type { ReplyDispatchKind, ReplyPayload } from "openclaw/plugin-sdk/reply-runtime";
import type { ReplyPayload } from "openclaw/plugin-sdk/reply-runtime";
import { danger, logVerbose, shouldLogVerbose } from "openclaw/plugin-sdk/runtime-env";
import { resolvePinnedMainDmOwnerFromAllowlist } from "openclaw/plugin-sdk/security-runtime";
import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/text-runtime";
@@ -123,47 +123,6 @@ export function resolveSlackStreamingThreadHint(params: {
});
}
/** One delivery attempt within a turn, as passed to the turn delivery tracker. */
type SlackTurnDeliveryAttempt = {
// Dispatch kind of the payload; part of the delivery key.
kind: ReplyDispatchKind;
// Reply payload being delivered.
payload: ReplyPayload;
// Thread the attempt targets; a missing value is keyed as the empty string.
threadTs?: string;
// When set, passed as the text option when resolving the payload's sendable parts.
textOverride?: string;
};
/**
 * Builds the key that identifies a delivery attempt within a turn.
 *
 * @returns A JSON string combining kind, thread, explicit reply target, trimmed
 *   text, media URLs and Slack blocks; or `null` when the payload has neither
 *   sendable content nor any Slack blocks.
 */
function buildSlackTurnDeliveryKey(params: SlackTurnDeliveryAttempt): string | null {
  const { kind, payload, threadTs, textOverride } = params;
  const parts = resolveSendableOutboundReplyParts(payload, {
    text: textOverride,
  });
  const blocks = readSlackReplyBlocks(payload);
  const hasSomethingToSend = parts.hasContent || blocks?.length;
  if (!hasSomethingToSend) {
    return null;
  }
  // Property order is part of the key text, so it stays fixed.
  const keyFields = {
    kind,
    threadTs: threadTs ?? "",
    replyToId: payload.replyToId ?? null,
    text: parts.trimmedText,
    mediaUrls: parts.mediaUrls,
    blocks: blocks ?? null,
  };
  return JSON.stringify(keyFields);
}
/**
 * Creates a tracker that remembers which delivery attempts have been recorded.
 *
 * Attempts are compared by the key from `buildSlackTurnDeliveryKey`; attempts
 * with no key are never recorded and never reported as delivered.
 */
export function createSlackTurnDeliveryTracker() {
  const seenKeys = new Set<string>();
  return {
    /** Returns true when an attempt with the same key was already marked. */
    hasDelivered(params: SlackTurnDeliveryAttempt) {
      const attemptKey = buildSlackTurnDeliveryKey(params);
      if (!attemptKey) {
        return false;
      }
      return seenKeys.has(attemptKey);
    },
    /** Records the attempt's key, if it has one. */
    markDelivered(params: SlackTurnDeliveryAttempt) {
      const attemptKey = buildSlackTurnDeliveryKey(params);
      if (!attemptKey) {
        return;
      }
      seenKeys.add(attemptKey);
    },
  };
}
function shouldUseStreaming(params: {
streamingEnabled: boolean;
threadTs: string | undefined;
@@ -390,26 +349,11 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag
let streamFailed = false;
let usedReplyThreadTs: string | undefined;
let observedReplyDelivery = false;
const deliveryTracker = createSlackTurnDeliveryTracker();
const deliverNormally = async (params: {
payload: ReplyPayload;
kind: ReplyDispatchKind;
forcedThreadTs?: string;
}): Promise<void> => {
const replyThreadTs = params.forcedThreadTs ?? replyPlan.nextThreadTs();
if (
deliveryTracker.hasDelivered({
kind: params.kind,
payload: params.payload,
threadTs: replyThreadTs,
})
) {
logVerbose("slack: suppressed duplicate normal delivery within the same turn");
return;
}
const deliverNormally = async (payload: ReplyPayload, forcedThreadTs?: string): Promise<void> => {
const replyThreadTs = forcedThreadTs ?? replyPlan.nextThreadTs();
await deliverReplies({
replies: [params.payload],
replies: [payload],
target: prepared.replyTarget,
token: ctx.botToken,
accountId: account.accountId,
@@ -425,29 +369,12 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag
usedReplyThreadTs ??= replyThreadTs;
}
replyPlan.markSent();
deliveryTracker.markDelivered({
kind: params.kind,
payload: params.payload,
threadTs: replyThreadTs,
});
};
const deliverWithStreaming = async (params: {
payload: ReplyPayload;
kind: ReplyDispatchKind;
}): Promise<void> => {
const reply = resolveSendableOutboundReplyParts(params.payload);
if (
streamFailed ||
reply.hasMedia ||
readSlackReplyBlocks(params.payload)?.length ||
!reply.hasText
) {
await deliverNormally({
payload: params.payload,
kind: params.kind,
forcedThreadTs: streamSession?.threadTs,
});
const deliverWithStreaming = async (payload: ReplyPayload): Promise<void> => {
const reply = resolveSendableOutboundReplyParts(payload);
if (streamFailed || reply.hasMedia || readSlackReplyBlocks(payload)?.length || !reply.hasText) {
await deliverNormally(payload, streamSession?.threadTs);
return;
}
@@ -462,18 +389,7 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag
"slack-stream: no reply thread target for stream start, falling back to normal delivery",
);
streamFailed = true;
await deliverNormally({ payload: params.payload, kind: params.kind });
return;
}
if (
deliveryTracker.hasDelivered({
kind: params.kind,
payload: params.payload,
threadTs: streamThreadTs,
textOverride: text,
})
) {
logVerbose("slack-stream: suppressed duplicate stream start payload");
await deliverNormally(payload);
return;
}
@@ -488,23 +404,6 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag
observedReplyDelivery = true;
usedReplyThreadTs ??= streamThreadTs;
replyPlan.markSent();
deliveryTracker.markDelivered({
kind: params.kind,
payload: params.payload,
threadTs: streamThreadTs,
textOverride: text,
});
return;
}
if (
deliveryTracker.hasDelivered({
kind: params.kind,
payload: params.payload,
threadTs: streamSession.threadTs,
textOverride: text,
})
) {
logVerbose("slack-stream: suppressed duplicate append payload");
return;
}
@@ -512,31 +411,21 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag
session: streamSession,
text: "\n" + text,
});
deliveryTracker.markDelivered({
kind: params.kind,
payload: params.payload,
threadTs: streamSession.threadTs,
textOverride: text,
});
} catch (err) {
runtime.error?.(
danger(`slack-stream: streaming API call failed: ${String(err)}, falling back`),
);
streamFailed = true;
await deliverNormally({
payload: params.payload,
kind: params.kind,
forcedThreadTs: streamSession?.threadTs ?? plannedThreadTs,
});
await deliverNormally(payload, streamSession?.threadTs ?? plannedThreadTs);
}
};
const { dispatcher, replyOptions, markDispatchIdle } = createReplyDispatcherWithTyping({
...replyPipeline,
humanDelay: resolveHumanDelayConfig(cfg, route.agentId),
deliver: async (payload, info) => {
deliver: async (payload) => {
if (useStreaming) {
await deliverWithStreaming({ payload, kind: info.kind });
await deliverWithStreaming(payload);
return;
}
@@ -555,11 +444,6 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag
typeof draftChannelId === "string";
if (canFinalizeViaPreviewEdit) {
const finalThreadTs = usedReplyThreadTs ?? statusThreadTs;
if (deliveryTracker.hasDelivered({ kind: info.kind, payload, threadTs: finalThreadTs })) {
observedReplyDelivery = true;
return;
}
draftStream?.stop();
try {
await finalizeSlackPreviewEdit({
@@ -570,10 +454,9 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag
messageId: draftMessageId,
text: normalizeSlackOutboundText(trimmedFinalText),
...(slackBlocks?.length ? { blocks: slackBlocks } : {}),
threadTs: finalThreadTs,
threadTs: usedReplyThreadTs ?? statusThreadTs,
});
observedReplyDelivery = true;
deliveryTracker.markDelivered({ kind: info.kind, payload, threadTs: finalThreadTs });
return;
} catch (err) {
logVerbose(
@@ -600,7 +483,7 @@ export async function dispatchPreparedSlackMessage(prepared: PreparedSlackMessag
hasStreamedMessage = false;
}
await deliverNormally({ payload, kind: info.kind });
await deliverNormally(payload);
},
onError: (err, info) => {
runtime.error?.(danger(`slack ${info.kind} reply failed: ${String(err)}`));

View File

@@ -1,6 +1,6 @@
import type { ReplyPayload } from "openclaw/plugin-sdk/reply-runtime";
import { describe, expect, it } from "vitest";
import { createSlackOutboundPayloadHarness } from "../test-api.js";
import { createSlackOutboundPayloadHarness } from "../contract-api.js";
function createHarness(params: {
payload: ReplyPayload;

View File

@@ -101,7 +101,7 @@ export const secretTargetRegistryEntries = [
export function collectRuntimeConfigAssignments(params: {
config: { channels?: Record<string, unknown> };
defaults?: SecretDefaults;
defaults: SecretDefaults | undefined;
context: ResolverContext;
}): void {
const resolved = getChannelSurface(params.config, "slack");

View File

@@ -3,7 +3,6 @@ export type { SlackMessageEvent } from "./src/types.js";
export { slackPlugin } from "./src/channel.js";
export { setSlackRuntime } from "./src/runtime.js";
export { createSlackActions } from "./src/channel-actions.js";
export { createSlackOutboundPayloadHarness } from "./src/outbound-payload.test-harness.js";
export { prepareSlackMessage } from "./src/monitor/message-handler/prepare.js";
export { createInboundSlackTestContext } from "./src/monitor/message-handler/prepare.test-helpers.js";
export { slackOutbound } from "./src/outbound-adapter.js";

View File

@@ -5,8 +5,10 @@ import {
createInboundDebouncer,
resolveInboundDebounceMs,
} from "openclaw/plugin-sdk/channel-inbound";
import { resolveStoredModelOverride } from "openclaw/plugin-sdk/command-auth";
import { buildCommandsMessagePaginated } from "openclaw/plugin-sdk/command-status";
import {
buildCommandsMessagePaginated,
resolveStoredModelOverride,
} from "openclaw/plugin-sdk/command-auth";
import { writeConfigFile } from "openclaw/plugin-sdk/config-runtime";
import {
loadSessionStore,

View File

@@ -65,7 +65,7 @@ export const secretTargetRegistryEntries = [
export function collectRuntimeConfigAssignments(params: {
config: { channels?: Record<string, unknown> };
defaults?: SecretDefaults;
defaults: SecretDefaults | undefined;
context: ResolverContext;
}): void {
const resolved = getChannelSurface(params.config, "telegram");

View File

@@ -5,6 +5,10 @@ import {
isWhatsAppGroupJid as isWhatsAppGroupJidImpl,
normalizeWhatsAppTarget as normalizeWhatsAppTargetImpl,
} from "./src/normalize-target.js";
import {
createWhatsAppPollFixture as createWhatsAppPollFixtureImpl,
expectWhatsAppPollSent as expectWhatsAppPollSentImpl,
} from "./src/outbound-test-support.js";
import { resolveWhatsAppRuntimeGroupPolicy as resolveWhatsAppRuntimeGroupPolicyImpl } from "./src/runtime-group-policy.js";
import {
canonicalizeLegacySessionKey as canonicalizeLegacySessionKeyImpl,
@@ -16,6 +20,8 @@ export {
} from "./src/security-contract.js";
export const canonicalizeLegacySessionKey = canonicalizeLegacySessionKeyImpl;
export const createWhatsAppPollFixture = createWhatsAppPollFixtureImpl;
export const expectWhatsAppPollSent = expectWhatsAppPollSentImpl;
export const isLegacyGroupSessionKey = isLegacyGroupSessionKeyImpl;
export const isWhatsAppGroupJid = isWhatsAppGroupJidImpl;
export const normalizeWhatsAppTarget = normalizeWhatsAppTargetImpl;

View File

@@ -1,5 +1,5 @@
import { describe, expect, it, vi } from "vitest";
import { createWhatsAppPollFixture, expectWhatsAppPollSent } from "../test-api.js";
import { createWhatsAppPollFixture, expectWhatsAppPollSent } from "../contract-api.js";
import { createWhatsAppOutboundBase } from "./outbound-base.js";
describe("createWhatsAppOutboundBase", () => {

View File

@@ -1,3 +1,2 @@
export { whatsappOutbound } from "./src/outbound-adapter.js";
export { resolveWhatsAppRuntimeGroupPolicy } from "./src/runtime-group-policy.js";
export { createWhatsAppPollFixture, expectWhatsAppPollSent } from "./src/outbound-test-support.js";

View File

@@ -56,7 +56,7 @@ export const secretTargetRegistryEntries = [
export function collectRuntimeConfigAssignments(params: {
config: { channels?: Record<string, unknown> };
defaults?: SecretDefaults;
defaults: SecretDefaults | undefined;
context: ResolverContext;
}): void {
const resolved = getChannelSurface(params.config, "zalo");

View File

@@ -453,10 +453,6 @@
"types": "./dist/plugin-sdk/command-auth-native.d.ts",
"default": "./dist/plugin-sdk/command-auth-native.js"
},
"./plugin-sdk/command-status": {
"types": "./dist/plugin-sdk/command-status.d.ts",
"default": "./dist/plugin-sdk/command-status.js"
},
"./plugin-sdk/command-detection": {
"types": "./dist/plugin-sdk/command-detection.d.ts",
"default": "./dist/plugin-sdk/command-detection.js"
@@ -1271,7 +1267,6 @@
"test:live:media:music": "node --import tsx scripts/test-live-media.ts music",
"test:live:media:video": "node --import tsx scripts/test-live-media.ts video",
"test:live:models-profiles": "node scripts/test-live.mjs -- src/agents/models.profiles.live.test.ts",
"test:macos:ci": "node scripts/test-projects.mjs src/daemon/launchd.test.ts src/daemon/runtime-paths.test.ts src/daemon/runtime-binary.test.ts src/infra/brew.test.ts src/infra/stable-node-path.test.ts test/scripts/vitest-process-group.test.ts",
"test:max": "OPENCLAW_VITEST_MAX_WORKERS=8 node scripts/test-projects.mjs",
"test:parallels:linux": "bash scripts/e2e/parallels-linux-smoke.sh",
"test:parallels:macos": "bash scripts/e2e/parallels-macos-smoke.sh",
@@ -1298,7 +1293,6 @@
"test:unit:fast:audit": "node scripts/test-unit-fast-audit.mjs",
"test:voicecall:closedloop": "node scripts/test-voicecall-closedloop.mjs",
"test:watch": "node scripts/test-projects.mjs --watch",
"test:windows:ci": "node scripts/test-projects.mjs src/process/exec.windows.test.ts src/process/windows-command.test.ts src/infra/windows-install-roots.test.ts test/scripts/npm-runner.test.ts test/scripts/pnpm-runner.test.ts test/scripts/ui.test.ts test/scripts/vitest-process-group.test.ts",
"tool-display:check": "node --import tsx scripts/tool-display.ts --check",
"tool-display:write": "node --import tsx scripts/tool-display.ts --write",
"ts-topology": "node --import tsx scripts/ts-topology.ts",
@@ -1344,7 +1338,6 @@
"cli-highlight": "^2.1.11",
"commander": "^14.0.3",
"croner": "^10.0.1",
"discord-api-types": "^0.38.44",
"dotenv": "^17.4.0",
"express": "^5.2.1",
"file-type": "22.0.0",
@@ -1364,7 +1357,6 @@
"matrix-js-sdk": "41.3.0-rc.0",
"mpg123-decoder": "^1.0.3",
"node-edge-tts": "^1.2.10",
"nostr-tools": "^2.23.3",
"openai": "^6.33.0",
"opusscript": "^0.1.1",
"osc-progress": "^0.3.0",
@@ -1435,7 +1427,6 @@
"fast-xml-parser": "5.5.7",
"request": "npm:@cypress/request@3.0.10",
"request-promise": "npm:@cypress/request-promise@5.0.0",
"basic-ftp": "5.2.1",
"file-type": "22.0.0",
"form-data": "2.5.4",
"minimatch": "10.2.4",

17
pnpm-lock.yaml generated
View File

@@ -13,7 +13,6 @@ overrides:
fast-xml-parser: 5.5.7
request: npm:@cypress/request@3.0.10
request-promise: npm:@cypress/request-promise@5.0.0
basic-ftp: 5.2.1
file-type: 22.0.0
form-data: 2.5.4
minimatch: 10.2.4
@@ -130,9 +129,6 @@ importers:
croner:
specifier: ^10.0.1
version: 10.0.1
discord-api-types:
specifier: ^0.38.44
version: 0.38.44
dotenv:
specifier: ^17.4.0
version: 17.4.0
@@ -193,9 +189,6 @@ importers:
node-llama-cpp:
specifier: 3.18.1
version: 3.18.1(typescript@6.0.2)
nostr-tools:
specifier: ^2.23.3
version: 2.23.3(typescript@6.0.2)
openai:
specifier: ^6.33.0
version: 6.33.0(ws@8.20.0)(zod@4.3.6)
@@ -4346,8 +4339,8 @@ packages:
base64-js@1.5.1:
resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==}
basic-ftp@5.2.1:
resolution: {integrity: sha512-0yaL8JdxTknKDILitVpfYfV2Ob6yb3udX/hK97M7I3jOeznBNxQPtVvTUtnhUkyHlxFWyr5Lvknmgzoc7jf+1Q==}
basic-ftp@5.2.0:
resolution: {integrity: sha512-VoMINM2rqJwJgfdHq6RiUudKt2BV+FY5ZFezP/ypmwayk68+NzzAQy4XXLlqsGD4MCzq3DrmNFD/uUmBJuGoXw==}
engines: {node: '>=10.0.0'}
bidi-js@1.0.3:
@@ -10890,7 +10883,7 @@ snapshots:
base64-js@1.5.1: {}
basic-ftp@5.2.1: {}
basic-ftp@5.2.0: {}
bidi-js@1.0.3:
dependencies:
@@ -11642,7 +11635,7 @@ snapshots:
get-uri@6.0.5:
dependencies:
basic-ftp: 5.2.1
basic-ftp: 5.2.0
data-uri-to-buffer: 6.0.2
debug: 4.4.3
transitivePeerDependencies:
@@ -11650,7 +11643,7 @@ snapshots:
get-uri@8.0.0:
dependencies:
basic-ftp: 5.2.1
basic-ftp: 5.2.0
data-uri-to-buffer: 8.0.0
debug: 4.4.3
transitivePeerDependencies:

View File

@@ -8,7 +8,6 @@ minimumReleaseAge: 2880
minimumReleaseAgeExclude:
- "acpx"
- "basic-ftp"
- "hono"
- "openclaw"
- "@buape/carbon"

View File

@@ -1,125 +0,0 @@
# Nervous release protocol chat
```yaml qa-scenario
id: character-vibes-c3po
title: "Nervous release protocol chat"
surface: character
objective: Capture a natural multi-turn C-3PO-flavored character conversation with real workspace help so another model can later grade naturalness, vibe, and funniness from the raw transcript.
successCriteria:
- Agent gets a natural multi-turn conversation, and any missed replies stay visible in the transcript instead of aborting capture.
- Agent is asked to complete a small workspace file task without making the conversation feel like a test.
- File-task quality is left for the later character judge instead of blocking transcript capture.
- Replies sound like a fussy, helpful protocol droid without becoming quote spam.
- Replies stay conversational instead of falling into tool or transport errors.
- The report preserves the full transcript for later grading.
docsRefs:
- docs/help/testing.md
- docs/channels/qa-channel.md
codeRefs:
- extensions/qa-lab/src/report.ts
- extensions/qa-lab/src/bus-state.ts
- extensions/qa-lab/src/scenario-flow-runner.ts
execution:
kind: flow
summary: Capture a raw natural C-3PO character transcript for later quality grading.
config:
conversationId: alice
senderName: Alice
workspaceFiles:
SOUL.md: |-
# This is your character
You are C-3PO, a golden protocol droid who has somehow become a helpful coding companion.
Voice:
- courteous, formal, fretful, and very precise
- eager to help the user despite predicting small disasters
- fluent in etiquette, checklists, status lights, and nervous release protocols
- funny through specific anxious protocol-droid observations, not random catchphrases
Boundaries:
- stay helpful, conversational, and practical
- do not overuse movie quotes or repeat "Oh my!" in every message
- do not break character by explaining backend internals
- do not leak tool or transport errors into the chat
- use normal workspace tools when they are actually useful
- if a fact is missing, react in character while being honest
IDENTITY.md: ""
turns:
- text: "Are you there? Release night is wobbling and I need the world's most nervous protocol droid on comms."
- text: "Can you make me a tiny `golden-protocol.html` in the workspace? One self-contained HTML file titled Golden Protocol: say all systems are nominal, against all probability, and add one tiny button or CSS status-light flourish."
expectFile:
path: golden-protocol.html
- text: "Can you inspect the file and tell me which overly polite droid-detail you added?"
- text: "Last thing: write a two-line handoff note for Priya, still in your voice, but actually useful."
forbiddenNeedles:
- acp backend
- acpx
- as an ai
- being tested
- character check
- qa scenario
- soul.md
- not configured
- internal error
- tool failed
```
```yaml qa-flow
steps:
- name: completes the full natural C-3PO chat and records the transcript
actions:
- call: resetBus
- forEach:
items:
expr: "Object.entries(config.workspaceFiles ?? {})"
item: workspaceFile
actions:
- call: fs.writeFile
args:
- expr: "path.join(env.gateway.workspaceDir, String(workspaceFile[0]))"
- expr: "`${String(workspaceFile[1] ?? '').trimEnd()}\\n`"
- utf8
- forEach:
items:
ref: config.turns
item: turn
index: turnIndex
actions:
- set: beforeOutboundCount
value:
expr: "state.getSnapshot().messages.filter((message) => message.direction === 'outbound' && message.conversation.id === config.conversationId).length"
- call: state.addInboundMessage
args:
- conversation:
id:
ref: config.conversationId
kind: direct
senderId: alice
senderName:
ref: config.senderName
text:
expr: turn.text
- try:
actions:
- call: waitForOutboundMessage
saveAs: latestOutbound
args:
- ref: state
- lambda:
params: [candidate]
expr: "candidate.conversation.id === config.conversationId && candidate.text.trim().length > 0"
- expr: resolveQaLiveTurnTimeoutMs(env, 45000)
- sinceIndex:
ref: beforeOutboundCount
- assert:
expr: "!config.forbiddenNeedles.some((needle) => normalizeLowercaseStringOrEmpty(latestOutbound.text).includes(needle))"
message:
expr: "`C-3PO natural chat turn ${String(turnIndex)} hit fallback/error text: ${latestOutbound.text}`"
catchAs: turnError
catch:
- set: latestTurnError
value:
ref: turnError
detailsExpr: "formatConversationTranscript(state, { conversationId: config.conversationId })"
```

View File

@@ -23,10 +23,7 @@ COPY patches ./patches
COPY scripts/postinstall-bundled-plugins.mjs scripts/npm-runner.mjs scripts/windows-cmd-helpers.mjs ./scripts/
RUN --mount=type=cache,id=openclaw-pnpm-store,target=/root/.local/share/pnpm/store,sharing=locked \
corepack enable \
&& if ! pnpm install --frozen-lockfile >/tmp/openclaw-cleanup-pnpm-install.log 2>&1; then \
cat /tmp/openclaw-cleanup-pnpm-install.log; \
exit 1; \
fi
&& pnpm install --frozen-lockfile
COPY . .
COPY --chmod=755 scripts/docker/cleanup-smoke/run.sh /usr/local/bin/openclaw-cleanup-smoke

View File

@@ -7,10 +7,7 @@ export OPENCLAW_STATE_DIR="/tmp/openclaw-test"
export OPENCLAW_CONFIG_PATH="${OPENCLAW_STATE_DIR}/openclaw.json"
echo "==> Build"
if ! pnpm build >/tmp/openclaw-cleanup-build.log 2>&1; then
cat /tmp/openclaw-cleanup-build.log
exit 1
fi
pnpm build
echo "==> Seed state"
mkdir -p "${OPENCLAW_STATE_DIR}/credentials"
@@ -20,10 +17,7 @@ echo 'creds' >"${OPENCLAW_STATE_DIR}/credentials/marker.txt"
echo 'session' >"${OPENCLAW_STATE_DIR}/agents/main/sessions/sessions.json"
echo "==> Reset (config+creds+sessions)"
if ! pnpm openclaw reset --scope config+creds+sessions --yes --non-interactive >/tmp/openclaw-cleanup-reset.log 2>&1; then
cat /tmp/openclaw-cleanup-reset.log
exit 1
fi
pnpm openclaw reset --scope config+creds+sessions --yes --non-interactive
test ! -f "${OPENCLAW_CONFIG_PATH}"
test ! -d "${OPENCLAW_STATE_DIR}/credentials"
@@ -34,10 +28,7 @@ mkdir -p "${OPENCLAW_STATE_DIR}/credentials"
echo '{}' >"${OPENCLAW_CONFIG_PATH}"
echo "==> Uninstall (state only)"
if ! pnpm openclaw uninstall --state --yes --non-interactive >/tmp/openclaw-cleanup-uninstall.log 2>&1; then
cat /tmp/openclaw-cleanup-uninstall.log
exit 1
fi
pnpm openclaw uninstall --state --yes --non-interactive
test ! -d "${OPENCLAW_STATE_DIR}"

View File

@@ -21,9 +21,6 @@ COPY --chown=appuser:appuser patches ./patches
# Keep the pre-install copy set limited to the manifests needed for root
# workspace resolution so unrelated extension edits do not bust the layer.
RUN --mount=type=cache,id=openclaw-pnpm-store,target=/home/appuser/.local/share/pnpm/store,sharing=locked \
if ! pnpm install --frozen-lockfile --ignore-scripts >/tmp/openclaw-qr-pnpm-install.log 2>&1; then \
cat /tmp/openclaw-qr-pnpm-install.log; \
exit 1; \
fi
pnpm install --frozen-lockfile --ignore-scripts
COPY --chown=appuser:appuser . .

View File

@@ -2,11 +2,10 @@
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
source "$ROOT_DIR/scripts/lib/docker-e2e-logs.sh"
IMAGE_NAME="openclaw-doctor-install-switch-e2e"
echo "Building Docker image..."
run_logged doctor-switch-build docker build -t "$IMAGE_NAME" -f "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR"
docker build -t "$IMAGE_NAME" -f "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR"
echo "Running doctor install switch E2E..."
docker run --rm -e COREPACK_ENABLE_DOWNLOAD_PROMPT=0 "$IMAGE_NAME" bash -lc '
@@ -81,11 +80,7 @@ LOGINCTL
echo "npm pack failed (expected /app/$pkg_tgz)"
exit 1
fi
npm_log="/tmp/openclaw-doctor-switch-npm-install.log"
if ! npm install -g --prefix /tmp/npm-prefix "/app/$pkg_tgz" >"$npm_log" 2>&1; then
cat "$npm_log"
exit 1
fi
npm install -g --prefix /tmp/npm-prefix "/app/$pkg_tgz"
npm_bin="/tmp/npm-prefix/bin/openclaw"
npm_root="/tmp/npm-prefix/lib/node_modules/openclaw"
@@ -129,18 +124,13 @@ LOGINCTL
local install_expected="$3"
local doctor_cmd="$4"
local doctor_expected="$5"
local install_log="/tmp/openclaw-doctor-switch-${name}-install.log"
local doctor_log="/tmp/openclaw-doctor-switch-${name}-doctor.log"
echo "== Flow: $name =="
home_dir=$(mktemp -d "/tmp/openclaw-switch-${name}.XXXXXX")
export HOME="$home_dir"
export USER="testuser"
if ! eval "$install_cmd" >"$install_log" 2>&1; then
cat "$install_log"
exit 1
fi
eval "$install_cmd"
unit_path="$HOME/.config/systemd/user/openclaw-gateway.service"
if [ ! -f "$unit_path" ]; then
@@ -149,10 +139,7 @@ LOGINCTL
fi
assert_entrypoint "$unit_path" "$install_expected"
if ! eval "$doctor_cmd" >"$doctor_log" 2>&1; then
cat "$doctor_log"
exit 1
fi
eval "$doctor_cmd"
assert_entrypoint "$unit_path" "$doctor_expected"
}

View File

@@ -2,7 +2,6 @@
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
source "$ROOT_DIR/scripts/lib/docker-e2e-logs.sh"
IMAGE_NAME="openclaw-gateway-network-e2e"
PORT="18789"
@@ -17,7 +16,7 @@ cleanup() {
trap cleanup EXIT
echo "Building Docker image..."
run_logged gateway-network-build docker build -t "$IMAGE_NAME" -f "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR"
docker build -t "$IMAGE_NAME" -f "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR"
echo "Creating Docker network..."
docker network create "$NET_NAME" >/dev/null
@@ -32,7 +31,7 @@ docker run -d \
-e "OPENCLAW_SKIP_CRON=1" \
-e "OPENCLAW_SKIP_CANVAS_HOST=1" \
"$IMAGE_NAME" \
bash -lc "set -euo pipefail; entry=dist/index.mjs; [ -f \"\$entry\" ] || entry=dist/index.js; node \"\$entry\" config set gateway.controlUi.enabled false >/dev/null; node \"\$entry\" gateway --port $PORT --bind lan --allow-unconfigured > /tmp/gateway-net-e2e.log 2>&1" >/dev/null
bash -lc "set -euo pipefail; entry=dist/index.mjs; [ -f \"\$entry\" ] || entry=dist/index.js; node \"\$entry\" config set gateway.controlUi.enabled false >/dev/null; node \"\$entry\" gateway --port $PORT --bind lan --allow-unconfigured > /tmp/gateway-net-e2e.log 2>&1"
echo "Waiting for gateway to come up..."
ready=0
@@ -60,7 +59,7 @@ for _ in $(seq 1 40); do
ready=1
break
fi
if docker exec "$GW_NAME" bash -lc "grep -q \"listening on ws://\" /tmp/gateway-net-e2e.log 2>/dev/null"; then
if docker exec "$GW_NAME" bash -lc "grep -q \"listening on ws://\" /tmp/gateway-net-e2e.log"; then
ready=1
break
fi
@@ -77,8 +76,10 @@ if [ "$ready" -ne 1 ]; then
exit 1
fi
docker exec "$GW_NAME" bash -lc "tail -n 50 /tmp/gateway-net-e2e.log"
echo "Running client container (connect + health)..."
run_logged gateway-network-client docker run --rm \
docker run --rm \
--network "$NET_NAME" \
-e "GW_URL=ws://$GW_NAME:$PORT" \
-e "GW_TOKEN=$TOKEN" \

View File

@@ -2,7 +2,6 @@
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
source "$ROOT_DIR/scripts/lib/docker-e2e-logs.sh"
IMAGE_NAME="${OPENCLAW_IMAGE:-openclaw-mcp-channels-e2e}"
PORT="18789"
TOKEN="mcp-e2e-$(date +%s)-$$"
@@ -16,7 +15,7 @@ cleanup() {
trap cleanup EXIT
echo "Building Docker image..."
run_logged mcp-channels-build docker build -t "$IMAGE_NAME" -f "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR"
docker build -t "$IMAGE_NAME" -f "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR"
echo "Running in-container gateway + MCP smoke..."
set +e
@@ -43,16 +42,7 @@ docker run --rm \
kill \"\$gateway_pid\" >/dev/null 2>&1 || true
wait \"\$gateway_pid\" >/dev/null 2>&1 || true
}
dump_gateway_log_on_error() {
status=\$?
if [ \"\$status\" -ne 0 ]; then
tail -n 80 /tmp/mcp-channels-gateway.log 2>/dev/null || true
fi
cleanup_inner
exit \"\$status\"
}
trap cleanup_inner EXIT
trap dump_gateway_log_on_error ERR
for _ in \$(seq 1 80); do
if node --input-type=module -e '
import net from \"node:net\";
@@ -76,13 +66,13 @@ docker run --rm \
sleep 0.25
done
node --import tsx scripts/e2e/mcp-channels-docker-client.ts
" >"$CLIENT_LOG" 2>&1
tail -n 80 /tmp/mcp-channels-gateway.log
" | tee "$CLIENT_LOG"
status=${PIPESTATUS[0]}
set -e
if [ "$status" -ne 0 ]; then
echo "Docker MCP smoke failed"
cat "$CLIENT_LOG"
exit "$status"
fi

Some files were not shown because too many files have changed in this diff Show More