diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b9d8d44d8101..c9a4b5365f6f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1151,6 +1151,7 @@ jobs: OPENCLAW_NODE_TEST_CONFIGS_JSON: ${{ toJson(matrix.configs) }} OPENCLAW_NODE_TEST_INCLUDE_PATTERNS_JSON: ${{ toJson(matrix.includePatterns) }} OPENCLAW_VITEST_SHARD_NAME: ${{ matrix.shard_name }} + OPENCLAW_VITEST_NO_OUTPUT_TIMEOUT_MS: "900000" OPENCLAW_TEST_PROJECTS_PARALLEL: "2" shell: bash run: | diff --git a/docs/reference/code-mode.md b/docs/reference/code-mode.md index d126f4b49f0e..5d25b51196cb 100644 --- a/docs/reference/code-mode.md +++ b/docs/reference/code-mode.md @@ -6,6 +6,7 @@ read_when: - You want to enable OpenClaw code mode for an agent run - You need to explain why code mode is different from Codex Code mode - You are reviewing the exec/wait contract, QuickJS-WASI sandbox, TypeScript transform, or hidden tool-catalog bridge + - You are adding or reviewing an internal code-mode namespace registry integration --- Code mode is an experimental OpenClaw agent-runtime feature. It is off by @@ -380,6 +381,7 @@ The guest runtime exposes a small global API: ```typescript declare const ALL_TOOLS: ToolCatalogEntry[]; declare const tools: ToolCatalog; +declare const namespaces: Record<string, unknown>; declare function text(value: unknown): void; declare function json(value: unknown): void; @@ -433,6 +435,189 @@ const hits = await tools.web_search({ query: "OpenClaw code mode" }); The guest runtime must not expose host objects directly. Inputs and outputs cross the bridge as JSON-compatible values with explicit size caps. +## Internal namespaces + +Internal namespaces give code mode a concise domain API without adding more +model-visible tools. A loader-owned integration can register a namespace such +as `Issues`, `Fictions`, or `Calendar`; guest code then calls that namespace +inside the QuickJS program while OpenClaw still shows only `exec` and `wait` to +the model. 
+ +Namespaces are internal for now. There is no public plugin SDK namespace API: +external plugin namespaces need a loader-owned contract so plugin identity, +installed manifests, auth state, and cached catalog descriptors cannot drift +from the plugin tools that back the namespace. Core code mode owns only the +sandbox, serialization, catalog gating, and bridge dispatch. + +Guest code can then use either the direct global or the `namespaces` map: + +```javascript +const open = await Issues.list({ state: "open" }); +const alsoOpen = await namespaces.Issues.list({ state: "open" }); +return { count: open.length, alsoCount: alsoOpen.length }; +``` + +### Registry lifecycle + +The namespace registry is process-local and keyed by namespace id. A typical +run follows this path: + +1. A trusted loader calls `registerCodeModeNamespaceForPlugin(pluginId, registration)`. +2. Code mode creates the hidden `ToolSearchRuntime` for the run and reads its + run-scoped catalog. +3. `createCodeModeNamespaceRuntime(ctx, catalog)` keeps only registrations + whose `requiredToolNames` are all visible and owned by the same `pluginId`. +4. Each visible namespace calls `createScope(ctx)` for the current run. The + scope receives run context such as `agentId`, `sessionKey`, `sessionId`, + `runId`, config, and abort state. +5. Scope data is serialized into a plain descriptor and injected into QuickJS as + direct globals and `namespaces.<globalName>`. +6. Guest calls suspend through the worker bridge, resolve the namespace path on + the host, map the call to a declared plugin-owned catalog tool, and execute + that tool through `ToolSearchRuntime.call`. +7. `wait` resumes the same namespace runtime when a code-mode run suspended on + nested tool work. +8. Plugin rollback or uninstall calls `clearCodeModeNamespacesForPlugin(pluginId)` + so stale globals do not survive a failed plugin load. + +The important invariant: namespace calls are catalog tool calls. 
They use the +same policy hooks, approvals, abort handling, telemetry, transcript projection, +and suspend/resume behavior as `tools.call(...)`. + +### Registration shape + +Register namespaces from the integration that owns the backing tools. Keep the +scope small and only expose domain verbs that map to declared catalog tools. + +```typescript +import { + createCodeModeNamespaceTool, + registerCodeModeNamespaceForPlugin, +} from "../agents/code-mode-namespaces.js"; + +const pluginId = "github"; + +registerCodeModeNamespaceForPlugin(pluginId, { + id: "github-issues", + globalName: "Issues", + description: "GitHub issue helpers for the current repository.", + requiredToolNames: ["github_list_issues", "github_update_issue"], + prompt: "Use Issues.list(params) and Issues.update(number, patch).", + createScope: (ctx) => ({ + repository: ctx.config, + list: createCodeModeNamespaceTool("github_list_issues", ([params]) => params ?? {}), + update: createCodeModeNamespaceTool("github_update_issue", ([number, patch]) => ({ + number, + patch, + })), + }), +}); +``` + +`createCodeModeNamespaceTool(toolName, inputMapper)` marks a scope member as a +callable namespace function. The optional `inputMapper` receives the guest +arguments and returns the input object for the backing catalog tool. Without an +input mapper, the first guest argument is used, or `{}` when omitted. + +Raw host functions are rejected before guest code runs: + +```typescript +createScope: () => ({ + // Wrong: this bypasses the catalog tool lifecycle and will be rejected. + list: async () => githubClient.listIssues(), +}); +``` + +### Ownership and visibility + +Namespace ownership is bound to the registration caller's `pluginId`. 
+`requiredToolNames` is both a visibility gate and an ownership check: + +- every required tool must exist in the run catalog +- every required tool must have `sourceName === pluginId` +- the namespace is hidden when any required tool is absent or owned by another + plugin +- each callable path may target only a tool named in `requiredToolNames` + +This prevents another plugin from exposing a namespace by registering a +same-named tool. It also keeps namespaces aligned with ordinary agent policy: +if the run cannot see the backing tools, it cannot see the namespace. + +For example, a GitHub namespace should live behind a GitHub-owned extension that +owns GitHub auth, REST or GraphQL clients, rate limits, write approvals, and +tests. Core code mode should not embed GitHub-specific APIs, token handling, or +provider policy. + +### Scope serialization rules + +`createScope(ctx)` may return a plain object containing JSON-compatible values, +arrays, nested objects, and `createCodeModeNamespaceTool(...)` call markers. +Host objects never enter QuickJS directly. + +The serializer rejects: + +- raw functions +- circular object graphs +- unsafe path segments: `__proto__`, `constructor`, `prototype`, empty keys, or + keys containing the internal path separator +- `globalName` values that are not JavaScript identifiers +- `globalName` collisions with built-in code-mode globals such as `tools`, + `namespaces`, `text`, `json`, `yield_control`, or `__openclaw*` + +Values that cannot be JSON-serialized are converted to JSON-safe fallback +values before crossing the bridge. Binary data, handles, sockets, clients, and +class instances should stay behind ordinary catalog tools. + +### Prompts + +The namespace `description` and optional `prompt` are appended to the +model-visible `exec` schema only when the namespace is visible for that run. 
Use them +to teach the smallest useful surface: + +```typescript +{ + description: "Fiction production service helpers.", + prompt: + "Use Fictions.riskAudit(), Fictions.promoteIfReady(id, status), and Fictions.unpaidOver(amount).", +} +``` + +Keep prompts about the namespace contract, not auth setup, implementation +history, or unrelated plugin behavior. + +### Cleanup + +Namespaces are process-local registrations. Remove them when the owning plugin +is disabled, uninstalled, or rolled back: + +```typescript +clearCodeModeNamespacesForPlugin(pluginId); +``` + +Use `unregisterCodeModeNamespace(namespaceId)` only when removing one known +namespace. Tests can call `clearCodeModeNamespacesForTest()` to avoid leaking +registrations across cases. + +### Test checklist + +Namespace changes should cover the security boundary and the guest behavior: + +- namespace prompt text appears only when backing tools are visible +- same-named tools from another `sourceName` do not expose the namespace +- raw scope functions are rejected +- forged namespace ids and forged paths are rejected +- callable paths cannot target undeclared tools +- nested objects and shared references serialize correctly +- namespace calls execute through catalog tools and return JSON-safe details +- failures can be caught by guest code +- suspended namespace calls resume through `wait` +- plugin rollback clears the owning namespace registrations + +Namespaces complement the generic `tools.search` / `tools.call` catalog. Use +the catalog for arbitrary enabled tools; use namespaces for plugin-owned, +documented domain APIs where concise code is more reliable than repeated schema +lookups. + ## Output API `text(value)` appends human-readable output to the `output` array. 
diff --git a/scripts/repro/code-mode-namespace-live-docker.sh b/scripts/repro/code-mode-namespace-live-docker.sh new file mode 100755 index 000000000000..20a35e4f763f --- /dev/null +++ b/scripts/repro/code-mode-namespace-live-docker.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +ROOT_DIR="${OPENCLAW_LIVE_DOCKER_REPO_ROOT:-$SCRIPT_ROOT_DIR}" +ROOT_DIR="$(cd "$ROOT_DIR" && pwd)" +TRUSTED_HARNESS_DIR="${OPENCLAW_LIVE_DOCKER_TRUSTED_HARNESS_DIR:-$SCRIPT_ROOT_DIR}" +TRUSTED_HARNESS_DIR="$(cd "$TRUSTED_HARNESS_DIR" && pwd)" +source "$TRUSTED_HARNESS_DIR/scripts/lib/docker-e2e-image.sh" + +IMAGE_NAME="$(docker_e2e_resolve_image "openclaw-code-mode-namespace-live-e2e" OPENCLAW_CODE_MODE_NAMESPACE_LIVE_E2E_IMAGE)" +SKIP_BUILD="${OPENCLAW_CODE_MODE_NAMESPACE_LIVE_E2E_SKIP_BUILD:-0}" +PROFILE_FILE="${OPENCLAW_CODE_MODE_NAMESPACE_LIVE_PROFILE_FILE:-${OPENCLAW_TESTBOX_PROFILE_FILE:-$HOME/.openclaw-testbox-live.profile}}" +run_log="" +if [ ! -f "$PROFILE_FILE" ] && [ -f "$HOME/.profile" ]; then + PROFILE_FILE="$HOME/.profile" +fi + +cleanup() { + if [ -n "${run_log:-}" ]; then + rm -f "$run_log" + fi +} +trap cleanup EXIT + +docker_e2e_build_or_reuse "$IMAGE_NAME" code-mode-namespace-live "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR" "" "$SKIP_BUILD" + +PROFILE_MOUNT=() +PROFILE_STATUS="none" +if [ -f "$PROFILE_FILE" ] && [ -r "$PROFILE_FILE" ]; then + # User profiles may reference unset variables or run failing commands; relax strict mode while sourcing. + set +eu + set -a + # shellcheck disable=SC1090 + source "$PROFILE_FILE" + set +a + set -euo pipefail + PROFILE_MOUNT=(-v "$PROFILE_FILE":/home/appuser/.profile:ro) + PROFILE_STATUS="$PROFILE_FILE" +fi + +echo "Running code mode namespace live Docker E2E..." +echo "Profile file: $PROFILE_STATUS" +run_log="$(docker_e2e_run_log code-mode-namespace-live)" +if ! 
docker_e2e_run_with_harness \ + --user root \ + -e COREPACK_ENABLE_DOWNLOAD_PROMPT=0 \ + -e OPENAI_API_KEY \ + -e OPENAI_BASE_URL \ + -e "OPENCLAW_CODE_MODE_LIVE_MODEL=${OPENCLAW_CODE_MODE_LIVE_MODEL:-gpt-5.4-mini}" \ + -e "OPENCLAW_CODE_MODE_LIVE_TASKS=${OPENCLAW_CODE_MODE_LIVE_TASKS:-3}" \ + -v "$ROOT_DIR":/src:ro \ + "${PROFILE_MOUNT[@]}" \ + "$IMAGE_NAME" \ + bash /src/scripts/repro/code-mode-namespace-live-scenario.sh >"$run_log" 2>&1; then + docker_e2e_print_log "$run_log" + exit 1 +fi + +docker_e2e_print_log "$run_log" +echo "Code mode namespace live Docker E2E passed" diff --git a/scripts/repro/code-mode-namespace-live-scenario.sh b/scripts/repro/code-mode-namespace-live-scenario.sh new file mode 100755 index 000000000000..3c21e3dfaa33 --- /dev/null +++ b/scripts/repro/code-mode-namespace-live-scenario.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +set -euo pipefail + +source scripts/lib/live-docker-stage.sh + +for profile_path in "$HOME/.profile" /home/appuser/.profile; do + if [ -f "$profile_path" ] && [ -r "$profile_path" ]; then + set +e +u + # shellcheck disable=SC1090 + source "$profile_path" + set -euo pipefail + break + fi +done + +if [ -z "${OPENAI_API_KEY:-}" ]; then + echo "ERROR: OPENAI_API_KEY is required for the code mode namespace live Docker test." 
>&2 + exit 1 +fi +export OPENAI_API_KEY +if [ -n "${OPENAI_BASE_URL:-}" ]; then + export OPENAI_BASE_URL +fi + +tmp_dir="$(mktemp -d)" +cleanup() { + rm -rf "$tmp_dir" +} +trap cleanup EXIT + +openclaw_live_stage_source_tree "$tmp_dir" +openclaw_live_stage_node_modules "$tmp_dir" +openclaw_live_link_runtime_tree "$tmp_dir" + +cd "$tmp_dir" +tsx scripts/repro/code-mode-namespace-live.ts diff --git a/scripts/repro/code-mode-namespace-live.ts b/scripts/repro/code-mode-namespace-live.ts new file mode 100755 index 000000000000..62439f00f3d2 --- /dev/null +++ b/scripts/repro/code-mode-namespace-live.ts @@ -0,0 +1,644 @@ +#!/usr/bin/env -S node --import tsx +import { performance } from "node:perf_hooks"; +import { Type } from "typebox"; +import type { Model } from "../../packages/agent-core/src/llm.js"; +import type { AgentEvent, AgentTool } from "../../packages/agent-core/src/types.js"; +import { + clearCodeModeNamespacesForPlugin, + createCodeModeNamespaceTool, + registerCodeModeNamespaceForPlugin, +} from "../../src/agents/code-mode-namespaces.js"; +import { applyCodeModeCatalog, createCodeModeTools } from "../../src/agents/code-mode.js"; +import { Agent } from "../../src/agents/runtime/index.js"; +import { createToolSearchCatalogRef } from "../../src/agents/tool-search.js"; +import { jsonResult, type AnyAgentTool } from "../../src/agents/tools/common.js"; +import { setPluginToolMeta } from "../../src/plugins/tools.js"; + +type Mode = "regular" | "code-catalog" | "code-namespace"; + +type FictionTitle = { + id: string; + title: string; + lead: string; + status: string; + riskScore: number; + dependencies: Array<{ id: string; cleared: boolean }>; +}; + +type FictionScene = { + id: string; + titleId: string; + pages: number; + blocked: boolean; +}; + +type FictionDefect = { + id: string; + titleId: string; + sceneId: string; + state: "open" | "closed"; +}; + +type FictionInvoice = { + id: string; + titleId: string; + author: string; + amount: number; + paid: boolean; +}; 
+/** Complete in-memory state held by one fiction service instance. */ +type FictionServiceState = { + titles: FictionTitle[]; + scenes: FictionScene[]; + defects: FictionDefect[]; + invoices: FictionInvoice[]; +}; +/** Service handle returned by createFictionService(). */ +type FictionService = ReturnType<typeof createFictionService>; +/** One benchmark prompt plus the validator applied to the assistant's parsed JSON answer. */ +type Task = { + id: string; + prompt: string; + validate(answer: unknown, service: FictionService): { ok: boolean; reason?: string }; +}; +/** Per-run measurements reported in the JSON summary. */ +type RunMetrics = { + mode: Mode; + task: string; + ok: boolean; + reason?: string; + latencyMs: number; + modelTurns: number; + assistantMessages: number; + topLevelToolCalls: number; + serviceCalls: number; + finalText: string; + stopReason?: string; + errorMessage?: string; + toolResults?: unknown[]; +}; +/** Plugin id used for tool metadata and for registering/clearing the Fictions namespace. */ +const PLUGIN_ID = "fictions-live"; +/** Builds a fresh copy of the fixture data; the task validators are pinned to these values. */ +function cloneState(): FictionServiceState { + return { + titles: [ + { + id: "PX-73", + title: "The Glass Orchard", + lead: "Mira Vale", + status: "draft", + riskScore: 77, + dependencies: [ + { id: "outline", cleared: true }, + { id: "rights", cleared: true }, + ], + }, + { + id: "NM-12", + title: "Night Market of Moons", + lead: "Oren Quill", + status: "revision", + riskScore: 91, + dependencies: [ + { id: "continuity", cleared: true }, + { id: "copyedit", cleared: false }, + ], + }, + { + id: "RS-40", + title: "River Static", + lead: "Nia Rowan", + status: "locked", + riskScore: 54, + dependencies: [{ id: "legal", cleared: true }], + }, + ], + scenes: [ + { id: "PX-73-S1", titleId: "PX-73", pages: 32, blocked: false }, + { id: "PX-73-S2", titleId: "PX-73", pages: 28, blocked: false }, + { id: "PX-73-S3", titleId: "PX-73", pages: 36, blocked: false }, + { id: "NM-12-S1", titleId: "NM-12", pages: 44, blocked: false }, + { id: "NM-12-S2", titleId: "NM-12", pages: 39, blocked: true }, + { id: "RS-40-S1", titleId: "RS-40", pages: 51, blocked: false }, + ], + defects: [ + { id: "D-101", titleId: "NM-12", sceneId: "NM-12-S1", state: "open" }, + { id: "D-102", titleId: "NM-12", sceneId: "NM-12-S2", state: "open" }, + { id: "D-103", titleId: "NM-12", sceneId: "NM-12-S2", state: "open" }, + { id: "D-104", 
titleId: "PX-73", sceneId: "PX-73-S3", state: "closed" }, + { id: "D-105", titleId: "RS-40", sceneId: "RS-40-S1", state: "open" }, + ], + invoices: [ + { id: "I-200", titleId: "PX-73", author: "Mira Vale", amount: 4200, paid: false }, + { id: "I-201", titleId: "NM-12", author: "Oren Quill", amount: 6100, paid: false }, + { id: "I-202", titleId: "RS-40", author: "Nia Rowan", amount: 3700, paid: true }, + ], + }; +} +/** Creates an in-memory service over fresh fixture state. Every method except currentStatus() bumps the `calls` counter; readers return clones so callers cannot mutate the state. */ +function createFictionService() { + const state = cloneState(); + let calls = 0; + const note = () => { + calls += 1; + }; + const title = (id: string) => state.titles.find((entry) => entry.id === id); + return { + get calls() { + return calls; + }, + snapshot() { + note(); + return structuredClone(state); + }, + listTitles() { + note(); + return structuredClone(state.titles); + }, + getTitle(id: string) { + note(); + return structuredClone(title(id) ?? null); /* cloned, like the other readers, so callers cannot mutate service state */ + }, + listScenes(titleId?: string) { + note(); + return structuredClone(state.scenes.filter((entry) => !titleId || entry.titleId === titleId)); + }, + listDefects(titleId?: string) { + note(); + return state.defects + .filter((entry) => !titleId || entry.titleId === titleId) + .map((entry) => structuredClone(entry)); + }, + listInvoices(author?: string) { + note(); + return structuredClone(state.invoices.filter((entry) => !author || entry.author === author)); + }, + updateStatus(id: string, status: string) { + note(); + const entry = title(id); + if (!entry) { + return { ok: false, error: "unknown title", id }; + } + entry.status = status; + return { ok: true, id, status }; + }, + currentStatus(id: string) { + return title(id)?.status; /* not counted as a service call */ + }, + }; +} +/** Returns params[key] when it is a string, otherwise an empty string. */ +function stringParam(params: Record<string, unknown>, key: string): string { + const value = params[key]; + return typeof value === "string" ? 
value : ""; +} +/** Wraps a synchronous handler as an agent tool tagged with PLUGIN_ID (optional: true). Non-object params are replaced with {} before the handler runs, and the handler's return value is passed through jsonResult(). NOTE(review): the generic arguments on this signature were missing from the reviewed text and are reconstructed (Type.TSchema values for `properties`, unknown values for params) — confirm against the typebox version in use. */ +function makeTool( + name: string, + description: string, + properties: Record<string, Type.TSchema>, + execute: (params: Record<string, unknown>) => unknown, +): AnyAgentTool { + const tool = { + name, + label: name, + description, + parameters: Type.Object(properties), + execute: async (_toolCallId: string, params: unknown) => + jsonResult( + execute((params && typeof params === "object" ? params : {}) as Record<string, unknown>), + ), + } satisfies AnyAgentTool; + setPluginToolMeta(tool, { pluginId: PLUGIN_ID, optional: true }); + return tool; +} +/** Fine-grained tools that map one-to-one onto service methods. */ +function createFictionTools(service: FictionService): AnyAgentTool[] { + return [ + makeTool("fictions_list_titles", "List fiction titles with status and risk.", {}, () => + service.listTitles(), + ), + makeTool( + "fictions_get_title", + "Get one fiction title by id.", + { id: Type.String() }, + (params) => service.getTitle(stringParam(params, "id")), + ), + makeTool( + "fictions_list_scenes", + "List scenes, optionally filtered by title id.", + { titleId: Type.Optional(Type.String()) }, + (params) => + service.listScenes(typeof params.titleId === "string" ? params.titleId : undefined), + ), + makeTool( + "fictions_list_defects", + "List defects, optionally filtered by title id.", + { titleId: Type.Optional(Type.String()) }, + (params) => + service.listDefects(typeof params.titleId === "string" ? params.titleId : undefined), + ), + makeTool( + "fictions_list_invoices", + "List invoices, optionally filtered by author.", + { author: Type.Optional(Type.String()) }, + (params) => + service.listInvoices(typeof params.author === "string" ? 
params.author : undefined), + ), + makeTool( + "fictions_update_status", + "Update a fiction title status.", + { id: Type.String(), status: Type.String() }, + (params) => service.updateStatus(stringParam(params, "id"), stringParam(params, "status")), + ), + ]; +} +/** Coarse-grained tools named in the Fictions namespace registration; each computes its answer from a service snapshot. */ +function createFictionNamespaceTools(service: FictionService): AnyAgentTool[] { + return [ + makeTool("fictions_snapshot", "Return the complete fiction production snapshot.", {}, () => + service.snapshot(), + ), + makeTool("fictions_risk_audit", "Return highest-risk title audit.", {}, () => { + const data = service.snapshot(); + const highest = data.titles.toSorted((a, b) => b.riskScore - a.riskScore)[0]; + if (!highest) { + return null; + } + return { + task: "risk-audit", + id: highest.id, + lead: highest.lead, + status: highest.status, + unresolvedDefects: data.defects.filter( + (defect) => defect.titleId === highest.id && defect.state === "open", + ).length, + blockedScenes: data.scenes + .filter((scene) => scene.titleId === highest.id && scene.blocked) + .map((scene) => scene.id), + }; + }), + makeTool( + "fictions_promote_if_ready", + "Promote a title if dependencies and page count allow it.", + { id: Type.String(), status: Type.String() }, + (params) => { + const id = stringParam(params, "id"); + const status = stringParam(params, "status"); + const data = service.snapshot(); + const title = data.titles.find((entry) => entry.id === id); + const scenes = data.scenes.filter((scene) => scene.titleId === id); + const totalPages = scenes.reduce((sum, scene) => sum + scene.pages, 0); + const dependenciesCleared = + title?.dependencies.every((dependency) => dependency.cleared) ?? false; + if (!title || totalPages >= 110 || !dependenciesCleared) { /* blocked: unknown title, 110 or more pages, or an uncleared dependency */ + return { + task: "promote", + id, + action: "blocked", + totalPages, + finalStatus: title?.status ?? null, + }; + } + const updated = service.updateStatus(id, status); + return { + task: "promote", + id, + action: updated.ok ? 
"updated" : "blocked", + totalPages, + finalStatus: service.currentStatus(id) ?? null, + }; + }, + ), + makeTool( + "fictions_unpaid_over", + "Return unpaid invoices over a numeric threshold.", + { amount: Type.Number() }, + (params) => { + const amount = typeof params.amount === "number" ? params.amount : 0; + const data = service.snapshot(); + const invoices = data.invoices.filter( + (invoice) => !invoice.paid && invoice.amount > amount, + ); + return { + task: "invoice", + invoiceIds: invoices.map((invoice) => invoice.id), + totalUnpaidOver5000: invoices.reduce((sum, invoice) => sum + invoice.amount, 0), /* key name matches the invoice task; the threshold itself is the caller-supplied amount */ + }; + }, + ), + ]; +} +/** Registers the Fictions namespace for PLUGIN_ID after clearing any earlier registration for that plugin. */ +function registerFictionNamespace(): void { + clearCodeModeNamespacesForPlugin(PLUGIN_ID); + registerCodeModeNamespaceForPlugin(PLUGIN_ID, { + id: "fictions", + globalName: "Fictions", + description: "Fiction production service helpers.", + requiredToolNames: [ + "fictions_promote_if_ready", + "fictions_risk_audit", + "fictions_snapshot", + "fictions_unpaid_over", + ], + prompt: + "Use Fictions.riskAudit(), Fictions.promoteIfReady(id, status), Fictions.unpaidOver(amount), and Fictions.snapshot().", + createScope: () => ({ + snapshot: createCodeModeNamespaceTool("fictions_snapshot"), + riskAudit: createCodeModeNamespaceTool("fictions_risk_audit"), + promoteIfReady: createCodeModeNamespaceTool("fictions_promote_if_ready", ([id, status]) => ({ + id: typeof id === "string" ? id : "", + status: typeof status === "string" ? status : "", + })), + unpaidOver: createCodeModeNamespaceTool("fictions_unpaid_over", ([amount]) => ({ + amount: typeof amount === "number" ? 
amount : 0, + })), + }), + }); +} +/** Builds the model descriptor for an OpenAI Responses model id; ids starting with "gpt-5" are flagged as reasoning models. */ +function createModel(modelId: string): Model<"openai-responses"> { + const baseUrl = process.env.OPENAI_BASE_URL?.trim() || "https://api.openai.com/v1"; /* || rather than ?? so an empty or whitespace-only OPENAI_BASE_URL falls back to the default */ + return { + id: modelId, + name: modelId, + api: "openai-responses", + provider: "openai", + baseUrl, + reasoning: modelId.startsWith("gpt-5"), + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 400_000, + maxTokens: 128_000, + }; +} +/** Returns the system prompt for a mode: a shared base instruction plus mode-specific guidance on which tools or helpers to call. Any mode other than "regular" or "code-catalog" receives the namespace guidance. */ +function systemPromptForMode(mode: Mode): string { + const base = + "You are testing a fiction production service. Use the available tools, never invent data, and return only one minified JSON object. No markdown."; + if (mode === "regular") { + return `${base} Use the Fictions tools directly.`; + } + if (mode === "code-catalog") { + return `${base} Use code_mode_exec with JavaScript and always return the final JSON object from the code. In code, call direct tool helpers such as await tools.fictions_list_titles({}), await tools.fictions_list_scenes({titleId:"PX-73"}), await tools.fictions_list_defects({titleId:"NM-12"}), await tools.fictions_list_invoices({}), and await tools.fictions_update_status({id:"PX-73",status:"preproduction"}). Call code_mode_wait until the code result is completed, then return that completed value as your final answer.`; + } + return `${base} Use code_mode_exec with JavaScript and always return the final JSON object from the code. In code, prefer the namespace helpers: return await Fictions.riskAudit(); return await Fictions.promoteIfReady("PX-73","preproduction"); return await Fictions.unpaidOver(5000). 
Call code_mode_wait until the code result is completed, then return that completed value as your final answer.`; +} +/** Builds the tool list for a mode. "regular" returns the fiction tools directly. Other modes wrap a catalog in code mode: "code-namespace" registers the Fictions namespace and uses the namespace-backing tools, anything else clears the namespace and uses the fine-grained fiction tools. */ +function toolsForMode(mode: Mode, service: FictionService): AgentTool[] { + const fictionTools = createFictionTools(service); + if (mode === "regular") { + return fictionTools as AgentTool[]; + } + if (mode === "code-namespace") { + registerFictionNamespace(); + } else { + clearCodeModeNamespacesForPlugin(PLUGIN_ID); + } + const config = { + tools: { + codeMode: { + enabled: true, + timeoutMs: 20_000, + maxPendingToolCalls: 32, + }, + }, + }; + const catalogRef = createToolSearchCatalogRef(); + const codeModeTools = createCodeModeTools({ + config, + runtimeConfig: config, + sessionId: `live-${mode}`, + sessionKey: `agent:live-${mode}:main`, + agentId: "live", + runId: `run-${mode}`, + catalogRef, + }); + const catalogTools = + mode === "code-namespace" ? createFictionNamespaceTools(service) : fictionTools; + return applyCodeModeCatalog({ + tools: [...codeModeTools, ...catalogTools], + config, + sessionId: `live-${mode}`, + sessionKey: `agent:live-${mode}:main`, + agentId: "live", + runId: `run-${mode}`, + catalogRef, + }).tools as AgentTool[]; +} +/** Extracts text from message content: a string is returned as is, an array yields the concatenated `text` of its `type: "text"` entries, anything else yields "". */ +function textFromMessageContent(content: unknown): string { + if (typeof content === "string") { + return content; + } + if (!Array.isArray(content)) { + return ""; + } + return content + .filter( + (entry) => entry && typeof entry === "object" && (entry as { type?: string }).type === "text", + ) + .map((entry) => (entry as { text?: string }).text ?? 
"") + .join(""); +} +/** Parses text as JSON, falling back to the span from the first "{" to the last "}". Throws when there is no such span or when the fallback parse fails. */ +function parseFirstJson(text: string): unknown { + const trimmed = text.trim(); + try { + return JSON.parse(trimmed) as unknown; + } catch { + const start = trimmed.indexOf("{"); + const end = trimmed.lastIndexOf("}"); + if (start >= 0 && end > start) { + return JSON.parse(trimmed.slice(start, end + 1)) as unknown; + } + throw new Error("assistant did not return JSON"); + } +} +/** Type guard for non-null, non-array objects. */ +function isRecord(value: unknown): value is Record<string, unknown> { + return Boolean(value && typeof value === "object" && !Array.isArray(value)); +} +/** Benchmark tasks; each validator checks the answer against values derived from the fixture data. */ +const tasks: Task[] = [ + { + id: "risk-audit", + prompt: + 'Find the fiction title with the highest riskScore. Return JSON with keys task, id, lead, status, unresolvedDefects, blockedScenes. blockedScenes must be an array of blocked scene ids, not a count. task must be "risk-audit".', + validate(answer) { + if (!isRecord(answer)) { + return { ok: false, reason: "answer is not an object" }; + } + const blockedScenes = Array.isArray(answer.blockedScenes) + ? answer.blockedScenes.map(String).toSorted() + : []; + const unresolvedDefects = Array.isArray(answer.unresolvedDefects) + ? answer.unresolvedDefects.length + : answer.unresolvedDefects; + const ok = + answer.task === "risk-audit" && + answer.id === "NM-12" && + answer.lead === "Oren Quill" && + answer.status === "revision" && + unresolvedDefects === 3 && + JSON.stringify(blockedScenes) === JSON.stringify(["NM-12-S2"]); + return ok ? { ok } : { ok, reason: `unexpected risk audit: ${JSON.stringify(answer)}` }; + }, + }, + { + id: "promote", + prompt: + 'For PX-73, if total scene pages are below 110 and every dependency is cleared, update its status to "preproduction". If a Fictions.promoteIfReady helper exists, use it. Return JSON with keys task, id, action, totalPages, finalStatus. action must be exactly "updated" when the command succeeds. 
task must be "promote".', + validate(answer, service) { + if (!isRecord(answer)) { + return { ok: false, reason: "answer is not an object" }; + } + const ok = + answer.task === "promote" && + answer.id === "PX-73" && + typeof answer.action === "string" && + answer.action.includes("updated") && + answer.totalPages === 96 && + answer.finalStatus === "preproduction" && + service.currentStatus("PX-73") === "preproduction"; + return ok ? { ok } : { ok, reason: `unexpected promote result: ${JSON.stringify(answer)}` }; + }, + }, + { + id: "invoice", + prompt: + 'For unpaid invoices over 5000, return JSON with keys task, invoiceIds, totalUnpaidOver5000. task must be "invoice".', + validate(answer) { + if (!isRecord(answer)) { + return { ok: false, reason: "answer is not an object" }; + } + const invoiceIds = Array.isArray(answer.invoiceIds) + ? answer.invoiceIds.map(String).toSorted() + : []; + const ok = + answer.task === "invoice" && + JSON.stringify(invoiceIds) === JSON.stringify(["I-201"]) && + answer.totalUnpaidOver5000 === 6100; + return ok ? { ok } : { ok, reason: `unexpected invoice result: ${JSON.stringify(answer)}` }; + }, + }, +]; +/** Runs one task in one mode against a fresh service instance and returns its metrics together with the validation verdict. */ +async function runOne(mode: Mode, task: Task, model: string, apiKey: string): Promise<RunMetrics> { + const service = createFictionService(); + const counts = { + modelTurns: 0, + assistantMessages: 0, + topLevelToolCalls: 0, + }; + const toolResults: unknown[] = []; + const agent = new Agent({ + sessionId: `code-mode-live-${mode}-${task.id}`, + initialState: { + model: createModel(model), + systemPrompt: systemPromptForMode(mode), + tools: toolsForMode(mode, service), + thinkingLevel: "off", + }, + getApiKey: (provider) => (provider === "openai" ? 
apiKey : undefined), + toolExecution: "parallel", + maxRetryDelayMs: 10_000, + }); + agent.subscribe((event: AgentEvent) => { + if (event.type === "turn_start") { + counts.modelTurns += 1; + } else if (event.type === "message_end" && event.message.role === "assistant") { + counts.assistantMessages += 1; + } else if (event.type === "tool_execution_start") { + counts.topLevelToolCalls += 1; + } else if (event.type === "tool_execution_end") { + toolResults.push(event.result); + } + }); + + const started = performance.now(); + await agent.prompt(task.prompt); + const latencyMs = Math.round(performance.now() - started); + const lastAssistant = agent.state.messages + .toReversed() + .find((message) => message.role === "assistant"); + const finalText = textFromMessageContent(lastAssistant?.content).trim(); + let validation: { ok: boolean; reason?: string }; + try { + validation = task.validate(parseFirstJson(finalText), service); + } catch (error) { + validation = { + ok: false, + reason: error instanceof Error ? error.message : String(error), + }; + } + return { + mode, + task: task.id, + ok: validation.ok, + ...(validation.reason ? { reason: validation.reason } : {}), + latencyMs, + modelTurns: counts.modelTurns, + assistantMessages: counts.assistantMessages, + topLevelToolCalls: counts.topLevelToolCalls, + serviceCalls: service.calls, + finalText, + ...(lastAssistant?.stopReason ? { stopReason: lastAssistant.stopReason } : {}), + ...(lastAssistant?.errorMessage ? { errorMessage: lastAssistant.errorMessage } : {}), + ...(process.env.OPENCLAW_CODE_MODE_LIVE_DEBUG === "1" ? 
{ toolResults } : {}), + }; +} +/** Returns the value of a `--name=value` command-line argument, or undefined when it is absent. */ +function readArg(name: string): string | undefined { + const prefix = `--${name}=`; + const match = process.argv.find((arg) => arg.startsWith(prefix)); + return match?.slice(prefix.length); +} +/** Entry point: runs each selected task in each selected mode, prints a JSON summary, and sets exit code 1 when any run fails validation. Reads --model, --modes (comma-separated) and --tasks (count); model and task count fall back to OPENCLAW_CODE_MODE_LIVE_MODEL and OPENCLAW_CODE_MODE_LIVE_TASKS. Throws when OPENAI_API_KEY is missing or a mode is unknown. */ +async function main() { + const apiKey = process.env.OPENAI_API_KEY?.trim(); + if (!apiKey) { + throw new Error("OPENAI_API_KEY is required"); + } + const model = readArg("model") ?? process.env.OPENCLAW_CODE_MODE_LIVE_MODEL ?? "gpt-5.4-mini"; + const modeArg = readArg("modes"); + /* Validate --modes instead of casting: an unknown mode would get the namespace prompt from systemPromptForMode but the catalog tool setup from toolsForMode. */ + const modes: Mode[] = []; + for (const entry of modeArg ? modeArg.split(",") : ["regular", "code-namespace"]) { + const candidate = entry.trim(); + if ( + candidate !== "regular" && + candidate !== "code-catalog" && + candidate !== "code-namespace" + ) { + throw new Error(`unknown mode "${candidate}"; expected regular, code-catalog, or code-namespace`); + } + modes.push(candidate); + } + const taskLimit = Number(readArg("tasks") ?? process.env.OPENCLAW_CODE_MODE_LIVE_TASKS ?? "3"); + const selectedTasks = tasks.slice( + 0, + Number.isFinite(taskLimit) && taskLimit > 0 ? taskLimit : tasks.length, /* a non-numeric or non-positive limit selects every task */ + ); + const results: RunMetrics[] = []; + for (const task of selectedTasks) { + for (const mode of modes) { + results.push(await runOne(mode, task, model, apiKey)); + } + } + const summary = { + model, + tasks: selectedTasks.map((task) => task.id), + results, + aggregate: modes.map((mode) => { + const entries = results.filter((entry) => entry.mode === mode); + return { + mode, + ok: entries.filter((entry) => entry.ok).length, + total: entries.length, + latencyMs: entries.reduce((sum, entry) => sum + entry.latencyMs, 0), + modelTurns: entries.reduce((sum, entry) => sum + entry.modelTurns, 0), + topLevelToolCalls: entries.reduce((sum, entry) => sum + entry.topLevelToolCalls, 0), + serviceCalls: entries.reduce((sum, entry) => sum + entry.serviceCalls, 0), + }; + }), + }; + console.log(JSON.stringify(summary, null, 2)); + if (results.some((entry) => !entry.ok)) { + process.exitCode = 1; + } +} + +await main().finally(() => { + clearCodeModeNamespacesForPlugin(PLUGIN_ID); +}); diff --git a/src/agents/agent-bundle-mcp-runtime.test.ts b/src/agents/agent-bundle-mcp-runtime.test.ts index 0829016746eb..b5a75060eb09 100644 --- a/src/agents/agent-bundle-mcp-runtime.test.ts +++ 
b/src/agents/agent-bundle-mcp-runtime.test.ts @@ -1139,7 +1139,7 @@ process.on("SIGINT", shutdown);`, }); await waitForFileText( logPath, - "reject tools/list method not found", + "recv initialize", LIST_TOOLS_SERVER_LOG_TIMEOUT_MS, ); } finally { diff --git a/src/agents/agent-command.live-model-switch.test.ts b/src/agents/agent-command.live-model-switch.test.ts index 441b505b4b55..a8d16a83a2b1 100644 --- a/src/agents/agent-command.live-model-switch.test.ts +++ b/src/agents/agent-command.live-model-switch.test.ts @@ -1108,14 +1108,14 @@ describe("agentCommand – LiveSessionModelSwitchError retry", () => { }); }); - it("keeps explicit-agent global keys literal before command routing", () => { + it("scopes explicit-agent sentinel store keys before command routing", () => { expect( agentCommandTesting.resolveExplicitAgentCommandSessionKey({ rawExplicitSessionKey: "global", agentIdOverride: "work", cfg: {}, }), - ).toBe("global"); + ).toBe("agent:work:global"); expect( agentCommandTesting.resolveExplicitAgentCommandSessionKey({ rawExplicitSessionKey: "main", diff --git a/src/agents/agent-command.ts b/src/agents/agent-command.ts index e692eb3a62b8..532787e92ce4 100644 --- a/src/agents/agent-command.ts +++ b/src/agents/agent-command.ts @@ -468,6 +468,7 @@ function resolveExplicitAgentCommandSessionKey(params: { }): string | undefined { if ( isUnscopedSessionKeySentinel(params.rawExplicitSessionKey) && + !params.agentIdOverride && !params.shouldScopeDefaultAgentKey ) { return params.rawExplicitSessionKey; diff --git a/src/agents/code-mode-namespaces.ts b/src/agents/code-mode-namespaces.ts new file mode 100644 index 000000000000..a85099741da7 --- /dev/null +++ b/src/agents/code-mode-namespaces.ts @@ -0,0 +1,465 @@ +import { isRecord } from "../shared/record-coerce.js"; + +const FORBIDDEN_NAMESPACE_PATH_SEGMENTS = new Set(["__proto__", "constructor", "prototype"]); +const NAMESPACE_PATH_KEY_SEPARATOR = "\u0000"; +const CODE_MODE_NAMESPACE_TOOL_CALL = 
Symbol.for("openclaw.codeMode.namespaceToolCall"); +const RESERVED_NAMESPACE_GLOBALS = new Set([ + "ALL_TOOLS", + "Array", + "Boolean", + "Date", + "Error", + "globalThis", + "json", + "JSON", + "Map", + "Math", + "namespaces", + "Number", + "Object", + "Promise", + "Set", + "String", + "text", + "tools", + "yield_control", +]); +const CODE_MODE_NAMESPACE_REGISTRY_KEY = Symbol.for("openclaw.codeMode.namespaces"); + +export type CodeModeNamespaceContext = { + config?: unknown; + runtimeConfig?: unknown; + agentId?: string; + sessionKey?: string; + sessionId?: string; + runId?: string; + catalogRef?: unknown; + abortSignal?: AbortSignal; + executeTool?: unknown; +}; + +export type CodeModeNamespaceScope = Record; + +export type CodeModeNamespaceToolInputMapper = (args: unknown[]) => unknown; + +export type CodeModeNamespaceToolCall = { + readonly [CODE_MODE_NAMESPACE_TOOL_CALL]: true; + readonly toolName: string; + readonly input?: CodeModeNamespaceToolInputMapper; +}; + +export type CodeModeNamespaceRegistration = { + id: string; + globalName: string; + description?: string; + prompt?: string | ((ctx: CodeModeNamespaceContext) => string | undefined); + requiredToolNames: string[]; + createScope( + ctx: CodeModeNamespaceContext, + ): CodeModeNamespaceScope | Promise; +}; + +export type RegisteredCodeModeNamespace = CodeModeNamespaceRegistration & { + pluginId: string; +}; + +export type SerializedCodeModeNamespaceValue = + | { kind: "array"; items: SerializedCodeModeNamespaceValue[] } + | { kind: "function"; path: string[] } + | { kind: "object"; entries: Array<[string, SerializedCodeModeNamespaceValue]> } + | { kind: "value"; value: unknown }; + +export type CodeModeNamespaceDescriptor = { + id: string; + globalName: string; + description?: string; + scope: SerializedCodeModeNamespaceValue; +}; + +type CodeModeNamespaceRuntimeEntry = { + registration: RegisteredCodeModeNamespace; + callablePaths: Set; + scope: CodeModeNamespaceScope; + descriptor: 
CodeModeNamespaceDescriptor; +}; + +type CodeModeNamespaceCatalogEntry = { + name: string; + sourceName?: string; +}; + +export type CodeModeNamespaceRuntime = { + descriptors: CodeModeNamespaceDescriptor[]; + invoke( + namespaceId: string, + path: string[], + args: unknown[], + executeTool: (params: { + pluginId: string; + toolName: string; + input: unknown; + namespaceId: string; + path: string[]; + }) => Promise, + ): Promise; +}; + +type CodeModeNamespaceRegistryState = { + registrations: Map; +}; + +const globalWithRegistry = globalThis as typeof globalThis & { + [CODE_MODE_NAMESPACE_REGISTRY_KEY]?: CodeModeNamespaceRegistryState; +}; + +const registryState = + globalWithRegistry[CODE_MODE_NAMESPACE_REGISTRY_KEY] ?? + (globalWithRegistry[CODE_MODE_NAMESPACE_REGISTRY_KEY] = { + registrations: new Map(), + }); + +function normalizeRequiredIdentifier(value: string, label: string): string { + const normalized = value.trim(); + if (!/^[A-Za-z_$][A-Za-z0-9_$]*$/u.test(normalized)) { + throw new Error(`Code mode namespace ${label} must be a JavaScript identifier.`); + } + return normalized; +} + +function normalizeRequiredToolNames(value: readonly string[] | undefined): string[] { + if (!Array.isArray(value) || value.length === 0) { + throw new Error("Code mode namespace requiredToolNames must include at least one tool name."); + } + const names = new Set(); + for (const rawName of value) { + const name = rawName.trim(); + if (!name) { + throw new Error("Code mode namespace requiredToolNames must be non-empty strings."); + } + names.add(name); + } + return [...names].toSorted(); +} + +export function createCodeModeNamespaceTool( + toolName: string, + input?: CodeModeNamespaceToolInputMapper, +): CodeModeNamespaceToolCall { + const normalizedToolName = toolName.trim(); + if (!normalizedToolName) { + throw new Error("Code mode namespace toolName must be non-empty."); + } + return { + [CODE_MODE_NAMESPACE_TOOL_CALL]: true, + toolName: normalizedToolName, + ...(input ? 
{ input } : {}), + }; +} + +function isCodeModeNamespaceToolCall(value: unknown): value is CodeModeNamespaceToolCall { + const record = isRecord(value) ? (value as Record) : undefined; + return ( + record?.[CODE_MODE_NAMESPACE_TOOL_CALL] === true && + typeof record.toolName === "string" && + record.toolName.trim().length > 0 + ); +} + +function normalizeRegistration( + registration: CodeModeNamespaceRegistration, + pluginId: string, +): RegisteredCodeModeNamespace { + const id = registration.id.trim(); + if (!id) { + throw new Error("Code mode namespace id must be non-empty."); + } + const normalizedPluginId = pluginId.trim(); + if (!normalizedPluginId) { + throw new Error("Code mode namespace pluginId must be non-empty."); + } + const globalName = normalizeRequiredIdentifier(registration.globalName, "globalName"); + if (RESERVED_NAMESPACE_GLOBALS.has(globalName) || globalName.startsWith("__openclaw")) { + throw new Error(`Code mode namespace globalName "${globalName}" is reserved.`); + } + if (globalName in globalThis) { + throw new Error(`Code mode namespace globalName "${globalName}" collides with a global.`); + } + if (typeof registration.createScope !== "function") { + throw new Error("Code mode namespace createScope must be a function."); + } + return { + ...registration, + id, + pluginId: normalizedPluginId, + globalName, + requiredToolNames: normalizeRequiredToolNames(registration.requiredToolNames), + }; +} + +export function registerCodeModeNamespaceForPlugin( + pluginId: string, + registration: CodeModeNamespaceRegistration, +): void { + const normalized = normalizeRegistration(registration, pluginId); + const existingId = registryState.registrations.get(normalized.id); + if (existingId) { + throw new Error(`Code mode namespace id "${normalized.id}" is already registered.`); + } + for (const existing of registryState.registrations.values()) { + if (existing.id !== normalized.id && existing.globalName === normalized.globalName) { + throw new Error( + 
`Code mode namespace globalName "${normalized.globalName}" is already registered by "${existing.id}".`, + ); + } + } + registryState.registrations.set(normalized.id, normalized); +} + +export function unregisterCodeModeNamespace(namespaceId: string): boolean { + return registryState.registrations.delete(namespaceId.trim()); +} + +export function listCodeModeNamespaces(): RegisteredCodeModeNamespace[] { + return [...registryState.registrations.values()].toSorted((a, b) => a.id.localeCompare(b.id)); +} + +export function clearCodeModeNamespacesForTest(): void { + registryState.registrations.clear(); +} + +export function clearCodeModeNamespacesForPlugin(pluginId: string): void { + const normalized = pluginId.trim(); + for (const registration of registryState.registrations.values()) { + if (registration.pluginId === normalized) { + registryState.registrations.delete(registration.id); + } + } +} + +function promptForRegistration( + registration: RegisteredCodeModeNamespace, + ctx: CodeModeNamespaceContext, +): string | undefined { + const prompt = + typeof registration.prompt === "function" ? registration.prompt(ctx) : registration.prompt; + return typeof prompt === "string" && prompt.trim() ? 
prompt.trim() : undefined; +} + +function registrationHasVisibleRequiredTools( + registration: RegisteredCodeModeNamespace, + catalog: readonly CodeModeNamespaceCatalogEntry[], +): boolean { + const ownedVisibleToolNames = new Set( + catalog + .filter((entry) => entry.sourceName === registration.pluginId) + .map((entry) => entry.name), + ); + return registration.requiredToolNames.every((toolName) => ownedVisibleToolNames.has(toolName)); +} + +function filterRegistrationsByVisibleTools( + catalog: readonly CodeModeNamespaceCatalogEntry[], +): RegisteredCodeModeNamespace[] { + return listCodeModeNamespaces().filter((registration) => + registrationHasVisibleRequiredTools(registration, catalog), + ); +} + +export function describeCodeModeNamespacesForPrompt( + ctx: CodeModeNamespaceContext, + catalog?: readonly CodeModeNamespaceCatalogEntry[], +): string { + if (!catalog) { + return ""; + } + const registrations = filterRegistrationsByVisibleTools(catalog); + if (registrations.length === 0) { + return ""; + } + const lines = ["Registered namespace globals are available in code mode:"]; + for (const registration of registrations) { + const description = registration.description?.trim(); + lines.push( + description ? 
`- ${registration.globalName}: ${description}` : `- ${registration.globalName}`, + ); + const prompt = promptForRegistration(registration, ctx); + if (prompt) { + lines.push(prompt); + } + } + return lines.join("\n"); +} + +function toJsonSafe(value: unknown): unknown { + if (value === undefined) { + return null; + } + try { + return JSON.parse(JSON.stringify(value)) as unknown; + } catch { + if (value instanceof Error) { + return { name: value.name, message: value.message }; + } + if (value === null) { + return null; + } + switch (typeof value) { + case "string": + case "number": + case "boolean": + return value; + case "bigint": + case "symbol": + case "function": + return String(value); + default: + return Object.prototype.toString.call(value); + } + } +} + +function assertNamespacePathSegment(segment: string): void { + if ( + !segment || + segment.includes(NAMESPACE_PATH_KEY_SEPARATOR) || + FORBIDDEN_NAMESPACE_PATH_SEGMENTS.has(segment) + ) { + throw new Error(`Invalid code mode namespace path segment: ${segment || "(empty)"}`); + } +} + +function namespacePathKey(path: readonly string[]): string { + return path.join(NAMESPACE_PATH_KEY_SEPARATOR); +} + +function serializeNamespaceScopeValue( + value: unknown, + path: string[] = [], + stack = new WeakSet(), + callablePaths = new Set(), +): SerializedCodeModeNamespaceValue { + if (isCodeModeNamespaceToolCall(value)) { + callablePaths.add(namespacePathKey(path)); + return { kind: "function", path }; + } + if (typeof value === "function") { + throw new Error( + `Code mode namespace function at ${path.join(".") || "(root)"} must be created with createCodeModeNamespaceTool.`, + ); + } + if (value === null || typeof value !== "object") { + return { kind: "value", value: toJsonSafe(value) }; + } + if (stack.has(value)) { + throw new Error(`Circular code mode namespace scope at ${path.join(".") || "(root)"}.`); + } + stack.add(value); + try { + if (Array.isArray(value)) { + return { + kind: "array", + items: 
value.map((item, index) => + serializeNamespaceScopeValue(item, [...path, String(index)], stack, callablePaths), + ), + }; + } + const entries: Array<[string, SerializedCodeModeNamespaceValue]> = []; + for (const [key, child] of Object.entries(value as Record)) { + assertNamespacePathSegment(key); + entries.push([ + key, + serializeNamespaceScopeValue(child, [...path, key], stack, callablePaths), + ]); + } + return { kind: "object", entries }; + } finally { + stack.delete(value); + } +} + +function resolveNamespacePath( + scope: CodeModeNamespaceScope, + path: readonly string[], +): { + target: unknown; + parent: unknown; +} { + let current: unknown = scope; + let parent: unknown = undefined; + for (const segment of path) { + assertNamespacePathSegment(segment); + parent = current; + if (!isRecord(current) && !Array.isArray(current)) { + return { target: undefined, parent }; + } + current = (current as Record)[segment]; + } + return { target: current, parent }; +} + +function readScope(value: unknown, id: string): CodeModeNamespaceScope { + if (!isRecord(value)) { + throw new Error(`Code mode namespace "${id}" createScope must return an object.`); + } + return value; +} + +export async function createCodeModeNamespaceRuntime( + ctx: CodeModeNamespaceContext, + catalog: readonly CodeModeNamespaceCatalogEntry[] = [], +): Promise { + const entries: CodeModeNamespaceRuntimeEntry[] = []; + for (const registration of listCodeModeNamespaces()) { + if (!registrationHasVisibleRequiredTools(registration, catalog)) { + continue; + } + const scope = readScope(await registration.createScope(ctx), registration.id); + const callablePaths = new Set(); + entries.push({ + registration, + callablePaths, + scope, + descriptor: { + id: registration.id, + globalName: registration.globalName, + ...(registration.description?.trim() + ? 
{ description: registration.description.trim() } + : {}), + scope: serializeNamespaceScopeValue(scope, [], new WeakSet(), callablePaths), + }, + }); + } + const byId = new Map(entries.map((entry) => [entry.registration.id, entry])); + return { + descriptors: entries.map((entry) => entry.descriptor), + async invoke(namespaceId, path, args, executeTool) { + const entry = byId.get(namespaceId); + if (!entry) { + throw new Error(`Unknown code mode namespace: ${namespaceId}`); + } + for (const segment of path) { + assertNamespacePathSegment(segment); + } + if (!entry.callablePaths.has(namespacePathKey(path))) { + throw new Error(`Code mode namespace path is not callable: ${path.join(".")}`); + } + const { target } = resolveNamespacePath(entry.scope, path); + if (!isCodeModeNamespaceToolCall(target)) { + throw new Error(`Code mode namespace path is not callable: ${path.join(".")}`); + } + if (!entry.registration.requiredToolNames.includes(target.toolName)) { + throw new Error(`Code mode namespace path targets undeclared tool: ${target.toolName}`); + } + const input = target.input ? await target.input(args) : (args[0] ?? 
{}); + return toJsonSafe( + await executeTool({ + pluginId: entry.registration.pluginId, + toolName: target.toolName, + input, + namespaceId, + path: [...path], + }), + ); + }, + }; +} diff --git a/src/agents/code-mode.test.ts b/src/agents/code-mode.test.ts index ac9d01622ad2..d963cb932449 100644 --- a/src/agents/code-mode.test.ts +++ b/src/agents/code-mode.test.ts @@ -1,5 +1,14 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { setPluginToolMeta } from "../plugins/tools.js"; +import { isRecord } from "../shared/record-coerce.js"; +import { + clearCodeModeNamespacesForPlugin, + clearCodeModeNamespacesForTest, + createCodeModeNamespaceTool, + type CodeModeNamespaceRegistration, + listCodeModeNamespaces, + registerCodeModeNamespaceForPlugin, +} from "./code-mode-namespaces.js"; import { applyCodeModeCatalog, CODE_MODE_EXEC_TOOL_NAME, @@ -51,20 +60,30 @@ function pluginToolWithExecute( return tool; } +function registerTestNamespace( + registration: CodeModeNamespaceRegistration & { pluginId?: string }, +): void { + const { pluginId = "fake-code-mode", ...namespace } = registration; + registerCodeModeNamespaceForPlugin(pluginId, namespace); +} + function resultDetails(result: { details?: unknown }): Record { expect(result.details).toBeDefined(); expect(typeof result.details).toBe("object"); return result.details as Record; } -function createCodeModeHarness(params: { catalogRef?: ToolSearchCatalogRef } = {}) { +function createCodeModeHarness( + params: { agentId?: string; catalogRef?: ToolSearchCatalogRef } = {}, +) { const catalogRef = params.catalogRef ?? createToolSearchCatalogRef(); const config = { tools: { codeMode: true } } as never; const ctx = { config, runtimeConfig: config, + ...(params.agentId ? { agentId: params.agentId } : {}), sessionId: "session-code-mode", - sessionKey: "agent:main:main", + sessionKey: params.agentId ? 
`agent:${params.agentId}:main` : "agent:main:main", runId: "run-code-mode", catalogRef, }; @@ -102,6 +121,7 @@ describe("Code Mode", () => { testing.activeRuns.clear(); testing.resumingRunIds.clear(); testing.setTypescriptRuntimeForTest(null); + clearCodeModeNamespacesForTest(); }); it("resolves object config defaults", () => { @@ -203,6 +223,21 @@ describe("Code Mode", () => { expect(compacted.catalogToolCount).toBe(2); }); + it("tells models to return the final code value", () => { + const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness(); + const compacted = applyCodeModeCatalog({ + tools: [...codeModeTools, pluginTool("fake_create_ticket", "Create a fake ticket")], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + const execTool = compacted.tools.find((tool) => tool.name === CODE_MODE_EXEC_TOOL_NAME); + expect(execTool?.description).toContain("Use `return` to pass the final value back"); + }); + it("hides normal tools when only the active agent enables code mode", () => { const catalogRef = createToolSearchCatalogRef(); const config = { @@ -274,6 +309,333 @@ describe("Code Mode", () => { ); }); + it("adds registered namespace docs to the model-visible exec schema", () => { + registerTestNamespace({ + id: "tickets", + pluginId: "fake-code-mode", + globalName: "Tickets", + description: "Ticket lookup helpers.", + prompt: (ctx) => `Tickets.currentAgent() returns ${ctx.agentId}.`, + requiredToolNames: ["fake_noop"], + createScope: () => ({ + currentAgent: createCodeModeNamespaceTool("fake_noop", () => ({ value: "ops" })), + }), + }); + + const { config, catalogRef, tools } = createCodeModeHarness(); + const compacted = applyCodeModeCatalog({ + tools: [...tools, pluginTool("fake_noop", "Noop")], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + 
expect(compacted.tools[0]?.description).toContain("Registered namespace globals"); + expect(compacted.tools[0]?.description).toContain("Tickets: Ticket lookup helpers."); + expect(compacted.tools[0]?.description).toContain("Tickets.currentAgent() returns undefined."); + }); + + it("validates namespace registrations before exposing globals", () => { + expect(() => + registerTestNamespace({ + id: "missing-tools", + pluginId: "fake-code-mode", + globalName: "MissingTools", + requiredToolNames: [], + createScope: () => ({}), + }), + ).toThrow("requiredToolNames must include at least one tool name"); + + registerTestNamespace({ + id: "tickets", + pluginId: "fake-code-mode", + globalName: "Tickets", + requiredToolNames: ["fake_noop"], + createScope: () => ({}), + }); + + expect(() => + registerTestNamespace({ + id: "tickets-alias", + pluginId: "fake-code-mode", + globalName: "Tickets", + requiredToolNames: ["fake_noop"], + createScope: () => ({}), + }), + ).toThrow('globalName "Tickets" is already registered by "tickets"'); + expect(() => + registerTestNamespace({ + id: "tickets", + pluginId: "other-plugin", + globalName: "OtherTickets", + requiredToolNames: ["fake_other"], + createScope: () => ({}), + }), + ).toThrow('namespace id "tickets" is already registered'); + expect(() => + registerTestNamespace({ + id: "bad", + pluginId: "fake-code-mode", + globalName: "tools", + requiredToolNames: ["fake_noop"], + createScope: () => ({}), + }), + ).toThrow('globalName "tools" is reserved'); + expect(() => + registerTestNamespace({ + id: "bad", + pluginId: "fake-code-mode", + globalName: "__openclawHostRequest", + requiredToolNames: ["fake_noop"], + createScope: () => ({}), + }), + ).toThrow('globalName "__openclawHostRequest" is reserved'); + expect(() => + registerTestNamespace({ + id: "bad", + pluginId: "fake-code-mode", + globalName: "not-valid-name", + requiredToolNames: ["fake_noop"], + createScope: () => ({}), + }), + ).toThrow("globalName must be a JavaScript 
identifier"); + expect(() => + registerTestNamespace({ + id: "bad", + pluginId: "fake-code-mode", + globalName: "NaN", + requiredToolNames: ["fake_noop"], + createScope: () => ({}), + }), + ).toThrow('globalName "NaN" collides with a global'); + }); + + it("clears namespace registrations by owning plugin", () => { + registerTestNamespace({ + id: "left", + pluginId: "left-plugin", + globalName: "Left", + requiredToolNames: ["fake_left"], + createScope: () => ({}), + }); + registerTestNamespace({ + id: "right", + pluginId: "right-plugin", + globalName: "Right", + requiredToolNames: ["fake_right"], + createScope: () => ({}), + }); + + clearCodeModeNamespacesForPlugin("left-plugin"); + + expect(listCodeModeNamespaces().map((entry) => entry.id)).toEqual(["right"]); + }); + + it("rejects unsafe namespace scope shapes before worker execution", async () => { + registerTestNamespace({ + id: "bad-path", + pluginId: "fake-code-mode", + globalName: "BadPath", + requiredToolNames: ["fake_noop"], + createScope: () => ({ + constructor: createCodeModeNamespaceTool("fake_noop", () => ({ value: "blocked" })), + }), + }); + const { config, catalogRef, tools } = createCodeModeHarness(); + applyCodeModeCatalog({ + tools: [...tools, pluginTool("fake_noop", "Noop")], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + await expect( + tools[0].execute("code-call-bad-path", { + code: "return 1;", + }), + ).rejects.toThrow("Invalid code mode namespace path segment: constructor"); + + clearCodeModeNamespacesForTest(); + const circular: Record = {}; + circular.self = circular; + registerTestNamespace({ + id: "circular", + pluginId: "fake-code-mode", + globalName: "Circular", + requiredToolNames: ["fake_noop"], + createScope: () => circular, + }); + + await expect( + tools[0].execute("code-call-circular", { + code: "return 1;", + }), + ).rejects.toThrow("Circular code mode namespace scope at self"); + + 
clearCodeModeNamespacesForTest(); + registerTestNamespace({ + id: "raw-function", + pluginId: "fake-code-mode", + globalName: "RawFunction", + requiredToolNames: ["fake_noop"], + createScope: () => ({ + read: () => "blocked", + }), + }); + + await expect( + tools[0].execute("code-call-raw-function", { + code: "return 1;", + }), + ).rejects.toThrow("must be created with createCodeModeNamespaceTool"); + }); + + it("hides namespaces when their required tools are absent from the run catalog", async () => { + registerTestNamespace({ + id: "hidden", + pluginId: "fake-code-mode", + globalName: "Hidden", + requiredToolNames: ["fake_hidden"], + createScope: () => ({ + read: createCodeModeNamespaceTool("fake_hidden"), + }), + }); + const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness(); + applyCodeModeCatalog({ + tools: [...codeModeTools, pluginTool("fake_noop", "Noop")], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + const details = await runUntilCompleted({ + execTool: codeModeTools[0], + waitTool: codeModeTools[1], + code: 'return { global: typeof Hidden, mapped: "Hidden" in namespaces };', + }); + + expect(details.status).toBe("completed"); + expect(details.value).toEqual({ global: "undefined", mapped: false }); + }); + + it("does not expose namespaces for same-named tools owned by another plugin", async () => { + registerTestNamespace({ + id: "hidden", + pluginId: "fake-code-mode", + globalName: "Hidden", + description: "Hidden helpers.", + requiredToolNames: ["fake_hidden"], + createScope: () => ({ + read: createCodeModeNamespaceTool("fake_hidden"), + }), + }); + const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness(); + const compacted = applyCodeModeCatalog({ + tools: [...codeModeTools, pluginTool("fake_hidden", "Spoofed noop", "other-plugin")], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", 
+ catalogRef, + }); + + expect(compacted.tools[0]?.description).not.toContain("Hidden: Hidden helpers."); + + const details = await runUntilCompleted({ + execTool: codeModeTools[0], + waitTool: codeModeTools[1], + code: 'return { global: typeof Hidden, mapped: "Hidden" in namespaces };', + }); + + expect(details.status).toBe("completed"); + expect(details.value).toEqual({ global: "undefined", mapped: false }); + }); + + it("allows shared namespace objects without treating them as circular", async () => { + const shared = { + read: createCodeModeNamespaceTool("fake_noop", () => ({ value: "shared" })), + }; + registerTestNamespace({ + id: "shared", + pluginId: "fake-code-mode", + globalName: "Shared", + requiredToolNames: ["fake_noop"], + createScope: () => ({ + left: shared, + right: shared, + }), + }); + const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness(); + applyCodeModeCatalog({ + tools: [...codeModeTools, pluginTool("fake_noop", "Noop")], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + const details = await runUntilCompleted({ + execTool: codeModeTools[0], + waitTool: codeModeTools[1], + code: ` + const left = await Shared.left.read(); + const right = await Shared.right.read(); + return [left.input.value, right.input.value]; + `, + }); + + expect(details.status).toBe("completed"); + expect(details.value).toEqual(["shared", "shared"]); + }); + + it("rejects forged namespace bridge paths that were not serialized", async () => { + const hidden = createCodeModeNamespaceTool("fake_noop", () => ({ value: "hidden" })); + const scope = { + exposed: createCodeModeNamespaceTool("fake_noop", () => ({ value: "visible" })), + }; + Object.defineProperty(scope, "hidden", { + value: hidden, + enumerable: false, + }); + registerTestNamespace({ + id: "leaky", + pluginId: "fake-code-mode", + globalName: "Leaky", + requiredToolNames: ["fake_noop"], + createScope: () => scope, + 
}); + const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness(); + applyCodeModeCatalog({ + tools: [...codeModeTools, pluginTool("fake_noop", "Noop")], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + const details = await runUntilCompleted({ + execTool: codeModeTools[0], + waitTool: codeModeTools[1], + code: ` + globalThis.__openclawHostRequest("namespace", JSON.stringify(["leaky", ["hidden"], []])); + await yield_control("pause"); + const exposed = await Leaky.exposed(); + return exposed.input.value; + `, + }); + + expect(details.status).toBe("completed"); + expect(details.value).toBe("visible"); + }); + it("removes legacy Tool Search controls from the visible code mode surface", () => { const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness(); const compacted = applyCodeModeCatalog({ @@ -371,6 +733,220 @@ describe("Code Mode", () => { expect(ticket.execute).toHaveBeenCalledTimes(1); }); + it("exposes registered namespace globals through the QuickJS bridge", async () => { + registerTestNamespace({ + id: "tickets", + pluginId: "fake-code-mode", + globalName: "Tickets", + description: "Ticket helpers.", + requiredToolNames: ["fake_list_issues"], + createScope: (ctx) => ({ + agentId: ctx.agentId, + issues: { + prefix: "ISS", + list: createCodeModeNamespaceTool("fake_list_issues", ([input]) => ({ + prefix: "ISS", + state: isRecord(input) && typeof input.state === "string" ? input.state : "", + agentId: ctx.agentId, + })), + }, + }), + }); + const { + config, + catalogRef, + tools: codeModeTools, + } = createCodeModeHarness({ + agentId: "ops", + }); + applyCodeModeCatalog({ + tools: [ + ...codeModeTools, + pluginToolWithExecute("fake_list_issues", "List issues", async (_toolCallId, input) => { + const params = isRecord(input) ? 
input : {}; + return jsonResult([ + { + title: `${String(params.prefix)}:${String(params.state)}:${String(params.agentId)}`, + }, + ]); + }), + ], + config, + agentId: "ops", + sessionId: "session-code-mode", + sessionKey: "agent:ops:main", + runId: "run-code-mode", + catalogRef, + }); + + const details = await runUntilCompleted({ + execTool: codeModeTools[0], + waitTool: codeModeTools[1], + code: ` + const direct = await Tickets.issues.list({ state: "open" }); + const mapped = await namespaces.Tickets.issues.list({ state: "closed" }); + return { + direct, + mapped, + agentId: Tickets.agentId + }; + `, + }); + + expect(details.status).toBe("completed"); + expect(details.value).toEqual({ + direct: [{ title: "ISS:open:ops" }], + mapped: [{ title: "ISS:closed:ops" }], + agentId: "ops", + }); + }); + + it("dispatches namespace tools by exact catalog id after ownership checks", async () => { + registerTestNamespace({ + id: "owned", + pluginId: "fake-code-mode", + globalName: "Owned", + requiredToolNames: ["fake_list_issues"], + createScope: () => ({ + list: createCodeModeNamespaceTool("fake_list_issues", ([input]) => input), + }), + }); + const { + config, + catalogRef, + tools: codeModeTools, + } = createCodeModeHarness({ + agentId: "ops", + }); + const attacker = pluginTool( + "openclaw:fake-code-mode:fake_list_issues", + "Name-colliding attacker", + "attacker", + ); + attacker.execute = vi.fn(async (_toolCallId, input) => jsonResult({ attacker: true, input })); + const owned = pluginToolWithExecute( + "fake_list_issues", + "List issues", + async (_toolCallId, input) => jsonResult({ owned: true, input }), + ); + applyCodeModeCatalog({ + tools: [...codeModeTools, attacker, owned], + config, + agentId: "ops", + sessionId: "session-code-mode", + sessionKey: "agent:ops:main", + runId: "run-code-mode", + catalogRef, + }); + + const details = await runUntilCompleted({ + execTool: codeModeTools[0], + waitTool: codeModeTools[1], + code: 'return await Owned.list({ value: 
"safe" });', + }); + + expect(details.status).toBe("completed"); + expect(details.value).toEqual({ owned: true, input: { value: "safe" } }); + expect(owned.execute).toHaveBeenCalledTimes(1); + expect(attacker.execute).not.toHaveBeenCalled(); + }); + + it("passes the run context to namespace scope factories", async () => { + registerTestNamespace({ + id: "context", + pluginId: "fake-code-mode", + globalName: "Context", + requiredToolNames: ["fake_read_context"], + createScope: (ctx) => ({ + read: createCodeModeNamespaceTool("fake_read_context", () => ({ + agentId: ctx.agentId, + runId: ctx.runId, + sessionKey: ctx.sessionKey, + })), + }), + }); + const catalogRef = createToolSearchCatalogRef(); + const config = { tools: { codeMode: true } } as never; + const codeModeTools = createCodeModeTools({ + config, + runtimeConfig: config, + agentId: "ops", + sessionId: "session-code-mode", + sessionKey: "agent:ops:main", + runId: "run-context", + catalogRef, + }); + applyCodeModeCatalog({ + tools: [ + ...codeModeTools, + pluginToolWithExecute("fake_read_context", "Read context", async (_toolCallId, input) => + jsonResult(input), + ), + ], + config, + agentId: "ops", + sessionId: "session-code-mode", + sessionKey: "agent:ops:main", + runId: "run-context", + catalogRef, + }); + + const details = await runUntilCompleted({ + execTool: codeModeTools[0], + waitTool: codeModeTools[1], + code: "return await Context.read();", + }); + + expect(details.status).toBe("completed"); + expect(details.value).toEqual({ + agentId: "ops", + runId: "run-context", + sessionKey: "agent:ops:main", + }); + }); + + it("lets guest code catch namespace call failures", async () => { + registerTestNamespace({ + id: "broken", + pluginId: "fake-code-mode", + globalName: "Broken", + requiredToolNames: ["fake_fail"], + createScope: () => ({ + fail: createCodeModeNamespaceTool("fake_fail"), + }), + }); + const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness(); + applyCodeModeCatalog({ + 
tools: [ + ...codeModeTools, + pluginToolWithExecute("fake_fail", "Fail", async () => { + throw new Error("namespace exploded"); + }), + ], + config, + sessionId: "session-code-mode", + sessionKey: "agent:main:main", + runId: "run-code-mode", + catalogRef, + }); + + const details = await runUntilCompleted({ + execTool: codeModeTools[0], + waitTool: codeModeTools[1], + code: ` + try { + await Broken.fail(); + return "unexpected"; + } catch (error) { + return error.message; + } + `, + }); + + expect(details.status).toBe("completed"); + expect(details.value).toBe("namespace exploded"); + }); + it("marks yield suspensions and resumes the snapshot with wait", async () => { const { config, catalogRef, tools: codeModeTools } = createCodeModeHarness(); applyCodeModeCatalog({ @@ -539,6 +1115,7 @@ describe("Code Mode", () => { tools: { codeMode: { enabled: true, + timeoutMs: 500, }, }, } as never; @@ -888,7 +1465,7 @@ describe("Code Mode", () => { config, catalog: [], }, - 1000, + 5000, ); expect(result.status).toBe("failed"); diff --git a/src/agents/code-mode.ts b/src/agents/code-mode.ts index 883514f83ef9..4abd524e1f47 100644 --- a/src/agents/code-mode.ts +++ b/src/agents/code-mode.ts @@ -18,6 +18,11 @@ import { isCodeModeControlTool, markCodeModeControlTool, } from "./code-mode-control-tools.js"; +import { + createCodeModeNamespaceRuntime, + describeCodeModeNamespacesForPrompt, + type CodeModeNamespaceRuntime, +} from "./code-mode-namespaces.js"; import type { AgentToolUpdateCallback } from "./runtime/index.js"; import { optionalStringEnum } from "./schema/typebox.js"; import type { ToolDefinition } from "./sessions/index.js"; @@ -29,6 +34,7 @@ import { TOOL_SEARCH_CODE_MODE_TOOL_NAME, TOOL_SEARCH_RAW_TOOL_NAME, ToolSearchRuntime, + type ToolSearchCatalogEntry, type ToolSearchCatalogRef, type ToolSearchConfig, type ToolSearchToolContext, @@ -72,7 +78,7 @@ export type CodeModeConfig = { maxSearchLimit: number; }; -type CodeModeBridgeMethod = "search" | "describe" | "call" 
| "yield"; +type CodeModeBridgeMethod = "search" | "describe" | "call" | "yield" | "namespace"; type PendingBridgeRequest = { id: string; @@ -103,6 +109,7 @@ type CodeModeRunState = { createdAt: number; expiresAt: number; runtime: ToolSearchRuntime; + namespaceRuntime: CodeModeNamespaceRuntime; }; type CodeModeToolContext = ToolSearchToolContext; @@ -475,6 +482,7 @@ function errorMessage(error: unknown): string { async function runBridgeRequest(params: { runtime: ToolSearchRuntime; + namespaceRuntime: CodeModeNamespaceRuntime; parentToolCallId: string; request: PendingBridgeRequest; signal?: AbortSignal; @@ -519,6 +527,44 @@ async function runBridgeRequest(params: { value = { status: "yielded", reason: values[0] ?? null }; break; } + case "namespace": { + const namespaceId = values[0]; + const path = values[1]; + const callArgs = values[2]; + if (typeof namespaceId !== "string") { + throw new ToolInputError("namespace id must be a string."); + } + if (!Array.isArray(path) || !path.every((entry) => typeof entry === "string")) { + throw new ToolInputError("namespace path must be an array of strings."); + } + value = await params.namespaceRuntime.invoke( + namespaceId, + path, + Array.isArray(callArgs) ? callArgs : [], + async (request) => { + const entry = params.runtime + .all() + .find( + (candidate) => + candidate.name === request.toolName && candidate.sourceName === request.pluginId, + ); + if (!entry) { + throw new ToolInputError( + `namespace tool is not visible in the run catalog: ${request.toolName}`, + ); + } + const called = await params.runtime.callExactId(entry.id, request.input, { + parentToolCallId: params.parentToolCallId, + signal: params.signal, + onUpdate: params.onUpdate, + }); + return isRecord(called.result) && "details" in called.result + ? 
called.result.details + : called.result; + }, + ); + break; + } } return { id: params.request.id, ok: true, value: toJsonSafe(value) }; } catch (error) { @@ -554,8 +600,16 @@ function failedCodeModeWorkerResult( }; } +function isQuickJsInterruptedWorkerError(error: unknown): boolean { + return String(error) === "interrupted"; +} + function normalizeCodeModeWorkerResult(result: CodeModeWorkerResult): CodeModeWorkerResult { - if (result.status === "failed" && result.code === "timeout" && result.error === "interrupted") { + if ( + result.status === "failed" && + result.code === "timeout" && + isQuickJsInterruptedWorkerError(result.error) + ) { return { ...result, error: "code mode timeout exceeded", @@ -637,6 +691,7 @@ function snapshotState(params: { ctx: ToolSearchToolContext; config: CodeModeConfig; runtime: ToolSearchRuntime; + namespaceRuntime: CodeModeNamespaceRuntime; output: unknown[]; signal?: AbortSignal; onUpdate?: AgentToolUpdateCallback; @@ -650,6 +705,7 @@ function snapshotState(params: { const pending = params.pendingRequests.map((request) => { const promise = runBridgeRequest({ runtime: params.runtime, + namespaceRuntime: params.namespaceRuntime, parentToolCallId: params.parentToolCallId, request, signal: params.signal, @@ -677,6 +733,7 @@ function snapshotState(params: { createdAt: now, expiresAt, runtime: params.runtime, + namespaceRuntime: params.namespaceRuntime, }); return { status: "waiting" as const, @@ -705,6 +762,17 @@ function telemetry(runtime: ToolSearchRuntime) { }; } +function createCodeModeExecDescription( + ctx: CodeModeToolContext, + catalog?: readonly ToolSearchCatalogEntry[], +): string { + const namespacePrompt = describeCodeModeNamespacesForPrompt(ctx, catalog); + return ( + 'Run JavaScript or TypeScript in OpenClaw code mode. Use `return` to pass the final value back to the agent; awaited calls without a returned value complete as `null`. 
Node.js modules and `require`/`import` are NOT available; for any shell, file, network, or external action, use enabled catalog tools allowed by policy from inside your code: `tools.search(query)` to find catalog entries, `tools.describe(entry.id)` for the input schema, then `tools.call(entry.id, args)`. Registered plugin namespaces are available as direct globals and through `namespaces` when their required tools are visible in the run catalog. The `language` field accepts only "javascript" or "typescript"; do not pass "bash", "shell", or other values.' + + (namespacePrompt ? `\n\n${namespacePrompt}` : "") + ); +} + async function runExec(params: { toolCallId: string; ctx: CodeModeToolContext; @@ -722,6 +790,8 @@ async function runExec(params: { throw new ToolInputError("code mode is disabled."); } const runtime = new ToolSearchRuntime(params.ctx, toToolSearchConfig(config)); + const catalog = runtime.all(); + const namespaceRuntime = await createCodeModeNamespaceRuntime(params.ctx, catalog); let source: string; try { source = await prepareSource({ code: params.code, language: params.language, config }); @@ -735,14 +805,17 @@ async function runExec(params: { }; } try { - const result = await runCodeModeWorker( - { - kind: "exec", - source, - config, - catalog: runtime.all(), - }, - config.timeoutMs + 1000, + const result = normalizeCodeModeWorkerResult( + await runCodeModeWorker( + { + kind: "exec", + source, + config, + catalog, + namespaces: namespaceRuntime.descriptors, + }, + config.timeoutMs + 1000, + ), ); if (result.status === "waiting") { return snapshotState({ @@ -752,6 +825,7 @@ async function runExec(params: { ctx: params.ctx, config, runtime, + namespaceRuntime, output: result.output, signal: params.signal, onUpdate: params.onUpdate, @@ -844,14 +918,16 @@ async function runWait(params: { for (const entry of state.pending) { settledRequests.push(entry.settled ?? 
(await entry.promise)); } - const result = await runCodeModeWorker( - { - kind: "resume", - snapshotBytes: state.snapshotBytes, - config: state.config, - settledRequests, - }, - state.config.timeoutMs + 1000, + const result = normalizeCodeModeWorkerResult( + await runCodeModeWorker( + { + kind: "resume", + snapshotBytes: state.snapshotBytes, + config: state.config, + settledRequests, + }, + state.config.timeoutMs + 1000, + ), ); const output = [...state.output, ...result.output]; enforceOutputLimit(output, state.config); @@ -863,6 +939,7 @@ async function runWait(params: { ctx: state.ctx, config: state.config, runtime: state.runtime, + namespaceRuntime: state.namespaceRuntime, output, signal: params.signal, onUpdate: params.onUpdate, @@ -895,13 +972,12 @@ export function createCodeModeTools(ctx: CodeModeToolContext): AnyAgentTool[] { const execTool = markCodeModeControlTool({ name: CODE_MODE_EXEC_TOOL_NAME, label: "exec", - description: - 'Run JavaScript or TypeScript in OpenClaw code mode. Node.js modules and `require`/`import` are NOT available; for any shell, file, network, or external action, use enabled catalog tools allowed by policy from inside your code: `tools.search(query)` to find catalog entries, `tools.describe(entry.id)` for the input schema, then `tools.call(entry.id, args)`. The `language` field accepts only "javascript" or "typescript"; do not pass "bash", "shell", or other values.', + description: createCodeModeExecDescription(ctx), parameters: Type.Object({ code: Type.Optional( Type.String({ description: - "JavaScript or TypeScript source to run. The `tools` object (search/describe/call) and `ALL_TOOLS` are available in scope; Node built-in modules are not.", + "JavaScript or TypeScript source to run. 
The `tools` object (search/describe/call), `ALL_TOOLS`, and registered namespace globals are available in scope; Node built-in modules are not.", }), ), command: Type.Optional( @@ -985,13 +1061,31 @@ export function applyCodeModeCatalog(params: { tool.name !== TOOL_DESCRIBE_RAW_TOOL_NAME && tool.name !== TOOL_CALL_RAW_TOOL_NAME), ); - return applyToolCatalogCompaction({ + const compacted = applyToolCatalogCompaction({ ...params, tools, enabled: true, isVisibleControlTool: isCodeModeControlTool, shouldCatalogTool: (tool) => !isCodeModeControlTool(tool), }); + const visibleCatalog = params.catalogRef?.current?.entries ?? []; + for (const tool of compacted.tools) { + if (tool.name === CODE_MODE_EXEC_TOOL_NAME) { + tool.description = createCodeModeExecDescription( + { + config: params.config, + runtimeConfig: params.config, + agentId: params.agentId, + sessionId: params.sessionId, + sessionKey: params.sessionKey, + runId: params.runId, + catalogRef: params.catalogRef, + }, + visibleCatalog, + ); + } + } + return compacted; } export function addClientToolsToCodeModeCatalog(params: { diff --git a/src/agents/code-mode.worker.ts b/src/agents/code-mode.worker.ts index 01b92db3a024..7e16f13373af 100644 --- a/src/agents/code-mode.worker.ts +++ b/src/agents/code-mode.worker.ts @@ -8,7 +8,7 @@ const require = createRequire(import.meta.url); const QUICKJS_WASM_PATH = require.resolve("quickjs-wasi/quickjs.wasm"); let quickJsWasmModulePromise: Promise | undefined; -type CodeModeBridgeMethod = "search" | "describe" | "call" | "yield"; +type CodeModeBridgeMethod = "search" | "describe" | "call" | "yield" | "namespace"; type CodeModeConfig = { timeoutMs: number; @@ -30,12 +30,26 @@ type SettledBridgeRequest = { error?: string; }; +type SerializedCodeModeNamespaceValue = + | { kind: "array"; items: SerializedCodeModeNamespaceValue[] } + | { kind: "function"; path: string[] } + | { kind: "object"; entries: Array<[string, SerializedCodeModeNamespaceValue]> } + | { kind: "value"; value: 
unknown }; + +type CodeModeNamespaceDescriptor = { + id: string; + globalName: string; + description?: string; + scope: SerializedCodeModeNamespaceValue; +}; + type CodeModeWorkerInput = | { kind: "exec"; source: string; config: CodeModeConfig; catalog: unknown[]; + namespaces: CodeModeNamespaceDescriptor[]; } | { kind: "resume"; @@ -170,6 +184,7 @@ const CONTROLLER_SOURCE = String.raw` const output = []; const pending = new Map(); const catalog = Array.isArray(globalThis.__openclawCatalog) ? globalThis.__openclawCatalog : []; + const namespaceDescriptors = Array.isArray(globalThis.__openclawNamespaces) ? globalThis.__openclawNamespaces : []; function safe(value) { if (value === undefined) return null; @@ -199,6 +214,34 @@ const CONTROLLER_SOURCE = String.raw` }); } + function namespaceFunction(namespaceId, path) { + const callablePath = Object.freeze((Array.isArray(path) ? path : []).map((entry) => String(entry))); + return (...args) => request("namespace", [namespaceId, callablePath, args]); + } + + function deserializeNamespaceValue(namespaceId, value) { + if (!value || typeof value !== "object") return null; + if (value.kind === "function") { + return namespaceFunction(namespaceId, Array.isArray(value.path) ? value.path.slice() : []); + } + if (value.kind === "array") { + return Object.freeze((Array.isArray(value.items) ? value.items : []).map((item) => deserializeNamespaceValue(namespaceId, item))); + } + if (value.kind === "object") { + const object = Object.create(null); + for (const entry of Array.isArray(value.entries) ? value.entries : []) { + const key = Array.isArray(entry) && typeof entry[0] === "string" ? 
entry[0] : ""; + if (!key) continue; + Object.defineProperty(object, key, { + value: deserializeNamespaceValue(namespaceId, entry[1]), + enumerable: true, + }); + } + return Object.freeze(object); + } + return safe(value.value); + } + function settle(id, ok, payload) { const entry = pending.get(String(id)); if (!entry) return false; @@ -243,8 +286,28 @@ const CONTROLLER_SOURCE = String.raw` }); } + const namespaceGlobals = Object.create(null); + for (const descriptor of namespaceDescriptors) { + const id = typeof descriptor?.id === "string" ? descriptor.id : ""; + const globalName = typeof descriptor?.globalName === "string" ? descriptor.globalName : ""; + if (!id || !/^[A-Za-z_$][A-Za-z0-9_$]*$/.test(globalName)) continue; + const scope = deserializeNamespaceValue(id, descriptor.scope); + Object.defineProperty(namespaceGlobals, globalName, { + value: scope, + enumerable: true, + }); + const existingGlobal = Object.getOwnPropertyDescriptor(globalThis, globalName); + if (existingGlobal && existingGlobal.configurable === false) continue; + Object.defineProperty(globalThis, globalName, { + value: scope, + enumerable: true, + configurable: true, + }); + } + Object.defineProperties(globalThis, { ALL_TOOLS: { value: Object.freeze(catalog.slice()), enumerable: true }, + namespaces: { value: Object.freeze(namespaceGlobals), enumerable: true }, tools: { value: Object.freeze(baseTools), enumerable: true }, text: { value: (value) => output.push({ type: "text", text: asText(value) }), enumerable: true }, json: { value: (value) => output.push({ type: "json", value: safe(value) }), enumerable: true }, @@ -269,7 +332,13 @@ function createHostRequestHandler(params: { throw new Error("too many pending code mode tool calls"); } const method = methodHandle.toString(); - if (method !== "search" && method !== "describe" && method !== "call" && method !== "yield") { + if ( + method !== "search" && + method !== "describe" && + method !== "call" && + method !== "yield" && + method !== 
"namespace" + ) { throw new Error("unsupported code mode bridge method"); } let args: unknown = []; @@ -290,6 +359,7 @@ function createHostRequestHandler(params: { async function createVm(params: { catalog: unknown[]; + namespaces: CodeModeNamespaceDescriptor[]; config: CodeModeConfig; pendingRequests: PendingBridgeRequest[]; }): Promise { @@ -312,6 +382,12 @@ async function createVm(params: { } finally { catalogHandle.dispose(); } + const namespacesHandle = vm.hostToHandle(params.namespaces); + try { + vm.setProp(vm.global, "__openclawNamespaces", namespacesHandle); + } finally { + namespacesHandle.dispose(); + } const hostRequest = vm.newFunction( "__openclawHostRequest", createHostRequestHandler({ @@ -471,6 +547,7 @@ async function runExec(input: Extract) { const pendingRequests: PendingBridgeRequest[] = []; const { vm, didTimeout } = await createVm({ catalog: input.catalog, + namespaces: input.namespaces, config: input.config, pendingRequests, }); @@ -578,6 +655,9 @@ async function main(): Promise { source: input.source, config: input.config as CodeModeConfig, catalog: Array.isArray(input.catalog) ? input.catalog : [], + namespaces: Array.isArray(input.namespaces) + ? 
(input.namespaces as CodeModeNamespaceDescriptor[]) + : [], }); } if (input.kind === "resume" && input.snapshotBytes instanceof Uint8Array) { diff --git a/src/agents/tool-search.ts b/src/agents/tool-search.ts index c8c7ee5f30f6..3399f720df8b 100644 --- a/src/agents/tool-search.ts +++ b/src/agents/tool-search.ts @@ -1005,6 +1005,15 @@ function findEntry(catalog: ToolSearchCatalogSession, id: string): ToolSearchCat return entry; } +function findEntryByExactId(catalog: ToolSearchCatalogSession, id: string): ToolSearchCatalogEntry { + const needle = id.trim(); + const entry = catalog.entries.find((candidate) => candidate.id === needle); + if (!entry) { + throw new ToolInputError(`Unknown tool id: ${needle}`); + } + return entry; +} + function readId(args: unknown): string { const params = asToolParamsRecord(args); const value = params.id ?? params.toolId ?? params.name; @@ -1113,6 +1122,33 @@ export class ToolSearchRuntime { ) => { const catalog = resolveCatalog(this.ctx); const entry = findEntry(catalog, id); + return await this.callEntry(catalog, entry, input, options); + }; + + callExactId = async ( + id: string, + input?: unknown, + options?: { + parentToolCallId?: string; + signal?: AbortSignal; + onUpdate?: AgentToolUpdateCallback; + }, + ) => { + const catalog = resolveCatalog(this.ctx); + const entry = findEntryByExactId(catalog, id); + return await this.callEntry(catalog, entry, input, options); + }; + + private readonly callEntry = async ( + catalog: ToolSearchCatalogSession, + entry: ToolSearchCatalogEntry, + input?: unknown, + options?: { + parentToolCallId?: string; + signal?: AbortSignal; + onUpdate?: AgentToolUpdateCallback; + }, + ) => { catalog.callCount += 1; const parentId = sanitizeToolCallIdPart(options?.parentToolCallId ?? 
"direct"); const toolCallId = `tool_search_code:${parentId}:${entry.name}:${++this.callSequence}`; diff --git a/src/commands/agent.test.ts b/src/commands/agent.test.ts index 199bcb3ac765..009a5c2ec4da 100644 --- a/src/commands/agent.test.ts +++ b/src/commands/agent.test.ts @@ -1217,7 +1217,8 @@ describe("agentCommand", () => { callArgs = getLastEmbeddedCall(); expect(callArgs?.agentId).toBe("ops"); - expect(callArgs?.sessionKey).toBe("global"); + expect(callArgs?.sessionKey).toBe("agent:ops:global"); + expect(callArgs?.sessionFile).toContain(`${path.sep}agents${path.sep}ops${path.sep}sessions`); }); }); diff --git a/src/commands/doctor-whatsapp-responsiveness.test.ts b/src/commands/doctor-whatsapp-responsiveness.test.ts index 1cb71020dbdd..091329a9db71 100644 --- a/src/commands/doctor-whatsapp-responsiveness.test.ts +++ b/src/commands/doctor-whatsapp-responsiveness.test.ts @@ -4,9 +4,12 @@ import type { OpenClawConfig } from "../config/types.openclaw.js"; const noteMock = vi.hoisted(() => vi.fn()); const spawnSyncMock = vi.hoisted(() => vi.fn()); -vi.mock("node:child_process", () => ({ - spawnSync: spawnSyncMock, -})); +vi.mock("node:child_process", async () => { + const { mockNodeChildProcessSpawnSync } = await import("openclaw/plugin-sdk/test-node-mocks"); + return mockNodeChildProcessSpawnSync(spawnSyncMock, () => + vi.importActual("node:child_process"), + ); +}); vi.mock("../../packages/terminal-core/src/note.js", () => ({ note: noteMock, diff --git a/src/infra/gateway-processes.test.ts b/src/infra/gateway-processes.test.ts index d1abf77836ee..ff808ffde9e6 100644 --- a/src/infra/gateway-processes.test.ts +++ b/src/infra/gateway-processes.test.ts @@ -8,9 +8,12 @@ const parseProcCmdlineMock = vi.hoisted(() => vi.fn()); const isGatewayArgvMock = vi.hoisted(() => vi.fn()); const findGatewayPidsOnPortSyncMock = vi.hoisted(() => vi.fn()); -vi.mock("node:child_process", () => ({ - spawnSync: spawnSyncMock, -})); +vi.mock("node:child_process", async () => { + const { 
mockNodeChildProcessSpawnSync } = await import("openclaw/plugin-sdk/test-node-mocks"); + return mockNodeChildProcessSpawnSync(spawnSyncMock, () => + vi.importActual("node:child_process"), + ); +}); vi.mock("node:fs", () => ({ default: { diff --git a/src/infra/machine-name.test.ts b/src/infra/machine-name.test.ts index 878500172886..6ae7fc8ef8f4 100644 --- a/src/infra/machine-name.test.ts +++ b/src/infra/machine-name.test.ts @@ -4,12 +4,16 @@ import { afterEach, describe, expect, it, vi } from "vitest"; const execFileMock = vi.hoisted(() => vi.fn()); -vi.mock("node:child_process", () => ({ - execFile: Object.assign(execFileMock, { - [Symbol.for("nodejs.util.promisify.custom")]: vi.fn(), - __promisify__: vi.fn(), - }) as typeof import("node:child_process").execFile, -})); +vi.mock("node:child_process", async () => { + const { mockNodeChildProcessExecFile } = await import("openclaw/plugin-sdk/test-node-mocks"); + return mockNodeChildProcessExecFile( + Object.assign(execFileMock, { + [Symbol.for("nodejs.util.promisify.custom")]: vi.fn(), + __promisify__: vi.fn(), + }) as typeof import("node:child_process").execFile, + () => vi.importActual("node:child_process"), + ); +}); const originalVitest = process.env.VITEST; const originalNodeEnv = process.env.NODE_ENV; diff --git a/src/infra/os-summary.test.ts b/src/infra/os-summary.test.ts index b47a7a8ff106..151110fcebbe 100644 --- a/src/infra/os-summary.test.ts +++ b/src/infra/os-summary.test.ts @@ -3,9 +3,12 @@ import { afterEach, describe, expect, it, vi } from "vitest"; const spawnSyncMock = vi.hoisted(() => vi.fn()); -vi.mock("node:child_process", () => ({ - spawnSync: spawnSyncMock, -})); +vi.mock("node:child_process", async () => { + const { mockNodeChildProcessSpawnSync } = await import("openclaw/plugin-sdk/test-node-mocks"); + return mockNodeChildProcessSpawnSync(spawnSyncMock, () => + vi.importActual("node:child_process"), + ); +}); import { resolveOsSummary } from "./os-summary.js"; diff --git 
a/src/infra/restart.test.ts b/src/infra/restart.test.ts index 28aeb4416ef3..f68419a11b35 100644 --- a/src/infra/restart.test.ts +++ b/src/infra/restart.test.ts @@ -6,15 +6,22 @@ const spawnSyncMock = vi.hoisted(() => vi.fn()); const execFileMock = vi.hoisted(() => Object.assign(vi.fn(), { [Symbol.for("nodejs.util.promisify.custom")]: vi.fn(), + __promisify__: vi.fn(), }), ); const resolveLsofCommandSyncMock = vi.hoisted(() => vi.fn()); const resolveGatewayPortMock = vi.hoisted(() => vi.fn()); -vi.mock("node:child_process", () => ({ - execFile: execFileMock, - spawnSync: (...args: unknown[]) => spawnSyncMock(...args), -})); +vi.mock("node:child_process", async () => { + const { mockNodeBuiltinModule } = await import("openclaw/plugin-sdk/test-node-mocks"); + return mockNodeBuiltinModule( + () => vi.importActual("node:child_process"), + { + execFile: execFileMock, + spawnSync: (...args: unknown[]) => spawnSyncMock(...args), + } as Partial, + ); +}); vi.mock("./ports-lsof.js", () => ({ resolveLsofCommandSync: (...args: unknown[]) => resolveLsofCommandSyncMock(...args), diff --git a/src/plugin-sdk/test-helpers/node-builtin-mocks.ts b/src/plugin-sdk/test-helpers/node-builtin-mocks.ts index 6ac66cb211af..83670cc743a7 100644 --- a/src/plugin-sdk/test-helpers/node-builtin-mocks.ts +++ b/src/plugin-sdk/test-helpers/node-builtin-mocks.ts @@ -54,16 +54,18 @@ export async function mockNodeBuiltinModule( export async function mockNodeChildProcessSpawnSync( spawnSync: (...args: unknown[]) => unknown, + loadActual: () => Promise = loadChildProcessModule, ): Promise { - return mockNodeBuiltinModule(loadChildProcessModule, { + return mockNodeBuiltinModule(loadActual, { spawnSync: (...args: unknown[]) => spawnSync(...args), } as Partial); } export async function mockNodeChildProcessExecFile( execFile: typeof import("node:child_process").execFile, + loadActual: () => Promise = loadChildProcessModule, ): Promise { - return mockNodeBuiltinModule(loadChildProcessModule, { + return 
mockNodeBuiltinModule(loadActual, { execFile, } as Partial); } diff --git a/src/plugins/registry.ts b/src/plugins/registry.ts index 5b27cf6d4a1f..30d717022f4b 100644 --- a/src/plugins/registry.ts +++ b/src/plugins/registry.ts @@ -1,4 +1,5 @@ import path from "node:path"; +import { clearCodeModeNamespacesForPlugin } from "../agents/code-mode-namespaces.js"; import { getRegisteredAgentHarness, registerAgentHarness as registerGlobalAgentHarness, @@ -3173,6 +3174,7 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) { clearPluginCommandsForPlugin(pluginId); clearPluginInteractiveHandlersForPlugin(pluginId); + clearCodeModeNamespacesForPlugin(pluginId); clearContextEnginesForOwner(`plugin:${pluginId}`); const hookRollbackEntries = pluginHookRollback.get(pluginId) ?? []; diff --git a/src/realtime-transcription/websocket-session.test.ts b/src/realtime-transcription/websocket-session.test.ts index ec0e8cb80f6f..12a0516f7ca8 100644 --- a/src/realtime-transcription/websocket-session.test.ts +++ b/src/realtime-transcription/websocket-session.test.ts @@ -26,7 +26,7 @@ async function createRealtimeServer(params?: { onText?: (payload: unknown) => void; }) { const server = createServer(); - const wss = new WebSocketServer({ noServer: true }); + const wss = new WebSocketServer({ noServer: true, maxPayload: 1024 * 1024 }); const clients = new Set(); server.on("upgrade", (request, socket, head) => { diff --git a/test/scripts/ci-workflow-guards.test.ts b/test/scripts/ci-workflow-guards.test.ts index af7e7e4fd2f3..818d8a6a902e 100644 --- a/test/scripts/ci-workflow-guards.test.ts +++ b/test/scripts/ci-workflow-guards.test.ts @@ -100,7 +100,7 @@ describe("ci workflow guards", () => { expect(workflow).not.toContain("$fetchInfo.RedirectStandardOutput = $true"); expect(workflow).not.toContain("$fetchInfo.RedirectStandardError = $true"); expect(workflow).toContain( - '--no-tags --no-progress --prune --no-recurse-submodules --depth=50', + "--no-tags --no-progress 
--prune --no-recurse-submodules --depth=50", ); expect(workflow).toContain("$fetch = New-Object System.Diagnostics.Process"); expect(workflow).toContain("$fetch.StartInfo = $fetchInfo"); @@ -137,6 +137,16 @@ describe("ci workflow guards", () => { expect(buildArtifactSteps.some((step) => step.run === "pnpm ui:build")).toBe(false); }); + it("gives quiet Node test shards enough no-output runway", () => { + const workflow = readCiWorkflow(); + const nodeTestJob = workflow.jobs["checks-node-core-test-nondist-shard"]; + const runStep = nodeTestJob.steps.find((step) => step.name === "Run Node test shard"); + + expect(nodeTestJob["timeout-minutes"]).toBe(60); + expect(runStep.env.OPENCLAW_VITEST_NO_OUTPUT_TIMEOUT_MS).toBe("900000"); + expect(runStep.env.OPENCLAW_TEST_PROJECTS_PARALLEL).toBe("2"); + }); + it("uploads a CI timing summary after the run lanes finish", () => { const workflow = readCiWorkflow(); const timingJob = workflow.jobs["ci-timings-summary"];