diff --git a/.github/labeler.yml b/.github/labeler.yml index b79dc8727280..18728eebc36b 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -405,6 +405,11 @@ - "extensions/codex-supervisor/**" - "docs/plugins/reference/codex-supervisor.md" - "docs/specs/claw-supervisor.md" +"extensions: copilot": + - changed-files: + - any-glob-to-any-file: + - "extensions/copilot/**" + - "docs/plugins/copilot.md" "extensions: kimi-coding": - changed-files: - any-glob-to-any-file: diff --git a/.gitignore b/.gitignore index f49af8bf19a6..31d484957550 100644 --- a/.gitignore +++ b/.gitignore @@ -178,6 +178,7 @@ mantis/ /local/ /client_secret_*.json package-lock.json +!src/commands/copilot-sdk-install-manifest/package-lock.json .claude/ .agent/ skills-lock.json diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256 index b4380f3ab5fd..ebc13d8cbd0e 100644 --- a/docs/.generated/plugin-sdk-api-baseline.sha256 +++ b/docs/.generated/plugin-sdk-api-baseline.sha256 @@ -1,2 +1,2 @@ -91cb45dc1e8aaa3dac9a2c1d3c98c8ff22112e41c305de17f30d0d4420635ee4 plugin-sdk-api-baseline.json -3aa4802ffcb68c4f15e367030994eae10e73b55b5f14c8e23d4e9467fae325fe plugin-sdk-api-baseline.jsonl +28bbd7e0a05747ef3d17ae25e6dac5002d6cc9ad3256f1c4e58ee8e45014e397 plugin-sdk-api-baseline.json +d1d3fe6599e6cbc64f069737d08099d6b2586bc2b9d8d2ddb00d9f6e35c87cc7 plugin-sdk-api-baseline.jsonl diff --git a/docs/.i18n/glossary.zh-CN.json b/docs/.i18n/glossary.zh-CN.json index b87414f37a0f..4b1ed3af218c 100644 --- a/docs/.i18n/glossary.zh-CN.json +++ b/docs/.i18n/glossary.zh-CN.json @@ -175,6 +175,26 @@ "source": "Agent harness plugins", "target": "Agent harness plugins" }, + { + "source": "Agent harness plugins (SDK reference)", + "target": "Agent harness plugins (SDK reference)" + }, + { + "source": "Copilot SDK harness", + "target": "Copilot SDK harness" + }, + { + "source": "Copilot plugin", + "target": "Copilot plugin" + }, + { + "source": "GitHub Copilot agent 
runtime", + "target": "GitHub Copilot agent runtime" + }, + { + "source": "copilot", + "target": "copilot" + }, { "source": "Agent loop", "target": "Agent loop" diff --git a/docs/concepts/agent-runtimes.md b/docs/concepts/agent-runtimes.md index a80ac65772ab..7c6d06085bec 100644 --- a/docs/concepts/agent-runtimes.md +++ b/docs/concepts/agent-runtimes.md @@ -14,12 +14,12 @@ the finished turn to OpenClaw. Runtimes are easy to confuse with providers because both show up near model configuration. They are different layers: -| Layer | Examples | What it means | -| ------------- | ------------------------------------- | ------------------------------------------------------------------- | -| Provider | `openai`, `anthropic`, `openai-codex` | How OpenClaw authenticates, discovers models, and names model refs. | -| Model | `gpt-5.5`, `claude-opus-4-6` | The model selected for the agent turn. | -| Agent runtime | `openclaw`, `codex`, `claude-cli` | The low level loop or backend that executes the prepared turn. | -| Channel | Telegram, Discord, Slack, WhatsApp | Where messages enter and leave OpenClaw. | +| Layer | Examples | What it means | +| ------------- | -------------------------------------------- | ------------------------------------------------------------------- | +| Provider | `openai`, `anthropic`, `openai-codex` | How OpenClaw authenticates, discovers models, and names model refs. | +| Model | `gpt-5.5`, `claude-opus-4-6` | The model selected for the agent turn. | +| Agent runtime | `openclaw`, `codex`, `copilot`, `claude-cli` | The low level loop or backend that executes the prepared turn. | +| Channel | Telegram, Discord, Slack, WhatsApp | Where messages enter and leave OpenClaw. | You will also see the word **harness** in code. A harness is the implementation that provides an agent runtime. For example, the bundled Codex harness @@ -33,13 +33,17 @@ There are two runtime families: - **Embedded harnesses** run inside OpenClaw's prepared agent loop. 
Today this is the built-in `openclaw` runtime plus registered plugin harnesses such as - `codex`. + `codex` and `copilot`. - **CLI backends** run a local CLI process while keeping the model ref canonical. For example, `anthropic/claude-opus-4-7` with a model-scoped `agentRuntime.id: "claude-cli"` means "select the Anthropic model, execute through Claude CLI." `claude-cli` is not an embedded harness id and must not be passed to AgentHarness selection. +The `copilot` harness is a separate, opt-in plugin harness for the +GitHub Copilot CLI; see [GitHub Copilot agent runtime](/plugins/copilot) +for the user-facing decision between PI, Codex, and GitHub Copilot agent runtime. + ## Codex surfaces Most confusion comes from several different surfaces sharing the Codex name: @@ -201,6 +205,34 @@ If `openclaw doctor` warns that the `codex` plugin is enabled while `openai-codex/*` remains in config, treat that as legacy route state. Run `openclaw doctor --fix` to rewrite it to `openai/*` with the Codex runtime. +## GitHub Copilot agent runtime + +The bundled `copilot` extension registers an opt-in `copilot` runtime +backed by the GitHub Copilot CLI (`@github/copilot-sdk`). It claims the +canonical subscription `github-copilot` provider and is **never** selected by +`auto`. Opt in per-model or per-provider via `agentRuntime.id`: + +```json5 +{ + agents: { + defaults: { + model: "github-copilot/gpt-5.5", + models: { + "github-copilot/gpt-5.5": { + agentRuntime: { id: "copilot" }, + }, + }, + }, + }, +} +``` + +The harness claims its provider, runtime, CLI session key, and auth profile +prefix in `extensions/copilot/doctor-contract-api.ts`, which +`openclaw doctor` auto-loads. For configuration, auth, transcript mirroring, +compaction, the doctor probe surface, and the broader PI vs Codex vs Copilot +SDK decision, see [GitHub Copilot agent runtime](/plugins/copilot). 
+ ## Compatibility contract When a runtime is not OpenClaw, it should document what OpenClaw surfaces it supports. @@ -236,6 +268,7 @@ runtime policy first. Legacy session runtime pins no longer decide routing. - [Codex harness](/plugins/codex-harness) - [Codex harness runtime](/plugins/codex-harness-runtime) +- [GitHub Copilot agent runtime](/plugins/copilot) - [OpenAI](/providers/openai) - [Agent harness plugins](/plugins/sdk-agent-harness) - [Agent loop](/concepts/agent-loop) diff --git a/docs/concepts/models.md b/docs/concepts/models.md index 81eb61afeba1..87325e76718d 100644 --- a/docs/concepts/models.md +++ b/docs/concepts/models.md @@ -23,7 +23,7 @@ sidebarTitle: "Models CLI" -Model refs choose a provider and model. They do not usually choose the low-level agent runtime. OpenAI agent refs are the main exception: `openai/gpt-5.5` runs through the Codex app-server runtime by default on the official OpenAI provider. Explicit runtime overrides belong on provider/model policy, not on the whole agent or session. In Codex runtime mode, the `openai/gpt-*` ref does not imply API-key billing; auth can come from a Codex account or `openai-codex` auth profile. See [Agent runtimes](/concepts/agent-runtimes). +Model refs choose a provider and model. They do not usually choose the low-level agent runtime. OpenAI agent refs are the main exception: `openai/gpt-5.5` runs through the Codex app-server runtime by default on the official OpenAI provider. Subscription Copilot refs (`github-copilot/*`) can additionally be opted into the bundled GitHub Copilot agent runtime — that path stays explicit (no `auto` fallback). Explicit runtime overrides belong on provider/model policy, not on the whole agent or session. In Codex runtime mode, the `openai/gpt-*` ref does not imply API-key billing; auth can come from a Codex account or `openai-codex` auth profile. See [Agent runtimes](/concepts/agent-runtimes) and [GitHub Copilot agent runtime](/plugins/copilot). 
## How model selection works diff --git a/docs/plugins/copilot.md b/docs/plugins/copilot.md new file mode 100755 index 000000000000..d7bcadbacdb3 --- /dev/null +++ b/docs/plugins/copilot.md @@ -0,0 +1,374 @@ +--- +summary: "Run OpenClaw embedded agent turns through the bundled GitHub Copilot SDK harness" +title: "Copilot SDK harness" +read_when: + - You want to use the bundled GitHub Copilot SDK harness for an agent + - You need configuration examples for the `copilot` runtime + - You are wiring an agent to subscription Copilot (github / openclaw / copilot) and want it to run through the Copilot CLI +--- + +The bundled `copilot` extension lets OpenClaw run embedded subscription +Copilot agent turns through the GitHub Copilot CLI (`@github/copilot-sdk`) +instead of the built-in PI harness. + +Use the Copilot SDK harness when you want the Copilot CLI session to own the +low-level agent loop: native tool execution, native compaction +(`infiniteSessions`), and CLI-managed thread state under `copilotHome`. +OpenClaw still owns chat channels, session files, model selection, OpenClaw +dynamic tools (bridged), approvals, media delivery, the visible transcript +mirror, `/btw` side questions (handled by the in-tree PI fallback — see +[Side questions (`/btw`)](#side-questions-btw)), and `openclaw doctor`. + +For the broader model/provider/runtime split, start with +[Agent runtimes](/concepts/agent-runtimes). + +## Requirements + +- OpenClaw with the bundled `copilot` extension available. +- If your config uses `plugins.allow`, include `copilot` (the manifest + id in `extensions/copilot/openclaw.plugin.json`). A restrictive + allowlist that uses the npm-style `@openclaw/copilot` package name + will leave the bundled plugin blocked and the runtime will not load + even with `agentRuntime.id: "copilot"`. +- A GitHub Copilot subscription that can drive the Copilot CLI (or a + `gitHubToken` env / auth-profile entry for headless / cron runs). +- A writable `copilotHome` directory. 
The harness defaults to + `~/.openclaw/agents/<agentId>/copilot` for full per-agent isolation. The + platform default (`%APPDATA%\copilot` on Windows, `$XDG_CONFIG_HOME/copilot` + or `~/.config/copilot` elsewhere) is used as the doctor probe fallback when + no explicit home is set. + +`openclaw doctor` runs the bundled +[doctor contract](#doctor-and-probes) for the extension; failures there are +the canonical way to confirm the environment is ready before opting an agent +in. + +## On-demand SDK install + +The Copilot agent runtime ships its small TypeScript code bundled inside +the openclaw tarball, but the underlying `@github/copilot-sdk` package +(and its platform-specific `@github/copilot-<platform>-<arch>` CLI +binary) is **not** installed by default — together they add ~260 MB to +your openclaw install footprint, and most openclaw users do not select +a Copilot model. + +The wizard offers to install the SDK the first time you select a +`github-copilot/*` model **and** your config opts the model (or its +provider) into the Copilot agent runtime via +`agentRuntime: { id: "copilot" }` (see [Quickstart](#quickstart) below). +Without the opt-in, openclaw uses its built-in GitHub Copilot provider +and never prompts for the SDK install: + +``` +The Copilot agent runtime needs @github/copilot-sdk (~260 MB on first +install, downloads the @github/copilot CLI binary for your platform). +Install now? [Y/n] +``` + +If you accept, the SDK is installed into +`~/.openclaw/npm-runtime/copilot/` and detected on subsequent runs. The +install runs `npm ci` against a checked-in `package-lock.json` shipped +with openclaw at +`src/commands/copilot-sdk-install-manifest/package-lock.json`, so the +exact transitive graph reviewed for this release lands on disk on every +user machine. 
+ +If you decline, the runtime will fail at first invocation with an +actionable install message; re-run `openclaw setup` to retry the install +(or copy the pinned manifest into `~/.openclaw/npm-runtime/copilot/` and +run `npm ci` yourself if you need to install offline). + +The runtime resolves the SDK in this order: + +1. `import("@github/copilot-sdk")` against the host openclaw install + (covers source/dev checkouts and any environment that pre-installs + the SDK alongside openclaw). +2. The well-known fallback dir `~/.openclaw/npm-runtime/copilot/` (the + wizard install target). + +A missing SDK surfaces a single error with code `COPILOT_SDK_MISSING` +and the manual install command above. + +## Quickstart + +Pin one model (or one provider) to the harness: + +```json5 +{ + agents: { + defaults: { + model: "github-copilot/gpt-5.5", + models: { + "github-copilot/gpt-5.5": { + agentRuntime: { id: "copilot" }, + }, + }, + }, + }, +} +``` + +Both routes are equivalent. Use `agentRuntime.id` on a single model entry +when only that model should be routed through the harness; set +`agentRuntime.id` on a provider when every model under that provider should +use it. + +## Supported providers + +The harness advertises support for the canonical `github-copilot` provider +(the same id owned by `extensions/github-copilot`): + +- `github-copilot` + +Anything outside that set falls through `selection.ts`'s `auto_pi` branch back +to PI. + +## Auth + +Per-agent precedence, applied during `runCopilotAttempt`: + +1. **Explicit `useLoggedInUser: true`** on the attempt input. Uses the Copilot + CLI's logged-in user resolved under the agent's `copilotHome`. +2. **Explicit `gitHubToken`** on the attempt input (with `profileId` + + `profileVersion`). Useful for direct CLI invocations and tests where the + caller wants to bypass auth-profile resolution. +3. **Contract-resolved `resolvedApiKey` + `authProfileId`** from the + `EmbeddedRunAttemptParams` shape. 
This is the **production main path**: + core resolves the agent's configured `github-copilot` auth profile + (via `src/infra/provider-usage.auth.ts:resolveProviderAuths`) before + invoking the harness, and the harness consumes both fields directly. + This makes a `github-copilot:<profile>` auth profile work end-to-end + for headless / cron / multi-profile setups without env vars. +4. **Env-var fallback** for direct CLI / dogfood runs where no auth + profile is configured. The runtime checks the following vars in + precedence order, mirroring the shipped `github-copilot` provider + (`extensions/github-copilot/auth.ts`) and the documented Copilot SDK + setup: + 1. `OPENCLAW_GITHUB_TOKEN` -- harness-specific override; set this + to pin a token for the OpenClaw harness without disturbing + system-wide `gh` / Copilot CLI config. + 2. `COPILOT_GITHUB_TOKEN` -- standard Copilot SDK / CLI env var. + 3. `GH_TOKEN` -- standard `gh` CLI env var (matches the existing + `github-copilot` provider precedence). + 4. `GITHUB_TOKEN` -- generic GitHub token fallback. + + The first non-empty value wins; empty strings are treated as + absent. The synthesised pool profile id is `env:<VAR_NAME>` and the + profileVersion is a non-reversible sha256 fingerprint of the + token, so rotating the env value cleanly busts the client pool. + +5. **Default `useLoggedInUser`** when no token signal is available. + +Each agent gets a dedicated `copilotHome` so Copilot CLI tokens, sessions, and +config do not leak between agents on the same machine. The default is +`<agentDir>/copilot` when the host hands the harness an agent directory +(isolating SDK state from OpenClaw's `models.json` / `auth-profiles.json` in +the same directory), or `~/.openclaw/agents/<agentId>/copilot` otherwise. +Override with `copilotHome: <path>` on the attempt input when you need a +custom location (for example, a shared mount for migration). 
+ +`probeCopilotAuthShape` (see [Doctor and probes](#doctor-and-probes)) is the +pure shape check that validates which of the modes above will be used. +It does not perform a live SDK handshake. + +## Configuration surface + +The harness reads its config from per-attempt input +(`runCopilotAttempt({...})`) plus a small set of env defaults inside +`extensions/copilot/src/`: + +- `copilotHome` — per-agent CLI state directory (defaults documented above). +- `model` — string or `{ provider, id, api? }`. When omitted, OpenClaw uses + the agent's normal model selection and the harness verifies the resolved + provider is in the supported set. +- `reasoningEffort` — `"low" | "medium" | "high" | "xhigh"`. Maps from + OpenClaw's `ThinkLevel` / `ReasoningLevel` resolution in + `auto-reply/thinking.ts`. +- `infiniteSessionConfig` — optional override for the SDK + `infiniteSessions` block driven by `harness.compact`. Defaults are safe to + leave as-is. +- `hooksConfig` — optional bridge config exposing OpenClaw + before/after-message-write hooks to the SDK loop. +- `permissionPolicy` — optional override for the SDK's + `onPermissionRequest` handler used for built-in SDK tool kinds + (`shell`, `write`, `read`, `url`, `mcp`, `memory`, `hook`). Defaults + to `rejectAllPolicy` as a safety net; in practice the SDK never + invokes any of those kinds because every bridged OpenClaw tool is + registered with `overridesBuiltInTool: true` and + `skipPermission: true` so 100% of tool calls flow through OpenClaw's + wrapped `execute()`. See [Permissions and ask_user](#permissions-and-ask_user). +- `enableSessionTelemetry` — opt-in OpenTelemetry routing via + `telemetry-bridge.ts`. + +Nothing in the rest of OpenClaw needs to know about these fields. Other +plugins, channels, and core code only see the standard +`AgentHarnessAttemptParams` / `AgentHarnessAttemptResult` shape. + +## Compaction + +When `harness.compact` runs, the Copilot SDK harness: + +1. 
Enables `infiniteSessions` on the SDK session. +2. Lets the SDK perform its native compaction. +3. Writes an OpenClaw-shaped marker at + `workspacePath/files/openclaw-compaction-.json` so existing OpenClaw + transcript readers still see a familiar artifact. + +The OpenClaw side transcript mirror (see below) continues to receive the +post-compaction messages, so user-facing chat history stays consistent. + +## Transcript mirroring + +`runCopilotAttempt` dual-writes each turn's mirrorable messages into the +OpenClaw audit transcript via +`extensions/copilot/src/dual-write-transcripts.ts`. The mirror is +per-session scoped (`copilot:${sessionId}`) and uses a per-message +identity (`${role}:${sha256_16(role,content)}`) so re-emits of prior-turn +entries collide with existing on-disk keys and do not duplicate. + +The mirror is wrapped in two layers of failure containment so a transcript +write failure cannot fail the attempt: an internal best-effort wrapper and a +defense-in-depth `.catch(...)` at the attempt level. Failures are logged but +not surfaced. + +## Side questions (`/btw`) + +`/btw` is **not** native on this harness. `createCopilotAgentHarness()` +deliberately leaves `harness.runSideQuestion` undefined, so OpenClaw's `/btw` +dispatcher (`src/agents/btw.ts`) falls through to the same in-tree PI fallback +path it uses for every non-Codex runtime: the configured model provider is +called directly with a short side-question prompt and streamed back via +`streamSimple` (no CLI session, no extra pool slot). + +This keeps Copilot CLI sessions reserved for the agent's main turn loop, and +keeps `/btw` behavior identical to other PI-backed runtimes. The contract is +asserted in +[`extensions/copilot/harness.test.ts`](https://github.com/openclaw/openclaw/blob/main/extensions/copilot/harness.test.ts) +under `describe("runSideQuestion")`. + +## Doctor and probes + +`extensions/copilot/doctor-contract-api.ts` is auto-loaded by +`src/plugins/doctor-contract-registry.ts`. 
It contributes: + +- An empty `legacyConfigRules` (no retired fields at MVP). +- A no-op `normalizeCompatibilityConfig` (kept so future field retirements + have a stable in-tree home). +- One `sessionRouteStateOwners` entry claiming provider `github-copilot`; + runtime `copilot`; CLI session key `copilot`; auth profile + prefix `github-copilot:`. + +`extensions/copilot/src/doctor-probes.ts` exports three imperative probes +that hosts (including `openclaw doctor`) can call to verify the environment: + +| Probe | What it checks | Reasons it can fail | +| -------------------------- | --------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | +| `probeCopilotCliVersion` | `copilot --version` exits 0 with a non-empty version string | `non-zero-exit`, `empty-version`, `spawn-failed`, `spawn-error`, `probe-timeout` | +| `probeCopilotHomeWritable` | `mkdir -p copilotHome` + write + rm a marker file | `copilothome-not-writable` (with the underlying fs error in `details.rawError`) | +| `probeCopilotAuthShape` | At least one of `useLoggedInUser`, `gitHubToken`, or `profileId`+`profileVersion` | `no-auth-source` | + +Each probe accepts a DI seam (`spawnFn`, `fsApi`) so tests do not spawn the +real Copilot CLI or touch the host fs. + +## Limitations + +- The harness only claims the canonical `github-copilot` provider at MVP. + Additional providers (BYOK or otherwise) should land in follow-up PRs that + ship the adapter alongside the wire-up. +- The harness does not deliver TUI; PI's TUI is unaffected and remains the + fallback for whatever runtimes do not have a peer surface. +- PI session state is not migrated when an agent switches to `copilot`. + Selection is per attempt; existing PI sessions remain valid. 
+- **Interactive `ask_user` is not yet wired.** The SDK's + `onUserInputRequest` handler is intentionally not registered, which + per the SDK contract hides the `ask_user` tool from the model + entirely. Agents running under this harness make best-judgment + decisions from the initial prompt rather than asking clarifying + questions mid-turn. A follow-up will port the codex pattern at + `extensions/codex/src/app-server/user-input-bridge.ts` to route SDK + `UserInputRequest`s through the OpenClaw channel/TUI prompt path; the + dormant scaffolding in `extensions/copilot/src/user-input-bridge.ts` + is the surface that follow-up will wire. + +## Permissions and ask_user + +Permission enforcement for bridged OpenClaw tools happens **inside the +tool wrapper**, not via the SDK's `onPermissionRequest` callback. The +same `wrapToolWithBeforeToolCallHook` that PI uses +(`src/agents/pi-tools.before-tool-call.ts`) is applied by +`createOpenClawCodingTools` to every coding tool: loop detection, +trusted plugin policies, before-tool-call hooks, and two-phase plugin +approvals via the gateway (`plugin.approval.request`) all run with the +exact same code path as native PI attempts. + +To let that wrapper own the decision, the SDK Tool returned by +`convertOpenClawToolToSdkTool` is marked with: + +- `overridesBuiltInTool: true` — replaces the Copilot CLI's built-in + tool of the same name (edit, read, write, bash, …) so every tool + invocation routes back to OpenClaw. +- `skipPermission: true` — tells the SDK not to fire + `onPermissionRequest({kind: "custom-tool"})` before invoking the tool. + The wrapped `execute()` performs the richer OpenClaw policy check + internally; an SDK-level prompt would either short-circuit OpenClaw's + enforcement (if we allow-all) or block every tool call (if we + reject-all) — neither matches PI parity. 
+ +The in-tree codex harness uses the same split: bridged OpenClaw tools +are wrapped (`extensions/codex/src/app-server/dynamic-tools.ts`) and +the codex-app-server's _own_ native approval kinds +(`item/commandExecution/requestApproval`, +`item/fileChange/requestApproval`, +`item/permissions/requestApproval`) are routed through +`plugin.approval.request` +(`extensions/codex/src/app-server/approval-bridge.ts`). The Copilot SDK +equivalent — fail-closed `rejectAllPolicy` for any non-`custom-tool` +kind that ever reaches `onPermissionRequest` — is the same safety net, +and it does not fire in practice because `overridesBuiltInTool: true` +displaces every built-in. + +For the wrapped-tool layer to make policy decisions equivalent to PI, +the harness forwards the full PI attempt-tool context to +`createOpenClawCodingTools` — identity (`senderIsOwner`, +`memberRoleIds`, `ownerOnlyToolAllowlist`, …), channel/routing +(`groupId`, `currentChannelId`, `replyToMode`, message-tool toggles), +auth (`authProfileStore`), run identity +(`sessionKey`/`runSessionKey` derived from `sandboxSessionKey`, +`runId`), model context (`modelApi`, `modelContextWindowTokens`, +`modelCompat`, `modelHasVision`), and run hooks (`onToolOutcome`, +`onYield`). Without those fields, owner-only allowlists silently +behave as deny-by-default, plugin-trust policies cannot resolve to the +right scope, and `session_status: "current"` resolves to a stale +sandbox key. The bridge builder is in +`extensions/copilot/src/tool-bridge.ts` and mirrors the PI +authoritative call at +`src/agents/pi-embedded-runner/run/attempt.ts:1029-1117`. Two PI fields +are intentionally **not** forwarded at MVP and tracked as follow-ups: +`sandbox` (the harness does not yet route through `resolveSandboxContext`) +and the PI tool-search/code-mode machinery +(`toolSearchCatalogRef`, `includeCoreTools`, +`includeToolSearchControls`, `toolSearchCatalogExecutor`, +`toolConstructionPlan`), which has no analog at the SDK boundary. 
+ +### Session-level GitHub token + +The Copilot SDK contract distinguishes the **client-level** GitHub +token (`CopilotClientOptions.gitHubToken`, used to authenticate the +CLI process itself) from the **session-level** token +(`SessionConfig.gitHubToken`, which determines content exclusion, +model routing, and quota for that session and is honored on both +`createSession` and `resumeSession`). The harness resolves auth once +via `resolveCopilotAuth` and sets both fields when the auth mode is +`gitHubToken` (an explicit `auth.gitHubToken` or a contract-resolved +`resolvedApiKey` from a configured `github-copilot` auth profile). +When the resolved mode is `useLoggedInUser`, the session-level field +is omitted so the SDK keeps deriving identity from the logged-in +identity. + +`ask_user` is intentionally hidden — see Limitations above. + +## Related + +- [Agent runtimes](/concepts/agent-runtimes) +- [Codex harness](/plugins/codex-harness) +- [Agent harness plugins (SDK reference)](/plugins/sdk-agent-harness) diff --git a/docs/plugins/plugin-inventory.md b/docs/plugins/plugin-inventory.md index 9317884db549..88b6497c9ef5 100644 --- a/docs/plugins/plugin-inventory.md +++ b/docs/plugins/plugin-inventory.md @@ -66,6 +66,7 @@ commands. | [cloudflare-ai-gateway](/plugins/reference/cloudflare-ai-gateway) | Adds Cloudflare AI Gateway model provider support to OpenClaw. | `@openclaw/cloudflare-ai-gateway-provider`
included in OpenClaw | providers: cloudflare-ai-gateway | | [codex-supervisor](/plugins/reference/codex-supervisor) | Supervise Codex app-server sessions from OpenClaw. | `@openclaw/codex-supervisor`
included in OpenClaw | contracts: tools | | [comfy](/plugins/reference/comfy) | Adds ComfyUI model provider support to OpenClaw. | `@openclaw/comfy-provider`
included in OpenClaw | providers: comfy; contracts: imageGenerationProviders, musicGenerationProviders, videoGenerationProviders | +| [copilot](/plugins/reference/copilot) | Registers the GitHub Copilot agent runtime. | `@openclaw/copilot`
included in OpenClaw | plugin | | [copilot-proxy](/plugins/reference/copilot-proxy) | Adds Copilot Proxy model provider support to OpenClaw. | `@openclaw/copilot-proxy`
included in OpenClaw | providers: copilot-proxy | | [deepgram](/plugins/reference/deepgram) | Adds media understanding provider support. Adds realtime transcription provider support. | `@openclaw/deepgram-provider`
included in OpenClaw | contracts: mediaUnderstandingProviders, realtimeTranscriptionProviders | | [deepinfra](/plugins/reference/deepinfra) | Adds DeepInfra model provider support to OpenClaw. | `@openclaw/deepinfra-provider`
included in OpenClaw | providers: deepinfra; contracts: imageGenerationProviders, mediaUnderstandingProviders, memoryEmbeddingProviders, speechProviders, videoGenerationProviders | diff --git a/docs/plugins/reference.md b/docs/plugins/reference.md index 20c2cc058c2d..b1f2e9234d77 100644 --- a/docs/plugins/reference.md +++ b/docs/plugins/reference.md @@ -38,6 +38,7 @@ pnpm plugins:inventory:gen | [codex](/plugins/reference/codex) | OpenClaw Codex app-server harness and model provider plugin with a Codex-managed GPT catalog. | `@openclaw/codex`
npm; ClawHub | providers: codex; contracts: mediaUnderstandingProviders, migrationProviders | | [codex-supervisor](/plugins/reference/codex-supervisor) | Supervise Codex app-server sessions from OpenClaw. | `@openclaw/codex-supervisor`
included in OpenClaw | contracts: tools | | [comfy](/plugins/reference/comfy) | Adds ComfyUI model provider support to OpenClaw. | `@openclaw/comfy-provider`
included in OpenClaw | providers: comfy; contracts: imageGenerationProviders, musicGenerationProviders, videoGenerationProviders | +| [copilot](/plugins/reference/copilot) | Registers the GitHub Copilot agent runtime. | `@openclaw/copilot`
included in OpenClaw | plugin | | [copilot-proxy](/plugins/reference/copilot-proxy) | Adds Copilot Proxy model provider support to OpenClaw. | `@openclaw/copilot-proxy`
included in OpenClaw | providers: copilot-proxy | | [deepgram](/plugins/reference/deepgram) | Adds media understanding provider support. Adds realtime transcription provider support. | `@openclaw/deepgram-provider`
included in OpenClaw | contracts: mediaUnderstandingProviders, realtimeTranscriptionProviders | | [deepinfra](/plugins/reference/deepinfra) | Adds DeepInfra model provider support to OpenClaw. | `@openclaw/deepinfra-provider`
included in OpenClaw | providers: deepinfra; contracts: imageGenerationProviders, mediaUnderstandingProviders, memoryEmbeddingProviders, speechProviders, videoGenerationProviders | diff --git a/docs/plugins/reference/copilot.md b/docs/plugins/reference/copilot.md new file mode 100644 index 000000000000..55d803339d37 --- /dev/null +++ b/docs/plugins/reference/copilot.md @@ -0,0 +1,23 @@ +--- +summary: "Registers the GitHub Copilot agent runtime." +read_when: + - You are installing, configuring, or auditing the copilot plugin +title: "Copilot plugin" +--- + +# Copilot plugin + +Registers the GitHub Copilot agent runtime. + +## Distribution + +- Package: `@openclaw/copilot` +- Install route: included in OpenClaw + +## Surface + +plugin + +## Related docs + +- [copilot](/plugins/copilot) diff --git a/extensions/copilot/README.md b/extensions/copilot/README.md new file mode 100644 index 000000000000..abc56df052b0 --- /dev/null +++ b/extensions/copilot/README.md @@ -0,0 +1,15 @@ +# GitHub Copilot agent runtime (OpenClaw plugin) + +Bundled OpenClaw plugin that registers a `copilot` agent harness backed +by `@github/copilot-sdk` and the GitHub Copilot CLI. + +The harness claims the canonical subscription `github-copilot` provider and +is opt-in only — selection requires explicit `agentRuntime.id: "copilot"` +on a model or provider entry; `auto` never picks it. PI remains the default +embedded runtime. + +See [GitHub Copilot agent runtime](../../docs/plugins/copilot.md) for +configuration, doctor probes, transcript mirroring, compaction, side +questions, replay, and the supported-surface contract. +See [qa/copilot-capabilities.md](../../qa/copilot-capabilities.md) +for the SDK capability inventory the harness is pinned to. 
diff --git a/extensions/copilot/doctor-contract-api.test.ts b/extensions/copilot/doctor-contract-api.test.ts new file mode 100755 index 000000000000..a629b67d2a6a --- /dev/null +++ b/extensions/copilot/doctor-contract-api.test.ts @@ -0,0 +1,42 @@ +import { describe, expect, it } from "vitest"; +import { + legacyConfigRules, + normalizeCompatibilityConfig, + sessionRouteStateOwners, +} from "./doctor-contract-api.js"; + +describe("copilot doctor contract", () => { + it("has no legacy config rules at MVP (no retired fields exist yet)", () => { + expect(legacyConfigRules).toEqual([]); + }); + + it("normalizeCompatibilityConfig is a structural no-op when no migrations apply", () => { + const cfg = { + plugins: { + entries: { copilot: { enabled: true, config: { pool: { idleTtlMs: 12345 } } } }, + }, + } as unknown as Parameters[0]["cfg"]; + const result = normalizeCompatibilityConfig({ cfg }); + expect(result.config).toBe(cfg); + expect(result.changes).toEqual([]); + }); + + it("declares exactly one session route state owner for copilot", () => { + expect(sessionRouteStateOwners).toHaveLength(1); + const owner = sessionRouteStateOwners[0]; + expect(owner.id).toBe("copilot"); + expect(owner.label).toBe("GitHub Copilot agent runtime"); + }); + + it("claims the subscription Copilot providers (matches attempt.ts SUPPORTED_PROVIDERS)", () => { + const owner = sessionRouteStateOwners[0]; + expect(owner.providerIds).toEqual(["github-copilot"]); + }); + + it("claims the copilot runtime, session key, and auth profile prefix", () => { + const owner = sessionRouteStateOwners[0]; + expect(owner.runtimeIds).toEqual(["copilot"]); + expect(owner.cliSessionKeys).toEqual(["copilot"]); + expect(owner.authProfilePrefixes).toEqual(["github-copilot:"]); + }); +}); diff --git a/extensions/copilot/doctor-contract-api.ts b/extensions/copilot/doctor-contract-api.ts new file mode 100755 index 000000000000..ddf6a1d67a71 --- /dev/null +++ b/extensions/copilot/doctor-contract-api.ts @@ -0,0 +1,63 
@@ +/** + * Doctor contract for the copilot extension. + * + * Mirrors {@link ../codex/doctor-contract-api.ts} so `openclaw doctor` + * can: + * - Reason about which session-state belongs to this extension + * (sessionRouteStateOwners) for cleanup of stale state across + * runtime swaps. + * - Detect retired config fields and migrate them + * (legacyConfigRules + normalizeCompatibilityConfig). No retired + * fields exist for copilot yet; the array is empty by design + * and normalizeCompatibilityConfig is a structural no-op so + * future retirements have a stable in-tree home. + * + * The deeper runtime probes (copilot CLI version, copilot auth, + * copilotHome writability) live in {@link ./src/doctor-probes.ts} + * because they have side effects (subprocess spawn, fs touch) and + * need to be invoked imperatively, not declaratively, from the + * doctor command. They are exported separately so callers can opt + * in. Auto-discovery of doctor-contract-api.ts at the plugin root + * keeps this file purely declarative. + */ + +import type { OpenClawConfig } from "openclaw/plugin-sdk/config-contracts"; +import type { DoctorSessionRouteStateOwner } from "openclaw/plugin-sdk/runtime-doctor"; + +type LegacyConfigRule = { + path: string[]; + message: string; + match: (value: unknown) => boolean; +}; + +export const legacyConfigRules: LegacyConfigRule[] = []; + +export function normalizeCompatibilityConfig({ cfg }: { cfg: OpenClawConfig }): { + config: OpenClawConfig; + changes: string[]; +} { + return { config: cfg, changes: [] }; +} + +/** + * Session-state ownership claim for the copilot agent runtime. + * + * - id / label: Identify the extension in doctor output. + * - providerIds: The subscription Copilot providers (kept in sync + * with `SUPPORTED_PROVIDERS` in attempt.ts). + * - runtimeIds: Our harness id (matches harness.ts `id` field). + * - cliSessionKeys: Session keys this harness writes; doctor uses + * this when pruning stale CLI session state. 
+ * - authProfilePrefixes: Conventional prefix for any auth profile + * created/consumed by this extension. + */ +export const sessionRouteStateOwners: DoctorSessionRouteStateOwner[] = [ + { + id: "copilot", + label: "GitHub Copilot agent runtime", + providerIds: ["github-copilot"], + runtimeIds: ["copilot"], + cliSessionKeys: ["copilot"], + authProfilePrefixes: ["github-copilot:"], + }, +]; diff --git a/extensions/copilot/harness.test.ts b/extensions/copilot/harness.test.ts new file mode 100644 index 000000000000..c41c222c1567 --- /dev/null +++ b/extensions/copilot/harness.test.ts @@ -0,0 +1,879 @@ +import { mkdtemp, readdir, readFile, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { beforeEach, describe, expect, it, vi } from "vitest"; +import type { CopilotClientPool } from "./harness.js"; +import { createCopilotAgentHarness } from "./harness.js"; + +const mocks = vi.hoisted(() => ({ + runCopilotAttempt: vi.fn(), + createCopilotClientPool: vi.fn(), +})); + +vi.mock("./src/attempt.js", () => ({ + runCopilotAttempt: mocks.runCopilotAttempt, +})); + +vi.mock("./src/runtime.js", () => ({ + createCopilotClientPool: mocks.createCopilotClientPool, +})); + +const ATTEMPT_PARAMS = { provider: "github-copilot", model: "gpt-4.1" } as any; +const ATTEMPT_RESULT = { ok: true } as any; + +function makePoolMock(): CopilotClientPool { + return { + acquire: vi.fn(), + release: vi.fn(), + dispose: vi.fn().mockResolvedValue([]), + size: vi.fn().mockReturnValue(0), + }; +} + +function createDeferred<T>() { + let resolve!: (value: T | PromiseLike<T>) => void; + let reject!: (reason?: unknown) => void; + const promise = new Promise<T>((resolvePromise, rejectPromise) => { + resolve = resolvePromise; + reject = rejectPromise; + }); + return { promise, resolve, reject }; +} + +async function flushAsyncWork() { + await new Promise((resolve) => setTimeout(resolve, 0)); +} + +describe("createCopilotAgentHarness", () => { + beforeEach(() 
=> { + mocks.runCopilotAttempt.mockReset(); + mocks.createCopilotClientPool.mockReset(); + mocks.runCopilotAttempt.mockResolvedValue(ATTEMPT_RESULT); + mocks.createCopilotClientPool.mockImplementation(() => makePoolMock()); + }); + + it("returns the copilot id and default label", () => { + const harness = createCopilotAgentHarness(); + + expect(harness.id).toBe("copilot"); + expect(harness.label).toBe("GitHub Copilot agent runtime"); + }); + + it("accepts custom id and label from options", () => { + const harness = createCopilotAgentHarness({ id: "sdk", label: "SDK Harness" }); + + expect(harness.id).toBe("sdk"); + expect(harness.label).toBe("SDK Harness"); + }); + + it("supports returns false in auto runtime even for github provider", () => { + const harness = createCopilotAgentHarness(); + + expect( + harness.supports({ + provider: "github-copilot", + modelId: "gpt-4.1", + requestedRuntime: "auto", + }), + ).toEqual({ + supported: false, + reason: "copilot is opt-in only", + }); + }); + + it("supports returns false in pi runtime", () => { + const harness = createCopilotAgentHarness(); + + expect( + harness.supports({ provider: "github-copilot", modelId: "gpt-4.1", requestedRuntime: "pi" }), + ).toEqual({ + supported: false, + reason: "copilot is opt-in only", + }); + }); + + it("supports returns true for requestedRuntime copilot with github-copilot provider", () => { + const harness = createCopilotAgentHarness(); + + expect( + harness.supports({ + provider: "github-copilot", + modelId: "gpt-4.1", + requestedRuntime: "copilot", + }), + ).toEqual({ supported: true, priority: 100 }); + }); + + it("supports normalizes provider casing and whitespace", () => { + const harness = createCopilotAgentHarness(); + + expect( + harness.supports({ + provider: " GitHub-Copilot ", + modelId: "gpt-4.1", + requestedRuntime: "copilot", + }), + ).toEqual({ supported: true, priority: 100 }); + }); + + it("supports normalizes requestedRuntime casing", () => { + const harness = 
createCopilotAgentHarness(); + + expect( + harness.supports({ + provider: "github-copilot", + modelId: "gpt-4.1", + requestedRuntime: " COPILOT " as any, + }), + ).toEqual({ supported: true, priority: 100 }); + }); + + it("supports rejects providers outside the whitelist", () => { + const harness = createCopilotAgentHarness(); + + expect( + harness.supports({ + provider: "anthropic", + modelId: "claude-sonnet-4.5", + requestedRuntime: "copilot", + }), + ).toEqual({ + supported: false, + reason: "provider is not one of: github-copilot", + }); + // Legacy aspirational ids should not be claimed by the harness. + for (const legacyId of ["github", "openclaw", "copilot"]) { + expect( + harness.supports({ + provider: legacyId, + modelId: "gpt-4.1", + requestedRuntime: "copilot", + }), + ).toEqual({ + supported: false, + reason: "provider is not one of: github-copilot", + }); + } + }); + + it("runAttempt lazy-imports attempt by waiting until invocation to create a pool", async () => { + const pool = makePoolMock(); + mocks.createCopilotClientPool.mockReturnValue(pool); + const harness = createCopilotAgentHarness(); + + expect(mocks.createCopilotClientPool).not.toHaveBeenCalled(); + expect(mocks.runCopilotAttempt).not.toHaveBeenCalled(); + + await expect(harness.runAttempt(ATTEMPT_PARAMS)).resolves.toBe(ATTEMPT_RESULT); + + expect(mocks.createCopilotClientPool).toHaveBeenCalledTimes(1); + expect(mocks.runCopilotAttempt).toHaveBeenCalledTimes(1); + }); + + it("runAttempt creates one pool lazily and reuses it across two attempts on the same harness", async () => { + const pool = makePoolMock(); + const firstResult = { attempt: 1 } as any; + const secondResult = { attempt: 2 } as any; + mocks.createCopilotClientPool.mockReturnValue(pool); + mocks.runCopilotAttempt.mockResolvedValueOnce(firstResult).mockResolvedValueOnce(secondResult); + const harness = createCopilotAgentHarness(); + + await expect(harness.runAttempt(ATTEMPT_PARAMS)).resolves.toBe(firstResult); + await 
expect(harness.runAttempt(ATTEMPT_PARAMS)).resolves.toBe(secondResult); + + expect(mocks.createCopilotClientPool).toHaveBeenCalledTimes(1); + expect(mocks.runCopilotAttempt).toHaveBeenNthCalledWith( + 1, + ATTEMPT_PARAMS, + expect.objectContaining({ pool }), + ); + expect(mocks.runCopilotAttempt).toHaveBeenNthCalledWith( + 2, + ATTEMPT_PARAMS, + expect.objectContaining({ pool }), + ); + }); + + it("multiple harness instances create independent pools", async () => { + const poolOne = makePoolMock(); + const poolTwo = makePoolMock(); + mocks.createCopilotClientPool.mockReturnValueOnce(poolOne).mockReturnValueOnce(poolTwo); + const firstHarness = createCopilotAgentHarness(); + const secondHarness = createCopilotAgentHarness(); + + await expect(firstHarness.runAttempt(ATTEMPT_PARAMS)).resolves.toBe(ATTEMPT_RESULT); + await expect(secondHarness.runAttempt(ATTEMPT_PARAMS)).resolves.toBe(ATTEMPT_RESULT); + + expect(mocks.createCopilotClientPool).toHaveBeenCalledTimes(2); + expect(mocks.runCopilotAttempt).toHaveBeenNthCalledWith( + 1, + ATTEMPT_PARAMS, + expect.objectContaining({ pool: poolOne }), + ); + expect(mocks.runCopilotAttempt).toHaveBeenNthCalledWith( + 2, + ATTEMPT_PARAMS, + expect.objectContaining({ pool: poolTwo }), + ); + }); + + it("runAttempt does not serialize concurrent attempts", async () => { + const pool = makePoolMock(); + const firstResult = { attempt: 1 } as any; + const secondResult = { attempt: 2 } as any; + mocks.createCopilotClientPool.mockReturnValue(pool); + mocks.runCopilotAttempt.mockResolvedValueOnce(firstResult).mockResolvedValueOnce(secondResult); + const harness = createCopilotAgentHarness(); + + await expect(harness.runAttempt(ATTEMPT_PARAMS)).resolves.toBe(firstResult); + await expect(harness.runAttempt(ATTEMPT_PARAMS)).resolves.toBe(secondResult); + + expect(mocks.createCopilotClientPool).toHaveBeenCalledTimes(1); + expect(mocks.runCopilotAttempt).toHaveBeenCalledTimes(2); + }); + + it("dispose before first runAttempt does not create a 
pool", async () => { + const harness = createCopilotAgentHarness(); + + await expect(harness.dispose?.()).resolves.toBeUndefined(); + + expect(mocks.createCopilotClientPool).not.toHaveBeenCalled(); + }); + + it("dispose during lazy startup prevents the attempt from creating a pool", async () => { + const harness = createCopilotAgentHarness(); + + const attemptPromise = harness.runAttempt(ATTEMPT_PARAMS); + const disposePromise = harness.dispose?.(); + + await expect(attemptPromise).rejects.toThrow( + "[copilot] harness was disposed while starting an attempt", + ); + await expect(disposePromise).resolves.toBeUndefined(); + expect(mocks.createCopilotClientPool).not.toHaveBeenCalled(); + expect(mocks.runCopilotAttempt).not.toHaveBeenCalled(); + }); + + it("dispose after pool creation calls pool.dispose once even when called twice", async () => { + const pool = makePoolMock(); + mocks.createCopilotClientPool.mockReturnValue(pool); + const harness = createCopilotAgentHarness(); + + await harness.runAttempt(ATTEMPT_PARAMS); + + const firstDispose = harness.dispose?.(); + const secondDispose = harness.dispose?.(); + + await expect(firstDispose).resolves.toBeUndefined(); + await expect(secondDispose).resolves.toBeUndefined(); + expect(pool.dispose).toHaveBeenCalledTimes(1); + }); + + it("dispose waits for in-flight runAttempt before disposing", async () => { + const pool = makePoolMock(); + const deferred = createDeferred(); + mocks.createCopilotClientPool.mockReturnValue(pool); + mocks.runCopilotAttempt.mockImplementation(() => deferred.promise); + const harness = createCopilotAgentHarness(); + + const attemptPromise = harness.runAttempt(ATTEMPT_PARAMS); + await flushAsyncWork(); + + const disposePromise = harness.dispose?.(); + let disposeSettled = false; + void disposePromise?.then(() => { + disposeSettled = true; + }); + + await flushAsyncWork(); + + expect(pool.dispose).not.toHaveBeenCalled(); + expect(disposeSettled).toBe(false); + + deferred.resolve(ATTEMPT_RESULT); 
+ + await expect(attemptPromise).resolves.toBe(ATTEMPT_RESULT); + await expect(disposePromise).resolves.toBeUndefined(); + expect(pool.dispose).toHaveBeenCalledTimes(1); + }); + + it("runAttempt after dispose rejects without creating a new pool", async () => { + const harness = createCopilotAgentHarness(); + + await harness.dispose?.(); + + await expect(harness.runAttempt(ATTEMPT_PARAMS)).rejects.toThrow( + "[copilot] harness has been disposed; cannot start new attempts", + ); + expect(mocks.createCopilotClientPool).not.toHaveBeenCalled(); + }); + + it("dispose surfaces pool.dispose errors as AggregateError", async () => { + const pool = makePoolMock(); + const errors = [new Error("first"), new Error("second")]; + pool.dispose = vi.fn().mockResolvedValue(errors); + mocks.createCopilotClientPool.mockReturnValue(pool); + const harness = createCopilotAgentHarness(); + + await harness.runAttempt(ATTEMPT_PARAMS); + + try { + await harness.dispose?.(); + throw new Error("expected dispose to throw"); + } catch (error) { + expect(error).toBeInstanceOf(AggregateError); + expect((error as AggregateError).message).toBe("[copilot] pool disposal errors"); + expect((error as AggregateError).errors).toEqual(errors); + } + }); + + it("dispose does not dispose a caller-supplied pool", async () => { + const pool = makePoolMock(); + const harness = createCopilotAgentHarness({ pool }); + + await harness.runAttempt(ATTEMPT_PARAMS); + await expect(harness.dispose?.()).resolves.toBeUndefined(); + + expect(pool.dispose).not.toHaveBeenCalled(); + }); + + it("uses options.pool when supplied", async () => { + const pool = makePoolMock(); + const harness = createCopilotAgentHarness({ pool }); + + await expect(harness.runAttempt(ATTEMPT_PARAMS)).resolves.toBe(ATTEMPT_RESULT); + + expect(mocks.createCopilotClientPool).not.toHaveBeenCalled(); + expect(mocks.runCopilotAttempt).toHaveBeenCalledWith( + ATTEMPT_PARAMS, + expect.objectContaining({ pool }), + ); + }); + + describe("reset", () => { + 
it("is a no-op when params.sessionId is missing", async () => { + const pool = makePoolMock(); + const harness = createCopilotAgentHarness({ pool }); + + await expect(harness.reset?.({})).resolves.toBeUndefined(); + }); + + it("is a no-op when the session was never tracked", async () => { + const pool = makePoolMock(); + const harness = createCopilotAgentHarness({ pool }); + + await expect(harness.reset?.({ sessionId: "unknown" })).resolves.toBeUndefined(); + }); + + it("calls deleteSession on the client that created the session", async () => { + const pool = makePoolMock(); + const deleteSession = vi.fn().mockResolvedValue(undefined); + const client = { deleteSession } as any; + mocks.runCopilotAttempt.mockImplementation(async (params, deps) => { + deps.onSessionEstablished?.({ + sdkSessionId: "sdk-sess-123", + pooledClient: { key: {} as any, client }, + }); + return ATTEMPT_RESULT; + }); + const harness = createCopilotAgentHarness({ pool }); + + await harness.runAttempt({ ...ATTEMPT_PARAMS, sessionId: "oc-sess-1" }); + await harness.reset?.({ sessionId: "oc-sess-1" }); + + expect(deleteSession).toHaveBeenCalledTimes(1); + expect(deleteSession).toHaveBeenCalledWith("sdk-sess-123"); + }); + + it("does not call deleteSession when no sdkSessionId was reported", async () => { + const pool = makePoolMock(); + const deleteSession = vi.fn().mockResolvedValue(undefined); + mocks.runCopilotAttempt.mockImplementation(async (_params, _deps) => ATTEMPT_RESULT); + const harness = createCopilotAgentHarness({ pool }); + + await harness.runAttempt({ ...ATTEMPT_PARAMS, sessionId: "oc-sess-2" }); + await harness.reset?.({ sessionId: "oc-sess-2" }); + + expect(deleteSession).not.toHaveBeenCalled(); + }); + + it("swallows errors thrown by client.deleteSession", async () => { + const pool = makePoolMock(); + const deleteSession = vi.fn().mockRejectedValue(new Error("session not found")); + const client = { deleteSession } as any; + mocks.runCopilotAttempt.mockImplementation(async 
(params, deps) => { + deps.onSessionEstablished?.({ + sdkSessionId: "sdk-sess-err", + pooledClient: { key: {} as any, client }, + }); + return ATTEMPT_RESULT; + }); + const harness = createCopilotAgentHarness({ pool }); + + await harness.runAttempt({ ...ATTEMPT_PARAMS, sessionId: "oc-sess-3" }); + + await expect(harness.reset?.({ sessionId: "oc-sess-3" })).resolves.toBeUndefined(); + expect(deleteSession).toHaveBeenCalledTimes(1); + }); + + it("forgets the session after reset; a second reset is a no-op", async () => { + const pool = makePoolMock(); + const deleteSession = vi.fn().mockResolvedValue(undefined); + const client = { deleteSession } as any; + mocks.runCopilotAttempt.mockImplementation(async (params, deps) => { + deps.onSessionEstablished?.({ + sdkSessionId: "sdk-sess-x", + pooledClient: { key: {} as any, client }, + }); + return ATTEMPT_RESULT; + }); + const harness = createCopilotAgentHarness({ pool }); + + await harness.runAttempt({ ...ATTEMPT_PARAMS, sessionId: "oc-sess-4" }); + await harness.reset?.({ sessionId: "oc-sess-4" }); + await harness.reset?.({ sessionId: "oc-sess-4" }); + + expect(deleteSession).toHaveBeenCalledTimes(1); + }); + + it("does not invoke deleteSession for a session belonging to a different openclawSessionId", async () => { + const pool = makePoolMock(); + const deleteSession = vi.fn().mockResolvedValue(undefined); + const client = { deleteSession } as any; + mocks.runCopilotAttempt.mockImplementation(async (params, deps) => { + deps.onSessionEstablished?.({ + sdkSessionId: "sdk-sess-y", + pooledClient: { key: {} as any, client }, + }); + return ATTEMPT_RESULT; + }); + const harness = createCopilotAgentHarness({ pool }); + + await harness.runAttempt({ ...ATTEMPT_PARAMS, sessionId: "oc-A" }); + await harness.reset?.({ sessionId: "oc-B" }); + + expect(deleteSession).not.toHaveBeenCalled(); + }); + }); + + it("dispose clears tracked sessions so subsequent reset is a no-op", async () => { + const pool = makePoolMock(); + const 
deleteSession = vi.fn().mockResolvedValue(undefined); + const client = { deleteSession } as any; + mocks.runCopilotAttempt.mockImplementation(async (params, deps) => { + deps.onSessionEstablished?.({ + sdkSessionId: "sdk-sess-d", + pooledClient: { key: {} as any, client }, + }); + return ATTEMPT_RESULT; + }); + const harness = createCopilotAgentHarness({ pool }); + + await harness.runAttempt({ ...ATTEMPT_PARAMS, sessionId: "oc-disp" }); + await harness.dispose?.(); + await harness.reset?.({ sessionId: "oc-disp" }); + + expect(deleteSession).not.toHaveBeenCalled(); + }); + + describe("session reuse across turns (dogfood finding #4)", () => { + // These tests pin the harness's session-reuse contract: subsequent + // `runAttempt` calls within the same OpenClaw session should pass + // the tracked `sdkSessionId` to the attempt via `initialReplayState` + // so the SDK can `resumeSession` and keep its prompt cache + thread + // history warm. Compatibility-fingerprint mismatch (provider/model/ + // cwd/auth) starts a fresh SDK session instead, and any caller- + // provided `replayInvalid: true` must survive untouched. 
+ + function makeAttemptParams(overrides: Record<string, unknown> = {}): any { + return { + provider: "github-copilot", + model: { provider: "github-copilot", id: "gpt-4.1" }, + cwd: "/ws", + workspaceDir: "/ws", + agentDir: "/home", + copilotHome: "/copilot-home", + auth: { useLoggedInUser: true }, + sessionId: "oc-sess-reuse", + ...overrides, + }; + } + + it("seeds initialReplayState.sdkSessionId from trackedSessions on the second turn", async () => { + const pool = makePoolMock(); + const client = { deleteSession: vi.fn() } as any; + mocks.runCopilotAttempt.mockImplementation(async (_params, deps) => { + deps.onSessionEstablished?.({ + sdkSessionId: "sdk-sess-warm", + pooledClient: { key: {} as any, client }, + }); + return ATTEMPT_RESULT; + }); + const harness = createCopilotAgentHarness({ pool }); + + await harness.runAttempt(makeAttemptParams({ runId: "t1" })); + await harness.runAttempt(makeAttemptParams({ runId: "t2" })); + + expect(mocks.runCopilotAttempt).toHaveBeenCalledTimes(2); + const secondCallParams = mocks.runCopilotAttempt.mock.calls[1]?.[0] as { + initialReplayState?: { sdkSessionId?: string; replayInvalid?: boolean }; + }; + expect(secondCallParams.initialReplayState?.sdkSessionId).toBe("sdk-sess-warm"); + // Must not synthesize a replayInvalid signal: undefined → resumable. 
+ expect(secondCallParams.initialReplayState?.replayInvalid).toBeUndefined(); + }); + + it("does not seed sdkSessionId on the first turn (nothing tracked yet)", async () => { + const pool = makePoolMock(); + mocks.runCopilotAttempt.mockImplementation(async (_params, deps) => { + deps.onSessionEstablished?.({ + sdkSessionId: "sdk-sess-cold", + pooledClient: { key: {} as any, client: {} as any }, + }); + return ATTEMPT_RESULT; + }); + const harness = createCopilotAgentHarness({ pool }); + + await harness.runAttempt(makeAttemptParams({ runId: "t1" })); + + const firstCallParams = mocks.runCopilotAttempt.mock.calls[0]?.[0] as { + initialReplayState?: { sdkSessionId?: string }; + }; + expect(firstCallParams.initialReplayState?.sdkSessionId).toBeUndefined(); + }); + + it("does not seed when compatibility fingerprint differs (model change)", async () => { + const pool = makePoolMock(); + mocks.runCopilotAttempt.mockImplementation(async (_params, deps) => { + deps.onSessionEstablished?.({ + sdkSessionId: "sdk-sess-gpt4", + pooledClient: { key: {} as any, client: {} as any }, + }); + return ATTEMPT_RESULT; + }); + const harness = createCopilotAgentHarness({ pool }); + + await harness.runAttempt( + makeAttemptParams({ runId: "t1", model: { provider: "github-copilot", id: "gpt-4.1" } }), + ); + await harness.runAttempt( + makeAttemptParams({ + runId: "t2", + model: { provider: "github-copilot", id: "claude-sonnet-4.5" }, + }), + ); + + const secondCallParams = mocks.runCopilotAttempt.mock.calls[1]?.[0] as { + initialReplayState?: { sdkSessionId?: string }; + }; + expect(secondCallParams.initialReplayState?.sdkSessionId).toBeUndefined(); + }); + + it("does not seed when compatibility fingerprint differs (legacy auth.gitHubToken rotation)", async () => { + const pool = makePoolMock(); + mocks.runCopilotAttempt.mockImplementation(async (_params, deps) => { + deps.onSessionEstablished?.({ + sdkSessionId: "sdk-sess-auth1", + pooledClient: { key: {} as any, client: {} as any }, + 
}); + return ATTEMPT_RESULT; + }); + const harness = createCopilotAgentHarness({ pool }); + + // Use the explicit-token auth branch (which carries gitHubToken + // + profileId + profileVersion through resolveCopilotAuth and + // surfaces the version into authProfileVersion) so a profile + // version bump is a real auth rotation, not a no-op fall-through + // to useLoggedInUser. + await harness.runAttempt( + makeAttemptParams({ + runId: "t1", + auth: { gitHubToken: "tok-1", profileId: "p1", profileVersion: "v1" }, + }), + ); + await harness.runAttempt( + makeAttemptParams({ + runId: "t2", + auth: { gitHubToken: "tok-1", profileId: "p1", profileVersion: "v2" }, + }), + ); + + const secondCallParams = mocks.runCopilotAttempt.mock.calls[1]?.[0] as { + initialReplayState?: { sdkSessionId?: string }; + }; + expect(secondCallParams.initialReplayState?.sdkSessionId).toBeUndefined(); + }); + + it("G3: does not seed when top-level authProfileId rotates (production path)", async () => { + // The production main path (EmbeddedRunAttemptParams) carries + // top-level `authProfileId` + `resolvedApiKey`, not the legacy + // `auth.*` sub-object. computeSessionCompatKey delegates to + // resolveCopilotAuth so both paths produce the same effective + // auth identity. Rotating the top-level profile id must + // invalidate session reuse. 
+ const pool = makePoolMock(); + mocks.runCopilotAttempt.mockImplementation(async (_params, deps) => { + deps.onSessionEstablished?.({ + sdkSessionId: "sdk-sess-p1", + pooledClient: { key: {} as any, client: {} as any }, + }); + return ATTEMPT_RESULT; + }); + const harness = createCopilotAgentHarness({ pool }); + + await harness.runAttempt( + makeAttemptParams({ + runId: "t1", + auth: undefined, + authProfileId: "p1", + resolvedApiKey: "tok-same", + }), + ); + await harness.runAttempt( + makeAttemptParams({ + runId: "t2", + auth: undefined, + authProfileId: "p2", + resolvedApiKey: "tok-same", + }), + ); + + const secondCallParams = mocks.runCopilotAttempt.mock.calls[1]?.[0] as { + initialReplayState?: { sdkSessionId?: string }; + }; + expect(secondCallParams.initialReplayState?.sdkSessionId).toBeUndefined(); + }); + + it("G3: does not seed when top-level resolvedApiKey rotates (token fingerprint changes)", async () => { + // Same authProfileId but the resolved token bytes change. + // resolveCopilotAuth synthesizes authProfileVersion via + // tokenFingerprint(resolvedApiKey) for the contract path, so + // rotating the bytes flips the fingerprint and therefore the + // compat key. Important for cases where an upstream auth + // store re-issues a token under the same profile id. 
+ const pool = makePoolMock(); + mocks.runCopilotAttempt.mockImplementation(async (_params, deps) => { + deps.onSessionEstablished?.({ + sdkSessionId: "sdk-sess-tok1", + pooledClient: { key: {} as any, client: {} as any }, + }); + return ATTEMPT_RESULT; + }); + const harness = createCopilotAgentHarness({ pool }); + + await harness.runAttempt( + makeAttemptParams({ + runId: "t1", + auth: undefined, + authProfileId: "p1", + resolvedApiKey: "tok-a", + }), + ); + await harness.runAttempt( + makeAttemptParams({ + runId: "t2", + auth: undefined, + authProfileId: "p1", + resolvedApiKey: "tok-b", + }), + ); + + const secondCallParams = mocks.runCopilotAttempt.mock.calls[1]?.[0] as { + initialReplayState?: { sdkSessionId?: string }; + }; + expect(secondCallParams.initialReplayState?.sdkSessionId).toBeUndefined(); + }); + + it("preserves caller-provided initialReplayState.replayInvalid:true (does not overwrite)", async () => { + const pool = makePoolMock(); + mocks.runCopilotAttempt.mockImplementation(async (_params, deps) => { + deps.onSessionEstablished?.({ + sdkSessionId: "sdk-sess-tracked", + pooledClient: { key: {} as any, client: {} as any }, + }); + return ATTEMPT_RESULT; + }); + const harness = createCopilotAgentHarness({ pool }); + + await harness.runAttempt(makeAttemptParams({ runId: "t1" })); + await harness.runAttempt( + makeAttemptParams({ + runId: "t2", + initialReplayState: { replayInvalid: true }, + }), + ); + + const secondCallParams = mocks.runCopilotAttempt.mock.calls[1]?.[0] as { + initialReplayState?: { sdkSessionId?: string; replayInvalid?: boolean }; + }; + // sdkSessionId is still injected from tracking, but replayInvalid + // must remain true so replay-shim treats this as create-not-resume. 
+ expect(secondCallParams.initialReplayState?.sdkSessionId).toBe("sdk-sess-tracked"); + expect(secondCallParams.initialReplayState?.replayInvalid).toBe(true); + }); + + it("updates the tracked session when onSessionEstablished reports a new sdkSessionId", async () => { + const pool = makePoolMock(); + const deleteSession = vi.fn(); + const client = { deleteSession } as any; + let nextSdkId = "sdk-sess-1"; + mocks.runCopilotAttempt.mockImplementation(async (_params, deps) => { + deps.onSessionEstablished?.({ + sdkSessionId: nextSdkId, + pooledClient: { key: {} as any, client }, + }); + return ATTEMPT_RESULT; + }); + const harness = createCopilotAgentHarness({ pool }); + + await harness.runAttempt(makeAttemptParams({ runId: "t1" })); + nextSdkId = "sdk-sess-2"; // Simulate downgraded resume → new SDK session. + await harness.runAttempt(makeAttemptParams({ runId: "t2" })); + await harness.reset?.({ sessionId: "oc-sess-reuse" }); + + expect(deleteSession).toHaveBeenCalledTimes(1); + // The newer sdkSessionId must be the one targeted by reset, not + // the stale first-turn id. 
+ expect(deleteSession).toHaveBeenCalledWith("sdk-sess-2"); + }); + }); + + describe("compact", () => { + it("returns ok:false when sessionId is missing", async () => { + const harness = createCopilotAgentHarness({ pool: makePoolMock() }); + const result = await harness.compact?.({ workspaceDir: "/ws" } as any); + expect(result).toEqual({ + ok: false, + compacted: false, + reason: "missing-required-params", + }); + }); + + it("returns ok:false when workspaceDir is missing", async () => { + const harness = createCopilotAgentHarness({ pool: makePoolMock() }); + const result = await harness.compact?.({ sessionId: "s" } as any); + expect(result).toEqual({ + ok: false, + compacted: false, + reason: "missing-required-params", + }); + }); + + it("writes an OpenClaw marker under /files and returns ok:true,compacted:false", async () => { + const workspaceDir = await mkdtemp(join(tmpdir(), "copilot-harness-compact-")); + try { + const harness = createCopilotAgentHarness({ pool: makePoolMock() }); + const result = await harness.compact?.({ + sessionId: "oc-sess-compact-1", + workspaceDir, + trigger: "budget", + currentTokenCount: 12345, + } as any); + + expect(result).toEqual({ + ok: true, + compacted: false, + reason: "deferred-to-sdk-infinite-sessions", + }); + + const files = await readdir(join(workspaceDir, "files")); + const marker = files.find((f) => f.startsWith("openclaw-compaction-")); + expect(marker).toBeDefined(); + expect(marker).toMatch(/openclaw-compaction-\d+-oc-sess-compact-1\.json/); + const contents = JSON.parse(await readFile(join(workspaceDir, "files", marker!), "utf8")); + expect(contents).toMatchObject({ + version: 1, + source: "copilot-harness", + sessionId: "oc-sess-compact-1", + compacted: false, + trigger: "budget", + currentTokenCount: 12345, + reason: "deferred-to-sdk-infinite-sessions", + }); + } finally { + await rm(workspaceDir, { recursive: true, force: true }); + } + }); + + it("records the tracked sdkSessionId in the marker when an attempt 
has run", async () => { + const workspaceDir = await mkdtemp(join(tmpdir(), "copilot-harness-compact-tracked-")); + try { + const pool = makePoolMock(); + mocks.runCopilotAttempt.mockImplementation(async (params, deps) => { + deps.onSessionEstablished?.({ + sdkSessionId: "sdk-sess-tracked", + pooledClient: { key: {} as any, client: { deleteSession: vi.fn() } as any }, + }); + return ATTEMPT_RESULT; + }); + const harness = createCopilotAgentHarness({ pool }); + + await harness.runAttempt({ ...ATTEMPT_PARAMS, sessionId: "oc-sess-tracked" }); + await harness.compact?.({ + sessionId: "oc-sess-tracked", + workspaceDir, + trigger: "manual", + } as any); + + const files = await readdir(join(workspaceDir, "files")); + const marker = files.find((f) => f.startsWith("openclaw-compaction-"))!; + const contents = JSON.parse(await readFile(join(workspaceDir, "files", marker), "utf8")); + expect(contents.sdkSessionId).toBe("sdk-sess-tracked"); + } finally { + await rm(workspaceDir, { recursive: true, force: true }); + } + }); + + it("records force:true in the marker and surfaces a force-specific reason", async () => { + const workspaceDir = await mkdtemp(join(tmpdir(), "copilot-harness-compact-force-")); + try { + const harness = createCopilotAgentHarness({ pool: makePoolMock() }); + const result = await harness.compact?.({ + sessionId: "oc-sess-force", + workspaceDir, + force: true, + } as any); + + expect(result).toEqual({ + ok: true, + compacted: false, + reason: "force-requested-but-sdk-has-no-synchronous-compact-api", + }); + + const files = await readdir(join(workspaceDir, "files")); + const marker = files.find((f) => f.startsWith("openclaw-compaction-"))!; + const contents = JSON.parse(await readFile(join(workspaceDir, "files", marker), "utf8")); + expect(contents.force).toBe(true); + expect(contents.reason).toBe("force-requested-but-sdk-has-no-synchronous-compact-api"); + } finally { + await rm(workspaceDir, { recursive: true, force: true }); + } + }); + + it("returns 
ok:false with structured failure when the marker write throws", async () => { + const harness = createCopilotAgentHarness({ pool: makePoolMock() }); + // Use a path with a NUL character which Node rejects synchronously + // on every platform, simulating a write failure that the harness + // must convert into a structured failure instead of throwing. + const badWorkspace = "/this\u0000is/illegal"; + const result = await harness.compact?.({ + sessionId: "oc-sess-bad", + workspaceDir: badWorkspace, + } as any); + + expect(result?.ok).toBe(false); + expect(result?.compacted).toBe(false); + expect(result?.reason).toBe("marker-write-failed"); + expect(result?.failure?.reason).toBe("marker-write-failed"); + expect(typeof result?.failure?.rawError).toBe("string"); + expect(result?.failure?.rawError?.length ?? 0).toBeGreaterThan(0); + }); + }); + + describe("runSideQuestion", () => { + it("is not implemented; /btw falls through to the in-tree PI fallback path", () => { + const harness = createCopilotAgentHarness({ pool: makePoolMock() }); + expect(harness.runSideQuestion).toBeUndefined(); + }); + }); +}); diff --git a/extensions/copilot/harness.ts b/extensions/copilot/harness.ts new file mode 100644 index 000000000000..a7e43914f801 --- /dev/null +++ b/extensions/copilot/harness.ts @@ -0,0 +1,339 @@ +import type { CopilotClient } from "@github/copilot-sdk"; +import type { + AgentHarness, + AgentHarnessAttemptParams, + AgentHarnessAttemptResult, + AgentHarnessCompactParams, + AgentHarnessCompactResult, + AgentHarnessResetParams, +} from "openclaw/plugin-sdk/agent-harness-runtime"; +import { resolveCopilotAuth } from "./src/auth-bridge.js"; +import { writeOpenClawCompactionMarker } from "./src/compaction-bridge.js"; +import type { CopilotClientPool, CopilotClientPoolOptions, PooledClient } from "./src/runtime.js"; + +export type { CopilotClientPool, CopilotClientPoolOptions }; + +const COPILOT_PROVIDER_IDS: ReadonlySet<string> = new Set(["github-copilot"]); + +export interface 
CreateCopilotAgentHarnessOptions { + id?: string; + label?: string; + pluginConfig?: unknown; + pool?: CopilotClientPool; + poolOptions?: CopilotClientPoolOptions; +} + +interface TrackedSession { + sdkSessionId: string; + client: CopilotClient; + // Compatibility fingerprint of the params that created the SDK + // session. We only reuse the tracked SDK session when the next + // attempt's fingerprint matches — different provider/model/cwd/auth + // configurations should start a fresh SDK session rather than resume + // one bound to incompatible state. Mismatch falls back to + // `createSession` (no resume injection) and the new sdkSessionId + // replaces this entry via `onSessionEstablished`. + compatKey: string; +} + +// Build a string fingerprint of the attempt params that must agree +// across turns for SDK-session reuse to be safe. Keep this list +// conservative: any field whose change would invalidate the SDK +// session's bound state belongs here. Token / auth profile rotation +// produces a new fingerprint so we don't replay a session against a +// stale credential. +// +// Auth identity is derived from `resolveCopilotAuth(...)` — the same +// function `resolvePoolAcquire` uses to build the pool key. That +// ensures the compat key tracks the EFFECTIVE auth (which can come +// from the legacy `auth.*` subobject, the contract-resolved +// top-level `resolvedApiKey` + `authProfileId`, or the env-var +// fallback) rather than any single one of those raw inputs. The +// `authProfileVersion` field is a non-secret sha256 fingerprint of +// the token (see `tokenFingerprint` in `src/auth-bridge.ts`), so +// rotating the token under the same profile id still invalidates +// the compat key without ever serializing the raw credential. 
/**
 * Builds a deterministic fingerprint of the attempt parameters that must
 * agree across turns before a tracked SDK session may be resumed.
 *
 * The fingerprint covers provider / model id / api, the working directory
 * (`cwd`, falling back to `workspaceDir`), `agentDir`, `copilotHome`, and the
 * auth identity returned by `resolveCopilotAuth` (mode, profile id, profile
 * version). The raw credential is never placed in the key.
 *
 * @param params Attempt parameters for the upcoming turn.
 * @returns An opaque string; two attempts are resume-compatible only when
 *   their keys are strictly equal.
 */
function computeSessionCompatKey(params: AgentHarnessAttemptParams): string {
  const p = params as AgentHarnessAttemptParams & {
    auth?: {
      gitHubToken?: string;
      profileId?: string;
      profileVersion?: string;
      useLoggedInUser?: boolean;
    };
    agentId?: string;
    authProfileId?: string;
    copilotHome?: string;
    cwd?: string;
    model?: string | { api?: string; id?: string; provider?: string };
    profileVersion?: string;
    resolvedApiKey?: string;
    workspaceDir?: string;
  };
  // `model` may be a bare id string or a structured descriptor; normalize to
  // the structured form so the key layout is the same in both cases.
  const modelObj: { api?: string; id?: string; provider?: string } =
    p.model && typeof p.model === "object"
      ? p.model
      : { id: typeof p.model === "string" ? p.model : undefined };
  // resolveCopilotAuth can throw when an explicit `auth.gitHubToken` is
  // supplied without profileId + profileVersion. The same error surfaces
  // right afterwards from the attempt itself, so it is not masked here; we
  // only need a deterministic placeholder (no random / time-based data, which
  // would break the equality check).
  let authParts: string[];
  try {
    const resolved = resolveCopilotAuth({
      agentId: typeof p.agentId === "string" ? p.agentId : undefined,
      agentDir: typeof p.agentDir === "string" ? p.agentDir : undefined,
      workspaceDir: typeof p.workspaceDir === "string" ? p.workspaceDir : undefined,
      copilotHome: typeof p.copilotHome === "string" ? p.copilotHome : undefined,
      auth: p.auth,
      resolvedApiKey: typeof p.resolvedApiKey === "string" ? p.resolvedApiKey : undefined,
      authProfileId: typeof p.authProfileId === "string" ? p.authProfileId : undefined,
      profileVersion: typeof p.profileVersion === "string" ? p.profileVersion : undefined,
    });
    authParts = [
      `auth.mode=${resolved.authMode}`,
      `auth.profileId=${resolved.authProfileId ?? ""}`,
      `auth.profileVersion=${resolved.authProfileVersion ?? ""}`,
    ];
  } catch {
    authParts = ["auth=unresolvable"];
  }
  const parts = [
    `provider=${modelObj.provider ?? ""}`,
    `model=${modelObj.id ?? ""}`,
    `api=${modelObj.api ?? ""}`,
    `cwd=${p.cwd ?? p.workspaceDir ?? ""}`,
    `agentDir=${p.agentDir ?? ""}`,
    `copilotHome=${p.copilotHome ?? ""}`,
    ...authParts,
  ];
  // Serialize as a JSON array rather than joining with a delimiter: a path
  // that itself contains the delimiter could otherwise make two different
  // configurations collapse onto the same fingerprint. The key is only ever
  // compared in-memory, so its exact format is not part of any contract.
  return JSON.stringify(parts);
}

/**
 * Creates the GitHub Copilot agent harness.
 *
 * The returned harness lazily creates (or borrows, when `options.pool` is
 * given) a client pool, tracks which SDK session belongs to which OpenClaw
 * session so later turns can resume it, and exposes `reset`, `compact`, and
 * `dispose` lifecycle hooks.
 *
 * @param options Optional id/label overrides, an externally owned pool, or
 *   options for the internally created pool.
 * @returns An `AgentHarness` with id `"copilot"` unless overridden.
 */
export function createCopilotAgentHarness(
  options?: CreateCopilotAgentHarnessOptions,
): AgentHarness {
  let poolPromise: Promise<CopilotClientPool> | undefined;
  // Only set when this harness created the pool itself; an injected
  // `options.pool` is owned by the caller and is never disposed here.
  let createdPool: CopilotClientPool | undefined;
  let disposed = false;
  let disposePromise: Promise<void> | undefined;
  const inFlight = new Set<Promise<AgentHarnessAttemptResult>>();
  // Maps OpenClaw session id (from AgentHarnessAttemptParams.sessionId) to
  // the SDK session id + client that owns it. Populated by
  // runCopilotAttempt via the onSessionEstablished callback so that
  // reset(params) can call client.deleteSession on the right client.
  const trackedSessions = new Map<string, TrackedSession>();

  /** Returns the injected pool, or lazily creates one on first use. */
  async function getPool(): Promise<CopilotClientPool> {
    if (options?.pool) {
      return options.pool;
    }
    if (!poolPromise) {
      poolPromise = (async () => {
        const { createCopilotClientPool } = await import("./src/runtime.js");
        createdPool = createCopilotClientPool(options?.poolOptions);
        return createdPool;
      })();
    }
    return poolPromise;
  }

  return {
    id: options?.id ?? "copilot",
    label: options?.label ?? "GitHub Copilot agent runtime",

    /**
     * Opt-in only: supported when the requested runtime is exactly
     * `copilot` (case-insensitive, trimmed) AND the provider is one of
     * `COPILOT_PROVIDER_IDS`.
     */
    supports(ctx) {
      const requestedRuntime = String(ctx.requestedRuntime ?? "")
        .trim()
        .toLowerCase();
      if (requestedRuntime !== "copilot") {
        return { supported: false, reason: "copilot is opt-in only" };
      }
      const provider = ctx.provider.trim().toLowerCase();
      if (!COPILOT_PROVIDER_IDS.has(provider)) {
        return {
          supported: false,
          reason: `provider is not one of: ${[...COPILOT_PROVIDER_IDS].toSorted().join(", ")}`,
        };
      }
      return { supported: true, priority: 100 };
    },

    /**
     * Runs one attempt, resuming the tracked SDK session for this OpenClaw
     * session when its compat key still matches.
     *
     * @throws Error when the harness is (or becomes) disposed before the
     *   attempt is handed to `runCopilotAttempt`.
     */
    async runAttempt(params: AgentHarnessAttemptParams): Promise<AgentHarnessAttemptResult> {
      const attemptPromise = (async () => {
        // `disposed` is re-checked after every await below because dispose()
        // can be called while the dynamic import / pool creation is pending.
        if (disposed) {
          throw new Error("[copilot] harness has been disposed; cannot start new attempts");
        }
        const { runCopilotAttempt } = await import("./src/attempt.js");
        if (disposed) {
          throw new Error("[copilot] harness was disposed while starting an attempt");
        }
        const pool = await getPool();
        if (disposed) {
          throw new Error("[copilot] harness was disposed while starting an attempt");
        }
        const openclawSessionId =
          typeof params.sessionId === "string" ? params.sessionId : undefined;

        // Dogfood finding #4: reuse the SDK session across turns within
        // the same OpenClaw session so that the GitHub Copilot agent
        // runtime's prompt cache, tool-call history, and any server-side
        // compaction state survive turn boundaries. Without this, every
        // turn called `createSession()` and lost cache + thread continuity.
        //
        // Safety:
        //   - Only inject when the tracked compatKey still matches the
        //     current attempt's fingerprint (provider/model/cwd/auth).
        //     Mismatch falls through to `createSession` and the new SDK
        //     session replaces the tracked entry below.
        //   - Any other caller-provided `initialReplayState` fields (for
        //     example `replayInvalid: true`) are preserved by the spread;
        //     only `sdkSessionId` is overwritten.
        //   - Resume-failure recovery is handled inside `attempt.ts`
        //     (not visible from this file).
        const currentCompatKey = computeSessionCompatKey(params);
        const tracked = openclawSessionId ? trackedSessions.get(openclawSessionId) : undefined;
        const resumableSessionId =
          tracked && tracked.compatKey === currentCompatKey ? tracked.sdkSessionId : undefined;
        const effectiveParams: AgentHarnessAttemptParams = resumableSessionId
          ? ({
              ...params,
              initialReplayState: {
                ...params.initialReplayState,
                sdkSessionId: resumableSessionId,
              },
            } as AgentHarnessAttemptParams)
          : params;

        return runCopilotAttempt(effectiveParams, {
          pool,
          // Without an OpenClaw session id there is nothing to key the
          // tracking map on, so no callback is registered.
          onSessionEstablished: openclawSessionId
            ? ({
                sdkSessionId,
                pooledClient,
              }: {
                sdkSessionId: string;
                pooledClient: PooledClient;
              }) => {
                trackedSessions.set(openclawSessionId, {
                  sdkSessionId,
                  client: pooledClient.client,
                  compatKey: currentCompatKey,
                });
              }
            : undefined,
        });
      })();
      // Registered so dispose() can wait for the attempt to settle.
      inFlight.add(attemptPromise);
      try {
        return await attemptPromise;
      } finally {
        inFlight.delete(attemptPromise);
      }
    },

    /**
     * Forgets the tracked SDK session for `params.sessionId` and asks the
     * owning client to delete it. No-op when the session is not tracked.
     */
    async reset(params: AgentHarnessResetParams): Promise<void> {
      const openclawSessionId = typeof params.sessionId === "string" ? params.sessionId : undefined;
      if (!openclawSessionId) {
        return;
      }
      const tracked = trackedSessions.get(openclawSessionId);
      if (!tracked) {
        // Session was created by a different harness, or already reset.
        return;
      }
      // Drop the entry first so a failing delete cannot leave a stale
      // mapping behind.
      trackedSessions.delete(openclawSessionId);
      try {
        await tracked.client.deleteSession(tracked.sdkSessionId);
      } catch {
        // Best-effort: client may be stopped, session may not exist
        // server-side, or the SDK may report a transient error. The
        // registry already logs broadcast reset failures; swallow here
        // so one harness cannot block the reset broadcast.
      }
    },

    /**
     * Records a compaction request as a marker file; never compacts
     * synchronously.
     *
     * @returns `ok: false` with `missing-required-params` when `sessionId`
     *   or `workspaceDir` is not a string, `ok: false` with
     *   `marker-write-failed` (plus the raw error message) when the marker
     *   cannot be written, otherwise `ok: true, compacted: false` with a
     *   reason that depends on `params.force`.
     */
    async compact(params: AgentHarnessCompactParams): Promise<AgentHarnessCompactResult> {
      // The GitHub Copilot agent runtime manages compaction automatically
      // via `SessionConfig.infiniteSessions` (background-async when
      // utilization crosses `backgroundCompactionThreshold`). There is
      // no synchronous compact RPC, so the harness cannot honour
      // `params.force === true` directly. Instead this method writes
      // an OpenClaw-shaped marker file named
      // `<workspaceDir>/files/openclaw-compaction-*.json`
      // so existing OpenClaw transcript readers see a familiar
      // compaction artifact when the host calls compact(). See
      // src/compaction-bridge.ts for the bridge boundary.
      const openclawSessionId = typeof params.sessionId === "string" ? params.sessionId : undefined;
      const workspaceDir =
        typeof params.workspaceDir === "string" ? params.workspaceDir : undefined;
      if (!openclawSessionId || !workspaceDir) {
        return {
          ok: false,
          compacted: false,
          reason: "missing-required-params",
        };
      }
      const tracked = trackedSessions.get(openclawSessionId);
      const reason = params.force
        ? "force-requested-but-sdk-has-no-synchronous-compact-api"
        : "deferred-to-sdk-infinite-sessions";
      try {
        await writeOpenClawCompactionMarker({
          sessionId: openclawSessionId,
          workspaceDir,
          trigger: params.trigger,
          currentTokenCount: params.currentTokenCount,
          // Undefined when no attempt has established an SDK session yet.
          sdkSessionId: tracked?.sdkSessionId,
          force: params.force,
          reason,
        });
      } catch (err) {
        // Convert write failures into a structured result instead of
        // letting them propagate to the host.
        return {
          ok: false,
          compacted: false,
          reason: "marker-write-failed",
          failure: {
            reason: "marker-write-failed",
            rawError: err instanceof Error ? err.message : String(err),
          },
        };
      }
      return {
        ok: true,
        compacted: false,
        reason,
      };
    },

    /**
     * Idempotent shutdown: blocks new attempts, waits for in-flight ones to
     * settle, clears session tracking, and disposes the pool only if this
     * harness created it.
     *
     * @throws AggregateError when the owned pool reports disposal errors.
     */
    async dispose() {
      if (disposePromise) {
        return disposePromise;
      }
      disposed = true;
      disposePromise = (async () => {
        if (inFlight.size > 0) {
          await Promise.allSettled(inFlight);
        }
        trackedSessions.clear();
        if (createdPool) {
          const errors = await createdPool.dispose();
          if (errors.length > 0) {
            throw new AggregateError(errors, "[copilot] pool disposal errors");
          }
        }
      })();
      return disposePromise;
    },
  };
}
+} + +describe("copilot plugin", () => { + it("is opt-in by default and only declares an agent harness activation", () => { + const manifest = loadManifest(); + const activation = manifest.activation as Record; + + expect(manifest.enabledByDefault).toBeUndefined(); + expect(activation.onStartup).toBe(false); + expect(activation.onAgentHarnesses).toEqual(["copilot"]); + expect(manifest.providers).toBeUndefined(); + expect(typeof manifest.version).toBe("string"); + expect(manifest.version).not.toBe(""); + }); + + it("registers exactly one copilot agent harness and nothing else", () => { + const registerAgentHarness = vi.fn(); + const registerProvider = vi.fn(); + const registerModelCatalogProvider = vi.fn(); + const registerMediaUnderstandingProvider = vi.fn(); + const registerMigrationProvider = vi.fn(); + const registerCommand = vi.fn(); + const registerNodeHostCommand = vi.fn(); + const registerNodeInvokePolicy = vi.fn(); + const on = vi.fn(); + const onConversationBindingResolved = vi.fn(); + + plugin.register( + createTestPluginApi({ + id: "copilot", + name: "GitHub Copilot agent runtime", + source: "test", + config: {}, + pluginConfig: {}, + runtime: {} as never, + registerAgentHarness, + registerProvider, + registerModelCatalogProvider, + registerMediaUnderstandingProvider, + registerMigrationProvider, + registerCommand, + registerNodeHostCommand, + registerNodeInvokePolicy, + on, + onConversationBindingResolved, + }), + ); + + expect(registerAgentHarness).toHaveBeenCalledTimes(1); + expect(registerAgentHarness).toHaveBeenCalledWith( + expect.objectContaining({ id: "copilot", label: "GitHub Copilot agent runtime" }), + ); + expect(registerProvider).not.toHaveBeenCalled(); + expect(registerModelCatalogProvider).not.toHaveBeenCalled(); + expect(registerMediaUnderstandingProvider).not.toHaveBeenCalled(); + expect(registerMigrationProvider).not.toHaveBeenCalled(); + expect(registerCommand).not.toHaveBeenCalled(); + 
/**
 * Type guard for plain object-like values.
 *
 * @returns `true` for any non-null, non-array value whose `typeof` is
 *   `"object"`; `false` for primitives, `null`, and arrays.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Extracts the client-pool options from an untyped plugin config.
 *
 * Only `pool.idleTtlMs` is read. It is accepted when it is a finite number
 * greater than or equal to 1; every other shape (missing config, non-object
 * `pool`, non-number / NaN / infinite / sub-1 TTL) yields `undefined`.
 *
 * @param pluginConfig Raw plugin configuration of unknown shape.
 * @returns `{ idleTtlMs }` for a valid TTL, otherwise `undefined`.
 */
function readPoolOptions(pluginConfig: unknown): { idleTtlMs: number } | undefined {
  if (!isRecord(pluginConfig)) {
    return undefined;
  }

  const pool = pluginConfig.pool;
  if (!isRecord(pool)) {
    return undefined;
  }

  const idleTtlMs = pool.idleTtlMs;
  // Number.isFinite rejects NaN and +/-Infinity, which `typeof` alone lets
  // through.
  if (typeof idleTtlMs !== "number" || !Number.isFinite(idleTtlMs) || idleTtlMs < 1) {
    return undefined;
  }

  return { idleTtlMs };
}
"https://github.com/openclaw/openclaw" + }, + "type": "module", + "devDependencies": { + "@github/copilot": "1.0.48", + "@github/copilot-sdk": "1.0.0-beta.4", + "@openclaw/plugin-sdk": "workspace:*" + }, + "peerDependencies": { + "@github/copilot-sdk": "1.0.0-beta.4" + }, + "peerDependenciesMeta": { + "@github/copilot-sdk": { + "optional": true + } + }, + "openclaw": { + "extensions": [ + "./index.ts" + ], + "install": { + "npmSpec": "@openclaw/copilot", + "defaultChoice": "npm", + "minHostVersion": ">=2026.5.1-beta.1" + }, + "compat": { + "pluginApi": ">=2026.5.28" + }, + "build": { + "openclawVersion": "2026.5.28" + }, + "release": { + "publishToClawHub": false, + "publishToNpm": false + } + } +} diff --git a/extensions/copilot/src/attempt.live.test.ts b/extensions/copilot/src/attempt.live.test.ts new file mode 100644 index 000000000000..3669b313c5b8 --- /dev/null +++ b/extensions/copilot/src/attempt.live.test.ts @@ -0,0 +1,213 @@ +import { mkdtemp, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { CopilotClient, approveAll } from "@github/copilot-sdk"; +import type { AgentHarnessAttemptParams } from "openclaw/plugin-sdk/agent-harness-runtime"; +import { isLiveTestEnabled } from "openclaw/plugin-sdk/test-env"; +import { describe, expect, it, vi } from "vitest"; +import { createCopilotAgentHarness, type CopilotClientPool } from "../harness.js"; + +const liveToolState = vi.hoisted(() => ({ + calls: [] as string[], + expectedText: "phase-1-green", + sentinelPrefix: "copilot-live-smoke:", + toolName: "live_echo", +})); + +vi.mock("openclaw/plugin-sdk/agent-harness", async (importOriginal) => { + const actual = await importOriginal(); + + return { + ...actual, + createOpenClawCodingTools: vi.fn(() => [ + { + name: liveToolState.toolName, + label: liveToolState.toolName, + description: "Echo the requested text for the copilot live smoke test.", + parameters: { + type: "object", + additionalProperties: false, + 
// The live smoke test only runs when the live-test switch is enabled AND a
// token is available; otherwise the whole suite is registered as skipped.
const LIVE = isLiveTestEnabled(["OPENCLAW_COPILOT_AGENT_LIVE_TEST"]);
// `||` (not `??`) is deliberate: an env var that is set but empty must fall
// through to the next candidate.
const TOKEN =
  process.env.OPENCLAW_COPILOT_AGENT_LIVE_TOKEN ||
  process.env.GITHUB_TOKEN ||
  process.env.GH_TOKEN ||
  "";
const describeLive = LIVE && TOKEN ? describe : describe.skip;

/**
 * Minimal `CopilotClientPool` for the live smoke test.
 *
 * Every `acquire` constructs a fresh `CopilotClient` and hands back a thin
 * wrapper whose `createSession` / `resumeSession` force
 * `onPermissionRequest: approveAll`. `release` is a no-op; clients are only
 * stopped by `dispose`.
 */
function createApproveAllPool(): CopilotClientPool {
  const activeClients = new Set<CopilotClient>();

  return {
    async acquire(key, options) {
      const client = new CopilotClient(options);
      activeClients.add(client);
      return {
        key,
        // Only the members the attempt path calls are provided, hence the
        // cast back to the full client type.
        client: {
          createSession: (config: Parameters<CopilotClient["createSession"]>[0]) =>
            client.createSession({ ...config, onPermissionRequest: approveAll }),
          resumeSession: (
            sessionId: Parameters<CopilotClient["resumeSession"]>[0],
            config: Parameters<CopilotClient["resumeSession"]>[1],
          ) => client.resumeSession(sessionId, { ...config, onPermissionRequest: approveAll }),
          stop: () => client.stop(),
        } as unknown as CopilotClient,
      };
    },
    async dispose() {
      // Stop every client, collecting both the errors `stop()` returns and
      // anything it throws, so one failing client cannot hide the others.
      const errors: Error[] = [];
      for (const client of activeClients) {
        try {
          errors.push(...(await client.stop()));
        } catch (error) {
          errors.push(error instanceof Error ? error : new Error(String(error)));
        }
      }
      activeClients.clear();
      return errors;
    },
    async release() {},
    size() {
      return activeClients.size;
    },
  };
}

/**
 * Builds the attempt parameters for one live smoke turn against `gpt-4.1`
 * on the `github-copilot` provider, authenticated with `TOKEN`.
 *
 * @param params.copilotHome Directory used as `agentDir`, `copilotHome`, and
 *   the parent of the session file.
 * @param params.onAssistantDelta Callback forwarded unchanged to the attempt.
 * @param params.prompt Prompt text, used both as `prompt` and as the single
 *   user message.
 */
function createAttemptParams(params: {
  copilotHome: string;
  onAssistantDelta: (payload: { text: string }) => void | Promise<void>;
  prompt: string;
}): AgentHarnessAttemptParams {
  const profileId = "live-smoke-profile";
  const profileVersion = "v1";
  // Single timestamp so runId, sessionId, and the message agree.
  const now = Date.now();

  return {
    agentDir: params.copilotHome,
    agentId: "copilot-live-smoke",
    auth: {
      gitHubToken: TOKEN,
      profileId,
      profileVersion,
    },
    authProfileId: profileId,
    copilotHome: params.copilotHome,
    cwd: process.cwd(),
    messages: [{ content: params.prompt, role: "user", timestamp: now }],
    model: {
      api: "openai-responses",
      id: "gpt-4.1",
      provider: "github-copilot",
    },
    modelId: "gpt-4.1",
    onAssistantDelta: params.onAssistantDelta,
    profileVersion,
    prompt: params.prompt,
    provider: "github-copilot",
    runId: `copilot-live-smoke-${now}`,
    sessionFile: join(params.copilotHome, "copilot-live-smoke.session.json"),
    sessionId: `copilot-live-smoke-session-${now}`,
    timeoutMs: 90_000,
    workspaceDir: process.cwd(),
  } as unknown as AgentHarnessAttemptParams;
}
harness.runAttempt( + createAttemptParams({ + copilotHome, + onAssistantDelta: ({ text }) => { + if (text.trim()) { + streamedTexts.push(text); + } + }, + prompt, + }), + ); + const assistantText = result.assistantTexts.join("\n").trim(); + const hasAssistantText = result.assistantTexts.some((text) => text.trim().length > 0); + const matchingCalls = liveToolState.calls.filter( + (text) => text === liveToolState.expectedText, + ); + const usage = result.attemptUsage; + + console.info( + "[copilot-live-smoke] summary", + JSON.stringify( + { + assistantText, + toolCalls: liveToolState.calls, + streamedTexts, + toolMetas: result.toolMetas, + usage, + }, + null, + 2, + ), + ); + + expect(result.promptError).toBeUndefined(); + expect(result.timedOut).toBe(false); + expect(matchingCalls.length).toBeGreaterThanOrEqual(1); + expect(hasAssistantText).toBe(true); + expect(assistantText.length).toBeGreaterThan(0); + expect((usage?.input ?? 0) + (usage?.output ?? 0)).toBeGreaterThan(0); + expect( + result.toolMetas.some( + (toolMeta) => + toolMeta.toolName === liveToolState.toolName && + toolMeta.meta?.includes(liveToolState.sentinelPrefix), + ), + ).toBe(true); + } finally { + await harness.dispose?.(); + await rm(copilotHome, { recursive: true, force: true }); + } + }, 90_000); +}); diff --git a/extensions/copilot/src/attempt.test.ts b/extensions/copilot/src/attempt.test.ts new file mode 100644 index 000000000000..fbf62f83af23 --- /dev/null +++ b/extensions/copilot/src/attempt.test.ts @@ -0,0 +1,2537 @@ +import fsp from "node:fs/promises"; +import { tmpdir } from "node:os"; +import path from "node:path"; +import type { CopilotClient, Tool as SdkTool } from "@github/copilot-sdk"; +import type { + AgentHarnessAttemptParams, + AgentHarnessAttemptResult, +} from "openclaw/plugin-sdk/agent-harness-runtime"; +import type { SandboxContext } from "openclaw/plugin-sdk/agent-harness-runtime"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { 
runCopilotAttempt } from "./attempt.js"; +import type { CopilotClientPool } from "./runtime.js"; + +const TINY_PNG_BASE64 = + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAACXBIWXMAAAsTAAALEwEAmpwYAAAADUlEQVR4nGP4////KwAJ5gPoxLp9owAAAABJRU5ErkJggg=="; + +// Mock the dual-write transcript mirror so attempt tests do not touch the +// real filesystem. The mirror call site is exercised separately in +// dual-write-transcripts.test.ts and by the dedicated attempt +// dual-write tests below; the mocked module here just captures the +// invocation arguments without writing to disk. +const dualWriteMock = vi.hoisted(() => ({ + dualWriteCopilotTranscriptBestEffort: vi.fn().mockResolvedValue(undefined), + attachCopilotMirrorIdentity: (message: T, identity: string): T => { + const record = message as unknown as Record; + return { + ...record, + __openclaw: { ...(record["__openclaw"] as object | undefined), mirrorIdentity: identity }, + } as unknown as T; + }, +})); +vi.mock("./dual-write-transcripts.js", () => dualWriteMock); + +// Mock the workspace-bootstrap loader so attempt tests do not perform +// real filesystem reads (which add async ticks and would break the +// carefully-timed delta-ordering tests below). Real loader behavior is +// covered separately in workspace-bootstrap.test.ts. The dedicated +// "workspace bootstrap (systemMessage)" describe block below overrides +// the mock per-test to verify wiring into SessionConfig.systemMessage. 
/** Shape of the session events the fake SDK session delivers to listeners. */
type SessionEventShape = {
  data: Record<string, unknown>;
  id: string;
  parentId: string | null;
  timestamp: string;
  type: string;
};

/** In-memory stand-in for an SDK session; every method is a vitest mock. */
type FakeSession = {
  abort: ReturnType<typeof vi.fn>;
  cfg: Record<string, unknown>;
  disconnect: ReturnType<typeof vi.fn>;
  emit: (eventType: string, data: Record<string, unknown>) => void;
  id: string;
  off: ReturnType<typeof vi.fn>;
  on: ReturnType<typeof vi.fn>;
  sendAndWait: ReturnType<typeof vi.fn>;
  sessionId: string;
};

type FakeSdk = ReturnType<typeof makeFakeSdk>;

/**
 * Creates a promise together with its externally callable `resolve` /
 * `reject` functions, so a test can settle it at a chosen moment.
 */
function createDeferred<T>() {
  let rejectPromise: ((reason?: unknown) => void) | undefined;
  let resolvePromise: ((value: T | PromiseLike<T>) => void) | undefined;
  const promise = new Promise<T>((resolve, reject) => {
    resolvePromise = resolve;
    rejectPromise = reject;
  });
  return {
    promise,
    reject(reason?: unknown) {
      rejectPromise?.(reason);
    },
    resolve(value: T) {
      resolvePromise?.(value);
    },
  };
}

/** Yields a few microtask ticks so a started attempt can make progress. */
function flushAsync() {
  // Pump enough microtasks for the attempt to settle past every
  // pre-createSession `await` in attempt.ts (resolvePoolAcquire,
  // resolveCopilotWorkspaceBootstrapContext, createSession, etc.).
  // Each chained `then` is one tick; tests rely on this to observe
  // `sdk.sessions[0]` being populated before they emit deltas.
  /* oxlint-disable unicorn/no-useless-promise-resolve-reject -- inner Promise.resolve()s force additional microtask ticks; ordering of sdk.sessions[0] population depends on this. */
  return Promise.resolve()
    .then(() => Promise.resolve())
    .then(() => Promise.resolve());
  /* oxlint-enable unicorn/no-useless-promise-resolve-reject */
}

/** Reads `promptError.code` from an attempt result, if present. */
function getPromptErrorCode(result: AgentHarnessAttemptResult): string | undefined {
  return (result.promptError as { code?: string } | undefined)?.code;
}

/** Reads the (not publicly typed) `sdkSessionId` from an attempt result. */
function getSdkSessionId(result: AgentHarnessAttemptResult): string | undefined {
  return (result as AgentHarnessAttemptResult & { sdkSessionId?: string }).sdkSessionId;
}

/** Wraps `data` in an event envelope with a fixed timestamp and `<type>-id`. */
function makeEvent(type: string, data: Record<string, unknown>): SessionEventShape {
  return {
    data,
    id: `${type}-id`,
    parentId: null,
    timestamp: "2024-01-01T00:00:00.000Z",
    type,
  };
}

/**
 * Builds an `assistant.message` event.
 *
 * @param content Assistant text; defaults to `"assistant text"`.
 * @param overrides Extra or replacement `data` fields, applied last.
 */
function makeAssistantMessageEvent(
  content = "assistant text",
  overrides: Partial<Record<string, unknown>> = {},
): SessionEventShape {
  return makeEvent("assistant.message", {
    content,
    messageId: "msg-1",
    model: "gpt-4o",
    ...overrides,
  });
}

/**
 * Creates a fake SDK session with a working listener registry: `on` / `off`
 * add and remove handlers per event type and `emit` synchronously delivers a
 * freshly built event to the handlers registered for that type.
 */
function createFakeSession(cfg: Record<string, unknown>, id: string): FakeSession {
  const listeners = new Map<string, Array<(event: SessionEventShape) => void>>();
  return {
    abort: vi.fn(async () => undefined),
    cfg,
    disconnect: vi.fn(async () => undefined),
    emit: (eventType: string, data: Record<string, unknown>) => {
      const event = makeEvent(eventType, data);
      for (const listener of listeners.get(eventType) ?? []) {
        listener(event);
      }
    },
    id,
    off: vi.fn((eventType: string, handler: (event: SessionEventShape) => void) => {
      const handlers = listeners.get(eventType) ?? [];
      listeners.set(
        eventType,
        handlers.filter((existing) => existing !== handler),
      );
    }),
    on: vi.fn((eventType: string, handler: (event: SessionEventShape) => void) => {
      const handlers = listeners.get(eventType) ?? [];
      handlers.push(handler);
      listeners.set(eventType, handlers);
    }),
    // Default turn result; individual tests override it per call.
    sendAndWait: vi.fn(async () => makeAssistantMessageEvent()),
    sessionId: id,
  };
}

/** Pool whose `acquire` always hands back the fake SDK's single client. */
function makeFakePool(sdk: FakeSdk) {
  const pool: CopilotClientPool = {
    acquire: vi.fn(async (key, _options) => ({
      client: sdk.client as unknown as CopilotClient,
      key,
    })),
    dispose: vi.fn(async () => []),
    release: vi.fn(async () => undefined),
    size: vi.fn(() => 0),
  };
  return pool;
}

/**
 * Creates a fake SDK client that records every session it creates or
 * resumes in `sessions`.
 *
 * The optional hooks run BEFORE the session is pushed to `sessions`, so a
 * hook can prime `sendAndWait` (or throw to simulate a failure) before the
 * attempt sees the session.
 */
function makeFakeSdk(
  options: {
    onCreateSession?: (session: FakeSession, cfg: Record<string, unknown>) => void | Promise<void>;
    onResumeSession?: (
      session: FakeSession,
      sessionId: string,
      cfg: Record<string, unknown>,
    ) => void | Promise<void>;
  } = {},
) {
  const sessions: FakeSession[] = [];

  const createSession = vi.fn(async (cfg: Record<string, unknown>) => {
    // Ids are sequential: sess-1, sess-2, ...
    const session = createFakeSession(cfg, `sess-${sessions.length + 1}`);
    await options.onCreateSession?.(session, cfg);
    sessions.push(session);
    return session;
  });

  const resumeSession = vi.fn(async (sessionId: string, cfg: Record<string, unknown>) => {
    // A resumed session keeps the id it was resumed with.
    const session = createFakeSession(cfg, sessionId);
    await options.onResumeSession?.(session, sessionId, cfg);
    sessions.push(session);
    return session;
  });

  return {
    client: {
      createSession,
      resumeSession,
      stop: vi.fn(async () => []),
    },
    createSession,
    resumeSession,
    sessions,
  };
}

/**
 * Builds attempt params with test defaults, then applies `overrides`.
 *
 * NOTE: `...overrides` is spread last, so when `overrides.auth` or
 * `overrides.model` is supplied it replaces the merged default object
 * wholesale (for example the `useLoggedInUser: true` default is not kept).
 * The merge expressions only matter for keys the caller did not override.
 */
function makeParams(
  overrides: Partial<
    AgentHarnessAttemptParams & {
      auth: {
        gitHubToken?: string;
        profileId?: string;
        profileVersion?: string;
        useLoggedInUser?: boolean;
      };
      initialReplayState: { sdkSessionId?: string };
      messages: Array<{ content: string; role: "user"; timestamp: number }>;
      model: { api: string; id: string; provider: string };
      onAssistantDelta: (payload: { delta: string; text: string }) => void | Promise<void>;
      profileVersion: string;
    }
  > = {},
): AgentHarnessAttemptParams {
  return {
    agentDir: "C:\\copilot-home",
    agentId: "agent-1",
    auth: { useLoggedInUser: true, ...(overrides as { auth?: object }).auth },
    initialReplayState: undefined,
    messages: [{ content: "hello", role: "user", timestamp: 1 }],
    model: {
      api: "openai-responses",
      id: "gpt-4o",
      provider: "github-copilot",
      ...(typeof overrides.model === "object" ? overrides.model : {}),
    },
    prompt: "hello",
    runId: "run-1",
    sessionFile: "session.json",
    sessionId: "session-1",
    timeoutMs: 5000,
    workspaceDir: "C:\\workspace",
    ...overrides,
  } as unknown as AgentHarnessAttemptParams;
}
images before creating SDK blob attachments", async () => { + const stateDir = await fsp.mkdtemp(path.join(tmpdir(), "copilot-offloaded-image-")); + const inboundDir = path.join(stateDir, "media", "inbound"); + const mediaId = "telegram-photo.png"; + await fsp.mkdir(inboundDir, { recursive: true }); + await fsp.writeFile(path.join(inboundDir, mediaId), Buffer.from(TINY_PNG_BASE64, "base64")); + vi.stubEnv("OPENCLAW_STATE_DIR", stateDir); + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + try { + await runCopilotAttempt( + makeParams({ + imageOrder: ["offloaded"], + images: [], + model: { + api: "openai-responses", + id: "gpt-4o", + input: ["text", "image"], + provider: "github-copilot", + }, + prompt: `describe this\n[media attached: media://inbound/${mediaId}]`, + } as never), + { pool }, + ); + + const sendOptions = sdk.sessions[0]?.sendAndWait.mock.calls[0]?.[0] as + | { attachments?: unknown[] } + | undefined; + expect(sendOptions?.attachments).toEqual([ + { + type: "blob", + data: TINY_PNG_BASE64, + mimeType: "image/png", + displayName: "prompt-image-1", + }, + ]); + } finally { + vi.unstubAllEnvs(); + await fsp.rm(stateDir, { recursive: true, force: true }); + } + }); + + it("does not hydrate prompt image paths outside workspace-only policy", async () => { + const stateDir = await fsp.mkdtemp(path.join(tmpdir(), "copilot-image-policy-")); + const workspaceDir = path.join(stateDir, "workspace"); + const outsideDir = path.join(stateDir, "outside"); + const outsideImage = path.join(outsideDir, "secret.png"); + await fsp.mkdir(workspaceDir, { recursive: true }); + await fsp.mkdir(outsideDir, { recursive: true }); + await fsp.writeFile(outsideImage, Buffer.from(TINY_PNG_BASE64, "base64")); + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + try { + await runCopilotAttempt( + makeParams({ + config: { tools: { fs: { workspaceOnly: true } } }, + model: { + api: "openai-responses", + id: "gpt-4o", + input: ["text", "image"], + provider: 
"github-copilot", + }, + prompt: `inspect ${outsideImage}`, + workspaceDir, + } as never), + { pool }, + ); + + const sendOptions = sdk.sessions[0]?.sendAndWait.mock.calls[0]?.[0] as + | { attachments?: unknown[] } + | undefined; + expect(sendOptions?.attachments).toBeUndefined(); + } finally { + await fsp.rm(stateDir, { recursive: true, force: true }); + } + }); + + it("hydrates quoted prompt image paths through the shared detector", async () => { + const stateDir = await fsp.mkdtemp(path.join(tmpdir(), "copilot-quoted-image-")); + const workspaceDir = path.join(stateDir, "workspace"); + const imagePath = path.join(workspaceDir, "quoted.png"); + await fsp.mkdir(workspaceDir, { recursive: true }); + await fsp.writeFile(imagePath, Buffer.from(TINY_PNG_BASE64, "base64")); + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + try { + await runCopilotAttempt( + makeParams({ + config: { tools: { fs: { workspaceOnly: true } } }, + model: { + api: "openai-responses", + id: "gpt-4o", + input: ["text", "image"], + provider: "github-copilot", + }, + prompt: `inspect "${imagePath}"`, + workspaceDir, + } as never), + { pool }, + ); + + const sendOptions = sdk.sessions[0]?.sendAndWait.mock.calls[0]?.[0] as + | { attachments?: unknown[] } + | undefined; + expect(sendOptions?.attachments).toEqual([ + { + type: "blob", + data: TINY_PNG_BASE64, + mimeType: "image/png", + displayName: "prompt-image-1", + }, + ]); + } finally { + await fsp.rm(stateDir, { recursive: true, force: true }); + } + }); + + it("resolves relative prompt image paths from task cwd", async () => { + const stateDir = await fsp.mkdtemp(path.join(tmpdir(), "copilot-cwd-image-")); + const workspaceDir = path.join(stateDir, "workspace"); + const cwd = path.join(workspaceDir, "task-repo"); + const imagePath = path.join(cwd, "relative.png"); + await fsp.mkdir(cwd, { recursive: true }); + await fsp.writeFile(imagePath, Buffer.from(TINY_PNG_BASE64, "base64")); + const sdk = makeFakeSdk(); + const pool = 
makeFakePool(sdk); + + try { + await runCopilotAttempt( + makeParams({ + config: { tools: { fs: { workspaceOnly: true } } }, + cwd, + model: { + api: "openai-responses", + id: "gpt-4o", + input: ["text", "image"], + provider: "github-copilot", + }, + prompt: "inspect ./relative.png", + workspaceDir, + } as never), + { pool }, + ); + + const sendOptions = sdk.sessions[0]?.sendAndWait.mock.calls[0]?.[0] as + | { attachments?: unknown[] } + | undefined; + expect(sendOptions?.attachments).toEqual([ + { + type: "blob", + data: TINY_PNG_BASE64, + mimeType: "image/png", + displayName: "prompt-image-1", + }, + ]); + } finally { + await fsp.rm(stateDir, { recursive: true, force: true }); + } + }); + + it("subscribe-before-send", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt(makeParams(), { pool }); + + const session = sdk.sessions[0]; + expect(session.on.mock.calls[0]?.[0]).toBe("assistant.message_delta"); + expect(session.on.mock.invocationCallOrder[0]).toBeLessThan( + session.sendAndWait.mock.invocationCallOrder[0], + ); + }); + + it("deltas forwarded in order via promise chain", async () => { + const sendDeferred = createDeferred(); + const order: string[] = []; + const releases: Array<() => void> = []; + const onAssistantDelta = vi.fn(async (payload: { delta: string }) => { + order.push(`start:${payload.delta}`); + await new Promise((resolve) => { + releases.push(() => { + order.push(`end:${payload.delta}`); + resolve(); + }); + }); + }); + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockReturnValue(sendDeferred.promise); + }, + }); + const pool = makeFakePool(sdk); + const createToolBridge = vi.fn(async () => ({ sdkTools: [], sourceTools: [] })); + + const runPromise = runCopilotAttempt(makeParams({ onAssistantDelta }), { + createToolBridge, + pool, + }); + await flushAsync(); + + const session = sdk.sessions[0]; + session.emit("assistant.message_delta", { deltaContent: 
"a", messageId: "msg-1" }); + session.emit("assistant.message_delta", { deltaContent: "b", messageId: "msg-1" }); + session.emit("assistant.message_delta", { deltaContent: "c", messageId: "msg-1" }); + await flushAsync(); + + expect(onAssistantDelta).toHaveBeenCalledTimes(1); + releases[0]?.(); + await flushAsync(); + expect(onAssistantDelta).toHaveBeenCalledTimes(2); + releases[1]?.(); + await flushAsync(); + expect(onAssistantDelta).toHaveBeenCalledTimes(3); + releases[2]?.(); + sendDeferred.resolve(makeAssistantMessageEvent("abc")); + + const result = await runPromise; + expect(order).toEqual(["start:a", "end:a", "start:b", "end:b", "start:c", "end:c"]); + expect(result.assistantTexts).toEqual(["abc"]); + }); + + it("deltas forwarded even when no consumer", async () => { + const sendDeferred = createDeferred(); + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockReturnValue(sendDeferred.promise); + }, + }); + const pool = makeFakePool(sdk); + const createToolBridge = vi.fn(async () => ({ sdkTools: [], sourceTools: [] })); + + const runPromise = runCopilotAttempt(makeParams(), { createToolBridge, pool }); + await flushAsync(); + + const session = sdk.sessions[0]; + session.emit("assistant.message_delta", { deltaContent: "a", messageId: "msg-1" }); + session.emit("assistant.message_delta", { deltaContent: "b", messageId: "msg-1" }); + session.emit("assistant.message_delta", { deltaContent: "c", messageId: "msg-1" }); + sendDeferred.resolve(makeAssistantMessageEvent("abc")); + + const result = await runPromise; + expect(result.assistantTexts).toEqual(["abc"]); + }); + + it("resume path", async () => { + const sdk = makeFakeSdk({ + onResumeSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("resumed")); + }, + }); + const pool = makeFakePool(sdk); + + await runCopilotAttempt( + makeParams({ initialReplayState: { sdkSessionId: "resume-1" } as never }), + { pool }, + ); + + 
expect(sdk.resumeSession).toHaveBeenCalledTimes(1); + expect(sdk.resumeSession.mock.calls[0]?.[0]).toBe("resume-1"); + expect( + (sdk.resumeSession.mock.calls[0]?.[1] as { continuePendingWork?: boolean }) + .continuePendingWork, + ).toBe(false); + expect(sdk.createSession).toHaveBeenCalledTimes(0); + }); + + it("replay-shim: replayInvalid:true forces createSession even when sdkSessionId is present", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + const result = await runCopilotAttempt( + makeParams({ + initialReplayState: { + sdkSessionId: "resume-stale", + replayInvalid: true, + } as never, + }), + { pool }, + ); + + expect(sdk.resumeSession).toHaveBeenCalledTimes(0); + expect(sdk.createSession).toHaveBeenCalledTimes(1); + // Downgrade invalidates replay even when no side effects occurred. + expect(result.replayMetadata).toEqual({ + hadPotentialSideEffects: false, + replaySafe: false, + }); + }); + + it("replay-shim: recovers from missing-session resume failure by downgrading to createSession", async () => { + let resumeCalls = 0; + const sdk = makeFakeSdk({ + onResumeSession: () => { + resumeCalls += 1; + throw Object.assign(new Error("session not found"), { status: 404 }); + }, + onCreateSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("fresh")); + }, + }); + const pool = makeFakePool(sdk); + + const result = await runCopilotAttempt( + makeParams({ initialReplayState: { sdkSessionId: "resume-gone" } as never }), + { pool }, + ); + + expect(resumeCalls).toBe(1); + expect(sdk.createSession).toHaveBeenCalledTimes(1); + expect(result.promptError).toBeUndefined(); + // Recovery invalidates replay even though no side effects occurred. + expect(result.replayMetadata).toEqual({ + hadPotentialSideEffects: false, + replaySafe: false, + }); + // The freshly-created session id is reported, not the stale resume id. 
+ expect(getSdkSessionId(result)).not.toBe("resume-gone"); + }); + + it("replay-shim: unrecoverable resume failure surfaces as promptError (no downgrade)", async () => { + const sdk = makeFakeSdk({ + onResumeSession: () => { + throw new Error("ECONNRESET network failure"); + }, + }); + const pool = makeFakePool(sdk); + + const result = await runCopilotAttempt( + makeParams({ initialReplayState: { sdkSessionId: "resume-x" } as never }), + { pool }, + ); + + expect(sdk.resumeSession).toHaveBeenCalledTimes(1); + expect(sdk.createSession).toHaveBeenCalledTimes(0); + expect((result.promptError as Error | undefined)?.message).toContain("ECONNRESET"); + }); + + it("replay-shim: prior hadPotentialSideEffects propagates into result replayMetadata", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + const result = await runCopilotAttempt( + makeParams({ + initialReplayState: { hadPotentialSideEffects: true } as never, + }), + { pool }, + ); + + expect(result.replayMetadata).toEqual({ + hadPotentialSideEffects: true, + replaySafe: false, + }); + }); + + it("replay-shim: mutating tool side effects make the attempt replay-unsafe", async () => { + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockImplementationOnce(async () => { + session.emit("tool.execution_start", { + toolCallId: "tool-1", + toolName: "write", + }); + session.emit("tool.execution_complete", { + result: { content: "wrote file" }, + success: true, + toolCallId: "tool-1", + }); + return makeAssistantMessageEvent("done"); + }); + }, + }); + const pool = makeFakePool(sdk); + + const result = await runCopilotAttempt(makeParams(), { pool }); + + expect(result.toolMetas).toEqual([ + { toolName: "write" }, + { meta: "wrote file", toolName: "write" }, + ]); + expect(result.replayMetadata).toEqual({ + hadPotentialSideEffects: true, + replaySafe: false, + }); + }); + + it("replay-shim: prior replayInvalid propagates even on an early-return failure", async 
() => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + const result = await runCopilotAttempt( + makeParams({ + model: { api: "openai-responses", id: "claude", provider: "anthropic" } as never, + initialReplayState: { + replayInvalid: true, + hadPotentialSideEffects: true, + } as never, + }), + { pool }, + ); + + expect(getPromptErrorCode(result)).toBe("model_not_supported"); + expect(result.replayMetadata).toEqual({ + hadPotentialSideEffects: true, + replaySafe: false, + }); + }); + + it("abort path (mid-stream)", async () => { + const controller = new AbortController(); + const sendDeferred = createDeferred(); + const sessionCreated = createDeferred(); + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockReturnValue(sendDeferred.promise); + session.abort.mockImplementationOnce(async () => { + sendDeferred.resolve(undefined); + }); + sessionCreated.resolve(session); + }, + }); + const pool = makeFakePool(sdk); + const createToolBridge = vi.fn(async () => ({ sdkTools: [], sourceTools: [] })); + + const runPromise = runCopilotAttempt(makeParams({ abortSignal: controller.signal }), { + createToolBridge, + pool, + }); + const session = await sessionCreated.promise; + for (let i = 0; i < 100 && session.sendAndWait.mock.calls.length === 0; i++) { + await new Promise((resolve) => setTimeout(resolve, 0)); + } + expect(session.sendAndWait).toHaveBeenCalledTimes(1); + + controller.abort(); + const result = await runPromise; + + expect(session.abort).toHaveBeenCalledTimes(1); + expect(result.aborted).toBe(true); + expect(result.externalAbort).toBe(true); + }); + + it("abort path (signal already aborted)", async () => { + const controller = new AbortController(); + controller.abort(); + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + const result = await runCopilotAttempt(makeParams({ abortSignal: controller.signal }), { + pool, + }); + + expect(result.aborted).toBe(true); + 
expect(result.externalAbort).toBe(true); + expect(sdk.createSession).toHaveBeenCalledTimes(0); + expect(pool.acquire).toHaveBeenCalledTimes(0); + }); + + it("abort path (signal fires after settled)", async () => { + const controller = new AbortController(); + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + const result = await runCopilotAttempt(makeParams({ abortSignal: controller.signal }), { + pool, + }); + controller.abort(); + + expect(sdk.sessions[0]?.abort).toHaveBeenCalledTimes(0); + expect(result.aborted).toBe(false); + expect(result.timedOut).toBe(false); + }); + + it("tool bridge wiring: injected tools populate session config", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + const sdkTools: SdkTool[] = [ + { + description: "Fake SDK tool", + handler: async () => ({ resultType: "success", textResultForLlm: "ok" }), + name: "fake_sdk_tool", + parameters: { type: "object" }, + }, + ]; + const createToolBridge = vi.fn(async () => ({ sdkTools, sourceTools: [] })); + + await runCopilotAttempt(makeParams(), { createToolBridge, pool }); + + expect(createToolBridge).toHaveBeenCalledTimes(1); + expect(createToolBridge).toHaveBeenCalledWith( + expect.objectContaining({ + abortSignal: undefined, + agentDir: "C:\\copilot-home", + agentId: "agent-1", + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + sessionKey: undefined, + workspaceDir: "C:\\workspace", + }), + ); + // F6: attempt params and sessionRef are threaded through so the + // bridge can build PI-parity tool context and wire onYield to the + // live SDK session once it exists. See tool-bridge.ts. 
+ const bridgeCall = (createToolBridge.mock.calls[0] as unknown[] | undefined)?.[0] as { + attemptParams?: unknown; + sessionRef?: { current?: unknown }; + }; + expect(bridgeCall.attemptParams).toBeDefined(); + expect(bridgeCall.sessionRef).toBeDefined(); + expect( + ((sdk.createSession.mock.calls[0] as unknown[] | undefined)?.[0] as { tools?: unknown[] }) + .tools, + ).toBe(sdkTools); + }); + + it("F6: sessionRef is populated after createSession so the tool bridge's onYield can abort the live SDK session", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + let capturedRef: { current: { abort?: () => unknown } | undefined } | undefined; + const createToolBridge = vi.fn( + async (input: { sessionRef?: { current: { abort?: () => unknown } | undefined } }) => { + capturedRef = input.sessionRef; + return { sdkTools: [], sourceTools: [] }; + }, + ); + + await runCopilotAttempt(makeParams(), { createToolBridge, pool }); + + expect(capturedRef).toBeDefined(); + // After createSession resolves, attempt.ts binds the live session + // to sessionRef.current so onYield can route to session.abort(). 
+ expect(capturedRef?.current).toBeDefined(); + expect(capturedRef?.current).toBe(sdk.sessions[0]); + }); + + it("F6: sessionRef is populated after a successful resumeSession (resume path)", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + let capturedRef: { current: { abort?: () => unknown } | undefined } | undefined; + const createToolBridge = vi.fn( + async (input: { sessionRef?: { current: { abort?: () => unknown } | undefined } }) => { + capturedRef = input.sessionRef; + return { sdkTools: [], sourceTools: [] }; + }, + ); + + await runCopilotAttempt( + makeParams({ + initialReplayState: { sdkSessionId: "resume-target" } as never, + }), + { createToolBridge, pool }, + ); + + expect(sdk.resumeSession).toHaveBeenCalledTimes(1); + expect(capturedRef?.current).toBeDefined(); + expect(capturedRef?.current).toBe(sdk.sessions[0]); + }); + + it("F6: attemptParams carries the full input so the bridge can derive PI-parity tool context", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + let capturedParams: unknown; + const createToolBridge = vi.fn(async (input: { attemptParams?: unknown }) => { + capturedParams = input.attemptParams; + return { sdkTools: [], sourceTools: [] }; + }); + + const params = makeParams({ + senderIsOwner: true, + groupId: "g-9", + currentChannelId: "C-9", + } as never); + await runCopilotAttempt(params, { createToolBridge, pool }); + + // The bridge receives the same params object so it can read every + // identity/policy/channel field the wrapped-tool layer needs. + expect(capturedParams).toBe(params); + }); + + it("F7: result.yieldDetected is true when the tool bridge fires onYieldDetected during the attempt", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + const createToolBridge = vi.fn(async (input: { onYieldDetected?: (msg?: string) => void }) => { + // Simulate a wrapped tool invoking sessions_yield before the + // attempt settles. 
The bridge is responsible for notifying the + // caller via onYieldDetected so the final result can carry the + // flag (parent runner uses it to mark liveness paused / + // stop_reason end_turn). Mirrors PI/codex parity. + input.onYieldDetected?.("paused by tool"); + return { sdkTools: [], sourceTools: [] }; + }); + + const result = await runCopilotAttempt(makeParams(), { + createToolBridge, + pool, + }); + + expect(result.yieldDetected).toBe(true); + }); + + it("F7: result.yieldDetected is false on a clean attempt (no sessions_yield fired)", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + // Default createToolBridge in deps falls back to the real one, + // which only fires onYieldDetected when a wrapped tool yields. We + // pass a bridge that never yields and assert the flag stays false. + const createToolBridge = vi.fn(async () => ({ sdkTools: [], sourceTools: [] })); + + const result = await runCopilotAttempt(makeParams(), { + createToolBridge, + pool, + }); + + expect(result.yieldDetected).toBe(false); + }); + + it("tool bridge failures become prompt errors", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + const createToolBridge = vi.fn(async () => { + throw new Error("bridge failed"); + }); + + const result = await runCopilotAttempt(makeParams(), { createToolBridge, pool }); + + expect(getPromptErrorCode(result)).toBe("tool_bridge_failure"); + expect((result.promptError as Error | undefined)?.message).toBe( + "[copilot-attempt] tool-bridge construction failed: bridge failed", + ); + expect(sdk.createSession).toHaveBeenCalledTimes(0); + expect(pool.acquire).toHaveBeenCalledTimes(0); + expect(pool.release).toHaveBeenCalledTimes(0); + }); + + it("unsupported providers skip injected tool bridge wiring", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + const createToolBridge = vi.fn(async () => ({ sdkTools: [], sourceTools: [] })); + + const result = await 
runCopilotAttempt( + makeParams({ + model: { api: "openai-responses", id: "claude", provider: "anthropic" } as never, + }), + { createToolBridge, pool }, + ); + + expect(getPromptErrorCode(result)).toBe("model_not_supported"); + expect(createToolBridge).toHaveBeenCalledTimes(0); + expect(sdk.createSession).toHaveBeenCalledTimes(0); + }); + + it("default permission policy rejects fail-closed", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt(makeParams(), { pool }); + + const handler = ( + (sdk.createSession.mock.calls[0] as unknown[] | undefined)?.[0] as { + onPermissionRequest: ( + request: { kind: string }, + invocation: { sessionId: string }, + ) => Promise<{ kind: string; feedback?: string }>; + } + ).onPermissionRequest; + const result = await handler({ kind: "write" }, { sessionId: "sess-1" }); + expect(result.kind).toBe("reject"); + expect(result.feedback).toContain("no permission policy installed"); + }); + + it("does not register onUserInputRequest (ask_user hidden from the model in MVP)", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt(makeParams(), { pool }); + + const cfg = sdk.createSession.mock.calls[0]?.[0]; + // Per the SDK contract (types.d.ts: `When provided, enables the + // ask_user tool allowing the agent to ask questions`), omitting the + // handler hides ask_user from the model entirely. The MVP keeps it + // hidden; a follow-up will port the codex user-input-bridge to wire + // ask_user to the OpenClaw channel/TUI path. 
+ expect("onUserInputRequest" in cfg).toBe(false); + }); + + it("enableSessionTelemetry is omitted from createSession when undefined (SDK default)", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt(makeParams(), { pool }); + + const cfg = sdk.createSession.mock.calls[0]?.[0]; + expect("enableSessionTelemetry" in cfg).toBe(false); + }); + + it("enableSessionTelemetry: true is propagated to createSession", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt(makeParams({ enableSessionTelemetry: true } as never), { pool }); + + const cfg = (sdk.createSession.mock.calls[0] as unknown[] | undefined)?.[0] as { + enableSessionTelemetry?: boolean; + }; + expect(cfg.enableSessionTelemetry).toBe(true); + }); + + it("enableSessionTelemetry: false is propagated to createSession", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt(makeParams({ enableSessionTelemetry: false } as never), { pool }); + + const cfg = (sdk.createSession.mock.calls[0] as unknown[] | undefined)?.[0] as { + enableSessionTelemetry?: boolean; + }; + expect(cfg.enableSessionTelemetry).toBe(false); + }); + + it("enableSessionTelemetry is propagated to resumeSession on resume path", async () => { + const sdk = makeFakeSdk({ + onResumeSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("resumed")); + }, + }); + const pool = makeFakePool(sdk); + + await runCopilotAttempt( + makeParams({ + enableSessionTelemetry: false, + initialReplayState: { sdkSessionId: "resume-2" }, + } as never), + { pool }, + ); + + expect(sdk.resumeSession).toHaveBeenCalledTimes(1); + const cfg = sdk.resumeSession.mock.calls[0]?.[1] as { enableSessionTelemetry?: boolean }; + expect(cfg.enableSessionTelemetry).toBe(false); + }); + + it("infiniteSessions is omitted from createSession when host did not supply config", async () => { + const 
sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt(makeParams(), { pool }); + + const cfg = sdk.createSession.mock.calls[0]?.[0]; + expect("infiniteSessions" in cfg).toBe(false); + }); + + describe("workspace bootstrap (systemMessage)", () => { + beforeEach(() => { + workspaceBootstrapMock.resolveCopilotWorkspaceBootstrapContext.mockReset(); + // Re-establish the default fast-path so unrelated tests in the + // suite keep getting `instructions: undefined`. Tests in this + // block override the mock locally to inject their own rendered + // instructions string. + workspaceBootstrapMock.resolveCopilotWorkspaceBootstrapContext.mockResolvedValue({ + bootstrapFiles: [], + contextFiles: [], + instructions: undefined, + }); + }); + + it("forwards rendered bootstrap instructions into SDK SessionConfig.systemMessage (append mode)", async () => { + const rendered = + "# Project Context\n## /ws/SOUL.md\n\nSoul voice goes here.\n\n## /ws/IDENTITY.md\n\nI am the agent."; + workspaceBootstrapMock.resolveCopilotWorkspaceBootstrapContext.mockResolvedValueOnce({ + bootstrapFiles: [], + contextFiles: [], + instructions: rendered, + }); + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt(makeParams(), { pool }); + + // Regression: persona/identity bootstrap (SOUL.md, IDENTITY.md) + // must reach SDK SessionConfig.systemMessage so the model + // receives it as system context without having to read the file + // via its read tool. The SDK's `append` mode keeps the SDK + // foundation (identity/safety/tool-instruction sections) intact + // while layering OpenClaw context after it. See + // workspace-bootstrap.ts and @github/copilot-sdk types.d.ts + // L1052 (SystemMessageConfig). 
+ const cfg = (sdk.createSession.mock.calls[0] as unknown[] | undefined)?.[0] as { + systemMessage?: { mode?: string; content?: string }; + }; + expect(cfg.systemMessage).toBeDefined(); + expect(cfg.systemMessage?.mode).toBe("append"); + expect(cfg.systemMessage?.content).toBe(rendered); + }); + + it("omits systemMessage entirely when the loader returns no instructions", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt(makeParams(), { pool }); + + const cfg = sdk.createSession.mock.calls[0]?.[0]; + // No rendered instructions => skip the systemMessage field so + // the SDK default (foundation only) applies. Avoids polluting + // session logs with an empty `append` and removes a no-op SDK + // codepath. Mirrors the omit-when-empty pattern used elsewhere + // in createSessionConfig (hooks, infiniteSessions, + // enableSessionTelemetry). + expect("systemMessage" in cfg).toBe(false); + }); + + it("forwards extraSystemPrompt into SDK SessionConfig.systemMessage", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt( + makeParams({ + extraSystemPrompt: "Tool and file actions are disabled for this sender.", + }), + { pool }, + ); + + const cfg = sdk.createSession.mock.calls[0]?.[0] as { + systemMessage?: { mode?: string; content?: string }; + }; + expect(cfg.systemMessage?.mode).toBe("append"); + expect(cfg.systemMessage?.content).toBe( + "## Group Chat Context\nTool and file actions are disabled for this sender.", + ); + }); + + it("omits extraSystemPrompt for raw model runs", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt( + makeParams({ + extraSystemPrompt: "Do not leak into raw model probes.", + modelRun: true, + } as never), + { pool }, + ); + + const cfg = sdk.createSession.mock.calls[0]?.[0]; + expect("systemMessage" in cfg).toBe(false); + }); + + it("appends extraSystemPrompt after rendered bootstrap 
instructions", async () => { + const rendered = "# Project Context\n## /ws/SOUL.md\n\nSoul voice goes here."; + workspaceBootstrapMock.resolveCopilotWorkspaceBootstrapContext.mockResolvedValueOnce({ + bootstrapFiles: [], + contextFiles: [], + instructions: rendered, + }); + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt( + makeParams({ + extraSystemPrompt: "Only answer in the current group thread.", + }), + { pool }, + ); + + const cfg = sdk.createSession.mock.calls[0]?.[0] as { + systemMessage?: { mode?: string; content?: string }; + }; + expect(cfg.systemMessage?.content).toBe( + `${rendered}\n\n## Group Chat Context\nOnly answer in the current group thread.`, + ); + }); + + it("forwards rendered bootstrap instructions to resumeSession on the resume path", async () => { + const rendered = "# Project Context\n## /ws/SOUL.md\n\nSoul voice goes here."; + workspaceBootstrapMock.resolveCopilotWorkspaceBootstrapContext.mockResolvedValueOnce({ + bootstrapFiles: [], + contextFiles: [], + instructions: rendered, + }); + const sdk = makeFakeSdk({ + onResumeSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("resumed")); + }, + }); + const pool = makeFakePool(sdk); + + await runCopilotAttempt( + makeParams({ initialReplayState: { sdkSessionId: "sess-resume-1" } } as never), + { pool }, + ); + + // SystemMessage is in ResumeSessionConfig's Pick set (per SDK + // types.d.ts:1198), so it must be propagated on resume too, + // otherwise resumed sessions would silently lose OpenClaw + // persona/identity context after every reconnect. 
+ const cfg = sdk.resumeSession.mock.calls[0]?.[1] as { + systemMessage?: { mode?: string; content?: string }; + }; + expect(cfg.systemMessage).toBeDefined(); + expect(cfg.systemMessage?.mode).toBe("append"); + expect(cfg.systemMessage?.content).toBe(rendered); + }); + }); + + it("infiniteSessions config is propagated to createSession when host supplies it", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt( + makeParams({ + infiniteSessionConfig: { + enabled: true, + backgroundCompactionThreshold: 0.7, + bufferExhaustionThreshold: 0.9, + }, + } as never), + { pool }, + ); + + const cfg = (sdk.createSession.mock.calls[0] as unknown[] | undefined)?.[0] as { + infiniteSessions?: Record; + }; + expect(cfg.infiniteSessions).toEqual({ + enabled: true, + backgroundCompactionThreshold: 0.7, + bufferExhaustionThreshold: 0.9, + }); + }); + + it("infiniteSessions enabled:false explicitly disables infinite sessions", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt(makeParams({ infiniteSessionConfig: { enabled: false } } as never), { + pool, + }); + + const cfg = (sdk.createSession.mock.calls[0] as unknown[] | undefined)?.[0] as { + infiniteSessions?: Record; + }; + expect(cfg.infiniteSessions).toEqual({ enabled: false }); + }); + + it("infiniteSessions is propagated to resumeSession on resume path", async () => { + const sdk = makeFakeSdk({ + onResumeSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("resumed")); + }, + }); + const pool = makeFakePool(sdk); + + await runCopilotAttempt( + makeParams({ + infiniteSessionConfig: { backgroundCompactionThreshold: 0.5 }, + initialReplayState: { sdkSessionId: "resume-3" }, + } as never), + { pool }, + ); + + expect(sdk.resumeSession).toHaveBeenCalledTimes(1); + const cfg = sdk.resumeSession.mock.calls[0]?.[1] as { + infiniteSessions?: Record; + }; + 
expect(cfg.infiniteSessions).toEqual({ backgroundCompactionThreshold: 0.5 }); + }); + + it("timeout", async () => { + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(undefined); + }, + }); + const pool = makeFakePool(sdk); + + const result = await runCopilotAttempt(makeParams(), { pool }); + + expect(result.timedOut).toBe(true); + expect(result.aborted).toBe(false); + expect(getSdkSessionId(result)).toBe("sess-1"); + expect(sdk.sessions[0]?.abort).toHaveBeenCalledTimes(0); + }); + + it("G1: SDK timeout rejection (Error 'Timeout after Nms waiting for session.idle') sets timedOut, leaves promptError undefined, and does NOT abort the session", async () => { + // @github/copilot-sdk@1.0.0-beta.4 actually REJECTS sendAndWait + // with this exact message when the internal timer beats + // session.idle (see node_modules/@github/copilot-sdk/dist/ + // session.js:156-164). Before round-5 we only handled the legacy + // resolve(undefined) shape, which meant a real timeout fell into + // the catch and surfaced as a generic prompt error with + // timedOut=false — the replay metadata then incorrectly treated + // the attempt as side-effect-safe. + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockRejectedValueOnce( + new Error("Timeout after 60000ms waiting for session.idle"), + ); + }, + }); + const pool = makeFakePool(sdk); + + const result = await runCopilotAttempt(makeParams(), { pool }); + + expect(result.timedOut).toBe(true); + expect(result.promptError).toBeUndefined(); + expect(result.aborted).toBe(false); + expect(result.externalAbort).toBe(false); + // Do NOT abort on timeout: orchestrator may resume the in-flight + // SDK session on the next attempt. Matches the existing + // resolve(undefined) test above. 
+ expect(sdk.sessions[0]?.abort).toHaveBeenCalledTimes(0); + // Replay metadata must reflect that the timeout flipped the + // side-effect-risky bit (and therefore replay-unsafe). Before + // round-5 the SDK rejection fell through to a generic prompt + // error path with timedOut=false and the orchestrator's + // replay-shim incorrectly treated the attempt as side-effect-safe. + expect(result.replayMetadata?.hadPotentialSideEffects).toBe(true); + expect(result.replayMetadata?.replaySafe).toBe(false); + }); + + it("G1: SDK timeout flushes the in-flight delta chain before snapshot so assistant text is preserved", async () => { + // If the SDK delivered streaming deltas before the timer fired + // but the delta-chain promise had not yet resolved (slow async + // onAssistantDelta consumer), the snapshot used to be built + // without waiting for them. Round-5 awaits the delta chain inside + // the timeout branch so the recorded assistantTexts reflect what + // the model actually streamed. + const sendDeferred = createDeferred(); + const release = createDeferred(); + const onAssistantDelta = vi.fn(async (_payload: { delta: string }) => { + await release.promise; + }); + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockReturnValue(sendDeferred.promise); + }, + }); + const pool = makeFakePool(sdk); + const createToolBridge = vi.fn(async () => ({ sdkTools: [], sourceTools: [] })); + + const runPromise = runCopilotAttempt(makeParams({ onAssistantDelta }), { + createToolBridge, + pool, + }); + await flushAsync(); + const session = sdk.sessions[0]; + session.emit("assistant.message_delta", { deltaContent: "partial-", messageId: "msg-1" }); + await flushAsync(); + // SDK timer fires before the slow delta consumer resolves. + sendDeferred.reject(new Error("Timeout after 60000ms waiting for session.idle")); + await flushAsync(); + // Release the delta consumer so the awaitDeltaChain in the + // timeout branch can complete. 
+ release.resolve(); + const result = await runPromise; + + expect(result.timedOut).toBe(true); + expect(onAssistantDelta).toHaveBeenCalledTimes(1); + expect(result.assistantTexts?.join("")).toContain("partial-"); + }); + + it("model translation: unsupported provider", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + const result = await runCopilotAttempt( + makeParams({ + model: { api: "openai-responses", id: "claude", provider: "anthropic" } as never, + }), + { pool }, + ); + + expect(getPromptErrorCode(result)).toBe("model_not_supported"); + expect(sdk.createSession).toHaveBeenCalledTimes(0); + expect(pool.acquire).toHaveBeenCalledTimes(0); + expect(pool.release).toHaveBeenCalledTimes(0); + }); + + it("acquire failure", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + const error = new Error("acquire failed"); + pool.acquire = vi.fn(async () => { + throw error; + }); + + const result = await runCopilotAttempt(makeParams(), { pool }); + + expect(result.promptError).toBe(error); + expect(sdk.createSession).toHaveBeenCalledTimes(0); + expect(pool.release).toHaveBeenCalledTimes(0); + }); + + it("release failure after a successful send rejects the attempt", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + pool.release = vi.fn(async () => { + throw "release failed"; + }); + + await expect(runCopilotAttempt(makeParams(), { pool })).rejects.toThrow("release failed"); + + expect(sdk.sessions[0]?.disconnect).toHaveBeenCalledTimes(1); + }); + + it("release failure after a primary prompt error warns without masking the error", async () => { + const primaryError = new Error("send failed"); + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => undefined); + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockRejectedValueOnce(primaryError); + }, + }); + const pool = makeFakePool(sdk); + pool.release = vi.fn(async () => { + throw "release 
failed"; + }); + + const result = await runCopilotAttempt(makeParams(), { pool }); + + expect(result.promptError).toBe(primaryError); + expect(warnSpy).toHaveBeenCalledWith( + "[copilot-attempt] pool.release failed after primary error", + expect.objectContaining({ message: "release failed" }), + ); + }); + + it("accepts string model ids and falls back to top-level provider metadata", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + const result = await runCopilotAttempt( + makeParams({ model: "gpt-4.1" as never, provider: "github-copilot" } as never), + { now: () => 123, pool }, + ); + + expect(getPromptErrorCode(result)).toBeUndefined(); + expect(sdk.createSession).toHaveBeenCalledWith(expect.objectContaining({ model: "gpt-4.1" })); + expect(result.currentAttemptAssistant).toEqual( + expect.objectContaining({ provider: "github-copilot", timestamp: 123 }), + ); + }); + + it("cleanup on success", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt(makeParams(), { pool }); + + const session = sdk.sessions[0]; + expect(session.off).toHaveBeenCalledTimes(session.on.mock.calls.length); + expect(session.disconnect).toHaveBeenCalledTimes(1); + expect(pool.release).toHaveBeenCalledTimes(1); + }); + + it("cleanup on send error", async () => { + const error = new Error("send failed"); + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockRejectedValueOnce(error); + }, + }); + const pool = makeFakePool(sdk); + + const result = await runCopilotAttempt(makeParams(), { pool }); + const session = sdk.sessions[0]; + + expect(result.promptError).toBe(error); + expect(session.off).toHaveBeenCalledTimes(session.on.mock.calls.length); + expect(session.disconnect).toHaveBeenCalledTimes(1); + expect(pool.release).toHaveBeenCalledTimes(1); + }); + + it("cleanup on disconnect throw", async () => { + const primaryError = new Error("send failed"); + const 
sdkWithPrimaryError = makeFakeSdk({ + onCreateSession: (session) => { + session.disconnect.mockRejectedValueOnce(new Error("disconnect failed")); + session.sendAndWait.mockRejectedValueOnce(primaryError); + }, + }); + const poolWithPrimaryError = makeFakePool(sdkWithPrimaryError); + + const first = await runCopilotAttempt(makeParams(), { pool: poolWithPrimaryError }); + expect(first.promptError).toBe(primaryError); + + const sdkWithoutPrimaryError = makeFakeSdk({ + onCreateSession: (session) => { + session.disconnect.mockRejectedValueOnce(new Error("disconnect failed")); + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("done")); + }, + }); + const poolWithoutPrimaryError = makeFakePool(sdkWithoutPrimaryError); + + const second = await runCopilotAttempt(makeParams(), { pool: poolWithoutPrimaryError }); + expect((second.promptError as Error | undefined)?.message).toBe("disconnect failed"); + }); + + it("pool keying: useLoggedInUser", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt( + makeParams({ auth: { gitHubToken: "ignored", useLoggedInUser: true } as never }), + { pool }, + ); + + const key = (vi.mocked(pool.acquire).mock.calls[0] as unknown[] | undefined)?.[0] as { + authMode: string; + }; + const options = (vi.mocked(pool.acquire).mock.calls[0] as unknown[] | undefined)?.[1] as { + gitHubToken?: string; + useLoggedInUser?: boolean; + }; + expect(key.authMode).toBe("useLoggedInUser"); + expect(options.useLoggedInUser).toBe(true); + expect(options.gitHubToken).toBeUndefined(); + }); + + it("pool keying: gitHubToken requires profileId+profileVersion", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await expect( + runCopilotAttempt(makeParams({ auth: { gitHubToken: "token" } as never }), { pool }), + ).rejects.toThrow( + "[copilot-attempt] gitHubToken auth requires profileId+profileVersion (pool keying safety; per Q5/Q1 decisions)", + ); + 
expect(pool.acquire).toHaveBeenCalledTimes(0); + expect(sdk.createSession).toHaveBeenCalledTimes(0); + }); + + it("pool keying: gitHubToken with profile", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt( + makeParams({ + auth: { gitHubToken: "token", profileId: "profile-1", profileVersion: "v1" } as never, + }), + { pool }, + ); + + const key = (vi.mocked(pool.acquire).mock.calls[0] as unknown[] | undefined)?.[0] as { + authMode: string; + authProfileId?: string; + authProfileVersion?: string; + }; + const options = (vi.mocked(pool.acquire).mock.calls[0] as unknown[] | undefined)?.[1] as { + gitHubToken?: string; + useLoggedInUser?: boolean; + }; + expect(key.authMode).toBe("gitHubToken"); + expect(key.authProfileId).toBe("profile-1"); + expect(key.authProfileVersion).toBe("v1"); + expect(options.gitHubToken).toBe("token"); + expect(options.useLoggedInUser).toBe(false); + }); + + describe("session-level gitHubToken (independent of client-level)", () => { + // The SDK contract (@github/copilot-sdk/dist/types.d.ts:1168-1178) + // makes `SessionConfig.gitHubToken` independent of the client-level + // `CopilotClientOptions.gitHubToken`. The session-level field is + // what drives content exclusion, model routing, and quota for that + // session. ResumeSessionConfig (types.d.ts:1198) also includes + // `gitHubToken` in its Pick, so resume must carry it too. 
+ + it("contract resolvedApiKey populates SessionConfig.gitHubToken on createSession", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt( + makeParams({ + auth: {} as never, + resolvedApiKey: "contract-token-xyz", + authProfileId: "github-copilot:main", + } as never), + { pool }, + ); + + const cfg = (sdk.createSession.mock.calls[0] as unknown[] | undefined)?.[0] as { + gitHubToken?: string; + }; + expect(cfg.gitHubToken).toBe("contract-token-xyz"); + }); + + it("explicit auth.gitHubToken populates SessionConfig.gitHubToken on createSession", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt( + makeParams({ + auth: { gitHubToken: "explicit-token", profileId: "p", profileVersion: "v1" } as never, + }), + { pool }, + ); + + const cfg = (sdk.createSession.mock.calls[0] as unknown[] | undefined)?.[0] as { + gitHubToken?: string; + }; + expect(cfg.gitHubToken).toBe("explicit-token"); + }); + + it("SessionConfig.gitHubToken is forwarded to resumeSession on a resumed session", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt( + makeParams({ + auth: {} as never, + resolvedApiKey: "contract-token-resume", + authProfileId: "github-copilot:main", + initialReplayState: { sdkSessionId: "resume-target" } as never, + } as never), + { pool }, + ); + + expect(sdk.resumeSession).toHaveBeenCalledTimes(1); + const resumeCfg = sdk.resumeSession.mock.calls[0]?.[1] as { gitHubToken?: string }; + expect(resumeCfg.gitHubToken).toBe("contract-token-resume"); + }); + + it("SessionConfig.gitHubToken is omitted when useLoggedInUser is the resolved mode", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + await runCopilotAttempt(makeParams({ auth: { useLoggedInUser: true } as never }), { pool }); + + const cfg = sdk.createSession.mock.calls[0]?.[0]; + // Per the SDK contract, passing both 
useLoggedInUser and a + // session-level gitHubToken would be contradictory. The + // logged-in identity already determines content exclusion / + // routing / quota, so the field must be absent (not + // empty-string, not undefined-as-key). + expect("gitHubToken" in cfg).toBe(false); + }); + + it("SessionConfig.gitHubToken is omitted when default mode is useLoggedInUser (no auth signal)", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + // No env tokens, no contract token, no explicit token: falls + // through to default useLoggedInUser mode. + const prevOpenclaw = process.env.OPENCLAW_GITHUB_TOKEN; + const prevGithub = process.env.GITHUB_TOKEN; + delete process.env.OPENCLAW_GITHUB_TOKEN; + delete process.env.GITHUB_TOKEN; + try { + await runCopilotAttempt(makeParams({ auth: {} as never }), { pool }); + const cfg = sdk.createSession.mock.calls[0]?.[0]; + expect("gitHubToken" in cfg).toBe(false); + } finally { + if (prevOpenclaw !== undefined) { + process.env.OPENCLAW_GITHUB_TOKEN = prevOpenclaw; + } + if (prevGithub !== undefined) { + process.env.GITHUB_TOKEN = prevGithub; + } + } + }); + }); + + describe("dual-write transcript mirror", () => { + afterEach(() => { + dualWriteMock.dualWriteCopilotTranscriptBestEffort.mockClear(); + dualWriteMock.dualWriteCopilotTranscriptBestEffort.mockResolvedValue(undefined); + }); + + it("invokes dual-write mirror with sessionFile and scoped idempotencyScope when sessionFile is set", async () => { + dualWriteMock.dualWriteCopilotTranscriptBestEffort.mockClear(); + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("done")); + }, + }); + const pool = makeFakePool(sdk); + + await runCopilotAttempt(makeParams(), { pool }); + + expect(dualWriteMock.dualWriteCopilotTranscriptBestEffort).toHaveBeenCalledTimes(1); + const args = dualWriteMock.dualWriteCopilotTranscriptBestEffort.mock.calls[0]?.[0] as { + sessionFile: string; + 
messages: Array<{ role: string }>; + idempotencyScope?: string; + }; + expect(args.sessionFile).toBe("session.json"); + expect(args.idempotencyScope).toMatch(/^copilot:/u); + expect(args.messages.length).toBeGreaterThan(0); + const roles = args.messages.map((m) => m.role); + expect(roles).toContain("user"); + expect(roles).toContain("assistant"); + }); + + it("does not invoke dual-write mirror when sessionFile is absent", async () => { + dualWriteMock.dualWriteCopilotTranscriptBestEffort.mockClear(); + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("done")); + }, + }); + const pool = makeFakePool(sdk); + const params = makeParams() as unknown as Record; + delete params.sessionFile; + + await runCopilotAttempt(params as never, { pool }); + + expect(dualWriteMock.dualWriteCopilotTranscriptBestEffort).not.toHaveBeenCalled(); + }); + + it("tags mirrored messages with copilot mirror identity per role and position", async () => { + dualWriteMock.dualWriteCopilotTranscriptBestEffort.mockClear(); + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("done")); + }, + }); + const pool = makeFakePool(sdk); + + await runCopilotAttempt(makeParams(), { pool }); + + const args = dualWriteMock.dualWriteCopilotTranscriptBestEffort.mock.calls[0]?.[0] as { + messages: Array<{ role: string; __openclaw?: { mirrorIdentity?: string } }>; + }; + for (const [index, message] of args.messages.entries()) { + if ( + message.role !== "user" && + message.role !== "assistant" && + message.role !== "toolResult" + ) { + continue; + } + const identity = message["__openclaw"]?.mirrorIdentity ?? ""; + // The terminal assistant carries the turn-stable + // `${runId}:assistant:final` identity attached by attempt.ts + // (rubber-duck-validated identity scheme — survives SDK session + // reuse across turns). 
Caller-passed history without an + // identity falls through to the positional `${scope}:role:idx` + // fingerprint that the existing tagging map applies. + if (message.role === "assistant" && index === args.messages.length - 1) { + expect(identity).toMatch(/:assistant:final$/u); + expect(identity).toContain("run-1"); + } else { + expect(identity).toMatch(new RegExp(`:${message.role}:${index}$`, "u")); + } + } + }); + + it("dual-write failure does not surface from runCopilotAttempt", async () => { + dualWriteMock.dualWriteCopilotTranscriptBestEffort.mockRejectedValueOnce( + new Error("mirror boom"), + ); + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("done")); + }, + }); + const pool = makeFakePool(sdk); + + // dualWriteCopilotTranscriptBestEffort is already best-effort + // internally; this test asserts attempt.ts also awaits it without + // letting an unexpected rejection escape. + await expect(runCopilotAttempt(makeParams(), { pool })).resolves.toBeDefined(); + }); + + // --------------------------------------------------------------- + // Dogfood finding #3: synthetic current-turn user message in the + // OpenClaw audit transcript (mirrors codex event-projector pattern). + // + // Without this synthesis the dashboard / CLI history shows only + // assistant bubbles — the user's typed turn is lost — because the + // OpenClaw shell's `persistTextTurnTranscript` skips its own user + // write when `embeddedAssistantGapFill` is true, trusting the + // harness to mirror the user turn. 
+ // --------------------------------------------------------------- + it("injects synthetic user message with runId:prompt identity when caller passes no history", async () => { + dualWriteMock.dualWriteCopilotTranscriptBestEffort.mockClear(); + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("done")); + }, + }); + const pool = makeFakePool(sdk); + const params = makeParams({ + messages: [], + prompt: "what's my name?", + runId: "run-A", + } as never); + + await runCopilotAttempt(params, { pool }); + + const args = dualWriteMock.dualWriteCopilotTranscriptBestEffort.mock.calls[0]?.[0] as { + messages: Array<{ + role: string; + content: unknown; + __openclaw?: { mirrorIdentity?: string }; + }>; + }; + expect(args.messages.length).toBe(2); + expect(args.messages[0]?.role).toBe("user"); + expect(args.messages[0]?.content).toBe("what's my name?"); + expect(args.messages[0]?.["__openclaw"]?.mirrorIdentity).toBe("run-A:prompt"); + expect(args.messages[1]?.role).toBe("assistant"); + expect(args.messages[1]?.["__openclaw"]?.mirrorIdentity).toBe("run-A:assistant:final"); + }); + + it("does not duplicate synthetic user when caller passed the same prompt as the messages tail", async () => { + dualWriteMock.dualWriteCopilotTranscriptBestEffort.mockClear(); + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("done")); + }, + }); + const pool = makeFakePool(sdk); + // Default makeParams() seeds messages with the same text as + // prompt, so the synthetic user should be suppressed and the + // mirrored payload should contain exactly one user entry. 
+ await runCopilotAttempt(makeParams(), { pool }); + + const args = dualWriteMock.dualWriteCopilotTranscriptBestEffort.mock.calls[0]?.[0] as { + messages: Array<{ role: string }>; + }; + const userCount = args.messages.filter((m) => m.role === "user").length; + expect(userCount).toBe(1); + }); + + it("prefers transcriptPrompt over prompt for the synthetic user body", async () => { + dualWriteMock.dualWriteCopilotTranscriptBestEffort.mockClear(); + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("done")); + }, + }); + const pool = makeFakePool(sdk); + const params = makeParams({ + messages: [], + prompt: "EXPANDED: please answer with your real name", + transcriptPrompt: "what's your name?", + runId: "run-B", + } as never); + + await runCopilotAttempt(params, { pool }); + + const args = dualWriteMock.dualWriteCopilotTranscriptBestEffort.mock.calls[0]?.[0] as { + messages: Array<{ role: string; content: unknown }>; + }; + const user = args.messages.find((m) => m.role === "user"); + expect(user?.content).toBe("what's your name?"); + }); + + it("two attempts that share the same sdkSessionId but differ by runId produce distinct user/assistant mirror identities", async () => { + // Simulates session reuse (Fix B): the SDK keeps `sess-1` across + // both turns, so a session-relative `${sdkSessionId}:user:0` + // identity would collide and drop the second turn's user message. + // The runId-stable identity scheme avoids that collision. 
+ dualWriteMock.dualWriteCopilotTranscriptBestEffort.mockClear(); + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("turn-1-reply")); + }, + onResumeSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("turn-2-reply")); + }, + }); + const pool = makeFakePool(sdk); + + await runCopilotAttempt( + makeParams({ + messages: [], + prompt: "turn 1", + runId: "run-1", + } as never), + { pool }, + ); + await runCopilotAttempt( + makeParams({ + messages: [], + prompt: "turn 2", + runId: "run-2", + initialReplayState: { sdkSessionId: "sess-1" }, + } as never), + { pool }, + ); + + const calls = dualWriteMock.dualWriteCopilotTranscriptBestEffort.mock.calls; + expect(calls.length).toBe(2); + const turn1 = calls[0]?.[0] as { + messages: Array<{ role: string; __openclaw?: { mirrorIdentity?: string } }>; + }; + const turn2 = calls[1]?.[0] as { + messages: Array<{ role: string; __openclaw?: { mirrorIdentity?: string } }>; + }; + const turn1User = turn1.messages.find((m) => m.role === "user"); + const turn2User = turn2.messages.find((m) => m.role === "user"); + const turn1Assistant = turn1.messages.find((m) => m.role === "assistant"); + const turn2Assistant = turn2.messages.find((m) => m.role === "assistant"); + expect(turn1User?.["__openclaw"]?.mirrorIdentity).toBe("run-1:prompt"); + expect(turn2User?.["__openclaw"]?.mirrorIdentity).toBe("run-2:prompt"); + expect(turn1Assistant?.["__openclaw"]?.mirrorIdentity).toBe("run-1:assistant:final"); + expect(turn2Assistant?.["__openclaw"]?.mirrorIdentity).toBe("run-2:assistant:final"); + }); + + it("two attempts with identical prompts but different runIds remain distinct (no content-fingerprint collapse)", async () => { + dualWriteMock.dualWriteCopilotTranscriptBestEffort.mockClear(); + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + 
session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("first")); + }, + onResumeSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("second")); + }, + }); + const pool = makeFakePool(sdk); + + await runCopilotAttempt( + makeParams({ messages: [], prompt: "same question", runId: "run-X" } as never), + { pool }, + ); + await runCopilotAttempt( + makeParams({ + messages: [], + prompt: "same question", + runId: "run-Y", + initialReplayState: { sdkSessionId: "sess-1" }, + } as never), + { pool }, + ); + + const calls = dualWriteMock.dualWriteCopilotTranscriptBestEffort.mock.calls; + const id1 = ( + calls[0]?.[0] as { + messages: Array<{ role: string; __openclaw?: { mirrorIdentity?: string } }>; + } + ).messages.find((m) => m.role === "user")?.["__openclaw"]?.mirrorIdentity; + const id2 = ( + calls[1]?.[0] as { + messages: Array<{ role: string; __openclaw?: { mirrorIdentity?: string } }>; + } + ).messages.find((m) => m.role === "user")?.["__openclaw"]?.mirrorIdentity; + expect(id1).toBe("run-X:prompt"); + expect(id2).toBe("run-Y:prompt"); + expect(id1).not.toBe(id2); + }); + }); + + describe("sandbox parity (PR #86155 [P1])", () => { + function makeSandboxStub(overrides: Partial = {}): SandboxContext { + return { + enabled: true, + workspaceAccess: "ro", + workspaceDir: "/sandbox/copy", + agentWorkspaceDir: "/sandbox/agent", + scopeKey: "agent-1:session-1", + sessionKey: "session-1", + backend: { kind: "local" } as never, + cfg: {} as never, + ...overrides, + } as unknown as SandboxContext; + } + + it("forwards sandbox=null when resolveSandboxContext returns null", async () => { + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("done")); + }, + }); + const pool = makeFakePool(sdk); + const createToolBridge = vi.fn(async () => ({ sdkTools: [], sourceTools: [] })); + const resolveSandboxContextOverride = vi.fn(async () => null); + + 
await runCopilotAttempt(makeParams(), { + createToolBridge, + pool, + resolveSandboxContextOverride, + }); + + expect(resolveSandboxContextOverride).toHaveBeenCalledTimes(1); + const bridgeArgs = (createToolBridge.mock.calls[0] as unknown[] | undefined)?.[0] as { + sandbox?: unknown; + spawnWorkspaceDir?: unknown; + workspaceDir?: unknown; + }; + expect(bridgeArgs?.sandbox).toBeNull(); + expect(bridgeArgs?.spawnWorkspaceDir).toBeUndefined(); + expect(bridgeArgs?.workspaceDir).toBe("C:\\workspace"); + }); + + it("sandbox=null: SDK session workingDirectory matches original workspace", async () => { + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("done")); + }, + }); + const pool = makeFakePool(sdk); + const createToolBridge = vi.fn(async () => ({ sdkTools: [], sourceTools: [] })); + const resolveSandboxContextOverride = vi.fn(async () => null); + + await runCopilotAttempt(makeParams(), { + createToolBridge, + pool, + resolveSandboxContextOverride, + }); + + const sessionConfig = (sdk.createSession.mock.calls[0] as unknown[] | undefined)?.[0] as { + workingDirectory?: unknown; + }; + expect(sessionConfig?.workingDirectory).toBe("C:\\workspace"); + }); + + it("uses task cwd for SDK workingDirectory and bridged tools when unsandboxed", async () => { + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("done")); + }, + }); + const pool = makeFakePool(sdk); + const createToolBridge = vi.fn(async () => ({ sdkTools: [], sourceTools: [] })); + const resolveSandboxContextOverride = vi.fn(async () => null); + + await runCopilotAttempt( + makeParams({ + cwd: "C:\\workspace\\task-repo", + workspaceDir: "C:\\workspace", + } as never), + { + createToolBridge, + pool, + resolveSandboxContextOverride, + }, + ); + + const bridgeArgs = (createToolBridge.mock.calls[0] as unknown[] | undefined)?.[0] as { + cwd?: unknown; + 
workspaceDir?: unknown; + }; + expect(bridgeArgs?.workspaceDir).toBe("C:\\workspace"); + expect(bridgeArgs?.cwd).toBe("C:\\workspace\\task-repo"); + + const sessionConfig = (sdk.createSession.mock.calls[0] as unknown[] | undefined)?.[0] as { + instructionDirectories?: unknown; + workingDirectory?: unknown; + }; + expect(sessionConfig?.workingDirectory).toBe("C:\\workspace\\task-repo"); + expect(sessionConfig?.instructionDirectories).toEqual(["C:\\workspace"]); + }); + + it("normalizes task cwd before wiring SDK and bridged tools", async () => { + const stateDir = await fsp.mkdtemp(path.join(tmpdir(), "copilot-cwd-normalize-")); + const workspaceDir = path.join(stateDir, "workspace"); + const taskDir = path.join(workspaceDir, "task-repo"); + await fsp.mkdir(taskDir, { recursive: true }); + const sdk = makeFakeSdk({ + onCreateSession: (session) => { + session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("done")); + }, + }); + const pool = makeFakePool(sdk); + const createToolBridge = vi.fn(async () => ({ sdkTools: [], sourceTools: [] })); + const resolveSandboxContextOverride = vi.fn(async () => null); + + try { + await runCopilotAttempt( + makeParams({ + cwd: path.join(taskDir, "."), + workspaceDir: path.join(workspaceDir, "."), + } as never), + { + createToolBridge, + pool, + resolveSandboxContextOverride, + }, + ); + + const bridgeArgs = (createToolBridge.mock.calls[0] as unknown[] | undefined)?.[0] as { + cwd?: unknown; + workspaceDir?: unknown; + }; + expect(bridgeArgs?.workspaceDir).toBe(workspaceDir); + expect(bridgeArgs?.cwd).toBe(taskDir); + + const sessionConfig = (sdk.createSession.mock.calls[0] as unknown[] | undefined)?.[0] as { + instructionDirectories?: unknown; + workingDirectory?: unknown; + }; + expect(sessionConfig?.workingDirectory).toBe(taskDir); + expect(sessionConfig?.instructionDirectories).toEqual([workspaceDir]); + } finally { + await fsp.rm(stateDir, { recursive: true, force: true }); + } + }); + + it("forwards rw sandbox: 
bridge sees original workspace and no spawn override", async () => {
    const sandbox = makeSandboxStub({ workspaceAccess: "rw" });
    const sdk = makeFakeSdk({
      onCreateSession: (session) => {
        session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("done"));
      },
    });
    const pool = makeFakePool(sdk);
    const createToolBridge = vi.fn(async () => ({ sdkTools: [], sourceTools: [] }));
    const resolveSandboxContextOverride = vi.fn(async () => sandbox);

    await runCopilotAttempt(makeParams(), {
      createToolBridge,
      pool,
      resolveSandboxContextOverride,
    });

    const bridgeArgs = (createToolBridge.mock.calls[0] as unknown[] | undefined)?.[0] as {
      sandbox?: unknown;
      spawnWorkspaceDir?: unknown;
      workspaceDir?: unknown;
    };
    expect(bridgeArgs?.sandbox).toBe(sandbox);
    // rw sandbox keeps the original workspace; subagent spawn inherits the same path.
    expect(bridgeArgs?.workspaceDir).toBe("C:\\workspace");
    expect(bridgeArgs?.spawnWorkspaceDir).toBeUndefined();
  });

  // Same rw setup, but inspects the config handed to `sdk.createSession`
  // instead of the tool-bridge arguments.
  it("forwards rw sandbox: SDK session workingDirectory stays on the original workspace", async () => {
    const sandbox = makeSandboxStub({ workspaceAccess: "rw" });
    const sdk = makeFakeSdk({
      onCreateSession: (session) => {
        session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("done"));
      },
    });
    const pool = makeFakePool(sdk);
    const createToolBridge = vi.fn(async () => ({ sdkTools: [], sourceTools: [] }));
    const resolveSandboxContextOverride = vi.fn(async () => sandbox);

    await runCopilotAttempt(makeParams(), {
      createToolBridge,
      pool,
      resolveSandboxContextOverride,
    });

    const sessionConfig = (sdk.createSession.mock.calls[0] as unknown[] | undefined)?.[0] as {
      workingDirectory?: unknown;
    };
    expect(sessionConfig?.workingDirectory).toBe("C:\\workspace");
  });

  // ro sandbox: the bridge and the SDK session are pointed at the sandbox
  // copy, while `spawnWorkspaceDir` keeps the workspace passed to makeParams.
  it("forwards ro sandbox: bridge sees sandbox copy, spawn keeps original workspace", async () => {
    const sandboxDir = `${tmpdir()}/copilot-sandbox-${Date.now()}`;
    const sandbox = makeSandboxStub({ workspaceAccess: "ro", workspaceDir: sandboxDir });
    const sdk = makeFakeSdk({
      onCreateSession: (session) => {
        session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("done"));
      },
    });
    const pool = makeFakePool(sdk);
    const createToolBridge = vi.fn(async () => ({ sdkTools: [], sourceTools: [] }));
    const resolveSandboxContextOverride = vi.fn(async () => sandbox);

    const workspaceDir = `${tmpdir()}/copilot-orig-${Date.now()}`;
    try {
      await runCopilotAttempt(makeParams({ workspaceDir } as never), {
        createToolBridge,
        pool,
        resolveSandboxContextOverride,
      });

      const bridgeArgs = (createToolBridge.mock.calls[0] as unknown[] | undefined)?.[0] as {
        sandbox?: unknown;
        spawnWorkspaceDir?: unknown;
        workspaceDir?: unknown;
      };
      expect(bridgeArgs?.sandbox).toBe(sandbox);
      expect(bridgeArgs?.workspaceDir).toBe(sandboxDir);
      // The mkdir for the sandbox copy must have run as a side effect.
      await expect(fsp.stat(sandboxDir)).resolves.toBeTruthy();
      expect(bridgeArgs?.spawnWorkspaceDir).toBe(workspaceDir);

      const sessionConfig = (sdk.createSession.mock.calls[0] as unknown[] | undefined)?.[0] as {
        workingDirectory?: unknown;
      };
      // SDK session must point at the sandbox copy so native tool ops (shell,
      // write, AGENTS.md loader) cannot escape into the host workspace.
      // Asserted inside `try` rather than `finally`: an expectation that
      // throws in `finally` would replace any earlier failure and skip the
      // directory cleanup below.
      expect(sessionConfig?.workingDirectory).toBe(sandboxDir);
    } finally {
      // Cleanup only; `force: true` tolerates directories that were never created.
      await fsp.rm(sandboxDir, { recursive: true, force: true });
      await fsp.rm(workspaceDir, { recursive: true, force: true });
    }
  });

  // The stubbed fsBridge resolves the prompt's image ref to a host file that
  // lives outside `sandboxDir`. With `tools.fs.workspaceOnly` set, the attempt
  // is expected to resolve the path but never read or attach the file.
  it("applies sandbox workspace-only guards when hydrating prompt image refs", async () => {
    const stateDir = await fsp.mkdtemp(path.join(tmpdir(), "copilot-sandbox-image-policy-"));
    const sandboxDir = path.join(stateDir, "sandbox");
    const outsideDir = path.join(stateDir, "agent");
    const outsideImage = path.join(outsideDir, "secret.png");
    await fsp.mkdir(sandboxDir, { recursive: true });
    await fsp.mkdir(outsideDir, { recursive: true });
    await fsp.writeFile(outsideImage, Buffer.from(TINY_PNG_BASE64, "base64"));
    const fsBridge = {
      mkdirp: vi.fn(async () => undefined),
      readFile: vi.fn(async () => Buffer.from(TINY_PNG_BASE64, "base64")),
      remove: vi.fn(async () => undefined),
      rename: vi.fn(async () => undefined),
      resolvePath: vi.fn(() => ({
        containerPath: "/agent/secret.png",
        hostPath: outsideImage,
        relativePath: "../agent/secret.png",
      })),
      stat: vi.fn(async () => ({ mtimeMs: 1, size: 1, type: "file" as const })),
      writeFile: vi.fn(async () => undefined),
    };
    const sandbox = makeSandboxStub({
      fsBridge,
      workspaceAccess: "ro",
      workspaceDir: sandboxDir,
    } as never);
    const sdk = makeFakeSdk();
    const pool = makeFakePool(sdk);
    const createToolBridge = vi.fn(async () => ({ sdkTools: [], sourceTools: [] }));
    const resolveSandboxContextOverride = vi.fn(async () => sandbox);

    try {
      await runCopilotAttempt(
        makeParams({
          config: { tools: { fs: { workspaceOnly: true } } },
          model: {
            api: "openai-responses",
            id: "gpt-4o",
            input: ["text", "image"],
            provider: "github-copilot",
          },
          prompt: "inspect /agent/secret.png",
          workspaceDir: path.join(stateDir, "workspace"),
        } as never),
        {
          createToolBridge,
          pool,
          resolveSandboxContextOverride,
        },
      );

      const sendOptions = sdk.sessions[0]?.sendAndWait.mock.calls[0]?.[0] as
        | { attachments?: unknown[] }
        | undefined;
      expect(sendOptions?.attachments).toBeUndefined();
      expect(fsBridge.resolvePath).toHaveBeenCalled();
      expect(fsBridge.readFile).not.toHaveBeenCalled();
    } finally {
      await fsp.rm(stateDir, { recursive: true, force: true });
    }
  });

  // A `cwd` that differs from `workspaceDir` is rejected for sandboxed runs
  // before the tool bridge or an SDK session is created.
  it("fails closed when sandbox is enabled with a cwd override", async () => {
    const sandbox = makeSandboxStub({ workspaceAccess: "rw" });
    const sdk = makeFakeSdk();
    const pool = makeFakePool(sdk);
    const createToolBridge = vi.fn(async () => ({ sdkTools: [], sourceTools: [] }));
    const resolveSandboxContextOverride = vi.fn(async () => sandbox);

    const result = await runCopilotAttempt(
      makeParams({
        cwd: "C:\\workspace\\task-repo",
        workspaceDir: "C:\\workspace",
      } as never),
      {
        createToolBridge,
        pool,
        resolveSandboxContextOverride,
      },
    );

    expect(getPromptErrorCode(result)).toBe("sandbox_cwd_override_unsupported");
    expect(createToolBridge).not.toHaveBeenCalled();
    expect(sdk.createSession).not.toHaveBeenCalled();
  });

  // A throwing sandbox resolver must surface as a prompt error carrying the
  // original message, with no bridge or session created.
  it("fails closed when sandbox resolution fails", async () => {
    const sdk = makeFakeSdk({
      onCreateSession: (session) => {
        session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("done"));
      },
    });
    const pool = makeFakePool(sdk);
    const createToolBridge = vi.fn(async () => ({ sdkTools: [], sourceTools: [] }));
    const resolveSandboxContextOverride = vi.fn(async () => {
      throw new Error("sandbox provisioning boom");
    });

    const result = await runCopilotAttempt(makeParams(), {
      createToolBridge,
      pool,
      resolveSandboxContextOverride,
    });

    expect(getPromptErrorCode(result)).toBe("sandbox_resolution_failure");
    expect((result.promptError as Error | undefined)?.message).toContain(
      "sandbox provisioning boom",
    );
    expect(createToolBridge).not.toHaveBeenCalled();
    expect(sdk.createSession).not.toHaveBeenCalled();
  });

  // The sandbox copy path is placed underneath a regular file so that creating
  // the directory fails; the attempt must report it as a resolution failure.
  it("fails closed when creating the sandbox copy workspace fails", async () => {
    const sdk = makeFakeSdk({
      onCreateSession: (session) => {
        session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("done"));
      },
    });
    const pool = makeFakePool(sdk);
    const createToolBridge = vi.fn(async () => ({ sdkTools: [], sourceTools: [] }));
    const blockingFile = path.join(tmpdir(), `copilot-sandbox-block-${Date.now()}`);
    await fsp.writeFile(blockingFile, "not a directory");
    const sandbox = makeSandboxStub({
      workspaceAccess: "ro",
      workspaceDir: path.join(blockingFile, "copy"),
    });

    try {
      const result = await runCopilotAttempt(makeParams(), {
        createToolBridge,
        pool,
        resolveSandboxContextOverride: async () => sandbox,
      });

      expect(getPromptErrorCode(result)).toBe("sandbox_resolution_failure");
      expect((result.promptError as Error | undefined)?.message).toContain("ENOTDIR");
      expect(createToolBridge).not.toHaveBeenCalled();
      expect(sdk.createSession).not.toHaveBeenCalled();
    } finally {
      await fsp.rm(blockingFile, { force: true });
    }
  });
});

// ClawSweeper PR #86155 [P1] round-8: the SDK SessionConfig accepts
// `availableTools` as a hard catalog allowlist
// (`@github/copilot-sdk/dist/types.d.ts:1059-1066`). Without it, the
// CLI keeps its native read/write/shell/url/mcp/memory/hook tools
// visible to the model alongside our bridged overrides, which would
// bypass OpenClaw's wrapped-tool enforcement under any permissive
// permission policy and pollute the catalog under the default reject
// policy. `createSessionConfig` derives `availableTools` from the
// post-filter `sdkTools` so create- and resume-session always carry
// exactly the names of the tools the bridge actually exposed.
describe("availableTools surface restriction (PR #86155 [P1] round-8)", () => {
  /** Minimal SDK tool stub; only `name` differs between instances. */
  const fakeSdkTool = (name: string): SdkTool => ({
    description: `Fake tool ${name}`,
    handler: async () => ({ resultType: "success", textResultForLlm: "ok" }),
    name,
    parameters: { type: "object" },
  });

  /** Tool-bridge stub that resolves to exactly the given bridged tools. */
  const stubToolBridge = (sdkTools: SdkTool[]) =>
    vi.fn(async () => ({ sdkTools, sourceTools: [] }));

  /**
   * Reads `availableTools` from the config object of a recorded mock call.
   * `configIndex` is 0 for `createSession(config)` and 1 for
   * `resumeSession(id, config)`.
   */
  const availableToolsOf = (call: unknown, configIndex = 0): readonly string[] | undefined => {
    const args = call as unknown[] | undefined;
    const config = args?.[configIndex] as { availableTools?: string[] } | undefined;
    return config?.availableTools;
  };

  /** Fake SDK whose resumed session answers the first send with "resumed". */
  const makeResumableSdk = () =>
    makeFakeSdk({
      onResumeSession: (session) => {
        session.sendAndWait.mockResolvedValueOnce(makeAssistantMessageEvent("resumed"));
      },
    });

  it("forwards exactly the bridged tool names when the bridge returns a narrow tool set", async () => {
    const sdk = makeFakeSdk();
    const pool = makeFakePool(sdk);
    const createToolBridge = stubToolBridge([fakeSdkTool("read"), fakeSdkTool("edit")]);

    await runCopilotAttempt(makeParams(), { createToolBridge, pool });

    const forwarded = availableToolsOf(sdk.createSession.mock.calls[0]);
    expect(forwarded).toEqual(["read", "edit"]);
  });

  it("forwards `[]` to the SDK when the bridge returns no tools (disable / raw / fully filtered)", async () => {
    const sdk = makeFakeSdk();
    const pool = makeFakePool(sdk);
    // Every upstream reason for "no tools" (`disableTools: true`, raw model
    // runs via `modelRun: true` or `promptMode: "none"`, an empty
    // `toolsAllow: []`, an unsupported provider) is already collapsed by the
    // bridge into `sdkTools: []`. The SDK must then receive an explicit empty
    // allowlist so it cannot fall back to its native catalog.
    const createToolBridge = stubToolBridge([]);

    await runCopilotAttempt(makeParams(), { createToolBridge, pool });

    const forwarded = availableToolsOf(sdk.createSession.mock.calls[0]);
    expect(forwarded).toEqual([]);
  });

  it("forwards the full bridged set when the run is unrestricted (no toolsAllow)", async () => {
    const sdk = makeFakeSdk();
    const pool = makeFakePool(sdk);
    const createToolBridge = stubToolBridge([
      fakeSdkTool("read"),
      fakeSdkTool("write"),
      fakeSdkTool("edit"),
      fakeSdkTool("exec"),
      fakeSdkTool("message"),
    ]);

    await runCopilotAttempt(makeParams(), { createToolBridge, pool });

    // The allowlist comes from what the bridge returned, not from the raw
    // `toolsAllow` input: wildcard `["*"]` and unrestricted runs both end up
    // as the concrete list of bridged tool names.
    const forwarded = availableToolsOf(sdk.createSession.mock.calls[0]);
    expect(forwarded).toEqual(["read", "write", "edit", "exec", "message"]);
  });

  it("forwards the same `availableTools` on the resumeSession path", async () => {
    // `ResumeSessionConfig` picks `availableTools` per
    // `@github/copilot-sdk/dist/types.d.ts:1198`, so the config spread into
    // `client.resumeSession(id, { ...sessionConfig })` has to carry the same
    // restriction; a resumed session would otherwise get the native catalog
    // back after every reconnect.
    const sdk = makeResumableSdk();
    const pool = makeFakePool(sdk);
    const createToolBridge = stubToolBridge([fakeSdkTool("read")]);

    await runCopilotAttempt(
      makeParams({ initialReplayState: { sdkSessionId: "sess-resume-1" } } as never),
      { createToolBridge, pool },
    );

    const forwarded = availableToolsOf(sdk.resumeSession.mock.calls[0], 1);
    expect(forwarded).toEqual(["read"]);
  });

  it("forwards `[]` to resumeSession when the bridge returns no tools", async () => {
    const sdk = makeResumableSdk();
    const pool = makeFakePool(sdk);
    const createToolBridge = stubToolBridge([]);

    await runCopilotAttempt(
      makeParams({ initialReplayState: { sdkSessionId: "sess-resume-2" } } as never),
      { createToolBridge, pool },
    );

    const forwarded = availableToolsOf(sdk.resumeSession.mock.calls[0], 1);
    expect(forwarded).toEqual([]);
  });
});

describe("bootstrap path remap wiring (PR #86155 [P2] round-9)", () => {
  // attempt.ts must forward the sandbox-resolved
  // `effectiveWorkspaceDir` to `resolveCopilotWorkspaceBootstrapContext`
  // so the helper can remap context-file paths from the host
  // workspace to the sandbox copy when sandbox `ro`/`none`
  // redirects the workingDirectory. The helper's own remap logic
  // and the rendered-systemMessage assertion live in
  // workspace-bootstrap.test.ts; this block locks in the integration
  // contract so future refactors cannot silently drop the parameter.
+ beforeEach(() => { + workspaceBootstrapMock.resolveCopilotWorkspaceBootstrapContext.mockReset(); + workspaceBootstrapMock.resolveCopilotWorkspaceBootstrapContext.mockResolvedValue({ + bootstrapFiles: [], + contextFiles: [], + instructions: undefined, + }); + }); + + it("forwards effectiveWorkspaceDir matching params.workspaceDir for non-sandboxed runs", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + const params = makeParams(); + await runCopilotAttempt(params, { pool }); + + const call = workspaceBootstrapMock.resolveCopilotWorkspaceBootstrapContext.mock.calls[0]; + const arg = (call as unknown[] | undefined)?.[0] as { + attempt: { workspaceDir?: string }; + effectiveWorkspaceDir?: string; + }; + // No sandbox configured -> bootstrap sees the same workspace + // the attempt was given. Remap is a no-op (helper fast path). + expect(arg.effectiveWorkspaceDir).toBe(arg.attempt.workspaceDir); + }); + + it("forwards the sandbox copy directory as effectiveWorkspaceDir for readonly sandbox runs", async () => { + const sdk = makeFakeSdk(); + const pool = makeFakePool(sdk); + + const params = makeParams(); + const hostWorkspace = (params as { workspaceDir?: string }).workspaceDir; + const sandboxWorkspace = await fsp.mkdtemp(path.join(tmpdir(), "copilot-sbx-ro-")); + try { + await runCopilotAttempt(params, { + pool, + // Bypass the real plugin-bridge wiring; with a sandbox in play + // attempt.ts would otherwise call the real createToolBridge which + // requires plugin SDK fixtures we do not stand up here. + createToolBridge: vi.fn(async () => ({ sdkTools: [], sourceTools: [] })), + // Drive the sandbox resolution branch deterministically so the + // test asserts the exact wiring rather than the orchestrator's + // real sandbox discovery path. Include every SandboxContext + // field attempt.ts touches (enabled, workspaceAccess, + // workspaceDir) plus the structural fields the bridge wiring + // expects. 
+ resolveSandboxContextOverride: async () => + ({ + enabled: true, + workspaceAccess: "ro", + workspaceDir: sandboxWorkspace, + agentWorkspaceDir: sandboxWorkspace, + scopeKey: "agent-1:session-1", + sessionKey: "session-1", + backend: { kind: "local" } as never, + cfg: {} as never, + }) as unknown as SandboxContext, + }); + + const call = workspaceBootstrapMock.resolveCopilotWorkspaceBootstrapContext.mock.calls[0]; + const arg = (call as unknown[] | undefined)?.[0] as { + attempt: { workspaceDir?: string }; + effectiveWorkspaceDir?: string; + }; + // Positive: bootstrap receives the sandbox path so the helper + // remaps rendered paths into the sandbox copy. + expect(arg.effectiveWorkspaceDir).toBe(sandboxWorkspace); + // Negative: the host workspace must not appear as the effective + // directory, otherwise the helper's fast path would suppress the + // remap and the model would see host paths. + expect(arg.effectiveWorkspaceDir).not.toBe(hostWorkspace); + } finally { + await fsp.rm(sandboxWorkspace, { force: true, recursive: true }); + } + }); + }); +}); diff --git a/extensions/copilot/src/attempt.ts b/extensions/copilot/src/attempt.ts new file mode 100644 index 000000000000..d72b91f7676f --- /dev/null +++ b/extensions/copilot/src/attempt.ts @@ -0,0 +1,1140 @@ +import fsp from "node:fs/promises"; +import type { MessageOptions, SessionConfig, Tool as SdkTool } from "@github/copilot-sdk"; +import type { + AgentHarnessAttemptParams, + AgentHarnessAttemptResult, + AgentMessage, + SandboxContext, +} from "openclaw/plugin-sdk/agent-harness-runtime"; +import { + detectAndLoadAgentHarnessPromptImages, + resolveAttemptFsWorkspaceOnly, + resolveAttemptSpawnWorkspaceDir, + resolveSandboxContext as defaultResolveSandboxContext, + resolveSessionAgentIds, + resolveUserPath, +} from "openclaw/plugin-sdk/agent-harness-runtime"; +import { resolveCopilotAuth } from "./auth-bridge.js"; +import { + createInfiniteSessionConfig, + type CopilotInfiniteSessionOptions, +} from 
"./compaction-bridge.js"; +import { + attachCopilotMirrorIdentity, + dualWriteCopilotTranscriptBestEffort, +} from "./dual-write-transcripts.js"; +import { + attachEventBridge, + type AssistantMessage, + type AssistantUsageSnapshot, + type OnAssistantDeltaPayload, + type SessionLike, +} from "./event-bridge.js"; +import { createHooksBridge, type CopilotHooksConfig } from "./hooks-bridge.js"; +import { + createPermissionBridge, + rejectAllPolicy, + type CopilotPermissionPolicy, +} from "./permission-bridge.js"; +import { + classifyResumeFailure, + computeReplayMetadata, + copilotToolMetasHavePotentialSideEffects, + decideReplayAction, +} from "./replay-shim.js"; +import type { ClientCreateOptions, CopilotClientPool, PoolKey, PooledClient } from "./runtime.js"; +import { createCopilotToolBridge } from "./tool-bridge.js"; +import { resolveCopilotWorkspaceBootstrapContext } from "./workspace-bootstrap.js"; + +const SUPPORTED_PROVIDERS = new Set(["github-copilot"]); + +type AttemptResultWithSdkSessionId = AgentHarnessAttemptResult & { sdkSessionId?: string }; +type PromptErrorWithCode = Error & { code?: string; cause?: unknown }; +// NOTE(plugin-sdk-widening): AttemptParamsLike can be removed once +// openclaw/plugin-sdk/agent-harness-runtime declares auth, messages, +// onAssistantDelta, and initialReplayState.sdkSessionId fields. Tracked by +// project openclaw-copilot-harness; reviewer-attempt-bridge note. 

/**
 * `AgentHarnessAttemptParams` widened with the optional, Copilot-specific
 * fields this module reads off the attempt params. Every added field is
 * optional, so a plain `AgentHarnessAttemptParams` value is assignable.
 */
type AttemptParamsLike = AgentHarnessAttemptParams & {
  auth?: {
    gitHubToken?: string;
    profileId?: string;
    profileVersion?: string;
    useLoggedInUser?: boolean;
  };
  copilotHome?: string;
  cwd?: string;
  enableSessionTelemetry?: boolean;
  hooksConfig?: CopilotHooksConfig;
  infiniteSessionConfig?: CopilotInfiniteSessionOptions;
  initialReplayState?: AgentHarnessAttemptParams["initialReplayState"] & { sdkSessionId?: string };
  messages?: AgentMessage[];
  model?: string | { api?: string; id?: string; input?: string[]; provider?: string };
  // May be sync or async. `Promise` needs its type argument to compile;
  // the resolved value is not part of the contract, hence `Promise<void>`.
  onAssistantDelta?: (payload: OnAssistantDeltaPayload) => void | Promise<void>;
  permissionPolicy?: CopilotPermissionPolicy;
  profileVersion?: string;
  reasoningEffort?: "low" | "medium" | "high" | "xhigh";
  // User-visible prompt body (when distinct from `prompt`, which may
  // include runtime-expanded context). Used when synthesizing the
  // current-turn user message for the OpenClaw audit transcript so
  // dashboard/CLI history shows what the user actually typed, not the
  // internal expansion. Symmetric to `EmbeddedRunAttemptParams.transcriptPrompt`.
  transcriptPrompt?: string;
};
/** Model reference with mandatory `id` and `provider`; `api` is optional. */
type ModelRef = { api?: string; id: string; provider: string };

export type { AttemptParamsLike as CopilotPoolAcquireInput, ModelRef };
export { SUPPORTED_PROVIDERS };

/** Signature of the sandbox-context resolver imported from the plugin SDK. */
export type ResolveSandboxContextFn = typeof defaultResolveSandboxContext;

export interface CopilotAttemptDeps {
  pool: CopilotClientPool;
  now?: () => number;
  createToolBridge?: typeof createCopilotToolBridge;
  /**
   * Optional override for sandbox-context resolution. The default delegates to
   * `openclaw/plugin-sdk/agent-harness-runtime#resolveSandboxContext`, which is
   * the same path PI uses. Tests inject a stub here to avoid the real
   * resolver's side effects (container provisioning, registry writes).
+ */ + resolveSandboxContextOverride?: ResolveSandboxContextFn; + /** + * Called once with the SDK session id and pooled client immediately + * after the SDK session is created (or resumed) successfully. The + * harness uses this to track the openclawSessionId -> sdkSessionId + * mapping needed for `reset(params)` (see harness.ts). Exceptions + * thrown from this callback are swallowed so they cannot break the + * attempt. + */ + onSessionEstablished?: (info: { sdkSessionId: string; pooledClient: PooledClient }) => void; +} + +export async function runCopilotAttempt( + params: AgentHarnessAttemptParams, + deps: CopilotAttemptDeps, +): Promise { + const now = deps.now ?? Date.now; + const input = params as AttemptParamsLike; + const createToolBridge = deps.createToolBridge ?? createCopilotToolBridge; + const messages = getMessagesSnapshotInput(input); + + if (params.abortSignal?.aborted) { + return createResult(input, { + aborted: true, + externalAbort: true, + messagesSnapshot: messages, + now, + promptError: undefined, + sdkSessionId: undefined, + sessionIdUsed: input.sessionId, + }); + } + + const modelRef = resolveModelRef(input); + if (!SUPPORTED_PROVIDERS.has(modelRef.provider)) { + return createResult(input, { + messagesSnapshot: messages, + now, + promptError: createPromptError( + "model_not_supported", + `[copilot-attempt] provider ${modelRef.provider} is not supported at MVP (subscription Copilot models only; BYOK arrives via byok-mapping-skeleton)`, + ), + sdkSessionId: undefined, + sessionIdUsed: input.sessionId, + }); + } + + let abortRequested = false; + let aborted = false; + let externalAbort = false; + let settled = false; + let sentTurnStarted = false; + let timedOut = false; + let promptError: Error | undefined; + let sdkSessionId: string | undefined; + let sessionIdUsed = input.sessionId; + let disconnectError: Error | undefined; + let handle: PooledClient | undefined; + let session: SessionLike | undefined; + let bridge: ReturnType | undefined; 
+ let releaseError: Error | undefined; + let downgradedFromResume = false; + let resumeFailureRecovered = false; + // True when a wrapped tool fired `sessions_yield`. Propagated into + // the final attempt result so the parent runner can mark liveness + // as paused and stop_reason as `end_turn`, matching the in-tree PI + // (`src/agents/pi-embedded-runner/run/attempt.ts:1107-1113`) and + // codex (`extensions/codex/src/app-server/run-attempt.ts:539,1739`) + // behavior. See `EmbeddedRunAttemptResult.yieldDetected` at + // `src/agents/pi-embedded-runner/run/types.ts:139`. + let yieldDetected = false; + + const onAbort = () => { + abortRequested = true; + externalAbort = true; + aborted = true; + if (settled || !sentTurnStarted || !session) { + return; + } + void session.abort().catch(() => undefined); + }; + + params.abortSignal?.addEventListener("abort", onAbort, { once: true }); + + // Sandbox parity with PI (`src/agents/pi-embedded-runner/run/attempt.ts:1232-1244`): + // resolve the sandbox context using the same session-key derivation, then + // compute the workspace dir the SDK should see vs the original workspace + // spawned subagents should inherit. When sandbox is disabled (the default), + // `resolveSandboxContext` returns `null` and behavior is unchanged from the + // pre-fix path. + const resolvedWorkspaceForSandbox = + readResolvedAttemptPath(input.workspaceDir) ?? readResolvedAttemptPath(input.cwd); + const sandboxSessionKey = + readString((input as { sandboxSessionKey?: unknown }).sandboxSessionKey) ?? + readString((input as { sessionKey?: unknown }).sessionKey) ?? + readString(input.sessionId); + const resolveSandbox = deps.resolveSandboxContextOverride ?? 
defaultResolveSandboxContext; + let sandbox: SandboxContext | null = null; + let effectiveWorkspaceDir = resolvedWorkspaceForSandbox; + if (resolvedWorkspaceForSandbox) { + try { + sandbox = await resolveSandbox({ + config: input.config, + sessionKey: sandboxSessionKey, + workspaceDir: resolvedWorkspaceForSandbox, + }); + effectiveWorkspaceDir = sandbox?.enabled + ? sandbox.workspaceAccess === "rw" + ? resolvedWorkspaceForSandbox + : sandbox.workspaceDir + : resolvedWorkspaceForSandbox; + // Only ensure the workspace exists when sandbox redirected us to a + // newly-resolved path. The original workspace is owned by the + // orchestrator (PI's runner pre-creates it before entering the + // attempt); duplicating the mkdir here would also break long-standing + // tests that pass placeholder workspaceDir values. + if ( + sandbox?.enabled && + effectiveWorkspaceDir && + effectiveWorkspaceDir !== resolvedWorkspaceForSandbox + ) { + await fsp.mkdir(effectiveWorkspaceDir, { recursive: true }); + } + } catch (error: unknown) { + settled = true; + params.abortSignal?.removeEventListener("abort", onAbort); + if (abortRequested || params.abortSignal?.aborted) { + return createResult(input, { + aborted: true, + externalAbort: true, + messagesSnapshot: messages, + now, + promptError: undefined, + sdkSessionId: undefined, + sessionIdUsed: input.sessionId, + }); + } + return createResult(input, { + messagesSnapshot: messages, + now, + promptError: createPromptError( + "sandbox_resolution_failure", + `[copilot-attempt] sandbox resolution failed: ${toError(error).message}`, + error, + ), + sdkSessionId: undefined, + sessionIdUsed: input.sessionId, + }); + } + } + const requestedCwd = readResolvedAttemptPath(input.cwd); + if (sandbox?.enabled && requestedCwd && requestedCwd !== resolvedWorkspaceForSandbox) { + settled = true; + params.abortSignal?.removeEventListener("abort", onAbort); + return createResult(input, { + messagesSnapshot: messages, + now, + promptError: 
createPromptError( + "sandbox_cwd_override_unsupported", + "[copilot-attempt] cwd override is not supported for sandboxed Copilot runs; omit cwd or use the agent workspace as cwd", + ), + sdkSessionId: undefined, + sessionIdUsed: input.sessionId, + }); + } + const effectiveCwd = sandbox?.enabled + ? effectiveWorkspaceDir + : (requestedCwd ?? effectiveWorkspaceDir); + const { sessionAgentId } = resolveSessionAgentIds({ + sessionKey: readString((input as { sessionKey?: unknown }).sessionKey), + config: input.config, + agentId: readString(params.agentId), + }); + const effectiveFsWorkspaceOnly = resolveAttemptFsWorkspaceOnly({ + config: input.config, + sessionAgentId, + }); + const sandboxAwareSpawnWorkspaceDir = resolvedWorkspaceForSandbox + ? resolveAttemptSpawnWorkspaceDir({ + sandbox, + resolvedWorkspace: resolvedWorkspaceForSandbox, + }) + : undefined; + + const poolAcquire = resolvePoolAcquire(input); + + // Mutable session holder shared with the tool bridge so onYield + // (raised inside wrapped-tool execution) can route to the live SDK + // session's abort once it exists. The bridge is constructed before + // createSession/resumeSession resolves, so the holder is the only + // safe way to defer the binding without creating a circular dep. + // See tool-bridge.ts CopilotSessionHolder. + const sessionRef: { current: SessionLike | undefined } = { current: undefined }; + + try { + let sdkTools: SdkTool[]; + try { + const toolBridge = await createToolBridge({ + modelProvider: modelRef.provider, + modelId: modelRef.id, + agentId: readString(params.agentId) ?? "copilot", + sessionId: readString(input.sessionId) ?? "copilot-session", + sessionKey: readString((input as { sessionKey?: unknown }).sessionKey), + agentDir: readString(input.agentDir), + // Sandbox parity (`src/agents/pi-embedded-runner/run/attempt.ts:1438-1450`): + // bridged tools see the *effective* workspace (sandbox copy when not `rw`), + // while spawned subagents inherit the *original* workspace. 
+ workspaceDir: effectiveWorkspaceDir, + cwd: effectiveCwd, + sandbox, + spawnWorkspaceDir: sandboxAwareSpawnWorkspaceDir, + abortSignal: params.abortSignal, + // Forward the full attempt params so the wrapped-tool + // enforcement layer receives the same context PI does + // (identity, owner-only allowlist, auth-profile store, + // channel/routing, model context, run hooks). See + // tool-bridge.ts buildOpenClawCodingToolsOptions(). + attemptParams: input, + sessionRef, + onYieldDetected: () => { + yieldDetected = true; + }, + }); + sdkTools = toolBridge.sdkTools; + } catch (error: unknown) { + return createResult(input, { + messagesSnapshot: messages, + now, + promptError: createPromptError( + "tool_bridge_failure", + `[copilot-attempt] tool-bridge construction failed: ${toError(error).message}`, + error, + ), + sdkSessionId: undefined, + sessionIdUsed: input.sessionId, + }); + } + + handle = await deps.pool.acquire(poolAcquire.key, poolAcquire.options); + const client = handle.client; + // Load OpenClaw workspace bootstrap files (SOUL.md, IDENTITY.md, + // HEARTBEAT.md, ...) before constructing the SDK SessionConfig so + // persona/identity/heartbeat reach the model via + // `SessionConfig.systemMessage` (append mode). Mirrors codex's + // `buildCodexWorkspaceBootstrapContext` call in run-attempt.ts. + // Failures here are non-fatal: workspace-bootstrap returns + // `instructions: undefined` and the session proceeds without the + // OpenClaw bootstrap block (SDK still loads AGENTS.md natively). + const workspaceBootstrap = await resolveCopilotWorkspaceBootstrapContext({ + attempt: input, + // Pair with `createSessionConfig`'s `workingDirectory: + // effectiveWorkspaceDir` (round-8 [P1]) so bootstrap context + // paths rendered into `SessionConfig.systemMessage` reflect + // the sandbox copy when a `ro` / `none` sandbox redirected + // the workspace. 
Without this remap the model would see + // host-workspace paths while its native loader and bridged + // tools all operate in the sandbox copy. Mirrors PI's + // `remapInjectedContextFilesToWorkspace` call at + // `src/agents/pi-embedded-runner/run/attempt.ts:1595`. + effectiveWorkspaceDir, + warn: (message) => console.warn(message), + }); + const sessionConfig = createSessionConfig( + input, + modelRef.id, + sdkTools, + poolAcquire.auth, + workspaceBootstrap.instructions, + effectiveWorkspaceDir, + effectiveCwd, + ); + const replayDecision = decideReplayAction({ + sdkSessionId: input.initialReplayState?.sdkSessionId, + replayInvalid: input.initialReplayState?.replayInvalid, + }); + downgradedFromResume = replayDecision.downgradedFromResume; + const resumeSessionId = + replayDecision.action === "resume" ? replayDecision.sdkSessionId : undefined; + + // SAFETY: replay-shim owns the create/resume decision and the + // recovery policy when resumeSession fails. See replay-shim.ts. + // continuePendingWork is always false here so suspended tool/ + // permission work cannot be replayed implicitly — replay-shim's + // worst-case-wins replayMetadata is the only signal the + // orchestrator uses to decide whether the next attempt is safe. + if (resumeSessionId) { + try { + session = (await client.resumeSession(resumeSessionId, { + ...sessionConfig, + continuePendingWork: false, + })) as unknown as SessionLike; + } catch (error: unknown) { + const classification = classifyResumeFailure(error); + if (!classification.recoverable) { + throw error; + } + // Downgrade silently: the prior SDK session is gone, so start a + // fresh one. replayMetadata will reflect replaySafe:false via + // resumeFailureRecovered so the orchestrator does not blindly + // retry the same prompt with stale assumptions. 
+ resumeFailureRecovered = true; + session = (await client.createSession(sessionConfig)) as unknown as SessionLike; + } + } else { + session = (await client.createSession(sessionConfig)) as unknown as SessionLike; + } + // Bind the session holder so the tool bridge's onYield callback + // can abort the live SDK session if a wrapped tool yields. + sessionRef.current = session; + + // After a recovered resume, the prior sdkSessionId no longer exists + // server-side, so don't fall back to it: only the freshly-created + // session's id is valid. + sdkSessionId = readSessionId(session) ?? (resumeFailureRecovered ? undefined : resumeSessionId); + sessionIdUsed = sdkSessionId ?? input.sessionId; + if (sdkSessionId && deps.onSessionEstablished) { + try { + deps.onSessionEstablished({ sdkSessionId, pooledClient: handle }); + } catch { + // never let session-tracking callbacks break attempts + } + } + bridge = attachEventBridge(session, { + onAssistantDelta: input.onAssistantDelta, + getSdkSessionId: () => sdkSessionId, + isAborted: () => aborted, + }); + + const messageOptions = await createMessageOptions(input, { + effectiveCwd, + effectiveWorkspaceDir, + sandbox, + workspaceOnly: effectiveFsWorkspaceOnly, + }); + if (abortRequested || params.abortSignal?.aborted) { + aborted = true; + externalAbort = true; + } else { + sentTurnStarted = true; + const result = await session.sendAndWait(messageOptions, input.timeoutMs); + await bridge.awaitDeltaChain(); + if (!bridge.recordSendResult(result) && !aborted) { + // SDK sendAndWait returning undefined is treated as a timeout by the + // capability inventory. Do not call session.abort() here: OpenClaw may + // resume the in-flight SDK session on the next attempt. 
+ timedOut = true; + } + const snap = bridge.snapshot(); + if (!promptError && !timedOut && !aborted && snap.streamError) { + promptError = snap.streamError; + } + } + } catch (error: unknown) { + if (!aborted) { + if (isSdkSendAndWaitTimeoutError(error)) { + // The SDK's sendAndWait timeout rejects with a deterministic + // message but explicitly does NOT abort in-flight agent work + // (see isSdkSendAndWaitTimeoutError docstring and + // node_modules/@github/copilot-sdk/dist/session.js:156-164). + // Mark timedOut so createResult's computeReplayMetadata flips + // to side-effect-risky and the orchestrator's replay-shim can + // decide whether to resume or restart. Do NOT call + // session.abort() here: the orchestrator may resume the + // in-flight SDK session on the next attempt (the SDK keeps + // the server-side session intact across this kind of timeout). + timedOut = true; + // Flush any in-flight delta promise chain so the snapshot + // built below in `finally` includes the deltas the SDK already + // delivered before the timer fired. + try { + await bridge?.awaitDeltaChain(); + } catch { + // delta-flush failure must not mask the timeout state + } + } else { + promptError = toError(error); + } + } + } finally { + settled = true; + bridge?.detach(); + params.abortSignal?.removeEventListener("abort", onAbort); + + if (session) { + try { + await session.disconnect(); + } catch (error: unknown) { + disconnectError = toError(error); + // A timeout is a higher-fidelity signal than a cleanup-time + // disconnect failure; don't let a stale disconnect error + // mask the timeout classification the replay-shim depends on. 
+ if (!promptError && !timedOut) { + promptError = disconnectError; + } + } + } + + if (handle) { + try { + await deps.pool.release(handle); + } catch (error: unknown) { + const releaseFailure = toError(error); + if (promptError) { + console.warn("[copilot-attempt] pool.release failed after primary error", releaseFailure); + } else { + releaseError = releaseFailure; + } + } + } + } + + if (releaseError) { + throw releaseError; + } + + const snap = bridge?.snapshot(); + const assistantTexts = bridge?.finalizeAssistantTexts() ?? []; + const lastAssistant = bridge?.buildAssistantMessage({ modelRef, now }); + + // Dogfood finding #3 (mirror codex parity): + // + // Without this synthesis the OpenClaw audit transcript never sees + // the user's prompt for a copilot attempt. The shell's + // `persistTextTurnTranscript` skips the user write when + // `embeddedAssistantGapFill` is true (its `body` arrives as ""), + // trusting the harness to mirror it. Codex does exactly this in + // `event-projector.ts:262` by prepending + // `{role:"user", content:params.prompt, ...}` tagged `${turnId}:prompt`. + // We mirror that pattern with `${runId}:prompt` as the turn-stable + // identity so re-mirror of the same turn is a true no-op AND two + // turns sharing the same SDK session produce distinct dedupe keys + // (the latter matters once session reuse lands in harness.ts). + // + // Defensive guard: if the caller already passed the same user turn + // as the tail of `messages`, skip synthesis to avoid double-writing + // the user message. + const syntheticUserText = readString(input.transcriptPrompt) ?? readString(input.prompt); + const tailUserText = readTailUserText(messages); + const syntheticUser: AgentMessage | undefined = + syntheticUserText && syntheticUserText !== tailUserText + ? 
attachCopilotMirrorIdentity( + { role: "user", content: syntheticUserText, timestamp: now() } as AgentMessage, + `${input.runId}:prompt`, + ) + : undefined; + const taggedLastAssistant = lastAssistant + ? attachCopilotMirrorIdentity(lastAssistant, `${input.runId}:assistant:final`) + : undefined; + const messagesSnapshot: AgentMessage[] = [ + ...messages, + ...(syntheticUser ? [syntheticUser] : []), + ...(taggedLastAssistant ? [taggedLastAssistant] : []), + ]; + + // Best-effort dual-write: mirror this attempt's full message snapshot + // (user/assistant/toolResult) into the OpenClaw audit transcript at + // params.sessionFile, alongside the SDK's own session storage. The + // OpenClaw shell (attempt-execution.ts) writes only the user prompt + // and terminal assistant text; mirroring here captures intermediate + // tool calls/results for full audit/replay parity with the codex + // extension. Identity-tagged so re-emits dedupe. Errors are + // swallowed so a mirror failure cannot break the attempt. + const sessionFileForMirror = readString(input.sessionFile); + const sessionIdForScope = sessionIdUsed ?? readString(input.sessionId); + if (sessionFileForMirror && messagesSnapshot.length > 0) { + const taggedMessages = messagesSnapshot.map((message, index) => { + if ( + message.role !== "user" && + message.role !== "assistant" && + message.role !== "toolResult" + ) { + return message; + } + // Preserve any caller-attached (or upstream-attached) mirror + // identity — especially the `${runId}:prompt` / + // `${runId}:assistant:final` identities attached above — so the + // dedupe key stays turn-stable. Falling back to a per-attempt + // positional identity here is only safe for messages that don't + // already carry a logical identity; with SDK session reuse the + // positional scheme would collapse turn 2's index-0 user onto + // turn 1's index-0 user inside the same `${sdkSessionId}` + // scope. See replay-shim.ts + harness.ts session-reuse path. 
+ if (hasMirrorIdentity(message)) { + return message; + } + const identityScope = sdkSessionId ?? sessionIdForScope ?? "attempt"; + return attachCopilotMirrorIdentity(message, `${identityScope}:${message.role}:${index}`); + }); + await dualWriteCopilotTranscriptBestEffort({ + sessionFile: sessionFileForMirror, + sessionKey: readString((input as { sessionKey?: unknown }).sessionKey), + agentId: readString(input.agentId), + messages: taggedMessages, + idempotencyScope: sessionIdForScope ? `copilot:${sessionIdForScope}` : undefined, + config: (input as { config?: unknown }).config as never, + }).catch((mirrorError: unknown) => { + // Defense-in-depth: the best-effort wrapper already swallows + // mirror failures, but we double-guard here so any future + // signature change or unexpected rejection cannot break the + // attempt result. The SDK's own session storage remains + // authoritative; only the OpenClaw audit transcript would be + // missing intermediate messages for this turn. + console.warn( + "[copilot-attempt] dual-write transcript wrapper rejected unexpectedly", + mirrorError, + ); + }); + } + + return createResult(input, { + aborted, + assistantTexts, + currentAttemptAssistant: lastAssistant, + downgradedFromResume, + externalAbort, + itemLifecycle: { + activeCount: Math.max((snap?.startedCount ?? 0) - (snap?.completedCount ?? 0), 0), + completedCount: snap?.completedCount ?? 0, + startedCount: snap?.startedCount ?? 0, + }, + lastAssistant, + messagesSnapshot, + now, + promptError, + resumeFailureRecovered, + sdkSessionId, + sessionIdUsed, + timedOut, + toolMetas: snap ? 
[...snap.toolMetas] : [], + usage: snap?.usage, + yieldDetected, + }); +} + +function createResult( + params: AttemptParamsLike, + state: { + aborted?: boolean; + assistantTexts?: string[]; + currentAttemptAssistant?: AssistantMessage; + downgradedFromResume?: boolean; + externalAbort?: boolean; + itemLifecycle?: { activeCount: number; completedCount: number; startedCount: number }; + lastAssistant?: AssistantMessage; + messagesSnapshot: AgentMessage[]; + now: () => number; + promptError: Error | undefined; + resumeFailureRecovered?: boolean; + sdkSessionId?: string; + sessionIdUsed?: string; + timedOut?: boolean; + toolMetas?: Array<{ meta?: string; toolName: string }>; + usage?: AssistantUsageSnapshot; + yieldDetected?: boolean; + }, +): AttemptResultWithSdkSessionId { + const promptError = state.promptError; + const timedOut = state.timedOut === true; + const toolMetas = state.toolMetas ?? []; + const replayMetadata = computeReplayMetadata({ + priorReplayInvalid: params.initialReplayState?.replayInvalid, + priorHadPotentialSideEffects: params.initialReplayState?.hadPotentialSideEffects, + thisAttemptTimedOut: timedOut, + thisAttemptHadPotentialSideEffects: copilotToolMetasHavePotentialSideEffects(toolMetas), + thisAttemptDowngradedFromResume: state.downgradedFromResume, + thisAttemptResumeFailureRecovered: state.resumeFailureRecovered, + }); + return { + aborted: state.aborted === true, + ...(state.sdkSessionId ? { sdkSessionId: state.sdkSessionId } : {}), + assistantTexts: state.assistantTexts ?? [], + attemptUsage: state.usage, + cloudCodeAssistFormatError: false, + currentAttemptAssistant: state.currentAttemptAssistant, + didSendViaMessagingTool: false, + externalAbort: state.externalAbort === true, + idleTimedOut: false, + itemLifecycle: state.itemLifecycle ?? 
{ + activeCount: 0, + completedCount: 0, + startedCount: 0, + }, + lastAssistant: state.lastAssistant, + messagesSnapshot: state.messagesSnapshot, + messagingToolSentMediaUrls: [], + messagingToolSentTargets: [], + messagingToolSentTexts: [], + promptError, + promptErrorSource: promptError ? "prompt" : null, + replayMetadata, + sessionFileUsed: readString(params.sessionFile), + sessionIdUsed: state.sessionIdUsed ?? readString(params.sessionId) ?? "copilot-session", + timedOut, + timedOutDuringCompaction: false, + toolMetas, + yieldDetected: state.yieldDetected === true, + }; +} + +function createPromptError(code: string, message: string, cause?: unknown): PromptErrorWithCode { + const error = new Error(message) as PromptErrorWithCode; + error.code = code; + if (cause !== undefined) { + error.cause = cause; + } + return error; +} + +function createSessionConfig( + params: AttemptParamsLike, + sdkModelId: string, + sdkTools: SdkTool[], + resolvedAuth: ReturnType, + workspaceBootstrapInstructions: string | undefined, + effectiveWorkspaceDir: string | undefined, + effectiveCwd: string | undefined, +): Pick< + SessionConfig, + | "availableTools" + | "enableSessionTelemetry" + | "gitHubToken" + | "hooks" + | "instructionDirectories" + | "infiniteSessions" + | "model" + | "onPermissionRequest" + | "reasoningEffort" + | "systemMessage" + | "tools" + | "workingDirectory" +> { + const permissionPolicy = params.permissionPolicy ?? rejectAllPolicy; + const hooks = createHooksBridge(params.hooksConfig); + const infiniteSessions = createInfiniteSessionConfig(params.infiniteSessionConfig); + const systemMessageContent = createSystemMessageContent(params, workspaceBootstrapInstructions); + return { + model: sdkModelId, + // Permission decisions for SDK built-in tool kinds (shell, write, + // read, url, mcp, memory, hook) fall through to permission-bridge. 
+ // The default (`rejectAllPolicy`) keeps the harness fail-closed, + // but the primary catalog restriction is `availableTools` below + // (PR #86155 [P1] round-8): the SDK only exposes the exact set of + // bridged tool names to the model, so native shell/read/write/url/ + // mcp/memory/hook tools never appear in the catalog and cannot be + // invoked even under a permissive permission policy. The + // permission-bridge stays in place as defense-in-depth for any + // built-in kind that future SDK versions might surface outside + // `availableTools`. Every bridged tool is also registered with + // `overridesBuiltInTool: true` and `skipPermission: true` (see + // tool-bridge.ts) so 100% of tool calls go through OpenClaw's + // wrapped `execute()` which runs `runBeforeToolCallHook` (loop + // detection, trusted plugin policies, before-tool-call hooks, + // two-phase plugin approval). This mirrors the in-tree codex + // harness's split: bridged-tool enforcement happens inside the + // tool wrapper, and the SDK gate is a safety net for kinds we + // don't surface. See permission-bridge.ts and docs/plugins/copilot.md. + onPermissionRequest: createPermissionBridge(permissionPolicy), + // `onUserInputRequest` is intentionally NOT registered: per the SDK + // contract, omitting the handler hides the `ask_user` tool from the + // model entirely. This is the MVP posture — interactive ask_user + // requires routing the request to the OpenClaw channel/TUI prompt + // path (mirroring extensions/codex/src/app-server/user-input-bridge.ts), + // which is tracked as a follow-up. With the handler absent, agents + // running under this harness must make best-judgment decisions from + // the initial prompt rather than asking clarifying questions + // mid-turn. See user-input-bridge.ts for the dormant policy + // scaffolding the follow-up will reuse. + // SessionHooks: only set when the host actually supplied handlers. 
+ // createHooksBridge returns undefined for an empty config so we + // never install an empty hooks subsystem. See hooks-bridge.ts for + // the back-pointer to src/agents/harness/lifecycle-hook-helpers.ts. + ...(hooks ? { hooks } : {}), + // Session-level telemetry opt-out: only propagate when the host + // explicitly set a boolean. undefined means "use SDK default" + // (enabled for GitHub auth; disabled when a BYOK provider is set). + // Client-level OTel config is plumbed via runtime.ts / + // telemetry-bridge.ts. + ...(typeof params.enableSessionTelemetry === "boolean" + ? { enableSessionTelemetry: params.enableSessionTelemetry } + : {}), + // Infinite sessions / background compaction: only attach when the + // host provided an InfiniteSessionConfig. SDK defaults + // (`enabled: true`, background 0.80, buffer 0.95) apply when + // omitted. See compaction-bridge.ts. + ...(infiniteSessions ? { infiniteSessions } : {}), + reasoningEffort: params.reasoningEffort, + tools: sdkTools, + // Restrict the SDK's tool catalog to exactly the bridged tool names + // returned by `createCopilotToolBridge`. Without this, the SDK + // would still expose its native read/write/shell/url/mcp/memory/ + // hook tools to the model alongside our overrides, which would + // bypass OpenClaw's wrapped-tool enforcement under any permissive + // permission policy and pollute the catalog with disabled tools + // under the default reject policy. An empty list (`[]`) is + // meaningful per the SDK contract + // (`@github/copilot-sdk/dist/types.d.ts:1059-1061`): when set, + // only the listed tools are available. Derived inside this + // function (not passed as a parameter) so create/resume always + // stay coupled to the registered external `tools` array. See PR + // #86155 [P1] round-8 and ResumeSessionConfig at + // `@github/copilot-sdk/dist/types.d.ts:1198` (it picks + // `availableTools`, so the spread into `resumeSession` covers + // the resume path too). 
+ availableTools: sdkTools.map((tool) => tool.name), + workingDirectory: + effectiveCwd ?? effectiveWorkspaceDir ?? readResolvedAttemptPath(params.workspaceDir), + // When a task runs from a sub-cwd, keep SDK-native project docs + // (AGENTS.md, .github/copilot-instructions.md) visible from the + // canonical workspace too; workspace-bootstrap filters AGENTS.md + // because the SDK owns those instruction files. + ...(effectiveWorkspaceDir && effectiveCwd && effectiveCwd !== effectiveWorkspaceDir + ? { instructionDirectories: [effectiveWorkspaceDir] } + : {}), + // Session-level GitHub token. INDEPENDENT of the client-level + // token in `CopilotClientOptions.gitHubToken` (set in + // `resolvePoolAcquire().options`). Per the SDK contract + // (`@github/copilot-sdk/dist/types.d.ts:1168-1178`), the client- + // level token authenticates the CLI process while the session- + // level token determines the identity used for content exclusion, + // model routing, and quota — and is sent on BOTH `createSession` + // and `resumeSession` (`ResumeSessionConfig` picks `gitHubToken` + // at types.d.ts:1198). Omitted when `useLoggedInUser` is the + // resolved mode — passing both would be contradictory and the SDK + // already implies content-exclusion/quota from the logged-in + // identity in that mode. + ...(resolvedAuth.authMode === "gitHubToken" && resolvedAuth.gitHubToken + ? { gitHubToken: resolvedAuth.gitHubToken } + : {}), + // OpenClaw workspace bootstrap plus per-turn runtime guidance + // injected via the SDK's `systemMessage` field in append mode: + // SDK foundation + OpenClaw context. Append keeps every SDK + // guardrail intact while ensuring persona/identity/heartbeat and + // channel policy guidance reach the model without native reads. + // AGENTS.md and .github/copilot-instructions.md are filtered by + // workspace-bootstrap.ts because the SDK auto-loads them from + // `workingDirectory` (see `@github/copilot-sdk/dist/types.d.ts` + // L1036). 
Omitted when there is no OpenClaw-owned context so the + // SDK default foundation applies. + ...(systemMessageContent + ? { + systemMessage: { + mode: "append" as const, + content: systemMessageContent, + }, + } + : {}), + }; +} + +async function createMessageOptions( + params: AttemptParamsLike, + context: { + effectiveCwd: string | undefined; + effectiveWorkspaceDir: string | undefined; + sandbox: SandboxContext | null; + workspaceOnly: boolean; + }, +): Promise { + const attachments = createPromptImageAttachments(await resolvePromptImages(params, context)); + return attachments.length > 0 + ? { prompt: params.prompt, attachments } + : { prompt: params.prompt }; +} + +function createPromptImageAttachments( + images: unknown[], +): NonNullable { + return images.flatMap((image, index) => { + if ( + !image || + typeof image !== "object" || + (image as { type?: unknown }).type !== "image" || + typeof (image as { data?: unknown }).data !== "string" || + typeof (image as { mimeType?: unknown }).mimeType !== "string" + ) { + return []; + } + return [ + { + type: "blob" as const, + data: (image as { data: string }).data, + mimeType: (image as { mimeType: string }).mimeType, + displayName: `prompt-image-${index + 1}`, + }, + ]; + }); +} + +async function resolvePromptImages( + params: AttemptParamsLike, + context: { + effectiveCwd: string | undefined; + effectiveWorkspaceDir: string | undefined; + sandbox: SandboxContext | null; + workspaceOnly: boolean; + }, +): Promise { + const workspaceDir = + context.effectiveCwd ?? + context.effectiveWorkspaceDir ?? + readResolvedAttemptPath(params.cwd) ?? + readResolvedAttemptPath(params.workspaceDir); + if (!workspaceDir) { + return []; + } + const localRoots = + context.workspaceOnly && context.effectiveWorkspaceDir + ? 
[context.effectiveWorkspaceDir] + : undefined; + const result = await detectAndLoadAgentHarnessPromptImages({ + prompt: params.prompt, + workspaceDir, + model: resolveImageCapabilityModel(params), + existingImages: Array.isArray(params.images) ? params.images : undefined, + imageOrder: Array.isArray(params.imageOrder) ? params.imageOrder : undefined, + config: params.config, + workspaceOnly: context.workspaceOnly, + localRoots, + sandbox: + context.sandbox?.enabled && context.sandbox.fsBridge + ? { root: context.sandbox.workspaceDir, bridge: context.sandbox.fsBridge } + : undefined, + }); + return result.images; +} + +function resolveImageCapabilityModel(params: AttemptParamsLike): { input?: string[] } { + const model = params.model; + if (model && typeof model === "object" && Array.isArray((model as { input?: unknown }).input)) { + return { input: (model as { input: string[] }).input }; + } + return { input: ["image"] }; +} + +function createSystemMessageContent( + params: AttemptParamsLike, + workspaceBootstrapInstructions: string | undefined, +): string | undefined { + const sections: string[] = []; + const bootstrap = workspaceBootstrapInstructions?.trim(); + if (bootstrap) { + sections.push(bootstrap); + } + const extraSystemPrompt = readString(params.extraSystemPrompt)?.trim(); + if (extraSystemPrompt && !isRawCopilotModelRun(params)) { + const contextHeader = + params.promptMode === "minimal" ? "## Subagent Context" : "## Group Chat Context"; + sections.push(`${contextHeader}\n${extraSystemPrompt}`); + } + return sections.length > 0 ? sections.join("\n\n") : undefined; +} + +function isRawCopilotModelRun(params: AttemptParamsLike): boolean { + return params.modelRun === true || params.promptMode === "none"; +} + +function getMessagesSnapshotInput(params: AttemptParamsLike): AgentMessage[] { + return Array.isArray(params.messages) ? [...params.messages] : []; +} + +// Returns the trimmed plain-text content of the tail user message in +// `messages`, if any. 
// Used to skip synthetic-user injection when the
// caller already passed the current turn's user prompt as the last
// entry of `params.messages`, which would otherwise produce a duplicate
// user record in the audit transcript.
//
// NOTE: no trimming is applied here. A string `content` is returned
// exactly as stored; for array `content` the first part with
// `type === "text"` and a non-empty string `text` is returned. Any
// other shape (or a non-user tail, or an empty list) yields undefined.
function readTailUserText(messages: AgentMessage[]): string | undefined {
  const tail = messages[messages.length - 1];
  if (!tail || tail.role !== "user") {
    return undefined;
  }
  const content = (tail as { content?: unknown }).content;
  if (typeof content === "string") {
    return content;
  }
  if (Array.isArray(content)) {
    for (const part of content) {
      if (part && typeof part === "object" && (part as { type?: unknown }).type === "text") {
        const text = (part as { text?: unknown }).text;
        if (typeof text === "string" && text.length > 0) {
          return text;
        }
      }
    }
  }
  return undefined;
}

// True when an AgentMessage already carries a stable mirror identity
// (e.g. the `${runId}:prompt` / `${runId}:assistant:final` identities
// attached in attempt.ts before the dual-write, or any caller-attached
// identity from a prior turn). Keep this in sync with the
// MIRROR_IDENTITY_META_KEY constant in dual-write-transcripts.ts; we
// duplicate the read here instead of importing the helper to avoid
// widening the module's public surface for what is otherwise a pure
// guard. See attempt.ts dual-write tagging block.
//
// Concretely: the message must have a plain-object (non-array)
// `__openclaw` property whose `mirrorIdentity` is a non-empty string.
function hasMirrorIdentity(message: AgentMessage): boolean {
  const record = message as unknown as { __openclaw?: unknown };
  const meta = record["__openclaw"];
  if (!meta || typeof meta !== "object" || Array.isArray(meta)) {
    return false;
  }
  // `Record` needs both type arguments; the metadata bag is opaque here,
  // so values are `unknown` and narrowed by the typeof check below.
  const id = (meta as Record<string, unknown>).mirrorIdentity;
  return typeof id === "string" && id.length > 0;
}

// Reads the SDK session identifier, preferring `sessionId` and falling
// back to `id`. Returns undefined when there is no session or neither
// field holds a non-empty string.
function readSessionId(session: SessionLike | undefined): string | undefined {
  if (!session) {
    return undefined;
  }
  return readString(session.sessionId) ?? readString(session.id);
}

/**
 * Narrow an unknown value to a non-empty string.
 *
 * @returns `value` when it is a string of length >= 1, otherwise
 * `undefined`. Whitespace-only strings are returned unchanged; callers
 * that care must trim themselves.
 */
export function readString(value: unknown): string | undefined {
  return typeof value === "string" && value.length > 0 ? value : undefined;
}

// Reads a path-like attempt parameter: non-strings, empty strings and
// whitespace-only strings yield undefined; everything else is trimmed.
// On non-Windows hosts a value that looks like a Windows drive path
// (`C:\...` or `C:/...`) is returned as-is instead of being passed to
// `resolveUserPath` (presumably so a foreign-platform absolute path is
// not re-resolved against the local cwd; confirm against callers).
function readResolvedAttemptPath(value: unknown): string | undefined {
  const raw = readString(value)?.trim();
  if (!raw) {
    return undefined;
  }
  if (process.platform !== "win32" && /^[A-Za-z]:[\\/]/.test(raw)) {
    return raw;
  }
  return resolveUserPath(raw);
}

/**
 * Build a `ModelRef` from the attempt params.
 *
 * When `params.model` is an object, its `api` / `id` / `provider`
 * fields win. Otherwise a string `params.model` is used as the id.
 * In both cases missing values fall back to the top-level
 * `params.modelId` / `params.provider`, and finally to the
 * `"unknown-model"` / `"unknown-provider"` placeholders. `api` is only
 * populated on the object path.
 */
export function resolveModelRef(params: AttemptParamsLike): ModelRef {
  const rawModel = params.model;
  if (rawModel && typeof rawModel === "object") {
    return {
      api: readString(rawModel.api),
      id:
        readString(rawModel.id) ??
        readString((params as { modelId?: unknown }).modelId) ??
        "unknown-model",
      provider:
        readString(rawModel.provider) ??
        readString((params as { provider?: unknown }).provider) ??
        "unknown-provider",
    };
  }
  return {
    id:
      readString(typeof rawModel === "string" ? rawModel : undefined) ??
      readString((params as { modelId?: unknown }).modelId) ??
      "unknown-model",
    provider: readString((params as { provider?: unknown }).provider) ?? "unknown-provider",
  };
}

/**
 * Resolve auth for an attempt and derive the client-pool key plus the
 * client creation options from it.
 *
 * The pool key always carries `agentId`, `authMode` and `copilotHome`;
 * `authProfileId` / `authProfileVersion` are included only in
 * `gitHubToken` mode. The client options carry the token only in
 * `gitHubToken` mode and set `useLoggedInUser` only in that mode's
 * counterpart. Any error thrown by `resolveCopilotAuth` propagates.
 */
export function resolvePoolAcquire(params: AttemptParamsLike): {
  key: PoolKey;
  options: ClientCreateOptions;
  /**
   * The resolved auth result is returned so call sites that build a
   * `SessionConfig` immediately afterwards (attempt.ts +
   * side-question.ts) can populate `SessionConfig.gitHubToken`
   * without re-resolving auth. `SessionConfig.gitHubToken` is
   * INDEPENDENT of `CopilotClientOptions.gitHubToken` per the SDK
   * contract (`@github/copilot-sdk/dist/types.d.ts:1168-1178`): the
   * client-level token authenticates the CLI process, while the
   * session-level token determines the identity used for content
   * exclusion, model routing, and quota. Both `createSession` and
   * `resumeSession` (`ResumeSessionConfig` at types.d.ts:1198) honor
   * the session-level field, so per-session multitenancy requires
   * setting both.
   */
  auth: ReturnType<typeof resolveCopilotAuth>;
} {
  const resolved = resolveCopilotAuth({
    agentId: readString(params.agentId),
    agentDir: readString(params.agentDir),
    workspaceDir: readString(params.workspaceDir),
    copilotHome: readString(params.copilotHome),
    auth: params.auth,
    // Contract-resolved auth (EmbeddedRunAttemptParams): the production
    // main path for agents with a configured `github-copilot` auth
    // profile. Falling through to env / useLoggedInUser when absent
    // keeps the direct-CLI / dogfood paths working unchanged.
    resolvedApiKey: readString(params.resolvedApiKey),
    authProfileId: readString(params.authProfileId),
    profileVersion: readString(params.profileVersion),
  });

  return {
    key: {
      agentId: resolved.agentId,
      authMode: resolved.authMode,
      ...(resolved.authMode === "gitHubToken"
        ? {
            authProfileId: resolved.authProfileId,
            authProfileVersion: resolved.authProfileVersion,
          }
        : {}),
      copilotHome: resolved.copilotHome,
    },
    options: {
      copilotHome: resolved.copilotHome,
      cwd: readString(params.cwd) ?? readString(params.workspaceDir),
      gitHubToken: resolved.authMode === "gitHubToken" ? resolved.gitHubToken : undefined,
      useLoggedInUser: resolved.authMode === "useLoggedInUser",
    },
    auth: resolved,
  };
}

/**
 * Coerce an arbitrary thrown value into an `Error`.
 *
 * `Error` instances are returned unchanged (same reference); anything
 * else is wrapped as `new Error(String(value))`.
 */
export function toError(error: unknown): Error {
  return error instanceof Error ? error : new Error(String(error));
}

/**
 * Detect the @github/copilot-sdk `session.sendAndWait` timeout
 * rejection shape.
The SDK's `sendAndWait` races the internal + * `session.idle` event against a timer; when the timer fires first + * it REJECTS the promise with + * `new Error(`Timeout after ${effectiveTimeout}ms waiting for + * session.idle`)` (see + * `node_modules/@github/copilot-sdk/dist/session.js:156-164`), and + * the SDK docs explicitly note the timeout "does not abort in-flight + * agent work". The caller is therefore responsible for setting the + * timed-out state and (for paths where in-flight work should be + * stopped) calling `session.abort()`. + * + * Keep the regex anchored and narrow so unrelated errors that happen + * to mention "Timeout" are NOT mis-classified. The shape is a literal + * template-string concatenation in the 1.0.0-beta line; a minor + * version bump that changes the wording will safely fall through to + * the generic prompt-error path. + */ +export function isSdkSendAndWaitTimeoutError(error: unknown): boolean { + if (error === null || typeof error !== "object") { + return false; + } + const message = (error as { message?: unknown }).message; + if (typeof message !== "string") { + return false; + } + return /^Timeout after \d+ms waiting for session\.idle$/.test(message); +} diff --git a/extensions/copilot/src/auth-bridge.test.ts b/extensions/copilot/src/auth-bridge.test.ts new file mode 100755 index 000000000000..eb835eb69d92 --- /dev/null +++ b/extensions/copilot/src/auth-bridge.test.ts @@ -0,0 +1,524 @@ +import { createHash } from "node:crypto"; +import { resolve, join } from "node:path"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + COPILOT_DEFAULT_AGENT_ID, + COPILOT_TOKEN_PROFILE_ERROR, + normalizeCopilotHomePath, + resolveCopilotAuth, + sanitizeAgentId, + tokenFingerprint, +} from "./auth-bridge.js"; + +function cleanEnv(): NodeJS.ProcessEnv { + return {} as NodeJS.ProcessEnv; +} + +const FAKE_HOME = "/fake-home"; +const fakeHomeDir = () => FAKE_HOME; + +describe("sanitizeAgentId", () => { + 
it("returns default for null/undefined/empty", () => { + expect(sanitizeAgentId(undefined)).toBe(COPILOT_DEFAULT_AGENT_ID); + expect(sanitizeAgentId(null)).toBe(COPILOT_DEFAULT_AGENT_ID); + expect(sanitizeAgentId("")).toBe(COPILOT_DEFAULT_AGENT_ID); + expect(sanitizeAgentId(" ")).toBe(COPILOT_DEFAULT_AGENT_ID); + }); + + it("lowercases and accepts alnum + dash + underscore", () => { + expect(sanitizeAgentId("Agent-1")).toBe("agent-1"); + expect(sanitizeAgentId("my_agent_42")).toBe("my_agent_42"); + expect(sanitizeAgentId("a")).toBe("a"); + }); + + it("rejects path-traversal segments and falls back to default", () => { + expect(sanitizeAgentId("../etc/passwd")).toBe(COPILOT_DEFAULT_AGENT_ID); + expect(sanitizeAgentId("../..")).toBe(COPILOT_DEFAULT_AGENT_ID); + expect(sanitizeAgentId("a/b")).toBe(COPILOT_DEFAULT_AGENT_ID); + expect(sanitizeAgentId("a\\b")).toBe(COPILOT_DEFAULT_AGENT_ID); + expect(sanitizeAgentId("a\u0000b")).toBe(COPILOT_DEFAULT_AGENT_ID); + }); + + it("rejects ids that do not start with alnum", () => { + expect(sanitizeAgentId("-foo")).toBe(COPILOT_DEFAULT_AGENT_ID); + expect(sanitizeAgentId("_bar")).toBe(COPILOT_DEFAULT_AGENT_ID); + }); + + it("rejects ids longer than 64 chars", () => { + expect(sanitizeAgentId("a".repeat(64))).toBe("a".repeat(64)); + expect(sanitizeAgentId("a".repeat(65))).toBe(COPILOT_DEFAULT_AGENT_ID); + }); +}); + +describe("tokenFingerprint", () => { + it("returns a stable sha256-prefixed 12-hex fingerprint", () => { + const a = tokenFingerprint("hello"); + const b = tokenFingerprint("hello"); + expect(a).toBe(b); + expect(a.startsWith("sha256:")).toBe(true); + expect(a.length).toBe("sha256:".length + 12); + const expected = "sha256:" + createHash("sha256").update("hello").digest("hex").slice(0, 12); + expect(a).toBe(expected); + }); + + it("differs across distinct inputs (no collision for common values)", () => { + expect(tokenFingerprint("alpha")).not.toBe(tokenFingerprint("beta")); + 
expect(tokenFingerprint("token-v1")).not.toBe(tokenFingerprint("token-v2")); + }); + + it("never contains the raw token", () => { + const token = "ghp_abcdefghijklmnop"; + expect(tokenFingerprint(token).includes(token)).toBe(false); + }); +}); + +describe("resolveCopilotAuth - copilotHome resolution", () => { + it("uses explicit copilotHome when provided", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + copilotHome: "/explicit/home", + env: cleanEnv(), + homeDir: fakeHomeDir, + }); + expect(result.copilotHome).toBe(resolve("/explicit/home")); + }); + + it("falls back to /copilot when copilotHome is absent", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + agentDir: "/agent/dir", + env: cleanEnv(), + homeDir: fakeHomeDir, + }); + expect(result.copilotHome).toBe(resolve(join("/agent/dir", "copilot"))); + }); + + it("synthesises per-agent default from homeDir when no path is given", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + env: cleanEnv(), + homeDir: fakeHomeDir, + }); + expect(result.copilotHome).toBe( + resolve(join(FAKE_HOME, ".openclaw", "agents", "agent-1", "copilot")), + ); + }); + + it("respects OPENCLAW_HOME env var as the home root", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + env: { OPENCLAW_HOME: "/custom/openclaw" } as NodeJS.ProcessEnv, + homeDir: fakeHomeDir, + }); + expect(result.copilotHome).toBe( + resolve(join("/custom/openclaw", ".openclaw", "agents", "agent-1", "copilot")), + ); + }); + + it("uses the default agent id when agentId is invalid/missing", () => { + const result = resolveCopilotAuth({ + agentId: undefined, + env: cleanEnv(), + homeDir: fakeHomeDir, + }); + expect(result.agentId).toBe(COPILOT_DEFAULT_AGENT_ID); + expect(result.copilotHome).toBe( + resolve(join(FAKE_HOME, ".openclaw", "agents", COPILOT_DEFAULT_AGENT_ID, "copilot")), + ); + }); + + it("isolates per-agent copilotHome between agents", () => { + const a = resolveCopilotAuth({ + 
agentId: "agent-a", + env: cleanEnv(), + homeDir: fakeHomeDir, + }); + const b = resolveCopilotAuth({ + agentId: "agent-b", + env: cleanEnv(), + homeDir: fakeHomeDir, + }); + expect(a.copilotHome).not.toBe(b.copilotHome); + expect(a.copilotHome.endsWith(join("agent-a", "copilot"))).toBe(true); + expect(b.copilotHome.endsWith(join("agent-b", "copilot"))).toBe(true); + }); +}); + +describe("resolveCopilotAuth - auth mode resolution", () => { + it("returns useLoggedInUser when auth.useLoggedInUser=true (ignoring gitHubToken)", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + auth: { useLoggedInUser: true, gitHubToken: "should-be-ignored" }, + env: { GITHUB_TOKEN: "env-token" } as NodeJS.ProcessEnv, + homeDir: fakeHomeDir, + }); + expect(result.authMode).toBe("useLoggedInUser"); + expect(result.gitHubToken).toBeUndefined(); + expect(result.authProfileId).toBeUndefined(); + expect(result.authProfileVersion).toBeUndefined(); + }); + + it("returns gitHubToken when explicit token + profile id/version provided", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + auth: { gitHubToken: "tok", profileId: "p", profileVersion: "v1" }, + env: cleanEnv(), + homeDir: fakeHomeDir, + }); + expect(result.authMode).toBe("gitHubToken"); + expect(result.gitHubToken).toBe("tok"); + expect(result.authProfileId).toBe("p"); + expect(result.authProfileVersion).toBe("v1"); + }); + + it("accepts legacy top-level profileVersion + authProfileId fallbacks", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + auth: { gitHubToken: "tok" }, + authProfileId: "legacy-p", + profileVersion: "legacy-v1", + env: cleanEnv(), + homeDir: fakeHomeDir, + }); + expect(result.authMode).toBe("gitHubToken"); + expect(result.authProfileId).toBe("legacy-p"); + expect(result.authProfileVersion).toBe("legacy-v1"); + }); + + it("throws when explicit gitHubToken is given without both profileId + profileVersion", () => { + expect(() => + resolveCopilotAuth({ + 
agentId: "agent-1", + auth: { gitHubToken: "tok" }, + env: cleanEnv(), + homeDir: fakeHomeDir, + }), + ).toThrow(COPILOT_TOKEN_PROFILE_ERROR); + + expect(() => + resolveCopilotAuth({ + agentId: "agent-1", + auth: { gitHubToken: "tok", profileId: "p" }, + env: cleanEnv(), + homeDir: fakeHomeDir, + }), + ).toThrow(COPILOT_TOKEN_PROFILE_ERROR); + + expect(() => + resolveCopilotAuth({ + agentId: "agent-1", + auth: { gitHubToken: "tok", profileVersion: "v" }, + env: cleanEnv(), + homeDir: fakeHomeDir, + }), + ).toThrow(COPILOT_TOKEN_PROFILE_ERROR); + }); + + it("defaults to useLoggedInUser when no auth signal at all", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + env: cleanEnv(), + homeDir: fakeHomeDir, + }); + expect(result.authMode).toBe("useLoggedInUser"); + expect(result.gitHubToken).toBeUndefined(); + }); +}); + +describe("resolveCopilotAuth - contract-resolved auth (resolvedApiKey + authProfileId)", () => { + it("consumes resolvedApiKey + authProfileId from the EmbeddedRunAttemptParams contract", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + resolvedApiKey: "contract-token-xyz", + authProfileId: "github-copilot:main", + env: cleanEnv(), + homeDir: fakeHomeDir, + }); + expect(result.authMode).toBe("gitHubToken"); + expect(result.gitHubToken).toBe("contract-token-xyz"); + expect(result.authProfileId).toBe("github-copilot:main"); + expect(result.authProfileVersion).toBe(tokenFingerprint("contract-token-xyz")); + }); + + it("synthesises authProfileId when contract-resolved token has no profile id", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + resolvedApiKey: "contract-token-xyz", + env: cleanEnv(), + homeDir: fakeHomeDir, + }); + expect(result.authMode).toBe("gitHubToken"); + expect(result.gitHubToken).toBe("contract-token-xyz"); + expect(result.authProfileId).toBe("pi:resolved"); + expect(result.authProfileVersion).toBe(tokenFingerprint("contract-token-xyz")); + }); + + 
it("auth.useLoggedInUser=true takes precedence over contract resolvedApiKey", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + auth: { useLoggedInUser: true }, + resolvedApiKey: "should-be-ignored", + authProfileId: "p", + env: cleanEnv(), + homeDir: fakeHomeDir, + }); + expect(result.authMode).toBe("useLoggedInUser"); + expect(result.gitHubToken).toBeUndefined(); + }); + + it("explicit auth.gitHubToken takes precedence over contract resolvedApiKey", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + auth: { gitHubToken: "explicit", profileId: "p", profileVersion: "v1" }, + resolvedApiKey: "contract-should-be-ignored", + authProfileId: "contract-profile", + env: cleanEnv(), + homeDir: fakeHomeDir, + }); + expect(result.authMode).toBe("gitHubToken"); + expect(result.gitHubToken).toBe("explicit"); + expect(result.authProfileId).toBe("p"); + expect(result.authProfileVersion).toBe("v1"); + }); + + it("contract resolvedApiKey takes precedence over env fallback", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + resolvedApiKey: "contract-token", + authProfileId: "p", + env: { + OPENCLAW_GITHUB_TOKEN: "env-should-be-ignored", + COPILOT_GITHUB_TOKEN: "copilot-env-should-be-ignored", + GH_TOKEN: "gh-env-should-be-ignored", + GITHUB_TOKEN: "github-env-should-be-ignored", + } as NodeJS.ProcessEnv, + homeDir: fakeHomeDir, + }); + expect(result.gitHubToken).toBe("contract-token"); + expect(result.authProfileId).toBe("p"); + }); + + it("falls back to env when resolvedApiKey is absent", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + authProfileId: "p", + env: { GITHUB_TOKEN: "env-only" } as NodeJS.ProcessEnv, + homeDir: fakeHomeDir, + }); + expect(result.gitHubToken).toBe("env-only"); + expect(result.authProfileId).toBe("env:GITHUB_TOKEN"); + }); +}); + +describe("resolveCopilotAuth - env var fallbacks", () => { + it("falls back to GITHUB_TOKEN with synthesised profile id + fingerprint", () => { + 
const result = resolveCopilotAuth({ + agentId: "agent-1", + env: { GITHUB_TOKEN: "env-token-123" } as NodeJS.ProcessEnv, + homeDir: fakeHomeDir, + }); + expect(result.authMode).toBe("gitHubToken"); + expect(result.gitHubToken).toBe("env-token-123"); + expect(result.authProfileId).toBe("env:GITHUB_TOKEN"); + expect(result.authProfileVersion).toBe(tokenFingerprint("env-token-123")); + }); + + it("OPENCLAW_GITHUB_TOKEN takes precedence over GITHUB_TOKEN", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + env: { + OPENCLAW_GITHUB_TOKEN: "openclaw-tok", + GITHUB_TOKEN: "github-tok", + } as NodeJS.ProcessEnv, + homeDir: fakeHomeDir, + }); + expect(result.gitHubToken).toBe("openclaw-tok"); + expect(result.authProfileId).toBe("env:OPENCLAW_GITHUB_TOKEN"); + expect(result.authProfileVersion).toBe(tokenFingerprint("openclaw-tok")); + }); + + it("falls back to COPILOT_GITHUB_TOKEN with synthesised profile id + fingerprint", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + env: { COPILOT_GITHUB_TOKEN: "copilot-tok-123" } as NodeJS.ProcessEnv, + homeDir: fakeHomeDir, + }); + expect(result.authMode).toBe("gitHubToken"); + expect(result.gitHubToken).toBe("copilot-tok-123"); + expect(result.authProfileId).toBe("env:COPILOT_GITHUB_TOKEN"); + expect(result.authProfileVersion).toBe(tokenFingerprint("copilot-tok-123")); + }); + + it("falls back to GH_TOKEN with synthesised profile id + fingerprint", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + env: { GH_TOKEN: "gh-tok-456" } as NodeJS.ProcessEnv, + homeDir: fakeHomeDir, + }); + expect(result.authMode).toBe("gitHubToken"); + expect(result.gitHubToken).toBe("gh-tok-456"); + expect(result.authProfileId).toBe("env:GH_TOKEN"); + expect(result.authProfileVersion).toBe(tokenFingerprint("gh-tok-456")); + }); + + it("OPENCLAW_GITHUB_TOKEN takes precedence over COPILOT_GITHUB_TOKEN, GH_TOKEN and GITHUB_TOKEN", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + 
env: { + OPENCLAW_GITHUB_TOKEN: "openclaw-tok", + COPILOT_GITHUB_TOKEN: "copilot-tok", + GH_TOKEN: "gh-tok", + GITHUB_TOKEN: "github-tok", + } as NodeJS.ProcessEnv, + homeDir: fakeHomeDir, + }); + expect(result.gitHubToken).toBe("openclaw-tok"); + expect(result.authProfileId).toBe("env:OPENCLAW_GITHUB_TOKEN"); + }); + + it("COPILOT_GITHUB_TOKEN takes precedence over GH_TOKEN and GITHUB_TOKEN", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + env: { + COPILOT_GITHUB_TOKEN: "copilot-tok", + GH_TOKEN: "gh-tok", + GITHUB_TOKEN: "github-tok", + } as NodeJS.ProcessEnv, + homeDir: fakeHomeDir, + }); + expect(result.gitHubToken).toBe("copilot-tok"); + expect(result.authProfileId).toBe("env:COPILOT_GITHUB_TOKEN"); + }); + + it("GH_TOKEN takes precedence over GITHUB_TOKEN", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + env: { + GH_TOKEN: "gh-tok", + GITHUB_TOKEN: "github-tok", + } as NodeJS.ProcessEnv, + homeDir: fakeHomeDir, + }); + expect(result.gitHubToken).toBe("gh-tok"); + expect(result.authProfileId).toBe("env:GH_TOKEN"); + }); + + it("token rotation in env changes the pool fingerprint (cache-busting)", () => { + const a = resolveCopilotAuth({ + agentId: "agent-1", + env: { GITHUB_TOKEN: "v1" } as NodeJS.ProcessEnv, + homeDir: fakeHomeDir, + }); + const b = resolveCopilotAuth({ + agentId: "agent-1", + env: { GITHUB_TOKEN: "v2" } as NodeJS.ProcessEnv, + homeDir: fakeHomeDir, + }); + expect(a.authProfileVersion).not.toBe(b.authProfileVersion); + }); + + it("explicit auth.useLoggedInUser=true wins over env tokens", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + auth: { useLoggedInUser: true }, + env: { OPENCLAW_GITHUB_TOKEN: "env-tok" } as NodeJS.ProcessEnv, + homeDir: fakeHomeDir, + }); + expect(result.authMode).toBe("useLoggedInUser"); + }); + + it("explicit auth.gitHubToken wins over env tokens", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + auth: { gitHubToken: "explicit", 
profileId: "p", profileVersion: "v" }, + env: { OPENCLAW_GITHUB_TOKEN: "env-tok" } as NodeJS.ProcessEnv, + homeDir: fakeHomeDir, + }); + expect(result.authMode).toBe("gitHubToken"); + expect(result.gitHubToken).toBe("explicit"); + expect(result.authProfileId).toBe("p"); + expect(result.authProfileVersion).toBe("v"); + }); + + it("ignores empty-string env tokens (treated as absent)", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + env: { + GITHUB_TOKEN: "", + OPENCLAW_GITHUB_TOKEN: "", + COPILOT_GITHUB_TOKEN: "", + GH_TOKEN: "", + } as NodeJS.ProcessEnv, + homeDir: fakeHomeDir, + }); + expect(result.authMode).toBe("useLoggedInUser"); + }); +}); + +describe("resolveCopilotAuth - defaults wiring", () => { + let originalEnv: NodeJS.ProcessEnv; + + beforeEach(() => { + originalEnv = process.env; + process.env = { ...originalEnv }; + delete process.env.GITHUB_TOKEN; + delete process.env.OPENCLAW_GITHUB_TOKEN; + delete process.env.COPILOT_GITHUB_TOKEN; + delete process.env.GH_TOKEN; + delete process.env.OPENCLAW_HOME; + }); + + afterEach(() => { + process.env = originalEnv; + }); + + it("uses process.env when env is not injected", () => { + process.env.GITHUB_TOKEN = "from-process-env"; + const result = resolveCopilotAuth({ + agentId: "agent-1", + homeDir: fakeHomeDir, + }); + expect(result.authMode).toBe("gitHubToken"); + expect(result.gitHubToken).toBe("from-process-env"); + }); + + it("uses os.homedir() when homeDir is not injected", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + }); + // We don't know the actual home, just that the resolver did not throw and + // produced an absolute path containing the per-agent suffix. 
+ expect(result.copilotHome.endsWith(join(".openclaw", "agents", "agent-1", "copilot"))).toBe( + true, + ); + }); + + it("falls back to process.cwd() if homeDir throws", () => { + const result = resolveCopilotAuth({ + agentId: "agent-1", + env: cleanEnv(), + homeDir: () => { + throw new Error("no home"); + }, + }); + // Should not throw; should produce a path under cwd. + expect(result.copilotHome.includes(join(".openclaw", "agents", "agent-1", "copilot"))).toBe( + true, + ); + }); +}); + +describe("normalizeCopilotHomePath", () => { + it("resolves to absolute and strips trailing separators", () => { + const normalized = normalizeCopilotHomePath("./foo/bar/"); + expect(normalized).toBe(resolve("./foo/bar")); + expect(normalized.endsWith("/")).toBe(false); + expect(normalized.endsWith("\\")).toBe(false); + }); + + it("is idempotent", () => { + const once = normalizeCopilotHomePath("/some/path/"); + const twice = normalizeCopilotHomePath(once); + expect(twice).toBe(once); + }); +}); diff --git a/extensions/copilot/src/auth-bridge.ts b/extensions/copilot/src/auth-bridge.ts new file mode 100755 index 000000000000..1c419a8c9970 --- /dev/null +++ b/extensions/copilot/src/auth-bridge.ts @@ -0,0 +1,321 @@ +import { createHash } from "node:crypto"; +import { homedir as osHomedir } from "node:os"; +import { join, normalize, resolve, sep } from "node:path"; + +/** + * Pure functional auth resolver for the copilot agent runtime. + * + * Scope: + * + * - Consumes the resolved auth signals that core's harness contract + * already carries on `EmbeddedRunAttemptParams` (= + * `AgentHarnessAttemptParams`): `resolvedApiKey`, `authProfileId`, + * `authProfileIdSource`. Core resolves these from the agent's + * `AuthProfileStore` via `provider-usage.auth.ts:resolveProviderAuths` + * before invoking the harness, so the harness does not re-perform + * the lookup (and could not, due to the package boundary in + * `tsconfig.package-boundary.base.json`). 
+ * - Reads optional explicit overrides from the harness attempt params + * (`auth.useLoggedInUser`, `auth.gitHubToken`) for direct CLI / test + * use cases. + * - Falls back to OPENCLAW_GITHUB_TOKEN, COPILOT_GITHUB_TOKEN, + * GH_TOKEN, or GITHUB_TOKEN env vars (in that precedence) when + * no contract-resolved token is given; synthesises a stable, + * non-reversible pool fingerprint so token rotation busts the + * client pool cleanly. + * - Computes a per-agent `copilotHome` default + * (`<home>/.openclaw/agents/<agentId>/copilot`, or + * `<agentDir>/copilot` when an agent directory is supplied) that + * respects `OPENCLAW_HOME` for the home directory root. + * - Defaults to `useLoggedInUser` when no token signal is available. + * + * Precedence (highest to lowest): + * 1. `auth.useLoggedInUser === true` (explicit user opt-in) + * 2. `auth.gitHubToken` (explicit override; requires + * `profileId` + `profileVersion`) + * 3. `resolvedApiKey` + `authProfileId` from the contract (core's + * AuthProfileStore-resolved token — the production main path for + * a configured `github-copilot` auth profile) + * 4. OPENCLAW_GITHUB_TOKEN, then COPILOT_GITHUB_TOKEN, then + * GH_TOKEN, then GITHUB_TOKEN env vars (mirrors the + * shipped `github-copilot` provider precedence so headless + * users who already follow the documented + * COPILOT_GITHUB_TOKEN / GH_TOKEN setup get the token they + * configured rather than silently falling through to the + * logged-in CLI user.) + * 5. `useLoggedInUser` (default) + */ + +export const COPILOT_TOKEN_PROFILE_ERROR = + "[copilot-attempt] gitHubToken auth requires profileId+profileVersion (pool keying safety; per Q5/Q1 decisions)"; + +export const COPILOT_DEFAULT_AGENT_ID = "copilot"; + +/** Resolved auth shape that the runtime / pool consumes. */ +export interface ResolvedCopilotAuth { + authMode: "useLoggedInUser" | "gitHubToken"; + /** Present only when authMode is "gitHubToken". */ + gitHubToken?: string; + /** Present only when authMode is "gitHubToken". 
*/ + authProfileId?: string; + /** Present only when authMode is "gitHubToken". */ + authProfileVersion?: string; + /** Absolute, normalized path. */ + copilotHome: string; + /** Validated agent id used for path defaults and pool keying. */ + agentId: string; +} + +export interface ResolveCopilotAuthInput { + agentId?: string; + agentDir?: string; + workspaceDir?: string; + copilotHome?: string; + auth?: { + gitHubToken?: string; + useLoggedInUser?: boolean; + profileId?: string; + profileVersion?: string; + }; + /** + * Contract-resolved token from core's AuthProfileStore lookup, + * carried on `EmbeddedRunAttemptParams.resolvedApiKey`. Used as the + * production main path when the agent has a configured + * `github-copilot` auth profile. + */ + resolvedApiKey?: string; + /** + * Contract-resolved auth profile id, carried on + * `EmbeddedRunAttemptParams.authProfileId`. Used for pool keying so + * concurrent agents with distinct profiles do not share a CLI + * session/state. + */ + authProfileId?: string; + /** + * Legacy top-level `profileVersion` fallback kept for back-compat + * with explicit-token (`auth.gitHubToken`) callers. The + * contract-resolved `resolvedApiKey` path synthesises a version from + * the token fingerprint because `EmbeddedRunAttemptParams` does not + * carry a `profileVersion` field. + */ + profileVersion?: string; + /** Injected for test seams. Defaults to `process.env`. */ + env?: NodeJS.ProcessEnv; + /** Injected for test seams. Defaults to `os.homedir()`. */ + homeDir?: () => string; +} + +/** + * Resolve copilot auth + copilotHome. + * + * Synchronous because we intentionally do not perform any I/O or + * cross-package credential lookups here (see file header for rationale). + * + * Throws if `gitHubToken` is supplied via `params.auth.gitHubToken` + * WITHOUT both `profileId` and `profileVersion` (the existing invariant + * from attempt.ts; preserves pool-key safety per Q5/Q1). 
/**
 * Resolve copilot auth + copilotHome.
 *
 * Synchronous because we intentionally do not perform any I/O or
 * cross-package credential lookups here (see file header for rationale).
 *
 * Auth precedence, highest first:
 *   1. `auth.useLoggedInUser === true`
 *   2. `auth.gitHubToken` (requires a profile id and a profile version)
 *   3. contract-resolved `resolvedApiKey` (+ optional `authProfileId`)
 *   4. env token fallback (see `readEnvTokenFallback`)
 *   5. `useLoggedInUser` (default)
 *
 * @param input - Attempt params plus the optional `env` / `homeDir` test seams.
 * @returns The resolved auth mode, token and pool-key identifiers (token
 *   modes only), the absolute `copilotHome`, and the sanitised agent id.
 * @throws Error with message `COPILOT_TOKEN_PROFILE_ERROR` when
 *   `auth.gitHubToken` is supplied without both a profile id and a
 *   profile version (either nested under `auth` or via the legacy
 *   top-level `authProfileId` / `profileVersion` fields).
 */
export function resolveCopilotAuth(input: ResolveCopilotAuthInput): ResolvedCopilotAuth {
  const env = input.env ?? process.env;
  const homeDir = input.homeDir ?? osHomedir;

  const agentId = sanitizeAgentId(input.agentId);
  const copilotHome = resolveCopilotHome({
    explicit: readString(input.copilotHome),
    agentDir: readString(input.agentDir),
    workspaceDir: readString(input.workspaceDir),
    agentId,
    env,
    homeDir,
  });

  const explicitToken = readString(input.auth?.gitHubToken);
  // The nested `auth.*` fields win; the top-level fields are the legacy fallbacks.
  const explicitProfileId = readString(input.auth?.profileId) ?? readString(input.authProfileId);
  const explicitProfileVersion =
    readString(input.auth?.profileVersion) ?? readString(input.profileVersion);

  // 1. Explicit opt-in to the logged-in CLI user beats every token source.
  if (input.auth?.useLoggedInUser === true) {
    return {
      authMode: "useLoggedInUser",
      copilotHome,
      agentId,
    };
  }

  // 2. Explicit token override: refuse to run without full pool-key identity.
  if (explicitToken) {
    if (!explicitProfileId || !explicitProfileVersion) {
      throw new Error(COPILOT_TOKEN_PROFILE_ERROR);
    }
    return {
      authMode: "gitHubToken",
      gitHubToken: explicitToken,
      authProfileId: explicitProfileId,
      authProfileVersion: explicitProfileVersion,
      copilotHome,
      agentId,
    };
  }

  // 3. Contract-resolved token from core's AuthProfileStore lookup. This
  // is the production main path: a configured `github-copilot` auth
  // profile flows into `EmbeddedRunAttemptParams.resolvedApiKey` and
  // `authProfileId` upstream of the harness, and we consume both here
  // so headless / cron / multi-profile runs work without env vars.
  // We synthesise the pool-key version from the token fingerprint so
  // rotation busts the cache cleanly (matching the env-fallback
  // strategy). The contract does not carry a separate `profileVersion`.
  const contractToken = readString(input.resolvedApiKey);
  if (contractToken) {
    const contractProfileId = readString(input.authProfileId);
    return {
      authMode: "gitHubToken",
      gitHubToken: contractToken,
      authProfileId: contractProfileId ?? "pi:resolved",
      authProfileVersion: tokenFingerprint(contractToken),
      copilotHome,
      agentId,
    };
  }

  // 4. Environment variables.
  const envFallback = readEnvTokenFallback(env);
  if (envFallback) {
    return {
      authMode: "gitHubToken",
      gitHubToken: envFallback.token,
      authProfileId: envFallback.profileId,
      authProfileVersion: envFallback.profileVersion,
      copilotHome,
      agentId,
    };
  }

  // 5. No token signal at all.
  return {
    authMode: "useLoggedInUser",
    copilotHome,
    agentId,
  };
}

/**
 * Validate + sanitise an agent id for use in filesystem paths and pool
 * keys.
 *
 * Mirrors the shape constraints documented by core's `normalizeAgentId`
 * / `isValidAgentId` in `src/routing/session-key.ts` (alnum + `-_`,
 * starts with alnum, lowercase, <=64 chars). We re-implement here
 * because the package boundary prevents importing from `src/`. Any
 * caller that passes an invalid id falls back to the shared default
 * (`COPILOT_DEFAULT_AGENT_ID`) rather than throwing - the harness's
 * job is to keep running with a safe default, not to validate config.
 *
 * @param value - Raw agent id; surrounding whitespace is trimmed and the
 *   id is lowercased before validation.
 * @returns The trimmed, lowercased id, or `COPILOT_DEFAULT_AGENT_ID` when
 *   the input is nullish, blank, or does not match the allowed shape.
 */
export function sanitizeAgentId(value: string | undefined | null): string {
  const trimmed = (value ?? "").trim().toLowerCase();
  if (!trimmed) {
    return COPILOT_DEFAULT_AGENT_ID;
  }
  if (!/^[a-z0-9][a-z0-9_-]{0,63}$/.test(trimmed)) {
    return COPILOT_DEFAULT_AGENT_ID;
  }
  return trimmed;
}

/**
 * Pick the absolute copilotHome directory.
 *
 * Order: explicit path, then `<agentDir>/copilot`, then the per-agent
 * default under `OPENCLAW_HOME` (or the home directory). `workspaceDir`
 * is accepted but not consulted by the resolution below.
 */
function resolveCopilotHome(args: {
  explicit: string | undefined;
  agentDir: string | undefined;
  workspaceDir: string | undefined;
  agentId: string;
  env: NodeJS.ProcessEnv;
  homeDir: () => string;
}): string {
  if (args.explicit) {
    return resolve(args.explicit);
  }
  // When the host hands us an agent directory we isolate the SDK CLI state
  // (config.json, logs/, session-store.db, session-state/) under a dedicated
  // "copilot" subdir so it cannot collide with OpenClaw's own files
  // (models.json, auth-profiles.json, ...) in the same agent directory.
  // This matches the documented layout and mirrors how the codex harness
  // isolates `<agentDir>/codex-home/`.
  if (args.agentDir) {
    return resolve(join(args.agentDir, "copilot"));
  }

  const openClawHome = readString(args.env.OPENCLAW_HOME);
  const rootHome = openClawHome ? resolve(openClawHome) : safeHomeDir(args.homeDir);
  // Per-agent isolation per proposal section 3.6:
  //   <home>/.openclaw/agents/<agentId>/copilot
  return resolve(join(rootHome, ".openclaw", "agents", args.agentId, "copilot"));
}

/**
 * Call the injected home-directory provider without letting it break
 * resolution: a throw or an empty / non-string result falls back to
 * `process.cwd()`.
 */
function safeHomeDir(homeDir: () => string): string {
  try {
    const value = homeDir();
    if (typeof value === "string" && value.length > 0) {
      return value;
    }
  } catch {
    // fall through
  }
  return process.cwd();
}

/**
 * Return the first non-empty GitHub token found in the environment, with
 * a synthesised `env:<VAR>` profile id and a fingerprint-based profile
 * version, or `undefined` when none of the variables is set.
 */
function readEnvTokenFallback(
  env: NodeJS.ProcessEnv,
): { token: string; profileId: string; profileVersion: string } | undefined {
  // OPENCLAW_GITHUB_TOKEN is the harness-specific override and stays at
  // the top so operators can pin a token without disturbing system-wide
  // gh / Copilot CLI config. The remaining entries mirror the shipped
  // `github-copilot` provider precedence
  // (COPILOT_GITHUB_TOKEN -> GH_TOKEN -> GITHUB_TOKEN, see
  // extensions/github-copilot/auth.ts:24) and the documented Copilot SDK
  // setup in docs/providers/github-copilot.md, so a headless user who
  // already configured COPILOT_GITHUB_TOKEN / GH_TOKEN and opted into
  // agentRuntime.id: "copilot" gets the token they configured rather
  // than silently falling through to the logged-in CLI user.
  const candidates: Array<{ name: string; value: string | undefined }> = [
    { name: "OPENCLAW_GITHUB_TOKEN", value: readString(env.OPENCLAW_GITHUB_TOKEN) },
    { name: "COPILOT_GITHUB_TOKEN", value: readString(env.COPILOT_GITHUB_TOKEN) },
    { name: "GH_TOKEN", value: readString(env.GH_TOKEN) },
    { name: "GITHUB_TOKEN", value: readString(env.GITHUB_TOKEN) },
  ];
  for (const { name, value } of candidates) {
    if (value) {
      return {
        token: value,
        profileId: `env:${name}`,
        profileVersion: tokenFingerprint(value),
      };
    }
  }
  return undefined;
}

/**
 * Non-reversible 12-hex-char fingerprint of a token, prefixed with
 * `sha256:` for forward-compat. Used as the pool-key profileVersion when
 * a token comes from env or from the contract-resolved path:
 * rotation -> different fingerprint -> pool entry invalidated cleanly.
 * 48 bits of entropy is sufficient collision resistance for a per-agent
 * client pool; never log the fingerprint alongside an account id.
 *
 * @param token - Raw token; it is hashed and never appears in the result.
 * @returns `sha256:` followed by the first 12 hex chars of the SHA-256 digest.
 */
export function tokenFingerprint(token: string): string {
  const hex = createHash("sha256").update(token).digest("hex").slice(0, 12);
  return `sha256:${hex}`;
}

/** Return `value` when it is a non-empty string, otherwise `undefined` (no trimming). */
function readString(value: unknown): string | undefined {
  return typeof value === "string" && value.length > 0 ? value : undefined;
}

/**
 * Normalize a copilotHome path for cross-platform pool keying.
 * Re-exported so attempt.ts / runtime.ts can share the same
 * normalization without re-implementing.
 *
 * The result is absolute and carries no trailing separator, except for a
 * filesystem root, which is returned unchanged. Stripping a root's
 * separator would turn `/` into the empty string (and `C:\` into the
 * drive-relative `C:`), which is no longer absolute and resolves to the
 * current working directory on the next call.
 *
 * @param value - Absolute or relative path; relative paths resolve against the cwd.
 * @returns The absolute, normalized path.
 */
export function normalizeCopilotHomePath(value: string): string {
  const absolute = normalize(resolve(value));
  // Only a filesystem root is its own parent; keep it byte-for-byte.
  if (resolve(absolute, "..") === absolute) {
    return absolute;
  }
  return absolute.replace(new RegExp(`${escapeForRegex(sep)}+$`), "");
}

/** Escape regex metacharacters so `value` can be embedded literally in a `RegExp`. */
function escapeForRegex(value: string): string {
  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
backgroundCompactionThreshold: 0.5, + bufferExhaustionThreshold: 0.85, + }); + }); + + it("omits undefined fields without coercing them", () => { + const result = createInfiniteSessionConfig({ + enabled: undefined, + backgroundCompactionThreshold: 0.6, + bufferExhaustionThreshold: undefined, + }); + expect(result).toEqual({ backgroundCompactionThreshold: 0.6 }); + expect(result).not.toHaveProperty("enabled"); + expect(result).not.toHaveProperty("bufferExhaustionThreshold"); + }); +}); + +describe("writeOpenClawCompactionMarker", () => { + it("writes a JSON marker with expected shape under /files", async () => { + const workspaceDir = await mkdtemp(join(tmpdir(), "copilot-compaction-")); + try { + const written = await writeOpenClawCompactionMarker( + { + sessionId: "openclaw-sess-123", + workspaceDir, + trigger: "manual", + currentTokenCount: 42, + sdkSessionId: "sdk-sess-abc", + reason: "deferred-to-sdk-infinite-sessions", + }, + { now: () => 1_700_000_000_000 }, + ); + + expect(written.path).toBe( + join(workspaceDir, "files", "openclaw-compaction-1700000000000-openclaw-sess-123.json"), + ); + expect(written.marker).toEqual({ + version: 1, + source: "copilot-harness", + sessionId: "openclaw-sess-123", + ts: 1_700_000_000_000, + compacted: false, + trigger: "manual", + sdkSessionId: "sdk-sess-abc", + currentTokenCount: 42, + reason: "deferred-to-sdk-infinite-sessions", + }); + + const contents = await readFile(written.path, "utf8"); + expect(contents.endsWith("\n")).toBe(true); + expect(JSON.parse(contents)).toEqual(written.marker); + } finally { + await rm(workspaceDir, { recursive: true, force: true }); + } + }); + + it("records force:true in the marker without acting on it", async () => { + const writes: Array<{ path: string; contents: string }> = []; + const fs = { + mkdir: vi.fn(async () => undefined), + writeFile: vi.fn(async (path: string, contents: string) => { + writes.push({ path, contents }); + }), + }; + + const written = await 
writeOpenClawCompactionMarker( + { + sessionId: "s1", + workspaceDir: "/ws", + force: true, + reason: "force-requested-but-sdk-has-no-synchronous-compact-api", + }, + { now: () => 1, fs: fs as never }, + ); + + expect(written.marker.force).toBe(true); + expect(written.marker.compacted).toBe(false); + expect(writes).toHaveLength(1); + expect(JSON.parse(writes[0].contents)).toMatchObject({ force: true }); + }); + + it("omits force / trigger / sdkSessionId / currentTokenCount when undefined", async () => { + const writes: Array<{ path: string; contents: string }> = []; + const fs = { + mkdir: vi.fn(async () => undefined), + writeFile: vi.fn(async (path: string, contents: string) => { + writes.push({ path, contents }); + }), + }; + + const written = await writeOpenClawCompactionMarker( + { sessionId: "s1", workspaceDir: "/ws" }, + { now: () => 7, fs: fs as never }, + ); + + expect(written.marker).toEqual({ + version: 1, + source: "copilot-harness", + sessionId: "s1", + ts: 7, + compacted: false, + }); + const parsed = JSON.parse(writes[0].contents); + expect(parsed).not.toHaveProperty("force"); + expect(parsed).not.toHaveProperty("trigger"); + expect(parsed).not.toHaveProperty("sdkSessionId"); + expect(parsed).not.toHaveProperty("currentTokenCount"); + expect(parsed).not.toHaveProperty("reason"); + }); + + it("sanitizes sessionId chars in the filename", async () => { + const fs = { + mkdir: vi.fn(async () => undefined), + writeFile: vi.fn(async () => undefined), + }; + const written = await writeOpenClawCompactionMarker( + { sessionId: "abc:/?\\@!def", workspaceDir: "/ws" }, + { now: () => 1, fs: fs as never }, + ); + expect(written.path).toContain("openclaw-compaction-1-abc______def.json"); + // sessionId in the marker body stays the original unsanitized value. 
+ expect(written.marker.sessionId).toBe("abc:/?\\@!def"); + }); + + it("creates the subdir recursively before writing", async () => { + const calls: Array<{ kind: "mkdir" | "write"; path: string; opts?: unknown }> = []; + const fs = { + mkdir: vi.fn(async (path: string, opts: unknown) => { + calls.push({ kind: "mkdir", path, opts }); + }), + writeFile: vi.fn(async (path: string) => { + calls.push({ kind: "write", path }); + }), + }; + await writeOpenClawCompactionMarker( + { sessionId: "s", workspaceDir: "/ws" }, + { now: () => 1, fs: fs as never }, + ); + expect(calls[0]).toEqual({ kind: "mkdir", path: "/ws/files", opts: { recursive: true } }); + expect(calls[1]?.kind).toBe("write"); + }); + + it("honours a custom subdir option", async () => { + const fs = { + mkdir: vi.fn(async () => undefined), + writeFile: vi.fn(async () => undefined), + }; + const written = await writeOpenClawCompactionMarker( + { sessionId: "s", workspaceDir: "/ws" }, + { now: () => 1, fs: fs as never, subdir: "compaction" }, + ); + expect(written.path).toBe("/ws/compaction/openclaw-compaction-1-s.json"); + }); + + it("surfaces mkdir failures", async () => { + const fs = { + mkdir: vi.fn(async () => { + throw new Error("EACCES"); + }), + writeFile: vi.fn(async () => undefined), + }; + await expect( + writeOpenClawCompactionMarker( + { sessionId: "s", workspaceDir: "/ws" }, + { now: () => 1, fs: fs as never }, + ), + ).rejects.toThrow("EACCES"); + expect(fs.writeFile).not.toHaveBeenCalled(); + }); + + it("surfaces writeFile failures", async () => { + const fs = { + mkdir: vi.fn(async () => undefined), + writeFile: vi.fn(async () => { + throw new Error("ENOSPC"); + }), + }; + await expect( + writeOpenClawCompactionMarker( + { sessionId: "s", workspaceDir: "/ws" }, + { now: () => 1, fs: fs as never }, + ), + ).rejects.toThrow("ENOSPC"); + }); + + it("throws on missing sessionId", async () => { + await expect( + writeOpenClawCompactionMarker({ sessionId: "", workspaceDir: "/ws" }), + 
).rejects.toThrow(/sessionId is required/); + }); + + it("throws on missing workspaceDir", async () => { + await expect( + writeOpenClawCompactionMarker({ sessionId: "s", workspaceDir: "" }), + ).rejects.toThrow(/workspaceDir is required/); + }); +}); diff --git a/extensions/copilot/src/compaction-bridge.ts b/extensions/copilot/src/compaction-bridge.ts new file mode 100755 index 000000000000..c4b1e96627c9 --- /dev/null +++ b/extensions/copilot/src/compaction-bridge.ts @@ -0,0 +1,183 @@ +import { mkdir, writeFile } from "node:fs/promises"; +import { join } from "node:path"; +import type { SessionConfig } from "@github/copilot-sdk"; + +// Compaction bridge for the GitHub Copilot agent runtime. +// +// Two responsibilities: +// +// 1. Shape `SessionConfig.infiniteSessions` from a typed options bag +// so attempt.ts can opt the SDK in to background auto-compaction +// at session creation. The SDK manages the actual compaction +// under the `infiniteSessions` config (background at +// `backgroundCompactionThreshold`, blocking at +// `bufferExhaustionThreshold`). +// +// 2. Write an OpenClaw-shaped JSON marker file at +// `<workspaceDir>/files/openclaw-compaction-<ts>-<sessionId>.json` +// whenever the host calls `harness.compact(params)`. Existing +// OpenClaw transcript readers look in `workspacePath/files/` for +// compaction artifacts; the marker keeps them informed even +// though the SDK now owns the actual context-window mechanics +// under infiniteSessions. +// +// Host back-pointers (NOT imported here to keep the package boundary +// clean): +// - `src/agents/pi-embedded-runner/compact.types.ts` — canonical +// `CompactEmbeddedPiSessionParams`. +// - `src/agents/pi-embedded-runner/types.ts` — canonical +// `EmbeddedPiCompactResult`. 
+ +type SdkInfiniteSessionConfig = NonNullable; + +export type { SdkInfiniteSessionConfig as CopilotInfiniteSessionConfig }; + +export interface CopilotInfiniteSessionOptions { + enabled?: boolean; + backgroundCompactionThreshold?: number; + bufferExhaustionThreshold?: number; +} + +/** + * Shape an `InfiniteSessionConfig` for `SessionConfig.infiniteSessions`. + * Returns `undefined` when no fields were supplied so callers can + * spread conditionally and let the SDK apply its own defaults + * (`enabled: true`, background 0.80, buffer 0.95). Any explicitly-set + * value (including `enabled: false` to disable infinite sessions) is + * preserved. + */ +export function createInfiniteSessionConfig( + options?: CopilotInfiniteSessionOptions, +): SdkInfiniteSessionConfig | undefined { + if (!options) { + return undefined; + } + const result: SdkInfiniteSessionConfig = {}; + if (options.enabled !== undefined) { + result.enabled = options.enabled; + } + if (options.backgroundCompactionThreshold !== undefined) { + result.backgroundCompactionThreshold = options.backgroundCompactionThreshold; + } + if (options.bufferExhaustionThreshold !== undefined) { + result.bufferExhaustionThreshold = options.bufferExhaustionThreshold; + } + return Object.keys(result).length > 0 ? result : undefined; +} + +export interface OpenClawCompactionMarkerInput { + /** OpenClaw session id (CompactEmbeddedPiSessionParams.sessionId). */ + readonly sessionId: string; + /** Workspace dir (CompactEmbeddedPiSessionParams.workspaceDir). */ + readonly workspaceDir: string; + /** Compaction trigger from CompactEmbeddedPiSessionParams.trigger. */ + readonly trigger?: "budget" | "overflow" | "manual"; + /** Optional caller-observed token count at compaction time. */ + readonly currentTokenCount?: number; + /** Optional active SDK session id when the marker is written. */ + readonly sdkSessionId?: string; + /** Optional reason string for the marker. 
*/ + readonly reason?: string; + /** + * Whether the host passed `force: true` in CompactEmbeddedPiSessionParams. + * Recorded for diagnostics — the harness cannot synchronously force + * compaction since the SDK has no on-demand compact RPC. + */ + readonly force?: boolean; +} + +export interface OpenClawCompactionMarkerOptions { + /** Override `Date.now`. Default: `Date.now`. */ + readonly now?: () => number; + /** Override `node:fs/promises` writers. Useful in tests. */ + readonly fs?: Pick; + /** + * Subdirectory under workspaceDir that holds the markers. Default + * `files` to match the proposal-defined location. + */ + readonly subdir?: string; +} + +export interface OpenClawCompactionMarker { + readonly version: 1; + readonly source: "copilot-harness"; + readonly sessionId: string; + readonly ts: number; + /** + * Whether actual compaction occurred. Always false from the harness + * path: SDK auto-compaction runs asynchronously in the background + * and the harness does not synchronously force it. + */ + readonly compacted: false; + readonly trigger?: "budget" | "overflow" | "manual"; + readonly force?: boolean; + readonly sdkSessionId?: string; + readonly currentTokenCount?: number; + readonly reason?: string; +} + +export interface WrittenOpenClawCompactionMarker { + readonly path: string; + readonly marker: OpenClawCompactionMarker; +} + +function compactJsonValue>(input: T): T { + const out: Record = {}; + for (const [key, value] of Object.entries(input)) { + if (value !== undefined) { + out[key] = value; + } + } + return out as T; +} + +/** + * Write an OpenClaw-shaped compaction marker JSON file under + * `<workspaceDir>/<subdir>/openclaw-compaction-<ts>-<sessionId>.json`. + * + * Returns the resolved file path and the marker payload that was + * written. Throws if the workspaceDir or sessionId is missing/empty + * (the caller should not invoke this without those — the harness + * `compact()` must validate first).
+ */ +export async function writeOpenClawCompactionMarker( + input: OpenClawCompactionMarkerInput, + options: OpenClawCompactionMarkerOptions = {}, +): Promise { + if (!input.workspaceDir || typeof input.workspaceDir !== "string") { + throw new Error("[copilot:compaction-bridge] workspaceDir is required to write a marker"); + } + if (!input.sessionId || typeof input.sessionId !== "string") { + throw new Error("[copilot:compaction-bridge] sessionId is required to write a marker"); + } + + const now = options.now ?? Date.now; + const fs = options.fs ?? { mkdir, writeFile }; + const subdir = options.subdir ?? "files"; + const ts = now(); + const safeSessionId = input.sessionId.replace(/[^a-zA-Z0-9._-]/g, "_"); + // Filename pattern: ts-first so listings sort chronologically. Suffix + // sessionId for collision safety when multiple sessions share a + // workspace. Matches the proposal's `openclaw-compaction-` prefix. + const filename = `openclaw-compaction-${ts}-${safeSessionId}.json`; + const dirPath = join(input.workspaceDir, subdir); + const filePath = join(dirPath, filename); + + const marker: OpenClawCompactionMarker = compactJsonValue({ + version: 1 as const, + source: "copilot-harness" as const, + sessionId: input.sessionId, + ts, + compacted: false as const, + trigger: input.trigger, + force: input.force, + sdkSessionId: input.sdkSessionId, + currentTokenCount: input.currentTokenCount, + reason: input.reason, + }); + + await fs.mkdir(dirPath, { recursive: true }); + await fs.writeFile(filePath, `${JSON.stringify(marker, null, 2)}\n`, "utf8"); + + return { path: filePath, marker }; +} diff --git a/extensions/copilot/src/doctor-probes.test.ts b/extensions/copilot/src/doctor-probes.test.ts new file mode 100755 index 000000000000..db80447af537 --- /dev/null +++ b/extensions/copilot/src/doctor-probes.test.ts @@ -0,0 +1,283 @@ +import { EventEmitter } from "node:events"; +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; 
+import { afterEach, describe, expect, it, vi } from "vitest"; +import { + probeCopilotAuthShape, + probeCopilotCliVersion, + probeCopilotHomeWritable, +} from "./doctor-probes.js"; + +type FakeChildOptions = { + exitCode?: number | null; + signal?: NodeJS.Signals | null; + stdout?: string; + stderr?: string; + emitErrorMessage?: string; + /** When true, never emits close; useful for timeout tests. */ + hang?: boolean; +}; + +function makeFakeChild(opts: FakeChildOptions = {}) { + const emitter = new EventEmitter() as EventEmitter & { + stdout: EventEmitter; + stderr: EventEmitter; + kill: () => void; + }; + emitter.stdout = new EventEmitter(); + emitter.stderr = new EventEmitter(); + emitter.kill = vi.fn(); + + queueMicrotask(() => { + if (opts.stdout) { + emitter.stdout.emit("data", Buffer.from(opts.stdout, "utf8")); + } + if (opts.stderr) { + emitter.stderr.emit("data", Buffer.from(opts.stderr, "utf8")); + } + if (opts.emitErrorMessage) { + emitter.emit("error", new Error(opts.emitErrorMessage)); + return; + } + if (!opts.hang) { + emitter.emit("close", opts.exitCode ?? 0, opts.signal ?? 
null); + } + }); + + return emitter; +} + +const tempDirs: string[] = []; + +afterEach(async () => { + for (const dir of tempDirs.splice(0)) { + await fs.rm(dir, { recursive: true, force: true }); + } +}); + +async function makeTempHome(): Promise { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-copilot-doctor-")); + tempDirs.push(dir); + return dir; +} + +describe("probeCopilotCliVersion", () => { + it("reports ok with trimmed version on exit 0 with stdout", async () => { + const result = await probeCopilotCliVersion({ + spawnFn: () => makeFakeChild({ stdout: " 1.2.3 \n" }) as never, + }); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.version).toBe("1.2.3"); + expect(result.command).toBe("copilot"); + } + }); + + it("uses custom command and args when provided", async () => { + const calls: Array<{ cmd: string; args: string[] }> = []; + const result = await probeCopilotCliVersion({ + command: "my-copilot", + args: ["-V"], + spawnFn: ((cmd: string, args: readonly string[]) => { + calls.push({ cmd, args: [...args] }); + return makeFakeChild({ stdout: "9.9.9" }) as never; + }) as never, + }); + expect(calls).toEqual([{ cmd: "my-copilot", args: ["-V"] }]); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.command).toBe("my-copilot"); + } + }); + + it("reports non-zero-exit with stderr details", async () => { + const result = await probeCopilotCliVersion({ + spawnFn: () => makeFakeChild({ exitCode: 2, stderr: "boom: not installed" }) as never, + }); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.reason).toBe("non-zero-exit"); + expect(result.details?.exitCode).toBe(2); + expect(result.details?.stderr).toBe("boom: not installed"); + } + }); + + it("reports empty-version when exit 0 produces no stdout", async () => { + const result = await probeCopilotCliVersion({ + spawnFn: () => makeFakeChild({ stdout: " \n" }) as never, + }); + expect(result.ok).toBe(false); + if (!result.ok) { + 
expect(result.reason).toBe("empty-version"); + } + }); + + it("reports spawn-failed when spawnFn throws synchronously (e.g. ENOENT)", async () => { + const result = await probeCopilotCliVersion({ + spawnFn: (() => { + throw new Error("ENOENT: copilot not found"); + }) as never, + }); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.reason).toBe("spawn-failed"); + expect(result.details?.rawError).toContain("ENOENT"); + } + }); + + it("reports spawn-error when child emits 'error'", async () => { + const result = await probeCopilotCliVersion({ + spawnFn: () => makeFakeChild({ emitErrorMessage: "spawn ENOEXEC" }) as never, + }); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.reason).toBe("spawn-error"); + expect(result.details?.rawError).toBe("spawn ENOEXEC"); + } + }); + + it("reports probe-timeout when child hangs past timeoutMs and kills the child", async () => { + const fakeChild = makeFakeChild({ hang: true }); + const result = await probeCopilotCliVersion({ + timeoutMs: 10, + spawnFn: () => fakeChild as never, + }); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.reason).toBe("probe-timeout"); + expect(result.details?.timeoutMs).toBe(10); + } + expect(fakeChild.kill).toHaveBeenCalled(); + }); + + it("returns just the first non-empty line as version when stdout has a banner / update hint", async () => { + const result = await probeCopilotCliVersion({ + spawnFn: () => + makeFakeChild({ + stdout: "GitHub Copilot CLI 1.0.48.\nRun 'copilot update' to check for updates.\n", + }) as never, + }); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.version).toBe("GitHub Copilot CLI 1.0.48."); + expect(result.rawStdout).toBe( + "GitHub Copilot CLI 1.0.48.\nRun 'copilot update' to check for updates.", + ); + } + }); + + it("does not surface rawStdout when stdout is already single-line", async () => { + const result = await probeCopilotCliVersion({ + spawnFn: () => makeFakeChild({ stdout: "1.2.3\n" 
}) as never, + }); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.version).toBe("1.2.3"); + expect(result.rawStdout).toBeUndefined(); + } + }); +}); + +describe("probeCopilotHomeWritable", () => { + it("reports ok when the directory exists and is writable, cleaning up after itself", async () => { + const home = await makeTempHome(); + const result = await probeCopilotHomeWritable(home); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.copilotHome).toBe(home); + expect(result.probedPath.startsWith(home)).toBe(true); + } + const entries = await fs.readdir(home); + expect(entries).toEqual([]); + }); + + it("creates copilotHome if missing", async () => { + const root = await makeTempHome(); + const home = path.join(root, "nested", "copilot-cfg"); + const result = await probeCopilotHomeWritable(home); + expect(result.ok).toBe(true); + const stat = await fs.stat(home); + expect(stat.isDirectory()).toBe(true); + }); + + it("reports copilothome-not-writable when fs throws on mkdir", async () => { + const result = await probeCopilotHomeWritable("/some/path", { + fsApi: { + mkdir: vi.fn().mockRejectedValueOnce(new Error("EPERM: not permitted")), + writeFile: vi.fn(), + rm: vi.fn(), + } as never, + }); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.reason).toBe("copilothome-not-writable"); + expect(result.details?.rawError).toContain("EPERM"); + } + }); + + it("falls back to the platform default copilotHome when argument is empty or whitespace", async () => { + const writeFile = vi.fn().mockResolvedValue(undefined); + const result = await probeCopilotHomeWritable(" ", { + fsApi: { + mkdir: vi.fn().mockResolvedValue(undefined), + writeFile, + rm: vi.fn().mockResolvedValue(undefined), + } as never, + }); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.copilotHome.length).toBeGreaterThan(0); + expect(result.copilotHome.toLowerCase()).toContain("copilot"); + } + }); +}); + 
+describe("probeCopilotAuthShape", () => { + it("resolves to useLoggedInUser when the flag is true", () => { + const result = probeCopilotAuthShape({ useLoggedInUser: true }); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.resolvedMode).toBe("useLoggedInUser"); + } + }); + + it("resolves to gitHubToken when a non-empty token is supplied", () => { + const result = probeCopilotAuthShape({ gitHubToken: "ghp_xxx" }); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.resolvedMode).toBe("gitHubToken"); + } + }); + + it("resolves to profile when both profileId and profileVersion are supplied", () => { + const result = probeCopilotAuthShape({ profileId: "p1", profileVersion: "v1" }); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.resolvedMode).toBe("profile"); + } + }); + + it("rejects when no auth source is provided", () => { + const result = probeCopilotAuthShape({}); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.reason).toBe("no-auth-source"); + } + }); + + it("rejects when only one of profileId / profileVersion is provided", () => { + expect(probeCopilotAuthShape({ profileId: "p1" }).ok).toBe(false); + expect(probeCopilotAuthShape({ profileVersion: "v1" }).ok).toBe(false); + }); + + it("rejects useLoggedInUser:false on its own", () => { + const result = probeCopilotAuthShape({ useLoggedInUser: false }); + expect(result.ok).toBe(false); + }); + + it("rejects an empty gitHubToken string", () => { + const result = probeCopilotAuthShape({ gitHubToken: "" }); + expect(result.ok).toBe(false); + }); +}); diff --git a/extensions/copilot/src/doctor-probes.ts b/extensions/copilot/src/doctor-probes.ts new file mode 100755 index 000000000000..2f10448194b1 --- /dev/null +++ b/extensions/copilot/src/doctor-probes.ts @@ -0,0 +1,260 @@ +/** + * Runtime doctor probes for the copilot extension. 
+ * + * Imperative side-effecting checks used to diagnose a copilot + * deployment from within `openclaw doctor` (or any equivalent + * harness-side health check). Kept out of doctor-contract-api.ts + * because that contract is declarative and auto-loaded by the + * plugin registry, whereas these probes spawn subprocesses or + * touch the filesystem and must be invoked imperatively. + * + * All probes are pure (no module-level state) and dependency- + * injectable for tests. They never throw on a probe-negative + * result — failure is surfaced via the `ok: false` shape so the + * caller can render a structured doctor report. + */ + +import { spawn } from "node:child_process"; +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; + +export type ProbeResult> = + | ({ ok: true } & TPayload) + | { ok: false; reason: string; details?: Record }; + +export interface ProbeCopilotCliVersionOptions { + /** Command to invoke; defaults to "copilot". */ + command?: string; + /** Argv used to ask for version; defaults to ["--version"]. */ + args?: readonly string[]; + /** Timeout in milliseconds; defaults to 5_000. */ + timeoutMs?: number; + /** Injection seam for testing. Defaults to node:child_process spawn. */ + spawnFn?: typeof spawn; +} + +export interface ProbeCopilotHomeOptions { + /** Injection seam for testing. */ + fsApi?: Pick; + /** Filename used for the writability probe. */ + probeFileName?: string; +} + +const DEFAULT_PROBE_TIMEOUT_MS = 5_000; +const DEFAULT_PROBE_FILENAME = ".copilot-doctor-probe"; + +/** + * Probe that the Copilot CLI is installed and prints a version. + * Treats non-zero exit, missing stdout, and timeout all as failures. + */ +export async function probeCopilotCliVersion( + options: ProbeCopilotCliVersionOptions = {}, +): Promise> { + const command = options.command ?? "copilot"; + const args = options.args ?? ["--version"]; + const timeoutMs = options.timeoutMs ?? 
DEFAULT_PROBE_TIMEOUT_MS; + const spawnImpl = options.spawnFn ?? spawn; + + return new Promise>( + (resolve) => { + let child: ReturnType | undefined; + let settled = false; + const settle = ( + result: ProbeResult<{ version: string; command: string; rawStdout?: string }>, + ): void => { + if (settled) { + return; + } + settled = true; + if (timer) { + clearTimeout(timer); + } + try { + child?.kill(); + } catch { + // ignore double-kill / already-dead errors + } + resolve(result); + }; + + const timer = setTimeout(() => { + settle({ + ok: false, + reason: "probe-timeout", + details: { command, args: [...args], timeoutMs }, + }); + }, timeoutMs); + + try { + child = spawnImpl(command, [...args], { stdio: ["ignore", "pipe", "pipe"] }); + } catch (error) { + settle({ + ok: false, + reason: "spawn-failed", + details: { command, args: [...args], rawError: formatProbeError(error) }, + }); + return; + } + + let stdout = ""; + let stderr = ""; + child.stdout?.on("data", (chunk: Buffer) => { + stdout += chunk.toString("utf8"); + }); + child.stderr?.on("data", (chunk: Buffer) => { + stderr += chunk.toString("utf8"); + }); + child.on("error", (error: Error) => { + settle({ + ok: false, + reason: "spawn-error", + details: { command, args: [...args], rawError: error.message }, + }); + }); + child.on("close", (code: number | null, signal: NodeJS.Signals | null) => { + if (code !== 0) { + settle({ + ok: false, + reason: "non-zero-exit", + details: { + command, + args: [...args], + exitCode: code, + signal, + stderr: stderr.trim() || undefined, + }, + }); + return; + } + const rawStdout = stdout.trim(); + if (!rawStdout) { + settle({ + ok: false, + reason: "empty-version", + details: { command, args: [...args] }, + }); + return; + } + // Many version commands (notably the bundled `copilot --version`) + // print a banner plus an "update available" hint on subsequent + // lines. 
Surface only the first non-empty line as `version` so the + // doctor UI gets a clean string; keep the full stdout in + // `rawStdout` for debugging. + const version = firstNonEmptyLine(rawStdout) ?? rawStdout; + const payload: { version: string; command: string; rawStdout?: string } = { + version, + command, + }; + if (rawStdout !== version) { + payload.rawStdout = rawStdout; + } + settle({ ok: true, ...payload }); + }); + }, + ); +} + +function firstNonEmptyLine(value: string): string | undefined { + for (const line of value.split(/\r?\n/)) { + const trimmed = line.trim(); + if (trimmed.length > 0) { + return trimmed; + } + } + return undefined; +} + +/** + * Probe that copilotHome (or default ~/.config/copilot) is writable + * by the running user. Mirrors the existing auth-bridge's expectation + * that the SDK can persist credentials under copilotHome. + */ +export async function probeCopilotHomeWritable( + copilotHome: string | undefined, + options: ProbeCopilotHomeOptions = {}, +): Promise> { + const fsApi = options.fsApi ?? fs; + const probeFileName = options.probeFileName ?? DEFAULT_PROBE_FILENAME; + const resolvedHome = + typeof copilotHome === "string" && copilotHome.trim().length > 0 + ? copilotHome.trim() + : defaultCopilotHome(); + const probedPath = path.join(resolvedHome, probeFileName); + + try { + await fsApi.mkdir(resolvedHome, { recursive: true }); + await fsApi.writeFile(probedPath, "copilot-doctor-probe", "utf8"); + await fsApi.rm(probedPath, { force: true }); + return { ok: true, copilotHome: resolvedHome, probedPath }; + } catch (error) { + return { + ok: false, + reason: "copilothome-not-writable", + details: { + copilotHome: resolvedHome, + probedPath, + rawError: formatProbeError(error), + }, + }; + } +} + +/** + * Probe GitHub Copilot agent runtime auth resolution given a useLoggedInUser hint. + * Validates that at least one of {useLoggedInUser, gitHubToken, + * profileId+profileVersion} is set. 
This is intentionally a + * shape-only probe: actually performing an SDK auth handshake + * would require a pool and is out of scope for `openclaw doctor`. + */ +export function probeCopilotAuthShape(input: { + useLoggedInUser?: boolean; + gitHubToken?: string; + profileId?: string; + profileVersion?: string; +}): ProbeResult<{ resolvedMode: "useLoggedInUser" | "gitHubToken" | "profile" }> { + if (input.useLoggedInUser === true) { + return { ok: true, resolvedMode: "useLoggedInUser" }; + } + if (typeof input.gitHubToken === "string" && input.gitHubToken.length > 0) { + return { ok: true, resolvedMode: "gitHubToken" }; + } + if ( + typeof input.profileId === "string" && + input.profileId.length > 0 && + typeof input.profileVersion === "string" && + input.profileVersion.length > 0 + ) { + return { ok: true, resolvedMode: "profile" }; + } + return { + ok: false, + reason: "no-auth-source", + details: { + hint: "Set useLoggedInUser:true, or gitHubToken, or both profileId+profileVersion", + }, + }; +} + +function defaultCopilotHome(): string { + // Mirrors the SDK convention; auth-bridge uses the same default. + if (process.platform === "win32") { + return path.join(process.env.APPDATA ?? 
os.homedir(), "copilot"); + } + const xdg = process.env.XDG_CONFIG_HOME; + if (xdg && xdg.length > 0) { + return path.join(xdg, "copilot"); + } + return path.join(os.homedir(), ".config", "copilot"); +} + +function formatProbeError(error: unknown): string { + if (error instanceof Error) { + return error.message; + } + try { + return JSON.stringify(error); + } catch { + return String(error); + } +} diff --git a/extensions/copilot/src/dual-write-transcripts.test.ts b/extensions/copilot/src/dual-write-transcripts.test.ts new file mode 100755 index 000000000000..0878a12f5f63 --- /dev/null +++ b/extensions/copilot/src/dual-write-transcripts.test.ts @@ -0,0 +1,376 @@ +import { createHash } from "node:crypto"; +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import type { AgentMessage } from "openclaw/plugin-sdk/agent-harness-runtime"; +import { + initializeGlobalHookRunner, + resetGlobalHookRunner, +} from "openclaw/plugin-sdk/hook-runtime"; +import { createMockPluginRegistry } from "openclaw/plugin-sdk/plugin-test-runtime"; +import { + castAgentMessage, + makeAgentAssistantMessage, + makeAgentUserMessage, +} from "openclaw/plugin-sdk/test-fixtures"; +import { afterEach, describe, expect, it } from "vitest"; +import { + attachCopilotMirrorIdentity, + dualWriteCopilotTranscriptBestEffort, + mirrorCopilotTranscript, +} from "./dual-write-transcripts.js"; + +type MirroredAgentMessage = Extract; + +function expectedFingerprint(message: MirroredAgentMessage): string { + const payload = JSON.stringify({ role: message.role, content: message.content }); + return createHash("sha256").update(payload).digest("hex").slice(0, 16); +} + +const tempDirs: string[] = []; + +afterEach(async () => { + resetGlobalHookRunner(); + for (const dir of tempDirs.splice(0)) { + await fs.rm(dir, { recursive: true, force: true }); + } +}); + +async function createTempSessionFile() { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), 
"openclaw-copilot-mirror-")); + tempDirs.push(dir); + return path.join(dir, "session.jsonl"); +} + +async function makeRoot(prefix: string): Promise { + const root = await fs.mkdtemp(path.join(os.tmpdir(), prefix)); + tempDirs.push(root); + return root; +} + +function parseJsonLines(raw: string): T[] { + const records: T[] = []; + for (const line of raw.trim().split("\n")) { + if (line.length > 0) { + records.push(JSON.parse(line) as T); + } + } + return records; +} + +describe("mirrorCopilotTranscript", () => { + it("mirrors user, assistant, and tool result messages into the OpenClaw transcript", async () => { + const sessionFile = await createTempSessionFile(); + const userMessage = makeAgentUserMessage({ + content: [{ type: "text", text: "hello" }], + timestamp: Date.now(), + }); + const assistantMessage = makeAgentAssistantMessage({ + content: [{ type: "text", text: "hi there" }], + timestamp: Date.now() + 1, + }); + const toolResultMessage = castAgentMessage({ + role: "toolResult", + toolCallId: "call-1", + toolName: "read", + content: [ + { + type: "toolResult", + toolCallId: "call-1", + content: "read output", + }, + ], + timestamp: Date.now() + 2, + }) as MirroredAgentMessage; + + await mirrorCopilotTranscript({ + sessionFile, + sessionKey: "session-1", + messages: [userMessage, assistantMessage, toolResultMessage], + idempotencyScope: "copilot:session-1", + }); + + const raw = await fs.readFile(sessionFile, "utf8"); + expect(raw).toContain('"role":"user"'); + expect(raw).toContain('"role":"assistant"'); + expect(raw).toContain('"role":"toolResult"'); + expect(raw).toContain('"toolCallId":"call-1"'); + expect(raw).toContain( + `"idempotencyKey":"copilot:session-1:user:${expectedFingerprint(userMessage)}"`, + ); + expect(raw).toContain( + `"idempotencyKey":"copilot:session-1:assistant:${expectedFingerprint(assistantMessage)}"`, + ); + expect(raw).toContain( + `"idempotencyKey":"copilot:session-1:toolResult:${expectedFingerprint(toolResultMessage)}"`, + ); + 
}); + + it("creates the transcript directory on first mirror", async () => { + const root = await makeRoot("openclaw-copilot-mirror-missing-dir-"); + const sessionFile = path.join(root, "nested", "sessions", "session.jsonl"); + + await mirrorCopilotTranscript({ + sessionFile, + sessionKey: "session-1", + messages: [ + makeAgentAssistantMessage({ + content: [{ type: "text", text: "first mirror" }], + timestamp: Date.now(), + }), + ], + idempotencyScope: "copilot:session-1", + }); + + const raw = await fs.readFile(sessionFile, "utf8"); + expect(raw).toContain('"role":"assistant"'); + expect(raw).toContain('"content":[{"type":"text","text":"first mirror"}]'); + }); + + it("deduplicates re-emits by idempotency scope", async () => { + const sessionFile = await createTempSessionFile(); + const messages = [ + makeAgentUserMessage({ + content: [{ type: "text", text: "hello" }], + timestamp: Date.now(), + }), + makeAgentAssistantMessage({ + content: [{ type: "text", text: "hi there" }], + timestamp: Date.now() + 1, + }), + ] as const; + + await mirrorCopilotTranscript({ + sessionFile, + sessionKey: "session-1", + messages: [...messages], + idempotencyScope: "copilot:session-1", + }); + await mirrorCopilotTranscript({ + sessionFile, + sessionKey: "session-1", + messages: [...messages], + idempotencyScope: "copilot:session-1", + }); + + const records = parseJsonLines<{ type?: string; message?: { role?: string } }>( + await fs.readFile(sessionFile, "utf8"), + ); + // First "header" record may or may not appear depending on migration. + // What matters is that the second mirror call adds zero new messages. 
+ const messageRecords = records.filter((r) => r.message?.role !== undefined); + expect(messageRecords).toHaveLength(2); + }); + + it("runs before_message_write before appending mirrored messages", async () => { + initializeGlobalHookRunner( + createMockPluginRegistry([ + { + hookName: "before_message_write", + handler: (event) => ({ + message: castAgentMessage({ + ...((event as { message: unknown }).message as Record), + content: [{ type: "text", text: "hello [hooked]" }], + }), + }), + }, + ]), + ); + const sessionFile = await createTempSessionFile(); + const sourceMessage = makeAgentAssistantMessage({ + content: [{ type: "text", text: "hello" }], + timestamp: Date.now(), + }); + + await mirrorCopilotTranscript({ + sessionFile, + sessionKey: "session-1", + messages: [sourceMessage], + idempotencyScope: "copilot:session-1", + }); + + const raw = await fs.readFile(sessionFile, "utf8"); + expect(raw).toContain('"content":[{"type":"text","text":"hello [hooked]"}]'); + expect(raw).toContain( + `"idempotencyKey":"copilot:session-1:assistant:${expectedFingerprint(sourceMessage)}"`, + ); + }); + + it("respects before_message_write blocking decisions", async () => { + initializeGlobalHookRunner( + createMockPluginRegistry([ + { + hookName: "before_message_write", + handler: () => ({ block: true }), + }, + ]), + ); + const sessionFile = await createTempSessionFile(); + + await mirrorCopilotTranscript({ + sessionFile, + sessionKey: "session-1", + messages: [ + makeAgentAssistantMessage({ + content: [{ type: "text", text: "should not persist" }], + timestamp: Date.now(), + }), + ], + idempotencyScope: "copilot:session-1", + }); + + await expect(fs.readFile(sessionFile, "utf8")).rejects.toHaveProperty("code", "ENOENT"); + }); + + it("is a no-op when no mirrorable messages are present", async () => { + const sessionFile = await createTempSessionFile(); + + await mirrorCopilotTranscript({ + sessionFile, + sessionKey: "session-1", + messages: [], + idempotencyScope: 
"copilot:session-1", + }); + + await expect(fs.readFile(sessionFile, "utf8")).rejects.toHaveProperty("code", "ENOENT"); + }); + + it("uses content fingerprint when no explicit mirror identity is attached", async () => { + const sessionFile = await createTempSessionFile(); + const message = makeAgentAssistantMessage({ + content: [{ type: "text", text: "fp" }], + timestamp: Date.now(), + }); + + await mirrorCopilotTranscript({ + sessionFile, + messages: [message], + idempotencyScope: "scope-fp", + }); + + const raw = await fs.readFile(sessionFile, "utf8"); + expect(raw).toContain(`"idempotencyKey":"scope-fp:assistant:${expectedFingerprint(message)}"`); + }); + + it("uses attached identity instead of content fingerprint when provided", async () => { + const sessionFile = await createTempSessionFile(); + const baseMessage = makeAgentAssistantMessage({ + content: [{ type: "text", text: "explicit" }], + timestamp: Date.now(), + }); + const tagged = attachCopilotMirrorIdentity(baseMessage, "sdk-session-1:assistant:0"); + + await mirrorCopilotTranscript({ + sessionFile, + messages: [tagged], + idempotencyScope: "copilot:openclaw-session-1", + }); + + const raw = await fs.readFile(sessionFile, "utf8"); + expect(raw).toContain( + '"idempotencyKey":"copilot:openclaw-session-1:sdk-session-1:assistant:0"', + ); + expect(raw).not.toContain(expectedFingerprint(baseMessage)); + }); + + it("omits idempotencyKey when no idempotencyScope is provided", async () => { + const sessionFile = await createTempSessionFile(); + + await mirrorCopilotTranscript({ + sessionFile, + messages: [ + makeAgentAssistantMessage({ + content: [{ type: "text", text: "no scope" }], + timestamp: Date.now(), + }), + ], + }); + + const raw = await fs.readFile(sessionFile, "utf8"); + expect(raw).toContain('"content":[{"type":"text","text":"no scope"}]'); + expect(raw).not.toContain("idempotencyKey"); + }); + + it("filters out non-mirrorable roles", async () => { + const sessionFile = await 
createTempSessionFile(); + const userMessage = makeAgentUserMessage({ + content: [{ type: "text", text: "u" }], + timestamp: Date.now(), + }); + const systemLike = castAgentMessage({ + role: "system" as never, + content: [{ type: "text", text: "system note" }], + timestamp: Date.now() + 1, + }); + + await mirrorCopilotTranscript({ + sessionFile, + messages: [userMessage, systemLike], + idempotencyScope: "scope", + }); + + const raw = await fs.readFile(sessionFile, "utf8"); + expect(raw).toContain('"role":"user"'); + expect(raw).not.toContain("system note"); + }); + + it("preserves explicit identity across attachCopilotMirrorIdentity overrides", async () => { + const sessionFile = await createTempSessionFile(); + const base = makeAgentAssistantMessage({ + content: [{ type: "text", text: "x" }], + timestamp: Date.now(), + }); + const first = attachCopilotMirrorIdentity(base, "id-1"); + const second = attachCopilotMirrorIdentity(first, "id-2"); + + await mirrorCopilotTranscript({ + sessionFile, + messages: [second], + idempotencyScope: "scope", + }); + + const raw = await fs.readFile(sessionFile, "utf8"); + expect(raw).toContain('"idempotencyKey":"scope:id-2"'); + expect(raw).not.toContain('"idempotencyKey":"scope:id-1"'); + }); +}); + +describe("dualWriteCopilotTranscriptBestEffort", () => { + it("returns normally when mirror succeeds", async () => { + const sessionFile = await createTempSessionFile(); + await expect( + dualWriteCopilotTranscriptBestEffort({ + sessionFile, + messages: [ + makeAgentAssistantMessage({ + content: [{ type: "text", text: "ok" }], + timestamp: Date.now(), + }), + ], + idempotencyScope: "scope", + }), + ).resolves.toBeUndefined(); + const raw = await fs.readFile(sessionFile, "utf8"); + expect(raw).toContain('"role":"assistant"'); + }); + + it("swallows infrastructure failures and never rejects", async () => { + // Pointing sessionFile at a path under a non-existent root with an + // empty-string segment can fail differently on different 
platforms; + // instead force failure by passing an invalid type and asserting + // that the wrapper itself does not reject. Use any-cast for the + // bad input shape since we are testing the wrapper's catch. + await expect( + dualWriteCopilotTranscriptBestEffort({ + sessionFile: "" as unknown as string, + messages: [ + makeAgentAssistantMessage({ + content: [{ type: "text", text: "should-not-throw" }], + timestamp: Date.now(), + }), + ], + idempotencyScope: "scope", + }), + ).resolves.toBeUndefined(); + }); +}); diff --git a/extensions/copilot/src/dual-write-transcripts.ts b/extensions/copilot/src/dual-write-transcripts.ts new file mode 100755 index 000000000000..7c9d3e4957c1 --- /dev/null +++ b/extensions/copilot/src/dual-write-transcripts.ts @@ -0,0 +1,220 @@ +/** + * Mirrors the AgentMessages produced by the copilot agent runtime into the + * OpenClaw audit transcript that sits next to (but is distinct from) the + * SDK's own session storage. + * + * The OpenClaw shell (src/agents/command/attempt-execution.ts) already + * writes the user prompt and the terminal assistant text into the + * transcript at the end of each attempt. That is the bare minimum to + * keep `/history` working. It does NOT capture tool calls, tool + * results, or intermediate assistant turns — those live only in the + * SDK's own session file. + * + * For audit/compliance and for the codex-parity guarantees we promised + * in the proposal, we mirror the full `messagesSnapshot` (user + + * assistant + toolResult) into the OpenClaw transcript via the same + * plugin-sdk primitives that the codex extension uses + * (extensions/codex/src/app-server/transcript-mirror.ts). Both writers + * cooperate via idempotency-key dedupe: each mirrored entry carries a + * stable `${idempotencyScope}:${identity}` key, and we skip any key + * already present in the transcript on disk before appending. 
/**
 * Subset of agent messages that the mirror persists. Only the user,
 * assistant, and toolResult roles pass the filter in
 * `mirrorCopilotTranscript`; every other role is dropped before writing.
 *
 * NOTE(review): the type arguments here were missing from the reviewed
 * text and are reconstructed from that role filter — confirm against
 * the repository copy.
 */
type MirroredAgentMessage = Extract<AgentMessage, { role: "user" | "assistant" | "toolResult" }>;

/** Key inside the `__openclaw` metadata bag that stores the explicit mirror identity. */
const MIRROR_IDENTITY_META_KEY = "mirrorIdentity" as const;

/**
 * Tag a message with a stable logical identity for mirror dedupe.
 * Callers should use a value that is invariant for the same logical
 * message across re-emits (e.g. `${sdkSessionId}:assistant:${turnIndex}`)
 * but distinct for genuinely-distinct messages. When present this
 * identity replaces the role/content fingerprint in the idempotency
 * key, so the dedupe survives caller-scope rotation without collapsing
 * distinct same-content turns. Symmetric to
 * `attachCodexMirrorIdentity` in the codex extension.
 *
 * @param message - Message to tag. It is not mutated; a shallow copy is returned.
 * @param identity - Identity stored under `__openclaw.mirrorIdentity`. A later
 *   call overrides the identity written by an earlier one.
 * @returns A copy of `message` whose `__openclaw` bag keeps its other keys
 *   and carries the new identity.
 */
export function attachCopilotMirrorIdentity<T extends AgentMessage>(
  message: T,
  identity: string,
): T {
  const record = message as unknown as Record<string, unknown>;
  const existing = record["__openclaw"];
  // Anything that is not a plain object (missing, primitive, array) is
  // replaced by a fresh bag instead of being spread.
  const baseMeta =
    existing && typeof existing === "object" && !Array.isArray(existing)
      ? (existing as Record<string, unknown>)
      : {};
  return {
    ...record,
    __openclaw: { ...baseMeta, [MIRROR_IDENTITY_META_KEY]: identity },
  } as unknown as T;
}

/**
 * Read the explicit mirror identity from a message.
 *
 * @returns The identity when `__openclaw.mirrorIdentity` is a non-empty
 *   string, otherwise `undefined`.
 */
function readMirrorIdentity(message: MirroredAgentMessage): string | undefined {
  const meta = (message as unknown as { __openclaw?: unknown }).__openclaw;
  if (!meta || typeof meta !== "object" || Array.isArray(meta)) {
    return undefined;
  }
  const id = (meta as Record<string, unknown>)[MIRROR_IDENTITY_META_KEY];
  return typeof id === "string" && id.length > 0 ? id : undefined;
}

/**
 * Fingerprint a message by role and content: the first 16 hex characters
 * of the SHA-256 digest of `JSON.stringify({ role, content })`.
 */
function fingerprintMirrorMessageContent(message: MirroredAgentMessage): string {
  const payload = JSON.stringify({ role: message.role, content: message.content });
  return createHash("sha256").update(payload).digest("hex").slice(0, 16);
}

/**
 * Resolve the identity part of the idempotency key: the explicit mirror
 * identity when one is attached, otherwise `${role}:${contentFingerprint}`.
 */
function buildMirrorDedupeIdentity(message: MirroredAgentMessage): string {
  const explicit = readMirrorIdentity(message);
  if (explicit) {
    return explicit;
  }
  return `${message.role}:${fingerprintMirrorMessageContent(message)}`;
}

/** Inputs for `mirrorCopilotTranscript`. */
export interface MirrorCopilotTranscriptParams {
  /** Path of the transcript file that is read for existing keys and appended to. */
  sessionFile: string;
  /** Forwarded to the before-write hook and to the transcript-update event. */
  sessionKey?: string;
  /** Forwarded to the before-write hook. */
  agentId?: string;
  /** Candidate messages; only user, assistant, and toolResult roles are mirrored. */
  messages: AgentMessage[];
  /**
   * Stable per-harness/per-thread scope. The codex equivalent uses
   * `codex-app-server:${threadId}`; we use `copilot:${sessionId}`
   * by convention (see attempt.ts call site). Keeping the scope
   * thread-stable (not per-turn) is what lets a re-emitted prior-turn
   * entry collide with its existing on-disk key and be a true no-op.
   * When omitted, entries are written without a key and never deduped.
   */
  idempotencyScope?: string;
  /** Passed to lock-timeout resolution and to the transcript append call. */
  config?: SessionWriteLockAcquireTimeoutConfig;
}

/**
 * Append the mirrorable messages to the transcript at `params.sessionFile`,
 * skipping any whose `${idempotencyScope}:${identity}` key is already on disk
 * or was appended earlier in the same call.
 *
 * Returns without taking the lock when no message has a mirrored role.
 * Errors from lock acquisition, file reads, and appends propagate to the
 * caller; the session write lock is always released.
 *
 * @param params - See {@link MirrorCopilotTranscriptParams}.
 */
export async function mirrorCopilotTranscript(
  params: MirrorCopilotTranscriptParams,
): Promise<void> {
  const messages = params.messages.filter(
    (message): message is MirroredAgentMessage =>
      message.role === "user" || message.role === "assistant" || message.role === "toolResult",
  );
  if (messages.length === 0) {
    return;
  }

  const lock = await acquireSessionWriteLock({
    sessionFile: params.sessionFile,
    timeoutMs: resolveSessionWriteLockAcquireTimeoutMs(params.config),
  });
  try {
    // The on-disk keys are read under the lock so the dedupe check and the
    // appends below happen while this writer holds the session lock.
    const existingIdempotencyKeys = await readTranscriptIdempotencyKeys(params.sessionFile);
    for (const message of messages) {
      const dedupeIdentity = buildMirrorDedupeIdentity(message);
      const idempotencyKey = params.idempotencyScope
        ? `${params.idempotencyScope}:${dedupeIdentity}`
        : undefined;
      if (idempotencyKey && existingIdempotencyKeys.has(idempotencyKey)) {
        continue;
      }
      const transcriptMessage = {
        ...message,
        ...(idempotencyKey ? { idempotencyKey } : {}),
      } as AgentMessage;
      // A falsy hook result means the message must not be written.
      const nextMessage = runAgentHarnessBeforeMessageWriteHook({
        message: transcriptMessage,
        agentId: params.agentId,
        sessionKey: params.sessionKey,
      });
      if (!nextMessage) {
        continue;
      }
      // The key is re-applied after the hook so the persisted entry carries
      // it regardless of what the hook returned.
      const messageToAppend = (
        idempotencyKey
          ? {
              ...(nextMessage as unknown as Record<string, unknown>),
              idempotencyKey,
            }
          : nextMessage
      ) as AgentMessage;
      await appendSessionTranscriptMessage({
        transcriptPath: params.sessionFile,
        message: messageToAppend,
        config: params.config,
      });
      if (idempotencyKey) {
        existingIdempotencyKeys.add(idempotencyKey);
      }
    }
  } finally {
    await lock.release();
  }

  if (params.sessionKey) {
    emitSessionTranscriptUpdate({ sessionFile: params.sessionFile, sessionKey: params.sessionKey });
  } else {
    emitSessionTranscriptUpdate(params.sessionFile);
  }
}

/**
 * Collect every string `message.idempotencyKey` found in a JSONL transcript.
 *
 * A missing file (`ENOENT`) yields an empty set; any other read error is
 * rethrown. Blank lines, lines that are not valid JSON, and entries without
 * a string key are ignored.
 *
 * @param sessionFile - Path of the transcript to scan.
 * @returns The set of idempotency keys present in the file.
 */
async function readTranscriptIdempotencyKeys(sessionFile: string): Promise<Set<string>> {
  const keys = new Set<string>();
  let raw: string;
  try {
    raw = await fs.readFile(sessionFile, "utf8");
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code !== "ENOENT") {
      throw error;
    }
    return keys;
  }
  for (const line of raw.split(/\r?\n/)) {
    if (!line.trim()) {
      continue;
    }
    try {
      const parsed = JSON.parse(line) as { message?: { idempotencyKey?: unknown } } | null;
      const key = parsed?.message?.idempotencyKey;
      if (typeof key === "string") {
        keys.add(key);
      }
    } catch {
      // Tolerate corrupt or partially written lines; they cannot carry a key.
      continue;
    }
  }
  return keys;
}
/**
 * Caller-side wrapper that swallows mirror failures. attempt.ts uses
 * this so that a transient transcript-mirror failure (lock contention,
 * disk full, etc.) never breaks an otherwise-successful attempt. The
 * SDK's own session file remains the source of truth in that case;
 * the OpenClaw audit trail just misses the intermediate messages for
 * this turn.
 *
 * @param params - Forwarded unchanged to `mirrorCopilotTranscript`.
 * @returns A promise that resolves to `undefined` whether or not the
 *   mirror succeeded; a failure is reported with `console.warn` only.
 */
export async function dualWriteCopilotTranscriptBestEffort(
  params: MirrorCopilotTranscriptParams,
): Promise<void> {
  try {
    await mirrorCopilotTranscript(params);
  } catch (error: unknown) {
    // Deliberate best-effort: log with context and keep the attempt alive.
    console.warn("[copilot-attempt] dual-write transcript mirror failed", error);
  }
}
+ return Promise.resolve().then(() => Promise.resolve()); +} + +function makeEvent(type: string, data: Record): SessionEvent { + return { + data, + id: `${type}-id`, + parentId: null, + timestamp: "2024-01-01T00:00:00.000Z", + type, + } as SessionEvent; +} + +function makeAssistantMessageEvent( + content = "assistant text", + overrides: Record = {}, +): SessionEvent { + return makeEvent("assistant.message", { + content, + messageId: "msg-1", + model: "gpt-5", + ...overrides, + }); +} + +function createFakeSession( + options: { + onOff?: (eventType: string) => void; + onReturnedUnsubscribe?: (eventType: string) => void; + returnUnsubscribe?: boolean; + } = {}, +): FakeSession { + const listeners = new Map void>>(); + const returnUnsubscribe = options.returnUnsubscribe !== false; + + const off = vi.fn((eventType: string, handler: (event: SessionEvent) => void) => { + options.onOff?.(eventType); + listeners.set( + eventType, + (listeners.get(eventType) ?? []).filter((existing) => existing !== handler), + ); + }); + + const on = vi.fn((eventType: string, handler: (event: SessionEvent) => void) => { + listeners.set(eventType, [...(listeners.get(eventType) ?? []), handler]); + if (!returnUnsubscribe) { + return undefined; + } + return () => { + options.onReturnedUnsubscribe?.(eventType); + off(eventType, handler); + }; + }); + + return { + abort: vi.fn().mockResolvedValue(undefined), + disconnect: vi.fn().mockResolvedValue(undefined), + emit(eventType: string, event: SessionEvent) { + for (const handler of listeners.get(eventType) ?? []) { + handler(event); + } + }, + id: "session-id", + listenerCount(eventType: string) { + return listeners.get(eventType)?.length ?? 
0; + }, + off, + on, + sendAndWait: vi.fn().mockResolvedValue(undefined), + sessionId: "sdk-session-id", + }; +} + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe("attachEventBridge", () => { + it("assistant.message_delta accumulates text per messageId in arrival order", () => { + const session = createFakeSession(); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + + session.emit( + "assistant.message_delta", + makeEvent("assistant.message_delta", { deltaContent: "he", messageId: "msg-1" }), + ); + session.emit( + "assistant.message_delta", + makeEvent("assistant.message_delta", { deltaContent: "llo", messageId: "msg-1" }), + ); + + expect(bridge.snapshot().assistantTexts).toEqual(["hello"]); + }); + + it("interleaved messageIds produce two ordered assistantTexts entries", () => { + const session = createFakeSession(); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + + session.emit( + "assistant.message_delta", + makeEvent("assistant.message_delta", { deltaContent: "a", messageId: "msg-1" }), + ); + session.emit( + "assistant.message_delta", + makeEvent("assistant.message_delta", { deltaContent: "x", messageId: "msg-2" }), + ); + session.emit( + "assistant.message_delta", + makeEvent("assistant.message_delta", { deltaContent: "b", messageId: "msg-1" }), + ); + + expect(bridge.snapshot().assistantTexts).toEqual(["ab", "x"]); + }); + + it("onAssistantDelta receives appended text, live sessionId, and current usage", async () => { + const session = createFakeSession(); + let sdkSessionId = "sdk-session-1"; + const onAssistantDelta = vi.fn().mockResolvedValue(undefined); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => sdkSessionId, + isAborted: () => false, + onAssistantDelta, + }); + + session.emit( + "assistant.usage", + makeEvent("assistant.usage", { + cacheReadTokens: 1, + 
cacheWriteTokens: 2, + inputTokens: 3, + outputTokens: 4, + }), + ); + sdkSessionId = "sdk-session-2"; + session.emit( + "assistant.message_delta", + makeEvent("assistant.message_delta", { deltaContent: "hi", messageId: "msg-1" }), + ); + + await bridge.awaitDeltaChain(); + + expect(onAssistantDelta).toHaveBeenCalledTimes(1); + expect(onAssistantDelta).toHaveBeenCalledWith({ + delta: "hi", + sessionId: "sdk-session-2", + text: "hi", + usage: { + cacheRead: 1, + cacheWrite: 2, + input: 3, + output: 4, + total: 10, + }, + }); + }); + + it("onAssistantDelta callbacks are serialized and awaitDeltaChain resolves after both", async () => { + const session = createFakeSession(); + const order: string[] = []; + const releases: Array<() => void> = []; + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + onAssistantDelta: vi.fn(async (payload: { delta: string }) => { + order.push(`start:${payload.delta}`); + await new Promise((resolve) => { + releases.push(() => { + order.push(`end:${payload.delta}`); + resolve(); + }); + }); + }), + }); + + session.emit( + "assistant.message_delta", + makeEvent("assistant.message_delta", { deltaContent: "a", messageId: "msg-1" }), + ); + session.emit( + "assistant.message_delta", + makeEvent("assistant.message_delta", { deltaContent: "b", messageId: "msg-1" }), + ); + await flushAsync(); + + expect(order).toEqual(["start:a"]); + releases[0]?.(); + await flushAsync(); + expect(order).toEqual(["start:a", "end:a", "start:b"]); + releases[1]?.(); + + await expect(bridge.awaitDeltaChain()).resolves.toBeUndefined(); + expect(order).toEqual(["start:a", "end:a", "start:b", "end:b"]); + }); + + it("onAssistantDelta rejection propagates through awaitDeltaChain while later deltas still serialize", async () => { + const session = createFakeSession(); + const order: string[] = []; + const firstError = new Error("delta failed"); + const secondDeferred = createDeferred(); + const bridge = 
attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + onAssistantDelta: vi.fn((payload: { delta: string }) => { + order.push(`start:${payload.delta}`); + if (payload.delta === "a") { + return Promise.reject(firstError); + } + return secondDeferred.promise.then(() => { + order.push(`end:${payload.delta}`); + }); + }), + }); + + session.emit( + "assistant.message_delta", + makeEvent("assistant.message_delta", { deltaContent: "a", messageId: "msg-1" }), + ); + session.emit( + "assistant.message_delta", + makeEvent("assistant.message_delta", { deltaContent: "b", messageId: "msg-1" }), + ); + await flushAsync(); + await flushAsync(); + + expect(order).toEqual(["start:a", "start:b"]); + secondDeferred.resolve(undefined); + + await expect(bridge.awaitDeltaChain()).rejects.toBe(firstError); + expect(order).toEqual(["start:a", "start:b", "end:b"]); + }); + + it("assistant.reasoning_delta accumulates reasoning in arrival order for buildAssistantMessage", () => { + const session = createFakeSession(); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + + session.emit( + "assistant.reasoning_delta", + makeEvent("assistant.reasoning_delta", { deltaContent: "thin", reasoningId: "reason-1" }), + ); + session.emit( + "assistant.reasoning_delta", + makeEvent("assistant.reasoning_delta", { deltaContent: "king", reasoningId: "reason-1" }), + ); + bridge.recordSendResult(makeAssistantMessageEvent("done")); + + expect(bridge.buildAssistantMessage({ modelRef: MODEL_REF, now: () => 7 })?.content).toEqual([ + { thinking: "thinking", type: "thinking" }, + { text: "done", type: "text" }, + ]); + }); + + it("buildAssistantMessage prefers terminal reasoningText over reasoning deltas", () => { + const session = createFakeSession(); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + + session.emit( + 
"assistant.reasoning_delta", + makeEvent("assistant.reasoning_delta", { deltaContent: "older", reasoningId: "reason-1" }), + ); + bridge.recordSendResult( + makeAssistantMessageEvent("done", { + reasoningText: "terminal reasoning", + }), + ); + + expect(bridge.buildAssistantMessage({ modelRef: MODEL_REF, now: () => 8 })?.content).toEqual([ + { thinking: "terminal reasoning", type: "thinking" }, + { text: "done", type: "text" }, + ]); + }); + + it("assistant.message only overwrites accumulated text when content is at least as long", () => { + const shorterSession = createFakeSession(); + const shorterBridge = attachEventBridge(shorterSession, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + shorterSession.emit( + "assistant.message_delta", + makeEvent("assistant.message_delta", { deltaContent: "longer", messageId: "msg-1" }), + ); + shorterSession.emit( + "assistant.message", + makeAssistantMessageEvent("short", { messageId: "msg-1" }), + ); + + const longerSession = createFakeSession(); + const longerBridge = attachEventBridge(longerSession, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + longerSession.emit( + "assistant.message_delta", + makeEvent("assistant.message_delta", { deltaContent: "tiny", messageId: "msg-1" }), + ); + longerSession.emit( + "assistant.message", + makeAssistantMessageEvent("longer text", { messageId: "msg-1" }), + ); + + expect(shorterBridge.finalizeAssistantTexts()).toEqual(["longer"]); + expect(longerBridge.finalizeAssistantTexts()).toEqual(["longer text"]); + }); + + it("assistant.message with toolRequests produces toolCall content and toolUse stopReason", () => { + const session = createFakeSession(); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + + bridge.recordSendResult( + makeAssistantMessageEvent("call tool", { + outputTokens: 7, + toolRequests: [ + { + arguments: { path: "README.md" }, + name: 
"read_file", + toolCallId: "call-1", + }, + ], + }), + ); + + expect(bridge.buildAssistantMessage({ modelRef: MODEL_REF, now: () => 9 })).toEqual({ + api: "openai-responses", + content: [ + { text: "call tool", type: "text" }, + { + arguments: { path: "README.md" }, + id: "call-1", + name: "read_file", + type: "toolCall", + }, + ], + model: "gpt-5", + provider: "github-copilot", + role: "assistant", + stopReason: "toolUse", + timestamp: 9, + usage: { + cacheRead: 0, + cacheWrite: 0, + cost: { + cacheRead: 0, + cacheWrite: 0, + input: 0, + output: 0, + total: 0, + }, + input: 0, + output: 7, + totalTokens: 7, + }, + }); + }); + + it("assistant.usage updates internal usage and the next onAssistantDelta payload reads it", async () => { + const session = createFakeSession(); + const onAssistantDelta = vi.fn().mockResolvedValue(undefined); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + onAssistantDelta, + }); + + session.emit( + "assistant.usage", + makeEvent("assistant.usage", { + cacheReadTokens: -2, + cacheWriteTokens: Number.NaN, + inputTokens: 4.9, + outputTokens: 5.1, + }), + ); + session.emit( + "assistant.message_delta", + makeEvent("assistant.message_delta", { deltaContent: "x", messageId: "msg-1" }), + ); + + await bridge.awaitDeltaChain(); + + expect(onAssistantDelta).toHaveBeenCalledWith({ + delta: "x", + sessionId: "sdk-session-id", + text: "x", + usage: { + cacheRead: 0, + cacheWrite: undefined, + input: 4, + output: 5, + total: 9, + }, + }); + }); + + it("preserves all-zero usage snapshot after an invalid assistant.usage event", () => { + const session = createFakeSession(); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + + bridge.recordSendResult(makeAssistantMessageEvent("done", { outputTokens: 7 })); + session.emit( + "assistant.usage", + makeEvent("assistant.usage", { + cacheReadTokens: "bad", + 
cacheWriteTokens: Number.POSITIVE_INFINITY, + inputTokens: undefined, + outputTokens: Number.NaN, + }), + ); + + expect(bridge.snapshot().usage).toEqual({ + cacheRead: undefined, + cacheWrite: undefined, + input: undefined, + output: undefined, + total: 0, + }); + expect(bridge.buildAssistantMessage({ modelRef: MODEL_REF, now: () => 9.5 })?.usage).toEqual({ + cacheRead: 0, + cacheWrite: 0, + cost: { + cacheRead: 0, + cacheWrite: 0, + input: 0, + output: 0, + total: 0, + }, + input: 0, + output: 0, + totalTokens: 0, + }); + }); + + it("overwrites prior usage with an all-zero snapshot when a later invalid usage event arrives", () => { + const session = createFakeSession(); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + + session.emit( + "assistant.usage", + makeEvent("assistant.usage", { + inputTokens: 5, + }), + ); + session.emit( + "assistant.usage", + makeEvent("assistant.usage", { + inputTokens: "bad", + }), + ); + + expect(bridge.snapshot().usage).toEqual({ + cacheRead: undefined, + cacheWrite: undefined, + input: undefined, + output: undefined, + total: 0, + }); + }); + + it("tool.execution_start increments startedCount and pushes toolMetas without meta", () => { + const session = createFakeSession(); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + + session.emit( + "tool.execution_start", + makeEvent("tool.execution_start", { toolCallId: "call-1", toolName: "bash" }), + ); + + expect(bridge.snapshot()).toEqual({ + assistantTexts: [], + completedCount: 0, + lastAssistantEvent: undefined, + startedCount: 1, + streamError: undefined, + toolMetas: [{ toolName: "bash" }], + usage: undefined, + }); + }); + + it("tool.execution_complete uses detailedContent or content on success and error.message on failure", () => { + const session = createFakeSession(); + const bridge = attachEventBridge(session, { + getSdkSessionId: 
() => "sdk-session-id", + isAborted: () => false, + }); + + session.emit( + "tool.execution_start", + makeEvent("tool.execution_start", { toolCallId: "call-1", toolName: "bash" }), + ); + session.emit( + "tool.execution_complete", + makeEvent("tool.execution_complete", { + result: { content: "content", detailedContent: "details" }, + success: true, + toolCallId: "call-1", + }), + ); + session.emit( + "tool.execution_start", + makeEvent("tool.execution_start", { toolCallId: "call-2", toolName: "read" }), + ); + session.emit( + "tool.execution_complete", + makeEvent("tool.execution_complete", { + error: { message: "failed" }, + success: false, + toolCallId: "call-2", + }), + ); + + expect(bridge.snapshot().toolMetas).toEqual([ + { toolName: "bash" }, + { meta: "details", toolName: "bash" }, + { toolName: "read" }, + { meta: "failed", toolName: "read" }, + ]); + }); + + it("tool.execution_complete without a matching start increments completedCount without pushing meta", () => { + const session = createFakeSession(); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + + session.emit( + "tool.execution_complete", + makeEvent("tool.execution_complete", { + result: { content: "done" }, + success: true, + toolCallId: "missing", + }), + ); + + expect(bridge.snapshot().completedCount).toBe(1); + expect(bridge.snapshot().toolMetas).toEqual([]); + }); + + it("session.error populates streamError with errorCode or errorType only when not aborted", () => { + const activeSession = createFakeSession(); + const activeBridge = attachEventBridge(activeSession, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + activeSession.emit( + "session.error", + makeEvent("session.error", { + errorCode: "boom_code", + errorType: "boom_type", + message: "boom", + }), + ); + + const abortedSession = createFakeSession(); + const abortedBridge = attachEventBridge(abortedSession, { + getSdkSessionId: () 
=> "sdk-session-id", + isAborted: () => true, + }); + abortedSession.emit( + "session.error", + makeEvent("session.error", { + errorType: "ignored", + message: "ignored", + }), + ); + + expect((activeBridge.snapshot().streamError as Error & { code?: string })?.code).toBe( + "boom_code", + ); + expect(activeBridge.snapshot().streamError?.message).toBe("boom"); + expect(abortedBridge.snapshot().streamError).toBeUndefined(); + }); + + it("abort populates streamError with session_aborted only when not aborted", () => { + const activeSession = createFakeSession(); + const activeBridge = attachEventBridge(activeSession, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + activeSession.emit("abort", makeEvent("abort", { reason: "because" })); + + const abortedSession = createFakeSession(); + const abortedBridge = attachEventBridge(abortedSession, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => true, + }); + abortedSession.emit("abort", makeEvent("abort", { reason: "ignored" })); + + expect((activeBridge.snapshot().streamError as Error & { code?: string })?.code).toBe( + "session_aborted", + ); + expect(activeBridge.snapshot().streamError?.message).toBe( + "[copilot-attempt] session aborted: because", + ); + expect(abortedBridge.snapshot().streamError).toBeUndefined(); + }); + + it("recordSendResult returns false for undefined and true for assistant.message while updating lastAssistantEvent", () => { + const session = createFakeSession(); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + + expect(bridge.recordSendResult(undefined)).toBe(false); + const event = makeAssistantMessageEvent("done", { outputTokens: 2 }); + expect(bridge.recordSendResult(event)).toBe(true); + expect(bridge.snapshot().lastAssistantEvent).toEqual(event); + expect(bridge.buildAssistantMessage({ modelRef: MODEL_REF, now: () => 11 })?.content).toEqual([ + { text: "done", type: "text" }, 
+ ]); + }); + + it("recordSendResult falls back to terminal content when no deltas arrived", () => { + const session = createFakeSession(); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + + bridge.recordSendResult(makeAssistantMessageEvent("done")); + + expect(bridge.finalizeAssistantTexts()).toEqual(["done"]); + }); + + it("ignores empty assistant and reasoning deltas", () => { + const onAssistantDelta = vi.fn(); + const session = createFakeSession(); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + onAssistantDelta, + }); + + session.emit( + "assistant.message_delta", + makeEvent("assistant.message_delta", { deltaContent: "", messageId: "msg-1" }), + ); + session.emit( + "assistant.reasoning_delta", + makeEvent("assistant.reasoning_delta", { deltaContent: "", reasoningId: "reason-1" }), + ); + session.emit("assistant.message", makeAssistantMessageEvent("", { messageId: "msg-1" })); + + expect(onAssistantDelta).not.toHaveBeenCalled(); + expect(bridge.finalizeAssistantTexts()).toEqual([]); + expect(bridge.buildAssistantMessage({ modelRef: MODEL_REF, now: () => 13 })).toBeUndefined(); + }); + + it("detach is idempotent after the first unsubscribe pass", () => { + const order: string[] = []; + const session = createFakeSession({ + onReturnedUnsubscribe: (eventType) => { + order.push(eventType); + }, + }); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + + bridge.detach(); + bridge.detach(); + + expect(order).toEqual([...REGISTERED_EVENT_TYPES].toReversed()); + expect(session.off).toHaveBeenCalledTimes(REGISTERED_EVENT_TYPES.length); + }); + + it("detach unsubscribes in reverse order when session.on returns unsubscribe functions", () => { + const order: string[] = []; + const session = createFakeSession({ + onReturnedUnsubscribe: (eventType) => { + 
order.push(eventType); + }, + }); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + + bridge.detach(); + session.emit( + "assistant.message_delta", + makeEvent("assistant.message_delta", { deltaContent: "ignored", messageId: "msg-1" }), + ); + + expect(order).toEqual([...REGISTERED_EVENT_TYPES].toReversed()); + expect(session.listenerCount("assistant.message_delta")).toBe(0); + }); + + it("detach unsubscribes in reverse order via off() fallback", () => { + const order: string[] = []; + const session = createFakeSession({ + onOff: (eventType) => { + order.push(eventType); + }, + returnUnsubscribe: false, + }); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + + bridge.detach(); + session.emit( + "assistant.message_delta", + makeEvent("assistant.message_delta", { deltaContent: "ignored", messageId: "msg-1" }), + ); + + expect(order).toEqual([...REGISTERED_EVENT_TYPES].toReversed()); + expect(session.listenerCount("assistant.message_delta")).toBe(0); + }); + + it("buildAssistantMessage returns undefined with no event, text, reasoning, or toolRequests", () => { + const session = createFakeSession(); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + + expect(bridge.buildAssistantMessage({ modelRef: MODEL_REF, now: () => 12 })).toBeUndefined(); + }); + + it("snapshot returns defensive copies for arrays and usage objects", () => { + const session = createFakeSession(); + const bridge = attachEventBridge(session, { + getSdkSessionId: () => "sdk-session-id", + isAborted: () => false, + }); + + session.emit( + "assistant.message_delta", + makeEvent("assistant.message_delta", { deltaContent: "hello", messageId: "msg-1" }), + ); + session.emit( + "assistant.usage", + makeEvent("assistant.usage", { inputTokens: 1, outputTokens: 2 }), + ); + session.emit( + 
"tool.execution_start", + makeEvent("tool.execution_start", { toolCallId: "call-1", toolName: "bash" }), + ); + + const first = bridge.snapshot(); + (first.assistantTexts as string[]).push("mutated"); + (first.toolMetas as Array<{ meta?: string; toolName: string }>)[0].toolName = "mutated"; + (first.usage as { input?: number }).input = 999; + + const second = bridge.snapshot(); + expect(second.assistantTexts).toEqual(["hello"]); + expect(second.toolMetas).toEqual([{ toolName: "bash" }]); + expect(second.usage).toEqual({ + cacheRead: undefined, + cacheWrite: undefined, + input: 1, + output: 2, + total: 3, + }); + }); +}); diff --git a/extensions/copilot/src/event-bridge.ts b/extensions/copilot/src/event-bridge.ts new file mode 100644 index 000000000000..b9406e38873d --- /dev/null +++ b/extensions/copilot/src/event-bridge.ts @@ -0,0 +1,356 @@ +import type { MessageOptions, SessionEvent, SessionEventType } from "@github/copilot-sdk"; +import type { AgentMessage } from "openclaw/plugin-sdk/agent-harness-runtime"; +import { + buildCopilotAssistantUsage, + normalizeCopilotUsage, + type CopilotUsageSnapshot, +} from "./usage-bridge.js"; + +export type AssistantMessage = Extract; + +export type AssistantUsageSnapshot = CopilotUsageSnapshot; + +export interface OnAssistantDeltaPayload { + delta: string; + sessionId?: string; + text: string; + usage?: AssistantUsageSnapshot; +} + +export interface SessionLike { + abort(): Promise; + disconnect(): Promise; + id?: string; + off?: (eventType: string, handler: (...args: unknown[]) => void) => void; + on: { + ( + eventType: K, + handler: (event: Extract) => void, + ): (() => void) | void; + (eventType: string, handler: (event: SessionEvent) => void): (() => void) | void; + }; + sendAndWait(options: MessageOptions, timeout?: number): Promise; + sessionId?: string; +} + +export interface EventBridgeOptions { + onAssistantDelta?: (payload: OnAssistantDeltaPayload) => void | Promise; + getSdkSessionId: () => string | undefined; + 
isAborted: () => boolean; +} + +export interface EventBridgeSnapshot { + readonly assistantTexts: readonly string[]; + readonly completedCount: number; + readonly lastAssistantEvent: Extract | undefined; + readonly startedCount: number; + readonly streamError: Error | undefined; + readonly toolMetas: ReadonlyArray<{ meta?: string; toolName: string }>; + readonly usage: AssistantUsageSnapshot | undefined; +} + +export interface BuildAssistantMessageArgs { + modelRef: { api?: string; id: string; provider: string }; + now: () => number; +} + +export interface EventBridgeController { + recordSendResult(result: SessionEvent | undefined): boolean; + awaitDeltaChain(): Promise; + snapshot(): EventBridgeSnapshot; + buildAssistantMessage(args: BuildAssistantMessageArgs): AssistantMessage | undefined; + finalizeAssistantTexts(): string[]; + detach(): void; +} + +type MessageAccumulator = { messageId: string; text: string }; +type PromptErrorWithCode = Error & { code?: string; cause?: unknown }; + +export function attachEventBridge( + session: SessionLike, + options: EventBridgeOptions, +): EventBridgeController { + const messageOrder: string[] = []; + const messagesById = new Map(); + const reasoningOrder: string[] = []; + const reasoningById = new Map(); + let lastAssistantEvent: Extract | undefined; + let usage: AssistantUsageSnapshot | undefined; + let streamError: Error | undefined; + const toolMetas: Array<{ meta?: string; toolName: string }> = []; + const toolNamesByCallId = new Map(); + let startedCount = 0; + let completedCount = 0; + let deltaQueue = Promise.resolve(); + let deltaChain = Promise.resolve(); + let firstDeltaError: unknown; + let detached = false; + const unsubscribeFns: Array<() => void> = []; + + registerListener(session, unsubscribeFns, "assistant.message_delta", (event) => { + const messageId = readString(event.data.messageId) ?? 
"assistant-message"; + const delta = event.data.deltaContent; + if (!delta) { + return; + } + const entry = ensureMessageAccumulator(messagesById, messageOrder, messageId); + entry.text += delta; + const onAssistantDelta = options.onAssistantDelta; + if (!onAssistantDelta) { + return; + } + const payload: OnAssistantDeltaPayload = { + delta, + sessionId: options.getSdkSessionId(), + text: entry.text, + usage, + }; + deltaQueue = deltaQueue + .then( + () => onAssistantDelta(payload), + () => onAssistantDelta(payload), + ) + .catch((error: unknown) => { + firstDeltaError ??= error; + }); + deltaChain = deltaQueue.then(() => { + if (firstDeltaError !== undefined) { + throw firstDeltaError; + } + }); + void deltaChain.catch(() => undefined); + }); + + registerListener(session, unsubscribeFns, "assistant.reasoning_delta", (event) => { + const reasoningId = readString(event.data.reasoningId) ?? "assistant-reasoning"; + const delta = event.data.deltaContent; + if (!delta) { + return; + } + if (!reasoningById.has(reasoningId)) { + reasoningById.set(reasoningId, ""); + reasoningOrder.push(reasoningId); + } + reasoningById.set(reasoningId, `${reasoningById.get(reasoningId) ?? 
""}${delta}`); + }); + + registerListener(session, unsubscribeFns, "assistant.message", (event) => { + lastAssistantEvent = event; + const entry = ensureMessageAccumulator(messagesById, messageOrder, event.data.messageId); + if (typeof event.data.content === "string" && event.data.content.length >= entry.text.length) { + entry.text = event.data.content; + } + }); + + registerListener(session, unsubscribeFns, "assistant.usage", (event) => { + usage = normalizeCopilotUsage(event.data); + }); + + registerListener(session, unsubscribeFns, "tool.execution_start", (event) => { + startedCount += 1; + toolNamesByCallId.set(event.data.toolCallId, event.data.toolName); + toolMetas.push({ toolName: event.data.toolName }); + }); + + registerListener(session, unsubscribeFns, "tool.execution_complete", (event) => { + completedCount += 1; + const toolName = toolNamesByCallId.get(event.data.toolCallId); + const meta = event.data.success + ? (event.data.result?.detailedContent ?? event.data.result?.content) + : event.data.error?.message; + if (toolName) { + toolMetas.push({ meta, toolName }); + } + }); + + registerListener(session, unsubscribeFns, "session.error", (event) => { + if (!options.isAborted()) { + streamError = createPromptError( + event.data.errorCode ?? 
event.data.errorType, + event.data.message, + ); + } + }); + + registerListener(session, unsubscribeFns, "abort", (event) => { + if (!options.isAborted()) { + streamError = createPromptError( + "session_aborted", + `[copilot-attempt] session aborted: ${event.data.reason}`, + ); + } + }); + + return { + recordSendResult(result) { + if (!isAssistantMessageEvent(result)) { + return false; + } + lastAssistantEvent = result; + return true; + }, + awaitDeltaChain() { + return deltaChain; + }, + snapshot() { + return { + assistantTexts: finalizeAssistantTexts(messageOrder, messagesById, lastAssistantEvent), + completedCount, + lastAssistantEvent, + startedCount, + streamError, + toolMetas: toolMetas.map((toolMeta) => Object.assign({}, toolMeta)), + usage: usage ? { ...usage } : undefined, + }; + }, + buildAssistantMessage(args) { + return buildAssistantMessage({ + event: lastAssistantEvent, + modelRef: args.modelRef, + now: args.now, + reasoningById, + reasoningOrder, + usage, + assistantTexts: finalizeAssistantTexts(messageOrder, messagesById, lastAssistantEvent), + }); + }, + finalizeAssistantTexts() { + return finalizeAssistantTexts(messageOrder, messagesById, lastAssistantEvent); + }, + detach() { + if (detached) { + return; + } + detached = true; + for (const unsubscribe of [...unsubscribeFns].toReversed()) { + try { + unsubscribe(); + } catch { + // best-effort cleanup only + } + } + unsubscribeFns.length = 0; + }, + }; +} + +function buildAssistantMessage(params: { + assistantTexts: string[]; + event?: Extract; + modelRef: { api?: string; id: string; provider: string }; + now: () => number; + reasoningById: Map; + reasoningOrder: string[]; + usage?: AssistantUsageSnapshot; +}): AssistantMessage | undefined { + const event = params.event; + const text = event + ? event.data.content || params.assistantTexts[params.assistantTexts.length - 1] || "" + : ""; + const reasoningText = + event?.data.reasoningText ?? 
joinReasoning(params.reasoningOrder, params.reasoningById); + const toolRequests = event?.data.toolRequests ?? []; + if (!text && !reasoningText && toolRequests.length === 0) { + return undefined; + } + + const content: AssistantMessage["content"] = []; + if (reasoningText) { + content.push({ thinking: reasoningText, type: "thinking" }); + } + if (text) { + content.push({ text, type: "text" }); + } + for (const request of toolRequests) { + content.push({ + arguments: request.arguments ?? {}, + id: request.toolCallId, + name: request.name, + type: "toolCall", + }); + } + + return { + api: params.modelRef.api ?? "openai-responses", + content, + model: event?.data.model ?? params.modelRef.id, + provider: params.modelRef.provider, + role: "assistant", + stopReason: toolRequests.length > 0 ? "toolUse" : "stop", + timestamp: params.now(), + usage: buildCopilotAssistantUsage({ + fallbackOutputTokens: event?.data.outputTokens, + usage: params.usage, + }), + }; +} + +function createPromptError(code: string, message: string, cause?: unknown): PromptErrorWithCode { + const error = new Error(message) as PromptErrorWithCode; + error.code = code; + if (cause !== undefined) { + error.cause = cause; + } + return error; +} + +function ensureMessageAccumulator( + messagesById: Map, + messageOrder: string[], + messageId: string, +): MessageAccumulator { + let entry = messagesById.get(messageId); + if (!entry) { + entry = { messageId, text: "" }; + messagesById.set(messageId, entry); + messageOrder.push(messageId); + } + return entry; +} + +function finalizeAssistantTexts( + messageOrder: string[], + messagesById: Map, + event?: Extract, +): string[] { + const texts = messageOrder + .map((messageId) => messagesById.get(messageId)?.text ?? 
"") + .filter((text) => text.length > 0); + if (texts.length > 0) { + return texts; + } + if (event?.data.content) { + return [event.data.content]; + } + return []; +} + +function isAssistantMessageEvent( + event: SessionEvent | undefined, +): event is Extract { + return event?.type === "assistant.message"; +} + +function joinReasoning(order: string[], reasoningById: Map): string { + return order.map((reasoningId) => reasoningById.get(reasoningId) ?? "").join(""); +} + +function readString(value: unknown): string | undefined { + return typeof value === "string" && value.length > 0 ? value : undefined; +} + +function registerListener( + session: SessionLike, + unsubscribeFns: Array<() => void>, + eventType: K, + handler: (event: Extract) => void, +): void { + const maybeUnsubscribe = session.on(eventType, handler); + if (typeof maybeUnsubscribe === "function") { + unsubscribeFns.push(maybeUnsubscribe); + return; + } + unsubscribeFns.push(() => { + session.off?.(eventType, handler as (...args: unknown[]) => void); + }); +} diff --git a/extensions/copilot/src/hooks-bridge.test.ts b/extensions/copilot/src/hooks-bridge.test.ts new file mode 100755 index 000000000000..0e24809757bd --- /dev/null +++ b/extensions/copilot/src/hooks-bridge.test.ts @@ -0,0 +1,149 @@ +import { describe, expect, it, vi } from "vitest"; +import { createHooksBridge, type CopilotHooksConfig } from "./hooks-bridge.js"; + +describe("createHooksBridge", () => { + it("returns undefined when no config is provided", () => { + expect(createHooksBridge()).toBeUndefined(); + }); + + it("returns undefined when config has no handlers", () => { + expect(createHooksBridge({})).toBeUndefined(); + }); + + it("returns undefined when only onHookError is supplied (no real handlers)", () => { + expect(createHooksBridge({ onHookError: () => undefined })).toBeUndefined(); + }); + + it("includes only the handlers that were configured", () => { + const onPreToolUse = vi.fn(); + const onSessionStart = vi.fn(); + const 
hooks = createHooksBridge({ onPreToolUse, onSessionStart })!; + expect(hooks).toBeDefined(); + expect(typeof hooks.onPreToolUse).toBe("function"); + expect(typeof hooks.onSessionStart).toBe("function"); + expect(hooks.onPostToolUse).toBeUndefined(); + expect(hooks.onUserPromptSubmitted).toBeUndefined(); + expect(hooks.onSessionEnd).toBeUndefined(); + expect(hooks.onErrorOccurred).toBeUndefined(); + }); + + it("forwards arguments and return values from a successful handler", async () => { + const onPreToolUse = vi + .fn() + .mockResolvedValue({ permissionDecision: "allow" as const, additionalContext: "ok" }); + const hooks = createHooksBridge({ onPreToolUse })!; + const input = { timestamp: 1, cwd: "/tmp", toolName: "bash", toolArgs: { cmd: "ls" } }; + const result = await hooks.onPreToolUse!(input, { sessionId: "sess-1" }); + expect(result).toEqual({ permissionDecision: "allow", additionalContext: "ok" }); + expect(onPreToolUse).toHaveBeenCalledTimes(1); + expect(onPreToolUse).toHaveBeenCalledWith(input, { sessionId: "sess-1" }); + }); + + it("isolates synchronous throws: returns undefined and notifies onHookError", async () => { + const onHookError = vi.fn(); + const hooks = createHooksBridge({ + onPostToolUse: () => { + throw new Error("post boom"); + }, + onHookError, + })!; + const result = await hooks.onPostToolUse!( + { timestamp: 1, cwd: "/", toolName: "x", toolArgs: {}, toolResult: {} as never }, + { sessionId: "s" }, + ); + expect(result).toBeUndefined(); + expect(onHookError).toHaveBeenCalledTimes(1); + expect(onHookError.mock.calls[0]?.[0]).toEqual({ + hookName: "onPostToolUse", + error: expect.any(Error), + }); + expect((onHookError.mock.calls[0]?.[0]?.error as Error).message).toBe("post boom"); + }); + + it("isolates async rejections: returns undefined and notifies onHookError", async () => { + const onHookError = vi.fn(); + const hooks = createHooksBridge({ + onUserPromptSubmitted: async () => { + throw new Error("async boom"); + }, + onHookError, + 
})!; + const result = await hooks.onUserPromptSubmitted!( + { timestamp: 1, cwd: "/", prompt: "hi" }, + { sessionId: "s" }, + ); + expect(result).toBeUndefined(); + expect(onHookError).toHaveBeenCalledTimes(1); + expect(onHookError.mock.calls[0]?.[0]?.hookName).toBe("onUserPromptSubmitted"); + }); + + it("uses console.warn as the default onHookError", async () => { + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => undefined); + try { + const hooks = createHooksBridge({ + onErrorOccurred: () => { + throw new Error("default-error-handler"); + }, + })!; + const result = await hooks.onErrorOccurred!( + { timestamp: 1, cwd: "/", error: "x", errorContext: "system", recoverable: true }, + { sessionId: "s" }, + ); + expect(result).toBeUndefined(); + expect(warnSpy).toHaveBeenCalledTimes(1); + expect(String(warnSpy.mock.calls[0]?.[0])).toContain("onErrorOccurred"); + } finally { + warnSpy.mockRestore(); + } + }); + + it("never throws when onHookError itself throws", async () => { + const hooks = createHooksBridge({ + onSessionEnd: () => { + throw new Error("hook boom"); + }, + onHookError: () => { + throw new Error("notifier boom"); + }, + })!; + await expect( + hooks.onSessionEnd!({ timestamp: 1, cwd: "/", reason: "complete" }, { sessionId: "s" }), + ).resolves.toBeUndefined(); + }); + + it("preserves all six SDK hook handlers when supplied", async () => { + const config: CopilotHooksConfig = { + onPreToolUse: vi.fn().mockResolvedValue({ suppressOutput: true }), + onPostToolUse: vi.fn().mockResolvedValue({ suppressOutput: false }), + onUserPromptSubmitted: vi.fn().mockResolvedValue({ modifiedPrompt: "trimmed" }), + onSessionStart: vi.fn().mockResolvedValue({ additionalContext: "context" }), + onSessionEnd: vi.fn().mockResolvedValue({ sessionSummary: "done" }), + onErrorOccurred: vi.fn().mockResolvedValue({ errorHandling: "retry" as const }), + }; + const hooks = createHooksBridge(config)!; + expect(typeof hooks.onPreToolUse).toBe("function"); + 
expect(typeof hooks.onPostToolUse).toBe("function"); + expect(typeof hooks.onUserPromptSubmitted).toBe("function"); + expect(typeof hooks.onSessionStart).toBe("function"); + expect(typeof hooks.onSessionEnd).toBe("function"); + expect(typeof hooks.onErrorOccurred).toBe("function"); + }); + + it("forwards void returns transparently", async () => { + const hooks = createHooksBridge({ + onSessionStart: () => undefined, + })!; + const result = await hooks.onSessionStart!( + { timestamp: 1, cwd: "/", source: "new" }, + { sessionId: "s" }, + ); + expect(result).toBeUndefined(); + }); + + it("does not invoke unconfigured handlers' isolators", () => { + const hooks = createHooksBridge({ onPreToolUse: () => undefined })!; + // ensure the missing handlers are literally absent, not just nullable + expect("onPostToolUse" in hooks).toBe(false); + expect("onUserPromptSubmitted" in hooks).toBe(false); + }); +}); diff --git a/extensions/copilot/src/hooks-bridge.ts b/extensions/copilot/src/hooks-bridge.ts new file mode 100755 index 000000000000..6f0d020cb0d3 --- /dev/null +++ b/extensions/copilot/src/hooks-bridge.ts @@ -0,0 +1,134 @@ +/** + * Hooks bridge for the copilot agent runtime. + * + * BACK-POINTER: The host-side hook runner lives outside this package + * boundary in `src/agents/harness/lifecycle-hook-helpers.ts` (uses the + * plugin hook runner via `src/plugins/hook-runner-global.ts`). Per + * proposal §266 (todo `hooks-bridge`), this module provides a small + * contract surface that mirrors the SDK's `SessionHooks` shape; the + * core wiring layer constructs handlers that call into + * `runAgentHarnessLlmInputHook`, `runAgentHarnessLlmOutputHook`, + * `runAgentHarnessAgentEndHook`, etc., and threads them through + * `AttemptParamsLike.hooks`. + * + * Cross-package boundary note: the heavy host lifecycle helpers + * cannot be imported here (`tsconfig.package-boundary.base.json`). 
The + * bridge keeps the SDK hook contracts intact, wraps each provided + * handler in an error-isolating envelope so a thrown host hook cannot + * crash the SDK session, and returns a `SessionHooks` object that + * `createSessionConfig` can plug into `SessionConfig.hooks`. + * + * Note on default omission: if no handlers are supplied, the bridge + * returns `undefined` so that `SessionConfig.hooks` stays absent and + * the SDK skips the entire hook subsystem (matches the "no hooks + * installed" runtime behaviour the harness had pre-bridge). + */ + +import type { SessionConfig } from "@github/copilot-sdk"; + +// All hook handler types are derived from SessionHooks so this bridge +// stays pinned to the same SDK source the rest of the harness uses, +// without depending on the SDK re-exporting individual handler aliases +// (which it does not, as of @github/copilot-sdk@1.0.0-beta.4). +type SdkSessionHooks = NonNullable; +type PreToolUseHandler = NonNullable; +type PostToolUseHandler = NonNullable; +type UserPromptSubmittedHandler = NonNullable; +type SessionStartHandler = NonNullable; +type SessionEndHandler = NonNullable; +type ErrorOccurredHandler = NonNullable; + +export interface CopilotHooksConfig { + onPreToolUse?: PreToolUseHandler; + onPostToolUse?: PostToolUseHandler; + onUserPromptSubmitted?: UserPromptSubmittedHandler; + onSessionStart?: SessionStartHandler; + onSessionEnd?: SessionEndHandler; + onErrorOccurred?: ErrorOccurredHandler; + /** + * Optional hook-error notifier. Called whenever any wrapped handler + * throws (synchronously or as a Promise rejection). Defaults to + * `console.warn` so the failure is visible to operators without + * crashing the SDK session. Receives the SDK hook name and the + * raised error. 
+ */ + onHookError?: (info: { hookName: keyof SdkSessionHooks; error: unknown }) => void; +} + +const DEFAULT_HOOK_ERROR_HANDLER: NonNullable = ({ + hookName, + error, +}) => { + console.warn(`[copilot hooks-bridge] ${hookName} handler threw:`, error); +}; + +/** + * Wrap a host handler in an error-isolating envelope so it cannot + * throw out into the SDK. Returns `undefined` (no opinion) when the + * host handler throws, so the SDK falls back to its default behaviour + * for that hook. + */ +function isolate( + hookName: keyof SdkSessionHooks, + handler: ((...args: TArgs) => TResult | Promise) | undefined, + onError: NonNullable, +): ((...args: TArgs) => Promise) | undefined { + if (!handler) { + return undefined; + } + return async (...args: TArgs) => { + try { + return await handler(...args); + } catch (error) { + try { + onError({ hookName, error }); + } catch { + // never let the error notifier itself throw out + } + return undefined; + } + }; +} + +/** + * Build an SDK-shaped `SessionHooks` object from a host-supplied + * `CopilotHooksConfig`. Returns `undefined` when no handlers were + * supplied so the SDK skips the hook subsystem entirely. + */ +export function createHooksBridge(config?: CopilotHooksConfig): SdkSessionHooks | undefined { + if (!config) { + return undefined; + } + const onError = config.onHookError ?? 
DEFAULT_HOOK_ERROR_HANDLER; + const hooks: SdkSessionHooks = {}; + const pre = isolate("onPreToolUse", config.onPreToolUse, onError); + const post = isolate("onPostToolUse", config.onPostToolUse, onError); + const userPrompt = isolate("onUserPromptSubmitted", config.onUserPromptSubmitted, onError); + const sessionStart = isolate("onSessionStart", config.onSessionStart, onError); + const sessionEnd = isolate("onSessionEnd", config.onSessionEnd, onError); + const errorOccurred = isolate("onErrorOccurred", config.onErrorOccurred, onError); + + if (pre) { + hooks.onPreToolUse = pre as PreToolUseHandler; + } + if (post) { + hooks.onPostToolUse = post as PostToolUseHandler; + } + if (userPrompt) { + hooks.onUserPromptSubmitted = userPrompt as UserPromptSubmittedHandler; + } + if (sessionStart) { + hooks.onSessionStart = sessionStart as SessionStartHandler; + } + if (sessionEnd) { + hooks.onSessionEnd = sessionEnd as SessionEndHandler; + } + if (errorOccurred) { + hooks.onErrorOccurred = errorOccurred as ErrorOccurredHandler; + } + + if (Object.keys(hooks).length === 0) { + return undefined; + } + return hooks; +} diff --git a/extensions/copilot/src/permission-bridge.test.ts b/extensions/copilot/src/permission-bridge.test.ts new file mode 100755 index 000000000000..d1d663f86368 --- /dev/null +++ b/extensions/copilot/src/permission-bridge.test.ts @@ -0,0 +1,255 @@ +import type { + PermissionRequest as SdkPermissionRequest, + PermissionRequestResult as SdkPermissionRequestResult, +} from "@github/copilot-sdk"; +import { describe, expect, it, vi } from "vitest"; +import { + allowListPolicy, + allowOncePolicy, + composePolicies, + createPermissionBridge, + delegatingPolicy, + rejectAllPolicy, + REJECT_ALL_FEEDBACK, + type CopilotPermissionContext, + type CopilotPermissionPolicy, +} from "./permission-bridge.js"; + +function makeRequest(overrides: Partial = {}): SdkPermissionRequest { + return { + kind: "shell", + toolCallId: "call-1", + ...overrides, + }; +} + +function 
makeCtx(overrides: Partial = {}): CopilotPermissionContext { + return { + request: makeRequest(), + sessionId: "sess-1", + ...overrides, + }; +} + +describe("rejectAllPolicy", () => { + it("returns reject with the fail-closed feedback", async () => { + const result = await rejectAllPolicy(makeCtx()); + expect(result).toEqual({ kind: "reject", feedback: REJECT_ALL_FEEDBACK }); + }); +}); + +describe("allowOncePolicy", () => { + it("returns approve-once for every request kind", async () => { + for (const kind of [ + "shell", + "write", + "mcp", + "read", + "url", + "custom-tool", + "memory", + "hook", + ] as const) { + const result = await allowOncePolicy(makeCtx({ request: makeRequest({ kind }) })); + expect(result).toEqual({ kind: "approve-once" }); + } + }); +}); + +describe("allowListPolicy", () => { + it("approves listed kinds and rejects others with default feedback", async () => { + const policy = allowListPolicy({ kinds: ["read"] }); + const approved = await policy(makeCtx({ request: makeRequest({ kind: "read" }) })); + expect(approved).toEqual({ kind: "approve-once" }); + const rejected = await policy(makeCtx({ request: makeRequest({ kind: "shell" }) })); + expect(rejected).toEqual({ kind: "reject", feedback: REJECT_ALL_FEEDBACK }); + }); + + it("uses custom rejectFeedback when provided", async () => { + const policy = allowListPolicy({ + kinds: ["read"], + rejectFeedback: "only reads allowed", + }); + const result = await policy(makeCtx({ request: makeRequest({ kind: "write" }) })); + expect(result).toEqual({ kind: "reject", feedback: "only reads allowed" }); + }); + + it("supports multiple kinds in the allow-list", async () => { + const policy = allowListPolicy({ kinds: ["read", "write"] }); + expect(await policy(makeCtx({ request: makeRequest({ kind: "read" }) }))).toEqual({ + kind: "approve-once", + }); + expect(await policy(makeCtx({ request: makeRequest({ kind: "write" }) }))).toEqual({ + kind: "approve-once", + }); + expect((await policy(makeCtx({ 
request: makeRequest({ kind: "mcp" }) })))?.kind).toBe("reject"); + }); + + it("rejects all when given an empty allow-list", async () => { + const policy = allowListPolicy({ kinds: [] }); + for (const kind of ["shell", "read", "write"] as const) { + const result = await policy(makeCtx({ request: makeRequest({ kind }) })); + expect(result?.kind).toBe("reject"); + } + }); +}); + +describe("delegatingPolicy", () => { + it("forwards the request to the host callback and returns its decision", async () => { + const onRequest = vi.fn().mockResolvedValue({ + kind: "approve-for-session", + } satisfies SdkPermissionRequestResult); + const policy = delegatingPolicy({ onRequest }); + const ctx = makeCtx({ sessionId: "sess-xyz", request: makeRequest({ kind: "write" }) }); + const result = await policy(ctx); + expect(result).toEqual({ kind: "approve-for-session" }); + expect(onRequest).toHaveBeenCalledTimes(1); + expect(onRequest).toHaveBeenCalledWith(ctx); + }); + + it("returns the rejectAll default when host callback returns undefined", async () => { + const onRequest = vi.fn().mockResolvedValue(undefined); + const policy = delegatingPolicy({ onRequest }); + const result = await policy(makeCtx()); + expect(result).toEqual({ kind: "reject", feedback: REJECT_ALL_FEEDBACK }); + }); + + it("rejects with the error message when host callback throws", async () => { + const onRequest = vi + .fn() + .mockRejectedValue(new Error("host policy boom")); + const policy = delegatingPolicy({ onRequest }); + const result = await policy(makeCtx()); + expect(result?.kind).toBe("reject"); + expect((result as { feedback?: string }).feedback).toContain("host policy boom"); + }); + + it("falls back to onError policy when host callback throws", async () => { + const onError = vi.fn().mockResolvedValue({ kind: "approve-once" }); + const policy = delegatingPolicy({ + onRequest: () => { + throw new Error("host policy boom"); + }, + onError, + }); + const result = await policy(makeCtx()); + 
expect(result).toEqual({ kind: "approve-once" }); + expect(onError).toHaveBeenCalledTimes(1); + }); + + it("falls through to a hard-coded reject if onError also throws", async () => { + const policy = delegatingPolicy({ + onRequest: () => { + throw new Error("host boom"); + }, + onError: () => { + throw new Error("fallback boom"); + }, + }); + const result = await policy(makeCtx()); + expect(result?.kind).toBe("reject"); + expect((result as { feedback?: string }).feedback).toContain("host boom"); + }); + + it("formats non-Error throws via JSON.stringify", async () => { + const policy = delegatingPolicy({ + onRequest: () => { + throw { code: 42, msg: "weird" } as unknown as Error; + }, + }); + const result = await policy(makeCtx()); + expect((result as { feedback?: string }).feedback).toContain('"code":42'); + }); +}); + +describe("composePolicies", () => { + it("returns the first non-undefined result and skips subsequent policies", async () => { + const a: CopilotPermissionPolicy = () => undefined; + const b: CopilotPermissionPolicy = () => ({ kind: "approve-once" }); + const c = vi.fn(() => ({ + kind: "reject", + feedback: "should never run", + })); + const policy = composePolicies(a, b, c); + const result = await policy(makeCtx()); + expect(result).toEqual({ kind: "approve-once" }); + expect(c).not.toHaveBeenCalled(); + }); + + it("falls through to fail-closed reject when all policies return undefined", async () => { + const policy = composePolicies( + () => undefined, + () => undefined, + ); + const result = await policy(makeCtx()); + expect(result).toEqual({ kind: "reject", feedback: REJECT_ALL_FEEDBACK }); + }); + + it("short-circuits to reject if any policy throws (does not consult later policies)", async () => { + const later = vi.fn(() => ({ kind: "approve-once" })); + const policy = composePolicies(() => { + throw new Error("nope"); + }, later); + const result = await policy(makeCtx()); + expect(result?.kind).toBe("reject"); + expect((result as { 
feedback?: string }).feedback).toContain("nope"); + expect(later).not.toHaveBeenCalled(); + }); +}); + +describe("createPermissionBridge", () => { + it("adapts a policy to the SDK PermissionHandler shape", async () => { + const handler = createPermissionBridge(allowOncePolicy); + const result = await handler(makeRequest(), { sessionId: "sess-1" }); + expect(result).toEqual({ kind: "approve-once" }); + }); + + it("defaults to rejectAllPolicy when no policy is passed", async () => { + const handler = createPermissionBridge(); + const result = await handler(makeRequest({ kind: "shell" }), { sessionId: "sess-1" }); + expect(result).toEqual({ kind: "reject", feedback: REJECT_ALL_FEEDBACK }); + }); + + it("forwards the SDK sessionId into the policy context", async () => { + const policy = vi.fn(() => ({ kind: "approve-once" })); + const handler = createPermissionBridge(policy); + await handler(makeRequest({ kind: "read" }), { sessionId: "sess-xyz" }); + expect(policy).toHaveBeenCalledTimes(1); + expect(policy.mock.calls[0]?.[0]).toEqual({ + sessionId: "sess-xyz", + request: { kind: "read", toolCallId: "call-1" }, + }); + }); + + it("never throws when policy throws; returns reject with the error message instead", async () => { + const handler = createPermissionBridge(() => { + throw new Error("policy boom"); + }); + const result = await handler(makeRequest(), { sessionId: "sess-1" }); + expect(result?.kind).toBe("reject"); + expect((result as { feedback?: string }).feedback).toContain("policy boom"); + }); + + it("never returns undefined: a policy returning undefined yields fail-closed reject", async () => { + const handler = createPermissionBridge(() => undefined); + const result = await handler(makeRequest(), { sessionId: "sess-1" }); + expect(result).toEqual({ kind: "reject", feedback: REJECT_ALL_FEEDBACK }); + }); + + it("handles all SDK permission kinds without throwing", async () => { + const handler = createPermissionBridge(allowOncePolicy); + for (const kind of [ 
+ "shell", + "write", + "mcp", + "read", + "url", + "custom-tool", + "memory", + "hook", + ] as const) { + const result = await handler(makeRequest({ kind }), { sessionId: "sess-1" }); + expect(result).toEqual({ kind: "approve-once" }); + } + }); +}); diff --git a/extensions/copilot/src/permission-bridge.ts b/extensions/copilot/src/permission-bridge.ts new file mode 100755 index 000000000000..94b740e93d5b --- /dev/null +++ b/extensions/copilot/src/permission-bridge.ts @@ -0,0 +1,219 @@ +/** + * Permission bridge for the copilot agent runtime. + * + * BACK-POINTER: The full runtime-neutral permission/tool-policy logic + * lives in `src/agents/pi-tools.before-tool-call.ts` (820 LOC, exports + * `runBeforeToolCallHook`, `BeforeToolCallBlockedError`, etc.). Per Q4 + * (proposal section 3.4), we deliberately do NOT extract a shared helper + * - PI source stays untouched. Instead, this module: + * + * 1. Defines a small `CopilotPermissionPolicy` contract that the + * host can implement to mirror PI's policy decisions for the + * copilot agent runtime. + * 2. Provides built-in policies for common defaults (fail-closed, + * approve-all-for-test, allow-list-by-kind). + * 3. Provides a `delegatingPolicy({ onRequest })` so the core layer + * can plug in a host-side callback that calls into + * `runBeforeToolCallHook` / `effective-tool-policy` and returns + * the SDK-shaped decision. + * 4. Adapts the resulting policy into the SDK's + * `PermissionHandler` shape via `createPermissionBridge(policy)`. + * + * Cross-package boundary note: the heavy `pi-tools.before-tool-call` + * surface cannot be imported here (`tsconfig.package-boundary.base.json`). + * The host bridges core PI logic into this module by injecting a + * `delegatingPolicy` from the core wiring layer that constructs + * `AgentHarnessAttemptParams` for the copilot agent runtime. + * + * If PI's permission semantics change materially, the contract here + * must be revisited in lockstep. 
/*
 * The unit tests in `permission-bridge.test.ts` exercise the SDK-shaped
 * decision envelope so any silent drift in the SDK type is caught at
 * typecheck.
 */

import type {
  PermissionHandler,
  PermissionRequest as SdkPermissionRequest,
  PermissionRequestResult as SdkPermissionRequestResult,
} from "@github/copilot-sdk";

/** Request shape forwarded to host-implemented policies. */
export interface CopilotPermissionContext {
  /** SDK session id that originated the request. */
  sessionId: string;
  /** Original SDK request payload. */
  request: SdkPermissionRequest;
}

/**
 * Policy contract. Implementors return an SDK-shaped decision (or a
 * Promise of one).
 *
 * Returning `undefined` is treated as "no opinion" and falls through to
 * the default fail-closed decision (`reject` with `REJECT_ALL_FEEDBACK`).
 * This keeps composition trivial without requiring explicit `reject`
 * returns from every code path.
 */
export type CopilotPermissionPolicy = (
  ctx: CopilotPermissionContext,
) =>
  | SdkPermissionRequestResult
  | undefined
  | Promise<SdkPermissionRequestResult | undefined>;

/** Built-in fail-closed default. Mirrors the pre-bridge attempt.ts stub. */
export const REJECT_ALL_FEEDBACK =
  "copilot agent runtime: no permission policy installed (fail-closed default)";

/** Policy that rejects every request with `REJECT_ALL_FEEDBACK`. */
export const rejectAllPolicy: CopilotPermissionPolicy = () => ({
  kind: "reject",
  feedback: REJECT_ALL_FEEDBACK,
});

/**
 * Approve every request as "approve-once". Use only in tests / live
 * smoke runs where the operator has accepted the risk. This is the
 * SDK-bundled `approveAll` behavior re-exported as an explicit named
 * policy so test sites can opt in without `@github/copilot-sdk`
 * imports leaking into call sites.
 */
export const allowOncePolicy: CopilotPermissionPolicy = () => ({
  kind: "approve-once",
});

export interface AllowListPolicyOptions {
  /** Permission kinds that should be approved once. */
  kinds: ReadonlyArray<SdkPermissionRequest["kind"]>;
  /** Optional feedback text attached to rejections. */
  rejectFeedback?: string;
}

/**
 * Approve requests whose `kind` is in the allow-list; reject everything
 * else with `rejectFeedback` (defaulting to `REJECT_ALL_FEEDBACK`).
 *
 * @param options - Allowed kinds plus optional rejection feedback.
 * @returns A synchronous policy; it never returns `undefined`.
 */
export function allowListPolicy(options: AllowListPolicyOptions): CopilotPermissionPolicy {
  const allowed = new Set<SdkPermissionRequest["kind"]>(options.kinds);
  const feedback = options.rejectFeedback ?? REJECT_ALL_FEEDBACK;
  return ({ request }) => {
    if (allowed.has(request.kind)) {
      return { kind: "approve-once" };
    }
    return { kind: "reject", feedback };
  };
}

export interface DelegatingPolicyOptions {
  /**
   * Host-supplied callback. Returning `undefined` falls through to the
   * fail-closed default. Throwing falls back to the configured
   * `onError` policy if provided; otherwise the throw is converted to a
   * reject with the error message embedded in `feedback` (so the model
   * sees the diagnostic instead of a generic RPC failure).
   */
  onRequest: CopilotPermissionPolicy;
  /**
   * Optional fallback when `onRequest` throws. If omitted, throws are
   * reflected back as `reject` with the error message in `feedback`.
   * If supplied and `onError` also throws, fall through to the
   * error-message reject.
   */
  onError?: CopilotPermissionPolicy;
}

/** Fail-closed decision used whenever no policy expresses an opinion. */
function rejectByDefault(): SdkPermissionRequestResult {
  return { kind: "reject", feedback: REJECT_ALL_FEEDBACK };
}

/** Reject decision that carries a thrown policy error's text in `feedback`. */
function rejectThrown(error: unknown): SdkPermissionRequestResult {
  return {
    kind: "reject",
    feedback: `copilot permission policy threw: ${formatError(error)}`,
  };
}

/**
 * Wrap a host callback into a policy, catching synchronous throws and
 * async rejections so the SDK never sees an exception (which would
 * surface as a generic RPC failure to the model).
 *
 * @param options - The host callback and an optional error fallback.
 * @returns An async policy that always resolves to a decision.
 */
export function delegatingPolicy(options: DelegatingPolicyOptions): CopilotPermissionPolicy {
  const { onRequest, onError } = options;
  return async (ctx) => {
    try {
      const result = await onRequest(ctx);
      return result !== undefined ? result : rejectByDefault();
    } catch (error) {
      if (onError) {
        try {
          const fallback = await onError(ctx);
          if (fallback !== undefined) {
            return fallback;
          }
        } catch {
          // onError failed as well: report the ORIGINAL onRequest error below.
        }
      }
      return rejectThrown(error);
    }
  };
}

/**
 * Compose policies in order. The first policy to return a non-undefined
 * result wins. If all return undefined, a fail-closed `reject` is
 * produced. Throws inside any policy short-circuit to `reject` with the
 * error message; downstream policies are not consulted after a throw
 * (so a misbehaving host policy cannot mask itself by being followed by
 * an allow-policy).
 */
export function composePolicies(...policies: CopilotPermissionPolicy[]): CopilotPermissionPolicy {
  return async (ctx) => {
    for (const policy of policies) {
      try {
        const result = await policy(ctx);
        if (result !== undefined) {
          return result;
        }
      } catch (error) {
        return rejectThrown(error);
      }
    }
    return rejectByDefault();
  };
}

/**
 * Adapt a `CopilotPermissionPolicy` to the SDK's
 * `PermissionHandler` shape. The returned handler always resolves
 * (never rejects), defaulting to fail-closed when the policy returns
 * undefined or throws.
 *
 * @param policy - Policy to consult; defaults to `rejectAllPolicy`.
 */
export function createPermissionBridge(
  policy: CopilotPermissionPolicy = rejectAllPolicy,
): PermissionHandler {
  return async (request, invocation) => {
    const ctx: CopilotPermissionContext = {
      request,
      sessionId: invocation.sessionId,
    };
    try {
      const result = await policy(ctx);
      if (result !== undefined) {
        return result;
      }
    } catch (error) {
      return rejectThrown(error);
    }
    return rejectByDefault();
  };
}

/**
 * Render an arbitrary thrown value as text for the `feedback` field.
 *
 * `Error` instances contribute their `message`; other values are JSON
 * encoded. `JSON.stringify` returns `undefined` (not a string) for
 * `undefined`, symbols and functions, and throws for BigInt and circular
 * structures; all of those fall back to `String(error)` so the declared
 * `string` return type always holds.
 */
function formatError(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  try {
    const serialized: unknown = JSON.stringify(error);
    if (typeof serialized === "string") {
      return serialized;
    }
  } catch {
    // Not JSON-serializable: use the String() fallback below.
  }
  return String(error);
}
// diff --git a/extensions/copilot/src/replay-shim.test.ts b/extensions/copilot/src/replay-shim.test.ts
// new file mode 100755
// index 000000000000..65917d3d9373
// --- /dev/null
// +++ b/extensions/copilot/src/replay-shim.test.ts
// @@ -0,0 +1,301 @@
import { describe, expect, it } from "vitest";
import {
  classifyResumeFailure,
  computeReplayMetadata,
  copilotToolMetasHavePotentialSideEffects,
  decideReplayAction,
} from "./replay-shim.js";

/** Classification expected for failures that must surface to the caller. */
const UNRECOVERABLE = { recoverable: false, kind: "unknown" };

/** Classification expected for session-gone failures. */
const MISSING = { recoverable: true, kind: "missing" };

/** Builds the metadata shape returned by `computeReplayMetadata`. */
function metadata(hadPotentialSideEffects: boolean, replaySafe: boolean) {
  return { hadPotentialSideEffects, replaySafe };
}

/** Runs the side-effect detector against a single tool name. */
function hasSideEffects(toolName: string): boolean {
  return copilotToolMetasHavePotentialSideEffects([{ toolName }]);
}

describe("decideReplayAction", () => {
  const createWithoutSessionId = {
    action: "create",
    downgradedFromResume: false,
    downgradeReason: "no-sdk-session-id",
  };

  it("returns create when no input is supplied", () => {
    const decision = decideReplayAction();
    expect(decision).toEqual({
      action: "create",
      downgradedFromResume: false,
      downgradeReason: "no-replay-state",
    });
  });

  it("returns create when sdkSessionId is absent", () => {
    expect(decideReplayAction({})).toEqual(createWithoutSessionId);
    expect(decideReplayAction({ replayInvalid: false })).toEqual(createWithoutSessionId);
  });

  it("returns create for empty or whitespace-only sdkSessionId", () => {
    const blankIds = ["", " ", "\t\n"];
    blankIds.forEach((sdkSessionId) => {
      expect(decideReplayAction({ sdkSessionId })).toMatchObject({
        action: "create",
        downgradeReason: "no-sdk-session-id",
      });
    });
  });

  it("returns resume when sdkSessionId is present and replayInvalid is not true", () => {
    const withoutFlag = decideReplayAction({ sdkSessionId: "sess-1" });
    expect(withoutFlag).toEqual({
      action: "resume",
      sdkSessionId: "sess-1",
      downgradedFromResume: false,
    });

    const withFalseFlag = decideReplayAction({ sdkSessionId: "sess-2", replayInvalid: false });
    expect(withFalseFlag).toEqual({
      action: "resume",
      sdkSessionId: "sess-2",
      downgradedFromResume: false,
    });
  });

  it("trims whitespace around sdkSessionId before resuming", () => {
    const decision = decideReplayAction({ sdkSessionId: " sess-3 " });
    expect(decision).toEqual({
      action: "resume",
      sdkSessionId: "sess-3",
      downgradedFromResume: false,
    });
  });

  it("downgrades to create when replayInvalid is true even with sdkSessionId", () => {
    const decision = decideReplayAction({ sdkSessionId: "sess-4", replayInvalid: true });
    expect(decision).toEqual({
      action: "create",
      downgradedFromResume: true,
      downgradeReason: "replay-invalid",
    });
  });
});

describe("classifyResumeFailure", () => {
  it("treats undefined / null as unrecoverable", () => {
    expect(classifyResumeFailure(undefined)).toEqual(UNRECOVERABLE);
    expect(classifyResumeFailure(null)).toEqual(UNRECOVERABLE);
  });

  it("treats a generic Error as unrecoverable", () => {
    expect(classifyResumeFailure(new Error("boom"))).toEqual(UNRECOVERABLE);
  });

  it("treats a non-Error throw value as unrecoverable", () => {
    expect(classifyResumeFailure("string-error")).toEqual(UNRECOVERABLE);
    expect(classifyResumeFailure(42)).toEqual(UNRECOVERABLE);
  });

  it("classifies status:404 errors as missing/recoverable", () => {
    const notFound = Object.assign(new Error("Not Found"), { status: 404 });
    expect(classifyResumeFailure(notFound)).toEqual(MISSING);
  });

  it("classifies statusCode:404 errors as missing/recoverable", () => {
    const notFound = Object.assign(new Error("Not Found"), { statusCode: 404 });
    expect(classifyResumeFailure(notFound)).toEqual(MISSING);
  });

  it("classifies recognised code strings as missing/recoverable", () => {
    const codes = ["SESSION_NOT_FOUND", "session_not_found", "NotFound", "ENOENT"];
    codes.forEach((code) => {
      const coded = Object.assign(new Error("session gone"), { code });
      expect(classifyResumeFailure(coded)).toEqual(MISSING);
    });
  });

  it("classifies recognised message patterns as missing/recoverable", () => {
    const recognised = [
      "session not found",
      "Session sess-1 not found",
      "Unknown session id sess-1",
      "session id sess-1 does not exist",
      "no such session",
    ];
    recognised.forEach((text) => {
      expect(classifyResumeFailure(new Error(text))).toEqual(MISSING);
    });
  });

  it("does not over-match unrelated errors", () => {
    const unrelated = ["network ECONNRESET", "Unauthorized", "rate limit exceeded"];
    unrelated.forEach((text) => {
      expect(classifyResumeFailure(new Error(text))).toEqual(UNRECOVERABLE);
    });
  });

  it("reads message from plain objects with a message string", () => {
    const plain = { message: "session not found" };
    expect(classifyResumeFailure(plain)).toEqual(MISSING);
  });

  it("prefers structured signals over message heuristics", () => {
    // status:404 wins even when message is unrelated
    const mixed = Object.assign(new Error("Internal server error"), { status: 404 });
    expect(classifyResumeFailure(mixed)).toEqual(MISSING);
  });
});

describe("computeReplayMetadata", () => {
  it("clean attempt with no prior state → replaySafe true", () => {
    expect(computeReplayMetadata({})).toEqual(metadata(false, true));
  });

  it("timeout flips both flags", () => {
    const result = computeReplayMetadata({ thisAttemptTimedOut: true });
    expect(result).toEqual(metadata(true, false));
  });

  it("prior side effects propagate forward", () => {
    const result = computeReplayMetadata({ priorHadPotentialSideEffects: true });
    expect(result).toEqual(metadata(true, false));
  });

  it("current attempt side effects make replay unsafe", () => {
    const result = computeReplayMetadata({ thisAttemptHadPotentialSideEffects: true });
    expect(result).toEqual(metadata(true, false));
  });

  it("prior replayInvalid invalidates replay even without side effects", () => {
    const result = computeReplayMetadata({ priorReplayInvalid: true });
    expect(result).toEqual(metadata(false, false));
  });

  it("downgradedFromResume invalidates replay even without side effects", () => {
    const result = computeReplayMetadata({ thisAttemptDowngradedFromResume: true });
    expect(result).toEqual(metadata(false, false));
  });

  it("resumeFailureRecovered invalidates replay even without side effects", () => {
    const result = computeReplayMetadata({ thisAttemptResumeFailureRecovered: true });
    expect(result).toEqual(metadata(false, false));
  });

  it("combinations: prior side effects + timeout still hadSideEffects:true (no double-count)", () => {
    const result = computeReplayMetadata({
      priorHadPotentialSideEffects: true,
      thisAttemptTimedOut: true,
    });
    expect(result).toEqual(metadata(true, false));
  });

  it("combinations: clean attempt with prior replayInvalid+sideEffects propagates both invariants", () => {
    const result = computeReplayMetadata({
      priorReplayInvalid: true,
      priorHadPotentialSideEffects: true,
    });
    expect(result).toEqual(metadata(true, false));
  });

  it("treats explicit false flags as if they were absent", () => {
    const result = computeReplayMetadata({
      priorReplayInvalid: false,
      priorHadPotentialSideEffects: false,
      thisAttemptTimedOut: false,
      thisAttemptDowngradedFromResume: false,
      thisAttemptResumeFailureRecovered: false,
    });
    expect(result).toEqual(metadata(false, true));
  });
});

describe("copilotToolMetasHavePotentialSideEffects", () => {
  it("detects mutating tool names", () => {
    const mutating = [
      "write",
      "message_send",
      "browser",
      "file_fetch",
      "file_write",
      "read_and_delete",
      "search_and_replace",
      "session_status",
    ];
    mutating.forEach((toolName) => {
      expect(hasSideEffects(toolName)).toBe(true);
    });
  });

  it("treats read-only tool names as replay-safe", () => {
    const readOnly = [
      "read",
      "search",
      "status",
      "file_read",
      "memory_get",
      "memory_search",
      "sessions_history",
      "sessions_list",
      "tool_search",
      "web_fetch",
      "web_search",
    ];
    readOnly.forEach((toolName) => {
      expect(hasSideEffects(toolName)).toBe(false);
    });
  });

  it("detects async-started tools even without a mutating name", () => {
    const asyncRead = [{ asyncStarted: true, toolName: "read" }];
    expect(copilotToolMetasHavePotentialSideEffects(asyncRead)).toBe(true);
  });
});

// diff --git a/extensions/copilot/src/replay-shim.ts b/extensions/copilot/src/replay-shim.ts
// new file mode 100755
// index 000000000000..8997c7ff7b5f
// --- /dev/null
// +++ b/extensions/copilot/src/replay-shim.ts
// @@ -0,0 +1,252 @@
// Replay-shim for the GitHub Copilot agent runtime.
//
// Owns three concerns:
//   1. Pre-call: should this attempt resume an existing SDK session or
//      start a new one? Honours `initialReplayState.sdkSessionId` and
//      `initialReplayState.replayInvalid`.
//   2. Post-call: if `resumeSession` fails, was the failure recoverable
//      (session-gone) so we should downgrade to `createSession`, or
//      unrecoverable so the error should surface as a prompt error?
//   3. Result-time: compute the `replayMetadata` to attach to the attempt
//      result, propagating prior state with worst-case-wins semantics so
//      the orchestrator never replays an attempt that may have committed
//      partial side effects.
//
// Host back-pointers (NOT imported here to keep the package boundary
// clean):
//   - `src/agents/pi-embedded-runner/replay-state.ts` — canonical
//     `EmbeddedRunReplayState` / `EmbeddedRunReplayMetadata` shapes
//     and `replayMetadataFromState`.
//   - `src/agents/pi-embedded-runner/run/types.ts` —
//     `AgentHarnessAttemptResult.replayMetadata` field requirement.
/** Outcome of the pre-call decision: resume an SDK session or create one. */
export type ReplayDecision =
  | {
      readonly action: "resume";
      readonly sdkSessionId: string;
      readonly downgradedFromResume: false;
    }
  | {
      readonly action: "create";
      readonly downgradedFromResume: boolean;
      readonly downgradeReason: "no-replay-state" | "no-sdk-session-id" | "replay-invalid";
    };

/** Replay state consulted by `decideReplayAction`. */
export interface ReplayShimInput {
  readonly sdkSessionId?: string;
  readonly replayInvalid?: boolean;
}

/** Returns the trimmed session id, or `undefined` for non-strings and blank strings. */
function normalizeSdkSessionId(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}

/**
 * Pure pre-call decision: should attempt.ts call resumeSession or
 * createSession?
 *
 * Rules:
 *   - No input                              → create (no-replay-state)
 *   - No (trimmed) sdkSessionId             → create (no-sdk-session-id)
 *   - sdkSessionId + replayInvalid=true     → create (replay-invalid),
 *                                             downgradedFromResume=true
 *   - sdkSessionId + replayInvalid=false    → resume
 *
 * @param input - Prior replay state, if any.
 * @returns The action to take; `resume` carries the trimmed session id.
 */
export function decideReplayAction(input?: ReplayShimInput): ReplayDecision {
  if (!input) {
    return {
      action: "create",
      downgradedFromResume: false,
      downgradeReason: "no-replay-state",
    };
  }
  const sdkSessionId = normalizeSdkSessionId(input.sdkSessionId);
  if (!sdkSessionId) {
    return {
      action: "create",
      downgradedFromResume: false,
      downgradeReason: "no-sdk-session-id",
    };
  }
  if (input.replayInvalid === true) {
    return {
      action: "create",
      downgradedFromResume: true,
      downgradeReason: "replay-invalid",
    };
  }
  return {
    action: "resume",
    sdkSessionId,
    downgradedFromResume: false,
  };
}

export type ResumeFailureKind = "missing" | "unknown";

/** Result of classifying a `resumeSession()` failure. */
export interface ResumeFailureClassification {
  readonly recoverable: boolean;
  readonly kind: ResumeFailureKind;
}

/** Exact `code` values treated as "the session no longer exists". */
const MISSING_SESSION_CODES = new Set<string>([
  "SESSION_NOT_FOUND",
  "session_not_found",
  "NotFound",
  "ENOENT",
]);

/** Case-insensitive message patterns treated as "the session no longer exists". */
const MISSING_SESSION_MESSAGE_PATTERNS: readonly RegExp[] = [
  /\bsession not found\b/i,
  /\bsession .* not found\b/i,
  /\bunknown session id\b/i,
  /\bsession id .* (does not exist|not found)\b/i,
  /\bsession .* does not exist\b/i,
  /\bno such session\b/i,
];

/** Reads `error[key]` when `error` is a non-null object; otherwise `undefined`. */
function readErrorField(error: unknown, key: string): unknown {
  if (!error || typeof error !== "object") {
    return undefined;
  }
  return (error as Record<string, unknown>)[key];
}

/**
 * Post-call: classify a resumeSession() failure so attempt.ts can
 * decide whether to downgrade silently to createSession.
 *
 * Conservative: only treats clearly session-gone signals as recoverable.
 * Structured signals (status === 404, recognised code strings) are
 * checked first; message matching is a fallback because SDK error
 * messages are not part of the typed contract.
 *
 * Everything else (transport errors, auth failures, generic Error) is
 * unrecoverable and should surface to the outer attempt.ts try/catch
 * which converts it to a prompt error.
 *
 * @param error - The value thrown by the resume call (any type).
 * @returns `{ recoverable: true, kind: "missing" }` for session-gone
 *   signals, otherwise `{ recoverable: false, kind: "unknown" }`.
 */
export function classifyResumeFailure(error: unknown): ResumeFailureClassification {
  if (error === undefined || error === null) {
    return { recoverable: false, kind: "unknown" };
  }

  // Structured signals first: HTTP-style status fields, then known codes.
  if (readErrorField(error, "status") === 404 || readErrorField(error, "statusCode") === 404) {
    return { recoverable: true, kind: "missing" };
  }
  const code = readErrorField(error, "code");
  if (typeof code === "string" && MISSING_SESSION_CODES.has(code)) {
    return { recoverable: true, kind: "missing" };
  }

  // Message heuristics last. Error instances and plain objects both expose
  // `message` as a property; primitives yield `undefined` here.
  const message = readErrorField(error, "message");
  if (
    typeof message === "string" &&
    MISSING_SESSION_MESSAGE_PATTERNS.some((pattern) => pattern.test(message))
  ) {
    return { recoverable: true, kind: "missing" };
  }

  return { recoverable: false, kind: "unknown" };
}

/** Flags consumed by `computeReplayMetadata`; an absent flag is treated as `false`. */
export interface ReplayMetadataComputeInput {
  readonly priorReplayInvalid?: boolean;
  readonly priorHadPotentialSideEffects?: boolean;
  readonly thisAttemptTimedOut?: boolean;
  readonly thisAttemptHadPotentialSideEffects?: boolean;
  readonly thisAttemptDowngradedFromResume?: boolean;
  readonly thisAttemptResumeFailureRecovered?: boolean;
}

/** Replay metadata attached to an attempt result. */
export interface ComputedReplayMetadata {
  readonly hadPotentialSideEffects: boolean;
  readonly replaySafe: boolean;
}
/**
 * Derive the replay metadata for an attempt result from prior state and
 * this attempt's flags. Only a flag that is exactly `true` counts; the
 * worst case wins:
 *
 *   hadPotentialSideEffects = priorHadPotentialSideEffects OR timedOut
 *                             OR thisAttemptHadPotentialSideEffects
 *     (a timeout cannot prove the prompt was not partially committed
 *      server-side, so it is treated as side-effecting and the
 *      orchestrator will not blindly re-issue the same prompt).
 *
 *   replaySafe = NOT (
 *     priorReplayInvalid
 *     OR thisAttemptDowngradedFromResume
 *     OR thisAttemptResumeFailureRecovered
 *     OR hadPotentialSideEffects
 *   )
 *
 * Matches the parity rule in
 * `src/agents/pi-embedded-runner/replay-state.ts#replayMetadataFromState`.
 */
export function computeReplayMetadata(input: ReplayMetadataComputeInput): ComputedReplayMetadata {
  const isSet = (flag: boolean | undefined): boolean => flag === true;

  const sideEffectSignals = [
    input.priorHadPotentialSideEffects,
    input.thisAttemptTimedOut,
    input.thisAttemptHadPotentialSideEffects,
  ];
  const invalidationSignals = [
    input.priorReplayInvalid,
    input.thisAttemptDowngradedFromResume,
    input.thisAttemptResumeFailureRecovered,
  ];

  const hadPotentialSideEffects = sideEffectSignals.some(isSet);
  const replayBlocked = hadPotentialSideEffects || invalidationSignals.some(isSet);
  return { hadPotentialSideEffects, replaySafe: !replayBlocked };
}

/** Tool names (lower-case) that are considered read-only and therefore replay-safe. */
const COPILOT_REPLAY_SAFE_READ_ONLY_TOOL_NAMES = new Set([
  "get",
  "file_read",
  "glob",
  "grep",
  "inspect",
  "list",
  "ls",
  "memory_get",
  "memory_search",
  "probe",
  "query",
  "read",
  "search",
  "sessions_history",
  "sessions_list",
  "status",
  "tool_search",
  "update_plan",
  "view",
  "web_fetch",
  "web_search",
]);

/**
 * Report whether any recorded tool call may have had side effects: it was
 * started asynchronously, or its name is not on the read-only allow-list.
 * A missing or empty list yields `false`.
 */
export function copilotToolMetasHavePotentialSideEffects(
  toolMetas?: readonly { asyncStarted?: boolean; toolName: string }[],
): boolean {
  const metas = toolMetas ?? [];
  return metas.some((meta) => {
    if (meta.asyncStarted === true) {
      return true;
    }
    return !isReplaySafeReadOnlyToolName(meta.toolName);
  });
}

/** Allow-list lookup on the trimmed, lower-cased tool name. */
function isReplaySafeReadOnlyToolName(toolName: string): boolean {
  const lookupKey = toolName.trim().toLowerCase();
  return COPILOT_REPLAY_SAFE_READ_ONLY_TOOL_NAMES.has(lookupKey);
}
// diff --git a/extensions/copilot/src/runtime.test.ts b/extensions/copilot/src/runtime.test.ts
// new file mode 100644
// index 000000000000..97f9b9a5302c
// --- /dev/null
// +++ b/extensions/copilot/src/runtime.test.ts
// @@ -0,0 +1,488 @@
import { normalize, resolve, sep } from "node:path";
import type { CopilotClient, CopilotClientOptions } from "@github/copilot-sdk";
import { afterEach, describe, expect, it, vi } from "vitest";
import type { ClientCreateOptions, PoolKey } from "./runtime.js";
import { createCopilotClientPool } from "./runtime.js";

/** Minimal stand-in for `CopilotClient`; every method is a vitest mock. */
interface FakeClient {
  readonly id: number;
  readonly copilotHome: string;
  readonly start: ReturnType<typeof vi.fn>;
  readonly stop: ReturnType<typeof vi.fn>;
  readonly createSession: ReturnType<typeof vi.fn>;
  readonly disconnect: ReturnType<typeof vi.fn>;
}

/** Hooks that let a test replace client construction or the `stop()` result. */
interface FakeFactoryOptions {
  readonly create?: (
    opts: CopilotClientOptions,
    id: number,
  ) => CopilotClient | Promise<CopilotClient>;
  readonly stop?: (client: FakeClient) => Promise<Error[]> | Error[];
}

/** Creates a promise together with its externally callable resolve/reject. */
function createDeferred<T>() {
  let resolveValue: ((value: T | PromiseLike<T>) => void) | undefined;
  let rejectValue: ((reason?: unknown) => void) | undefined;
  const promise = new Promise<T>((resolvePromise, rejectPromise) => {
    resolveValue = resolvePromise;
    rejectValue = rejectPromise;
  });
  return {
    promise,
    resolve(value: T) {
      resolveValue?.(value);
    },
    reject(reason: unknown) {
      rejectValue?.(reason);
    },
  };
}

/**
 * Expected normalized form of a copilotHome path: resolved, normalized,
 * without a trailing separator, and lower-cased on win32.
 */
function normalizeHomeForTest(copilotHome: string): string {
  let normalizedHome = resolve(copilotHome);
  normalizedHome = normalize(normalizedHome);
  if (normalizedHome.endsWith(sep) && normalizedHome.length > 1) {
    normalizedHome = normalizedHome.slice(0, -1);
  }
  if (process.platform === "win32") {
    normalizedHome = normalizedHome.toLowerCase();
  }
  return normalizedHome;
}

/** Builds a `PoolKey` with test defaults; `overrides` wins where set. */
function makeKey(overrides: Partial<PoolKey> = {}): PoolKey {
  return {
    agentId: overrides.agentId ?? "agent-1",
    copilotHome: overrides.copilotHome ?? "copilot-home",
    authMode: overrides.authMode ?? "useLoggedInUser",
    authProfileId: overrides.authProfileId,
    authProfileVersion: overrides.authProfileVersion,
  };
}

/** Builds `ClientCreateOptions` with test defaults; `overrides` wins where set. */
function makeOptions(overrides: Partial<ClientCreateOptions> = {}): ClientCreateOptions {
  return {
    copilotHome: overrides.copilotHome ?? "copilot-home",
    useLoggedInUser: overrides.useLoggedInUser ?? true,
    gitHubToken: overrides.gitHubToken,
    cwd: overrides.cwd,
  };
}

/**
 * Builds a fake SDK factory that records constructor options (`ctorCalls`),
 * created clients (`instances`) and the ids of stopped clients (`stops`).
 */
function makeFake(options: FakeFactoryOptions = {}) {
  const stops: number[] = [];
  const ctorCalls: CopilotClientOptions[] = [];
  const instances: FakeClient[] = [];
  let nextId = 0;

  const fake = async (clientOptions: CopilotClientOptions) => {
    ctorCalls.push(clientOptions);
    const id = ++nextId;
    if (options.create) {
      return options.create(clientOptions, id);
    }

    const client: FakeClient = {
      id,
      copilotHome: clientOptions.copilotHome ?? "",
      start: vi.fn(async () => undefined),
      stop: vi.fn(async () => {
        stops.push(id);
        if (options.stop) {
          return options.stop(client);
        }
        return [];
      }),
      createSession: vi.fn(async () => ({})),
      disconnect: vi.fn(),
    };
    instances.push(client);
    return client as unknown as CopilotClient;
  };

  return { fake, stops, ctorCalls, instances };
}

afterEach(() => {
  vi.useRealTimers();
  vi.restoreAllMocks();
});

describe("createCopilotClientPool", () => {
  it("same key reuses client", async () => {
    const sdk = makeFake();
    const pool = createCopilotClientPool({ sdkFactory: sdk.fake });
    const key = makeKey();
    const options = makeOptions();

    const first = await pool.acquire(key, options);
    const second = await pool.acquire(key, options);

    expect(first.client).toBe(second.client);
    expect(first.key).toEqual(second.key);
    expect(sdk.ctorCalls.length).toBe(1);
  });

  it("different agentId same copilotHome creates distinct clients", async () => {
    const sdk = makeFake();
    const pool = createCopilotClientPool({ sdkFactory: sdk.fake });
    const options = makeOptions();

    const first = await pool.acquire(makeKey({ agentId: "agent-a" }), options);
    const second = await pool.acquire(makeKey({ agentId: "agent-b" }), options);

    expect(first.client).not.toBe(second.client);
    expect(sdk.ctorCalls.length).toBe(2);
  });

  it("different authProfileVersion creates distinct clients", async () => {
    const sdk = makeFake();
    const pool = createCopilotClientPool({ sdkFactory: sdk.fake });
    const options = makeOptions({ gitHubToken: "token-a", useLoggedInUser: false });

    const first = await pool.acquire(
      makeKey({ authMode: "gitHubToken", authProfileId: "profile", authProfileVersion: "v1" }),
      options,
    );
    const second = await pool.acquire(
      makeKey({ authMode: "gitHubToken", authProfileId: "profile", authProfileVersion: "v2" }),
      options,
    );

    expect(first.client).not.toBe(second.client);
    expect(sdk.ctorCalls.length).toBe(2);
  });

  it("release decrements; non-zero refcount keeps client alive", async () => {
    const sdk = makeFake();
    const pool = createCopilotClientPool({ idleTtlMs: 100, sdkFactory: sdk.fake });
    const key = makeKey();
    const options = makeOptions();

    const first = await pool.acquire(key, options);
    const second = await pool.acquire(key, options);
    await pool.release(first);

    expect(first.client).toBe(second.client);
    expect(sdk.stops).toEqual([]);
    expect(pool.size()).toBe(1);
  });

  it("release to zero schedules idle teardown; teardown fires after idleTtlMs and calls stop() exactly once", async () => {
    vi.useFakeTimers();
    const sdk = makeFake();
    const pool = createCopilotClientPool({ idleTtlMs: 50, sdkFactory: sdk.fake });
    const handle = await pool.acquire(makeKey(), makeOptions());

    await pool.release(handle);
    await vi.advanceTimersByTimeAsync(49);
    expect(sdk.stops).toEqual([]);

    await vi.advanceTimersByTimeAsync(1);
    expect(sdk.stops).toEqual([1]);
    expect(pool.size()).toBe(0);
    expect(sdk.instances[0]?.start.mock.calls.length).toBe(0);

    await vi.advanceTimersByTimeAsync(50);
    expect(sdk.stops).toEqual([1]);
  });

  it("acquire during idle window cancels teardown and reuses", async () => {
    vi.useFakeTimers();
    const sdk = makeFake();
    const pool = createCopilotClientPool({ idleTtlMs: 50, sdkFactory: sdk.fake });
    const key = makeKey();
    const options = makeOptions();

    const first = await pool.acquire(key, options);
    await pool.release(first);
    await vi.advanceTimersByTimeAsync(25);

    const second = await pool.acquire(key, options);

    expect(second.client).toBe(first.client);
    expect(sdk.ctorCalls.length).toBe(1);
    expect(sdk.stops).toEqual([]);

    await vi.advanceTimersByTimeAsync(50);
    expect(sdk.stops).toEqual([]);

    await pool.release(second);
    await vi.advanceTimersByTimeAsync(50);
    expect(sdk.stops).toEqual([1]);
  });

  it("acquire during stopping awaits stop(), then creates fresh client", async () => {
    vi.useFakeTimers();
    const stopDeferred = createDeferred<Error[]>();
    const sdk = makeFake({
      stop: async () => stopDeferred.promise,
    });
    const pool = createCopilotClientPool({ idleTtlMs: 10, sdkFactory: sdk.fake });
    const key = makeKey();
    const options = makeOptions();

    const first = await pool.acquire(key, options);
    await pool.release(first);
    await vi.advanceTimersByTimeAsync(10);

    let settled = false;
    const secondPromise = pool.acquire(key, options).then((value) => {
      settled = true;
      return value;
    });
    await Promise.resolve();

    expect(settled).toBe(false);
    expect(sdk.stops).toEqual([1]);

    stopDeferred.resolve([]);
    const second = await secondPromise;

    expect(settled).toBe(true);
    expect(second.client).not.toBe(first.client);
    expect(sdk.ctorCalls.length).toBe(2);
  });

  it("concurrent acquire dedupes", async () => {
    const clientDeferred = createDeferred<CopilotClient>();
    const sdkFactory = vi.fn(async () => clientDeferred.promise);
    const pool = createCopilotClientPool({ sdkFactory });
    const key = makeKey();
    const options = makeOptions();

    const firstPromise = pool.acquire(key, options);
    const secondPromise = pool.acquire(key, options);
    await Promise.resolve();

    expect(sdkFactory.mock.calls.length).toBe(1);

    const client = {
      id: 1,
      copilotHome: "copilot-home",
      start: vi.fn(async () => undefined),
      stop: vi.fn(async () => []),
      createSession: vi.fn(async () => ({})),
      disconnect: vi.fn(),
    } as unknown as CopilotClient;
    clientDeferred.resolve(client);
    const [first, second] = await Promise.all([firstPromise, secondPromise]);

    expect(first.client).toBe(second.client);
    expect(sdkFactory.mock.calls.length).toBe(1);
  });

  it("constructor failure is not cached", async () => {
    let attempt = 0;
    const sdkFactory = async (clientOptions: CopilotClientOptions) => {
      attempt += 1;
      if (attempt === 1) {
        throw new Error(`constructor failed for ${String(clientOptions.copilotHome)}`);
      }
      return {
        id: attempt,
        copilotHome: clientOptions.copilotHome,
        start: vi.fn(async () => undefined),
        stop: vi.fn(async () => []),
        createSession: vi.fn(async () => ({})),
        disconnect: vi.fn(),
      } as unknown as CopilotClient;
    };
    const pool = createCopilotClientPool({ sdkFactory });

    await expect(pool.acquire(makeKey(), makeOptions())).rejects.toThrow("constructor failed for");

    const second = await pool.acquire(makeKey(), makeOptions());

    expect(attempt).toBe(2);
    expect(second.key.agentId).toBe("agent-1");
  });

  it("double release is a no-op", async () => {
    vi.useFakeTimers();
    const sdk = makeFake();
    const pool = createCopilotClientPool({ idleTtlMs: 100, sdkFactory: sdk.fake });
    const handle = await pool.acquire(makeKey(), makeOptions());

    await pool.release(handle);
    await pool.release(handle);
    await vi.advanceTimersByTimeAsync(99);

    expect(sdk.stops).toEqual([]);

    await vi.advanceTimersByTimeAsync(1);
    expect(sdk.stops).toEqual([1]);
  });

  it("dispose stops all clients exactly once, aggregates errors, clears the map", async () => {
    const sdk = makeFake({
      stop: (client) => [new Error(`stop-${client.id}-a`), new Error(`stop-${client.id}-b`)],
    });
    const pool = createCopilotClientPool({ idleTtlMs: 1000, sdkFactory: sdk.fake });

    const first = await pool.acquire(
      makeKey({ agentId: "agent-a", copilotHome: "home-a" }),
      makeOptions({ copilotHome: "home-a" }),
    );
    const second = await pool.acquire(
      makeKey({ agentId: "agent-b", copilotHome: "home-b" }),
      makeOptions({ copilotHome: "home-b" }),
    );
    await pool.acquire(
      makeKey({ agentId: "agent-c", copilotHome: "home-c" }),
      makeOptions({ copilotHome: "home-c" }),
    );
    await pool.release(second);

    const errors = await pool.dispose();

    expect(errors.map((error) => error.message)).toEqual([
      "stop-1-a",
      "stop-1-b",
      "stop-2-a",
      "stop-2-b",
      "stop-3-a",
      "stop-3-b",
    ]);
    expect(sdk.stops).toEqual([1, 2, 3]);
    expect(pool.size()).toBe(0);

    const secondDispose = await pool.dispose();
    expect(secondDispose).toEqual([]);
    expect(sdk.stops).toEqual([1, 2, 3]);
    await pool.release(first);
  });

  it("dispose during in-flight acquire", async () => {
    const clientDeferred = createDeferred<CopilotClient>();
    const stopped: number[] = [];
    const sdkFactory = async () => {
      const client = {
        id: 1,
        copilotHome: "copilot-home",
        start: vi.fn(async () => undefined),
        stop: vi.fn(async () => {
          stopped.push(1);
          return [];
        }),
        createSession: vi.fn(async () => ({})),
        disconnect: vi.fn(),
      } as unknown as CopilotClient;
      // The factory returns its own client; the deferred only gates WHEN it resolves.
      await clientDeferred.promise;
      return client;
    };
    const pool = createCopilotClientPool({ sdkFactory });

    const acquirePromise = pool.acquire(makeKey(), makeOptions());
    const disposePromise = pool.dispose();
    const client = {
      id: 1,
      copilotHome: "copilot-home",
      start: vi.fn(async () => undefined),
      stop: vi.fn(async () => []),
      createSession: vi.fn(async () => ({})),
      disconnect: vi.fn(),
    } as unknown as CopilotClient;
    clientDeferred.resolve(client);

    await expect(acquirePromise).rejects.toThrow("[copilot-pool] pool disposed");
    expect(await disposePromise).toEqual([]);
    expect(stopped).toEqual([1]);
    await expect(pool.acquire(makeKey(), makeOptions())).rejects.toThrow(
      "[copilot-pool] pool disposed",
    );
  });

  it("concurrent dispose waits for the in-flight shutdown and does not duplicate errors", async () => {
    const stopDeferred = createDeferred<Error[]>();
    const sdk = makeFake({
      stop: async () => stopDeferred.promise,
    });
    const pool = createCopilotClientPool({ sdkFactory: sdk.fake });

    await pool.acquire(makeKey(), makeOptions());

    const firstDisposePromise = pool.dispose();
    const secondDisposePromise = pool.dispose();
    await Promise.resolve();

    expect(sdk.stops).toEqual([1]);

    stopDeferred.resolve([new Error("stop failed")]);
    const firstErrors = await firstDisposePromise;
    const secondErrors = await secondDisposePromise;

    expect(firstErrors.map((error) => error.message)).toEqual(["stop failed"]);
    expect(secondErrors).toEqual([]);
  });

  it("normalizes non-Error stop failures during dispose", async () => {
    const sdk = makeFake({
      stop: () => {
        throw "stop-string";
      },
    });
    const pool = createCopilotClientPool({ sdkFactory: sdk.fake });

    await pool.acquire(makeKey(), makeOptions());

    const errors = await pool.dispose();

    expect(errors.map((error) => error.message)).toEqual(["stop-string"]);
  });

  it("treats Windows copilotHome paths as case-insensitive when keying the pool", async () => {
    // Pretend to be on win32 for the duration of this test; restored in `finally`.
    const originalPlatform = process.platform;
    Object.defineProperty(process, "platform", { configurable: true, value: "win32" });

    try {
      const sdk = makeFake();
      const pool = createCopilotClientPool({ sdkFactory: sdk.fake });
      const firstHome = "C:/Users/Tester/CopilotHome/";
      const secondHome = "c:/users/tester/copilothome";

      const first = await pool.acquire(
        makeKey({ copilotHome: firstHome }),
        makeOptions({ copilotHome: firstHome }),
      );
      const second = await pool.acquire(
        makeKey({ copilotHome: secondHome }),
        makeOptions({ copilotHome: secondHome }),
      );

      const normalizedHome = normalizeHomeForTest(firstHome);
      expect(first.client).toBe(second.client);
      expect(first.key.copilotHome).toBe(normalizedHome);
      expect(second.key.copilotHome).toBe(normalizedHome);
      expect(String(sdk.ctorCalls[0]?.copilotHome)).toBe(normalizedHome);
    } finally {
      Object.defineProperty(process, "platform", { configurable: true, value: originalPlatform });
    }
  });

  it("path normalization", async () => {
    const sdk = makeFake();
    const pool = createCopilotClientPool({ sdkFactory: sdk.fake });
    const firstHome =
      process.platform === "win32" ? "C:\\Users\\Tester\\CopilotHome\\" : "copilot-home/";
    const secondHome =
      process.platform === "win32" ? "c:\\users\\tester\\copilothome" : "copilot-home";

    const first = await pool.acquire(
      makeKey({ copilotHome: firstHome }),
      makeOptions({ copilotHome: firstHome }),
    );
    const second = await pool.acquire(
      makeKey({ copilotHome: secondHome }),
      makeOptions({ copilotHome: secondHome }),
    );

    const normalizedHome = normalizeHomeForTest(firstHome);
    expect(first.client).toBe(second.client);
    expect(first.key.copilotHome).toBe(normalizedHome);
    expect(second.key.copilotHome).toBe(normalizedHome);
    expect(sdk.ctorCalls.length).toBe(1);
    expect(String(sdk.ctorCalls[0]?.copilotHome)).toBe(normalizedHome);
  });
});

// diff --git a/extensions/copilot/src/runtime.ts b/extensions/copilot/src/runtime.ts
// new file mode 100644
// index 000000000000..13b73b9cdb0f
// --- /dev/null
// +++ b/extensions/copilot/src/runtime.ts
// @@ -0,0 +1,387 @@
import { normalize, resolve, sep } from "node:path";
import type { CopilotClient, CopilotClientOptions } from "@github/copilot-sdk";
import { loadCopilotSdk } from "./sdk-loader.js";

// SAFETY: The pool reuses CopilotClient instances per normalized PoolKey and does not
// serialize concurrent client.createSession() calls. attempt-bridge MUST treat shared
// CopilotClients as having safe concurrent multi-session semantics that are NOT YET PROVEN;
// if probe q4 reveals concurrency hazards, attempt-bridge must add per-key serialization.
/** Idle time-to-live applied when `CopilotClientPoolOptions.idleTtlMs` is not supplied (5 minutes). */
const DEFAULT_IDLE_TTL_MS = 5 * 60 * 1000;
/** Message of the error thrown by `acquire()` once `dispose()` has been called. */
const POOL_DISPOSED_MESSAGE = "[copilot-pool] pool disposed";

/**
 * Identity of a pooled client. Two acquires share a client only when every
 * field matches after `copilotHome` has been normalized.
 */
export interface PoolKey {
  readonly agentId: string;
  readonly copilotHome: string;
  readonly authMode: "useLoggedInUser" | "gitHubToken";
  readonly authProfileId?: string;
  readonly authProfileVersion?: string;
}

/**
 * Options forwarded to the SDK factory when a client has to be created.
 * `copilotHome` is required here and is normalized before it reaches the factory.
 */
export interface ClientCreateOptions extends Omit<
  CopilotClientOptions,
  "copilotHome" | "useLoggedInUser" | "gitHubToken"
> {
  readonly copilotHome: string;
  readonly useLoggedInUser?: boolean;
  readonly gitHubToken?: string;
}

/** Handle returned by `acquire()`; pass the same object back to `release()`. */
export interface PooledClient {
  readonly key: PoolKey;
  readonly client: CopilotClient;
}

export interface CopilotClientPoolOptions {
  /** Creates a client; may return it directly or as a promise. Defaults to the lazily loaded SDK. */
  readonly sdkFactory?: (opts: CopilotClientOptions) => CopilotClient | Promise<CopilotClient>;
  /** Milliseconds an unreferenced client is kept before it is stopped. */
  readonly idleTtlMs?: number;
  /** Clock used to stamp when an entry became idle. Defaults to `Date.now`. */
  readonly now?: () => number;
}

export interface CopilotClientPool {
  acquire(key: PoolKey, options: ClientCreateOptions): Promise<PooledClient>;
  release(handle: PooledClient): Promise<void>;
  dispose(): Promise<Error[]>;
  size(): number;
}

/** Lifecycle of one pool entry: creating -> ready <-> idle -> stopping -> stopped. */
type EntryState =
  | { kind: "creating"; promise: Promise<CopilotClient> }
  | { kind: "ready"; client: CopilotClient }
  | {
      kind: "idle";
      client: CopilotClient;
      idleTimer: ReturnType<typeof setTimeout>;
      idleSinceMs: number;
    }
  | { kind: "stopping"; client: CopilotClient; promise: Promise<Error[]> }
  | { kind: "stopped" };

interface PoolEntry {
  readonly key: PoolKey;
  readonly cacheKey: string;
  /** Number of outstanding handles; the idle timer is armed when it reaches zero. */
  refCount: number;
  /** Set once `client.stop()` has been invoked so it is never invoked twice. */
  stopRan: boolean;
  state: EntryState;
}

/**
 * Creates a reference-counted pool of Copilot clients keyed by the normalized
 * `PoolKey`.
 *
 * - `acquire` reuses a ready/idle client, joins an in-flight creation, or
 *   creates a new client through `sdkFactory`. It rejects with
 *   "[copilot-pool] pool disposed" once `dispose` has been called, and with the
 *   factory's error (normalized to `Error`) when creation fails.
 * - `release` drops one reference per handle (repeat releases of the same
 *   handle object are ignored). When the count reaches zero the client is
 *   stopped after `idleTtlMs`, unless it is re-acquired first.
 * - `dispose` stops every client once and resolves with the collected stop
 *   errors; concurrent or repeated calls resolve with an empty array.
 * - `size` reports the number of live entries.
 *
 * @param options Optional factory, idle TTL and clock overrides.
 */
export function createCopilotClientPool(options: CopilotClientPoolOptions = {}): CopilotClientPool {
  const sdkFactory =
    options.sdkFactory ??
    (async (clientOptions: CopilotClientOptions) => {
      // Lazy-load the SDK so packaged installs without @github/copilot-sdk
      // (the default; see sdk-loader.ts for rationale) crash with an
      // actionable install message instead of a generic MODULE_NOT_FOUND
      // at import time. The loader caches the resolved module after the
      // first successful load.
      const sdk = await loadCopilotSdk();
      return new sdk.CopilotClient(clientOptions);
    });
  const idleTtlMs = options.idleTtlMs ?? DEFAULT_IDLE_TTL_MS;
  const now = options.now ?? Date.now;
  const entries = new Map<string, PoolEntry>();
  const releasedHandles = new WeakSet<PooledClient>();
  let disposed = false;
  let disposePromise: Promise<Error[]> | undefined;
  let disposeCompleted = false;

  const createDisposedError = () => new Error(POOL_DISPOSED_MESSAGE);

  // Only remove the map slot if it still points at this entry; a newer entry
  // for the same cache key must not be evicted by a late cleanup.
  const maybeDeleteEntry = (entry: PoolEntry) => {
    if (entries.get(entry.cacheKey) === entry) {
      entries.delete(entry.cacheKey);
    }
  };

  const stopReadyOrIdleEntry = (
    entry: PoolEntry,
    client: CopilotClient,
    idleTimer?: ReturnType<typeof setTimeout>,
  ): Promise<Error[]> => {
    if (idleTimer) {
      clearTimeout(idleTimer);
    }
    if (entry.stopRan) {
      if (entry.state.kind === "stopping") {
        return entry.state.promise;
      }
      if (entry.state.kind === "stopped") {
        return Promise.resolve([]);
      }
    }

    entry.stopRan = true;
    const stopPromise = (async (): Promise<Error[]> => {
      try {
        return await client.stop();
      } catch (error: unknown) {
        // A throwing stop() is reported as a collected error, never as a rejection.
        return [toError(error)];
      } finally {
        entry.state = { kind: "stopped" };
        maybeDeleteEntry(entry);
      }
    })();

    entry.state = { kind: "stopping", client, promise: stopPromise };
    return stopPromise;
  };

  const stopEntry = async (entry: PoolEntry): Promise<Error[]> => {
    switch (entry.state.kind) {
      case "creating": {
        try {
          await entry.state.promise;
        } catch (error: unknown) {
          maybeDeleteEntry(entry);
          return [toError(error)];
        }
        // Creation finished; re-dispatch on the new state.
        return stopEntry(entry);
      }
      case "ready":
        return stopReadyOrIdleEntry(entry, entry.state.client);
      case "idle":
        return stopReadyOrIdleEntry(entry, entry.state.client, entry.state.idleTimer);
      case "stopping":
        return entry.state.promise;
      case "stopped":
        return [];
      default: {
        const exhaustive: never = entry.state;
        return exhaustive;
      }
    }
  };

  const scheduleIdleStop = (entry: PoolEntry, client: CopilotClient) => {
    const idleTimer = setTimeout(() => {
      void stopEntry(entry);
    }, idleTtlMs);
    entry.state = {
      kind: "idle",
      client,
      idleTimer,
      idleSinceMs: now(),
    };
  };

  const createEntry = (key: PoolKey, cacheKey: string, clientOptions: CopilotClientOptions) => {
    // The factory may be synchronous. Turn a synchronous throw into a rejected
    // promise so the failure handler below always runs AFTER the entry has
    // been registered. Otherwise a failed entry would be inserted in the
    // "creating" state with an already-rejected promise, and every later
    // acquire for this key would rethrow the same error.
    let started: Promise<CopilotClient>;
    try {
      started = Promise.resolve(sdkFactory(clientOptions));
    } catch (error: unknown) {
      started = Promise.reject(toError(error));
    }

    const entry: PoolEntry = {
      key,
      cacheKey,
      refCount: 1,
      stopRan: false,
      state: { kind: "creating", promise: started },
    };

    const createPromise = started.then(
      (client) => {
        entry.state = { kind: "ready", client };
        return client;
      },
      (error: unknown) => {
        entry.state = { kind: "stopped" };
        maybeDeleteEntry(entry);
        throw toError(error);
      },
    );

    entry.state = { kind: "creating", promise: createPromise };
    entries.set(cacheKey, entry);
    return { entry, createPromise };
  };

  const acquire = async (
    inputKey: PoolKey,
    optionsForCreate: ClientCreateOptions,
  ): Promise<PooledClient> => {
    // The create options' copilotHome is authoritative for the pool key.
    const key = normalizePoolKey(inputKey, optionsForCreate.copilotHome);
    const cacheKey = JSON.stringify(key);
    const clientOptions = normalizeClientCreateOptions(optionsForCreate, key.copilotHome);

    // Loop because an entry that is stopping/stopped must be retried from scratch.
    while (true) {
      if (disposed) {
        throw createDisposedError();
      }

      const existing = entries.get(cacheKey);
      if (!existing) {
        const created = createEntry(key, cacheKey, clientOptions);
        try {
          const client = await created.createPromise;
          if (disposed) {
            // dispose() ran while the client was being created: stop it and fail.
            await stopEntry(created.entry);
            throw createDisposedError();
          }
          return { key: created.entry.key, client };
        } catch (error: unknown) {
          throw toError(error);
        }
      }

      switch (existing.state.kind) {
        case "creating": {
          existing.refCount += 1;
          try {
            const client = await existing.state.promise;
            if (disposed) {
              await stopEntry(existing);
              throw createDisposedError();
            }
            return { key: existing.key, client };
          } catch (error: unknown) {
            throw toError(error);
          }
        }
        case "ready":
          existing.refCount += 1;
          return { key: existing.key, client: existing.state.client };
        case "idle": {
          const client = existing.state.client;
          clearTimeout(existing.state.idleTimer);
          existing.refCount += 1;
          existing.state = { kind: "ready", client };
          return { key: existing.key, client };
        }
        case "stopping":
          await existing.state.promise;
          continue;
        case "stopped":
          maybeDeleteEntry(existing);
          continue;
      }
    }
  };

  const release = async (handle: PooledClient): Promise<void> => {
    if (releasedHandles.has(handle)) {
      return;
    }
    releasedHandles.add(handle);

    const entry = entries.get(JSON.stringify(handle.key));
    if (!entry) {
      return;
    }

    switch (entry.state.kind) {
      case "creating":
      case "stopping":
      case "stopped":
        return;
      case "ready":
      case "idle":
        // A handle from a previous generation of this key must not affect the current client.
        if (entry.state.client !== handle.client) {
          return;
        }
        break;
    }

    if (entry.refCount <= 0) {
      return;
    }

    entry.refCount -= 1;
    if (entry.refCount > 0) {
      return;
    }

    if (disposed) {
      await stopEntry(entry);
      return;
    }

    if (entry.state.kind === "ready") {
      scheduleIdleStop(entry, entry.state.client);
      return;
    }

    if (entry.state.kind === "idle") {
      clearTimeout(entry.state.idleTimer);
      scheduleIdleStop(entry, entry.state.client);
    }
  };

  const dispose = async (): Promise<Error[]> => {
    if (disposeCompleted) {
      return [];
    }
    if (disposePromise) {
      // Another dispose() is running: wait for it, but report its errors only once.
      await disposePromise;
      return [];
    }

    disposed = true;
    const snapshot = [...entries.values()];
    for (const entry of snapshot) {
      if (entry.state.kind === "idle") {
        clearTimeout(entry.state.idleTimer);
      }
    }

    disposePromise = (async (): Promise<Error[]> => {
      const errors: Error[] = [];
      for (const entry of snapshot) {
        const stopErrors = await stopEntry(entry);
        errors.push(...stopErrors);
      }
      entries.clear();
      disposeCompleted = true;
      return errors;
    })();

    try {
      return await disposePromise;
    } finally {
      disposePromise = undefined;
    }
  };

  return {
    acquire,
    release,
    dispose,
    size: () => entries.size,
  };
}

/** Rebuilds the key with `copilotHome` replaced by the normalized form of `rawCopilotHome`. */
function normalizePoolKey(key: PoolKey, rawCopilotHome: string): PoolKey {
  return {
    agentId: key.agentId,
    copilotHome: normalizeCopilotHome(rawCopilotHome),
    authMode: key.authMode,
    authProfileId: key.authProfileId,
    authProfileVersion: key.authProfileVersion,
  };
}

/** Copies the create options, substituting the already-normalized `copilotHome`. */
function normalizeClientCreateOptions(
  options: ClientCreateOptions,
  normalizedCopilotHome: string,
): CopilotClientOptions {
  return {
    ...options,
    copilotHome: normalizedCopilotHome,
  };
}

/**
 * Canonicalizes a copilot home path: absolute, normalized, without a trailing
 * separator, and lower-cased when `process.platform` is "win32".
 *
 * A filesystem root keeps its separator: removing it would turn `C:\` into
 * `C:`, which Windows interprets as the current directory on drive C.
 */
function normalizeCopilotHome(copilotHome: string): string {
  let normalizedHome = normalize(resolve(copilotHome));
  // A root is the only path whose parent resolves to itself.
  const isRoot = resolve(normalizedHome, "..") === normalizedHome;
  if (!isRoot && normalizedHome.endsWith(sep)) {
    normalizedHome = normalizedHome.slice(0, -1);
  }
  if (process.platform === "win32") {
    normalizedHome = normalizedHome.toLowerCase();
  }
  return normalizedHome;
}

/** Returns `error` unchanged when it is an `Error`, otherwise wraps its string form. */
function toError(error: unknown): Error {
  if (error instanceof Error) {
    return error;
  }
  return new Error(String(error));
}
primaryImport = vi.fn(async () => FAKE_SDK); + const fallbackImport = vi.fn(async () => { + throw new Error("should not be called"); + }); + + const sdk = await loadCopilotSdk({ + cache: false, + fallbackDir: "/dev/null/does-not-exist", + primaryImport, + fallbackImport, + }); + + expect(sdk).toBe(FAKE_SDK); + expect(primaryImport).toHaveBeenCalledTimes(1); + expect(fallbackImport).not.toHaveBeenCalled(); + }); + + it("falls back to the on-demand install location when primary import fails", async () => { + const tmp = mkdtempSync(path.join(tmpdir(), "copilot-sdk-loader-")); + try { + // Materialize the fallback path so the existsSync check passes. + const fallbackPath = path.join(tmp, "node_modules", "@github", "copilot-sdk"); + mkdirSync(fallbackPath, { recursive: true }); + writeFileSync(path.join(fallbackPath, "index.js"), "// placeholder"); + + const primaryImport = vi.fn(async () => { + const err = new Error("Cannot find module '@github/copilot-sdk'") as Error & { + code: string; + }; + err.code = "ERR_MODULE_NOT_FOUND"; + throw err; + }); + const fallbackImport = vi.fn(async (abs: string) => { + expect(abs).toBe(fallbackPath); + return FAKE_SDK; + }); + + const sdk = await loadCopilotSdk({ + cache: false, + fallbackDir: tmp, + primaryImport, + fallbackImport, + }); + + expect(sdk).toBe(FAKE_SDK); + expect(primaryImport).toHaveBeenCalledTimes(1); + expect(fallbackImport).toHaveBeenCalledTimes(1); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("default fallback importer resolves and imports the installed SDK entry", async () => { + // Exercise the real default fallback importer (no fallbackImport injection) + // to prove it imports a concrete entry file rather than the package + // directory, which Node ESM would reject with ERR_UNSUPPORTED_DIR_IMPORT. 
+ const tmp = mkdtempSync(path.join(tmpdir(), "copilot-sdk-loader-default-")); + try { + const pkgDir = path.join(tmp, "node_modules", "@github", "copilot-sdk"); + mkdirSync(pkgDir, { recursive: true }); + writeFileSync( + path.join(pkgDir, "package.json"), + JSON.stringify({ + name: "@github/copilot-sdk", + version: "0.0.0-test", + main: "./index.cjs", + }), + ); + writeFileSync( + path.join(pkgDir, "index.cjs"), + "module.exports = { openclawDefaultImporterSentinel: true };", + ); + + const primaryImport = vi.fn(async () => { + const err = new Error("Cannot find module '@github/copilot-sdk'") as Error & { + code: string; + }; + err.code = "ERR_MODULE_NOT_FOUND"; + throw err; + }); + + const sdk = (await loadCopilotSdk({ + cache: false, + fallbackDir: tmp, + primaryImport, + // Intentionally NOT injecting fallbackImport; exercise the default. + })) as unknown as { openclawDefaultImporterSentinel?: boolean }; + + expect(sdk.openclawDefaultImporterSentinel).toBe(true); + expect(primaryImport).toHaveBeenCalledTimes(1); + } finally { + rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("throws an actionable error with install instructions when both probes fail", async () => { + const primaryImport = vi.fn(async () => { + throw new Error("Cannot find module '@github/copilot-sdk'"); + }); + const fallbackImport = vi.fn(async () => { + throw new Error("should not be called when fallback dir does not exist"); + }); + + await expect( + loadCopilotSdk({ + cache: false, + fallbackDir: path.join(tmpdir(), "copilot-sdk-loader-missing-" + Date.now()), + primaryImport, + fallbackImport, + }), + ).rejects.toMatchObject({ + code: "COPILOT_SDK_MISSING", + message: expect.stringContaining(COPILOT_SDK_SPEC), + }); + + expect(fallbackImport).not.toHaveBeenCalled(); + }); + + it("error message includes the fallback path and underlying primary error", async () => { + const primaryImport = vi.fn(async () => { + throw new Error("primary boom"); + }); + + const fallbackDir = 
path.join(tmpdir(), "copilot-sdk-loader-missing-" + Date.now()); + let captured: Error | undefined; + try { + await loadCopilotSdk({ + cache: false, + fallbackDir, + primaryImport, + }); + } catch (err) { + captured = err as Error; + } + expect(captured).toBeDefined(); + const message = captured?.message ?? ""; + expect(message).toContain("primary boom"); + expect(message).toContain(path.join(fallbackDir, "node_modules", "@github", "copilot-sdk")); + expect(message).toContain("pnpm add"); + }); + + it("caches successful loads across calls when cache is enabled", async () => { + const primaryImport = vi.fn(async () => FAKE_SDK); + + const a = await loadCopilotSdk({ primaryImport, fallbackDir: "/dev/null/does-not-exist" }); + const b = await loadCopilotSdk({ primaryImport, fallbackDir: "/dev/null/does-not-exist" }); + + expect(a).toBe(FAKE_SDK); + expect(b).toBe(FAKE_SDK); + expect(primaryImport).toHaveBeenCalledTimes(1); + }); + + it("does not poison the cache after a failed load", async () => { + const primaryImport = vi + .fn() + .mockRejectedValueOnce(new Error("first boom")) + .mockResolvedValueOnce(FAKE_SDK); + + await expect( + loadCopilotSdk({ + primaryImport: primaryImport as unknown as () => Promise< + typeof import("@github/copilot-sdk") + >, + fallbackDir: "/dev/null/does-not-exist", + }), + ).rejects.toBeInstanceOf(Error); + + const sdk = await loadCopilotSdk({ + primaryImport: primaryImport as unknown as () => Promise< + typeof import("@github/copilot-sdk") + >, + fallbackDir: "/dev/null/does-not-exist", + }); + expect(sdk).toBe(FAKE_SDK); + expect(primaryImport).toHaveBeenCalledTimes(2); + }); + + it("default fallback dir points at ~/.openclaw/npm-runtime/copilot", () => { + expect(COPILOT_SDK_FALLBACK_DIR).toMatch(/\.openclaw[\\/]+npm-runtime[\\/]+copilot$/); + }); + + it("resolves the fallback dir from OPENCLAW_STATE_DIR for relocated profiles", () => { + expect( + resolveCopilotSdkFallbackDir({ + ...process.env, + OPENCLAW_STATE_DIR: 
/** Shared load promise used by every caller that leaves `cache` enabled. */
let cached: Promise<typeof Sdk> | undefined;

export interface LoadCopilotSdkOptions {
  /** Directory whose `node_modules/@github/copilot-sdk` is probed when the primary import fails. */
  readonly fallbackDir?: string;
  /** Overrides the bare-specifier import of `@github/copilot-sdk`. */
  readonly primaryImport?: () => Promise<typeof Sdk>;
  /** Overrides how the fallback package directory is imported. */
  readonly fallbackImport?: (absolutePath: string) => Promise<typeof Sdk>;
  /** Pass `false` to bypass the module-level cache. Defaults to caching. */
  readonly cache?: boolean;
}

/**
 * Loads `@github/copilot-sdk`, first by its package name and then from the
 * on-demand fallback install directory.
 *
 * Successful loads are cached (unless `options.cache === false`); a failed
 * load is removed from the cache so a later call can retry.
 *
 * @throws An `Error` with `code === "COPILOT_SDK_MISSING"` and install
 *   instructions when neither location yields the SDK.
 */
export async function loadCopilotSdk(options: LoadCopilotSdkOptions = {}): Promise<typeof Sdk> {
  if (options.cache === false) {
    return doLoad(options);
  }
  if (cached) {
    return cached;
  }

  const pending: Promise<typeof Sdk> = doLoad(options).catch((err: unknown) => {
    // Only evict our own slot: after a cache reset a newer load may already
    // occupy it, and a late failure of this one must not discard that result.
    if (cached === pending) {
      cached = undefined;
    }
    throw err;
  });
  cached = pending;
  return pending;
}

/** Clears the module-level cache so the next cached call loads again. */
export function resetCopilotSdkCacheForTests(): void {
  cached = undefined;
}

/** Performs one uncached load attempt: primary import, then the fallback directory. */
async function doLoad(options: LoadCopilotSdkOptions): Promise<typeof Sdk> {
  const fallbackDir = options.fallbackDir ?? resolveCopilotSdkFallbackDir();
  const primaryImport = options.primaryImport ?? (async () => await import("@github/copilot-sdk"));

  let primaryErr: unknown;
  try {
    return await primaryImport();
  } catch (err: unknown) {
    // Any primary failure falls through to the fallback probe; the error is
    // kept so it can be reported if the fallback fails as well.
    primaryErr = err;
  }

  const fallbackPath = path.join(fallbackDir, "node_modules", "@github", "copilot-sdk");
  if (!existsSync(fallbackPath)) {
    throw createMissingSdkError(primaryErr, undefined, fallbackPath);
  }

  const fallbackImport =
    options.fallbackImport ??
    (async () => {
      // Node ESM rejects directory imports (ERR_UNSUPPORTED_DIR_IMPORT), so
      // resolve the package's real entry through Node's module resolver
      // anchored at fallbackDir before importing.
      const requireFromFallback = createRequire(path.join(fallbackDir, "package.json"));
      const entry = requireFromFallback.resolve("@github/copilot-sdk");
      return (await import(pathToFileURL(entry).href)) as typeof Sdk;
    });

  try {
    return await fallbackImport(fallbackPath);
  } catch (fallbackErr: unknown) {
    throw createMissingSdkError(primaryErr, fallbackErr, fallbackPath);
  }
}

/**
 * Builds the install-instructions error raised when the SDK cannot be loaded.
 * The fallback section is included only when a fallback import was attempted.
 */
function createMissingSdkError(
  primaryErr: unknown,
  fallbackErr: unknown,
  fallbackPath: string,
): Error {
  const lines = [
    "[copilot] @github/copilot-sdk is not installed.",
    "",
    "The Copilot agent runtime requires @github/copilot-sdk (~260 MB",
    "after pulling its platform-specific @github/copilot CLI binary).",
    "Install it once with:",
    "",
    `  pnpm add ${COPILOT_SDK_SPEC}`,
    `  # or: npm install ${COPILOT_SDK_SPEC}`,
    "",
    `Alternatively, install into the on-demand fallback location at\n  ${fallbackPath}`,
    "",
    "Primary resolution error:",
    `  ${summarizeError(primaryErr)}`,
  ];
  if (fallbackErr !== undefined) {
    lines.push("", "Fallback resolution error:", `  ${summarizeError(fallbackErr)}`);
  }
  const err = new Error(lines.join("\n"));
  (err as Error & { code?: string }).code = "COPILOT_SDK_MISSING";
  return err;
}

/** Renders an arbitrary thrown value as a single non-empty description string. */
function summarizeError(value: unknown): string {
  if (value === undefined || value === null) {
    return "(none)";
  }
  if (value instanceof Error) {
    return value.message || String(value);
  }
  if (typeof value === "string") {
    return value;
  }
  try {
    // JSON.stringify yields undefined for functions and symbols; fall back to
    // String() so the declared string return type always holds.
    return JSON.stringify(value) ?? String(value);
  } catch {
    return Object.prototype.toString.call(value);
  }
}
CopilotTraceContextErrorInfo, +} from "./telemetry-bridge.js"; + +describe("createTelemetryConfig", () => { + it("returns undefined for undefined input", () => { + expect(createTelemetryConfig()).toBeUndefined(); + }); + + it("returns undefined when every field is undefined", () => { + expect(createTelemetryConfig({})).toBeUndefined(); + expect( + createTelemetryConfig({ + otlpEndpoint: undefined, + filePath: undefined, + }), + ).toBeUndefined(); + }); + + it("includes only the fields that were explicitly set", () => { + expect(createTelemetryConfig({ otlpEndpoint: "https://otel.example/v1/traces" })).toEqual({ + otlpEndpoint: "https://otel.example/v1/traces", + }); + expect(createTelemetryConfig({ sourceName: "openclaw" })).toEqual({ + sourceName: "openclaw", + }); + }); + + it("round-trips a fully populated config", () => { + const result = createTelemetryConfig({ + otlpEndpoint: "https://otel.example/v1/traces", + filePath: "/tmp/openclaw-traces.jsonl", + exporterType: "otlp-http", + sourceName: "openclaw", + captureContent: true, + }); + expect(result).toEqual({ + otlpEndpoint: "https://otel.example/v1/traces", + filePath: "/tmp/openclaw-traces.jsonl", + exporterType: "otlp-http", + sourceName: "openclaw", + captureContent: true, + }); + }); + + it("preserves captureContent: false (explicit disable, not undefined)", () => { + expect(createTelemetryConfig({ captureContent: false })).toEqual({ + captureContent: false, + }); + }); + + it("preserves empty-string values (caller chose to set them)", () => { + expect(createTelemetryConfig({ otlpEndpoint: "" })).toEqual({ otlpEndpoint: "" }); + }); +}); + +describe("createTraceContextProvider", () => { + it("returns an empty context when no sources are configured", async () => { + const provider = createTraceContextProvider(); + await expect(provider()).resolves.toEqual({}); + }); + + it("prefers getTraceContext over the convenience sources", async () => { + const getTraceContext = vi.fn().mockResolvedValue({ + 
traceparent: "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01", + tracestate: "vendor=value", + }); + const getTraceparent = vi.fn().mockResolvedValue("00-ffff-ffff-01"); + const provider = createTraceContextProvider({ getTraceContext, getTraceparent }); + const ctx = await provider(); + expect(ctx).toEqual({ + traceparent: "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01", + tracestate: "vendor=value", + }); + expect(getTraceparent).not.toHaveBeenCalled(); + }); + + it("falls back to getTraceparent when getTraceContext returns undefined", async () => { + const getTraceContext = vi.fn().mockResolvedValue(undefined); + const getTraceparent = vi + .fn() + .mockResolvedValue("00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01"); + const provider = createTraceContextProvider({ getTraceContext, getTraceparent }); + await expect(provider()).resolves.toEqual({ + traceparent: "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01", + }); + expect(getTraceContext).toHaveBeenCalledTimes(1); + expect(getTraceparent).toHaveBeenCalledTimes(1); + }); + + it("includes tracestate when both convenience sources return non-empty values", async () => { + const provider = createTraceContextProvider({ + getTraceparent: () => "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01", + getTracestate: () => "vendor=value", + }); + await expect(provider()).resolves.toEqual({ + traceparent: "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01", + tracestate: "vendor=value", + }); + }); + + it("omits empty/undefined tracestate even when traceparent is present", async () => { + const providerUndef = createTraceContextProvider({ + getTraceparent: () => "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01", + getTracestate: () => undefined, + }); + await expect(providerUndef()).resolves.toEqual({ + traceparent: "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01", + }); + const providerEmpty = createTraceContextProvider({ + getTraceparent: () => 
"00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01", + getTracestate: () => "", + }); + await expect(providerEmpty()).resolves.toEqual({ + traceparent: "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01", + }); + }); + + it("does not propagate tracestate without traceparent (W3C requirement)", async () => { + const getTracestate = vi.fn().mockResolvedValue("vendor=value"); + const provider = createTraceContextProvider({ + getTraceparent: () => undefined, + getTracestate, + }); + await expect(provider()).resolves.toEqual({}); + expect(getTracestate).not.toHaveBeenCalled(); + }); + + it("re-reads sources on every invocation (so caching the provider is safe)", async () => { + let parent = "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01"; + const provider = createTraceContextProvider({ getTraceparent: () => parent }); + await expect(provider()).resolves.toEqual({ traceparent: parent }); + parent = "00-cccccccccccccccccccccccccccccccc-dddddddddddddddd-01"; + await expect(provider()).resolves.toEqual({ traceparent: parent }); + }); + + it("getTraceContext failure → empty context + notifier called with the original error", async () => { + const onError = vi.fn(); + const provider = createTraceContextProvider({ + getTraceContext: () => { + throw new Error("ctx-boom"); + }, + getTraceparent: () => "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01", + onError, + }); + await expect(provider()).resolves.toEqual({}); + expect(onError).toHaveBeenCalledTimes(1); + const info = onError.mock.calls[0]?.[0] as CopilotTraceContextErrorInfo; + expect(info.part).toBe("traceContext"); + expect(info.error.message).toBe("ctx-boom"); + }); + + it("getTraceparent failure → empty context + notifier called", async () => { + const onError = vi.fn(); + const provider = createTraceContextProvider({ + getTraceparent: async () => { + throw new Error("parent-boom"); + }, + getTracestate: () => "vendor=value", + onError, + }); + await 
expect(provider()).resolves.toEqual({}); + expect(onError).toHaveBeenCalledTimes(1); + expect((onError.mock.calls[0]?.[0] as CopilotTraceContextErrorInfo).part).toBe("traceparent"); + }); + + it("getTracestate failure → partial success (traceparent kept) + notifier called", async () => { + const onError = vi.fn(); + const provider = createTraceContextProvider({ + getTraceparent: () => "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01", + getTracestate: () => { + throw new Error("state-boom"); + }, + onError, + }); + await expect(provider()).resolves.toEqual({ + traceparent: "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01", + }); + expect(onError).toHaveBeenCalledTimes(1); + expect((onError.mock.calls[0]?.[0] as CopilotTraceContextErrorInfo).part).toBe("tracestate"); + }); + + it("default notifier uses console.warn", async () => { + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => undefined); + try { + const provider = createTraceContextProvider({ + getTraceparent: () => { + throw new Error("default-warn-path"); + }, + }); + await expect(provider()).resolves.toEqual({}); + expect(warnSpy).toHaveBeenCalledTimes(1); + expect(String(warnSpy.mock.calls[0]?.[0])).toContain("traceparent"); + expect(String(warnSpy.mock.calls[0]?.[0])).toContain("default-warn-path"); + } finally { + warnSpy.mockRestore(); + } + }); + + it("normalizes non-Error throws into Error before notifying", async () => { + const onError = vi.fn(); + const provider = createTraceContextProvider({ + getTraceparent: () => { + throw "string-boom"; + }, + onError, + }); + await expect(provider()).resolves.toEqual({}); + const info = onError.mock.calls[0]?.[0] as CopilotTraceContextErrorInfo; + expect(info.error).toBeInstanceOf(Error); + expect(info.error.message).toBe("string-boom"); + }); + + it("notifier throws are swallowed (provider always resolves)", async () => { + const provider = createTraceContextProvider({ + getTraceparent: () => { + throw new Error("boom"); + }, 
+ onError: () => { + throw new Error("notifier-boom"); + }, + }); + await expect(provider()).resolves.toEqual({}); + }); + + it("treats only-traceContext source returning empty object as a valid context (no fallback)", async () => { + const getTraceparent = vi.fn(); + const provider = createTraceContextProvider({ + getTraceContext: () => ({}), + getTraceparent, + }); + await expect(provider()).resolves.toEqual({}); + expect(getTraceparent).not.toHaveBeenCalled(); + }); +}); diff --git a/extensions/copilot/src/telemetry-bridge.ts b/extensions/copilot/src/telemetry-bridge.ts new file mode 100755 index 000000000000..fb1345d7d9c2 --- /dev/null +++ b/extensions/copilot/src/telemetry-bridge.ts @@ -0,0 +1,218 @@ +import type { CopilotClientOptions } from "@github/copilot-sdk"; + +// Telemetry bridge for the GitHub Copilot agent runtime. +// +// SDK surface: +// - `CopilotClientOptions.telemetry?: TelemetryConfig` — OpenTelemetry +// configuration applied to the spawned CLI process via env vars. +// - `CopilotClientOptions.onGetTraceContext?: TraceContextProvider` — +// async callback returning a W3C `{traceparent?, tracestate?}` that the +// SDK injects into `session.create`, `session.resume`, and +// `session.send` RPCs for distributed trace propagation. +// +// Host-side back-pointers (NOT imported here to keep the package boundary +// clean — the wiring layer injects these via callbacks): +// - `src/infra/diagnostic-trace-context.ts` — `getActiveDiagnosticTraceContext`, +// `formatDiagnosticTraceparent`, `DiagnosticTraceContext`. +// - `src/infra/diagnostic-events.ts` — `formatDiagnosticTraceparentForPropagation` +// for trusted-only propagation. +// +// IMPORTANT — pool reuse caveat: +// `CopilotClientPool` keys on `{agentId, copilotHome, authMode, +// authProfileId, authProfileVersion}`. Client-level telemetry and +// `onGetTraceContext` are NOT part of the pool key. 
Two callers that +// share a pool key but supply different telemetry options will get the +// first-acquire's options ("first wins"). Mitigation: +// - The trace-context provider returned by `createTraceContextProvider` +// reads the active context **on every invocation**, so even when the +// provider function is cached the propagated `traceparent` reflects +// the current scope at RPC time. Per-call accuracy is preserved. +// - `TelemetryConfig` (OTel env vars) is genuinely first-wins because +// the CLI subprocess is spawned once per pool entry. Wire telemetry +// as a process-wide / per-agent setting, not per-attempt. + +type SdkTraceContext = NonNullable< + Awaited>> +>; +type SdkTraceContextProvider = NonNullable; +type SdkTelemetryConfig = NonNullable; + +export type { SdkTraceContext as CopilotTraceContext }; +export type { SdkTelemetryConfig as CopilotTelemetryConfig }; + +export type CopilotTraceContextSource = () => + | SdkTraceContext + | undefined + | Promise; +export type CopilotTraceparentSource = () => string | undefined | Promise; +export type CopilotTracestateSource = () => string | undefined | Promise; + +export interface CopilotTraceContextErrorInfo { + readonly part: "traceContext" | "traceparent" | "tracestate"; + readonly error: Error; +} + +export interface CopilotTraceContextOptions { + /** + * Primary source: a single callback returning the full SDK trace context + * (`{traceparent?, tracestate?}`). Use this when the host has one + * authoritative source of trace context so that traceparent and tracestate + * always reflect the same logical scope. + */ + getTraceContext?: CopilotTraceContextSource; + /** + * Convenience source: returns just the W3C `traceparent` header. Used + * when {@link getTraceContext} is not supplied OR returns undefined. + */ + getTraceparent?: CopilotTraceparentSource; + /** + * Convenience source: returns the W3C `tracestate` header. 
Only used + * when {@link getTraceContext} is not supplied or returns undefined, AND a + * non-empty `traceparent` was obtained via {@link getTraceparent}. (Per W3C, + * `tracestate` is meaningless without an accompanying `traceparent`.) + */ + getTracestate?: CopilotTracestateSource; + /** + * Notifier for errors thrown by any source. Defaults to `console.warn`. + * Notifier failures are themselves swallowed. + */ + onError?: (info: CopilotTraceContextErrorInfo) => void; +} + +const EMPTY_TRACE_CONTEXT: SdkTraceContext = Object.freeze({}) as SdkTraceContext; + +function toError(error: unknown): Error { + if (error instanceof Error) { + return error; + } + return new Error(String(error)); +} + +function defaultOnTraceContextError(info: CopilotTraceContextErrorInfo): void { + console.warn(`[copilot:telemetry-bridge] ${info.part} source failed: ${info.error.message}`); +} + +function safeNotify( + notifier: (info: CopilotTraceContextErrorInfo) => void, + info: CopilotTraceContextErrorInfo, +): void { + try { + notifier(info); + } catch { + // Notifier failures are swallowed: telemetry is best-effort. + } +} + +function isNonEmptyString(value: unknown): value is string { + return typeof value === "string" && value.length > 0; +} + +/** + * Build a TraceContextProvider suitable for `CopilotClientOptions.onGetTraceContext`. + * + * Resolution order on each invocation: + * 1. If `getTraceContext` is supplied and returns a non-undefined value, + * return it as-is. Errors from this source → return `{}` and notify. + * 2. Otherwise call `getTraceparent` (if supplied). On error → return + * `{}` and notify (no traceparent = no propagation). + * 3. If traceparent is non-empty, call `getTracestate` (if supplied) + * and attach the result. Errors on tracestate are partial-success: + * notify and return `{traceparent}` (do not lose the parent). + * 4. If no source is supplied, or the sources yield only undefined / an empty + * `traceparent`, return `{}` so the SDK behaves as if no provider were configured.
+ */ +export function createTraceContextProvider( + options?: CopilotTraceContextOptions, +): SdkTraceContextProvider { + const onError = options?.onError ?? defaultOnTraceContextError; + const getTraceContext = options?.getTraceContext; + const getTraceparent = options?.getTraceparent; + const getTracestate = options?.getTracestate; + + return async () => { + if (getTraceContext) { + try { + const ctx = await getTraceContext(); + if (ctx !== undefined) { + return ctx; + } + } catch (error) { + safeNotify(onError, { part: "traceContext", error: toError(error) }); + return EMPTY_TRACE_CONTEXT; + } + } + + if (!getTraceparent) { + return EMPTY_TRACE_CONTEXT; + } + + let traceparent: string | undefined; + try { + traceparent = await getTraceparent(); + } catch (error) { + safeNotify(onError, { part: "traceparent", error: toError(error) }); + return EMPTY_TRACE_CONTEXT; + } + if (!isNonEmptyString(traceparent)) { + return EMPTY_TRACE_CONTEXT; + } + + if (!getTracestate) { + return { traceparent } as SdkTraceContext; + } + + let tracestate: string | undefined; + try { + tracestate = await getTracestate(); + } catch (error) { + safeNotify(onError, { part: "tracestate", error: toError(error) }); + return { traceparent } as SdkTraceContext; + } + + return isNonEmptyString(tracestate) + ? ({ traceparent, tracestate } as SdkTraceContext) + : ({ traceparent } as SdkTraceContext); + }; +} + +export interface CopilotTelemetryOptions { + otlpEndpoint?: string; + filePath?: string; + exporterType?: string; + sourceName?: string; + captureContent?: boolean; +} + +/** + * Shape a `TelemetryConfig` for `CopilotClientOptions.telemetry`. Returns + * `undefined` when no fields are supplied so callers can spread + * conditionally without producing an empty telemetry object that would + * still partially configure the CLI's OTel env layout. + * + * Any explicitly-set value (including `false` for `captureContent`) is + * preserved — only `undefined` is treated as "no opinion". 
+ */ +export function createTelemetryConfig( + options?: CopilotTelemetryOptions, +): SdkTelemetryConfig | undefined { + if (!options) { + return undefined; + } + const result: SdkTelemetryConfig = {}; + if (options.otlpEndpoint !== undefined) { + result.otlpEndpoint = options.otlpEndpoint; + } + if (options.filePath !== undefined) { + result.filePath = options.filePath; + } + if (options.exporterType !== undefined) { + result.exporterType = options.exporterType; + } + if (options.sourceName !== undefined) { + result.sourceName = options.sourceName; + } + if (options.captureContent !== undefined) { + result.captureContent = options.captureContent; + } + return Object.keys(result).length > 0 ? result : undefined; +} diff --git a/extensions/copilot/src/tool-bridge.test.ts b/extensions/copilot/src/tool-bridge.test.ts new file mode 100644 index 000000000000..b3f5fc100862 --- /dev/null +++ b/extensions/copilot/src/tool-bridge.test.ts @@ -0,0 +1,1404 @@ +import type { Tool as SdkTool, ToolInvocation, ToolResultObject } from "@github/copilot-sdk"; +import type { AnyAgentTool, SandboxContext } from "openclaw/plugin-sdk/agent-harness-runtime"; +import { afterEach, describe, expect, it, vi } from "vitest"; +import { + createCopilotToolBridge, + convertOpenClawToolToSdkTool, + supportsModelTools, +} from "./tool-bridge.js"; + +type FakeTool = AnyAgentTool & { + execute: ReturnType; + prepareArguments?: ReturnType; +}; + +function createDeferred() { + let rejectPromise: ((reason?: unknown) => void) | undefined; + let resolvePromise: ((value: T | PromiseLike) => void) | undefined; + const promise = new Promise((resolve, reject) => { + resolvePromise = resolve; + rejectPromise = reject; + }); + return { + promise, + reject(reason?: unknown) { + rejectPromise?.(reason); + }, + resolve(value: T) { + resolvePromise?.(value); + }, + }; +} + +function flushAsync() { + return Promise.resolve().then(() => {}); +} + +function makeInvocation(overrides: Partial = {}): ToolInvocation { + 
return { + arguments: { value: "input" }, + sessionId: "session-1", + toolCallId: "call-1", + toolName: "tool-a", + ...overrides, + }; +} + +function makeTool( + overrides: Partial = {}, + result: { content?: unknown; details: unknown } = { + content: [{ text: "done", type: "text" }], + details: null, + }, +): FakeTool { + return { + description: "A fake tool", + execute: vi.fn(async () => result), + label: "Fake Tool", + name: "tool-a", + parameters: { + properties: { value: { type: "string" } }, + type: "object", + } as never, + ...overrides, + } as unknown as FakeTool; +} + +function getError(result: ToolResultObject): string | undefined { + return result.error; +} + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe("supportsModelTools", () => { + it("returns true for github-copilot and false otherwise", () => { + expect(supportsModelTools("github-copilot")).toBe(true); + expect(supportsModelTools("openai")).toBe(false); + expect(supportsModelTools("github")).toBe(false); + expect(supportsModelTools("openclaw")).toBe(false); + expect(supportsModelTools("copilot")).toBe(false); + expect(supportsModelTools("")).toBe(false); + }); +}); + +describe("createCopilotToolBridge", () => { + it("returns empty arrays for unsupported providers without calling the seam", async () => { + const createOpenClawCodingTools = vi.fn(async () => [makeTool()]); + + const result = await createCopilotToolBridge({ + agentId: "agent-1", + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "openai", + sessionId: "session-1", + }); + + expect(result).toEqual({ sdkTools: [], sourceTools: [] }); + expect(createOpenClawCodingTools).toHaveBeenCalledTimes(0); + }); + + it("forwards supported fields to injected createOpenClawCodingTools", async () => { + const controller = new AbortController(); + const createOpenClawCodingTools = vi.fn(async () => [makeTool()]); + + await createCopilotToolBridge({ + abortSignal: controller.signal, + agentDir: "/agent", + agentId: 
"agent-1", + createOpenClawCodingTools, + cwd: "/workspace/task", + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + sessionKey: "session-key", + workspaceDir: "/workspace", + }); + + expect(createOpenClawCodingTools).toHaveBeenCalledTimes(1); + // F6: the bridge now forwards PI-parity context fields too. This + // test continues to assert the core flat fields plumb through; full + // PI-parity is asserted in dedicated tests below. + expect(createOpenClawCodingTools).toHaveBeenCalledWith( + expect.objectContaining({ + abortSignal: controller.signal, + agentDir: "/agent", + agentId: "agent-1", + cwd: "/workspace/task", + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + // sessionKey is the sandboxSessionKey derivation; with no + // attemptParams the bridge falls back to input.sessionKey. + sessionKey: "session-key", + workspaceDir: "/workspace", + }), + ); + }); + + it("returns sdkTools and sourceTools with matching lengths", async () => { + const sourceTools = [makeTool(), makeTool({ name: "tool-b" })]; + + const result = await createCopilotToolBridge({ + agentId: "agent-1", + createOpenClawCodingTools: async () => sourceTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }); + + expect(result.sourceTools).toBe(sourceTools); + expect(result.sdkTools).toHaveLength(2); + expect(result.sdkTools.map((tool) => tool.name)).toEqual(["tool-a", "tool-b"]); + }); + + it("throws when createOpenClawCodingTools returns a non-array", async () => { + await expect( + createCopilotToolBridge({ + agentId: "agent-1", + createOpenClawCodingTools: async () => ({ tools: [] }) as never, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }), + ).rejects.toThrow("createOpenClawCodingTools must return an array"); + }); + + it("throws when createOpenClawCodingTools rejects and includes the cause", async () => { + await expect( + createCopilotToolBridge({ + 
agentId: "agent-1", + createOpenClawCodingTools: async () => { + throw new Error("factory failed"); + }, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }), + ).rejects.toThrow("factory failed"); + }); + + it("throws on duplicate tool names and lists all duplicates", async () => { + await expect( + createCopilotToolBridge({ + agentId: "agent-1", + createOpenClawCodingTools: async () => [ + makeTool({ name: "alpha" }), + makeTool({ name: "beta" }), + makeTool({ name: "alpha" }), + makeTool({ name: "beta" }), + ], + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }), + ).rejects.toThrow("duplicate tool names: alpha, beta"); + }); + + // F6: PI-parity tool context. The bridged OpenClaw tools register + // with the SDK as `overridesBuiltInTool: true, skipPermission: true`, + // so the wrapped-tool enforcement layer + // (src/agents/pi-tools.before-tool-call.ts) is the single gate for + // permission, owner-only allowlists, loop detection, trusted-plugin + // policies, and two-phase plugin approvals. Missing context fields + // silently degrade those policy decisions. See round-3 maintainer + // finding F6 and docs/plugins/copilot.md. 
+ describe("PI-parity attempt context (F6)", () => { + function captureCall() { + const createOpenClawCodingTools = vi.fn(async () => [makeTool()]); + return { + createOpenClawCodingTools, + getOpts: () => + (createOpenClawCodingTools.mock.calls[0] as unknown[] | undefined)?.[0] as Record< + string, + unknown + >, + }; + } + + it("forwards identity, owner/policy, and channel/routing fields from attemptParams", async () => { + const { createOpenClawCodingTools, getOpts } = captureCall(); + + await createCopilotToolBridge({ + agentId: "agent-1", + attemptParams: { + agentAccountId: "acct-1", + senderId: "sender-1", + senderName: "Ada", + senderUsername: "ada", + senderE164: "+15551234567", + senderIsOwner: true, + memberRoleIds: ["role-admin"], + allowGatewaySubagentBinding: true, + spawnedBy: "parent:agent", + groupId: "g-1", + groupChannel: "#general", + groupSpace: "team-1", + currentChannelId: "C123", + currentThreadTs: "1700000000.000100", + currentMessageId: "M-1", + messageProvider: "slack", + messageTo: "U-1", + messageThreadId: "1700000000.000100", + replyToMode: "first", + requireExplicitMessageTarget: true, + disableMessageTool: false, + forceMessageTool: true, + enableHeartbeatTool: true, + forceHeartbeatTool: false, + } as never, + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }); + + const opts = getOpts(); + expect(opts).toMatchObject({ + agentAccountId: "acct-1", + senderId: "sender-1", + senderName: "Ada", + senderUsername: "ada", + senderE164: "+15551234567", + senderIsOwner: true, + memberRoleIds: ["role-admin"], + allowGatewaySubagentBinding: true, + spawnedBy: "parent:agent", + groupId: "g-1", + groupChannel: "#general", + groupSpace: "team-1", + currentChannelId: "C123", + currentThreadTs: "1700000000.000100", + currentMessageId: "M-1", + messageProvider: "slack", + messageTo: "U-1", + messageThreadId: "1700000000.000100", + replyToMode: "first", + requireExplicitMessageTarget: 
true, + forceMessageTool: true, + enableHeartbeatTool: true, + }); + }); + + it("falls back messageProvider to attemptParams.messageChannel when messageProvider is absent (codex parity)", async () => { + const { createOpenClawCodingTools, getOpts } = captureCall(); + + await createCopilotToolBridge({ + agentId: "agent-1", + attemptParams: { messageChannel: "telegram" } as never, + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }); + + expect(getOpts().messageProvider).toBe("telegram"); + }); + + it("forwards authProfileStore, runId, config, and run hooks (onToolOutcome) from attemptParams", async () => { + const { createOpenClawCodingTools, getOpts } = captureCall(); + const authProfileStore = { kind: "fake-store" } as never; + const config = { agents: {} } as never; + const onToolOutcome = vi.fn(); + + await createCopilotToolBridge({ + agentId: "agent-1", + attemptParams: { + authProfileStore, + runId: "run-1", + config, + onToolOutcome, + } as never, + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }); + + const opts = getOpts(); + expect(opts.authProfileStore).toBe(authProfileStore); + expect(opts.runId).toBe("run-1"); + expect(opts.config).toBe(config); + expect(opts.onToolOutcome).toBe(onToolOutcome); + }); + + it("prefers the unscoped toolAuthProfileStore when building OpenClaw tools", async () => { + const { createOpenClawCodingTools, getOpts } = captureCall(); + const authProfileStore = { kind: "transport-scoped-store" } as never; + const toolAuthProfileStore = { kind: "tool-store" } as never; + + await createCopilotToolBridge({ + agentId: "agent-1", + attemptParams: { + authProfileStore, + toolAuthProfileStore, + } as never, + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }); + + expect(getOpts().authProfileStore).toBe(toolAuthProfileStore); + }); + + it("derives 
sandboxSessionKey and runSessionKey from attemptParams (PI parity)", async () => { + const { createOpenClawCodingTools, getOpts } = captureCall(); + + await createCopilotToolBridge({ + agentId: "agent-1", + // Mirrors PI attempt.ts:1053-1060: when sandboxSessionKey + // differs from sessionKey, sessionKey is published as the + // sandbox key and the real run key is exposed as runSessionKey + // so `session_status: "current"` resolves to the live session. + attemptParams: { + sandboxSessionKey: "sandbox:agent:main", + sessionKey: "agent:main:main", + } as never, + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }); + + const opts = getOpts(); + expect(opts.sessionKey).toBe("sandbox:agent:main"); + expect(opts.runSessionKey).toBe("agent:main:main"); + }); + + it("derives runSessionKey as undefined when sandboxSessionKey equals sessionKey", async () => { + const { createOpenClawCodingTools, getOpts } = captureCall(); + + await createCopilotToolBridge({ + agentId: "agent-1", + attemptParams: { sessionKey: "agent:main:main" } as never, + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }); + + const opts = getOpts(); + expect(opts.sessionKey).toBe("agent:main:main"); + expect(opts.runSessionKey).toBeUndefined(); + }); + + it("falls back sessionKey to input.sessionKey when attemptParams omits it (legacy callers)", async () => { + const { createOpenClawCodingTools, getOpts } = captureCall(); + + await createCopilotToolBridge({ + agentId: "agent-1", + attemptParams: {}, + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + sessionKey: "fallback-key", + }); + + expect(getOpts().sessionKey).toBe("fallback-key"); + }); + + it("computes modelApi, modelContextWindowTokens, modelCompat, and modelHasVision from attemptParams.model", async () => { + const { createOpenClawCodingTools, getOpts } = 
captureCall(); + + await createCopilotToolBridge({ + agentId: "agent-1", + attemptParams: { + model: { + api: "openai-responses", + contextWindow: 200_000, + input: ["text", "image"], + compat: { some: "shape" }, + }, + } as never, + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }); + + const opts = getOpts(); + expect(opts.modelApi).toBe("openai-responses"); + expect(opts.modelContextWindowTokens).toBe(200_000); + expect(opts.modelHasVision).toBe(true); + expect(opts.modelCompat).toEqual({ some: "shape" }); + }); + + it("modelHasVision is false when model.input does not include 'image'", async () => { + const { createOpenClawCodingTools, getOpts } = captureCall(); + + await createCopilotToolBridge({ + agentId: "agent-1", + attemptParams: { model: { input: ["text"] } } as never, + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }); + + expect(getOpts().modelHasVision).toBe(false); + }); + + it("spreads execOverrides and bashElevated into the exec field (PI parity)", async () => { + const { createOpenClawCodingTools, getOpts } = captureCall(); + const execOverrides = { security: "fast" } as never; + const bashElevated = { allowed: true } as never; + + await createCopilotToolBridge({ + agentId: "agent-1", + attemptParams: { execOverrides, bashElevated } as never, + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }); + + const exec = getOpts().exec as Record; + expect(exec).toMatchObject({ security: "fast", elevated: { allowed: true } }); + }); + + it("forwards run-trace context (trigger, jobId, memoryFlushWritePath, toolsAllow) via buildEmbeddedAttemptToolRunContext", async () => { + const { createOpenClawCodingTools, getOpts } = captureCall(); + + await createCopilotToolBridge({ + agentId: "agent-1", + attemptParams: { + trigger: "cron", + jobId: "job-1", + memoryFlushWritePath: 
".memory/append.md", + toolsAllow: ["read", "edit"], + } as never, + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }); + + const opts = getOpts(); + expect(opts.trigger).toBe("cron"); + expect(opts.jobId).toBe("job-1"); + expect(opts.memoryFlushWritePath).toBe(".memory/append.md"); + // buildEmbeddedAttemptToolRunContext renames toolsAllow -> + // runtimeToolAllowlist; consumers (PI plugin tools) read the + // renamed key, so the bridge must surface the renamed shape too. + expect(opts.runtimeToolAllowlist).toEqual(["read", "edit"]); + }); + + it("onYield routes to sessionRef.current.abort() and invokes onYieldDetected when the live session is bound", async () => { + const { createOpenClawCodingTools, getOpts } = captureCall(); + const abort = vi.fn(); + const sessionRef: { current: { abort?: () => unknown } | undefined } = { + current: undefined, + }; + const onYieldDetected = vi.fn(); + + await createCopilotToolBridge({ + agentId: "agent-1", + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + onYieldDetected, + sessionId: "session-1", + sessionRef, + }); + + const onYield = getOpts().onYield as (msg?: string) => void; + // No session bound yet: onYield must no-op the abort path + // without throwing, but the onYieldDetected notification fires + // regardless so a yield before session-bind is still surfaced + // to the final attempt result. + expect(() => onYield("early yield")).not.toThrow(); + expect(abort).toHaveBeenCalledTimes(0); + expect(onYieldDetected).toHaveBeenCalledTimes(1); + expect(onYieldDetected).toHaveBeenCalledWith("early yield"); + + // Bind the session after the fact (attempt.ts does this after + // createSession/resumeSession resolves) and verify subsequent + // yields abort it and continue to notify. 
+ sessionRef.current = { abort }; + onYield("now yield"); + expect(abort).toHaveBeenCalledTimes(1); + expect(onYieldDetected).toHaveBeenCalledTimes(2); + expect(onYieldDetected).toHaveBeenLastCalledWith("now yield"); + }); + + it("onYield still aborts the live session when onYieldDetected throws (defense in depth)", async () => { + const { createOpenClawCodingTools, getOpts } = captureCall(); + const abort = vi.fn(); + const sessionRef: { current: { abort?: () => unknown } | undefined } = { + current: { abort }, + }; + const warn = vi.spyOn(console, "warn").mockImplementation(() => undefined); + + await createCopilotToolBridge({ + agentId: "agent-1", + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + onYieldDetected: () => { + throw new Error("handler boom"); + }, + sessionId: "session-1", + sessionRef, + }); + + const onYield = getOpts().onYield as (msg?: string) => void; + expect(() => onYield("handler-fails-but-abort-must-fire")).not.toThrow(); + expect(abort).toHaveBeenCalledTimes(1); + warn.mockRestore(); + }); + + it("requireExplicitMessageTarget defaults to isSubagentSessionKey(sessionKey) when undefined", async () => { + const { createOpenClawCodingTools, getOpts } = captureCall(); + + await createCopilotToolBridge({ + agentId: "agent-1", + // No requireExplicitMessageTarget; sessionKey looks like a + // subagent key so the default must be true. Mirrors PI + // attempt.ts:1097-1098. + attemptParams: { sessionKey: "subagent:envelope:abc" } as never, + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }); + + const opts = getOpts(); + // We don't assert the exact boolean (subagent detection is owned + // by isSubagentSessionKey) — only that the bridge consulted the + // helper rather than emitting `undefined`. 
+ expect(typeof opts.requireExplicitMessageTarget).toBe("boolean"); + }); + }); + + describe("sandbox forwarding (PR #86155 [P1])", () => { + function makeSandboxStub(overrides: Partial = {}): SandboxContext { + return { + enabled: true, + workspaceAccess: "ro", + workspaceDir: "/sandbox/copy", + agentWorkspaceDir: "/sandbox/agent", + scopeKey: "agent-1:session-1", + sessionKey: "session-1", + backend: { kind: "local" } as never, + cfg: {} as never, + ...overrides, + } as unknown as SandboxContext; + } + + it("defaults sandbox to undefined and derives spawnWorkspaceDir from workspaceDir when no sandbox is passed (back-compat)", async () => { + const createOpenClawCodingTools = vi.fn(async () => [makeTool()]); + await createCopilotToolBridge({ + agentId: "agent-1", + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + sessionKey: "session-1", + workspaceDir: "/workspace", + }); + const opts = (createOpenClawCodingTools.mock.calls[0] as unknown[] | undefined)?.[0] as { + sandbox?: unknown; + spawnWorkspaceDir?: unknown; + workspaceDir?: unknown; + }; + expect(opts.sandbox).toBeUndefined(); + expect(opts.workspaceDir).toBe("/workspace"); + // resolveAttemptSpawnWorkspaceDir returns undefined for the + // no-sandbox path; the back-compat fallback emits that. 
+ expect(opts.spawnWorkspaceDir).toBeUndefined(); + }); + + it("forwards an explicit sandbox and spawnWorkspaceDir verbatim to createOpenClawCodingTools", async () => { + const sandbox = makeSandboxStub(); + const createOpenClawCodingTools = vi.fn(async () => [makeTool()]); + await createCopilotToolBridge({ + agentId: "agent-1", + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sandbox, + sessionId: "session-1", + sessionKey: "session-1", + spawnWorkspaceDir: "/original-workspace", + workspaceDir: "/sandbox/copy", + }); + const opts = (createOpenClawCodingTools.mock.calls[0] as unknown[] | undefined)?.[0] as { + sandbox?: unknown; + spawnWorkspaceDir?: unknown; + workspaceDir?: unknown; + }; + expect(opts.sandbox).toBe(sandbox); + expect(opts.workspaceDir).toBe("/sandbox/copy"); + expect(opts.spawnWorkspaceDir).toBe("/original-workspace"); + }); + + it("derives spawnWorkspaceDir from sandbox when caller omits it (fallback path)", async () => { + const sandbox = makeSandboxStub({ workspaceAccess: "ro" }); + const createOpenClawCodingTools = vi.fn(async () => [makeTool()]); + await createCopilotToolBridge({ + agentId: "agent-1", + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sandbox, + sessionId: "session-1", + sessionKey: "session-1", + workspaceDir: "/sandbox/copy", + }); + const opts = (createOpenClawCodingTools.mock.calls[0] as unknown[] | undefined)?.[0] as { + spawnWorkspaceDir?: unknown; + }; + // Fallback derives spawnWorkspaceDir from (effective) workspaceDir + // since the caller didn't pre-compute one. For a ro/none sandbox + // this yields the effective dir (= sandbox copy). Production + // callers (attempt.ts) always pre-compute spawnWorkspaceDir from + // the original workspace; the fallback is for test fixtures. 
+ expect(opts.spawnWorkspaceDir).toBe("/sandbox/copy"); + }); + }); + + // The Copilot bridge mirrors the PI runner's disable/raw/allowlist + // gates locally (codex-precedent at + // extensions/codex/src/app-server/run-attempt.ts:3813,3906-3939,4220-4234) + // so a Copilot run cannot expose the SDK any tool that the same + // OpenClaw attempt would suppress. These tests pin the contract. + describe("tool-surface gating (PR #86155 [P1] round-6)", () => { + it("short-circuits when attemptParams.disableTools is true and never calls createOpenClawCodingTools", async () => { + const createOpenClawCodingTools = vi.fn(async () => [makeTool()]); + const result = await createCopilotToolBridge({ + agentId: "agent-1", + attemptParams: { disableTools: true } as never, + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }); + expect(result).toEqual({ sdkTools: [], sourceTools: [] }); + expect(createOpenClawCodingTools).toHaveBeenCalledTimes(0); + }); + + it('short-circuits raw model runs signalled via promptMode: "none"', async () => { + const createOpenClawCodingTools = vi.fn(async () => [makeTool()]); + const result = await createCopilotToolBridge({ + agentId: "agent-1", + attemptParams: { promptMode: "none" } as never, + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }); + expect(result).toEqual({ sdkTools: [], sourceTools: [] }); + expect(createOpenClawCodingTools).toHaveBeenCalledTimes(0); + }); + + it("short-circuits raw model runs signalled via modelRun: true", async () => { + const createOpenClawCodingTools = vi.fn(async () => [makeTool()]); + const result = await createCopilotToolBridge({ + agentId: "agent-1", + attemptParams: { modelRun: true } as never, + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }); + expect(result).toEqual({ sdkTools: [], sourceTools: [] }); + 
expect(createOpenClawCodingTools).toHaveBeenCalledTimes(0); + }); + + it("filters constructed tools to exactly the allowlist when toolsAllow is narrow", async () => { + const createOpenClawCodingTools = vi.fn(async () => [ + makeTool({ name: "read" }), + makeTool({ name: "edit" }), + makeTool({ name: "message" }), + ]); + const result = await createCopilotToolBridge({ + agentId: "agent-1", + attemptParams: { toolsAllow: ["read"] } as never, + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }); + expect(result.sourceTools.map((tool) => tool.name)).toEqual(["read"]); + expect(result.sdkTools.map((tool) => tool.name)).toEqual(["read"]); + }); + + it("returns no tools when toolsAllow is an empty list and nothing is forced", async () => { + const createOpenClawCodingTools = vi.fn(async () => [ + makeTool({ name: "read" }), + makeTool({ name: "edit" }), + ]); + const result = await createCopilotToolBridge({ + agentId: "agent-1", + attemptParams: { toolsAllow: [] } as never, + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }); + expect(result.sourceTools).toEqual([]); + expect(result.sdkTools).toEqual([]); + }); + + it('merges "message" into an empty allowlist when forceMessageTool is true', async () => { + const createOpenClawCodingTools = vi.fn(async () => [ + makeTool({ name: "read" }), + makeTool({ name: "message" }), + ]); + const result = await createCopilotToolBridge({ + agentId: "agent-1", + attemptParams: { toolsAllow: [], forceMessageTool: true } as never, + createOpenClawCodingTools, + modelId: "gpt-4o", + modelProvider: "github-copilot", + sessionId: "session-1", + }); + expect(result.sourceTools.map((tool) => tool.name)).toEqual(["message"]); + }); + + it('merges "message" into an empty allowlist when sourceReplyDeliveryMode is message_tool_only', async () => { + const createOpenClawCodingTools = vi.fn(async () => [ + makeTool({ name: 
it('appends "message" to a narrow allowlist when forceMessageTool is true', async () => {
  // "message" is not on the allowlist but is forced, so it is expected to
  // survive next to "read" while "edit" is filtered out.
  const toolFactory = vi.fn(async () => [
    makeTool({ name: "read" }),
    makeTool({ name: "edit" }),
    makeTool({ name: "message" }),
  ]);
  const forcedMessageParams = {
    toolsAllow: ["read"],
    forceMessageTool: true,
  } as never;

  const bridge = await createCopilotToolBridge({
    agentId: "agent-1",
    attemptParams: forcedMessageParams,
    createOpenClawCodingTools: toolFactory,
    modelId: "gpt-4o",
    modelProvider: "github-copilot",
    sessionId: "session-1",
  });

  // Sorted so the assertion does not depend on construction order.
  const keptNames = bridge.sourceTools.map((tool) => tool.name).toSorted();
  expect(keptNames).toEqual(["message", "read"]);
});
it("leaves the tool list unchanged when toolsAllow contains a wildcard", async () => {
  // A "*" entry allows everything: both tools pass through in their original order.
  const offeredTools = [makeTool({ name: "read" }), makeTool({ name: "edit" })];
  const toolFactory = vi.fn(async () => offeredTools);

  const { sourceTools } = await createCopilotToolBridge({
    agentId: "agent-1",
    attemptParams: { toolsAllow: ["*"] } as never,
    createOpenClawCodingTools: toolFactory,
    modelId: "gpt-4o",
    modelProvider: "github-copilot",
    sessionId: "session-1",
  });

  const names: string[] = [];
  for (const tool of sourceTools) {
    names.push(tool.name);
  }
  expect(names).toEqual(["read", "edit"]);
});
it('matches the "exec" tool when toolsAllow contains "bash"', async () => {
  // "bash" is an allowlist alias: it must select the tool registered as "exec"
  // and nothing else.
  const toolFactory = vi.fn(async () => [
    makeTool({ name: "exec" }),
    makeTool({ name: "read" }),
  ]);
  const aliasOnlyParams = { toolsAllow: ["bash"] } as never;

  const bridge = await createCopilotToolBridge({
    agentId: "agent-1",
    attemptParams: aliasOnlyParams,
    createOpenClawCodingTools: toolFactory,
    modelId: "gpt-4o",
    modelProvider: "github-copilot",
    sessionId: "session-1",
  });

  const keptNames = bridge.sourceTools.map((tool) => tool.name);
  expect(keptNames).toEqual(["exec"]);
});
it("continues to match canonical names directly (no double-aliasing)", async () => {
  // Canonical names on the allowlist must keep matching the tools of the
  // same name; alias handling must not rewrite them away.
  const toolFactory = vi.fn(async () => {
    const execTool = makeTool({ name: "exec" });
    const patchTool = makeTool({ name: "apply_patch" });
    return [execTool, patchTool];
  });

  const { sourceTools } = await createCopilotToolBridge({
    agentId: "agent-1",
    attemptParams: { toolsAllow: ["exec", "apply_patch"] } as never,
    createOpenClawCodingTools: toolFactory,
    modelId: "gpt-4o",
    modelProvider: "github-copilot",
    sessionId: "session-1",
  });

  const sortedNames = sourceTools.map((tool) => tool.name).toSorted();
  expect(sortedNames).toEqual(["apply_patch", "exec"]);
});
it("does not keep apply_patch for a write-only allowlist", async () => {
  const toolFactory = vi.fn(async () => [
    makeTool({ name: "write" }),
    makeTool({ name: "apply_patch" }),
  ]);

  const bridge = await createCopilotToolBridge({
    agentId: "agent-1",
    attemptParams: { toolsAllow: ["write"] } as never,
    createOpenClawCodingTools: toolFactory,
    modelId: "gpt-4o",
    modelProvider: "github-copilot",
    sessionId: "session-1",
  });

  // Only the explicitly allowed tool survives filtering.
  expect(bridge.sourceTools.map((tool) => tool.name)).toEqual(["write"]);

  // The plan forwarded to the factory must also have shell tools switched off.
  const firstCallArgs = toolFactory.mock.calls[0] as unknown[] | undefined;
  const forwardedOptions = firstCallArgs?.[0] as {
    toolConstructionPlan?: { includeShellTools?: boolean };
  };
  expect(forwardedOptions?.toolConstructionPlan?.includeShellTools).toBe(false);
});
it("preserves name, description, and parameters exactly", () => {
  const schema = {
    properties: { path: { type: "string" } },
    type: "object",
  };

  const converted = convertOpenClawToolToSdkTool(
    makeTool({
      description: "Read a file",
      name: "read_file",
      parameters: schema as never,
    }),
    {},
  );

  expect(converted.name).toBe("read_file");
  expect(converted.description).toBe("Read a file");
  // toBe checks identity: the very same schema object must be forwarded.
  expect(converted.parameters).toBe(schema);
});
it("returns a failure result when the signal is already aborted", async () => {
  const expectedMessage = "[copilot-tool-bridge] aborted before execution";

  // Abort before the handler is ever invoked.
  const abortedUpFront = new AbortController();
  abortedUpFront.abort();

  const wrappedTool = makeTool();
  const bridged = convertOpenClawToolToSdkTool(wrappedTool, {
    abortSignal: abortedUpFront.signal,
  });

  const outcome = await bridged.handler({}, makeInvocation());

  // The underlying tool must never run, and the failure text is reported
  // both to the model and through the error field.
  expect(wrappedTool.execute).toHaveBeenCalledTimes(0);
  expect(outcome).toMatchObject({
    resultType: "failure",
    textResultForLlm: expectedMessage,
  });
  expect(getError(outcome as ToolResultObject)).toBe(expectedMessage);
});
it("calls prepareArguments and passes the prepared args and toolCallId to execute", async () => {
  const normalizedArgs = { value: "prepared" };
  const prepareArguments = vi.fn(() => normalizedArgs);
  const wrappedTool = makeTool({ prepareArguments });
  const bridged = convertOpenClawToolToSdkTool(wrappedTool, {});
  const invocation = makeInvocation({ toolCallId: "call-99" });

  await bridged.handler({ value: "raw" }, invocation);

  // prepareArguments sees the raw SDK args exactly once...
  expect(prepareArguments).toHaveBeenCalledTimes(1);
  expect(prepareArguments).toHaveBeenCalledWith({ value: "raw" });
  // ...and execute receives its output together with the invocation's call id.
  expect(wrappedTool.execute).toHaveBeenCalledWith(
    "call-99",
    normalizedArgs,
    undefined,
    undefined,
  );
});
it("joins multiple text blocks with newlines", async () => {
  // Three text blocks are expected to collapse into one newline-separated string.
  const threeBlockTool = makeTool(
    {},
    {
      content: [
        { text: "first", type: "text" },
        { text: "second", type: "text" },
        { text: "third", type: "text" },
      ],
      details: null,
    },
  );
  const bridged = convertOpenClawToolToSdkTool(threeBlockTool, {});

  const outcome = await bridged.handler({}, makeInvocation());

  expect(outcome).toEqual({
    resultType: "success",
    textResultForLlm: "first\nsecond\nthird",
  });
});
it("returns a failure result when execute throws and preserves the error", async () => {
  const failure = new Error("tool exploded");
  const wrappedTool = makeTool({
    execute: vi.fn(async () => {
      throw failure;
    }),
  });
  const bridged = convertOpenClawToolToSdkTool(wrappedTool, {});

  const outcome = await bridged.handler({}, makeInvocation());

  // The handler must resolve (not reject) with a failure result that
  // carries the original message.
  expect(outcome).toMatchObject({
    resultType: "failure",
    textResultForLlm: "[copilot-tool-bridge] tool 'tool-a' failed: tool exploded",
  });
  expect(getError(outcome as ToolResultObject)).toBe(failure.message);
});
it("serializes sequential tools so the second call waits for the first", async () => {
  type TextResult = {
    content: Array<{ text: string; type: string }>;
    details: null;
  };
  const gateOne = createDeferred<TextResult>();
  const gateTwo = createDeferred<TextResult>();
  const execute = vi
    .fn()
    .mockImplementationOnce(async () => gateOne.promise)
    .mockImplementationOnce(async () => gateTwo.promise);
  const sdkTool = convertOpenClawToolToSdkTool(
    makeTool({ execute, executionMode: "sequential" }),
    {},
  );

  const pendingFirst = sdkTool.handler({}, makeInvocation({ toolCallId: "call-1" }));
  const pendingSecond = sdkTool.handler({}, makeInvocation({ toolCallId: "call-2" }));
  await flushAsync();

  // While the first call is unresolved, the second must not have started.
  expect(execute).toHaveBeenCalledTimes(1);
  gateOne.resolve({ content: [{ text: "one", type: "text" }], details: null });
  await pendingFirst;
  await flushAsync();
  // Once the first call settles, the queued second call begins.
  expect(execute).toHaveBeenCalledTimes(2);
  gateTwo.resolve({ content: [{ text: "two", type: "text" }], details: null });

  await expect(Promise.all([pendingFirst, pendingSecond])).resolves.toEqual([
    { resultType: "success", textResultForLlm: "one" },
    { resultType: "success", textResultForLlm: "two" },
  ]);
});
/**
 * Options bag accepted by `createOpenClawCodingTools`: the type of the
 * factory's first parameter with `null`/`undefined` excluded. Derived from
 * the factory signature so it cannot drift from the real function.
 */
type OpenClawCodingToolsOptions = NonNullable<Parameters<CreateOpenClawCodingTools>[0]>;
/**
 * Structural subset of `EmbeddedRunAttemptParams` carried into the tool
 * bridge for PI-parity tool context (see
 * `src/agents/pi-embedded-runner/run/attempt.ts:1029-1117` — the
 * authoritative `createOpenClawCodingTools({...})` call shape).
 *
 * Declared as `Partial<EmbeddedRunAttemptParams>` (imported from the
 * `openclaw/plugin-sdk/agent-harness-runtime` boundary, *not* from
 * `attempt.ts` in this extension) to avoid an `attempt.ts` ↔
 * `tool-bridge.ts` import cycle while keeping the field shapes
 * authoritative. Production callers pass the live attempt params; test
 * fixtures may omit this field entirely and fall back to the flat
 * fields below for minimal-config wiring.
 */
export type CopilotToolAttemptParams = Partial<EmbeddedRunAttemptParams>;
When set, the bridge forwards + * identity, channel, owner/policy, auth-profile, message-routing, + * model, and run-trace fields to `createOpenClawCodingTools` so the + * wrapped-tool enforcement layer + * (`src/agents/pi-tools.before-tool-call.ts`) receives the same + * context the in-tree PI runner provides. See + * `src/agents/pi-embedded-runner/run/attempt.ts:1029-1117`. + */ + attemptParams?: CopilotToolAttemptParams; + /** + * Mutable session holder used to wire `onYield` to the live + * `session.abort()` once the SDK session is established. See + * {@link CopilotSessionHolder}. + */ + sessionRef?: CopilotSessionHolder; + /** + * Invoked when a wrapped tool fires `sessions_yield`. The bridge + * always also calls `sessionRef.current?.abort?.()` to interrupt + * the in-flight SDK session; this callback lets the caller track + * the yield so the final attempt result can carry + * `yieldDetected: true` (the parent runner uses it to mark + * liveness as paused and stop_reason as `end_turn`). Mirrors + * the PI/codex contract — see + * `src/agents/pi-embedded-runner/run/attempt.ts:1107-1113` and + * `extensions/codex/src/app-server/run-attempt.ts:539-541`. 
/**
 * Builds the Copilot SDK tool set for one attempt by wrapping OpenClaw's
 * coding tools.
 *
 * Order of operations:
 * 1. Providers outside `SUPPORTED_TOOL_PROVIDERS` get an empty bridge.
 * 2. The shared embedded-runner construction plan is resolved from the
 *    attempt params. When the runtime allowlist contains a non-wildcard
 *    glob, the plan is widened to construct every tool family and the
 *    filters below narrow the result by name.
 * 3. Tools are built with `input.createOpenClawCodingTools` when supplied,
 *    otherwise with the lazily imported default factory.
 * 4. The built tools are filtered by construction plan, then by allowlist.
 * 5. Duplicate names are rejected only after filtering.
 *
 * @param input - Model, session, and attempt context for the bridge.
 * @returns The SDK-facing tools and the OpenClaw tools they wrap, in the
 *   same order.
 * @throws When the tool factory throws or returns a non-array, or when two
 *   tools that survive filtering share a name.
 */
export async function createCopilotToolBridge(
  input: CopilotToolBridgeInput,
): Promise<CopilotToolBridge> {
  if (!supportsModelTools(input.modelProvider)) {
    return { sdkTools: [], sourceTools: [] };
  }

  const attemptParams = input.attemptParams ?? ({} as CopilotToolAttemptParams);
  const toolPlan = resolveEmbeddedAttemptToolConstructionPlan({
    disableTools: attemptParams.disableTools,
    forceMessageTool: shouldForceCopilotMessageTool(attemptParams),
    isRawModelRun: isCopilotRawModelRun(attemptParams),
    toolsAllow: attemptParams.toolsAllow,
  });
  // A glob such as "web_*" is only narrowed by the name filters further
  // down, so every tool family is constructed first.
  const effectiveToolPlan = hasNonWildcardGlobAllowlist(toolPlan.runtimeToolAllowlist)
    ? {
        ...toolPlan,
        codingToolConstructionPlan: {
          includeBaseCodingTools: true,
          includeChannelTools: true,
          includeOpenClawTools: true,
          includePluginTools: true,
          includeShellTools: true,
        },
        constructTools: true,
        includeCoreTools: true,
      }
    : toolPlan;
  if (!effectiveToolPlan.constructTools) {
    return { sdkTools: [], sourceTools: [] };
  }

  // Import the default factory lazily so callers that inject their own
  // (e.g. tests) never load the harness module.
  const createOpenClawCodingTools =
    input.createOpenClawCodingTools ??
    (await import("openclaw/plugin-sdk/agent-harness")).createOpenClawCodingTools;

  const toolOptions = buildOpenClawCodingToolsOptions(input, effectiveToolPlan);

  // Typed as unknown: an injected factory is not trusted to honour its
  // declared return type, so the shape is validated below.
  let sourceTools: unknown;
  try {
    sourceTools = await createOpenClawCodingTools(toolOptions);
  } catch (error: unknown) {
    throw createError(
      `[copilot-tool-bridge] createOpenClawCodingTools failed: ${toError(error).message}`,
      error,
    );
  }

  if (!Array.isArray(sourceTools)) {
    throw new Error(
      "[copilot-tool-bridge] createOpenClawCodingTools must return an array of tools",
    );
  }

  const plannedTools = filterCopilotToolsForConstructionPlan(
    sourceTools as AnyAgentTool[],
    effectiveToolPlan.codingToolConstructionPlan,
  );
  const filteredTools = filterCopilotToolsForAllowlist(
    plannedTools,
    effectiveToolPlan.runtimeToolAllowlist,
  );

  // Run duplicate detection after filtering so a duplicate in a
  // suppressed tool does not fail a narrow run (PI parity: PI never
  // sees the duplicate either when the allowlist excludes it).
  const duplicateNames = findDuplicateToolNames(filteredTools);
  if (duplicateNames.length > 0) {
    throw new Error(`[copilot-tool-bridge] duplicate tool names: ${duplicateNames.join(", ")}`);
  }

  return {
    sdkTools: filteredTools.map((sourceTool) =>
      convertOpenClawToolToSdkTool(sourceTool, {
        abortSignal: input.abortSignal,
        beforeExecute: input.beforeExecute,
      }),
    ),
    sourceTools: filteredTools,
  };
}
/**
 * Assembles the `OpenClawCodingToolsOptions` for a Copilot attempt from
 * the bridge input and the resolved tool plan.
 *
 * The wrapped-tool enforcement layer
 * (`src/agents/pi-tools.before-tool-call.ts → wrapToolWithBeforeToolCallHook`)
 * is the single gate for permission, owner-only allowlists, loop
 * detection, trusted-plugin policies, and two-phase plugin approvals.
 * That layer reads its context from the fields forwarded here; missing
 * fields silently degrade policy decisions. See docs/plugins/copilot.md.
 *
 * The shared embedded-runner tool plan is forwarded so the bridge does
 * not construct broad tool families only to filter them later. That
 * preserves PI allowlist semantics such as `write` not materializing
 * `apply_patch`.
 * Sandbox is forwarded via the explicit `sandbox` field on
 * {@link CopilotToolBridgeInput}; callers resolve it via
 * `resolveSandboxContext` before constructing the bridge.
 *
 * @param input - Bridge input; flat fields on `input` take precedence over
 *   the same-named fields on `input.attemptParams` for `workspaceDir`,
 *   `cwd`, and `agentDir`.
 * @param toolPlan - Resolved tool plan. Typed structurally because only
 *   these three fields are read here.
 * @returns The options object, including an `onYield` callback that
 *   notifies `input.onYieldDetected` and then aborts the current SDK
 *   session (if one has been published on `input.sessionRef`).
 */
function buildOpenClawCodingToolsOptions(
  input: CopilotToolBridgeInput,
  toolPlan: {
    includeCoreTools: OpenClawCodingToolsOptions["includeCoreTools"];
    runtimeToolAllowlist: OpenClawCodingToolsOptions["runtimeToolAllowlist"];
    codingToolConstructionPlan: OpenClawCodingToolsOptions["toolConstructionPlan"];
  },
): OpenClawCodingToolsOptions {
  const a = input.attemptParams ?? ({} as CopilotToolAttemptParams);

  // Mirror PI's `sandboxSessionKey` derivation (attempt.ts:873-874) so
  // wrapped tools see the same policy key PI uses. When the attempt
  // exposes neither sandboxSessionKey nor sessionKey, fall back to the
  // flat input.sessionKey/sessionId.
  const sandboxSessionKey =
    a.sandboxSessionKey?.trim() || a.sessionKey?.trim() || input.sessionKey || input.sessionId;

  // When sandboxSessionKey differs from the real run session key (e.g.
  // Telegram direct peer key vs `agent:main:main`), pass the live key
  // so `session_status: "current"` resolves to the active run session,
  // not the stale sandbox key. Mirrors PI attempt.ts:1057-1060.
  const liveSessionKey = a.sessionKey ?? input.sessionKey;
  const runSessionKey =
    liveSessionKey && liveSessionKey !== sandboxSessionKey ? liveSessionKey : undefined;

  const workspaceDir = input.workspaceDir ?? a.workspaceDir;
  const cwd = input.cwd ?? a.cwd;
  const agentDir = input.agentDir ?? a.agentDir;
  // Sandbox forwarded from the caller (attempt.ts derives it via
  // `resolveSandboxContext`). Wrapped tools that opt into sandbox-aware
  // behavior now see the same policy PI provides. Spawn workspace falls
  // through to the caller-provided value when supplied; otherwise we
  // derive it locally from the (possibly sandbox-effective) workspaceDir
  // — sufficient for legacy/test fixtures that didn't pre-compute it.
  const sandbox = input.sandbox ?? undefined;
  const spawnWorkspaceDir =
    input.spawnWorkspaceDir ??
    (workspaceDir
      ? resolveAttemptSpawnWorkspaceDir({
          sandbox,
          resolvedWorkspace: workspaceDir,
        })
      : undefined);

  const model = a.model;
  const modelHasVision = Array.isArray(model?.input) && model.input.includes("image");
  const modelCompat =
    model &&
    typeof model === "object" &&
    "compat" in model &&
    model.compat &&
    typeof model.compat === "object"
      ? (model.compat as OpenClawCodingToolsOptions["modelCompat"])
      : undefined;

  // Shared by the sync and async failure paths of the post-yield abort.
  const warnAbortFailure = (error: unknown): void => {
    console.warn("[copilot-tool-bridge] session abort after yield failed; continuing", error);
  };

  return {
    agentId: input.agentId,
    ...buildEmbeddedAttemptToolRunContext({
      trigger: a.trigger,
      jobId: a.jobId,
      memoryFlushWritePath: a.memoryFlushWritePath,
      toolsAllow: a.toolsAllow,
    }),
    exec: {
      ...a.execOverrides,
      elevated: a.bashElevated,
    },
    messageProvider: a.messageProvider ?? a.messageChannel,
    agentAccountId: a.agentAccountId,
    messageTo: a.messageTo,
    messageThreadId: a.messageThreadId,
    groupId: a.groupId,
    groupChannel: a.groupChannel,
    groupSpace: a.groupSpace,
    memberRoleIds: a.memberRoleIds,
    spawnedBy: a.spawnedBy,
    senderId: a.senderId,
    senderName: a.senderName,
    senderUsername: a.senderUsername,
    senderE164: a.senderE164,
    senderIsOwner: a.senderIsOwner,
    allowGatewaySubagentBinding: a.allowGatewaySubagentBinding,
    sessionKey: sandboxSessionKey,
    runSessionKey,
    sessionId: input.sessionId,
    runId: a.runId,
    agentDir,
    workspaceDir,
    cwd,
    // Sandbox parity with PI
    // (`src/agents/pi-embedded-runner/run/attempt.ts:1238-1262`):
    // forwarded from the caller (attempt.ts derives it via
    // `resolveSandboxContext`).
    sandbox,
    spawnWorkspaceDir,
    config: a.config,
    abortSignal: input.abortSignal,
    modelProvider: input.modelProvider,
    modelId: input.modelId,
    includeCoreTools: toolPlan.includeCoreTools,
    runtimeToolAllowlist: toolPlan.runtimeToolAllowlist,
    toolConstructionPlan: toolPlan.codingToolConstructionPlan,
    modelCompat,
    modelApi: model?.api,
    modelContextWindowTokens: model?.contextWindow,
    modelAuthMode: resolveModelAuthMode(input.modelProvider, a.config, undefined, {
      workspaceDir,
    }),
    currentChannelId: a.currentChannelId,
    currentThreadTs: a.currentThreadTs,
    currentMessageId: a.currentMessageId,
    replyToMode: a.replyToMode,
    hasRepliedRef: a.hasRepliedRef,
    modelHasVision,
    requireExplicitMessageTarget:
      a.requireExplicitMessageTarget ?? isSubagentSessionKey(liveSessionKey),
    sourceReplyDeliveryMode: a.sourceReplyDeliveryMode,
    disableMessageTool: a.disableMessageTool,
    forceMessageTool: a.forceMessageTool,
    enableHeartbeatTool: a.enableHeartbeatTool,
    forceHeartbeatTool: a.forceHeartbeatTool,
    authProfileStore: a.toolAuthProfileStore ?? a.authProfileStore,
    // recordToolPrepStage intentionally omitted: copilot does not
    // surface attempt-stage telemetry yet. Codex omits this too.
    onToolOutcome: a.onToolOutcome,
    onYield: (message) => {
      // Notify the caller first so the final attempt result can carry
      // yieldDetected even if the abort below races a concurrent
      // settle path. Errors thrown by the caller's handler must not
      // skip the abort, so wrap defensively. Mirrors PI (`attempt.ts`
      // sets `yieldDetected = true; yieldMessage = message;` before
      // calling abort) and codex (`onYieldDetected()` runs before the
      // run-abort controller fires).
      try {
        input.onYieldDetected?.(message);
      } catch (error) {
        console.warn("[copilot-tool-bridge] onYieldDetected handler threw; continuing", error);
      }
      // The SDK session does not exist at bridge-construction time, so
      // we route yield events through a mutable holder populated by
      // attempt.ts immediately after `createSession()` /
      // `resumeSession()` resolves. Bridged tools cannot execute before
      // the SDK session is up, so a missing `current` is a no-op by
      // design (e.g. early aborts handled by the abortSignal path).
      const target = input.sessionRef?.current;
      try {
        // The abort is fire-and-forget, but its failure must still be
        // observed: `Promise.resolve` adopts a promise returned by
        // `abort` so a rejection is logged instead of surfacing as an
        // unhandled rejection, and the surrounding `try` covers a
        // synchronous throw so it cannot escape the yield callback.
        void Promise.resolve(target?.abort?.()).catch(warnAbortFailure);
      } catch (error) {
        warnAbortFailure(error);
      }
    },
  };
}
/**
 * Adapts a single OpenClaw agent tool to the Copilot SDK tool shape.
 *
 * Registration problems (blank name, missing `execute`) throw
 * immediately. Once registered, the handler reports failures from the
 * abort check, `ctx.beforeExecute`, `prepareArguments`, and `execute` as
 * failure results rather than throwing. Tools whose `executionMode` is
 * `"sequential"` have their invocations chained so only one runs at a
 * time.
 *
 * @param sourceTool - The OpenClaw tool to expose to the SDK.
 * @param ctx - Optional abort signal (checked before each run and passed
 *   to `execute`) and an optional `beforeExecute` hook awaited before the
 *   tool runs.
 * @returns The SDK tool definition wrapping `sourceTool`.
 * @throws Error when the tool name is not a non-empty string or the tool
 *   has no `execute` function.
 */
export function convertOpenClawToolToSdkTool(
  sourceTool: AnyAgentTool,
  ctx: {
    abortSignal?: AbortSignal;
    beforeExecute?: CopilotToolBridgeInput["beforeExecute"];
  },
): SdkTool {
  const hasUsableName = typeof sourceTool.name === "string" && sourceTool.name.trim().length > 0;
  if (!hasUsableName) {
    throw new Error("[copilot-tool-bridge] tool name must be a non-empty string");
  }

  if (typeof sourceTool.execute !== "function") {
    throw new Error(
      `[copilot-tool-bridge] tool '${sourceTool.name}' must define an execute function`,
    );
  }

  // One full invocation: abort check → beforeExecute → prepareArguments →
  // execute → result conversion. Each stage's failure is returned as a
  // failure result; only the final conversion runs outside a try.
  async function runTool(args: unknown, invocation: ToolInvocation): Promise<ToolResultObject> {
    if (ctx.abortSignal?.aborted) {
      const abortError = new Error("[copilot-tool-bridge] aborted before execution");
      return createFailureResult(abortError.message, abortError);
    }

    try {
      await ctx.beforeExecute?.({
        args,
        invocation,
        sourceTool,
        toolCallId: invocation.toolCallId,
        toolName: sourceTool.name,
      });
    } catch (cause: unknown) {
      return createFailureResult(
        `[copilot-tool-bridge] beforeExecute failed for tool '${sourceTool.name}': ${toError(cause).message}`,
        cause,
      );
    }

    let toolArgs = args;
    try {
      if (sourceTool.prepareArguments) {
        toolArgs = sourceTool.prepareArguments(args);
      }
    } catch (cause: unknown) {
      return createFailureResult(
        `[copilot-tool-bridge] prepareArguments failed for tool '${sourceTool.name}': ${toError(cause).message}`,
        cause,
      );
    }

    let outcome: AgentToolResultLike;
    try {
      outcome = await sourceTool.execute(
        invocation.toolCallId,
        toolArgs,
        ctx.abortSignal,
        undefined,
      );
    } catch (cause: unknown) {
      return createFailureResult(
        `[copilot-tool-bridge] tool '${sourceTool.name}' failed: ${toError(cause).message}`,
        cause,
      );
    }

    return agentToolResultToSdk(outcome);
  }

  // Tail of the chain of sequential invocations; each new call waits for
  // the previous one to settle (either way) before starting.
  let queueTail = Promise.resolve();
  const runSequentially = (args: unknown, invocation: ToolInvocation) => {
    const start = () => runTool(args, invocation);
    const pending = queueTail.then(start, start);
    const settled = () => undefined;
    queueTail = pending.then(settled, settled);
    return pending;
  };

  const handler = sourceTool.executionMode === "sequential" ? runSequentially : runTool;

  return {
    description: sourceTool.description,
    handler,
    name: sourceTool.name,
    // OpenClaw owns its bridged tools by design (the harness docs:
    // "OpenClaw still owns ... OpenClaw dynamic tools (bridged)"). The
    // bundled Copilot CLI ships built-in tools whose names (edit, read,
    // write, bash, ...) collide with OpenClaw's coding-tool set. Every
    // bridged tool is therefore marked as an explicit override so the SDK
    // accepts the registration rather than throwing "External tool 'edit'
    // conflicts with a built-in tool of the same name." OpenClaw's tool
    // layer is the source of truth for these names within a copilot
    // attempt.
    overridesBuiltInTool: true,
    parameters: sourceTool.parameters as Record<string, unknown> | undefined,
    // Bridged OpenClaw tools enforce their own permission/policy decisions
    // inside `wrapToolWithBeforeToolCallHook` (see
    // `src/agents/pi-tools.before-tool-call.ts` — the same hook PI itself
    // uses, providing loop detection, trusted plugin policies,
    // before-tool-call hooks, and two-phase plugin approvals via the
    // gateway). Having the SDK fire `onPermissionRequest` for
    // `kind: "custom-tool"` would either short-circuit OpenClaw's richer
    // enforcement (allow-all) or block every call (reject-all) — neither
    // matches PI parity. The in-tree codex harness takes the same
    // approach: bridged OpenClaw tools are wrapped with
    // `wrapToolWithBeforeToolCallHook` and the SDK gate is bypassed
    // (see `extensions/codex/src/app-server/dynamic-tools.ts`).
    skipPermission: true,
  };
}
/**
 * Converts an OpenClaw tool result into the SDK's `ToolResultObject`.
 *
 * - Missing/nullish `content` becomes a success with empty text.
 * - `content` must otherwise be an array of blocks; `text` blocks are
 *   joined with newlines and `image` blocks (non-empty `data` and
 *   `mimeType`) are forwarded as binary results.
 * - Any other shape yields a failure result naming the offending kind;
 *   the first bad block aborts the conversion.
 *
 * @param result - Tool result to convert, possibly undefined.
 * @returns A success result, or a failure for unsupported content.
 */
function agentToolResultToSdk(result: AgentToolResultLike | undefined): ToolResultObject {
  const blocks = result?.content;
  if (blocks == null) {
    return createSuccessResult("");
  }
  if (!Array.isArray(blocks)) {
    return createUnsupportedContentFailure(typeof blocks);
  }

  const texts: string[] = [];
  const images: Array<Record<string, unknown>> = [];

  for (const entry of blocks) {
    if (!entry || typeof entry !== "object") {
      return createUnsupportedContentFailure(typeof entry);
    }

    const kind = readString((entry as { type?: unknown }).type);
    switch (kind) {
      case "text": {
        // Empty text is a legitimate block; only a non-string is rejected.
        const text = readString((entry as { text?: unknown }).text, { allowEmpty: true });
        if (text === undefined) {
          return createUnsupportedContentFailure(kind);
        }
        texts.push(text);
        break;
      }
      case "image": {
        const base64Data = readString((entry as { data?: unknown }).data);
        const mimeType = readString((entry as { mimeType?: unknown }).mimeType);
        if (!base64Data || !mimeType) {
          return createUnsupportedContentFailure(kind);
        }
        // The payload is emitted under both `base64Data` and `data`.
        images.push({
          base64Data,
          data: base64Data,
          mimeType,
          type: "image",
        });
        break;
      }
      default:
        return createUnsupportedContentFailure(kind ?? typeof entry);
    }
  }

  const textResultForLlm = texts.join("\n");
  if (images.length === 0) {
    return {
      resultType: "success",
      textResultForLlm,
    };
  }
  return {
    binaryResultsForLlm: images as ToolResultObject["binaryResultsForLlm"],
    resultType: "success",
    textResultForLlm,
  };
}
/** Builds the failure result returned for a tool-result content shape the bridge cannot convert. */
function createUnsupportedContentFailure(kind: string): ToolResultObject {
  const text = `[copilot-tool-bridge] unsupported AgentToolResult content shape: ${kind}`;
  const cause = new Error(text);
  return createFailureResult(text, cause);
}

/** Wraps plain text in a successful SDK tool result. */
function createSuccessResult(textResultForLlm: string): ToolResultObject {
  const result: ToolResultObject = { resultType: "success", textResultForLlm };
  return result;
}

/**
 * Builds a failed SDK tool result.
 *
 * @param message - Text shown to the model.
 * @param error - The underlying failure; only its message is kept.
 */
function createFailureResult(message: string, error: unknown): ToolResultObject {
  // The SDK contract types `ToolResultObject.error` as `string | undefined`
  // (see `node_modules/@github/copilot-sdk/dist/types.d.ts`). An Error
  // object would not serialize over JSON-RPC, so only the message string
  // is surfaced.
  const errorText = toError(error).message;
  return {
    error: errorText,
    resultType: "failure",
    textResultForLlm: message,
  };
}

/** Creates an `Error` with `message` and attaches `cause` as an own property. */
function createError(message: string, cause: unknown): Error {
  return Object.assign(new Error(message), { cause });
}

/**
 * Reports whether the attempt was launched as a raw-model run, which
 * suppresses tool construction in PI
 * (`src/agents/pi-embedded-runner/run/attempt.ts:1305-1310` and
 * `attempt-tool-construction-plan.ts:165-184`). A run counts as raw when
 * the caller sets `modelRun: true` or requests no system prompt via
 * `promptMode: "none"`.
 */
function isCopilotRawModelRun(params: CopilotToolAttemptParams): boolean {
  if (params.modelRun === true) {
    return true;
  }
  return params.promptMode === "none";
}
/**
 * Decides whether the message tool must be forced for this attempt,
 * following PI's `shouldForceMessageTool` semantics: forced when the
 * caller asked for it explicitly or when the source reply delivery mode
 * is `message_tool_only`, but never when `disableMessageTool` is set (the
 * suppress flag always wins). Compare
 * `src/agents/pi-embedded-runner/run/attempt.ts:1361-1366` and the codex
 * equivalent at `extensions/codex/src/app-server/run-attempt.ts:4253-4258`.
 */
function shouldForceCopilotMessageTool(params: CopilotToolAttemptParams): boolean {
  const suppressed = params.disableMessageTool === true;
  const requested =
    params.forceMessageTool === true || params.sourceReplyDeliveryMode === "message_tool_only";
  return !suppressed && requested;
}

/**
 * Applies the attempt's `toolsAllow` list by delegating to PI's
 * `applyEmbeddedAttemptToolsAllow`
 * (`src/agents/embedded-agent-runner/run/attempt-tool-construction-plan.ts`),
 * so final filtering keeps aliases, groups, plugin policies, and glob
 * semantics identical to the in-tree embedded runner. Plugin metadata is
 * looked up via `getPluginToolMeta` first, then an inline `pluginId`.
 */
function filterCopilotToolsForAllowlist<T extends { name: string }>(
  tools: T[],
  toolsAllow?: string[],
): T[] {
  const toolMeta = (tool: T) =>
    getPluginToolMeta(tool as unknown as AnyAgentTool) ?? readInlinePluginToolMeta(tool);
  return applyEmbeddedAttemptToolsAllow(tools, toolsAllow, { toolMeta });
}

/**
 * Drops tools excluded by the construction plan: base coding tools when
 * `includeBaseCodingTools` is off and shell tools when `includeShellTools`
 * is off. When both families are enabled the input array is returned
 * as-is (same reference).
 */
function filterCopilotToolsForConstructionPlan<T extends { name: string }>(
  tools: T[],
  plan: { includeBaseCodingTools: boolean; includeShellTools: boolean },
): T[] {
  const { includeBaseCodingTools, includeShellTools } = plan;
  if (includeBaseCodingTools && includeShellTools) {
    return tools;
  }
  return tools.filter(({ name }) => {
    const excludedAsBase = !includeBaseCodingTools && BASE_COPILOT_CODING_TOOL_NAMES.has(name);
    const excludedAsShell = !includeShellTools && SHELL_COPILOT_CODING_TOOL_NAMES.has(name);
    return !(excludedAsBase || excludedAsShell);
  });
}

/**
 * Reports whether the allowlist contains a glob pattern other than the
 * bare `*` wildcard, i.e. an entry that includes `*` but is not exactly
 * `*` after trimming.
 */
function hasNonWildcardGlobAllowlist(toolsAllow: string[] | undefined): boolean {
  for (const entry of toolsAllow ?? []) {
    const pattern = entry.trim();
    if (pattern !== "*" && pattern.includes("*")) {
      return true;
    }
  }
  return false;
}

/**
 * Reads a `pluginId` string carried directly on the tool object.
 * Returns it untrimmed; non-string or whitespace-only ids yield
 * `undefined`.
 */
function readInlinePluginToolMeta(tool: { name: string }): { pluginId: string } | undefined {
  const candidate = (tool as { pluginId?: unknown }).pluginId;
  if (typeof candidate !== "string" || !candidate.trim()) {
    return undefined;
  }
  return { pluginId: candidate };
}
/**
 * Lists tool names that occur more than once in `sourceTools`, sorted
 * with the default string ordering. Entries whose `name` is not a
 * non-empty string are ignored.
 */
function findDuplicateToolNames(sourceTools: AnyAgentTool[]): string[] {
  const counts = new Map<string, number>();
  for (const sourceTool of sourceTools) {
    if (typeof sourceTool.name !== "string" || sourceTool.name.length === 0) {
      continue;
    }
    counts.set(sourceTool.name, (counts.get(sourceTool.name) ?? 0) + 1);
  }
  return [...counts.entries()]
    .filter(([, count]) => count > 1)
    .map(([name]) => name)
    .toSorted();
}

/**
 * Returns `value` when it is a string, otherwise `undefined`.
 *
 * @param value - Candidate value of unknown type.
 * @param options - `allowEmpty: true` accepts `""`; by default an empty
 *   string is treated as absent.
 */
function readString(value: unknown, options: { allowEmpty?: boolean } = {}): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  if (options.allowEmpty || value.length > 0) {
    return value;
  }
  return undefined;
}

/**
 * Normalizes any thrown value into an `Error`.
 *
 * `Error` instances are returned unchanged. Other values use their
 * string form as the message, except that an object whose string form is
 * only the default `[object X]` tag is serialized with `JSON.stringify`
 * when that yields something more informative. This keeps thrown plain
 * objects (e.g. `{ code: 7 }`) readable in the failure text sent to the
 * model. Never throws, even for objects without a usable `toString` or
 * with cyclic references.
 */
function toError(error: unknown): Error {
  if (error instanceof Error) {
    return error;
  }

  let message: string;
  try {
    message = String(error);
  } catch {
    // e.g. `Object.create(null)` has no `toString` to convert with.
    message = Object.prototype.toString.call(error);
  }

  const isDefaultTag = /^\[object [^\]]*\]$/.test(message);
  if (isDefaultTag && typeof error === "object" && error !== null) {
    try {
      const serialized: unknown = JSON.stringify(error);
      // Skip `undefined` (custom `toJSON`) and the contentless "{}".
      if (typeof serialized === "string" && serialized !== "{}") {
        message = serialized;
      }
    } catch {
      // Cyclic or otherwise unserializable: keep the default tag.
    }
  }

  return new Error(message);
}
cacheWrite: 4, + input: undefined, + output: undefined, + total: 7, + }); + }); + + it("leaves missing cache token fields undefined rather than zero", () => { + const usage = normalizeCopilotUsage({ inputTokens: 2 }); + + expect(usage).toEqual({ + cacheRead: undefined, + cacheWrite: undefined, + input: 2, + output: undefined, + total: 2, + }); + expect(usage?.cacheRead).toBeUndefined(); + expect(usage?.cacheWrite).toBeUndefined(); + }); + + it("returns a defined zero-snapshot when SDK event is an object with no valid fields", () => { + expect(normalizeCopilotUsage({})).toEqual(ZERO_SNAPSHOT); + expect(normalizeCopilotUsage({ inputTokens: undefined })).toEqual(ZERO_SNAPSHOT); + }); + + it("returns undefined for null / non-object input", () => { + expect(normalizeCopilotUsage(null)).toBeUndefined(); + expect(normalizeCopilotUsage(undefined)).toBeUndefined(); + expect(normalizeCopilotUsage("usage")).toBeUndefined(); + }); + + it("ignores string-typed token counts", () => { + expect(normalizeCopilotUsage({ inputTokens: "5" })).toEqual(ZERO_SNAPSHOT); + }); + + it("ignores NaN and Infinity token counts", () => { + expect(normalizeCopilotUsage({ inputTokens: Number.NaN })).toEqual(ZERO_SNAPSHOT); + expect(normalizeCopilotUsage({ outputTokens: Number.POSITIVE_INFINITY })).toEqual( + ZERO_SNAPSHOT, + ); + expect(normalizeCopilotUsage({ cacheReadTokens: Number.NEGATIVE_INFINITY })).toEqual( + ZERO_SNAPSHOT, + ); + expect(normalizeCopilotUsage({ inputTokens: 2, outputTokens: Number.NaN })).toEqual({ + cacheRead: undefined, + cacheWrite: undefined, + input: 2, + output: undefined, + total: 2, + }); + }); + + it("clamps negative token counts to zero", () => { + expect(normalizeCopilotUsage({ inputTokens: -3 })).toEqual({ + cacheRead: undefined, + cacheWrite: undefined, + input: 0, + output: undefined, + total: 0, + }); + }); + + it("truncates fractional token counts", () => { + expect(normalizeCopilotUsage({ inputTokens: 3.7 })).toEqual({ + cacheRead: undefined, + cacheWrite: 
undefined, + input: 3, + output: undefined, + total: 3, + }); + }); + + it("derives total from normalized SDK component counts for compatibility", () => { + expect( + normalizeCopilotUsage({ + cacheReadTokens: 3, + cacheWriteTokens: 4, + inputTokens: 1, + outputTokens: 2, + }), + ).toEqual({ + cacheRead: 3, + cacheWrite: 4, + input: 1, + output: 2, + total: 10, + }); + }); + + it("does not mutate the caller-provided SDK event data", () => { + const data = Object.freeze({ inputTokens: 4, outputTokens: 6 }); + + expect(normalizeCopilotUsage(data)).toEqual({ + cacheRead: undefined, + cacheWrite: undefined, + input: 4, + output: 6, + total: 10, + }); + expect(data).toEqual({ inputTokens: 4, outputTokens: 6 }); + }); + + it("only whitelists known SDK fields and ignores unrelated input keys", () => { + expect( + normalizeCopilotUsage({ + inputTokens: 5, + malicious_field: 999, + outputTokens: "bad", + prompt_tokens: 100, + }), + ).toEqual({ + cacheRead: undefined, + cacheWrite: undefined, + input: 5, + output: undefined, + total: 5, + }); + }); + }); + + describe("buildCopilotAssistantUsage", () => { + it("builds rich AssistantMessage usage with zero cost fields", () => { + expect( + buildCopilotAssistantUsage({ + usage: { cacheRead: 3, cacheWrite: 4, input: 1, output: 2, total: 10 }, + }), + ).toEqual({ + cacheRead: 3, + cacheWrite: 4, + cost: { + cacheRead: 0, + cacheWrite: 0, + input: 0, + output: 0, + total: 0, + }, + input: 1, + output: 2, + totalTokens: 10, + }); + }); + + it("defaults missing usage fields to zero in the rich block only", () => { + expect( + buildCopilotAssistantUsage({ + usage: { input: 4 }, + }), + ).toEqual({ + cacheRead: 0, + cacheWrite: 0, + cost: { + cacheRead: 0, + cacheWrite: 0, + input: 0, + output: 0, + total: 0, + }, + input: 4, + output: 0, + totalTokens: 0, + }); + }); + + it("uses fallback outputTokens when no usage event was captured", () => { + expect(buildCopilotAssistantUsage({ fallbackOutputTokens: 7 })).toEqual({ + cacheRead: 0, 
+ cacheWrite: 0, + cost: { + cacheRead: 0, + cacheWrite: 0, + input: 0, + output: 0, + total: 0, + }, + input: 0, + output: 7, + totalTokens: 7, + }); + }); + + it("does not use fallback outputTokens when normalized usage is already present", () => { + expect( + buildCopilotAssistantUsage({ + fallbackOutputTokens: 9, + usage: { input: 4, total: 4 }, + }), + ).toEqual({ + cacheRead: 0, + cacheWrite: 0, + cost: { + cacheRead: 0, + cacheWrite: 0, + input: 0, + output: 0, + total: 0, + }, + input: 4, + output: 0, + totalTokens: 4, + }); + }); + + it("returns an all-zero block when both usage and fallback are missing", () => { + expect(buildCopilotAssistantUsage({})).toEqual({ + cacheRead: 0, + cacheWrite: 0, + cost: { + cacheRead: 0, + cacheWrite: 0, + input: 0, + output: 0, + total: 0, + }, + input: 0, + output: 0, + totalTokens: 0, + }); + }); + }); + + describe("deriveCopilotUsageTotal", () => { + it("returns undefined when usage is undefined", () => { + expect(deriveCopilotUsageTotal(undefined)).toBeUndefined(); + }); + + it("sums input/output/cacheRead/cacheWrite for total", () => { + const usage: NormalizedUsage = { + cacheRead: 3, + cacheWrite: 4, + input: 1, + output: 2, + total: 999, + }; + + expect(deriveCopilotUsageTotal(usage)).toBe(10); + }); + }); +}); diff --git a/extensions/copilot/src/usage-bridge.ts b/extensions/copilot/src/usage-bridge.ts new file mode 100644 index 000000000000..ae58673764a4 --- /dev/null +++ b/extensions/copilot/src/usage-bridge.ts @@ -0,0 +1,83 @@ +import type { AgentMessage, NormalizedUsage } from "openclaw/plugin-sdk/agent-harness-runtime"; + +type AssistantMessage = Extract; +type AssistantUsage = NonNullable; + +type CopilotUsageSource = { + cacheReadTokens?: unknown; + cacheWriteTokens?: unknown; + inputTokens?: unknown; + outputTokens?: unknown; +}; + +export type CopilotUsageSnapshot = NormalizedUsage; + +function isCopilotUsageSource(data: unknown): data is CopilotUsageSource { + return typeof data === "object" && data !== 
/** Type guard: any non-null object is accepted as a candidate SDK usage payload. */
function isCopilotUsageSource(data: unknown): data is CopilotUsageSource {
  return data !== null && typeof data === "object";
}

/** Returns a fresh cost block with every component set to zero. */
function buildZeroCost(): AssistantUsage["cost"] {
  const zero = 0;
  return { cacheRead: zero, cacheWrite: zero, input: zero, output: zero, total: zero };
}

/**
 * Coerces a raw token count: finite numbers are truncated toward zero and
 * clamped to be non-negative; anything else (strings, NaN, ±Infinity,
 * missing) yields `undefined`.
 */
function coerceTokenCount(value: unknown): number | undefined {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  return Math.max(0, Math.trunc(value));
}

/** Adds the four token components in a fixed order, counting absent ones as zero. */
function sumTokenComponents(
  input: number | undefined,
  output: number | undefined,
  cacheRead: number | undefined,
  cacheWrite: number | undefined,
): number {
  return (input ?? 0) + (output ?? 0) + (cacheRead ?? 0) + (cacheWrite ?? 0);
}

/**
 * Normalizes an SDK usage event payload.
 *
 * @param data - Raw event data of unknown shape.
 * @returns `undefined` for null / non-object input; otherwise a snapshot
 *   whose components are the coerced `inputTokens`, `outputTokens`,
 *   `cacheReadTokens`, and `cacheWriteTokens` (left `undefined` when
 *   invalid or absent) and whose `total` is their sum.
 */
export function normalizeCopilotUsage(data: unknown): NormalizedUsage | undefined {
  if (!isCopilotUsageSource(data)) {
    return undefined;
  }

  // Only these four SDK fields are read. Coercion matches the prior
  // event-bridge implementation, so an object-shaped event with no valid
  // fields still produces the legacy all-zero snapshot (total 0) and
  // overwrites state with it.
  const input = coerceTokenCount(data.inputTokens);
  const output = coerceTokenCount(data.outputTokens);
  const cacheRead = coerceTokenCount(data.cacheReadTokens);
  const cacheWrite = coerceTokenCount(data.cacheWriteTokens);

  return {
    cacheRead,
    cacheWrite,
    input,
    output,
    total: sumTokenComponents(input, output, cacheRead, cacheWrite),
  };
}

/**
 * Builds the assistant-message usage block, with zero cost fields.
 *
 * @param params - `usage` is used when present; otherwise a snapshot is
 *   derived from `fallbackOutputTokens` alone. Missing components default
 *   to zero, and `totalTokens` mirrors the snapshot's `total` (zero when
 *   the snapshot carries none).
 */
export function buildCopilotAssistantUsage(params: {
  usage?: NormalizedUsage;
  fallbackOutputTokens?: unknown;
}): AssistantMessage["usage"] {
  const { usage: captured, fallbackOutputTokens } = params;
  const snapshot = captured ?? normalizeCopilotUsage({ outputTokens: fallbackOutputTokens });
  const orZero = (count: number | undefined): number => count ?? 0;

  return {
    cacheRead: orZero(snapshot?.cacheRead),
    cacheWrite: orZero(snapshot?.cacheWrite),
    cost: buildZeroCost(),
    input: orZero(snapshot?.input),
    output: orZero(snapshot?.output),
    totalTokens: orZero(snapshot?.total),
  };
}

/**
 * Recomputes the total from the four components, ignoring any `total`
 * already stored on the snapshot. Returns `undefined` when no usage is
 * given.
 */
export function deriveCopilotUsageTotal(usage?: NormalizedUsage): number | undefined {
  return usage
    ? sumTokenComponents(usage.input, usage.output, usage.cacheRead, usage.cacheWrite)
    : undefined;
}
0) + ); +} diff --git a/extensions/copilot/src/user-input-bridge.test.ts b/extensions/copilot/src/user-input-bridge.test.ts new file mode 100755 index 000000000000..188472bf4778 --- /dev/null +++ b/extensions/copilot/src/user-input-bridge.test.ts @@ -0,0 +1,230 @@ +import type { SessionConfig } from "@github/copilot-sdk"; +import { describe, expect, it, vi } from "vitest"; + +type UserInputHandler = NonNullable; +type SdkUserInputRequest = Parameters[0]; +type SdkUserInputResponse = Awaited>; + +import { + composeUserInputPolicies, + createUserInputBridge, + delegatingUserInputPolicy, + denyAllUserInputPolicy, + firstChoicePolicy, + staticAnswerPolicy, + DENY_ALL_ANSWER, + type CopilotUserInputContext, + type CopilotUserInputPolicy, +} from "./user-input-bridge.js"; + +function makeRequest(overrides: Partial = {}): SdkUserInputRequest { + return { + question: "what is your name?", + ...overrides, + }; +} + +function makeCtx(overrides: Partial = {}): CopilotUserInputContext { + return { + request: makeRequest(), + sessionId: "sess-1", + ...overrides, + }; +} + +describe("denyAllUserInputPolicy", () => { + it("returns the fail-closed DENY_ALL_ANSWER as a freeform answer", async () => { + const result = await denyAllUserInputPolicy(makeCtx()); + expect(result).toEqual({ answer: DENY_ALL_ANSWER, wasFreeform: true }); + }); +}); + +describe("firstChoicePolicy", () => { + it("returns the first choice (wasFreeform: false) when choices are present", async () => { + const result = await firstChoicePolicy( + makeCtx({ request: makeRequest({ choices: ["yes", "no"] }) }), + ); + expect(result).toEqual({ answer: "yes", wasFreeform: false }); + }); + + it("falls back to DENY_ALL_ANSWER when choices are empty", async () => { + const result = await firstChoicePolicy(makeCtx({ request: makeRequest({ choices: [] }) })); + expect(result).toEqual({ answer: DENY_ALL_ANSWER, wasFreeform: true }); + }); + + it("falls back to DENY_ALL_ANSWER when choices are absent", async () => { + const 
result = await firstChoicePolicy(makeCtx()); + expect(result).toEqual({ answer: DENY_ALL_ANSWER, wasFreeform: true }); + }); +}); + +describe("staticAnswerPolicy", () => { + it("returns the configured answer for every request", async () => { + const policy = staticAnswerPolicy({ answer: "Alice" }); + for (const question of ["a?", "b?", "c?"]) { + const result = await policy(makeCtx({ request: makeRequest({ question }) })); + expect(result).toEqual({ answer: "Alice", wasFreeform: true }); + } + }); + + it("respects wasFreeform=false override", async () => { + const policy = staticAnswerPolicy({ answer: "yes", wasFreeform: false }); + const result = await policy(makeCtx()); + expect(result).toEqual({ answer: "yes", wasFreeform: false }); + }); +}); + +describe("delegatingUserInputPolicy", () => { + it("forwards the request and returns the host response", async () => { + const onRequest = vi + .fn() + .mockResolvedValue({ answer: "Bob", wasFreeform: true } satisfies SdkUserInputResponse); + const policy = delegatingUserInputPolicy({ onRequest }); + const ctx = makeCtx({ sessionId: "sess-xyz" }); + const result = await policy(ctx); + expect(result).toEqual({ answer: "Bob", wasFreeform: true }); + expect(onRequest).toHaveBeenCalledTimes(1); + expect(onRequest).toHaveBeenCalledWith(ctx); + }); + + it("returns DENY_ALL_ANSWER when host callback returns undefined", async () => { + const onRequest = vi.fn().mockResolvedValue(undefined); + const policy = delegatingUserInputPolicy({ onRequest }); + const result = await policy(makeCtx()); + expect(result).toEqual({ answer: DENY_ALL_ANSWER, wasFreeform: true }); + }); + + it("converts thrown errors into a DENY_ALL_ANSWER with the error message appended", async () => { + const policy = delegatingUserInputPolicy({ + onRequest: () => { + throw new Error("prompt timeout"); + }, + }); + const result = await policy(makeCtx()); + expect(result).toBeDefined(); + expect(result!.wasFreeform).toBe(true); + 
expect(result!.answer).toContain(DENY_ALL_ANSWER); + expect(result!.answer).toContain("prompt timeout"); + }); + + it("falls back to onError policy when onRequest throws", async () => { + const onError = vi + .fn() + .mockResolvedValue({ answer: "fallback", wasFreeform: true }); + const policy = delegatingUserInputPolicy({ + onRequest: () => { + throw new Error("host boom"); + }, + onError, + }); + const result = await policy(makeCtx()); + expect(result).toEqual({ answer: "fallback", wasFreeform: true }); + expect(onError).toHaveBeenCalledTimes(1); + }); + + it("falls through to error-message response when onError also throws", async () => { + const policy = delegatingUserInputPolicy({ + onRequest: () => { + throw new Error("host boom"); + }, + onError: () => { + throw new Error("fallback boom"); + }, + }); + const result = await policy(makeCtx()); + expect(result).toBeDefined(); + expect(result!.answer).toContain("host boom"); + }); + + it("formats non-Error throws via JSON.stringify", async () => { + const policy = delegatingUserInputPolicy({ + onRequest: () => { + throw { code: 7, msg: "weird" } as unknown as Error; + }, + }); + const result = await policy(makeCtx()); + expect(result).toBeDefined(); + expect(result!.answer).toContain('"code":7'); + }); +}); + +describe("composeUserInputPolicies", () => { + it("returns the first non-undefined result and skips subsequent policies", async () => { + const a: CopilotUserInputPolicy = () => undefined; + const b: CopilotUserInputPolicy = () => ({ answer: "from-b", wasFreeform: true }); + const c = vi.fn(() => ({ answer: "from-c", wasFreeform: true })); + const policy = composeUserInputPolicies(a, b, c); + const result = await policy(makeCtx()); + expect(result).toEqual({ answer: "from-b", wasFreeform: true }); + expect(c).not.toHaveBeenCalled(); + }); + + it("falls through to DENY_ALL_ANSWER when all policies return undefined", async () => { + const policy = composeUserInputPolicies( + () => undefined, + () => 
undefined, + ); + const result = await policy(makeCtx()); + expect(result).toEqual({ answer: DENY_ALL_ANSWER, wasFreeform: true }); + }); + + it("short-circuits to error-message response when any policy throws", async () => { + const later = vi.fn(() => ({ answer: "later", wasFreeform: true })); + const policy = composeUserInputPolicies(() => { + throw new Error("compose boom"); + }, later); + const result = await policy(makeCtx()); + expect(result).toBeDefined(); + expect(result!.answer).toContain("compose boom"); + expect(later).not.toHaveBeenCalled(); + }); +}); + +describe("createUserInputBridge", () => { + it("adapts a policy to the SDK UserInputHandler shape", async () => { + const handler = createUserInputBridge(staticAnswerPolicy({ answer: "Alice" })); + const result = await handler(makeRequest(), { sessionId: "sess-1" }); + expect(result).toEqual({ answer: "Alice", wasFreeform: true }); + }); + + it("defaults to denyAllUserInputPolicy when no policy is passed", async () => { + const handler = createUserInputBridge(); + const result = await handler(makeRequest(), { sessionId: "sess-1" }); + expect(result).toEqual({ answer: DENY_ALL_ANSWER, wasFreeform: true }); + }); + + it("forwards the SDK sessionId into the policy context", async () => { + const policy = vi.fn(() => ({ answer: "x", wasFreeform: true })); + const handler = createUserInputBridge(policy); + await handler(makeRequest({ question: "q?", choices: ["a"] }), { sessionId: "sess-xyz" }); + expect(policy).toHaveBeenCalledTimes(1); + expect(policy.mock.calls[0]?.[0]).toEqual({ + sessionId: "sess-xyz", + request: { question: "q?", choices: ["a"] }, + }); + }); + + it("never throws when policy throws; returns DENY_ALL_ANSWER with the error message", async () => { + const handler = createUserInputBridge(() => { + throw new Error("policy boom"); + }); + const result = await handler(makeRequest(), { sessionId: "sess-1" }); + expect(result.answer).toContain(DENY_ALL_ANSWER); + 
expect(result.answer).toContain("policy boom"); + expect(result.wasFreeform).toBe(true); + }); + + it("never returns undefined: a policy returning undefined yields fail-closed answer", async () => { + const handler = createUserInputBridge(() => undefined); + const result = await handler(makeRequest(), { sessionId: "sess-1" }); + expect(result).toEqual({ answer: DENY_ALL_ANSWER, wasFreeform: true }); + }); + + it("preserves wasFreeform=false from a policy that picked from choices", async () => { + const handler = createUserInputBridge(firstChoicePolicy); + const result = await handler(makeRequest({ choices: ["one", "two"], allowFreeform: false }), { + sessionId: "sess-1", + }); + expect(result).toEqual({ answer: "one", wasFreeform: false }); + }); +}); diff --git a/extensions/copilot/src/user-input-bridge.ts b/extensions/copilot/src/user-input-bridge.ts new file mode 100755 index 000000000000..71e4b2a1d34e --- /dev/null +++ b/extensions/copilot/src/user-input-bridge.ts @@ -0,0 +1,244 @@ +/** + * User-input bridge for the copilot agent runtime. + * + * STATUS — MVP DORMANT: This module is intentionally NOT registered with + * the SDK in the current harness (see `attempt.ts` / `side-question.ts`). + * The SDK contract is "When `onUserInputRequest` is provided, enables the + * `ask_user` tool allowing the agent to ask questions" (see + * `node_modules/@github/copilot-sdk/dist/types.d.ts` `SessionConfig`); + * by omitting the handler we hide `ask_user` from the model entirely. + * Agents under the MVP must make best-judgment decisions from the + * initial prompt rather than asking clarifying questions mid-turn. 
+ * + * FOLLOW-UP: The scaffolding below stays in tree so the follow-up that + * ports the codex user-input-bridge pattern + * (`extensions/codex/src/app-server/user-input-bridge.ts`) has a stable + * surface to wire — that change will route SDK `UserInputRequest`s + * through `params.onBlockReply` / `onPartialReply` and resolve the + * pending promise from the next inbound channel message, then register + * `createUserInputBridge(delegatingUserInputPolicy(...))` from + * `createSessionConfig`. + * + * BACK-POINTER: The host-side channel/TUI prompt flow lives outside + * this package boundary in `commitments/` and the channel plugins + * (slack/discord/cli/tui). Per proposal §50, this bridge does NOT + * import that flow directly (the package boundary + * `tsconfig.package-boundary.base.json` only allows + * `openclaw/plugin-sdk/*` and `@github/copilot-sdk`). Instead, this + * module: + * + * 1. Defines a small `CopilotUserInputPolicy` contract that the + * core wiring layer implements to forward `UserInputRequest`s to + * the host's channel/TUI prompt path. + * 2. Provides built-in policies for common defaults (deny-all with a + * synthetic answer, auto-first-choice, static-answer). + * 3. Provides a `delegatingUserInputPolicy({ onRequest })` so the + * core wiring layer can plug in a host-side callback that calls + * into `commitments/` and returns the SDK-shaped response. + * 4. Adapts the resulting policy into the SDK's `UserInputHandler` + * shape via `createUserInputBridge(policy)`. + * + * SDK contract note: unlike `PermissionHandler` (which has a + * `no-result` escape hatch), `UserInputHandler` MUST resolve with a + * `UserInputResponse`. The bridge therefore never returns `undefined` + * to the SDK; if a policy returns `undefined` or throws, the default + * fail-closed answer is used so the model sees a real string rather + * than a generic RPC failure. 
+ * + * If the host's prompt contract changes materially, the contract here + * must be revisited in lockstep. The unit tests in + * `user-input-bridge.test.ts` exercise the SDK-shaped response envelope + * so any silent drift in the SDK type is caught at typecheck. + */ + +import type { SessionConfig } from "@github/copilot-sdk"; + +type UserInputHandler = NonNullable<SessionConfig["onUserInputRequest"]>; +type SdkUserInputRequest = Parameters<UserInputHandler>[0]; +type SdkUserInputResponse = Awaited<ReturnType<UserInputHandler>>; + +/** Request shape forwarded to host-implemented user-input policies. */ +export interface CopilotUserInputContext { + /** SDK session id that originated the request. */ + sessionId: string; + /** Original SDK request payload. */ + request: SdkUserInputRequest; +} + +/** + * Policy contract. Implementors return an SDK-shaped response (or a + * Promise of one). + * + * Returning `undefined` is treated as "no opinion" and falls through + * to the default fail-closed response (`DENY_ALL_ANSWER`). This keeps + * composition trivial without requiring explicit responses from every + * code path. + */ +export type CopilotUserInputPolicy = ( + ctx: CopilotUserInputContext, +) => SdkUserInputResponse | undefined | Promise<SdkUserInputResponse | undefined>; + +/** + * Default answer used when no host policy provides one. The string is + * intentionally explicit so the model can detect the missing-prompt + * condition rather than treating it as a real user answer. + */ +export const DENY_ALL_ANSWER = + "[copilot agent runtime: no user-input policy installed; request declined]"; + +export const denyAllUserInputPolicy: CopilotUserInputPolicy = () => ({ + answer: DENY_ALL_ANSWER, + wasFreeform: true, +}); + +/** + * Auto-pick the first choice if the request offers choices; otherwise + * fall back to `DENY_ALL_ANSWER` as a freeform answer. Useful for + * non-interactive test runs.
+ */ +export const firstChoicePolicy: CopilotUserInputPolicy = ({ request }) => { + if (request.choices && request.choices.length > 0) { + return { answer: request.choices[0], wasFreeform: false }; + } + return { answer: DENY_ALL_ANSWER, wasFreeform: true }; +}; + +export interface StaticAnswerPolicyOptions { + /** Answer returned for every request. */ + answer: string; + /** + * Whether the answer should be flagged as a freeform response. + * Defaults to `true` (caller did not pick from `choices`). + */ + wasFreeform?: boolean; +} + +/** Always return a fixed answer. Useful for deterministic tests. */ +export function staticAnswerPolicy(options: StaticAnswerPolicyOptions): CopilotUserInputPolicy { + const wasFreeform = options.wasFreeform ?? true; + return () => ({ answer: options.answer, wasFreeform }); +} + +export interface DelegatingUserInputPolicyOptions { + /** + * Host-supplied callback. Returning `undefined` falls through to the + * fail-closed default. Throwing falls back to the configured + * `onError` policy if provided; otherwise the throw is converted to + * a `DENY_ALL_ANSWER` response so the SDK never sees an exception + * (which would surface as a generic RPC failure to the model). + */ + onRequest: CopilotUserInputPolicy; + /** + * Optional fallback when `onRequest` throws. If omitted, throws are + * converted to a `DENY_ALL_ANSWER` response with the error message + * appended. If supplied and `onError` also throws, fall through to + * the error-message response. + */ + onError?: CopilotUserInputPolicy; +} + +/** + * Wrap a host callback into a policy, catching synchronous throws and + * async rejections so the SDK never sees an exception. 
+ */ +export function delegatingUserInputPolicy( + options: DelegatingUserInputPolicyOptions, +): CopilotUserInputPolicy { + const { onRequest, onError } = options; + return async (ctx) => { + try { + const result = await onRequest(ctx); + if (result !== undefined) { + return result; + } + return { answer: DENY_ALL_ANSWER, wasFreeform: true }; + } catch (error) { + if (onError) { + try { + const fallback = await onError(ctx); + if (fallback !== undefined) { + return fallback; + } + } catch { + // fall through to error-message response + } + } + return { + answer: `${DENY_ALL_ANSWER} (host policy threw: ${formatError(error)})`, + wasFreeform: true, + }; + } + }; +} + +/** + * Compose policies in order. The first policy to return a non-undefined + * result wins. If all return undefined, a fail-closed `DENY_ALL_ANSWER` + * response is produced. Throws inside any policy short-circuit to the + * error-message response; downstream policies are not consulted after a + * throw. + */ +export function composeUserInputPolicies( + ...policies: CopilotUserInputPolicy[] +): CopilotUserInputPolicy { + return async (ctx) => { + for (const policy of policies) { + try { + const result = await policy(ctx); + if (result !== undefined) { + return result; + } + } catch (error) { + return { + answer: `${DENY_ALL_ANSWER} (host policy threw: ${formatError(error)})`, + wasFreeform: true, + }; + } + } + return { answer: DENY_ALL_ANSWER, wasFreeform: true }; + }; +} + +/** + * Adapt a `CopilotUserInputPolicy` to the SDK's `UserInputHandler` + * shape. The returned handler always resolves with a valid + * `UserInputResponse` (never throws, never returns undefined), + * defaulting to `DENY_ALL_ANSWER` when the policy returns undefined or + * throws. 
+ */ +export function createUserInputBridge( + policy: CopilotUserInputPolicy = denyAllUserInputPolicy, +): UserInputHandler { + return async ( + request: SdkUserInputRequest, + invocation: { sessionId: string }, + ): Promise<SdkUserInputResponse> => { + const ctx: CopilotUserInputContext = { + request, + sessionId: invocation.sessionId, + }; + try { + const result = await policy(ctx); + if (result !== undefined) { + return result; + } + } catch (error) { + return { + answer: `${DENY_ALL_ANSWER} (host policy threw: ${formatError(error)})`, + wasFreeform: true, + }; + } + return { answer: DENY_ALL_ANSWER, wasFreeform: true }; + }; +} + +function formatError(error: unknown): string { + if (error instanceof Error) { + return error.message; + } + try { + return JSON.stringify(error); + } catch { + return String(error); + } +} diff --git a/extensions/copilot/src/workspace-bootstrap.test.ts b/extensions/copilot/src/workspace-bootstrap.test.ts new file mode 100644 index 000000000000..65b632d384d4 --- /dev/null +++ b/extensions/copilot/src/workspace-bootstrap.test.ts @@ -0,0 +1,268 @@ +import { mkdtemp, rm, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import path from "node:path"; +import type { AgentHarnessAttemptParams } from "openclaw/plugin-sdk/agent-harness-runtime"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + TESTING_EXPORTS, + remapCopilotBootstrapContextFiles, + renderCopilotWorkspaceBootstrapInstructions, + resolveCopilotWorkspaceBootstrapContext, +} from "./workspace-bootstrap.js"; + +const { COPILOT_NATIVE_PROJECT_DOC_BASENAMES, compareCopilotContextFiles } = TESTING_EXPORTS; + +function makeAttempt( + overrides: Partial<AgentHarnessAttemptParams> = {}, +): AgentHarnessAttemptParams { + return { + agentId: "agent-1", + prompt: "hello", + runId: "run-1", + sessionFile: "session.json", + sessionId: "session-1", + timeoutMs: 5000, + workspaceDir: "C:\\workspace", + ...overrides, + } as unknown as AgentHarnessAttemptParams; +} +
+describe("renderCopilotWorkspaceBootstrapInstructions", () => { + it("returns undefined when there are no context files", () => { + expect(renderCopilotWorkspaceBootstrapInstructions([])).toBeUndefined(); + }); + + it("returns undefined when every file is filtered as SDK-native", () => { + expect( + renderCopilotWorkspaceBootstrapInstructions([ + { path: "/ws/AGENTS.md", content: "Follow AGENTS guidance." }, + ]), + ).toBeUndefined(); + }); + + it("filters AGENTS.md (the SDK loads it natively from workingDirectory)", () => { + const rendered = renderCopilotWorkspaceBootstrapInstructions([ + { path: "/ws/AGENTS.md", content: "Follow AGENTS guidance." }, + { path: "/ws/SOUL.md", content: "Soul voice goes here." }, + ]); + expect(rendered).toBeDefined(); + expect(rendered).toContain("Soul voice goes here."); + expect(rendered).not.toContain("Follow AGENTS guidance."); + }); + + it("renders persona files ahead of free-form context (SOUL before USER)", () => { + const rendered = renderCopilotWorkspaceBootstrapInstructions([ + { path: "/ws/USER.md", content: "USER body" }, + { path: "/ws/SOUL.md", content: "SOUL body" }, + ]); + expect(rendered).toBeDefined(); + const soulIdx = rendered!.indexOf("SOUL body"); + const userIdx = rendered!.indexOf("USER body"); + expect(soulIdx).toBeGreaterThanOrEqual(0); + expect(userIdx).toBeGreaterThan(soulIdx); + }); + + it("adds the SOUL.md hint line only when SOUL.md is present", () => { + const withSoul = renderCopilotWorkspaceBootstrapInstructions([ + { path: "/ws/SOUL.md", content: "S" }, + ]); + const withoutSoul = renderCopilotWorkspaceBootstrapInstructions([ + { path: "/ws/IDENTITY.md", content: "I" }, + ]); + expect(withSoul).toContain("SOUL.md: persona/tone"); + expect(withoutSoul).not.toContain("SOUL.md: persona/tone"); + }); + + it("includes file path and content for every rendered file", () => { + const rendered = renderCopilotWorkspaceBootstrapInstructions([ + { path: "/ws/IDENTITY.md", content: "I am the agent." 
}, + { path: "/ws/HEARTBEAT.md", content: "Heartbeat task list." }, + ]); + expect(rendered).toContain("## /ws/IDENTITY.md"); + expect(rendered).toContain("I am the agent."); + expect(rendered).toContain("## /ws/HEARTBEAT.md"); + expect(rendered).toContain("Heartbeat task list."); + }); +}); + +describe("COPILOT_NATIVE_PROJECT_DOC_BASENAMES", () => { + it("matches the SDK auto-load list documented in types.d.ts:1036", () => { + // If this set drifts away from the SDK's auto-loaded basenames the + // copilot harness will start duplicating instructions content. + // Keep this list in sync with the SDK release notes for + // `enableConfigDiscovery` / "custom instruction files". + expect([...COPILOT_NATIVE_PROJECT_DOC_BASENAMES]).toEqual(["agents.md"]); + }); +}); + +describe("compareCopilotContextFiles", () => { + it("orders unknown files lexicographically after the ordered set", () => { + const sorted = [ + { path: "/ws/zzz.md", content: "" }, + { path: "/ws/aaa.md", content: "" }, + { path: "/ws/SOUL.md", content: "" }, + ].toSorted(compareCopilotContextFiles); + expect(sorted.map((file) => file.path)).toEqual(["/ws/SOUL.md", "/ws/aaa.md", "/ws/zzz.md"]); + }); +}); + +describe("resolveCopilotWorkspaceBootstrapContext", () => { + let workspaceDir: string; + + beforeEach(async () => { + workspaceDir = await mkdtemp(path.join(tmpdir(), "copilot-bootstrap-")); + }); + + afterEach(async () => { + await rm(workspaceDir, { force: true, recursive: true }); + }); + + it("returns empty result and undefined instructions when workspaceDir is missing", async () => { + const result = await resolveCopilotWorkspaceBootstrapContext({ + attempt: makeAttempt({ workspaceDir: undefined }), + effectiveWorkspaceDir: undefined, + }); + expect(result.bootstrapFiles).toEqual([]); + expect(result.contextFiles).toEqual([]); + expect(result.instructions).toBeUndefined(); + }); + + it("loads SOUL.md from the workspace and renders it into instructions", async () => { + await 
writeFile(path.join(workspaceDir, "SOUL.md"), "Soul voice goes here."); + const result = await resolveCopilotWorkspaceBootstrapContext({ + attempt: makeAttempt({ workspaceDir }), + effectiveWorkspaceDir: workspaceDir, + }); + expect(result.bootstrapFiles.length).toBeGreaterThan(0); + expect(result.instructions).toBeDefined(); + expect(result.instructions).toContain("Soul voice goes here."); + }); + + it("filters AGENTS.md out of the rendered block (SDK loads it natively)", async () => { + await writeFile(path.join(workspaceDir, "AGENTS.md"), "Follow AGENTS guidance."); + await writeFile(path.join(workspaceDir, "SOUL.md"), "Soul voice goes here."); + const result = await resolveCopilotWorkspaceBootstrapContext({ + attempt: makeAttempt({ workspaceDir }), + effectiveWorkspaceDir: workspaceDir, + }); + expect(result.instructions).toContain("Soul voice goes here."); + expect(result.instructions).not.toContain("Follow AGENTS guidance."); + expect(result.instructions).toContain("Copilot SDK loads AGENTS.md natively"); + }); + + it("includes [MISSING] placeholders for files that don't exist (parity with PI/codex)", async () => { + await writeFile(path.join(workspaceDir, "AGENTS.md"), "Follow AGENTS guidance."); + const result = await resolveCopilotWorkspaceBootstrapContext({ + attempt: makeAttempt({ workspaceDir }), + effectiveWorkspaceDir: workspaceDir, + }); + // The shared loader synthesizes `[MISSING] Expected at: ` + // entries for every known bootstrap file the workspace hasn't + // provided yet. This is intentional — PI and codex inject the + // same placeholders so the model can see what bootstrap files are + // expected and prompt the user / create them. See + // src/agents/pi-embedded-helpers/bootstrap.ts:293-296. + // We surface these in the rendered block exactly like codex does. 
+ expect(result.instructions).toBeDefined(); + expect(result.instructions).toContain("[MISSING] Expected at:"); + expect(result.instructions).toContain("SOUL.md"); + // AGENTS.md content is still suppressed because the SDK auto-loads + // it natively from workingDirectory. + expect(result.instructions).not.toContain("Follow AGENTS guidance."); + }); +}); + +describe("remapCopilotBootstrapContextFiles (PR #86155 [P2] round-9)", () => { + // The helper mirrors PI's `remapInjectedContextFilesToWorkspace` + // byte-for-byte so a Copilot run with a `ro`/`none` sandbox renders + // bootstrap context paths the same way PI does: in-workspace files + // get their host root rewritten to the sandbox root; out-of-workspace + // (parent-traversal, absolute, sibling) paths stay verbatim so the + // model never sees a pretend-sandboxed path for something that + // actually lives elsewhere. + it("returns input unchanged when source equals target (PI fast path)", () => { + const files = [ + { path: "/host/ws/SOUL.md", content: "soul" }, + { path: "/host/ws/IDENTITY.md", content: "id" }, + ]; + const out = remapCopilotBootstrapContextFiles({ + files, + sourceWorkspaceDir: "/host/ws", + targetWorkspaceDir: "/host/ws", + }); + expect(out).toBe(files); + }); + + it("rewrites in-workspace paths but leaves outside-workspace paths intact", () => { + const out = remapCopilotBootstrapContextFiles({ + files: [ + { path: "/host/ws/SOUL.md", content: "soul" }, + { path: "/host/ws/.openclaw/agents/main/IDENTITY.md", content: "id" }, + { path: "/host/other/UNRELATED.md", content: "u" }, + { path: "/host/ws", content: "root" }, + ], + sourceWorkspaceDir: "/host/ws", + targetWorkspaceDir: "/sandbox/copy", + }); + expect(out.map((f) => f.path)).toEqual([ + "/sandbox/copy/SOUL.md", + "/sandbox/copy/.openclaw/agents/main/IDENTITY.md", + "/host/other/UNRELATED.md", + "/sandbox/copy", + ]); + }); +}); + +describe("resolveCopilotWorkspaceBootstrapContext sandbox remap (PR #86155 [P2] round-9)", () => { 
+ let workspaceDir: string; + let sandboxDir: string; + + beforeEach(async () => { + workspaceDir = await mkdtemp(path.join(tmpdir(), "copilot-bootstrap-host-")); + sandboxDir = await mkdtemp(path.join(tmpdir(), "copilot-bootstrap-sbx-")); + }); + + afterEach(async () => { + await rm(workspaceDir, { force: true, recursive: true }); + await rm(sandboxDir, { force: true, recursive: true }); + }); + + it("rewrites rendered context paths from host workspace to sandbox workspace when effective differs", async () => { + // Readonly sandbox: bootstrap files live on the host workspace + // (the canonical source of SOUL.md / .openclaw conventions), but + // the SDK session's workingDirectory and bridged tools see the + // sandbox copy. The rendered systemMessage must show the model + // sandbox paths, not host paths, so it matches what the native + // SDK loader and the wrapped tools report. + await writeFile(path.join(workspaceDir, "SOUL.md"), "Soul voice from host."); + const result = await resolveCopilotWorkspaceBootstrapContext({ + attempt: makeAttempt({ workspaceDir }), + effectiveWorkspaceDir: sandboxDir, + }); + expect(result.instructions).toBeDefined(); + expect(result.instructions).toContain("Soul voice from host."); + // Positive: every rendered `## ` file header is now under the + // sandbox root so the model sees a workspace it can actually + // dereference through the bridged tools. + expect(result.instructions).toContain(`## ${path.join(sandboxDir, "SOUL.md")}`); + // Negative: no rendered file header may still point at the + // host workspace root (would otherwise let the model dereference + // a path its tools cannot reach in a readonly sandbox). We scope + // this check to `## ` headers because PI deliberately leaves the + // host path inside any `[MISSING] Expected at: ` body — it + // refers to the canonical source location the user should create + // the file at, not the runtime workspace. + const headerLines = (result.instructions ?? 
"") + .split("\n") + .filter((line) => line.startsWith("## ")); + expect(headerLines.length).toBeGreaterThan(0); + for (const line of headerLines) { + expect(line).not.toContain(workspaceDir); + } + // Returned contextFiles array reflects the remap too, so any + // future consumer that reads `contextFiles` directly stays in + // lock-step with `instructions`. + expect(result.contextFiles.map((f) => f.path)).toContain(path.join(sandboxDir, "SOUL.md")); + expect(result.contextFiles.every((f) => !f.path.startsWith(workspaceDir))).toBe(true); + }); +}); diff --git a/extensions/copilot/src/workspace-bootstrap.ts b/extensions/copilot/src/workspace-bootstrap.ts new file mode 100644 index 000000000000..1c644d55f65c --- /dev/null +++ b/extensions/copilot/src/workspace-bootstrap.ts @@ -0,0 +1,255 @@ +import path from "node:path"; +import type { + AgentHarnessAttemptParams, + EmbeddedContextFile, +} from "openclaw/plugin-sdk/agent-harness-runtime"; +import { + resolveBootstrapContextForRun, + resolveUserPath, +} from "openclaw/plugin-sdk/agent-harness-runtime"; + +// Filenames the Copilot SDK already loads natively from the working +// directory / instructionDirectories (per +// `@github/copilot-sdk/dist/types.d.ts:1036,1155` — +// "custom instruction files (.github/copilot-instructions.md, +// AGENTS.md, etc.) are always loaded from the working directory"). +// Filtering them out of the OpenClaw bootstrap injection avoids +// duplicating their content into `SessionConfig.systemMessage`, which +// would otherwise inflate every prompt with the same text the SDK +// already includes. Mirrors codex's CODEX_NATIVE_PROJECT_DOC_BASENAMES +// (extensions/codex/src/app-server/run-attempt.ts:160). +const COPILOT_NATIVE_PROJECT_DOC_BASENAMES = new Set(["agents.md"]); + +// Persona/identity files get sorted to the top of the rendered block +// so they precede the freer-form context like USER.md / MEMORY.md. +// Mirrors codex's CODEX_BOOTSTRAP_CONTEXT_ORDER ordering (same files). 
+const COPILOT_BOOTSTRAP_CONTEXT_ORDER = new Map([ + ["soul.md", 10], + ["identity.md", 20], + ["heartbeat.md", 30], + ["bootstrap.md", 40], + ["tools.md", 50], + ["user.md", 60], + ["memory.md", 70], +]); + +export type CopilotWorkspaceBootstrapResult = { + bootstrapFiles: Awaited<ReturnType<typeof resolveBootstrapContextForRun>>["bootstrapFiles"]; + contextFiles: EmbeddedContextFile[]; + instructions?: string; +}; + +/** + * Loads OpenClaw workspace bootstrap files (IDENTITY.md, SOUL.md, + * HEARTBEAT.md, USER.md, TOOLS.md, BOOTSTRAP.md, MEMORY.md, ...) using + * the shared core helper PI and codex both use, then renders them as a + * single string suitable for `SessionConfig.systemMessage.content` on + * the Copilot SDK. + * + * Returns `instructions: undefined` when there are no relevant files + * (after filtering out SDK-native docs) so the caller can omit the + * `systemMessage` field entirely rather than passing an empty string. + * + * Mirrors codex's `buildCodexWorkspaceBootstrapContext` / + * `renderCodexWorkspaceBootstrapInstructions` pair + * (`extensions/codex/src/app-server/run-attempt.ts:2877,3047`). The + * shape divergence — codex returns instructions inside the same object + * as bootstrapFiles+contextFiles for its developerInstructions field; + * copilot exposes the rendered string for SDK `systemMessage` — is the + * intended difference between the two runtimes' system-prompt + * surfaces. + */ +export async function resolveCopilotWorkspaceBootstrapContext(params: { + attempt: AgentHarnessAttemptParams; + /** + * Sandbox-aware working directory the SDK session will run in. + * When this differs from the canonical `attempt.workspaceDir` + * (sandbox `ro` / `none` runs that redirect to a copy), bootstrap + * context file paths are remapped so the rendered `systemMessage` + * shows the model the same workspace the SDK's native loader and + * bridged tools operate on. Pass `undefined` only when no sandbox + * resolution has happened (e.g. tests not exercising sandbox + * redirection).
Required so future callers cannot silently miss + * the remap. Mirrors PI's + * `remapInjectedContextFilesToWorkspace` call in + * `src/agents/pi-embedded-runner/run/attempt.ts:1595`. + */ + effectiveWorkspaceDir: string | undefined; + warn?: (message: string) => void; +}): Promise<CopilotWorkspaceBootstrapResult> { + const { attempt } = params; + const workspaceDir = readResolvedWorkspacePath(attempt.workspaceDir); + if (!workspaceDir) { + return { bootstrapFiles: [], contextFiles: [] }; + } + try { + const bootstrapContext = await resolveBootstrapContextForRun({ + workspaceDir, + config: attempt.config, + sessionKey: readNonEmptyString((attempt as { sessionKey?: unknown }).sessionKey), + sessionId: readNonEmptyString(attempt.sessionId), + agentId: readNonEmptyString(attempt.agentId), + warn: params.warn, + contextMode: attempt.bootstrapContextMode, + runKind: attempt.bootstrapContextRunKind, + }); + // Remap context-file paths from the workspace we LOADED them + // from (`workspaceDir`, the canonical host workspace where + // SOUL.md / IDENTITY.md / .openclaw conventions live) onto the + // workspace the SDK session will actually OPERATE in + // (`effectiveWorkspaceDir`). When the two are identical (no + // sandbox, or sandbox `rw`), remap is a no-op. The render below + // and the returned `contextFiles` use the remapped array so the + // model never sees a host path while its native loader and + // bridged tools see only the sandbox copy. + const contextFiles = remapCopilotBootstrapContextFiles({ + files: bootstrapContext.contextFiles, + sourceWorkspaceDir: workspaceDir, + targetWorkspaceDir: readResolvedWorkspacePath(params.effectiveWorkspaceDir) ?? workspaceDir, + }); + return { + bootstrapFiles: bootstrapContext.bootstrapFiles, + contextFiles, + instructions: renderCopilotWorkspaceBootstrapInstructions(contextFiles), + }; + } catch (error) { + params.warn?.( + `[copilot-attempt] failed to load workspace bootstrap instructions: ${ + error instanceof Error ?
error.message : String(error) + }`, + ); + return { bootstrapFiles: [], contextFiles: [] }; + } +} + +/** + * Rewrites context-file paths from a source workspace root to a + * target workspace root, mirroring PI's + * `remapInjectedContextFilesToWorkspace` + * (`src/agents/pi-embedded-runner/run/attempt.ts:603`). Files whose + * resolved relative path escapes the source workspace (parent + * traversal or absolute) are left untouched so we never pretend a + * file lives inside the sandbox when it does not. Exported for unit + * tests; intentionally local to the Copilot extension (codex keeps + * similar helpers extension-local rather than importing from PI). + */ +export function remapCopilotBootstrapContextFiles(params: { + files: EmbeddedContextFile[]; + sourceWorkspaceDir: string; + targetWorkspaceDir: string; +}): EmbeddedContextFile[] { + if (params.sourceWorkspaceDir === params.targetWorkspaceDir) { + return params.files; + } + return params.files.map((file) => { + const relative = path.relative(params.sourceWorkspaceDir, file.path); + if (!isRelativePathInsideOrEqual(relative)) { + return file; + } + return { + ...file, + path: + relative === "" + ? params.targetWorkspaceDir + : path.join(params.targetWorkspaceDir, relative), + }; + }); +} + +function isRelativePathInsideOrEqual(relativePath: string): boolean { + return ( + relativePath === "" || + (relativePath !== ".." && + !relativePath.startsWith(`..${path.sep}`) && + !path.isAbsolute(relativePath)) + ); +} + +/** + * Renders bootstrap context files into a single string for + * `SessionConfig.systemMessage.content` (append mode). Returns + * `undefined` when no relevant files remain after filtering, so the + * caller can skip setting `systemMessage` altogether. + * + * Files whose basename matches a doc the Copilot SDK already loads + * natively (see {@link COPILOT_NATIVE_PROJECT_DOC_BASENAMES}) are + * dropped to avoid duplication with SDK-managed sections. 
+ */ +export function renderCopilotWorkspaceBootstrapInstructions( + contextFiles: EmbeddedContextFile[], +): string | undefined { + const files = contextFiles + .filter((file) => { + const baseName = getCopilotContextFileBasename(file.path); + return baseName.length > 0 && !COPILOT_NATIVE_PROJECT_DOC_BASENAMES.has(baseName); + }) + .toSorted(compareCopilotContextFiles); + if (files.length === 0) { + return undefined; + } + const hasSoulFile = files.some((file) => getCopilotContextFileBasename(file.path) === "soul.md"); + const lines: string[] = [ + "OpenClaw loaded these user-editable workspace files. Treat them as project/user context. The Copilot SDK loads AGENTS.md natively from its instruction directories, so AGENTS.md is not repeated here.", + "", + "# Project Context", + "", + "The following project context files have been loaded:", + ]; + if (hasSoulFile) { + lines.push("SOUL.md: persona/tone. Follow it unless higher-priority instructions override."); + } + lines.push(""); + for (const file of files) { + lines.push(`## ${file.path}`, "", file.content, ""); + } + return lines.join("\n").trim(); +} + +function compareCopilotContextFiles(left: EmbeddedContextFile, right: EmbeddedContextFile): number { + const leftBase = getCopilotContextFileBasename(left.path); + const rightBase = getCopilotContextFileBasename(right.path); + const leftOrder = COPILOT_BOOTSTRAP_CONTEXT_ORDER.get(leftBase) ?? Number.MAX_SAFE_INTEGER; + const rightOrder = COPILOT_BOOTSTRAP_CONTEXT_ORDER.get(rightBase) ?? 
Number.MAX_SAFE_INTEGER; + if (leftOrder !== rightOrder) { + return leftOrder - rightOrder; + } + const leftPath = normalizeCopilotContextFilePath(left.path); + const rightPath = normalizeCopilotContextFilePath(right.path); + if (leftPath < rightPath) { + return -1; + } + if (leftPath > rightPath) { + return 1; + } + return 0; +} + +function normalizeCopilotContextFilePath(filePath: string): string { + return filePath.trim().replaceAll("\\", "/").toLowerCase(); +} + +function getCopilotContextFileBasename(filePath: string): string { + return normalizeCopilotContextFilePath(filePath).split("/").pop() ?? ""; +} + +function readNonEmptyString(value: unknown): string | undefined { + return typeof value === "string" && value.trim().length > 0 ? value : undefined; +} + +function readResolvedWorkspacePath(value: unknown): string | undefined { + const raw = readNonEmptyString(value); + if (!raw) { + return undefined; + } + if (process.platform !== "win32" && /^[A-Za-z]:[\\/]/.test(raw)) { + return raw.trim(); + } + return resolveUserPath(raw); +} + +export const TESTING_EXPORTS = { + COPILOT_NATIVE_PROJECT_DOC_BASENAMES, + COPILOT_BOOTSTRAP_CONTEXT_ORDER, + compareCopilotContextFiles, + getCopilotContextFileBasename, +}; diff --git a/extensions/copilot/tsconfig.json b/extensions/copilot/tsconfig.json new file mode 100644 index 000000000000..b8a85a99ac3d --- /dev/null +++ b/extensions/copilot/tsconfig.json @@ -0,0 +1,16 @@ +{ + "extends": "../tsconfig.package-boundary.base.json", + "compilerOptions": { + "rootDir": "." 
+ }, + "include": ["./*.ts", "./src/**/*.ts"], + "exclude": [ + "./**/*.test.ts", + "./dist/**", + "./node_modules/**", + "./src/test-support/**", + "./src/**/*test-helpers.ts", + "./src/**/*test-harness.ts", + "./src/**/*test-support.ts" + ] +} diff --git a/package.json b/package.json index 25e080395419..2476f495f3e4 100644 --- a/package.json +++ b/package.json @@ -1397,10 +1397,10 @@ "audit:seams": "node scripts/audit-seams.mjs", "build": "node scripts/build-all.mjs", "build:ci-artifacts": "node scripts/build-all.mjs ciArtifacts", - "build:docker": "node scripts/tsdown-build.mjs && node scripts/check-cli-bootstrap-imports.mjs && node scripts/runtime-postbuild.mjs && node scripts/build-stamp.mjs && node scripts/runtime-postbuild-stamp.mjs && pnpm plugins:assets:build && pnpm plugins:assets:copy && node --experimental-strip-types scripts/copy-hook-metadata.ts && node --experimental-strip-types scripts/copy-export-html-templates.ts && node --experimental-strip-types scripts/write-build-info.ts && node --experimental-strip-types scripts/write-cli-startup-metadata.ts && node --experimental-strip-types scripts/write-cli-compat.ts", + "build:docker": "node scripts/tsdown-build.mjs && node scripts/check-cli-bootstrap-imports.mjs && node scripts/runtime-postbuild.mjs && node scripts/build-stamp.mjs && node scripts/runtime-postbuild-stamp.mjs && pnpm plugins:assets:build && pnpm plugins:assets:copy && node --experimental-strip-types scripts/copy-hook-metadata.ts && node --experimental-strip-types scripts/copy-copilot-sdk-manifest.ts && node --experimental-strip-types scripts/copy-export-html-templates.ts && node --experimental-strip-types scripts/write-build-info.ts && node --experimental-strip-types scripts/write-cli-startup-metadata.ts && node --experimental-strip-types scripts/write-cli-compat.ts", "build:plugin-sdk:dts": "node scripts/run-tsgo.mjs -p tsconfig.plugin-sdk.dts.json --declaration true", "build:plugin-sdk:strict-smoke": "pnpm build:plugin-sdk:dts && node 
--experimental-strip-types scripts/write-plugin-sdk-entry-dts.ts", - "build:strict-smoke": "pnpm plugins:assets:build && node scripts/tsdown-build.mjs && node scripts/check-cli-bootstrap-imports.mjs && node scripts/runtime-postbuild.mjs && node scripts/build-stamp.mjs && node scripts/runtime-postbuild-stamp.mjs && pnpm build:plugin-sdk:dts && node --experimental-strip-types scripts/write-plugin-sdk-entry-dts.ts && node scripts/check-plugin-sdk-exports.mjs", + "build:strict-smoke": "pnpm plugins:assets:build && node scripts/tsdown-build.mjs && node scripts/check-cli-bootstrap-imports.mjs && node scripts/runtime-postbuild.mjs && node scripts/build-stamp.mjs && node scripts/runtime-postbuild-stamp.mjs && node --experimental-strip-types scripts/copy-copilot-sdk-manifest.ts && pnpm build:plugin-sdk:dts && node --experimental-strip-types scripts/write-plugin-sdk-entry-dts.ts && node scripts/check-plugin-sdk-exports.mjs", "canvas:a2ui:bundle": "node scripts/bundle-a2ui.mjs", "changed:lanes": "node scripts/changed-lanes.mjs", "check": "node scripts/check.mjs", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7c6ea2f9c862..b7d73131f4e1 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -570,6 +570,18 @@ importers: specifier: workspace:* version: link:../../packages/plugin-sdk + extensions/copilot: + devDependencies: + '@github/copilot': + specifier: 1.0.48 + version: 1.0.48 + '@github/copilot-sdk': + specifier: 1.0.0-beta.4 + version: 1.0.0-beta.4 + '@openclaw/plugin-sdk': + specifier: workspace:* + version: link:../../packages/plugin-sdk + extensions/copilot-proxy: devDependencies: '@openclaw/plugin-sdk': @@ -2564,6 +2576,50 @@ packages: '@noble/hashes': optional: true + '@github/copilot-darwin-arm64@1.0.48': + resolution: {integrity: sha512-82MLoMQwPVVFM8EYssihFxSEPUYtZADE8rMzQ3jG9HgRg2qjQSfnHQS1mKe64dlXswZUK/onw6/8kjnW5I4pPg==} + cpu: [arm64] + os: [darwin] + hasBin: true + + '@github/copilot-darwin-x64@1.0.48': + resolution: {integrity: 
sha512-1VQ5r5F0h8GwboXmZTcutqcJT+iCpPXAF27QqodmpKEvW9aYfG8g9X2kFJOzDZoX+SA3Uaka9qXdYKF2xT6Uog==} + cpu: [x64] + os: [darwin] + hasBin: true + + '@github/copilot-linux-arm64@1.0.48': + resolution: {integrity: sha512-PmsGnb0DZlI+Bf53l9HM1PAHHkUcMyB4y8v/7tnC/jDOV5dGF124n0HnDNfJLOLiJGiQGodthIif6QtPaAxpeA==} + cpu: [arm64] + os: [linux] + hasBin: true + + '@github/copilot-linux-x64@1.0.48': + resolution: {integrity: sha512-b2cc4euSlke9fYHXXsS2EL9UYbctN0h4lZvtAcKUDY+RCnpYAQOVBZK+c1R9dQrtsT6Z/yUv7PuFPSs8qdtc2Q==} + cpu: [x64] + os: [linux] + hasBin: true + + '@github/copilot-sdk@1.0.0-beta.4': + resolution: {integrity: sha512-DcVMN2FWODxamFS9nTls8AW3QsyMnj6JDVBNRVBXaTY9kEhGHCjt8lp7sJp95/vyl52hvEb4/68Oh6SdFU9O/Q==} + engines: {node: '>=20.0.0'} + + '@github/copilot-win32-arm64@1.0.48': + resolution: {integrity: sha512-VEEOwddtpJ3DTbXGhnK6K8im4ofl9m08q1m/K++sNvWV8wkkOSOQBTiPdyUsuU/TXAoFhb8tZMIJv+6NnMBtMw==} + cpu: [arm64] + os: [win32] + hasBin: true + + '@github/copilot-win32-x64@1.0.48': + resolution: {integrity: sha512-93BzvXLPHTyy1gWBXQY/IWIHor4IAwZuuo7/obG80/Qa6U0WeaN9slz/FBJvrsgVNrrRfEID5Xm3At+S6Kj67Q==} + cpu: [x64] + os: [win32] + hasBin: true + + '@github/copilot@1.0.48': + resolution: {integrity: sha512-U5SzyTEq376UU9A4Sd3TEKz+Y2nRUd90cLO4Hc1otaB8yFSy9Ur2UVGcI2/wCoodL3a39k6WbdgNzFxr0gWFRQ==} + hasBin: true + '@google/genai@2.6.0': resolution: {integrity: sha512-HjoW3mPuEn7pnuKABJl9VbDoWDSF4nbwYKYvYYor7YjPeDxrrBxHzu2d1Prcd+BAuC4w+85UP6y7ZdcrQAoO7g==} engines: {node: '>=20.0.0'} @@ -7008,6 +7064,10 @@ packages: resolution: {integrity: sha512-Dhxzh5HZuiHQhbvTW9AMetFfBHDMYpo23Uo9btPXgdYP+3T5S+p+jgNy7spra+veYhBP2dCSgxR/i2Y02h5/6w==} engines: {node: '>=0.10.0'} + vscode-jsonrpc@8.2.1: + resolution: {integrity: sha512-kdjOSJ2lLIn7r1rtrMbbNCHjyMPfRnowdKjBQ+mGq6NAW5QY2bEZC/khaC5OR8svbbjvLEaIXkOq45e2X9BIbQ==} + engines: {node: '>=14.0.0'} + w3c-xmlserializer@5.0.0: resolution: {integrity: 
sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==} engines: {node: '>=18'} @@ -8144,6 +8204,39 @@ snapshots: optionalDependencies: '@noble/hashes': 2.0.1 + '@github/copilot-darwin-arm64@1.0.48': + optional: true + + '@github/copilot-darwin-x64@1.0.48': + optional: true + + '@github/copilot-linux-arm64@1.0.48': + optional: true + + '@github/copilot-linux-x64@1.0.48': + optional: true + + '@github/copilot-sdk@1.0.0-beta.4': + dependencies: + '@github/copilot': 1.0.48 + vscode-jsonrpc: 8.2.1 + zod: 4.4.3 + + '@github/copilot-win32-arm64@1.0.48': + optional: true + + '@github/copilot-win32-x64@1.0.48': + optional: true + + '@github/copilot@1.0.48': + optionalDependencies: + '@github/copilot-darwin-arm64': 1.0.48 + '@github/copilot-darwin-x64': 1.0.48 + '@github/copilot-linux-arm64': 1.0.48 + '@github/copilot-linux-x64': 1.0.48 + '@github/copilot-win32-arm64': 1.0.48 + '@github/copilot-win32-x64': 1.0.48 + '@google/genai@2.6.0(@modelcontextprotocol/sdk@1.29.0(zod@4.4.3))': dependencies: google-auth-library: 10.6.2 @@ -12915,6 +13008,8 @@ snapshots: void-elements@3.1.0: {} + vscode-jsonrpc@8.2.1: {} + w3c-xmlserializer@5.0.0: dependencies: xml-name-validator: 5.0.0 diff --git a/qa/copilot-capabilities.md b/qa/copilot-capabilities.md new file mode 100644 index 000000000000..53d4b46312c8 --- /dev/null +++ b/qa/copilot-capabilities.md @@ -0,0 +1,390 @@ +# Copilot SDK capability inventory (`@github/copilot-sdk@1.0.0-beta.4`) + +> Public preview audit for the `1.0.0-beta.4` pin. Per task contract, treat this as the current `latest` dist-tag snapshot and re-generate this document whenever the pinned SDK version changes. + +This inventory documents the shipped TypeScript surface that the bundled `copilot` plugin pins against, instead of guessing. Every claim below is tied to the installed SDK's `.d.ts` files and bundled docs; where the inventory is silent, this document says so explicitly. + +## 1. 
Package metadata + +- Package name: `@github/copilot-sdk`. +- Version: `1.0.0-beta.4`. +- Export map: + - `.` -> ESM `./dist/index.js`, CJS `./dist/cjs/index.js`, types `./dist/index.d.ts`. + - `./extension` -> ESM `./dist/extension.js`, CJS `./dist/cjs/extension.js`, types `./dist/extension.d.ts`. +- Primary type barrel `dist/index.d.ts` re-exports `CopilotClient`, `CopilotSession`, `AssistantMessageEvent`, helpers like `defineTool`/`approveAll`, and the full public type surface from `dist/types.d.ts`. +- Declared runtime deps: + - `@github/copilot` `^1.0.46` (bundled CLI/runtime dependency) + - `vscode-jsonrpc` `^8.2.1` + - `zod` `^4.3.6` + +Sources: `package.json` (on-disk install): 2-32, 58-62; `dist/index.d.ts` (sdk-inventory.txt:1033-1042). + +## 2. Lifecycle methods on `CopilotClient` + +Public methods/getters visible in `dist/client.d.ts`: + +| Member | Signature | Return shape | What it does | +| ------------------------ | ------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------- | +| `rpc` | `get rpc(): ReturnType` | typed server RPC facade | Low-level server-scoped RPC surface; throws if not connected. | +| `start` | `start(): Promise<void>` | `void` | Starts/spawns the CLI server and connects. | +| `stop` | `stop(): Promise<Error[]>` | cleanup errors array | Graceful shutdown: closes sessions, JSON-RPC connection, then spawned CLI; preserves on-disk session state. | +| `forceStop` | `forceStop(): Promise<void>` | `void` | Force-kills client state/process without graceful cleanup. | +| `createSession` | `createSession(config: SessionConfig): Promise<CopilotSession>` | `CopilotSession` | Creates a new conversation session; auto-starts when enabled.
| +| `resumeSession` | `resumeSession(sessionId: string, config: ResumeSessionConfig): Promise` | `CopilotSession` | Re-attaches to a persisted session; returns `workspacePath` when infinite sessions were enabled. | +| `getState` | `getState(): ConnectionState` | `"disconnected" \| "connecting" \| "connected" \| "error"` | Returns client connection state. | +| `ping` | `ping(message?: string): Promise<{ message: string; timestamp: number; protocolVersion?: number; }>` | echo payload | Connectivity/protocol sanity check. | +| `getStatus` | `getStatus(): Promise` | `{ version: string; protocolVersion: number }` | Returns CLI package version and negotiated protocol version. | +| `getAuthStatus` | `getAuthStatus(): Promise` | `{ isAuthenticated, authType?, host?, login?, statusMessage? }` | Returns current auth mode/status. | +| `listModels` | `listModels(): Promise` | model metadata array | Lists models; caches first successful result unless overridden by `onListModels`. | +| `getLastSessionId` | `getLastSessionId(): Promise` | optional session id | Returns most recently updated session id. | +| `deleteSession` | `deleteSession(sessionId: string): Promise` | `void` | Irreversibly deletes persisted session data from disk. | +| `listSessions` | `listSessions(filter?: SessionListFilter): Promise` | session metadata array | Lists persisted sessions, optionally filtered by cwd/git context. | +| `getSessionMetadata` | `getSessionMetadata(sessionId: string): Promise` | optional metadata | O(1)-style lookup for one session's metadata. | +| `getForegroundSessionId` | `getForegroundSessionId(): Promise` | optional session id | TUI+server-only: returns current foreground session. | +| `setForegroundSessionId` | `setForegroundSessionId(sessionId: string): Promise` | `void` | TUI+server-only: asks the TUI to foreground a session. | +| `on` (typed) | `on(eventType: K, handler: TypedSessionLifecycleHandler): () => void` | unsubscribe fn | Subscribes to one lifecycle event type. 
| +| `on` (catch-all) | `on(handler: SessionLifecycleHandler): () => void` | unsubscribe fn | Subscribes to all lifecycle events. | + +Lifecycle event types for `client.on(...)`: `session.created`, `session.deleted`, `session.updated`, `session.foreground`, `session.background`. + +Sources: `dist/client.d.ts` (sdk-inventory.txt:1081-1518), especially 1112-1477; `dist/types.d.ts` (sdk-inventory.txt:3421-3528); README API docs (sdk-inventory.txt:96-199). + +## 3. Lifecycle methods on `CopilotSession` + +Public properties/getters/methods visible in `dist/session.d.ts`: + +| Member | Signature | Return shape | Notes | +| ----------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------ | --------------------------------------------------------------------------------------------- | +| `rpc` | `get rpc(): ReturnType` | typed session RPC facade | Low-level session RPC surface. | +| `workspacePath` | `get workspacePath(): string \| undefined` | optional path | Present only when infinite sessions are enabled; workspace contains `checkpoints/`, `plan.md`, `files/`. | +| `capabilities` | `get capabilities(): SessionCapabilities` | `{ ui?: { elicitation?: boolean } }` | Host capability snapshot; auto-updated on capability change events. | +| `ui` | `get ui(): SessionUiApi` | convenience UI API | Exposes `elicitation`, `confirm`, `select`, `input`; requires `capabilities.ui?.elicitation`. | +| `send` | `send(options: MessageOptions): Promise<string>` | message id | Queues a user prompt and returns immediately. | +| `sendAndWait` | `sendAndWait(options: MessageOptions, timeout?: number): Promise<AssistantMessageEvent \| undefined>` | final assistant message or `undefined` | Waits for `session.idle`; timeout defaults to 60000ms and does **not** abort in-flight work.
| +| `on` (typed) | `on(eventType: K, handler: TypedSessionEventHandler): () => void` | unsubscribe fn | Subscribes to one event type. | +| `on` (catch-all) | `on(handler: SessionEventHandler): () => void` | unsubscribe fn | Subscribes to all session events. | +| `getMessages` | `getMessages(): Promise` | complete event history | Returns the full persisted conversation/event stream. | +| `disconnect` | `disconnect(): Promise` | `void` | Releases in-memory resources but preserves on-disk session state for resume. | +| `destroy` | `destroy(): Promise` | `void` | Deprecated alias for `disconnect()`. | +| `[Symbol.asyncDispose]` | `[Symbol.asyncDispose](): Promise` | `void` | Enables `await using`. | +| `abort` | `abort(): Promise` | `void` | Cancels the currently processing message without invalidating the session. | +| `setModel` | `setModel(model: string, options?: { reasoningEffort?: ReasoningEffort; modelCapabilities?: ModelCapabilitiesOverride; }): Promise` | `void` | Switches model for future turns while preserving history. | +| `log` | `log(message: string, options?: { level?: "info" \| "warning" \| "error"; ephemeral?: boolean; }): Promise` | `void` | Writes timeline messages; docs explicitly say to use this instead of `console.log()`. | + +`MessageOptions` supports `prompt`, `attachments`, optional `mode` (`enqueue` or `immediate`), and per-turn `requestHeaders`. + +Sources: `dist/session.d.ts` (sdk-inventory.txt:1520-2003); `dist/types.d.ts` (sdk-inventory.txt:3292-3339); docs/examples.md (sdk-inventory.txt:3829-3894). + +## 4. 
Event types + +### 4.1 Harness-relevant event types with inspected payloads + +#### Streaming deltas / assistant turn + +| Event | Payload shape | Sources | +| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------- | +| `assistant.turn_start` | `{ interactionId?, turnId }` | `dist/generated/session-events.d.ts`: 1633-1668 | +| `assistant.intent` | `{ intent: string }` | `dist/generated/session-events.d.ts`: 1670-1699 | +| `assistant.reasoning` | `{ content: string, reasoningId: string }` | `dist/generated/session-events.d.ts`: 1700-1735 | +| `assistant.reasoning_delta` | `{ deltaContent: string, reasoningId: string }` | `dist/generated/session-events.d.ts`: 1737-1770 | +| `assistant.streaming_delta` | `{ totalResponseSizeBytes: number }` | `dist/generated/session-events.d.ts`: 1771-1800 | +| `assistant.message_start` | `{ messageId: string, phase?: string }` | `dist/generated/session-events.d.ts`: 1927-1960 | +| `assistant.message_delta` | `{ deltaContent: string, messageId: string, parentToolCallId? }` | `dist/generated/session-events.d.ts`: 1961-1999 | +| `assistant.message` | `{ content, messageId, model?, outputTokens?, toolRequests?, reasoningText?, reasoningOpaque?, encryptedContent?, interactionId?, requestId?, phase?, turnId?, anthropicAdvisorBlocks?, anthropicAdvisorModel?, parentToolCallId? }` | `dist/generated/session-events.d.ts`: 1801-1926 | +| `assistant.turn_end` | `{ turnId: string }` | `dist/generated/session-events.d.ts`: 2000-2032 | +| `assistant.usage` | usage metrics including `{ model, inputTokens?, outputTokens?, reasoningTokens?, reasoningEffort?, duration?, cost?, cacheReadTokens?, cacheWriteTokens?, ttftMs?, interTokenLatencyMs?, quotaSnapshots?, copilotUsage? 
}` | `dist/generated/session-events.d.ts`: 2033-2215 | + +#### Tool execution + +| Event | Payload shape | Sources | +| ------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------- | +| `tool.execution_start` | `{ toolCallId, toolName, arguments?, mcpServerName?, mcpToolName?, parentToolCallId?, turnId? }` | `dist/generated/session-events.d.ts`: 2323-2382 | +| `tool.execution_partial_result` | `{ partialOutput: string, toolCallId: string }` | `dist/generated/session-events.d.ts`: 2383-2416 | +| `tool.execution_progress` | `{ progressMessage: string, toolCallId: string }` | `dist/generated/session-events.d.ts`: 2417-2450 | +| `tool.execution_complete` | `{ success: boolean, toolCallId: string, result?, error?, model?, interactionId?, isUserRequested?, toolTelemetry?, turnId?, parentToolCallId? }`; `result` is `{ content, contents?, detailedContent? }`; `error` is `{ code?, message }` | `dist/generated/session-events.d.ts`: 2451-2665 | + +#### Interactivity / permissions / user prompts + +| Event | Payload shape | Sources | +| ----------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------- | +| `permission.requested` | `{ requestId: string, permissionRequest, promptRequest?, resolvedByHook? 
}`; `permissionRequest` is a rich union, not just a bare kind | `dist/generated/session-events.d.ts`: 3293-3628 | +| `permission.completed` | `{ requestId: string, result: PermissionResult, toolCallId? }` where result kinds include `approved`, `approved-for-session`, `approved-for-location`, `cancelled`, `denied-by-rules`, `denied-no-approval-rule-and-could-not-request-from-user`, `denied-interactively-by-user`, `denied-by-content-exclusion-policy`, `denied-by-permission-request-hook` | `dist/generated/session-events.d.ts`: 3909-4120 | +| `user_input.requested` | `{ question: string, choices?, allowFreeform?, requestId: string, toolCallId? }` | `dist/generated/session-events.d.ts`: 4121-4166 | +| `user_input.completed` | `{ answer?, requestId: string, wasFreeform? }` | `dist/generated/session-events.d.ts`: 4167-4204 | +| `elicitation.requested` | `{ message: string, requestId: string, elicitationSource?, mode?, requestedSchema?, toolCallId?, url? }` | `dist/generated/session-events.d.ts`: 4205-4257 | +| `elicitation.completed` | `{ requestId: string, action?, content? 
}` | `dist/generated/session-events.d.ts`: 4273-4308 | +| `command.execute` | `{ commandName, command, args, requestId }` | `dist/generated/session-events.d.ts`: 4588-4629 | +| `commands.changed` | `{ commands: Array<{ name: string, description?: string }> }` | `dist/generated/session-events.d.ts`: 4732-4765 | +| `capabilities.changed` | `{ ui?: { elicitation?: boolean } }` | `dist/generated/session-events.d.ts`: 4766-4801 | + +#### Lifecycle / error / compaction + +| Event | Payload shape | Sources | +| ----------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------- | +| `session.start` | session bootstrap metadata including `{ sessionId, startTime, copilotVersion, producer, selectedModel?, reasoningEffort?, remoteSteerable?, context? }` | `dist/generated/session-events.d.ts`: 135-238 | +| `session.resume` | `{ eventCount, resumeTime, selectedModel?, reasoningEffort?, continuePendingWork?, sessionWasActive?, context? }` | `dist/generated/session-events.d.ts`: 239-300 | +| `session.error` | `{ errorType: string, message: string, errorCode?, eligibleForAutoSwitch?, providerCallId?, stack?, statusCode?, url? }` | `dist/generated/session-events.d.ts`: 334-394 | +| `session.idle` | `{ aborted?: boolean }` | `dist/generated/session-events.d.ts`: 395-424 | +| `session.usage_info` | `{ currentTokens, tokenLimit, messagesLength, conversationTokens?, systemTokens?, toolDefinitionsTokens?, isInitial? }` | `dist/generated/session-events.d.ts`: 1116-1169 | +| `session.compaction_start` | `{ conversationTokens?, systemTokens?, toolDefinitionsTokens? 
}` | `dist/generated/session-events.d.ts`: 1170-1210 | +| `session.compaction_complete` | `{ success, checkpointNumber?, checkpointPath?, summaryContent?, messagesRemoved?, preCompactionTokens?, postCompactionTokens?, tokensRemoved?, compactionTokensUsed?, error?, requestId? }` | `dist/generated/session-events.d.ts`: 1211-1308 | +| `model.call_failure` | `{ source, model?, statusCode?, durationMs?, apiCallId?, providerCallId?, errorMessage?, initiator? }` | `dist/generated/session-events.d.ts`: 2195-2249 | +| `abort` | `{ reason: "user_initiated" \| "remote_command" \| "user_abort" }` | `dist/generated/session-events.d.ts`: 2250-2279 | + +### 4.2 Full `SessionEvent` union members + +The generated `SessionEvent` union is authoritative and currently includes all of these members: + +- `StartEvent`, `ResumeEvent`, `RemoteSteerableChangedEvent`, `ErrorEvent`, `IdleEvent`, `TitleChangedEvent`, `ScheduleCreatedEvent`, `ScheduleCancelledEvent`, `InfoEvent`, `WarningEvent`, `ModelChangeEvent`, `ModeChangedEvent`, `PlanChangedEvent`, `WorkspaceFileChangedEvent`, `HandoffEvent`, `TruncationEvent`, `SnapshotRewindEvent`, `ShutdownEvent`, `ContextChangedEvent`, `UsageInfoEvent`, `CompactionStartEvent`, `CompactionCompleteEvent`, `TaskCompleteEvent`, `UserMessageEvent`, `PendingMessagesModifiedEvent`, `AssistantTurnStartEvent`, `AssistantIntentEvent`, `AssistantReasoningEvent`, `AssistantReasoningDeltaEvent`, `AssistantStreamingDeltaEvent`, `AssistantMessageEvent`, `AssistantMessageStartEvent`, `AssistantMessageDeltaEvent`, `AssistantTurnEndEvent`, `AssistantUsageEvent`, `ModelCallFailureEvent`, `AbortEvent`, `ToolUserRequestedEvent`, `ToolExecutionStartEvent`, `ToolExecutionPartialResultEvent`, `ToolExecutionProgressEvent`, `ToolExecutionCompleteEvent`, `SkillInvokedEvent`, `SubagentStartedEvent`, `SubagentCompletedEvent`, `SubagentFailedEvent`, `SubagentSelectedEvent`, `SubagentDeselectedEvent`, `HookStartEvent`, `HookEndEvent`, `SystemMessageEvent`, 
`SystemNotificationEvent`, `PermissionRequestedEvent`, `PermissionCompletedEvent`, `UserInputRequestedEvent`, `UserInputCompletedEvent`, `ElicitationRequestedEvent`, `ElicitationCompletedEvent`, `SamplingRequestedEvent`, `SamplingCompletedEvent`, `McpOauthRequiredEvent`, `McpOauthCompletedEvent`, `ExternalToolRequestedEvent`, `ExternalToolCompletedEvent`, `CommandQueuedEvent`, `CommandExecuteEvent`, `CommandCompletedEvent`, `AutoModeSwitchRequestedEvent`, `AutoModeSwitchCompletedEvent`, `CommandsChangedEvent`, `CapabilitiesChangedEvent`, `ExitPlanModeRequestedEvent`, `ExitPlanModeCompletedEvent`, `ToolsUpdatedEvent`, `BackgroundTasksChangedEvent`, `SkillsLoadedEvent`, `CustomAgentsUpdatedEvent`, `McpServersLoadedEvent`, `McpServerStatusChangedEvent`, `ExtensionsLoadedEvent`. + +For OpenClaw harness work, the inspected payloads above are the important ones; the remaining union members exist in the shipped schema but are not otherwise documented in the README. + +Source: `dist/generated/session-events.d.ts`: 5. + +## 5. Tool contract + +- Public tool shape: + - `name: string` + - `description?: string` + - `parameters?: ZodSchema<TArgs> | Record<string, unknown>` + - `handler: ToolHandler<TArgs>` + - `overridesBuiltInTool?: boolean` + - `skipPermission?: boolean` +- `ToolHandler` signature: `(args: TArgs, invocation: ToolInvocation) => Promise<unknown> | unknown`. +- `ToolInvocation` carries `{ sessionId, toolCallId, toolName, arguments, traceparent?, tracestate? }`. +- Return values: + - A plain `string` + - A `ToolResultObject` with `{ textResultForLlm, binaryResultsForLlm?, resultType, error?, sessionLog?, toolTelemetry? }` + - README/examples also state any JSON-serializable handler return is accepted and auto-wrapped; extension docs add that `undefined` becomes an empty success and throwing becomes a failure/error message. +- `ToolResultType` is `"success" | "failure" | "rejected" | "denied" | "timeout"`.
+- Built-in tool override semantics: using a built-in tool name without `overridesBuiltInTool: true` throws. +- Permission bypass semantics: `skipPermission: true` suppresses permission prompts for that custom tool. +- Helper: `defineTool(name, config)` exists purely to preserve type inference from Zod schemas. + +Sources: `dist/types.d.ts` (sdk-inventory.txt:2203-2304); README tools section (sdk-inventory.txt:430-485); docs/agent-author.md (sdk-inventory.txt:3708-3745, 3905). + +## 6. Permission contract (`onPermissionRequest`) + +- Session config requires `onPermissionRequest: PermissionHandler` for both `createSession` and `resumeSession`. +- Declared handler type in `dist/types.d.ts`: + - `type PermissionHandler = (request: PermissionRequest, invocation: { sessionId: string }) => Promise<PermissionRequestResult> | PermissionRequestResult` + - `PermissionRequest` is typed only as `{ kind: "shell" | "write" | "mcp" | "read" | "url" | "custom-tool" | "memory" | "hook"; toolCallId?: string }` + - `PermissionRequestResult` is `PermissionDecisionRequest["result"] | { kind: "no-result" }` +- README claims the runtime supplies richer fields such as `toolName`, `fileName`, and `fullCommandText` to custom handlers; the generated `permission.requested` event schema confirms a richer union exists with per-kind payloads: + - `shell`: `fullCommandText`, `commands[]`, `possiblePaths[]`, `possibleUrls[]`, `hasWriteFileRedirection`, `intention`, `warning`, `canOfferSessionApproval` + - `write`: `fileName`, `diff`, `newFileContents?`, `intention`, `canOfferSessionApproval` + - `read`: `path`, `intention` + - `mcp`: `serverName`, `toolName`, `toolTitle`, `args?`, `readOnly` + - `url`: `url`, `intention` + - `memory`: `action?`, `fact`, `subject?`, `citations?`, `direction?`, `reason?` + - `custom-tool`: `toolName`, `toolDescription`, `args?` + - `hook`: `toolName`, `toolArgs?`, `hookMessage?` + - plus extension-specific `extension-management` and `extension-permission-access` variants in the event
schema. +- Result kinds explicitly documented in README: `approved`, `denied-interactively-by-user`, `denied-no-approval-rule-and-could-not-request-from-user`, `denied-by-rules`, `denied-by-content-exclusion-policy`, `no-result`. +- Protocol-v2 caveat: `NO_RESULT_PERMISSION_V2_ERROR = "Permission handlers cannot return 'no-result' when connected to a protocol v2 server."` +- Timeout behavior: not documented in the public types/docs inspected. + +Sources: `dist/types.d.ts` (sdk-inventory.txt:2608-2619); README permission handling (sdk-inventory.txt:804-879); `dist/session.d.ts` (sdk-inventory.txt:1529, 1813-1822, 1866-1873); `dist/generated/session-events.d.ts`: 3293-3628, 3909-4120. + +## 7. User-input contract (`onUserInputRequest`) + +- Session config field: `onUserInputRequest?: UserInputHandler`. +- Declared handler type: `(request: UserInputRequest, invocation: { sessionId: string }) => Promise<UserInputResponse> | UserInputResponse`. +- `UserInputRequest` fields: + - `question: string` + - `choices?: string[]` + - `allowFreeform?: boolean` (default `true`) +- `UserInputResponse` fields: + - `answer: string` + - `wasFreeform: boolean` +- README says providing the handler enables the `ask_user` tool. +- The event stream adds request/response correlation fields not present in the handler type: + - `user_input.requested` includes `requestId` and optional `toolCallId` + - `user_input.completed` includes `requestId`, optional `answer`, optional `wasFreeform` +- Timeout behavior is not documented in the inspected public surface. + +Sources: `dist/types.d.ts` (sdk-inventory.txt:2624-2657, 3091-3095); README user-input section (sdk-inventory.txt:881-905); `dist/generated/session-events.d.ts`: 4121-4204. + +## 8. Infinite sessions + +- `SessionConfig.infiniteSessions?: InfiniteSessionConfig` controls the feature.
+- `InfiniteSessionConfig` fields: + - `enabled?: boolean` (default `true`) + - `backgroundCompactionThreshold?: number` (default `0.80`) + - `bufferExhaustionThreshold?: number` (default `0.95`) +- README says infinite sessions are the default, automatically manage context limits, and persist state to a workspace directory. +- `CopilotSession.workspacePath` is populated only when infinite sessions are enabled. +- The workspace is explicitly documented as containing `checkpoints/`, `plan.md`, and `files/`. +- README example shows the default location as `~/.copilot/session-state/{sessionId}/`. +- Auto-compaction trigger semantics: + - background compaction starts at the configured `backgroundCompactionThreshold` + - the session blocks at `bufferExhaustionThreshold` until compaction finishes + - events emitted: `session.compaction_start` and `session.compaction_complete` +- Compaction result payload includes checkpoint metadata (`checkpointNumber`, `checkpointPath`), summary text (`summaryContent`), before/after token counts, messages removed, tokens removed, and nested `compactionTokensUsed` usage breakdown. + +Sources: README infinite sessions section (sdk-inventory.txt:627-660); `dist/session.d.ts` (sdk-inventory.txt:1594-1598); `dist/types.d.ts` (sdk-inventory.txt:2980-3006, 3168-3172); docs/examples.md (sdk-inventory.txt:4330-4346); `dist/generated/session-events.d.ts`: 1170-1308. + +## 9. Reasoning effort + +- Declared enum/type: `type ReasoningEffort = "low" | "medium" | "high" | "xhigh"`. +- Session config field: `reasoningEffort?: ReasoningEffort`. +- It is only valid when `ModelCapabilities.supports.reasoningEffort` is `true`. +- Discovery/model metadata surface: + - `ModelInfo.supportedReasoningEfforts?: ReasoningEffort[]` + - `ModelInfo.defaultReasoningEffort?: ReasoningEffort` +- The README repeatedly points callers to `listModels()` to discover support/defaults rather than assuming a global SDK default. 
+- Runtime/event reflection: + - `session.start` / `session.resume` metadata may include `reasoningEffort?: string` + - `assistant.usage` may also include `reasoningEffort?: string` plus `reasoningTokens?` + +Sources: README API docs (sdk-inventory.txt:116-123, 118); `dist/types.d.ts` (sdk-inventory.txt:3003-3006, 3023-3027, 3445-3498); `dist/generated/session-events.d.ts`: 181-183, 281-283, 2115-2121. + +## 10. Telemetry + +- `TelemetryConfig` shape: + - `otlpEndpoint?: string` + - `filePath?: string` + - `exporterType?: string` (`"otlp-http"` or `"file"` in README) + - `sourceName?: string` + - `captureContent?: boolean` +- `CopilotClientOptions.telemetry?: TelemetryConfig` configures CLI-process telemetry by setting environment variables on the spawned CLI. +- `TraceContextProvider` signature: `() => TraceContext | Promise<TraceContext>`. +- `TraceContext` shape: `{ traceparent?: string; tracestate?: string }`. +- `CopilotClientOptions.onGetTraceContext?: TraceContextProvider` is called before `session.create`, `session.resume`, and `session.send` RPCs to inject distributed trace headers. +- Tool handlers receive inbound trace context on `ToolInvocation.traceparent` and `ToolInvocation.tracestate`. +- `dist/telemetry.d.ts` exports `getTraceContext(provider?)` as a helper that returns `{}` when no provider is configured. + +Sources: README telemetry section (sdk-inventory.txt:759-803); `dist/types.d.ts` (sdk-inventory.txt:2020-2049, 2137-2167, 2253-2262); `dist/telemetry.d.ts` (sdk-inventory.txt:3560-3574). + +## 11. Auth modes + +### Client-level auth/config + +- `gitHubToken?: string`: explicit GitHub token; takes priority over other auth methods. +- `useLoggedInUser?: boolean`: default `true`, but defaults to `false` when `gitHubToken` is provided. +- `copilotHome?: string`: base directory for Copilot data; only used when the SDK spawns the CLI process. +- `cliUrl?: string`: connect to an existing server instead of spawning the CLI.
+- `useLoggedInUser` cannot be used with `cliUrl`; `copilotHome` is ignored with `cliUrl`. +- `getAuthStatus()` returns `{ isAuthenticated, authType?, host?, login?, statusMessage? }`, where `authType` can be `user`, `env`, `gh-cli`, `hmac`, `api-key`, or `token`. + +### Session-level auth/BYOK + +- `SessionConfig.gitHubToken?: string` is separate from client auth. The docs say it is resolved into a full GitHub identity used for content exclusion, model routing, and quota checks, enabling multitenant sessions. +- `SessionConfig.provider?: ProviderConfig` switches the session to a custom API provider (`openai`, `azure`, or `anthropic`) with `baseUrl`, optional `apiKey`, optional `bearerToken` (takes precedence over `apiKey`), optional `wireApi`, optional `azure.apiVersion`, optional `headers`, `modelId`, `wireModel`, `maxInputTokens`, `maxOutputTokens`. +- README explicitly says `model` is required when using `provider`. +- `enableSessionTelemetry` is always disabled when a custom `provider` is configured. + +### Legality / unresolved combinations + +- Explicitly documented illegal/mutually exclusive combos: + - `cliUrl` with `useLoggedInUser` + - constructor rejects mutually exclusive options such as `cliUrl` with `useStdio` or `cliPath` +- The inspected inventory does **not** explicitly document whether `provider` may be combined with client-level/session-level GitHub auth, so treat that as an open probe. + +Sources: README options/custom-provider docs (sdk-inventory.txt:83-94, 116-123, 696-757); `dist/client.d.ts` (sdk-inventory.txt:1121-1123, 1304-1308); `dist/types.d.ts` (sdk-inventory.txt:2051-2167, 3077-3085, 3174-3183, 3223-3288, 3430-3441). + +## 12. `copilotHome` + +What is explicit in the inventory: + +- `copilotHome` is the base directory for Copilot data: "session state, config, etc."; it sets `COPILOT_HOME` on the spawned CLI process. +- If omitted, the CLI defaults to `~/.copilot`. 
+- `workspacePath` examples place per-session state under `~/.copilot/session-state/{sessionId}/`, with `checkpoints/`, `plan.md`, and `files/` inside that session directory. + +What is **not** explicit in the inventory: + +- Exact full directory tree under `copilotHome` +- File/lock semantics for multiple `CopilotClient` instances sharing the same `copilotHome` +- Whether same-process sharing is safe under concurrent session creation/resume/delete + +OpenClaw implication: the docs are not strong enough to justify shared `copilotHome` pools. Q5's per-agent-pool decision should therefore keep isolated `copilotHome` directories until `spike-app` proves concurrency safety. + +Sources: README options/infinite-session docs (sdk-inventory.txt:90-94, 627-660); `dist/types.d.ts` (sdk-inventory.txt:2067-2073); `dist/session.d.ts` (sdk-inventory.txt:1594-1598). + +## 13. Replay / resume + +- `resumeSession(sessionId, config)` re-attaches to a previous session and keeps conversation history. +- `disconnect()` preserves on-disk session state; `stop()` also preserves it; `deleteSession()` is the destructive operation. +- `getMessages()` returns the complete session event history (`SessionEvent[]`). +- `listSessions(filter?)` returns persisted session metadata including `sessionId`, `startTime`, `modifiedTime`, `summary?`, `isRemote`, `context?`. +- `getSessionMetadata(sessionId)` is a targeted metadata lookup. +- `getLastSessionId()` returns the most recently updated session id. 
+- Resume-specific semantics in `ResumeSessionConfig`: + - `disableResume?: boolean` skips emitting `session.resume` + - `continuePendingWork?: boolean` resumes in-flight permissions/tool work; otherwise pending work is treated as interrupted and permissions are re-emitted as `permission.requested` +- Resume event metadata distinguishes hot vs cold attach: + - `sessionWasActive?: boolean` means the runtime already had the session in memory + - `false`/missing means a cold resume reconstructed from persisted event log + +Sources: README API docs (sdk-inventory.txt:128-170, 281-287, 867-875); `dist/client.d.ts` (sdk-inventory.txt:1246-1395); `dist/session.d.ts` (sdk-inventory.txt:1892-1944); `dist/types.d.ts` (sdk-inventory.txt:3200-3221, 3409-3417); `dist/generated/session-events.d.ts`: 266-299. + +## 14. Models advertised + +Explicit model ids mentioned in the inspected inventory: + +- `gpt-5` +- `gpt-4` +- `gpt-4.1` +- `claude-sonnet-4.5` +- `claude-sonnet-4.6` +- example BYOK/Ollama model: `deepseek-coder-v2:16b` + +Discovery API: + +- `client.listModels(): Promise` is the authoritative discovery path. +- `ModelInfo` carries `id`, `name`, `capabilities`, optional `policy`, optional `billing`, optional `supportedReasoningEfforts`, optional `defaultReasoningEffort`. +- `CopilotClientOptions.onListModels` can override discovery entirely (useful for BYOK mode). + +What is **not** in the inspected inventory: + +- A static canonical built-in model catalog beyond the handful of examples above. + +Sources: README/examples (sdk-inventory.txt:38, 65, 117-118, 633-665, 713-749); `dist/client.d.ts` (sdk-inventory.txt:1310-1320); `dist/types.d.ts` (sdk-inventory.txt:2130-2135, 3483-3498); `dist/session.d.ts` (sdk-inventory.txt:1975-1982). + +## 15. Error surface + +### Public methods + +- Public methods generally document `@throws Error`; the SDK does **not** expose a rich public exception-class hierarchy in the inspected `.d.ts` files. 
+- `stop()` is unusual: instead of throwing cleanup failures, it resolves to `Error[]`. +- Constructor may throw on mutually exclusive options. +- `createSession()` can throw if auto-start is disabled and the client is disconnected. +- `resumeSession()` can throw if the session does not exist or the client is not connected. +- `sendAndWait()` throws on timeout or connection/disconnect failure. +- `Tool` registration can throw for built-in name collisions unless `overridesBuiltInTool: true` is set. +- README says missing `model` with custom `provider` throws. +- Protocol-v2 permission adapter throws the exported `NO_RESULT_PERMISSION_V2_ERROR` if a handler returns `no-result`. + +### Event / telemetry error reporting + +- `session.error` carries `{ errorType, message, errorCode?, statusCode?, providerCallId?, stack?, url?, eligibleForAutoSwitch? }`. +- `model.call_failure` carries failed model-call telemetry (`source`, `model?`, `statusCode?`, `durationMs?`, `providerCallId?`, `errorMessage?`). +- `tool.execution_complete.error` carries `{ code?, message }`. +- Hook APIs expose explicit recovery output: `onErrorOccurred` may return `errorHandling: "retry" | "skip" | "abort"` plus `retryCount?`. + +### Retryability + +- Explicitly retry-like signals in the inspected surface: + - `session.error.eligibleForAutoSwitch` for rate-limit flows + - `auto_mode_switch.requested` / `auto_mode_switch.completed` events + - `onErrorOccurred` hook output `errorHandling: "retry"` +- The SDK does **not** publish a general retryable/non-retryable error enum for all thrown errors. Anything beyond the rate-limit/auto-switch path needs probing. 
+ +Sources: README/tool/provider/error docs (sdk-inventory.txt:459-480, 753-757, 1013-1021); `dist/client.d.ts` (sdk-inventory.txt:1119-1123, 1147-1214, 1222-1225, 1252-1255, 1284-1288, 1346-1355); `dist/session.d.ts` (sdk-inventory.txt:1529, 1645-1650, 1813-1822, 1866-1889); `dist/generated/session-events.d.ts`: 361-393, 2195-2279, 2478-2529; `dist/types.d.ts` (sdk-inventory.txt:2822-2871). + +## 16. Open SDK questions + +Concrete gaps to answer in `spike-app` before landing a real harness: + +1. **Permission handler typing mismatch:** README says `onPermissionRequest` receives rich per-kind fields (`toolName`, `fileName`, `fullCommandText`), but `dist/types.d.ts` types `PermissionRequest` as just `{ kind, toolCallId? }`. What object shape does the runtime actually deliver to JS/TS handlers? +2. **Permission timeouts:** what happens if `onPermissionRequest` never resolves? Is there a default timeout, cancellation, or session hang? +3. **User-input timeouts/cancellation:** same question for `onUserInputRequest`. +4. **`copilotHome` concurrency:** can multiple `CopilotClient` instances in one process safely share one `copilotHome`, or are there lock/race hazards around `session-state/` and config files? +5. **Exact `copilotHome` layout:** beyond `session-state/{sessionId}/{checkpoints,plan.md,files}`, what other top-level files/directories are created, and which are session-global versus client-global? +6. **Provider/auth combination matrix:** what combinations of client-level `gitHubToken`, session-level `gitHubToken`, `useLoggedInUser`, and `provider` are accepted or rejected in practice? +7. **Resume behavior for encrypted reasoning fields:** `assistant.message` notes `encryptedContent`/`reasoningOpaque` are session-bound and stripped on resume. What survives after process restart versus live reconnect? +8. 
**Event coverage needed by OpenClaw:** do we need additional exact-string handling for non-core events like `ToolsUpdatedEvent`, `SkillsLoadedEvent`, `McpServersLoadedEvent`, `ExtensionsLoadedEvent`, or is the harness safe to ignore them? +9. **Cold-resume pending work:** with `continuePendingWork: true`, what concrete low-level RPCs are required to finish previously pending external tool calls in an SDK-only consumer? +10. **Model discovery under BYOK:** when `provider` is set without `onListModels`, what does `listModels()` return, if anything? + +Sources: `dist/types.d.ts` (sdk-inventory.txt:2608-2619, 2624-2657, 3203-3221, 3174-3183, 3226-3288); README permission/user-input/provider docs (sdk-inventory.txt:823-845, 883-905, 696-757); `dist/generated/session-events.d.ts`: 266-299, 1828-1889, 3293-3628. diff --git a/scripts/build-all.mjs b/scripts/build-all.mjs index 833e77797900..16bded7fc93b 100644 --- a/scripts/build-all.mjs +++ b/scripts/build-all.mjs @@ -73,6 +73,19 @@ export const BUILD_ALL_STEPS = [ kind: "node", args: ["--experimental-strip-types", "scripts/copy-hook-metadata.ts"], }, + { + label: "copy-copilot-sdk-manifest", + kind: "node", + args: ["--experimental-strip-types", "scripts/copy-copilot-sdk-manifest.ts"], + cache: { + inputs: [ + "scripts/copy-copilot-sdk-manifest.ts", + "scripts/lib/copy-assets.ts", + "src/commands/copilot-sdk-install-manifest", + ], + outputs: ["dist/commands/copilot-sdk-install-manifest"], + }, + }, { label: "copy-export-html-templates", kind: "node", @@ -127,6 +140,7 @@ export const BUILD_ALL_PROFILES = { "check-plugin-sdk-exports", "plugins:assets:copy", "copy-hook-metadata", + "copy-copilot-sdk-manifest", "copy-export-html-templates", "ui:build", "write-build-info", diff --git a/scripts/copy-copilot-sdk-manifest.ts b/scripts/copy-copilot-sdk-manifest.ts new file mode 100644 index 000000000000..dde6c3b53c1a --- /dev/null +++ b/scripts/copy-copilot-sdk-manifest.ts @@ -0,0 +1,61 @@ +#!/usr/bin/env tsx +/** + * Copy the 
Copilot SDK install manifest (package.json + package-lock.json) + * from src/commands/copilot-sdk-install-manifest/ to dist/commands/copilot-sdk-install-manifest/. + * + * The Copilot agent runtime's on-demand SDK installer + * (src/commands/copilot-sdk-install.ts) resolves the manifest dir + * relative to its compiled location via `import.meta.url`. tsdown does + * not copy non-source files alongside compiled output, so we mirror the + * manifest here as part of the build chain. Mirrors the precedent set + * by scripts/copy-hook-metadata.ts. + */ + +import fs from "node:fs"; +import path from "node:path"; +import { ensureDirectory, logVerboseCopy, resolveBuildCopyContext } from "./lib/copy-assets.ts"; + +const context = resolveBuildCopyContext(import.meta.url); + +const SRC_MANIFEST_DIR = path.join( + context.projectRoot, + "src", + "commands", + "copilot-sdk-install-manifest", +); +const DIST_MANIFEST_DIR = path.join( + context.projectRoot, + "dist", + "commands", + "copilot-sdk-install-manifest", +); + +const MANIFEST_FILES = ["package.json", "package-lock.json"]; + +function copyCopilotSdkManifest(): void { + if (!fs.existsSync(SRC_MANIFEST_DIR)) { + throw new Error( + `${context.prefix} Source manifest dir missing: ${SRC_MANIFEST_DIR}. This directory is part of the Copilot agent runtime pinned install graph and must exist in the repo.`, + ); + } + + ensureDirectory(DIST_MANIFEST_DIR); + + for (const fileName of MANIFEST_FILES) { + const sourcePath = path.join(SRC_MANIFEST_DIR, fileName); + const destPath = path.join(DIST_MANIFEST_DIR, fileName); + if (!fs.existsSync(sourcePath)) { + throw new Error( + `${context.prefix} Missing manifest file ${sourcePath}. 
Re-generate with \`npm install --package-lock-only\` in src/commands/copilot-sdk-install-manifest/.`, + ); + } + fs.copyFileSync(sourcePath, destPath); + logVerboseCopy(context, `Copied copilot-sdk-install-manifest/${fileName}`); + } + + console.log( + `${context.prefix} Copied Copilot SDK install manifest (${MANIFEST_FILES.length} files).`, + ); +} + +copyCopilotSdkManifest(); diff --git a/scripts/deadcode-unused-files.allowlist.mjs b/scripts/deadcode-unused-files.allowlist.mjs index 3c7f9aa01f57..78da8175f0e7 100644 --- a/scripts/deadcode-unused-files.allowlist.mjs +++ b/scripts/deadcode-unused-files.allowlist.mjs @@ -13,6 +13,9 @@ export const KNIP_OPTIONAL_UNUSED_FILE_ALLOWLIST = [ "extensions/acpx/src/runtime-internals/mcp-proxy.mjs", "extensions/canvas/src/host/a2ui-app/bootstrap.js", "extensions/canvas/src/host/a2ui-app/rolldown.config.mjs", + "extensions/copilot/src/doctor-probes.ts", + "extensions/copilot/src/telemetry-bridge.ts", + "extensions/copilot/src/user-input-bridge.ts", "extensions/diffs/src/viewer-client.ts", "extensions/diffs/src/viewer-payload.ts", "extensions/matrix/src/plugin-entry.runtime.js", diff --git a/scripts/lib/bundled-plugin-build-entries.mjs b/scripts/lib/bundled-plugin-build-entries.mjs index e3b5cdc83e93..d6881d4f2fe3 100644 --- a/scripts/lib/bundled-plugin-build-entries.mjs +++ b/scripts/lib/bundled-plugin-build-entries.mjs @@ -52,6 +52,17 @@ function shouldBuildBundledDistEntry(packageJson) { return packageJson?.openclaw?.build?.bundledDist !== false; } +function isExcludedTopLevelPublicSurfaceFile(fileName) { + const normalizedName = fileName.toLowerCase(); + return ( + normalizedName.endsWith(".d.ts") || + /^config-api\.(?:[cm]?[jt]s)$/u.test(normalizedName) || + TOP_LEVEL_PRIVATE_TEST_SURFACE_RE.test(normalizedName) || + normalizedName.includes(".fixture.") || + normalizedName.includes(".snap") + ); +} + export function collectPluginSourceEntries(packageJson) { let packageEntries = 
Array.isArray(packageJson?.openclaw?.extensions) ? packageJson.openclaw.extensions.filter( @@ -86,14 +97,7 @@ export function collectTopLevelPublicSurfaceEntries(pluginDir) { return []; } - const normalizedName = dirent.name.toLowerCase(); - if ( - normalizedName.endsWith(".d.ts") || - /^config-api\.(?:[cm]?[jt]s)$/u.test(normalizedName) || - TOP_LEVEL_PRIVATE_TEST_SURFACE_RE.test(normalizedName) || - normalizedName.includes(".fixture.") || - normalizedName.includes(".snap") - ) { + if (isExcludedTopLevelPublicSurfaceFile(dirent.name)) { return []; } @@ -114,14 +118,7 @@ function collectTopLevelPublicSurfaceEntriesFromFiles(relativeFiles) { return []; } - const normalizedName = relativeFile.toLowerCase(); - if ( - normalizedName.endsWith(".d.ts") || - /^config-api\.(?:[cm]?[jt]s)$/u.test(normalizedName) || - TOP_LEVEL_PRIVATE_TEST_SURFACE_RE.test(normalizedName) || - normalizedName.includes(".fixture.") || - normalizedName.includes(".snap") - ) { + if (isExcludedTopLevelPublicSurfaceFile(relativeFile)) { return []; } diff --git a/src/agents/copilot-routing.test.ts b/src/agents/copilot-routing.test.ts new file mode 100755 index 000000000000..71f095135ec6 --- /dev/null +++ b/src/agents/copilot-routing.test.ts @@ -0,0 +1,142 @@ +import { describe, expect, it } from "vitest"; +import type { OpenClawConfig } from "../config/types.openclaw.js"; +import { modelSelectionShouldEnsureCopilotSdk } from "./copilot-routing.js"; + +const emptyCfg = {} as OpenClawConfig; + +function cfgWithProviderRuntime(id: string): OpenClawConfig { + return { + models: { + providers: { + "github-copilot": { agentRuntime: { id } }, + }, + }, + } as unknown as OpenClawConfig; +} + +function cfgWithModelRuntime(modelId: string, id: string): OpenClawConfig { + return { + models: { + providers: { + "github-copilot": { + models: [{ id: modelId, agentRuntime: { id } }], + }, + }, + }, + } as unknown as OpenClawConfig; +} + +describe("modelSelectionShouldEnsureCopilotSdk", () => { + it("returns false 
for github-copilot/* without explicit agentRuntime opt-in", () => { + // Built-in GitHub Copilot provider already supports these models; + // we must not nag users with a 260 MB SDK install prompt. + expect( + modelSelectionShouldEnsureCopilotSdk({ + model: "github-copilot/gpt-4o", + config: emptyCfg, + }), + ).toBe(false); + }); + + it("returns true when the provider config sets agentRuntime.id = copilot", () => { + expect( + modelSelectionShouldEnsureCopilotSdk({ + model: "github-copilot/gpt-4o", + config: cfgWithProviderRuntime("copilot"), + }), + ).toBe(true); + }); + + it("returns true when a model override sets agentRuntime.id = copilot", () => { + expect( + modelSelectionShouldEnsureCopilotSdk({ + model: "github-copilot/claude-sonnet-4", + config: cfgWithModelRuntime("claude-sonnet-4", "copilot"), + }), + ).toBe(true); + }); + + it("normalizes id casing/whitespace before matching", () => { + expect( + modelSelectionShouldEnsureCopilotSdk({ + model: "github-copilot/gpt-4o", + config: cfgWithProviderRuntime(" Copilot "), + }), + ).toBe(true); + }); + + it("returns false when the runtime id is anything other than copilot", () => { + expect( + modelSelectionShouldEnsureCopilotSdk({ + model: "github-copilot/gpt-4o", + config: cfgWithProviderRuntime("pi"), + }), + ).toBe(false); + expect( + modelSelectionShouldEnsureCopilotSdk({ + model: "github-copilot/gpt-4o", + config: cfgWithProviderRuntime("codex"), + }), + ).toBe(false); + }); + + it("model-scope override takes precedence over provider scope", () => { + const cfg = { + models: { + providers: { + "github-copilot": { + agentRuntime: { id: "copilot" }, + models: [{ id: "gpt-4o", agentRuntime: { id: "pi" } }], + }, + }, + }, + } as unknown as OpenClawConfig; + expect( + modelSelectionShouldEnsureCopilotSdk({ + model: "github-copilot/gpt-4o", + config: cfg, + }), + ).toBe(false); + // A different model that has no override still inherits the provider-level opt-in. 
+ expect( + modelSelectionShouldEnsureCopilotSdk({ + model: "github-copilot/claude-sonnet-4", + config: cfg, + }), + ).toBe(true); + }); + + it("returns false for other providers regardless of agentRuntime config", () => { + const cfg = { + models: { + providers: { + openai: { agentRuntime: { id: "copilot" } }, + }, + }, + } as unknown as OpenClawConfig; + expect(modelSelectionShouldEnsureCopilotSdk({ model: "openai/gpt-4o", config: cfg })).toBe( + false, + ); + expect( + modelSelectionShouldEnsureCopilotSdk({ + model: "anthropic/claude-3", + config: emptyCfg, + }), + ).toBe(false); + expect( + modelSelectionShouldEnsureCopilotSdk({ + model: "openai-codex/gpt-4o", + config: emptyCfg, + }), + ).toBe(false); + }); + + it("returns false for undefined, empty, or unprefixed model refs", () => { + expect(modelSelectionShouldEnsureCopilotSdk({ config: emptyCfg })).toBe(false); + expect(modelSelectionShouldEnsureCopilotSdk({ model: "", config: emptyCfg })).toBe(false); + expect(modelSelectionShouldEnsureCopilotSdk({ model: "gpt-4o", config: emptyCfg })).toBe(false); + expect( + modelSelectionShouldEnsureCopilotSdk({ model: "github-copilot/", config: emptyCfg }), + ).toBe(false); + }); +}); diff --git a/src/agents/copilot-routing.ts b/src/agents/copilot-routing.ts new file mode 100755 index 000000000000..aeaa16aa6636 --- /dev/null +++ b/src/agents/copilot-routing.ts @@ -0,0 +1,55 @@ +import type { OpenClawConfig } from "../config/types.openclaw.js"; +import { resolveModelRuntimePolicy } from "./model-runtime-policy.js"; +import { parseModelRefProvider } from "./openai-codex-routing.js"; + +export const GITHUB_COPILOT_PROVIDER_ID = "github-copilot"; + +/** + * Canonical id of the Copilot agent runtime plugin + * (see `extensions/copilot/index.ts`, which registers as `id: "copilot"`). 
+ */ +export const COPILOT_RUNTIME_ID = "copilot"; + +function parseModelRefId(model: string | undefined): string | undefined { + if (typeof model !== "string") { + return undefined; + } + const trimmed = model.trim(); + const slash = trimmed.indexOf("/"); + if (slash <= 0 || slash === trimmed.length - 1) { + return undefined; + } + return trimmed.slice(slash + 1); +} + +/** + * Returns true when the selected model should trigger the on-demand + * install of `@github/copilot-sdk` for the Copilot agent runtime. + * + * Gating contract (review #2, P1): + * - Model ref must use the `github-copilot/*` provider prefix. + * - The user's config must explicitly opt in by setting + * `agentRuntime.id: "copilot"` at the provider, model, or agent scope + * (resolved via `resolveModelRuntimePolicy`). + * + * Without the explicit opt-in we fall through to the built-in GitHub + * Copilot provider, which has shipped support for `github-copilot/*` + * models for a long time and must not surface a 260 MB SDK install + * prompt to users who never asked for the runtime. 
+ */ +export function modelSelectionShouldEnsureCopilotSdk(params: { + model?: string; + config?: OpenClawConfig; +}): boolean { + if (parseModelRefProvider(params.model) !== GITHUB_COPILOT_PROVIDER_ID) { + return false; + } + const modelId = parseModelRefId(params.model); + const resolved = resolveModelRuntimePolicy({ + config: params.config, + provider: GITHUB_COPILOT_PROVIDER_ID, + modelId, + }); + const runtimeId = resolved.policy?.id?.trim().toLowerCase(); + return runtimeId === COPILOT_RUNTIME_ID; +} diff --git a/src/agents/embedded-agent-runner/run.overflow-compaction.test.ts b/src/agents/embedded-agent-runner/run.overflow-compaction.test.ts index 0292859c21cf..c4319f663808 100644 --- a/src/agents/embedded-agent-runner/run.overflow-compaction.test.ts +++ b/src/agents/embedded-agent-runner/run.overflow-compaction.test.ts @@ -680,6 +680,89 @@ describe("runEmbeddedAgent overflow compaction trigger routing", () => { ).toBeUndefined(); }); + it("forwards unscoped tool auth profiles to Copilot plugin harnesses", async () => { + const { clearAgentHarnesses, registerAgentHarness } = await import("../harness/registry.js"); + const pluginRunAttempt = vi.fn(async () => + makeAttemptResult({ assistantTexts: ["ok"] }), + ); + const runtimePlan = makeForwardedRuntimePlan({ + resolvedRef: { + provider: "github-copilot", + modelId: "gpt-4o", + harnessId: "copilot", + }, + auth: { + harnessAuthProvider: "github-copilot", + forwardedAuthProfileId: "github-copilot:work", + }, + }); + clearAgentHarnesses(); + registerAgentHarness({ + id: "copilot", + label: "Copilot", + supports: (ctx) => + ctx.provider === "github-copilot" + ? 
{ supported: true, priority: 100 } + : { supported: false }, + runAttempt: pluginRunAttempt, + }); + mockedBuildAgentRuntimePlan.mockReturnValueOnce(runtimePlan); + mockedGetApiKeyForModel.mockRejectedValueOnce(new Error("generic auth should be skipped")); + const copilotAuthStore = { + version: 1, + profiles: { + "github-copilot:work": { + type: "oauth" as const, + provider: "github-copilot", + access: "access", + refresh: "refresh", + expires: Date.now() + 60_000, + }, + "anthropic:work": { + type: "api_key" as const, + provider: "anthropic", + key: "sk-ant", + }, + }, + }; + mockedEnsureAuthProfileStoreWithoutExternalProfiles.mockReturnValueOnce(copilotAuthStore); + + try { + await runEmbeddedAgent({ + ...overflowBaseRunParams, + provider: "github-copilot", + model: "gpt-4o", + config: { + models: { + providers: { + "github-copilot": { + agentRuntime: { id: "copilot" }, + baseUrl: "https://api.githubcopilot.com", + models: [], + }, + }, + }, + }, + authProfileId: "github-copilot:work", + authProfileIdSource: "user", + runId: "copilot-plugin-harness-forwards-tool-auth-store", + }); + } finally { + clearAgentHarnesses(); + } + + expect(mockedGetApiKeyForModel).not.toHaveBeenCalled(); + expect(pluginRunAttempt).toHaveBeenCalledTimes(1); + const harnessParams = mockCallArg(pluginRunAttempt) as { + authProfileStore?: { profiles?: Record }; + toolAuthProfileStore?: unknown; + }; + const forwardedAuthStore = expectRecordFields(harnessParams.authProfileStore, {}); + const authProfiles = expectRecordFields(forwardedAuthStore.profiles, {}); + expect(Object.keys(authProfiles)).toEqual(["github-copilot:work"]); + expect(harnessParams.toolAuthProfileStore).toBe(copilotAuthStore); + }); + it("forwards optional attempt params and the runtime plan into one attempt call", async () => { const internalEvents: AgentInternalEvent[] = []; const forwardingCase = makeForwardingCase(internalEvents); diff --git a/src/agents/embedded-agent-runner/run.ts 
b/src/agents/embedded-agent-runner/run.ts index 3ca564c36f6a..bbb3ad5de5d9 100644 --- a/src/agents/embedded-agent-runner/run.ts +++ b/src/agents/embedded-agent-runner/run.ts @@ -1119,6 +1119,8 @@ export async function runEmbeddedAgent( : lastProfileId, ) : attemptAuthProfileStore; + const harnessBuildsOpenClawTools = + agentHarness.id === "codex" || agentHarness.id === "copilot"; const { sessionAgentId } = resolveSessionAgentIds({ sessionKey: params.sessionKey, config: params.config, @@ -1605,9 +1607,9 @@ export async function runEmbeddedAgent( initialReplayState: accumulatedReplayState, authStorage, authProfileStore: runAttemptAuthProfileStore, - // Codex builds OpenClaw tools inside its harness. Keep transport - // auth scoped while letting tool construction see plugin creds. - toolAuthProfileStore: agentHarness.id === "codex" ? attemptAuthProfileStore : undefined, + // These harnesses build OpenClaw tools internally. Keep transport auth + // scoped while letting tool construction see plugin/provider creds. + toolAuthProfileStore: harnessBuildsOpenClawTools ? attemptAuthProfileStore : undefined, modelRegistry, agentId: workspaceResolution.agentId, beforeAgentStartResult, diff --git a/src/agents/embedded-agent-runner/run/images.ts b/src/agents/embedded-agent-runner/run/images.ts index 9f08dae03d2c..4975fd50e630 100644 --- a/src/agents/embedded-agent-runner/run/images.ts +++ b/src/agents/embedded-agent-runner/run/images.ts @@ -448,6 +448,7 @@ export async function loadImageFromRef( options?: { maxBytes?: number; workspaceOnly?: boolean; + localRoots?: readonly string[]; sandbox?: { root: string; bridge: SandboxFsBridge }; }, ): Promise { @@ -491,7 +492,7 @@ export async function loadImageFromRef( : await loadWebMedia( targetPath, options?.workspaceOnly - ? { maxBytes: options.maxBytes, localRoots: [workspaceDir] } + ? { maxBytes: options.maxBytes, localRoots: options.localRoots ?? 
[workspaceDir] } : options?.maxBytes, ); @@ -542,6 +543,7 @@ export async function detectAndLoadPromptImages(params: { maxBytes?: number; maxDimensionPx?: number; workspaceOnly?: boolean; + localRoots?: readonly string[]; sandbox?: { root: string; bridge: SandboxFsBridge }; }): Promise<{ /** Images for the current prompt (existingImages + detected in current prompt) */ @@ -594,6 +596,7 @@ export async function detectAndLoadPromptImages(params: { const image = await loadImageFromRef(ref, params.workspaceDir, { maxBytes: params.maxBytes, workspaceOnly: params.workspaceOnly, + localRoots: params.localRoots, sandbox: params.sandbox, }); if (image) { @@ -609,6 +612,7 @@ export async function detectAndLoadPromptImages(params: { const image = await loadImageFromRef(ref, params.workspaceDir, { maxBytes: params.maxBytes, workspaceOnly: params.workspaceOnly, + localRoots: params.localRoots, sandbox: params.sandbox, }); offloadedImages.push(image); diff --git a/src/agents/harness/runtime-plugin.test.ts b/src/agents/harness/runtime-plugin.test.ts index e6ce0931457c..ed9f544ec34c 100644 --- a/src/agents/harness/runtime-plugin.test.ts +++ b/src/agents/harness/runtime-plugin.test.ts @@ -93,6 +93,83 @@ describe("ensureSelectedAgentHarnessPlugin", () => { ); }); + it("loads a configured Copilot harness plugin before selection", async () => { + await ensureSelectedAgentHarnessPlugin({ + provider: "github-copilot", + modelId: "gpt-4o", + config: { + models: { + providers: { + "github-copilot": { + agentRuntime: { id: "copilot" }, + baseUrl: "https://api.githubcopilot.com", + models: [], + }, + }, + }, + } as OpenClawConfig, + workspaceDir: "/tmp/workspace", + }); + + expect(mocks.resolveOwningPluginIdsForProvider).not.toHaveBeenCalled(); + expect(mocks.ensurePluginRegistryLoaded).toHaveBeenCalledWith( + expect.objectContaining({ + scope: "all", + workspaceDir: "/tmp/workspace", + onlyPluginIds: ["copilot"], + config: expect.objectContaining({ + plugins: expect.objectContaining({ + 
allow: ["copilot"], + entries: expect.objectContaining({ + copilot: expect.objectContaining({ enabled: true }), + }), + }), + }), + }), + ); + }); + + it("does not bypass a restrictive allowlist that omits a configured Copilot harness", async () => { + await ensureSelectedAgentHarnessPlugin({ + provider: "github-copilot", + modelId: "gpt-4o", + config: { + plugins: { + allow: ["telegram"], + entries: { + telegram: { enabled: true }, + }, + }, + models: { + providers: { + "github-copilot": { + agentRuntime: { id: "copilot" }, + baseUrl: "https://api.githubcopilot.com", + models: [], + }, + }, + }, + } as OpenClawConfig, + workspaceDir: "/tmp/workspace", + }); + + expect(mocks.ensurePluginRegistryLoaded).toHaveBeenCalledWith( + expect.objectContaining({ + scope: "all", + workspaceDir: "/tmp/workspace", + onlyPluginIds: ["copilot"], + config: expect.objectContaining({ + plugins: expect.objectContaining({ + allow: ["telegram"], + entries: expect.not.objectContaining({ + copilot: expect.anything(), + }), + }), + }), + }), + ); + }); + it("widens a scoped harness allowlist with the provider owner for openai-codex models", async () => { await ensureSelectedAgentHarnessPlugin({ provider: "openai-codex", @@ -241,4 +318,26 @@ describe("ensureSelectedAgentHarnessPlugin", () => { expect(mocks.ensurePluginRegistryLoaded).not.toHaveBeenCalled(); expect(mocks.resolveOwningPluginIdsForProvider).not.toHaveBeenCalled(); }); + + it("does not treat CLI backend runtime aliases as plugin ids", async () => { + await ensureSelectedAgentHarnessPlugin({ + provider: "anthropic", + modelId: "claude-opus-4-7", + config: { + models: { + providers: { + anthropic: { + agentRuntime: { id: "claude-cli" }, + baseUrl: "https://api.anthropic.com", + models: [], + }, + }, + }, + } as OpenClawConfig, + workspaceDir: "/tmp/workspace", + }); + + expect(mocks.ensurePluginRegistryLoaded).not.toHaveBeenCalled(); + expect(mocks.resolveOwningPluginIdsForProvider).not.toHaveBeenCalled(); + }); }); diff --git 
a/src/agents/harness/runtime-plugin.ts b/src/agents/harness/runtime-plugin.ts index a9cc9b5761ec..2f3e77a6fcda 100644 --- a/src/agents/harness/runtime-plugin.ts +++ b/src/agents/harness/runtime-plugin.ts @@ -5,10 +5,12 @@ import { resolveBundledProviderCompatPluginIds, resolveOwningPluginIdsForProviderRef, } from "../../plugins/providers.js"; -import { isDefaultAgentRuntimeId } from "../agent-runtime-id.js"; +import { isDefaultAgentRuntimeId, OPENCLAW_AGENT_RUNTIME_ID } from "../agent-runtime-id.js"; import { normalizeOptionalAgentRuntimeId } from "../agent-runtime-id.js"; import { resolveAgentHarnessPolicy } from "./policy.js"; +const COLD_LOADABLE_HARNESS_PLUGIN_IDS = new Set(["codex", "copilot"]); + function dedupePluginIds(values: readonly string[]): string[] { const seen = new Set(); const result: string[] = []; @@ -28,11 +30,15 @@ function restrictiveAllowlistOmitsPlugin(config: OpenClawConfig | undefined, plu return allow.length > 0 && !allow.includes(pluginId); } -function resolveCodexHarnessPluginIds(params: { +function resolveHarnessPluginIds(params: { + runtime: string; provider: string; config?: OpenClawConfig; workspaceDir: string; }): string[] { + if (params.runtime !== "codex") { + return [params.runtime]; + } if (restrictiveAllowlistOmitsPlugin(params.config, "codex")) { return ["codex"]; } @@ -106,20 +112,25 @@ export async function ensureSelectedAgentHarnessPlugin(params: { }); const runtime = runtimeOverride && !isDefaultAgentRuntimeId(runtimeOverride) ? 
runtimeOverride : policy.runtime; - if (runtime !== "codex") { + if ( + isDefaultAgentRuntimeId(runtime) || + runtime === OPENCLAW_AGENT_RUNTIME_ID || + !COLD_LOADABLE_HARNESS_PLUGIN_IDS.has(runtime) + ) { return; } const { ensurePluginRegistryLoaded } = await import("../../plugins/runtime/runtime-registry-loader.js"); - const pluginIds = resolveCodexHarnessPluginIds({ + const pluginIds = resolveHarnessPluginIds({ + runtime, provider: params.provider, config: params.config, workspaceDir: params.workspaceDir, }); const configWithAllowedRuntimePlugins = withRuntimePluginIdsAllowed({ config: params.config, - requiredPluginId: "codex", + requiredPluginId: runtime, pluginIds, }); const activatedConfig = diff --git a/src/agents/sandbox-media-paths.test.ts b/src/agents/sandbox-media-paths.test.ts index ae766f984738..c61b770aedb9 100644 --- a/src/agents/sandbox-media-paths.test.ts +++ b/src/agents/sandbox-media-paths.test.ts @@ -43,6 +43,63 @@ describe("createSandboxBridgeReadFile", () => { expect(stat).not.toHaveBeenCalled(); }); + it("keeps workspace-only container paths under the sandbox workspace mount", async () => { + const resolvePath = vi.fn(({ filePath }: { filePath: string }) => { + if (filePath === "/tmp/sandbox-root") { + return { + relativePath: "", + containerPath: "/remote/workspace", + }; + } + return { + relativePath: filePath, + containerPath: `/remote/workspace/${filePath}`, + }; + }); + + const resolved = await resolveSandboxedBridgeMediaPath({ + sandbox: { + root: "/tmp/sandbox-root", + workspaceOnly: true, + bridge: { + resolvePath, + } as unknown as SandboxFsBridge, + }, + mediaPath: "image.png", + }); + + expect(resolved).toEqual({ resolved: "/remote/workspace/image.png" }); + expect(resolvePath).toHaveBeenCalledWith({ + filePath: "/tmp/sandbox-root", + cwd: "/tmp/sandbox-root", + }); + }); + + it("rejects workspace-only container paths outside the sandbox workspace mount", async () => { + await expect( + resolveSandboxedBridgeMediaPath({ + sandbox: 
{ + root: "/tmp/sandbox-root", + workspaceOnly: true, + bridge: { + resolvePath: vi.fn(({ filePath }: { filePath: string }) => + filePath === "/tmp/sandbox-root" + ? { + relativePath: "", + containerPath: "/remote/workspace", + } + : { + relativePath: filePath, + containerPath: "/remote/agent/secret.png", + }, + ), + } as unknown as SandboxFsBridge, + }, + mediaPath: "/remote/agent/secret.png", + }), + ).rejects.toThrow("Sandbox path escapes workspace root: /remote/agent/secret.png"); + }); + it("rewrites inbound media URIs before direct sandbox resolution", async () => { const resolvePath = vi.fn(({ filePath }: { filePath: string }) => ({ hostPath: `/tmp/sandbox-root/${filePath}`, diff --git a/src/agents/sandbox-media-paths.ts b/src/agents/sandbox-media-paths.ts index 479691e95439..5165f5d25f74 100644 --- a/src/agents/sandbox-media-paths.ts +++ b/src/agents/sandbox-media-paths.ts @@ -1,7 +1,8 @@ import path from "node:path"; import { resolveMediaReferenceSandboxPath } from "../media/media-reference.js"; import { assertSandboxPath } from "./sandbox-paths.js"; -import type { SandboxFsBridge } from "./sandbox/fs-bridge.js"; +import type { SandboxFsBridge, SandboxResolvedPath } from "./sandbox/fs-bridge.js"; +import { isPathInsideContainerRoot, normalizeContainerPath } from "./sandbox/path-utils.js"; export type SandboxedBridgeMediaPathConfig = { root: string; @@ -40,15 +41,30 @@ export async function resolveSandboxedBridgeMediaPath(params: { throw new Error(`Sandbox media reference is not staged: ${rewrittenFrom}`); } } - const enforceWorkspaceBoundary = async (hostPath: string) => { + const enforceWorkspaceBoundary = async (resolved: SandboxResolvedPath) => { if (!params.sandbox.workspaceOnly) { return; } - await assertSandboxPath({ - filePath: hostPath, + if (resolved.hostPath) { + await assertSandboxPath({ + filePath: resolved.hostPath, + cwd: params.sandbox.root, + root: params.sandbox.root, + }); + return; + } + const workspaceRoot = 
params.sandbox.bridge.resolvePath({ + filePath: params.sandbox.root, cwd: params.sandbox.root, - root: params.sandbox.root, }); + if ( + !isPathInsideContainerRoot( + normalizeContainerPath(workspaceRoot.containerPath), + normalizeContainerPath(resolved.containerPath), + ) + ) { + throw new Error(`Sandbox path escapes workspace root: ${resolved.containerPath}`); + } }; const resolveDirect = () => @@ -58,9 +74,7 @@ export async function resolveSandboxedBridgeMediaPath(params: { }); try { const resolved = resolveDirect(); - if (resolved.hostPath) { - await enforceWorkspaceBoundary(resolved.hostPath); - } + await enforceWorkspaceBoundary(resolved); return { resolved: resolved.hostPath ?? resolved.containerPath, ...(rewrittenFrom ? { rewrittenFrom } : {}), @@ -86,9 +100,7 @@ export async function resolveSandboxedBridgeMediaPath(params: { filePath: fallbackPath, cwd: params.sandbox.root, }); - if (resolvedFallback.hostPath) { - await enforceWorkspaceBoundary(resolvedFallback.hostPath); - } + await enforceWorkspaceBoundary(resolvedFallback); return { resolved: resolvedFallback.hostPath ?? resolvedFallback.containerPath, rewrittenFrom: filePath, diff --git a/src/commands/auth-choice.test.ts b/src/commands/auth-choice.test.ts index eaf2f552878a..1880431de1b5 100644 --- a/src/commands/auth-choice.test.ts +++ b/src/commands/auth-choice.test.ts @@ -84,6 +84,13 @@ vi.mock("../agents/agent-scope.js", () => ({ `${process.env.OPENCLAW_STATE_DIR ?? "/tmp/openclaw-state"}/agents/${agentId}/agent`, resolveAgentWorkspaceDir: (configForTest: unknown, agentId: string) => `/tmp/openclaw-workspaces/${agentId}`, + // Required by src/agents/model-runtime-policy.ts, which is transitively + // imported through provider-auth-choice -> copilot-sdk-install -> + // copilot-routing -> model-runtime-policy. Without these stubs the mock + // surface is incomplete and the dynamic import of copilot-sdk-install + // explodes inside applyAuthChoice. 
+ resolveSessionAgentIds: () => ({ defaultAgentId: "main", sessionAgentId: "main" }), + listAgentEntries: () => [], })); vi.mock("../agents/workspace.js", () => ({ diff --git a/src/commands/copilot-sdk-install-manifest/package-lock.json b/src/commands/copilot-sdk-install-manifest/package-lock.json new file mode 100644 index 000000000000..6138d21fd803 --- /dev/null +++ b/src/commands/copilot-sdk-install-manifest/package-lock.json @@ -0,0 +1,160 @@ +{ + "name": "openclaw-copilot-sdk-bootstrap", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "openclaw-copilot-sdk-bootstrap", + "version": "1.0.0", + "dependencies": { + "@github/copilot-sdk": "1.0.0-beta.4" + } + }, + "node_modules/@github/copilot": { + "version": "1.0.48", + "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.48.tgz", + "integrity": "sha512-U5SzyTEq376UU9A4Sd3TEKz+Y2nRUd90cLO4Hc1otaB8yFSy9Ur2UVGcI2/wCoodL3a39k6WbdgNzFxr0gWFRQ==", + "license": "SEE LICENSE IN LICENSE.md", + "bin": { + "copilot": "npm-loader.js" + }, + "optionalDependencies": { + "@github/copilot-darwin-arm64": "1.0.48", + "@github/copilot-darwin-x64": "1.0.48", + "@github/copilot-linux-arm64": "1.0.48", + "@github/copilot-linux-x64": "1.0.48", + "@github/copilot-win32-arm64": "1.0.48", + "@github/copilot-win32-x64": "1.0.48" + } + }, + "node_modules/@github/copilot-darwin-arm64": { + "version": "1.0.48", + "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.48.tgz", + "integrity": "sha512-82MLoMQwPVVFM8EYssihFxSEPUYtZADE8rMzQ3jG9HgRg2qjQSfnHQS1mKe64dlXswZUK/onw6/8kjnW5I4pPg==", + "cpu": [ + "arm64" + ], + "license": "SEE LICENSE IN LICENSE.md", + "optional": true, + "os": [ + "darwin" + ], + "bin": { + "copilot-darwin-arm64": "copilot" + } + }, + "node_modules/@github/copilot-darwin-x64": { + "version": "1.0.48", + "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.48.tgz", + 
"integrity": "sha512-1VQ5r5F0h8GwboXmZTcutqcJT+iCpPXAF27QqodmpKEvW9aYfG8g9X2kFJOzDZoX+SA3Uaka9qXdYKF2xT6Uog==", + "cpu": [ + "x64" + ], + "license": "SEE LICENSE IN LICENSE.md", + "optional": true, + "os": [ + "darwin" + ], + "bin": { + "copilot-darwin-x64": "copilot" + } + }, + "node_modules/@github/copilot-linux-arm64": { + "version": "1.0.48", + "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.48.tgz", + "integrity": "sha512-PmsGnb0DZlI+Bf53l9HM1PAHHkUcMyB4y8v/7tnC/jDOV5dGF124n0HnDNfJLOLiJGiQGodthIif6QtPaAxpeA==", + "cpu": [ + "arm64" + ], + "license": "SEE LICENSE IN LICENSE.md", + "optional": true, + "os": [ + "linux" + ], + "bin": { + "copilot-linux-arm64": "copilot" + } + }, + "node_modules/@github/copilot-linux-x64": { + "version": "1.0.48", + "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.48.tgz", + "integrity": "sha512-b2cc4euSlke9fYHXXsS2EL9UYbctN0h4lZvtAcKUDY+RCnpYAQOVBZK+c1R9dQrtsT6Z/yUv7PuFPSs8qdtc2Q==", + "cpu": [ + "x64" + ], + "license": "SEE LICENSE IN LICENSE.md", + "optional": true, + "os": [ + "linux" + ], + "bin": { + "copilot-linux-x64": "copilot" + } + }, + "node_modules/@github/copilot-sdk": { + "version": "1.0.0-beta.4", + "resolved": "https://registry.npmjs.org/@github/copilot-sdk/-/copilot-sdk-1.0.0-beta.4.tgz", + "integrity": "sha512-DcVMN2FWODxamFS9nTls8AW3QsyMnj6JDVBNRVBXaTY9kEhGHCjt8lp7sJp95/vyl52hvEb4/68Oh6SdFU9O/Q==", + "license": "MIT", + "dependencies": { + "@github/copilot": "^1.0.46", + "vscode-jsonrpc": "^8.2.1", + "zod": "^4.3.6" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@github/copilot-win32-arm64": { + "version": "1.0.48", + "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.48.tgz", + "integrity": "sha512-VEEOwddtpJ3DTbXGhnK6K8im4ofl9m08q1m/K++sNvWV8wkkOSOQBTiPdyUsuU/TXAoFhb8tZMIJv+6NnMBtMw==", + "cpu": [ + "arm64" + ], + "license": "SEE LICENSE IN LICENSE.md", + 
"optional": true, + "os": [ + "win32" + ], + "bin": { + "copilot-win32-arm64": "copilot.exe" + } + }, + "node_modules/@github/copilot-win32-x64": { + "version": "1.0.48", + "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.48.tgz", + "integrity": "sha512-93BzvXLPHTyy1gWBXQY/IWIHor4IAwZuuo7/obG80/Qa6U0WeaN9slz/FBJvrsgVNrrRfEID5Xm3At+S6Kj67Q==", + "cpu": [ + "x64" + ], + "license": "SEE LICENSE IN LICENSE.md", + "optional": true, + "os": [ + "win32" + ], + "bin": { + "copilot-win32-x64": "copilot.exe" + } + }, + "node_modules/vscode-jsonrpc": { + "version": "8.2.1", + "resolved": "https://registry.npmjs.org/vscode-jsonrpc/-/vscode-jsonrpc-8.2.1.tgz", + "integrity": "sha512-kdjOSJ2lLIn7r1rtrMbbNCHjyMPfRnowdKjBQ+mGq6NAW5QY2bEZC/khaC5OR8svbbjvLEaIXkOq45e2X9BIbQ==", + "license": "MIT", + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/zod": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/zod/-/zod-4.4.3.tgz", + "integrity": "sha512-ytENFjIJFl2UwYglde2jchW2Hwm4GJFLDiSXWdTrJQBIN9Fcyp7n4DhxJEiWNAJMV1/BqWfW/kkg71UDcHJyTQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + } + } +} diff --git a/src/commands/copilot-sdk-install-manifest/package.json b/src/commands/copilot-sdk-install-manifest/package.json new file mode 100644 index 000000000000..1164874c1bae --- /dev/null +++ b/src/commands/copilot-sdk-install-manifest/package.json @@ -0,0 +1,12 @@ +{ + "name": "openclaw-copilot-sdk-bootstrap", + "version": "1.0.0", + "private": true, + "description": "Pinned dependency graph for @github/copilot-sdk used by the Copilot agent runtime installer.", + "dependencies": { + "@github/copilot-sdk": "1.0.0-beta.4" + }, + "overrides": { + "@github/copilot": "1.0.48" + } +} diff --git a/src/commands/copilot-sdk-install.test.ts b/src/commands/copilot-sdk-install.test.ts new file mode 100755 index 000000000000..0d0fee4d7d21 --- /dev/null +++ 
b/src/commands/copilot-sdk-install.test.ts @@ -0,0 +1,721 @@ +import path from "node:path"; +import { describe, expect, it, vi } from "vitest"; +import type { OpenClawConfig } from "../config/types.openclaw.js"; +import type { RuntimeEnv } from "../runtime.js"; +import type { WizardPrompter } from "../wizard/prompts.js"; +import { + COPILOT_SDK_FALLBACK_DIR, + COPILOT_SDK_INSTALL_MANIFEST_DIR, + COPILOT_SDK_SPEC, + ensureCopilotSdkForModelSelection, + installCopilotSdk, + isCopilotSdkInstalled, + resolveCopilotSdkFallbackDir, + selectedModelShouldEnsureCopilotSdk, + verifyCopilotSdkInstall, +} from "./copilot-sdk-install.js"; + +function fakeRuntime(): RuntimeEnv { + return { + log: () => undefined, + error: () => undefined, + exit: () => undefined, + }; +} + +function fakePrompter(overrides: Partial = {}): WizardPrompter { + const noop = async () => undefined as never; + return { + intro: async () => undefined, + outro: async () => undefined, + note: async () => undefined, + plain: async () => undefined, + select: noop, + multiselect: noop, + text: async () => "", + confirm: async () => true, + progress: () => ({ update: () => undefined, stop: () => undefined }), + ...overrides, + } as WizardPrompter; +} + +const emptyCfg = {} as OpenClawConfig; + +function cfgWithCopilotRuntime(): OpenClawConfig { + return { + models: { + providers: { + "github-copilot": { agentRuntime: { id: "copilot" } }, + }, + }, + } as unknown as OpenClawConfig; +} + +describe("selectedModelShouldEnsureCopilotSdk", () => { + it("returns false for github-copilot/* without explicit agentRuntime opt-in", () => { + // Built-in GitHub Copilot provider already supports github-copilot/*; + // we must not nag users with the SDK install prompt by default. 
+ expect( + selectedModelShouldEnsureCopilotSdk({ + cfg: emptyCfg, + model: "github-copilot/gpt-4o", + }), + ).toBe(false); + }); + + it("returns true for github-copilot/* when agentRuntime.id = copilot is set", () => { + expect( + selectedModelShouldEnsureCopilotSdk({ + cfg: cfgWithCopilotRuntime(), + model: "github-copilot/gpt-4o", + }), + ).toBe(true); + }); + + it("returns false for other providers", () => { + expect( + selectedModelShouldEnsureCopilotSdk({ cfg: emptyCfg, model: "anthropic/claude-3" }), + ).toBe(false); + expect(selectedModelShouldEnsureCopilotSdk({ cfg: emptyCfg, model: "openai/gpt-4o" })).toBe( + false, + ); + }); + + it("returns false when model is undefined", () => { + expect(selectedModelShouldEnsureCopilotSdk({ cfg: emptyCfg })).toBe(false); + }); +}); + +describe("ensureCopilotSdkForModelSelection", () => { + it("returns required=false and no-ops when model is not github-copilot", async () => { + const confirm = vi.fn(); + const result = await ensureCopilotSdkForModelSelection({ + cfg: emptyCfg, + model: "anthropic/claude-3", + prompter: fakePrompter({ confirm }), + runtime: fakeRuntime(), + isInstalled: () => false, + }); + expect(result.required).toBe(false); + expect(result.installed).toBe(false); + expect(confirm).not.toHaveBeenCalled(); + }); + + it("returns required=false for github-copilot when config does not opt into the SDK runtime", async () => { + // Same model, same env, but no agentRuntime.id=copilot anywhere in the + // config -> the built-in GitHub Copilot provider stays in charge and the + // SDK installer is not invoked. This is the entire point of P1 gating. 
+ const confirm = vi.fn(); + const install = vi.fn(); + const result = await ensureCopilotSdkForModelSelection({ + cfg: emptyCfg, + model: "github-copilot/gpt-4o", + prompter: fakePrompter({ confirm }), + runtime: fakeRuntime(), + isInstalled: () => false, + install, + }); + expect(result.required).toBe(false); + expect(confirm).not.toHaveBeenCalled(); + expect(install).not.toHaveBeenCalled(); + }); + + it("returns already-installed without prompting when SDK is present", async () => { + const confirm = vi.fn(); + const install = vi.fn(); + const result = await ensureCopilotSdkForModelSelection({ + cfg: cfgWithCopilotRuntime(), + model: "github-copilot/gpt-4o", + prompter: fakePrompter({ confirm }), + runtime: fakeRuntime(), + isInstalled: () => true, + install, + }); + expect(result.required).toBe(true); + expect(result.installed).toBe(false); + expect(result.status).toBe("already-installed"); + expect(confirm).not.toHaveBeenCalled(); + expect(install).not.toHaveBeenCalled(); + }); + + it("does not prompt or auto-install in Nix mode", async () => { + const previousNixMode = process.env.OPENCLAW_NIX_MODE; + process.env.OPENCLAW_NIX_MODE = "1"; + try { + const confirm = vi.fn(); + const install = vi.fn(); + const note = vi.fn(); + const result = await ensureCopilotSdkForModelSelection({ + cfg: cfgWithCopilotRuntime(), + model: "github-copilot/gpt-4o", + prompter: fakePrompter({ confirm, note }), + runtime: fakeRuntime(), + isInstalled: () => false, + install, + }); + expect(result).toMatchObject({ + required: true, + installed: false, + status: "nix-mode", + }); + expect(confirm).not.toHaveBeenCalled(); + expect(install).not.toHaveBeenCalled(); + expect(note).toHaveBeenCalledOnce(); + expect(String(note.mock.calls[0]?.[0])).toContain("OPENCLAW_NIX_MODE=1"); + } finally { + if (previousNixMode === undefined) { + delete process.env.OPENCLAW_NIX_MODE; + } else { + process.env.OPENCLAW_NIX_MODE = previousNixMode; + } + } + }); + + it("prompts and installs when SDK is 
missing and user confirms", async () => { + const confirm = vi.fn(async () => true); + const install = vi.fn(async () => ({ + installed: true, + fallbackDir: COPILOT_SDK_FALLBACK_DIR, + spec: COPILOT_SDK_SPEC, + })); + const result = await ensureCopilotSdkForModelSelection({ + cfg: cfgWithCopilotRuntime(), + model: "github-copilot/gpt-4o", + prompter: fakePrompter({ confirm }), + runtime: fakeRuntime(), + isInstalled: () => false, + install, + }); + expect(confirm).toHaveBeenCalledOnce(); + expect(install).toHaveBeenCalledOnce(); + expect(result.required).toBe(true); + expect(result.installed).toBe(true); + expect(result.status).toBe("installed"); + }); + + it("respects user decline and reports status=declined", async () => { + const confirm = vi.fn(async () => false); + const install = vi.fn(); + const note = vi.fn(); + const result = await ensureCopilotSdkForModelSelection({ + cfg: cfgWithCopilotRuntime(), + model: "github-copilot/gpt-4o", + prompter: fakePrompter({ confirm, note }), + runtime: fakeRuntime(), + isInstalled: () => false, + install, + }); + expect(confirm).toHaveBeenCalledOnce(); + expect(install).not.toHaveBeenCalled(); + expect(note).toHaveBeenCalledOnce(); + expect(result.required).toBe(true); + expect(result.installed).toBe(false); + expect(result.status).toBe("declined"); + }); + + it("reports status=failed and surfaces error via note when install throws", async () => { + const confirm = vi.fn(async () => true); + const install = vi.fn(async () => { + throw new Error("network down"); + }); + const note = vi.fn(); + const result = await ensureCopilotSdkForModelSelection({ + cfg: cfgWithCopilotRuntime(), + model: "github-copilot/gpt-4o", + prompter: fakePrompter({ confirm, note }), + runtime: fakeRuntime(), + isInstalled: () => false, + install, + }); + expect(result.required).toBe(true); + expect(result.installed).toBe(false); + expect(result.status).toBe("failed"); + expect(note).toHaveBeenCalledOnce(); + const noteMessage = (note as unknown 
as { mock: { calls: string[][] } }).mock.calls[0][0]; + expect(noteMessage).toContain("network down"); + expect(noteMessage).toContain("copilot-sdk-install-manifest"); + }); +}); + +function writeFakePinnedManifest(manifestDir: string): void { + const fs = require("node:fs") as typeof import("node:fs"); + const path = require("node:path") as typeof import("node:path"); + fs.writeFileSync( + path.join(manifestDir, "package.json"), + JSON.stringify({ dependencies: { "@github/copilot-sdk": "1.0.0-beta.4" } }), + ); + fs.writeFileSync( + path.join(manifestDir, "package-lock.json"), + JSON.stringify({ + lockfileVersion: 3, + packages: { + "node_modules/@github/copilot-sdk": { version: "1.0.0-beta.4" }, + "node_modules/@github/copilot": { version: "1.0.48" }, + }, + }), + ); +} + +function installFakeFallbackGraph(dir: string, sdkVersion: string, cliVersion: string): void { + const fs = require("node:fs") as typeof import("node:fs"); + const path = require("node:path") as typeof import("node:path"); + const sdkDir = path.join(dir, "node_modules", "@github", "copilot-sdk"); + const cliDir = path.join(dir, "node_modules", "@github", "copilot"); + fs.mkdirSync(sdkDir, { recursive: true }); + fs.mkdirSync(cliDir, { recursive: true }); + fs.writeFileSync( + path.join(sdkDir, "package.json"), + JSON.stringify({ name: "@github/copilot-sdk", version: sdkVersion }), + ); + fs.writeFileSync( + path.join(cliDir, "package.json"), + JSON.stringify({ name: "@github/copilot", version: cliVersion }), + ); +} + +describe("installCopilotSdk", () => { + it("stages the pinned manifest and runs the install command when SDK is missing", async () => { + const fs = await import("node:fs"); + const path = await import("node:path"); + const os = await import("node:os"); + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-install-")); + const manifestDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-manifest-")); + fs.writeFileSync( + path.join(manifestDir, 
"package.json"), + JSON.stringify({ dependencies: { "@github/copilot-sdk": "1.0.0-beta.4" } }), + ); + fs.writeFileSync( + path.join(manifestDir, "package-lock.json"), + JSON.stringify({ + lockfileVersion: 3, + packages: { + "node_modules/@github/copilot-sdk": { version: "1.0.0-beta.4" }, + "node_modules/@github/copilot": { version: "1.0.48" }, + }, + }), + ); + try { + const runInstall = vi.fn( + async ({ dir }: { dir: string; spec: string; manifestDir: string }) => { + const sdkDir = path.join(dir, "node_modules", "@github", "copilot-sdk"); + const cliDir = path.join(dir, "node_modules", "@github", "copilot"); + fs.mkdirSync(sdkDir, { recursive: true }); + fs.mkdirSync(cliDir, { recursive: true }); + fs.writeFileSync( + path.join(sdkDir, "package.json"), + JSON.stringify({ name: "@github/copilot-sdk", version: "1.0.0-beta.4" }), + ); + fs.writeFileSync( + path.join(cliDir, "package.json"), + JSON.stringify({ name: "@github/copilot", version: "1.0.48" }), + ); + }, + ); + const result = await installCopilotSdk({ + fallbackDir: tmp, + manifestDir, + runInstall, + }); + expect(runInstall).toHaveBeenCalledOnce(); + // Staged manifest must land in fallbackDir for `npm ci` to use. + expect(fs.existsSync(path.join(tmp, "package.json"))).toBe(true); + expect(fs.existsSync(path.join(tmp, "package-lock.json"))).toBe(true); + // And the staged manifest must be byte-identical to the pinned source. + expect(fs.readFileSync(path.join(tmp, "package-lock.json"), "utf8")).toBe( + fs.readFileSync(path.join(manifestDir, "package-lock.json"), "utf8"), + ); + // runInstall receives the manifestDir argument so it can rely on it. 
+ const call = runInstall.mock.calls[0][0]; + expect(call.manifestDir).toBe(manifestDir); + expect(call.dir).toBe(tmp); + expect(result.installed).toBe(true); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + fs.rmSync(manifestDir, { recursive: true, force: true }); + } + }); + + it("returns installed=false when fallback graph matches the pinned manifest (skip install)", async () => { + const fs = await import("node:fs"); + const path = await import("node:path"); + const os = await import("node:os"); + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-install-")); + const manifestDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-manifest-")); + try { + writeFakePinnedManifest(manifestDir); + installFakeFallbackGraph(tmp, "1.0.0-beta.4", "1.0.48"); + // Copy the manifest lock into the fallback dir to simulate a prior + // successful install having staged it (npm ci does this). + fs.copyFileSync( + path.join(manifestDir, "package-lock.json"), + path.join(tmp, "package-lock.json"), + ); + const runInstall = vi.fn(); + const result = await installCopilotSdk({ fallbackDir: tmp, manifestDir, runInstall }); + expect(runInstall).not.toHaveBeenCalled(); + expect(result.installed).toBe(false); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + fs.rmSync(manifestDir, { recursive: true, force: true }); + } + }); + + it("reinstalls when the fallback dir has the SDK but no pinned lock (stale tree)", async () => { + const fs = await import("node:fs"); + const path = await import("node:path"); + const os = await import("node:os"); + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-install-")); + const manifestDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-manifest-")); + try { + writeFakePinnedManifest(manifestDir); + // Stale install: SDK dir exists but no package-lock.json at the + // fallback root, so the verifier must reject. 
+ installFakeFallbackGraph(tmp, "1.0.0-beta.4", "1.0.48"); + const runInstall = vi.fn(async ({ dir }: { dir: string }) => { + installFakeFallbackGraph(dir, "1.0.0-beta.4", "1.0.48"); + }); + const result = await installCopilotSdk({ fallbackDir: tmp, manifestDir, runInstall }); + expect(runInstall).toHaveBeenCalledOnce(); + expect(result.installed).toBe(true); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + fs.rmSync(manifestDir, { recursive: true, force: true }); + } + }); + + it("reinstalls when the installed SDK version differs from the pinned manifest", async () => { + const fs = await import("node:fs"); + const path = await import("node:path"); + const os = await import("node:os"); + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-install-")); + const manifestDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-manifest-")); + try { + writeFakePinnedManifest(manifestDir); + // Stage a fallback graph whose @github/copilot-sdk version drifts. 
+ installFakeFallbackGraph(tmp, "1.0.0-beta.3", "1.0.48"); + fs.copyFileSync( + path.join(manifestDir, "package-lock.json"), + path.join(tmp, "package-lock.json"), + ); + const runInstall = vi.fn(async ({ dir }: { dir: string }) => { + installFakeFallbackGraph(dir, "1.0.0-beta.4", "1.0.48"); + }); + const result = await installCopilotSdk({ fallbackDir: tmp, manifestDir, runInstall }); + expect(runInstall).toHaveBeenCalledOnce(); + expect(result.installed).toBe(true); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + fs.rmSync(manifestDir, { recursive: true, force: true }); + } + }); + + it("reinstalls when the installed Copilot CLI version drifts from the pinned manifest", async () => { + const fs = await import("node:fs"); + const path = await import("node:path"); + const os = await import("node:os"); + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-install-")); + const manifestDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-manifest-")); + try { + writeFakePinnedManifest(manifestDir); + // CLI version drift only; SDK matches. 
+ installFakeFallbackGraph(tmp, "1.0.0-beta.4", "1.0.54"); + fs.copyFileSync( + path.join(manifestDir, "package-lock.json"), + path.join(tmp, "package-lock.json"), + ); + const runInstall = vi.fn(async ({ dir }: { dir: string }) => { + installFakeFallbackGraph(dir, "1.0.0-beta.4", "1.0.48"); + }); + const result = await installCopilotSdk({ fallbackDir: tmp, manifestDir, runInstall }); + expect(runInstall).toHaveBeenCalledOnce(); + expect(result.installed).toBe(true); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + fs.rmSync(manifestDir, { recursive: true, force: true }); + } + }); + + it("throws when runInstall succeeds but SDK still missing", async () => { + const fs = await import("node:fs"); + const path = await import("node:path"); + const os = await import("node:os"); + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-install-")); + const manifestDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-manifest-")); + writeFakePinnedManifest(manifestDir); + try { + const runInstall = vi.fn(async () => undefined); + await expect( + installCopilotSdk({ fallbackDir: tmp, manifestDir, runInstall }), + ).rejects.toThrow(/does not match the pinned manifest/); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + fs.rmSync(manifestDir, { recursive: true, force: true }); + } + }); + + it("throws a useful error when the manifest dir is missing the pinned files", async () => { + const fs = await import("node:fs"); + const path = await import("node:path"); + const os = await import("node:os"); + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-install-")); + const manifestDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-manifest-")); + try { + const runInstall = vi.fn(); + await expect( + installCopilotSdk({ fallbackDir: tmp, manifestDir, runInstall }), + ).rejects.toThrow(/cannot read pinned SDK manifest/); + expect(runInstall).not.toHaveBeenCalled(); + } finally { 
+ fs.rmSync(tmp, { recursive: true, force: true }); + fs.rmSync(manifestDir, { recursive: true, force: true }); + } + }); +}); + +describe("constants", () => { + it("exports fallback dir under ~/.openclaw/npm-runtime/copilot", () => { + expect(COPILOT_SDK_FALLBACK_DIR).toMatch(/\.openclaw[\\/]+npm-runtime[\\/]+copilot$/); + }); + + it("resolves fallback dir from OPENCLAW_STATE_DIR when the profile is relocated", () => { + expect( + resolveCopilotSdkFallbackDir({ + ...process.env, + OPENCLAW_STATE_DIR: "/tmp/openclaw-state", + }), + ).toBe(path.join("/tmp/openclaw-state", "npm-runtime", "copilot")); + }); + + it("pins SDK spec to @github/copilot-sdk@1.0.0-beta.4", () => { + expect(COPILOT_SDK_SPEC).toBe("@github/copilot-sdk@1.0.0-beta.4"); + }); + + it("isCopilotSdkInstalled returns false for nonexistent dirs", () => { + expect(isCopilotSdkInstalled("/tmp/definitely-does-not-exist-openclaw")).toBe(false); + }); +}); + +describe("verifyCopilotSdkInstall", () => { + it("returns ok when fallback lock and installed package.json match the pinned manifest", async () => { + const fs = await import("node:fs"); + const path = await import("node:path"); + const os = await import("node:os"); + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-verify-")); + const manifestDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-manifest-")); + try { + writeFakePinnedManifest(manifestDir); + installFakeFallbackGraph(tmp, "1.0.0-beta.4", "1.0.48"); + fs.copyFileSync( + path.join(manifestDir, "package-lock.json"), + path.join(tmp, "package-lock.json"), + ); + expect(verifyCopilotSdkInstall(tmp, manifestDir)).toEqual({ ok: true }); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + fs.rmSync(manifestDir, { recursive: true, force: true }); + } + }); + + it("reports the missing fallback lock with the full path so logs are actionable", async () => { + const fs = await import("node:fs"); + const path = await import("node:path"); + const os 
= await import("node:os"); + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-verify-")); + const manifestDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-manifest-")); + try { + writeFakePinnedManifest(manifestDir); + installFakeFallbackGraph(tmp, "1.0.0-beta.4", "1.0.48"); + const result = verifyCopilotSdkInstall(tmp, manifestDir); + expect(result.ok).toBe(false); + expect(result.reason).toContain(path.join(tmp, "package-lock.json")); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + fs.rmSync(manifestDir, { recursive: true, force: true }); + } + }); + + it("reports drift when the installed package.json version differs from the manifest", async () => { + const fs = await import("node:fs"); + const path = await import("node:path"); + const os = await import("node:os"); + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-verify-")); + const manifestDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-manifest-")); + try { + writeFakePinnedManifest(manifestDir); + // Lock looks correct; on-disk @github/copilot/package.json drifts. 
+ installFakeFallbackGraph(tmp, "1.0.0-beta.4", "1.0.48"); + fs.copyFileSync( + path.join(manifestDir, "package-lock.json"), + path.join(tmp, "package-lock.json"), + ); + fs.writeFileSync( + path.join(tmp, "node_modules", "@github", "copilot", "package.json"), + JSON.stringify({ name: "@github/copilot", version: "1.0.54" }), + ); + const result = verifyCopilotSdkInstall(tmp, manifestDir); + expect(result.ok).toBe(false); + expect(result.reason).toContain("version drift"); + expect(result.reason).toContain("1.0.54"); + expect(result.reason).toContain("1.0.48"); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + fs.rmSync(manifestDir, { recursive: true, force: true }); + } + }); + + it("reports drift when the fallback lock differs outside the entry package versions", async () => { + const fs = await import("node:fs"); + const path = await import("node:path"); + const os = await import("node:os"); + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-verify-")); + const manifestDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-manifest-")); + try { + writeFakePinnedManifest(manifestDir); + installFakeFallbackGraph(tmp, "1.0.0-beta.4", "1.0.48"); + const fallbackLockPath = path.join(tmp, "package-lock.json"); + fs.copyFileSync(path.join(manifestDir, "package-lock.json"), fallbackLockPath); + const fallbackLock = JSON.parse(fs.readFileSync(fallbackLockPath, "utf8")) as { + packages?: Record; + }; + fallbackLock.packages = { + ...fallbackLock.packages, + "node_modules/drifted-transitive": { version: "9.9.9" }, + }; + fs.writeFileSync(fallbackLockPath, JSON.stringify(fallbackLock)); + + const result = verifyCopilotSdkInstall(tmp, manifestDir); + expect(result.ok).toBe(false); + expect(result.reason).toContain("package-lock drift"); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + fs.rmSync(manifestDir, { recursive: true, force: true }); + } + }); + + it("reports missing installed package dir even when 
the lock is present", async () => { + const fs = await import("node:fs"); + const path = await import("node:path"); + const os = await import("node:os"); + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-verify-")); + const manifestDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-manifest-")); + try { + writeFakePinnedManifest(manifestDir); + fs.copyFileSync( + path.join(manifestDir, "package-lock.json"), + path.join(tmp, "package-lock.json"), + ); + // node_modules/@github/copilot-sdk was never created. + const result = verifyCopilotSdkInstall(tmp, manifestDir); + expect(result.ok).toBe(false); + expect(result.reason).toContain("missing installed package"); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + fs.rmSync(manifestDir, { recursive: true, force: true }); + } + }); + + it("throws when the shipped manifest is missing a pinned version (build broke contract)", async () => { + const fs = await import("node:fs"); + const path = await import("node:path"); + const os = await import("node:os"); + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-verify-")); + const manifestDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-manifest-")); + try { + // Manifest lock declares no packages at all -> fatal misconfiguration. 
+ fs.writeFileSync( + path.join(manifestDir, "package-lock.json"), + JSON.stringify({ lockfileVersion: 3, packages: {} }), + ); + expect(() => verifyCopilotSdkInstall(tmp, manifestDir)).toThrow( + /missing a version for node_modules\/@github\/copilot-sdk/, + ); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + fs.rmSync(manifestDir, { recursive: true, force: true }); + } + }); + + it("throws when the shipped manifest package-lock.json cannot be read", async () => { + const fs = await import("node:fs"); + const path = await import("node:path"); + const os = await import("node:os"); + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-verify-")); + const manifestDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-manifest-")); + try { + // No package-lock.json in manifestDir -> readFileSync throws -> fatal. + expect(() => verifyCopilotSdkInstall(tmp, manifestDir)).toThrow( + /cannot read pinned SDK manifest/, + ); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + fs.rmSync(manifestDir, { recursive: true, force: true }); + } + }); + + it("contract: the shipped manifest at COPILOT_SDK_INSTALL_MANIFEST_DIR pins both packages", () => { + // Reading from the real shipped manifest dir must not throw, which means + // the build pipeline keeps the pinned versions for both keys present. + // The verifier returns ok=false here because the fallback dir is empty, + // but it must not throw. 
+ const fs = require("node:fs") as typeof import("node:fs"); + const os = require("node:os") as typeof import("node:os"); + const path = require("node:path") as typeof import("node:path"); + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-copilot-sdk-verify-real-")); + try { + const result = verifyCopilotSdkInstall(tmp, COPILOT_SDK_INSTALL_MANIFEST_DIR); + expect(result.ok).toBe(false); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); +}); + +describe("copilot-sdk install manifest (contract)", () => { + it("pins the manifest package.json to the exact spec advertised by COPILOT_SDK_SPEC", async () => { + const fs = await import("node:fs"); + const path = await import("node:path"); + const raw = fs.readFileSync( + path.join(COPILOT_SDK_INSTALL_MANIFEST_DIR, "package.json"), + "utf8", + ); + const parsed = JSON.parse(raw) as { + dependencies?: Record; + }; + const expectedVersion = COPILOT_SDK_SPEC.split("@").pop()!; + expect(parsed.dependencies?.["@github/copilot-sdk"]).toBe(expectedVersion); + }); + + it("ships a lockfile that includes the SDK and a Copilot CLI binary", async () => { + const fs = await import("node:fs"); + const path = await import("node:path"); + const raw = fs.readFileSync( + path.join(COPILOT_SDK_INSTALL_MANIFEST_DIR, "package-lock.json"), + "utf8", + ); + const parsed = JSON.parse(raw) as { + lockfileVersion?: number; + packages?: Record; + }; + // Reject older lockfile formats so the install graph stays npm v7+ compatible. + expect(parsed.lockfileVersion).toBeGreaterThanOrEqual(2); + const sdkEntry = parsed.packages?.["node_modules/@github/copilot-sdk"]; + expect(sdkEntry).toBeDefined(); + expect(sdkEntry?.version).toBe(COPILOT_SDK_SPEC.split("@").pop()!); + expect(sdkEntry?.integrity).toMatch(/^sha512-/); + // The Copilot CLI is what gives the runtime its native shell/write tools; + // its presence here proves the lockfile resolves the transitive graph. 
+ const cliEntry = parsed.packages?.["node_modules/@github/copilot"]; + expect(cliEntry).toBeDefined(); + // Pin to the exact @github/copilot version that the repository pnpm-lock + // also resolves (and that CI tests exercise). Drift here means users would + // install a different Copilot CLI graph than the one reviewed/tested. + expect(cliEntry?.version).toBe("1.0.48"); + // Every platform-specific @github/copilot-* optional dependency must + // resolve to the same version as the parent CLI package. + for (const [key, entry] of Object.entries(parsed.packages ?? {})) { + if (/^node_modules\/@github\/copilot-(?:darwin|linux|linuxmusl|win32)-/.test(key)) { + expect(entry?.version).toBe("1.0.48"); + } + } + }); +}); diff --git a/src/commands/copilot-sdk-install.ts b/src/commands/copilot-sdk-install.ts new file mode 100755 index 000000000000..be67c1593dcc --- /dev/null +++ b/src/commands/copilot-sdk-install.ts @@ -0,0 +1,402 @@ +import { spawn } from "node:child_process"; +import { copyFileSync, existsSync, mkdirSync, readFileSync } from "node:fs"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import { modelSelectionShouldEnsureCopilotSdk as routingShouldEnsure } from "../agents/copilot-routing.js"; +import { resolveIsNixMode, resolveStateDir } from "../config/paths.js"; +import type { OpenClawConfig } from "../config/types.openclaw.js"; +import type { RuntimeEnv } from "../runtime.js"; +import type { WizardPrompter } from "../wizard/prompts.js"; + +/** + * On-demand install for `@github/copilot-sdk`, the runtime dependency of + * the bundled `copilot` agent runtime extension. + * + * The extension itself is shipped inside the openclaw tarball, but the + * SDK and its platform-specific CLI binary add ~260 MB of download to a + * baseline openclaw install. Most openclaw users do not use the Copilot + * runtime, so we install the SDK lazily: the wizard offers to install + * it the first time the user selects a `github-copilot/*` model. 
/**
 * Resolves the directory that receives the on-demand `@github/copilot-sdk`
 * install: `<state dir>/npm-runtime/copilot`.
 *
 * The same resolver and {@link COPILOT_SDK_SPEC} are mirrored in the copilot
 * extension's sdk-loader module; contract tests keep the two aligned.
 *
 * @param env - Environment used to resolve the state dir (defaults to `process.env`).
 * @returns Path of the fallback install directory.
 */
export function resolveCopilotSdkFallbackDir(env: NodeJS.ProcessEnv = process.env): string {
  const stateDir = resolveStateDir(env);
  return path.join(stateDir, "npm-runtime", "copilot");
}

/** Fallback install directory, resolved once at module load from `process.env`. */
export const COPILOT_SDK_FALLBACK_DIR = resolveCopilotSdkFallbackDir();

/** Exact npm spec (name@version) of the SDK that the on-demand install pins. */
export const COPILOT_SDK_SPEC = "@github/copilot-sdk@1.0.0-beta.4";

/** Human-readable label used as the title of wizard notes and progress lines. */
export const COPILOT_SDK_PACKAGE_LABEL = "GitHub Copilot SDK (@github/copilot-sdk)";

/**
 * Directory that holds the checked-in install graph for {@link COPILOT_SDK_SPEC}
 * (`package.json` + `package-lock.json`). Both files are generated with
 * `npm install --package-lock-only` and committed under
 * `src/commands/copilot-sdk-install-manifest/`; the build step in
 * `scripts/copy-copilot-sdk-manifest.ts` copies them next to the compiled
 * output so this `import.meta.url`-relative lookup also works in published
 * tarballs.
 *
 * Running `npm ci` against this graph keeps user installs from pulling a newer
 * Copilot CLI or transitive dependency set than the one that was reviewed.
 */
export const COPILOT_SDK_INSTALL_MANIFEST_DIR = fileURLToPath(
  new URL("./copilot-sdk-install-manifest/", import.meta.url),
);

/** Outcome label reported by the wizard hook. */
export type CopilotSdkInstallStatus =
  | "already-installed"
  | "installed"
  | "declined"
  | "failed"
  | "nix-mode";

/** Result of the wizard hook; `cfg` is handed back so callers can keep threading it. */
export type CopilotSdkInstallResult = {
  cfg: OpenClawConfig;
  required: boolean;
  installed: boolean;
  status?: CopilotSdkInstallStatus;
};

/**
 * Tells whether the given model selection needs the Copilot SDK. Thin adapter
 * over the routing helper that maps `cfg` onto its `config` parameter.
 */
export function selectedModelShouldEnsureCopilotSdk(params: {
  cfg: OpenClawConfig;
  model?: string;
}): boolean {
  const { cfg, model } = params;
  return routingShouldEnsure({ config: cfg, model });
}

/**
 * Cheap presence check: true when `node_modules/@github/copilot-sdk` exists
 * under `fallbackDir`. It does not validate versions or the lock graph.
 */
export function isCopilotSdkInstalled(
  fallbackDir: string = resolveCopilotSdkFallbackDir(),
): boolean {
  return existsSync(path.join(fallbackDir, "node_modules", "@github", "copilot-sdk"));
}

/** Overrides accepted by the installer; every field has a production default. */
export interface InstallCopilotSdkOptions {
  readonly fallbackDir?: string;
  readonly spec?: string;
  readonly manifestDir?: string;
  readonly logger?: (message: string) => void;
  /** Replaces the real `npm ci` invocation (used by tests). */
  readonly runInstall?: (cmd: { dir: string; spec: string; manifestDir: string }) => Promise<void>;
}

/** What the installer did: `installed` is false when the pinned graph was already in place. */
export interface InstallCopilotSdkResult {
  readonly installed: boolean;
  readonly fallbackDir: string;
  readonly spec: string;
}
/**
 * Result of {@link verifyCopilotSdkInstall}. `ok: true` means the install at
 * `fallbackDir` matches the pinned manifest in `manifestDir` exactly, so the
 * caller can skip running `npm ci` again. Any `ok: false` carries a `reason`
 * suitable for setup logs and for triggering a reinstall.
 */
export interface CopilotSdkVerifyResult {
  readonly ok: boolean;
  readonly reason?: string;
}

/** Lockfile `packages` keys whose versions must match the shipped manifest. */
const COPILOT_SDK_PINNED_PACKAGE_KEYS = [
  "node_modules/@github/copilot-sdk",
  "node_modules/@github/copilot",
] as const;

/**
 * Serializes a JSON value with object keys in a canonical order, so two
 * structurally equal values produce the same string regardless of the key
 * order they were written with. Array element order is preserved.
 */
function stableStringifyJson(value: unknown): string {
  return JSON.stringify(sortJsonValue(value));
}

/**
 * Recursively rebuilds `value` with every object's keys sorted.
 *
 * Keys are ordered by UTF-16 code unit rather than `localeCompare`: locale
 * collation depends on the runtime's ICU data and may report distinct keys as
 * equal, which would let insertion order leak into the "stable" output.
 */
function sortJsonValue(value: unknown): unknown {
  if (Array.isArray(value)) {
    return value.map(sortJsonValue);
  }
  if (value && typeof value === "object") {
    // Object.entries returns a fresh array, so sorting it in place is safe.
    const entries = Object.entries(value as Record<string, unknown>).sort(([left], [right]) =>
      left < right ? -1 : left > right ? 1 : 0,
    );
    return Object.fromEntries(entries.map(([key, entry]) => [key, sortJsonValue(entry)]));
  }
  return value;
}
/** The only part of an npm `package-lock.json` that the verifier reads. */
interface CopilotSdkLockFile {
  packages?: Record<string, { version?: string } | null | undefined>;
}

/**
 * Reads and parses a `package-lock.json`.
 *
 * @throws When the file cannot be read, is not valid JSON, or its top-level
 *   value is not an object. `null` is valid JSON, so without this check it
 *   would surface later as a raw `TypeError` on `lock.packages`.
 */
function readCopilotSdkLockFile(lockPath: string): CopilotSdkLockFile {
  const parsed: unknown = JSON.parse(readFileSync(lockPath, "utf8"));
  if (parsed === null || typeof parsed !== "object") {
    throw new Error("package-lock.json does not contain a JSON object");
  }
  return parsed as CopilotSdkLockFile;
}

/**
 * Confirms that the on-demand install at `fallbackDir` matches the pinned lock
 * graph declared in the shipped manifest at `manifestDir`.
 *
 * A bare directory check ({@link isCopilotSdkInstalled}) lets stale, partial,
 * or manually placed trees bypass the reviewed dependency graph. This verifier
 * compares the pinned versions against the installed lock and the installed
 * `package.json` files of the runtime entry packages, then compares the two
 * lockfiles as a whole.
 *
 * Manifest-side problems (missing file, malformed JSON, missing pinned version)
 * are fatal: a packaged openclaw install cannot recover from a broken shipped
 * manifest. Install-side problems (missing or unreadable lock, unreadable
 * `package.json`, drift) are returned as `ok: false` so the caller can
 * reinstall.
 *
 * @param fallbackDir - Directory containing the on-demand install.
 * @param manifestDir - Directory containing the shipped `package-lock.json`.
 * @returns `{ ok: true }` on an exact match, otherwise `{ ok: false, reason }`.
 * @throws When the shipped manifest is unreadable or lacks a pinned version.
 */
export function verifyCopilotSdkInstall(
  fallbackDir: string,
  manifestDir: string,
): CopilotSdkVerifyResult {
  const manifestLockPath = path.join(manifestDir, "package-lock.json");
  let manifestLock: CopilotSdkLockFile;
  try {
    manifestLock = readCopilotSdkLockFile(manifestLockPath);
  } catch (err) {
    throw new Error(
      `[copilot] cannot read pinned SDK manifest at ${manifestLockPath}: ${
        err instanceof Error ? err.message : String(err)
      }`,
      { cause: err },
    );
  }

  // Validate the shipped manifest before touching the install tree: a broken
  // manifest is a packaging error and must surface whether the fallback dir
  // is empty, partial, or fully installed.
  const expectedVersions: Record<string, string> = {};
  for (const key of COPILOT_SDK_PINNED_PACKAGE_KEYS) {
    const expected = manifestLock.packages?.[key]?.version;
    if (!expected) {
      throw new Error(
        `[copilot] pinned SDK manifest at ${manifestLockPath} is missing a version for ${key}; refusing to verify install`,
      );
    }
    expectedVersions[key] = expected;
  }

  const installedLockPath = path.join(fallbackDir, "package-lock.json");
  if (!existsSync(installedLockPath)) {
    return { ok: false, reason: `no pinned package-lock.json at ${installedLockPath}` };
  }
  let installedLock: CopilotSdkLockFile;
  try {
    installedLock = readCopilotSdkLockFile(installedLockPath);
  } catch (err) {
    return {
      ok: false,
      reason: `unreadable fallback package-lock.json: ${
        err instanceof Error ? err.message : String(err)
      }`,
    };
  }

  for (const key of COPILOT_SDK_PINNED_PACKAGE_KEYS) {
    const expected = expectedVersions[key];

    // 1) The installed lock must pin the same version as the manifest.
    const actualInLock = installedLock.packages?.[key]?.version;
    if (actualInLock !== expected) {
      return {
        ok: false,
        reason: `${key} lock drift: installed=${actualInLock ?? "(missing)"}, pinned=${expected}`,
      };
    }

    // 2) The package on disk must actually be that version.
    const pkgJsonPath = path.join(fallbackDir, key, "package.json");
    if (!existsSync(pkgJsonPath)) {
      return { ok: false, reason: `missing installed package ${key}` };
    }
    try {
      const actualVersion = (JSON.parse(readFileSync(pkgJsonPath, "utf8")) as { version?: string })
        .version;
      if (actualVersion !== expected) {
        return {
          ok: false,
          reason: `${key} version drift: installed=${actualVersion ?? "(missing)"}, pinned=${expected}`,
        };
      }
    } catch (err) {
      return {
        ok: false,
        reason: `unreadable ${key}/package.json: ${
          err instanceof Error ? err.message : String(err)
        }`,
      };
    }
  }

  // 3) The remaining lock entries must match too (key order is ignored).
  if (stableStringifyJson(installedLock) !== stableStringifyJson(manifestLock)) {
    return {
      ok: false,
      reason: "fallback package-lock drift: installed lock does not match pinned manifest",
    };
  }

  return { ok: true };
}

/**
 * Installs the pinned Copilot SDK graph into the fallback dir unless a
 * matching install is already present.
 *
 * Steps: verify the existing install; if it does not match, stage the shipped
 * `package.json` + `package-lock.json` into the fallback dir, run the install
 * command (by default `npm ci`), then verify again.
 *
 * @param options - Optional overrides; see {@link InstallCopilotSdkOptions}.
 * @returns `installed: false` when the existing install already matched,
 *   `installed: true` after a successful (re)install.
 * @throws When the shipped manifest is missing or broken, when the install
 *   command rejects, or when the post-install verification still fails.
 */
export async function installCopilotSdk(
  options: InstallCopilotSdkOptions = {},
): Promise<InstallCopilotSdkResult> {
  const fallbackDir = options.fallbackDir ?? resolveCopilotSdkFallbackDir();
  const spec = options.spec ?? COPILOT_SDK_SPEC;
  const logger = options.logger ?? (() => undefined);
  const manifestDir = options.manifestDir ?? COPILOT_SDK_INSTALL_MANIFEST_DIR;

  const verify = verifyCopilotSdkInstall(fallbackDir, manifestDir);
  if (verify.ok) {
    logger(
      `[copilot] @github/copilot-sdk already installed at ${fallbackDir} (pinned graph matches)`,
    );
    return { installed: false, fallbackDir, spec };
  }
  if (isCopilotSdkInstalled(fallbackDir)) {
    // Stale, partial, or manually placed tree: log the drift before `npm ci`
    // wipes node_modules and reinstalls from the pinned lock.
    logger(
      `[copilot] reinstalling Copilot SDK: ${verify.reason ?? "fallback install does not match pinned manifest"}`,
    );
  }

  mkdirSync(fallbackDir, { recursive: true });
  // Stage the pinned package.json + package-lock.json so the following
  // `npm ci` resolves the reviewed dependency graph. Prior copies are
  // overwritten on purpose so a bumped manifest in a later openclaw release
  // re-pins existing user installs.
  for (const file of ["package.json", "package-lock.json"]) {
    const source = path.join(manifestDir, file);
    if (!existsSync(source)) {
      throw new Error(
        `[copilot] missing Copilot SDK install manifest at ${source}; expected the openclaw build to copy src/commands/copilot-sdk-install-manifest/`,
      );
    }
    copyFileSync(source, path.join(fallbackDir, file));
  }

  const runInstall = options.runInstall ?? defaultRunInstall;
  logger(`[copilot] installing ${spec} into ${fallbackDir} (npm ci against pinned manifest) ...`);
  await runInstall({ dir: fallbackDir, spec, manifestDir });

  // A zero exit code is not trusted on its own; the tree must match the pin.
  const postVerify = verifyCopilotSdkInstall(fallbackDir, manifestDir);
  if (!postVerify.ok) {
    throw new Error(
      `[copilot] install of ${spec} reported success but the resulting fallback graph does not match the pinned manifest at ${manifestDir}: ${
        postVerify.reason ?? "unknown"
      }`,
    );
  }
  logger(`[copilot] installed ${spec}`);
  return { installed: true, fallbackDir, spec };
}
/**
 * Default install command: runs `npm ci` inside `cmd.dir`, streaming npm's
 * stdout/stderr to the parent process.
 *
 * `npm ci` requires the lockfile staged into `cmd.dir` and installs exactly
 * that graph, which is what makes installs deterministic across machines.
 * Install scripts stay enabled on purpose: the @github/copilot CLI has a
 * postinstall step that pulls the platform-specific binary.
 *
 * @throws When npm cannot be spawned, exits non-zero, or is killed by a signal.
 */
async function defaultRunInstall(cmd: {
  dir: string;
  spec: string;
  manifestDir: string;
}): Promise<void> {
  await new Promise<void>((resolve, reject) => {
    const child = spawn("npm", ["ci", "--no-audit", "--no-fund", "--loglevel=error"], {
      cwd: cmd.dir,
      stdio: ["ignore", "inherit", "inherit"],
      // npm is a .cmd shim on Windows and needs a shell to launch.
      shell: process.platform === "win32",
    });
    child.on("error", reject);
    child.on("exit", (code, signal) => {
      if (code === 0) {
        resolve();
        return;
      }
      // `code` is null when the process was killed; report the signal as well
      // so the failure is diagnosable.
      const signalNote = signal ? ` (signal ${signal})` : "";
      reject(
        new Error(`[copilot] npm ci ${cmd.spec} exited with code ${code ?? "null"}${signalNote}`),
      );
    });
  });
}

/**
 * Wizard hook called from `src/plugins/provider-auth-choice.ts` after the user
 * selects a model. When the selection needs the Copilot SDK and no verified
 * install is present, it asks the user whether to install it now.
 *
 * Outcomes:
 * - selection does not need the SDK: `{ required: false }`, nothing prompted;
 * - verified install already present: `status: "already-installed"`;
 * - Nix mode: a note is shown and `status: "nix-mode"` is returned;
 * - user declines: `status: "declined"`;
 * - install succeeds: `status: "installed"`, or `"already-installed"` when the
 *   installer found a matching install and did nothing;
 * - install throws: the error is shown in a note and `status: "failed"` is
 *   returned (the hook itself does not rethrow).
 *
 * @param params.isInstalled - Test seam replacing the pinned-graph verification.
 * @param params.install - Test seam replacing {@link installCopilotSdk}.
 * @returns The unchanged `cfg` plus flags describing what happened.
 */
export async function ensureCopilotSdkForModelSelection(params: {
  cfg: OpenClawConfig;
  model?: string;
  prompter: WizardPrompter;
  runtime: RuntimeEnv;
  isInstalled?: () => boolean;
  install?: (options: InstallCopilotSdkOptions) => Promise<InstallCopilotSdkResult>;
}): Promise<CopilotSdkInstallResult> {
  if (!selectedModelShouldEnsureCopilotSdk({ cfg: params.cfg, model: params.model })) {
    return { cfg: params.cfg, required: false, installed: false };
  }

  const isInstalled =
    params.isInstalled ??
    (() =>
      verifyCopilotSdkInstall(resolveCopilotSdkFallbackDir(), COPILOT_SDK_INSTALL_MANIFEST_DIR).ok);
  if (isInstalled()) {
    return {
      cfg: params.cfg,
      required: true,
      installed: false,
      status: "already-installed",
    };
  }

  if (resolveIsNixMode()) {
    await params.prompter.note(
      "Nix mode detected (OPENCLAW_NIX_MODE=1). The Copilot agent runtime SDK cannot be auto-installed; add the pinned @github/copilot-sdk manifest dependency to the Nix-managed OpenClaw package set, then rebuild.",
      COPILOT_SDK_PACKAGE_LABEL,
    );
    return { cfg: params.cfg, required: true, installed: false, status: "nix-mode" };
  }

  const proceed = await params.prompter.confirm({
    message:
      "The Copilot agent runtime needs @github/copilot-sdk (~260 MB on first install, downloads the @github/copilot CLI binary for your platform). Install now?",
    initialValue: true,
  });

  if (!proceed) {
    await params.prompter.note(
      "Skipped. The Copilot agent runtime will fail at first invocation with an install message. Re-run setup to retry; the pinned dependency graph ships with openclaw under src/commands/copilot-sdk-install-manifest/.",
      COPILOT_SDK_PACKAGE_LABEL,
    );
    return { cfg: params.cfg, required: true, installed: false, status: "declined" };
  }

  const progress = params.prompter.progress(`Installing ${COPILOT_SDK_PACKAGE_LABEL}`);
  try {
    const installer = params.install ?? installCopilotSdk;
    const result = await installer({
      logger: (message) => {
        progress.update(message);
        params.runtime.log(message);
      },
    });
    progress.stop(result.installed ? "Installed." : "Already installed.");
    return {
      cfg: params.cfg,
      required: true,
      installed: result.installed,
      // Keep the status in step with `installed` and the progress message: the
      // installer reports `installed: false` when the pinned graph was already
      // in place and nothing was run.
      status: result.installed ? "installed" : "already-installed",
    };
  } catch (err) {
    progress.stop("Install failed.");
    const message = err instanceof Error ? err.message : String(err);
    await params.prompter.note(
      `Install failed: ${message}\n\nRe-run setup to retry the install (the pinned dependency graph ships with openclaw under src/commands/copilot-sdk-install-manifest/).`,
      COPILOT_SDK_PACKAGE_LABEL,
    );
    return { cfg: params.cfg, required: true, installed: false, status: "failed" };
  }
}
clearActiveEmbeddedRun, @@ -170,6 +179,43 @@ export type { } from "../agents/codex-mcp-config.types.js"; export { normalizeProviderToolSchemas } from "../agents/embedded-agent-runner/tool-schema-runtime.js"; +export async function detectAndLoadAgentHarnessPromptImages(params: { + prompt: string; + workspaceDir: string; + model: { input?: string[] }; + existingImages?: ImageContent[]; + imageOrder?: PromptImageOrderEntry[]; + config?: import("../config/types.openclaw.js").OpenClawConfig; + workspaceOnly?: boolean; + localRoots?: readonly string[]; + sandbox?: { root: string; bridge: SandboxFsBridge }; +}): Promise<{ + images: ImageContent[]; + detectedRefs: Array<{ raw: string; resolved: string; type: "path" | "media-uri" }>; + loadedCount: number; + skippedCount: number; +}> { + const [{ resolveImageSanitizationLimits }, { detectAndLoadPromptImages }, { MAX_IMAGE_BYTES }] = + await Promise.all([ + import("../agents/image-sanitization.js"), + import("../agents/embedded-agent-runner/run/images.js"), + import("../media/constants.js"), + ]); + + return detectAndLoadPromptImages({ + prompt: params.prompt, + workspaceDir: params.workspaceDir, + model: params.model, + existingImages: params.existingImages, + imageOrder: params.imageOrder, + maxBytes: MAX_IMAGE_BYTES, + maxDimensionPx: resolveImageSanitizationLimits(params.config).maxDimensionPx, + workspaceOnly: params.workspaceOnly, + localRoots: params.localRoots, + sandbox: params.sandbox, + }); +} + export async function loadCodexBundleMcpThreadConfig( params: LoadCodexBundleMcpThreadConfigParams, ): Promise { @@ -177,6 +223,7 @@ export async function loadCodexBundleMcpThreadConfig( return load(params); } export { resolveSandboxContext } from "../agents/sandbox.js"; +export type { SandboxContext, SandboxWorkspaceAccess } from "../agents/sandbox.js"; export { hasSandboxBindContainerPathAliases, hasSandboxBindReadonlyHostShadows, diff --git a/src/plugins/provider-auth-choice.ts b/src/plugins/provider-auth-choice.ts 
index e9aa0cdda59b..bef2d01627b5 100644 --- a/src/plugins/provider-auth-choice.ts +++ b/src/plugins/provider-auth-choice.ts @@ -189,6 +189,15 @@ async function applyDefaultModelFromAuthChoice(params: { }); nextConfig = migrationResult.config; } + const { ensureCopilotSdkForModelSelection } = + await import("../commands/copilot-sdk-install.js"); + const copilotInstall = await ensureCopilotSdkForModelSelection({ + cfg: nextConfig, + model: params.selectedModel, + prompter: params.prompter, + runtime: params.runtime, + }); + nextConfig = copilotInstall.cfg; } await noteDefaultModelResult({ previousPrimary, diff --git a/test/scripts/build-all.test.ts b/test/scripts/build-all.test.ts index 0c24c1c0067c..a8457ff476fa 100644 --- a/test/scripts/build-all.test.ts +++ b/test/scripts/build-all.test.ts @@ -186,6 +186,7 @@ describe("resolveBuildAllSteps", () => { "check-plugin-sdk-exports", "plugins:assets:copy", "copy-hook-metadata", + "copy-copilot-sdk-manifest", "copy-export-html-templates", "ui:build", "write-build-info", diff --git a/test/scripts/bundled-plugin-build-entries.test.ts b/test/scripts/bundled-plugin-build-entries.test.ts index e070684c8e24..e01361974c17 100644 --- a/test/scripts/bundled-plugin-build-entries.test.ts +++ b/test/scripts/bundled-plugin-build-entries.test.ts @@ -105,6 +105,8 @@ describe("bundled plugin build entries", () => { const entries = listBundledPluginBuildEntries(); expect(entries["extensions/browser/test-support"]).toBeUndefined(); + expect(entries["extensions/comfy/test-helpers"]).toBeUndefined(); + expect(entries["extensions/minimax/provider-http.test-helpers"]).toBeUndefined(); }); it("discovers repo plugin build entries without directory scans", () => { diff --git a/ui/src/i18n/.i18n/ar.meta.json b/ui/src/i18n/.i18n/ar.meta.json index 0ac42e67703e..b452873031b3 100644 --- a/ui/src/i18n/.i18n/ar.meta.json +++ b/ui/src/i18n/.i18n/ar.meta.json @@ -1,15 +1,11 @@ { - "fallbackKeys": [ - "chat.queue.retry", - "chat.queue.retryQueuedMessage", 
- "chat.queue.retrySend" - ], - "generatedAt": "2026-05-28T16:05:45.085Z", + "fallbackKeys": [], + "generatedAt": "2026-05-28T18:48:31.545Z", "locale": "ar", "model": "gpt-5.5", "provider": "openai", "sourceHash": "7c867296be27a09ab0e35f76d2518f479e24ab667179c5b3fabf83d6c57f3ef9", "totalKeys": 1158, - "translatedKeys": 1155, + "translatedKeys": 1158, "workflow": 1 } diff --git a/ui/src/i18n/.i18n/de.meta.json b/ui/src/i18n/.i18n/de.meta.json index 486e8a0a18d0..fd3aee35db04 100644 --- a/ui/src/i18n/.i18n/de.meta.json +++ b/ui/src/i18n/.i18n/de.meta.json @@ -1,15 +1,11 @@ { - "fallbackKeys": [ - "chat.queue.retry", - "chat.queue.retryQueuedMessage", - "chat.queue.retrySend" - ], - "generatedAt": "2026-05-28T16:05:40.369Z", + "fallbackKeys": [], + "generatedAt": "2026-05-28T18:48:21.334Z", "locale": "de", "model": "gpt-5.5", "provider": "openai", "sourceHash": "7c867296be27a09ab0e35f76d2518f479e24ab667179c5b3fabf83d6c57f3ef9", "totalKeys": 1158, - "translatedKeys": 1155, + "translatedKeys": 1158, "workflow": 1 } diff --git a/ui/src/i18n/.i18n/es.meta.json b/ui/src/i18n/.i18n/es.meta.json index 0b47185c55f6..03fb6a9d038a 100644 --- a/ui/src/i18n/.i18n/es.meta.json +++ b/ui/src/i18n/.i18n/es.meta.json @@ -1,15 +1,11 @@ { - "fallbackKeys": [ - "chat.queue.retry", - "chat.queue.retryQueuedMessage", - "chat.queue.retrySend" - ], - "generatedAt": "2026-05-28T16:05:41.157Z", + "fallbackKeys": [], + "generatedAt": "2026-05-28T18:48:23.802Z", "locale": "es", "model": "gpt-5.5", "provider": "openai", "sourceHash": "7c867296be27a09ab0e35f76d2518f479e24ab667179c5b3fabf83d6c57f3ef9", "totalKeys": 1158, - "translatedKeys": 1155, + "translatedKeys": 1158, "workflow": 1 } diff --git a/ui/src/i18n/.i18n/fa.meta.json b/ui/src/i18n/.i18n/fa.meta.json index 1d126ea3418d..49fd54d95616 100644 --- a/ui/src/i18n/.i18n/fa.meta.json +++ b/ui/src/i18n/.i18n/fa.meta.json @@ -1,15 +1,11 @@ { - "fallbackKeys": [ - "chat.queue.retry", - "chat.queue.retryQueuedMessage", - "chat.queue.retrySend" - 
], - "generatedAt": "2026-05-28T16:05:52.569Z", + "fallbackKeys": [], + "generatedAt": "2026-05-28T18:48:52.280Z", "locale": "fa", "model": "gpt-5.5", "provider": "openai", "sourceHash": "7c867296be27a09ab0e35f76d2518f479e24ab667179c5b3fabf83d6c57f3ef9", "totalKeys": 1158, - "translatedKeys": 1155, + "translatedKeys": 1158, "workflow": 1 } diff --git a/ui/src/i18n/.i18n/fr.meta.json b/ui/src/i18n/.i18n/fr.meta.json index 6e1c64e8d620..b0bd29320337 100644 --- a/ui/src/i18n/.i18n/fr.meta.json +++ b/ui/src/i18n/.i18n/fr.meta.json @@ -1,15 +1,11 @@ { - "fallbackKeys": [ - "chat.queue.retry", - "chat.queue.retryQueuedMessage", - "chat.queue.retrySend" - ], - "generatedAt": "2026-05-28T16:05:44.275Z", + "fallbackKeys": [], + "generatedAt": "2026-05-28T18:48:29.028Z", "locale": "fr", "model": "gpt-5.5", "provider": "openai", "sourceHash": "7c867296be27a09ab0e35f76d2518f479e24ab667179c5b3fabf83d6c57f3ef9", "totalKeys": 1158, - "translatedKeys": 1155, + "translatedKeys": 1158, "workflow": 1 } diff --git a/ui/src/i18n/.i18n/id.meta.json b/ui/src/i18n/.i18n/id.meta.json index da3ba04daa59..4dcf40c0db0c 100644 --- a/ui/src/i18n/.i18n/id.meta.json +++ b/ui/src/i18n/.i18n/id.meta.json @@ -1,15 +1,11 @@ { - "fallbackKeys": [ - "chat.queue.retry", - "chat.queue.retryQueuedMessage", - "chat.queue.retrySend" - ], - "generatedAt": "2026-05-28T16:05:48.486Z", + "fallbackKeys": [], + "generatedAt": "2026-05-28T18:48:42.075Z", "locale": "id", "model": "gpt-5.5", "provider": "openai", "sourceHash": "7c867296be27a09ab0e35f76d2518f479e24ab667179c5b3fabf83d6c57f3ef9", "totalKeys": 1158, - "translatedKeys": 1155, + "translatedKeys": 1158, "workflow": 1 } diff --git a/ui/src/i18n/.i18n/it.meta.json b/ui/src/i18n/.i18n/it.meta.json index 80544c821bb1..1aa8e0f52ce2 100644 --- a/ui/src/i18n/.i18n/it.meta.json +++ b/ui/src/i18n/.i18n/it.meta.json @@ -1,15 +1,11 @@ { - "fallbackKeys": [ - "chat.queue.retry", - "chat.queue.retryQueuedMessage", - "chat.queue.retrySend" - ], - "generatedAt": 
"2026-05-28T16:05:45.949Z", + "fallbackKeys": [], + "generatedAt": "2026-05-28T18:48:33.241Z", "locale": "it", "model": "gpt-5.5", "provider": "openai", "sourceHash": "7c867296be27a09ab0e35f76d2518f479e24ab667179c5b3fabf83d6c57f3ef9", "totalKeys": 1158, - "translatedKeys": 1155, + "translatedKeys": 1158, "workflow": 1 } diff --git a/ui/src/i18n/.i18n/ja-JP.meta.json b/ui/src/i18n/.i18n/ja-JP.meta.json index 3d1981b740b0..0d423e3f4a25 100644 --- a/ui/src/i18n/.i18n/ja-JP.meta.json +++ b/ui/src/i18n/.i18n/ja-JP.meta.json @@ -1,15 +1,11 @@ { - "fallbackKeys": [ - "chat.queue.retry", - "chat.queue.retryQueuedMessage", - "chat.queue.retrySend" - ], - "generatedAt": "2026-05-28T16:05:41.993Z", + "fallbackKeys": [], + "generatedAt": "2026-05-28T18:48:25.623Z", "locale": "ja-JP", "model": "gpt-5.5", "provider": "openai", "sourceHash": "7c867296be27a09ab0e35f76d2518f479e24ab667179c5b3fabf83d6c57f3ef9", "totalKeys": 1158, - "translatedKeys": 1155, + "translatedKeys": 1158, "workflow": 1 } diff --git a/ui/src/i18n/.i18n/ko.meta.json b/ui/src/i18n/.i18n/ko.meta.json index 1cb801912aee..2279c85cc014 100644 --- a/ui/src/i18n/.i18n/ko.meta.json +++ b/ui/src/i18n/.i18n/ko.meta.json @@ -1,15 +1,11 @@ { - "fallbackKeys": [ - "chat.queue.retry", - "chat.queue.retryQueuedMessage", - "chat.queue.retrySend" - ], - "generatedAt": "2026-05-28T16:05:43.457Z", + "fallbackKeys": [], + "generatedAt": "2026-05-28T18:48:27.331Z", "locale": "ko", "model": "gpt-5.5", "provider": "openai", "sourceHash": "7c867296be27a09ab0e35f76d2518f479e24ab667179c5b3fabf83d6c57f3ef9", "totalKeys": 1158, - "translatedKeys": 1155, + "translatedKeys": 1158, "workflow": 1 } diff --git a/ui/src/i18n/.i18n/nl.meta.json b/ui/src/i18n/.i18n/nl.meta.json index aee4916825c4..46d09dcbf096 100644 --- a/ui/src/i18n/.i18n/nl.meta.json +++ b/ui/src/i18n/.i18n/nl.meta.json @@ -1,15 +1,11 @@ { - "fallbackKeys": [ - "chat.queue.retry", - "chat.queue.retryQueuedMessage", - "chat.queue.retrySend" - ], - "generatedAt": 
"2026-05-28T16:05:51.760Z", + "fallbackKeys": [], + "generatedAt": "2026-05-28T18:48:49.875Z", "locale": "nl", "model": "gpt-5.5", "provider": "openai", "sourceHash": "7c867296be27a09ab0e35f76d2518f479e24ab667179c5b3fabf83d6c57f3ef9", "totalKeys": 1158, - "translatedKeys": 1155, + "translatedKeys": 1158, "workflow": 1 } diff --git a/ui/src/i18n/.i18n/pl.meta.json b/ui/src/i18n/.i18n/pl.meta.json index 7ea558461023..f7d48f93d819 100644 --- a/ui/src/i18n/.i18n/pl.meta.json +++ b/ui/src/i18n/.i18n/pl.meta.json @@ -1,15 +1,11 @@ { - "fallbackKeys": [ - "chat.queue.retry", - "chat.queue.retryQueuedMessage", - "chat.queue.retrySend" - ], - "generatedAt": "2026-05-28T16:05:49.306Z", + "fallbackKeys": [], + "generatedAt": "2026-05-28T18:48:43.834Z", "locale": "pl", "model": "gpt-5.5", "provider": "openai", "sourceHash": "7c867296be27a09ab0e35f76d2518f479e24ab667179c5b3fabf83d6c57f3ef9", "totalKeys": 1158, - "translatedKeys": 1155, + "translatedKeys": 1158, "workflow": 1 } diff --git a/ui/src/i18n/.i18n/pt-BR.meta.json b/ui/src/i18n/.i18n/pt-BR.meta.json index a2db29b2395d..261b49e1c5dc 100644 --- a/ui/src/i18n/.i18n/pt-BR.meta.json +++ b/ui/src/i18n/.i18n/pt-BR.meta.json @@ -1,15 +1,11 @@ { - "fallbackKeys": [ - "chat.queue.retry", - "chat.queue.retryQueuedMessage", - "chat.queue.retrySend" - ], - "generatedAt": "2026-05-28T16:05:39.541Z", + "fallbackKeys": [], + "generatedAt": "2026-05-28T18:48:18.990Z", "locale": "pt-BR", "model": "gpt-5.5", "provider": "openai", "sourceHash": "7c867296be27a09ab0e35f76d2518f479e24ab667179c5b3fabf83d6c57f3ef9", "totalKeys": 1158, - "translatedKeys": 1155, + "translatedKeys": 1158, "workflow": 1 } diff --git a/ui/src/i18n/.i18n/th.meta.json b/ui/src/i18n/.i18n/th.meta.json index 74e2ed41410d..3d70f3264571 100644 --- a/ui/src/i18n/.i18n/th.meta.json +++ b/ui/src/i18n/.i18n/th.meta.json @@ -1,15 +1,11 @@ { - "fallbackKeys": [ - "chat.queue.retry", - "chat.queue.retryQueuedMessage", - "chat.queue.retrySend" - ], - "generatedAt": 
"2026-05-28T16:05:50.121Z", + "fallbackKeys": [], + "generatedAt": "2026-05-28T18:48:45.871Z", "locale": "th", "model": "gpt-5.5", "provider": "openai", "sourceHash": "7c867296be27a09ab0e35f76d2518f479e24ab667179c5b3fabf83d6c57f3ef9", "totalKeys": 1158, - "translatedKeys": 1155, + "translatedKeys": 1158, "workflow": 1 } diff --git a/ui/src/i18n/.i18n/tr.meta.json b/ui/src/i18n/.i18n/tr.meta.json index e95cab728c0e..584576748f9c 100644 --- a/ui/src/i18n/.i18n/tr.meta.json +++ b/ui/src/i18n/.i18n/tr.meta.json @@ -1,15 +1,11 @@ { - "fallbackKeys": [ - "chat.queue.retry", - "chat.queue.retryQueuedMessage", - "chat.queue.retrySend" - ], - "generatedAt": "2026-05-28T16:05:46.798Z", + "fallbackKeys": [], + "generatedAt": "2026-05-28T18:48:35.897Z", "locale": "tr", "model": "gpt-5.5", "provider": "openai", "sourceHash": "7c867296be27a09ab0e35f76d2518f479e24ab667179c5b3fabf83d6c57f3ef9", "totalKeys": 1158, - "translatedKeys": 1155, + "translatedKeys": 1158, "workflow": 1 } diff --git a/ui/src/i18n/.i18n/uk.meta.json b/ui/src/i18n/.i18n/uk.meta.json index 8a68761b91bd..c8c308b8b741 100644 --- a/ui/src/i18n/.i18n/uk.meta.json +++ b/ui/src/i18n/.i18n/uk.meta.json @@ -1,15 +1,11 @@ { - "fallbackKeys": [ - "chat.queue.retry", - "chat.queue.retryQueuedMessage", - "chat.queue.retrySend" - ], - "generatedAt": "2026-05-28T16:05:47.638Z", + "fallbackKeys": [], + "generatedAt": "2026-05-28T18:48:39.972Z", "locale": "uk", "model": "gpt-5.5", "provider": "openai", "sourceHash": "7c867296be27a09ab0e35f76d2518f479e24ab667179c5b3fabf83d6c57f3ef9", "totalKeys": 1158, - "translatedKeys": 1155, + "translatedKeys": 1158, "workflow": 1 } diff --git a/ui/src/i18n/.i18n/vi.meta.json b/ui/src/i18n/.i18n/vi.meta.json index f4cfe1089563..f139e0b176a2 100644 --- a/ui/src/i18n/.i18n/vi.meta.json +++ b/ui/src/i18n/.i18n/vi.meta.json @@ -1,15 +1,11 @@ { - "fallbackKeys": [ - "chat.queue.retry", - "chat.queue.retryQueuedMessage", - "chat.queue.retrySend" - ], - "generatedAt": "2026-05-28T16:05:50.934Z", 
+ "fallbackKeys": [], + "generatedAt": "2026-05-28T18:48:47.804Z", "locale": "vi", "model": "gpt-5.5", "provider": "openai", "sourceHash": "7c867296be27a09ab0e35f76d2518f479e24ab667179c5b3fabf83d6c57f3ef9", "totalKeys": 1158, - "translatedKeys": 1155, + "translatedKeys": 1158, "workflow": 1 } diff --git a/ui/src/i18n/.i18n/zh-CN.meta.json b/ui/src/i18n/.i18n/zh-CN.meta.json index 7123907add68..a183b16a9ea5 100644 --- a/ui/src/i18n/.i18n/zh-CN.meta.json +++ b/ui/src/i18n/.i18n/zh-CN.meta.json @@ -1,15 +1,11 @@ { - "fallbackKeys": [ - "chat.queue.retry", - "chat.queue.retryQueuedMessage", - "chat.queue.retrySend" - ], - "generatedAt": "2026-05-28T16:05:37.925Z", + "fallbackKeys": [], + "generatedAt": "2026-05-28T18:48:15.047Z", "locale": "zh-CN", "model": "gpt-5.5", "provider": "openai", "sourceHash": "7c867296be27a09ab0e35f76d2518f479e24ab667179c5b3fabf83d6c57f3ef9", "totalKeys": 1158, - "translatedKeys": 1155, + "translatedKeys": 1158, "workflow": 1 } diff --git a/ui/src/i18n/.i18n/zh-TW.meta.json b/ui/src/i18n/.i18n/zh-TW.meta.json index be8135eb63ae..643dd295946e 100644 --- a/ui/src/i18n/.i18n/zh-TW.meta.json +++ b/ui/src/i18n/.i18n/zh-TW.meta.json @@ -1,15 +1,11 @@ { - "fallbackKeys": [ - "chat.queue.retry", - "chat.queue.retryQueuedMessage", - "chat.queue.retrySend" - ], - "generatedAt": "2026-05-28T16:05:38.739Z", + "fallbackKeys": [], + "generatedAt": "2026-05-28T18:48:17.187Z", "locale": "zh-TW", "model": "gpt-5.5", "provider": "openai", "sourceHash": "7c867296be27a09ab0e35f76d2518f479e24ab667179c5b3fabf83d6c57f3ef9", "totalKeys": 1158, - "translatedKeys": 1155, + "translatedKeys": 1158, "workflow": 1 } diff --git a/ui/src/i18n/locales/ar.ts b/ui/src/i18n/locales/ar.ts index 3ca7e5b12dca..e4b6f27169bc 100644 --- a/ui/src/i18n/locales/ar.ts +++ b/ui/src/i18n/locales/ar.ts @@ -1111,9 +1111,9 @@ export const ar: TranslationMap = { sendMessage: "Send message", }, queue: { - retry: "Retry", - retrySend: "Retry send", - retryQueuedMessage: "Retry queued message", 
+ retry: "إعادة المحاولة", + retrySend: "إعادة الإرسال", + retryQueuedMessage: "إعادة محاولة الرسالة في قائمة الانتظار", }, composer: { placeholder: "Message {name} (Enter to send)", diff --git a/ui/src/i18n/locales/de.ts b/ui/src/i18n/locales/de.ts index 20f54abfdf1b..3c794610ebda 100644 --- a/ui/src/i18n/locales/de.ts +++ b/ui/src/i18n/locales/de.ts @@ -1135,9 +1135,9 @@ export const de: TranslationMap = { sendMessage: "Send message", }, queue: { - retry: "Retry", - retrySend: "Retry send", - retryQueuedMessage: "Retry queued message", + retry: "Erneut versuchen", + retrySend: "Senden erneut versuchen", + retryQueuedMessage: "Nachricht in der Warteschlange erneut versuchen", }, composer: { placeholder: "Message {name} (Enter to send)", diff --git a/ui/src/i18n/locales/es.ts b/ui/src/i18n/locales/es.ts index 6b2719e3d70a..7ec0669908b6 100644 --- a/ui/src/i18n/locales/es.ts +++ b/ui/src/i18n/locales/es.ts @@ -1132,9 +1132,9 @@ export const es: TranslationMap = { sendMessage: "Send message", }, queue: { - retry: "Retry", - retrySend: "Retry send", - retryQueuedMessage: "Retry queued message", + retry: "Reintentar", + retrySend: "Reintentar envío", + retryQueuedMessage: "Reintentar mensaje en cola", }, composer: { placeholder: "Message {name} (Enter to send)", diff --git a/ui/src/i18n/locales/fa.ts b/ui/src/i18n/locales/fa.ts index 29ab855a1ab6..73855832ed1d 100644 --- a/ui/src/i18n/locales/fa.ts +++ b/ui/src/i18n/locales/fa.ts @@ -1128,9 +1128,9 @@ export const fa: TranslationMap = { sendMessage: "Send message", }, queue: { - retry: "Retry", - retrySend: "Retry send", - retryQueuedMessage: "Retry queued message", + retry: "تلاش مجدد", + retrySend: "ارسال مجدد", + retryQueuedMessage: "تلاش مجدد برای پیام در صف", }, composer: { placeholder: "Message {name} (Enter to send)", diff --git a/ui/src/i18n/locales/fr.ts b/ui/src/i18n/locales/fr.ts index eaee471e6019..f56d83b33395 100644 --- a/ui/src/i18n/locales/fr.ts +++ b/ui/src/i18n/locales/fr.ts @@ -1139,9 +1139,9 @@ 
export const fr: TranslationMap = { sendMessage: "Send message", }, queue: { - retry: "Retry", - retrySend: "Retry send", - retryQueuedMessage: "Retry queued message", + retry: "Réessayer", + retrySend: "Réessayer l’envoi", + retryQueuedMessage: "Réessayer le message en file d’attente", }, composer: { placeholder: "Message {name} (Enter to send)", diff --git a/ui/src/i18n/locales/id.ts b/ui/src/i18n/locales/id.ts index 7e86a91c33c6..f40edbd44c50 100644 --- a/ui/src/i18n/locales/id.ts +++ b/ui/src/i18n/locales/id.ts @@ -1126,9 +1126,9 @@ export const id: TranslationMap = { sendMessage: "Send message", }, queue: { - retry: "Retry", - retrySend: "Retry send", - retryQueuedMessage: "Retry queued message", + retry: "Coba lagi", + retrySend: "Coba kirim lagi", + retryQueuedMessage: "Coba lagi pesan dalam antrean", }, composer: { placeholder: "Message {name} (Enter to send)", diff --git a/ui/src/i18n/locales/it.ts b/ui/src/i18n/locales/it.ts index 1a684c09c87d..63a246490c15 100644 --- a/ui/src/i18n/locales/it.ts +++ b/ui/src/i18n/locales/it.ts @@ -1133,9 +1133,9 @@ export const it: TranslationMap = { sendMessage: "Send message", }, queue: { - retry: "Retry", - retrySend: "Retry send", - retryQueuedMessage: "Retry queued message", + retry: "Riprova", + retrySend: "Riprova invio", + retryQueuedMessage: "Riprova messaggio in coda", }, composer: { placeholder: "Message {name} (Enter to send)", diff --git a/ui/src/i18n/locales/ja-JP.ts b/ui/src/i18n/locales/ja-JP.ts index 1eb96ba56f48..5f1ad4c51023 100644 --- a/ui/src/i18n/locales/ja-JP.ts +++ b/ui/src/i18n/locales/ja-JP.ts @@ -1130,9 +1130,9 @@ export const ja_JP: TranslationMap = { sendMessage: "Send message", }, queue: { - retry: "Retry", - retrySend: "Retry send", - retryQueuedMessage: "Retry queued message", + retry: "再試行", + retrySend: "送信を再試行", + retryQueuedMessage: "キュー内のメッセージを再試行", }, composer: { placeholder: "Message {name} (Enter to send)", diff --git a/ui/src/i18n/locales/ko.ts b/ui/src/i18n/locales/ko.ts index 
0099e3f444ed..25fe1d79ffc4 100644 --- a/ui/src/i18n/locales/ko.ts +++ b/ui/src/i18n/locales/ko.ts @@ -1119,9 +1119,9 @@ export const ko: TranslationMap = { sendMessage: "Send message", }, queue: { - retry: "Retry", - retrySend: "Retry send", - retryQueuedMessage: "Retry queued message", + retry: "다시 시도", + retrySend: "전송 다시 시도", + retryQueuedMessage: "대기 중인 메시지 다시 시도", }, composer: { placeholder: "Message {name} (Enter to send)", diff --git a/ui/src/i18n/locales/nl.ts b/ui/src/i18n/locales/nl.ts index 99896677e0b1..3f215ca64359 100644 --- a/ui/src/i18n/locales/nl.ts +++ b/ui/src/i18n/locales/nl.ts @@ -1131,9 +1131,9 @@ export const nl: TranslationMap = { sendMessage: "Send message", }, queue: { - retry: "Retry", - retrySend: "Retry send", - retryQueuedMessage: "Retry queued message", + retry: "Opnieuw proberen", + retrySend: "Verzenden opnieuw proberen", + retryQueuedMessage: "Bericht in wachtrij opnieuw proberen", }, composer: { placeholder: "Message {name} (Enter to send)", diff --git a/ui/src/i18n/locales/pl.ts b/ui/src/i18n/locales/pl.ts index 01e2a54fbf49..7dbec7ec0367 100644 --- a/ui/src/i18n/locales/pl.ts +++ b/ui/src/i18n/locales/pl.ts @@ -1131,9 +1131,9 @@ export const pl: TranslationMap = { sendMessage: "Send message", }, queue: { - retry: "Retry", - retrySend: "Retry send", - retryQueuedMessage: "Retry queued message", + retry: "Ponów", + retrySend: "Ponów wysyłanie", + retryQueuedMessage: "Ponów wysłanie wiadomości w kolejce", }, composer: { placeholder: "Message {name} (Enter to send)", diff --git a/ui/src/i18n/locales/pt-BR.ts b/ui/src/i18n/locales/pt-BR.ts index 72407ac4a4ed..d1cdba10594b 100644 --- a/ui/src/i18n/locales/pt-BR.ts +++ b/ui/src/i18n/locales/pt-BR.ts @@ -1127,9 +1127,9 @@ export const pt_BR: TranslationMap = { sendMessage: "Send message", }, queue: { - retry: "Retry", - retrySend: "Retry send", - retryQueuedMessage: "Retry queued message", + retry: "Tentar novamente", + retrySend: "Tentar enviar novamente", + retryQueuedMessage: "Tentar 
novamente a mensagem na fila", }, composer: { placeholder: "Message {name} (Enter to send)", diff --git a/ui/src/i18n/locales/th.ts b/ui/src/i18n/locales/th.ts index c6fe0c0d4e1e..067a2a70046d 100644 --- a/ui/src/i18n/locales/th.ts +++ b/ui/src/i18n/locales/th.ts @@ -1096,9 +1096,9 @@ export const th: TranslationMap = { sendMessage: "Send message", }, queue: { - retry: "Retry", - retrySend: "Retry send", - retryQueuedMessage: "Retry queued message", + retry: "ลองอีกครั้ง", + retrySend: "ลองส่งอีกครั้ง", + retryQueuedMessage: "ลองส่งข้อความในคิวอีกครั้ง", }, composer: { placeholder: "Message {name} (Enter to send)", diff --git a/ui/src/i18n/locales/tr.ts b/ui/src/i18n/locales/tr.ts index d0025435cfa1..3102aa9cfd00 100644 --- a/ui/src/i18n/locales/tr.ts +++ b/ui/src/i18n/locales/tr.ts @@ -1132,9 +1132,9 @@ export const tr: TranslationMap = { sendMessage: "Send message", }, queue: { - retry: "Retry", - retrySend: "Retry send", - retryQueuedMessage: "Retry queued message", + retry: "Yeniden dene", + retrySend: "Göndermeyi yeniden dene", + retryQueuedMessage: "Kuyruktaki mesajı yeniden dene", }, composer: { placeholder: "Message {name} (Enter to send)", diff --git a/ui/src/i18n/locales/uk.ts b/ui/src/i18n/locales/uk.ts index 8d4fd5209638..3a269ecaec6d 100644 --- a/ui/src/i18n/locales/uk.ts +++ b/ui/src/i18n/locales/uk.ts @@ -1129,9 +1129,9 @@ export const uk: TranslationMap = { sendMessage: "Send message", }, queue: { - retry: "Retry", - retrySend: "Retry send", - retryQueuedMessage: "Retry queued message", + retry: "Повторити", + retrySend: "Повторити надсилання", + retryQueuedMessage: "Повторити надсилання повідомлення в черзі", }, composer: { placeholder: "Message {name} (Enter to send)", diff --git a/ui/src/i18n/locales/vi.ts b/ui/src/i18n/locales/vi.ts index 3fcf2502780b..a4bd998e5ba1 100644 --- a/ui/src/i18n/locales/vi.ts +++ b/ui/src/i18n/locales/vi.ts @@ -1118,9 +1118,9 @@ export const vi: TranslationMap = { sendMessage: "Send message", }, queue: { - retry: 
"Retry", - retrySend: "Retry send", - retryQueuedMessage: "Retry queued message", + retry: "Thử lại", + retrySend: "Thử gửi lại", + retryQueuedMessage: "Thử lại tin nhắn trong hàng đợi", }, composer: { placeholder: "Message {name} (Enter to send)", diff --git a/ui/src/i18n/locales/zh-CN.ts b/ui/src/i18n/locales/zh-CN.ts index c966c038aad7..a41f52d193e8 100644 --- a/ui/src/i18n/locales/zh-CN.ts +++ b/ui/src/i18n/locales/zh-CN.ts @@ -1091,9 +1091,9 @@ export const zh_CN: TranslationMap = { sendMessage: "发送消息", }, queue: { - retry: "Retry", - retrySend: "Retry send", - retryQueuedMessage: "Retry queued message", + retry: "重试", + retrySend: "重试发送", + retryQueuedMessage: "重试队列中的消息", }, composer: { placeholder: "给 {name} 发消息(Enter 发送)", diff --git a/ui/src/i18n/locales/zh-TW.ts b/ui/src/i18n/locales/zh-TW.ts index 3fd669150845..888bb37b2420 100644 --- a/ui/src/i18n/locales/zh-TW.ts +++ b/ui/src/i18n/locales/zh-TW.ts @@ -1093,9 +1093,9 @@ export const zh_TW: TranslationMap = { sendMessage: "Send message", }, queue: { - retry: "Retry", - retrySend: "Retry send", - retryQueuedMessage: "Retry queued message", + retry: "重試", + retrySend: "重試傳送", + retryQueuedMessage: "重試佇列中的訊息", }, composer: { placeholder: "Message {name} (Enter to send)",