From 1188aa3b81ef5c6494ab4d3d617222e2cc07ae2b Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 29 May 2026 06:10:42 +0100 Subject: [PATCH] feat: add Claude Opus 4.8 support (#87890) * feat: add Claude Opus 4.8 support * fix: omit Vertex Opus sampling overrides * fix: preserve Opus adaptive thinking levels * fix: clamp Anthropic max effort support * fix: use sha256 for QA mock call ids * fix: type Anthropic transport test model metadata * test: update PDF model default for Opus 4.8 --- .../workflows/control-ui-locale-refresh.yml | 2 +- .../openclaw-live-and-e2e-checks-reusable.yml | 2 +- .github/workflows/openclaw-release-checks.yml | 4 +- .../workflows/qa-live-transports-convex.yml | 6 +- docs/cli/crestodian.md | 6 +- docs/concepts/agent-runtimes.md | 6 +- docs/concepts/model-providers.md | 2 +- docs/concepts/qa-e2e-automation.md | 8 +- docs/gateway/config-agents.md | 8 +- docs/help/faq-models.md | 2 +- docs/plugins/sdk-agent-harness.md | 4 +- docs/providers/anthropic.md | 22 ++-- docs/reference/token-use.md | 2 +- docs/tools/thinking.md | 12 +- .../mantle-anthropic.runtime.ts | 1 + extensions/amazon-bedrock/discovery.test.ts | 52 ++++++++ extensions/amazon-bedrock/discovery.ts | 35 ++++- extensions/amazon-bedrock/index.test.ts | 81 ++++++++++++ .../provider-policy-api.test.ts | 20 +++ .../amazon-bedrock/register.sync.runtime.ts | 21 ++- .../amazon-bedrock/stream.runtime.test.ts | 26 ++++ extensions/amazon-bedrock/stream.runtime.ts | 12 +- extensions/amazon-bedrock/thinking-policy.ts | 16 +++ extensions/anthropic-vertex/index.test.ts | 17 +++ extensions/anthropic-vertex/index.ts | 6 +- .../anthropic-vertex/provider-catalog.ts | 11 ++ .../provider-policy-api.test.ts | 29 +++++ .../anthropic-vertex/provider-policy-api.ts | 8 ++ .../anthropic-vertex/stream-runtime.test.ts | 52 +++++++- extensions/anthropic-vertex/stream-runtime.ts | 21 ++- extensions/anthropic/claude-model-refs.ts | 26 ++-- extensions/anthropic/cli-catalog.ts | 23 ++-- extensions/anthropic/cli-constants.ts | 17 ++- extensions/anthropic/cli-migration.test.ts | 14 +- extensions/anthropic/cli-migration.ts | 2 +- extensions/anthropic/cli-shared.test.ts | 14 ++ extensions/anthropic/index.test.ts | 48 ++++++- .../anthropic/media-understanding-provider.ts | 2 +- extensions/anthropic/openclaw.plugin.json | 28 +++- .../anthropic/provider-policy-api.test.ts | 4 +- extensions/anthropic/register.runtime.ts | 122 ++++++++++++++++-- extensions/anthropic/stream-wrappers.ts | 2 + extensions/google/transport-stream.ts | 2 +- .../qa-lab/src/agentic-parity-report.test.ts | 80 ++++++------ .../qa-lab/src/agentic-parity-report.ts | 2 +- extensions/qa-lab/src/character-eval.test.ts | 10 +- extensions/qa-lab/src/cli.runtime.test.ts | 14 +- extensions/qa-lab/src/live-timeout.test.ts | 8 +- .../providers/live-frontier/character-eval.ts | 6 +- .../src/providers/live-frontier/parity.ts | 2 +- .../src/providers/mock-openai/server.test.ts | 54 ++++---- .../src/providers/mock-openai/server.ts | 14 +- .../src/providers/shared/mock-model-config.ts | 8 +- .../qa-lab/src/qa-gateway-config.test.ts | 8 +- extensions/qa-lab/src/suite-planning.test.ts | 4 +- .../qa-lab/src/suite.summary-json.test.ts | 6 +- packages/agent-core/src/llm.ts | 3 +- packages/agent-core/src/types.ts | 2 +- .../models/anthropic-opus-api-key-smoke.md | 6 +- .../anthropic-opus-setup-token-smoke.md | 6 +- .../e2e/crestodian-planner-docker-client.mjs | 2 +- src/agents/anthropic-transport-stream.test.ts | 55 +++++++- src/agents/anthropic-transport-stream.ts | 17 ++- src/agents/cli-runner/prepare.test.ts | 4 +- src/agents/cli-runner/prepare.ts | 4 +- src/agents/context.test.ts | 2 + src/agents/context.ts | 2 + .../embedded-agent-runner/utils.test.ts | 8 ++ src/agents/embedded-agent-runner/utils.ts | 13 +- src/agents/live-model-filter.ts | 1 + src/agents/model-compat.test.ts | 4 +- src/agents/model-selection.test.ts | 90 +++++++++++++ src/agents/model-thinking-default.ts | 12 +- src/agents/sessions/model-registry.ts | 1 + src/agents/sessions/model-resolver.ts | 2 +- src/agents/sessions/sdk.test.ts | 20 +++ src/agents/sessions/settings-manager.ts | 17 ++- .../tools/pdf-tool.model-config.test.ts | 2 +- src/config/defaults.ts | 2 +- src/config/model-alias-defaults.test.ts | 8 +- src/config/types.models.ts | 3 + src/crestodian/assistant-backends.ts | 2 +- src/crestodian/assistant.test.ts | 4 +- src/crestodian/operations.ts | 4 +- src/llm/model-utils.ts | 3 +- src/llm/providers/anthropic.test.ts | 28 +++- src/llm/providers/anthropic.ts | 23 ++-- src/llm/providers/azure-openai-responses.ts | 7 +- src/llm/providers/google-vertex.ts | 6 +- src/llm/providers/google.ts | 6 +- src/llm/providers/openai-codex-responses.ts | 7 +- src/llm/providers/openai-completions.ts | 7 +- src/llm/providers/openai-responses.ts | 7 +- src/llm/providers/simple-options.ts | 1 + src/llm/types.ts | 3 +- src/plugin-sdk/provider-model-shared.test.ts | 8 ++ src/plugin-sdk/provider-model-shared.ts | 11 ++ .../package-acceptance-workflow.test.ts | 2 +- 98 files changed, 1134 insertions(+), 295 deletions(-) create mode 100644 extensions/anthropic-vertex/provider-policy-api.test.ts create mode 100644 extensions/anthropic-vertex/provider-policy-api.ts create mode 100644 src/agents/embedded-agent-runner/utils.test.ts diff --git a/.github/workflows/control-ui-locale-refresh.yml b/.github/workflows/control-ui-locale-refresh.yml index 4529e86df0f3..f5c2e87c06db 100644 --- a/.github/workflows/control-ui-locale-refresh.yml +++ b/.github/workflows/control-ui-locale-refresh.yml @@ -138,7 +138,7 @@ jobs: OPENAI_API_KEY: ${{ secrets.OPENCLAW_DOCS_I18N_OPENAI_API_KEY || secrets.OPENAI_API_KEY }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} OPENCLAW_CONTROL_UI_I18N_PROVIDER: ${{ secrets.ANTHROPIC_API_KEY != '' && 'anthropic' || 'openai' }} - OPENCLAW_CONTROL_UI_I18N_MODEL: ${{ secrets.ANTHROPIC_API_KEY != '' && 'claude-opus-4-7' || vars.OPENCLAW_CI_OPENAI_MODEL_BARE }} + OPENCLAW_CONTROL_UI_I18N_MODEL: ${{ secrets.ANTHROPIC_API_KEY != '' && 'claude-opus-4-8' || vars.OPENCLAW_CI_OPENAI_MODEL_BARE }} OPENCLAW_CONTROL_UI_I18N_THINKING: low OPENCLAW_CONTROL_UI_I18N_AUTH_OPTIONAL: "1" LOCALE: ${{ matrix.locale }} diff --git a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml index 6daa69fb7d66..bff576662bd0 100644 --- a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml +++ b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml @@ -1932,7 +1932,7 @@ jobs: - suite_id: native-live-src-gateway-profiles-anthropic-opus suite_group: native-live-src-gateway-profiles-anthropic label: Native live gateway profiles Anthropic Opus - command: OPENCLAW_LIVE_GATEWAY_THINKING=low OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-7 node .release-harness/scripts/test-live-shard.mjs native-live-src-gateway-profiles + command: OPENCLAW_LIVE_GATEWAY_THINKING=low OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-8 node .release-harness/scripts/test-live-shard.mjs native-live-src-gateway-profiles timeout_minutes: 30 profile_env_only: false advisory: true diff --git a/.github/workflows/openclaw-release-checks.yml b/.github/workflows/openclaw-release-checks.yml index 1adcdb7966ae..896fc8e26fd9 100644 --- a/.github/workflows/openclaw-release-checks.yml +++ b/.github/workflows/openclaw-release-checks.yml @@ -813,7 +813,7 @@ jobs: alt_model="openai/gpt-5.5-alt" ;; baseline) - model="anthropic/claude-opus-4-7" + model="anthropic/claude-opus-4-8" alt_model="anthropic/claude-sonnet-4-6" ;; *) @@ -885,7 +885,7 @@ jobs: --candidate-summary .artifacts/qa-e2e/openai-candidate/qa-suite-summary.json \ --baseline-summary .artifacts/qa-e2e/anthropic-baseline/qa-suite-summary.json \ --candidate-label "${OPENCLAW_CI_OPENAI_MODEL}" \ - --baseline-label anthropic/claude-opus-4-7 \ + --baseline-label anthropic/claude-opus-4-8 \ --output-dir .artifacts/qa-e2e/parity - name: Upload parity artifacts diff --git a/.github/workflows/qa-live-transports-convex.yml b/.github/workflows/qa-live-transports-convex.yml index f4c49e713e59..e57933866fee 100644 --- a/.github/workflows/qa-live-transports-convex.yml +++ b/.github/workflows/qa-live-transports-convex.yml @@ -199,13 +199,13 @@ jobs: --alt-model openai/gpt-5.5-alt \ --output-dir .artifacts/qa-e2e/openai-candidate - - name: Run Opus 4.7 lane + - name: Run Opus 4.8 lane run: | pnpm openclaw qa suite \ --provider-mode mock-openai \ --parity-pack agentic \ --concurrency "${QA_PARITY_CONCURRENCY}" \ - --model anthropic/claude-opus-4-7 \ + --model anthropic/claude-opus-4-8 \ --alt-model anthropic/claude-sonnet-4-6 \ --output-dir .artifacts/qa-e2e/anthropic-baseline @@ -216,7 +216,7 @@ jobs: --candidate-summary .artifacts/qa-e2e/openai-candidate/qa-suite-summary.json \ --baseline-summary .artifacts/qa-e2e/anthropic-baseline/qa-suite-summary.json \ --candidate-label "${OPENCLAW_CI_OPENAI_MODEL}" \ - --baseline-label anthropic/claude-opus-4-7 \ + --baseline-label anthropic/claude-opus-4-8 \ --output-dir .artifacts/qa-e2e/parity - name: Upload parity artifacts diff --git a/docs/cli/crestodian.md b/docs/cli/crestodian.md index ec38c1600931..66f92e0f08c8 100644 --- a/docs/cli/crestodian.md +++ b/docs/cli/crestodian.md @@ -157,8 +157,8 @@ order and tells you what it chose: - existing explicit model, if already configured - `OPENAI_API_KEY` -> `openai/gpt-5.5` -- `ANTHROPIC_API_KEY` -> `anthropic/claude-opus-4-7` -- Claude Code CLI -> `claude-cli/claude-opus-4-7` +- `ANTHROPIC_API_KEY` -> `anthropic/claude-opus-4-8` +- Claude Code CLI -> `claude-cli/claude-opus-4-8` - Codex -> `openai/gpt-5.5` through the Codex app-server harness If none are available, setup still writes the default workspace and leaves the @@ -173,7 +173,7 @@ planner turn through OpenClaw's normal runtime paths. It first uses the configured OpenClaw model. If no configured model is usable yet, it can fall back to local runtimes already present on the machine: -- Claude Code CLI: `claude-cli/claude-opus-4-7` +- Claude Code CLI: `claude-cli/claude-opus-4-8` - Codex app-server harness: `openai/gpt-5.5` The model-assisted planner cannot mutate config directly. It must translate the diff --git a/docs/concepts/agent-runtimes.md b/docs/concepts/agent-runtimes.md index 7c6d06085bec..2d9a2036703c 100644 --- a/docs/concepts/agent-runtimes.md +++ b/docs/concepts/agent-runtimes.md @@ -35,7 +35,7 @@ There are two runtime families: is the built-in `openclaw` runtime plus registered plugin harnesses such as `codex` and `copilot`. - **CLI backends** run a local CLI process while keeping the model ref - canonical. For example, `anthropic/claude-opus-4-7` with + canonical. For example, `anthropic/claude-opus-4-8` with a model-scoped `agentRuntime.id: "claude-cli"` means "select the Anthropic model, execute through Claude CLI." `claude-cli` is not an embedded harness id and must not be passed to AgentHarness selection. @@ -174,9 +174,9 @@ Claude CLI form is: { agents: { defaults: { - model: "anthropic/claude-opus-4-7", + model: "anthropic/claude-opus-4-8", models: { - "anthropic/claude-opus-4-7": { + "anthropic/claude-opus-4-8": { agentRuntime: { id: "claude-cli" }, }, }, diff --git a/docs/concepts/model-providers.md b/docs/concepts/model-providers.md index 28ea17417b98..c26bb269e886 100644 --- a/docs/concepts/model-providers.md +++ b/docs/concepts/model-providers.md @@ -116,7 +116,7 @@ Official provider plugins publish their own model catalog rows. These providers - CLI: `openclaw onboard --auth-choice apiKey` - Direct public Anthropic requests support the shared `/fast` toggle and `params.fastMode`, including API-key and OAuth-authenticated traffic sent to `api.anthropic.com`; OpenClaw maps that to Anthropic `service_tier` (`auto` vs `standard_only`) - Preferred Claude CLI config keeps the model ref canonical and selects the CLI - backend separately: `anthropic/claude-opus-4-7` with + backend separately: `anthropic/claude-opus-4-8` with model-scoped `agentRuntime.id: "claude-cli"`. Legacy `claude-cli/claude-opus-4-7` refs still work for compatibility. diff --git a/docs/concepts/qa-e2e-automation.md b/docs/concepts/qa-e2e-automation.md index 8dacdefdcec1..af513d4c9efd 100644 --- a/docs/concepts/qa-e2e-automation.md +++ b/docs/concepts/qa-e2e-automation.md @@ -889,13 +889,13 @@ pnpm openclaw qa character-eval \ --model openai/gpt-5.5,thinking=medium,fast \ --model openai/gpt-5.2,thinking=xhigh \ --model openai/gpt-5,thinking=xhigh \ - --model anthropic/claude-opus-4-7,thinking=high \ + --model anthropic/claude-opus-4-8,thinking=high \ --model anthropic/claude-sonnet-4-6,thinking=high \ --model zai/glm-5.1,thinking=high \ --model moonshot/kimi-k2.5,thinking=high \ --model google/gemini-3.1-pro-preview,thinking=high \ --judge-model openai/gpt-5.5,thinking=xhigh,fast \ - --judge-model anthropic/claude-opus-4-7,thinking=high \ + --judge-model anthropic/claude-opus-4-8,thinking=high \ --blind-judge-models \ --concurrency 16 \ --judge-concurrency 16 @@ -926,13 +926,13 @@ Candidate and judge model runs both default to concurrency 16. Lower `--concurrency` or `--judge-concurrency` when provider limits or local gateway pressure make a run too noisy. When no candidate `--model` is passed, the character eval defaults to -`openai/gpt-5.5`, `openai/gpt-5.2`, `openai/gpt-5`, `anthropic/claude-opus-4-7`, +`openai/gpt-5.5`, `openai/gpt-5.2`, `openai/gpt-5`, `anthropic/claude-opus-4-8`, `anthropic/claude-sonnet-4-6`, `zai/glm-5.1`, `moonshot/kimi-k2.5`, and `google/gemini-3.1-pro-preview` when no `--model` is passed. When no `--judge-model` is passed, the judges default to `openai/gpt-5.5,thinking=xhigh,fast` and -`anthropic/claude-opus-4-7,thinking=high`. +`anthropic/claude-opus-4-8,thinking=high`. ## Related docs diff --git a/docs/gateway/config-agents.md b/docs/gateway/config-agents.md index 5b3a018583e7..a8840506138e 100644 --- a/docs/gateway/config-agents.md +++ b/docs/gateway/config-agents.md @@ -334,7 +334,7 @@ Higher values preserve more visual detail. Image-tool compression/detail preference for images loaded from file paths, URLs, and media references. Default: `auto`. -OpenClaw adapts the resize ladder to the selected image model. For example, Claude Opus 4.7, OpenAI GPT-5.5, Qwen VL, and hosted Llama 4 vision models can use larger images than older/default high-detail vision paths, while multi-image turns are compressed more aggressively in `auto` mode to control token and latency cost. +OpenClaw adapts the resize ladder to the selected image model. For example, Claude Opus 4.8, OpenAI GPT-5.5, Qwen VL, and hosted Llama 4 vision models can use larger images than older/default high-detail vision paths, while multi-image turns are compressed more aggressively in `auto` mode to control token and latency cost. Values: @@ -483,7 +483,7 @@ Time format in system prompt. Default: `auto` (OS preference). defaults: { model: "openai/gpt-5.5", models: { - "anthropic/claude-opus-4-7": { + "anthropic/claude-opus-4-8": { agentRuntime: { id: "claude-cli" }, }, "vllm/*": { @@ -501,7 +501,7 @@ Time format in system prompt. Default: `auto` (OS preference). - Runtime precedence is exact model policy first (`agents.list[].models["provider/model"]`, `agents.defaults.models["provider/model"]`, or `models.providers..models[]`), then `agents.list[]` / `agents.defaults.models["provider/*"]`, then provider-wide policy at `models.providers..agentRuntime`. - Whole-agent runtime keys are legacy. `agents.defaults.agentRuntime`, `agents.list[].agentRuntime`, session runtime pins, and `OPENCLAW_AGENT_RUNTIME` are ignored by runtime selection. Run `openclaw doctor --fix` to remove stale values. - OpenAI agent models use the Codex harness by default; provider/model `agentRuntime.id: "codex"` remains valid when you want to make that explicit. -- For Claude CLI deployments, prefer `model: "anthropic/claude-opus-4-7"` plus model-scoped `agentRuntime.id: "claude-cli"`. Legacy `claude-cli/claude-opus-4-7` model refs still work for compatibility, but new config should keep provider/model selection canonical and put the execution backend in provider/model runtime policy. +- For Claude CLI deployments, prefer `model: "anthropic/claude-opus-4-8"` plus model-scoped `agentRuntime.id: "claude-cli"`. Legacy `claude-cli/claude-opus-4-7` model refs still work for compatibility, but new config should keep provider/model selection canonical and put the execution backend in provider/model runtime policy. - This only controls text agent-turn execution. Media generation, vision, PDF, music, video, and TTS still use their provider/model settings. **Built-in alias shorthands** (only apply when the model is in `agents.defaults.models`): @@ -521,7 +521,7 @@ Your configured aliases always win over defaults. Z.AI GLM-4.x models automatically enable thinking mode unless you set `--thinking off` or define `agents.defaults.models["zai/"].params.thinking` yourself. Z.AI models enable `tool_stream` by default for tool call streaming. Set `agents.defaults.models["zai/"].params.tool_stream` to `false` to disable it. -Anthropic Claude 4.6 models default to `adaptive` thinking when no explicit thinking level is set. +Anthropic Claude Opus 4.8 keeps thinking off by default in OpenClaw; when adaptive thinking is explicitly enabled, Anthropic's provider-owned effort default is `high`. Claude 4.6 models default to `adaptive` when no explicit thinking level is set. ### `agents.defaults.cliBackends` diff --git a/docs/help/faq-models.md b/docs/help/faq-models.md index de2ac81f0080..9cadd51b582f 100644 --- a/docs/help/faq-models.md +++ b/docs/help/faq-models.md @@ -282,7 +282,7 @@ troubleshooting, see the main [FAQ](/help/faq). Yes. OpenClaw ships a few default shorthands (only applied when the model exists in `agents.defaults.models`): - - `opus` → `anthropic/claude-opus-4-7` + - `opus` → `anthropic/claude-opus-4-8` - `sonnet` → `anthropic/claude-sonnet-4-6` - `gpt` → `openai/gpt-5.4` - `gpt-mini` → `openai/gpt-5.4-mini` diff --git a/docs/plugins/sdk-agent-harness.md b/docs/plugins/sdk-agent-harness.md index 655e59004855..924605b5314a 100644 --- a/docs/plugins/sdk-agent-harness.md +++ b/docs/plugins/sdk-agent-harness.md @@ -238,9 +238,9 @@ model entry: { "agents": { "defaults": { - "model": "anthropic/claude-opus-4-7", + "model": "anthropic/claude-opus-4-8", "models": { - "anthropic/claude-opus-4-7": { + "anthropic/claude-opus-4-8": { "agentRuntime": { "id": "claude-cli" } diff --git a/docs/providers/anthropic.md b/docs/providers/anthropic.md index d5698a481e70..4e8832cd109a 100644 --- a/docs/providers/anthropic.md +++ b/docs/providers/anthropic.md @@ -61,7 +61,7 @@ Anthropic's current public docs: ```json5 { env: { ANTHROPIC_API_KEY: "example-anthropic-key-not-real" }, - agents: { defaults: { model: { primary: "anthropic/claude-opus-4-6" } } }, + agents: { defaults: { model: { primary: "anthropic/claude-opus-4-8" } } }, } ``` @@ -113,9 +113,9 @@ Anthropic's current public docs: { agents: { defaults: { - model: { primary: "anthropic/claude-opus-4-7" }, + model: { primary: "anthropic/claude-opus-4-8" }, models: { - "anthropic/claude-opus-4-7": { + "anthropic/claude-opus-4-8": { agentRuntime: { id: "claude-cli" }, }, }, @@ -135,9 +135,9 @@ Anthropic's current public docs: -## Thinking defaults (Claude 4.6) +## Thinking defaults (Claude 4.8 and 4.6) -Claude 4.6 models default to `adaptive` thinking in OpenClaw when no explicit thinking level is set. +Claude Opus 4.8 keeps thinking off by default in OpenClaw. When you explicitly enable adaptive thinking with `/think high|xhigh|max`, OpenClaw sends Anthropic's Opus 4.8 effort values; Claude 4.6 models default to `adaptive`. Override per-message with `/think:` or in model params: @@ -146,8 +146,8 @@ Override per-message with `/think:` or in model params: agents: { defaults: { models: { - "anthropic/claude-opus-4-6": { - params: { thinking: "adaptive" }, + "anthropic/claude-opus-4-8": { + params: { thinking: "high" }, }, }, }, @@ -267,7 +267,7 @@ OpenClaw supports Anthropic's prompt caching feature for API-key auth. | Property | Value | | --------------- | --------------------- | - | Default model | `claude-opus-4-7` | + | Default model | `claude-opus-4-8` | | Supported input | Images, PDF documents | When an image or PDF is attached to a conversation, OpenClaw automatically @@ -277,7 +277,7 @@ OpenClaw supports Anthropic's prompt caching feature for API-key auth. Anthropic's 1M context window is available on GA-capable Claude 4.x models - such as Opus 4.6, Opus 4.7, and Sonnet 4.6. OpenClaw sizes those models at + such as Opus 4.8, Opus 4.7, Opus 4.6, and Sonnet 4.6. OpenClaw sizes those models at 1M automatically: ```json5 @@ -308,8 +308,8 @@ OpenClaw supports Anthropic's prompt caching feature for API-key auth. - - `anthropic/claude-opus-4-7` and its `claude-cli` variant have a 1M context + + `anthropic/claude-opus-4-8` and its `claude-cli` variant have a 1M context window by default — no `params.context1m: true` needed. diff --git a/docs/reference/token-use.md b/docs/reference/token-use.md index 2c4d2ff0bd8c..a0fc5f09d30c 100644 --- a/docs/reference/token-use.md +++ b/docs/reference/token-use.md @@ -205,7 +205,7 @@ override only `cacheRetention` and inherit other model defaults unchanged. ### Anthropic 1M context -OpenClaw sizes GA-capable Claude 4.x models such as Opus 4.6, Opus 4.7, and +OpenClaw sizes GA-capable Claude 4.x models such as Opus 4.8, Opus 4.7, Opus 4.6, and Sonnet 4.6 with Anthropic's 1M context window. You do not need `params.context1m: true` for those models. diff --git a/docs/tools/thinking.md b/docs/tools/thinking.md index 56e78bb1d3c5..87bf8c4f307c 100644 --- a/docs/tools/thinking.md +++ b/docs/tools/thinking.md @@ -13,9 +13,9 @@ title: "Thinking levels" - low → "think hard" - medium → "think harder" - high → "ultrathink" (max budget) - - xhigh → "ultrathink+" (GPT-5.2+ and Codex models, plus Anthropic Claude Opus 4.7 effort) - - adaptive → provider-managed adaptive thinking (supported for Claude 4.6 on Anthropic/Bedrock, Anthropic Claude Opus 4.7, and Google Gemini dynamic thinking) - - max → provider max reasoning (Anthropic Claude Opus 4.7; Ollama maps this to its highest native `think` effort) + - xhigh → "ultrathink+" (GPT-5.2+ and Codex models, plus Anthropic Claude Opus 4.7+ effort) + - adaptive → provider-managed adaptive thinking (supported for Claude 4.6 on Anthropic/Bedrock, Anthropic Claude Opus 4.7+, and Google Gemini dynamic thinking) + - max → provider max reasoning (Anthropic Claude Opus 4.7+; Ollama maps this to its highest native `think` effort) - `x-high`, `x_high`, `extra-high`, `extra high`, and `extra_high` map to `xhigh`. - `highest` maps to `high`. - Provider notes: @@ -23,9 +23,9 @@ title: "Thinking levels" - `adaptive`, `xhigh`, and `max` are only advertised for provider/model profiles that support them. Typed directives for unsupported levels are rejected with that model's valid options. - Existing stored unsupported levels are remapped by provider profile rank. `adaptive` falls back to `medium` on non-adaptive models, while `xhigh` and `max` fall back to the largest supported non-off level for the selected model. - Anthropic Claude 4.6 models default to `adaptive` when no explicit thinking level is set. - - Anthropic Claude Opus 4.7 does not default to adaptive thinking. Its API effort default remains provider-owned unless you explicitly set a thinking level. - - Anthropic Claude Opus 4.7 maps `/think xhigh` to adaptive thinking plus `output_config.effort: "xhigh"`, because `/think` is a thinking directive and `xhigh` is the Opus 4.7 effort setting. - - Anthropic Claude Opus 4.7 also exposes `/think max`; it maps to the same provider-owned max effort path. + - Anthropic Claude Opus 4.8 and Opus 4.7 keep thinking off unless you explicitly set a thinking level. Opus 4.8's provider-owned effort default is `high` after adaptive thinking is enabled. + - Anthropic Claude Opus 4.7+ maps `/think xhigh` to adaptive thinking plus `output_config.effort: "xhigh"`, because `/think` is a thinking directive and `xhigh` is the Opus effort setting. + - Anthropic Claude Opus 4.7+ also exposes `/think max`; it maps to the same provider-owned max effort path. - Direct DeepSeek V4 models expose `/think xhigh|max`; both map to DeepSeek `reasoning_effort: "max"` while lower non-off levels map to `high`. - OpenRouter-routed DeepSeek V4 models expose `/think xhigh` and send OpenRouter-supported `reasoning_effort` values. Stored `max` overrides fall back to `xhigh`. - Ollama thinking-capable models expose `/think low|medium|high|max`; `max` maps to native `think: "high"` because Ollama's native API accepts `low`, `medium`, and `high` effort strings. diff --git a/extensions/amazon-bedrock-mantle/mantle-anthropic.runtime.ts b/extensions/amazon-bedrock-mantle/mantle-anthropic.runtime.ts index 6766fc8a3de8..d26181a57a18 100644 --- a/extensions/amazon-bedrock-mantle/mantle-anthropic.runtime.ts +++ b/extensions/amazon-bedrock-mantle/mantle-anthropic.runtime.ts @@ -64,6 +64,7 @@ function adjustMaxTokensForThinking( medium: 8192, high: 16384, xhigh: 16384, + max: 16384, } as const; const budgets = { ...defaultBudgets, ...customBudgets }; const minOutputTokens = 1024; diff --git a/extensions/amazon-bedrock/discovery.test.ts b/extensions/amazon-bedrock/discovery.test.ts index 8c23c9f07329..4f2933b8415a 100644 --- a/extensions/amazon-bedrock/discovery.test.ts +++ b/extensions/amazon-bedrock/discovery.test.ts @@ -195,6 +195,58 @@ describe("bedrock discovery", () => { }); }); + it("uses 1M context window for dotted Claude Opus 4.8 Bedrock refs", async () => { + sendMock + .mockResolvedValueOnce({ + modelSummaries: [ + { + modelId: "anthropic.claude-opus-4.8-v1:0", + modelName: "Claude Opus 4.8", + providerName: "anthropic", + inputModalities: ["TEXT"], + outputModalities: ["TEXT"], + responseStreamingSupported: true, + modelLifecycle: { status: "ACTIVE" }, + }, + ], + }) + .mockResolvedValueOnce({ + inferenceProfileSummaries: [ + { + inferenceProfileId: "us.anthropic.claude-opus-4.8-v1:0", + inferenceProfileName: "US Claude Opus 4.8", + status: "ACTIVE", + type: "SYSTEM_DEFINED", + models: [ + { + modelArn: + "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-opus-4.8-v1:0", + }, + ], + }, + ], + }); + + const models = await discoverBedrockModels({ region: "us-east-1", clientFactory }); + + expectModelFields( + models.find((model) => model.id === "anthropic.claude-opus-4.8-v1:0"), + { + contextWindow: 1_000_000, + reasoning: true, + thinkingLevelMap: { xhigh: "xhigh", max: "max" }, + }, + ); + expectModelFields( + models.find((model) => model.id === "us.anthropic.claude-opus-4.8-v1:0"), + { + contextWindow: 1_000_000, + reasoning: true, + thinkingLevelMap: { xhigh: "xhigh", max: "max" }, + }, + ); + }); + it("caches results when refreshInterval is enabled", async () => { mockSingleActiveSummary(); diff --git a/extensions/amazon-bedrock/discovery.ts b/extensions/amazon-bedrock/discovery.ts index c669d01b632d..b49c218e01ae 100644 --- a/extensions/amazon-bedrock/discovery.ts +++ b/extensions/amazon-bedrock/discovery.ts @@ -46,6 +46,7 @@ const DEFAULT_MAX_TOKENS = 4096; const KNOWN_CONTEXT_WINDOWS: Record = { // Anthropic Claude "anthropic.claude-3-7-sonnet-20250219-v1:0": 200_000, + "anthropic.claude-opus-4-8": 1_000_000, "anthropic.claude-opus-4-7": 1_000_000, "anthropic.claude-opus-4-6-v1": 1_000_000, "anthropic.claude-opus-4-6-v1:0": 1_000_000, @@ -121,6 +122,9 @@ function resolveKnownContextWindow(modelId: string): number | undefined { const stripped = modelId.replace(/^(?:us|eu|ap|apac|au|jp|global)\./, ""); const candidates = [modelId, stripped]; for (const candidate of candidates) { + if (/(?:^|[/.:])anthropic\.claude-opus-4[.-]8(?:$|[-.:/])/i.test(candidate)) { + return 1_000_000; + } if (KNOWN_CONTEXT_WINDOWS[candidate] !== undefined) { return KNOWN_CONTEXT_WINDOWS[candidate]; } @@ -135,6 +139,22 @@ function resolveKnownContextWindow(modelId: string): number | undefined { return undefined; } +function isKnownClaudeOpus47OrNewerModelId(modelId: string): boolean { + const stripped = modelId.replace(/^(?:us|eu|ap|apac|au|jp|global)\./, ""); + return [modelId, stripped].some((candidate) => + /(?:^|[/.:])anthropic\.claude-opus-4[.-][78](?:$|[-.:/])/i.test(candidate), + ); +} + +function resolveKnownThinkingLevelMap( + modelId: string, +): ModelDefinitionConfig["thinkingLevelMap"] | undefined { + if (!isKnownClaudeOpus47OrNewerModelId(modelId)) { + return undefined; + } + return { xhigh: "xhigh", max: "max" }; +} + const DEFAULT_COST = { input: 0, output: 0, @@ -243,6 +263,9 @@ function mapInputModalities(summary: BedrockModelSummary): Array<"text" | "image } function inferReasoningSupport(summary: BedrockModelSummary): boolean { + if (isKnownClaudeOpus47OrNewerModelId(summary.modelId ?? "")) { + return true; + } const haystack = normalizeLowercaseStringOrEmpty( `${summary.modelId ?? ""} ${summary.modelName ?? ""}`, ); @@ -301,6 +324,7 @@ function toModelDefinition( defaults: { contextWindow: number; maxTokens: number }, ): ModelDefinitionConfig { const id = summary.modelId?.trim() ?? ""; + const thinkingLevelMap = resolveKnownThinkingLevelMap(id); return { id, name: summary.modelName?.trim() || id, @@ -309,6 +333,7 @@ function toModelDefinition( cost: DEFAULT_COST, contextWindow: resolveKnownContextWindow(id) ?? defaults.contextWindow, maxTokens: defaults.maxTokens, + ...(thinkingLevelMap ? { thinkingLevelMap } : {}), }; } @@ -420,11 +445,16 @@ function resolveInferenceProfiles( const baseModel = baseModelId ? foundationModels.get(normalizeLowercaseStringOrEmpty(baseModelId)) : undefined; + const knownThinkingLevelMap = resolveKnownThinkingLevelMap( + baseModelId ?? profile.inferenceProfileId, + ); discovered.push({ id: profile.inferenceProfileId, name: profile.inferenceProfileName?.trim() || profile.inferenceProfileId, - reasoning: baseModel?.reasoning ?? false, + reasoning: + baseModel?.reasoning ?? + isKnownClaudeOpus47OrNewerModelId(baseModelId ?? profile.inferenceProfileId), input: baseModel?.input ?? ["text"], cost: baseModel?.cost ?? DEFAULT_COST, contextWindow: @@ -432,6 +462,9 @@ function resolveInferenceProfiles( resolveKnownContextWindow(baseModelId ?? profile.inferenceProfileId ?? "") ?? defaults.contextWindow, maxTokens: baseModel?.maxTokens ?? defaults.maxTokens, + ...(baseModel?.thinkingLevelMap || knownThinkingLevelMap + ? { thinkingLevelMap: baseModel?.thinkingLevelMap ?? knownThinkingLevelMap } + : {}), }); } return discovered; diff --git a/extensions/amazon-bedrock/index.test.ts b/extensions/amazon-bedrock/index.test.ts index 6d24b2e2aaa2..1df6edc084f6 100644 --- a/extensions/amazon-bedrock/index.test.ts +++ b/extensions/amazon-bedrock/index.test.ts @@ -333,6 +333,27 @@ describe("amazon-bedrock provider plugin", () => { } }); + it("leaves Claude Opus 4.8 Bedrock model refs off by default", async () => { + const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); + + for (const modelId of [ + "us.anthropic.claude-opus-4-8", + "us.anthropic.claude-opus-4.8-v1:0", + "arn:aws:bedrock:us-west-2:123456789012:inference-profile/us.anthropic.claude-opus-4-8", + ]) { + expectThinkingProfile( + provider.resolveThinkingProfile?.({ + provider: "amazon-bedrock", + modelId, + } as never), + { + levelIds: ["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"], + defaultLevel: "off", + }, + ); + } + }); + it("owns Anthropic-style replay policy for Claude Bedrock models", async () => { const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); @@ -432,6 +453,28 @@ describe("amazon-bedrock provider plugin", () => { expect(result).not.toHaveProperty("temperature"); }); + it("omits temperature for Bedrock Opus 4.8 model ids", async () => { + const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); + const wrapped = provider.wrapStreamFn?.({ + provider: "amazon-bedrock", + modelId: "us.anthropic.claude-opus-4-8", + streamFn: spyStreamFn, + } as never); + + const result = wrapped?.( + { + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + id: "us.anthropic.claude-opus-4-8", + } as never, + { messages: [] } as never, + { temperature: 0.2, maxTokens: 10 }, + ) as Record | undefined; + + expectWrappedResultFields(result, { maxTokens: 10 }); + expect(result).not.toHaveProperty("temperature"); + }); + it("omits temperature for dotted Bedrock Opus 4.7 model ids", async () => { const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); const wrapped = provider.wrapStreamFn?.({ @@ -590,6 +633,44 @@ describe("amazon-bedrock provider plugin", () => { expect(payload.additionalModelRequestFields.output_config).toEqual({ effort: "xhigh" }); }); + it("uses adaptive max thinking for Bedrock Opus 4.8", async () => { + const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); + const wrapped = provider.wrapStreamFn?.({ + provider: "amazon-bedrock", + modelId: "us.anthropic.claude-opus-4-8", + streamFn: spyStreamFn, + thinkingLevel: "max", + } as never); + + const result = wrapped?.( + { + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + id: "us.anthropic.claude-opus-4-8", + name: "Claude Opus 4.8", + reasoning: true, + } as never, + { messages: [] } as never, + { reasoning: "max" } as never, + ) as Record | undefined; + + const payload = { + inferenceConfig: { temperature: 0.2 }, + additionalModelRequestFields: { + thinking: { type: "adaptive" }, + output_config: { effort: "xhigh" }, + }, + }; + + await (result?.onPayload as ((p: Record) => unknown) | undefined)?.(payload); + + expect(payload.additionalModelRequestFields).toEqual({ + thinking: { type: "adaptive" }, + output_config: { effort: "max" }, + }); + expect(payload.inferenceConfig).toEqual({}); + }); + it("classifies nested Bedrock deprecated-temperature validation as format failover", async () => { const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); diff --git a/extensions/amazon-bedrock/provider-policy-api.test.ts b/extensions/amazon-bedrock/provider-policy-api.test.ts index 9264073faaf9..a5def199326c 100644 --- a/extensions/amazon-bedrock/provider-policy-api.test.ts +++ b/extensions/amazon-bedrock/provider-policy-api.test.ts @@ -19,6 +19,26 @@ describe("amazon-bedrock provider-policy-api", () => { expect(profile?.defaultLevel).toBe("adaptive"); }); + it("leaves Bedrock Claude Opus 4.8 thinking off by default with max effort available", () => { + const profile = resolveThinkingProfile({ + provider: "amazon-bedrock", + modelId: + "arn:aws:bedrock:us-west-2:123456789012:inference-profile/us.anthropic.claude-opus-4-8", + }); + + expect(profile?.levels.map((level) => level.id)).toEqual([ + "off", + "minimal", + "low", + "medium", + "high", + "xhigh", + "adaptive", + "max", + ]); + expect(profile?.defaultLevel).toBe("off"); + }); + it("exposes max thinking for Bedrock Claude Opus 4.7 refs", () => { expect( resolveThinkingProfile({ diff --git a/extensions/amazon-bedrock/register.sync.runtime.ts b/extensions/amazon-bedrock/register.sync.runtime.ts index e9229d6cd83c..fa59e5f0b5a0 100644 --- a/extensions/amazon-bedrock/register.sync.runtime.ts +++ b/extensions/amazon-bedrock/register.sync.runtime.ts @@ -13,7 +13,10 @@ import { supportsBedrockPromptCaching } from "./bedrock-options.js"; import { mergeImplicitBedrockProvider, resolveBedrockConfigApiKey } from "./discovery-shared.js"; import { bedrockMemoryEmbeddingProviderAdapter } from "./memory-embedding-adapter.js"; import { streamBedrock, streamSimpleBedrock } from "./stream.runtime.js"; -import { isOpus47BedrockModelRef, resolveBedrockClaudeThinkingProfile } from "./thinking-policy.js"; +import { + isOpus47OrNewerBedrockModelRef, + resolveBedrockClaudeThinkingProfile, +} from "./thinking-policy.js"; type GuardrailConfig = { guardrailIdentifier: string; @@ -252,7 +255,7 @@ async function resolveAppProfileTraits( const traits = { cacheEligible: models.length > 0 && modelArns.every((modelArn) => resolvedModelSupportsCaching(modelArn)), - omitTemperature: modelArns.some(isOpus47BedrockModelRef), + omitTemperature: modelArns.some(isOpus47OrNewerBedrockModelRef), }; appProfileTraitsCache.set(modelId, traits); return traits; @@ -261,7 +264,7 @@ async function resolveAppProfileTraits( // return the heuristic fallback but allow retry on the next request. return { cacheEligible: isAnthropicBedrockModel(modelId), - omitTemperature: isOpus47BedrockModelRef(modelId), + omitTemperature: isOpus47OrNewerBedrockModelRef(modelId), }; } } @@ -388,7 +391,7 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { modelId: string, options: TOptions, ): TOptions { - if (!isOpus47BedrockModelRef(modelId) || !("temperature" in options)) { + if (!isOpus47OrNewerBedrockModelRef(modelId) || !("temperature" in options)) { return options; } const next = { ...options } as typeof options & { temperature?: unknown }; @@ -513,7 +516,7 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { currentPluginConfig?.discovery?.region; const mayNeedCacheInjection = isBedrockAppInferenceProfile(modelId) && !sharedRuntimeWouldInjectCachePoints(modelId); - const shouldOmitTemperature = isOpus47BedrockModelRef(modelId); + const shouldOmitTemperature = isOpus47OrNewerBedrockModelRef(modelId); const shouldPatchMaxThinking = shouldOmitTemperature && thinkingLevel === "max"; // For known Anthropic models (heuristic match), enable injection immediately. @@ -548,7 +551,9 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { ? { onPayload: (payload: unknown, payloadModel: unknown) => { if (payload && typeof payload === "object") { - patchOpus47MaxThinkingEffort(payload as Record); + const payloadRecord = payload as Record; + patchOpus47MaxThinkingEffort(payloadRecord); + omitDeprecatedOpus47PayloadTemperature(payloadRecord); } return originalOnPayload?.(payload, payloadModel); }, @@ -584,7 +589,9 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { if (shouldPatchMaxThinking) { patchOpus47MaxThinkingEffort(payloadRecord); } - if (mayNeedTemperatureTrait) { + if (shouldOmitTemperature) { + omitDeprecatedOpus47PayloadTemperature(payloadRecord); + } else if (mayNeedTemperatureTrait) { const traits = await resolveAppProfileTraits(modelId, region); if (traits.omitTemperature) { omitDeprecatedOpus47PayloadTemperature(payloadRecord); diff --git a/extensions/amazon-bedrock/stream.runtime.test.ts b/extensions/amazon-bedrock/stream.runtime.test.ts index abe866d347a4..75e0f2813da8 100644 --- a/extensions/amazon-bedrock/stream.runtime.test.ts +++ b/extensions/amazon-bedrock/stream.runtime.test.ts @@ -89,3 +89,29 @@ describe("Bedrock profile endpoint resolution", () => { ).toBe(false); }); }); + +describe("Bedrock thinking effort mapping", () => { + it("clamps max effort for Claude models without native max support", () => { + expect( + testing.mapThinkingLevelToEffort( + bedrockModel({ + id: "anthropic.claude-sonnet-4-6-v1:0", + name: "Claude Sonnet 4.6", + }), + "max", + ), + ).toBe("high"); + }); + + it("preserves max effort for Claude Opus 4.8", () => { + expect( + testing.mapThinkingLevelToEffort( + bedrockModel({ + id: "anthropic.claude-opus-4.8-v1:0", + name: "Claude Opus 4.8", + }), + "max", + ), + ).toBe("max"); + }); +}); diff --git a/extensions/amazon-bedrock/stream.runtime.ts b/extensions/amazon-bedrock/stream.runtime.ts index 6e01d733f024..5568188c2f03 100644 --- a/extensions/amazon-bedrock/stream.runtime.ts +++ b/extensions/amazon-bedrock/stream.runtime.ts @@ -473,13 +473,17 @@ function getModelMatchCandidates(modelId: string, modelName?: string): string[] function supportsAdaptiveThinking(modelId: string, modelName?: string): boolean { const candidates = getModelMatchCandidates(modelId, modelName); return candidates.some( - (s) => s.includes("opus-4-6") || s.includes("opus-4-7") || s.includes("sonnet-4-6"), + (s) => + s.includes("opus-4-6") || + s.includes("opus-4-7") || + s.includes("opus-4-8") || + s.includes("sonnet-4-6"), ); } function supportsNativeXhighEffort(model: Model<"bedrock-converse-stream">): boolean { const candidates = getModelMatchCandidates(model.id, model.name); - return candidates.some((s) => s.includes("opus-4-7")); + return candidates.some((s) => s.includes("opus-4-7") || s.includes("opus-4-8")); } function mapThinkingLevelToEffort( @@ -503,6 +507,8 @@ function mapThinkingLevelToEffort( return "medium"; case "high": return "high"; + case "max": + return supportsNativeXhighEffort(model) ? "max" : "high"; default: return "high"; } @@ -887,6 +893,7 @@ function buildAdditionalModelRequestFields( medium: 8192, high: 16384, xhigh: 16384, // Claude doesn't support xhigh, clamp to high + max: 16384, }; // Custom budgets override defaults (xhigh not in ThinkingBudgets, use high) @@ -945,5 +952,6 @@ export const testing = { convertMessages, getConfiguredBedrockRegion, hasConfiguredBedrockProfile, + mapThinkingLevelToEffort, shouldUseExplicitBedrockEndpoint, }; diff --git a/extensions/amazon-bedrock/thinking-policy.ts b/extensions/amazon-bedrock/thinking-policy.ts index 90a71f7d494c..e4ec42edf1b6 100644 --- a/extensions/amazon-bedrock/thinking-policy.ts +++ b/extensions/amazon-bedrock/thinking-policy.ts @@ -8,14 +8,30 @@ const BASE_CLAUDE_THINKING_LEVELS = [ { id: "high" }, ] as const satisfies ProviderThinkingProfile["levels"]; +function isOpus48BedrockModelRef(modelRef: string): boolean { + return /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?anthropic\.claude-opus-4[.-]8(?:$|[-.:/])/i.test( + modelRef, + ); +} + export function isOpus47BedrockModelRef(modelRef: string): boolean { return /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?anthropic\.claude-opus-4[.-]7(?:$|[-.:/])/i.test( modelRef, ); } +export function isOpus47OrNewerBedrockModelRef(modelRef: string): boolean { + return isOpus47BedrockModelRef(modelRef) || isOpus48BedrockModelRef(modelRef); +} + export function resolveBedrockClaudeThinkingProfile(modelId: string): ProviderThinkingProfile { const trimmed = modelId.trim(); + if (isOpus48BedrockModelRef(trimmed)) { + return { + levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }], + defaultLevel: "off", + }; + } if (isOpus47BedrockModelRef(trimmed)) { return { levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }], diff --git a/extensions/anthropic-vertex/index.test.ts b/extensions/anthropic-vertex/index.test.ts index cc77c646c41f..b9d088f5fa8c 100644 --- a/extensions/anthropic-vertex/index.test.ts +++ b/extensions/anthropic-vertex/index.test.ts @@ -77,9 +77,14 @@ describe("anthropic-vertex provider plugin", () => { expect(result.provider.baseUrl).toBe("https://europe-west4-aiplatform.googleapis.com"); expect(result.provider.headers).toEqual({ "x-test-header": "1" }); expect(result.provider.models.map((model) => model.id)).toEqual([ + "claude-opus-4-8", "claude-opus-4-6", "claude-sonnet-4-6", ]); + expect(result.provider.models[0]?.thinkingLevelMap).toEqual({ + xhigh: "xhigh", + max: "max", + }); }); it("owns Anthropic-style replay policy", async () => { @@ -103,6 +108,18 @@ describe("anthropic-vertex provider plugin", () => { }); }); + it("owns Anthropic-style thinking policy", async () => { + const provider = await registerSingleProviderPlugin(anthropicVertexPlugin); + + const opus48Profile = provider.resolveThinkingProfile?.({ + provider: "anthropic-vertex", + modelId: "claude-opus-4-8", + } as never); + + expect(opus48Profile?.defaultLevel).toBe("off"); + expect(opus48Profile?.levels.map((level) => level.id)).toContain("max"); + }); + it("resolves synthetic auth when ADC is available", async () => { hasAnthropicVertexAvailableAuthMock.mockReturnValue(true); const provider = await registerSingleProviderPlugin(anthropicVertexPlugin); diff --git a/extensions/anthropic-vertex/index.ts b/extensions/anthropic-vertex/index.ts index 5733e2ab0618..9141f4596181 100644 --- a/extensions/anthropic-vertex/index.ts +++ b/extensions/anthropic-vertex/index.ts @@ -1,6 +1,9 @@ import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry"; import { readConfiguredProviderCatalogEntries } from "openclaw/plugin-sdk/provider-catalog-shared"; -import { NATIVE_ANTHROPIC_REPLAY_HOOKS } from "openclaw/plugin-sdk/provider-model-shared"; +import { + NATIVE_ANTHROPIC_REPLAY_HOOKS, + resolveClaudeThinkingProfile, +} from "openclaw/plugin-sdk/provider-model-shared"; import { hasAnthropicVertexAvailableAuth, mergeImplicitAnthropicVertexProvider, @@ -40,6 +43,7 @@ export default definePluginEntry({ }, resolveConfigApiKey: ({ env }) => resolveAnthropicVertexConfigApiKey(env), ...NATIVE_ANTHROPIC_REPLAY_HOOKS, + resolveThinkingProfile: ({ modelId }) => resolveClaudeThinkingProfile(modelId), resolveSyntheticAuth: () => { if (!hasAnthropicVertexAvailableAuth()) { return undefined; diff --git a/extensions/anthropic-vertex/provider-catalog.ts b/extensions/anthropic-vertex/provider-catalog.ts index d372b4289560..295eddf62a25 100644 --- a/extensions/anthropic-vertex/provider-catalog.ts +++ b/extensions/anthropic-vertex/provider-catalog.ts @@ -15,6 +15,7 @@ function buildAnthropicVertexModel(params: { input: ModelDefinitionConfig["input"]; cost: ModelDefinitionConfig["cost"]; maxTokens: number; + thinkingLevelMap?: ModelDefinitionConfig["thinkingLevelMap"]; }): ModelDefinitionConfig { return { id: params.id, @@ -24,11 +25,21 @@ function buildAnthropicVertexModel(params: { cost: params.cost, contextWindow: ANTHROPIC_VERTEX_DEFAULT_CONTEXT_WINDOW, maxTokens: params.maxTokens, + ...(params.thinkingLevelMap ? { thinkingLevelMap: params.thinkingLevelMap } : {}), }; } function buildAnthropicVertexCatalog(): ModelDefinitionConfig[] { return [ + buildAnthropicVertexModel({ + id: "claude-opus-4-8", + name: "Claude Opus 4.8", + reasoning: true, + input: ["text", "image"], + cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 }, + maxTokens: 128000, + thinkingLevelMap: { xhigh: "xhigh", max: "max" }, + }), buildAnthropicVertexModel({ id: "claude-opus-4-6", name: "Claude Opus 4.6", diff --git a/extensions/anthropic-vertex/provider-policy-api.test.ts b/extensions/anthropic-vertex/provider-policy-api.test.ts new file mode 100644 index 000000000000..c8937dbeb11b --- /dev/null +++ b/extensions/anthropic-vertex/provider-policy-api.test.ts @@ -0,0 +1,29 @@ +import { describe, expect, it } from "vitest"; +import { resolveThinkingProfile } from "./provider-policy-api.js"; + +describe("anthropic-vertex provider-policy-api", () => { + it("leaves Claude Opus 4.8 thinking off by default with max effort support", () => { + const profile = resolveThinkingProfile({ + provider: "anthropic-vertex", + modelId: "claude-opus-4-8", + }); + + expect(profile?.defaultLevel).toBe("off"); + expect(profile?.levels.map((level) => level.id)).toContain("max"); + }); + + it("keeps Claude Opus 4.7 thinking off by default", () => { + const profile = resolveThinkingProfile({ + provider: "anthropic-vertex", + modelId: "claude-opus-4-7", + }); + + expect(profile?.defaultLevel).toBe("off"); + }); + + it("ignores other providers", () => { + expect(resolveThinkingProfile({ provider: "anthropic", modelId: "claude-opus-4-8" })).toBe( + null, + ); + }); +}); diff --git a/extensions/anthropic-vertex/provider-policy-api.ts b/extensions/anthropic-vertex/provider-policy-api.ts new file mode 100644 index 000000000000..44f71012224d --- /dev/null +++ b/extensions/anthropic-vertex/provider-policy-api.ts @@ -0,0 +1,8 @@ +import { resolveClaudeThinkingProfile } from "openclaw/plugin-sdk/provider-model-shared"; + +export function resolveThinkingProfile(params: { provider: string; modelId: string }) { + if (params.provider.trim().toLowerCase() !== "anthropic-vertex") { + return null; + } + return resolveClaudeThinkingProfile(params.modelId); +} diff --git a/extensions/anthropic-vertex/stream-runtime.test.ts b/extensions/anthropic-vertex/stream-runtime.test.ts index 193177ac9af7..bec67b313c78 100644 --- a/extensions/anthropic-vertex/stream-runtime.test.ts +++ b/extensions/anthropic-vertex/stream-runtime.test.ts @@ -170,6 +170,30 @@ describe("createAnthropicVertexStreamFn", () => { expect(streamTransportOptions(streamAnthropicMock).maxTokens).toBe(128000); }); + it.each(["claude-opus-4-8", "claude-opus-4-7"])( + "omits unsupported temperature for %s", + (modelId) => { + const { deps, streamAnthropicMock } = createStreamDeps(); + const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps); + const model = makeModel({ id: modelId, maxTokens: 128000 }); + + void streamFn(model, { messages: [] }, { temperature: 0.7 }); + + const transportOptions = streamTransportOptions(streamAnthropicMock); + expect(Object.hasOwn(transportOptions, "temperature")).toBe(false); + }, + ); + + it("preserves temperature for Vertex models that support custom sampling", () => { + const { deps, streamAnthropicMock } = createStreamDeps(); + const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps); + const model = makeModel({ id: "claude-sonnet-4-6", maxTokens: 128000 }); + + void streamFn(model, { messages: [] }, { temperature: 0.7 }); + + expect(streamTransportOptions(streamAnthropicMock).temperature).toBe(0.7); + }); + it("maps xhigh reasoning to max effort for adaptive Opus models", () => { const { deps, streamAnthropicMock } = createStreamDeps(); const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps); @@ -182,10 +206,10 @@ describe("createAnthropicVertexStreamFn", () => { expect(transportOptions.effort).toBe("max"); }); - it("maps xhigh reasoning to xhigh effort for Opus 4.7", () => { + it("maps xhigh reasoning to xhigh effort for Opus 4.8", () => { const { deps, streamAnthropicMock } = createStreamDeps(); const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps); - const model = makeModel({ id: "claude-opus-4-7", maxTokens: 64000 }); + const model = makeModel({ id: "claude-opus-4-8", maxTokens: 128000 }); void streamFn(model, { messages: [] }, { reasoning: "xhigh" }); @@ -194,6 +218,30 @@ describe("createAnthropicVertexStreamFn", () => { expect(transportOptions.effort).toBe("xhigh"); }); + it("preserves max reasoning for Opus 4.8", () => { + const { deps, streamAnthropicMock } = createStreamDeps(); + const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps); + const model = makeModel({ id: "claude-opus-4-8", maxTokens: 128000 }); + + void streamFn(model, { messages: [] }, { reasoning: "max" }); + + const transportOptions = streamTransportOptions(streamAnthropicMock); + expect(transportOptions.thinkingEnabled).toBe(true); + expect(transportOptions.effort).toBe("max"); + }); + + it("clamps max reasoning for adaptive models without native max support", () => { + const { deps, streamAnthropicMock } = createStreamDeps(); + const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps); + const model = makeModel({ id: "claude-sonnet-4-6", maxTokens: 128000 }); + + void streamFn(model, { messages: [] }, { reasoning: "max" }); + + const transportOptions = streamTransportOptions(streamAnthropicMock); + expect(transportOptions.thinkingEnabled).toBe(true); + expect(transportOptions.effort).toBe("high"); + }); + it("applies Anthropic cache-boundary shaping before forwarding payload hooks", async () => { const { deps, streamAnthropicMock } = createStreamDeps(); const onPayload = vi.fn(async (payload: unknown) => payload); diff --git a/extensions/anthropic-vertex/stream-runtime.ts b/extensions/anthropic-vertex/stream-runtime.ts index 04d6be6b290c..f9b4ae0cc263 100644 --- a/extensions/anthropic-vertex/stream-runtime.ts +++ b/extensions/anthropic-vertex/stream-runtime.ts @@ -36,8 +36,13 @@ const defaultAnthropicVertexStreamDeps: AnthropicVertexStreamDeps = { streamAnthropic: streamDefault, }; -function isClaudeOpus47Model(modelId: string): boolean { - return modelId.includes("opus-4-7") || modelId.includes("opus-4.7"); +function isClaudeOpus47OrNewerModel(modelId: string): boolean { + return ( + modelId.includes("opus-4-8") || + modelId.includes("opus-4.8") || + modelId.includes("opus-4-7") || + modelId.includes("opus-4.7") + ); } function isClaudeOpus46Model(modelId: string): boolean { @@ -46,7 +51,7 @@ function isClaudeOpus46Model(modelId: string): boolean { function supportsAdaptiveThinking(modelId: string): boolean { return ( - isClaudeOpus47Model(modelId) || + isClaudeOpus47OrNewerModel(modelId) || isClaudeOpus46Model(modelId) || modelId.includes("sonnet-4-6") || modelId.includes("sonnet-4.6") @@ -62,7 +67,12 @@ function mapAnthropicAdaptiveEffort( low: "low", medium: "medium", high: "high", - xhigh: isClaudeOpus47Model(modelId) ? "xhigh" : isClaudeOpus46Model(modelId) ? "max" : "high", + xhigh: isClaudeOpus47OrNewerModel(modelId) + ? "xhigh" + : isClaudeOpus46Model(modelId) + ? "max" + : "high", + max: isClaudeOpus47OrNewerModel(modelId) ? "max" : "high", }; return effortMap[reasoning] ?? "high"; } @@ -148,9 +158,10 @@ export function createAnthropicVertexStreamFn( modelMaxTokens: transportModel.maxTokens, requestedMaxTokens: options?.maxTokens, }); + const temperature = isClaudeOpus47OrNewerModel(model.id) ? undefined : options?.temperature; const opts: AnthropicVertexTransportOptions = { client, - temperature: options?.temperature, + ...(temperature !== undefined ? { temperature } : {}), ...(maxTokens !== undefined ? { maxTokens } : {}), signal: options?.signal, cacheRetention: options?.cacheRetention, diff --git a/extensions/anthropic/claude-model-refs.ts b/extensions/anthropic/claude-model-refs.ts index 0b6899549836..ac1f33baa003 100644 --- a/extensions/anthropic/claude-model-refs.ts +++ b/extensions/anthropic/claude-model-refs.ts @@ -2,7 +2,7 @@ import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/string-coer import { CLAUDE_CLI_BACKEND_ID, CLAUDE_CLI_MODEL_ALIASES } from "./cli-constants.js"; const DEFAULT_CLAUDE_MODEL_BY_FAMILY: Record = { - opus: "claude-opus-4-7", + opus: "claude-opus-4-8", sonnet: "claude-sonnet-4-6", haiku: "claude-haiku-4-5", }; @@ -96,18 +96,16 @@ function canonicalizeKnownClaudeCliModelId(modelId: string): string | null { if (defaultModel) { return attachModelAuthProfile(defaultModel, split.profile); } - const family = CLAUDE_CLI_MODEL_ALIASES[normalized]; - if (!family) { - return null; - } - const version = normalized.slice(`${family}-`.length); - if (!version || version === normalized) { - return null; - } - return attachModelAuthProfile(`claude-${family}-${version.replaceAll(".", "-")}`, split.profile); + const aliasedModel = CLAUDE_CLI_MODEL_ALIASES[normalized]; + return aliasedModel?.startsWith("claude-") + ? attachModelAuthProfile(aliasedModel, split.profile) + : null; } function upgradeOldClaudeModelId(normalized: string): string | null { + if (normalized.startsWith("claude-opus-4-8") || normalized.startsWith("claude-opus-4.8")) { + return null; + } if (normalized.startsWith("claude-opus-4-7") || normalized.startsWith("claude-opus-4.7")) { return null; } @@ -124,6 +122,8 @@ function upgradeOldClaudeModelId(normalized: string): string | null { if ( normalized === "claude-opus-4" || hasAnyRetiredVersionPrefix(normalized, [ + "claude-opus-4-7", + "claude-opus-4.7", "claude-opus-4-5", "claude-opus-4.5", "claude-opus-4-1", @@ -133,7 +133,7 @@ function upgradeOldClaudeModelId(normalized: string): string | null { ]) || /^claude-opus-4-20\d{6}/.test(normalized) ) { - return "claude-opus-4-7"; + return "claude-opus-4-8"; } if ( normalized === "claude-sonnet-4" || @@ -150,7 +150,7 @@ function upgradeOldClaudeModelId(normalized: string): string | null { return "claude-sonnet-4-6"; } if (normalized.startsWith("claude-3") && normalized.includes("opus")) { - return "claude-opus-4-7"; + return "claude-opus-4-8"; } if ( normalized.startsWith("claude-3") && @@ -164,7 +164,7 @@ function upgradeOldClaudeModelId(normalized: string): string | null { normalized === "opus-4" || normalized === "opus-3" ) { - return "claude-opus-4-7"; + return "claude-opus-4-8"; } if ( normalized === "sonnet-4.5" || diff --git a/extensions/anthropic/cli-catalog.ts b/extensions/anthropic/cli-catalog.ts index 3438584f9eab..0d72225e639b 100644 --- a/extensions/anthropic/cli-catalog.ts +++ b/extensions/anthropic/cli-catalog.ts @@ -5,13 +5,14 @@ import { CLAUDE_CLI_BACKEND_ID, CLAUDE_CLI_DEFAULT_ALLOWLIST_REFS } from "./cli- const CLAUDE_CLI_DEFAULT_CONTEXT_WINDOW = 200_000; const CLAUDE_CLI_MODEL_LABELS: Record = { + "claude-opus-4-8": "Claude Opus 4.8 (Claude CLI)", "claude-opus-4-7": "Claude Opus 4.7 (Claude CLI)", "claude-opus-4-6": "Claude Opus 4.6 (Claude CLI)", "claude-sonnet-4-6": "Claude Sonnet 4.6 (Claude CLI)", }; function resolveClaudeCliImageMediaInput(id: string): ModelCatalogEntry["mediaInput"] { - const maxSidePx = id === "claude-opus-4-7" ? 2576 : 1568; + const maxSidePx = id === "claude-opus-4-8" || id === "claude-opus-4-7" ? 2576 : 1568; return { image: { maxSidePx, @@ -39,13 +40,15 @@ function extractClaudeCliModelIds(): string[] { } export function buildClaudeCliCatalogEntries(): ModelCatalogEntry[] { - return extractClaudeCliModelIds().map((id) => ({ - id, - name: CLAUDE_CLI_MODEL_LABELS[id] ?? `${id} (Claude CLI)`, - provider: CLAUDE_CLI_BACKEND_ID, - reasoning: true, - input: ["text", "image"], - mediaInput: resolveClaudeCliImageMediaInput(id), - contextWindow: CLAUDE_CLI_DEFAULT_CONTEXT_WINDOW, - })); + return extractClaudeCliModelIds().map((id) => { + return { + id, + name: CLAUDE_CLI_MODEL_LABELS[id] ?? `${id} (Claude CLI)`, + provider: CLAUDE_CLI_BACKEND_ID, + reasoning: true, + input: ["text", "image"], + mediaInput: resolveClaudeCliImageMediaInput(id), + contextWindow: id === "claude-opus-4-8" ? 1_048_576 : CLAUDE_CLI_DEFAULT_CONTEXT_WINDOW, + }; + }); } diff --git a/extensions/anthropic/cli-constants.ts b/extensions/anthropic/cli-constants.ts index ccf81e0c64f8..9d19d1bb5d74 100644 --- a/extensions/anthropic/cli-constants.ts +++ b/extensions/anthropic/cli-constants.ts @@ -1,20 +1,23 @@ export const CLAUDE_CLI_BACKEND_ID = "claude-cli"; -export const CLAUDE_CLI_DEFAULT_MODEL_REF = `${CLAUDE_CLI_BACKEND_ID}/claude-opus-4-7`; +export const CLAUDE_CLI_DEFAULT_MODEL_REF = `${CLAUDE_CLI_BACKEND_ID}/claude-opus-4-8`; export const CLAUDE_CLI_DEFAULT_ALLOWLIST_REFS = [ CLAUDE_CLI_DEFAULT_MODEL_REF, + `${CLAUDE_CLI_BACKEND_ID}/claude-opus-4-7`, `${CLAUDE_CLI_BACKEND_ID}/claude-sonnet-4-6`, `${CLAUDE_CLI_BACKEND_ID}/claude-opus-4-6`, ] as const; export const CLAUDE_CLI_MODEL_ALIASES: Record = { opus: "opus", - "opus-4.7": "opus", - "opus-4.6": "opus", - "claude-opus-4-7": "opus", - "claude-opus-4-6": "opus", + "opus-4.8": "claude-opus-4-8", + "opus-4.7": "claude-opus-4-7", + "opus-4.6": "claude-opus-4-6", + "claude-opus-4-8": "claude-opus-4-8", + "claude-opus-4-7": "claude-opus-4-7", + "claude-opus-4-6": "claude-opus-4-6", sonnet: "sonnet", - "sonnet-4.6": "sonnet", - "claude-sonnet-4-6": "sonnet", + "sonnet-4.6": "claude-sonnet-4-6", + "claude-sonnet-4-6": "claude-sonnet-4-6", haiku: "haiku", }; diff --git a/extensions/anthropic/cli-migration.test.ts b/extensions/anthropic/cli-migration.test.ts index dad8b3691530..da966af27ccd 100644 --- a/extensions/anthropic/cli-migration.test.ts +++ b/extensions/anthropic/cli-migration.test.ts @@ -38,10 +38,10 @@ afterAll(() => { describe("anthropic Claude model refs", () => { it("upgrades retired refs without rewriting future canonical refs", () => { expect(resolveKnownAnthropicModelRef("anthropic/claude-opus-4-5")).toBe( - "anthropic/claude-opus-4-7", + "anthropic/claude-opus-4-8", ); expect(resolveKnownAnthropicModelRef("anthropic/claude-opus-4-5@anthropic:work")).toBe( - "anthropic/claude-opus-4-7@anthropic:work", + "anthropic/claude-opus-4-8@anthropic:work", ); expect(resolveKnownAnthropicModelRef("anthropic/claude-sonnet-4-20250514")).toBe( "anthropic/claude-sonnet-4-6", @@ -182,6 +182,7 @@ describe("anthropic cli migration", () => { alias: "Opus", agentRuntime: { id: "claude-cli" }, }, + "anthropic/claude-opus-4-8": { agentRuntime: { id: "claude-cli" } }, "anthropic/claude-sonnet-4-6": { agentRuntime: { id: "claude-cli" } }, "anthropic/claude-opus-4-6": { alias: "Opus", @@ -267,12 +268,13 @@ describe("anthropic cli migration", () => { }, }); - expect(result.defaultModel).toBe("anthropic/claude-opus-4-7"); + expect(result.defaultModel).toBe("anthropic/claude-opus-4-8"); expect(result.configPatch).toEqual({ agents: { defaults: { models: { "openai/gpt-5.2": {}, + "anthropic/claude-opus-4-8": { agentRuntime: { id: "claude-cli" } }, "anthropic/claude-opus-4-7": { agentRuntime: { id: "claude-cli" } }, "anthropic/claude-sonnet-4-6": { agentRuntime: { id: "claude-cli" } }, "anthropic/claude-opus-4-6": { agentRuntime: { id: "claude-cli" } }, @@ -294,7 +296,7 @@ describe("anthropic cli migration", () => { }, }); - expect(result.defaultModel).toBe("anthropic/claude-opus-4-7"); + expect(result.defaultModel).toBe("anthropic/claude-opus-4-8"); expect(result.configPatch?.agents?.defaults?.model).toBeUndefined(); expect(result.configPatch?.agents?.defaults?.models?.["anthropic/gpt-5.2"]).toBeUndefined(); }); @@ -316,6 +318,7 @@ describe("anthropic cli migration", () => { defaults: { model: { primary: "anthropic/claude-opus-4-7" }, models: { + "anthropic/claude-opus-4-8": { agentRuntime: { id: "claude-cli" } }, "anthropic/claude-opus-4-7": { agentRuntime: { id: "claude-cli" } }, "anthropic/claude-sonnet-4-6": { agentRuntime: { id: "claude-cli" } }, "anthropic/claude-opus-4-6": { agentRuntime: { id: "claude-cli" } }, @@ -499,6 +502,9 @@ describe("anthropic cli migration", () => { alias: "Opus", agentRuntime: { id: "claude-cli" }, }); + expect(defaults?.models?.["anthropic/claude-opus-4-8"]).toEqual({ + agentRuntime: { id: "claude-cli" }, + }); expect(defaults?.models?.["openai/gpt-5.2"]).toEqual({}); }); diff --git a/extensions/anthropic/cli-migration.ts b/extensions/anthropic/cli-migration.ts index f2623728f56b..545afd25131c 100644 --- a/extensions/anthropic/cli-migration.ts +++ b/extensions/anthropic/cli-migration.ts @@ -224,7 +224,7 @@ export function buildAnthropicCliMigrationResult( ...rewrittenModels.runtimeRefs, ...rewrittenModels.migrated, ]); - const defaultModel = rewrittenModel.primary ?? "anthropic/claude-opus-4-7"; + const defaultModel = rewrittenModel.primary ?? "anthropic/claude-opus-4-8"; return { profiles: buildClaudeCliAuthProfiles(credential), diff --git a/extensions/anthropic/cli-shared.test.ts b/extensions/anthropic/cli-shared.test.ts index 85caa63f07f0..f35bf3430843 100644 --- a/extensions/anthropic/cli-shared.test.ts +++ b/extensions/anthropic/cli-shared.test.ts @@ -76,6 +76,20 @@ describe("normalizeClaudeSettingSourcesArgs", () => { }); }); +describe("Claude CLI model aliases", () => { + it("keeps pinned Claude CLI model refs on exact selectors", () => { + const aliases = buildAnthropicCliBackend().config.modelAliases; + + expect(aliases?.["opus"]).toBe("opus"); + expect(aliases?.["opus-4.8"]).toBe("claude-opus-4-8"); + expect(aliases?.["opus-4.7"]).toBe("claude-opus-4-7"); + expect(aliases?.["opus-4.6"]).toBe("claude-opus-4-6"); + expect(aliases?.["claude-opus-4-8"]).toBe("claude-opus-4-8"); + expect(aliases?.["claude-opus-4-7"]).toBe("claude-opus-4-7"); + expect(aliases?.["claude-opus-4-6"]).toBe("claude-opus-4-6"); + }); +}); + describe("resolveClaudeCliExecutionArgs", () => { it("omits effort args when thinking is off", () => { expect( diff --git a/extensions/anthropic/index.test.ts b/extensions/anthropic/index.test.ts index 63061bf0e3f1..78f753640565 100644 --- a/extensions/anthropic/index.test.ts +++ b/extensions/anthropic/index.test.ts @@ -257,6 +257,7 @@ describe("anthropic provider replay hooks", () => { }); const models = requireRecord(next?.agents?.defaults?.models, "models"); for (const modelId of [ + "anthropic/claude-opus-4-8", "anthropic/claude-opus-4-7", "anthropic/claude-sonnet-4-6", "anthropic/claude-opus-4-6", @@ -444,15 +445,15 @@ describe("anthropic provider replay hooks", () => { expect(models["anthropic/claude-opus-5-0"]).toBeUndefined(); }); - it("resolves explicit claude-opus-4-7 refs from the 4.6 template family", async () => { + it("resolves explicit claude-opus-4-8 refs from the 4.7 template family", async () => { const provider = await registerSingleProviderPlugin(anthropicPlugin); const resolved = provider.resolveDynamicModel?.({ provider: "anthropic", - modelId: "claude-opus-4-7", + modelId: "claude-opus-4-8", modelRegistry: createModelRegistry([ { - id: "claude-opus-4-6", - name: "Claude Opus 4.6", + id: "claude-opus-4-7", + name: "Claude Opus 4.7", provider: "anthropic", api: "anthropic-messages", reasoning: true, @@ -466,12 +467,22 @@ describe("anthropic provider replay hooks", () => { expectFields(resolved, { provider: "anthropic", - id: "claude-opus-4-7", + id: "claude-opus-4-8", api: "anthropic-messages", reasoning: true, contextWindow: 1_048_576, contextTokens: 1_048_576, + maxTokens: 128_000, }); + const opus48Profile = provider.resolveThinkingProfile?.({ + provider: "anthropic", + modelId: "claude-opus-4-8", + } as never); + const opus48LevelIds = levelIds(opus48Profile); + expect(opus48LevelIds).toContain("xhigh"); + expect(opus48LevelIds).toContain("adaptive"); + expect(opus48LevelIds).toContain("max"); + expect(requireRecord(opus48Profile, "opus 4.8 thinking profile").defaultLevel).toBe("off"); const opus47Profile = provider.resolveThinkingProfile?.({ provider: "anthropic", modelId: "claude-opus-4-7", @@ -593,6 +604,7 @@ describe("anthropic provider replay hooks", () => { const provider = await registerSingleProviderPlugin(anthropicPlugin); for (const [runtimeProvider, modelId] of [ + ["anthropic", "claude-opus-4-8"], ["anthropic", "claude-opus-4-7"], ["claude-cli", "claude-opus-4.7-20260219"], ["anthropic", "claude-opus-4-6"], @@ -623,6 +635,32 @@ describe("anthropic provider replay hooks", () => { } }); + it("normalizes Claude Opus 4.8 to 128k max output tokens", async () => { + const provider = await registerSingleProviderPlugin(anthropicPlugin); + + const normalized = provider.normalizeResolvedModel?.({ + provider: "anthropic", + modelId: "claude-opus-4-8", + model: { + id: "claude-opus-4-8", + name: "Claude Opus 4.8", + provider: "anthropic", + api: "anthropic-messages", + reasoning: true, + input: ["text", "image"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200_000, + maxTokens: 64_000, + }, + } as never); + + expectFields(normalized, { + contextWindow: 1_048_576, + contextTokens: 1_048_576, + maxTokens: 128_000, + }); + }); + it("does not normalize legacy Claude 4.5 models to 1M context", async () => { const provider = await registerSingleProviderPlugin(anthropicPlugin); diff --git a/extensions/anthropic/media-understanding-provider.ts b/extensions/anthropic/media-understanding-provider.ts index 0eec5c14ecc5..758e382eea7b 100644 --- a/extensions/anthropic/media-understanding-provider.ts +++ b/extensions/anthropic/media-understanding-provider.ts @@ -7,7 +7,7 @@ import { export const anthropicMediaUnderstandingProvider: MediaUnderstandingProvider = { id: "anthropic", capabilities: ["image"], - defaultModels: { image: "claude-opus-4-7" }, + defaultModels: { image: "claude-opus-4-8" }, autoPriority: { image: 20 }, nativeDocumentInputs: ["pdf"], describeImage: describeImageWithModel, diff --git a/extensions/anthropic/openclaw.plugin.json b/extensions/anthropic/openclaw.plugin.json index 776ccd68f0ff..ea7f182e1af4 100644 --- a/extensions/anthropic/openclaw.plugin.json +++ b/extensions/anthropic/openclaw.plugin.json @@ -8,9 +8,20 @@ "providerCatalogEntry": "./provider-discovery.ts", "modelCatalog": { "runtimeAugment": true, - "providers": { + "providers": { "claude-cli": { "models": [ + { + "id": "claude-opus-4-8", + "name": "Claude Opus 4.8 (Claude CLI)", + "reasoning": true, + "input": ["text", "image"], + "mediaInput": { + "image": { "maxSidePx": 2576, "preferredSidePx": 2576, "tokenMode": "provider" } + }, + "contextWindow": 1048576, + "maxTokens": 128000 + }, { "id": "claude-opus-4-7", "name": "Claude Opus 4.7 (Claude CLI)", @@ -50,6 +61,17 @@ "baseUrl": "https://api.anthropic.com", "api": "anthropic-messages", "models": [ + { + "id": "claude-opus-4-8", + "name": "Claude Opus 4.8", + "reasoning": true, + "input": ["text", "image"], + "mediaInput": { + "image": { "maxSidePx": 2576, "preferredSidePx": 2576, "tokenMode": "provider" } + }, + "contextWindow": 1048576, + "maxTokens": 128000 + }, { "id": "claude-opus-4-7", "name": "Claude Opus 4.7", @@ -98,6 +120,8 @@ "providers": { "anthropic": { "aliases": { + "opus-4.8": "claude-opus-4-8", + "opus": "claude-opus-4-8", "opus-4.6": "claude-opus-4-6", "sonnet-4.6": "claude-sonnet-4-6" } @@ -184,7 +208,7 @@ "anthropic": { "capabilities": ["image"], "defaultModels": { - "image": "claude-opus-4-7" + "image": "claude-opus-4-8" }, "autoPriority": { "image": 20 diff --git a/extensions/anthropic/provider-policy-api.test.ts b/extensions/anthropic/provider-policy-api.test.ts index 1ae4903cdcad..be5dc1990a8d 100644 --- a/extensions/anthropic/provider-policy-api.test.ts +++ b/extensions/anthropic/provider-policy-api.test.ts @@ -123,10 +123,10 @@ describe("anthropic provider policy public artifact", () => { ).toBe("short"); }); - it("exposes Claude Opus 4.7 thinking levels without loading the full provider plugin", () => { + it("exposes Claude Opus 4.8 thinking levels without loading the full provider plugin", () => { const profile = resolveThinkingProfile({ provider: "anthropic", - modelId: "claude-opus-4-7", + modelId: "claude-opus-4-8", }); const ids = levelIds(profile?.levels); expect(ids).toContain("xhigh"); diff --git a/extensions/anthropic/register.runtime.ts b/extensions/anthropic/register.runtime.ts index 536bfa95bf03..279675e3bd37 100644 --- a/extensions/anthropic/register.runtime.ts +++ b/extensions/anthropic/register.runtime.ts @@ -45,10 +45,13 @@ import { wrapAnthropicProviderStream } from "./stream-wrappers.js"; const PROVIDER_ID = "anthropic"; type UpsertAuthProfileParams = Parameters[0]; -const DEFAULT_ANTHROPIC_MODEL = "anthropic/claude-opus-4-7"; +const DEFAULT_ANTHROPIC_MODEL = "anthropic/claude-opus-4-8"; +const ANTHROPIC_OPUS_48_MODEL_ID = "claude-opus-4-8"; +const ANTHROPIC_OPUS_48_DOT_MODEL_ID = "claude-opus-4.8"; const ANTHROPIC_OPUS_47_MODEL_ID = "claude-opus-4-7"; const ANTHROPIC_OPUS_47_DOT_MODEL_ID = "claude-opus-4.7"; const ANTHROPIC_GA_1M_CONTEXT_TOKENS = 1_048_576; +const ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS = 128_000; const ANTHROPIC_OPUS_46_MODEL_ID = "claude-opus-4-6"; const ANTHROPIC_OPUS_46_DOT_MODEL_ID = "claude-opus-4.6"; const ANTHROPIC_OPUS_47_TEMPLATE_MODEL_IDS = [ @@ -58,6 +61,8 @@ const ANTHROPIC_OPUS_47_TEMPLATE_MODEL_IDS = [ const ANTHROPIC_SONNET_46_MODEL_ID = "claude-sonnet-4-6"; const ANTHROPIC_SONNET_46_DOT_MODEL_ID = "claude-sonnet-4.6"; const ANTHROPIC_GA_1M_MODEL_PREFIXES = [ + ANTHROPIC_OPUS_48_MODEL_ID, + ANTHROPIC_OPUS_48_DOT_MODEL_ID, ANTHROPIC_OPUS_46_MODEL_ID, ANTHROPIC_OPUS_46_DOT_MODEL_ID, ANTHROPIC_OPUS_47_MODEL_ID, @@ -66,6 +71,8 @@ const ANTHROPIC_GA_1M_MODEL_PREFIXES = [ ANTHROPIC_SONNET_46_DOT_MODEL_ID, ] as const; const ANTHROPIC_MODERN_MODEL_PREFIXES = [ + "claude-opus-4-8", + "claude-opus-4.8", "claude-opus-4-7", "claude-opus-4.7", "claude-opus-4-6", @@ -282,8 +289,10 @@ function buildAnthropicForwardCompatModel( reasoning: true, input: ["text", "image"], cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, - contextWindow: 200_000, - maxTokens: 64_000, + contextWindow: isAnthropicGa1MModel(trimmedModelId) ? ANTHROPIC_GA_1M_CONTEXT_TOKENS : 200_000, + maxTokens: isAnthropicOpus48Model(trimmedModelId) + ? ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS + : 64_000, }; } @@ -291,6 +300,14 @@ function resolveAnthropicForwardCompatModel( ctx: ProviderResolveDynamicModelContext, ): ProviderRuntimeModel | undefined { return ( + resolveAnthropic46ForwardCompatModel({ + ctx, + dashModelId: ANTHROPIC_OPUS_48_MODEL_ID, + dotModelId: ANTHROPIC_OPUS_48_DOT_MODEL_ID, + dashTemplateId: ANTHROPIC_OPUS_47_MODEL_ID, + dotTemplateId: ANTHROPIC_OPUS_47_DOT_MODEL_ID, + fallbackTemplateIds: ANTHROPIC_OPUS_47_TEMPLATE_MODEL_IDS, + }) ?? resolveAnthropic46ForwardCompatModel({ ctx, dashModelId: ANTHROPIC_OPUS_47_MODEL_ID, @@ -324,6 +341,23 @@ function isAnthropicGa1MModel(modelId: string): boolean { return ANTHROPIC_GA_1M_MODEL_PREFIXES.some((prefix) => normalized.startsWith(prefix)); } +function isAnthropicOpus48Model(modelId: string): boolean { + const normalized = normalizeLowercaseStringOrEmpty(modelId); + return [ANTHROPIC_OPUS_48_MODEL_ID, ANTHROPIC_OPUS_48_DOT_MODEL_ID].some((prefix) => + normalized.startsWith(prefix), + ); +} + +function isAnthropicOpus47OrNewerModel(modelId: string): boolean { + const normalized = normalizeLowercaseStringOrEmpty(modelId); + return [ + ANTHROPIC_OPUS_48_MODEL_ID, + ANTHROPIC_OPUS_48_DOT_MODEL_ID, + ANTHROPIC_OPUS_47_MODEL_ID, + ANTHROPIC_OPUS_47_DOT_MODEL_ID, + ].some((prefix) => normalized.startsWith(prefix)); +} + function hasConfiguredModelContextOverride( config: ProviderNormalizeResolvedModelContext["config"], provider: string, @@ -393,6 +427,45 @@ function applyAnthropicGa1MContextWindow(params: { }; } +function applyAnthropicOpus48MaxTokens(params: { + modelId: string; + model: ProviderRuntimeModel; +}): ProviderRuntimeModel | undefined { + if (!isAnthropicOpus48Model(params.modelId)) { + return undefined; + } + if ((params.model.maxTokens ?? 0) >= ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS) { + return undefined; + } + return { + ...params.model, + maxTokens: ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS, + }; +} + +function applyAnthropicOpusThinkingLevelMap(params: { + modelId: string; + model: ProviderRuntimeModel; +}): ProviderRuntimeModel | undefined { + if (!isAnthropicOpus47OrNewerModel(params.modelId)) { + return undefined; + } + if ( + params.model.thinkingLevelMap?.xhigh === "xhigh" && + params.model.thinkingLevelMap?.max === "max" + ) { + return undefined; + } + return { + ...params.model, + thinkingLevelMap: { + ...params.model.thinkingLevelMap, + xhigh: "xhigh", + max: "max", + }, + }; +} + function matchesAnthropicModernModel(modelId: string): boolean { const lower = normalizeLowercaseStringOrEmpty(modelId); return ANTHROPIC_MODERN_MODEL_PREFIXES.some((prefix) => lower.startsWith(prefix)); @@ -413,15 +486,18 @@ function resolveAnthropicImageMediaInput(modelId: string, modelName?: string) { return undefined; } const refs = [modelId, modelName].filter((value): value is string => typeof value === "string"); - const opus47 = refs.some((ref) => - [ANTHROPIC_OPUS_47_MODEL_ID, ANTHROPIC_OPUS_47_DOT_MODEL_ID].some((prefix) => - normalizeLowercaseStringOrEmpty(ref).startsWith(prefix), - ), + const largeImageOpus = refs.some((ref) => + [ + ANTHROPIC_OPUS_48_MODEL_ID, + ANTHROPIC_OPUS_48_DOT_MODEL_ID, + ANTHROPIC_OPUS_47_MODEL_ID, + ANTHROPIC_OPUS_47_DOT_MODEL_ID, + ].some((prefix) => normalizeLowercaseStringOrEmpty(ref).startsWith(prefix)), ); return { image: { - maxSidePx: opus47 ? 2576 : 1568, - preferredSidePx: opus47 ? 2576 : 1568, + maxSidePx: largeImageOpus ? 2576 : 1568, + preferredSidePx: largeImageOpus ? 2576 : 1568, tokenMode: "provider" as const, }, }; @@ -461,13 +537,23 @@ function normalizeAnthropicResolvedModel( }, } : imageCapableModel; + const outputModel = + applyAnthropicOpus48MaxTokens({ + modelId: ctx.modelId, + model: mediaInputModel, + }) ?? mediaInputModel; + const thinkingLevelModel = + applyAnthropicOpusThinkingLevelMap({ + modelId: ctx.modelId, + model: outputModel, + }) ?? outputModel; const contextWindowModel = applyAnthropicGa1MContextWindow({ config: ctx.config, provider: ctx.provider, modelId: ctx.modelId, - model: mediaInputModel, - }) ?? mediaInputModel; + model: thinkingLevelModel, + }) ?? thinkingLevelModel; return contextWindowModel === ctx.model ? undefined : contextWindowModel; } @@ -682,13 +768,23 @@ export function buildAnthropicProvider(): ProviderPlugin { modelId: ctx.modelId, model, }) ?? model; + const outputModel = + applyAnthropicOpus48MaxTokens({ + modelId: ctx.modelId, + model: imageCapableModel, + }) ?? imageCapableModel; + const thinkingLevelModel = + applyAnthropicOpusThinkingLevelMap({ + modelId: ctx.modelId, + model: outputModel, + }) ?? outputModel; return ( applyAnthropicGa1MContextWindow({ config: ctx.config, provider: ctx.provider, modelId: ctx.modelId, - model: imageCapableModel, - }) ?? imageCapableModel + model: thinkingLevelModel, + }) ?? thinkingLevelModel ); }, normalizeResolvedModel: (ctx) => normalizeAnthropicResolvedModel(ctx), diff --git a/extensions/anthropic/stream-wrappers.ts b/extensions/anthropic/stream-wrappers.ts index 154b693d3014..1ab3acd465ab 100644 --- a/extensions/anthropic/stream-wrappers.ts +++ b/extensions/anthropic/stream-wrappers.ts @@ -20,6 +20,8 @@ const log = createSubsystemLogger("anthropic-stream"); const ANTHROPIC_CONTEXT_1M_BETA_LEGACY = "context-1m-2025-08-07"; const ANTHROPIC_GA_1M_MODEL_PREFIXES = [ + "claude-opus-4-8", + "claude-opus-4.8", "claude-opus-4-6", "claude-opus-4.6", "claude-opus-4-7", diff --git a/extensions/google/transport-stream.ts b/extensions/google/transport-stream.ts index 4b1305aac9a4..fa6a7c852844 100644 --- a/extensions/google/transport-stream.ts +++ b/extensions/google/transport-stream.ts @@ -417,7 +417,7 @@ function getGoogleThinkingBudget( effort: ThinkingLevel, customBudgets?: GoogleTransportOptions["thinkingBudgets"], ): number | undefined { - const normalizedEffort = effort === "xhigh" ? "high" : effort; + const normalizedEffort = effort === "xhigh" || effort === "max" ? "high" : effort; if (customBudgets?.[normalizedEffort] !== undefined) { return customBudgets[normalizedEffort]; } diff --git a/extensions/qa-lab/src/agentic-parity-report.test.ts b/extensions/qa-lab/src/agentic-parity-report.test.ts index 1587fc5b89e0..39b87b3c33ef 100644 --- a/extensions/qa-lab/src/agentic-parity-report.test.ts +++ b/extensions/qa-lab/src/agentic-parity-report.test.ts @@ -153,7 +153,7 @@ describe("qa agentic parity report", () => { it("fails the parity gate when the candidate regresses against baseline", () => { const comparison = buildQaAgenticParityComparison({ candidateLabel: "openai/gpt-5.5", - baselineLabel: "anthropic/claude-opus-4-7", + baselineLabel: "anthropic/claude-opus-4-8", candidateSummary: { scenarios: [ { name: "Approval turn tool followthrough", status: "pass" }, @@ -181,10 +181,10 @@ describe("qa agentic parity report", () => { expect(comparison.pass).toBe(false); expect(comparison.failures).toContain( - "openai/gpt-5.5 completion rate 80.0% is below anthropic/claude-opus-4-7 100.0%.", + "openai/gpt-5.5 completion rate 80.0% is below anthropic/claude-opus-4-8 100.0%.", ); expect(comparison.failures).toContain( - "openai/gpt-5.5 unintended-stop rate 20.0% exceeds anthropic/claude-opus-4-7 0.0%.", + "openai/gpt-5.5 unintended-stop rate 20.0% exceeds anthropic/claude-opus-4-8 0.0%.", ); }); @@ -199,7 +199,7 @@ describe("qa agentic parity report", () => { ]; const comparison = buildQaAgenticParityComparison({ candidateLabel: "openai/gpt-5.5", - baselineLabel: "anthropic/claude-opus-4-7", + baselineLabel: "anthropic/claude-opus-4-8", candidateSummary: { scenarios: baselineScenarios.filter( (scenario) => scenario.name !== "Extra non-parity lane", @@ -211,14 +211,14 @@ describe("qa agentic parity report", () => { expect(comparison.pass).toBe(false); expect(comparison.failures).toContain( - "Scenario coverage mismatch for Extra non-parity lane: openai/gpt-5.5=missing, anthropic/claude-opus-4-7=pass.", + "Scenario coverage mismatch for Extra non-parity lane: openai/gpt-5.5=missing, anthropic/claude-opus-4-8=pass.", ); }); it("reports each missing required parity scenario exactly once (no double-counting)", () => { const comparison = buildQaAgenticParityComparison({ candidateLabel: "openai/gpt-5.5", - baselineLabel: "anthropic/claude-opus-4-7", + baselineLabel: "anthropic/claude-opus-4-8", candidateSummary: { scenarios: [{ name: "Approval turn tool followthrough", status: "pass" }], }, @@ -260,7 +260,7 @@ describe("qa agentic parity report", () => { const comparison = buildQaAgenticParityComparison({ candidateLabel: "openai/gpt-5.5", - baselineLabel: "anthropic/claude-opus-4-7", + baselineLabel: "anthropic/claude-opus-4-8", candidateSummary: summaryWithExtras, baselineSummary: scopedSummary, comparedAt: "2026-04-11T00:00:00.000Z", @@ -282,7 +282,7 @@ describe("qa agentic parity report", () => { it("fails the parity gate when required parity scenarios are missing on both sides", () => { const comparison = buildQaAgenticParityComparison({ candidateLabel: "openai/gpt-5.5", - baselineLabel: "anthropic/claude-opus-4-7", + baselineLabel: "anthropic/claude-opus-4-8", candidateSummary: { scenarios: [{ name: "Approval turn tool followthrough", status: "pass" }], }, @@ -294,14 +294,14 @@ describe("qa agentic parity report", () => { expect(comparison.pass).toBe(false); expect(comparison.failures).toContain( - "Missing required parity scenario coverage for Image understanding from attachment: openai/gpt-5.5=missing, anthropic/claude-opus-4-7=missing.", + "Missing required parity scenario coverage for Image understanding from attachment: openai/gpt-5.5=missing, anthropic/claude-opus-4-8=missing.", ); }); it("fails the parity gate when required parity scenarios are skipped", () => { const comparison = buildQaAgenticParityComparison({ candidateLabel: "openai/gpt-5.5", - baselineLabel: "anthropic/claude-opus-4-7", + baselineLabel: "anthropic/claude-opus-4-8", candidateSummary: { scenarios: [ { name: "Approval turn tool followthrough", status: "pass" }, @@ -325,7 +325,7 @@ describe("qa agentic parity report", () => { expect(comparison.pass).toBe(false); expect(comparison.failures).toContain( - "Missing required parity scenario coverage for Compaction retry after mutating tool: openai/gpt-5.5=skip, anthropic/claude-opus-4-7=skip.", + "Missing required parity scenario coverage for Compaction retry after mutating tool: openai/gpt-5.5=skip, anthropic/claude-opus-4-8=skip.", ); }); @@ -342,7 +342,7 @@ describe("qa agentic parity report", () => { }); const comparison = buildQaAgenticParityComparison({ candidateLabel: "openai/gpt-5.5", - baselineLabel: "anthropic/claude-opus-4-7", + baselineLabel: "anthropic/claude-opus-4-8", candidateSummary: { scenarios: scenariosWithBothFail }, baselineSummary: { scenarios: scenariosWithBothFail }, comparedAt: "2026-04-11T00:00:00.000Z", @@ -350,7 +350,7 @@ describe("qa agentic parity report", () => { expect(comparison.pass).toBe(false); expect(comparison.failures).toContain( - "Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-7=fail.", + "Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-8=fail.", ); // Metric comparisons are relative, so a same-on-both-sides failure // must not appear as a relative metric failure. The required-scenario @@ -370,7 +370,7 @@ describe("qa agentic parity report", () => { }); const comparison = buildQaAgenticParityComparison({ candidateLabel: "openai/gpt-5.5", - baselineLabel: "anthropic/claude-opus-4-7", + baselineLabel: "anthropic/claude-opus-4-8", candidateSummary: { scenarios: candidateWithOneFail }, baselineSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS }, comparedAt: "2026-04-11T00:00:00.000Z", @@ -378,7 +378,7 @@ describe("qa agentic parity report", () => { expect(comparison.pass).toBe(false); expect(comparison.failures).toContain( - "Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-7=pass.", + "Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-8=pass.", ); }); @@ -387,7 +387,7 @@ describe("qa agentic parity report", () => { // below is the isolated gate failure under test (no coverage-gap noise). const comparison = buildQaAgenticParityComparison({ candidateLabel: "openai/gpt-5.5", - baselineLabel: "anthropic/claude-opus-4-7", + baselineLabel: "anthropic/claude-opus-4-8", candidateSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS, }, @@ -401,7 +401,7 @@ describe("qa agentic parity report", () => { expect(comparison.pass).toBe(false); expect(comparison.failures).toEqual([ - "anthropic/claude-opus-4-7 produced 1 suspicious pass result(s); baseline fake-success count must also be 0.", + "anthropic/claude-opus-4-8 produced 1 suspicious pass result(s); baseline fake-success count must also be 0.", ]); }); @@ -571,14 +571,14 @@ status=done`, expect(() => buildQaAgenticParityComparison({ candidateLabel: "openai/gpt-5.5", - baselineLabel: "anthropic/claude-opus-4-7", + baselineLabel: "anthropic/claude-opus-4-8", candidateSummary: { scenarios: parityPassScenarios, - run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-7" }, + run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-8" }, }, baselineSummary: { scenarios: parityPassScenarios, - run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-7" }, + run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-8" }, }, comparedAt: "2026-04-11T00:00:00.000Z", }), @@ -593,7 +593,7 @@ status=done`, expect(() => buildQaAgenticParityComparison({ candidateLabel: "openai/gpt-5.5", - baselineLabel: "anthropic/claude-opus-4-7", + baselineLabel: "anthropic/claude-opus-4-8", candidateSummary: { scenarios: parityPassScenarios, run: { primaryProvider: "openai" }, @@ -612,7 +612,7 @@ status=done`, it("accepts matching run.primaryProvider labels without throwing", () => { const comparison = buildQaAgenticParityComparison({ candidateLabel: "openai/gpt-5.5", - baselineLabel: "anthropic/claude-opus-4-7", + baselineLabel: "anthropic/claude-opus-4-8", candidateSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS, run: { @@ -625,8 +625,8 @@ status=done`, scenarios: FULL_PARITY_PASS_SCENARIOS, run: { primaryProvider: "anthropic", - primaryModel: "anthropic/claude-opus-4-7", - primaryModelName: "claude-opus-4-7", + primaryModel: "anthropic/claude-opus-4-8", + primaryModelName: "claude-opus-4-8", }, }, comparedAt: "2026-04-11T00:00:00.000Z", @@ -639,7 +639,7 @@ status=done`, // work against those, trusting the caller-supplied label. const comparison = buildQaAgenticParityComparison({ candidateLabel: "openai/gpt-5.5", - baselineLabel: "anthropic/claude-opus-4-7", + baselineLabel: "anthropic/claude-opus-4-8", candidateSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS }, baselineSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS }, comparedAt: "2026-04-11T00:00:00.000Z", @@ -650,7 +650,7 @@ status=done`, it("skips provider verification for arbitrary display labels when run metadata is present", () => { const comparison = buildQaAgenticParityComparison({ candidateLabel: "GPT-5.5 candidate", - baselineLabel: "Opus 4.7 baseline", + baselineLabel: "Opus 4.8 baseline", candidateSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS, run: { @@ -663,8 +663,8 @@ status=done`, scenarios: FULL_PARITY_PASS_SCENARIOS, run: { primaryProvider: "anthropic", - primaryModel: "anthropic/claude-opus-4-7", - primaryModelName: "claude-opus-4-7", + primaryModel: "anthropic/claude-opus-4-8", + primaryModelName: "claude-opus-4-8", }, }, comparedAt: "2026-04-11T00:00:00.000Z", @@ -676,7 +676,7 @@ status=done`, it("skips provider verification for mixed-case or decorated display labels", () => { const comparison = buildQaAgenticParityComparison({ candidateLabel: "Candidate: GPT-5.5", - baselineLabel: "Opus 4.7 / baseline", + baselineLabel: "Opus 4.8 / baseline", candidateSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS, run: { @@ -689,8 +689,8 @@ status=done`, scenarios: FULL_PARITY_PASS_SCENARIOS, run: { primaryProvider: "anthropic", - primaryModel: "anthropic/claude-opus-4-7", - primaryModelName: "claude-opus-4-7", + primaryModel: "anthropic/claude-opus-4-8", + primaryModelName: "claude-opus-4-8", }, }, comparedAt: "2026-04-11T00:00:00.000Z", @@ -703,7 +703,7 @@ status=done`, expect(() => buildQaAgenticParityComparison({ candidateLabel: "openai/gpt-5.5", - baselineLabel: "anthropic/claude-opus-4-7", + baselineLabel: "anthropic/claude-opus-4-8", candidateSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS, run: { @@ -716,8 +716,8 @@ status=done`, scenarios: FULL_PARITY_PASS_SCENARIOS, run: { primaryProvider: "anthropic", - primaryModel: "anthropic/claude-opus-4-7", - primaryModelName: "claude-opus-4-7", + primaryModel: "anthropic/claude-opus-4-8", + primaryModelName: "claude-opus-4-8", }, }, comparedAt: "2026-04-11T00:00:00.000Z", @@ -730,7 +730,7 @@ status=done`, it("accepts colon-delimited structured labels when provider and model both match", () => { const comparison = buildQaAgenticParityComparison({ candidateLabel: "openai:gpt-5.5", - baselineLabel: "anthropic:claude-opus-4-7", + baselineLabel: "anthropic:claude-opus-4-8", candidateSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS, run: { @@ -743,8 +743,8 @@ status=done`, scenarios: FULL_PARITY_PASS_SCENARIOS, run: { primaryProvider: "anthropic", - primaryModel: "anthropic/claude-opus-4-7", - primaryModelName: "claude-opus-4-7", + primaryModel: "anthropic/claude-opus-4-8", + primaryModelName: "claude-opus-4-8", }, }, comparedAt: "2026-04-11T00:00:00.000Z", @@ -759,7 +759,7 @@ status=done`, // added by the second-wave expansion. const comparison = buildQaAgenticParityComparison({ candidateLabel: "openai/gpt-5.5", - baselineLabel: "anthropic/claude-opus-4-7", + baselineLabel: "anthropic/claude-opus-4-8", candidateSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS }, baselineSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS }, comparedAt: "2026-04-11T00:00:00.000Z", @@ -768,7 +768,7 @@ status=done`, const report = renderQaAgenticParityMarkdownReport(comparison); expect(report).toContain( - "# OpenClaw Agentic Parity Report — openai/gpt-5.5 vs anthropic/claude-opus-4-7", + "# OpenClaw Agentic Parity Report — openai/gpt-5.5 vs anthropic/claude-opus-4-8", ); expect(report).toContain("| Completion rate | 100.0% | 100.0% |"); expect(report).toContain("### Approval turn tool followthrough"); @@ -779,7 +779,7 @@ status=done`, // Regression for the loop-7 Copilot finding: callers that configure // non-gpt-5.5 / non-opus labels (for example an internal candidate vs // another candidate) must see the labels in the rendered H1 instead of - // the hardcoded "GPT-5.5 / Opus 4.7" title that would otherwise confuse + // the hardcoded "GPT-5.5 / Opus 4.8" title that would otherwise confuse // readers of saved reports. const comparison = buildQaAgenticParityComparison({ candidateLabel: "openai/gpt-5.5-alt", diff --git a/extensions/qa-lab/src/agentic-parity-report.ts b/extensions/qa-lab/src/agentic-parity-report.ts index f220ded0aa97..45909fb39064 100644 --- a/extensions/qa-lab/src/agentic-parity-report.ts +++ b/extensions/qa-lab/src/agentic-parity-report.ts @@ -566,7 +566,7 @@ export function renderQaAgenticParityMarkdownReport(comparison: QaAgenticParityC // Title is parametrized from the candidate / baseline labels so reports // for any candidate/baseline pair (not only gpt-5.5 vs opus 4.6) render // with an accurate header. The default CLI labels are still - // openai/gpt-5.5 vs anthropic/claude-opus-4-7, but the helper works for + // openai/gpt-5.5 vs anthropic/claude-opus-4-8, but the helper works for // any parity comparison a caller configures. const lines = [ `# OpenClaw Agentic Parity Report — ${comparison.candidateLabel} vs ${comparison.baselineLabel}`, diff --git a/extensions/qa-lab/src/character-eval.test.ts b/extensions/qa-lab/src/character-eval.test.ts index 22d97466369b..e5db5a2ccfb2 100644 --- a/extensions/qa-lab/src/character-eval.test.ts +++ b/extensions/qa-lab/src/character-eval.test.ts @@ -274,7 +274,7 @@ describe("runQaCharacterEval", () => { { model: "openai/gpt-5.5", rank: 1, score: 8, summary: "ok" }, { model: "openai/gpt-5.2", rank: 2, score: 7.5, summary: "ok" }, { model: "openai/gpt-5", rank: 3, score: 7.2, summary: "ok" }, - { model: "anthropic/claude-opus-4-7", rank: 4, score: 7, summary: "ok" }, + { model: "anthropic/claude-opus-4-8", rank: 4, score: 7, summary: "ok" }, { model: "anthropic/claude-sonnet-4-6", rank: 5, score: 6.8, summary: "ok" }, { model: "zai/glm-5.1", rank: 6, score: 6.3, summary: "ok" }, { model: "moonshot/kimi-k2.5", rank: 7, score: 6.2, summary: "ok" }, @@ -294,7 +294,7 @@ describe("runQaCharacterEval", () => { "openai/gpt-5.5", "openai/gpt-5.2", "openai/gpt-5", - "anthropic/claude-opus-4-7", + "anthropic/claude-opus-4-8", "anthropic/claude-sonnet-4-6", "zai/glm-5.1", "moonshot/kimi-k2.5", @@ -323,7 +323,7 @@ describe("runQaCharacterEval", () => { expect(runJudge).toHaveBeenCalledTimes(2); expect(runJudge.mock.calls.map(([params]) => params.judgeModel)).toEqual([ "openai/gpt-5.5", - "anthropic/claude-opus-4-7", + "anthropic/claude-opus-4-8", ]); expect(runJudge.mock.calls.map(([params]) => params.judgeThinkingDefault)).toEqual([ "xhigh", @@ -577,11 +577,11 @@ describe("runQaCharacterEval", () => { candidateModelOptions: { "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: false }, }, - judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-7"], + judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-8"], judgeThinkingDefault: "medium", judgeModelOptions: { "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: true }, - "anthropic/claude-opus-4-7": { thinkingDefault: "high" }, + "anthropic/claude-opus-4-8": { thinkingDefault: "high" }, }, runSuite, runJudge, diff --git a/extensions/qa-lab/src/cli.runtime.test.ts b/extensions/qa-lab/src/cli.runtime.test.ts index c6aadbdbf5b8..77bbb2948f4a 100644 --- a/extensions/qa-lab/src/cli.runtime.test.ts +++ b/extensions/qa-lab/src/cli.runtime.test.ts @@ -662,7 +662,7 @@ describe("qa cli runtime", () => { repoRoot: "/tmp/openclaw-repo", providerMode: "mock-openai", primaryModel: "openai/gpt-5.5", - alternateModel: "anthropic/claude-opus-4-7", + alternateModel: "anthropic/claude-opus-4-8", preflight: true, }); @@ -672,7 +672,7 @@ describe("qa cli runtime", () => { transportId: "qa-channel", providerMode: "mock-openai", primaryModel: "openai/gpt-5.5", - alternateModel: "anthropic/claude-opus-4-7", + alternateModel: "anthropic/claude-opus-4-8", scenarioIds: ["approval-turn-tool-followthrough"], concurrency: 1, }); @@ -1261,7 +1261,7 @@ describe("qa cli runtime", () => { fast: true, thinking: "medium", modelThinking: ["codex-cli/test-model=medium"], - judgeModel: ["openai/gpt-5.5,thinking=xhigh,fast", "anthropic/claude-opus-4-7,thinking=high"], + judgeModel: ["openai/gpt-5.5,thinking=xhigh,fast", "anthropic/claude-opus-4-8,thinking=high"], judgeTimeoutMs: 180_000, blindJudgeModels: true, concurrency: 4, @@ -1282,10 +1282,10 @@ describe("qa cli runtime", () => { "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: false }, "codex-cli/test-model": { thinkingDefault: "high", fastMode: true }, }, - judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-7"], + judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-8"], judgeModelOptions: { "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: true }, - "anthropic/claude-opus-4-7": { thinkingDefault: "high" }, + "anthropic/claude-opus-4-8": { thinkingDefault: "high" }, }, judgeTimeoutMs: 180_000, judgeBlindModels: true, @@ -1616,7 +1616,7 @@ describe("qa cli runtime", () => { providerMode: "mock-openai", parityPack: "agentic", primaryModel: "openai/gpt-5.5", - alternateModel: "anthropic/claude-opus-4-7", + alternateModel: "anthropic/claude-opus-4-8", }); expect(runQaSuiteFromRuntime).toHaveBeenCalledWith({ @@ -1625,7 +1625,7 @@ describe("qa cli runtime", () => { transportId: "qa-channel", providerMode: "mock-openai", primaryModel: "openai/gpt-5.5", - alternateModel: "anthropic/claude-opus-4-7", + alternateModel: "anthropic/claude-opus-4-8", fastMode: undefined, scenarioIds: [ "approval-turn-tool-followthrough", diff --git a/extensions/qa-lab/src/live-timeout.test.ts b/extensions/qa-lab/src/live-timeout.test.ts index fa216cbc2980..bf3050a7a752 100644 --- a/extensions/qa-lab/src/live-timeout.test.ts +++ b/extensions/qa-lab/src/live-timeout.test.ts @@ -8,7 +8,7 @@ describe("qa live timeout policy", () => { { providerMode: "mock-openai", primaryModel: "anthropic/claude-sonnet-4-6", - alternateModel: "anthropic/claude-opus-4-7", + alternateModel: "anthropic/claude-opus-4-8", }, 30_000, ), @@ -47,7 +47,7 @@ describe("qa live timeout policy", () => { { providerMode: "live-frontier", primaryModel: "anthropic/claude-sonnet-4-6", - alternateModel: "anthropic/claude-opus-4-7", + alternateModel: "anthropic/claude-opus-4-8", }, 30_000, ), @@ -60,10 +60,10 @@ describe("qa live timeout policy", () => { { providerMode: "live-frontier", primaryModel: "anthropic/claude-sonnet-4-6", - alternateModel: "anthropic/claude-opus-4-7", + alternateModel: "anthropic/claude-opus-4-8", }, 30_000, - "anthropic/claude-opus-4-7", + "anthropic/claude-opus-4-8", ), ).toBe(240_000); }); diff --git a/extensions/qa-lab/src/providers/live-frontier/character-eval.ts b/extensions/qa-lab/src/providers/live-frontier/character-eval.ts index 89f5fa275496..00d38ca07eeb 100644 --- a/extensions/qa-lab/src/providers/live-frontier/character-eval.ts +++ b/extensions/qa-lab/src/providers/live-frontier/character-eval.ts @@ -9,7 +9,7 @@ export const QA_FRONTIER_CHARACTER_EVAL_MODELS = Object.freeze([ "openai/gpt-5.5", "openai/gpt-5.2", "openai/gpt-5", - "anthropic/claude-opus-4-7", + "anthropic/claude-opus-4-8", "anthropic/claude-sonnet-4-6", "zai/glm-5.1", "moonshot/kimi-k2.5", @@ -25,12 +25,12 @@ export const QA_FRONTIER_CHARACTER_THINKING_BY_MODEL: Readonly > = Object.freeze({ "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: true }, - "anthropic/claude-opus-4-7": { thinkingDefault: "high" }, + "anthropic/claude-opus-4-8": { thinkingDefault: "high" }, }); diff --git a/extensions/qa-lab/src/providers/live-frontier/parity.ts b/extensions/qa-lab/src/providers/live-frontier/parity.ts index a0874010b6d0..53127848c697 100644 --- a/extensions/qa-lab/src/providers/live-frontier/parity.ts +++ b/extensions/qa-lab/src/providers/live-frontier/parity.ts @@ -1,2 +1,2 @@ export const QA_FRONTIER_PARITY_CANDIDATE_LABEL = "openai/gpt-5.5"; -export const QA_FRONTIER_PARITY_BASELINE_LABEL = "anthropic/claude-opus-4-7"; +export const QA_FRONTIER_PARITY_BASELINE_LABEL = "anthropic/claude-opus-4-8"; diff --git a/extensions/qa-lab/src/providers/mock-openai/server.test.ts b/extensions/qa-lab/src/providers/mock-openai/server.test.ts index a59317de5448..b812c0f11d85 100644 --- a/extensions/qa-lab/src/providers/mock-openai/server.test.ts +++ b/extensions/qa-lab/src/providers/mock-openai/server.test.ts @@ -3464,7 +3464,7 @@ describe("qa mock openai server", () => { expect(outputText(await response.json())).toBe("NO_REPLY"); }); - it("advertises Anthropic claude-opus-4-7 baseline model on /v1/models", async () => { + it("advertises Anthropic claude-opus-4-8 baseline model on /v1/models", async () => { const server = await startQaMockOpenAiServer({ host: "127.0.0.1", port: 0, @@ -3477,7 +3477,7 @@ describe("qa mock openai server", () => { expect(response.status).toBe(200); const body = (await response.json()) as { data: Array<{ id: string }> }; const ids = body.data.map((entry) => entry.id); - expect(ids).toContain("claude-opus-4-7"); + expect(ids).toContain("claude-opus-4-8"); expect(ids).toContain("gpt-5.5"); }); @@ -3494,7 +3494,7 @@ describe("qa mock openai server", () => { method: "POST", headers: { "content-type": "application/json" }, body: JSON.stringify({ - model: "claude-opus-4-7", + model: "claude-opus-4-8", max_tokens: 256, messages: [ { @@ -3519,7 +3519,7 @@ describe("qa mock openai server", () => { }; expect(body.type).toBe("message"); expect(body.role).toBe("assistant"); - expect(body.model).toBe("claude-opus-4-7"); + expect(body.model).toBe("claude-opus-4-8"); expect(body.stop_reason).toBe("tool_use"); const toolUseBlock = body.content.find((block) => block.type === "tool_use") as | { name: string; input: Record } @@ -3530,7 +3530,7 @@ describe("qa mock openai server", () => { const debugResponse = await fetch(`${server.baseUrl}/debug/last-request`); expect(debugResponse.status).toBe(200); const debugPayload = requireRecord(await debugResponse.json(), "debug request"); - expect(debugPayload.model).toBe("claude-opus-4-7"); + expect(debugPayload.model).toBe("claude-opus-4-8"); expect(debugPayload.plannedToolName).toBe("read"); }); @@ -3541,7 +3541,7 @@ describe("qa mock openai server", () => { method: "POST", headers: { "content-type": "application/json" }, body: JSON.stringify({ - model: "claude-opus-4-7", + model: "claude-opus-4-8", max_tokens: 256, tools: [ { @@ -3581,7 +3581,7 @@ describe("qa mock openai server", () => { const debugResponse = await fetch(`${server.baseUrl}/debug/last-request`); expect(debugResponse.status).toBe(200); const debugPayload = requireRecord(await debugResponse.json(), "debug request"); - expect(debugPayload.model).toBe("claude-opus-4-7"); + expect(debugPayload.model).toBe("claude-opus-4-8"); expect(debugPayload.plannedToolName).toBe("sessions_spawn"); }); @@ -3605,7 +3605,7 @@ describe("qa mock openai server", () => { method: "POST", headers: { "content-type": "application/json" }, body: JSON.stringify({ - model: "claude-opus-4-7", + model: "claude-opus-4-8", max_tokens: 256, messages: [ { @@ -3679,7 +3679,7 @@ describe("qa mock openai server", () => { method: "POST", headers: { "content-type": "application/json" }, body: JSON.stringify({ - model: "claude-opus-4-7", + model: "claude-opus-4-8", max_tokens: 256, messages: [ { @@ -3760,7 +3760,7 @@ describe("qa mock openai server", () => { method: "POST", headers: { "content-type": "application/json" }, body: JSON.stringify({ - model: "claude-opus-4-7", + model: "claude-opus-4-8", max_tokens: 256, stream: true, messages: [ @@ -3801,7 +3801,7 @@ describe("qa mock openai server", () => { method: "POST", headers: { "content-type": "application/json" }, body: JSON.stringify({ - model: "claude-opus-4-7", + model: "claude-opus-4-8", max_tokens: 256, stream: true, messages: [ @@ -3860,7 +3860,7 @@ describe("qa mock openai server", () => { method: "POST", headers: { "content-type": "application/json" }, body: JSON.stringify({ - model: "claude-opus-4-7", + model: "claude-opus-4-8", max_tokens: 256, stream: true, system: [ @@ -3903,7 +3903,7 @@ describe("qa mock openai server", () => { method: "POST", headers: { "content-type": "application/json" }, body: JSON.stringify({ - model: "claude-opus-4-7", + model: "claude-opus-4-8", max_tokens: 256, stream: true, system: [ @@ -3948,7 +3948,7 @@ describe("qa mock openai server", () => { const response = await fetch(`${server.baseUrl}/v1/messages`, { method: "POST", headers: { "content-type": "application/json" }, - body: '{"model":"claude-opus-4-7","messages":[', + body: '{"model":"claude-opus-4-8","messages":[', }); expect(response.status).toBe(400); @@ -3961,12 +3961,12 @@ describe("qa mock openai server", () => { expect(body.error.message).toContain("Malformed JSON body"); }); - it("defaults empty-string Anthropic /v1/messages model to claude-opus-4-7", async () => { + it("defaults empty-string Anthropic /v1/messages model to claude-opus-4-8", async () => { // Regression for the loop-7 Copilot finding: a bare `typeof // body.model === "string"` check lets an empty-string model leak // through to `lastRequest.model` and `responseBody.model`. Empty // strings must be treated the same as absent and default to - // `"claude-opus-4-7"` so parity consumers can trust the echoed label. + // `"claude-opus-4-8"` so parity consumers can trust the echoed label. const server = await startQaMockOpenAiServer({ host: "127.0.0.1", port: 0, @@ -3991,12 +3991,12 @@ describe("qa mock openai server", () => { }); expect(response.status).toBe(200); const body = (await response.json()) as { model: string }; - expect(body.model).toBe("claude-opus-4-7"); + expect(body.model).toBe("claude-opus-4-8"); const debugResponse = await fetch(`${server.baseUrl}/debug/last-request`); expect(debugResponse.status).toBe(200); const debug = (await debugResponse.json()) as { model: string }; - expect(debug.model).toBe("claude-opus-4-7"); + expect(debug.model).toBe("claude-opus-4-8"); }); it("scripts a reasoning-only recovery sequence after a replay-safe read", async () => { @@ -4247,9 +4247,9 @@ describe("resolveProviderVariant", () => { }); it("tags prefix-qualified anthropic models", () => { - expect(resolveProviderVariant("anthropic/claude-opus-4-7")).toBe("anthropic"); - expect(resolveProviderVariant("anthropic:claude-opus-4-7")).toBe("anthropic"); - expect(resolveProviderVariant("claude-cli/claude-opus-4-7")).toBe("anthropic"); + expect(resolveProviderVariant("anthropic/claude-opus-4-8")).toBe("anthropic"); + expect(resolveProviderVariant("anthropic:claude-opus-4-8")).toBe("anthropic"); + expect(resolveProviderVariant("claude-cli/claude-opus-4-8")).toBe("anthropic"); }); it("tags bare model names by prefix", () => { @@ -4257,7 +4257,7 @@ describe("resolveProviderVariant", () => { expect(resolveProviderVariant("gpt-5.5-alt")).toBe("openai"); expect(resolveProviderVariant("gpt-4.5")).toBe("openai"); expect(resolveProviderVariant("o1-preview")).toBe("openai"); - expect(resolveProviderVariant("claude-opus-4-7")).toBe("anthropic"); + expect(resolveProviderVariant("claude-opus-4-8")).toBe("anthropic"); expect(resolveProviderVariant("claude-sonnet-4-6")).toBe("anthropic"); }); @@ -4293,7 +4293,7 @@ describe("qa mock openai server provider variant tagging", () => { const anthropicSourceServer = await startMockServer(); const anthropicSource = await expectResponsesJson(anthropicSourceServer, { - model: "anthropic/claude-opus-4-7", + model: "anthropic/claude-opus-4-8", stream: false, input: [makeUserInput(sourcePrompt)], }); @@ -4312,7 +4312,7 @@ describe("qa mock openai server provider variant tagging", () => { const anthropicHandoffServer = await startMockServer(); const anthropicHandoff = await expectResponsesJson(anthropicHandoffServer, { - model: "claude-opus-4-7", + model: "claude-opus-4-8", stream: false, input: [makeUserInput(handoffPrompt)], }); @@ -4335,7 +4335,7 @@ describe("qa mock openai server provider variant tagging", () => { const anthropicFanoutServer = await startMockServer(); const anthropicFanout = await expectResponsesJson(anthropicFanoutServer, { - model: "anthropic/claude-opus-4-7", + model: "anthropic/claude-opus-4-8", stream: false, tools: [SESSIONS_SPAWN_TOOL], input: [makeUserInput(fanoutPrompt)], @@ -4386,7 +4386,7 @@ describe("qa mock openai server provider variant tagging", () => { method: "POST", headers: { "content-type": "application/json" }, body: JSON.stringify({ - model: "claude-opus-4-7", + model: "claude-opus-4-8", max_tokens: 256, messages: [{ role: "user", content: "Heartbeat check" }], }), @@ -4396,7 +4396,7 @@ describe("qa mock openai server provider variant tagging", () => { model: string; providerVariant: string; }; - expect(debug.model).toBe("claude-opus-4-7"); + expect(debug.model).toBe("claude-opus-4-8"); expect(debug.providerVariant).toBe("anthropic"); }); diff --git a/extensions/qa-lab/src/providers/mock-openai/server.ts b/extensions/qa-lab/src/providers/mock-openai/server.ts index 8de1e010c980..a74479d2ab62 100644 --- a/extensions/qa-lab/src/providers/mock-openai/server.ts +++ b/extensions/qa-lab/src/providers/mock-openai/server.ts @@ -81,7 +81,7 @@ export function resolveProviderVariant(model: string | undefined): MockOpenAiPro return "anthropic"; } // Fall back to model-name prefix matching for bare model strings like - // `gpt-5.5` or `claude-opus-4-7`. + // `gpt-5.5` or `claude-opus-4-8`. if (/^(?:gpt-|o1-|openai-)/.test(trimmed)) { return "openai"; } @@ -645,7 +645,7 @@ function execCommandFromToolProgressPrompt(prompt: string) { function buildMockFunctionCall(name: string, args: Record) { const serialized = JSON.stringify(args); - const callSuffix = createHash("sha1") + const callSuffix = createHash("sha256") .update(name) .update("\0") .update(serialized) @@ -2591,7 +2591,7 @@ async function buildResponsesPayload( // // The QA parity gate needs two comparable scenario runs: one against the // "candidate" (openai/gpt-5.5) and one against the "baseline" -// (anthropic/claude-opus-4-7). The OpenAI mock above already dispatches all +// (anthropic/claude-opus-4-8). The OpenAI mock above already dispatches all // the scenario prompt branches we care about. Rather than duplicating that // machinery, the /v1/messages route below translates Anthropic request // shapes into the shared ResponsesInputItem[] format, calls the same @@ -2814,7 +2814,7 @@ function buildAnthropicMessageResponse(params: { id: `msg_mock_${Math.floor(Math.random() * 1_000_000).toString(16)}`, type: "message", role: "assistant", - model: params.model || "claude-opus-4-7", + model: params.model || "claude-opus-4-8", content, stop_reason: stopReason, stop_sequence: null, @@ -2842,7 +2842,7 @@ function buildAnthropicMessageStreamEvents(params: { id: messageId, type: "message", role: "assistant", - model: params.model || "claude-opus-4-7", + model: params.model || "claude-opus-4-8", content: [], stop_reason: null, stop_sequence: null, @@ -2941,7 +2941,7 @@ async function buildMessagesPayload( // which then confuses parity consumers that assume the mock always // echoes the real provider label. Normalize once and reuse everywhere. const normalizedModel = - typeof body.model === "string" && body.model.trim() !== "" ? body.model : "claude-opus-4-7"; + typeof body.model === "string" && body.model.trim() !== "" ? body.model : "claude-opus-4-8"; // Dispatch through the same scenario logic the /v1/responses route uses. // Preserve declared tools so route-specific adapters mirror what the // real provider request made available to the model. @@ -2986,7 +2986,7 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n { id: "gpt-5.5-alt", object: "model" }, { id: "gpt-image-1", object: "model" }, { id: "text-embedding-3-small", object: "model" }, - { id: "claude-opus-4-7", object: "model" }, + { id: "claude-opus-4-8", object: "model" }, { id: "claude-sonnet-4-6", object: "model" }, ], }); diff --git a/extensions/qa-lab/src/providers/shared/mock-model-config.ts b/extensions/qa-lab/src/providers/shared/mock-model-config.ts index 349ee211e3f3..667046f74908 100644 --- a/extensions/qa-lab/src/providers/shared/mock-model-config.ts +++ b/extensions/qa-lab/src/providers/shared/mock-model-config.ts @@ -71,14 +71,14 @@ function createMockAnthropicMessagesProvider(baseUrl: string): ModelProviderConf }, models: [ { - id: "claude-opus-4-7", - name: "claude-opus-4-7", + id: "claude-opus-4-8", + name: "claude-opus-4-8", api: "anthropic-messages", reasoning: false, input: ["text", "image"], cost: ZERO_COST, - contextWindow: 200_000, - maxTokens: 4096, + contextWindow: 1_048_576, + maxTokens: 128_000, }, { id: "claude-sonnet-4-6", diff --git a/extensions/qa-lab/src/qa-gateway-config.test.ts b/extensions/qa-lab/src/qa-gateway-config.test.ts index 5d626a189970..b4e55ffa6340 100644 --- a/extensions/qa-lab/src/qa-gateway-config.test.ts +++ b/extensions/qa-lab/src/qa-gateway-config.test.ts @@ -108,12 +108,12 @@ describe("buildQaGatewayConfig", () => { workspaceDir: "/tmp/qa-workspace", providerMode: "mock-openai", primaryModel: "openai/gpt-5.5", - alternateModel: "anthropic/claude-opus-4-7", + alternateModel: "anthropic/claude-opus-4-8", }); expect(getPrimaryModel(cfg.agents?.defaults?.model)).toBe("openai/gpt-5.5"); - expect(getModelFallbacks(cfg.agents?.defaults?.model)).toEqual(["anthropic/claude-opus-4-7"]); - expect(getModelFallbacks(cfg.agents?.list?.[0]?.model)).toEqual(["anthropic/claude-opus-4-7"]); + expect(getModelFallbacks(cfg.agents?.defaults?.model)).toEqual(["anthropic/claude-opus-4-8"]); + expect(getModelFallbacks(cfg.agents?.list?.[0]?.model)).toEqual(["anthropic/claude-opus-4-8"]); expect(cfg.models?.providers?.openai?.api).toBe("openai-responses"); expect(cfg.models?.providers?.openai?.request).toEqual({ allowPrivateNetwork: true }); expect(cfg.models?.providers?.openai?.models.map((model) => model.id)).toContain("gpt-5.5"); @@ -121,7 +121,7 @@ describe("buildQaGatewayConfig", () => { expect(cfg.models?.providers?.anthropic?.baseUrl).toBe("http://127.0.0.1:44080"); expect(cfg.models?.providers?.anthropic?.request).toEqual({ allowPrivateNetwork: true }); expect(cfg.models?.providers?.anthropic?.models.map((model) => model.id)).toContain( - "claude-opus-4-7", + "claude-opus-4-8", ); expect(cfg.plugins?.allow).toEqual(["acpx", "memory-core"]); }); diff --git a/extensions/qa-lab/src/suite-planning.test.ts b/extensions/qa-lab/src/suite-planning.test.ts index 3f84f4806a0f..021f21697ca0 100644 --- a/extensions/qa-lab/src/suite-planning.test.ts +++ b/extensions/qa-lab/src/suite-planning.test.ts @@ -198,7 +198,7 @@ describe("qa suite planning helpers", () => { makeQaSuiteTestScenario("anthropic-only", { config: { requiredProvider: "anthropic", - requiredModel: "claude-opus-4-7", + requiredModel: "claude-opus-4-8", }, }), ]; @@ -384,7 +384,7 @@ describe("qa suite planning helpers", () => { config: { requiredProvider: "openai", requiredModel: "gpt-5.5" }, }), makeQaSuiteTestScenario("anthropic-only", { - config: { requiredProvider: "anthropic", requiredModel: "claude-opus-4-7" }, + config: { requiredProvider: "anthropic", requiredModel: "claude-opus-4-8" }, }), makeQaSuiteTestScenario("claude-subscription", { config: { requiredProvider: "claude-cli", authMode: "subscription" }, diff --git a/extensions/qa-lab/src/suite.summary-json.test.ts b/extensions/qa-lab/src/suite.summary-json.test.ts index 786bf46cb101..6ea40baadfb5 100644 --- a/extensions/qa-lab/src/suite.summary-json.test.ts +++ b/extensions/qa-lab/src/suite.summary-json.test.ts @@ -67,12 +67,12 @@ describe("buildQaSuiteSummaryJson", () => { it("records an Anthropic baseline lane cleanly for parity runs", () => { const json = buildQaSuiteSummaryJson({ ...baseParams, - primaryModel: "anthropic/claude-opus-4-7", + primaryModel: "anthropic/claude-opus-4-8", alternateModel: "anthropic/claude-sonnet-4-6", }); - expect(json.run.primaryModel).toBe("anthropic/claude-opus-4-7"); + expect(json.run.primaryModel).toBe("anthropic/claude-opus-4-8"); expect(json.run.primaryProvider).toBe("anthropic"); - expect(json.run.primaryModelName).toBe("claude-opus-4-7"); + expect(json.run.primaryModelName).toBe("claude-opus-4-8"); expect(json.run.alternateModel).toBe("anthropic/claude-sonnet-4-6"); expect(json.run.alternateProvider).toBe("anthropic"); expect(json.run.alternateModelName).toBe("claude-sonnet-4-6"); diff --git a/packages/agent-core/src/llm.ts b/packages/agent-core/src/llm.ts index f1d9b486159e..5468844c058a 100644 --- a/packages/agent-core/src/llm.ts +++ b/packages/agent-core/src/llm.ts @@ -3,7 +3,7 @@ import type { TSchema } from "typebox"; export type Api = string; export type CacheRetention = "none" | "short" | "long"; export type Transport = "sse" | "websocket" | "websocket-cached" | "auto"; -export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh"; +export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh" | "max"; export type ModelThinkingLevel = "off" | ThinkingLevel; export type MaybePromise = T | Promise; @@ -17,6 +17,7 @@ export interface ThinkingBudgets { low?: number; medium?: number; high?: number; + max?: number; } export interface DiagnosticErrorInfo { diff --git a/packages/agent-core/src/types.ts b/packages/agent-core/src/types.ts index d6e7f596e9d2..23d6d0aa5474 100644 --- a/packages/agent-core/src/types.ts +++ b/packages/agent-core/src/types.ts @@ -285,7 +285,7 @@ export interface AgentLoopConfig extends SimpleStreamOptions { * Note: "xhigh" is only supported by selected model families. Use model thinking-level metadata * from openclaw/plugin-sdk/llm to detect support for a concrete model. */ -export type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh"; +export type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh" | "max"; /** * Extensible interface for custom app messages. diff --git a/qa/scenarios/models/anthropic-opus-api-key-smoke.md b/qa/scenarios/models/anthropic-opus-api-key-smoke.md index d13b97c921fe..3cb1469070c3 100644 --- a/qa/scenarios/models/anthropic-opus-api-key-smoke.md +++ b/qa/scenarios/models/anthropic-opus-api-key-smoke.md @@ -12,7 +12,7 @@ coverage: objective: Verify the regular Anthropic Opus lane can complete a quick chat turn using API-key auth. successCriteria: - A live-frontier run fails fast unless the selected primary provider is anthropic. - - The selected primary model is Anthropic Opus 4.7. + - The selected primary model is Anthropic Opus 4.8. - The QA gateway worker has an Anthropic API key available through environment auth. - The agent replies through the regular Anthropic provider. docsRefs: @@ -24,10 +24,10 @@ codeRefs: - extensions/qa-lab/src/suite.ts execution: kind: flow - summary: Run with `pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-7 --alt-model anthropic/claude-opus-4-7 --scenario anthropic-opus-api-key-smoke`. + summary: Run with `pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-8 --alt-model anthropic/claude-opus-4-8 --scenario anthropic-opus-api-key-smoke`. config: requiredProvider: anthropic - requiredModel: claude-opus-4-7 + requiredModel: claude-opus-4-8 chatPrompt: "Anthropic Opus API key smoke. Reply exactly: ANTHROPIC-OPUS-API-KEY-OK" chatExpected: ANTHROPIC-OPUS-API-KEY-OK ``` diff --git a/qa/scenarios/models/anthropic-opus-setup-token-smoke.md b/qa/scenarios/models/anthropic-opus-setup-token-smoke.md index a67997ef4e6f..b15cc75d32ee 100644 --- a/qa/scenarios/models/anthropic-opus-setup-token-smoke.md +++ b/qa/scenarios/models/anthropic-opus-setup-token-smoke.md @@ -12,7 +12,7 @@ coverage: objective: Verify the regular Anthropic Opus lane can complete a quick chat turn using setup-token auth. successCriteria: - A live-frontier run fails fast unless the selected primary provider is anthropic. - - The selected primary model is Anthropic Opus 4.7. + - The selected primary model is Anthropic Opus 4.8. - The QA gateway worker stages a token auth profile in the isolated agent store. - The agent replies through the regular Anthropic provider. docsRefs: @@ -24,10 +24,10 @@ codeRefs: - extensions/qa-lab/src/suite.ts execution: kind: flow - summary: Run with `OPENCLAW_LIVE_SETUP_TOKEN_VALUE= pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-7 --alt-model anthropic/claude-opus-4-7 --scenario anthropic-opus-setup-token-smoke`. + summary: Run with `OPENCLAW_LIVE_SETUP_TOKEN_VALUE= pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-8 --alt-model anthropic/claude-opus-4-8 --scenario anthropic-opus-setup-token-smoke`. config: requiredProvider: anthropic - requiredModel: claude-opus-4-7 + requiredModel: claude-opus-4-8 profileId: "anthropic:qa-setup-token" chatPrompt: "Anthropic Opus setup-token smoke. Reply exactly: ANTHROPIC-OPUS-SETUP-TOKEN-OK" chatExpected: ANTHROPIC-OPUS-SETUP-TOKEN-OK diff --git a/scripts/e2e/crestodian-planner-docker-client.mjs b/scripts/e2e/crestodian-planner-docker-client.mjs index 8acb6800ef06..ef47f45ac9a0 100644 --- a/scripts/e2e/crestodian-planner-docker-client.mjs +++ b/scripts/e2e/crestodian-planner-docker-client.mjs @@ -79,7 +79,7 @@ async function main() { const output = runtime.lines.join("\n"); assertOutputIncludes( output, - "[crestodian] planner: claude-cli/claude-opus-4-7", + "[crestodian] planner: claude-cli/claude-opus-4-8", "configless planner did not use Claude CLI fallback", ); assertOutputIncludes( diff --git a/src/agents/anthropic-transport-stream.test.ts b/src/agents/anthropic-transport-stream.test.ts index 1491afbb72fb..f9c6129573d4 100644 --- a/src/agents/anthropic-transport-stream.test.ts +++ b/src/agents/anthropic-transport-stream.test.ts @@ -116,6 +116,7 @@ function makeAnthropicTransportModel( baseUrl?: string; reasoning?: boolean; maxTokens?: number; + thinkingLevelMap?: AnthropicMessagesModel["thinkingLevelMap"]; headers?: Record; requestTransport?: RequestTransportConfig; } = {}, @@ -132,6 +133,7 @@ function makeAnthropicTransportModel( cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, contextWindow: 200000, maxTokens: params.maxTokens ?? 8192, + ...(params.thinkingLevelMap ? { thinkingLevelMap: params.thinkingLevelMap } : {}), ...(params.headers ? { headers: params.headers } : {}), } satisfies AnthropicMessagesModel, params.requestTransport ?? { @@ -1867,10 +1869,10 @@ describe("anthropic transport stream", () => { expect(payload.output_config).toEqual({ effort: "max" }); }); - it("maps xhigh thinking effort for Claude Opus 4.7 transport runs", async () => { + it("maps xhigh thinking effort for Claude Opus 4.8 transport runs", async () => { const model = makeAnthropicTransportModel({ - id: "claude-opus-4-7", - name: "Claude Opus 4.7", + id: "claude-opus-4-8", + name: "Claude Opus 4.8", maxTokens: 8192, }); @@ -1889,4 +1891,51 @@ describe("anthropic transport stream", () => { expect(payload.thinking).toEqual({ type: "adaptive" }); expect(payload.output_config).toEqual({ effort: "xhigh" }); }); + + it("preserves max thinking effort for Claude Opus 4.8 transport runs", async () => { + const model = makeAnthropicTransportModel({ + id: "claude-opus-4-8", + name: "Claude Opus 4.8", + maxTokens: 8192, + thinkingLevelMap: { xhigh: "xhigh", max: "max" }, + }); + + await runTransportStream( + model, + { + messages: [{ role: "user", content: "Think as much as needed." }], + } as AnthropicStreamContext, + { + apiKey: "sk-ant-api", + reasoning: "max", + } as AnthropicStreamOptions, + ); + + const payload = latestAnthropicRequest().payload; + expect(payload.thinking).toEqual({ type: "adaptive" }); + expect(payload.output_config).toEqual({ effort: "max" }); + }); + + it("clamps max thinking effort for Claude models without native max support", async () => { + const model = makeAnthropicTransportModel({ + id: "claude-sonnet-4-6", + name: "Claude Sonnet 4.6", + maxTokens: 8192, + }); + + await runTransportStream( + model, + { + messages: [{ role: "user", content: "Think as much as supported." }], + } as AnthropicStreamContext, + { + apiKey: "sk-ant-api", + reasoning: "max", + } as AnthropicStreamOptions, + ); + + const payload = latestAnthropicRequest().payload; + expect(payload.thinking).toEqual({ type: "adaptive" }); + expect(payload.output_config).toEqual({ effort: "high" }); + }); }); diff --git a/src/agents/anthropic-transport-stream.ts b/src/agents/anthropic-transport-stream.ts index b5e028e3d3bd..7aad5f260433 100644 --- a/src/agents/anthropic-transport-stream.ts +++ b/src/agents/anthropic-transport-stream.ts @@ -113,8 +113,13 @@ type MutableAssistantOutput = { const EMPTY_ANTHROPIC_MESSAGES_FALLBACK_TEXT = "."; -function isClaudeOpus47Model(modelId: string): boolean { - return modelId.includes("opus-4-7") || modelId.includes("opus-4.7"); +function isClaudeOpus47OrNewerModel(modelId: string): boolean { + return ( + modelId.includes("opus-4-8") || + modelId.includes("opus-4.8") || + modelId.includes("opus-4-7") || + modelId.includes("opus-4.7") + ); } function isClaudeOpus46Model(modelId: string): boolean { @@ -123,7 +128,7 @@ function isClaudeOpus46Model(modelId: string): boolean { function supportsAdaptiveThinking(modelId: string): boolean { return ( - isClaudeOpus47Model(modelId) || + isClaudeOpus47OrNewerModel(modelId) || isClaudeOpus46Model(modelId) || modelId.includes("sonnet-4-6") || modelId.includes("sonnet-4.6") @@ -138,17 +143,19 @@ function mapThinkingLevelToEffort(level: ThinkingLevel, modelId: string): Anthro case "medium": return "medium"; case "xhigh": - if (isClaudeOpus47Model(modelId)) { + if (isClaudeOpus47OrNewerModel(modelId)) { return "xhigh"; } return isClaudeOpus46Model(modelId) ? "max" : "high"; + case "max": + return isClaudeOpus47OrNewerModel(modelId) ? "max" : "high"; default: return "high"; } } function clampReasoningLevel(level: ThinkingLevel): "minimal" | "low" | "medium" | "high" { - return level === "xhigh" ? "high" : level; + return level === "xhigh" || level === "max" ? "high" : level; } function resolvePositiveAnthropicMaxTokens(value: unknown): number | undefined { diff --git a/src/agents/cli-runner/prepare.test.ts b/src/agents/cli-runner/prepare.test.ts index 6ecbba77319c..0d1c8613a6c2 100644 --- a/src/agents/cli-runner/prepare.test.ts +++ b/src/agents/cli-runner/prepare.test.ts @@ -1849,7 +1849,7 @@ describe("shouldSkipLocalCliCredentialEpoch", () => { input: "stdin", sessionMode: "existing", modelAliases: { - "claude-opus-4-7": "opus", + "claude-opus-4-8": "opus", }, }, }, @@ -1873,7 +1873,7 @@ describe("shouldSkipLocalCliCredentialEpoch", () => { workspaceDir: dir, prompt: "latest ask", provider: "claude-cli", - model: "claude-opus-4-7", + model: "claude-opus-4-8", timeoutMs: 1_000, runId: "run-auto-claude-alias-reseed-history-chars", config: createCliBackendConfig(), diff --git a/src/agents/cli-runner/prepare.ts b/src/agents/cli-runner/prepare.ts index 5ea4157a0805..84cebe45c96b 100644 --- a/src/agents/cli-runner/prepare.ts +++ b/src/agents/cli-runner/prepare.ts @@ -89,7 +89,9 @@ const prepareDeps = { }; const CLAUDE_CLI_CONTEXT_MODEL_ALIASES: Record = { - opus: "claude-opus-4-7", + opus: "claude-opus-4-8", + "opus-4.8": "claude-opus-4-8", + "opus-4-8": "claude-opus-4-8", "opus-4.7": "claude-opus-4-7", "opus-4-7": "claude-opus-4-7", "opus-4.6": "claude-opus-4-6", diff --git a/src/agents/context.test.ts b/src/agents/context.test.ts index b1c9aaf479e8..5a6f0b0c35a2 100644 --- a/src/agents/context.test.ts +++ b/src/agents/context.test.ts @@ -68,11 +68,13 @@ describe("applyDiscoveredContextWindows", () => { applyDiscoveredContextWindows({ cache, models: [ + { id: "claude-cli/claude-opus-4.8-20260514", contextWindow: 200_000 }, { id: "claude-cli/claude-opus-4.7-20260219", contextWindow: 200_000 }, { id: "claude-cli/claude-sonnet-4-6", contextWindow: 200_000 }, ], }); + expect(cache.get("claude-cli/claude-opus-4.8-20260514")).toBe(ANTHROPIC_CONTEXT_1M_TOKENS); expect(cache.get("claude-cli/claude-opus-4.7-20260219")).toBe(ANTHROPIC_CONTEXT_1M_TOKENS); expect(cache.get("claude-cli/claude-sonnet-4-6")).toBe(ANTHROPIC_CONTEXT_1M_TOKENS); }); diff --git a/src/agents/context.ts b/src/agents/context.ts index 969fc9cbc4f0..8e0a50b9789c 100644 --- a/src/agents/context.ts +++ b/src/agents/context.ts @@ -36,6 +36,8 @@ type ProviderConfigEntry = { type ModelsConfig = { providers?: Record }; const ANTHROPIC_GA_1M_MODEL_PREFIXES = [ + "claude-opus-4-8", + "claude-opus-4.8", "claude-opus-4-6", "claude-opus-4.6", "claude-opus-4-7", diff --git a/src/agents/embedded-agent-runner/utils.test.ts b/src/agents/embedded-agent-runner/utils.test.ts new file mode 100644 index 000000000000..2f5f6d067c3e --- /dev/null +++ b/src/agents/embedded-agent-runner/utils.test.ts @@ -0,0 +1,8 @@ +import { describe, expect, it } from "vitest"; +import { mapThinkingLevel } from "./utils.js"; + +describe("mapThinkingLevel", () => { + it("maps adaptive to the provider-owned high effort default", () => { + expect(mapThinkingLevel("adaptive")).toBe("high"); + }); +}); diff --git a/src/agents/embedded-agent-runner/utils.ts b/src/agents/embedded-agent-runner/utils.ts index 3f032a76ef21..471f4e75f2be 100644 --- a/src/agents/embedded-agent-runner/utils.ts +++ b/src/agents/embedded-agent-runner/utils.ts @@ -8,19 +8,14 @@ export function normalizeContextTokenBudget(value: unknown): number | undefined } export function mapThinkingLevel(level?: ThinkLevel): ThinkingLevel { - // agent runtime supports "xhigh"; OpenClaw enables it for specific models. + // agent runtime supports elevated levels; OpenClaw enables them for specific models. if (!level) { return "off"; } - if (level === "max") { - return "xhigh"; - } - // "adaptive" maps to "medium" at the agent runtime layer. The provider adapter - // provider then translates this to `thinking.type: "adaptive"` with - // `output_config.effort: "medium"` for models that support it (Opus 4.6, - // Sonnet 4.6). + // Runtime streams do not expose a distinct adaptive level. Preserve the + // provider-owned adaptive default by using Claude's documented high effort. if (level === "adaptive") { - return "medium"; + return "high"; } return level; } diff --git a/src/agents/live-model-filter.ts b/src/agents/live-model-filter.ts index 27bbf694af79..f9f7fd837482 100644 --- a/src/agents/live-model-filter.ts +++ b/src/agents/live-model-filter.ts @@ -10,6 +10,7 @@ type ModelRef = { }; const HIGH_SIGNAL_LIVE_MODEL_PRIORITY = [ + "anthropic/claude-opus-4-8", "anthropic/claude-sonnet-4-6", "anthropic/claude-opus-4-7", "google/gemini-3.1-pro-preview", diff --git a/src/agents/model-compat.test.ts b/src/agents/model-compat.test.ts index f9c513782bf0..d330d99d1a3d 100644 --- a/src/agents/model-compat.test.ts +++ b/src/agents/model-compat.test.ts @@ -661,6 +661,7 @@ describe("isPrioritizedHighSignalLiveModelRef", () => { it("lists priority refs as provider/id pairs", () => { expect(listPrioritizedHighSignalLiveModelRefs()).toStrictEqual([ + { provider: "anthropic", id: "claude-opus-4-8" }, { provider: "anthropic", id: "claude-sonnet-4-6" }, { provider: "anthropic", id: "claude-opus-4-7" }, { provider: "google", id: "gemini-3.1-pro-preview" }, @@ -713,6 +714,7 @@ describe("isPrioritizedSmallLiveModelRef", () => { describe("selectHighSignalLiveItems", () => { it("prefers curated Google replacements before fallback provider spread", () => { const items = [ + { provider: "anthropic", id: "claude-opus-4-8" }, { provider: "anthropic", id: "claude-sonnet-4-6" }, { provider: "anthropic", id: "claude-opus-4-7" }, { provider: "anthropic", id: "claude-opus-4-6" }, @@ -731,10 +733,10 @@ describe("selectHighSignalLiveItems", () => { (item) => item.provider, ), ).toEqual([ + { provider: "anthropic", id: "claude-opus-4-8" }, { provider: "anthropic", id: "claude-sonnet-4-6" }, { provider: "anthropic", id: "claude-opus-4-7" }, { provider: "google", id: "gemini-3.1-pro-preview" }, - { provider: "google", id: "gemini-3-flash-preview" }, ]); }); diff --git a/src/agents/model-selection.test.ts b/src/agents/model-selection.test.ts index 13a65abcf0f7..ac71973b068b 100644 --- a/src/agents/model-selection.test.ts +++ b/src/agents/model-selection.test.ts @@ -147,6 +147,33 @@ const ANTHROPIC_OPUS_47_CATALOG = [ }, ]; +const ANTHROPIC_OPUS_48_CATALOG = [ + { + provider: "anthropic", + id: "claude-opus-4-8", + name: "Claude Opus 4.8", + reasoning: true, + }, +]; + +const ANTHROPIC_VERTEX_OPUS_48_CATALOG = [ + { + provider: "anthropic-vertex", + id: "claude-opus-4-8", + name: "Claude Opus 4.8", + reasoning: true, + }, +]; + +const CLAUDE_CLI_OPUS_48_CATALOG = [ + { + provider: "claude-cli", + id: "claude-opus-4-8", + name: "Claude Opus 4.8", + reasoning: true, + }, +]; + function resolveAnthropicOpusThinking(cfg: OpenClawConfig) { return resolveThinkingDefault({ cfg, @@ -165,6 +192,33 @@ function resolveAnthropicOpus47Thinking(cfg: OpenClawConfig) { }); } +function resolveAnthropicOpus48Thinking(cfg: OpenClawConfig) { + return resolveThinkingDefault({ + cfg, + provider: "anthropic", + model: "claude-opus-4-8", + catalog: ANTHROPIC_OPUS_48_CATALOG, + }); +} + +function resolveAnthropicVertexOpus48Thinking(cfg: OpenClawConfig) { + return resolveThinkingDefault({ + cfg, + provider: "anthropic-vertex", + model: "claude-opus-4-8", + catalog: ANTHROPIC_VERTEX_OPUS_48_CATALOG, + }); +} + +function resolveClaudeCliOpus48Thinking(cfg: OpenClawConfig) { + return resolveThinkingDefault({ + cfg, + provider: "claude-cli", + model: "claude-opus-4-8", + catalog: CLAUDE_CLI_OPUS_48_CATALOG, + }); +} + function createAgentFallbackConfig(params: { primary?: string; fallbacks?: string[]; @@ -2300,6 +2354,42 @@ describe("model-selection", () => { expect(resolveAnthropicOpus47Thinking(cfg)).toBe("off"); }); + it("leaves explicitly configured Anthropic Opus 4.8 thinking off by default", () => { + const cfg = { + agents: { + defaults: { + model: { primary: "anthropic/claude-opus-4-8" }, + }, + }, + } as OpenClawConfig; + + expect(resolveAnthropicOpus48Thinking(cfg)).toBe("off"); + }); + + it("leaves explicitly configured Anthropic Vertex Opus 4.8 thinking off by default", () => { + const cfg = { + agents: { + defaults: { + model: { primary: "anthropic-vertex/claude-opus-4-8" }, + }, + }, + } as OpenClawConfig; + + expect(resolveAnthropicVertexOpus48Thinking(cfg)).toBe("off"); + }); + + it("leaves explicitly configured Claude CLI Opus 4.8 thinking off by default", () => { + const cfg = { + agents: { + defaults: { + model: { primary: "claude-cli/claude-opus-4-8" }, + }, + }, + } as OpenClawConfig; + + expect(resolveClaudeCliOpus48Thinking(cfg)).toBe("off"); + }); + it("uses bundled provider thinking defaults when no explicit config overrides them", () => { const cfg = {} as OpenClawConfig; diff --git a/src/agents/model-thinking-default.ts b/src/agents/model-thinking-default.ts index e325c8c9d544..33bb8b12bb84 100644 --- a/src/agents/model-thinking-default.ts +++ b/src/agents/model-thinking-default.ts @@ -57,8 +57,18 @@ export function resolveThinkingDefault(params: { if (configured) { return configured; } + const isClaudeProvider = + normalizedProvider === "anthropic" || + normalizedProvider === "anthropic-vertex" || + normalizedProvider === "claude-cli"; if ( - normalizedProvider === "anthropic" && + isClaudeProvider && + (normalizedModel.startsWith("claude-opus-4-8") || normalizedModel.startsWith("claude-opus-4.8")) + ) { + return "off"; + } + if ( + isClaudeProvider && (normalizedModel.startsWith("claude-opus-4-7") || normalizedModel.startsWith("claude-opus-4.7")) ) { return "off"; diff --git a/src/agents/sessions/model-registry.ts b/src/agents/sessions/model-registry.ts index e950351046ce..1ae1bba6d532 100644 --- a/src/agents/sessions/model-registry.ts +++ b/src/agents/sessions/model-registry.ts @@ -96,6 +96,7 @@ const ThinkingLevelMapSchema = Type.Object({ medium: Type.Optional(ThinkingLevelMapValueSchema), high: Type.Optional(ThinkingLevelMapValueSchema), xhigh: Type.Optional(ThinkingLevelMapValueSchema), + max: Type.Optional(ThinkingLevelMapValueSchema), }); const OpenAICompletionsCompatSchema = Type.Object({ diff --git a/src/agents/sessions/model-resolver.ts b/src/agents/sessions/model-resolver.ts index 4acf8d887da7..81bce0da388e 100644 --- a/src/agents/sessions/model-resolver.ts +++ b/src/agents/sessions/model-resolver.ts @@ -11,7 +11,7 @@ import type { ThinkingLevel } from "../runtime/index.js"; import { DEFAULT_THINKING_LEVEL } from "./defaults.js"; import type { ModelRegistry } from "./model-registry.js"; -const VALID_THINKING_LEVELS = ["off", "minimal", "low", "medium", "high", "xhigh"] as const; +const VALID_THINKING_LEVELS = ["off", "minimal", "low", "medium", "high", "xhigh", "max"] as const; function isValidThinkingLevel(level: string): level is ThinkingLevel { return VALID_THINKING_LEVELS.includes(level as ThinkingLevel); diff --git a/src/agents/sessions/sdk.test.ts b/src/agents/sessions/sdk.test.ts index 9f814e333498..77bd5cb589de 100644 --- a/src/agents/sessions/sdk.test.ts +++ b/src/agents/sessions/sdk.test.ts @@ -65,6 +65,26 @@ function createResourceLoaderWithHandlers( } describe("createAgentSession tool defaults", () => { + it("forwards max thinking budgets from settings to the agent", async () => { + const { session } = await createAgentSession({ + model: testModel, + resourceLoader: createEmptyResourceLoader(), + sessionManager: SessionManager.inMemory(), + settingsManager: SettingsManager.inMemory({ + thinkingBudgets: { + high: 16_384, + max: 32_768, + }, + }), + modelRegistry: ModelRegistry.inMemory(AuthStorage.inMemory()), + }); + + expect(session.agent.thinkingBudgets).toEqual({ + high: 16_384, + max: 32_768, + }); + }); + it("keeps custom tools active when only builtin tools are disabled", async () => { const customTool: ToolDefinition = { name: "custom_lookup", diff --git a/src/agents/sessions/settings-manager.ts b/src/agents/sessions/settings-manager.ts index 2807fe2d3f17..d2715cea0900 100644 --- a/src/agents/sessions/settings-manager.ts +++ b/src/agents/sessions/settings-manager.ts @@ -47,6 +47,7 @@ export interface ThinkingBudgetsSettings { low?: number; medium?: number; high?: number; + max?: number; } export interface MarkdownSettings { @@ -78,7 +79,7 @@ export interface Settings { lastChangelogVersion?: string; defaultProvider?: string; defaultModel?: string; - defaultThinkingLevel?: "off" | "minimal" | "low" | "medium" | "high" | "xhigh"; + defaultThinkingLevel?: "off" | "minimal" | "low" | "medium" | "high" | "xhigh" | "max"; transport?: TransportSetting; // default: "auto" steeringMode?: "all" | "one-at-a-time"; followUpMode?: "all" | "one-at-a-time"; @@ -668,11 +669,21 @@ export class SettingsManager { this.save(); } - getDefaultThinkingLevel(): "off" | "minimal" | "low" | "medium" | "high" | "xhigh" | undefined { + getDefaultThinkingLevel(): + | "off" + | "minimal" + | "low" + | "medium" + | "high" + | "xhigh" + | "max" + | undefined { return this.settings.defaultThinkingLevel; } - setDefaultThinkingLevel(level: "off" | "minimal" | "low" | "medium" | "high" | "xhigh"): void { + setDefaultThinkingLevel( + level: "off" | "minimal" | "low" | "medium" | "high" | "xhigh" | "max", + ): void { this.globalSettings.defaultThinkingLevel = level; this.markModified("defaultThinkingLevel"); this.save(); diff --git a/src/agents/tools/pdf-tool.model-config.test.ts b/src/agents/tools/pdf-tool.model-config.test.ts index 0f8160052d08..4fc2273d77b8 100644 --- a/src/agents/tools/pdf-tool.model-config.test.ts +++ b/src/agents/tools/pdf-tool.model-config.test.ts @@ -3,7 +3,7 @@ import type { OpenClawConfig } from "../../config/config.js"; import { resolvePdfModelConfigForTool } from "./pdf-tool.model-config.js"; import { resetPdfToolAuthEnv } from "./pdf-tool.test-support.js"; -const ANTHROPIC_PDF_MODEL = "anthropic/claude-opus-4-7"; +const ANTHROPIC_PDF_MODEL = "anthropic/claude-opus-4-8"; const TEST_AGENT_DIR = "/tmp/openclaw-pdf-model-config"; vi.mock("./model-config.helpers.js", () => ({ diff --git a/src/config/defaults.ts b/src/config/defaults.ts index 7e69057c5398..427cf50b0852 100644 --- a/src/config/defaults.ts +++ b/src/config/defaults.ts @@ -27,7 +27,7 @@ let defaultWarnState: WarnState = { warned: false }; const DEFAULT_MODEL_ALIASES: Readonly> = { // Anthropic (shared model runtime catalog uses "latest" ids without date suffix) - opus: "anthropic/claude-opus-4-7", + opus: "anthropic/claude-opus-4-8", sonnet: "anthropic/claude-sonnet-4-6", // OpenAI diff --git a/src/config/model-alias-defaults.test.ts b/src/config/model-alias-defaults.test.ts index 5ee4160299a8..a717f2ca581f 100644 --- a/src/config/model-alias-defaults.test.ts +++ b/src/config/model-alias-defaults.test.ts @@ -75,7 +75,7 @@ describe("applyModelDefaults", () => { agents: { defaults: { models: { - "anthropic/claude-opus-4-7": {}, + "anthropic/claude-opus-4-8": {}, "openai/gpt-5.4": {}, }, }, @@ -83,7 +83,7 @@ describe("applyModelDefaults", () => { } satisfies OpenClawConfig; const next = applyModelDefaults(cfg); - expect(next.agents?.defaults?.models?.["anthropic/claude-opus-4-7"]?.alias).toBe("opus"); + expect(next.agents?.defaults?.models?.["anthropic/claude-opus-4-8"]?.alias).toBe("opus"); expect(next.agents?.defaults?.models?.["openai/gpt-5.4"]?.alias).toBe("gpt"); }); @@ -92,7 +92,7 @@ describe("applyModelDefaults", () => { agents: { defaults: { models: { - "anthropic/claude-opus-4-7": { alias: "Opus" }, + "anthropic/claude-opus-4-8": { alias: "Opus" }, }, }, }, @@ -100,7 +100,7 @@ describe("applyModelDefaults", () => { const next = applyModelDefaults(cfg); - expect(next.agents?.defaults?.models?.["anthropic/claude-opus-4-7"]?.alias).toBe("Opus"); + expect(next.agents?.defaults?.models?.["anthropic/claude-opus-4-8"]?.alias).toBe("Opus"); }); it("respects explicit empty alias disables", () => { diff --git a/src/config/types.models.ts b/src/config/types.models.ts index ea207fb1d379..b1823fe94007 100644 --- a/src/config/types.models.ts +++ b/src/config/types.models.ts @@ -2,6 +2,7 @@ import type { AnthropicMessagesCompat, OpenAICompletionsCompat, OpenAIResponsesCompat, + ThinkingLevelMap, } from "../llm/types.js"; import type { AgentRuntimePolicyConfig } from "./types.agents-shared.js"; import type { ConfiguredModelProviderRequest } from "./types.provider-request.js"; @@ -152,6 +153,8 @@ export type ModelDefinitionConfig = { */ contextTokens?: number; maxTokens: number; + /** Maps OpenClaw thinking levels to provider/model-specific values. */ + thinkingLevelMap?: ThinkingLevelMap; /** Provider-specific request/runtime parameters passed through to provider plugins. */ params?: Record; /** Optional agent execution runtime override for this provider/model pair. */ diff --git a/src/crestodian/assistant-backends.ts b/src/crestodian/assistant-backends.ts index f559aad99405..d89efcb25bfd 100644 --- a/src/crestodian/assistant-backends.ts +++ b/src/crestodian/assistant-backends.ts @@ -1,7 +1,7 @@ import type { OpenClawConfig } from "../config/types.openclaw.js"; import type { CrestodianOverview } from "./overview.js"; -const CRESTODIAN_CLAUDE_CLI_MODEL = "claude-opus-4-7"; +const CRESTODIAN_CLAUDE_CLI_MODEL = "claude-opus-4-8"; const CRESTODIAN_CODEX_MODEL = "gpt-5.5"; type CrestodianLocalPlannerBackend = { diff --git a/src/crestodian/assistant.test.ts b/src/crestodian/assistant.test.ts index a08f33ecaa5c..a78fc76f7e5c 100644 --- a/src/crestodian/assistant.test.ts +++ b/src/crestodian/assistant.test.ts @@ -141,12 +141,12 @@ describe("Crestodian assistant", () => { } expect(result.command).toBe("status"); expect(result.reply).toBe("Checking the shell."); - expect(result.modelLabel).toBe("claude-cli/claude-opus-4-7"); + expect(result.modelLabel).toBe("claude-cli/claude-opus-4-8"); expect(runCliAgent).toHaveBeenCalledTimes(1); const firstCliCall = firstMockArg(runCliAgent); expect(firstCliCall.provider).toBe("claude-cli"); - expect(firstCliCall.model).toBe("claude-opus-4-7"); + expect(firstCliCall.model).toBe("claude-opus-4-8"); expect(firstCliCall.cleanupCliLiveSessionOnRunEnd).toBe(true); const firstCliConfig = requireRecord(firstCliCall.config); const firstCliAgents = requireRecord(firstCliConfig.agents); diff --git a/src/crestodian/operations.ts b/src/crestodian/operations.ts index d20ffb3fb337..fd2918e41283 100644 --- a/src/crestodian/operations.ts +++ b/src/crestodian/operations.ts @@ -110,8 +110,8 @@ const PLUGIN_UNINSTALL_RE = /^(?:(?:plugins?)\s+(?:uninstall|remove)|(?:uninstall|remove)\s+plugins?)\s+(?[A-Za-z0-9_.@/-]+)$/i; const OPENAI_API_DEFAULT_MODEL_REF = `${DEFAULT_PROVIDER}/${DEFAULT_MODEL}`; -const ANTHROPIC_API_DEFAULT_MODEL_REF = "anthropic/claude-opus-4-7"; -const CLAUDE_CLI_DEFAULT_MODEL_REF = "claude-cli/claude-opus-4-7"; +const ANTHROPIC_API_DEFAULT_MODEL_REF = "anthropic/claude-opus-4-8"; +const CLAUDE_CLI_DEFAULT_MODEL_REF = "claude-cli/claude-opus-4-8"; const CODEX_APP_SERVER_DEFAULT_MODEL_REF = "openai/gpt-5.5"; export function parseCrestodianOperation(input: string): CrestodianOperation { diff --git a/src/llm/model-utils.ts b/src/llm/model-utils.ts index cb3643d95ec5..be3b041ab50d 100644 --- a/src/llm/model-utils.ts +++ b/src/llm/model-utils.ts @@ -17,6 +17,7 @@ const EXTENDED_THINKING_LEVELS: ModelThinkingLevel[] = [ "medium", "high", "xhigh", + "max", ]; export function getSupportedThinkingLevels( @@ -31,7 +32,7 @@ export function getSupportedThinkingLevels( if (mapped === null) { return false; } - if (level === "xhigh") { + if (level === "xhigh" || level === "max") { return mapped !== undefined; } return true; diff --git a/src/llm/providers/anthropic.test.ts b/src/llm/providers/anthropic.test.ts index 8e5d0e05c518..55f681db6bd1 100644 --- a/src/llm/providers/anthropic.test.ts +++ b/src/llm/providers/anthropic.test.ts @@ -19,7 +19,7 @@ vi.mock("@anthropic-ai/sdk", () => ({ }, })); -import { streamAnthropic } from "./anthropic.js"; +import { streamAnthropic, streamSimpleAnthropic } from "./anthropic.js"; function createSseResponse(events: Record[] = []): Response { const body = events.map((event) => `data: ${JSON.stringify(event)}\n\n`).join(""); @@ -167,4 +167,30 @@ describe("Anthropic provider", () => { }, ]); }); + + it("clamps max adaptive effort when the Claude model does not advertise it", async () => { + let capturedPayload: unknown; + const stream = streamSimpleAnthropic( + makeAnthropicModel({ + id: "claude-sonnet-4-6", + name: "Claude Sonnet 4.6", + }), + { + messages: [{ role: "user", content: "hello", timestamp: 0 }], + }, + { + apiKey: "sk-ant-provider", + reasoning: "max", + onPayload: (payload) => { + capturedPayload = payload; + }, + }, + ); + + await stream.result(); + + expect((capturedPayload as { output_config?: unknown }).output_config).toEqual({ + effort: "high", + }); + }); }); diff --git a/src/llm/providers/anthropic.ts b/src/llm/providers/anthropic.ts index 630c33bd296b..0d7a220414de 100644 --- a/src/llm/providers/anthropic.ts +++ b/src/llm/providers/anthropic.ts @@ -7,7 +7,7 @@ import type { RawMessageStreamEvent, } from "@anthropic-ai/sdk/resources/messages.js"; import { getEnvApiKey } from "../env-api-keys.js"; -import { calculateCost } from "../model-utils.js"; +import { calculateCost, clampThinkingLevel } from "../model-utils.js"; import type { AnthropicMessagesCompat, Api, @@ -183,20 +183,20 @@ function getAnthropicCompat(model: Model<"anthropic-messages">): Required, level: SimpleStreamOptions["reasoning"], ): AnthropicEffort { - const mapped = level ? model.thinkingLevelMap?.[level] : undefined; + const clampedLevel = level ? clampThinkingLevel(model, level) : undefined; + const mapped = clampedLevel ? model.thinkingLevelMap?.[clampedLevel] : undefined; if (typeof mapped === "string") { return mapped as AnthropicEffort; } - switch (level) { + switch (clampedLevel) { case "minimal": case "low": return "low"; @@ -756,6 +759,8 @@ function mapThinkingLevelToEffort( return "medium"; case "high": return "high"; + case "max": + return "max"; default: return "high"; } @@ -982,7 +987,7 @@ function buildParams( // budget-based (older models), or explicitly disabled. if (model.reasoning) { if (options?.thinkingEnabled) { - // Default to "summarized" so Opus 4.7 and Mythos Preview behave like + // Default to "summarized" so Opus 4.7+ and Mythos Preview behave like // older Claude 4 models (whose API default is also "summarized"). const display: AnthropicThinkingDisplay = options.thinkingDisplay ?? "summarized"; if (supportsAdaptiveThinking(model.id)) { diff --git a/src/llm/providers/azure-openai-responses.ts b/src/llm/providers/azure-openai-responses.ts index 6d1da8d98a76..9c82b6f32733 100644 --- a/src/llm/providers/azure-openai-responses.ts +++ b/src/llm/providers/azure-openai-responses.ts @@ -186,7 +186,12 @@ export const streamSimpleAzureOpenAIResponses: StreamFunction< const clampedReasoning = options?.reasoning ? clampThinkingLevel(model, options.reasoning) : undefined; - const reasoningEffort = clampedReasoning === "off" ? undefined : clampedReasoning; + const reasoningEffort = + clampedReasoning === "off" + ? undefined + : clampedReasoning === "max" + ? "xhigh" + : clampedReasoning; return streamAzureOpenAIResponses(model, context, { ...base, diff --git a/src/llm/providers/google-vertex.ts b/src/llm/providers/google-vertex.ts index 6c1eaf87214b..d533ff7fc007 100644 --- a/src/llm/providers/google-vertex.ts +++ b/src/llm/providers/google-vertex.ts @@ -132,7 +132,9 @@ export const streamSimpleGoogleVertex: StreamFunction<"google-vertex", SimpleStr } const clampedReasoning = clampThinkingLevel(model, options.reasoning); - const effort = (clampedReasoning === "off" ? "high" : clampedReasoning) as ClampedThinkingLevel; + const effort = ( + clampedReasoning === "off" || clampedReasoning === "max" ? "high" : clampedReasoning + ) as ClampedThinkingLevel; const geminiModel = model as unknown as Model<"google-generative-ai">; if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) { @@ -312,7 +314,7 @@ function buildParams( return params; } -type ClampedThinkingLevel = Exclude; +type ClampedThinkingLevel = Exclude; function isGemini3ProModel(model: Model<"google-generative-ai">): boolean { return /gemini-3(?:\.\d+)?-pro/.test(model.id.toLowerCase()); diff --git a/src/llm/providers/google.ts b/src/llm/providers/google.ts index 43769dba81cf..ff4c98593039 100644 --- a/src/llm/providers/google.ts +++ b/src/llm/providers/google.ts @@ -119,7 +119,9 @@ export const streamSimpleGoogle: StreamFunction<"google-generative-ai", SimpleSt } const clampedReasoning = clampThinkingLevel(model, options.reasoning); - const effort = (clampedReasoning === "off" ? "high" : clampedReasoning) as ClampedThinkingLevel; + const effort = ( + clampedReasoning === "off" || clampedReasoning === "max" ? "high" : clampedReasoning + ) as ClampedThinkingLevel; const googleModel = model; if ( @@ -225,7 +227,7 @@ function buildParams( return params; } -type ClampedThinkingLevel = Exclude; +type ClampedThinkingLevel = Exclude; function isGemma4Model(model: Model<"google-generative-ai">): boolean { return /gemma-?4/.test(model.id.toLowerCase()); diff --git a/src/llm/providers/openai-codex-responses.ts b/src/llm/providers/openai-codex-responses.ts index 28597d75ced0..75e1153b5e1b 100644 --- a/src/llm/providers/openai-codex-responses.ts +++ b/src/llm/providers/openai-codex-responses.ts @@ -452,7 +452,12 @@ export const streamSimpleOpenAICodexResponses: StreamFunction< const clampedReasoning = options?.reasoning ? clampThinkingLevel(model, options.reasoning) : undefined; - const reasoningEffort = clampedReasoning === "off" ? undefined : clampedReasoning; + const reasoningEffort = + clampedReasoning === "off" + ? undefined + : clampedReasoning === "max" + ? "xhigh" + : clampedReasoning; return streamOpenAICodexResponses(model, context, { ...base, diff --git a/src/llm/providers/openai-completions.ts b/src/llm/providers/openai-completions.ts index c95756fca21a..c36890ea424c 100644 --- a/src/llm/providers/openai-completions.ts +++ b/src/llm/providers/openai-completions.ts @@ -467,7 +467,12 @@ export const streamSimpleOpenAICompletions: StreamFunction< const clampedReasoning = options?.reasoning ? clampThinkingLevel(model, options.reasoning) : undefined; - const reasoningEffort = clampedReasoning === "off" ? undefined : clampedReasoning; + const reasoningEffort = + clampedReasoning === "off" + ? undefined + : clampedReasoning === "max" + ? "xhigh" + : clampedReasoning; const toolChoice = (options as OpenAICompletionsOptions | undefined)?.toolChoice; return streamOpenAICompletions(model, context, { diff --git a/src/llm/providers/openai-responses.ts b/src/llm/providers/openai-responses.ts index 22ca11afb523..337f5fc7d007 100644 --- a/src/llm/providers/openai-responses.ts +++ b/src/llm/providers/openai-responses.ts @@ -179,7 +179,12 @@ export const streamSimpleOpenAIResponses: StreamFunction< const clampedReasoning = options?.reasoning ? clampThinkingLevel(model, options.reasoning) : undefined; - const reasoningEffort = clampedReasoning === "off" ? undefined : clampedReasoning; + const reasoningEffort = + clampedReasoning === "off" + ? undefined + : clampedReasoning === "max" + ? "xhigh" + : clampedReasoning; return streamOpenAIResponses(model, context, { ...base, diff --git a/src/llm/providers/simple-options.ts b/src/llm/providers/simple-options.ts index db840f9390f6..def9ef552c66 100644 --- a/src/llm/providers/simple-options.ts +++ b/src/llm/providers/simple-options.ts @@ -49,6 +49,7 @@ export function adjustMaxTokensForThinking( low: 2048, medium: 8192, high: 16384, + max: 32768, }; const budgets = { ...defaultBudgets, ...customBudgets }; diff --git a/src/llm/types.ts b/src/llm/types.ts index 34779958a42c..cf3be032eadc 100644 --- a/src/llm/types.ts +++ b/src/llm/types.ts @@ -23,7 +23,7 @@ export type KnownImagesProvider = "openrouter"; export type ImagesProvider = string; -export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh"; +export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh" | "max"; export type ModelThinkingLevel = "off" | ThinkingLevel; export type ThinkingLevelMap = Partial>; @@ -33,6 +33,7 @@ export interface ThinkingBudgets { low?: number; medium?: number; high?: number; + max?: number; } // Base options all providers share diff --git a/src/plugin-sdk/provider-model-shared.test.ts b/src/plugin-sdk/provider-model-shared.test.ts index caf9a4d9b1f6..4991eab1052e 100644 --- a/src/plugin-sdk/provider-model-shared.test.ts +++ b/src/plugin-sdk/provider-model-shared.test.ts @@ -279,6 +279,14 @@ describe("buildProviderReplayFamilyHooks", () => { }); describe("resolveClaudeThinkingProfile", () => { + it("leaves Opus 4.8 thinking off by default with xhigh/adaptive/max options", () => { + const profile = resolveClaudeThinkingProfile("claude-opus-4-8"); + expectFields(profile, { + defaultLevel: "off", + }); + expectLevelIdsInclude(profile, ["xhigh", "adaptive", "max"]); + }); + it("exposes Opus 4.7 thinking levels for direct and proxied Claude providers", () => { const directProfile = resolveClaudeThinkingProfile("claude-opus-4-7"); expectFields(directProfile, { diff --git a/src/plugin-sdk/provider-model-shared.ts b/src/plugin-sdk/provider-model-shared.ts index 343a4e9ad9ad..57ea23cc6102 100644 --- a/src/plugin-sdk/provider-model-shared.ts +++ b/src/plugin-sdk/provider-model-shared.ts @@ -94,6 +94,7 @@ export { } from "../plugins/provider-model-helpers.js"; import { normalizeOptionalLowercaseString } from "../shared/string-coerce.js"; +const CLAUDE_OPUS_48_MODEL_PREFIXES = ["claude-opus-4-8", "claude-opus-4.8"] as const; const CLAUDE_OPUS_47_MODEL_PREFIXES = ["claude-opus-4-7", "claude-opus-4.7"] as const; const CLAUDE_ADAPTIVE_THINKING_DEFAULT_MODEL_PREFIXES = [ "claude-opus-4-6", @@ -135,6 +136,10 @@ function isClaudeOpus47ModelId(modelId: string): boolean { return matchesClaudeModelPrefix(modelId, CLAUDE_OPUS_47_MODEL_PREFIXES); } +function isClaudeOpus48ModelId(modelId: string): boolean { + return matchesClaudeModelPrefix(modelId, CLAUDE_OPUS_48_MODEL_PREFIXES); +} + /** @deprecated Anthropic provider-owned model helper; do not use from third-party plugins. */ export function isClaudeAdaptiveThinkingDefaultModelId(modelId: string): boolean { return matchesClaudeModelPrefix(modelId, CLAUDE_ADAPTIVE_THINKING_DEFAULT_MODEL_PREFIXES); @@ -142,6 +147,12 @@ export function isClaudeAdaptiveThinkingDefaultModelId(modelId: string): boolean /** @deprecated Anthropic provider-owned model helper; do not use from third-party plugins. */ export function resolveClaudeThinkingProfile(modelId: string): ProviderThinkingProfile { + if (isClaudeOpus48ModelId(modelId)) { + return { + levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }], + defaultLevel: "off", + }; + } if (isClaudeOpus47ModelId(modelId)) { return { levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }], diff --git a/test/scripts/package-acceptance-workflow.test.ts b/test/scripts/package-acceptance-workflow.test.ts index 02e6096f4d05..54ec3f971f54 100644 --- a/test/scripts/package-acceptance-workflow.test.ts +++ b/test/scripts/package-acceptance-workflow.test.ts @@ -577,7 +577,7 @@ describe("package artifact reuse", () => { expect(workflow).toContain("suite_id: native-live-src-gateway-profiles-anthropic-opus"); expect(workflow).toContain("suite_id: native-live-src-gateway-profiles-anthropic-sonnet-haiku"); expect(workflow).toContain("suite_group: native-live-src-gateway-profiles-anthropic"); - expect(workflow).toContain("OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-7"); + expect(workflow).toContain("OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-8"); expect(workflow).toContain("anthropic/claude-sonnet-4-6,anthropic/claude-haiku-4-5"); expect(workflow).toMatch( /suite_id: native-live-src-gateway-profiles-fireworks[\s\S]*?advisory: true/u,