feat: add Claude Opus 4.8 support (#87890)

* feat: add Claude Opus 4.8 support

* fix: omit Vertex Opus sampling overrides

* fix: preserve Opus adaptive thinking levels

* fix: clamp Anthropic max effort support

* fix: use sha256 for QA mock call ids

* fix: type Anthropic transport test model metadata

* test: update PDF model default for Opus 4.8
This commit is contained in:
Peter Steinberger
2026-05-29 06:10:42 +01:00
committed by GitHub
parent 98611e6272
commit 1188aa3b81
98 changed files with 1134 additions and 295 deletions

View File

@@ -138,7 +138,7 @@ jobs:
OPENAI_API_KEY: ${{ secrets.OPENCLAW_DOCS_I18N_OPENAI_API_KEY || secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENCLAW_CONTROL_UI_I18N_PROVIDER: ${{ secrets.ANTHROPIC_API_KEY != '' && 'anthropic' || 'openai' }}
OPENCLAW_CONTROL_UI_I18N_MODEL: ${{ secrets.ANTHROPIC_API_KEY != '' && 'claude-opus-4-7' || vars.OPENCLAW_CI_OPENAI_MODEL_BARE }}
OPENCLAW_CONTROL_UI_I18N_MODEL: ${{ secrets.ANTHROPIC_API_KEY != '' && 'claude-opus-4-8' || vars.OPENCLAW_CI_OPENAI_MODEL_BARE }}
OPENCLAW_CONTROL_UI_I18N_THINKING: low
OPENCLAW_CONTROL_UI_I18N_AUTH_OPTIONAL: "1"
LOCALE: ${{ matrix.locale }}

View File

@@ -1932,7 +1932,7 @@ jobs:
- suite_id: native-live-src-gateway-profiles-anthropic-opus
suite_group: native-live-src-gateway-profiles-anthropic
label: Native live gateway profiles Anthropic Opus
command: OPENCLAW_LIVE_GATEWAY_THINKING=low OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-7 node .release-harness/scripts/test-live-shard.mjs native-live-src-gateway-profiles
command: OPENCLAW_LIVE_GATEWAY_THINKING=low OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-8 node .release-harness/scripts/test-live-shard.mjs native-live-src-gateway-profiles
timeout_minutes: 30
profile_env_only: false
advisory: true

View File

@@ -813,7 +813,7 @@ jobs:
alt_model="openai/gpt-5.5-alt"
;;
baseline)
model="anthropic/claude-opus-4-7"
model="anthropic/claude-opus-4-8"
alt_model="anthropic/claude-sonnet-4-6"
;;
*)
@@ -885,7 +885,7 @@ jobs:
--candidate-summary .artifacts/qa-e2e/openai-candidate/qa-suite-summary.json \
--baseline-summary .artifacts/qa-e2e/anthropic-baseline/qa-suite-summary.json \
--candidate-label "${OPENCLAW_CI_OPENAI_MODEL}" \
--baseline-label anthropic/claude-opus-4-7 \
--baseline-label anthropic/claude-opus-4-8 \
--output-dir .artifacts/qa-e2e/parity
- name: Upload parity artifacts

View File

@@ -199,13 +199,13 @@ jobs:
--alt-model openai/gpt-5.5-alt \
--output-dir .artifacts/qa-e2e/openai-candidate
- name: Run Opus 4.7 lane
- name: Run Opus 4.8 lane
run: |
pnpm openclaw qa suite \
--provider-mode mock-openai \
--parity-pack agentic \
--concurrency "${QA_PARITY_CONCURRENCY}" \
--model anthropic/claude-opus-4-7 \
--model anthropic/claude-opus-4-8 \
--alt-model anthropic/claude-sonnet-4-6 \
--output-dir .artifacts/qa-e2e/anthropic-baseline
@@ -216,7 +216,7 @@ jobs:
--candidate-summary .artifacts/qa-e2e/openai-candidate/qa-suite-summary.json \
--baseline-summary .artifacts/qa-e2e/anthropic-baseline/qa-suite-summary.json \
--candidate-label "${OPENCLAW_CI_OPENAI_MODEL}" \
--baseline-label anthropic/claude-opus-4-7 \
--baseline-label anthropic/claude-opus-4-8 \
--output-dir .artifacts/qa-e2e/parity
- name: Upload parity artifacts

View File

@@ -157,8 +157,8 @@ order and tells you what it chose:
- existing explicit model, if already configured
- `OPENAI_API_KEY` -> `openai/gpt-5.5`
- `ANTHROPIC_API_KEY` -> `anthropic/claude-opus-4-7`
- Claude Code CLI -> `claude-cli/claude-opus-4-7`
- `ANTHROPIC_API_KEY` -> `anthropic/claude-opus-4-8`
- Claude Code CLI -> `claude-cli/claude-opus-4-8`
- Codex -> `openai/gpt-5.5` through the Codex app-server harness
If none are available, setup still writes the default workspace and leaves the
@@ -173,7 +173,7 @@ planner turn through OpenClaw's normal runtime paths. It first uses the
configured OpenClaw model. If no configured model is usable yet, it can fall
back to local runtimes already present on the machine:
- Claude Code CLI: `claude-cli/claude-opus-4-7`
- Claude Code CLI: `claude-cli/claude-opus-4-8`
- Codex app-server harness: `openai/gpt-5.5`
The model-assisted planner cannot mutate config directly. It must translate the

View File

@@ -35,7 +35,7 @@ There are two runtime families:
is the built-in `openclaw` runtime plus registered plugin harnesses such as
`codex` and `copilot`.
- **CLI backends** run a local CLI process while keeping the model ref
canonical. For example, `anthropic/claude-opus-4-7` with
canonical. For example, `anthropic/claude-opus-4-8` with
a model-scoped `agentRuntime.id: "claude-cli"` means "select the Anthropic
model, execute through Claude CLI." `claude-cli` is not an embedded harness id
and must not be passed to AgentHarness selection.
@@ -174,9 +174,9 @@ Claude CLI form is:
{
agents: {
defaults: {
model: "anthropic/claude-opus-4-7",
model: "anthropic/claude-opus-4-8",
models: {
"anthropic/claude-opus-4-7": {
"anthropic/claude-opus-4-8": {
agentRuntime: { id: "claude-cli" },
},
},

View File

@@ -116,7 +116,7 @@ Official provider plugins publish their own model catalog rows. These providers
- CLI: `openclaw onboard --auth-choice apiKey`
- Direct public Anthropic requests support the shared `/fast` toggle and `params.fastMode`, including API-key and OAuth-authenticated traffic sent to `api.anthropic.com`; OpenClaw maps that to Anthropic `service_tier` (`auto` vs `standard_only`)
- Preferred Claude CLI config keeps the model ref canonical and selects the CLI
backend separately: `anthropic/claude-opus-4-7` with
backend separately: `anthropic/claude-opus-4-8` with
model-scoped `agentRuntime.id: "claude-cli"`. Legacy
`claude-cli/claude-opus-4-7` refs still work for compatibility.

View File

@@ -889,13 +889,13 @@ pnpm openclaw qa character-eval \
--model openai/gpt-5.5,thinking=medium,fast \
--model openai/gpt-5.2,thinking=xhigh \
--model openai/gpt-5,thinking=xhigh \
--model anthropic/claude-opus-4-7,thinking=high \
--model anthropic/claude-opus-4-8,thinking=high \
--model anthropic/claude-sonnet-4-6,thinking=high \
--model zai/glm-5.1,thinking=high \
--model moonshot/kimi-k2.5,thinking=high \
--model google/gemini-3.1-pro-preview,thinking=high \
--judge-model openai/gpt-5.5,thinking=xhigh,fast \
--judge-model anthropic/claude-opus-4-7,thinking=high \
--judge-model anthropic/claude-opus-4-8,thinking=high \
--blind-judge-models \
--concurrency 16 \
--judge-concurrency 16
@@ -926,13 +926,13 @@ Candidate and judge model runs both default to concurrency 16. Lower
`--concurrency` or `--judge-concurrency` when provider limits or local gateway
pressure make a run too noisy.
When no candidate `--model` is passed, the character eval defaults to
`openai/gpt-5.5`, `openai/gpt-5.2`, `openai/gpt-5`, `anthropic/claude-opus-4-7`,
`openai/gpt-5.5`, `openai/gpt-5.2`, `openai/gpt-5`, `anthropic/claude-opus-4-8`,
`anthropic/claude-sonnet-4-6`, `zai/glm-5.1`,
`moonshot/kimi-k2.5`, and
`google/gemini-3.1-pro-preview`.
When no `--judge-model` is passed, the judges default to
`openai/gpt-5.5,thinking=xhigh,fast` and
`anthropic/claude-opus-4-7,thinking=high`.
`anthropic/claude-opus-4-8,thinking=high`.
## Related docs

View File

@@ -334,7 +334,7 @@ Higher values preserve more visual detail.
Image-tool compression/detail preference for images loaded from file paths, URLs, and media references.
Default: `auto`.
OpenClaw adapts the resize ladder to the selected image model. For example, Claude Opus 4.7, OpenAI GPT-5.5, Qwen VL, and hosted Llama 4 vision models can use larger images than older/default high-detail vision paths, while multi-image turns are compressed more aggressively in `auto` mode to control token and latency cost.
OpenClaw adapts the resize ladder to the selected image model. For example, Claude Opus 4.8, OpenAI GPT-5.5, Qwen VL, and hosted Llama 4 vision models can use larger images than older/default high-detail vision paths, while multi-image turns are compressed more aggressively in `auto` mode to control token and latency cost.
Values:
@@ -483,7 +483,7 @@ Time format in system prompt. Default: `auto` (OS preference).
defaults: {
model: "openai/gpt-5.5",
models: {
"anthropic/claude-opus-4-7": {
"anthropic/claude-opus-4-8": {
agentRuntime: { id: "claude-cli" },
},
"vllm/*": {
@@ -501,7 +501,7 @@ Time format in system prompt. Default: `auto` (OS preference).
- Runtime precedence is exact model policy first (`agents.list[].models["provider/model"]`, `agents.defaults.models["provider/model"]`, or `models.providers.<provider>.models[]`), then `agents.list[]` / `agents.defaults.models["provider/*"]`, then provider-wide policy at `models.providers.<provider>.agentRuntime`.
- Whole-agent runtime keys are legacy. `agents.defaults.agentRuntime`, `agents.list[].agentRuntime`, session runtime pins, and `OPENCLAW_AGENT_RUNTIME` are ignored by runtime selection. Run `openclaw doctor --fix` to remove stale values.
- OpenAI agent models use the Codex harness by default; provider/model `agentRuntime.id: "codex"` remains valid when you want to make that explicit.
- For Claude CLI deployments, prefer `model: "anthropic/claude-opus-4-7"` plus model-scoped `agentRuntime.id: "claude-cli"`. Legacy `claude-cli/claude-opus-4-7` model refs still work for compatibility, but new config should keep provider/model selection canonical and put the execution backend in provider/model runtime policy.
- For Claude CLI deployments, prefer `model: "anthropic/claude-opus-4-8"` plus model-scoped `agentRuntime.id: "claude-cli"`. Legacy `claude-cli/claude-opus-4-7` model refs still work for compatibility, but new config should keep provider/model selection canonical and put the execution backend in provider/model runtime policy.
- This only controls text agent-turn execution. Media generation, vision, PDF, music, video, and TTS still use their provider/model settings.
**Built-in alias shorthands** (only apply when the model is in `agents.defaults.models`):
@@ -521,7 +521,7 @@ Your configured aliases always win over defaults.
Z.AI GLM-4.x models automatically enable thinking mode unless you set `--thinking off` or define `agents.defaults.models["zai/<model>"].params.thinking` yourself.
Z.AI models enable `tool_stream` by default for tool call streaming. Set `agents.defaults.models["zai/<model>"].params.tool_stream` to `false` to disable it.
Anthropic Claude 4.6 models default to `adaptive` thinking when no explicit thinking level is set.
Anthropic Claude Opus 4.8 keeps thinking off by default in OpenClaw; when adaptive thinking is explicitly enabled, Anthropic's provider-owned effort default is `high`. Claude 4.6 models default to `adaptive` when no explicit thinking level is set.
### `agents.defaults.cliBackends`

View File

@@ -282,7 +282,7 @@ troubleshooting, see the main [FAQ](/help/faq).
<Accordion title="Are opus / sonnet / gpt built-in shortcuts?">
Yes. OpenClaw ships a few default shorthands (only applied when the model exists in `agents.defaults.models`):
- `opus` → `anthropic/claude-opus-4-7`
- `opus` → `anthropic/claude-opus-4-8`
- `sonnet` → `anthropic/claude-sonnet-4-6`
- `gpt` → `openai/gpt-5.4`
- `gpt-mini` → `openai/gpt-5.4-mini`

View File

@@ -238,9 +238,9 @@ model entry:
{
"agents": {
"defaults": {
"model": "anthropic/claude-opus-4-7",
"model": "anthropic/claude-opus-4-8",
"models": {
"anthropic/claude-opus-4-7": {
"anthropic/claude-opus-4-8": {
"agentRuntime": {
"id": "claude-cli"
}

View File

@@ -61,7 +61,7 @@ Anthropic's current public docs:
```json5
{
env: { ANTHROPIC_API_KEY: "example-anthropic-key-not-real" },
agents: { defaults: { model: { primary: "anthropic/claude-opus-4-6" } } },
agents: { defaults: { model: { primary: "anthropic/claude-opus-4-8" } } },
}
```
@@ -113,9 +113,9 @@ Anthropic's current public docs:
{
agents: {
defaults: {
model: { primary: "anthropic/claude-opus-4-7" },
model: { primary: "anthropic/claude-opus-4-8" },
models: {
"anthropic/claude-opus-4-7": {
"anthropic/claude-opus-4-8": {
agentRuntime: { id: "claude-cli" },
},
},
@@ -135,9 +135,9 @@ Anthropic's current public docs:
</Tab>
</Tabs>
## Thinking defaults (Claude 4.6)
## Thinking defaults (Claude 4.8 and 4.6)
Claude 4.6 models default to `adaptive` thinking in OpenClaw when no explicit thinking level is set.
Claude Opus 4.8 keeps thinking off by default in OpenClaw. When you explicitly enable adaptive thinking with `/think high|xhigh|max`, OpenClaw sends Anthropic's Opus 4.8 effort values; Claude 4.6 models default to `adaptive`.
Override per-message with `/think:<level>` or in model params:
@@ -146,8 +146,8 @@ Override per-message with `/think:<level>` or in model params:
agents: {
defaults: {
models: {
"anthropic/claude-opus-4-6": {
params: { thinking: "adaptive" },
"anthropic/claude-opus-4-8": {
params: { thinking: "high" },
},
},
},
@@ -267,7 +267,7 @@ OpenClaw supports Anthropic's prompt caching feature for API-key auth.
| Property | Value |
| --------------- | --------------------- |
| Default model | `claude-opus-4-7` |
| Default model | `claude-opus-4-8` |
| Supported input | Images, PDF documents |
When an image or PDF is attached to a conversation, OpenClaw automatically
@@ -277,7 +277,7 @@ OpenClaw supports Anthropic's prompt caching feature for API-key auth.
<Accordion title="1M context window">
Anthropic's 1M context window is available on GA-capable Claude 4.x models
such as Opus 4.6, Opus 4.7, and Sonnet 4.6. OpenClaw sizes those models at
such as Opus 4.8, Opus 4.7, Opus 4.6, and Sonnet 4.6. OpenClaw sizes those models at
1M automatically:
```json5
@@ -308,8 +308,8 @@ OpenClaw supports Anthropic's prompt caching feature for API-key auth.
</Accordion>
<Accordion title="Claude Opus 4.7 1M context">
`anthropic/claude-opus-4-7` and its `claude-cli` variant have a 1M context
<Accordion title="Claude Opus 4.8 1M context">
`anthropic/claude-opus-4-8` and its `claude-cli` variant have a 1M context
window by default — no `params.context1m: true` needed.
</Accordion>
</AccordionGroup>

View File

@@ -205,7 +205,7 @@ override only `cacheRetention` and inherit other model defaults unchanged.
### Anthropic 1M context
OpenClaw sizes GA-capable Claude 4.x models such as Opus 4.6, Opus 4.7, and
OpenClaw sizes GA-capable Claude 4.x models such as Opus 4.8, Opus 4.7, Opus 4.6, and
Sonnet 4.6 with Anthropic's 1M context window. You do not need
`params.context1m: true` for those models.

View File

@@ -13,9 +13,9 @@ title: "Thinking levels"
- low → "think hard"
- medium → "think harder"
- high → "ultrathink" (max budget)
- xhigh → "ultrathink+" (GPT-5.2+ and Codex models, plus Anthropic Claude Opus 4.7 effort)
- adaptive → provider-managed adaptive thinking (supported for Claude 4.6 on Anthropic/Bedrock, Anthropic Claude Opus 4.7, and Google Gemini dynamic thinking)
- max → provider max reasoning (Anthropic Claude Opus 4.7; Ollama maps this to its highest native `think` effort)
- xhigh → "ultrathink+" (GPT-5.2+ and Codex models, plus Anthropic Claude Opus 4.7+ effort)
- adaptive → provider-managed adaptive thinking (supported for Claude 4.6 on Anthropic/Bedrock, Anthropic Claude Opus 4.7+, and Google Gemini dynamic thinking)
- max → provider max reasoning (Anthropic Claude Opus 4.7+; Ollama maps this to its highest native `think` effort)
- `x-high`, `x_high`, `extra-high`, `extra high`, and `extra_high` map to `xhigh`.
- `highest` maps to `high`.
- Provider notes:
@@ -23,9 +23,9 @@ title: "Thinking levels"
- `adaptive`, `xhigh`, and `max` are only advertised for provider/model profiles that support them. Typed directives for unsupported levels are rejected with that model's valid options.
- Existing stored unsupported levels are remapped by provider profile rank. `adaptive` falls back to `medium` on non-adaptive models, while `xhigh` and `max` fall back to the largest supported non-off level for the selected model.
- Anthropic Claude 4.6 models default to `adaptive` when no explicit thinking level is set.
- Anthropic Claude Opus 4.7 does not default to adaptive thinking. Its API effort default remains provider-owned unless you explicitly set a thinking level.
- Anthropic Claude Opus 4.7 maps `/think xhigh` to adaptive thinking plus `output_config.effort: "xhigh"`, because `/think` is a thinking directive and `xhigh` is the Opus 4.7 effort setting.
- Anthropic Claude Opus 4.7 also exposes `/think max`; it maps to the same provider-owned max effort path.
- Anthropic Claude Opus 4.8 and Opus 4.7 keep thinking off unless you explicitly set a thinking level. Opus 4.8's provider-owned effort default is `high` after adaptive thinking is enabled.
- Anthropic Claude Opus 4.7+ maps `/think xhigh` to adaptive thinking plus `output_config.effort: "xhigh"`, because `/think` is a thinking directive and `xhigh` is the Opus effort setting.
- Anthropic Claude Opus 4.7+ also exposes `/think max`; it maps to the same provider-owned max effort path.
- Direct DeepSeek V4 models expose `/think xhigh|max`; both map to DeepSeek `reasoning_effort: "max"` while lower non-off levels map to `high`.
- OpenRouter-routed DeepSeek V4 models expose `/think xhigh` and send OpenRouter-supported `reasoning_effort` values. Stored `max` overrides fall back to `xhigh`.
- Ollama thinking-capable models expose `/think low|medium|high|max`; `max` maps to native `think: "high"` because Ollama's native API accepts `low`, `medium`, and `high` effort strings.

View File

@@ -64,6 +64,7 @@ function adjustMaxTokensForThinking(
medium: 8192,
high: 16384,
xhigh: 16384,
max: 16384,
} as const;
const budgets = { ...defaultBudgets, ...customBudgets };
const minOutputTokens = 1024;

View File

@@ -195,6 +195,58 @@ describe("bedrock discovery", () => {
});
});
// Verifies that discovery reports a 1M context window, reasoning support, and
// the xhigh/max thinking-level map for the dotted "4.8" id spelling, both for
// the foundation-model id and for the region-prefixed inference-profile id.
it("uses 1M context window for dotted Claude Opus 4.8 Bedrock refs", async () => {
sendMock
// First queued response: a single ACTIVE, text-only, streaming-capable model summary.
.mockResolvedValueOnce({
modelSummaries: [
{
modelId: "anthropic.claude-opus-4.8-v1:0",
modelName: "Claude Opus 4.8",
providerName: "anthropic",
inputModalities: ["TEXT"],
outputModalities: ["TEXT"],
responseStreamingSupported: true,
modelLifecycle: { status: "ACTIVE" },
},
],
})
// Second queued response: a system-defined inference profile whose model ARN
// points at the foundation model above.
.mockResolvedValueOnce({
inferenceProfileSummaries: [
{
inferenceProfileId: "us.anthropic.claude-opus-4.8-v1:0",
inferenceProfileName: "US Claude Opus 4.8",
status: "ACTIVE",
type: "SYSTEM_DEFINED",
models: [
{
modelArn:
"arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-opus-4.8-v1:0",
},
],
},
],
});
const models = await discoverBedrockModels({ region: "us-east-1", clientFactory });
// The bare foundation-model id must carry the Opus 4.8 metadata.
expectModelFields(
models.find((model) => model.id === "anthropic.claude-opus-4.8-v1:0"),
{
contextWindow: 1_000_000,
reasoning: true,
thinkingLevelMap: { xhigh: "xhigh", max: "max" },
},
);
// The "us."-prefixed inference profile must expose the same metadata.
expectModelFields(
models.find((model) => model.id === "us.anthropic.claude-opus-4.8-v1:0"),
{
contextWindow: 1_000_000,
reasoning: true,
thinkingLevelMap: { xhigh: "xhigh", max: "max" },
},
);
});
it("caches results when refreshInterval is enabled", async () => {
mockSingleActiveSummary();

View File

@@ -46,6 +46,7 @@ const DEFAULT_MAX_TOKENS = 4096;
const KNOWN_CONTEXT_WINDOWS: Record<string, number> = {
// Anthropic Claude
"anthropic.claude-3-7-sonnet-20250219-v1:0": 200_000,
"anthropic.claude-opus-4-8": 1_000_000,
"anthropic.claude-opus-4-7": 1_000_000,
"anthropic.claude-opus-4-6-v1": 1_000_000,
"anthropic.claude-opus-4-6-v1:0": 1_000_000,
@@ -121,6 +122,9 @@ function resolveKnownContextWindow(modelId: string): number | undefined {
const stripped = modelId.replace(/^(?:us|eu|ap|apac|au|jp|global)\./, "");
const candidates = [modelId, stripped];
for (const candidate of candidates) {
if (/(?:^|[/.:])anthropic\.claude-opus-4[.-]8(?:$|[-.:/])/i.test(candidate)) {
return 1_000_000;
}
if (KNOWN_CONTEXT_WINDOWS[candidate] !== undefined) {
return KNOWN_CONTEXT_WINDOWS[candidate];
}
@@ -135,6 +139,22 @@ function resolveKnownContextWindow(modelId: string): number | undefined {
return undefined;
}
/**
 * Reports whether a Bedrock model id refers to Claude Opus 4.7 or 4.8.
 *
 * Both the dashed (`4-7`) and dotted (`4.7`) version spellings are accepted,
 * matching is case-insensitive, and the id is checked both as given and with a
 * leading region prefix (`us.`, `eu.`, `ap.`, `apac.`, `au.`, `jp.`, `global.`)
 * removed.
 */
function isKnownClaudeOpus47OrNewerModelId(modelId: string): boolean {
  // No `g`/`y` flag, so the pattern is stateless and safe to reuse for both checks.
  const opusPattern = /(?:^|[/.:])anthropic\.claude-opus-4[.-][78](?:$|[-.:/])/i;
  if (opusPattern.test(modelId)) {
    return true;
  }
  const withoutRegionPrefix = modelId.replace(/^(?:us|eu|ap|apac|au|jp|global)\./, "");
  return opusPattern.test(withoutRegionPrefix);
}
/**
 * Returns the thinking-level overrides for model ids recognized by
 * `isKnownClaudeOpus47OrNewerModelId`, or `undefined` for every other id.
 */
function resolveKnownThinkingLevelMap(
  modelId: string,
): ModelDefinitionConfig["thinkingLevelMap"] | undefined {
  return isKnownClaudeOpus47OrNewerModelId(modelId)
    ? { xhigh: "xhigh", max: "max" }
    : undefined;
}
const DEFAULT_COST = {
input: 0,
output: 0,
@@ -243,6 +263,9 @@ function mapInputModalities(summary: BedrockModelSummary): Array<"text" | "image
}
function inferReasoningSupport(summary: BedrockModelSummary): boolean {
if (isKnownClaudeOpus47OrNewerModelId(summary.modelId ?? "")) {
return true;
}
const haystack = normalizeLowercaseStringOrEmpty(
`${summary.modelId ?? ""} ${summary.modelName ?? ""}`,
);
@@ -301,6 +324,7 @@ function toModelDefinition(
defaults: { contextWindow: number; maxTokens: number },
): ModelDefinitionConfig {
const id = summary.modelId?.trim() ?? "";
const thinkingLevelMap = resolveKnownThinkingLevelMap(id);
return {
id,
name: summary.modelName?.trim() || id,
@@ -309,6 +333,7 @@ function toModelDefinition(
cost: DEFAULT_COST,
contextWindow: resolveKnownContextWindow(id) ?? defaults.contextWindow,
maxTokens: defaults.maxTokens,
...(thinkingLevelMap ? { thinkingLevelMap } : {}),
};
}
@@ -420,11 +445,16 @@ function resolveInferenceProfiles(
const baseModel = baseModelId
? foundationModels.get(normalizeLowercaseStringOrEmpty(baseModelId))
: undefined;
const knownThinkingLevelMap = resolveKnownThinkingLevelMap(
baseModelId ?? profile.inferenceProfileId,
);
discovered.push({
id: profile.inferenceProfileId,
name: profile.inferenceProfileName?.trim() || profile.inferenceProfileId,
reasoning: baseModel?.reasoning ?? false,
reasoning:
baseModel?.reasoning ??
isKnownClaudeOpus47OrNewerModelId(baseModelId ?? profile.inferenceProfileId),
input: baseModel?.input ?? ["text"],
cost: baseModel?.cost ?? DEFAULT_COST,
contextWindow:
@@ -432,6 +462,9 @@ function resolveInferenceProfiles(
resolveKnownContextWindow(baseModelId ?? profile.inferenceProfileId ?? "") ??
defaults.contextWindow,
maxTokens: baseModel?.maxTokens ?? defaults.maxTokens,
...(baseModel?.thinkingLevelMap || knownThinkingLevelMap
? { thinkingLevelMap: baseModel?.thinkingLevelMap ?? knownThinkingLevelMap }
: {}),
});
}
return discovered;

View File

@@ -333,6 +333,27 @@ describe("amazon-bedrock provider plugin", () => {
}
});
// Verifies that the plugin's thinking profile for Opus 4.8 defaults to "off"
// while still listing every level up to "max", for each id spelling below.
it("leaves Claude Opus 4.8 Bedrock model refs off by default", async () => {
const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
// Covers a dashed region-prefixed id, a dotted versioned id, and a full
// inference-profile ARN.
for (const modelId of [
"us.anthropic.claude-opus-4-8",
"us.anthropic.claude-opus-4.8-v1:0",
"arn:aws:bedrock:us-west-2:123456789012:inference-profile/us.anthropic.claude-opus-4-8",
]) {
expectThinkingProfile(
provider.resolveThinkingProfile?.({
provider: "amazon-bedrock",
modelId,
} as never),
{
levelIds: ["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"],
defaultLevel: "off",
},
);
}
});
it("owns Anthropic-style replay policy for Claude Bedrock models", async () => {
const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
@@ -432,6 +453,28 @@ describe("amazon-bedrock provider plugin", () => {
expect(result).not.toHaveProperty("temperature");
});
// Verifies that the wrapped stream function drops the caller-supplied
// `temperature` option for an Opus 4.8 model id while keeping `maxTokens`.
it("omits temperature for Bedrock Opus 4.8 model ids", async () => {
const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
const wrapped = provider.wrapStreamFn?.({
provider: "amazon-bedrock",
modelId: "us.anthropic.claude-opus-4-8",
streamFn: spyStreamFn,
} as never);
// Call the wrapper with an explicit temperature that should be stripped.
const result = wrapped?.(
{
api: "bedrock-converse-stream",
provider: "amazon-bedrock",
id: "us.anthropic.claude-opus-4-8",
} as never,
{ messages: [] } as never,
{ temperature: 0.2, maxTokens: 10 },
) as Record<string, unknown> | undefined;
expectWrappedResultFields(result, { maxTokens: 10 });
expect(result).not.toHaveProperty("temperature");
});
it("omits temperature for dotted Bedrock Opus 4.7 model ids", async () => {
const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
const wrapped = provider.wrapStreamFn?.({
@@ -590,6 +633,44 @@ describe("amazon-bedrock provider plugin", () => {
expect(payload.additionalModelRequestFields.output_config).toEqual({ effort: "xhigh" });
});
// Verifies the payload hook for Opus 4.8 at thinking level "max": the effort in
// `output_config` is rewritten to "max" and `temperature` is removed from
// `inferenceConfig`.
it("uses adaptive max thinking for Bedrock Opus 4.8", async () => {
const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
const wrapped = provider.wrapStreamFn?.({
provider: "amazon-bedrock",
modelId: "us.anthropic.claude-opus-4-8",
streamFn: spyStreamFn,
thinkingLevel: "max",
} as never);
const result = wrapped?.(
{
api: "bedrock-converse-stream",
provider: "amazon-bedrock",
id: "us.anthropic.claude-opus-4-8",
name: "Claude Opus 4.8",
reasoning: true,
} as never,
{ messages: [] } as never,
{ reasoning: "max" } as never,
) as Record<string, unknown> | undefined;
// Start from a payload carrying an "xhigh" effort and a temperature so that
// both rewrites are observable after the hook runs.
const payload = {
inferenceConfig: { temperature: 0.2 },
additionalModelRequestFields: {
thinking: { type: "adaptive" },
output_config: { effort: "xhigh" },
},
};
// The hook mutates `payload` in place; the assertions below inspect the same object.
await (result?.onPayload as ((p: Record<string, unknown>) => unknown) | undefined)?.(payload);
expect(payload.additionalModelRequestFields).toEqual({
thinking: { type: "adaptive" },
output_config: { effort: "max" },
});
expect(payload.inferenceConfig).toEqual({});
});
it("classifies nested Bedrock deprecated-temperature validation as format failover", async () => {
const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);

View File

@@ -19,6 +19,26 @@ describe("amazon-bedrock provider-policy-api", () => {
expect(profile?.defaultLevel).toBe("adaptive");
});
// Verifies that `resolveThinkingProfile` recognizes Opus 4.8 inside a full
// inference-profile ARN, lists the levels in the exact order below, and
// defaults to "off".
it("leaves Bedrock Claude Opus 4.8 thinking off by default with max effort available", () => {
const profile = resolveThinkingProfile({
provider: "amazon-bedrock",
modelId:
"arn:aws:bedrock:us-west-2:123456789012:inference-profile/us.anthropic.claude-opus-4-8",
});
// `toEqual` on the id array also pins the ordering of the levels.
expect(profile?.levels.map((level) => level.id)).toEqual([
"off",
"minimal",
"low",
"medium",
"high",
"xhigh",
"adaptive",
"max",
]);
expect(profile?.defaultLevel).toBe("off");
});
it("exposes max thinking for Bedrock Claude Opus 4.7 refs", () => {
expect(
resolveThinkingProfile({

View File

@@ -13,7 +13,10 @@ import { supportsBedrockPromptCaching } from "./bedrock-options.js";
import { mergeImplicitBedrockProvider, resolveBedrockConfigApiKey } from "./discovery-shared.js";
import { bedrockMemoryEmbeddingProviderAdapter } from "./memory-embedding-adapter.js";
import { streamBedrock, streamSimpleBedrock } from "./stream.runtime.js";
import { isOpus47BedrockModelRef, resolveBedrockClaudeThinkingProfile } from "./thinking-policy.js";
import {
isOpus47OrNewerBedrockModelRef,
resolveBedrockClaudeThinkingProfile,
} from "./thinking-policy.js";
type GuardrailConfig = {
guardrailIdentifier: string;
@@ -252,7 +255,7 @@ async function resolveAppProfileTraits(
const traits = {
cacheEligible:
models.length > 0 && modelArns.every((modelArn) => resolvedModelSupportsCaching(modelArn)),
omitTemperature: modelArns.some(isOpus47BedrockModelRef),
omitTemperature: modelArns.some(isOpus47OrNewerBedrockModelRef),
};
appProfileTraitsCache.set(modelId, traits);
return traits;
@@ -261,7 +264,7 @@ async function resolveAppProfileTraits(
// return the heuristic fallback but allow retry on the next request.
return {
cacheEligible: isAnthropicBedrockModel(modelId),
omitTemperature: isOpus47BedrockModelRef(modelId),
omitTemperature: isOpus47OrNewerBedrockModelRef(modelId),
};
}
}
@@ -388,7 +391,7 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
modelId: string,
options: TOptions,
): TOptions {
if (!isOpus47BedrockModelRef(modelId) || !("temperature" in options)) {
if (!isOpus47OrNewerBedrockModelRef(modelId) || !("temperature" in options)) {
return options;
}
const next = { ...options } as typeof options & { temperature?: unknown };
@@ -513,7 +516,7 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
currentPluginConfig?.discovery?.region;
const mayNeedCacheInjection =
isBedrockAppInferenceProfile(modelId) && !sharedRuntimeWouldInjectCachePoints(modelId);
const shouldOmitTemperature = isOpus47BedrockModelRef(modelId);
const shouldOmitTemperature = isOpus47OrNewerBedrockModelRef(modelId);
const shouldPatchMaxThinking = shouldOmitTemperature && thinkingLevel === "max";
// For known Anthropic models (heuristic match), enable injection immediately.
@@ -548,7 +551,9 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
? {
onPayload: (payload: unknown, payloadModel: unknown) => {
if (payload && typeof payload === "object") {
patchOpus47MaxThinkingEffort(payload as Record<string, unknown>);
const payloadRecord = payload as Record<string, unknown>;
patchOpus47MaxThinkingEffort(payloadRecord);
omitDeprecatedOpus47PayloadTemperature(payloadRecord);
}
return originalOnPayload?.(payload, payloadModel);
},
@@ -584,7 +589,9 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
if (shouldPatchMaxThinking) {
patchOpus47MaxThinkingEffort(payloadRecord);
}
if (mayNeedTemperatureTrait) {
if (shouldOmitTemperature) {
omitDeprecatedOpus47PayloadTemperature(payloadRecord);
} else if (mayNeedTemperatureTrait) {
const traits = await resolveAppProfileTraits(modelId, region);
if (traits.omitTemperature) {
omitDeprecatedOpus47PayloadTemperature(payloadRecord);

View File

@@ -89,3 +89,29 @@ describe("Bedrock profile endpoint resolution", () => {
).toBe(false);
});
});
// Covers how `testing.mapThinkingLevelToEffort` maps the "max" thinking level
// for two different Claude model ids.
describe("Bedrock thinking effort mapping", () => {
// Claude Sonnet 4.6 is expected to fall back from "max" to "high".
it("clamps max effort for Claude models without native max support", () => {
expect(
testing.mapThinkingLevelToEffort(
bedrockModel({
id: "anthropic.claude-sonnet-4-6-v1:0",
name: "Claude Sonnet 4.6",
}),
"max",
),
).toBe("high");
});
// Claude Opus 4.8 (dotted id spelling) is expected to keep "max" unchanged.
it("preserves max effort for Claude Opus 4.8", () => {
expect(
testing.mapThinkingLevelToEffort(
bedrockModel({
id: "anthropic.claude-opus-4.8-v1:0",
name: "Claude Opus 4.8",
}),
"max",
),
).toBe("max");
});
});

View File

@@ -473,13 +473,17 @@ function getModelMatchCandidates(modelId: string, modelName?: string): string[]
function supportsAdaptiveThinking(modelId: string, modelName?: string): boolean {
const candidates = getModelMatchCandidates(modelId, modelName);
return candidates.some(
(s) => s.includes("opus-4-6") || s.includes("opus-4-7") || s.includes("sonnet-4-6"),
(s) =>
s.includes("opus-4-6") ||
s.includes("opus-4-7") ||
s.includes("opus-4-8") ||
s.includes("sonnet-4-6"),
);
}
function supportsNativeXhighEffort(model: Model<"bedrock-converse-stream">): boolean {
const candidates = getModelMatchCandidates(model.id, model.name);
return candidates.some((s) => s.includes("opus-4-7"));
return candidates.some((s) => s.includes("opus-4-7") || s.includes("opus-4-8"));
}
function mapThinkingLevelToEffort(
@@ -503,6 +507,8 @@ function mapThinkingLevelToEffort(
return "medium";
case "high":
return "high";
case "max":
return supportsNativeXhighEffort(model) ? "max" : "high";
default:
return "high";
}
@@ -887,6 +893,7 @@ function buildAdditionalModelRequestFields(
medium: 8192,
high: 16384,
xhigh: 16384, // Claude doesn't support xhigh, clamp to high
max: 16384,
};
// Custom budgets override defaults (xhigh not in ThinkingBudgets, use high)
@@ -945,5 +952,6 @@ export const testing = {
convertMessages,
getConfiguredBedrockRegion,
hasConfiguredBedrockProfile,
mapThinkingLevelToEffort,
shouldUseExplicitBedrockEndpoint,
};

View File

@@ -8,14 +8,30 @@ const BASE_CLAUDE_THINKING_LEVELS = [
{ id: "high" },
] as const satisfies ProviderThinkingProfile["levels"];
/**
 * Reports whether a Bedrock model reference names Claude Opus 4.8.
 *
 * Accepts dashed (`4-8`) and dotted (`4.8`) version spellings, an optional
 * region prefix (`us.`, `eu.`, `ap.`, `apac.`, `au.`, `jp.`, `global.`), and a
 * match embedded after `/`, `.`, or `:`. Matching is case-insensitive.
 */
function isOpus48BedrockModelRef(modelRef: string): boolean {
  const opus48Pattern =
    /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?anthropic\.claude-opus-4[.-]8(?:$|[-.:/])/i;
  return opus48Pattern.test(modelRef);
}
/**
 * Reports whether a Bedrock model reference names Claude Opus 4.7.
 *
 * Accepts dashed (`4-7`) and dotted (`4.7`) version spellings, an optional
 * region prefix (`us.`, `eu.`, `ap.`, `apac.`, `au.`, `jp.`, `global.`), and a
 * match embedded after `/`, `.`, or `:`. Matching is case-insensitive.
 */
export function isOpus47BedrockModelRef(modelRef: string): boolean {
  const opus47Pattern =
    /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?anthropic\.claude-opus-4[.-]7(?:$|[-.:/])/i;
  return opus47Pattern.test(modelRef);
}
/**
 * Reports whether a Bedrock model reference names Claude Opus 4.7 or 4.8.
 *
 * Despite the "or newer" name, only the two versions covered by
 * `isOpus47BedrockModelRef` and `isOpus48BedrockModelRef` are recognised.
 *
 * @param modelRef Model id or longer reference string to inspect.
 * @returns `true` when either per-version predicate accepts the reference.
 */
export function isOpus47OrNewerBedrockModelRef(modelRef: string): boolean {
  if (isOpus47BedrockModelRef(modelRef)) {
    return true;
  }
  return isOpus48BedrockModelRef(modelRef);
}
export function resolveBedrockClaudeThinkingProfile(modelId: string): ProviderThinkingProfile {
const trimmed = modelId.trim();
if (isOpus48BedrockModelRef(trimmed)) {
return {
levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }],
defaultLevel: "off",
};
}
if (isOpus47BedrockModelRef(trimmed)) {
return {
levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }],

View File

@@ -77,9 +77,14 @@ describe("anthropic-vertex provider plugin", () => {
expect(result.provider.baseUrl).toBe("https://europe-west4-aiplatform.googleapis.com");
expect(result.provider.headers).toEqual({ "x-test-header": "1" });
expect(result.provider.models.map((model) => model.id)).toEqual([
"claude-opus-4-8",
"claude-opus-4-6",
"claude-sonnet-4-6",
]);
expect(result.provider.models[0]?.thinkingLevelMap).toEqual({
xhigh: "xhigh",
max: "max",
});
});
it("owns Anthropic-style replay policy", async () => {
@@ -103,6 +108,18 @@ describe("anthropic-vertex provider plugin", () => {
});
});
// Opus 4.8 on the anthropic-vertex provider: thinking defaults to "off" and
// "max" is among the offered levels.
it("owns Anthropic-style thinking policy", async () => {
  const registered = await registerSingleProviderPlugin(anthropicVertexPlugin);
  const profile = registered.resolveThinkingProfile?.({
    provider: "anthropic-vertex",
    modelId: "claude-opus-4-8",
  } as never);

  expect(profile?.defaultLevel).toBe("off");

  const offeredLevelIds = profile?.levels.map((level) => level.id);
  expect(offeredLevelIds).toContain("max");
});
it("resolves synthetic auth when ADC is available", async () => {
hasAnthropicVertexAvailableAuthMock.mockReturnValue(true);
const provider = await registerSingleProviderPlugin(anthropicVertexPlugin);

View File

@@ -1,6 +1,9 @@
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
import { readConfiguredProviderCatalogEntries } from "openclaw/plugin-sdk/provider-catalog-shared";
import { NATIVE_ANTHROPIC_REPLAY_HOOKS } from "openclaw/plugin-sdk/provider-model-shared";
import {
NATIVE_ANTHROPIC_REPLAY_HOOKS,
resolveClaudeThinkingProfile,
} from "openclaw/plugin-sdk/provider-model-shared";
import {
hasAnthropicVertexAvailableAuth,
mergeImplicitAnthropicVertexProvider,
@@ -40,6 +43,7 @@ export default definePluginEntry({
},
resolveConfigApiKey: ({ env }) => resolveAnthropicVertexConfigApiKey(env),
...NATIVE_ANTHROPIC_REPLAY_HOOKS,
resolveThinkingProfile: ({ modelId }) => resolveClaudeThinkingProfile(modelId),
resolveSyntheticAuth: () => {
if (!hasAnthropicVertexAvailableAuth()) {
return undefined;

View File

@@ -15,6 +15,7 @@ function buildAnthropicVertexModel(params: {
input: ModelDefinitionConfig["input"];
cost: ModelDefinitionConfig["cost"];
maxTokens: number;
thinkingLevelMap?: ModelDefinitionConfig["thinkingLevelMap"];
}): ModelDefinitionConfig {
return {
id: params.id,
@@ -24,11 +25,21 @@ function buildAnthropicVertexModel(params: {
cost: params.cost,
contextWindow: ANTHROPIC_VERTEX_DEFAULT_CONTEXT_WINDOW,
maxTokens: params.maxTokens,
...(params.thinkingLevelMap ? { thinkingLevelMap: params.thinkingLevelMap } : {}),
};
}
function buildAnthropicVertexCatalog(): ModelDefinitionConfig[] {
return [
buildAnthropicVertexModel({
id: "claude-opus-4-8",
name: "Claude Opus 4.8",
reasoning: true,
input: ["text", "image"],
cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
maxTokens: 128000,
thinkingLevelMap: { xhigh: "xhigh", max: "max" },
}),
buildAnthropicVertexModel({
id: "claude-opus-4-6",
name: "Claude Opus 4.6",

View File

@@ -0,0 +1,29 @@
import { describe, expect, it } from "vitest";
import { resolveThinkingProfile } from "./provider-policy-api.js";
// Covers the lightweight thinking-profile resolver: Opus 4.8 and 4.7 default
// to "off", Opus 4.8 offers "max", and non-Vertex providers resolve to null.
describe("anthropic-vertex provider-policy-api", () => {
  // Shorthand for resolving a profile under the anthropic-vertex provider id.
  const vertexProfileFor = (modelId: string) =>
    resolveThinkingProfile({ provider: "anthropic-vertex", modelId });

  it("leaves Claude Opus 4.8 thinking off by default with max effort support", () => {
    const opus48 = vertexProfileFor("claude-opus-4-8");

    expect(opus48?.defaultLevel).toBe("off");
    expect(opus48?.levels.map((level) => level.id)).toContain("max");
  });

  it("keeps Claude Opus 4.7 thinking off by default", () => {
    const opus47 = vertexProfileFor("claude-opus-4-7");

    expect(opus47?.defaultLevel).toBe("off");
  });

  it("ignores other providers", () => {
    const foreign = resolveThinkingProfile({ provider: "anthropic", modelId: "claude-opus-4-8" });

    expect(foreign).toBeNull();
  });
});

View File

@@ -0,0 +1,8 @@
import { resolveClaudeThinkingProfile } from "openclaw/plugin-sdk/provider-model-shared";
/**
 * Resolves the thinking profile for an anthropic-vertex model.
 *
 * @param params.provider Provider id; compared against "anthropic-vertex"
 *   after trimming whitespace and lowercasing.
 * @param params.modelId Model id, forwarded unchanged to
 *   `resolveClaudeThinkingProfile`.
 * @returns `null` for any other provider; otherwise the result of
 *   `resolveClaudeThinkingProfile`.
 */
export function resolveThinkingProfile(params: { provider: string; modelId: string }) {
  const providerId = params.provider.trim().toLowerCase();
  return providerId === "anthropic-vertex" ? resolveClaudeThinkingProfile(params.modelId) : null;
}

View File

@@ -170,6 +170,30 @@ describe("createAnthropicVertexStreamFn", () => {
expect(streamTransportOptions(streamAnthropicMock).maxTokens).toBe(128000);
});
// For the Opus 4.8 and 4.7 ids, a caller-supplied temperature must not show up
// as a key on the transport options at all (not even as `undefined`).
it.each(["claude-opus-4-8", "claude-opus-4-7"])(
  "omits unsupported temperature for %s",
  (modelId) => {
    const { deps, streamAnthropicMock } = createStreamDeps();
    const stream = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
    const vertexModel = makeModel({ id: modelId, maxTokens: 128000 });

    void stream(vertexModel, { messages: [] }, { temperature: 0.7 });

    const forwarded = streamTransportOptions(streamAnthropicMock);
    const hasTemperatureKey = Object.hasOwn(forwarded, "temperature");
    expect(hasTemperatureKey).toBe(false);
  },
);
// Counterpart to the Opus omission case: a Sonnet 4.6 id keeps the
// caller-supplied temperature on the transport options.
it("preserves temperature for Vertex models that support custom sampling", () => {
  const { deps, streamAnthropicMock } = createStreamDeps();
  const stream = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
  const sonnetModel = makeModel({ id: "claude-sonnet-4-6", maxTokens: 128000 });

  void stream(sonnetModel, { messages: [] }, { temperature: 0.7 });

  const forwarded = streamTransportOptions(streamAnthropicMock);
  expect(forwarded.temperature).toBe(0.7);
});
it("maps xhigh reasoning to max effort for adaptive Opus models", () => {
const { deps, streamAnthropicMock } = createStreamDeps();
const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
@@ -182,10 +206,10 @@ describe("createAnthropicVertexStreamFn", () => {
expect(transportOptions.effort).toBe("max");
});
it("maps xhigh reasoning to xhigh effort for Opus 4.7", () => {
it("maps xhigh reasoning to xhigh effort for Opus 4.8", () => {
const { deps, streamAnthropicMock } = createStreamDeps();
const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
const model = makeModel({ id: "claude-opus-4-7", maxTokens: 64000 });
const model = makeModel({ id: "claude-opus-4-8", maxTokens: 128000 });
void streamFn(model, { messages: [] }, { reasoning: "xhigh" });
@@ -194,6 +218,30 @@ describe("createAnthropicVertexStreamFn", () => {
expect(transportOptions.effort).toBe("xhigh");
});
// Requesting "max" reasoning on an Opus 4.8 id enables thinking and forwards
// the effort as "max".
it("preserves max reasoning for Opus 4.8", () => {
  const { deps, streamAnthropicMock } = createStreamDeps();
  const stream = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
  const opus48Model = makeModel({ id: "claude-opus-4-8", maxTokens: 128000 });

  void stream(opus48Model, { messages: [] }, { reasoning: "max" });

  const forwarded = streamTransportOptions(streamAnthropicMock);
  expect(forwarded.thinkingEnabled).toBe(true);
  expect(forwarded.effort).toBe("max");
});
// Requesting "max" reasoning on a Sonnet 4.6 id still enables thinking, but
// the forwarded effort is "high".
it("clamps max reasoning for adaptive models without native max support", () => {
  const { deps, streamAnthropicMock } = createStreamDeps();
  const stream = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
  const sonnetModel = makeModel({ id: "claude-sonnet-4-6", maxTokens: 128000 });

  void stream(sonnetModel, { messages: [] }, { reasoning: "max" });

  const forwarded = streamTransportOptions(streamAnthropicMock);
  expect(forwarded.thinkingEnabled).toBe(true);
  expect(forwarded.effort).toBe("high");
});
it("applies Anthropic cache-boundary shaping before forwarding payload hooks", async () => {
const { deps, streamAnthropicMock } = createStreamDeps();
const onPayload = vi.fn(async (payload: unknown) => payload);

View File

@@ -36,8 +36,13 @@ const defaultAnthropicVertexStreamDeps: AnthropicVertexStreamDeps = {
streamAnthropic: streamDefault,
};
function isClaudeOpus47Model(modelId: string): boolean {
return modelId.includes("opus-4-7") || modelId.includes("opus-4.7");
/**
 * Reports whether a model id refers to Claude Opus 4.7 or Claude Opus 4.8.
 *
 * This is a case-sensitive substring check that accepts both the dashed
 * (`opus-4-8`) and dotted (`opus-4.8`) version spellings. Despite the
 * "or newer" name, only 4.7 and 4.8 are recognised.
 *
 * @param modelId Model id to inspect.
 * @returns `true` when the id contains one of the four version markers.
 */
function isClaudeOpus47OrNewerModel(modelId: string): boolean {
  const versionMarkers = ["opus-4-8", "opus-4.8", "opus-4-7", "opus-4.7"];
  return versionMarkers.some((marker) => modelId.includes(marker));
}
function isClaudeOpus46Model(modelId: string): boolean {
@@ -46,7 +51,7 @@ function isClaudeOpus46Model(modelId: string): boolean {
function supportsAdaptiveThinking(modelId: string): boolean {
return (
isClaudeOpus47Model(modelId) ||
isClaudeOpus47OrNewerModel(modelId) ||
isClaudeOpus46Model(modelId) ||
modelId.includes("sonnet-4-6") ||
modelId.includes("sonnet-4.6")
@@ -62,7 +67,12 @@ function mapAnthropicAdaptiveEffort(
low: "low",
medium: "medium",
high: "high",
xhigh: isClaudeOpus47Model(modelId) ? "xhigh" : isClaudeOpus46Model(modelId) ? "max" : "high",
xhigh: isClaudeOpus47OrNewerModel(modelId)
? "xhigh"
: isClaudeOpus46Model(modelId)
? "max"
: "high",
max: isClaudeOpus47OrNewerModel(modelId) ? "max" : "high",
};
return effortMap[reasoning] ?? "high";
}
@@ -148,9 +158,10 @@ export function createAnthropicVertexStreamFn(
modelMaxTokens: transportModel.maxTokens,
requestedMaxTokens: options?.maxTokens,
});
const temperature = isClaudeOpus47OrNewerModel(model.id) ? undefined : options?.temperature;
const opts: AnthropicVertexTransportOptions = {
client,
temperature: options?.temperature,
...(temperature !== undefined ? { temperature } : {}),
...(maxTokens !== undefined ? { maxTokens } : {}),
signal: options?.signal,
cacheRetention: options?.cacheRetention,

View File

@@ -2,7 +2,7 @@ import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/string-coer
import { CLAUDE_CLI_BACKEND_ID, CLAUDE_CLI_MODEL_ALIASES } from "./cli-constants.js";
const DEFAULT_CLAUDE_MODEL_BY_FAMILY: Record<string, string> = {
opus: "claude-opus-4-7",
opus: "claude-opus-4-8",
sonnet: "claude-sonnet-4-6",
haiku: "claude-haiku-4-5",
};
@@ -96,18 +96,16 @@ function canonicalizeKnownClaudeCliModelId(modelId: string): string | null {
if (defaultModel) {
return attachModelAuthProfile(defaultModel, split.profile);
}
const family = CLAUDE_CLI_MODEL_ALIASES[normalized];
if (!family) {
return null;
}
const version = normalized.slice(`${family}-`.length);
if (!version || version === normalized) {
return null;
}
return attachModelAuthProfile(`claude-${family}-${version.replaceAll(".", "-")}`, split.profile);
const aliasedModel = CLAUDE_CLI_MODEL_ALIASES[normalized];
return aliasedModel?.startsWith("claude-")
? attachModelAuthProfile(aliasedModel, split.profile)
: null;
}
function upgradeOldClaudeModelId(normalized: string): string | null {
if (normalized.startsWith("claude-opus-4-8") || normalized.startsWith("claude-opus-4.8")) {
return null;
}
if (normalized.startsWith("claude-opus-4-7") || normalized.startsWith("claude-opus-4.7")) {
return null;
}
@@ -124,6 +122,8 @@ function upgradeOldClaudeModelId(normalized: string): string | null {
if (
normalized === "claude-opus-4" ||
hasAnyRetiredVersionPrefix(normalized, [
"claude-opus-4-7",
"claude-opus-4.7",
"claude-opus-4-5",
"claude-opus-4.5",
"claude-opus-4-1",
@@ -133,7 +133,7 @@ function upgradeOldClaudeModelId(normalized: string): string | null {
]) ||
/^claude-opus-4-20\d{6}/.test(normalized)
) {
return "claude-opus-4-7";
return "claude-opus-4-8";
}
if (
normalized === "claude-sonnet-4" ||
@@ -150,7 +150,7 @@ function upgradeOldClaudeModelId(normalized: string): string | null {
return "claude-sonnet-4-6";
}
if (normalized.startsWith("claude-3") && normalized.includes("opus")) {
return "claude-opus-4-7";
return "claude-opus-4-8";
}
if (
normalized.startsWith("claude-3") &&
@@ -164,7 +164,7 @@ function upgradeOldClaudeModelId(normalized: string): string | null {
normalized === "opus-4" ||
normalized === "opus-3"
) {
return "claude-opus-4-7";
return "claude-opus-4-8";
}
if (
normalized === "sonnet-4.5" ||

View File

@@ -5,13 +5,14 @@ import { CLAUDE_CLI_BACKEND_ID, CLAUDE_CLI_DEFAULT_ALLOWLIST_REFS } from "./cli-
const CLAUDE_CLI_DEFAULT_CONTEXT_WINDOW = 200_000;
const CLAUDE_CLI_MODEL_LABELS: Record<string, string> = {
"claude-opus-4-8": "Claude Opus 4.8 (Claude CLI)",
"claude-opus-4-7": "Claude Opus 4.7 (Claude CLI)",
"claude-opus-4-6": "Claude Opus 4.6 (Claude CLI)",
"claude-sonnet-4-6": "Claude Sonnet 4.6 (Claude CLI)",
};
function resolveClaudeCliImageMediaInput(id: string): ModelCatalogEntry["mediaInput"] {
const maxSidePx = id === "claude-opus-4-7" ? 2576 : 1568;
const maxSidePx = id === "claude-opus-4-8" || id === "claude-opus-4-7" ? 2576 : 1568;
return {
image: {
maxSidePx,
@@ -39,13 +40,15 @@ function extractClaudeCliModelIds(): string[] {
}
export function buildClaudeCliCatalogEntries(): ModelCatalogEntry[] {
return extractClaudeCliModelIds().map((id) => ({
id,
name: CLAUDE_CLI_MODEL_LABELS[id] ?? `${id} (Claude CLI)`,
provider: CLAUDE_CLI_BACKEND_ID,
reasoning: true,
input: ["text", "image"],
mediaInput: resolveClaudeCliImageMediaInput(id),
contextWindow: CLAUDE_CLI_DEFAULT_CONTEXT_WINDOW,
}));
return extractClaudeCliModelIds().map((id) => {
return {
id,
name: CLAUDE_CLI_MODEL_LABELS[id] ?? `${id} (Claude CLI)`,
provider: CLAUDE_CLI_BACKEND_ID,
reasoning: true,
input: ["text", "image"],
mediaInput: resolveClaudeCliImageMediaInput(id),
contextWindow: id === "claude-opus-4-8" ? 1_048_576 : CLAUDE_CLI_DEFAULT_CONTEXT_WINDOW,
};
});
}

View File

@@ -1,20 +1,23 @@
export const CLAUDE_CLI_BACKEND_ID = "claude-cli";
export const CLAUDE_CLI_DEFAULT_MODEL_REF = `${CLAUDE_CLI_BACKEND_ID}/claude-opus-4-7`;
export const CLAUDE_CLI_DEFAULT_MODEL_REF = `${CLAUDE_CLI_BACKEND_ID}/claude-opus-4-8`;
export const CLAUDE_CLI_DEFAULT_ALLOWLIST_REFS = [
CLAUDE_CLI_DEFAULT_MODEL_REF,
`${CLAUDE_CLI_BACKEND_ID}/claude-opus-4-7`,
`${CLAUDE_CLI_BACKEND_ID}/claude-sonnet-4-6`,
`${CLAUDE_CLI_BACKEND_ID}/claude-opus-4-6`,
] as const;
export const CLAUDE_CLI_MODEL_ALIASES: Record<string, string> = {
opus: "opus",
"opus-4.7": "opus",
"opus-4.6": "opus",
"claude-opus-4-7": "opus",
"claude-opus-4-6": "opus",
"opus-4.8": "claude-opus-4-8",
"opus-4.7": "claude-opus-4-7",
"opus-4.6": "claude-opus-4-6",
"claude-opus-4-8": "claude-opus-4-8",
"claude-opus-4-7": "claude-opus-4-7",
"claude-opus-4-6": "claude-opus-4-6",
sonnet: "sonnet",
"sonnet-4.6": "sonnet",
"claude-sonnet-4-6": "sonnet",
"sonnet-4.6": "claude-sonnet-4-6",
"claude-sonnet-4-6": "claude-sonnet-4-6",
haiku: "haiku",
};

View File

@@ -38,10 +38,10 @@ afterAll(() => {
describe("anthropic Claude model refs", () => {
it("upgrades retired refs without rewriting future canonical refs", () => {
expect(resolveKnownAnthropicModelRef("anthropic/claude-opus-4-5")).toBe(
"anthropic/claude-opus-4-7",
"anthropic/claude-opus-4-8",
);
expect(resolveKnownAnthropicModelRef("anthropic/claude-opus-4-5@anthropic:work")).toBe(
"anthropic/claude-opus-4-7@anthropic:work",
"anthropic/claude-opus-4-8@anthropic:work",
);
expect(resolveKnownAnthropicModelRef("anthropic/claude-sonnet-4-20250514")).toBe(
"anthropic/claude-sonnet-4-6",
@@ -182,6 +182,7 @@ describe("anthropic cli migration", () => {
alias: "Opus",
agentRuntime: { id: "claude-cli" },
},
"anthropic/claude-opus-4-8": { agentRuntime: { id: "claude-cli" } },
"anthropic/claude-sonnet-4-6": { agentRuntime: { id: "claude-cli" } },
"anthropic/claude-opus-4-6": {
alias: "Opus",
@@ -267,12 +268,13 @@ describe("anthropic cli migration", () => {
},
});
expect(result.defaultModel).toBe("anthropic/claude-opus-4-7");
expect(result.defaultModel).toBe("anthropic/claude-opus-4-8");
expect(result.configPatch).toEqual({
agents: {
defaults: {
models: {
"openai/gpt-5.2": {},
"anthropic/claude-opus-4-8": { agentRuntime: { id: "claude-cli" } },
"anthropic/claude-opus-4-7": { agentRuntime: { id: "claude-cli" } },
"anthropic/claude-sonnet-4-6": { agentRuntime: { id: "claude-cli" } },
"anthropic/claude-opus-4-6": { agentRuntime: { id: "claude-cli" } },
@@ -294,7 +296,7 @@ describe("anthropic cli migration", () => {
},
});
expect(result.defaultModel).toBe("anthropic/claude-opus-4-7");
expect(result.defaultModel).toBe("anthropic/claude-opus-4-8");
expect(result.configPatch?.agents?.defaults?.model).toBeUndefined();
expect(result.configPatch?.agents?.defaults?.models?.["anthropic/gpt-5.2"]).toBeUndefined();
});
@@ -316,6 +318,7 @@ describe("anthropic cli migration", () => {
defaults: {
model: { primary: "anthropic/claude-opus-4-7" },
models: {
"anthropic/claude-opus-4-8": { agentRuntime: { id: "claude-cli" } },
"anthropic/claude-opus-4-7": { agentRuntime: { id: "claude-cli" } },
"anthropic/claude-sonnet-4-6": { agentRuntime: { id: "claude-cli" } },
"anthropic/claude-opus-4-6": { agentRuntime: { id: "claude-cli" } },
@@ -499,6 +502,9 @@ describe("anthropic cli migration", () => {
alias: "Opus",
agentRuntime: { id: "claude-cli" },
});
expect(defaults?.models?.["anthropic/claude-opus-4-8"]).toEqual({
agentRuntime: { id: "claude-cli" },
});
expect(defaults?.models?.["openai/gpt-5.2"]).toEqual({});
});

View File

@@ -224,7 +224,7 @@ export function buildAnthropicCliMigrationResult(
...rewrittenModels.runtimeRefs,
...rewrittenModels.migrated,
]);
const defaultModel = rewrittenModel.primary ?? "anthropic/claude-opus-4-7";
const defaultModel = rewrittenModel.primary ?? "anthropic/claude-opus-4-8";
return {
profiles: buildClaudeCliAuthProfiles(credential),

View File

@@ -76,6 +76,20 @@ describe("normalizeClaudeSettingSourcesArgs", () => {
});
});
// The bare "opus" alias stays a family selector, while every versioned alias
// (dotted short form or full dashed id) resolves to its exact pinned model id.
describe("Claude CLI model aliases", () => {
  it("keeps pinned Claude CLI model refs on exact selectors", () => {
    const aliases = buildAnthropicCliBackend().config.modelAliases;
    const expectedSelectors: ReadonlyArray<readonly [string, string]> = [
      ["opus", "opus"],
      ["opus-4.8", "claude-opus-4-8"],
      ["opus-4.7", "claude-opus-4-7"],
      ["opus-4.6", "claude-opus-4-6"],
      ["claude-opus-4-8", "claude-opus-4-8"],
      ["claude-opus-4-7", "claude-opus-4-7"],
      ["claude-opus-4-6", "claude-opus-4-6"],
    ];

    for (const [alias, selector] of expectedSelectors) {
      expect(aliases?.[alias]).toBe(selector);
    }
  });
});
describe("resolveClaudeCliExecutionArgs", () => {
it("omits effort args when thinking is off", () => {
expect(

View File

@@ -257,6 +257,7 @@ describe("anthropic provider replay hooks", () => {
});
const models = requireRecord(next?.agents?.defaults?.models, "models");
for (const modelId of [
"anthropic/claude-opus-4-8",
"anthropic/claude-opus-4-7",
"anthropic/claude-sonnet-4-6",
"anthropic/claude-opus-4-6",
@@ -444,15 +445,15 @@ describe("anthropic provider replay hooks", () => {
expect(models["anthropic/claude-opus-5-0"]).toBeUndefined();
});
it("resolves explicit claude-opus-4-7 refs from the 4.6 template family", async () => {
it("resolves explicit claude-opus-4-8 refs from the 4.7 template family", async () => {
const provider = await registerSingleProviderPlugin(anthropicPlugin);
const resolved = provider.resolveDynamicModel?.({
provider: "anthropic",
modelId: "claude-opus-4-7",
modelId: "claude-opus-4-8",
modelRegistry: createModelRegistry([
{
id: "claude-opus-4-6",
name: "Claude Opus 4.6",
id: "claude-opus-4-7",
name: "Claude Opus 4.7",
provider: "anthropic",
api: "anthropic-messages",
reasoning: true,
@@ -466,12 +467,22 @@ describe("anthropic provider replay hooks", () => {
expectFields(resolved, {
provider: "anthropic",
id: "claude-opus-4-7",
id: "claude-opus-4-8",
api: "anthropic-messages",
reasoning: true,
contextWindow: 1_048_576,
contextTokens: 1_048_576,
maxTokens: 128_000,
});
const opus48Profile = provider.resolveThinkingProfile?.({
provider: "anthropic",
modelId: "claude-opus-4-8",
} as never);
const opus48LevelIds = levelIds(opus48Profile);
expect(opus48LevelIds).toContain("xhigh");
expect(opus48LevelIds).toContain("adaptive");
expect(opus48LevelIds).toContain("max");
expect(requireRecord(opus48Profile, "opus 4.8 thinking profile").defaultLevel).toBe("off");
const opus47Profile = provider.resolveThinkingProfile?.({
provider: "anthropic",
modelId: "claude-opus-4-7",
@@ -593,6 +604,7 @@ describe("anthropic provider replay hooks", () => {
const provider = await registerSingleProviderPlugin(anthropicPlugin);
for (const [runtimeProvider, modelId] of [
["anthropic", "claude-opus-4-8"],
["anthropic", "claude-opus-4-7"],
["claude-cli", "claude-opus-4.7-20260219"],
["anthropic", "claude-opus-4-6"],
@@ -623,6 +635,32 @@ describe("anthropic provider replay hooks", () => {
}
});
// Feeds normalizeResolvedModel an Opus 4.8 entry carrying the smaller
// 200k-context / 64k-output limits and expects the 1,048,576-token context
// and 128k output limits to come back.
it("normalizes Claude Opus 4.8 to 128k max output tokens", async () => {
  const provider = await registerSingleProviderPlugin(anthropicPlugin);
  const staleOpus48Model = {
    id: "claude-opus-4-8",
    name: "Claude Opus 4.8",
    provider: "anthropic",
    api: "anthropic-messages",
    reasoning: true,
    input: ["text", "image"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 200_000,
    maxTokens: 64_000,
  };

  const result = provider.normalizeResolvedModel?.({
    provider: "anthropic",
    modelId: "claude-opus-4-8",
    model: staleOpus48Model,
  } as never);

  expectFields(result, {
    contextWindow: 1_048_576,
    contextTokens: 1_048_576,
    maxTokens: 128_000,
  });
});
it("does not normalize legacy Claude 4.5 models to 1M context", async () => {
const provider = await registerSingleProviderPlugin(anthropicPlugin);

View File

@@ -7,7 +7,7 @@ import {
export const anthropicMediaUnderstandingProvider: MediaUnderstandingProvider = {
id: "anthropic",
capabilities: ["image"],
defaultModels: { image: "claude-opus-4-7" },
defaultModels: { image: "claude-opus-4-8" },
autoPriority: { image: 20 },
nativeDocumentInputs: ["pdf"],
describeImage: describeImageWithModel,

View File

@@ -8,9 +8,20 @@
"providerCatalogEntry": "./provider-discovery.ts",
"modelCatalog": {
"runtimeAugment": true,
"providers": {
"providers": {
"claude-cli": {
"models": [
{
"id": "claude-opus-4-8",
"name": "Claude Opus 4.8 (Claude CLI)",
"reasoning": true,
"input": ["text", "image"],
"mediaInput": {
"image": { "maxSidePx": 2576, "preferredSidePx": 2576, "tokenMode": "provider" }
},
"contextWindow": 1048576,
"maxTokens": 128000
},
{
"id": "claude-opus-4-7",
"name": "Claude Opus 4.7 (Claude CLI)",
@@ -50,6 +61,17 @@
"baseUrl": "https://api.anthropic.com",
"api": "anthropic-messages",
"models": [
{
"id": "claude-opus-4-8",
"name": "Claude Opus 4.8",
"reasoning": true,
"input": ["text", "image"],
"mediaInput": {
"image": { "maxSidePx": 2576, "preferredSidePx": 2576, "tokenMode": "provider" }
},
"contextWindow": 1048576,
"maxTokens": 128000
},
{
"id": "claude-opus-4-7",
"name": "Claude Opus 4.7",
@@ -98,6 +120,8 @@
"providers": {
"anthropic": {
"aliases": {
"opus-4.8": "claude-opus-4-8",
"opus": "claude-opus-4-8",
"opus-4.6": "claude-opus-4-6",
"sonnet-4.6": "claude-sonnet-4-6"
}
@@ -184,7 +208,7 @@
"anthropic": {
"capabilities": ["image"],
"defaultModels": {
"image": "claude-opus-4-7"
"image": "claude-opus-4-8"
},
"autoPriority": {
"image": 20

View File

@@ -123,10 +123,10 @@ describe("anthropic provider policy public artifact", () => {
).toBe("short");
});
it("exposes Claude Opus 4.7 thinking levels without loading the full provider plugin", () => {
it("exposes Claude Opus 4.8 thinking levels without loading the full provider plugin", () => {
const profile = resolveThinkingProfile({
provider: "anthropic",
modelId: "claude-opus-4-7",
modelId: "claude-opus-4-8",
});
const ids = levelIds(profile?.levels);
expect(ids).toContain("xhigh");

View File

@@ -45,10 +45,13 @@ import { wrapAnthropicProviderStream } from "./stream-wrappers.js";
const PROVIDER_ID = "anthropic";
type UpsertAuthProfileParams = Parameters<typeof upsertAuthProfileWithLock>[0];
const DEFAULT_ANTHROPIC_MODEL = "anthropic/claude-opus-4-7";
const DEFAULT_ANTHROPIC_MODEL = "anthropic/claude-opus-4-8";
const ANTHROPIC_OPUS_48_MODEL_ID = "claude-opus-4-8";
const ANTHROPIC_OPUS_48_DOT_MODEL_ID = "claude-opus-4.8";
const ANTHROPIC_OPUS_47_MODEL_ID = "claude-opus-4-7";
const ANTHROPIC_OPUS_47_DOT_MODEL_ID = "claude-opus-4.7";
const ANTHROPIC_GA_1M_CONTEXT_TOKENS = 1_048_576;
const ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS = 128_000;
const ANTHROPIC_OPUS_46_MODEL_ID = "claude-opus-4-6";
const ANTHROPIC_OPUS_46_DOT_MODEL_ID = "claude-opus-4.6";
const ANTHROPIC_OPUS_47_TEMPLATE_MODEL_IDS = [
@@ -58,6 +61,8 @@ const ANTHROPIC_OPUS_47_TEMPLATE_MODEL_IDS = [
const ANTHROPIC_SONNET_46_MODEL_ID = "claude-sonnet-4-6";
const ANTHROPIC_SONNET_46_DOT_MODEL_ID = "claude-sonnet-4.6";
const ANTHROPIC_GA_1M_MODEL_PREFIXES = [
ANTHROPIC_OPUS_48_MODEL_ID,
ANTHROPIC_OPUS_48_DOT_MODEL_ID,
ANTHROPIC_OPUS_46_MODEL_ID,
ANTHROPIC_OPUS_46_DOT_MODEL_ID,
ANTHROPIC_OPUS_47_MODEL_ID,
@@ -66,6 +71,8 @@ const ANTHROPIC_GA_1M_MODEL_PREFIXES = [
ANTHROPIC_SONNET_46_DOT_MODEL_ID,
] as const;
const ANTHROPIC_MODERN_MODEL_PREFIXES = [
"claude-opus-4-8",
"claude-opus-4.8",
"claude-opus-4-7",
"claude-opus-4.7",
"claude-opus-4-6",
@@ -282,8 +289,10 @@ function buildAnthropicForwardCompatModel(
reasoning: true,
input: ["text", "image"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 200_000,
maxTokens: 64_000,
contextWindow: isAnthropicGa1MModel(trimmedModelId) ? ANTHROPIC_GA_1M_CONTEXT_TOKENS : 200_000,
maxTokens: isAnthropicOpus48Model(trimmedModelId)
? ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS
: 64_000,
};
}
@@ -291,6 +300,14 @@ function resolveAnthropicForwardCompatModel(
ctx: ProviderResolveDynamicModelContext,
): ProviderRuntimeModel | undefined {
return (
resolveAnthropic46ForwardCompatModel({
ctx,
dashModelId: ANTHROPIC_OPUS_48_MODEL_ID,
dotModelId: ANTHROPIC_OPUS_48_DOT_MODEL_ID,
dashTemplateId: ANTHROPIC_OPUS_47_MODEL_ID,
dotTemplateId: ANTHROPIC_OPUS_47_DOT_MODEL_ID,
fallbackTemplateIds: ANTHROPIC_OPUS_47_TEMPLATE_MODEL_IDS,
}) ??
resolveAnthropic46ForwardCompatModel({
ctx,
dashModelId: ANTHROPIC_OPUS_47_MODEL_ID,
@@ -324,6 +341,23 @@ function isAnthropicGa1MModel(modelId: string): boolean {
return ANTHROPIC_GA_1M_MODEL_PREFIXES.some((prefix) => normalized.startsWith(prefix));
}
/**
 * Reports whether a model id belongs to the Claude Opus 4.8 family.
 *
 * The id is first passed through `normalizeLowercaseStringOrEmpty`, then
 * tested as a prefix match against the dashed and dotted Opus 4.8 ids, so
 * suffixed ids with extra trailing text also match.
 *
 * @param modelId Model id to inspect.
 * @returns `true` when the normalized id starts with either Opus 4.8 id.
 */
function isAnthropicOpus48Model(modelId: string): boolean {
  const lowered = normalizeLowercaseStringOrEmpty(modelId);
  return (
    lowered.startsWith(ANTHROPIC_OPUS_48_MODEL_ID) ||
    lowered.startsWith(ANTHROPIC_OPUS_48_DOT_MODEL_ID)
  );
}
/**
 * Reports whether a model id belongs to the Claude Opus 4.7 or 4.8 family.
 *
 * The id is first passed through `normalizeLowercaseStringOrEmpty`, then
 * tested as a prefix match against the dashed and dotted ids of both
 * versions. Despite the "or newer" name, only 4.7 and 4.8 are recognised.
 *
 * @param modelId Model id to inspect.
 * @returns `true` when the normalized id starts with any of the four ids.
 */
function isAnthropicOpus47OrNewerModel(modelId: string): boolean {
  const lowered = normalizeLowercaseStringOrEmpty(modelId);
  const recognisedPrefixes = [
    ANTHROPIC_OPUS_48_MODEL_ID,
    ANTHROPIC_OPUS_48_DOT_MODEL_ID,
    ANTHROPIC_OPUS_47_MODEL_ID,
    ANTHROPIC_OPUS_47_DOT_MODEL_ID,
  ];
  for (const prefix of recognisedPrefixes) {
    if (lowered.startsWith(prefix)) {
      return true;
    }
  }
  return false;
}
function hasConfiguredModelContextOverride(
config: ProviderNormalizeResolvedModelContext["config"],
provider: string,
@@ -393,6 +427,45 @@ function applyAnthropicGa1MContextWindow(params: {
};
}
/**
 * Raises an Opus 4.8 model's `maxTokens` to the Opus 4.8 output cap.
 *
 * @param params.modelId Model id used to decide whether the model is Opus 4.8.
 * @param params.model Resolved model to adjust; it is never mutated.
 * @returns A copy of the model with `maxTokens` set to
 *   `ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS`, or `undefined` when the id is not
 *   Opus 4.8 or the model already has at least that many output tokens.
 */
function applyAnthropicOpus48MaxTokens(params: {
  modelId: string;
  model: ProviderRuntimeModel;
}): ProviderRuntimeModel | undefined {
  const { modelId, model } = params;
  // A missing maxTokens counts as 0, so such models are always raised.
  // NOTE(review): unlike the 1M-context helper this takes no config, so a
  // deliberately lower configured maxTokens would also be raised — confirm intended.
  if (
    !isAnthropicOpus48Model(modelId) ||
    (model.maxTokens ?? 0) >= ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS
  ) {
    return undefined;
  }
  return { ...model, maxTokens: ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS };
}
/**
 * Ensures an Opus 4.7/4.8 model maps the "xhigh" and "max" thinking levels
 * to themselves.
 *
 * @param params.modelId Model id used to decide whether the model qualifies.
 * @param params.model Resolved model to adjust; it is never mutated.
 * @returns A copy of the model whose `thinkingLevelMap` keeps its other
 *   entries but has `xhigh: "xhigh"` and `max: "max"`, or `undefined` when
 *   the id is not Opus 4.7/4.8 or both entries are already set that way.
 */
function applyAnthropicOpusThinkingLevelMap(params: {
  modelId: string;
  model: ProviderRuntimeModel;
}): ProviderRuntimeModel | undefined {
  const { modelId, model } = params;
  if (!isAnthropicOpus47OrNewerModel(modelId)) {
    return undefined;
  }

  const existingMap = model.thinkingLevelMap;
  const alreadyMapped = existingMap?.xhigh === "xhigh" && existingMap?.max === "max";
  if (alreadyMapped) {
    return undefined;
  }

  // Spread first so the two pinned entries win over any differing values.
  return {
    ...model,
    thinkingLevelMap: {
      ...existingMap,
      xhigh: "xhigh",
      max: "max",
    },
  };
}
function matchesAnthropicModernModel(modelId: string): boolean {
const lower = normalizeLowercaseStringOrEmpty(modelId);
return ANTHROPIC_MODERN_MODEL_PREFIXES.some((prefix) => lower.startsWith(prefix));
@@ -413,15 +486,18 @@ function resolveAnthropicImageMediaInput(modelId: string, modelName?: string) {
return undefined;
}
const refs = [modelId, modelName].filter((value): value is string => typeof value === "string");
const opus47 = refs.some((ref) =>
[ANTHROPIC_OPUS_47_MODEL_ID, ANTHROPIC_OPUS_47_DOT_MODEL_ID].some((prefix) =>
normalizeLowercaseStringOrEmpty(ref).startsWith(prefix),
),
const largeImageOpus = refs.some((ref) =>
[
ANTHROPIC_OPUS_48_MODEL_ID,
ANTHROPIC_OPUS_48_DOT_MODEL_ID,
ANTHROPIC_OPUS_47_MODEL_ID,
ANTHROPIC_OPUS_47_DOT_MODEL_ID,
].some((prefix) => normalizeLowercaseStringOrEmpty(ref).startsWith(prefix)),
);
return {
image: {
maxSidePx: opus47 ? 2576 : 1568,
preferredSidePx: opus47 ? 2576 : 1568,
maxSidePx: largeImageOpus ? 2576 : 1568,
preferredSidePx: largeImageOpus ? 2576 : 1568,
tokenMode: "provider" as const,
},
};
@@ -461,13 +537,23 @@ function normalizeAnthropicResolvedModel(
},
}
: imageCapableModel;
const outputModel =
applyAnthropicOpus48MaxTokens({
modelId: ctx.modelId,
model: mediaInputModel,
}) ?? mediaInputModel;
const thinkingLevelModel =
applyAnthropicOpusThinkingLevelMap({
modelId: ctx.modelId,
model: outputModel,
}) ?? outputModel;
const contextWindowModel =
applyAnthropicGa1MContextWindow({
config: ctx.config,
provider: ctx.provider,
modelId: ctx.modelId,
model: mediaInputModel,
}) ?? mediaInputModel;
model: thinkingLevelModel,
}) ?? thinkingLevelModel;
return contextWindowModel === ctx.model ? undefined : contextWindowModel;
}
@@ -682,13 +768,23 @@ export function buildAnthropicProvider(): ProviderPlugin {
modelId: ctx.modelId,
model,
}) ?? model;
const outputModel =
applyAnthropicOpus48MaxTokens({
modelId: ctx.modelId,
model: imageCapableModel,
}) ?? imageCapableModel;
const thinkingLevelModel =
applyAnthropicOpusThinkingLevelMap({
modelId: ctx.modelId,
model: outputModel,
}) ?? outputModel;
return (
applyAnthropicGa1MContextWindow({
config: ctx.config,
provider: ctx.provider,
modelId: ctx.modelId,
model: imageCapableModel,
}) ?? imageCapableModel
model: thinkingLevelModel,
}) ?? thinkingLevelModel
);
},
normalizeResolvedModel: (ctx) => normalizeAnthropicResolvedModel(ctx),

View File

@@ -20,6 +20,8 @@ const log = createSubsystemLogger("anthropic-stream");
const ANTHROPIC_CONTEXT_1M_BETA_LEGACY = "context-1m-2025-08-07";
const ANTHROPIC_GA_1M_MODEL_PREFIXES = [
"claude-opus-4-8",
"claude-opus-4.8",
"claude-opus-4-6",
"claude-opus-4.6",
"claude-opus-4-7",

View File

@@ -417,7 +417,7 @@ function getGoogleThinkingBudget(
effort: ThinkingLevel,
customBudgets?: GoogleTransportOptions["thinkingBudgets"],
): number | undefined {
const normalizedEffort = effort === "xhigh" ? "high" : effort;
const normalizedEffort = effort === "xhigh" || effort === "max" ? "high" : effort;
if (customBudgets?.[normalizedEffort] !== undefined) {
return customBudgets[normalizedEffort];
}

View File

@@ -153,7 +153,7 @@ describe("qa agentic parity report", () => {
it("fails the parity gate when the candidate regresses against baseline", () => {
const comparison = buildQaAgenticParityComparison({
candidateLabel: "openai/gpt-5.5",
baselineLabel: "anthropic/claude-opus-4-7",
baselineLabel: "anthropic/claude-opus-4-8",
candidateSummary: {
scenarios: [
{ name: "Approval turn tool followthrough", status: "pass" },
@@ -181,10 +181,10 @@ describe("qa agentic parity report", () => {
expect(comparison.pass).toBe(false);
expect(comparison.failures).toContain(
"openai/gpt-5.5 completion rate 80.0% is below anthropic/claude-opus-4-7 100.0%.",
"openai/gpt-5.5 completion rate 80.0% is below anthropic/claude-opus-4-8 100.0%.",
);
expect(comparison.failures).toContain(
"openai/gpt-5.5 unintended-stop rate 20.0% exceeds anthropic/claude-opus-4-7 0.0%.",
"openai/gpt-5.5 unintended-stop rate 20.0% exceeds anthropic/claude-opus-4-8 0.0%.",
);
});
@@ -199,7 +199,7 @@ describe("qa agentic parity report", () => {
];
const comparison = buildQaAgenticParityComparison({
candidateLabel: "openai/gpt-5.5",
baselineLabel: "anthropic/claude-opus-4-7",
baselineLabel: "anthropic/claude-opus-4-8",
candidateSummary: {
scenarios: baselineScenarios.filter(
(scenario) => scenario.name !== "Extra non-parity lane",
@@ -211,14 +211,14 @@ describe("qa agentic parity report", () => {
expect(comparison.pass).toBe(false);
expect(comparison.failures).toContain(
"Scenario coverage mismatch for Extra non-parity lane: openai/gpt-5.5=missing, anthropic/claude-opus-4-7=pass.",
"Scenario coverage mismatch for Extra non-parity lane: openai/gpt-5.5=missing, anthropic/claude-opus-4-8=pass.",
);
});
it("reports each missing required parity scenario exactly once (no double-counting)", () => {
const comparison = buildQaAgenticParityComparison({
candidateLabel: "openai/gpt-5.5",
baselineLabel: "anthropic/claude-opus-4-7",
baselineLabel: "anthropic/claude-opus-4-8",
candidateSummary: {
scenarios: [{ name: "Approval turn tool followthrough", status: "pass" }],
},
@@ -260,7 +260,7 @@ describe("qa agentic parity report", () => {
const comparison = buildQaAgenticParityComparison({
candidateLabel: "openai/gpt-5.5",
baselineLabel: "anthropic/claude-opus-4-7",
baselineLabel: "anthropic/claude-opus-4-8",
candidateSummary: summaryWithExtras,
baselineSummary: scopedSummary,
comparedAt: "2026-04-11T00:00:00.000Z",
@@ -282,7 +282,7 @@ describe("qa agentic parity report", () => {
it("fails the parity gate when required parity scenarios are missing on both sides", () => {
const comparison = buildQaAgenticParityComparison({
candidateLabel: "openai/gpt-5.5",
baselineLabel: "anthropic/claude-opus-4-7",
baselineLabel: "anthropic/claude-opus-4-8",
candidateSummary: {
scenarios: [{ name: "Approval turn tool followthrough", status: "pass" }],
},
@@ -294,14 +294,14 @@ describe("qa agentic parity report", () => {
expect(comparison.pass).toBe(false);
expect(comparison.failures).toContain(
"Missing required parity scenario coverage for Image understanding from attachment: openai/gpt-5.5=missing, anthropic/claude-opus-4-7=missing.",
"Missing required parity scenario coverage for Image understanding from attachment: openai/gpt-5.5=missing, anthropic/claude-opus-4-8=missing.",
);
});
it("fails the parity gate when required parity scenarios are skipped", () => {
const comparison = buildQaAgenticParityComparison({
candidateLabel: "openai/gpt-5.5",
baselineLabel: "anthropic/claude-opus-4-7",
baselineLabel: "anthropic/claude-opus-4-8",
candidateSummary: {
scenarios: [
{ name: "Approval turn tool followthrough", status: "pass" },
@@ -325,7 +325,7 @@ describe("qa agentic parity report", () => {
expect(comparison.pass).toBe(false);
expect(comparison.failures).toContain(
"Missing required parity scenario coverage for Compaction retry after mutating tool: openai/gpt-5.5=skip, anthropic/claude-opus-4-7=skip.",
"Missing required parity scenario coverage for Compaction retry after mutating tool: openai/gpt-5.5=skip, anthropic/claude-opus-4-8=skip.",
);
});
@@ -342,7 +342,7 @@ describe("qa agentic parity report", () => {
});
const comparison = buildQaAgenticParityComparison({
candidateLabel: "openai/gpt-5.5",
baselineLabel: "anthropic/claude-opus-4-7",
baselineLabel: "anthropic/claude-opus-4-8",
candidateSummary: { scenarios: scenariosWithBothFail },
baselineSummary: { scenarios: scenariosWithBothFail },
comparedAt: "2026-04-11T00:00:00.000Z",
@@ -350,7 +350,7 @@ describe("qa agentic parity report", () => {
expect(comparison.pass).toBe(false);
expect(comparison.failures).toContain(
"Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-7=fail.",
"Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-8=fail.",
);
// Metric comparisons are relative, so a same-on-both-sides failure
// must not appear as a relative metric failure. The required-scenario
@@ -370,7 +370,7 @@ describe("qa agentic parity report", () => {
});
const comparison = buildQaAgenticParityComparison({
candidateLabel: "openai/gpt-5.5",
baselineLabel: "anthropic/claude-opus-4-7",
baselineLabel: "anthropic/claude-opus-4-8",
candidateSummary: { scenarios: candidateWithOneFail },
baselineSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
comparedAt: "2026-04-11T00:00:00.000Z",
@@ -378,7 +378,7 @@ describe("qa agentic parity report", () => {
expect(comparison.pass).toBe(false);
expect(comparison.failures).toContain(
"Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-7=pass.",
"Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-8=pass.",
);
});
@@ -387,7 +387,7 @@ describe("qa agentic parity report", () => {
// below is the isolated gate failure under test (no coverage-gap noise).
const comparison = buildQaAgenticParityComparison({
candidateLabel: "openai/gpt-5.5",
baselineLabel: "anthropic/claude-opus-4-7",
baselineLabel: "anthropic/claude-opus-4-8",
candidateSummary: {
scenarios: FULL_PARITY_PASS_SCENARIOS,
},
@@ -401,7 +401,7 @@ describe("qa agentic parity report", () => {
expect(comparison.pass).toBe(false);
expect(comparison.failures).toEqual([
"anthropic/claude-opus-4-7 produced 1 suspicious pass result(s); baseline fake-success count must also be 0.",
"anthropic/claude-opus-4-8 produced 1 suspicious pass result(s); baseline fake-success count must also be 0.",
]);
});
@@ -571,14 +571,14 @@ status=done`,
expect(() =>
buildQaAgenticParityComparison({
candidateLabel: "openai/gpt-5.5",
baselineLabel: "anthropic/claude-opus-4-7",
baselineLabel: "anthropic/claude-opus-4-8",
candidateSummary: {
scenarios: parityPassScenarios,
run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-7" },
run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-8" },
},
baselineSummary: {
scenarios: parityPassScenarios,
run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-7" },
run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-8" },
},
comparedAt: "2026-04-11T00:00:00.000Z",
}),
@@ -593,7 +593,7 @@ status=done`,
expect(() =>
buildQaAgenticParityComparison({
candidateLabel: "openai/gpt-5.5",
baselineLabel: "anthropic/claude-opus-4-7",
baselineLabel: "anthropic/claude-opus-4-8",
candidateSummary: {
scenarios: parityPassScenarios,
run: { primaryProvider: "openai" },
@@ -612,7 +612,7 @@ status=done`,
it("accepts matching run.primaryProvider labels without throwing", () => {
const comparison = buildQaAgenticParityComparison({
candidateLabel: "openai/gpt-5.5",
baselineLabel: "anthropic/claude-opus-4-7",
baselineLabel: "anthropic/claude-opus-4-8",
candidateSummary: {
scenarios: FULL_PARITY_PASS_SCENARIOS,
run: {
@@ -625,8 +625,8 @@ status=done`,
scenarios: FULL_PARITY_PASS_SCENARIOS,
run: {
primaryProvider: "anthropic",
primaryModel: "anthropic/claude-opus-4-7",
primaryModelName: "claude-opus-4-7",
primaryModel: "anthropic/claude-opus-4-8",
primaryModelName: "claude-opus-4-8",
},
},
comparedAt: "2026-04-11T00:00:00.000Z",
@@ -639,7 +639,7 @@ status=done`,
// work against those, trusting the caller-supplied label.
const comparison = buildQaAgenticParityComparison({
candidateLabel: "openai/gpt-5.5",
baselineLabel: "anthropic/claude-opus-4-7",
baselineLabel: "anthropic/claude-opus-4-8",
candidateSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
baselineSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
comparedAt: "2026-04-11T00:00:00.000Z",
@@ -650,7 +650,7 @@ status=done`,
it("skips provider verification for arbitrary display labels when run metadata is present", () => {
const comparison = buildQaAgenticParityComparison({
candidateLabel: "GPT-5.5 candidate",
baselineLabel: "Opus 4.7 baseline",
baselineLabel: "Opus 4.8 baseline",
candidateSummary: {
scenarios: FULL_PARITY_PASS_SCENARIOS,
run: {
@@ -663,8 +663,8 @@ status=done`,
scenarios: FULL_PARITY_PASS_SCENARIOS,
run: {
primaryProvider: "anthropic",
primaryModel: "anthropic/claude-opus-4-7",
primaryModelName: "claude-opus-4-7",
primaryModel: "anthropic/claude-opus-4-8",
primaryModelName: "claude-opus-4-8",
},
},
comparedAt: "2026-04-11T00:00:00.000Z",
@@ -676,7 +676,7 @@ status=done`,
it("skips provider verification for mixed-case or decorated display labels", () => {
const comparison = buildQaAgenticParityComparison({
candidateLabel: "Candidate: GPT-5.5",
baselineLabel: "Opus 4.7 / baseline",
baselineLabel: "Opus 4.8 / baseline",
candidateSummary: {
scenarios: FULL_PARITY_PASS_SCENARIOS,
run: {
@@ -689,8 +689,8 @@ status=done`,
scenarios: FULL_PARITY_PASS_SCENARIOS,
run: {
primaryProvider: "anthropic",
primaryModel: "anthropic/claude-opus-4-7",
primaryModelName: "claude-opus-4-7",
primaryModel: "anthropic/claude-opus-4-8",
primaryModelName: "claude-opus-4-8",
},
},
comparedAt: "2026-04-11T00:00:00.000Z",
@@ -703,7 +703,7 @@ status=done`,
expect(() =>
buildQaAgenticParityComparison({
candidateLabel: "openai/gpt-5.5",
baselineLabel: "anthropic/claude-opus-4-7",
baselineLabel: "anthropic/claude-opus-4-8",
candidateSummary: {
scenarios: FULL_PARITY_PASS_SCENARIOS,
run: {
@@ -716,8 +716,8 @@ status=done`,
scenarios: FULL_PARITY_PASS_SCENARIOS,
run: {
primaryProvider: "anthropic",
primaryModel: "anthropic/claude-opus-4-7",
primaryModelName: "claude-opus-4-7",
primaryModel: "anthropic/claude-opus-4-8",
primaryModelName: "claude-opus-4-8",
},
},
comparedAt: "2026-04-11T00:00:00.000Z",
@@ -730,7 +730,7 @@ status=done`,
it("accepts colon-delimited structured labels when provider and model both match", () => {
const comparison = buildQaAgenticParityComparison({
candidateLabel: "openai:gpt-5.5",
baselineLabel: "anthropic:claude-opus-4-7",
baselineLabel: "anthropic:claude-opus-4-8",
candidateSummary: {
scenarios: FULL_PARITY_PASS_SCENARIOS,
run: {
@@ -743,8 +743,8 @@ status=done`,
scenarios: FULL_PARITY_PASS_SCENARIOS,
run: {
primaryProvider: "anthropic",
primaryModel: "anthropic/claude-opus-4-7",
primaryModelName: "claude-opus-4-7",
primaryModel: "anthropic/claude-opus-4-8",
primaryModelName: "claude-opus-4-8",
},
},
comparedAt: "2026-04-11T00:00:00.000Z",
@@ -759,7 +759,7 @@ status=done`,
// added by the second-wave expansion.
const comparison = buildQaAgenticParityComparison({
candidateLabel: "openai/gpt-5.5",
baselineLabel: "anthropic/claude-opus-4-7",
baselineLabel: "anthropic/claude-opus-4-8",
candidateSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
baselineSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
comparedAt: "2026-04-11T00:00:00.000Z",
@@ -768,7 +768,7 @@ status=done`,
const report = renderQaAgenticParityMarkdownReport(comparison);
expect(report).toContain(
"# OpenClaw Agentic Parity Report — openai/gpt-5.5 vs anthropic/claude-opus-4-7",
"# OpenClaw Agentic Parity Report — openai/gpt-5.5 vs anthropic/claude-opus-4-8",
);
expect(report).toContain("| Completion rate | 100.0% | 100.0% |");
expect(report).toContain("### Approval turn tool followthrough");
@@ -779,7 +779,7 @@ status=done`,
// Regression for the loop-7 Copilot finding: callers that configure
// non-gpt-5.5 / non-opus labels (for example an internal candidate vs
// another candidate) must see the labels in the rendered H1 instead of
// the hardcoded "GPT-5.5 / Opus 4.7" title that would otherwise confuse
// the hardcoded "GPT-5.5 / Opus 4.8" title that would otherwise confuse
// readers of saved reports.
const comparison = buildQaAgenticParityComparison({
candidateLabel: "openai/gpt-5.5-alt",

View File

@@ -566,7 +566,7 @@ export function renderQaAgenticParityMarkdownReport(comparison: QaAgenticParityC
// Title is parametrized from the candidate / baseline labels so reports
// for any candidate/baseline pair (not only the default gpt-5.5 vs Opus pair) render
// with an accurate header. The default CLI labels are still
// openai/gpt-5.5 vs anthropic/claude-opus-4-7, but the helper works for
// openai/gpt-5.5 vs anthropic/claude-opus-4-8, but the helper works for
// any parity comparison a caller configures.
const lines = [
`# OpenClaw Agentic Parity Report — ${comparison.candidateLabel} vs ${comparison.baselineLabel}`,

View File

@@ -274,7 +274,7 @@ describe("runQaCharacterEval", () => {
{ model: "openai/gpt-5.5", rank: 1, score: 8, summary: "ok" },
{ model: "openai/gpt-5.2", rank: 2, score: 7.5, summary: "ok" },
{ model: "openai/gpt-5", rank: 3, score: 7.2, summary: "ok" },
{ model: "anthropic/claude-opus-4-7", rank: 4, score: 7, summary: "ok" },
{ model: "anthropic/claude-opus-4-8", rank: 4, score: 7, summary: "ok" },
{ model: "anthropic/claude-sonnet-4-6", rank: 5, score: 6.8, summary: "ok" },
{ model: "zai/glm-5.1", rank: 6, score: 6.3, summary: "ok" },
{ model: "moonshot/kimi-k2.5", rank: 7, score: 6.2, summary: "ok" },
@@ -294,7 +294,7 @@ describe("runQaCharacterEval", () => {
"openai/gpt-5.5",
"openai/gpt-5.2",
"openai/gpt-5",
"anthropic/claude-opus-4-7",
"anthropic/claude-opus-4-8",
"anthropic/claude-sonnet-4-6",
"zai/glm-5.1",
"moonshot/kimi-k2.5",
@@ -323,7 +323,7 @@ describe("runQaCharacterEval", () => {
expect(runJudge).toHaveBeenCalledTimes(2);
expect(runJudge.mock.calls.map(([params]) => params.judgeModel)).toEqual([
"openai/gpt-5.5",
"anthropic/claude-opus-4-7",
"anthropic/claude-opus-4-8",
]);
expect(runJudge.mock.calls.map(([params]) => params.judgeThinkingDefault)).toEqual([
"xhigh",
@@ -577,11 +577,11 @@ describe("runQaCharacterEval", () => {
candidateModelOptions: {
"openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: false },
},
judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-7"],
judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-8"],
judgeThinkingDefault: "medium",
judgeModelOptions: {
"openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: true },
"anthropic/claude-opus-4-7": { thinkingDefault: "high" },
"anthropic/claude-opus-4-8": { thinkingDefault: "high" },
},
runSuite,
runJudge,

View File

@@ -662,7 +662,7 @@ describe("qa cli runtime", () => {
repoRoot: "/tmp/openclaw-repo",
providerMode: "mock-openai",
primaryModel: "openai/gpt-5.5",
alternateModel: "anthropic/claude-opus-4-7",
alternateModel: "anthropic/claude-opus-4-8",
preflight: true,
});
@@ -672,7 +672,7 @@ describe("qa cli runtime", () => {
transportId: "qa-channel",
providerMode: "mock-openai",
primaryModel: "openai/gpt-5.5",
alternateModel: "anthropic/claude-opus-4-7",
alternateModel: "anthropic/claude-opus-4-8",
scenarioIds: ["approval-turn-tool-followthrough"],
concurrency: 1,
});
@@ -1261,7 +1261,7 @@ describe("qa cli runtime", () => {
fast: true,
thinking: "medium",
modelThinking: ["codex-cli/test-model=medium"],
judgeModel: ["openai/gpt-5.5,thinking=xhigh,fast", "anthropic/claude-opus-4-7,thinking=high"],
judgeModel: ["openai/gpt-5.5,thinking=xhigh,fast", "anthropic/claude-opus-4-8,thinking=high"],
judgeTimeoutMs: 180_000,
blindJudgeModels: true,
concurrency: 4,
@@ -1282,10 +1282,10 @@ describe("qa cli runtime", () => {
"openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: false },
"codex-cli/test-model": { thinkingDefault: "high", fastMode: true },
},
judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-7"],
judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-8"],
judgeModelOptions: {
"openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: true },
"anthropic/claude-opus-4-7": { thinkingDefault: "high" },
"anthropic/claude-opus-4-8": { thinkingDefault: "high" },
},
judgeTimeoutMs: 180_000,
judgeBlindModels: true,
@@ -1616,7 +1616,7 @@ describe("qa cli runtime", () => {
providerMode: "mock-openai",
parityPack: "agentic",
primaryModel: "openai/gpt-5.5",
alternateModel: "anthropic/claude-opus-4-7",
alternateModel: "anthropic/claude-opus-4-8",
});
expect(runQaSuiteFromRuntime).toHaveBeenCalledWith({
@@ -1625,7 +1625,7 @@ describe("qa cli runtime", () => {
transportId: "qa-channel",
providerMode: "mock-openai",
primaryModel: "openai/gpt-5.5",
alternateModel: "anthropic/claude-opus-4-7",
alternateModel: "anthropic/claude-opus-4-8",
fastMode: undefined,
scenarioIds: [
"approval-turn-tool-followthrough",

View File

@@ -8,7 +8,7 @@ describe("qa live timeout policy", () => {
{
providerMode: "mock-openai",
primaryModel: "anthropic/claude-sonnet-4-6",
alternateModel: "anthropic/claude-opus-4-7",
alternateModel: "anthropic/claude-opus-4-8",
},
30_000,
),
@@ -47,7 +47,7 @@ describe("qa live timeout policy", () => {
{
providerMode: "live-frontier",
primaryModel: "anthropic/claude-sonnet-4-6",
alternateModel: "anthropic/claude-opus-4-7",
alternateModel: "anthropic/claude-opus-4-8",
},
30_000,
),
@@ -60,10 +60,10 @@ describe("qa live timeout policy", () => {
{
providerMode: "live-frontier",
primaryModel: "anthropic/claude-sonnet-4-6",
alternateModel: "anthropic/claude-opus-4-7",
alternateModel: "anthropic/claude-opus-4-8",
},
30_000,
"anthropic/claude-opus-4-7",
"anthropic/claude-opus-4-8",
),
).toBe(240_000);
});

View File

@@ -9,7 +9,7 @@ export const QA_FRONTIER_CHARACTER_EVAL_MODELS = Object.freeze([
"openai/gpt-5.5",
"openai/gpt-5.2",
"openai/gpt-5",
"anthropic/claude-opus-4-7",
"anthropic/claude-opus-4-8",
"anthropic/claude-sonnet-4-6",
"zai/glm-5.1",
"moonshot/kimi-k2.5",
@@ -25,12 +25,12 @@ export const QA_FRONTIER_CHARACTER_THINKING_BY_MODEL: Readonly<Record<string, Qa
export const QA_FRONTIER_CHARACTER_JUDGE_MODELS = Object.freeze([
"openai/gpt-5.5",
"anthropic/claude-opus-4-7",
"anthropic/claude-opus-4-8",
]);
export const QA_FRONTIER_CHARACTER_JUDGE_MODEL_OPTIONS: Readonly<
Record<string, QaFrontierCharacterModelOptions>
> = Object.freeze({
"openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: true },
"anthropic/claude-opus-4-7": { thinkingDefault: "high" },
"anthropic/claude-opus-4-8": { thinkingDefault: "high" },
});

View File

@@ -1,2 +1,2 @@
export const QA_FRONTIER_PARITY_CANDIDATE_LABEL = "openai/gpt-5.5";
export const QA_FRONTIER_PARITY_BASELINE_LABEL = "anthropic/claude-opus-4-7";
export const QA_FRONTIER_PARITY_BASELINE_LABEL = "anthropic/claude-opus-4-8";

View File

@@ -3464,7 +3464,7 @@ describe("qa mock openai server", () => {
expect(outputText(await response.json())).toBe("NO_REPLY");
});
it("advertises Anthropic claude-opus-4-7 baseline model on /v1/models", async () => {
it("advertises Anthropic claude-opus-4-8 baseline model on /v1/models", async () => {
const server = await startQaMockOpenAiServer({
host: "127.0.0.1",
port: 0,
@@ -3477,7 +3477,7 @@ describe("qa mock openai server", () => {
expect(response.status).toBe(200);
const body = (await response.json()) as { data: Array<{ id: string }> };
const ids = body.data.map((entry) => entry.id);
expect(ids).toContain("claude-opus-4-7");
expect(ids).toContain("claude-opus-4-8");
expect(ids).toContain("gpt-5.5");
});
@@ -3494,7 +3494,7 @@ describe("qa mock openai server", () => {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
model: "claude-opus-4-7",
model: "claude-opus-4-8",
max_tokens: 256,
messages: [
{
@@ -3519,7 +3519,7 @@ describe("qa mock openai server", () => {
};
expect(body.type).toBe("message");
expect(body.role).toBe("assistant");
expect(body.model).toBe("claude-opus-4-7");
expect(body.model).toBe("claude-opus-4-8");
expect(body.stop_reason).toBe("tool_use");
const toolUseBlock = body.content.find((block) => block.type === "tool_use") as
| { name: string; input: Record<string, unknown> }
@@ -3530,7 +3530,7 @@ describe("qa mock openai server", () => {
const debugResponse = await fetch(`${server.baseUrl}/debug/last-request`);
expect(debugResponse.status).toBe(200);
const debugPayload = requireRecord(await debugResponse.json(), "debug request");
expect(debugPayload.model).toBe("claude-opus-4-7");
expect(debugPayload.model).toBe("claude-opus-4-8");
expect(debugPayload.plannedToolName).toBe("read");
});
@@ -3541,7 +3541,7 @@ describe("qa mock openai server", () => {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
model: "claude-opus-4-7",
model: "claude-opus-4-8",
max_tokens: 256,
tools: [
{
@@ -3581,7 +3581,7 @@ describe("qa mock openai server", () => {
const debugResponse = await fetch(`${server.baseUrl}/debug/last-request`);
expect(debugResponse.status).toBe(200);
const debugPayload = requireRecord(await debugResponse.json(), "debug request");
expect(debugPayload.model).toBe("claude-opus-4-7");
expect(debugPayload.model).toBe("claude-opus-4-8");
expect(debugPayload.plannedToolName).toBe("sessions_spawn");
});
@@ -3605,7 +3605,7 @@ describe("qa mock openai server", () => {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
model: "claude-opus-4-7",
model: "claude-opus-4-8",
max_tokens: 256,
messages: [
{
@@ -3679,7 +3679,7 @@ describe("qa mock openai server", () => {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
model: "claude-opus-4-7",
model: "claude-opus-4-8",
max_tokens: 256,
messages: [
{
@@ -3760,7 +3760,7 @@ describe("qa mock openai server", () => {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
model: "claude-opus-4-7",
model: "claude-opus-4-8",
max_tokens: 256,
stream: true,
messages: [
@@ -3801,7 +3801,7 @@ describe("qa mock openai server", () => {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
model: "claude-opus-4-7",
model: "claude-opus-4-8",
max_tokens: 256,
stream: true,
messages: [
@@ -3860,7 +3860,7 @@ describe("qa mock openai server", () => {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
model: "claude-opus-4-7",
model: "claude-opus-4-8",
max_tokens: 256,
stream: true,
system: [
@@ -3903,7 +3903,7 @@ describe("qa mock openai server", () => {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
model: "claude-opus-4-7",
model: "claude-opus-4-8",
max_tokens: 256,
stream: true,
system: [
@@ -3948,7 +3948,7 @@ describe("qa mock openai server", () => {
const response = await fetch(`${server.baseUrl}/v1/messages`, {
method: "POST",
headers: { "content-type": "application/json" },
body: '{"model":"claude-opus-4-7","messages":[',
body: '{"model":"claude-opus-4-8","messages":[',
});
expect(response.status).toBe(400);
@@ -3961,12 +3961,12 @@ describe("qa mock openai server", () => {
expect(body.error.message).toContain("Malformed JSON body");
});
it("defaults empty-string Anthropic /v1/messages model to claude-opus-4-7", async () => {
it("defaults empty-string Anthropic /v1/messages model to claude-opus-4-8", async () => {
// Regression for the loop-7 Copilot finding: a bare `typeof
// body.model === "string"` check lets an empty-string model leak
// through to `lastRequest.model` and `responseBody.model`. Empty
// strings must be treated the same as absent and default to
// `"claude-opus-4-7"` so parity consumers can trust the echoed label.
// `"claude-opus-4-8"` so parity consumers can trust the echoed label.
const server = await startQaMockOpenAiServer({
host: "127.0.0.1",
port: 0,
@@ -3991,12 +3991,12 @@ describe("qa mock openai server", () => {
});
expect(response.status).toBe(200);
const body = (await response.json()) as { model: string };
expect(body.model).toBe("claude-opus-4-7");
expect(body.model).toBe("claude-opus-4-8");
const debugResponse = await fetch(`${server.baseUrl}/debug/last-request`);
expect(debugResponse.status).toBe(200);
const debug = (await debugResponse.json()) as { model: string };
expect(debug.model).toBe("claude-opus-4-7");
expect(debug.model).toBe("claude-opus-4-8");
});
it("scripts a reasoning-only recovery sequence after a replay-safe read", async () => {
@@ -4247,9 +4247,9 @@ describe("resolveProviderVariant", () => {
});
it("tags prefix-qualified anthropic models", () => {
expect(resolveProviderVariant("anthropic/claude-opus-4-7")).toBe("anthropic");
expect(resolveProviderVariant("anthropic:claude-opus-4-7")).toBe("anthropic");
expect(resolveProviderVariant("claude-cli/claude-opus-4-7")).toBe("anthropic");
expect(resolveProviderVariant("anthropic/claude-opus-4-8")).toBe("anthropic");
expect(resolveProviderVariant("anthropic:claude-opus-4-8")).toBe("anthropic");
expect(resolveProviderVariant("claude-cli/claude-opus-4-8")).toBe("anthropic");
});
it("tags bare model names by prefix", () => {
@@ -4257,7 +4257,7 @@ describe("resolveProviderVariant", () => {
expect(resolveProviderVariant("gpt-5.5-alt")).toBe("openai");
expect(resolveProviderVariant("gpt-4.5")).toBe("openai");
expect(resolveProviderVariant("o1-preview")).toBe("openai");
expect(resolveProviderVariant("claude-opus-4-7")).toBe("anthropic");
expect(resolveProviderVariant("claude-opus-4-8")).toBe("anthropic");
expect(resolveProviderVariant("claude-sonnet-4-6")).toBe("anthropic");
});
@@ -4293,7 +4293,7 @@ describe("qa mock openai server provider variant tagging", () => {
const anthropicSourceServer = await startMockServer();
const anthropicSource = await expectResponsesJson(anthropicSourceServer, {
model: "anthropic/claude-opus-4-7",
model: "anthropic/claude-opus-4-8",
stream: false,
input: [makeUserInput(sourcePrompt)],
});
@@ -4312,7 +4312,7 @@ describe("qa mock openai server provider variant tagging", () => {
const anthropicHandoffServer = await startMockServer();
const anthropicHandoff = await expectResponsesJson(anthropicHandoffServer, {
model: "claude-opus-4-7",
model: "claude-opus-4-8",
stream: false,
input: [makeUserInput(handoffPrompt)],
});
@@ -4335,7 +4335,7 @@ describe("qa mock openai server provider variant tagging", () => {
const anthropicFanoutServer = await startMockServer();
const anthropicFanout = await expectResponsesJson(anthropicFanoutServer, {
model: "anthropic/claude-opus-4-7",
model: "anthropic/claude-opus-4-8",
stream: false,
tools: [SESSIONS_SPAWN_TOOL],
input: [makeUserInput(fanoutPrompt)],
@@ -4386,7 +4386,7 @@ describe("qa mock openai server provider variant tagging", () => {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
model: "claude-opus-4-7",
model: "claude-opus-4-8",
max_tokens: 256,
messages: [{ role: "user", content: "Heartbeat check" }],
}),
@@ -4396,7 +4396,7 @@ describe("qa mock openai server provider variant tagging", () => {
model: string;
providerVariant: string;
};
expect(debug.model).toBe("claude-opus-4-7");
expect(debug.model).toBe("claude-opus-4-8");
expect(debug.providerVariant).toBe("anthropic");
});

View File

@@ -81,7 +81,7 @@ export function resolveProviderVariant(model: string | undefined): MockOpenAiPro
return "anthropic";
}
// Fall back to model-name prefix matching for bare model strings like
// `gpt-5.5` or `claude-opus-4-7`.
// `gpt-5.5` or `claude-opus-4-8`.
if (/^(?:gpt-|o1-|openai-)/.test(trimmed)) {
return "openai";
}
@@ -645,7 +645,7 @@ function execCommandFromToolProgressPrompt(prompt: string) {
function buildMockFunctionCall(name: string, args: Record<string, unknown>) {
const serialized = JSON.stringify(args);
const callSuffix = createHash("sha1")
const callSuffix = createHash("sha256")
.update(name)
.update("\0")
.update(serialized)
@@ -2591,7 +2591,7 @@ async function buildResponsesPayload(
//
// The QA parity gate needs two comparable scenario runs: one against the
// "candidate" (openai/gpt-5.5) and one against the "baseline"
// (anthropic/claude-opus-4-7). The OpenAI mock above already dispatches all
// (anthropic/claude-opus-4-8). The OpenAI mock above already dispatches all
// the scenario prompt branches we care about. Rather than duplicating that
// machinery, the /v1/messages route below translates Anthropic request
// shapes into the shared ResponsesInputItem[] format, calls the same
@@ -2814,7 +2814,7 @@ function buildAnthropicMessageResponse(params: {
id: `msg_mock_${Math.floor(Math.random() * 1_000_000).toString(16)}`,
type: "message",
role: "assistant",
model: params.model || "claude-opus-4-7",
model: params.model || "claude-opus-4-8",
content,
stop_reason: stopReason,
stop_sequence: null,
@@ -2842,7 +2842,7 @@ function buildAnthropicMessageStreamEvents(params: {
id: messageId,
type: "message",
role: "assistant",
model: params.model || "claude-opus-4-7",
model: params.model || "claude-opus-4-8",
content: [],
stop_reason: null,
stop_sequence: null,
@@ -2941,7 +2941,7 @@ async function buildMessagesPayload(
// which then confuses parity consumers that assume the mock always
// echoes the real provider label. Normalize once and reuse everywhere.
const normalizedModel =
typeof body.model === "string" && body.model.trim() !== "" ? body.model : "claude-opus-4-7";
typeof body.model === "string" && body.model.trim() !== "" ? body.model : "claude-opus-4-8";
// Dispatch through the same scenario logic the /v1/responses route uses.
// Preserve declared tools so route-specific adapters mirror what the
// real provider request made available to the model.
@@ -2986,7 +2986,7 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n
{ id: "gpt-5.5-alt", object: "model" },
{ id: "gpt-image-1", object: "model" },
{ id: "text-embedding-3-small", object: "model" },
{ id: "claude-opus-4-7", object: "model" },
{ id: "claude-opus-4-8", object: "model" },
{ id: "claude-sonnet-4-6", object: "model" },
],
});

View File

@@ -71,14 +71,14 @@ function createMockAnthropicMessagesProvider(baseUrl: string): ModelProviderConf
},
models: [
{
id: "claude-opus-4-7",
name: "claude-opus-4-7",
id: "claude-opus-4-8",
name: "claude-opus-4-8",
api: "anthropic-messages",
reasoning: false,
input: ["text", "image"],
cost: ZERO_COST,
contextWindow: 200_000,
maxTokens: 4096,
contextWindow: 1_048_576,
maxTokens: 128_000,
},
{
id: "claude-sonnet-4-6",

View File

@@ -108,12 +108,12 @@ describe("buildQaGatewayConfig", () => {
workspaceDir: "/tmp/qa-workspace",
providerMode: "mock-openai",
primaryModel: "openai/gpt-5.5",
alternateModel: "anthropic/claude-opus-4-7",
alternateModel: "anthropic/claude-opus-4-8",
});
expect(getPrimaryModel(cfg.agents?.defaults?.model)).toBe("openai/gpt-5.5");
expect(getModelFallbacks(cfg.agents?.defaults?.model)).toEqual(["anthropic/claude-opus-4-7"]);
expect(getModelFallbacks(cfg.agents?.list?.[0]?.model)).toEqual(["anthropic/claude-opus-4-7"]);
expect(getModelFallbacks(cfg.agents?.defaults?.model)).toEqual(["anthropic/claude-opus-4-8"]);
expect(getModelFallbacks(cfg.agents?.list?.[0]?.model)).toEqual(["anthropic/claude-opus-4-8"]);
expect(cfg.models?.providers?.openai?.api).toBe("openai-responses");
expect(cfg.models?.providers?.openai?.request).toEqual({ allowPrivateNetwork: true });
expect(cfg.models?.providers?.openai?.models.map((model) => model.id)).toContain("gpt-5.5");
@@ -121,7 +121,7 @@ describe("buildQaGatewayConfig", () => {
expect(cfg.models?.providers?.anthropic?.baseUrl).toBe("http://127.0.0.1:44080");
expect(cfg.models?.providers?.anthropic?.request).toEqual({ allowPrivateNetwork: true });
expect(cfg.models?.providers?.anthropic?.models.map((model) => model.id)).toContain(
"claude-opus-4-7",
"claude-opus-4-8",
);
expect(cfg.plugins?.allow).toEqual(["acpx", "memory-core"]);
});

View File

@@ -198,7 +198,7 @@ describe("qa suite planning helpers", () => {
makeQaSuiteTestScenario("anthropic-only", {
config: {
requiredProvider: "anthropic",
requiredModel: "claude-opus-4-7",
requiredModel: "claude-opus-4-8",
},
}),
];
@@ -384,7 +384,7 @@ describe("qa suite planning helpers", () => {
config: { requiredProvider: "openai", requiredModel: "gpt-5.5" },
}),
makeQaSuiteTestScenario("anthropic-only", {
config: { requiredProvider: "anthropic", requiredModel: "claude-opus-4-7" },
config: { requiredProvider: "anthropic", requiredModel: "claude-opus-4-8" },
}),
makeQaSuiteTestScenario("claude-subscription", {
config: { requiredProvider: "claude-cli", authMode: "subscription" },

View File

@@ -67,12 +67,12 @@ describe("buildQaSuiteSummaryJson", () => {
it("records an Anthropic baseline lane cleanly for parity runs", () => {
const json = buildQaSuiteSummaryJson({
...baseParams,
primaryModel: "anthropic/claude-opus-4-7",
primaryModel: "anthropic/claude-opus-4-8",
alternateModel: "anthropic/claude-sonnet-4-6",
});
expect(json.run.primaryModel).toBe("anthropic/claude-opus-4-7");
expect(json.run.primaryModel).toBe("anthropic/claude-opus-4-8");
expect(json.run.primaryProvider).toBe("anthropic");
expect(json.run.primaryModelName).toBe("claude-opus-4-7");
expect(json.run.primaryModelName).toBe("claude-opus-4-8");
expect(json.run.alternateModel).toBe("anthropic/claude-sonnet-4-6");
expect(json.run.alternateProvider).toBe("anthropic");
expect(json.run.alternateModelName).toBe("claude-sonnet-4-6");

View File

@@ -3,7 +3,7 @@ import type { TSchema } from "typebox";
export type Api = string;
export type CacheRetention = "none" | "short" | "long";
export type Transport = "sse" | "websocket" | "websocket-cached" | "auto";
export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh" | "max";
export type ModelThinkingLevel = "off" | ThinkingLevel;
export type MaybePromise<T> = T | Promise<T>;
@@ -17,6 +17,7 @@ export interface ThinkingBudgets {
low?: number;
medium?: number;
high?: number;
max?: number;
}
export interface DiagnosticErrorInfo {

View File

@@ -285,7 +285,7 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
* Note: "xhigh" is only supported by selected model families. Use model thinking-level metadata
* from openclaw/plugin-sdk/llm to detect support for a concrete model.
*/
export type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh";
export type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh" | "max";
/**
* Extensible interface for custom app messages.

View File

@@ -12,7 +12,7 @@ coverage:
objective: Verify the regular Anthropic Opus lane can complete a quick chat turn using API-key auth.
successCriteria:
- A live-frontier run fails fast unless the selected primary provider is anthropic.
- The selected primary model is Anthropic Opus 4.7.
- The selected primary model is Anthropic Opus 4.8.
- The QA gateway worker has an Anthropic API key available through environment auth.
- The agent replies through the regular Anthropic provider.
docsRefs:
@@ -24,10 +24,10 @@ codeRefs:
- extensions/qa-lab/src/suite.ts
execution:
kind: flow
summary: Run with `pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-7 --alt-model anthropic/claude-opus-4-7 --scenario anthropic-opus-api-key-smoke`.
summary: Run with `pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-8 --alt-model anthropic/claude-opus-4-8 --scenario anthropic-opus-api-key-smoke`.
config:
requiredProvider: anthropic
requiredModel: claude-opus-4-7
requiredModel: claude-opus-4-8
chatPrompt: "Anthropic Opus API key smoke. Reply exactly: ANTHROPIC-OPUS-API-KEY-OK"
chatExpected: ANTHROPIC-OPUS-API-KEY-OK
```

View File

@@ -12,7 +12,7 @@ coverage:
objective: Verify the regular Anthropic Opus lane can complete a quick chat turn using setup-token auth.
successCriteria:
- A live-frontier run fails fast unless the selected primary provider is anthropic.
- The selected primary model is Anthropic Opus 4.7.
- The selected primary model is Anthropic Opus 4.8.
- The QA gateway worker stages a token auth profile in the isolated agent store.
- The agent replies through the regular Anthropic provider.
docsRefs:
@@ -24,10 +24,10 @@ codeRefs:
- extensions/qa-lab/src/suite.ts
execution:
kind: flow
summary: Run with `OPENCLAW_LIVE_SETUP_TOKEN_VALUE=<setup-token> pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-7 --alt-model anthropic/claude-opus-4-7 --scenario anthropic-opus-setup-token-smoke`.
summary: Run with `OPENCLAW_LIVE_SETUP_TOKEN_VALUE=<setup-token> pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-8 --alt-model anthropic/claude-opus-4-8 --scenario anthropic-opus-setup-token-smoke`.
config:
requiredProvider: anthropic
requiredModel: claude-opus-4-7
requiredModel: claude-opus-4-8
profileId: "anthropic:qa-setup-token"
chatPrompt: "Anthropic Opus setup-token smoke. Reply exactly: ANTHROPIC-OPUS-SETUP-TOKEN-OK"
chatExpected: ANTHROPIC-OPUS-SETUP-TOKEN-OK

View File

@@ -79,7 +79,7 @@ async function main() {
const output = runtime.lines.join("\n");
assertOutputIncludes(
output,
"[crestodian] planner: claude-cli/claude-opus-4-7",
"[crestodian] planner: claude-cli/claude-opus-4-8",
"configless planner did not use Claude CLI fallback",
);
assertOutputIncludes(

View File

@@ -116,6 +116,7 @@ function makeAnthropicTransportModel(
baseUrl?: string;
reasoning?: boolean;
maxTokens?: number;
thinkingLevelMap?: AnthropicMessagesModel["thinkingLevelMap"];
headers?: Record<string, string>;
requestTransport?: RequestTransportConfig;
} = {},
@@ -132,6 +133,7 @@ function makeAnthropicTransportModel(
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 200000,
maxTokens: params.maxTokens ?? 8192,
...(params.thinkingLevelMap ? { thinkingLevelMap: params.thinkingLevelMap } : {}),
...(params.headers ? { headers: params.headers } : {}),
} satisfies AnthropicMessagesModel,
params.requestTransport ?? {
@@ -1867,10 +1869,10 @@ describe("anthropic transport stream", () => {
expect(payload.output_config).toEqual({ effort: "max" });
});
it("maps xhigh thinking effort for Claude Opus 4.7 transport runs", async () => {
it("maps xhigh thinking effort for Claude Opus 4.8 transport runs", async () => {
const model = makeAnthropicTransportModel({
id: "claude-opus-4-7",
name: "Claude Opus 4.7",
id: "claude-opus-4-8",
name: "Claude Opus 4.8",
maxTokens: 8192,
});
@@ -1889,4 +1891,51 @@ describe("anthropic transport stream", () => {
expect(payload.thinking).toEqual({ type: "adaptive" });
expect(payload.output_config).toEqual({ effort: "xhigh" });
});
it("preserves max thinking effort for Claude Opus 4.8 transport runs", async () => {
  // Opus 4.8 model whose metadata maps both extended levels explicitly.
  const opus48Model = makeAnthropicTransportModel({
    id: "claude-opus-4-8",
    name: "Claude Opus 4.8",
    maxTokens: 8192,
    thinkingLevelMap: { xhigh: "xhigh", max: "max" },
  });
  const streamContext = {
    messages: [{ role: "user", content: "Think as much as needed." }],
  } as AnthropicStreamContext;
  const streamOptions = {
    apiKey: "sk-ant-api",
    reasoning: "max",
  } as AnthropicStreamOptions;

  await runTransportStream(opus48Model, streamContext, streamOptions);

  // The request must stay adaptive and carry the requested "max" effort unchanged.
  const request = latestAnthropicRequest();
  expect(request.payload.thinking).toEqual({ type: "adaptive" });
  expect(request.payload.output_config).toEqual({ effort: "max" });
});
it("clamps max thinking effort for Claude models without native max support", async () => {
  // Sonnet 4.6 is built without a thinkingLevelMap, so no "max" mapping is supplied.
  const sonnet46Model = makeAnthropicTransportModel({
    id: "claude-sonnet-4-6",
    name: "Claude Sonnet 4.6",
    maxTokens: 8192,
  });
  const streamContext = {
    messages: [{ role: "user", content: "Think as much as supported." }],
  } as AnthropicStreamContext;
  const streamOptions = {
    apiKey: "sk-ant-api",
    reasoning: "max",
  } as AnthropicStreamOptions;

  await runTransportStream(sonnet46Model, streamContext, streamOptions);

  // The request stays adaptive, but the effort is expected to be lowered to "high".
  const request = latestAnthropicRequest();
  expect(request.payload.thinking).toEqual({ type: "adaptive" });
  expect(request.payload.output_config).toEqual({ effort: "high" });
});
});

View File

@@ -113,8 +113,13 @@ type MutableAssistantOutput = {
const EMPTY_ANTHROPIC_MESSAGES_FALLBACK_TEXT = ".";
function isClaudeOpus47Model(modelId: string): boolean {
return modelId.includes("opus-4-7") || modelId.includes("opus-4.7");
/**
 * Reports whether a model id refers to Claude Opus 4.7 or Claude Opus 4.8.
 *
 * Matching is by substring, and both the dashed ("opus-4-8") and dotted
 * ("opus-4.8") version spellings are accepted.
 *
 * @param modelId - Model identifier to inspect.
 * @returns `true` when the id contains any of the known Opus 4.7/4.8 markers.
 */
function isClaudeOpus47OrNewerModel(modelId: string): boolean {
  const versionMarkers = ["opus-4-8", "opus-4.8", "opus-4-7", "opus-4.7"];
  return versionMarkers.some((marker) => modelId.includes(marker));
}
function isClaudeOpus46Model(modelId: string): boolean {
@@ -123,7 +128,7 @@ function isClaudeOpus46Model(modelId: string): boolean {
function supportsAdaptiveThinking(modelId: string): boolean {
return (
isClaudeOpus47Model(modelId) ||
isClaudeOpus47OrNewerModel(modelId) ||
isClaudeOpus46Model(modelId) ||
modelId.includes("sonnet-4-6") ||
modelId.includes("sonnet-4.6")
@@ -138,17 +143,19 @@ function mapThinkingLevelToEffort(level: ThinkingLevel, modelId: string): Anthro
case "medium":
return "medium";
case "xhigh":
if (isClaudeOpus47Model(modelId)) {
if (isClaudeOpus47OrNewerModel(modelId)) {
return "xhigh";
}
return isClaudeOpus46Model(modelId) ? "max" : "high";
case "max":
return isClaudeOpus47OrNewerModel(modelId) ? "max" : "high";
default:
return "high";
}
}
function clampReasoningLevel(level: ThinkingLevel): "minimal" | "low" | "medium" | "high" {
return level === "xhigh" ? "high" : level;
return level === "xhigh" || level === "max" ? "high" : level;
}
function resolvePositiveAnthropicMaxTokens(value: unknown): number | undefined {

View File

@@ -1849,7 +1849,7 @@ describe("shouldSkipLocalCliCredentialEpoch", () => {
input: "stdin",
sessionMode: "existing",
modelAliases: {
"claude-opus-4-7": "opus",
"claude-opus-4-8": "opus",
},
},
},
@@ -1873,7 +1873,7 @@ describe("shouldSkipLocalCliCredentialEpoch", () => {
workspaceDir: dir,
prompt: "latest ask",
provider: "claude-cli",
model: "claude-opus-4-7",
model: "claude-opus-4-8",
timeoutMs: 1_000,
runId: "run-auto-claude-alias-reseed-history-chars",
config: createCliBackendConfig(),

View File

@@ -89,7 +89,9 @@ const prepareDeps = {
};
const CLAUDE_CLI_CONTEXT_MODEL_ALIASES: Record<string, string> = {
opus: "claude-opus-4-7",
opus: "claude-opus-4-8",
"opus-4.8": "claude-opus-4-8",
"opus-4-8": "claude-opus-4-8",
"opus-4.7": "claude-opus-4-7",
"opus-4-7": "claude-opus-4-7",
"opus-4.6": "claude-opus-4-6",

View File

@@ -68,11 +68,13 @@ describe("applyDiscoveredContextWindows", () => {
applyDiscoveredContextWindows({
cache,
models: [
{ id: "claude-cli/claude-opus-4.8-20260514", contextWindow: 200_000 },
{ id: "claude-cli/claude-opus-4.7-20260219", contextWindow: 200_000 },
{ id: "claude-cli/claude-sonnet-4-6", contextWindow: 200_000 },
],
});
expect(cache.get("claude-cli/claude-opus-4.8-20260514")).toBe(ANTHROPIC_CONTEXT_1M_TOKENS);
expect(cache.get("claude-cli/claude-opus-4.7-20260219")).toBe(ANTHROPIC_CONTEXT_1M_TOKENS);
expect(cache.get("claude-cli/claude-sonnet-4-6")).toBe(ANTHROPIC_CONTEXT_1M_TOKENS);
});

View File

@@ -36,6 +36,8 @@ type ProviderConfigEntry = {
type ModelsConfig = { providers?: Record<string, ProviderConfigEntry | undefined> };
const ANTHROPIC_GA_1M_MODEL_PREFIXES = [
"claude-opus-4-8",
"claude-opus-4.8",
"claude-opus-4-6",
"claude-opus-4.6",
"claude-opus-4-7",

View File

@@ -0,0 +1,8 @@
import { describe, expect, it } from "vitest";
import { mapThinkingLevel } from "./utils.js";
describe("mapThinkingLevel", () => {
  it("maps adaptive to the provider-owned high effort default", () => {
    // "adaptive" is expected to come back as the "high" level.
    const mappedLevel = mapThinkingLevel("adaptive");
    expect(mappedLevel).toBe("high");
  });
});

View File

@@ -8,19 +8,14 @@ export function normalizeContextTokenBudget(value: unknown): number | undefined
}
export function mapThinkingLevel(level?: ThinkLevel): ThinkingLevel {
// agent runtime supports "xhigh"; OpenClaw enables it for specific models.
// agent runtime supports elevated levels; OpenClaw enables them for specific models.
if (!level) {
return "off";
}
if (level === "max") {
return "xhigh";
}
// "adaptive" maps to "medium" at the agent runtime layer. The provider adapter
// provider then translates this to `thinking.type: "adaptive"` with
// `output_config.effort: "medium"` for models that support it (Opus 4.6,
// Sonnet 4.6).
// Runtime streams do not expose a distinct adaptive level. Preserve the
// provider-owned adaptive default by using Claude's documented high effort.
if (level === "adaptive") {
return "medium";
return "high";
}
return level;
}

View File

@@ -10,6 +10,7 @@ type ModelRef = {
};
const HIGH_SIGNAL_LIVE_MODEL_PRIORITY = [
"anthropic/claude-opus-4-8",
"anthropic/claude-sonnet-4-6",
"anthropic/claude-opus-4-7",
"google/gemini-3.1-pro-preview",

View File

@@ -661,6 +661,7 @@ describe("isPrioritizedHighSignalLiveModelRef", () => {
it("lists priority refs as provider/id pairs", () => {
expect(listPrioritizedHighSignalLiveModelRefs()).toStrictEqual([
{ provider: "anthropic", id: "claude-opus-4-8" },
{ provider: "anthropic", id: "claude-sonnet-4-6" },
{ provider: "anthropic", id: "claude-opus-4-7" },
{ provider: "google", id: "gemini-3.1-pro-preview" },
@@ -713,6 +714,7 @@ describe("isPrioritizedSmallLiveModelRef", () => {
describe("selectHighSignalLiveItems", () => {
it("prefers curated Google replacements before fallback provider spread", () => {
const items = [
{ provider: "anthropic", id: "claude-opus-4-8" },
{ provider: "anthropic", id: "claude-sonnet-4-6" },
{ provider: "anthropic", id: "claude-opus-4-7" },
{ provider: "anthropic", id: "claude-opus-4-6" },
@@ -731,10 +733,10 @@ describe("selectHighSignalLiveItems", () => {
(item) => item.provider,
),
).toEqual([
{ provider: "anthropic", id: "claude-opus-4-8" },
{ provider: "anthropic", id: "claude-sonnet-4-6" },
{ provider: "anthropic", id: "claude-opus-4-7" },
{ provider: "google", id: "gemini-3.1-pro-preview" },
{ provider: "google", id: "gemini-3-flash-preview" },
]);
});

View File

@@ -147,6 +147,33 @@ const ANTHROPIC_OPUS_47_CATALOG = [
},
];
/**
 * Builds a single-entry model catalog describing Claude Opus 4.8 for one provider.
 * Every call returns a fresh array and entry, so the catalogs below share no state.
 */
function buildOpus48Catalog(provider: string) {
  return [
    {
      provider,
      id: "claude-opus-4-8",
      name: "Claude Opus 4.8",
      reasoning: true,
    },
  ];
}

// One catalog per Claude-backed provider.
const ANTHROPIC_OPUS_48_CATALOG = buildOpus48Catalog("anthropic");
const ANTHROPIC_VERTEX_OPUS_48_CATALOG = buildOpus48Catalog("anthropic-vertex");
const CLAUDE_CLI_OPUS_48_CATALOG = buildOpus48Catalog("claude-cli");
function resolveAnthropicOpusThinking(cfg: OpenClawConfig) {
return resolveThinkingDefault({
cfg,
@@ -165,6 +192,33 @@ function resolveAnthropicOpus47Thinking(cfg: OpenClawConfig) {
});
}
/**
 * Resolves the default thinking level for `claude-opus-4-8` on the `anthropic`
 * provider, using the Anthropic Opus 4.8 catalog.
 *
 * @param cfg - Configuration to resolve against.
 * @returns Whatever `resolveThinkingDefault` yields for this provider/model pair.
 */
function resolveAnthropicOpus48Thinking(cfg: OpenClawConfig) {
  const provider = "anthropic";
  const model = "claude-opus-4-8";
  return resolveThinkingDefault({ cfg, provider, model, catalog: ANTHROPIC_OPUS_48_CATALOG });
}
/**
 * Resolves the default thinking level for `claude-opus-4-8` on the
 * `anthropic-vertex` provider, using the Vertex Opus 4.8 catalog.
 *
 * @param cfg - Configuration to resolve against.
 * @returns Whatever `resolveThinkingDefault` yields for this provider/model pair.
 */
function resolveAnthropicVertexOpus48Thinking(cfg: OpenClawConfig) {
  const provider = "anthropic-vertex";
  const model = "claude-opus-4-8";
  return resolveThinkingDefault({
    cfg,
    provider,
    model,
    catalog: ANTHROPIC_VERTEX_OPUS_48_CATALOG,
  });
}
/**
 * Resolves the default thinking level for `claude-opus-4-8` on the `claude-cli`
 * provider, using the Claude CLI Opus 4.8 catalog.
 *
 * @param cfg - Configuration to resolve against.
 * @returns Whatever `resolveThinkingDefault` yields for this provider/model pair.
 */
function resolveClaudeCliOpus48Thinking(cfg: OpenClawConfig) {
  const provider = "claude-cli";
  const model = "claude-opus-4-8";
  return resolveThinkingDefault({ cfg, provider, model, catalog: CLAUDE_CLI_OPUS_48_CATALOG });
}
function createAgentFallbackConfig(params: {
primary?: string;
fallbacks?: string[];
@@ -2300,6 +2354,42 @@ describe("model-selection", () => {
expect(resolveAnthropicOpus47Thinking(cfg)).toBe("off");
});
it("leaves explicitly configured Anthropic Opus 4.8 thinking off by default", () => {
  // Only the primary model is configured; the config sets no thinking level.
  const primary = "anthropic/claude-opus-4-8";
  const cfg = { agents: { defaults: { model: { primary } } } } as OpenClawConfig;

  const resolvedLevel = resolveAnthropicOpus48Thinking(cfg);

  expect(resolvedLevel).toBe("off");
});
it("leaves explicitly configured Anthropic Vertex Opus 4.8 thinking off by default", () => {
  // Only the primary model is configured; the config sets no thinking level.
  const primary = "anthropic-vertex/claude-opus-4-8";
  const cfg = { agents: { defaults: { model: { primary } } } } as OpenClawConfig;

  const resolvedLevel = resolveAnthropicVertexOpus48Thinking(cfg);

  expect(resolvedLevel).toBe("off");
});
it("leaves explicitly configured Claude CLI Opus 4.8 thinking off by default", () => {
  // Only the primary model is configured; the config sets no thinking level.
  const primary = "claude-cli/claude-opus-4-8";
  const cfg = { agents: { defaults: { model: { primary } } } } as OpenClawConfig;

  const resolvedLevel = resolveClaudeCliOpus48Thinking(cfg);

  expect(resolvedLevel).toBe("off");
});
it("uses bundled provider thinking defaults when no explicit config overrides them", () => {
const cfg = {} as OpenClawConfig;

View File

@@ -57,8 +57,18 @@ export function resolveThinkingDefault(params: {
if (configured) {
return configured;
}
const isClaudeProvider =
normalizedProvider === "anthropic" ||
normalizedProvider === "anthropic-vertex" ||
normalizedProvider === "claude-cli";
if (
normalizedProvider === "anthropic" &&
isClaudeProvider &&
(normalizedModel.startsWith("claude-opus-4-8") || normalizedModel.startsWith("claude-opus-4.8"))
) {
return "off";
}
if (
isClaudeProvider &&
(normalizedModel.startsWith("claude-opus-4-7") || normalizedModel.startsWith("claude-opus-4.7"))
) {
return "off";

View File

@@ -96,6 +96,7 @@ const ThinkingLevelMapSchema = Type.Object({
medium: Type.Optional(ThinkingLevelMapValueSchema),
high: Type.Optional(ThinkingLevelMapValueSchema),
xhigh: Type.Optional(ThinkingLevelMapValueSchema),
max: Type.Optional(ThinkingLevelMapValueSchema),
});
const OpenAICompletionsCompatSchema = Type.Object({

View File

@@ -11,7 +11,7 @@ import type { ThinkingLevel } from "../runtime/index.js";
import { DEFAULT_THINKING_LEVEL } from "./defaults.js";
import type { ModelRegistry } from "./model-registry.js";
const VALID_THINKING_LEVELS = ["off", "minimal", "low", "medium", "high", "xhigh"] as const;
const VALID_THINKING_LEVELS = ["off", "minimal", "low", "medium", "high", "xhigh", "max"] as const;
function isValidThinkingLevel(level: string): level is ThinkingLevel {
return VALID_THINKING_LEVELS.includes(level as ThinkingLevel);

View File

@@ -65,6 +65,26 @@ function createResourceLoaderWithHandlers(
}
describe("createAgentSession tool defaults", () => {
it("forwards max thinking budgets from settings to the agent", async () => {
  const expectedBudgets = { high: 16_384, max: 32_768 };

  const { session } = await createAgentSession({
    model: testModel,
    resourceLoader: createEmptyResourceLoader(),
    sessionManager: SessionManager.inMemory(),
    // Hand the settings their own copy so the assertion compares against an untouched object.
    settingsManager: SettingsManager.inMemory({
      thinkingBudgets: { ...expectedBudgets },
    }),
    modelRegistry: ModelRegistry.inMemory(AuthStorage.inMemory()),
  });

  const forwardedBudgets = session.agent.thinkingBudgets;
  expect(forwardedBudgets).toEqual(expectedBudgets);
});
it("keeps custom tools active when only builtin tools are disabled", async () => {
const customTool: ToolDefinition = {
name: "custom_lookup",

View File

@@ -47,6 +47,7 @@ export interface ThinkingBudgetsSettings {
low?: number;
medium?: number;
high?: number;
max?: number;
}
export interface MarkdownSettings {
@@ -78,7 +79,7 @@ export interface Settings {
lastChangelogVersion?: string;
defaultProvider?: string;
defaultModel?: string;
defaultThinkingLevel?: "off" | "minimal" | "low" | "medium" | "high" | "xhigh";
defaultThinkingLevel?: "off" | "minimal" | "low" | "medium" | "high" | "xhigh" | "max";
transport?: TransportSetting; // default: "auto"
steeringMode?: "all" | "one-at-a-time";
followUpMode?: "all" | "one-at-a-time";
@@ -668,11 +669,21 @@ export class SettingsManager {
this.save();
}
getDefaultThinkingLevel(): "off" | "minimal" | "low" | "medium" | "high" | "xhigh" | undefined {
getDefaultThinkingLevel():
| "off"
| "minimal"
| "low"
| "medium"
| "high"
| "xhigh"
| "max"
| undefined {
return this.settings.defaultThinkingLevel;
}
setDefaultThinkingLevel(level: "off" | "minimal" | "low" | "medium" | "high" | "xhigh"): void {
setDefaultThinkingLevel(
level: "off" | "minimal" | "low" | "medium" | "high" | "xhigh" | "max",
): void {
this.globalSettings.defaultThinkingLevel = level;
this.markModified("defaultThinkingLevel");
this.save();

View File

@@ -3,7 +3,7 @@ import type { OpenClawConfig } from "../../config/config.js";
import { resolvePdfModelConfigForTool } from "./pdf-tool.model-config.js";
import { resetPdfToolAuthEnv } from "./pdf-tool.test-support.js";
const ANTHROPIC_PDF_MODEL = "anthropic/claude-opus-4-7";
const ANTHROPIC_PDF_MODEL = "anthropic/claude-opus-4-8";
const TEST_AGENT_DIR = "/tmp/openclaw-pdf-model-config";
vi.mock("./model-config.helpers.js", () => ({

View File

@@ -27,7 +27,7 @@ let defaultWarnState: WarnState = { warned: false };
const DEFAULT_MODEL_ALIASES: Readonly<Record<string, string>> = {
// Anthropic (shared model runtime catalog uses "latest" ids without date suffix)
opus: "anthropic/claude-opus-4-7",
opus: "anthropic/claude-opus-4-8",
sonnet: "anthropic/claude-sonnet-4-6",
// OpenAI

View File

@@ -75,7 +75,7 @@ describe("applyModelDefaults", () => {
agents: {
defaults: {
models: {
"anthropic/claude-opus-4-7": {},
"anthropic/claude-opus-4-8": {},
"openai/gpt-5.4": {},
},
},
@@ -83,7 +83,7 @@ describe("applyModelDefaults", () => {
} satisfies OpenClawConfig;
const next = applyModelDefaults(cfg);
expect(next.agents?.defaults?.models?.["anthropic/claude-opus-4-7"]?.alias).toBe("opus");
expect(next.agents?.defaults?.models?.["anthropic/claude-opus-4-8"]?.alias).toBe("opus");
expect(next.agents?.defaults?.models?.["openai/gpt-5.4"]?.alias).toBe("gpt");
});
@@ -92,7 +92,7 @@ describe("applyModelDefaults", () => {
agents: {
defaults: {
models: {
"anthropic/claude-opus-4-7": { alias: "Opus" },
"anthropic/claude-opus-4-8": { alias: "Opus" },
},
},
},
@@ -100,7 +100,7 @@ describe("applyModelDefaults", () => {
const next = applyModelDefaults(cfg);
expect(next.agents?.defaults?.models?.["anthropic/claude-opus-4-7"]?.alias).toBe("Opus");
expect(next.agents?.defaults?.models?.["anthropic/claude-opus-4-8"]?.alias).toBe("Opus");
});
it("respects explicit empty alias disables", () => {

View File

@@ -2,6 +2,7 @@ import type {
AnthropicMessagesCompat,
OpenAICompletionsCompat,
OpenAIResponsesCompat,
ThinkingLevelMap,
} from "../llm/types.js";
import type { AgentRuntimePolicyConfig } from "./types.agents-shared.js";
import type { ConfiguredModelProviderRequest } from "./types.provider-request.js";
@@ -152,6 +153,8 @@ export type ModelDefinitionConfig = {
*/
contextTokens?: number;
maxTokens: number;
/** Maps OpenClaw thinking levels to provider/model-specific values. */
thinkingLevelMap?: ThinkingLevelMap;
/** Provider-specific request/runtime parameters passed through to provider plugins. */
params?: Record<string, unknown>;
/** Optional agent execution runtime override for this provider/model pair. */

View File

@@ -1,7 +1,7 @@
import type { OpenClawConfig } from "../config/types.openclaw.js";
import type { CrestodianOverview } from "./overview.js";
const CRESTODIAN_CLAUDE_CLI_MODEL = "claude-opus-4-7";
const CRESTODIAN_CLAUDE_CLI_MODEL = "claude-opus-4-8";
const CRESTODIAN_CODEX_MODEL = "gpt-5.5";
type CrestodianLocalPlannerBackend = {

View File

@@ -141,12 +141,12 @@ describe("Crestodian assistant", () => {
}
expect(result.command).toBe("status");
expect(result.reply).toBe("Checking the shell.");
expect(result.modelLabel).toBe("claude-cli/claude-opus-4-7");
expect(result.modelLabel).toBe("claude-cli/claude-opus-4-8");
expect(runCliAgent).toHaveBeenCalledTimes(1);
const firstCliCall = firstMockArg(runCliAgent);
expect(firstCliCall.provider).toBe("claude-cli");
expect(firstCliCall.model).toBe("claude-opus-4-7");
expect(firstCliCall.model).toBe("claude-opus-4-8");
expect(firstCliCall.cleanupCliLiveSessionOnRunEnd).toBe(true);
const firstCliConfig = requireRecord(firstCliCall.config);
const firstCliAgents = requireRecord(firstCliConfig.agents);

View File

@@ -110,8 +110,8 @@ const PLUGIN_UNINSTALL_RE =
/^(?:(?:plugins?)\s+(?:uninstall|remove)|(?:uninstall|remove)\s+plugins?)\s+(?<pluginId>[A-Za-z0-9_.@/-]+)$/i;
const OPENAI_API_DEFAULT_MODEL_REF = `${DEFAULT_PROVIDER}/${DEFAULT_MODEL}`;
const ANTHROPIC_API_DEFAULT_MODEL_REF = "anthropic/claude-opus-4-7";
const CLAUDE_CLI_DEFAULT_MODEL_REF = "claude-cli/claude-opus-4-7";
const ANTHROPIC_API_DEFAULT_MODEL_REF = "anthropic/claude-opus-4-8";
const CLAUDE_CLI_DEFAULT_MODEL_REF = "claude-cli/claude-opus-4-8";
const CODEX_APP_SERVER_DEFAULT_MODEL_REF = "openai/gpt-5.5";
export function parseCrestodianOperation(input: string): CrestodianOperation {

View File

@@ -17,6 +17,7 @@ const EXTENDED_THINKING_LEVELS: ModelThinkingLevel[] = [
"medium",
"high",
"xhigh",
"max",
];
export function getSupportedThinkingLevels<TApi extends Api>(
@@ -31,7 +32,7 @@ export function getSupportedThinkingLevels<TApi extends Api>(
if (mapped === null) {
return false;
}
if (level === "xhigh") {
if (level === "xhigh" || level === "max") {
return mapped !== undefined;
}
return true;

View File

@@ -19,7 +19,7 @@ vi.mock("@anthropic-ai/sdk", () => ({
},
}));
import { streamAnthropic } from "./anthropic.js";
import { streamAnthropic, streamSimpleAnthropic } from "./anthropic.js";
function createSseResponse(events: Record<string, unknown>[] = []): Response {
const body = events.map((event) => `data: ${JSON.stringify(event)}\n\n`).join("");
@@ -167,4 +167,30 @@ describe("Anthropic provider", () => {
},
]);
});
it("clamps max adaptive effort when the Claude model does not advertise it", async () => {
  // Receives whatever request payload the provider reports through onPayload.
  let capturedPayload: unknown;
  const sonnet46Model = makeAnthropicModel({
    id: "claude-sonnet-4-6",
    name: "Claude Sonnet 4.6",
  });

  const stream = streamSimpleAnthropic(
    sonnet46Model,
    {
      messages: [{ role: "user", content: "hello", timestamp: 0 }],
    },
    {
      apiKey: "sk-ant-provider",
      reasoning: "max",
      onPayload: (payload) => {
        capturedPayload = payload;
      },
    },
  );
  await stream.result();

  // "max" was requested, but the outgoing effort is expected to be "high".
  const { output_config: outputConfig } = capturedPayload as { output_config?: unknown };
  expect(outputConfig).toEqual({
    effort: "high",
  });
});
});

View File

@@ -7,7 +7,7 @@ import type {
RawMessageStreamEvent,
} from "@anthropic-ai/sdk/resources/messages.js";
import { getEnvApiKey } from "../env-api-keys.js";
import { calculateCost } from "../model-utils.js";
import { calculateCost, clampThinkingLevel } from "../model-utils.js";
import type {
AnthropicMessagesCompat,
Api,
@@ -183,20 +183,20 @@ function getAnthropicCompat(model: Model<"anthropic-messages">): Required<Anthro
export interface AnthropicOptions extends StreamOptions {
/**
* Enable extended thinking.
* For Opus 4.6 and Sonnet 4.6: uses adaptive thinking (model decides when/how much to think).
* For Opus 4.6+ and Sonnet 4.6: uses adaptive thinking (model decides when/how much to think).
* For older models: uses budget-based thinking with thinkingBudgetTokens.
*/
thinkingEnabled?: boolean;
/**
* Token budget for extended thinking (older models only).
* Ignored for Opus 4.6 and Sonnet 4.6, which use adaptive thinking.
* Ignored for Opus 4.6+ and Sonnet 4.6, which use adaptive thinking.
*/
thinkingBudgetTokens?: number;
/**
* Effort level for adaptive thinking (Opus 4.6+ and Sonnet 4.6).
* Controls how much thinking Claude allocates:
* - "max": Always thinks with no constraints (only for models that advertise max effort)
* - "xhigh": Highest reasoning level (Opus 4.7)
* - "xhigh": Highest reasoning level (Opus 4.7+)
* - "high": Always thinks, deep reasoning (default)
* - "medium": Moderate thinking, may skip for simple queries
* - "low": Minimal thinking, skips for simple tasks
@@ -210,7 +210,7 @@ export interface AnthropicOptions extends StreamOptions {
* signature still travels back for multi-turn continuity. Use for faster
* time-to-first-text-token when your UI does not surface thinking.
*
* Note: Anthropic's API default for Claude Opus 4.7 and Claude Mythos Preview
* Note: Anthropic's API default for Claude Opus 4.7+ and Claude Mythos Preview
* is "omitted". We default to "summarized" here to keep behavior consistent
* with older Claude 4 models. Set this explicitly to "omitted" to opt in.
*/
@@ -728,6 +728,8 @@ function supportsAdaptiveThinking(modelId: string): boolean {
return (
modelId.includes("opus-4-6") ||
modelId.includes("opus-4.6") ||
modelId.includes("opus-4-8") ||
modelId.includes("opus-4.8") ||
modelId.includes("opus-4-7") ||
modelId.includes("opus-4.7") ||
modelId.includes("sonnet-4-6") ||
@@ -737,18 +739,19 @@ function supportsAdaptiveThinking(modelId: string): boolean {
/**
* Map ThinkingLevel to Anthropic effort levels for adaptive thinking.
* Note: effort "max" is only valid on Opus 4.6, while Opus 4.7 supports "xhigh".
* Model metadata owns the provider-specific extended effort mapping.
*/
function mapThinkingLevelToEffort(
model: Model<"anthropic-messages">,
level: SimpleStreamOptions["reasoning"],
): AnthropicEffort {
const mapped = level ? model.thinkingLevelMap?.[level] : undefined;
const clampedLevel = level ? clampThinkingLevel(model, level) : undefined;
const mapped = clampedLevel ? model.thinkingLevelMap?.[clampedLevel] : undefined;
if (typeof mapped === "string") {
return mapped as AnthropicEffort;
}
switch (level) {
switch (clampedLevel) {
case "minimal":
case "low":
return "low";
@@ -756,6 +759,8 @@ function mapThinkingLevelToEffort(
return "medium";
case "high":
return "high";
case "max":
return "max";
default:
return "high";
}
@@ -982,7 +987,7 @@ function buildParams(
// budget-based (older models), or explicitly disabled.
if (model.reasoning) {
if (options?.thinkingEnabled) {
// Default to "summarized" so Opus 4.7 and Mythos Preview behave like
// Default to "summarized" so Opus 4.7+ and Mythos Preview behave like
// older Claude 4 models (whose API default is also "summarized").
const display: AnthropicThinkingDisplay = options.thinkingDisplay ?? "summarized";
if (supportsAdaptiveThinking(model.id)) {

View File

@@ -186,7 +186,12 @@ export const streamSimpleAzureOpenAIResponses: StreamFunction<
const clampedReasoning = options?.reasoning
? clampThinkingLevel(model, options.reasoning)
: undefined;
const reasoningEffort = clampedReasoning === "off" ? undefined : clampedReasoning;
const reasoningEffort =
clampedReasoning === "off"
? undefined
: clampedReasoning === "max"
? "xhigh"
: clampedReasoning;
return streamAzureOpenAIResponses(model, context, {
...base,

View File

@@ -132,7 +132,9 @@ export const streamSimpleGoogleVertex: StreamFunction<"google-vertex", SimpleStr
}
const clampedReasoning = clampThinkingLevel(model, options.reasoning);
const effort = (clampedReasoning === "off" ? "high" : clampedReasoning) as ClampedThinkingLevel;
const effort = (
clampedReasoning === "off" || clampedReasoning === "max" ? "high" : clampedReasoning
) as ClampedThinkingLevel;
const geminiModel = model as unknown as Model<"google-generative-ai">;
if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
@@ -312,7 +314,7 @@ function buildParams(
return params;
}
type ClampedThinkingLevel = Exclude<AgentThinkingLevel, "xhigh">;
type ClampedThinkingLevel = Exclude<AgentThinkingLevel, "xhigh" | "max">;
function isGemini3ProModel(model: Model<"google-generative-ai">): boolean {
return /gemini-3(?:\.\d+)?-pro/.test(model.id.toLowerCase());

View File

@@ -119,7 +119,9 @@ export const streamSimpleGoogle: StreamFunction<"google-generative-ai", SimpleSt
}
const clampedReasoning = clampThinkingLevel(model, options.reasoning);
const effort = (clampedReasoning === "off" ? "high" : clampedReasoning) as ClampedThinkingLevel;
const effort = (
clampedReasoning === "off" || clampedReasoning === "max" ? "high" : clampedReasoning
) as ClampedThinkingLevel;
const googleModel = model;
if (
@@ -225,7 +227,7 @@ function buildParams(
return params;
}
type ClampedThinkingLevel = Exclude<ThinkingLevel, "xhigh">;
type ClampedThinkingLevel = Exclude<ThinkingLevel, "xhigh" | "max">;
function isGemma4Model(model: Model<"google-generative-ai">): boolean {
return /gemma-?4/.test(model.id.toLowerCase());

View File

@@ -452,7 +452,12 @@ export const streamSimpleOpenAICodexResponses: StreamFunction<
const clampedReasoning = options?.reasoning
? clampThinkingLevel(model, options.reasoning)
: undefined;
const reasoningEffort = clampedReasoning === "off" ? undefined : clampedReasoning;
const reasoningEffort =
clampedReasoning === "off"
? undefined
: clampedReasoning === "max"
? "xhigh"
: clampedReasoning;
return streamOpenAICodexResponses(model, context, {
...base,

View File

@@ -467,7 +467,12 @@ export const streamSimpleOpenAICompletions: StreamFunction<
const clampedReasoning = options?.reasoning
? clampThinkingLevel(model, options.reasoning)
: undefined;
const reasoningEffort = clampedReasoning === "off" ? undefined : clampedReasoning;
const reasoningEffort =
clampedReasoning === "off"
? undefined
: clampedReasoning === "max"
? "xhigh"
: clampedReasoning;
const toolChoice = (options as OpenAICompletionsOptions | undefined)?.toolChoice;
return streamOpenAICompletions(model, context, {

View File

@@ -179,7 +179,12 @@ export const streamSimpleOpenAIResponses: StreamFunction<
const clampedReasoning = options?.reasoning
? clampThinkingLevel(model, options.reasoning)
: undefined;
const reasoningEffort = clampedReasoning === "off" ? undefined : clampedReasoning;
const reasoningEffort =
clampedReasoning === "off"
? undefined
: clampedReasoning === "max"
? "xhigh"
: clampedReasoning;
return streamOpenAIResponses(model, context, {
...base,

View File

@@ -49,6 +49,7 @@ export function adjustMaxTokensForThinking(
low: 2048,
medium: 8192,
high: 16384,
max: 32768,
};
const budgets = { ...defaultBudgets, ...customBudgets };

View File

@@ -23,7 +23,7 @@ export type KnownImagesProvider = "openrouter";
export type ImagesProvider = string;
export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh" | "max";
export type ModelThinkingLevel = "off" | ThinkingLevel;
export type ThinkingLevelMap = Partial<Record<ModelThinkingLevel, string | null>>;
@@ -33,6 +33,7 @@ export interface ThinkingBudgets {
low?: number;
medium?: number;
high?: number;
max?: number;
}
// Base options all providers share

View File

@@ -279,6 +279,14 @@ describe("buildProviderReplayFamilyHooks", () => {
});
describe("resolveClaudeThinkingProfile", () => {
// Opus 4.8 profile: the default level is "off", and the level ids include xhigh, adaptive and max.
it("leaves Opus 4.8 thinking off by default with xhigh/adaptive/max options", () => {
  const opus48Profile = resolveClaudeThinkingProfile("claude-opus-4-8");

  expectFields(opus48Profile, { defaultLevel: "off" });
  expectLevelIdsInclude(opus48Profile, ["xhigh", "adaptive", "max"]);
});
it("exposes Opus 4.7 thinking levels for direct and proxied Claude providers", () => {
const directProfile = resolveClaudeThinkingProfile("claude-opus-4-7");
expectFields(directProfile, {

View File

@@ -94,6 +94,7 @@ export {
} from "../plugins/provider-model-helpers.js";
import { normalizeOptionalLowercaseString } from "../shared/string-coerce.js";
// Model-id prefixes for Claude Opus 4.8, listed in both the hyphenated ("4-8") and
// dotted ("4.8") spellings. `as const` keeps the entries as a readonly tuple of literals.
const CLAUDE_OPUS_48_MODEL_PREFIXES = ["claude-opus-4-8", "claude-opus-4.8"] as const;
// The same pair of spellings for Claude Opus 4.7.
const CLAUDE_OPUS_47_MODEL_PREFIXES = ["claude-opus-4-7", "claude-opus-4.7"] as const;
const CLAUDE_ADAPTIVE_THINKING_DEFAULT_MODEL_PREFIXES = [
"claude-opus-4-6",
@@ -135,6 +136,10 @@ function isClaudeOpus47ModelId(modelId: string): boolean {
return matchesClaudeModelPrefix(modelId, CLAUDE_OPUS_47_MODEL_PREFIXES);
}
/**
 * Reports whether `modelId` is recognised as a Claude Opus 4.8 model.
 *
 * Delegates to `matchesClaudeModelPrefix` with `CLAUDE_OPUS_48_MODEL_PREFIXES`;
 * the exact matching rules live in that helper.
 *
 * @param modelId - Model identifier to test.
 * @returns The helper's verdict for the Opus 4.8 prefix list.
 */
function isClaudeOpus48ModelId(modelId: string): boolean {
  const isOpus48 = matchesClaudeModelPrefix(modelId, CLAUDE_OPUS_48_MODEL_PREFIXES);
  return isOpus48;
}
/** @deprecated Anthropic provider-owned model helper; do not use from third-party plugins. */
export function isClaudeAdaptiveThinkingDefaultModelId(modelId: string): boolean {
return matchesClaudeModelPrefix(modelId, CLAUDE_ADAPTIVE_THINKING_DEFAULT_MODEL_PREFIXES);
@@ -142,6 +147,12 @@ export function isClaudeAdaptiveThinkingDefaultModelId(modelId: string): boolean
/** @deprecated Anthropic provider-owned model helper; do not use from third-party plugins. */
export function resolveClaudeThinkingProfile(modelId: string): ProviderThinkingProfile {
if (isClaudeOpus48ModelId(modelId)) {
return {
levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }],
defaultLevel: "off",
};
}
if (isClaudeOpus47ModelId(modelId)) {
return {
levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }],

View File

@@ -577,7 +577,7 @@ describe("package artifact reuse", () => {
expect(workflow).toContain("suite_id: native-live-src-gateway-profiles-anthropic-opus");
expect(workflow).toContain("suite_id: native-live-src-gateway-profiles-anthropic-sonnet-haiku");
expect(workflow).toContain("suite_group: native-live-src-gateway-profiles-anthropic");
expect(workflow).toContain("OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-7");
expect(workflow).toContain("OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-8");
expect(workflow).toContain("anthropic/claude-sonnet-4-6,anthropic/claude-haiku-4-5");
expect(workflow).toMatch(
/suite_id: native-live-src-gateway-profiles-fireworks[\s\S]*?advisory: true/u,