mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-06 05:51:15 +08:00
feat: add Claude Opus 4.8 support (#87890)
* feat: add Claude Opus 4.8 support * fix: omit Vertex Opus sampling overrides * fix: preserve Opus adaptive thinking levels * fix: clamp Anthropic max effort support * fix: use sha256 for QA mock call ids * fix: type Anthropic transport test model metadata * test: update PDF model default for Opus 4.8
This commit is contained in:
committed by
GitHub
parent
98611e6272
commit
1188aa3b81
@@ -138,7 +138,7 @@ jobs:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENCLAW_DOCS_I18N_OPENAI_API_KEY || secrets.OPENAI_API_KEY }}
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
OPENCLAW_CONTROL_UI_I18N_PROVIDER: ${{ secrets.ANTHROPIC_API_KEY != '' && 'anthropic' || 'openai' }}
|
||||
OPENCLAW_CONTROL_UI_I18N_MODEL: ${{ secrets.ANTHROPIC_API_KEY != '' && 'claude-opus-4-7' || vars.OPENCLAW_CI_OPENAI_MODEL_BARE }}
|
||||
OPENCLAW_CONTROL_UI_I18N_MODEL: ${{ secrets.ANTHROPIC_API_KEY != '' && 'claude-opus-4-8' || vars.OPENCLAW_CI_OPENAI_MODEL_BARE }}
|
||||
OPENCLAW_CONTROL_UI_I18N_THINKING: low
|
||||
OPENCLAW_CONTROL_UI_I18N_AUTH_OPTIONAL: "1"
|
||||
LOCALE: ${{ matrix.locale }}
|
||||
|
||||
@@ -1932,7 +1932,7 @@ jobs:
|
||||
- suite_id: native-live-src-gateway-profiles-anthropic-opus
|
||||
suite_group: native-live-src-gateway-profiles-anthropic
|
||||
label: Native live gateway profiles Anthropic Opus
|
||||
command: OPENCLAW_LIVE_GATEWAY_THINKING=low OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-7 node .release-harness/scripts/test-live-shard.mjs native-live-src-gateway-profiles
|
||||
command: OPENCLAW_LIVE_GATEWAY_THINKING=low OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-8 node .release-harness/scripts/test-live-shard.mjs native-live-src-gateway-profiles
|
||||
timeout_minutes: 30
|
||||
profile_env_only: false
|
||||
advisory: true
|
||||
|
||||
@@ -813,7 +813,7 @@ jobs:
|
||||
alt_model="openai/gpt-5.5-alt"
|
||||
;;
|
||||
baseline)
|
||||
model="anthropic/claude-opus-4-7"
|
||||
model="anthropic/claude-opus-4-8"
|
||||
alt_model="anthropic/claude-sonnet-4-6"
|
||||
;;
|
||||
*)
|
||||
@@ -885,7 +885,7 @@ jobs:
|
||||
--candidate-summary .artifacts/qa-e2e/openai-candidate/qa-suite-summary.json \
|
||||
--baseline-summary .artifacts/qa-e2e/anthropic-baseline/qa-suite-summary.json \
|
||||
--candidate-label "${OPENCLAW_CI_OPENAI_MODEL}" \
|
||||
--baseline-label anthropic/claude-opus-4-7 \
|
||||
--baseline-label anthropic/claude-opus-4-8 \
|
||||
--output-dir .artifacts/qa-e2e/parity
|
||||
|
||||
- name: Upload parity artifacts
|
||||
|
||||
@@ -199,13 +199,13 @@ jobs:
|
||||
--alt-model openai/gpt-5.5-alt \
|
||||
--output-dir .artifacts/qa-e2e/openai-candidate
|
||||
|
||||
- name: Run Opus 4.7 lane
|
||||
- name: Run Opus 4.8 lane
|
||||
run: |
|
||||
pnpm openclaw qa suite \
|
||||
--provider-mode mock-openai \
|
||||
--parity-pack agentic \
|
||||
--concurrency "${QA_PARITY_CONCURRENCY}" \
|
||||
--model anthropic/claude-opus-4-7 \
|
||||
--model anthropic/claude-opus-4-8 \
|
||||
--alt-model anthropic/claude-sonnet-4-6 \
|
||||
--output-dir .artifacts/qa-e2e/anthropic-baseline
|
||||
|
||||
@@ -216,7 +216,7 @@ jobs:
|
||||
--candidate-summary .artifacts/qa-e2e/openai-candidate/qa-suite-summary.json \
|
||||
--baseline-summary .artifacts/qa-e2e/anthropic-baseline/qa-suite-summary.json \
|
||||
--candidate-label "${OPENCLAW_CI_OPENAI_MODEL}" \
|
||||
--baseline-label anthropic/claude-opus-4-7 \
|
||||
--baseline-label anthropic/claude-opus-4-8 \
|
||||
--output-dir .artifacts/qa-e2e/parity
|
||||
|
||||
- name: Upload parity artifacts
|
||||
|
||||
@@ -157,8 +157,8 @@ order and tells you what it chose:
|
||||
|
||||
- existing explicit model, if already configured
|
||||
- `OPENAI_API_KEY` -> `openai/gpt-5.5`
|
||||
- `ANTHROPIC_API_KEY` -> `anthropic/claude-opus-4-7`
|
||||
- Claude Code CLI -> `claude-cli/claude-opus-4-7`
|
||||
- `ANTHROPIC_API_KEY` -> `anthropic/claude-opus-4-8`
|
||||
- Claude Code CLI -> `claude-cli/claude-opus-4-8`
|
||||
- Codex -> `openai/gpt-5.5` through the Codex app-server harness
|
||||
|
||||
If none are available, setup still writes the default workspace and leaves the
|
||||
@@ -173,7 +173,7 @@ planner turn through OpenClaw's normal runtime paths. It first uses the
|
||||
configured OpenClaw model. If no configured model is usable yet, it can fall
|
||||
back to local runtimes already present on the machine:
|
||||
|
||||
- Claude Code CLI: `claude-cli/claude-opus-4-7`
|
||||
- Claude Code CLI: `claude-cli/claude-opus-4-8`
|
||||
- Codex app-server harness: `openai/gpt-5.5`
|
||||
|
||||
The model-assisted planner cannot mutate config directly. It must translate the
|
||||
|
||||
@@ -35,7 +35,7 @@ There are two runtime families:
|
||||
is the built-in `openclaw` runtime plus registered plugin harnesses such as
|
||||
`codex` and `copilot`.
|
||||
- **CLI backends** run a local CLI process while keeping the model ref
|
||||
canonical. For example, `anthropic/claude-opus-4-7` with
|
||||
canonical. For example, `anthropic/claude-opus-4-8` with
|
||||
a model-scoped `agentRuntime.id: "claude-cli"` means "select the Anthropic
|
||||
model, execute through Claude CLI." `claude-cli` is not an embedded harness id
|
||||
and must not be passed to AgentHarness selection.
|
||||
@@ -174,9 +174,9 @@ Claude CLI form is:
|
||||
{
|
||||
agents: {
|
||||
defaults: {
|
||||
model: "anthropic/claude-opus-4-7",
|
||||
model: "anthropic/claude-opus-4-8",
|
||||
models: {
|
||||
"anthropic/claude-opus-4-7": {
|
||||
"anthropic/claude-opus-4-8": {
|
||||
agentRuntime: { id: "claude-cli" },
|
||||
},
|
||||
},
|
||||
|
||||
@@ -116,7 +116,7 @@ Official provider plugins publish their own model catalog rows. These providers
|
||||
- CLI: `openclaw onboard --auth-choice apiKey`
|
||||
- Direct public Anthropic requests support the shared `/fast` toggle and `params.fastMode`, including API-key and OAuth-authenticated traffic sent to `api.anthropic.com`; OpenClaw maps that to Anthropic `service_tier` (`auto` vs `standard_only`)
|
||||
- Preferred Claude CLI config keeps the model ref canonical and selects the CLI
|
||||
backend separately: `anthropic/claude-opus-4-7` with
|
||||
backend separately: `anthropic/claude-opus-4-8` with
|
||||
model-scoped `agentRuntime.id: "claude-cli"`. Legacy
|
||||
`claude-cli/claude-opus-4-7` refs still work for compatibility.
|
||||
|
||||
|
||||
@@ -889,13 +889,13 @@ pnpm openclaw qa character-eval \
|
||||
--model openai/gpt-5.5,thinking=medium,fast \
|
||||
--model openai/gpt-5.2,thinking=xhigh \
|
||||
--model openai/gpt-5,thinking=xhigh \
|
||||
--model anthropic/claude-opus-4-7,thinking=high \
|
||||
--model anthropic/claude-opus-4-8,thinking=high \
|
||||
--model anthropic/claude-sonnet-4-6,thinking=high \
|
||||
--model zai/glm-5.1,thinking=high \
|
||||
--model moonshot/kimi-k2.5,thinking=high \
|
||||
--model google/gemini-3.1-pro-preview,thinking=high \
|
||||
--judge-model openai/gpt-5.5,thinking=xhigh,fast \
|
||||
--judge-model anthropic/claude-opus-4-7,thinking=high \
|
||||
--judge-model anthropic/claude-opus-4-8,thinking=high \
|
||||
--blind-judge-models \
|
||||
--concurrency 16 \
|
||||
--judge-concurrency 16
|
||||
@@ -926,13 +926,13 @@ Candidate and judge model runs both default to concurrency 16. Lower
|
||||
`--concurrency` or `--judge-concurrency` when provider limits or local gateway
|
||||
pressure make a run too noisy.
|
||||
When no candidate `--model` is passed, the character eval defaults to
|
||||
`openai/gpt-5.5`, `openai/gpt-5.2`, `openai/gpt-5`, `anthropic/claude-opus-4-7`,
|
||||
`openai/gpt-5.5`, `openai/gpt-5.2`, `openai/gpt-5`, `anthropic/claude-opus-4-8`,
|
||||
`anthropic/claude-sonnet-4-6`, `zai/glm-5.1`,
|
||||
`moonshot/kimi-k2.5`, and
|
||||
`google/gemini-3.1-pro-preview`.
|
||||
When no `--judge-model` is passed, the judges default to
|
||||
`openai/gpt-5.5,thinking=xhigh,fast` and
|
||||
`anthropic/claude-opus-4-7,thinking=high`.
|
||||
`anthropic/claude-opus-4-8,thinking=high`.
|
||||
|
||||
## Related docs
|
||||
|
||||
|
||||
@@ -334,7 +334,7 @@ Higher values preserve more visual detail.
|
||||
Image-tool compression/detail preference for images loaded from file paths, URLs, and media references.
|
||||
Default: `auto`.
|
||||
|
||||
OpenClaw adapts the resize ladder to the selected image model. For example, Claude Opus 4.7, OpenAI GPT-5.5, Qwen VL, and hosted Llama 4 vision models can use larger images than older/default high-detail vision paths, while multi-image turns are compressed more aggressively in `auto` mode to control token and latency cost.
|
||||
OpenClaw adapts the resize ladder to the selected image model. For example, Claude Opus 4.8, OpenAI GPT-5.5, Qwen VL, and hosted Llama 4 vision models can use larger images than older/default high-detail vision paths, while multi-image turns are compressed more aggressively in `auto` mode to control token and latency cost.
|
||||
|
||||
Values:
|
||||
|
||||
@@ -483,7 +483,7 @@ Time format in system prompt. Default: `auto` (OS preference).
|
||||
defaults: {
|
||||
model: "openai/gpt-5.5",
|
||||
models: {
|
||||
"anthropic/claude-opus-4-7": {
|
||||
"anthropic/claude-opus-4-8": {
|
||||
agentRuntime: { id: "claude-cli" },
|
||||
},
|
||||
"vllm/*": {
|
||||
@@ -501,7 +501,7 @@ Time format in system prompt. Default: `auto` (OS preference).
|
||||
- Runtime precedence is exact model policy first (`agents.list[].models["provider/model"]`, `agents.defaults.models["provider/model"]`, or `models.providers.<provider>.models[]`), then `agents.list[]` / `agents.defaults.models["provider/*"]`, then provider-wide policy at `models.providers.<provider>.agentRuntime`.
|
||||
- Whole-agent runtime keys are legacy. `agents.defaults.agentRuntime`, `agents.list[].agentRuntime`, session runtime pins, and `OPENCLAW_AGENT_RUNTIME` are ignored by runtime selection. Run `openclaw doctor --fix` to remove stale values.
|
||||
- OpenAI agent models use the Codex harness by default; provider/model `agentRuntime.id: "codex"` remains valid when you want to make that explicit.
|
||||
- For Claude CLI deployments, prefer `model: "anthropic/claude-opus-4-7"` plus model-scoped `agentRuntime.id: "claude-cli"`. Legacy `claude-cli/claude-opus-4-7` model refs still work for compatibility, but new config should keep provider/model selection canonical and put the execution backend in provider/model runtime policy.
|
||||
- For Claude CLI deployments, prefer `model: "anthropic/claude-opus-4-8"` plus model-scoped `agentRuntime.id: "claude-cli"`. Legacy `claude-cli/claude-opus-4-7` model refs still work for compatibility, but new config should keep provider/model selection canonical and put the execution backend in provider/model runtime policy.
|
||||
- This only controls text agent-turn execution. Media generation, vision, PDF, music, video, and TTS still use their provider/model settings.
|
||||
|
||||
**Built-in alias shorthands** (only apply when the model is in `agents.defaults.models`):
|
||||
@@ -521,7 +521,7 @@ Your configured aliases always win over defaults.
|
||||
|
||||
Z.AI GLM-4.x models automatically enable thinking mode unless you set `--thinking off` or define `agents.defaults.models["zai/<model>"].params.thinking` yourself.
|
||||
Z.AI models enable `tool_stream` by default for tool call streaming. Set `agents.defaults.models["zai/<model>"].params.tool_stream` to `false` to disable it.
|
||||
Anthropic Claude 4.6 models default to `adaptive` thinking when no explicit thinking level is set.
|
||||
Anthropic Claude Opus 4.8 keeps thinking off by default in OpenClaw; when adaptive thinking is explicitly enabled, Anthropic's provider-owned effort default is `high`. Claude 4.6 models default to `adaptive` when no explicit thinking level is set.
|
||||
|
||||
### `agents.defaults.cliBackends`
|
||||
|
||||
|
||||
@@ -282,7 +282,7 @@ troubleshooting, see the main [FAQ](/help/faq).
|
||||
<Accordion title="Are opus / sonnet / gpt built-in shortcuts?">
|
||||
Yes. OpenClaw ships a few default shorthands (only applied when the model exists in `agents.defaults.models`):
|
||||
|
||||
- `opus` → `anthropic/claude-opus-4-7`
|
||||
- `opus` → `anthropic/claude-opus-4-8`
|
||||
- `sonnet` → `anthropic/claude-sonnet-4-6`
|
||||
- `gpt` → `openai/gpt-5.4`
|
||||
- `gpt-mini` → `openai/gpt-5.4-mini`
|
||||
|
||||
@@ -238,9 +238,9 @@ model entry:
|
||||
{
|
||||
"agents": {
|
||||
"defaults": {
|
||||
"model": "anthropic/claude-opus-4-7",
|
||||
"model": "anthropic/claude-opus-4-8",
|
||||
"models": {
|
||||
"anthropic/claude-opus-4-7": {
|
||||
"anthropic/claude-opus-4-8": {
|
||||
"agentRuntime": {
|
||||
"id": "claude-cli"
|
||||
}
|
||||
|
||||
@@ -61,7 +61,7 @@ Anthropic's current public docs:
|
||||
```json5
|
||||
{
|
||||
env: { ANTHROPIC_API_KEY: "example-anthropic-key-not-real" },
|
||||
agents: { defaults: { model: { primary: "anthropic/claude-opus-4-6" } } },
|
||||
agents: { defaults: { model: { primary: "anthropic/claude-opus-4-8" } } },
|
||||
}
|
||||
```
|
||||
|
||||
@@ -113,9 +113,9 @@ Anthropic's current public docs:
|
||||
{
|
||||
agents: {
|
||||
defaults: {
|
||||
model: { primary: "anthropic/claude-opus-4-7" },
|
||||
model: { primary: "anthropic/claude-opus-4-8" },
|
||||
models: {
|
||||
"anthropic/claude-opus-4-7": {
|
||||
"anthropic/claude-opus-4-8": {
|
||||
agentRuntime: { id: "claude-cli" },
|
||||
},
|
||||
},
|
||||
@@ -135,9 +135,9 @@ Anthropic's current public docs:
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
## Thinking defaults (Claude 4.6)
|
||||
## Thinking defaults (Claude 4.8 and 4.6)
|
||||
|
||||
Claude 4.6 models default to `adaptive` thinking in OpenClaw when no explicit thinking level is set.
|
||||
Claude Opus 4.8 keeps thinking off by default in OpenClaw. When you explicitly enable adaptive thinking with `/think high|xhigh|max`, OpenClaw sends Anthropic's Opus 4.8 effort values; Claude 4.6 models default to `adaptive`.
|
||||
|
||||
Override per-message with `/think:<level>` or in model params:
|
||||
|
||||
@@ -146,8 +146,8 @@ Override per-message with `/think:<level>` or in model params:
|
||||
agents: {
|
||||
defaults: {
|
||||
models: {
|
||||
"anthropic/claude-opus-4-6": {
|
||||
params: { thinking: "adaptive" },
|
||||
"anthropic/claude-opus-4-8": {
|
||||
params: { thinking: "high" },
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -267,7 +267,7 @@ OpenClaw supports Anthropic's prompt caching feature for API-key auth.
|
||||
|
||||
| Property | Value |
|
||||
| --------------- | --------------------- |
|
||||
| Default model | `claude-opus-4-7` |
|
||||
| Default model | `claude-opus-4-8` |
|
||||
| Supported input | Images, PDF documents |
|
||||
|
||||
When an image or PDF is attached to a conversation, OpenClaw automatically
|
||||
@@ -277,7 +277,7 @@ OpenClaw supports Anthropic's prompt caching feature for API-key auth.
|
||||
|
||||
<Accordion title="1M context window">
|
||||
Anthropic's 1M context window is available on GA-capable Claude 4.x models
|
||||
such as Opus 4.6, Opus 4.7, and Sonnet 4.6. OpenClaw sizes those models at
|
||||
such as Opus 4.8, Opus 4.7, Opus 4.6, and Sonnet 4.6. OpenClaw sizes those models at
|
||||
1M automatically:
|
||||
|
||||
```json5
|
||||
@@ -308,8 +308,8 @@ OpenClaw supports Anthropic's prompt caching feature for API-key auth.
|
||||
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="Claude Opus 4.7 1M context">
|
||||
`anthropic/claude-opus-4-7` and its `claude-cli` variant have a 1M context
|
||||
<Accordion title="Claude Opus 4.8 1M context">
|
||||
`anthropic/claude-opus-4-8` and its `claude-cli` variant have a 1M context
|
||||
window by default — no `params.context1m: true` needed.
|
||||
</Accordion>
|
||||
</AccordionGroup>
|
||||
|
||||
@@ -205,7 +205,7 @@ override only `cacheRetention` and inherit other model defaults unchanged.
|
||||
|
||||
### Anthropic 1M context
|
||||
|
||||
OpenClaw sizes GA-capable Claude 4.x models such as Opus 4.6, Opus 4.7, and
|
||||
OpenClaw sizes GA-capable Claude 4.x models such as Opus 4.8, Opus 4.7, Opus 4.6, and
|
||||
Sonnet 4.6 with Anthropic's 1M context window. You do not need
|
||||
`params.context1m: true` for those models.
|
||||
|
||||
|
||||
@@ -13,9 +13,9 @@ title: "Thinking levels"
|
||||
- low → "think hard"
|
||||
- medium → "think harder"
|
||||
- high → "ultrathink" (max budget)
|
||||
- xhigh → "ultrathink+" (GPT-5.2+ and Codex models, plus Anthropic Claude Opus 4.7 effort)
|
||||
- adaptive → provider-managed adaptive thinking (supported for Claude 4.6 on Anthropic/Bedrock, Anthropic Claude Opus 4.7, and Google Gemini dynamic thinking)
|
||||
- max → provider max reasoning (Anthropic Claude Opus 4.7; Ollama maps this to its highest native `think` effort)
|
||||
- xhigh → "ultrathink+" (GPT-5.2+ and Codex models, plus Anthropic Claude Opus 4.7+ effort)
|
||||
- adaptive → provider-managed adaptive thinking (supported for Claude 4.6 on Anthropic/Bedrock, Anthropic Claude Opus 4.7+, and Google Gemini dynamic thinking)
|
||||
- max → provider max reasoning (Anthropic Claude Opus 4.7+; Ollama maps this to its highest native `think` effort)
|
||||
- `x-high`, `x_high`, `extra-high`, `extra high`, and `extra_high` map to `xhigh`.
|
||||
- `highest` maps to `high`.
|
||||
- Provider notes:
|
||||
@@ -23,9 +23,9 @@ title: "Thinking levels"
|
||||
- `adaptive`, `xhigh`, and `max` are only advertised for provider/model profiles that support them. Typed directives for unsupported levels are rejected with that model's valid options.
|
||||
- Existing stored unsupported levels are remapped by provider profile rank. `adaptive` falls back to `medium` on non-adaptive models, while `xhigh` and `max` fall back to the largest supported non-off level for the selected model.
|
||||
- Anthropic Claude 4.6 models default to `adaptive` when no explicit thinking level is set.
|
||||
- Anthropic Claude Opus 4.7 does not default to adaptive thinking. Its API effort default remains provider-owned unless you explicitly set a thinking level.
|
||||
- Anthropic Claude Opus 4.7 maps `/think xhigh` to adaptive thinking plus `output_config.effort: "xhigh"`, because `/think` is a thinking directive and `xhigh` is the Opus 4.7 effort setting.
|
||||
- Anthropic Claude Opus 4.7 also exposes `/think max`; it maps to the same provider-owned max effort path.
|
||||
- Anthropic Claude Opus 4.8 and Opus 4.7 keep thinking off unless you explicitly set a thinking level. Opus 4.8's provider-owned effort default is `high` after adaptive thinking is enabled.
|
||||
- Anthropic Claude Opus 4.7+ maps `/think xhigh` to adaptive thinking plus `output_config.effort: "xhigh"`, because `/think` is a thinking directive and `xhigh` is the Opus effort setting.
|
||||
- Anthropic Claude Opus 4.7+ also exposes `/think max`; it maps to the same provider-owned max effort path.
|
||||
- Direct DeepSeek V4 models expose `/think xhigh|max`; both map to DeepSeek `reasoning_effort: "max"` while lower non-off levels map to `high`.
|
||||
- OpenRouter-routed DeepSeek V4 models expose `/think xhigh` and send OpenRouter-supported `reasoning_effort` values. Stored `max` overrides fall back to `xhigh`.
|
||||
- Ollama thinking-capable models expose `/think low|medium|high|max`; `max` maps to native `think: "high"` because Ollama's native API accepts `low`, `medium`, and `high` effort strings.
|
||||
|
||||
@@ -64,6 +64,7 @@ function adjustMaxTokensForThinking(
|
||||
medium: 8192,
|
||||
high: 16384,
|
||||
xhigh: 16384,
|
||||
max: 16384,
|
||||
} as const;
|
||||
const budgets = { ...defaultBudgets, ...customBudgets };
|
||||
const minOutputTokens = 1024;
|
||||
|
||||
@@ -195,6 +195,58 @@ describe("bedrock discovery", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// Dotted Opus 4.8 ids ("claude-opus-4.8") must be recognised for both the
// foundation model and the inference profile that wraps it.
it("uses 1M context window for dotted Claude Opus 4.8 Bedrock refs", async () => {
  sendMock
    // First response: carries the `modelSummaries` listing.
    .mockResolvedValueOnce({
      modelSummaries: [
        {
          modelId: "anthropic.claude-opus-4.8-v1:0",
          modelName: "Claude Opus 4.8",
          providerName: "anthropic",
          inputModalities: ["TEXT"],
          outputModalities: ["TEXT"],
          responseStreamingSupported: true,
          modelLifecycle: { status: "ACTIVE" },
        },
      ],
    })
    // Second response: carries the `inferenceProfileSummaries` listing.
    .mockResolvedValueOnce({
      inferenceProfileSummaries: [
        {
          inferenceProfileId: "us.anthropic.claude-opus-4.8-v1:0",
          inferenceProfileName: "US Claude Opus 4.8",
          status: "ACTIVE",
          type: "SYSTEM_DEFINED",
          models: [
            {
              modelArn:
                "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-opus-4.8-v1:0",
            },
          ],
        },
      ],
    });

  const models = await discoverBedrockModels({ region: "us-east-1", clientFactory });

  // Foundation model entry.
  expectModelFields(
    models.find((model) => model.id === "anthropic.claude-opus-4.8-v1:0"),
    {
      contextWindow: 1_000_000,
      reasoning: true,
      thinkingLevelMap: { xhigh: "xhigh", max: "max" },
    },
  );
  // Region-prefixed inference profile entry must expose the same fields.
  expectModelFields(
    models.find((model) => model.id === "us.anthropic.claude-opus-4.8-v1:0"),
    {
      contextWindow: 1_000_000,
      reasoning: true,
      thinkingLevelMap: { xhigh: "xhigh", max: "max" },
    },
  );
});
|
||||
|
||||
it("caches results when refreshInterval is enabled", async () => {
|
||||
mockSingleActiveSummary();
|
||||
|
||||
|
||||
@@ -46,6 +46,7 @@ const DEFAULT_MAX_TOKENS = 4096;
|
||||
const KNOWN_CONTEXT_WINDOWS: Record<string, number> = {
|
||||
// Anthropic Claude
|
||||
"anthropic.claude-3-7-sonnet-20250219-v1:0": 200_000,
|
||||
"anthropic.claude-opus-4-8": 1_000_000,
|
||||
"anthropic.claude-opus-4-7": 1_000_000,
|
||||
"anthropic.claude-opus-4-6-v1": 1_000_000,
|
||||
"anthropic.claude-opus-4-6-v1:0": 1_000_000,
|
||||
@@ -121,6 +122,9 @@ function resolveKnownContextWindow(modelId: string): number | undefined {
|
||||
const stripped = modelId.replace(/^(?:us|eu|ap|apac|au|jp|global)\./, "");
|
||||
const candidates = [modelId, stripped];
|
||||
for (const candidate of candidates) {
|
||||
if (/(?:^|[/.:])anthropic\.claude-opus-4[.-]8(?:$|[-.:/])/i.test(candidate)) {
|
||||
return 1_000_000;
|
||||
}
|
||||
if (KNOWN_CONTEXT_WINDOWS[candidate] !== undefined) {
|
||||
return KNOWN_CONTEXT_WINDOWS[candidate];
|
||||
}
|
||||
@@ -135,6 +139,22 @@ function resolveKnownContextWindow(modelId: string): number | undefined {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
 * Reports whether a Bedrock model id refers to Claude Opus 4.7 or 4.8.
 *
 * The id is tested both as given and with one leading region/geo prefix
 * (`us.`, `eu.`, `ap.`, `apac.`, `au.`, `jp.`, `global.`) removed. Dashed and
 * dotted version spellings (`4-8`, `4.8`) both match, case-insensitively, and
 * the `anthropic.` segment may follow `/`, `.` or `:` so ARN-style refs are
 * recognised as well.
 *
 * NOTE(review): despite the "OrNewer" name, only minor versions 7 and 8 are
 * matched; a later release needs the character class extended.
 *
 * @param modelId Bedrock model id, inference profile id, or ARN.
 * @returns `true` when the id names Claude Opus 4.7 or 4.8.
 */
function isKnownClaudeOpus47OrNewerModelId(modelId: string): boolean {
  const stripped = modelId.replace(/^(?:us|eu|ap|apac|au|jp|global)\./, "");
  return [modelId, stripped].some((candidate) =>
    // The trailing group stops "4-8" from matching a longer number such as "4-80".
    /(?:^|[/.:])anthropic\.claude-opus-4[.-][78](?:$|[-.:/])/i.test(candidate),
  );
}
|
||||
|
||||
/**
 * Returns the thinking-level map for model ids accepted by
 * `isKnownClaudeOpus47OrNewerModelId`, and `undefined` for every other id.
 *
 * @param modelId Bedrock model id, inference profile id, or ARN.
 * @returns `{ xhigh: "xhigh", max: "max" }` for recognised Opus ids, else `undefined`.
 */
function resolveKnownThinkingLevelMap(
  modelId: string,
): ModelDefinitionConfig["thinkingLevelMap"] | undefined {
  if (!isKnownClaudeOpus47OrNewerModelId(modelId)) {
    return undefined;
  }
  return { xhigh: "xhigh", max: "max" };
}
|
||||
|
||||
const DEFAULT_COST = {
|
||||
input: 0,
|
||||
output: 0,
|
||||
@@ -243,6 +263,9 @@ function mapInputModalities(summary: BedrockModelSummary): Array<"text" | "image
|
||||
}
|
||||
|
||||
function inferReasoningSupport(summary: BedrockModelSummary): boolean {
|
||||
if (isKnownClaudeOpus47OrNewerModelId(summary.modelId ?? "")) {
|
||||
return true;
|
||||
}
|
||||
const haystack = normalizeLowercaseStringOrEmpty(
|
||||
`${summary.modelId ?? ""} ${summary.modelName ?? ""}`,
|
||||
);
|
||||
@@ -301,6 +324,7 @@ function toModelDefinition(
|
||||
defaults: { contextWindow: number; maxTokens: number },
|
||||
): ModelDefinitionConfig {
|
||||
const id = summary.modelId?.trim() ?? "";
|
||||
const thinkingLevelMap = resolveKnownThinkingLevelMap(id);
|
||||
return {
|
||||
id,
|
||||
name: summary.modelName?.trim() || id,
|
||||
@@ -309,6 +333,7 @@ function toModelDefinition(
|
||||
cost: DEFAULT_COST,
|
||||
contextWindow: resolveKnownContextWindow(id) ?? defaults.contextWindow,
|
||||
maxTokens: defaults.maxTokens,
|
||||
...(thinkingLevelMap ? { thinkingLevelMap } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -420,11 +445,16 @@ function resolveInferenceProfiles(
|
||||
const baseModel = baseModelId
|
||||
? foundationModels.get(normalizeLowercaseStringOrEmpty(baseModelId))
|
||||
: undefined;
|
||||
const knownThinkingLevelMap = resolveKnownThinkingLevelMap(
|
||||
baseModelId ?? profile.inferenceProfileId,
|
||||
);
|
||||
|
||||
discovered.push({
|
||||
id: profile.inferenceProfileId,
|
||||
name: profile.inferenceProfileName?.trim() || profile.inferenceProfileId,
|
||||
reasoning: baseModel?.reasoning ?? false,
|
||||
reasoning:
|
||||
baseModel?.reasoning ??
|
||||
isKnownClaudeOpus47OrNewerModelId(baseModelId ?? profile.inferenceProfileId),
|
||||
input: baseModel?.input ?? ["text"],
|
||||
cost: baseModel?.cost ?? DEFAULT_COST,
|
||||
contextWindow:
|
||||
@@ -432,6 +462,9 @@ function resolveInferenceProfiles(
|
||||
resolveKnownContextWindow(baseModelId ?? profile.inferenceProfileId ?? "") ??
|
||||
defaults.contextWindow,
|
||||
maxTokens: baseModel?.maxTokens ?? defaults.maxTokens,
|
||||
...(baseModel?.thinkingLevelMap || knownThinkingLevelMap
|
||||
? { thinkingLevelMap: baseModel?.thinkingLevelMap ?? knownThinkingLevelMap }
|
||||
: {}),
|
||||
});
|
||||
}
|
||||
return discovered;
|
||||
|
||||
@@ -333,6 +333,27 @@ describe("amazon-bedrock provider plugin", () => {
|
||||
}
|
||||
});
|
||||
|
||||
// Covers three spellings of the same model: dashed profile id, dotted
// versioned id, and a full inference-profile ARN.
it("leaves Claude Opus 4.8 Bedrock model refs off by default", async () => {
  const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);

  for (const modelId of [
    "us.anthropic.claude-opus-4-8",
    "us.anthropic.claude-opus-4.8-v1:0",
    "arn:aws:bedrock:us-west-2:123456789012:inference-profile/us.anthropic.claude-opus-4-8",
  ]) {
    expectThinkingProfile(
      provider.resolveThinkingProfile?.({
        provider: "amazon-bedrock",
        modelId,
      } as never),
      {
        levelIds: ["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"],
        defaultLevel: "off",
      },
    );
  }
});
|
||||
|
||||
it("owns Anthropic-style replay policy for Claude Bedrock models", async () => {
|
||||
const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
|
||||
|
||||
@@ -432,6 +453,28 @@ describe("amazon-bedrock provider plugin", () => {
|
||||
expect(result).not.toHaveProperty("temperature");
|
||||
});
|
||||
|
||||
// The wrapped stream function must drop `temperature` from the call options
// for Opus 4.8 while leaving the other options (here `maxTokens`) intact.
it("omits temperature for Bedrock Opus 4.8 model ids", async () => {
  const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
  const wrapped = provider.wrapStreamFn?.({
    provider: "amazon-bedrock",
    modelId: "us.anthropic.claude-opus-4-8",
    streamFn: spyStreamFn,
  } as never);

  const result = wrapped?.(
    {
      api: "bedrock-converse-stream",
      provider: "amazon-bedrock",
      id: "us.anthropic.claude-opus-4-8",
    } as never,
    { messages: [] } as never,
    { temperature: 0.2, maxTokens: 10 },
  ) as Record<string, unknown> | undefined;

  expectWrappedResultFields(result, { maxTokens: 10 });
  expect(result).not.toHaveProperty("temperature");
});
|
||||
|
||||
it("omits temperature for dotted Bedrock Opus 4.7 model ids", async () => {
|
||||
const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
|
||||
const wrapped = provider.wrapStreamFn?.({
|
||||
@@ -590,6 +633,44 @@ describe("amazon-bedrock provider plugin", () => {
|
||||
expect(payload.additionalModelRequestFields.output_config).toEqual({ effort: "xhigh" });
|
||||
});
|
||||
|
||||
// With thinkingLevel "max", the onPayload hook must rewrite the effort to
// "max" and strip the payload-level temperature.
it("uses adaptive max thinking for Bedrock Opus 4.8", async () => {
  const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
  const wrapped = provider.wrapStreamFn?.({
    provider: "amazon-bedrock",
    modelId: "us.anthropic.claude-opus-4-8",
    streamFn: spyStreamFn,
    thinkingLevel: "max",
  } as never);

  const result = wrapped?.(
    {
      api: "bedrock-converse-stream",
      provider: "amazon-bedrock",
      id: "us.anthropic.claude-opus-4-8",
      name: "Claude Opus 4.8",
      reasoning: true,
    } as never,
    { messages: [] } as never,
    { reasoning: "max" } as never,
  ) as Record<string, unknown> | undefined;

  // Starting payload: effort "xhigh" plus a temperature the hook should remove.
  const payload = {
    inferenceConfig: { temperature: 0.2 },
    additionalModelRequestFields: {
      thinking: { type: "adaptive" },
      output_config: { effort: "xhigh" },
    },
  };

  await (result?.onPayload as ((p: Record<string, unknown>) => unknown) | undefined)?.(payload);

  expect(payload.additionalModelRequestFields).toEqual({
    thinking: { type: "adaptive" },
    output_config: { effort: "max" },
  });
  expect(payload.inferenceConfig).toEqual({});
});
|
||||
|
||||
it("classifies nested Bedrock deprecated-temperature validation as format failover", async () => {
|
||||
const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
|
||||
|
||||
|
||||
@@ -19,6 +19,26 @@ describe("amazon-bedrock provider-policy-api", () => {
|
||||
expect(profile?.defaultLevel).toBe("adaptive");
|
||||
});
|
||||
|
||||
// Policy-level check using a full inference-profile ARN: every level up to
// "max" is offered, but the default stays "off".
it("leaves Bedrock Claude Opus 4.8 thinking off by default with max effort available", () => {
  const profile = resolveThinkingProfile({
    provider: "amazon-bedrock",
    modelId:
      "arn:aws:bedrock:us-west-2:123456789012:inference-profile/us.anthropic.claude-opus-4-8",
  });

  expect(profile?.levels.map((level) => level.id)).toEqual([
    "off",
    "minimal",
    "low",
    "medium",
    "high",
    "xhigh",
    "adaptive",
    "max",
  ]);
  expect(profile?.defaultLevel).toBe("off");
});
|
||||
|
||||
it("exposes max thinking for Bedrock Claude Opus 4.7 refs", () => {
|
||||
expect(
|
||||
resolveThinkingProfile({
|
||||
|
||||
@@ -13,7 +13,10 @@ import { supportsBedrockPromptCaching } from "./bedrock-options.js";
|
||||
import { mergeImplicitBedrockProvider, resolveBedrockConfigApiKey } from "./discovery-shared.js";
|
||||
import { bedrockMemoryEmbeddingProviderAdapter } from "./memory-embedding-adapter.js";
|
||||
import { streamBedrock, streamSimpleBedrock } from "./stream.runtime.js";
|
||||
import { isOpus47BedrockModelRef, resolveBedrockClaudeThinkingProfile } from "./thinking-policy.js";
|
||||
import {
|
||||
isOpus47OrNewerBedrockModelRef,
|
||||
resolveBedrockClaudeThinkingProfile,
|
||||
} from "./thinking-policy.js";
|
||||
|
||||
type GuardrailConfig = {
|
||||
guardrailIdentifier: string;
|
||||
@@ -252,7 +255,7 @@ async function resolveAppProfileTraits(
|
||||
const traits = {
|
||||
cacheEligible:
|
||||
models.length > 0 && modelArns.every((modelArn) => resolvedModelSupportsCaching(modelArn)),
|
||||
omitTemperature: modelArns.some(isOpus47BedrockModelRef),
|
||||
omitTemperature: modelArns.some(isOpus47OrNewerBedrockModelRef),
|
||||
};
|
||||
appProfileTraitsCache.set(modelId, traits);
|
||||
return traits;
|
||||
@@ -261,7 +264,7 @@ async function resolveAppProfileTraits(
|
||||
// return the heuristic fallback but allow retry on the next request.
|
||||
return {
|
||||
cacheEligible: isAnthropicBedrockModel(modelId),
|
||||
omitTemperature: isOpus47BedrockModelRef(modelId),
|
||||
omitTemperature: isOpus47OrNewerBedrockModelRef(modelId),
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -388,7 +391,7 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
|
||||
modelId: string,
|
||||
options: TOptions,
|
||||
): TOptions {
|
||||
if (!isOpus47BedrockModelRef(modelId) || !("temperature" in options)) {
|
||||
if (!isOpus47OrNewerBedrockModelRef(modelId) || !("temperature" in options)) {
|
||||
return options;
|
||||
}
|
||||
const next = { ...options } as typeof options & { temperature?: unknown };
|
||||
@@ -513,7 +516,7 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
|
||||
currentPluginConfig?.discovery?.region;
|
||||
const mayNeedCacheInjection =
|
||||
isBedrockAppInferenceProfile(modelId) && !sharedRuntimeWouldInjectCachePoints(modelId);
|
||||
const shouldOmitTemperature = isOpus47BedrockModelRef(modelId);
|
||||
const shouldOmitTemperature = isOpus47OrNewerBedrockModelRef(modelId);
|
||||
const shouldPatchMaxThinking = shouldOmitTemperature && thinkingLevel === "max";
|
||||
|
||||
// For known Anthropic models (heuristic match), enable injection immediately.
|
||||
@@ -548,7 +551,9 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
|
||||
? {
|
||||
onPayload: (payload: unknown, payloadModel: unknown) => {
|
||||
if (payload && typeof payload === "object") {
|
||||
patchOpus47MaxThinkingEffort(payload as Record<string, unknown>);
|
||||
const payloadRecord = payload as Record<string, unknown>;
|
||||
patchOpus47MaxThinkingEffort(payloadRecord);
|
||||
omitDeprecatedOpus47PayloadTemperature(payloadRecord);
|
||||
}
|
||||
return originalOnPayload?.(payload, payloadModel);
|
||||
},
|
||||
@@ -584,7 +589,9 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
|
||||
if (shouldPatchMaxThinking) {
|
||||
patchOpus47MaxThinkingEffort(payloadRecord);
|
||||
}
|
||||
if (mayNeedTemperatureTrait) {
|
||||
if (shouldOmitTemperature) {
|
||||
omitDeprecatedOpus47PayloadTemperature(payloadRecord);
|
||||
} else if (mayNeedTemperatureTrait) {
|
||||
const traits = await resolveAppProfileTraits(modelId, region);
|
||||
if (traits.omitTemperature) {
|
||||
omitDeprecatedOpus47PayloadTemperature(payloadRecord);
|
||||
|
||||
@@ -89,3 +89,29 @@ describe("Bedrock profile endpoint resolution", () => {
|
||||
).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Bedrock thinking effort mapping", () => {
|
||||
it("clamps max effort for Claude models without native max support", () => {
|
||||
expect(
|
||||
testing.mapThinkingLevelToEffort(
|
||||
bedrockModel({
|
||||
id: "anthropic.claude-sonnet-4-6-v1:0",
|
||||
name: "Claude Sonnet 4.6",
|
||||
}),
|
||||
"max",
|
||||
),
|
||||
).toBe("high");
|
||||
});
|
||||
|
||||
it("preserves max effort for Claude Opus 4.8", () => {
|
||||
expect(
|
||||
testing.mapThinkingLevelToEffort(
|
||||
bedrockModel({
|
||||
id: "anthropic.claude-opus-4.8-v1:0",
|
||||
name: "Claude Opus 4.8",
|
||||
}),
|
||||
"max",
|
||||
),
|
||||
).toBe("max");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -473,13 +473,17 @@ function getModelMatchCandidates(modelId: string, modelName?: string): string[]
|
||||
function supportsAdaptiveThinking(modelId: string, modelName?: string): boolean {
|
||||
const candidates = getModelMatchCandidates(modelId, modelName);
|
||||
return candidates.some(
|
||||
(s) => s.includes("opus-4-6") || s.includes("opus-4-7") || s.includes("sonnet-4-6"),
|
||||
(s) =>
|
||||
s.includes("opus-4-6") ||
|
||||
s.includes("opus-4-7") ||
|
||||
s.includes("opus-4-8") ||
|
||||
s.includes("sonnet-4-6"),
|
||||
);
|
||||
}
|
||||
|
||||
function supportsNativeXhighEffort(model: Model<"bedrock-converse-stream">): boolean {
|
||||
const candidates = getModelMatchCandidates(model.id, model.name);
|
||||
return candidates.some((s) => s.includes("opus-4-7"));
|
||||
return candidates.some((s) => s.includes("opus-4-7") || s.includes("opus-4-8"));
|
||||
}
|
||||
|
||||
function mapThinkingLevelToEffort(
|
||||
@@ -503,6 +507,8 @@ function mapThinkingLevelToEffort(
|
||||
return "medium";
|
||||
case "high":
|
||||
return "high";
|
||||
case "max":
|
||||
return supportsNativeXhighEffort(model) ? "max" : "high";
|
||||
default:
|
||||
return "high";
|
||||
}
|
||||
@@ -887,6 +893,7 @@ function buildAdditionalModelRequestFields(
|
||||
medium: 8192,
|
||||
high: 16384,
|
||||
xhigh: 16384, // Claude doesn't support xhigh, clamp to high
|
||||
max: 16384,
|
||||
};
|
||||
|
||||
// Custom budgets override defaults (xhigh not in ThinkingBudgets, use high)
|
||||
@@ -945,5 +952,6 @@ export const testing = {
|
||||
convertMessages,
|
||||
getConfiguredBedrockRegion,
|
||||
hasConfiguredBedrockProfile,
|
||||
mapThinkingLevelToEffort,
|
||||
shouldUseExplicitBedrockEndpoint,
|
||||
};
|
||||
|
||||
@@ -8,14 +8,30 @@ const BASE_CLAUDE_THINKING_LEVELS = [
|
||||
{ id: "high" },
|
||||
] as const satisfies ProviderThinkingProfile["levels"];
|
||||
|
||||
/**
 * Reports whether a Bedrock model reference names Claude Opus 4.8.
 *
 * The match is case-insensitive and requires, in order:
 * - the start of the string or one of the separators "/", "." or ":";
 * - an optional region prefix (us, eu, ap, apac, au, jp, global) followed by ".";
 * - the literal "anthropic.claude-opus-4" followed by "." or "-" and then "8";
 * - the end of the string or one of "-", ".", ":" or "/", so that a longer
 *   version number such as "4-80" is not accepted.
 *
 * @param modelRef Model id, inference-profile id, or ARN-like reference to inspect.
 * @returns true when the reference matches the Opus 4.8 pattern.
 */
function isOpus48BedrockModelRef(modelRef: string): boolean {
  return /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?anthropic\.claude-opus-4[.-]8(?:$|[-.:/])/i.test(
    modelRef,
  );
}
|
||||
|
||||
/**
 * Reports whether a Bedrock model reference names Claude Opus 4.7.
 *
 * The match is case-insensitive and requires, in order:
 * - the start of the string or one of the separators "/", "." or ":";
 * - an optional region prefix (us, eu, ap, apac, au, jp, global) followed by ".";
 * - the literal "anthropic.claude-opus-4" followed by "." or "-" and then "7";
 * - the end of the string or one of "-", ".", ":" or "/", so that a longer
 *   version number such as "4-70" is not accepted.
 *
 * @param modelRef Model id, inference-profile id, or ARN-like reference to inspect.
 * @returns true when the reference matches the Opus 4.7 pattern.
 */
export function isOpus47BedrockModelRef(modelRef: string): boolean {
  return /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?anthropic\.claude-opus-4[.-]7(?:$|[-.:/])/i.test(
    modelRef,
  );
}
|
||||
|
||||
/**
 * Reports whether a Bedrock model reference names Claude Opus 4.7 or Claude Opus 4.8.
 *
 * NOTE(review): despite the "OrNewer" name, only these two versions are
 * recognised; a later Opus release will not match until its own matcher is
 * added to this disjunction.
 *
 * @param modelRef Model id, inference-profile id, or ARN-like reference to inspect.
 * @returns true when either the 4.7 or the 4.8 matcher accepts the reference.
 */
export function isOpus47OrNewerBedrockModelRef(modelRef: string): boolean {
  return isOpus47BedrockModelRef(modelRef) || isOpus48BedrockModelRef(modelRef);
}
|
||||
|
||||
export function resolveBedrockClaudeThinkingProfile(modelId: string): ProviderThinkingProfile {
|
||||
const trimmed = modelId.trim();
|
||||
if (isOpus48BedrockModelRef(trimmed)) {
|
||||
return {
|
||||
levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }],
|
||||
defaultLevel: "off",
|
||||
};
|
||||
}
|
||||
if (isOpus47BedrockModelRef(trimmed)) {
|
||||
return {
|
||||
levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }],
|
||||
|
||||
@@ -77,9 +77,14 @@ describe("anthropic-vertex provider plugin", () => {
|
||||
expect(result.provider.baseUrl).toBe("https://europe-west4-aiplatform.googleapis.com");
|
||||
expect(result.provider.headers).toEqual({ "x-test-header": "1" });
|
||||
expect(result.provider.models.map((model) => model.id)).toEqual([
|
||||
"claude-opus-4-8",
|
||||
"claude-opus-4-6",
|
||||
"claude-sonnet-4-6",
|
||||
]);
|
||||
expect(result.provider.models[0]?.thinkingLevelMap).toEqual({
|
||||
xhigh: "xhigh",
|
||||
max: "max",
|
||||
});
|
||||
});
|
||||
|
||||
it("owns Anthropic-style replay policy", async () => {
|
||||
@@ -103,6 +108,18 @@ describe("anthropic-vertex provider plugin", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("owns Anthropic-style thinking policy", async () => {
|
||||
const provider = await registerSingleProviderPlugin(anthropicVertexPlugin);
|
||||
|
||||
const opus48Profile = provider.resolveThinkingProfile?.({
|
||||
provider: "anthropic-vertex",
|
||||
modelId: "claude-opus-4-8",
|
||||
} as never);
|
||||
|
||||
expect(opus48Profile?.defaultLevel).toBe("off");
|
||||
expect(opus48Profile?.levels.map((level) => level.id)).toContain("max");
|
||||
});
|
||||
|
||||
it("resolves synthetic auth when ADC is available", async () => {
|
||||
hasAnthropicVertexAvailableAuthMock.mockReturnValue(true);
|
||||
const provider = await registerSingleProviderPlugin(anthropicVertexPlugin);
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
|
||||
import { readConfiguredProviderCatalogEntries } from "openclaw/plugin-sdk/provider-catalog-shared";
|
||||
import { NATIVE_ANTHROPIC_REPLAY_HOOKS } from "openclaw/plugin-sdk/provider-model-shared";
|
||||
import {
|
||||
NATIVE_ANTHROPIC_REPLAY_HOOKS,
|
||||
resolveClaudeThinkingProfile,
|
||||
} from "openclaw/plugin-sdk/provider-model-shared";
|
||||
import {
|
||||
hasAnthropicVertexAvailableAuth,
|
||||
mergeImplicitAnthropicVertexProvider,
|
||||
@@ -40,6 +43,7 @@ export default definePluginEntry({
|
||||
},
|
||||
resolveConfigApiKey: ({ env }) => resolveAnthropicVertexConfigApiKey(env),
|
||||
...NATIVE_ANTHROPIC_REPLAY_HOOKS,
|
||||
resolveThinkingProfile: ({ modelId }) => resolveClaudeThinkingProfile(modelId),
|
||||
resolveSyntheticAuth: () => {
|
||||
if (!hasAnthropicVertexAvailableAuth()) {
|
||||
return undefined;
|
||||
|
||||
@@ -15,6 +15,7 @@ function buildAnthropicVertexModel(params: {
|
||||
input: ModelDefinitionConfig["input"];
|
||||
cost: ModelDefinitionConfig["cost"];
|
||||
maxTokens: number;
|
||||
thinkingLevelMap?: ModelDefinitionConfig["thinkingLevelMap"];
|
||||
}): ModelDefinitionConfig {
|
||||
return {
|
||||
id: params.id,
|
||||
@@ -24,11 +25,21 @@ function buildAnthropicVertexModel(params: {
|
||||
cost: params.cost,
|
||||
contextWindow: ANTHROPIC_VERTEX_DEFAULT_CONTEXT_WINDOW,
|
||||
maxTokens: params.maxTokens,
|
||||
...(params.thinkingLevelMap ? { thinkingLevelMap: params.thinkingLevelMap } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
function buildAnthropicVertexCatalog(): ModelDefinitionConfig[] {
|
||||
return [
|
||||
buildAnthropicVertexModel({
|
||||
id: "claude-opus-4-8",
|
||||
name: "Claude Opus 4.8",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
|
||||
maxTokens: 128000,
|
||||
thinkingLevelMap: { xhigh: "xhigh", max: "max" },
|
||||
}),
|
||||
buildAnthropicVertexModel({
|
||||
id: "claude-opus-4-6",
|
||||
name: "Claude Opus 4.6",
|
||||
|
||||
29
extensions/anthropic-vertex/provider-policy-api.test.ts
Normal file
29
extensions/anthropic-vertex/provider-policy-api.test.ts
Normal file
@@ -0,0 +1,29 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { resolveThinkingProfile } from "./provider-policy-api.js";
|
||||
|
||||
describe("anthropic-vertex provider-policy-api", () => {
|
||||
it("leaves Claude Opus 4.8 thinking off by default with max effort support", () => {
|
||||
const profile = resolveThinkingProfile({
|
||||
provider: "anthropic-vertex",
|
||||
modelId: "claude-opus-4-8",
|
||||
});
|
||||
|
||||
expect(profile?.defaultLevel).toBe("off");
|
||||
expect(profile?.levels.map((level) => level.id)).toContain("max");
|
||||
});
|
||||
|
||||
it("keeps Claude Opus 4.7 thinking off by default", () => {
|
||||
const profile = resolveThinkingProfile({
|
||||
provider: "anthropic-vertex",
|
||||
modelId: "claude-opus-4-7",
|
||||
});
|
||||
|
||||
expect(profile?.defaultLevel).toBe("off");
|
||||
});
|
||||
|
||||
it("ignores other providers", () => {
|
||||
expect(resolveThinkingProfile({ provider: "anthropic", modelId: "claude-opus-4-8" })).toBe(
|
||||
null,
|
||||
);
|
||||
});
|
||||
});
|
||||
8
extensions/anthropic-vertex/provider-policy-api.ts
Normal file
8
extensions/anthropic-vertex/provider-policy-api.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
import { resolveClaudeThinkingProfile } from "openclaw/plugin-sdk/provider-model-shared";
|
||||
|
||||
/**
 * Resolves the thinking profile for a model served through the
 * "anthropic-vertex" provider.
 *
 * The provider name is compared after trimming whitespace and lowercasing.
 *
 * @param params.provider Provider id supplied by the caller.
 * @param params.modelId Model id forwarded unchanged to resolveClaudeThinkingProfile.
 * @returns null when the provider is not "anthropic-vertex"; otherwise whatever
 *   resolveClaudeThinkingProfile returns for the model id.
 */
export function resolveThinkingProfile(params: { provider: string; modelId: string }) {
  if (params.provider.trim().toLowerCase() !== "anthropic-vertex") {
    return null;
  }
  return resolveClaudeThinkingProfile(params.modelId);
}
|
||||
@@ -170,6 +170,30 @@ describe("createAnthropicVertexStreamFn", () => {
|
||||
expect(streamTransportOptions(streamAnthropicMock).maxTokens).toBe(128000);
|
||||
});
|
||||
|
||||
it.each(["claude-opus-4-8", "claude-opus-4-7"])(
|
||||
"omits unsupported temperature for %s",
|
||||
(modelId) => {
|
||||
const { deps, streamAnthropicMock } = createStreamDeps();
|
||||
const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
|
||||
const model = makeModel({ id: modelId, maxTokens: 128000 });
|
||||
|
||||
void streamFn(model, { messages: [] }, { temperature: 0.7 });
|
||||
|
||||
const transportOptions = streamTransportOptions(streamAnthropicMock);
|
||||
expect(Object.hasOwn(transportOptions, "temperature")).toBe(false);
|
||||
},
|
||||
);
|
||||
|
||||
it("preserves temperature for Vertex models that support custom sampling", () => {
|
||||
const { deps, streamAnthropicMock } = createStreamDeps();
|
||||
const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
|
||||
const model = makeModel({ id: "claude-sonnet-4-6", maxTokens: 128000 });
|
||||
|
||||
void streamFn(model, { messages: [] }, { temperature: 0.7 });
|
||||
|
||||
expect(streamTransportOptions(streamAnthropicMock).temperature).toBe(0.7);
|
||||
});
|
||||
|
||||
it("maps xhigh reasoning to max effort for adaptive Opus models", () => {
|
||||
const { deps, streamAnthropicMock } = createStreamDeps();
|
||||
const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
|
||||
@@ -182,10 +206,10 @@ describe("createAnthropicVertexStreamFn", () => {
|
||||
expect(transportOptions.effort).toBe("max");
|
||||
});
|
||||
|
||||
it("maps xhigh reasoning to xhigh effort for Opus 4.7", () => {
|
||||
it("maps xhigh reasoning to xhigh effort for Opus 4.8", () => {
|
||||
const { deps, streamAnthropicMock } = createStreamDeps();
|
||||
const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
|
||||
const model = makeModel({ id: "claude-opus-4-7", maxTokens: 64000 });
|
||||
const model = makeModel({ id: "claude-opus-4-8", maxTokens: 128000 });
|
||||
|
||||
void streamFn(model, { messages: [] }, { reasoning: "xhigh" });
|
||||
|
||||
@@ -194,6 +218,30 @@ describe("createAnthropicVertexStreamFn", () => {
|
||||
expect(transportOptions.effort).toBe("xhigh");
|
||||
});
|
||||
|
||||
it("preserves max reasoning for Opus 4.8", () => {
|
||||
const { deps, streamAnthropicMock } = createStreamDeps();
|
||||
const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
|
||||
const model = makeModel({ id: "claude-opus-4-8", maxTokens: 128000 });
|
||||
|
||||
void streamFn(model, { messages: [] }, { reasoning: "max" });
|
||||
|
||||
const transportOptions = streamTransportOptions(streamAnthropicMock);
|
||||
expect(transportOptions.thinkingEnabled).toBe(true);
|
||||
expect(transportOptions.effort).toBe("max");
|
||||
});
|
||||
|
||||
it("clamps max reasoning for adaptive models without native max support", () => {
|
||||
const { deps, streamAnthropicMock } = createStreamDeps();
|
||||
const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
|
||||
const model = makeModel({ id: "claude-sonnet-4-6", maxTokens: 128000 });
|
||||
|
||||
void streamFn(model, { messages: [] }, { reasoning: "max" });
|
||||
|
||||
const transportOptions = streamTransportOptions(streamAnthropicMock);
|
||||
expect(transportOptions.thinkingEnabled).toBe(true);
|
||||
expect(transportOptions.effort).toBe("high");
|
||||
});
|
||||
|
||||
it("applies Anthropic cache-boundary shaping before forwarding payload hooks", async () => {
|
||||
const { deps, streamAnthropicMock } = createStreamDeps();
|
||||
const onPayload = vi.fn(async (payload: unknown) => payload);
|
||||
|
||||
@@ -36,8 +36,13 @@ const defaultAnthropicVertexStreamDeps: AnthropicVertexStreamDeps = {
|
||||
streamAnthropic: streamDefault,
|
||||
};
|
||||
|
||||
function isClaudeOpus47Model(modelId: string): boolean {
|
||||
return modelId.includes("opus-4-7") || modelId.includes("opus-4.7");
|
||||
/**
 * Reports whether a model id refers to Claude Opus 4.7 or Claude Opus 4.8.
 *
 * This is a case-sensitive substring check that accepts both the dashed
 * ("opus-4-8") and dotted ("opus-4.8") version spellings.
 *
 * NOTE(review): despite the "OrNewer" name, only 4.7 and 4.8 are recognised.
 *
 * @param modelId Model id to inspect.
 * @returns true when the id contains one of the four recognised substrings.
 */
function isClaudeOpus47OrNewerModel(modelId: string): boolean {
  return (
    modelId.includes("opus-4-8") ||
    modelId.includes("opus-4.8") ||
    modelId.includes("opus-4-7") ||
    modelId.includes("opus-4.7")
  );
}
|
||||
|
||||
function isClaudeOpus46Model(modelId: string): boolean {
|
||||
@@ -46,7 +51,7 @@ function isClaudeOpus46Model(modelId: string): boolean {
|
||||
|
||||
function supportsAdaptiveThinking(modelId: string): boolean {
|
||||
return (
|
||||
isClaudeOpus47Model(modelId) ||
|
||||
isClaudeOpus47OrNewerModel(modelId) ||
|
||||
isClaudeOpus46Model(modelId) ||
|
||||
modelId.includes("sonnet-4-6") ||
|
||||
modelId.includes("sonnet-4.6")
|
||||
@@ -62,7 +67,12 @@ function mapAnthropicAdaptiveEffort(
|
||||
low: "low",
|
||||
medium: "medium",
|
||||
high: "high",
|
||||
xhigh: isClaudeOpus47Model(modelId) ? "xhigh" : isClaudeOpus46Model(modelId) ? "max" : "high",
|
||||
xhigh: isClaudeOpus47OrNewerModel(modelId)
|
||||
? "xhigh"
|
||||
: isClaudeOpus46Model(modelId)
|
||||
? "max"
|
||||
: "high",
|
||||
max: isClaudeOpus47OrNewerModel(modelId) ? "max" : "high",
|
||||
};
|
||||
return effortMap[reasoning] ?? "high";
|
||||
}
|
||||
@@ -148,9 +158,10 @@ export function createAnthropicVertexStreamFn(
|
||||
modelMaxTokens: transportModel.maxTokens,
|
||||
requestedMaxTokens: options?.maxTokens,
|
||||
});
|
||||
const temperature = isClaudeOpus47OrNewerModel(model.id) ? undefined : options?.temperature;
|
||||
const opts: AnthropicVertexTransportOptions = {
|
||||
client,
|
||||
temperature: options?.temperature,
|
||||
...(temperature !== undefined ? { temperature } : {}),
|
||||
...(maxTokens !== undefined ? { maxTokens } : {}),
|
||||
signal: options?.signal,
|
||||
cacheRetention: options?.cacheRetention,
|
||||
|
||||
@@ -2,7 +2,7 @@ import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/string-coer
|
||||
import { CLAUDE_CLI_BACKEND_ID, CLAUDE_CLI_MODEL_ALIASES } from "./cli-constants.js";
|
||||
|
||||
const DEFAULT_CLAUDE_MODEL_BY_FAMILY: Record<string, string> = {
|
||||
opus: "claude-opus-4-7",
|
||||
opus: "claude-opus-4-8",
|
||||
sonnet: "claude-sonnet-4-6",
|
||||
haiku: "claude-haiku-4-5",
|
||||
};
|
||||
@@ -96,18 +96,16 @@ function canonicalizeKnownClaudeCliModelId(modelId: string): string | null {
|
||||
if (defaultModel) {
|
||||
return attachModelAuthProfile(defaultModel, split.profile);
|
||||
}
|
||||
const family = CLAUDE_CLI_MODEL_ALIASES[normalized];
|
||||
if (!family) {
|
||||
return null;
|
||||
}
|
||||
const version = normalized.slice(`${family}-`.length);
|
||||
if (!version || version === normalized) {
|
||||
return null;
|
||||
}
|
||||
return attachModelAuthProfile(`claude-${family}-${version.replaceAll(".", "-")}`, split.profile);
|
||||
const aliasedModel = CLAUDE_CLI_MODEL_ALIASES[normalized];
|
||||
return aliasedModel?.startsWith("claude-")
|
||||
? attachModelAuthProfile(aliasedModel, split.profile)
|
||||
: null;
|
||||
}
|
||||
|
||||
function upgradeOldClaudeModelId(normalized: string): string | null {
|
||||
if (normalized.startsWith("claude-opus-4-8") || normalized.startsWith("claude-opus-4.8")) {
|
||||
return null;
|
||||
}
|
||||
if (normalized.startsWith("claude-opus-4-7") || normalized.startsWith("claude-opus-4.7")) {
|
||||
return null;
|
||||
}
|
||||
@@ -124,6 +122,8 @@ function upgradeOldClaudeModelId(normalized: string): string | null {
|
||||
if (
|
||||
normalized === "claude-opus-4" ||
|
||||
hasAnyRetiredVersionPrefix(normalized, [
|
||||
"claude-opus-4-7",
|
||||
"claude-opus-4.7",
|
||||
"claude-opus-4-5",
|
||||
"claude-opus-4.5",
|
||||
"claude-opus-4-1",
|
||||
@@ -133,7 +133,7 @@ function upgradeOldClaudeModelId(normalized: string): string | null {
|
||||
]) ||
|
||||
/^claude-opus-4-20\d{6}/.test(normalized)
|
||||
) {
|
||||
return "claude-opus-4-7";
|
||||
return "claude-opus-4-8";
|
||||
}
|
||||
if (
|
||||
normalized === "claude-sonnet-4" ||
|
||||
@@ -150,7 +150,7 @@ function upgradeOldClaudeModelId(normalized: string): string | null {
|
||||
return "claude-sonnet-4-6";
|
||||
}
|
||||
if (normalized.startsWith("claude-3") && normalized.includes("opus")) {
|
||||
return "claude-opus-4-7";
|
||||
return "claude-opus-4-8";
|
||||
}
|
||||
if (
|
||||
normalized.startsWith("claude-3") &&
|
||||
@@ -164,7 +164,7 @@ function upgradeOldClaudeModelId(normalized: string): string | null {
|
||||
normalized === "opus-4" ||
|
||||
normalized === "opus-3"
|
||||
) {
|
||||
return "claude-opus-4-7";
|
||||
return "claude-opus-4-8";
|
||||
}
|
||||
if (
|
||||
normalized === "sonnet-4.5" ||
|
||||
|
||||
@@ -5,13 +5,14 @@ import { CLAUDE_CLI_BACKEND_ID, CLAUDE_CLI_DEFAULT_ALLOWLIST_REFS } from "./cli-
|
||||
const CLAUDE_CLI_DEFAULT_CONTEXT_WINDOW = 200_000;
|
||||
|
||||
const CLAUDE_CLI_MODEL_LABELS: Record<string, string> = {
|
||||
"claude-opus-4-8": "Claude Opus 4.8 (Claude CLI)",
|
||||
"claude-opus-4-7": "Claude Opus 4.7 (Claude CLI)",
|
||||
"claude-opus-4-6": "Claude Opus 4.6 (Claude CLI)",
|
||||
"claude-sonnet-4-6": "Claude Sonnet 4.6 (Claude CLI)",
|
||||
};
|
||||
|
||||
function resolveClaudeCliImageMediaInput(id: string): ModelCatalogEntry["mediaInput"] {
|
||||
const maxSidePx = id === "claude-opus-4-7" ? 2576 : 1568;
|
||||
const maxSidePx = id === "claude-opus-4-8" || id === "claude-opus-4-7" ? 2576 : 1568;
|
||||
return {
|
||||
image: {
|
||||
maxSidePx,
|
||||
@@ -39,13 +40,15 @@ function extractClaudeCliModelIds(): string[] {
|
||||
}
|
||||
|
||||
export function buildClaudeCliCatalogEntries(): ModelCatalogEntry[] {
|
||||
return extractClaudeCliModelIds().map((id) => ({
|
||||
id,
|
||||
name: CLAUDE_CLI_MODEL_LABELS[id] ?? `${id} (Claude CLI)`,
|
||||
provider: CLAUDE_CLI_BACKEND_ID,
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
mediaInput: resolveClaudeCliImageMediaInput(id),
|
||||
contextWindow: CLAUDE_CLI_DEFAULT_CONTEXT_WINDOW,
|
||||
}));
|
||||
return extractClaudeCliModelIds().map((id) => {
|
||||
return {
|
||||
id,
|
||||
name: CLAUDE_CLI_MODEL_LABELS[id] ?? `${id} (Claude CLI)`,
|
||||
provider: CLAUDE_CLI_BACKEND_ID,
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
mediaInput: resolveClaudeCliImageMediaInput(id),
|
||||
contextWindow: id === "claude-opus-4-8" ? 1_048_576 : CLAUDE_CLI_DEFAULT_CONTEXT_WINDOW,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1,20 +1,23 @@
|
||||
export const CLAUDE_CLI_BACKEND_ID = "claude-cli";
|
||||
export const CLAUDE_CLI_DEFAULT_MODEL_REF = `${CLAUDE_CLI_BACKEND_ID}/claude-opus-4-7`;
|
||||
export const CLAUDE_CLI_DEFAULT_MODEL_REF = `${CLAUDE_CLI_BACKEND_ID}/claude-opus-4-8`;
|
||||
export const CLAUDE_CLI_DEFAULT_ALLOWLIST_REFS = [
|
||||
CLAUDE_CLI_DEFAULT_MODEL_REF,
|
||||
`${CLAUDE_CLI_BACKEND_ID}/claude-opus-4-7`,
|
||||
`${CLAUDE_CLI_BACKEND_ID}/claude-sonnet-4-6`,
|
||||
`${CLAUDE_CLI_BACKEND_ID}/claude-opus-4-6`,
|
||||
] as const;
|
||||
|
||||
export const CLAUDE_CLI_MODEL_ALIASES: Record<string, string> = {
|
||||
opus: "opus",
|
||||
"opus-4.7": "opus",
|
||||
"opus-4.6": "opus",
|
||||
"claude-opus-4-7": "opus",
|
||||
"claude-opus-4-6": "opus",
|
||||
"opus-4.8": "claude-opus-4-8",
|
||||
"opus-4.7": "claude-opus-4-7",
|
||||
"opus-4.6": "claude-opus-4-6",
|
||||
"claude-opus-4-8": "claude-opus-4-8",
|
||||
"claude-opus-4-7": "claude-opus-4-7",
|
||||
"claude-opus-4-6": "claude-opus-4-6",
|
||||
sonnet: "sonnet",
|
||||
"sonnet-4.6": "sonnet",
|
||||
"claude-sonnet-4-6": "sonnet",
|
||||
"sonnet-4.6": "claude-sonnet-4-6",
|
||||
"claude-sonnet-4-6": "claude-sonnet-4-6",
|
||||
haiku: "haiku",
|
||||
};
|
||||
|
||||
|
||||
@@ -38,10 +38,10 @@ afterAll(() => {
|
||||
describe("anthropic Claude model refs", () => {
|
||||
it("upgrades retired refs without rewriting future canonical refs", () => {
|
||||
expect(resolveKnownAnthropicModelRef("anthropic/claude-opus-4-5")).toBe(
|
||||
"anthropic/claude-opus-4-7",
|
||||
"anthropic/claude-opus-4-8",
|
||||
);
|
||||
expect(resolveKnownAnthropicModelRef("anthropic/claude-opus-4-5@anthropic:work")).toBe(
|
||||
"anthropic/claude-opus-4-7@anthropic:work",
|
||||
"anthropic/claude-opus-4-8@anthropic:work",
|
||||
);
|
||||
expect(resolveKnownAnthropicModelRef("anthropic/claude-sonnet-4-20250514")).toBe(
|
||||
"anthropic/claude-sonnet-4-6",
|
||||
@@ -182,6 +182,7 @@ describe("anthropic cli migration", () => {
|
||||
alias: "Opus",
|
||||
agentRuntime: { id: "claude-cli" },
|
||||
},
|
||||
"anthropic/claude-opus-4-8": { agentRuntime: { id: "claude-cli" } },
|
||||
"anthropic/claude-sonnet-4-6": { agentRuntime: { id: "claude-cli" } },
|
||||
"anthropic/claude-opus-4-6": {
|
||||
alias: "Opus",
|
||||
@@ -267,12 +268,13 @@ describe("anthropic cli migration", () => {
|
||||
},
|
||||
});
|
||||
|
||||
expect(result.defaultModel).toBe("anthropic/claude-opus-4-7");
|
||||
expect(result.defaultModel).toBe("anthropic/claude-opus-4-8");
|
||||
expect(result.configPatch).toEqual({
|
||||
agents: {
|
||||
defaults: {
|
||||
models: {
|
||||
"openai/gpt-5.2": {},
|
||||
"anthropic/claude-opus-4-8": { agentRuntime: { id: "claude-cli" } },
|
||||
"anthropic/claude-opus-4-7": { agentRuntime: { id: "claude-cli" } },
|
||||
"anthropic/claude-sonnet-4-6": { agentRuntime: { id: "claude-cli" } },
|
||||
"anthropic/claude-opus-4-6": { agentRuntime: { id: "claude-cli" } },
|
||||
@@ -294,7 +296,7 @@ describe("anthropic cli migration", () => {
|
||||
},
|
||||
});
|
||||
|
||||
expect(result.defaultModel).toBe("anthropic/claude-opus-4-7");
|
||||
expect(result.defaultModel).toBe("anthropic/claude-opus-4-8");
|
||||
expect(result.configPatch?.agents?.defaults?.model).toBeUndefined();
|
||||
expect(result.configPatch?.agents?.defaults?.models?.["anthropic/gpt-5.2"]).toBeUndefined();
|
||||
});
|
||||
@@ -316,6 +318,7 @@ describe("anthropic cli migration", () => {
|
||||
defaults: {
|
||||
model: { primary: "anthropic/claude-opus-4-7" },
|
||||
models: {
|
||||
"anthropic/claude-opus-4-8": { agentRuntime: { id: "claude-cli" } },
|
||||
"anthropic/claude-opus-4-7": { agentRuntime: { id: "claude-cli" } },
|
||||
"anthropic/claude-sonnet-4-6": { agentRuntime: { id: "claude-cli" } },
|
||||
"anthropic/claude-opus-4-6": { agentRuntime: { id: "claude-cli" } },
|
||||
@@ -499,6 +502,9 @@ describe("anthropic cli migration", () => {
|
||||
alias: "Opus",
|
||||
agentRuntime: { id: "claude-cli" },
|
||||
});
|
||||
expect(defaults?.models?.["anthropic/claude-opus-4-8"]).toEqual({
|
||||
agentRuntime: { id: "claude-cli" },
|
||||
});
|
||||
expect(defaults?.models?.["openai/gpt-5.2"]).toEqual({});
|
||||
});
|
||||
|
||||
|
||||
@@ -224,7 +224,7 @@ export function buildAnthropicCliMigrationResult(
|
||||
...rewrittenModels.runtimeRefs,
|
||||
...rewrittenModels.migrated,
|
||||
]);
|
||||
const defaultModel = rewrittenModel.primary ?? "anthropic/claude-opus-4-7";
|
||||
const defaultModel = rewrittenModel.primary ?? "anthropic/claude-opus-4-8";
|
||||
|
||||
return {
|
||||
profiles: buildClaudeCliAuthProfiles(credential),
|
||||
|
||||
@@ -76,6 +76,20 @@ describe("normalizeClaudeSettingSourcesArgs", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("Claude CLI model aliases", () => {
|
||||
it("keeps pinned Claude CLI model refs on exact selectors", () => {
|
||||
const aliases = buildAnthropicCliBackend().config.modelAliases;
|
||||
|
||||
expect(aliases?.["opus"]).toBe("opus");
|
||||
expect(aliases?.["opus-4.8"]).toBe("claude-opus-4-8");
|
||||
expect(aliases?.["opus-4.7"]).toBe("claude-opus-4-7");
|
||||
expect(aliases?.["opus-4.6"]).toBe("claude-opus-4-6");
|
||||
expect(aliases?.["claude-opus-4-8"]).toBe("claude-opus-4-8");
|
||||
expect(aliases?.["claude-opus-4-7"]).toBe("claude-opus-4-7");
|
||||
expect(aliases?.["claude-opus-4-6"]).toBe("claude-opus-4-6");
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveClaudeCliExecutionArgs", () => {
|
||||
it("omits effort args when thinking is off", () => {
|
||||
expect(
|
||||
|
||||
@@ -257,6 +257,7 @@ describe("anthropic provider replay hooks", () => {
|
||||
});
|
||||
const models = requireRecord(next?.agents?.defaults?.models, "models");
|
||||
for (const modelId of [
|
||||
"anthropic/claude-opus-4-8",
|
||||
"anthropic/claude-opus-4-7",
|
||||
"anthropic/claude-sonnet-4-6",
|
||||
"anthropic/claude-opus-4-6",
|
||||
@@ -444,15 +445,15 @@ describe("anthropic provider replay hooks", () => {
|
||||
expect(models["anthropic/claude-opus-5-0"]).toBeUndefined();
|
||||
});
|
||||
|
||||
it("resolves explicit claude-opus-4-7 refs from the 4.6 template family", async () => {
|
||||
it("resolves explicit claude-opus-4-8 refs from the 4.7 template family", async () => {
|
||||
const provider = await registerSingleProviderPlugin(anthropicPlugin);
|
||||
const resolved = provider.resolveDynamicModel?.({
|
||||
provider: "anthropic",
|
||||
modelId: "claude-opus-4-7",
|
||||
modelId: "claude-opus-4-8",
|
||||
modelRegistry: createModelRegistry([
|
||||
{
|
||||
id: "claude-opus-4-6",
|
||||
name: "Claude Opus 4.6",
|
||||
id: "claude-opus-4-7",
|
||||
name: "Claude Opus 4.7",
|
||||
provider: "anthropic",
|
||||
api: "anthropic-messages",
|
||||
reasoning: true,
|
||||
@@ -466,12 +467,22 @@ describe("anthropic provider replay hooks", () => {
|
||||
|
||||
expectFields(resolved, {
|
||||
provider: "anthropic",
|
||||
id: "claude-opus-4-7",
|
||||
id: "claude-opus-4-8",
|
||||
api: "anthropic-messages",
|
||||
reasoning: true,
|
||||
contextWindow: 1_048_576,
|
||||
contextTokens: 1_048_576,
|
||||
maxTokens: 128_000,
|
||||
});
|
||||
const opus48Profile = provider.resolveThinkingProfile?.({
|
||||
provider: "anthropic",
|
||||
modelId: "claude-opus-4-8",
|
||||
} as never);
|
||||
const opus48LevelIds = levelIds(opus48Profile);
|
||||
expect(opus48LevelIds).toContain("xhigh");
|
||||
expect(opus48LevelIds).toContain("adaptive");
|
||||
expect(opus48LevelIds).toContain("max");
|
||||
expect(requireRecord(opus48Profile, "opus 4.8 thinking profile").defaultLevel).toBe("off");
|
||||
const opus47Profile = provider.resolveThinkingProfile?.({
|
||||
provider: "anthropic",
|
||||
modelId: "claude-opus-4-7",
|
||||
@@ -593,6 +604,7 @@ describe("anthropic provider replay hooks", () => {
|
||||
const provider = await registerSingleProviderPlugin(anthropicPlugin);
|
||||
|
||||
for (const [runtimeProvider, modelId] of [
|
||||
["anthropic", "claude-opus-4-8"],
|
||||
["anthropic", "claude-opus-4-7"],
|
||||
["claude-cli", "claude-opus-4.7-20260219"],
|
||||
["anthropic", "claude-opus-4-6"],
|
||||
@@ -623,6 +635,32 @@ describe("anthropic provider replay hooks", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("normalizes Claude Opus 4.8 to 128k max output tokens", async () => {
|
||||
const provider = await registerSingleProviderPlugin(anthropicPlugin);
|
||||
|
||||
const normalized = provider.normalizeResolvedModel?.({
|
||||
provider: "anthropic",
|
||||
modelId: "claude-opus-4-8",
|
||||
model: {
|
||||
id: "claude-opus-4-8",
|
||||
name: "Claude Opus 4.8",
|
||||
provider: "anthropic",
|
||||
api: "anthropic-messages",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 200_000,
|
||||
maxTokens: 64_000,
|
||||
},
|
||||
} as never);
|
||||
|
||||
expectFields(normalized, {
|
||||
contextWindow: 1_048_576,
|
||||
contextTokens: 1_048_576,
|
||||
maxTokens: 128_000,
|
||||
});
|
||||
});
|
||||
|
||||
it("does not normalize legacy Claude 4.5 models to 1M context", async () => {
|
||||
const provider = await registerSingleProviderPlugin(anthropicPlugin);
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ import {
|
||||
export const anthropicMediaUnderstandingProvider: MediaUnderstandingProvider = {
|
||||
id: "anthropic",
|
||||
capabilities: ["image"],
|
||||
defaultModels: { image: "claude-opus-4-7" },
|
||||
defaultModels: { image: "claude-opus-4-8" },
|
||||
autoPriority: { image: 20 },
|
||||
nativeDocumentInputs: ["pdf"],
|
||||
describeImage: describeImageWithModel,
|
||||
|
||||
@@ -8,9 +8,20 @@
|
||||
"providerCatalogEntry": "./provider-discovery.ts",
|
||||
"modelCatalog": {
|
||||
"runtimeAugment": true,
|
||||
"providers": {
|
||||
"providers": {
|
||||
"claude-cli": {
|
||||
"models": [
|
||||
{
|
||||
"id": "claude-opus-4-8",
|
||||
"name": "Claude Opus 4.8 (Claude CLI)",
|
||||
"reasoning": true,
|
||||
"input": ["text", "image"],
|
||||
"mediaInput": {
|
||||
"image": { "maxSidePx": 2576, "preferredSidePx": 2576, "tokenMode": "provider" }
|
||||
},
|
||||
"contextWindow": 1048576,
|
||||
"maxTokens": 128000
|
||||
},
|
||||
{
|
||||
"id": "claude-opus-4-7",
|
||||
"name": "Claude Opus 4.7 (Claude CLI)",
|
||||
@@ -50,6 +61,17 @@
|
||||
"baseUrl": "https://api.anthropic.com",
|
||||
"api": "anthropic-messages",
|
||||
"models": [
|
||||
{
|
||||
"id": "claude-opus-4-8",
|
||||
"name": "Claude Opus 4.8",
|
||||
"reasoning": true,
|
||||
"input": ["text", "image"],
|
||||
"mediaInput": {
|
||||
"image": { "maxSidePx": 2576, "preferredSidePx": 2576, "tokenMode": "provider" }
|
||||
},
|
||||
"contextWindow": 1048576,
|
||||
"maxTokens": 128000
|
||||
},
|
||||
{
|
||||
"id": "claude-opus-4-7",
|
||||
"name": "Claude Opus 4.7",
|
||||
@@ -98,6 +120,8 @@
|
||||
"providers": {
|
||||
"anthropic": {
|
||||
"aliases": {
|
||||
"opus-4.8": "claude-opus-4-8",
|
||||
"opus": "claude-opus-4-8",
|
||||
"opus-4.6": "claude-opus-4-6",
|
||||
"sonnet-4.6": "claude-sonnet-4-6"
|
||||
}
|
||||
@@ -184,7 +208,7 @@
|
||||
"anthropic": {
|
||||
"capabilities": ["image"],
|
||||
"defaultModels": {
|
||||
"image": "claude-opus-4-7"
|
||||
"image": "claude-opus-4-8"
|
||||
},
|
||||
"autoPriority": {
|
||||
"image": 20
|
||||
|
||||
@@ -123,10 +123,10 @@ describe("anthropic provider policy public artifact", () => {
|
||||
).toBe("short");
|
||||
});
|
||||
|
||||
it("exposes Claude Opus 4.7 thinking levels without loading the full provider plugin", () => {
|
||||
it("exposes Claude Opus 4.8 thinking levels without loading the full provider plugin", () => {
|
||||
const profile = resolveThinkingProfile({
|
||||
provider: "anthropic",
|
||||
modelId: "claude-opus-4-7",
|
||||
modelId: "claude-opus-4-8",
|
||||
});
|
||||
const ids = levelIds(profile?.levels);
|
||||
expect(ids).toContain("xhigh");
|
||||
|
||||
@@ -45,10 +45,13 @@ import { wrapAnthropicProviderStream } from "./stream-wrappers.js";
|
||||
|
||||
const PROVIDER_ID = "anthropic";
|
||||
type UpsertAuthProfileParams = Parameters<typeof upsertAuthProfileWithLock>[0];
|
||||
const DEFAULT_ANTHROPIC_MODEL = "anthropic/claude-opus-4-7";
|
||||
const DEFAULT_ANTHROPIC_MODEL = "anthropic/claude-opus-4-8";
|
||||
const ANTHROPIC_OPUS_48_MODEL_ID = "claude-opus-4-8";
|
||||
const ANTHROPIC_OPUS_48_DOT_MODEL_ID = "claude-opus-4.8";
|
||||
const ANTHROPIC_OPUS_47_MODEL_ID = "claude-opus-4-7";
|
||||
const ANTHROPIC_OPUS_47_DOT_MODEL_ID = "claude-opus-4.7";
|
||||
const ANTHROPIC_GA_1M_CONTEXT_TOKENS = 1_048_576;
|
||||
const ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS = 128_000;
|
||||
const ANTHROPIC_OPUS_46_MODEL_ID = "claude-opus-4-6";
|
||||
const ANTHROPIC_OPUS_46_DOT_MODEL_ID = "claude-opus-4.6";
|
||||
const ANTHROPIC_OPUS_47_TEMPLATE_MODEL_IDS = [
|
||||
@@ -58,6 +61,8 @@ const ANTHROPIC_OPUS_47_TEMPLATE_MODEL_IDS = [
|
||||
const ANTHROPIC_SONNET_46_MODEL_ID = "claude-sonnet-4-6";
|
||||
const ANTHROPIC_SONNET_46_DOT_MODEL_ID = "claude-sonnet-4.6";
|
||||
const ANTHROPIC_GA_1M_MODEL_PREFIXES = [
|
||||
ANTHROPIC_OPUS_48_MODEL_ID,
|
||||
ANTHROPIC_OPUS_48_DOT_MODEL_ID,
|
||||
ANTHROPIC_OPUS_46_MODEL_ID,
|
||||
ANTHROPIC_OPUS_46_DOT_MODEL_ID,
|
||||
ANTHROPIC_OPUS_47_MODEL_ID,
|
||||
@@ -66,6 +71,8 @@ const ANTHROPIC_GA_1M_MODEL_PREFIXES = [
|
||||
ANTHROPIC_SONNET_46_DOT_MODEL_ID,
|
||||
] as const;
|
||||
const ANTHROPIC_MODERN_MODEL_PREFIXES = [
|
||||
"claude-opus-4-8",
|
||||
"claude-opus-4.8",
|
||||
"claude-opus-4-7",
|
||||
"claude-opus-4.7",
|
||||
"claude-opus-4-6",
|
||||
@@ -282,8 +289,10 @@ function buildAnthropicForwardCompatModel(
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 200_000,
|
||||
maxTokens: 64_000,
|
||||
contextWindow: isAnthropicGa1MModel(trimmedModelId) ? ANTHROPIC_GA_1M_CONTEXT_TOKENS : 200_000,
|
||||
maxTokens: isAnthropicOpus48Model(trimmedModelId)
|
||||
? ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS
|
||||
: 64_000,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -291,6 +300,14 @@ function resolveAnthropicForwardCompatModel(
|
||||
ctx: ProviderResolveDynamicModelContext,
|
||||
): ProviderRuntimeModel | undefined {
|
||||
return (
|
||||
resolveAnthropic46ForwardCompatModel({
|
||||
ctx,
|
||||
dashModelId: ANTHROPIC_OPUS_48_MODEL_ID,
|
||||
dotModelId: ANTHROPIC_OPUS_48_DOT_MODEL_ID,
|
||||
dashTemplateId: ANTHROPIC_OPUS_47_MODEL_ID,
|
||||
dotTemplateId: ANTHROPIC_OPUS_47_DOT_MODEL_ID,
|
||||
fallbackTemplateIds: ANTHROPIC_OPUS_47_TEMPLATE_MODEL_IDS,
|
||||
}) ??
|
||||
resolveAnthropic46ForwardCompatModel({
|
||||
ctx,
|
||||
dashModelId: ANTHROPIC_OPUS_47_MODEL_ID,
|
||||
@@ -324,6 +341,23 @@ function isAnthropicGa1MModel(modelId: string): boolean {
|
||||
return ANTHROPIC_GA_1M_MODEL_PREFIXES.some((prefix) => normalized.startsWith(prefix));
|
||||
}
|
||||
|
||||
/**
 * Reports whether a model id refers to Claude Opus 4.8.
 *
 * The id is first passed through `normalizeLowercaseStringOrEmpty` and then
 * compared by prefix, so suffixed ids that begin with either the dash
 * spelling (`ANTHROPIC_OPUS_48_MODEL_ID`) or the dot spelling
 * (`ANTHROPIC_OPUS_48_DOT_MODEL_ID`) also match.
 *
 * @param modelId - Model id as supplied by the caller.
 * @returns `true` when the normalized id starts with an Opus 4.8 prefix.
 */
function isAnthropicOpus48Model(modelId: string): boolean {
  const candidate = normalizeLowercaseStringOrEmpty(modelId);
  return (
    candidate.startsWith(ANTHROPIC_OPUS_48_MODEL_ID) ||
    candidate.startsWith(ANTHROPIC_OPUS_48_DOT_MODEL_ID)
  );
}
|
||||
|
||||
/**
 * Reports whether a model id refers to Claude Opus 4.7 or Claude Opus 4.8.
 *
 * "Or newer" is implemented as an explicit prefix list, not a version
 * comparison: only the dash/dot spellings of Opus 4.8 and Opus 4.7 are
 * recognized, so a later release has to be added here by hand.
 *
 * @param modelId - Model id as supplied by the caller.
 * @returns `true` when the normalized id starts with one of the listed prefixes.
 */
function isAnthropicOpus47OrNewerModel(modelId: string): boolean {
  const candidate = normalizeLowercaseStringOrEmpty(modelId);
  const recognizedPrefixes = [
    ANTHROPIC_OPUS_48_MODEL_ID,
    ANTHROPIC_OPUS_48_DOT_MODEL_ID,
    ANTHROPIC_OPUS_47_MODEL_ID,
    ANTHROPIC_OPUS_47_DOT_MODEL_ID,
  ];
  for (const prefix of recognizedPrefixes) {
    if (candidate.startsWith(prefix)) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
function hasConfiguredModelContextOverride(
|
||||
config: ProviderNormalizeResolvedModelContext["config"],
|
||||
provider: string,
|
||||
@@ -393,6 +427,45 @@ function applyAnthropicGa1MContextWindow(params: {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Raises the output-token limit of a Claude Opus 4.8 model to
 * `ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS` when the resolved model advertises less.
 *
 * @param params.modelId - Id used to decide whether the model is Opus 4.8.
 * @param params.model - Resolved runtime model; it is never mutated.
 * @returns A copy of `params.model` with `maxTokens` raised, or `undefined`
 *   when nothing needs to change (not Opus 4.8, or the limit is already at or
 *   above the ceiling). Callers fall back to their input with `??`.
 */
function applyAnthropicOpus48MaxTokens(params: {
  modelId: string;
  model: ProviderRuntimeModel;
}): ProviderRuntimeModel | undefined {
  const { modelId, model } = params;
  if (isAnthropicOpus48Model(modelId)) {
    // A missing maxTokens is treated as 0, so it always gets raised.
    const configuredLimit = model.maxTokens ?? 0;
    const alreadyAtCeiling = configuredLimit >= ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS;
    if (!alreadyAtCeiling) {
      return { ...model, maxTokens: ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS };
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Ensures a Claude Opus 4.7 / 4.8 model maps the `xhigh` and `max` thinking
 * levels to themselves in its `thinkingLevelMap`.
 *
 * Existing entries in the map are preserved; only `xhigh` and `max` are
 * overwritten.
 *
 * @param params.modelId - Id checked with `isAnthropicOpus47OrNewerModel`.
 * @param params.model - Resolved runtime model; it is never mutated.
 * @returns A copy of `params.model` with the two entries set, or `undefined`
 *   when the model is not Opus 4.7/4.8 or both entries are already in place.
 *   Callers fall back to their input with `??`.
 */
function applyAnthropicOpusThinkingLevelMap(params: {
  modelId: string;
  model: ProviderRuntimeModel;
}): ProviderRuntimeModel | undefined {
  const { modelId, model } = params;
  if (!isAnthropicOpus47OrNewerModel(modelId)) {
    return undefined;
  }

  const existingMap = model.thinkingLevelMap;
  const alreadyMapped = existingMap?.xhigh === "xhigh" && existingMap?.max === "max";
  if (alreadyMapped) {
    return undefined;
  }

  return {
    ...model,
    thinkingLevelMap: {
      // Spreading an absent map is a no-op, so this also covers models without one.
      ...existingMap,
      xhigh: "xhigh",
      max: "max",
    },
  };
}
|
||||
|
||||
function matchesAnthropicModernModel(modelId: string): boolean {
|
||||
const lower = normalizeLowercaseStringOrEmpty(modelId);
|
||||
return ANTHROPIC_MODERN_MODEL_PREFIXES.some((prefix) => lower.startsWith(prefix));
|
||||
@@ -413,15 +486,18 @@ function resolveAnthropicImageMediaInput(modelId: string, modelName?: string) {
|
||||
return undefined;
|
||||
}
|
||||
const refs = [modelId, modelName].filter((value): value is string => typeof value === "string");
|
||||
const opus47 = refs.some((ref) =>
|
||||
[ANTHROPIC_OPUS_47_MODEL_ID, ANTHROPIC_OPUS_47_DOT_MODEL_ID].some((prefix) =>
|
||||
normalizeLowercaseStringOrEmpty(ref).startsWith(prefix),
|
||||
),
|
||||
const largeImageOpus = refs.some((ref) =>
|
||||
[
|
||||
ANTHROPIC_OPUS_48_MODEL_ID,
|
||||
ANTHROPIC_OPUS_48_DOT_MODEL_ID,
|
||||
ANTHROPIC_OPUS_47_MODEL_ID,
|
||||
ANTHROPIC_OPUS_47_DOT_MODEL_ID,
|
||||
].some((prefix) => normalizeLowercaseStringOrEmpty(ref).startsWith(prefix)),
|
||||
);
|
||||
return {
|
||||
image: {
|
||||
maxSidePx: opus47 ? 2576 : 1568,
|
||||
preferredSidePx: opus47 ? 2576 : 1568,
|
||||
maxSidePx: largeImageOpus ? 2576 : 1568,
|
||||
preferredSidePx: largeImageOpus ? 2576 : 1568,
|
||||
tokenMode: "provider" as const,
|
||||
},
|
||||
};
|
||||
@@ -461,13 +537,23 @@ function normalizeAnthropicResolvedModel(
|
||||
},
|
||||
}
|
||||
: imageCapableModel;
|
||||
const outputModel =
|
||||
applyAnthropicOpus48MaxTokens({
|
||||
modelId: ctx.modelId,
|
||||
model: mediaInputModel,
|
||||
}) ?? mediaInputModel;
|
||||
const thinkingLevelModel =
|
||||
applyAnthropicOpusThinkingLevelMap({
|
||||
modelId: ctx.modelId,
|
||||
model: outputModel,
|
||||
}) ?? outputModel;
|
||||
const contextWindowModel =
|
||||
applyAnthropicGa1MContextWindow({
|
||||
config: ctx.config,
|
||||
provider: ctx.provider,
|
||||
modelId: ctx.modelId,
|
||||
model: mediaInputModel,
|
||||
}) ?? mediaInputModel;
|
||||
model: thinkingLevelModel,
|
||||
}) ?? thinkingLevelModel;
|
||||
return contextWindowModel === ctx.model ? undefined : contextWindowModel;
|
||||
}
|
||||
|
||||
@@ -682,13 +768,23 @@ export function buildAnthropicProvider(): ProviderPlugin {
|
||||
modelId: ctx.modelId,
|
||||
model,
|
||||
}) ?? model;
|
||||
const outputModel =
|
||||
applyAnthropicOpus48MaxTokens({
|
||||
modelId: ctx.modelId,
|
||||
model: imageCapableModel,
|
||||
}) ?? imageCapableModel;
|
||||
const thinkingLevelModel =
|
||||
applyAnthropicOpusThinkingLevelMap({
|
||||
modelId: ctx.modelId,
|
||||
model: outputModel,
|
||||
}) ?? outputModel;
|
||||
return (
|
||||
applyAnthropicGa1MContextWindow({
|
||||
config: ctx.config,
|
||||
provider: ctx.provider,
|
||||
modelId: ctx.modelId,
|
||||
model: imageCapableModel,
|
||||
}) ?? imageCapableModel
|
||||
model: thinkingLevelModel,
|
||||
}) ?? thinkingLevelModel
|
||||
);
|
||||
},
|
||||
normalizeResolvedModel: (ctx) => normalizeAnthropicResolvedModel(ctx),
|
||||
|
||||
@@ -20,6 +20,8 @@ const log = createSubsystemLogger("anthropic-stream");
|
||||
|
||||
const ANTHROPIC_CONTEXT_1M_BETA_LEGACY = "context-1m-2025-08-07";
|
||||
const ANTHROPIC_GA_1M_MODEL_PREFIXES = [
|
||||
"claude-opus-4-8",
|
||||
"claude-opus-4.8",
|
||||
"claude-opus-4-6",
|
||||
"claude-opus-4.6",
|
||||
"claude-opus-4-7",
|
||||
|
||||
@@ -417,7 +417,7 @@ function getGoogleThinkingBudget(
|
||||
effort: ThinkingLevel,
|
||||
customBudgets?: GoogleTransportOptions["thinkingBudgets"],
|
||||
): number | undefined {
|
||||
const normalizedEffort = effort === "xhigh" ? "high" : effort;
|
||||
const normalizedEffort = effort === "xhigh" || effort === "max" ? "high" : effort;
|
||||
if (customBudgets?.[normalizedEffort] !== undefined) {
|
||||
return customBudgets[normalizedEffort];
|
||||
}
|
||||
|
||||
@@ -153,7 +153,7 @@ describe("qa agentic parity report", () => {
|
||||
it("fails the parity gate when the candidate regresses against baseline", () => {
|
||||
const comparison = buildQaAgenticParityComparison({
|
||||
candidateLabel: "openai/gpt-5.5",
|
||||
baselineLabel: "anthropic/claude-opus-4-7",
|
||||
baselineLabel: "anthropic/claude-opus-4-8",
|
||||
candidateSummary: {
|
||||
scenarios: [
|
||||
{ name: "Approval turn tool followthrough", status: "pass" },
|
||||
@@ -181,10 +181,10 @@ describe("qa agentic parity report", () => {
|
||||
|
||||
expect(comparison.pass).toBe(false);
|
||||
expect(comparison.failures).toContain(
|
||||
"openai/gpt-5.5 completion rate 80.0% is below anthropic/claude-opus-4-7 100.0%.",
|
||||
"openai/gpt-5.5 completion rate 80.0% is below anthropic/claude-opus-4-8 100.0%.",
|
||||
);
|
||||
expect(comparison.failures).toContain(
|
||||
"openai/gpt-5.5 unintended-stop rate 20.0% exceeds anthropic/claude-opus-4-7 0.0%.",
|
||||
"openai/gpt-5.5 unintended-stop rate 20.0% exceeds anthropic/claude-opus-4-8 0.0%.",
|
||||
);
|
||||
});
|
||||
|
||||
@@ -199,7 +199,7 @@ describe("qa agentic parity report", () => {
|
||||
];
|
||||
const comparison = buildQaAgenticParityComparison({
|
||||
candidateLabel: "openai/gpt-5.5",
|
||||
baselineLabel: "anthropic/claude-opus-4-7",
|
||||
baselineLabel: "anthropic/claude-opus-4-8",
|
||||
candidateSummary: {
|
||||
scenarios: baselineScenarios.filter(
|
||||
(scenario) => scenario.name !== "Extra non-parity lane",
|
||||
@@ -211,14 +211,14 @@ describe("qa agentic parity report", () => {
|
||||
|
||||
expect(comparison.pass).toBe(false);
|
||||
expect(comparison.failures).toContain(
|
||||
"Scenario coverage mismatch for Extra non-parity lane: openai/gpt-5.5=missing, anthropic/claude-opus-4-7=pass.",
|
||||
"Scenario coverage mismatch for Extra non-parity lane: openai/gpt-5.5=missing, anthropic/claude-opus-4-8=pass.",
|
||||
);
|
||||
});
|
||||
|
||||
it("reports each missing required parity scenario exactly once (no double-counting)", () => {
|
||||
const comparison = buildQaAgenticParityComparison({
|
||||
candidateLabel: "openai/gpt-5.5",
|
||||
baselineLabel: "anthropic/claude-opus-4-7",
|
||||
baselineLabel: "anthropic/claude-opus-4-8",
|
||||
candidateSummary: {
|
||||
scenarios: [{ name: "Approval turn tool followthrough", status: "pass" }],
|
||||
},
|
||||
@@ -260,7 +260,7 @@ describe("qa agentic parity report", () => {
|
||||
|
||||
const comparison = buildQaAgenticParityComparison({
|
||||
candidateLabel: "openai/gpt-5.5",
|
||||
baselineLabel: "anthropic/claude-opus-4-7",
|
||||
baselineLabel: "anthropic/claude-opus-4-8",
|
||||
candidateSummary: summaryWithExtras,
|
||||
baselineSummary: scopedSummary,
|
||||
comparedAt: "2026-04-11T00:00:00.000Z",
|
||||
@@ -282,7 +282,7 @@ describe("qa agentic parity report", () => {
|
||||
it("fails the parity gate when required parity scenarios are missing on both sides", () => {
|
||||
const comparison = buildQaAgenticParityComparison({
|
||||
candidateLabel: "openai/gpt-5.5",
|
||||
baselineLabel: "anthropic/claude-opus-4-7",
|
||||
baselineLabel: "anthropic/claude-opus-4-8",
|
||||
candidateSummary: {
|
||||
scenarios: [{ name: "Approval turn tool followthrough", status: "pass" }],
|
||||
},
|
||||
@@ -294,14 +294,14 @@ describe("qa agentic parity report", () => {
|
||||
|
||||
expect(comparison.pass).toBe(false);
|
||||
expect(comparison.failures).toContain(
|
||||
"Missing required parity scenario coverage for Image understanding from attachment: openai/gpt-5.5=missing, anthropic/claude-opus-4-7=missing.",
|
||||
"Missing required parity scenario coverage for Image understanding from attachment: openai/gpt-5.5=missing, anthropic/claude-opus-4-8=missing.",
|
||||
);
|
||||
});
|
||||
|
||||
it("fails the parity gate when required parity scenarios are skipped", () => {
|
||||
const comparison = buildQaAgenticParityComparison({
|
||||
candidateLabel: "openai/gpt-5.5",
|
||||
baselineLabel: "anthropic/claude-opus-4-7",
|
||||
baselineLabel: "anthropic/claude-opus-4-8",
|
||||
candidateSummary: {
|
||||
scenarios: [
|
||||
{ name: "Approval turn tool followthrough", status: "pass" },
|
||||
@@ -325,7 +325,7 @@ describe("qa agentic parity report", () => {
|
||||
|
||||
expect(comparison.pass).toBe(false);
|
||||
expect(comparison.failures).toContain(
|
||||
"Missing required parity scenario coverage for Compaction retry after mutating tool: openai/gpt-5.5=skip, anthropic/claude-opus-4-7=skip.",
|
||||
"Missing required parity scenario coverage for Compaction retry after mutating tool: openai/gpt-5.5=skip, anthropic/claude-opus-4-8=skip.",
|
||||
);
|
||||
});
|
||||
|
||||
@@ -342,7 +342,7 @@ describe("qa agentic parity report", () => {
|
||||
});
|
||||
const comparison = buildQaAgenticParityComparison({
|
||||
candidateLabel: "openai/gpt-5.5",
|
||||
baselineLabel: "anthropic/claude-opus-4-7",
|
||||
baselineLabel: "anthropic/claude-opus-4-8",
|
||||
candidateSummary: { scenarios: scenariosWithBothFail },
|
||||
baselineSummary: { scenarios: scenariosWithBothFail },
|
||||
comparedAt: "2026-04-11T00:00:00.000Z",
|
||||
@@ -350,7 +350,7 @@ describe("qa agentic parity report", () => {
|
||||
|
||||
expect(comparison.pass).toBe(false);
|
||||
expect(comparison.failures).toContain(
|
||||
"Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-7=fail.",
|
||||
"Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-8=fail.",
|
||||
);
|
||||
// Metric comparisons are relative, so a same-on-both-sides failure
|
||||
// must not appear as a relative metric failure. The required-scenario
|
||||
@@ -370,7 +370,7 @@ describe("qa agentic parity report", () => {
|
||||
});
|
||||
const comparison = buildQaAgenticParityComparison({
|
||||
candidateLabel: "openai/gpt-5.5",
|
||||
baselineLabel: "anthropic/claude-opus-4-7",
|
||||
baselineLabel: "anthropic/claude-opus-4-8",
|
||||
candidateSummary: { scenarios: candidateWithOneFail },
|
||||
baselineSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
|
||||
comparedAt: "2026-04-11T00:00:00.000Z",
|
||||
@@ -378,7 +378,7 @@ describe("qa agentic parity report", () => {
|
||||
|
||||
expect(comparison.pass).toBe(false);
|
||||
expect(comparison.failures).toContain(
|
||||
"Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-7=pass.",
|
||||
"Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-8=pass.",
|
||||
);
|
||||
});
|
||||
|
||||
@@ -387,7 +387,7 @@ describe("qa agentic parity report", () => {
|
||||
// below is the isolated gate failure under test (no coverage-gap noise).
|
||||
const comparison = buildQaAgenticParityComparison({
|
||||
candidateLabel: "openai/gpt-5.5",
|
||||
baselineLabel: "anthropic/claude-opus-4-7",
|
||||
baselineLabel: "anthropic/claude-opus-4-8",
|
||||
candidateSummary: {
|
||||
scenarios: FULL_PARITY_PASS_SCENARIOS,
|
||||
},
|
||||
@@ -401,7 +401,7 @@ describe("qa agentic parity report", () => {
|
||||
|
||||
expect(comparison.pass).toBe(false);
|
||||
expect(comparison.failures).toEqual([
|
||||
"anthropic/claude-opus-4-7 produced 1 suspicious pass result(s); baseline fake-success count must also be 0.",
|
||||
"anthropic/claude-opus-4-8 produced 1 suspicious pass result(s); baseline fake-success count must also be 0.",
|
||||
]);
|
||||
});
|
||||
|
||||
@@ -571,14 +571,14 @@ status=done`,
|
||||
expect(() =>
|
||||
buildQaAgenticParityComparison({
|
||||
candidateLabel: "openai/gpt-5.5",
|
||||
baselineLabel: "anthropic/claude-opus-4-7",
|
||||
baselineLabel: "anthropic/claude-opus-4-8",
|
||||
candidateSummary: {
|
||||
scenarios: parityPassScenarios,
|
||||
run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-7" },
|
||||
run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-8" },
|
||||
},
|
||||
baselineSummary: {
|
||||
scenarios: parityPassScenarios,
|
||||
run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-7" },
|
||||
run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-8" },
|
||||
},
|
||||
comparedAt: "2026-04-11T00:00:00.000Z",
|
||||
}),
|
||||
@@ -593,7 +593,7 @@ status=done`,
|
||||
expect(() =>
|
||||
buildQaAgenticParityComparison({
|
||||
candidateLabel: "openai/gpt-5.5",
|
||||
baselineLabel: "anthropic/claude-opus-4-7",
|
||||
baselineLabel: "anthropic/claude-opus-4-8",
|
||||
candidateSummary: {
|
||||
scenarios: parityPassScenarios,
|
||||
run: { primaryProvider: "openai" },
|
||||
@@ -612,7 +612,7 @@ status=done`,
|
||||
it("accepts matching run.primaryProvider labels without throwing", () => {
|
||||
const comparison = buildQaAgenticParityComparison({
|
||||
candidateLabel: "openai/gpt-5.5",
|
||||
baselineLabel: "anthropic/claude-opus-4-7",
|
||||
baselineLabel: "anthropic/claude-opus-4-8",
|
||||
candidateSummary: {
|
||||
scenarios: FULL_PARITY_PASS_SCENARIOS,
|
||||
run: {
|
||||
@@ -625,8 +625,8 @@ status=done`,
|
||||
scenarios: FULL_PARITY_PASS_SCENARIOS,
|
||||
run: {
|
||||
primaryProvider: "anthropic",
|
||||
primaryModel: "anthropic/claude-opus-4-7",
|
||||
primaryModelName: "claude-opus-4-7",
|
||||
primaryModel: "anthropic/claude-opus-4-8",
|
||||
primaryModelName: "claude-opus-4-8",
|
||||
},
|
||||
},
|
||||
comparedAt: "2026-04-11T00:00:00.000Z",
|
||||
@@ -639,7 +639,7 @@ status=done`,
|
||||
// work against those, trusting the caller-supplied label.
|
||||
const comparison = buildQaAgenticParityComparison({
|
||||
candidateLabel: "openai/gpt-5.5",
|
||||
baselineLabel: "anthropic/claude-opus-4-7",
|
||||
baselineLabel: "anthropic/claude-opus-4-8",
|
||||
candidateSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
|
||||
baselineSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
|
||||
comparedAt: "2026-04-11T00:00:00.000Z",
|
||||
@@ -650,7 +650,7 @@ status=done`,
|
||||
it("skips provider verification for arbitrary display labels when run metadata is present", () => {
|
||||
const comparison = buildQaAgenticParityComparison({
|
||||
candidateLabel: "GPT-5.5 candidate",
|
||||
baselineLabel: "Opus 4.7 baseline",
|
||||
baselineLabel: "Opus 4.8 baseline",
|
||||
candidateSummary: {
|
||||
scenarios: FULL_PARITY_PASS_SCENARIOS,
|
||||
run: {
|
||||
@@ -663,8 +663,8 @@ status=done`,
|
||||
scenarios: FULL_PARITY_PASS_SCENARIOS,
|
||||
run: {
|
||||
primaryProvider: "anthropic",
|
||||
primaryModel: "anthropic/claude-opus-4-7",
|
||||
primaryModelName: "claude-opus-4-7",
|
||||
primaryModel: "anthropic/claude-opus-4-8",
|
||||
primaryModelName: "claude-opus-4-8",
|
||||
},
|
||||
},
|
||||
comparedAt: "2026-04-11T00:00:00.000Z",
|
||||
@@ -676,7 +676,7 @@ status=done`,
|
||||
it("skips provider verification for mixed-case or decorated display labels", () => {
|
||||
const comparison = buildQaAgenticParityComparison({
|
||||
candidateLabel: "Candidate: GPT-5.5",
|
||||
baselineLabel: "Opus 4.7 / baseline",
|
||||
baselineLabel: "Opus 4.8 / baseline",
|
||||
candidateSummary: {
|
||||
scenarios: FULL_PARITY_PASS_SCENARIOS,
|
||||
run: {
|
||||
@@ -689,8 +689,8 @@ status=done`,
|
||||
scenarios: FULL_PARITY_PASS_SCENARIOS,
|
||||
run: {
|
||||
primaryProvider: "anthropic",
|
||||
primaryModel: "anthropic/claude-opus-4-7",
|
||||
primaryModelName: "claude-opus-4-7",
|
||||
primaryModel: "anthropic/claude-opus-4-8",
|
||||
primaryModelName: "claude-opus-4-8",
|
||||
},
|
||||
},
|
||||
comparedAt: "2026-04-11T00:00:00.000Z",
|
||||
@@ -703,7 +703,7 @@ status=done`,
|
||||
expect(() =>
|
||||
buildQaAgenticParityComparison({
|
||||
candidateLabel: "openai/gpt-5.5",
|
||||
baselineLabel: "anthropic/claude-opus-4-7",
|
||||
baselineLabel: "anthropic/claude-opus-4-8",
|
||||
candidateSummary: {
|
||||
scenarios: FULL_PARITY_PASS_SCENARIOS,
|
||||
run: {
|
||||
@@ -716,8 +716,8 @@ status=done`,
|
||||
scenarios: FULL_PARITY_PASS_SCENARIOS,
|
||||
run: {
|
||||
primaryProvider: "anthropic",
|
||||
primaryModel: "anthropic/claude-opus-4-7",
|
||||
primaryModelName: "claude-opus-4-7",
|
||||
primaryModel: "anthropic/claude-opus-4-8",
|
||||
primaryModelName: "claude-opus-4-8",
|
||||
},
|
||||
},
|
||||
comparedAt: "2026-04-11T00:00:00.000Z",
|
||||
@@ -730,7 +730,7 @@ status=done`,
|
||||
it("accepts colon-delimited structured labels when provider and model both match", () => {
|
||||
const comparison = buildQaAgenticParityComparison({
|
||||
candidateLabel: "openai:gpt-5.5",
|
||||
baselineLabel: "anthropic:claude-opus-4-7",
|
||||
baselineLabel: "anthropic:claude-opus-4-8",
|
||||
candidateSummary: {
|
||||
scenarios: FULL_PARITY_PASS_SCENARIOS,
|
||||
run: {
|
||||
@@ -743,8 +743,8 @@ status=done`,
|
||||
scenarios: FULL_PARITY_PASS_SCENARIOS,
|
||||
run: {
|
||||
primaryProvider: "anthropic",
|
||||
primaryModel: "anthropic/claude-opus-4-7",
|
||||
primaryModelName: "claude-opus-4-7",
|
||||
primaryModel: "anthropic/claude-opus-4-8",
|
||||
primaryModelName: "claude-opus-4-8",
|
||||
},
|
||||
},
|
||||
comparedAt: "2026-04-11T00:00:00.000Z",
|
||||
@@ -759,7 +759,7 @@ status=done`,
|
||||
// added by the second-wave expansion.
|
||||
const comparison = buildQaAgenticParityComparison({
|
||||
candidateLabel: "openai/gpt-5.5",
|
||||
baselineLabel: "anthropic/claude-opus-4-7",
|
||||
baselineLabel: "anthropic/claude-opus-4-8",
|
||||
candidateSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
|
||||
baselineSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
|
||||
comparedAt: "2026-04-11T00:00:00.000Z",
|
||||
@@ -768,7 +768,7 @@ status=done`,
|
||||
const report = renderQaAgenticParityMarkdownReport(comparison);
|
||||
|
||||
expect(report).toContain(
|
||||
"# OpenClaw Agentic Parity Report — openai/gpt-5.5 vs anthropic/claude-opus-4-7",
|
||||
"# OpenClaw Agentic Parity Report — openai/gpt-5.5 vs anthropic/claude-opus-4-8",
|
||||
);
|
||||
expect(report).toContain("| Completion rate | 100.0% | 100.0% |");
|
||||
expect(report).toContain("### Approval turn tool followthrough");
|
||||
@@ -779,7 +779,7 @@ status=done`,
|
||||
// Regression for the loop-7 Copilot finding: callers that configure
|
||||
// non-gpt-5.5 / non-opus labels (for example an internal candidate vs
|
||||
// another candidate) must see the labels in the rendered H1 instead of
|
||||
// the hardcoded "GPT-5.5 / Opus 4.7" title that would otherwise confuse
|
||||
// the hardcoded "GPT-5.5 / Opus 4.8" title that would otherwise confuse
|
||||
// readers of saved reports.
|
||||
const comparison = buildQaAgenticParityComparison({
|
||||
candidateLabel: "openai/gpt-5.5-alt",
|
||||
|
||||
@@ -566,7 +566,7 @@ export function renderQaAgenticParityMarkdownReport(comparison: QaAgenticParityC
|
||||
// Title is parametrized from the candidate / baseline labels so reports
|
||||
// for any candidate/baseline pair (not only gpt-5.5 vs opus 4.6) render
|
||||
// with an accurate header. The default CLI labels are still
|
||||
// openai/gpt-5.5 vs anthropic/claude-opus-4-7, but the helper works for
|
||||
// openai/gpt-5.5 vs anthropic/claude-opus-4-8, but the helper works for
|
||||
// any parity comparison a caller configures.
|
||||
const lines = [
|
||||
`# OpenClaw Agentic Parity Report — ${comparison.candidateLabel} vs ${comparison.baselineLabel}`,
|
||||
|
||||
@@ -274,7 +274,7 @@ describe("runQaCharacterEval", () => {
|
||||
{ model: "openai/gpt-5.5", rank: 1, score: 8, summary: "ok" },
|
||||
{ model: "openai/gpt-5.2", rank: 2, score: 7.5, summary: "ok" },
|
||||
{ model: "openai/gpt-5", rank: 3, score: 7.2, summary: "ok" },
|
||||
{ model: "anthropic/claude-opus-4-7", rank: 4, score: 7, summary: "ok" },
|
||||
{ model: "anthropic/claude-opus-4-8", rank: 4, score: 7, summary: "ok" },
|
||||
{ model: "anthropic/claude-sonnet-4-6", rank: 5, score: 6.8, summary: "ok" },
|
||||
{ model: "zai/glm-5.1", rank: 6, score: 6.3, summary: "ok" },
|
||||
{ model: "moonshot/kimi-k2.5", rank: 7, score: 6.2, summary: "ok" },
|
||||
@@ -294,7 +294,7 @@ describe("runQaCharacterEval", () => {
|
||||
"openai/gpt-5.5",
|
||||
"openai/gpt-5.2",
|
||||
"openai/gpt-5",
|
||||
"anthropic/claude-opus-4-7",
|
||||
"anthropic/claude-opus-4-8",
|
||||
"anthropic/claude-sonnet-4-6",
|
||||
"zai/glm-5.1",
|
||||
"moonshot/kimi-k2.5",
|
||||
@@ -323,7 +323,7 @@ describe("runQaCharacterEval", () => {
|
||||
expect(runJudge).toHaveBeenCalledTimes(2);
|
||||
expect(runJudge.mock.calls.map(([params]) => params.judgeModel)).toEqual([
|
||||
"openai/gpt-5.5",
|
||||
"anthropic/claude-opus-4-7",
|
||||
"anthropic/claude-opus-4-8",
|
||||
]);
|
||||
expect(runJudge.mock.calls.map(([params]) => params.judgeThinkingDefault)).toEqual([
|
||||
"xhigh",
|
||||
@@ -577,11 +577,11 @@ describe("runQaCharacterEval", () => {
|
||||
candidateModelOptions: {
|
||||
"openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: false },
|
||||
},
|
||||
judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-7"],
|
||||
judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-8"],
|
||||
judgeThinkingDefault: "medium",
|
||||
judgeModelOptions: {
|
||||
"openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: true },
|
||||
"anthropic/claude-opus-4-7": { thinkingDefault: "high" },
|
||||
"anthropic/claude-opus-4-8": { thinkingDefault: "high" },
|
||||
},
|
||||
runSuite,
|
||||
runJudge,
|
||||
|
||||
@@ -662,7 +662,7 @@ describe("qa cli runtime", () => {
|
||||
repoRoot: "/tmp/openclaw-repo",
|
||||
providerMode: "mock-openai",
|
||||
primaryModel: "openai/gpt-5.5",
|
||||
alternateModel: "anthropic/claude-opus-4-7",
|
||||
alternateModel: "anthropic/claude-opus-4-8",
|
||||
preflight: true,
|
||||
});
|
||||
|
||||
@@ -672,7 +672,7 @@ describe("qa cli runtime", () => {
|
||||
transportId: "qa-channel",
|
||||
providerMode: "mock-openai",
|
||||
primaryModel: "openai/gpt-5.5",
|
||||
alternateModel: "anthropic/claude-opus-4-7",
|
||||
alternateModel: "anthropic/claude-opus-4-8",
|
||||
scenarioIds: ["approval-turn-tool-followthrough"],
|
||||
concurrency: 1,
|
||||
});
|
||||
@@ -1261,7 +1261,7 @@ describe("qa cli runtime", () => {
|
||||
fast: true,
|
||||
thinking: "medium",
|
||||
modelThinking: ["codex-cli/test-model=medium"],
|
||||
judgeModel: ["openai/gpt-5.5,thinking=xhigh,fast", "anthropic/claude-opus-4-7,thinking=high"],
|
||||
judgeModel: ["openai/gpt-5.5,thinking=xhigh,fast", "anthropic/claude-opus-4-8,thinking=high"],
|
||||
judgeTimeoutMs: 180_000,
|
||||
blindJudgeModels: true,
|
||||
concurrency: 4,
|
||||
@@ -1282,10 +1282,10 @@ describe("qa cli runtime", () => {
|
||||
"openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: false },
|
||||
"codex-cli/test-model": { thinkingDefault: "high", fastMode: true },
|
||||
},
|
||||
judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-7"],
|
||||
judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-8"],
|
||||
judgeModelOptions: {
|
||||
"openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: true },
|
||||
"anthropic/claude-opus-4-7": { thinkingDefault: "high" },
|
||||
"anthropic/claude-opus-4-8": { thinkingDefault: "high" },
|
||||
},
|
||||
judgeTimeoutMs: 180_000,
|
||||
judgeBlindModels: true,
|
||||
@@ -1616,7 +1616,7 @@ describe("qa cli runtime", () => {
|
||||
providerMode: "mock-openai",
|
||||
parityPack: "agentic",
|
||||
primaryModel: "openai/gpt-5.5",
|
||||
alternateModel: "anthropic/claude-opus-4-7",
|
||||
alternateModel: "anthropic/claude-opus-4-8",
|
||||
});
|
||||
|
||||
expect(runQaSuiteFromRuntime).toHaveBeenCalledWith({
|
||||
@@ -1625,7 +1625,7 @@ describe("qa cli runtime", () => {
|
||||
transportId: "qa-channel",
|
||||
providerMode: "mock-openai",
|
||||
primaryModel: "openai/gpt-5.5",
|
||||
alternateModel: "anthropic/claude-opus-4-7",
|
||||
alternateModel: "anthropic/claude-opus-4-8",
|
||||
fastMode: undefined,
|
||||
scenarioIds: [
|
||||
"approval-turn-tool-followthrough",
|
||||
|
||||
@@ -8,7 +8,7 @@ describe("qa live timeout policy", () => {
|
||||
{
|
||||
providerMode: "mock-openai",
|
||||
primaryModel: "anthropic/claude-sonnet-4-6",
|
||||
alternateModel: "anthropic/claude-opus-4-7",
|
||||
alternateModel: "anthropic/claude-opus-4-8",
|
||||
},
|
||||
30_000,
|
||||
),
|
||||
@@ -47,7 +47,7 @@ describe("qa live timeout policy", () => {
|
||||
{
|
||||
providerMode: "live-frontier",
|
||||
primaryModel: "anthropic/claude-sonnet-4-6",
|
||||
alternateModel: "anthropic/claude-opus-4-7",
|
||||
alternateModel: "anthropic/claude-opus-4-8",
|
||||
},
|
||||
30_000,
|
||||
),
|
||||
@@ -60,10 +60,10 @@ describe("qa live timeout policy", () => {
|
||||
{
|
||||
providerMode: "live-frontier",
|
||||
primaryModel: "anthropic/claude-sonnet-4-6",
|
||||
alternateModel: "anthropic/claude-opus-4-7",
|
||||
alternateModel: "anthropic/claude-opus-4-8",
|
||||
},
|
||||
30_000,
|
||||
"anthropic/claude-opus-4-7",
|
||||
"anthropic/claude-opus-4-8",
|
||||
),
|
||||
).toBe(240_000);
|
||||
});
|
||||
|
||||
@@ -9,7 +9,7 @@ export const QA_FRONTIER_CHARACTER_EVAL_MODELS = Object.freeze([
|
||||
"openai/gpt-5.5",
|
||||
"openai/gpt-5.2",
|
||||
"openai/gpt-5",
|
||||
"anthropic/claude-opus-4-7",
|
||||
"anthropic/claude-opus-4-8",
|
||||
"anthropic/claude-sonnet-4-6",
|
||||
"zai/glm-5.1",
|
||||
"moonshot/kimi-k2.5",
|
||||
@@ -25,12 +25,12 @@ export const QA_FRONTIER_CHARACTER_THINKING_BY_MODEL: Readonly<Record<string, Qa
|
||||
|
||||
export const QA_FRONTIER_CHARACTER_JUDGE_MODELS = Object.freeze([
|
||||
"openai/gpt-5.5",
|
||||
"anthropic/claude-opus-4-7",
|
||||
"anthropic/claude-opus-4-8",
|
||||
]);
|
||||
|
||||
export const QA_FRONTIER_CHARACTER_JUDGE_MODEL_OPTIONS: Readonly<
|
||||
Record<string, QaFrontierCharacterModelOptions>
|
||||
> = Object.freeze({
|
||||
"openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: true },
|
||||
"anthropic/claude-opus-4-7": { thinkingDefault: "high" },
|
||||
"anthropic/claude-opus-4-8": { thinkingDefault: "high" },
|
||||
});
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
export const QA_FRONTIER_PARITY_CANDIDATE_LABEL = "openai/gpt-5.5";
|
||||
export const QA_FRONTIER_PARITY_BASELINE_LABEL = "anthropic/claude-opus-4-7";
|
||||
export const QA_FRONTIER_PARITY_BASELINE_LABEL = "anthropic/claude-opus-4-8";
|
||||
|
||||
@@ -3464,7 +3464,7 @@ describe("qa mock openai server", () => {
|
||||
expect(outputText(await response.json())).toBe("NO_REPLY");
|
||||
});
|
||||
|
||||
it("advertises Anthropic claude-opus-4-7 baseline model on /v1/models", async () => {
|
||||
it("advertises Anthropic claude-opus-4-8 baseline model on /v1/models", async () => {
|
||||
const server = await startQaMockOpenAiServer({
|
||||
host: "127.0.0.1",
|
||||
port: 0,
|
||||
@@ -3477,7 +3477,7 @@ describe("qa mock openai server", () => {
|
||||
expect(response.status).toBe(200);
|
||||
const body = (await response.json()) as { data: Array<{ id: string }> };
|
||||
const ids = body.data.map((entry) => entry.id);
|
||||
expect(ids).toContain("claude-opus-4-7");
|
||||
expect(ids).toContain("claude-opus-4-8");
|
||||
expect(ids).toContain("gpt-5.5");
|
||||
});
|
||||
|
||||
@@ -3494,7 +3494,7 @@ describe("qa mock openai server", () => {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
model: "claude-opus-4-7",
|
||||
model: "claude-opus-4-8",
|
||||
max_tokens: 256,
|
||||
messages: [
|
||||
{
|
||||
@@ -3519,7 +3519,7 @@ describe("qa mock openai server", () => {
|
||||
};
|
||||
expect(body.type).toBe("message");
|
||||
expect(body.role).toBe("assistant");
|
||||
expect(body.model).toBe("claude-opus-4-7");
|
||||
expect(body.model).toBe("claude-opus-4-8");
|
||||
expect(body.stop_reason).toBe("tool_use");
|
||||
const toolUseBlock = body.content.find((block) => block.type === "tool_use") as
|
||||
| { name: string; input: Record<string, unknown> }
|
||||
@@ -3530,7 +3530,7 @@ describe("qa mock openai server", () => {
|
||||
const debugResponse = await fetch(`${server.baseUrl}/debug/last-request`);
|
||||
expect(debugResponse.status).toBe(200);
|
||||
const debugPayload = requireRecord(await debugResponse.json(), "debug request");
|
||||
expect(debugPayload.model).toBe("claude-opus-4-7");
|
||||
expect(debugPayload.model).toBe("claude-opus-4-8");
|
||||
expect(debugPayload.plannedToolName).toBe("read");
|
||||
});
|
||||
|
||||
@@ -3541,7 +3541,7 @@ describe("qa mock openai server", () => {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
model: "claude-opus-4-7",
|
||||
model: "claude-opus-4-8",
|
||||
max_tokens: 256,
|
||||
tools: [
|
||||
{
|
||||
@@ -3581,7 +3581,7 @@ describe("qa mock openai server", () => {
|
||||
const debugResponse = await fetch(`${server.baseUrl}/debug/last-request`);
|
||||
expect(debugResponse.status).toBe(200);
|
||||
const debugPayload = requireRecord(await debugResponse.json(), "debug request");
|
||||
expect(debugPayload.model).toBe("claude-opus-4-7");
|
||||
expect(debugPayload.model).toBe("claude-opus-4-8");
|
||||
expect(debugPayload.plannedToolName).toBe("sessions_spawn");
|
||||
});
|
||||
|
||||
@@ -3605,7 +3605,7 @@ describe("qa mock openai server", () => {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
model: "claude-opus-4-7",
|
||||
model: "claude-opus-4-8",
|
||||
max_tokens: 256,
|
||||
messages: [
|
||||
{
|
||||
@@ -3679,7 +3679,7 @@ describe("qa mock openai server", () => {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
model: "claude-opus-4-7",
|
||||
model: "claude-opus-4-8",
|
||||
max_tokens: 256,
|
||||
messages: [
|
||||
{
|
||||
@@ -3760,7 +3760,7 @@ describe("qa mock openai server", () => {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
model: "claude-opus-4-7",
|
||||
model: "claude-opus-4-8",
|
||||
max_tokens: 256,
|
||||
stream: true,
|
||||
messages: [
|
||||
@@ -3801,7 +3801,7 @@ describe("qa mock openai server", () => {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
model: "claude-opus-4-7",
|
||||
model: "claude-opus-4-8",
|
||||
max_tokens: 256,
|
||||
stream: true,
|
||||
messages: [
|
||||
@@ -3860,7 +3860,7 @@ describe("qa mock openai server", () => {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
model: "claude-opus-4-7",
|
||||
model: "claude-opus-4-8",
|
||||
max_tokens: 256,
|
||||
stream: true,
|
||||
system: [
|
||||
@@ -3903,7 +3903,7 @@ describe("qa mock openai server", () => {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
model: "claude-opus-4-7",
|
||||
model: "claude-opus-4-8",
|
||||
max_tokens: 256,
|
||||
stream: true,
|
||||
system: [
|
||||
@@ -3948,7 +3948,7 @@ describe("qa mock openai server", () => {
|
||||
const response = await fetch(`${server.baseUrl}/v1/messages`, {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: '{"model":"claude-opus-4-7","messages":[',
|
||||
body: '{"model":"claude-opus-4-8","messages":[',
|
||||
});
|
||||
|
||||
expect(response.status).toBe(400);
|
||||
@@ -3961,12 +3961,12 @@ describe("qa mock openai server", () => {
|
||||
expect(body.error.message).toContain("Malformed JSON body");
|
||||
});
|
||||
|
||||
it("defaults empty-string Anthropic /v1/messages model to claude-opus-4-7", async () => {
|
||||
it("defaults empty-string Anthropic /v1/messages model to claude-opus-4-8", async () => {
|
||||
// Regression for the loop-7 Copilot finding: a bare `typeof
|
||||
// body.model === "string"` check lets an empty-string model leak
|
||||
// through to `lastRequest.model` and `responseBody.model`. Empty
|
||||
// strings must be treated the same as absent and default to
|
||||
// `"claude-opus-4-7"` so parity consumers can trust the echoed label.
|
||||
// `"claude-opus-4-8"` so parity consumers can trust the echoed label.
|
||||
const server = await startQaMockOpenAiServer({
|
||||
host: "127.0.0.1",
|
||||
port: 0,
|
||||
@@ -3991,12 +3991,12 @@ describe("qa mock openai server", () => {
|
||||
});
|
||||
expect(response.status).toBe(200);
|
||||
const body = (await response.json()) as { model: string };
|
||||
expect(body.model).toBe("claude-opus-4-7");
|
||||
expect(body.model).toBe("claude-opus-4-8");
|
||||
|
||||
const debugResponse = await fetch(`${server.baseUrl}/debug/last-request`);
|
||||
expect(debugResponse.status).toBe(200);
|
||||
const debug = (await debugResponse.json()) as { model: string };
|
||||
expect(debug.model).toBe("claude-opus-4-7");
|
||||
expect(debug.model).toBe("claude-opus-4-8");
|
||||
});
|
||||
|
||||
it("scripts a reasoning-only recovery sequence after a replay-safe read", async () => {
|
||||
@@ -4247,9 +4247,9 @@ describe("resolveProviderVariant", () => {
|
||||
});
|
||||
|
||||
it("tags prefix-qualified anthropic models", () => {
|
||||
expect(resolveProviderVariant("anthropic/claude-opus-4-7")).toBe("anthropic");
|
||||
expect(resolveProviderVariant("anthropic:claude-opus-4-7")).toBe("anthropic");
|
||||
expect(resolveProviderVariant("claude-cli/claude-opus-4-7")).toBe("anthropic");
|
||||
expect(resolveProviderVariant("anthropic/claude-opus-4-8")).toBe("anthropic");
|
||||
expect(resolveProviderVariant("anthropic:claude-opus-4-8")).toBe("anthropic");
|
||||
expect(resolveProviderVariant("claude-cli/claude-opus-4-8")).toBe("anthropic");
|
||||
});
|
||||
|
||||
it("tags bare model names by prefix", () => {
|
||||
@@ -4257,7 +4257,7 @@ describe("resolveProviderVariant", () => {
|
||||
expect(resolveProviderVariant("gpt-5.5-alt")).toBe("openai");
|
||||
expect(resolveProviderVariant("gpt-4.5")).toBe("openai");
|
||||
expect(resolveProviderVariant("o1-preview")).toBe("openai");
|
||||
expect(resolveProviderVariant("claude-opus-4-7")).toBe("anthropic");
|
||||
expect(resolveProviderVariant("claude-opus-4-8")).toBe("anthropic");
|
||||
expect(resolveProviderVariant("claude-sonnet-4-6")).toBe("anthropic");
|
||||
});
|
||||
|
||||
@@ -4293,7 +4293,7 @@ describe("qa mock openai server provider variant tagging", () => {
|
||||
|
||||
const anthropicSourceServer = await startMockServer();
|
||||
const anthropicSource = await expectResponsesJson(anthropicSourceServer, {
|
||||
model: "anthropic/claude-opus-4-7",
|
||||
model: "anthropic/claude-opus-4-8",
|
||||
stream: false,
|
||||
input: [makeUserInput(sourcePrompt)],
|
||||
});
|
||||
@@ -4312,7 +4312,7 @@ describe("qa mock openai server provider variant tagging", () => {
|
||||
|
||||
const anthropicHandoffServer = await startMockServer();
|
||||
const anthropicHandoff = await expectResponsesJson(anthropicHandoffServer, {
|
||||
model: "claude-opus-4-7",
|
||||
model: "claude-opus-4-8",
|
||||
stream: false,
|
||||
input: [makeUserInput(handoffPrompt)],
|
||||
});
|
||||
@@ -4335,7 +4335,7 @@ describe("qa mock openai server provider variant tagging", () => {
|
||||
|
||||
const anthropicFanoutServer = await startMockServer();
|
||||
const anthropicFanout = await expectResponsesJson(anthropicFanoutServer, {
|
||||
model: "anthropic/claude-opus-4-7",
|
||||
model: "anthropic/claude-opus-4-8",
|
||||
stream: false,
|
||||
tools: [SESSIONS_SPAWN_TOOL],
|
||||
input: [makeUserInput(fanoutPrompt)],
|
||||
@@ -4386,7 +4386,7 @@ describe("qa mock openai server provider variant tagging", () => {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
model: "claude-opus-4-7",
|
||||
model: "claude-opus-4-8",
|
||||
max_tokens: 256,
|
||||
messages: [{ role: "user", content: "Heartbeat check" }],
|
||||
}),
|
||||
@@ -4396,7 +4396,7 @@ describe("qa mock openai server provider variant tagging", () => {
|
||||
model: string;
|
||||
providerVariant: string;
|
||||
};
|
||||
expect(debug.model).toBe("claude-opus-4-7");
|
||||
expect(debug.model).toBe("claude-opus-4-8");
|
||||
expect(debug.providerVariant).toBe("anthropic");
|
||||
});
|
||||
|
||||
|
||||
@@ -81,7 +81,7 @@ export function resolveProviderVariant(model: string | undefined): MockOpenAiPro
|
||||
return "anthropic";
|
||||
}
|
||||
// Fall back to model-name prefix matching for bare model strings like
|
||||
// `gpt-5.5` or `claude-opus-4-7`.
|
||||
// `gpt-5.5` or `claude-opus-4-8`.
|
||||
if (/^(?:gpt-|o1-|openai-)/.test(trimmed)) {
|
||||
return "openai";
|
||||
}
|
||||
@@ -645,7 +645,7 @@ function execCommandFromToolProgressPrompt(prompt: string) {
|
||||
|
||||
function buildMockFunctionCall(name: string, args: Record<string, unknown>) {
|
||||
const serialized = JSON.stringify(args);
|
||||
const callSuffix = createHash("sha1")
|
||||
const callSuffix = createHash("sha256")
|
||||
.update(name)
|
||||
.update("\0")
|
||||
.update(serialized)
|
||||
@@ -2591,7 +2591,7 @@ async function buildResponsesPayload(
|
||||
//
|
||||
// The QA parity gate needs two comparable scenario runs: one against the
|
||||
// "candidate" (openai/gpt-5.5) and one against the "baseline"
|
||||
// (anthropic/claude-opus-4-7). The OpenAI mock above already dispatches all
|
||||
// (anthropic/claude-opus-4-8). The OpenAI mock above already dispatches all
|
||||
// the scenario prompt branches we care about. Rather than duplicating that
|
||||
// machinery, the /v1/messages route below translates Anthropic request
|
||||
// shapes into the shared ResponsesInputItem[] format, calls the same
|
||||
@@ -2814,7 +2814,7 @@ function buildAnthropicMessageResponse(params: {
|
||||
id: `msg_mock_${Math.floor(Math.random() * 1_000_000).toString(16)}`,
|
||||
type: "message",
|
||||
role: "assistant",
|
||||
model: params.model || "claude-opus-4-7",
|
||||
model: params.model || "claude-opus-4-8",
|
||||
content,
|
||||
stop_reason: stopReason,
|
||||
stop_sequence: null,
|
||||
@@ -2842,7 +2842,7 @@ function buildAnthropicMessageStreamEvents(params: {
|
||||
id: messageId,
|
||||
type: "message",
|
||||
role: "assistant",
|
||||
model: params.model || "claude-opus-4-7",
|
||||
model: params.model || "claude-opus-4-8",
|
||||
content: [],
|
||||
stop_reason: null,
|
||||
stop_sequence: null,
|
||||
@@ -2941,7 +2941,7 @@ async function buildMessagesPayload(
|
||||
// which then confuses parity consumers that assume the mock always
|
||||
// echoes the real provider label. Normalize once and reuse everywhere.
|
||||
const normalizedModel =
|
||||
typeof body.model === "string" && body.model.trim() !== "" ? body.model : "claude-opus-4-7";
|
||||
typeof body.model === "string" && body.model.trim() !== "" ? body.model : "claude-opus-4-8";
|
||||
// Dispatch through the same scenario logic the /v1/responses route uses.
|
||||
// Preserve declared tools so route-specific adapters mirror what the
|
||||
// real provider request made available to the model.
|
||||
@@ -2986,7 +2986,7 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n
|
||||
{ id: "gpt-5.5-alt", object: "model" },
|
||||
{ id: "gpt-image-1", object: "model" },
|
||||
{ id: "text-embedding-3-small", object: "model" },
|
||||
{ id: "claude-opus-4-7", object: "model" },
|
||||
{ id: "claude-opus-4-8", object: "model" },
|
||||
{ id: "claude-sonnet-4-6", object: "model" },
|
||||
],
|
||||
});
|
||||
|
||||
@@ -71,14 +71,14 @@ function createMockAnthropicMessagesProvider(baseUrl: string): ModelProviderConf
|
||||
},
|
||||
models: [
|
||||
{
|
||||
id: "claude-opus-4-7",
|
||||
name: "claude-opus-4-7",
|
||||
id: "claude-opus-4-8",
|
||||
name: "claude-opus-4-8",
|
||||
api: "anthropic-messages",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
cost: ZERO_COST,
|
||||
contextWindow: 200_000,
|
||||
maxTokens: 4096,
|
||||
contextWindow: 1_048_576,
|
||||
maxTokens: 128_000,
|
||||
},
|
||||
{
|
||||
id: "claude-sonnet-4-6",
|
||||
|
||||
@@ -108,12 +108,12 @@ describe("buildQaGatewayConfig", () => {
|
||||
workspaceDir: "/tmp/qa-workspace",
|
||||
providerMode: "mock-openai",
|
||||
primaryModel: "openai/gpt-5.5",
|
||||
alternateModel: "anthropic/claude-opus-4-7",
|
||||
alternateModel: "anthropic/claude-opus-4-8",
|
||||
});
|
||||
|
||||
expect(getPrimaryModel(cfg.agents?.defaults?.model)).toBe("openai/gpt-5.5");
|
||||
expect(getModelFallbacks(cfg.agents?.defaults?.model)).toEqual(["anthropic/claude-opus-4-7"]);
|
||||
expect(getModelFallbacks(cfg.agents?.list?.[0]?.model)).toEqual(["anthropic/claude-opus-4-7"]);
|
||||
expect(getModelFallbacks(cfg.agents?.defaults?.model)).toEqual(["anthropic/claude-opus-4-8"]);
|
||||
expect(getModelFallbacks(cfg.agents?.list?.[0]?.model)).toEqual(["anthropic/claude-opus-4-8"]);
|
||||
expect(cfg.models?.providers?.openai?.api).toBe("openai-responses");
|
||||
expect(cfg.models?.providers?.openai?.request).toEqual({ allowPrivateNetwork: true });
|
||||
expect(cfg.models?.providers?.openai?.models.map((model) => model.id)).toContain("gpt-5.5");
|
||||
@@ -121,7 +121,7 @@ describe("buildQaGatewayConfig", () => {
|
||||
expect(cfg.models?.providers?.anthropic?.baseUrl).toBe("http://127.0.0.1:44080");
|
||||
expect(cfg.models?.providers?.anthropic?.request).toEqual({ allowPrivateNetwork: true });
|
||||
expect(cfg.models?.providers?.anthropic?.models.map((model) => model.id)).toContain(
|
||||
"claude-opus-4-7",
|
||||
"claude-opus-4-8",
|
||||
);
|
||||
expect(cfg.plugins?.allow).toEqual(["acpx", "memory-core"]);
|
||||
});
|
||||
|
||||
@@ -198,7 +198,7 @@ describe("qa suite planning helpers", () => {
|
||||
makeQaSuiteTestScenario("anthropic-only", {
|
||||
config: {
|
||||
requiredProvider: "anthropic",
|
||||
requiredModel: "claude-opus-4-7",
|
||||
requiredModel: "claude-opus-4-8",
|
||||
},
|
||||
}),
|
||||
];
|
||||
@@ -384,7 +384,7 @@ describe("qa suite planning helpers", () => {
|
||||
config: { requiredProvider: "openai", requiredModel: "gpt-5.5" },
|
||||
}),
|
||||
makeQaSuiteTestScenario("anthropic-only", {
|
||||
config: { requiredProvider: "anthropic", requiredModel: "claude-opus-4-7" },
|
||||
config: { requiredProvider: "anthropic", requiredModel: "claude-opus-4-8" },
|
||||
}),
|
||||
makeQaSuiteTestScenario("claude-subscription", {
|
||||
config: { requiredProvider: "claude-cli", authMode: "subscription" },
|
||||
|
||||
@@ -67,12 +67,12 @@ describe("buildQaSuiteSummaryJson", () => {
|
||||
it("records an Anthropic baseline lane cleanly for parity runs", () => {
|
||||
const json = buildQaSuiteSummaryJson({
|
||||
...baseParams,
|
||||
primaryModel: "anthropic/claude-opus-4-7",
|
||||
primaryModel: "anthropic/claude-opus-4-8",
|
||||
alternateModel: "anthropic/claude-sonnet-4-6",
|
||||
});
|
||||
expect(json.run.primaryModel).toBe("anthropic/claude-opus-4-7");
|
||||
expect(json.run.primaryModel).toBe("anthropic/claude-opus-4-8");
|
||||
expect(json.run.primaryProvider).toBe("anthropic");
|
||||
expect(json.run.primaryModelName).toBe("claude-opus-4-7");
|
||||
expect(json.run.primaryModelName).toBe("claude-opus-4-8");
|
||||
expect(json.run.alternateModel).toBe("anthropic/claude-sonnet-4-6");
|
||||
expect(json.run.alternateProvider).toBe("anthropic");
|
||||
expect(json.run.alternateModelName).toBe("claude-sonnet-4-6");
|
||||
|
||||
@@ -3,7 +3,7 @@ import type { TSchema } from "typebox";
|
||||
export type Api = string;
|
||||
export type CacheRetention = "none" | "short" | "long";
|
||||
export type Transport = "sse" | "websocket" | "websocket-cached" | "auto";
|
||||
export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
|
||||
export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh" | "max";
|
||||
export type ModelThinkingLevel = "off" | ThinkingLevel;
|
||||
export type MaybePromise<T> = T | Promise<T>;
|
||||
|
||||
@@ -17,6 +17,7 @@ export interface ThinkingBudgets {
|
||||
low?: number;
|
||||
medium?: number;
|
||||
high?: number;
|
||||
max?: number;
|
||||
}
|
||||
|
||||
export interface DiagnosticErrorInfo {
|
||||
|
||||
@@ -285,7 +285,7 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
|
||||
* Note: "xhigh" is only supported by selected model families. Use model thinking-level metadata
|
||||
* from openclaw/plugin-sdk/llm to detect support for a concrete model.
|
||||
*/
|
||||
export type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh";
|
||||
export type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh" | "max";
|
||||
|
||||
/**
|
||||
* Extensible interface for custom app messages.
|
||||
|
||||
@@ -12,7 +12,7 @@ coverage:
|
||||
objective: Verify the regular Anthropic Opus lane can complete a quick chat turn using API-key auth.
|
||||
successCriteria:
|
||||
- A live-frontier run fails fast unless the selected primary provider is anthropic.
|
||||
- The selected primary model is Anthropic Opus 4.7.
|
||||
- The selected primary model is Anthropic Opus 4.8.
|
||||
- The QA gateway worker has an Anthropic API key available through environment auth.
|
||||
- The agent replies through the regular Anthropic provider.
|
||||
docsRefs:
|
||||
@@ -24,10 +24,10 @@ codeRefs:
|
||||
- extensions/qa-lab/src/suite.ts
|
||||
execution:
|
||||
kind: flow
|
||||
summary: Run with `pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-7 --alt-model anthropic/claude-opus-4-7 --scenario anthropic-opus-api-key-smoke`.
|
||||
summary: Run with `pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-8 --alt-model anthropic/claude-opus-4-8 --scenario anthropic-opus-api-key-smoke`.
|
||||
config:
|
||||
requiredProvider: anthropic
|
||||
requiredModel: claude-opus-4-7
|
||||
requiredModel: claude-opus-4-8
|
||||
chatPrompt: "Anthropic Opus API key smoke. Reply exactly: ANTHROPIC-OPUS-API-KEY-OK"
|
||||
chatExpected: ANTHROPIC-OPUS-API-KEY-OK
|
||||
```
|
||||
|
||||
@@ -12,7 +12,7 @@ coverage:
|
||||
objective: Verify the regular Anthropic Opus lane can complete a quick chat turn using setup-token auth.
|
||||
successCriteria:
|
||||
- A live-frontier run fails fast unless the selected primary provider is anthropic.
|
||||
- The selected primary model is Anthropic Opus 4.7.
|
||||
- The selected primary model is Anthropic Opus 4.8.
|
||||
- The QA gateway worker stages a token auth profile in the isolated agent store.
|
||||
- The agent replies through the regular Anthropic provider.
|
||||
docsRefs:
|
||||
@@ -24,10 +24,10 @@ codeRefs:
|
||||
- extensions/qa-lab/src/suite.ts
|
||||
execution:
|
||||
kind: flow
|
||||
summary: Run with `OPENCLAW_LIVE_SETUP_TOKEN_VALUE=<setup-token> pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-7 --alt-model anthropic/claude-opus-4-7 --scenario anthropic-opus-setup-token-smoke`.
|
||||
summary: Run with `OPENCLAW_LIVE_SETUP_TOKEN_VALUE=<setup-token> pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-8 --alt-model anthropic/claude-opus-4-8 --scenario anthropic-opus-setup-token-smoke`.
|
||||
config:
|
||||
requiredProvider: anthropic
|
||||
requiredModel: claude-opus-4-7
|
||||
requiredModel: claude-opus-4-8
|
||||
profileId: "anthropic:qa-setup-token"
|
||||
chatPrompt: "Anthropic Opus setup-token smoke. Reply exactly: ANTHROPIC-OPUS-SETUP-TOKEN-OK"
|
||||
chatExpected: ANTHROPIC-OPUS-SETUP-TOKEN-OK
|
||||
|
||||
@@ -79,7 +79,7 @@ async function main() {
|
||||
const output = runtime.lines.join("\n");
|
||||
assertOutputIncludes(
|
||||
output,
|
||||
"[crestodian] planner: claude-cli/claude-opus-4-7",
|
||||
"[crestodian] planner: claude-cli/claude-opus-4-8",
|
||||
"configless planner did not use Claude CLI fallback",
|
||||
);
|
||||
assertOutputIncludes(
|
||||
|
||||
@@ -116,6 +116,7 @@ function makeAnthropicTransportModel(
|
||||
baseUrl?: string;
|
||||
reasoning?: boolean;
|
||||
maxTokens?: number;
|
||||
thinkingLevelMap?: AnthropicMessagesModel["thinkingLevelMap"];
|
||||
headers?: Record<string, string>;
|
||||
requestTransport?: RequestTransportConfig;
|
||||
} = {},
|
||||
@@ -132,6 +133,7 @@ function makeAnthropicTransportModel(
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 200000,
|
||||
maxTokens: params.maxTokens ?? 8192,
|
||||
...(params.thinkingLevelMap ? { thinkingLevelMap: params.thinkingLevelMap } : {}),
|
||||
...(params.headers ? { headers: params.headers } : {}),
|
||||
} satisfies AnthropicMessagesModel,
|
||||
params.requestTransport ?? {
|
||||
@@ -1867,10 +1869,10 @@ describe("anthropic transport stream", () => {
|
||||
expect(payload.output_config).toEqual({ effort: "max" });
|
||||
});
|
||||
|
||||
it("maps xhigh thinking effort for Claude Opus 4.7 transport runs", async () => {
|
||||
it("maps xhigh thinking effort for Claude Opus 4.8 transport runs", async () => {
|
||||
const model = makeAnthropicTransportModel({
|
||||
id: "claude-opus-4-7",
|
||||
name: "Claude Opus 4.7",
|
||||
id: "claude-opus-4-8",
|
||||
name: "Claude Opus 4.8",
|
||||
maxTokens: 8192,
|
||||
});
|
||||
|
||||
@@ -1889,4 +1891,51 @@ describe("anthropic transport stream", () => {
|
||||
expect(payload.thinking).toEqual({ type: "adaptive" });
|
||||
expect(payload.output_config).toEqual({ effort: "xhigh" });
|
||||
});
|
||||
|
||||
it("preserves max thinking effort for Claude Opus 4.8 transport runs", async () => {
|
||||
const model = makeAnthropicTransportModel({
|
||||
id: "claude-opus-4-8",
|
||||
name: "Claude Opus 4.8",
|
||||
maxTokens: 8192,
|
||||
thinkingLevelMap: { xhigh: "xhigh", max: "max" },
|
||||
});
|
||||
|
||||
await runTransportStream(
|
||||
model,
|
||||
{
|
||||
messages: [{ role: "user", content: "Think as much as needed." }],
|
||||
} as AnthropicStreamContext,
|
||||
{
|
||||
apiKey: "sk-ant-api",
|
||||
reasoning: "max",
|
||||
} as AnthropicStreamOptions,
|
||||
);
|
||||
|
||||
const payload = latestAnthropicRequest().payload;
|
||||
expect(payload.thinking).toEqual({ type: "adaptive" });
|
||||
expect(payload.output_config).toEqual({ effort: "max" });
|
||||
});
|
||||
|
||||
it("clamps max thinking effort for Claude models without native max support", async () => {
|
||||
const model = makeAnthropicTransportModel({
|
||||
id: "claude-sonnet-4-6",
|
||||
name: "Claude Sonnet 4.6",
|
||||
maxTokens: 8192,
|
||||
});
|
||||
|
||||
await runTransportStream(
|
||||
model,
|
||||
{
|
||||
messages: [{ role: "user", content: "Think as much as supported." }],
|
||||
} as AnthropicStreamContext,
|
||||
{
|
||||
apiKey: "sk-ant-api",
|
||||
reasoning: "max",
|
||||
} as AnthropicStreamOptions,
|
||||
);
|
||||
|
||||
const payload = latestAnthropicRequest().payload;
|
||||
expect(payload.thinking).toEqual({ type: "adaptive" });
|
||||
expect(payload.output_config).toEqual({ effort: "high" });
|
||||
});
|
||||
});
|
||||
|
||||
@@ -113,8 +113,13 @@ type MutableAssistantOutput = {
|
||||
|
||||
const EMPTY_ANTHROPIC_MESSAGES_FALLBACK_TEXT = ".";
|
||||
|
||||
function isClaudeOpus47Model(modelId: string): boolean {
|
||||
return modelId.includes("opus-4-7") || modelId.includes("opus-4.7");
|
||||
function isClaudeOpus47OrNewerModel(modelId: string): boolean {
|
||||
return (
|
||||
modelId.includes("opus-4-8") ||
|
||||
modelId.includes("opus-4.8") ||
|
||||
modelId.includes("opus-4-7") ||
|
||||
modelId.includes("opus-4.7")
|
||||
);
|
||||
}
|
||||
|
||||
function isClaudeOpus46Model(modelId: string): boolean {
|
||||
@@ -123,7 +128,7 @@ function isClaudeOpus46Model(modelId: string): boolean {
|
||||
|
||||
function supportsAdaptiveThinking(modelId: string): boolean {
|
||||
return (
|
||||
isClaudeOpus47Model(modelId) ||
|
||||
isClaudeOpus47OrNewerModel(modelId) ||
|
||||
isClaudeOpus46Model(modelId) ||
|
||||
modelId.includes("sonnet-4-6") ||
|
||||
modelId.includes("sonnet-4.6")
|
||||
@@ -138,17 +143,19 @@ function mapThinkingLevelToEffort(level: ThinkingLevel, modelId: string): Anthro
|
||||
case "medium":
|
||||
return "medium";
|
||||
case "xhigh":
|
||||
if (isClaudeOpus47Model(modelId)) {
|
||||
if (isClaudeOpus47OrNewerModel(modelId)) {
|
||||
return "xhigh";
|
||||
}
|
||||
return isClaudeOpus46Model(modelId) ? "max" : "high";
|
||||
case "max":
|
||||
return isClaudeOpus47OrNewerModel(modelId) ? "max" : "high";
|
||||
default:
|
||||
return "high";
|
||||
}
|
||||
}
|
||||
|
||||
function clampReasoningLevel(level: ThinkingLevel): "minimal" | "low" | "medium" | "high" {
|
||||
return level === "xhigh" ? "high" : level;
|
||||
return level === "xhigh" || level === "max" ? "high" : level;
|
||||
}
|
||||
|
||||
function resolvePositiveAnthropicMaxTokens(value: unknown): number | undefined {
|
||||
|
||||
@@ -1849,7 +1849,7 @@ describe("shouldSkipLocalCliCredentialEpoch", () => {
|
||||
input: "stdin",
|
||||
sessionMode: "existing",
|
||||
modelAliases: {
|
||||
"claude-opus-4-7": "opus",
|
||||
"claude-opus-4-8": "opus",
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -1873,7 +1873,7 @@ describe("shouldSkipLocalCliCredentialEpoch", () => {
|
||||
workspaceDir: dir,
|
||||
prompt: "latest ask",
|
||||
provider: "claude-cli",
|
||||
model: "claude-opus-4-7",
|
||||
model: "claude-opus-4-8",
|
||||
timeoutMs: 1_000,
|
||||
runId: "run-auto-claude-alias-reseed-history-chars",
|
||||
config: createCliBackendConfig(),
|
||||
|
||||
@@ -89,7 +89,9 @@ const prepareDeps = {
|
||||
};
|
||||
|
||||
const CLAUDE_CLI_CONTEXT_MODEL_ALIASES: Record<string, string> = {
|
||||
opus: "claude-opus-4-7",
|
||||
opus: "claude-opus-4-8",
|
||||
"opus-4.8": "claude-opus-4-8",
|
||||
"opus-4-8": "claude-opus-4-8",
|
||||
"opus-4.7": "claude-opus-4-7",
|
||||
"opus-4-7": "claude-opus-4-7",
|
||||
"opus-4.6": "claude-opus-4-6",
|
||||
|
||||
@@ -68,11 +68,13 @@ describe("applyDiscoveredContextWindows", () => {
|
||||
applyDiscoveredContextWindows({
|
||||
cache,
|
||||
models: [
|
||||
{ id: "claude-cli/claude-opus-4.8-20260514", contextWindow: 200_000 },
|
||||
{ id: "claude-cli/claude-opus-4.7-20260219", contextWindow: 200_000 },
|
||||
{ id: "claude-cli/claude-sonnet-4-6", contextWindow: 200_000 },
|
||||
],
|
||||
});
|
||||
|
||||
expect(cache.get("claude-cli/claude-opus-4.8-20260514")).toBe(ANTHROPIC_CONTEXT_1M_TOKENS);
|
||||
expect(cache.get("claude-cli/claude-opus-4.7-20260219")).toBe(ANTHROPIC_CONTEXT_1M_TOKENS);
|
||||
expect(cache.get("claude-cli/claude-sonnet-4-6")).toBe(ANTHROPIC_CONTEXT_1M_TOKENS);
|
||||
});
|
||||
|
||||
@@ -36,6 +36,8 @@ type ProviderConfigEntry = {
|
||||
type ModelsConfig = { providers?: Record<string, ProviderConfigEntry | undefined> };
|
||||
|
||||
const ANTHROPIC_GA_1M_MODEL_PREFIXES = [
|
||||
"claude-opus-4-8",
|
||||
"claude-opus-4.8",
|
||||
"claude-opus-4-6",
|
||||
"claude-opus-4.6",
|
||||
"claude-opus-4-7",
|
||||
|
||||
8
src/agents/embedded-agent-runner/utils.test.ts
Normal file
8
src/agents/embedded-agent-runner/utils.test.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { mapThinkingLevel } from "./utils.js";
|
||||
|
||||
describe("mapThinkingLevel", () => {
|
||||
it("maps adaptive to the provider-owned high effort default", () => {
|
||||
expect(mapThinkingLevel("adaptive")).toBe("high");
|
||||
});
|
||||
});
|
||||
@@ -8,19 +8,14 @@ export function normalizeContextTokenBudget(value: unknown): number | undefined
|
||||
}
|
||||
|
||||
export function mapThinkingLevel(level?: ThinkLevel): ThinkingLevel {
|
||||
// agent runtime supports "xhigh"; OpenClaw enables it for specific models.
|
||||
// agent runtime supports elevated levels; OpenClaw enables them for specific models.
|
||||
if (!level) {
|
||||
return "off";
|
||||
}
|
||||
if (level === "max") {
|
||||
return "xhigh";
|
||||
}
|
||||
// "adaptive" maps to "medium" at the agent runtime layer. The provider adapter
|
||||
// provider then translates this to `thinking.type: "adaptive"` with
|
||||
// `output_config.effort: "medium"` for models that support it (Opus 4.6,
|
||||
// Sonnet 4.6).
|
||||
// Runtime streams do not expose a distinct adaptive level. Preserve the
|
||||
// provider-owned adaptive default by using Claude's documented high effort.
|
||||
if (level === "adaptive") {
|
||||
return "medium";
|
||||
return "high";
|
||||
}
|
||||
return level;
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ type ModelRef = {
|
||||
};
|
||||
|
||||
const HIGH_SIGNAL_LIVE_MODEL_PRIORITY = [
|
||||
"anthropic/claude-opus-4-8",
|
||||
"anthropic/claude-sonnet-4-6",
|
||||
"anthropic/claude-opus-4-7",
|
||||
"google/gemini-3.1-pro-preview",
|
||||
|
||||
@@ -661,6 +661,7 @@ describe("isPrioritizedHighSignalLiveModelRef", () => {
|
||||
|
||||
it("lists priority refs as provider/id pairs", () => {
|
||||
expect(listPrioritizedHighSignalLiveModelRefs()).toStrictEqual([
|
||||
{ provider: "anthropic", id: "claude-opus-4-8" },
|
||||
{ provider: "anthropic", id: "claude-sonnet-4-6" },
|
||||
{ provider: "anthropic", id: "claude-opus-4-7" },
|
||||
{ provider: "google", id: "gemini-3.1-pro-preview" },
|
||||
@@ -713,6 +714,7 @@ describe("isPrioritizedSmallLiveModelRef", () => {
|
||||
describe("selectHighSignalLiveItems", () => {
|
||||
it("prefers curated Google replacements before fallback provider spread", () => {
|
||||
const items = [
|
||||
{ provider: "anthropic", id: "claude-opus-4-8" },
|
||||
{ provider: "anthropic", id: "claude-sonnet-4-6" },
|
||||
{ provider: "anthropic", id: "claude-opus-4-7" },
|
||||
{ provider: "anthropic", id: "claude-opus-4-6" },
|
||||
@@ -731,10 +733,10 @@ describe("selectHighSignalLiveItems", () => {
|
||||
(item) => item.provider,
|
||||
),
|
||||
).toEqual([
|
||||
{ provider: "anthropic", id: "claude-opus-4-8" },
|
||||
{ provider: "anthropic", id: "claude-sonnet-4-6" },
|
||||
{ provider: "anthropic", id: "claude-opus-4-7" },
|
||||
{ provider: "google", id: "gemini-3.1-pro-preview" },
|
||||
{ provider: "google", id: "gemini-3-flash-preview" },
|
||||
]);
|
||||
});
|
||||
|
||||
|
||||
@@ -147,6 +147,33 @@ const ANTHROPIC_OPUS_47_CATALOG = [
|
||||
},
|
||||
];
|
||||
|
||||
const ANTHROPIC_OPUS_48_CATALOG = [
|
||||
{
|
||||
provider: "anthropic",
|
||||
id: "claude-opus-4-8",
|
||||
name: "Claude Opus 4.8",
|
||||
reasoning: true,
|
||||
},
|
||||
];
|
||||
|
||||
// Single-entry catalog fixture: the same Claude Opus 4.8 model id, but listed
// under the "anthropic-vertex" provider. Passed as `catalog` by
// resolveAnthropicVertexOpus48Thinking.
const ANTHROPIC_VERTEX_OPUS_48_CATALOG = [
|
||||
{
|
||||
provider: "anthropic-vertex",
|
||||
id: "claude-opus-4-8",
|
||||
name: "Claude Opus 4.8",
|
||||
reasoning: true,
|
||||
},
|
||||
];
|
||||
|
||||
// Single-entry catalog fixture: Claude Opus 4.8 listed under the "claude-cli"
// provider. Passed as `catalog` by resolveClaudeCliOpus48Thinking.
const CLAUDE_CLI_OPUS_48_CATALOG = [
|
||||
{
|
||||
provider: "claude-cli",
|
||||
id: "claude-opus-4-8",
|
||||
name: "Claude Opus 4.8",
|
||||
reasoning: true,
|
||||
},
|
||||
];
|
||||
|
||||
function resolveAnthropicOpusThinking(cfg: OpenClawConfig) {
|
||||
return resolveThinkingDefault({
|
||||
cfg,
|
||||
@@ -165,6 +192,33 @@ function resolveAnthropicOpus47Thinking(cfg: OpenClawConfig) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Test helper: asks resolveThinkingDefault for the default thinking level of
 * "anthropic" / "claude-opus-4-8", using the caller's config and the
 * single-entry ANTHROPIC_OPUS_48_CATALOG.
 *
 * @param cfg Config forwarded unchanged to resolveThinkingDefault.
 * @returns Whatever resolveThinkingDefault returns for that provider/model.
 */
function resolveAnthropicOpus48Thinking(cfg: OpenClawConfig) {
|
||||
return resolveThinkingDefault({
|
||||
cfg,
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-8",
|
||||
catalog: ANTHROPIC_OPUS_48_CATALOG,
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Test helper: asks resolveThinkingDefault for the default thinking level of
 * "anthropic-vertex" / "claude-opus-4-8", using the caller's config and the
 * single-entry ANTHROPIC_VERTEX_OPUS_48_CATALOG.
 *
 * @param cfg Config forwarded unchanged to resolveThinkingDefault.
 * @returns Whatever resolveThinkingDefault returns for that provider/model.
 */
function resolveAnthropicVertexOpus48Thinking(cfg: OpenClawConfig) {
|
||||
return resolveThinkingDefault({
|
||||
cfg,
|
||||
provider: "anthropic-vertex",
|
||||
model: "claude-opus-4-8",
|
||||
catalog: ANTHROPIC_VERTEX_OPUS_48_CATALOG,
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Test helper: asks resolveThinkingDefault for the default thinking level of
 * "claude-cli" / "claude-opus-4-8", using the caller's config and the
 * single-entry CLAUDE_CLI_OPUS_48_CATALOG.
 *
 * @param cfg Config forwarded unchanged to resolveThinkingDefault.
 * @returns Whatever resolveThinkingDefault returns for that provider/model.
 */
function resolveClaudeCliOpus48Thinking(cfg: OpenClawConfig) {
|
||||
return resolveThinkingDefault({
|
||||
cfg,
|
||||
provider: "claude-cli",
|
||||
model: "claude-opus-4-8",
|
||||
catalog: CLAUDE_CLI_OPUS_48_CATALOG,
|
||||
});
|
||||
}
|
||||
|
||||
function createAgentFallbackConfig(params: {
|
||||
primary?: string;
|
||||
fallbacks?: string[];
|
||||
@@ -2300,6 +2354,42 @@ describe("model-selection", () => {
|
||||
expect(resolveAnthropicOpus47Thinking(cfg)).toBe("off");
|
||||
});
|
||||
|
||||
// The config only names anthropic/claude-opus-4-8 as the primary model and
// sets no thinking level; the resolved default is expected to be "off".
it("leaves explicitly configured Anthropic Opus 4.8 thinking off by default", () => {
|
||||
const cfg = {
|
||||
agents: {
|
||||
defaults: {
|
||||
model: { primary: "anthropic/claude-opus-4-8" },
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
|
||||
expect(resolveAnthropicOpus48Thinking(cfg)).toBe("off");
|
||||
});
|
||||
|
||||
// Same expectation as the direct Anthropic case, but with the primary model
// referenced through the "anthropic-vertex" provider prefix.
it("leaves explicitly configured Anthropic Vertex Opus 4.8 thinking off by default", () => {
|
||||
const cfg = {
|
||||
agents: {
|
||||
defaults: {
|
||||
model: { primary: "anthropic-vertex/claude-opus-4-8" },
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
|
||||
expect(resolveAnthropicVertexOpus48Thinking(cfg)).toBe("off");
|
||||
});
|
||||
|
||||
// Same expectation again for the "claude-cli" provider prefix: no thinking
// level is configured, so the resolved default should be "off".
it("leaves explicitly configured Claude CLI Opus 4.8 thinking off by default", () => {
|
||||
const cfg = {
|
||||
agents: {
|
||||
defaults: {
|
||||
model: { primary: "claude-cli/claude-opus-4-8" },
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
|
||||
expect(resolveClaudeCliOpus48Thinking(cfg)).toBe("off");
|
||||
});
|
||||
|
||||
it("uses bundled provider thinking defaults when no explicit config overrides them", () => {
|
||||
const cfg = {} as OpenClawConfig;
|
||||
|
||||
|
||||
@@ -57,8 +57,18 @@ export function resolveThinkingDefault(params: {
|
||||
if (configured) {
|
||||
return configured;
|
||||
}
|
||||
const isClaudeProvider =
|
||||
normalizedProvider === "anthropic" ||
|
||||
normalizedProvider === "anthropic-vertex" ||
|
||||
normalizedProvider === "claude-cli";
|
||||
if (
|
||||
normalizedProvider === "anthropic" &&
|
||||
isClaudeProvider &&
|
||||
(normalizedModel.startsWith("claude-opus-4-8") || normalizedModel.startsWith("claude-opus-4.8"))
|
||||
) {
|
||||
return "off";
|
||||
}
|
||||
if (
|
||||
isClaudeProvider &&
|
||||
(normalizedModel.startsWith("claude-opus-4-7") || normalizedModel.startsWith("claude-opus-4.7"))
|
||||
) {
|
||||
return "off";
|
||||
|
||||
@@ -96,6 +96,7 @@ const ThinkingLevelMapSchema = Type.Object({
|
||||
medium: Type.Optional(ThinkingLevelMapValueSchema),
|
||||
high: Type.Optional(ThinkingLevelMapValueSchema),
|
||||
xhigh: Type.Optional(ThinkingLevelMapValueSchema),
|
||||
max: Type.Optional(ThinkingLevelMapValueSchema),
|
||||
});
|
||||
|
||||
const OpenAICompletionsCompatSchema = Type.Object({
|
||||
|
||||
@@ -11,7 +11,7 @@ import type { ThinkingLevel } from "../runtime/index.js";
|
||||
import { DEFAULT_THINKING_LEVEL } from "./defaults.js";
|
||||
import type { ModelRegistry } from "./model-registry.js";
|
||||
|
||||
const VALID_THINKING_LEVELS = ["off", "minimal", "low", "medium", "high", "xhigh"] as const;
|
||||
const VALID_THINKING_LEVELS = ["off", "minimal", "low", "medium", "high", "xhigh", "max"] as const;
|
||||
|
||||
function isValidThinkingLevel(level: string): level is ThinkingLevel {
|
||||
return VALID_THINKING_LEVELS.includes(level as ThinkingLevel);
|
||||
|
||||
@@ -65,6 +65,26 @@ function createResourceLoaderWithHandlers(
|
||||
}
|
||||
|
||||
describe("createAgentSession tool defaults", () => {
|
||||
// Builds a session from in-memory managers whose settings carry `high` and
// `max` thinking budgets, then checks that both values show up unchanged on
// session.agent.thinkingBudgets.
it("forwards max thinking budgets from settings to the agent", async () => {
|
||||
const { session } = await createAgentSession({
|
||||
model: testModel,
|
||||
resourceLoader: createEmptyResourceLoader(),
|
||||
sessionManager: SessionManager.inMemory(),
|
||||
settingsManager: SettingsManager.inMemory({
|
||||
thinkingBudgets: {
|
||||
high: 16_384,
|
||||
max: 32_768,
|
||||
},
|
||||
}),
|
||||
modelRegistry: ModelRegistry.inMemory(AuthStorage.inMemory()),
|
||||
});
|
||||
|
||||
expect(session.agent.thinkingBudgets).toEqual({
|
||||
high: 16_384,
|
||||
max: 32_768,
|
||||
});
|
||||
});
|
||||
|
||||
it("keeps custom tools active when only builtin tools are disabled", async () => {
|
||||
const customTool: ToolDefinition = {
|
||||
name: "custom_lookup",
|
||||
|
||||
@@ -47,6 +47,7 @@ export interface ThinkingBudgetsSettings {
|
||||
low?: number;
|
||||
medium?: number;
|
||||
high?: number;
|
||||
max?: number;
|
||||
}
|
||||
|
||||
export interface MarkdownSettings {
|
||||
@@ -78,7 +79,7 @@ export interface Settings {
|
||||
lastChangelogVersion?: string;
|
||||
defaultProvider?: string;
|
||||
defaultModel?: string;
|
||||
defaultThinkingLevel?: "off" | "minimal" | "low" | "medium" | "high" | "xhigh";
|
||||
defaultThinkingLevel?: "off" | "minimal" | "low" | "medium" | "high" | "xhigh" | "max";
|
||||
transport?: TransportSetting; // default: "auto"
|
||||
steeringMode?: "all" | "one-at-a-time";
|
||||
followUpMode?: "all" | "one-at-a-time";
|
||||
@@ -668,11 +669,21 @@ export class SettingsManager {
|
||||
this.save();
|
||||
}
|
||||
|
||||
getDefaultThinkingLevel(): "off" | "minimal" | "low" | "medium" | "high" | "xhigh" | undefined {
|
||||
getDefaultThinkingLevel():
|
||||
| "off"
|
||||
| "minimal"
|
||||
| "low"
|
||||
| "medium"
|
||||
| "high"
|
||||
| "xhigh"
|
||||
| "max"
|
||||
| undefined {
|
||||
return this.settings.defaultThinkingLevel;
|
||||
}
|
||||
|
||||
setDefaultThinkingLevel(level: "off" | "minimal" | "low" | "medium" | "high" | "xhigh"): void {
|
||||
setDefaultThinkingLevel(
|
||||
level: "off" | "minimal" | "low" | "medium" | "high" | "xhigh" | "max",
|
||||
): void {
|
||||
this.globalSettings.defaultThinkingLevel = level;
|
||||
this.markModified("defaultThinkingLevel");
|
||||
this.save();
|
||||
|
||||
@@ -3,7 +3,7 @@ import type { OpenClawConfig } from "../../config/config.js";
|
||||
import { resolvePdfModelConfigForTool } from "./pdf-tool.model-config.js";
|
||||
import { resetPdfToolAuthEnv } from "./pdf-tool.test-support.js";
|
||||
|
||||
const ANTHROPIC_PDF_MODEL = "anthropic/claude-opus-4-7";
|
||||
const ANTHROPIC_PDF_MODEL = "anthropic/claude-opus-4-8";
|
||||
const TEST_AGENT_DIR = "/tmp/openclaw-pdf-model-config";
|
||||
|
||||
vi.mock("./model-config.helpers.js", () => ({
|
||||
|
||||
@@ -27,7 +27,7 @@ let defaultWarnState: WarnState = { warned: false };
|
||||
|
||||
const DEFAULT_MODEL_ALIASES: Readonly<Record<string, string>> = {
|
||||
// Anthropic (shared model runtime catalog uses "latest" ids without date suffix)
|
||||
opus: "anthropic/claude-opus-4-7",
|
||||
opus: "anthropic/claude-opus-4-8",
|
||||
sonnet: "anthropic/claude-sonnet-4-6",
|
||||
|
||||
// OpenAI
|
||||
|
||||
@@ -75,7 +75,7 @@ describe("applyModelDefaults", () => {
|
||||
agents: {
|
||||
defaults: {
|
||||
models: {
|
||||
"anthropic/claude-opus-4-7": {},
|
||||
"anthropic/claude-opus-4-8": {},
|
||||
"openai/gpt-5.4": {},
|
||||
},
|
||||
},
|
||||
@@ -83,7 +83,7 @@ describe("applyModelDefaults", () => {
|
||||
} satisfies OpenClawConfig;
|
||||
const next = applyModelDefaults(cfg);
|
||||
|
||||
expect(next.agents?.defaults?.models?.["anthropic/claude-opus-4-7"]?.alias).toBe("opus");
|
||||
expect(next.agents?.defaults?.models?.["anthropic/claude-opus-4-8"]?.alias).toBe("opus");
|
||||
expect(next.agents?.defaults?.models?.["openai/gpt-5.4"]?.alias).toBe("gpt");
|
||||
});
|
||||
|
||||
@@ -92,7 +92,7 @@ describe("applyModelDefaults", () => {
|
||||
agents: {
|
||||
defaults: {
|
||||
models: {
|
||||
"anthropic/claude-opus-4-7": { alias: "Opus" },
|
||||
"anthropic/claude-opus-4-8": { alias: "Opus" },
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -100,7 +100,7 @@ describe("applyModelDefaults", () => {
|
||||
|
||||
const next = applyModelDefaults(cfg);
|
||||
|
||||
expect(next.agents?.defaults?.models?.["anthropic/claude-opus-4-7"]?.alias).toBe("Opus");
|
||||
expect(next.agents?.defaults?.models?.["anthropic/claude-opus-4-8"]?.alias).toBe("Opus");
|
||||
});
|
||||
|
||||
it("respects explicit empty alias disables", () => {
|
||||
|
||||
@@ -2,6 +2,7 @@ import type {
|
||||
AnthropicMessagesCompat,
|
||||
OpenAICompletionsCompat,
|
||||
OpenAIResponsesCompat,
|
||||
ThinkingLevelMap,
|
||||
} from "../llm/types.js";
|
||||
import type { AgentRuntimePolicyConfig } from "./types.agents-shared.js";
|
||||
import type { ConfiguredModelProviderRequest } from "./types.provider-request.js";
|
||||
@@ -152,6 +153,8 @@ export type ModelDefinitionConfig = {
|
||||
*/
|
||||
contextTokens?: number;
|
||||
maxTokens: number;
|
||||
/** Maps OpenClaw thinking levels to provider/model-specific values. */
|
||||
thinkingLevelMap?: ThinkingLevelMap;
|
||||
/** Provider-specific request/runtime parameters passed through to provider plugins. */
|
||||
params?: Record<string, unknown>;
|
||||
/** Optional agent execution runtime override for this provider/model pair. */
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import type { OpenClawConfig } from "../config/types.openclaw.js";
|
||||
import type { CrestodianOverview } from "./overview.js";
|
||||
|
||||
const CRESTODIAN_CLAUDE_CLI_MODEL = "claude-opus-4-7";
|
||||
const CRESTODIAN_CLAUDE_CLI_MODEL = "claude-opus-4-8";
|
||||
const CRESTODIAN_CODEX_MODEL = "gpt-5.5";
|
||||
|
||||
type CrestodianLocalPlannerBackend = {
|
||||
|
||||
@@ -141,12 +141,12 @@ describe("Crestodian assistant", () => {
|
||||
}
|
||||
expect(result.command).toBe("status");
|
||||
expect(result.reply).toBe("Checking the shell.");
|
||||
expect(result.modelLabel).toBe("claude-cli/claude-opus-4-7");
|
||||
expect(result.modelLabel).toBe("claude-cli/claude-opus-4-8");
|
||||
|
||||
expect(runCliAgent).toHaveBeenCalledTimes(1);
|
||||
const firstCliCall = firstMockArg(runCliAgent);
|
||||
expect(firstCliCall.provider).toBe("claude-cli");
|
||||
expect(firstCliCall.model).toBe("claude-opus-4-7");
|
||||
expect(firstCliCall.model).toBe("claude-opus-4-8");
|
||||
expect(firstCliCall.cleanupCliLiveSessionOnRunEnd).toBe(true);
|
||||
const firstCliConfig = requireRecord(firstCliCall.config);
|
||||
const firstCliAgents = requireRecord(firstCliConfig.agents);
|
||||
|
||||
@@ -110,8 +110,8 @@ const PLUGIN_UNINSTALL_RE =
|
||||
/^(?:(?:plugins?)\s+(?:uninstall|remove)|(?:uninstall|remove)\s+plugins?)\s+(?<pluginId>[A-Za-z0-9_.@/-]+)$/i;
|
||||
|
||||
const OPENAI_API_DEFAULT_MODEL_REF = `${DEFAULT_PROVIDER}/${DEFAULT_MODEL}`;
|
||||
const ANTHROPIC_API_DEFAULT_MODEL_REF = "anthropic/claude-opus-4-7";
|
||||
const CLAUDE_CLI_DEFAULT_MODEL_REF = "claude-cli/claude-opus-4-7";
|
||||
const ANTHROPIC_API_DEFAULT_MODEL_REF = "anthropic/claude-opus-4-8";
|
||||
const CLAUDE_CLI_DEFAULT_MODEL_REF = "claude-cli/claude-opus-4-8";
|
||||
const CODEX_APP_SERVER_DEFAULT_MODEL_REF = "openai/gpt-5.5";
|
||||
|
||||
export function parseCrestodianOperation(input: string): CrestodianOperation {
|
||||
|
||||
@@ -17,6 +17,7 @@ const EXTENDED_THINKING_LEVELS: ModelThinkingLevel[] = [
|
||||
"medium",
|
||||
"high",
|
||||
"xhigh",
|
||||
"max",
|
||||
];
|
||||
|
||||
export function getSupportedThinkingLevels<TApi extends Api>(
|
||||
@@ -31,7 +32,7 @@ export function getSupportedThinkingLevels<TApi extends Api>(
|
||||
if (mapped === null) {
|
||||
return false;
|
||||
}
|
||||
if (level === "xhigh") {
|
||||
if (level === "xhigh" || level === "max") {
|
||||
return mapped !== undefined;
|
||||
}
|
||||
return true;
|
||||
|
||||
@@ -19,7 +19,7 @@ vi.mock("@anthropic-ai/sdk", () => ({
|
||||
},
|
||||
}));
|
||||
|
||||
import { streamAnthropic } from "./anthropic.js";
|
||||
import { streamAnthropic, streamSimpleAnthropic } from "./anthropic.js";
|
||||
|
||||
function createSseResponse(events: Record<string, unknown>[] = []): Response {
|
||||
const body = events.map((event) => `data: ${JSON.stringify(event)}\n\n`).join("");
|
||||
@@ -167,4 +167,30 @@ describe("Anthropic provider", () => {
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
// Requests reasoning "max" on a claude-sonnet-4-6 model and captures the
// outgoing request payload through onPayload. The payload's output_config is
// expected to carry effort "high" rather than "max".
// NOTE(review): presumably makeAnthropicModel's defaults do not map "max" in
// thinkingLevelMap — confirm against the helper, which is not shown here.
it("clamps max adaptive effort when the Claude model does not advertise it", async () => {
|
||||
let capturedPayload: unknown;
|
||||
const stream = streamSimpleAnthropic(
|
||||
makeAnthropicModel({
|
||||
id: "claude-sonnet-4-6",
|
||||
name: "Claude Sonnet 4.6",
|
||||
}),
|
||||
{
|
||||
messages: [{ role: "user", content: "hello", timestamp: 0 }],
|
||||
},
|
||||
{
|
||||
apiKey: "sk-ant-provider",
|
||||
reasoning: "max",
|
||||
onPayload: (payload) => {
|
||||
capturedPayload = payload;
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
await stream.result();
|
||||
|
||||
expect((capturedPayload as { output_config?: unknown }).output_config).toEqual({
|
||||
effort: "high",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -7,7 +7,7 @@ import type {
|
||||
RawMessageStreamEvent,
|
||||
} from "@anthropic-ai/sdk/resources/messages.js";
|
||||
import { getEnvApiKey } from "../env-api-keys.js";
|
||||
import { calculateCost } from "../model-utils.js";
|
||||
import { calculateCost, clampThinkingLevel } from "../model-utils.js";
|
||||
import type {
|
||||
AnthropicMessagesCompat,
|
||||
Api,
|
||||
@@ -183,20 +183,20 @@ function getAnthropicCompat(model: Model<"anthropic-messages">): Required<Anthro
|
||||
export interface AnthropicOptions extends StreamOptions {
|
||||
/**
|
||||
* Enable extended thinking.
|
||||
* For Opus 4.6 and Sonnet 4.6: uses adaptive thinking (model decides when/how much to think).
|
||||
* For Opus 4.6+ and Sonnet 4.6: uses adaptive thinking (model decides when/how much to think).
|
||||
* For older models: uses budget-based thinking with thinkingBudgetTokens.
|
||||
*/
|
||||
thinkingEnabled?: boolean;
|
||||
/**
|
||||
* Token budget for extended thinking (older models only).
|
||||
* Ignored for Opus 4.6 and Sonnet 4.6, which use adaptive thinking.
|
||||
* Ignored for Opus 4.6+ and Sonnet 4.6, which use adaptive thinking.
|
||||
*/
|
||||
thinkingBudgetTokens?: number;
|
||||
/**
|
||||
* Effort level for adaptive thinking (Opus 4.6+ and Sonnet 4.6).
|
||||
* Controls how much thinking Claude allocates:
|
||||
* - "max": Always thinks with no constraints (model-dependent; originally Opus 4.6 only)
|
||||
* - "xhigh": Highest reasoning level (Opus 4.7)
|
||||
* - "xhigh": Highest reasoning level (Opus 4.7+)
|
||||
* - "high": Always thinks, deep reasoning (default)
|
||||
* - "medium": Moderate thinking, may skip for simple queries
|
||||
* - "low": Minimal thinking, skips for simple tasks
|
||||
@@ -210,7 +210,7 @@ export interface AnthropicOptions extends StreamOptions {
|
||||
* signature still travels back for multi-turn continuity. Use for faster
|
||||
* time-to-first-text-token when your UI does not surface thinking.
|
||||
*
|
||||
* Note: Anthropic's API default for Claude Opus 4.7 and Claude Mythos Preview
|
||||
* Note: Anthropic's API default for Claude Opus 4.7+ and Claude Mythos Preview
|
||||
* is "omitted". We default to "summarized" here to keep behavior consistent
|
||||
* with older Claude 4 models. Set this explicitly to "omitted" to opt in.
|
||||
*/
|
||||
@@ -728,6 +728,8 @@ function supportsAdaptiveThinking(modelId: string): boolean {
|
||||
return (
|
||||
modelId.includes("opus-4-6") ||
|
||||
modelId.includes("opus-4.6") ||
|
||||
modelId.includes("opus-4-8") ||
|
||||
modelId.includes("opus-4.8") ||
|
||||
modelId.includes("opus-4-7") ||
|
||||
modelId.includes("opus-4.7") ||
|
||||
modelId.includes("sonnet-4-6") ||
|
||||
@@ -737,18 +739,19 @@ function supportsAdaptiveThinking(modelId: string): boolean {
|
||||
|
||||
/**
|
||||
* Map ThinkingLevel to Anthropic effort levels for adaptive thinking.
|
||||
* Note: effort "max" is only valid on Opus 4.6, while Opus 4.7 supports "xhigh".
|
||||
* Model metadata owns the provider-specific extended effort mapping.
|
||||
*/
|
||||
function mapThinkingLevelToEffort(
|
||||
model: Model<"anthropic-messages">,
|
||||
level: SimpleStreamOptions["reasoning"],
|
||||
): AnthropicEffort {
|
||||
const mapped = level ? model.thinkingLevelMap?.[level] : undefined;
|
||||
const clampedLevel = level ? clampThinkingLevel(model, level) : undefined;
|
||||
const mapped = clampedLevel ? model.thinkingLevelMap?.[clampedLevel] : undefined;
|
||||
if (typeof mapped === "string") {
|
||||
return mapped as AnthropicEffort;
|
||||
}
|
||||
|
||||
switch (level) {
|
||||
switch (clampedLevel) {
|
||||
case "minimal":
|
||||
case "low":
|
||||
return "low";
|
||||
@@ -756,6 +759,8 @@ function mapThinkingLevelToEffort(
|
||||
return "medium";
|
||||
case "high":
|
||||
return "high";
|
||||
case "max":
|
||||
return "max";
|
||||
default:
|
||||
return "high";
|
||||
}
|
||||
@@ -982,7 +987,7 @@ function buildParams(
|
||||
// budget-based (older models), or explicitly disabled.
|
||||
if (model.reasoning) {
|
||||
if (options?.thinkingEnabled) {
|
||||
// Default to "summarized" so Opus 4.7 and Mythos Preview behave like
|
||||
// Default to "summarized" so Opus 4.7+ and Mythos Preview behave like
|
||||
// older Claude 4 models (whose API default is also "summarized").
|
||||
const display: AnthropicThinkingDisplay = options.thinkingDisplay ?? "summarized";
|
||||
if (supportsAdaptiveThinking(model.id)) {
|
||||
|
||||
@@ -186,7 +186,12 @@ export const streamSimpleAzureOpenAIResponses: StreamFunction<
|
||||
const clampedReasoning = options?.reasoning
|
||||
? clampThinkingLevel(model, options.reasoning)
|
||||
: undefined;
|
||||
const reasoningEffort = clampedReasoning === "off" ? undefined : clampedReasoning;
|
||||
const reasoningEffort =
|
||||
clampedReasoning === "off"
|
||||
? undefined
|
||||
: clampedReasoning === "max"
|
||||
? "xhigh"
|
||||
: clampedReasoning;
|
||||
|
||||
return streamAzureOpenAIResponses(model, context, {
|
||||
...base,
|
||||
|
||||
@@ -132,7 +132,9 @@ export const streamSimpleGoogleVertex: StreamFunction<"google-vertex", SimpleStr
|
||||
}
|
||||
|
||||
const clampedReasoning = clampThinkingLevel(model, options.reasoning);
|
||||
const effort = (clampedReasoning === "off" ? "high" : clampedReasoning) as ClampedThinkingLevel;
|
||||
const effort = (
|
||||
clampedReasoning === "off" || clampedReasoning === "max" ? "high" : clampedReasoning
|
||||
) as ClampedThinkingLevel;
|
||||
const geminiModel = model as unknown as Model<"google-generative-ai">;
|
||||
|
||||
if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
|
||||
@@ -312,7 +314,7 @@ function buildParams(
|
||||
return params;
|
||||
}
|
||||
|
||||
type ClampedThinkingLevel = Exclude<AgentThinkingLevel, "xhigh">;
|
||||
type ClampedThinkingLevel = Exclude<AgentThinkingLevel, "xhigh" | "max">;
|
||||
|
||||
function isGemini3ProModel(model: Model<"google-generative-ai">): boolean {
|
||||
return /gemini-3(?:\.\d+)?-pro/.test(model.id.toLowerCase());
|
||||
|
||||
@@ -119,7 +119,9 @@ export const streamSimpleGoogle: StreamFunction<"google-generative-ai", SimpleSt
|
||||
}
|
||||
|
||||
const clampedReasoning = clampThinkingLevel(model, options.reasoning);
|
||||
const effort = (clampedReasoning === "off" ? "high" : clampedReasoning) as ClampedThinkingLevel;
|
||||
const effort = (
|
||||
clampedReasoning === "off" || clampedReasoning === "max" ? "high" : clampedReasoning
|
||||
) as ClampedThinkingLevel;
|
||||
const googleModel = model;
|
||||
|
||||
if (
|
||||
@@ -225,7 +227,7 @@ function buildParams(
|
||||
return params;
|
||||
}
|
||||
|
||||
type ClampedThinkingLevel = Exclude<ThinkingLevel, "xhigh">;
|
||||
type ClampedThinkingLevel = Exclude<ThinkingLevel, "xhigh" | "max">;
|
||||
|
||||
function isGemma4Model(model: Model<"google-generative-ai">): boolean {
|
||||
return /gemma-?4/.test(model.id.toLowerCase());
|
||||
|
||||
@@ -452,7 +452,12 @@ export const streamSimpleOpenAICodexResponses: StreamFunction<
|
||||
const clampedReasoning = options?.reasoning
|
||||
? clampThinkingLevel(model, options.reasoning)
|
||||
: undefined;
|
||||
const reasoningEffort = clampedReasoning === "off" ? undefined : clampedReasoning;
|
||||
const reasoningEffort =
|
||||
clampedReasoning === "off"
|
||||
? undefined
|
||||
: clampedReasoning === "max"
|
||||
? "xhigh"
|
||||
: clampedReasoning;
|
||||
|
||||
return streamOpenAICodexResponses(model, context, {
|
||||
...base,
|
||||
|
||||
@@ -467,7 +467,12 @@ export const streamSimpleOpenAICompletions: StreamFunction<
|
||||
const clampedReasoning = options?.reasoning
|
||||
? clampThinkingLevel(model, options.reasoning)
|
||||
: undefined;
|
||||
const reasoningEffort = clampedReasoning === "off" ? undefined : clampedReasoning;
|
||||
const reasoningEffort =
|
||||
clampedReasoning === "off"
|
||||
? undefined
|
||||
: clampedReasoning === "max"
|
||||
? "xhigh"
|
||||
: clampedReasoning;
|
||||
const toolChoice = (options as OpenAICompletionsOptions | undefined)?.toolChoice;
|
||||
|
||||
return streamOpenAICompletions(model, context, {
|
||||
|
||||
@@ -179,7 +179,12 @@ export const streamSimpleOpenAIResponses: StreamFunction<
|
||||
const clampedReasoning = options?.reasoning
|
||||
? clampThinkingLevel(model, options.reasoning)
|
||||
: undefined;
|
||||
const reasoningEffort = clampedReasoning === "off" ? undefined : clampedReasoning;
|
||||
const reasoningEffort =
|
||||
clampedReasoning === "off"
|
||||
? undefined
|
||||
: clampedReasoning === "max"
|
||||
? "xhigh"
|
||||
: clampedReasoning;
|
||||
|
||||
return streamOpenAIResponses(model, context, {
|
||||
...base,
|
||||
|
||||
@@ -49,6 +49,7 @@ export function adjustMaxTokensForThinking(
|
||||
low: 2048,
|
||||
medium: 8192,
|
||||
high: 16384,
|
||||
max: 32768,
|
||||
};
|
||||
const budgets = { ...defaultBudgets, ...customBudgets };
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ export type KnownImagesProvider = "openrouter";
|
||||
|
||||
export type ImagesProvider = string;
|
||||
|
||||
export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
|
||||
export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh" | "max";
|
||||
export type ModelThinkingLevel = "off" | ThinkingLevel;
|
||||
export type ThinkingLevelMap = Partial<Record<ModelThinkingLevel, string | null>>;
|
||||
|
||||
@@ -33,6 +33,7 @@ export interface ThinkingBudgets {
|
||||
low?: number;
|
||||
medium?: number;
|
||||
high?: number;
|
||||
max?: number;
|
||||
}
|
||||
|
||||
// Base options all providers share
|
||||
|
||||
@@ -279,6 +279,14 @@ describe("buildProviderReplayFamilyHooks", () => {
|
||||
});
|
||||
|
||||
describe("resolveClaudeThinkingProfile", () => {
|
||||
// Resolves the thinking profile for "claude-opus-4-8" and checks two things:
// the default level is "off", and the offered levels include "xhigh",
// "adaptive" and "max".
it("leaves Opus 4.8 thinking off by default with xhigh/adaptive/max options", () => {
|
||||
const profile = resolveClaudeThinkingProfile("claude-opus-4-8");
|
||||
expectFields(profile, {
|
||||
defaultLevel: "off",
|
||||
});
|
||||
expectLevelIdsInclude(profile, ["xhigh", "adaptive", "max"]);
|
||||
});
|
||||
|
||||
it("exposes Opus 4.7 thinking levels for direct and proxied Claude providers", () => {
|
||||
const directProfile = resolveClaudeThinkingProfile("claude-opus-4-7");
|
||||
expectFields(directProfile, {
|
||||
|
||||
@@ -94,6 +94,7 @@ export {
|
||||
} from "../plugins/provider-model-helpers.js";
|
||||
import { normalizeOptionalLowercaseString } from "../shared/string-coerce.js";
|
||||
|
||||
const CLAUDE_OPUS_48_MODEL_PREFIXES = ["claude-opus-4-8", "claude-opus-4.8"] as const;
|
||||
const CLAUDE_OPUS_47_MODEL_PREFIXES = ["claude-opus-4-7", "claude-opus-4.7"] as const;
|
||||
const CLAUDE_ADAPTIVE_THINKING_DEFAULT_MODEL_PREFIXES = [
|
||||
"claude-opus-4-6",
|
||||
@@ -135,6 +136,10 @@ function isClaudeOpus47ModelId(modelId: string): boolean {
|
||||
return matchesClaudeModelPrefix(modelId, CLAUDE_OPUS_47_MODEL_PREFIXES);
|
||||
}
|
||||
|
||||
/**
 * Reports whether `modelId` is a Claude Opus 4.8 id by delegating to
 * matchesClaudeModelPrefix with CLAUDE_OPUS_48_MODEL_PREFIXES
 * ("claude-opus-4-8" and "claude-opus-4.8").
 * NOTE(review): presumably a prefix match, possibly after normalization —
 * confirm against matchesClaudeModelPrefix, which is not shown here.
 */
function isClaudeOpus48ModelId(modelId: string): boolean {
|
||||
return matchesClaudeModelPrefix(modelId, CLAUDE_OPUS_48_MODEL_PREFIXES);
|
||||
}
|
||||
|
||||
/** @deprecated Anthropic provider-owned model helper; do not use from third-party plugins. */
|
||||
export function isClaudeAdaptiveThinkingDefaultModelId(modelId: string): boolean {
|
||||
return matchesClaudeModelPrefix(modelId, CLAUDE_ADAPTIVE_THINKING_DEFAULT_MODEL_PREFIXES);
|
||||
@@ -142,6 +147,12 @@ export function isClaudeAdaptiveThinkingDefaultModelId(modelId: string): boolean
|
||||
|
||||
/** @deprecated Anthropic provider-owned model helper; do not use from third-party plugins. */
|
||||
export function resolveClaudeThinkingProfile(modelId: string): ProviderThinkingProfile {
|
||||
if (isClaudeOpus48ModelId(modelId)) {
|
||||
return {
|
||||
levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }],
|
||||
defaultLevel: "off",
|
||||
};
|
||||
}
|
||||
if (isClaudeOpus47ModelId(modelId)) {
|
||||
return {
|
||||
levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }],
|
||||
|
||||
@@ -577,7 +577,7 @@ describe("package artifact reuse", () => {
|
||||
expect(workflow).toContain("suite_id: native-live-src-gateway-profiles-anthropic-opus");
|
||||
expect(workflow).toContain("suite_id: native-live-src-gateway-profiles-anthropic-sonnet-haiku");
|
||||
expect(workflow).toContain("suite_group: native-live-src-gateway-profiles-anthropic");
|
||||
expect(workflow).toContain("OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-7");
|
||||
expect(workflow).toContain("OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-8");
|
||||
expect(workflow).toContain("anthropic/claude-sonnet-4-6,anthropic/claude-haiku-4-5");
|
||||
expect(workflow).toMatch(
|
||||
/suite_id: native-live-src-gateway-profiles-fireworks[\s\S]*?advisory: true/u,
|
||||
|
||||
Reference in New Issue
Block a user