fix(qa-lab): refresh parity model targets

2026-06-06 05:51:15 +08:00 · 2026-05-17 23:12:20 +08:00
parent 019dbcc749
commit 1926982c4c
37 changed files with 187 additions and 186 deletions
--- a/.github/workflows/control-ui-locale-refresh.yml
+++ b/.github/workflows/control-ui-locale-refresh.yml
@@ -138,7 +138,7 @@ jobs:
          OPENAI_API_KEY: ${{ secrets.OPENCLAW_DOCS_I18N_OPENAI_API_KEY || secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          OPENCLAW_CONTROL_UI_I18N_PROVIDER: ${{ secrets.ANTHROPIC_API_KEY != '' && 'anthropic' || 'openai' }}
-          OPENCLAW_CONTROL_UI_I18N_MODEL: ${{ secrets.ANTHROPIC_API_KEY != '' && 'claude-opus-4-6' || vars.OPENCLAW_CI_OPENAI_MODEL_BARE }}
+          OPENCLAW_CONTROL_UI_I18N_MODEL: ${{ secrets.ANTHROPIC_API_KEY != '' && 'claude-opus-4-7' || vars.OPENCLAW_CI_OPENAI_MODEL_BARE }}
          OPENCLAW_CONTROL_UI_I18N_THINKING: low
          OPENCLAW_CONTROL_UI_I18N_AUTH_OPTIONAL: "1"
          LOCALE: ${{ matrix.locale }}
--- a/.github/workflows/mantis-discord-status-reactions.yml
+++ b/.github/workflows/mantis-discord-status-reactions.yml
@@ -349,8 +349,8 @@ jobs:
              --repo-root "$repo_root" \
              --output-dir "$output_dir" \
              --provider-mode live-frontier \
-              --model openai/gpt-5.4 \
-              --alt-model openai/gpt-5.4 \
+              --model openai/gpt-5.5 \
+              --alt-model openai/gpt-5.5 \
              --fast \
              --credential-source convex \
              --credential-role ci \
--- a/.github/workflows/mantis-slack-desktop-smoke.yml
+++ b/.github/workflows/mantis-slack-desktop-smoke.yml
@@ -281,8 +281,8 @@ jobs:
            --credential-role ci \
            --provider-mode live-frontier \
            --hydrate-mode "$HYDRATE_MODE" \
-            --model openai/gpt-5.4 \
-            --alt-model openai/gpt-5.4 \
+            --model openai/gpt-5.5 \
+            --alt-model openai/gpt-5.5 \
            --fast \
            --scenario "$SCENARIO_ID" \
            "${keep_args[@]}" \
--- a/.github/workflows/mantis-telegram-live.yml
+++ b/.github/workflows/mantis-telegram-live.yml
@@ -386,7 +386,7 @@ jobs:
          output_rel=".artifacts/qa-e2e/mantis/telegram-live"
          root="$candidate_repo/$output_rel"
          echo "output_dir=${root}" >> "$GITHUB_OUTPUT"
-          model="${OPENCLAW_CI_OPENAI_MODEL:-openai/gpt-5.4}"
+          model="${OPENCLAW_CI_OPENAI_MODEL:-openai/gpt-5.5}"

          scenario_args=()
          if [[ -n "${SCENARIO_INPUT// }" ]]; then
--- a/.github/workflows/openclaw-cross-os-release-checks-reusable.yml
+++ b/.github/workflows/openclaw-cross-os-release-checks-reusable.yml
@@ -186,7 +186,7 @@ env:
  PNPM_VERSION: "11.0.8"
  OPENCLAW_REPOSITORY: openclaw/openclaw
  TSX_VERSION: "4.21.0"
-  OPENCLAW_CROSS_OS_OPENAI_MODEL: ${{ inputs.openai_model || vars.OPENCLAW_CROSS_OS_OPENAI_MODEL || 'openai/gpt-5.4' }}
+  OPENCLAW_CROSS_OS_OPENAI_MODEL: ${{ inputs.openai_model || vars.OPENCLAW_CROSS_OS_OPENAI_MODEL || 'openai/gpt-5.5' }}

 jobs:
  prepare:
--- a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
+++ b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
@@ -1911,7 +1911,7 @@ jobs:
          - suite_id: native-live-src-gateway-profiles-anthropic-opus
            suite_group: native-live-src-gateway-profiles-anthropic
            label: Native live gateway profiles Anthropic Opus
-            command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-7,anthropic/claude-opus-4-6 node .release-harness/scripts/test-live-shard.mjs native-live-src-gateway-profiles
+            command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-7 node .release-harness/scripts/test-live-shard.mjs native-live-src-gateway-profiles
            timeout_minutes: 30
            profile_env_only: false
            advisory: true
--- a/.github/workflows/openclaw-performance.yml
+++ b/.github/workflows/openclaw-performance.yml
@@ -30,8 +30,8 @@ on:
        required: false
        default: false
        type: boolean
-      live_gpt54:
-        description: Run the live OpenAI GPT 5.4 agent-turn lane
+      live_openai_candidate:
+        description: Run the live OpenAI GPT 5.5 agent-turn lane
        required: false
        default: false
        type: boolean
@@ -57,7 +57,7 @@ env:
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
  OCM_VERSION: v0.2.15
  KOVA_REPOSITORY: openclaw/Kova
-  PERFORMANCE_MODEL_ID: gpt-5.4
+  PERFORMANCE_MODEL_ID: gpt-5.5

 jobs:
  kova:
@@ -82,8 +82,8 @@ jobs:
            deep_profile: "true"
            live: "false"
            include_filters: "scenario:fresh-install scenario:gateway-performance scenario:agent-cold-warm-message"
-          - lane: live-gpt54
-            title: Kova live OpenAI GPT 5.4 agent turn
+          - lane: live-openai-candidate
+            title: Kova live OpenAI GPT 5.5 agent turn
            auth: live
            repeat: "1"
            deep_profile: "false"
@@ -119,9 +119,9 @@ jobs:
            run_lane=false
            reason="deep_profile input is false"
          fi
-          if [[ "$LANE_ID" == "live-gpt54" && "${{ github.event_name }}" != "schedule" && "${{ inputs.live_gpt54 || 'false' }}" != "true" ]]; then
+          if [[ "$LANE_ID" == "live-openai-candidate" && "${{ github.event_name }}" != "schedule" && "${{ inputs.live_openai_candidate || 'false' }}" != "true" ]]; then
            run_lane=false
-            reason="live_gpt54 input is false"
+            reason="live_openai_candidate input is false"
          fi
          echo "run=$run_lane" >> "$GITHUB_OUTPUT"
          if [[ "$run_lane" != "true" ]]; then
@@ -200,7 +200,7 @@ jobs:
          chmod 0755 "$HOME/.local/bin/kova"
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"

-      - name: Pin Kova OpenAI model to GPT 5.4
+      - name: Pin Kova OpenAI model to GPT 5.5
        if: steps.lane.outputs.run == 'true'
        shell: bash
        run: |
@@ -244,7 +244,7 @@ jobs:
        run: |
          set -euo pipefail
          if [[ -z "${OPENAI_API_KEY:-}" ]]; then
-            echo "OPENAI_API_KEY is not configured; live GPT 5.4 lane will be skipped." >> "$GITHUB_STEP_SUMMARY"
+            echo "OPENAI_API_KEY is not configured; live GPT 5.5 lane will be skipped." >> "$GITHUB_STEP_SUMMARY"
            exit 0
          fi
          kova setup --ci --json
--- a/.github/workflows/openclaw-release-checks.yml
+++ b/.github/workflows/openclaw-release-checks.yml
@@ -542,7 +542,7 @@ jobs:
      candidate_file_name: openclaw-current.tgz
      candidate_version: ${{ needs.prepare_release_package.outputs.package_version }}
      candidate_source_sha: ${{ needs.prepare_release_package.outputs.source_sha }}
-      openai_model: openai/gpt-5.4
+      openai_model: openai/gpt-5.5
      ubuntu_runner: ubuntu-24.04
      windows_runner: windows-2025
      macos_runner: macos-26
@@ -724,9 +724,9 @@ jobs:
      matrix:
        include:
          - lane: candidate
-            output_dir: gpt54
+            output_dir: openai-candidate
          - lane: baseline
-            output_dir: opus46
+            output_dir: anthropic-baseline
    env:
      QA_PARITY_CONCURRENCY: "1"
      OPENCLAW_QA_TRANSPORT_READY_TIMEOUT_MS: "180000"
@@ -772,7 +772,7 @@ jobs:
              ;;
            baseline)
              model="anthropic/claude-opus-4-7"
-              alt_model="anthropic/claude-sonnet-4-7"
+              alt_model="anthropic/claude-sonnet-4-6"
              ;;
            *)
              echo "Unknown QA parity lane: ${QA_PARITY_LANE}" >&2
@@ -841,8 +841,8 @@ jobs:
        run: |
          pnpm openclaw qa parity-report \
            --repo-root . \
-            --candidate-summary .artifacts/qa-e2e/gpt54/qa-suite-summary.json \
-            --baseline-summary .artifacts/qa-e2e/opus46/qa-suite-summary.json \
+            --candidate-summary .artifacts/qa-e2e/openai-candidate/qa-suite-summary.json \
+            --baseline-summary .artifacts/qa-e2e/anthropic-baseline/qa-suite-summary.json \
            --candidate-label "${OPENCLAW_CI_OPENAI_MODEL}" \
            --baseline-label anthropic/claude-opus-4-7 \
            --output-dir .artifacts/qa-e2e/parity
--- a/.github/workflows/qa-live-transports-convex.yml
+++ b/.github/workflows/qa-live-transports-convex.yml
@@ -198,7 +198,7 @@ jobs:
            --concurrency "${QA_PARITY_CONCURRENCY}" \
            --model "${OPENCLAW_CI_OPENAI_MODEL}" \
            --alt-model openai/gpt-5.5-alt \
-            --output-dir .artifacts/qa-e2e/gpt54
+            --output-dir .artifacts/qa-e2e/openai-candidate

      - name: Run Opus 4.7 lane
        run: |
@@ -207,15 +207,15 @@ jobs:
            --parity-pack agentic \
            --concurrency "${QA_PARITY_CONCURRENCY}" \
            --model anthropic/claude-opus-4-7 \
-            --alt-model anthropic/claude-sonnet-4-7 \
-            --output-dir .artifacts/qa-e2e/opus46
+            --alt-model anthropic/claude-sonnet-4-6 \
+            --output-dir .artifacts/qa-e2e/anthropic-baseline

      - name: Generate parity report
        run: |
          pnpm openclaw qa parity-report \
            --repo-root . \
-            --candidate-summary .artifacts/qa-e2e/gpt54/qa-suite-summary.json \
-            --baseline-summary .artifacts/qa-e2e/opus46/qa-suite-summary.json \
+            --candidate-summary .artifacts/qa-e2e/openai-candidate/qa-suite-summary.json \
+            --baseline-summary .artifacts/qa-e2e/anthropic-baseline/qa-suite-summary.json \
            --candidate-label "${OPENCLAW_CI_OPENAI_MODEL}" \
            --baseline-label anthropic/claude-opus-4-7 \
            --output-dir .artifacts/qa-e2e/parity
@@ -565,8 +565,8 @@ jobs:
            --repo-root . \
            --output-dir "${output_dir}" \
            --provider-mode live-frontier \
-            --model openai/gpt-5.4 \
-            --alt-model openai/gpt-5.4 \
+            --model openai/gpt-5.5 \
+            --alt-model openai/gpt-5.5 \
            --fast \
            --credential-source convex \
            --credential-role ci \
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
 - QA-Lab/qa-channel: attach redacted agent tool-start traces to outbound `QaBusMessage` records so scenarios can assert actual tool use instead of relying only on reply text. Fixes #67637. Thanks @100yenadmin.
 - QA-Lab: fail live runtime parity reports when assistant-message usage is missing, preventing `0 vs 0` live token rows from being reported as passing proof. Fixes #80411. Thanks @100yenadmin.
 - QA-Lab: fail Codex-backed OpenAI live runtime-pair runs before launching isolated workers when no portable Codex auth is available, while staging API-key fallbacks and configured Codex keys for isolated QA agents. Fixes #80412. Thanks @100yenadmin.
+- QA-Lab: refresh parity gates, mock frontier fixtures, model scenarios, and workflow artifact lanes to compare GPT-5.5 against Claude Opus 4.7. Fixes #74262. Thanks @100yenadmin.
 - QA-Lab: stop returning Control UI bearer tokens from unauthenticated bootstrap payloads and bind Docker harness ports to loopback-only host addresses. (#66355) Thanks @pgondhi987.
 - Mac app: avoid a SwiftUI metadata crash when rendering the Cron Jobs settings pane.
 - Agents/OpenAI streams: yield via `setTimeout(0)` instead of `setImmediate` between bursty Responses chunks so abort timers can fire during the yield, keeping cancel-on-timeout responsive on hot streams. Refs #82462.
--- a/docs/ci.md
+++ b/docs/ci.md
@@ -35,7 +35,7 @@ OpenClaw CI runs on every push to `main` and every pull request. The `preflight`
 | `macos-swift`                    | Swift lint, build, and tests for the macOS app                                                            | macOS-relevant changes             |
 | `android`                        | Android unit tests for both flavors plus one debug APK build                                              | Android-relevant changes           |
 | `test-performance-agent`         | Daily Codex slow-test optimization after trusted activity                                                 | Main CI success or manual dispatch |
-| `openclaw-performance`           | Daily/on-demand Kova runtime performance reports with mock-provider, deep-profile, and GPT 5.4 live lanes | Scheduled and manual dispatch      |
+| `openclaw-performance`           | Daily/on-demand Kova runtime performance reports with mock-provider, deep-profile, and GPT 5.5 live lanes | Scheduled and manual dispatch      |

 ## Fail-fast order

@@ -138,7 +138,7 @@ pnpm perf:kova:summary --report .artifacts/kova/reports/mock-provider/report.jso

 ```bash
 gh workflow run openclaw-performance.yml --ref main -f profile=diagnostic -f repeat=3
-gh workflow run openclaw-performance.yml --ref main -f profile=smoke -f repeat=1 -f deep_profile=true -f live_gpt54=true
+gh workflow run openclaw-performance.yml --ref main -f profile=smoke -f repeat=1 -f deep_profile=true -f live_openai_candidate=true
 gh workflow run openclaw-performance.yml --ref main -f target_ref=v2026.5.2 -f profile=diagnostic -f repeat=3
 ```

@@ -148,7 +148,7 @@ The workflow installs OCM from a pinned release and Kova from `openclaw/Kova` at

 - `mock-provider`: Kova diagnostic scenarios against a local-build runtime with deterministic fake OpenAI-compatible auth.
 - `mock-deep-profile`: CPU/heap/trace profiling for startup, gateway, and agent-turn hotspots.
-- `live-gpt54`: a real OpenAI `openai/gpt-5.4` agent turn, skipped when `OPENAI_API_KEY` is unavailable.
+- `live-openai-candidate`: a real OpenAI `openai/gpt-5.5` agent turn, skipped when `OPENAI_API_KEY` is unavailable.

 The mock-provider lane also runs OpenClaw-native source probes after the Kova pass: gateway boot timing and memory across default, hook, and 50-plugin startup cases; repeated mock-OpenAI `channel-chat-baseline` hello loops; and CLI startup commands against the booted gateway. The source probe Markdown summary lives at `source/index.md` in the report bundle, with raw JSON beside it.

@@ -269,7 +269,7 @@ For the dedicated update and plugin testing policy, including local commands,
 Docker lanes, Package Acceptance inputs, release defaults, and failure triage,
 see [Testing updates and plugins](/help/testing-updates-plugins).

-Release checks call Package Acceptance with `source=artifact`, the prepared release package artifact, `suite_profile=custom`, `docker_lanes='doctor-switch update-channel-switch skill-install update-corrupt-plugin upgrade-survivor published-upgrade-survivor update-restart-auth plugins-offline plugin-update'`, and `telegram_mode=mock-openai`. This keeps package migration, update, live ClawHub skill install, stale-plugin-dependency cleanup, configured-plugin install repair, offline plugin, plugin-update, and Telegram proof on the same resolved package tarball. Set `release_package_spec` on Full Release Validation or OpenClaw Release Checks after publishing a beta to run the same matrix against the shipped npm package without rebuilding; set `package_acceptance_package_spec` only when Package Acceptance needs a different package from the rest of release validation. Cross-OS release checks still cover OS-specific onboarding, installer, and platform behavior; package/update product validation should start with Package Acceptance. The `published-upgrade-survivor` Docker lane validates one published package baseline per run in the blocking release path. In Package Acceptance, the resolved `package-under-test` tarball is always the candidate and `published_upgrade_survivor_baseline` selects the fallback published baseline, defaulting to `openclaw@latest`; failed-lane rerun commands preserve that baseline. Full Release Validation with `run_release_soak=true` or `release_profile=full` sets `published_upgrade_survivor_baselines='last-stable-4 2026.4.23 2026.5.2 2026.4.15'` and `published_upgrade_survivor_scenarios=reported-issues` to expand across the four latest stable npm releases plus pinned plugin-compatibility boundary releases and issue-shaped fixtures for Feishu config, preserved bootstrap/persona files, configured OpenClaw plugin installs, tilde log paths, and stale legacy plugin dependency roots. Multi-baseline published-upgrade survivor selections are sharded by baseline into separate targeted Docker runner jobs. The separate `Update Migration` workflow uses the `update-migration` Docker lane with `all-since-2026.4.23` and `plugin-deps-cleanup` when the question is exhaustive published update cleanup, not normal Full Release CI breadth. Local aggregate runs can pass exact package specs with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS`, keep a single lane with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC` such as `openclaw@2026.4.15`, or set `OPENCLAW_UPGRADE_SURVIVOR_SCENARIOS` for the scenario matrix. The published lane configures the baseline with a baked `openclaw config set` command recipe, records recipe steps in `summary.json`, and probes `/healthz`, `/readyz`, plus RPC status after Gateway start. The Windows packaged and installer fresh lanes also verify that an installed package can import a browser-control override from a raw absolute Windows path. The OpenAI cross-OS agent-turn smoke defaults to `OPENCLAW_CROSS_OS_OPENAI_MODEL` when set, otherwise `openai/gpt-5.4`, so the install and gateway proof stays on a GPT-5 test model while avoiding GPT-4.x defaults.
+Release checks call Package Acceptance with `source=artifact`, the prepared release package artifact, `suite_profile=custom`, `docker_lanes='doctor-switch update-channel-switch skill-install update-corrupt-plugin upgrade-survivor published-upgrade-survivor update-restart-auth plugins-offline plugin-update'`, and `telegram_mode=mock-openai`. This keeps package migration, update, live ClawHub skill install, stale-plugin-dependency cleanup, configured-plugin install repair, offline plugin, plugin-update, and Telegram proof on the same resolved package tarball. Set `release_package_spec` on Full Release Validation or OpenClaw Release Checks after publishing a beta to run the same matrix against the shipped npm package without rebuilding; set `package_acceptance_package_spec` only when Package Acceptance needs a different package from the rest of release validation. Cross-OS release checks still cover OS-specific onboarding, installer, and platform behavior; package/update product validation should start with Package Acceptance. The `published-upgrade-survivor` Docker lane validates one published package baseline per run in the blocking release path. In Package Acceptance, the resolved `package-under-test` tarball is always the candidate and `published_upgrade_survivor_baseline` selects the fallback published baseline, defaulting to `openclaw@latest`; failed-lane rerun commands preserve that baseline. Full Release Validation with `run_release_soak=true` or `release_profile=full` sets `published_upgrade_survivor_baselines='last-stable-4 2026.4.23 2026.5.2 2026.4.15'` and `published_upgrade_survivor_scenarios=reported-issues` to expand across the four latest stable npm releases plus pinned plugin-compatibility boundary releases and issue-shaped fixtures for Feishu config, preserved bootstrap/persona files, configured OpenClaw plugin installs, tilde log paths, and stale legacy plugin dependency roots. Multi-baseline published-upgrade survivor selections are sharded by baseline into separate targeted Docker runner jobs. The separate `Update Migration` workflow uses the `update-migration` Docker lane with `all-since-2026.4.23` and `plugin-deps-cleanup` when the question is exhaustive published update cleanup, not normal Full Release CI breadth. Local aggregate runs can pass exact package specs with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS`, keep a single lane with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC` such as `openclaw@2026.4.15`, or set `OPENCLAW_UPGRADE_SURVIVOR_SCENARIOS` for the scenario matrix. The published lane configures the baseline with a baked `openclaw config set` command recipe, records recipe steps in `summary.json`, and probes `/healthz`, `/readyz`, plus RPC status after Gateway start. The Windows packaged and installer fresh lanes also verify that an installed package can import a browser-control override from a raw absolute Windows path. The OpenAI cross-OS agent-turn smoke defaults to `OPENCLAW_CROSS_OS_OPENAI_MODEL` when set, otherwise `openai/gpt-5.5`, so the install and gateway proof stays on a GPT-5 test model while avoiding GPT-4.x defaults.

 ### Legacy compatibility windows

--- a/docs/concepts/qa-e2e-automation.md
+++ b/docs/concepts/qa-e2e-automation.md
@@ -175,7 +175,7 @@ For an agent/CV style desktop task, run:
 pnpm openclaw qa mantis visual-task \
  --browser-url https://example.net \
  --expect-text "Example Domain" \
-  --vision-model openai/gpt-5.4
+  --vision-model openai/gpt-5.5
 ```

 `visual-task` leases or reuses a Crabbox desktop/browser machine, starts
@@ -370,8 +370,8 @@ Run the Mantis status-reaction scenario explicitly:
 pnpm openclaw qa discord \
  --scenario discord-status-reactions-tool-only \
  --provider-mode live-frontier \
-  --model openai/gpt-5.4 \
-  --alt-model openai/gpt-5.4 \
+  --model openai/gpt-5.5 \
+  --alt-model openai/gpt-5.5 \
  --fast
 ```

@@ -780,13 +780,13 @@ pnpm openclaw qa character-eval \
  --model openai/gpt-5.5,thinking=medium,fast \
  --model openai/gpt-5.2,thinking=xhigh \
  --model openai/gpt-5,thinking=xhigh \
-  --model anthropic/claude-opus-4-6,thinking=high \
+  --model anthropic/claude-opus-4-7,thinking=high \
  --model anthropic/claude-sonnet-4-6,thinking=high \
  --model zai/glm-5.1,thinking=high \
  --model moonshot/kimi-k2.5,thinking=high \
  --model google/gemini-3.1-pro-preview,thinking=high \
  --judge-model openai/gpt-5.5,thinking=xhigh,fast \
-  --judge-model anthropic/claude-opus-4-6,thinking=high \
+  --judge-model anthropic/claude-opus-4-7,thinking=high \
  --blind-judge-models \
  --concurrency 16 \
  --judge-concurrency 16
@@ -817,13 +817,13 @@ Candidate and judge model runs both default to concurrency 16. Lower
 `--concurrency` or `--judge-concurrency` when provider limits or local gateway
 pressure make a run too noisy.
 When no candidate `--model` is passed, the character eval defaults to
-`openai/gpt-5.5`, `openai/gpt-5.2`, `openai/gpt-5`, `anthropic/claude-opus-4-6`,
+`openai/gpt-5.5`, `openai/gpt-5.2`, `openai/gpt-5`, `anthropic/claude-opus-4-7`,
 `anthropic/claude-sonnet-4-6`, `zai/glm-5.1`,
 `moonshot/kimi-k2.5`, and
 `google/gemini-3.1-pro-preview` when no `--model` is passed.
 When no `--judge-model` is passed, the judges default to
 `openai/gpt-5.5,thinking=xhigh,fast` and
-`anthropic/claude-opus-4-6,thinking=high`.
+`anthropic/claude-opus-4-7,thinking=high`.

 ## Related docs

--- a/docs/help/gpt55-codex-agentic-parity-maintainers.md
+++ b/docs/help/gpt55-codex-agentic-parity-maintainers.md
@@ -59,7 +59,7 @@ Does not own:

 Owns:

-- first-wave GPT-5.5 vs Opus 4.6 scenario pack
+- first-wave GPT-5.5 vs Opus 4.7 scenario pack
 - parity documentation
 - parity report and release-gate mechanics

@@ -123,7 +123,7 @@ Expected artifacts from PR D:

 ## Release gate

-Do not claim GPT-5.5 parity or superiority over Opus 4.6 until:
+Do not claim GPT-5.5 parity or superiority over Opus 4.7 until:

 - PR A, PR B, and PR C are merged
 - PR D runs the first-wave parity pack cleanly
@@ -133,7 +133,7 @@ Do not claim GPT-5.5 parity or superiority over Opus 4.6 until:
 ```mermaid
 flowchart LR
    A["PR A-C merged"] --> B["Run GPT-5.5 parity pack"]
-    A --> C["Run Opus 4.6 parity pack"]
+    A --> C["Run Opus 4.7 parity pack"]
    B --> D["qa-suite-summary.json"]
    C --> E["qa-suite-summary.json"]
    D --> F["qa parity-report"]
@@ -146,7 +146,7 @@ flowchart LR

 The parity harness is not the only evidence source. Keep this split explicit in review:

-- PR D owns the scenario-based GPT-5.5 vs Opus 4.6 comparison
+- PR D owns the scenario-based GPT-5.5 vs Opus 4.7 comparison
 - PR B deterministic suites still own auth/proxy/DNS and full-access truthfulness evidence

 ## Quick maintainer merge workflow
@@ -179,7 +179,7 @@ If any one of the evidence bar items is missing, request changes instead of merg
 | No fake progress or fake tool completion | PR A + PR D   | parity fake-success count plus scenario-level report details        |
 | No false `/elevated full` guidance       | PR B          | deterministic runtime-truthfulness suites                           |
 | Replay/liveness failures remain explicit | PR C + PR D   | lifecycle/replay suites plus `compaction-retry-mutating-tool`       |
-| GPT-5.5 matches or beats Opus 4.6        | PR D          | `qa-agentic-parity-report.md` and `qa-agentic-parity-summary.json`  |
+| GPT-5.5 matches or beats Opus 4.7        | PR D          | `qa-agentic-parity-report.md` and `qa-agentic-parity-summary.json`  |

 ## Reviewer shorthand: before vs after

--- a/docs/help/gpt55-codex-agentic-parity.md
+++ b/docs/help/gpt55-codex-agentic-parity.md
@@ -13,7 +13,7 @@ OpenClaw already worked well with tool-using frontier models, but GPT-5.5 and Co
 - they could use strict OpenAI/Codex tool schemas incorrectly
 - they could ask for `/elevated full` even when full access was impossible
 - they could lose long-running task state during replay or compaction
-- parity claims against Claude Opus 4.6 were based on anecdotes instead of repeatable scenarios
+- parity claims against Claude Opus 4.7 were based on anecdotes instead of repeatable scenarios

 This parity program fixes those gaps in four reviewable slices.

@@ -51,7 +51,7 @@ The tool-compat work reduces schema friction for strict OpenAI/Codex tool regist

 ### PR D: parity harness

-This slice adds the first-wave QA-lab parity pack so GPT-5.5 and Opus 4.6 can be exercised through the same scenarios and compared using shared evidence.
+This slice adds the first-wave QA-lab parity pack so GPT-5.5 and Opus 4.7 can be exercised through the same scenarios and compared using shared evidence.

 The parity pack is the proof layer. It does not change runtime behavior by itself.

@@ -60,8 +60,8 @@ After you have two `qa-suite-summary.json` artifacts, generate the release-gate
 ```bash
 pnpm openclaw qa parity-report \
  --repo-root . \
-  --candidate-summary .artifacts/qa-e2e/gpt55/qa-suite-summary.json \
-  --baseline-summary .artifacts/qa-e2e/opus46/qa-suite-summary.json \
+  --candidate-summary .artifacts/qa-e2e/openai-candidate/qa-suite-summary.json \
+  --baseline-summary .artifacts/qa-e2e/anthropic-baseline/qa-suite-summary.json \
  --output-dir .artifacts/qa-e2e/parity
 ```

@@ -122,7 +122,7 @@ flowchart TD
 ```mermaid
 flowchart LR
    A["Merged runtime slices (PR A-C)"] --> B["Run GPT-5.5 parity pack"]
-    A --> C["Run Opus 4.6 parity pack"]
+    A --> C["Run Opus 4.7 parity pack"]
    B --> D["qa-suite-summary.json"]
    C --> E["qa-suite-summary.json"]
    D --> F["openclaw qa parity-report"]
@@ -178,7 +178,7 @@ Required outcomes:
 - no fake completion without real execution
 - no incorrect `/elevated full` guidance
 - no silent replay or compaction abandonment
-- parity-pack metrics that are at least as strong as the agreed Opus 4.6 baseline
+- parity-pack metrics that are at least as strong as the agreed Opus 4.7 baseline

 For the first-wave harness, the gate compares:

@@ -189,7 +189,7 @@ For the first-wave harness, the gate compares:

 Parity evidence is intentionally split across two layers:

-- PR D proves same-scenario GPT-5.5 vs Opus 4.6 behavior with QA-lab
+- PR D proves same-scenario GPT-5.5 vs Opus 4.7 behavior with QA-lab
 - PR B deterministic suites prove auth, proxy, DNS, and `/elevated full` truthfulness outside the harness

 ## Goal-to-evidence matrix
@@ -200,13 +200,13 @@ Parity evidence is intentionally split across two layers:
 | GPT-5.5 no longer fakes progress or fake tool completion | PR A + PR D | parity report scenario outcomes and fake-success count             | no suspicious pass results and no commentary-only completion                             |
 | GPT-5.5 no longer gives false `/elevated full` guidance  | PR B        | deterministic truthfulness suites                                  | blocked reasons and full-access hints stay runtime-accurate                              |
 | Replay/liveness failures stay explicit                   | PR C + PR D | PR C lifecycle/replay suites plus `compaction-retry-mutating-tool` | mutating work keeps replay-unsafety explicit instead of silently disappearing            |
-| GPT-5.5 matches or beats Opus 4.6 on the agreed metrics  | PR D        | `qa-agentic-parity-report.md` and `qa-agentic-parity-summary.json` | same scenario coverage and no regression on completion, stop behavior, or valid tool use |
+| GPT-5.5 matches or beats Opus 4.7 on the agreed metrics  | PR D        | `qa-agentic-parity-report.md` and `qa-agentic-parity-summary.json` | same scenario coverage and no regression on completion, stop behavior, or valid tool use |

 ## How to read the parity verdict

 Use the verdict in `qa-agentic-parity-summary.json` as the final machine-readable decision for the first-wave parity pack.

-- `pass` means GPT-5.5 covered the same scenarios as Opus 4.6 and did not regress on the agreed aggregate metrics.
+- `pass` means GPT-5.5 covered the same scenarios as Opus 4.7 and did not regress on the agreed aggregate metrics.
 - `fail` means at least one hard gate tripped: weaker completion, worse unintended stops, weaker valid tool use, any fake-success case, or mismatched scenario coverage.
 - "shared/base CI issue" is not itself a parity result. If CI noise outside PR D blocks a run, the verdict should wait for a clean merged-runtime execution instead of being inferred from branch-era logs.
 - Auth, proxy, DNS, and `/elevated full` truthfulness still come from PR B's deterministic suites, so the final release claim needs both: a passing PR D parity verdict and green PR B truthfulness coverage.
--- a/docs/help/testing.md
+++ b/docs/help/testing.md
@@ -47,9 +47,9 @@ When debugging real providers/models (requires real creds):
 - Live suite (models + gateway tool/image probes): `pnpm test:live`
 - Target one live file quietly: `pnpm test:live -- src/agents/models.profiles.live.test.ts`
 - Runtime performance reports: dispatch `OpenClaw Performance` with
-  `live_gpt54=true` for a real `openai/gpt-5.4` agent turn or
+  `live_openai_candidate=true` for a real `openai/gpt-5.5` agent turn or
  `deep_profile=true` for Kova CPU/heap/trace artifacts. Daily scheduled runs
-  publish mock-provider, deep-profile, and GPT 5.4 lane artifacts to
+  publish mock-provider, deep-profile, and GPT 5.5 lane artifacts to
  `openclaw/clawgrit-reports` when `CLAWGRIT_REPORTS_TOKEN` is configured. The
  mock-provider report also includes source-level gateway boot, memory,
  plugin-pressure, repeated fake-model hello-loop, and CLI startup numbers.
--- a/extensions/qa-lab/src/agentic-parity-report.test.ts
+++ b/extensions/qa-lab/src/agentic-parity-report.test.ts
@@ -153,7 +153,7 @@ describe("qa agentic parity report", () => {
  it("fails the parity gate when the candidate regresses against baseline", () => {
    const comparison = buildQaAgenticParityComparison({
      candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
      candidateSummary: {
        scenarios: [
          { name: "Approval turn tool followthrough", status: "pass" },
@@ -181,10 +181,10 @@ describe("qa agentic parity report", () => {

    expect(comparison.pass).toBe(false);
    expect(comparison.failures).toContain(
-      "openai/gpt-5.5 completion rate 80.0% is below anthropic/claude-opus-4-6 100.0%.",
+      "openai/gpt-5.5 completion rate 80.0% is below anthropic/claude-opus-4-7 100.0%.",
    );
    expect(comparison.failures).toContain(
-      "openai/gpt-5.5 unintended-stop rate 20.0% exceeds anthropic/claude-opus-4-6 0.0%.",
+      "openai/gpt-5.5 unintended-stop rate 20.0% exceeds anthropic/claude-opus-4-7 0.0%.",
    );
  });

@@ -199,7 +199,7 @@ describe("qa agentic parity report", () => {
    ];
    const comparison = buildQaAgenticParityComparison({
      candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
      candidateSummary: {
        scenarios: baselineScenarios.filter(
          (scenario) => scenario.name !== "Extra non-parity lane",
@@ -211,14 +211,14 @@ describe("qa agentic parity report", () => {

    expect(comparison.pass).toBe(false);
    expect(comparison.failures).toContain(
-      "Scenario coverage mismatch for Extra non-parity lane: openai/gpt-5.5=missing, anthropic/claude-opus-4-6=pass.",
+      "Scenario coverage mismatch for Extra non-parity lane: openai/gpt-5.5=missing, anthropic/claude-opus-4-7=pass.",
    );
  });

  it("reports each missing required parity scenario exactly once (no double-counting)", () => {
    const comparison = buildQaAgenticParityComparison({
      candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
      candidateSummary: {
        scenarios: [{ name: "Approval turn tool followthrough", status: "pass" }],
      },
@@ -260,7 +260,7 @@ describe("qa agentic parity report", () => {

    const comparison = buildQaAgenticParityComparison({
      candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
      candidateSummary: summaryWithExtras,
      baselineSummary: scopedSummary,
      comparedAt: "2026-04-11T00:00:00.000Z",
@@ -282,7 +282,7 @@ describe("qa agentic parity report", () => {
  it("fails the parity gate when required parity scenarios are missing on both sides", () => {
    const comparison = buildQaAgenticParityComparison({
      candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
      candidateSummary: {
        scenarios: [{ name: "Approval turn tool followthrough", status: "pass" }],
      },
@@ -294,14 +294,14 @@ describe("qa agentic parity report", () => {

    expect(comparison.pass).toBe(false);
    expect(comparison.failures).toContain(
-      "Missing required parity scenario coverage for Image understanding from attachment: openai/gpt-5.5=missing, anthropic/claude-opus-4-6=missing.",
+      "Missing required parity scenario coverage for Image understanding from attachment: openai/gpt-5.5=missing, anthropic/claude-opus-4-7=missing.",
    );
  });

  it("fails the parity gate when required parity scenarios are skipped", () => {
    const comparison = buildQaAgenticParityComparison({
      candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
      candidateSummary: {
        scenarios: [
          { name: "Approval turn tool followthrough", status: "pass" },
@@ -325,7 +325,7 @@ describe("qa agentic parity report", () => {

    expect(comparison.pass).toBe(false);
    expect(comparison.failures).toContain(
-      "Missing required parity scenario coverage for Compaction retry after mutating tool: openai/gpt-5.5=skip, anthropic/claude-opus-4-6=skip.",
+      "Missing required parity scenario coverage for Compaction retry after mutating tool: openai/gpt-5.5=skip, anthropic/claude-opus-4-7=skip.",
    );
  });

@@ -342,7 +342,7 @@ describe("qa agentic parity report", () => {
    });
    const comparison = buildQaAgenticParityComparison({
      candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
      candidateSummary: { scenarios: scenariosWithBothFail },
      baselineSummary: { scenarios: scenariosWithBothFail },
      comparedAt: "2026-04-11T00:00:00.000Z",
@@ -350,7 +350,7 @@ describe("qa agentic parity report", () => {

    expect(comparison.pass).toBe(false);
    expect(comparison.failures).toContain(
-      "Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-6=fail.",
+      "Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-7=fail.",
    );
    // Metric comparisons are relative, so a same-on-both-sides failure
    // must not appear as a relative metric failure. The required-scenario
@@ -370,7 +370,7 @@ describe("qa agentic parity report", () => {
    });
    const comparison = buildQaAgenticParityComparison({
      candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
      candidateSummary: { scenarios: candidateWithOneFail },
      baselineSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
      comparedAt: "2026-04-11T00:00:00.000Z",
@@ -378,7 +378,7 @@ describe("qa agentic parity report", () => {

    expect(comparison.pass).toBe(false);
    expect(comparison.failures).toContain(
-      "Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-6=pass.",
+      "Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-7=pass.",
    );
  });

@@ -387,7 +387,7 @@ describe("qa agentic parity report", () => {
    // below is the isolated gate failure under test (no coverage-gap noise).
    const comparison = buildQaAgenticParityComparison({
      candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
      candidateSummary: {
        scenarios: FULL_PARITY_PASS_SCENARIOS,
      },
@@ -401,7 +401,7 @@ describe("qa agentic parity report", () => {

    expect(comparison.pass).toBe(false);
    expect(comparison.failures).toEqual([
-      "anthropic/claude-opus-4-6 produced 1 suspicious pass result(s); baseline fake-success count must also be 0.",
+      "anthropic/claude-opus-4-7 produced 1 suspicious pass result(s); baseline fake-success count must also be 0.",
    ]);
  });

@@ -571,14 +571,14 @@ status=done`,
    expect(() =>
      buildQaAgenticParityComparison({
        candidateLabel: "openai/gpt-5.5",
-        baselineLabel: "anthropic/claude-opus-4-6",
+        baselineLabel: "anthropic/claude-opus-4-7",
        candidateSummary: {
          scenarios: parityPassScenarios,
-          run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-6" },
+          run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-7" },
        },
        baselineSummary: {
          scenarios: parityPassScenarios,
-          run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-6" },
+          run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-7" },
        },
        comparedAt: "2026-04-11T00:00:00.000Z",
      }),
@@ -593,7 +593,7 @@ status=done`,
    expect(() =>
      buildQaAgenticParityComparison({
        candidateLabel: "openai/gpt-5.5",
-        baselineLabel: "anthropic/claude-opus-4-6",
+        baselineLabel: "anthropic/claude-opus-4-7",
        candidateSummary: {
          scenarios: parityPassScenarios,
          run: { primaryProvider: "openai" },
@@ -612,7 +612,7 @@ status=done`,
  it("accepts matching run.primaryProvider labels without throwing", () => {
    const comparison = buildQaAgenticParityComparison({
      candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
      candidateSummary: {
        scenarios: FULL_PARITY_PASS_SCENARIOS,
        run: {
@@ -625,8 +625,8 @@ status=done`,
        scenarios: FULL_PARITY_PASS_SCENARIOS,
        run: {
          primaryProvider: "anthropic",
-          primaryModel: "anthropic/claude-opus-4-6",
-          primaryModelName: "claude-opus-4-6",
+          primaryModel: "anthropic/claude-opus-4-7",
+          primaryModelName: "claude-opus-4-7",
        },
      },
      comparedAt: "2026-04-11T00:00:00.000Z",
@@ -639,7 +639,7 @@ status=done`,
    // work against those, trusting the caller-supplied label.
    const comparison = buildQaAgenticParityComparison({
      candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
      candidateSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
      baselineSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
      comparedAt: "2026-04-11T00:00:00.000Z",
@@ -650,7 +650,7 @@ status=done`,
  it("skips provider verification for arbitrary display labels when run metadata is present", () => {
    const comparison = buildQaAgenticParityComparison({
      candidateLabel: "GPT-5.5 candidate",
-      baselineLabel: "Opus 4.6 baseline",
+      baselineLabel: "Opus 4.7 baseline",
      candidateSummary: {
        scenarios: FULL_PARITY_PASS_SCENARIOS,
        run: {
@@ -663,8 +663,8 @@ status=done`,
        scenarios: FULL_PARITY_PASS_SCENARIOS,
        run: {
          primaryProvider: "anthropic",
-          primaryModel: "anthropic/claude-opus-4-6",
-          primaryModelName: "claude-opus-4-6",
+          primaryModel: "anthropic/claude-opus-4-7",
+          primaryModelName: "claude-opus-4-7",
        },
      },
      comparedAt: "2026-04-11T00:00:00.000Z",
@@ -676,7 +676,7 @@ status=done`,
  it("skips provider verification for mixed-case or decorated display labels", () => {
    const comparison = buildQaAgenticParityComparison({
      candidateLabel: "Candidate: GPT-5.5",
-      baselineLabel: "Opus 4.6 / baseline",
+      baselineLabel: "Opus 4.7 / baseline",
      candidateSummary: {
        scenarios: FULL_PARITY_PASS_SCENARIOS,
        run: {
@@ -689,8 +689,8 @@ status=done`,
        scenarios: FULL_PARITY_PASS_SCENARIOS,
        run: {
          primaryProvider: "anthropic",
-          primaryModel: "anthropic/claude-opus-4-6",
-          primaryModelName: "claude-opus-4-6",
+          primaryModel: "anthropic/claude-opus-4-7",
+          primaryModelName: "claude-opus-4-7",
        },
      },
      comparedAt: "2026-04-11T00:00:00.000Z",
@@ -703,7 +703,7 @@ status=done`,
    expect(() =>
      buildQaAgenticParityComparison({
        candidateLabel: "openai/gpt-5.5",
-        baselineLabel: "anthropic/claude-opus-4-6",
+        baselineLabel: "anthropic/claude-opus-4-7",
        candidateSummary: {
          scenarios: FULL_PARITY_PASS_SCENARIOS,
          run: {
@@ -716,8 +716,8 @@ status=done`,
          scenarios: FULL_PARITY_PASS_SCENARIOS,
          run: {
            primaryProvider: "anthropic",
-            primaryModel: "anthropic/claude-opus-4-6",
-            primaryModelName: "claude-opus-4-6",
+            primaryModel: "anthropic/claude-opus-4-7",
+            primaryModelName: "claude-opus-4-7",
          },
        },
        comparedAt: "2026-04-11T00:00:00.000Z",
@@ -730,7 +730,7 @@ status=done`,
  it("accepts colon-delimited structured labels when provider and model both match", () => {
    const comparison = buildQaAgenticParityComparison({
      candidateLabel: "openai:gpt-5.5",
-      baselineLabel: "anthropic:claude-opus-4-6",
+      baselineLabel: "anthropic:claude-opus-4-7",
      candidateSummary: {
        scenarios: FULL_PARITY_PASS_SCENARIOS,
        run: {
@@ -743,8 +743,8 @@ status=done`,
        scenarios: FULL_PARITY_PASS_SCENARIOS,
        run: {
          primaryProvider: "anthropic",
-          primaryModel: "anthropic/claude-opus-4-6",
-          primaryModelName: "claude-opus-4-6",
+          primaryModel: "anthropic/claude-opus-4-7",
+          primaryModelName: "claude-opus-4-7",
        },
      },
      comparedAt: "2026-04-11T00:00:00.000Z",
@@ -759,7 +759,7 @@ status=done`,
    // added by the second-wave expansion.
    const comparison = buildQaAgenticParityComparison({
      candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
      candidateSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
      baselineSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
      comparedAt: "2026-04-11T00:00:00.000Z",
@@ -768,7 +768,7 @@ status=done`,
    const report = renderQaAgenticParityMarkdownReport(comparison);

    expect(report).toContain(
-      "# OpenClaw Agentic Parity Report — openai/gpt-5.5 vs anthropic/claude-opus-4-6",
+      "# OpenClaw Agentic Parity Report — openai/gpt-5.5 vs anthropic/claude-opus-4-7",
    );
    expect(report).toContain("| Completion rate | 100.0% | 100.0% |");
    expect(report).toContain("### Approval turn tool followthrough");
@@ -779,7 +779,7 @@ status=done`,
    // Regression for the loop-7 Copilot finding: callers that configure
    // non-gpt-5.5 / non-opus labels (for example an internal candidate vs
    // another candidate) must see the labels in the rendered H1 instead of
-    // the hardcoded "GPT-5.5 / Opus 4.6" title that would otherwise confuse
+    // the hardcoded "GPT-5.5 / Opus 4.7" title that would otherwise confuse
    // readers of saved reports.
    const comparison = buildQaAgenticParityComparison({
      candidateLabel: "openai/gpt-5.5-alt",
--- a/extensions/qa-lab/src/agentic-parity-report.ts
+++ b/extensions/qa-lab/src/agentic-parity-report.ts
@@ -564,7 +564,7 @@ export function renderQaAgenticParityMarkdownReport(comparison: QaAgenticParityC
  // Title is parametrized from the candidate / baseline labels so reports
  // for any candidate/baseline pair (not only gpt-5.5 vs opus 4.6) render
  // with an accurate header. The default CLI labels are still
-  // openai/gpt-5.5 vs anthropic/claude-opus-4-6, but the helper works for
+  // openai/gpt-5.5 vs anthropic/claude-opus-4-7, but the helper works for
  // any parity comparison a caller configures.
  const lines = [
    `# OpenClaw Agentic Parity Report — ${comparison.candidateLabel} vs ${comparison.baselineLabel}`,
--- a/extensions/qa-lab/src/character-eval.test.ts
+++ b/extensions/qa-lab/src/character-eval.test.ts
@@ -274,7 +274,7 @@ describe("runQaCharacterEval", () => {
      { model: "openai/gpt-5.5", rank: 1, score: 8, summary: "ok" },
      { model: "openai/gpt-5.2", rank: 2, score: 7.5, summary: "ok" },
      { model: "openai/gpt-5", rank: 3, score: 7.2, summary: "ok" },
-      { model: "anthropic/claude-opus-4-6", rank: 4, score: 7, summary: "ok" },
+      { model: "anthropic/claude-opus-4-7", rank: 4, score: 7, summary: "ok" },
      { model: "anthropic/claude-sonnet-4-6", rank: 5, score: 6.8, summary: "ok" },
      { model: "zai/glm-5.1", rank: 6, score: 6.3, summary: "ok" },
      { model: "moonshot/kimi-k2.5", rank: 7, score: 6.2, summary: "ok" },
@@ -294,7 +294,7 @@ describe("runQaCharacterEval", () => {
      "openai/gpt-5.5",
      "openai/gpt-5.2",
      "openai/gpt-5",
-      "anthropic/claude-opus-4-6",
+      "anthropic/claude-opus-4-7",
      "anthropic/claude-sonnet-4-6",
      "zai/glm-5.1",
      "moonshot/kimi-k2.5",
@@ -323,7 +323,7 @@ describe("runQaCharacterEval", () => {
    expect(runJudge).toHaveBeenCalledTimes(2);
    expect(runJudge.mock.calls.map(([params]) => params.judgeModel)).toEqual([
      "openai/gpt-5.5",
-      "anthropic/claude-opus-4-6",
+      "anthropic/claude-opus-4-7",
    ]);
    expect(runJudge.mock.calls.map(([params]) => params.judgeThinkingDefault)).toEqual([
      "xhigh",
@@ -577,11 +577,11 @@ describe("runQaCharacterEval", () => {
      candidateModelOptions: {
        "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: false },
      },
-      judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-6"],
+      judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-7"],
      judgeThinkingDefault: "medium",
      judgeModelOptions: {
        "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: true },
-        "anthropic/claude-opus-4-6": { thinkingDefault: "high" },
+        "anthropic/claude-opus-4-7": { thinkingDefault: "high" },
      },
      runSuite,
      runJudge,
--- a/extensions/qa-lab/src/cli.runtime.test.ts
+++ b/extensions/qa-lab/src/cli.runtime.test.ts
@@ -622,7 +622,7 @@ describe("qa cli runtime", () => {
      repoRoot: "/tmp/openclaw-repo",
      providerMode: "mock-openai",
      primaryModel: "openai/gpt-5.5",
-      alternateModel: "anthropic/claude-opus-4-6",
+      alternateModel: "anthropic/claude-opus-4-7",
      preflight: true,
    });

@@ -632,7 +632,7 @@ describe("qa cli runtime", () => {
      transportId: "qa-channel",
      providerMode: "mock-openai",
      primaryModel: "openai/gpt-5.5",
-      alternateModel: "anthropic/claude-opus-4-6",
+      alternateModel: "anthropic/claude-opus-4-7",
      scenarioIds: ["approval-turn-tool-followthrough"],
      concurrency: 1,
    });
@@ -930,7 +930,7 @@ describe("qa cli runtime", () => {
      fast: true,
      thinking: "medium",
      modelThinking: ["codex-cli/test-model=medium"],
-      judgeModel: ["openai/gpt-5.5,thinking=xhigh,fast", "anthropic/claude-opus-4-6,thinking=high"],
+      judgeModel: ["openai/gpt-5.5,thinking=xhigh,fast", "anthropic/claude-opus-4-7,thinking=high"],
      judgeTimeoutMs: 180_000,
      blindJudgeModels: true,
      concurrency: 4,
@@ -951,10 +951,10 @@ describe("qa cli runtime", () => {
        "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: false },
        "codex-cli/test-model": { thinkingDefault: "high", fastMode: true },
      },
-      judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-6"],
+      judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-7"],
      judgeModelOptions: {
        "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: true },
-        "anthropic/claude-opus-4-6": { thinkingDefault: "high" },
+        "anthropic/claude-opus-4-7": { thinkingDefault: "high" },
      },
      judgeTimeoutMs: 180_000,
      judgeBlindModels: true,
@@ -1285,7 +1285,7 @@ describe("qa cli runtime", () => {
      providerMode: "mock-openai",
      parityPack: "agentic",
      primaryModel: "openai/gpt-5.5",
-      alternateModel: "anthropic/claude-opus-4-6",
+      alternateModel: "anthropic/claude-opus-4-7",
    });

    expect(runQaSuiteFromRuntime).toHaveBeenCalledWith({
@@ -1294,7 +1294,7 @@ describe("qa cli runtime", () => {
      transportId: "qa-channel",
      providerMode: "mock-openai",
      primaryModel: "openai/gpt-5.5",
-      alternateModel: "anthropic/claude-opus-4-6",
+      alternateModel: "anthropic/claude-opus-4-7",
      fastMode: undefined,
      scenarioIds: [
        "approval-turn-tool-followthrough",
--- a/extensions/qa-lab/src/cli.test.ts
+++ b/extensions/qa-lab/src/cli.test.ts
@@ -346,9 +346,9 @@ describe("qa cli registration", () => {
      "--provider-mode",
      "live-frontier",
      "--model",
-      "openai/gpt-5.4",
+      "openai/gpt-5.5",
      "--alt-model",
-      "openai/gpt-5.4",
+      "openai/gpt-5.5",
      "--scenario",
      "slack-canary",
      "--credential-source",
@@ -360,7 +360,7 @@ describe("qa cli registration", () => {
    ]);

    expect(runMantisSlackDesktopSmokeCommand).toHaveBeenCalledWith({
-      alternateModel: "openai/gpt-5.4",
+      alternateModel: "openai/gpt-5.5",
      crabboxBin: "/tmp/crabbox",
      credentialRole: "maintainer",
      credentialSource: "env",
@@ -371,7 +371,7 @@ describe("qa cli registration", () => {
      leaseId: "cbx_123abc",
      machineClass: "beast",
      outputDir: ".artifacts/qa-e2e/mantis/slack-desktop",
-      primaryModel: "openai/gpt-5.4",
+      primaryModel: "openai/gpt-5.5",
      provider: "hetzner",
      providerMode: "live-frontier",
      repoRoot: "/tmp/openclaw-repo",
--- a/extensions/qa-lab/src/gateway-log-sentinel.test.ts
+++ b/extensions/qa-lab/src/gateway-log-sentinel.test.ts
@@ -16,7 +16,7 @@ describe("gateway log sentinels", () => {
        "[plugins] plugin must declare contracts.tools for: runtime_tool",
        "2026-05-13T00:00:04Z codex app-server attempt timed out after 180000ms",
        "2026-05-13T00:00:05Z codex_app_server progress stalled for run abc123",
-        "2026-05-13T00:00:06Z cron payload model openai/gpt-5.4 is not in model allowlist",
+        "2026-05-13T00:00:06Z cron payload model openai/gpt-5.5 is not in model allowlist",
        "2026-05-13T00:00:07Z OpenAI quota exceeded for live-frontier request",
      ].join("\n"),
    );
--- a/extensions/qa-lab/src/live-timeout.test.ts
+++ b/extensions/qa-lab/src/live-timeout.test.ts
@@ -8,7 +8,7 @@ describe("qa live timeout policy", () => {
        {
          providerMode: "mock-openai",
          primaryModel: "anthropic/claude-sonnet-4-6",
-          alternateModel: "anthropic/claude-opus-4-6",
+          alternateModel: "anthropic/claude-opus-4-7",
        },
        30_000,
      ),
@@ -47,7 +47,7 @@ describe("qa live timeout policy", () => {
        {
          providerMode: "live-frontier",
          primaryModel: "anthropic/claude-sonnet-4-6",
-          alternateModel: "anthropic/claude-opus-4-6",
+          alternateModel: "anthropic/claude-opus-4-7",
        },
        30_000,
      ),
@@ -60,10 +60,10 @@ describe("qa live timeout policy", () => {
        {
          providerMode: "live-frontier",
          primaryModel: "anthropic/claude-sonnet-4-6",
-          alternateModel: "anthropic/claude-opus-4-6",
+          alternateModel: "anthropic/claude-opus-4-7",
        },
        30_000,
-        "anthropic/claude-opus-4-6",
+        "anthropic/claude-opus-4-7",
      ),
    ).toBe(240_000);
  });
--- a/extensions/qa-lab/src/providers/live-frontier/character-eval.ts
+++ b/extensions/qa-lab/src/providers/live-frontier/character-eval.ts
@@ -9,7 +9,7 @@ export const QA_FRONTIER_CHARACTER_EVAL_MODELS = Object.freeze([
  "openai/gpt-5.5",
  "openai/gpt-5.2",
  "openai/gpt-5",
-  "anthropic/claude-opus-4-6",
+  "anthropic/claude-opus-4-7",
  "anthropic/claude-sonnet-4-6",
  "zai/glm-5.1",
  "moonshot/kimi-k2.5",
@@ -25,12 +25,12 @@ export const QA_FRONTIER_CHARACTER_THINKING_BY_MODEL: Readonly<Record<string, Qa

 export const QA_FRONTIER_CHARACTER_JUDGE_MODELS = Object.freeze([
  "openai/gpt-5.5",
-  "anthropic/claude-opus-4-6",
+  "anthropic/claude-opus-4-7",
 ]);

 export const QA_FRONTIER_CHARACTER_JUDGE_MODEL_OPTIONS: Readonly<
  Record<string, QaFrontierCharacterModelOptions>
 > = Object.freeze({
  "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: true },
-  "anthropic/claude-opus-4-6": { thinkingDefault: "high" },
+  "anthropic/claude-opus-4-7": { thinkingDefault: "high" },
 });
--- a/extensions/qa-lab/src/providers/live-frontier/parity.ts
+++ b/extensions/qa-lab/src/providers/live-frontier/parity.ts
@@ -1,2 +1,2 @@
 export const QA_FRONTIER_PARITY_CANDIDATE_LABEL = "openai/gpt-5.5";
-export const QA_FRONTIER_PARITY_BASELINE_LABEL = "anthropic/claude-opus-4-6";
+export const QA_FRONTIER_PARITY_BASELINE_LABEL = "anthropic/claude-opus-4-7";
--- a/extensions/qa-lab/src/providers/mock-openai/server.test.ts
+++ b/extensions/qa-lab/src/providers/mock-openai/server.test.ts
@@ -2727,7 +2727,7 @@ describe("qa mock openai server", () => {
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
        stream: false,
-        model: "mock-openai/gpt-5.4",
+        model: "mock-openai/gpt-5.5",
        input: [
          {
            role: "user",
@@ -2783,7 +2783,7 @@ describe("qa mock openai server", () => {
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
        stream: false,
-        model: "mock-openai/gpt-5.4",
+        model: "mock-openai/gpt-5.5",
        input: [
          {
            role: "user",
@@ -2956,7 +2956,7 @@ describe("qa mock openai server", () => {
    expect(outputText(await response.json())).toBe("NO_REPLY");
  });

-  it("advertises Anthropic claude-opus-4-6 baseline model on /v1/models", async () => {
+  it("advertises Anthropic claude-opus-4-7 baseline model on /v1/models", async () => {
    const server = await startQaMockOpenAiServer({
      host: "127.0.0.1",
      port: 0,
@@ -2969,7 +2969,7 @@ describe("qa mock openai server", () => {
    expect(response.status).toBe(200);
    const body = (await response.json()) as { data: Array<{ id: string }> };
    const ids = body.data.map((entry) => entry.id);
-    expect(ids).toContain("claude-opus-4-6");
+    expect(ids).toContain("claude-opus-4-7");
    expect(ids).toContain("gpt-5.5");
  });

@@ -2986,7 +2986,7 @@ describe("qa mock openai server", () => {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
-        model: "claude-opus-4-6",
+        model: "claude-opus-4-7",
        max_tokens: 256,
        messages: [
          {
@@ -3011,7 +3011,7 @@ describe("qa mock openai server", () => {
    };
    expect(body.type).toBe("message");
    expect(body.role).toBe("assistant");
-    expect(body.model).toBe("claude-opus-4-6");
+    expect(body.model).toBe("claude-opus-4-7");
    expect(body.stop_reason).toBe("tool_use");
    const toolUseBlock = body.content.find((block) => block.type === "tool_use") as
      | { name: string; input: Record<string, unknown> }
@@ -3022,7 +3022,7 @@ describe("qa mock openai server", () => {
    const debugResponse = await fetch(`${server.baseUrl}/debug/last-request`);
    expect(debugResponse.status).toBe(200);
    const debugPayload = requireRecord(await debugResponse.json(), "debug request");
-    expect(debugPayload.model).toBe("claude-opus-4-6");
+    expect(debugPayload.model).toBe("claude-opus-4-7");
    expect(debugPayload.plannedToolName).toBe("read");
  });

@@ -3033,7 +3033,7 @@ describe("qa mock openai server", () => {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
-        model: "claude-opus-4-6",
+        model: "claude-opus-4-7",
        max_tokens: 256,
        tools: [
          {
@@ -3073,7 +3073,7 @@ describe("qa mock openai server", () => {
    const debugResponse = await fetch(`${server.baseUrl}/debug/last-request`);
    expect(debugResponse.status).toBe(200);
    const debugPayload = requireRecord(await debugResponse.json(), "debug request");
-    expect(debugPayload.model).toBe("claude-opus-4-6");
+    expect(debugPayload.model).toBe("claude-opus-4-7");
    expect(debugPayload.plannedToolName).toBe("sessions_spawn");
  });

@@ -3097,7 +3097,7 @@ describe("qa mock openai server", () => {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
-        model: "claude-opus-4-6",
+        model: "claude-opus-4-7",
        max_tokens: 256,
        messages: [
          {
@@ -3171,7 +3171,7 @@ describe("qa mock openai server", () => {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
-        model: "claude-opus-4-6",
+        model: "claude-opus-4-7",
        max_tokens: 256,
        messages: [
          {
@@ -3252,7 +3252,7 @@ describe("qa mock openai server", () => {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
-        model: "claude-opus-4-6",
+        model: "claude-opus-4-7",
        max_tokens: 256,
        stream: true,
        messages: [
@@ -3293,7 +3293,7 @@ describe("qa mock openai server", () => {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
-        model: "claude-opus-4-6",
+        model: "claude-opus-4-7",
        max_tokens: 256,
        stream: true,
        messages: [
@@ -3352,7 +3352,7 @@ describe("qa mock openai server", () => {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
-        model: "claude-opus-4-6",
+        model: "claude-opus-4-7",
        max_tokens: 256,
        stream: true,
        system: [
@@ -3395,7 +3395,7 @@ describe("qa mock openai server", () => {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
-        model: "claude-opus-4-6",
+        model: "claude-opus-4-7",
        max_tokens: 256,
        stream: true,
        system: [
@@ -3440,7 +3440,7 @@ describe("qa mock openai server", () => {
    const response = await fetch(`${server.baseUrl}/v1/messages`, {
      method: "POST",
      headers: { "content-type": "application/json" },
-      body: '{"model":"claude-opus-4-6","messages":[',
+      body: '{"model":"claude-opus-4-7","messages":[',
    });

    expect(response.status).toBe(400);
@@ -3453,12 +3453,12 @@ describe("qa mock openai server", () => {
    expect(body.error.message).toContain("Malformed JSON body");
  });

-  it("defaults empty-string Anthropic /v1/messages model to claude-opus-4-6", async () => {
+  it("defaults empty-string Anthropic /v1/messages model to claude-opus-4-7", async () => {
    // Regression for the loop-7 Copilot finding: a bare `typeof
    // body.model === "string"` check lets an empty-string model leak
    // through to `lastRequest.model` and `responseBody.model`. Empty
    // strings must be treated the same as absent and default to
-    // `"claude-opus-4-6"` so parity consumers can trust the echoed label.
+    // `"claude-opus-4-7"` so parity consumers can trust the echoed label.
    const server = await startQaMockOpenAiServer({
      host: "127.0.0.1",
      port: 0,
@@ -3483,12 +3483,12 @@ describe("qa mock openai server", () => {
    });
    expect(response.status).toBe(200);
    const body = (await response.json()) as { model: string };
-    expect(body.model).toBe("claude-opus-4-6");
+    expect(body.model).toBe("claude-opus-4-7");

    const debugResponse = await fetch(`${server.baseUrl}/debug/last-request`);
    expect(debugResponse.status).toBe(200);
    const debug = (await debugResponse.json()) as { model: string };
-    expect(debug.model).toBe("claude-opus-4-6");
+    expect(debug.model).toBe("claude-opus-4-7");
  });

  it("scripts a reasoning-only recovery sequence after a replay-safe read", async () => {
@@ -3711,9 +3711,9 @@ describe("resolveProviderVariant", () => {
  });

  it("tags prefix-qualified anthropic models", () => {
-    expect(resolveProviderVariant("anthropic/claude-opus-4-6")).toBe("anthropic");
-    expect(resolveProviderVariant("anthropic:claude-opus-4-6")).toBe("anthropic");
-    expect(resolveProviderVariant("claude-cli/claude-opus-4-6")).toBe("anthropic");
+    expect(resolveProviderVariant("anthropic/claude-opus-4-7")).toBe("anthropic");
+    expect(resolveProviderVariant("anthropic:claude-opus-4-7")).toBe("anthropic");
+    expect(resolveProviderVariant("claude-cli/claude-opus-4-7")).toBe("anthropic");
  });

  it("tags bare model names by prefix", () => {
@@ -3721,7 +3721,7 @@ describe("resolveProviderVariant", () => {
    expect(resolveProviderVariant("gpt-5.5-alt")).toBe("openai");
    expect(resolveProviderVariant("gpt-4.5")).toBe("openai");
    expect(resolveProviderVariant("o1-preview")).toBe("openai");
-    expect(resolveProviderVariant("claude-opus-4-6")).toBe("anthropic");
+    expect(resolveProviderVariant("claude-opus-4-7")).toBe("anthropic");
    expect(resolveProviderVariant("claude-sonnet-4-6")).toBe("anthropic");
  });

@@ -3779,7 +3779,7 @@ describe("qa mock openai server provider variant tagging", () => {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
-        model: "claude-opus-4-6",
+        model: "claude-opus-4-7",
        max_tokens: 256,
        messages: [{ role: "user", content: "Heartbeat check" }],
      }),
@@ -3789,7 +3789,7 @@ describe("qa mock openai server provider variant tagging", () => {
      model: string;
      providerVariant: string;
    };
-    expect(debug.model).toBe("claude-opus-4-6");
+    expect(debug.model).toBe("claude-opus-4-7");
    expect(debug.providerVariant).toBe("anthropic");
  });

--- a/extensions/qa-lab/src/providers/mock-openai/server.ts
+++ b/extensions/qa-lab/src/providers/mock-openai/server.ts
@@ -81,7 +81,7 @@ export function resolveProviderVariant(model: string | undefined): MockOpenAiPro
    return "anthropic";
  }
  // Fall back to model-name prefix matching for bare model strings like
-  // `gpt-5.5` or `claude-opus-4-6`.
+  // `gpt-5.5` or `claude-opus-4-7`.
  if (/^(?:gpt-|o1-|openai-)/.test(trimmed)) {
    return "openai";
  }
@@ -2161,7 +2161,7 @@ async function buildResponsesPayload(
 //
 // The QA parity gate needs two comparable scenario runs: one against the
 // "candidate" (openai/gpt-5.5) and one against the "baseline"
-// (anthropic/claude-opus-4-6). The OpenAI mock above already dispatches all
+// (anthropic/claude-opus-4-7). The OpenAI mock above already dispatches all
 // the scenario prompt branches we care about. Rather than duplicating that
 // machinery, the /v1/messages route below translates Anthropic request
 // shapes into the shared ResponsesInputItem[] format, calls the same
@@ -2384,7 +2384,7 @@ function buildAnthropicMessageResponse(params: {
    id: `msg_mock_${Math.floor(Math.random() * 1_000_000).toString(16)}`,
    type: "message",
    role: "assistant",
-    model: params.model || "claude-opus-4-6",
+    model: params.model || "claude-opus-4-7",
    content,
    stop_reason: stopReason,
    stop_sequence: null,
@@ -2412,7 +2412,7 @@ function buildAnthropicMessageStreamEvents(params: {
        id: messageId,
        type: "message",
        role: "assistant",
-        model: params.model || "claude-opus-4-6",
+        model: params.model || "claude-opus-4-7",
        content: [],
        stop_reason: null,
        stop_sequence: null,
@@ -2511,7 +2511,7 @@ async function buildMessagesPayload(
  // which then confuses parity consumers that assume the mock always
  // echoes the real provider label. Normalize once and reuse everywhere.
  const normalizedModel =
-    typeof body.model === "string" && body.model.trim() !== "" ? body.model : "claude-opus-4-6";
+    typeof body.model === "string" && body.model.trim() !== "" ? body.model : "claude-opus-4-7";
  // Dispatch through the same scenario logic the /v1/responses route uses.
  // Preserve declared tools so route-specific adapters mirror what the
  // real provider request made available to the model.
@@ -2556,7 +2556,7 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n
          { id: "gpt-5.5-alt", object: "model" },
          { id: "gpt-image-1", object: "model" },
          { id: "text-embedding-3-small", object: "model" },
-          { id: "claude-opus-4-6", object: "model" },
+          { id: "claude-opus-4-7", object: "model" },
          { id: "claude-sonnet-4-6", object: "model" },
        ],
      });
--- a/extensions/qa-lab/src/providers/shared/mock-model-config.ts
+++ b/extensions/qa-lab/src/providers/shared/mock-model-config.ts
@@ -71,8 +71,8 @@ function createMockAnthropicMessagesProvider(baseUrl: string): ModelProviderConf
    },
    models: [
      {
-        id: "claude-opus-4-6",
-        name: "claude-opus-4-6",
+        id: "claude-opus-4-7",
+        name: "claude-opus-4-7",
        api: "anthropic-messages",
        reasoning: false,
        input: ["text", "image"],
--- a/extensions/qa-lab/src/qa-gateway-config.test.ts
+++ b/extensions/qa-lab/src/qa-gateway-config.test.ts
@@ -90,7 +90,7 @@ describe("buildQaGatewayConfig", () => {
      workspaceDir: "/tmp/qa-workspace",
      providerMode: "mock-openai",
      primaryModel: "openai/gpt-5.5",
-      alternateModel: "anthropic/claude-opus-4-6",
+      alternateModel: "anthropic/claude-opus-4-7",
    });

    expect(getPrimaryModel(cfg.agents?.defaults?.model)).toBe("openai/gpt-5.5");
@@ -101,7 +101,7 @@ describe("buildQaGatewayConfig", () => {
    expect(cfg.models?.providers?.anthropic?.baseUrl).toBe("http://127.0.0.1:44080");
    expect(cfg.models?.providers?.anthropic?.request).toEqual({ allowPrivateNetwork: true });
    expect(cfg.models?.providers?.anthropic?.models.map((model) => model.id)).toContain(
-      "claude-opus-4-6",
+      "claude-opus-4-7",
    );
    expect(cfg.plugins?.allow).toEqual(["acpx", "memory-core"]);
  });
--- a/extensions/qa-lab/src/suite-planning.test.ts
+++ b/extensions/qa-lab/src/suite-planning.test.ts
@@ -174,7 +174,7 @@ describe("qa suite planning helpers", () => {
      makeQaSuiteTestScenario("anthropic-only", {
        config: {
          requiredProvider: "anthropic",
-          requiredModel: "claude-opus-4-6",
+          requiredModel: "claude-opus-4-7",
        },
      }),
    ];
@@ -320,7 +320,7 @@ describe("qa suite planning helpers", () => {
        config: { requiredProvider: "openai", requiredModel: "gpt-5.5" },
      }),
      makeQaSuiteTestScenario("anthropic-only", {
-        config: { requiredProvider: "anthropic", requiredModel: "claude-opus-4-6" },
+        config: { requiredProvider: "anthropic", requiredModel: "claude-opus-4-7" },
      }),
      makeQaSuiteTestScenario("claude-subscription", {
        config: { requiredProvider: "claude-cli", authMode: "subscription" },
--- a/extensions/qa-lab/src/suite.summary-json.test.ts
+++ b/extensions/qa-lab/src/suite.summary-json.test.ts
@@ -67,12 +67,12 @@ describe("buildQaSuiteSummaryJson", () => {
  it("records an Anthropic baseline lane cleanly for parity runs", () => {
    const json = buildQaSuiteSummaryJson({
      ...baseParams,
-      primaryModel: "anthropic/claude-opus-4-6",
+      primaryModel: "anthropic/claude-opus-4-7",
      alternateModel: "anthropic/claude-sonnet-4-6",
    });
-    expect(json.run.primaryModel).toBe("anthropic/claude-opus-4-6");
+    expect(json.run.primaryModel).toBe("anthropic/claude-opus-4-7");
    expect(json.run.primaryProvider).toBe("anthropic");
-    expect(json.run.primaryModelName).toBe("claude-opus-4-6");
+    expect(json.run.primaryModelName).toBe("claude-opus-4-7");
    expect(json.run.alternateModel).toBe("anthropic/claude-sonnet-4-6");
    expect(json.run.alternateProvider).toBe("anthropic");
    expect(json.run.alternateModelName).toBe("claude-sonnet-4-6");
--- a/qa/scenarios/models/anthropic-opus-api-key-smoke.md
+++ b/qa/scenarios/models/anthropic-opus-api-key-smoke.md
@@ -12,7 +12,7 @@ coverage:
 objective: Verify the regular Anthropic Opus lane can complete a quick chat turn using API-key auth.
 successCriteria:
  - A live-frontier run fails fast unless the selected primary provider is anthropic.
-  - The selected primary model is Anthropic Opus 4.6.
+  - The selected primary model is Anthropic Opus 4.7.
  - The QA gateway worker has an Anthropic API key available through environment auth.
  - The agent replies through the regular Anthropic provider.
 docsRefs:
@@ -24,10 +24,10 @@ codeRefs:
  - extensions/qa-lab/src/suite.ts
 execution:
  kind: flow
-  summary: Run with `pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-6 --alt-model anthropic/claude-opus-4-6 --scenario anthropic-opus-api-key-smoke`.
+  summary: Run with `pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-7 --alt-model anthropic/claude-opus-4-7 --scenario anthropic-opus-api-key-smoke`.
  config:
    requiredProvider: anthropic
-    requiredModel: claude-opus-4-6
+    requiredModel: claude-opus-4-7
    chatPrompt: "Anthropic Opus API key smoke. Reply exactly: ANTHROPIC-OPUS-API-KEY-OK"
    chatExpected: ANTHROPIC-OPUS-API-KEY-OK
 ```
--- a/qa/scenarios/models/anthropic-opus-setup-token-smoke.md
+++ b/qa/scenarios/models/anthropic-opus-setup-token-smoke.md
@@ -12,7 +12,7 @@ coverage:
 objective: Verify the regular Anthropic Opus lane can complete a quick chat turn using setup-token auth.
 successCriteria:
  - A live-frontier run fails fast unless the selected primary provider is anthropic.
-  - The selected primary model is Anthropic Opus 4.6.
+  - The selected primary model is Anthropic Opus 4.7.
  - The QA gateway worker stages a token auth profile in the isolated agent store.
  - The agent replies through the regular Anthropic provider.
 docsRefs:
@@ -24,10 +24,10 @@ codeRefs:
  - extensions/qa-lab/src/suite.ts
 execution:
  kind: flow
-  summary: Run with `OPENCLAW_LIVE_SETUP_TOKEN_VALUE=<setup-token> pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-6 --alt-model anthropic/claude-opus-4-6 --scenario anthropic-opus-setup-token-smoke`.
+  summary: Run with `OPENCLAW_LIVE_SETUP_TOKEN_VALUE=<setup-token> pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-7 --alt-model anthropic/claude-opus-4-7 --scenario anthropic-opus-setup-token-smoke`.
  config:
    requiredProvider: anthropic
-    requiredModel: claude-opus-4-6
+    requiredModel: claude-opus-4-7
    profileId: "anthropic:qa-setup-token"
    chatPrompt: "Anthropic Opus setup-token smoke. Reply exactly: ANTHROPIC-OPUS-SETUP-TOKEN-OK"
    chatExpected: ANTHROPIC-OPUS-SETUP-TOKEN-OK
--- a/scripts/openclaw-cross-os-release-checks.ts
+++ b/scripts/openclaw-cross-os-release-checks.ts
@@ -46,7 +46,7 @@ const providerConfig = {
    extensionId: "openai",
    secretEnv: "OPENAI_API_KEY",
    authChoice: "openai-api-key",
-    model: "openai/gpt-5.4",
+    model: "openai/gpt-5.5",
    baseUrl: "https://api.openai.com/v1",
    timeoutSeconds: CROSS_OS_AGENT_TURN_TIMEOUT_SECONDS,
  },
--- a/src/infra/run-node.test.ts
+++ b/src/infra/run-node.test.ts
@@ -1047,9 +1047,9 @@ describe("run-node script", () => {
          "qa",
          "parity-report",
          "--candidate-summary",
-          ".artifacts/qa-e2e/gpt54/qa-suite-summary.json",
+          ".artifacts/qa-e2e/openai-candidate/qa-suite-summary.json",
          "--baseline-summary",
-          ".artifacts/qa-e2e/opus46/qa-suite-summary.json",
+          ".artifacts/qa-e2e/anthropic-baseline/qa-suite-summary.json",
        ],
        env: {
          ...process.env,
@@ -1068,9 +1068,9 @@ describe("run-node script", () => {
          "tsx",
          path.join(tmp, "scripts", "qa-parity-report.ts"),
          "--candidate-summary",
-          ".artifacts/qa-e2e/gpt54/qa-suite-summary.json",
+          ".artifacts/qa-e2e/openai-candidate/qa-suite-summary.json",
          "--baseline-summary",
-          ".artifacts/qa-e2e/opus46/qa-suite-summary.json",
+          ".artifacts/qa-e2e/anthropic-baseline/qa-suite-summary.json",
        ],
      ]);
    });
--- a/test/helpers/auto-reply/trigger-handling-test-harness.ts
+++ b/test/helpers/auto-reply/trigger-handling-test-harness.ts
@@ -100,17 +100,17 @@ const modelCatalogMocks = getSharedMocks("openclaw.trigger-handling.model-catalo
  loadModelCatalog: vi.fn().mockResolvedValue([
    {
      provider: "anthropic",
-      id: "claude-opus-4-6",
-      name: "Claude Opus 4.5",
+      id: "claude-opus-4-7",
+      name: "Claude Opus 4.7",
      contextWindow: 200000,
    },
    {
      provider: "openrouter",
-      id: "anthropic/claude-opus-4-6",
-      name: "Claude Opus 4.5 (OpenRouter)",
+      id: "anthropic/claude-opus-4-7",
+      name: "Claude Opus 4.7 (OpenRouter)",
      contextWindow: 200000,
    },
-    { provider: "openai", id: "gpt-5.4-mini", name: "GPT-5.4 mini" },
+    { provider: "openai", id: "gpt-5.5-mini", name: "GPT-5.5 mini" },
    { provider: "openai", id: "gpt-5.5", name: "GPT-5.5" },
    { provider: "openai-codex", id: "gpt-5.5", name: "GPT-5.5 (Codex)" },
    { provider: "minimax", id: "MiniMax-M2.7", name: "MiniMax M2.7" },
@@ -284,7 +284,7 @@ export function makeCfg(home: string): OpenClawConfig {
  return withFastReplyConfig({
    agents: {
      defaults: {
-        model: { primary: "anthropic/claude-opus-4-6" },
+        model: { primary: "anthropic/claude-opus-4-7" },
        workspace: join(home, "openclaw"),
        // Test harness: avoid 1s coalescer idle sleeps that dominate trigger suites.
        blockStreamingCoalesce: { idleMs: 1 },
--- a/test/scripts/openclaw-cross-os-release-checks.test.ts
+++ b/test/scripts/openclaw-cross-os-release-checks.test.ts
@@ -205,10 +205,10 @@ describe("scripts/openclaw-cross-os-release-checks", () => {
        OPENCLAW_CROSS_OS_MODEL: "openai/gpt-5.4-nano",
      })?.model,
    ).toBe("openai/gpt-5.4-nano");
-    expect(resolveProviderConfig("openai", {})?.model).toBe("openai/gpt-5.4");
+    expect(resolveProviderConfig("openai", {})?.model).toBe("openai/gpt-5.5");
  });

-  it("keeps release cross-OS OpenAI smoke on GPT-5.4", () => {
+  it("keeps release cross-OS OpenAI smoke on GPT-5.5", () => {
    const workflow = readFileSync(
      ".github/workflows/openclaw-cross-os-release-checks-reusable.yml",
      "utf8",
@@ -216,9 +216,9 @@ describe("scripts/openclaw-cross-os-release-checks", () => {
    const releaseChecks = readFileSync(".github/workflows/openclaw-release-checks.yml", "utf8");

    expect(workflow).toContain(
-      "OPENCLAW_CROSS_OS_OPENAI_MODEL: ${{ inputs.openai_model || vars.OPENCLAW_CROSS_OS_OPENAI_MODEL || 'openai/gpt-5.4' }}",
+      "OPENCLAW_CROSS_OS_OPENAI_MODEL: ${{ inputs.openai_model || vars.OPENCLAW_CROSS_OS_OPENAI_MODEL || 'openai/gpt-5.5' }}",
    );
-    expect(releaseChecks).toContain("openai_model: openai/gpt-5.4");
+    expect(releaseChecks).toContain("openai_model: openai/gpt-5.5");
  });

  it("keeps release smoke plugin allowlists focused on agent-turn essentials", () => {
--- a/test/scripts/package-acceptance-workflow.test.ts
+++ b/test/scripts/package-acceptance-workflow.test.ts
@@ -413,7 +413,7 @@ describe("package artifact reuse", () => {
    expect(workflow).toContain("suite_id: native-live-src-gateway-profiles-anthropic-opus");
    expect(workflow).toContain("suite_id: native-live-src-gateway-profiles-anthropic-sonnet-haiku");
    expect(workflow).toContain("suite_group: native-live-src-gateway-profiles-anthropic");
-    expect(workflow).toContain("anthropic/claude-opus-4-7,anthropic/claude-opus-4-6");
+    expect(workflow).toContain("OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-7");
    expect(workflow).toContain("anthropic/claude-sonnet-4-6,anthropic/claude-haiku-4-5");
    expect(workflow).toMatch(
      /suite_id: native-live-src-gateway-profiles-fireworks[\s\S]*?advisory: true/u,