From 1926982c4c39e23eba82722df84b8215e1850243 Mon Sep 17 00:00:00 2001
From: Vincent Koc <vincentkoc@ieee.org>
Date: Sun, 17 May 2026 23:12:20 +0800
Subject: [PATCH] fix(qa-lab): refresh parity model targets

---
 .../workflows/control-ui-locale-refresh.yml   |  2 +-
 .../mantis-discord-status-reactions.yml       |  4 +-
 .../workflows/mantis-slack-desktop-smoke.yml  |  4 +-
 .github/workflows/mantis-telegram-live.yml    |  2 +-
 ...nclaw-cross-os-release-checks-reusable.yml |  2 +-
 .../openclaw-live-and-e2e-checks-reusable.yml |  2 +-
 .github/workflows/openclaw-performance.yml    | 18 ++---
 .github/workflows/openclaw-release-checks.yml | 12 +--
 .../workflows/qa-live-transports-convex.yml   | 14 ++--
 CHANGELOG.md                                  |  1 +
 docs/ci.md                                    |  8 +-
 docs/concepts/qa-e2e-automation.md            | 14 ++--
 .../gpt55-codex-agentic-parity-maintainers.md | 10 +--
 docs/help/gpt55-codex-agentic-parity.md       | 18 ++---
 docs/help/testing.md                          |  4 +-
 .../qa-lab/src/agentic-parity-report.test.ts  | 80 +++++++++----------
 .../qa-lab/src/agentic-parity-report.ts       |  2 +-
 extensions/qa-lab/src/character-eval.test.ts  | 10 +--
 extensions/qa-lab/src/cli.runtime.test.ts     | 14 ++--
 extensions/qa-lab/src/cli.test.ts             |  8 +-
 .../qa-lab/src/gateway-log-sentinel.test.ts   |  2 +-
 extensions/qa-lab/src/live-timeout.test.ts    |  8 +-
 .../providers/live-frontier/character-eval.ts |  6 +-
 .../src/providers/live-frontier/parity.ts     |  2 +-
 .../src/providers/mock-openai/server.test.ts  | 52 ++++++------
 .../src/providers/mock-openai/server.ts       | 12 +--
 .../src/providers/shared/mock-model-config.ts |  4 +-
 .../qa-lab/src/qa-gateway-config.test.ts      |  4 +-
 extensions/qa-lab/src/suite-planning.test.ts  |  4 +-
 .../qa-lab/src/suite.summary-json.test.ts     |  6 +-
 .../models/anthropic-opus-api-key-smoke.md    |  6 +-
 .../anthropic-opus-setup-token-smoke.md       |  6 +-
 scripts/openclaw-cross-os-release-checks.ts   |  2 +-
 src/infra/run-node.test.ts                    |  8 +-
 .../trigger-handling-test-harness.ts          | 12 +--
 .../openclaw-cross-os-release-checks.test.ts  |  8 +-
 .../package-acceptance-workflow.test.ts       |  2 +-
 37 files changed, 187 insertions(+), 186 deletions(-)

diff --git a/.github/workflows/control-ui-locale-refresh.yml b/.github/workflows/control-ui-locale-refresh.yml
index f7ac7004adb0..4529e86df0f3 100644
--- a/.github/workflows/control-ui-locale-refresh.yml
+++ b/.github/workflows/control-ui-locale-refresh.yml
@@ -138,7 +138,7 @@ jobs:
           OPENAI_API_KEY: ${{ secrets.OPENCLAW_DOCS_I18N_OPENAI_API_KEY || secrets.OPENAI_API_KEY }}
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
           OPENCLAW_CONTROL_UI_I18N_PROVIDER: ${{ secrets.ANTHROPIC_API_KEY != '' && 'anthropic' || 'openai' }}
-          OPENCLAW_CONTROL_UI_I18N_MODEL: ${{ secrets.ANTHROPIC_API_KEY != '' && 'claude-opus-4-6' || vars.OPENCLAW_CI_OPENAI_MODEL_BARE }}
+          OPENCLAW_CONTROL_UI_I18N_MODEL: ${{ secrets.ANTHROPIC_API_KEY != '' && 'claude-opus-4-7' || vars.OPENCLAW_CI_OPENAI_MODEL_BARE }}
           OPENCLAW_CONTROL_UI_I18N_THINKING: low
           OPENCLAW_CONTROL_UI_I18N_AUTH_OPTIONAL: "1"
           LOCALE: ${{ matrix.locale }}
diff --git a/.github/workflows/mantis-discord-status-reactions.yml b/.github/workflows/mantis-discord-status-reactions.yml
index d159a6cb02de..0bd813cb9e1b 100644
--- a/.github/workflows/mantis-discord-status-reactions.yml
+++ b/.github/workflows/mantis-discord-status-reactions.yml
@@ -349,8 +349,8 @@ jobs:
               --repo-root "$repo_root" \
               --output-dir "$output_dir" \
               --provider-mode live-frontier \
-              --model openai/gpt-5.4 \
-              --alt-model openai/gpt-5.4 \
+              --model openai/gpt-5.5 \
+              --alt-model openai/gpt-5.5 \
               --fast \
               --credential-source convex \
               --credential-role ci \
diff --git a/.github/workflows/mantis-slack-desktop-smoke.yml b/.github/workflows/mantis-slack-desktop-smoke.yml
index 6b71b9fac6c5..a608b0bc9800 100644
--- a/.github/workflows/mantis-slack-desktop-smoke.yml
+++ b/.github/workflows/mantis-slack-desktop-smoke.yml
@@ -281,8 +281,8 @@ jobs:
             --credential-role ci \
             --provider-mode live-frontier \
             --hydrate-mode "$HYDRATE_MODE" \
-            --model openai/gpt-5.4 \
-            --alt-model openai/gpt-5.4 \
+            --model openai/gpt-5.5 \
+            --alt-model openai/gpt-5.5 \
             --fast \
             --scenario "$SCENARIO_ID" \
             "${keep_args[@]}" \
diff --git a/.github/workflows/mantis-telegram-live.yml b/.github/workflows/mantis-telegram-live.yml
index 2287921c02be..400b5b8333db 100644
--- a/.github/workflows/mantis-telegram-live.yml
+++ b/.github/workflows/mantis-telegram-live.yml
@@ -386,7 +386,7 @@ jobs:
           output_rel=".artifacts/qa-e2e/mantis/telegram-live"
           root="$candidate_repo/$output_rel"
           echo "output_dir=${root}" >> "$GITHUB_OUTPUT"
-          model="${OPENCLAW_CI_OPENAI_MODEL:-openai/gpt-5.4}"
+          model="${OPENCLAW_CI_OPENAI_MODEL:-openai/gpt-5.5}"
 
           scenario_args=()
           if [[ -n "${SCENARIO_INPUT// }" ]]; then
diff --git a/.github/workflows/openclaw-cross-os-release-checks-reusable.yml b/.github/workflows/openclaw-cross-os-release-checks-reusable.yml
index c90260b22da6..b94220dc22f8 100644
--- a/.github/workflows/openclaw-cross-os-release-checks-reusable.yml
+++ b/.github/workflows/openclaw-cross-os-release-checks-reusable.yml
@@ -186,7 +186,7 @@ env:
   PNPM_VERSION: "11.0.8"
   OPENCLAW_REPOSITORY: openclaw/openclaw
   TSX_VERSION: "4.21.0"
-  OPENCLAW_CROSS_OS_OPENAI_MODEL: ${{ inputs.openai_model || vars.OPENCLAW_CROSS_OS_OPENAI_MODEL || 'openai/gpt-5.4' }}
+  OPENCLAW_CROSS_OS_OPENAI_MODEL: ${{ inputs.openai_model || vars.OPENCLAW_CROSS_OS_OPENAI_MODEL || 'openai/gpt-5.5' }}
 
 jobs:
   prepare:
diff --git a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
index dbceabb72ab1..326b671ce4db 100644
--- a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
+++ b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
@@ -1911,7 +1911,7 @@ jobs:
           - suite_id: native-live-src-gateway-profiles-anthropic-opus
             suite_group: native-live-src-gateway-profiles-anthropic
             label: Native live gateway profiles Anthropic Opus
-            command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-7,anthropic/claude-opus-4-6 node .release-harness/scripts/test-live-shard.mjs native-live-src-gateway-profiles
+            command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-7 node .release-harness/scripts/test-live-shard.mjs native-live-src-gateway-profiles
             timeout_minutes: 30
             profile_env_only: false
             advisory: true
diff --git a/.github/workflows/openclaw-performance.yml b/.github/workflows/openclaw-performance.yml
index 44c61b7c5b33..25fbbef79165 100644
--- a/.github/workflows/openclaw-performance.yml
+++ b/.github/workflows/openclaw-performance.yml
@@ -30,8 +30,8 @@ on:
         required: false
         default: false
         type: boolean
-      live_gpt54:
-        description: Run the live OpenAI GPT 5.4 agent-turn lane
+      live_openai_candidate:
+        description: Run the live OpenAI GPT 5.5 agent-turn lane
         required: false
         default: false
         type: boolean
@@ -57,7 +57,7 @@ env:
   FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
   OCM_VERSION: v0.2.15
   KOVA_REPOSITORY: openclaw/Kova
-  PERFORMANCE_MODEL_ID: gpt-5.4
+  PERFORMANCE_MODEL_ID: gpt-5.5
 
 jobs:
   kova:
@@ -82,8 +82,8 @@ jobs:
             deep_profile: "true"
             live: "false"
             include_filters: "scenario:fresh-install scenario:gateway-performance scenario:agent-cold-warm-message"
-          - lane: live-gpt54
-            title: Kova live OpenAI GPT 5.4 agent turn
+          - lane: live-openai-candidate
+            title: Kova live OpenAI GPT 5.5 agent turn
             auth: live
             repeat: "1"
             deep_profile: "false"
@@ -119,9 +119,9 @@ jobs:
             run_lane=false
             reason="deep_profile input is false"
           fi
-          if [[ "$LANE_ID" == "live-gpt54" && "${{ github.event_name }}" != "schedule" && "${{ inputs.live_gpt54 || 'false' }}" != "true" ]]; then
+          if [[ "$LANE_ID" == "live-openai-candidate" && "${{ github.event_name }}" != "schedule" && "${{ inputs.live_openai_candidate || 'false' }}" != "true" ]]; then
             run_lane=false
-            reason="live_gpt54 input is false"
+            reason="live_openai_candidate input is false"
           fi
           echo "run=$run_lane" >> "$GITHUB_OUTPUT"
           if [[ "$run_lane" != "true" ]]; then
@@ -200,7 +200,7 @@ jobs:
           chmod 0755 "$HOME/.local/bin/kova"
           echo "$HOME/.local/bin" >> "$GITHUB_PATH"
 
-      - name: Pin Kova OpenAI model to GPT 5.4
+      - name: Pin Kova OpenAI model to GPT 5.5
         if: steps.lane.outputs.run == 'true'
         shell: bash
         run: |
@@ -244,7 +244,7 @@ jobs:
         run: |
           set -euo pipefail
           if [[ -z "${OPENAI_API_KEY:-}" ]]; then
-            echo "OPENAI_API_KEY is not configured; live GPT 5.4 lane will be skipped." >> "$GITHUB_STEP_SUMMARY"
+            echo "OPENAI_API_KEY is not configured; live GPT 5.5 lane will be skipped." >> "$GITHUB_STEP_SUMMARY"
             exit 0
           fi
           kova setup --ci --json
diff --git a/.github/workflows/openclaw-release-checks.yml b/.github/workflows/openclaw-release-checks.yml
index 8fc41f4d1cab..b69376d60655 100644
--- a/.github/workflows/openclaw-release-checks.yml
+++ b/.github/workflows/openclaw-release-checks.yml
@@ -542,7 +542,7 @@ jobs:
       candidate_file_name: openclaw-current.tgz
       candidate_version: ${{ needs.prepare_release_package.outputs.package_version }}
       candidate_source_sha: ${{ needs.prepare_release_package.outputs.source_sha }}
-      openai_model: openai/gpt-5.4
+      openai_model: openai/gpt-5.5
       ubuntu_runner: ubuntu-24.04
       windows_runner: windows-2025
       macos_runner: macos-26
@@ -724,9 +724,9 @@ jobs:
       matrix:
         include:
           - lane: candidate
-            output_dir: gpt54
+            output_dir: openai-candidate
           - lane: baseline
-            output_dir: opus46
+            output_dir: anthropic-baseline
     env:
       QA_PARITY_CONCURRENCY: "1"
       OPENCLAW_QA_TRANSPORT_READY_TIMEOUT_MS: "180000"
@@ -772,7 +772,7 @@ jobs:
               ;;
             baseline)
               model="anthropic/claude-opus-4-7"
-              alt_model="anthropic/claude-sonnet-4-7"
+              alt_model="anthropic/claude-sonnet-4-6"
               ;;
             *)
               echo "Unknown QA parity lane: ${QA_PARITY_LANE}" >&2
@@ -841,8 +841,8 @@ jobs:
         run: |
           pnpm openclaw qa parity-report \
             --repo-root . \
-            --candidate-summary .artifacts/qa-e2e/gpt54/qa-suite-summary.json \
-            --baseline-summary .artifacts/qa-e2e/opus46/qa-suite-summary.json \
+            --candidate-summary .artifacts/qa-e2e/openai-candidate/qa-suite-summary.json \
+            --baseline-summary .artifacts/qa-e2e/anthropic-baseline/qa-suite-summary.json \
             --candidate-label "${OPENCLAW_CI_OPENAI_MODEL}" \
             --baseline-label anthropic/claude-opus-4-7 \
             --output-dir .artifacts/qa-e2e/parity
diff --git a/.github/workflows/qa-live-transports-convex.yml b/.github/workflows/qa-live-transports-convex.yml
index e02e9e1e92ac..96167052ba53 100644
--- a/.github/workflows/qa-live-transports-convex.yml
+++ b/.github/workflows/qa-live-transports-convex.yml
@@ -198,7 +198,7 @@ jobs:
             --concurrency "${QA_PARITY_CONCURRENCY}" \
             --model "${OPENCLAW_CI_OPENAI_MODEL}" \
             --alt-model openai/gpt-5.5-alt \
-            --output-dir .artifacts/qa-e2e/gpt54
+            --output-dir .artifacts/qa-e2e/openai-candidate
 
       - name: Run Opus 4.7 lane
         run: |
@@ -207,15 +207,15 @@ jobs:
             --parity-pack agentic \
             --concurrency "${QA_PARITY_CONCURRENCY}" \
             --model anthropic/claude-opus-4-7 \
-            --alt-model anthropic/claude-sonnet-4-7 \
-            --output-dir .artifacts/qa-e2e/opus46
+            --alt-model anthropic/claude-sonnet-4-6 \
+            --output-dir .artifacts/qa-e2e/anthropic-baseline
 
       - name: Generate parity report
         run: |
           pnpm openclaw qa parity-report \
             --repo-root . \
-            --candidate-summary .artifacts/qa-e2e/gpt54/qa-suite-summary.json \
-            --baseline-summary .artifacts/qa-e2e/opus46/qa-suite-summary.json \
+            --candidate-summary .artifacts/qa-e2e/openai-candidate/qa-suite-summary.json \
+            --baseline-summary .artifacts/qa-e2e/anthropic-baseline/qa-suite-summary.json \
             --candidate-label "${OPENCLAW_CI_OPENAI_MODEL}" \
             --baseline-label anthropic/claude-opus-4-7 \
             --output-dir .artifacts/qa-e2e/parity
@@ -565,8 +565,8 @@ jobs:
             --repo-root . \
             --output-dir "${output_dir}" \
             --provider-mode live-frontier \
-            --model openai/gpt-5.4 \
-            --alt-model openai/gpt-5.4 \
+            --model openai/gpt-5.5 \
+            --alt-model openai/gpt-5.5 \
             --fast \
             --credential-source convex \
             --credential-role ci \
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 558a40b62592..1077bb02595b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
 - QA-Lab/qa-channel: attach redacted agent tool-start traces to outbound `QaBusMessage` records so scenarios can assert actual tool use instead of relying only on reply text. Fixes #67637. Thanks @100yenadmin.
 - QA-Lab: fail live runtime parity reports when assistant-message usage is missing, preventing `0 vs 0` live token rows from being reported as passing proof. Fixes #80411. Thanks @100yenadmin.
 - QA-Lab: fail Codex-backed OpenAI live runtime-pair runs before launching isolated workers when no portable Codex auth is available, while staging API-key fallbacks and configured Codex keys for isolated QA agents. Fixes #80412. Thanks @100yenadmin.
+- QA-Lab: refresh parity gates, mock frontier fixtures, model scenarios, and workflow artifact lanes to compare GPT-5.5 against Claude Opus 4.7. Fixes #74262. Thanks @100yenadmin.
 - QA-Lab: stop returning Control UI bearer tokens from unauthenticated bootstrap payloads and bind Docker harness ports to loopback-only host addresses. (#66355) Thanks @pgondhi987.
 - Mac app: avoid a SwiftUI metadata crash when rendering the Cron Jobs settings pane.
 - Agents/OpenAI streams: yield via `setTimeout(0)` instead of `setImmediate` between bursty Responses chunks so abort timers can fire during the yield, keeping cancel-on-timeout responsive on hot streams. Refs #82462.
diff --git a/docs/ci.md b/docs/ci.md
index 5274d757600d..f6fd19238220 100644
--- a/docs/ci.md
+++ b/docs/ci.md
@@ -35,7 +35,7 @@ OpenClaw CI runs on every push to `main` and every pull request. The `preflight`
 | `macos-swift`                    | Swift lint, build, and tests for the macOS app                                                            | macOS-relevant changes             |
 | `android`                        | Android unit tests for both flavors plus one debug APK build                                              | Android-relevant changes           |
 | `test-performance-agent`         | Daily Codex slow-test optimization after trusted activity                                                 | Main CI success or manual dispatch |
-| `openclaw-performance`           | Daily/on-demand Kova runtime performance reports with mock-provider, deep-profile, and GPT 5.4 live lanes | Scheduled and manual dispatch      |
+| `openclaw-performance`           | Daily/on-demand Kova runtime performance reports with mock-provider, deep-profile, and GPT 5.5 live lanes | Scheduled and manual dispatch      |
 
 ## Fail-fast order
 
@@ -138,7 +138,7 @@ pnpm perf:kova:summary --report .artifacts/kova/reports/mock-provider/report.jso
 
 ```bash
 gh workflow run openclaw-performance.yml --ref main -f profile=diagnostic -f repeat=3
-gh workflow run openclaw-performance.yml --ref main -f profile=smoke -f repeat=1 -f deep_profile=true -f live_gpt54=true
+gh workflow run openclaw-performance.yml --ref main -f profile=smoke -f repeat=1 -f deep_profile=true -f live_openai_candidate=true
 gh workflow run openclaw-performance.yml --ref main -f target_ref=v2026.5.2 -f profile=diagnostic -f repeat=3
 ```
 
@@ -148,7 +148,7 @@ The workflow installs OCM from a pinned release and Kova from `openclaw/Kova` at
 
 - `mock-provider`: Kova diagnostic scenarios against a local-build runtime with deterministic fake OpenAI-compatible auth.
 - `mock-deep-profile`: CPU/heap/trace profiling for startup, gateway, and agent-turn hotspots.
-- `live-gpt54`: a real OpenAI `openai/gpt-5.4` agent turn, skipped when `OPENAI_API_KEY` is unavailable.
+- `live-openai-candidate`: a real OpenAI `openai/gpt-5.5` agent turn, skipped when `OPENAI_API_KEY` is unavailable.
 
 The mock-provider lane also runs OpenClaw-native source probes after the Kova pass: gateway boot timing and memory across default, hook, and 50-plugin startup cases; repeated mock-OpenAI `channel-chat-baseline` hello loops; and CLI startup commands against the booted gateway. The source probe Markdown summary lives at `source/index.md` in the report bundle, with raw JSON beside it.
 
@@ -269,7 +269,7 @@ For the dedicated update and plugin testing policy, including local commands,
 Docker lanes, Package Acceptance inputs, release defaults, and failure triage,
 see [Testing updates and plugins](/help/testing-updates-plugins).
 
-Release checks call Package Acceptance with `source=artifact`, the prepared release package artifact, `suite_profile=custom`, `docker_lanes='doctor-switch update-channel-switch skill-install update-corrupt-plugin upgrade-survivor published-upgrade-survivor update-restart-auth plugins-offline plugin-update'`, and `telegram_mode=mock-openai`. This keeps package migration, update, live ClawHub skill install, stale-plugin-dependency cleanup, configured-plugin install repair, offline plugin, plugin-update, and Telegram proof on the same resolved package tarball. Set `release_package_spec` on Full Release Validation or OpenClaw Release Checks after publishing a beta to run the same matrix against the shipped npm package without rebuilding; set `package_acceptance_package_spec` only when Package Acceptance needs a different package from the rest of release validation. Cross-OS release checks still cover OS-specific onboarding, installer, and platform behavior; package/update product validation should start with Package Acceptance. The `published-upgrade-survivor` Docker lane validates one published package baseline per run in the blocking release path. In Package Acceptance, the resolved `package-under-test` tarball is always the candidate and `published_upgrade_survivor_baseline` selects the fallback published baseline, defaulting to `openclaw@latest`; failed-lane rerun commands preserve that baseline. Full Release Validation with `run_release_soak=true` or `release_profile=full` sets `published_upgrade_survivor_baselines='last-stable-4 2026.4.23 2026.5.2 2026.4.15'` and `published_upgrade_survivor_scenarios=reported-issues` to expand across the four latest stable npm releases plus pinned plugin-compatibility boundary releases and issue-shaped fixtures for Feishu config, preserved bootstrap/persona files, configured OpenClaw plugin installs, tilde log paths, and stale legacy plugin dependency roots. 
Multi-baseline published-upgrade survivor selections are sharded by baseline into separate targeted Docker runner jobs. The separate `Update Migration` workflow uses the `update-migration` Docker lane with `all-since-2026.4.23` and `plugin-deps-cleanup` when the question is exhaustive published update cleanup, not normal Full Release CI breadth. Local aggregate runs can pass exact package specs with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS`, keep a single lane with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC` such as `openclaw@2026.4.15`, or set `OPENCLAW_UPGRADE_SURVIVOR_SCENARIOS` for the scenario matrix. The published lane configures the baseline with a baked `openclaw config set` command recipe, records recipe steps in `summary.json`, and probes `/healthz`, `/readyz`, plus RPC status after Gateway start. The Windows packaged and installer fresh lanes also verify that an installed package can import a browser-control override from a raw absolute Windows path. The OpenAI cross-OS agent-turn smoke defaults to `OPENCLAW_CROSS_OS_OPENAI_MODEL` when set, otherwise `openai/gpt-5.4`, so the install and gateway proof stays on a GPT-5 test model while avoiding GPT-4.x defaults.
+Release checks call Package Acceptance with `source=artifact`, the prepared release package artifact, `suite_profile=custom`, `docker_lanes='doctor-switch update-channel-switch skill-install update-corrupt-plugin upgrade-survivor published-upgrade-survivor update-restart-auth plugins-offline plugin-update'`, and `telegram_mode=mock-openai`. This keeps package migration, update, live ClawHub skill install, stale-plugin-dependency cleanup, configured-plugin install repair, offline plugin, plugin-update, and Telegram proof on the same resolved package tarball. Set `release_package_spec` on Full Release Validation or OpenClaw Release Checks after publishing a beta to run the same matrix against the shipped npm package without rebuilding; set `package_acceptance_package_spec` only when Package Acceptance needs a different package from the rest of release validation. Cross-OS release checks still cover OS-specific onboarding, installer, and platform behavior; package/update product validation should start with Package Acceptance. The `published-upgrade-survivor` Docker lane validates one published package baseline per run in the blocking release path. In Package Acceptance, the resolved `package-under-test` tarball is always the candidate and `published_upgrade_survivor_baseline` selects the fallback published baseline, defaulting to `openclaw@latest`; failed-lane rerun commands preserve that baseline. Full Release Validation with `run_release_soak=true` or `release_profile=full` sets `published_upgrade_survivor_baselines='last-stable-4 2026.4.23 2026.5.2 2026.4.15'` and `published_upgrade_survivor_scenarios=reported-issues` to expand across the four latest stable npm releases plus pinned plugin-compatibility boundary releases and issue-shaped fixtures for Feishu config, preserved bootstrap/persona files, configured OpenClaw plugin installs, tilde log paths, and stale legacy plugin dependency roots. 
Multi-baseline published-upgrade survivor selections are sharded by baseline into separate targeted Docker runner jobs. The separate `Update Migration` workflow uses the `update-migration` Docker lane with `all-since-2026.4.23` and `plugin-deps-cleanup` when the question is exhaustive published update cleanup, not normal Full Release CI breadth. Local aggregate runs can pass exact package specs with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS`, keep a single lane with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC` such as `openclaw@2026.4.15`, or set `OPENCLAW_UPGRADE_SURVIVOR_SCENARIOS` for the scenario matrix. The published lane configures the baseline with a baked `openclaw config set` command recipe, records recipe steps in `summary.json`, and probes `/healthz`, `/readyz`, plus RPC status after Gateway start. The Windows packaged and installer fresh lanes also verify that an installed package can import a browser-control override from a raw absolute Windows path. The OpenAI cross-OS agent-turn smoke uses `OPENCLAW_CROSS_OS_OPENAI_MODEL` when set and otherwise falls back to `openai/gpt-5.5`, so the install and gateway proof stays on a GPT-5 test model while avoiding GPT-4.x defaults.
 
 ### Legacy compatibility windows
 
diff --git a/docs/concepts/qa-e2e-automation.md b/docs/concepts/qa-e2e-automation.md
index 187d1f6cb636..06ba341d853f 100644
--- a/docs/concepts/qa-e2e-automation.md
+++ b/docs/concepts/qa-e2e-automation.md
@@ -175,7 +175,7 @@ For an agent/CV style desktop task, run:
 pnpm openclaw qa mantis visual-task \
   --browser-url https://example.net \
   --expect-text "Example Domain" \
-  --vision-model openai/gpt-5.4
+  --vision-model openai/gpt-5.5
 ```
 
 `visual-task` leases or reuses a Crabbox desktop/browser machine, starts
@@ -370,8 +370,8 @@ Run the Mantis status-reaction scenario explicitly:
 pnpm openclaw qa discord \
   --scenario discord-status-reactions-tool-only \
   --provider-mode live-frontier \
-  --model openai/gpt-5.4 \
-  --alt-model openai/gpt-5.4 \
+  --model openai/gpt-5.5 \
+  --alt-model openai/gpt-5.5 \
   --fast
 ```
 
@@ -780,13 +780,13 @@ pnpm openclaw qa character-eval \
   --model openai/gpt-5.5,thinking=medium,fast \
   --model openai/gpt-5.2,thinking=xhigh \
   --model openai/gpt-5,thinking=xhigh \
-  --model anthropic/claude-opus-4-6,thinking=high \
+  --model anthropic/claude-opus-4-7,thinking=high \
   --model anthropic/claude-sonnet-4-6,thinking=high \
   --model zai/glm-5.1,thinking=high \
   --model moonshot/kimi-k2.5,thinking=high \
   --model google/gemini-3.1-pro-preview,thinking=high \
   --judge-model openai/gpt-5.5,thinking=xhigh,fast \
-  --judge-model anthropic/claude-opus-4-6,thinking=high \
+  --judge-model anthropic/claude-opus-4-7,thinking=high \
   --blind-judge-models \
   --concurrency 16 \
   --judge-concurrency 16
@@ -817,13 +817,13 @@ Candidate and judge model runs both default to concurrency 16. Lower
 `--concurrency` or `--judge-concurrency` when provider limits or local gateway
 pressure make a run too noisy.
 When no candidate `--model` is passed, the character eval defaults to
-`openai/gpt-5.5`, `openai/gpt-5.2`, `openai/gpt-5`, `anthropic/claude-opus-4-6`,
+`openai/gpt-5.5`, `openai/gpt-5.2`, `openai/gpt-5`, `anthropic/claude-opus-4-7`,
 `anthropic/claude-sonnet-4-6`, `zai/glm-5.1`,
 `moonshot/kimi-k2.5`, and
 `google/gemini-3.1-pro-preview` when no `--model` is passed.
 When no `--judge-model` is passed, the judges default to
 `openai/gpt-5.5,thinking=xhigh,fast` and
-`anthropic/claude-opus-4-6,thinking=high`.
+`anthropic/claude-opus-4-7,thinking=high`.
 
 ## Related docs
 
diff --git a/docs/help/gpt55-codex-agentic-parity-maintainers.md b/docs/help/gpt55-codex-agentic-parity-maintainers.md
index e683a259d057..2cf69baadc9d 100644
--- a/docs/help/gpt55-codex-agentic-parity-maintainers.md
+++ b/docs/help/gpt55-codex-agentic-parity-maintainers.md
@@ -59,7 +59,7 @@ Does not own:
 
 Owns:
 
-- first-wave GPT-5.5 vs Opus 4.6 scenario pack
+- first-wave GPT-5.5 vs Opus 4.7 scenario pack
 - parity documentation
 - parity report and release-gate mechanics
 
@@ -123,7 +123,7 @@ Expected artifacts from PR D:
 
 ## Release gate
 
-Do not claim GPT-5.5 parity or superiority over Opus 4.6 until:
+Do not claim GPT-5.5 parity or superiority over Opus 4.7 until:
 
 - PR A, PR B, and PR C are merged
 - PR D runs the first-wave parity pack cleanly
@@ -133,7 +133,7 @@ Do not claim GPT-5.5 parity or superiority over Opus 4.6 until:
 ```mermaid
 flowchart LR
     A["PR A-C merged"] --> B["Run GPT-5.5 parity pack"]
-    A --> C["Run Opus 4.6 parity pack"]
+    A --> C["Run Opus 4.7 parity pack"]
     B --> D["qa-suite-summary.json"]
     C --> E["qa-suite-summary.json"]
     D --> F["qa parity-report"]
@@ -146,7 +146,7 @@ flowchart LR
 
 The parity harness is not the only evidence source. Keep this split explicit in review:
 
-- PR D owns the scenario-based GPT-5.5 vs Opus 4.6 comparison
+- PR D owns the scenario-based GPT-5.5 vs Opus 4.7 comparison
 - PR B deterministic suites still own auth/proxy/DNS and full-access truthfulness evidence
 
 ## Quick maintainer merge workflow
@@ -179,7 +179,7 @@ If any one of the evidence bar items is missing, request changes instead of merg
 | No fake progress or fake tool completion | PR A + PR D   | parity fake-success count plus scenario-level report details        |
 | No false `/elevated full` guidance       | PR B          | deterministic runtime-truthfulness suites                           |
 | Replay/liveness failures remain explicit | PR C + PR D   | lifecycle/replay suites plus `compaction-retry-mutating-tool`       |
-| GPT-5.5 matches or beats Opus 4.6        | PR D          | `qa-agentic-parity-report.md` and `qa-agentic-parity-summary.json`  |
+| GPT-5.5 matches or beats Opus 4.7        | PR D          | `qa-agentic-parity-report.md` and `qa-agentic-parity-summary.json`  |
 
 ## Reviewer shorthand: before vs after
 
diff --git a/docs/help/gpt55-codex-agentic-parity.md b/docs/help/gpt55-codex-agentic-parity.md
index d833a556a239..dd9ed85dfa0e 100644
--- a/docs/help/gpt55-codex-agentic-parity.md
+++ b/docs/help/gpt55-codex-agentic-parity.md
@@ -13,7 +13,7 @@ OpenClaw already worked well with tool-using frontier models, but GPT-5.5 and Co
 - they could use strict OpenAI/Codex tool schemas incorrectly
 - they could ask for `/elevated full` even when full access was impossible
 - they could lose long-running task state during replay or compaction
-- parity claims against Claude Opus 4.6 were based on anecdotes instead of repeatable scenarios
+- parity claims against Claude Opus 4.7 were based on anecdotes instead of repeatable scenarios
 
 This parity program fixes those gaps in four reviewable slices.
 
@@ -51,7 +51,7 @@ The tool-compat work reduces schema friction for strict OpenAI/Codex tool regist
 
 ### PR D: parity harness
 
-This slice adds the first-wave QA-lab parity pack so GPT-5.5 and Opus 4.6 can be exercised through the same scenarios and compared using shared evidence.
+This slice adds the first-wave QA-lab parity pack so GPT-5.5 and Opus 4.7 can be exercised through the same scenarios and compared using shared evidence.
 
 The parity pack is the proof layer. It does not change runtime behavior by itself.
 
@@ -60,8 +60,8 @@ After you have two `qa-suite-summary.json` artifacts, generate the release-gate
 ```bash
 pnpm openclaw qa parity-report \
   --repo-root . \
-  --candidate-summary .artifacts/qa-e2e/gpt55/qa-suite-summary.json \
-  --baseline-summary .artifacts/qa-e2e/opus46/qa-suite-summary.json \
+  --candidate-summary .artifacts/qa-e2e/openai-candidate/qa-suite-summary.json \
+  --baseline-summary .artifacts/qa-e2e/anthropic-baseline/qa-suite-summary.json \
   --output-dir .artifacts/qa-e2e/parity
 ```
 
@@ -122,7 +122,7 @@ flowchart TD
 ```mermaid
 flowchart LR
     A["Merged runtime slices (PR A-C)"] --> B["Run GPT-5.5 parity pack"]
-    A --> C["Run Opus 4.6 parity pack"]
+    A --> C["Run Opus 4.7 parity pack"]
     B --> D["qa-suite-summary.json"]
     C --> E["qa-suite-summary.json"]
     D --> F["openclaw qa parity-report"]
@@ -178,7 +178,7 @@ Required outcomes:
 - no fake completion without real execution
 - no incorrect `/elevated full` guidance
 - no silent replay or compaction abandonment
-- parity-pack metrics that are at least as strong as the agreed Opus 4.6 baseline
+- parity-pack metrics that are at least as strong as the agreed Opus 4.7 baseline
 
 For the first-wave harness, the gate compares:
 
@@ -189,7 +189,7 @@ For the first-wave harness, the gate compares:
 
 Parity evidence is intentionally split across two layers:
 
-- PR D proves same-scenario GPT-5.5 vs Opus 4.6 behavior with QA-lab
+- PR D proves same-scenario GPT-5.5 vs Opus 4.7 behavior with QA-lab
 - PR B deterministic suites prove auth, proxy, DNS, and `/elevated full` truthfulness outside the harness
 
 ## Goal-to-evidence matrix
@@ -200,13 +200,13 @@ Parity evidence is intentionally split across two layers:
 | GPT-5.5 no longer fakes progress or fake tool completion | PR A + PR D | parity report scenario outcomes and fake-success count             | no suspicious pass results and no commentary-only completion                             |
 | GPT-5.5 no longer gives false `/elevated full` guidance  | PR B        | deterministic truthfulness suites                                  | blocked reasons and full-access hints stay runtime-accurate                              |
 | Replay/liveness failures stay explicit                   | PR C + PR D | PR C lifecycle/replay suites plus `compaction-retry-mutating-tool` | mutating work keeps replay-unsafety explicit instead of silently disappearing            |
-| GPT-5.5 matches or beats Opus 4.6 on the agreed metrics  | PR D        | `qa-agentic-parity-report.md` and `qa-agentic-parity-summary.json` | same scenario coverage and no regression on completion, stop behavior, or valid tool use |
+| GPT-5.5 matches or beats Opus 4.7 on the agreed metrics  | PR D        | `qa-agentic-parity-report.md` and `qa-agentic-parity-summary.json` | same scenario coverage and no regression on completion, stop behavior, or valid tool use |
 
 ## How to read the parity verdict
 
 Use the verdict in `qa-agentic-parity-summary.json` as the final machine-readable decision for the first-wave parity pack.
 
-- `pass` means GPT-5.5 covered the same scenarios as Opus 4.6 and did not regress on the agreed aggregate metrics.
+- `pass` means GPT-5.5 covered the same scenarios as Opus 4.7 and did not regress on the agreed aggregate metrics.
 - `fail` means at least one hard gate tripped: weaker completion, worse unintended stops, weaker valid tool use, any fake-success case, or mismatched scenario coverage.
 - "shared/base CI issue" is not itself a parity result. If CI noise outside PR D blocks a run, the verdict should wait for a clean merged-runtime execution instead of being inferred from branch-era logs.
 - Auth, proxy, DNS, and `/elevated full` truthfulness still come from PR B's deterministic suites, so the final release claim needs both: a passing PR D parity verdict and green PR B truthfulness coverage.
diff --git a/docs/help/testing.md b/docs/help/testing.md
index 5d8124ee4d27..9928d9161b2d 100644
--- a/docs/help/testing.md
+++ b/docs/help/testing.md
@@ -47,9 +47,9 @@ When debugging real providers/models (requires real creds):
 - Live suite (models + gateway tool/image probes): `pnpm test:live`
 - Target one live file quietly: `pnpm test:live -- src/agents/models.profiles.live.test.ts`
 - Runtime performance reports: dispatch `OpenClaw Performance` with
-  `live_gpt54=true` for a real `openai/gpt-5.4` agent turn or
+  `live_openai_candidate=true` for a real `openai/gpt-5.5` agent turn or
   `deep_profile=true` for Kova CPU/heap/trace artifacts. Daily scheduled runs
-  publish mock-provider, deep-profile, and GPT 5.4 lane artifacts to
+  publish mock-provider, deep-profile, and GPT 5.5 lane artifacts to
   `openclaw/clawgrit-reports` when `CLAWGRIT_REPORTS_TOKEN` is configured. The
   mock-provider report also includes source-level gateway boot, memory,
   plugin-pressure, repeated fake-model hello-loop, and CLI startup numbers.
diff --git a/extensions/qa-lab/src/agentic-parity-report.test.ts b/extensions/qa-lab/src/agentic-parity-report.test.ts
index 2472ee2ee10d..9c9c3d87f1f8 100644
--- a/extensions/qa-lab/src/agentic-parity-report.test.ts
+++ b/extensions/qa-lab/src/agentic-parity-report.test.ts
@@ -153,7 +153,7 @@ describe("qa agentic parity report", () => {
   it("fails the parity gate when the candidate regresses against baseline", () => {
     const comparison = buildQaAgenticParityComparison({
       candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
       candidateSummary: {
         scenarios: [
           { name: "Approval turn tool followthrough", status: "pass" },
@@ -181,10 +181,10 @@ describe("qa agentic parity report", () => {
 
     expect(comparison.pass).toBe(false);
     expect(comparison.failures).toContain(
-      "openai/gpt-5.5 completion rate 80.0% is below anthropic/claude-opus-4-6 100.0%.",
+      "openai/gpt-5.5 completion rate 80.0% is below anthropic/claude-opus-4-7 100.0%.",
     );
     expect(comparison.failures).toContain(
-      "openai/gpt-5.5 unintended-stop rate 20.0% exceeds anthropic/claude-opus-4-6 0.0%.",
+      "openai/gpt-5.5 unintended-stop rate 20.0% exceeds anthropic/claude-opus-4-7 0.0%.",
     );
   });
 
@@ -199,7 +199,7 @@ describe("qa agentic parity report", () => {
     ];
     const comparison = buildQaAgenticParityComparison({
       candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
       candidateSummary: {
         scenarios: baselineScenarios.filter(
           (scenario) => scenario.name !== "Extra non-parity lane",
@@ -211,14 +211,14 @@ describe("qa agentic parity report", () => {
 
     expect(comparison.pass).toBe(false);
     expect(comparison.failures).toContain(
-      "Scenario coverage mismatch for Extra non-parity lane: openai/gpt-5.5=missing, anthropic/claude-opus-4-6=pass.",
+      "Scenario coverage mismatch for Extra non-parity lane: openai/gpt-5.5=missing, anthropic/claude-opus-4-7=pass.",
     );
   });
 
   it("reports each missing required parity scenario exactly once (no double-counting)", () => {
     const comparison = buildQaAgenticParityComparison({
       candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
       candidateSummary: {
         scenarios: [{ name: "Approval turn tool followthrough", status: "pass" }],
       },
@@ -260,7 +260,7 @@ describe("qa agentic parity report", () => {
 
     const comparison = buildQaAgenticParityComparison({
       candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
       candidateSummary: summaryWithExtras,
       baselineSummary: scopedSummary,
       comparedAt: "2026-04-11T00:00:00.000Z",
@@ -282,7 +282,7 @@ describe("qa agentic parity report", () => {
   it("fails the parity gate when required parity scenarios are missing on both sides", () => {
     const comparison = buildQaAgenticParityComparison({
       candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
       candidateSummary: {
         scenarios: [{ name: "Approval turn tool followthrough", status: "pass" }],
       },
@@ -294,14 +294,14 @@ describe("qa agentic parity report", () => {
 
     expect(comparison.pass).toBe(false);
     expect(comparison.failures).toContain(
-      "Missing required parity scenario coverage for Image understanding from attachment: openai/gpt-5.5=missing, anthropic/claude-opus-4-6=missing.",
+      "Missing required parity scenario coverage for Image understanding from attachment: openai/gpt-5.5=missing, anthropic/claude-opus-4-7=missing.",
     );
   });
 
   it("fails the parity gate when required parity scenarios are skipped", () => {
     const comparison = buildQaAgenticParityComparison({
       candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
       candidateSummary: {
         scenarios: [
           { name: "Approval turn tool followthrough", status: "pass" },
@@ -325,7 +325,7 @@ describe("qa agentic parity report", () => {
 
     expect(comparison.pass).toBe(false);
     expect(comparison.failures).toContain(
-      "Missing required parity scenario coverage for Compaction retry after mutating tool: openai/gpt-5.5=skip, anthropic/claude-opus-4-6=skip.",
+      "Missing required parity scenario coverage for Compaction retry after mutating tool: openai/gpt-5.5=skip, anthropic/claude-opus-4-7=skip.",
     );
   });
 
@@ -342,7 +342,7 @@ describe("qa agentic parity report", () => {
     });
     const comparison = buildQaAgenticParityComparison({
       candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
       candidateSummary: { scenarios: scenariosWithBothFail },
       baselineSummary: { scenarios: scenariosWithBothFail },
       comparedAt: "2026-04-11T00:00:00.000Z",
@@ -350,7 +350,7 @@ describe("qa agentic parity report", () => {
 
     expect(comparison.pass).toBe(false);
     expect(comparison.failures).toContain(
-      "Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-6=fail.",
+      "Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-7=fail.",
     );
     // Metric comparisons are relative, so a same-on-both-sides failure
     // must not appear as a relative metric failure. The required-scenario
@@ -370,7 +370,7 @@ describe("qa agentic parity report", () => {
     });
     const comparison = buildQaAgenticParityComparison({
       candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
       candidateSummary: { scenarios: candidateWithOneFail },
       baselineSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
       comparedAt: "2026-04-11T00:00:00.000Z",
@@ -378,7 +378,7 @@ describe("qa agentic parity report", () => {
 
     expect(comparison.pass).toBe(false);
     expect(comparison.failures).toContain(
-      "Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-6=pass.",
+      "Required parity scenario Approval turn tool followthrough failed: openai/gpt-5.5=fail, anthropic/claude-opus-4-7=pass.",
     );
   });
 
@@ -387,7 +387,7 @@ describe("qa agentic parity report", () => {
     // below is the isolated gate failure under test (no coverage-gap noise).
     const comparison = buildQaAgenticParityComparison({
       candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
       candidateSummary: {
         scenarios: FULL_PARITY_PASS_SCENARIOS,
       },
@@ -401,7 +401,7 @@ describe("qa agentic parity report", () => {
 
     expect(comparison.pass).toBe(false);
     expect(comparison.failures).toEqual([
-      "anthropic/claude-opus-4-6 produced 1 suspicious pass result(s); baseline fake-success count must also be 0.",
+      "anthropic/claude-opus-4-7 produced 1 suspicious pass result(s); baseline fake-success count must also be 0.",
     ]);
   });
 
@@ -571,14 +571,14 @@ status=done`,
     expect(() =>
       buildQaAgenticParityComparison({
         candidateLabel: "openai/gpt-5.5",
-        baselineLabel: "anthropic/claude-opus-4-6",
+        baselineLabel: "anthropic/claude-opus-4-7",
         candidateSummary: {
           scenarios: parityPassScenarios,
-          run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-6" },
+          run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-7" },
         },
         baselineSummary: {
           scenarios: parityPassScenarios,
-          run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-6" },
+          run: { primaryProvider: "anthropic", primaryModel: "claude-opus-4-7" },
         },
         comparedAt: "2026-04-11T00:00:00.000Z",
       }),
@@ -593,7 +593,7 @@ status=done`,
     expect(() =>
       buildQaAgenticParityComparison({
         candidateLabel: "openai/gpt-5.5",
-        baselineLabel: "anthropic/claude-opus-4-6",
+        baselineLabel: "anthropic/claude-opus-4-7",
         candidateSummary: {
           scenarios: parityPassScenarios,
           run: { primaryProvider: "openai" },
@@ -612,7 +612,7 @@ status=done`,
   it("accepts matching run.primaryProvider labels without throwing", () => {
     const comparison = buildQaAgenticParityComparison({
       candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
       candidateSummary: {
         scenarios: FULL_PARITY_PASS_SCENARIOS,
         run: {
@@ -625,8 +625,8 @@ status=done`,
         scenarios: FULL_PARITY_PASS_SCENARIOS,
         run: {
           primaryProvider: "anthropic",
-          primaryModel: "anthropic/claude-opus-4-6",
-          primaryModelName: "claude-opus-4-6",
+          primaryModel: "anthropic/claude-opus-4-7",
+          primaryModelName: "claude-opus-4-7",
         },
       },
       comparedAt: "2026-04-11T00:00:00.000Z",
@@ -639,7 +639,7 @@ status=done`,
     // work against those, trusting the caller-supplied label.
     const comparison = buildQaAgenticParityComparison({
       candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
       candidateSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
       baselineSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
       comparedAt: "2026-04-11T00:00:00.000Z",
@@ -650,7 +650,7 @@ status=done`,
   it("skips provider verification for arbitrary display labels when run metadata is present", () => {
     const comparison = buildQaAgenticParityComparison({
       candidateLabel: "GPT-5.5 candidate",
-      baselineLabel: "Opus 4.6 baseline",
+      baselineLabel: "Opus 4.7 baseline",
       candidateSummary: {
         scenarios: FULL_PARITY_PASS_SCENARIOS,
         run: {
@@ -663,8 +663,8 @@ status=done`,
         scenarios: FULL_PARITY_PASS_SCENARIOS,
         run: {
           primaryProvider: "anthropic",
-          primaryModel: "anthropic/claude-opus-4-6",
-          primaryModelName: "claude-opus-4-6",
+          primaryModel: "anthropic/claude-opus-4-7",
+          primaryModelName: "claude-opus-4-7",
         },
       },
       comparedAt: "2026-04-11T00:00:00.000Z",
@@ -676,7 +676,7 @@ status=done`,
   it("skips provider verification for mixed-case or decorated display labels", () => {
     const comparison = buildQaAgenticParityComparison({
       candidateLabel: "Candidate: GPT-5.5",
-      baselineLabel: "Opus 4.6 / baseline",
+      baselineLabel: "Opus 4.7 / baseline",
       candidateSummary: {
         scenarios: FULL_PARITY_PASS_SCENARIOS,
         run: {
@@ -689,8 +689,8 @@ status=done`,
         scenarios: FULL_PARITY_PASS_SCENARIOS,
         run: {
           primaryProvider: "anthropic",
-          primaryModel: "anthropic/claude-opus-4-6",
-          primaryModelName: "claude-opus-4-6",
+          primaryModel: "anthropic/claude-opus-4-7",
+          primaryModelName: "claude-opus-4-7",
         },
       },
       comparedAt: "2026-04-11T00:00:00.000Z",
@@ -703,7 +703,7 @@ status=done`,
     expect(() =>
       buildQaAgenticParityComparison({
         candidateLabel: "openai/gpt-5.5",
-        baselineLabel: "anthropic/claude-opus-4-6",
+        baselineLabel: "anthropic/claude-opus-4-7",
         candidateSummary: {
           scenarios: FULL_PARITY_PASS_SCENARIOS,
           run: {
@@ -716,8 +716,8 @@ status=done`,
           scenarios: FULL_PARITY_PASS_SCENARIOS,
           run: {
             primaryProvider: "anthropic",
-            primaryModel: "anthropic/claude-opus-4-6",
-            primaryModelName: "claude-opus-4-6",
+            primaryModel: "anthropic/claude-opus-4-7",
+            primaryModelName: "claude-opus-4-7",
           },
         },
         comparedAt: "2026-04-11T00:00:00.000Z",
@@ -730,7 +730,7 @@ status=done`,
   it("accepts colon-delimited structured labels when provider and model both match", () => {
     const comparison = buildQaAgenticParityComparison({
       candidateLabel: "openai:gpt-5.5",
-      baselineLabel: "anthropic:claude-opus-4-6",
+      baselineLabel: "anthropic:claude-opus-4-7",
       candidateSummary: {
         scenarios: FULL_PARITY_PASS_SCENARIOS,
         run: {
@@ -743,8 +743,8 @@ status=done`,
         scenarios: FULL_PARITY_PASS_SCENARIOS,
         run: {
           primaryProvider: "anthropic",
-          primaryModel: "anthropic/claude-opus-4-6",
-          primaryModelName: "claude-opus-4-6",
+          primaryModel: "anthropic/claude-opus-4-7",
+          primaryModelName: "claude-opus-4-7",
         },
       },
       comparedAt: "2026-04-11T00:00:00.000Z",
@@ -759,7 +759,7 @@ status=done`,
     // added by the second-wave expansion.
     const comparison = buildQaAgenticParityComparison({
       candidateLabel: "openai/gpt-5.5",
-      baselineLabel: "anthropic/claude-opus-4-6",
+      baselineLabel: "anthropic/claude-opus-4-7",
       candidateSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
       baselineSummary: { scenarios: FULL_PARITY_PASS_SCENARIOS },
       comparedAt: "2026-04-11T00:00:00.000Z",
@@ -768,7 +768,7 @@ status=done`,
     const report = renderQaAgenticParityMarkdownReport(comparison);
 
     expect(report).toContain(
-      "# OpenClaw Agentic Parity Report — openai/gpt-5.5 vs anthropic/claude-opus-4-6",
+      "# OpenClaw Agentic Parity Report — openai/gpt-5.5 vs anthropic/claude-opus-4-7",
     );
     expect(report).toContain("| Completion rate | 100.0% | 100.0% |");
     expect(report).toContain("### Approval turn tool followthrough");
@@ -779,7 +779,7 @@ status=done`,
     // Regression for the loop-7 Copilot finding: callers that configure
     // non-gpt-5.5 / non-opus labels (for example an internal candidate vs
     // another candidate) must see the labels in the rendered H1 instead of
-    // the hardcoded "GPT-5.5 / Opus 4.6" title that would otherwise confuse
+    // the hardcoded "GPT-5.5 / Opus 4.7" title that would otherwise confuse
     // readers of saved reports.
     const comparison = buildQaAgenticParityComparison({
       candidateLabel: "openai/gpt-5.5-alt",
diff --git a/extensions/qa-lab/src/agentic-parity-report.ts b/extensions/qa-lab/src/agentic-parity-report.ts
index 1d1aba152c59..d73251f65afd 100644
--- a/extensions/qa-lab/src/agentic-parity-report.ts
+++ b/extensions/qa-lab/src/agentic-parity-report.ts
@@ -564,7 +564,7 @@ export function renderQaAgenticParityMarkdownReport(comparison: QaAgenticParityC
   // Title is parametrized from the candidate / baseline labels so reports
-  // for any candidate/baseline pair (not only gpt-5.5 vs opus 4.6) render
+  // for any candidate/baseline pair (not only gpt-5.5 vs opus 4.7) render
   // with an accurate header. The default CLI labels are still
-  // openai/gpt-5.5 vs anthropic/claude-opus-4-6, but the helper works for
+  // openai/gpt-5.5 vs anthropic/claude-opus-4-7, but the helper works for
   // any parity comparison a caller configures.
   const lines = [
     `# OpenClaw Agentic Parity Report — ${comparison.candidateLabel} vs ${comparison.baselineLabel}`,
diff --git a/extensions/qa-lab/src/character-eval.test.ts b/extensions/qa-lab/src/character-eval.test.ts
index 5b5046151edc..22d97466369b 100644
--- a/extensions/qa-lab/src/character-eval.test.ts
+++ b/extensions/qa-lab/src/character-eval.test.ts
@@ -274,7 +274,7 @@ describe("runQaCharacterEval", () => {
       { model: "openai/gpt-5.5", rank: 1, score: 8, summary: "ok" },
       { model: "openai/gpt-5.2", rank: 2, score: 7.5, summary: "ok" },
       { model: "openai/gpt-5", rank: 3, score: 7.2, summary: "ok" },
-      { model: "anthropic/claude-opus-4-6", rank: 4, score: 7, summary: "ok" },
+      { model: "anthropic/claude-opus-4-7", rank: 4, score: 7, summary: "ok" },
       { model: "anthropic/claude-sonnet-4-6", rank: 5, score: 6.8, summary: "ok" },
       { model: "zai/glm-5.1", rank: 6, score: 6.3, summary: "ok" },
       { model: "moonshot/kimi-k2.5", rank: 7, score: 6.2, summary: "ok" },
@@ -294,7 +294,7 @@ describe("runQaCharacterEval", () => {
       "openai/gpt-5.5",
       "openai/gpt-5.2",
       "openai/gpt-5",
-      "anthropic/claude-opus-4-6",
+      "anthropic/claude-opus-4-7",
       "anthropic/claude-sonnet-4-6",
       "zai/glm-5.1",
       "moonshot/kimi-k2.5",
@@ -323,7 +323,7 @@ describe("runQaCharacterEval", () => {
     expect(runJudge).toHaveBeenCalledTimes(2);
     expect(runJudge.mock.calls.map(([params]) => params.judgeModel)).toEqual([
       "openai/gpt-5.5",
-      "anthropic/claude-opus-4-6",
+      "anthropic/claude-opus-4-7",
     ]);
     expect(runJudge.mock.calls.map(([params]) => params.judgeThinkingDefault)).toEqual([
       "xhigh",
@@ -577,11 +577,11 @@ describe("runQaCharacterEval", () => {
       candidateModelOptions: {
         "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: false },
       },
-      judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-6"],
+      judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-7"],
       judgeThinkingDefault: "medium",
       judgeModelOptions: {
         "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: true },
-        "anthropic/claude-opus-4-6": { thinkingDefault: "high" },
+        "anthropic/claude-opus-4-7": { thinkingDefault: "high" },
       },
       runSuite,
       runJudge,
diff --git a/extensions/qa-lab/src/cli.runtime.test.ts b/extensions/qa-lab/src/cli.runtime.test.ts
index 96ab12effe44..73311918fca7 100644
--- a/extensions/qa-lab/src/cli.runtime.test.ts
+++ b/extensions/qa-lab/src/cli.runtime.test.ts
@@ -622,7 +622,7 @@ describe("qa cli runtime", () => {
       repoRoot: "/tmp/openclaw-repo",
       providerMode: "mock-openai",
       primaryModel: "openai/gpt-5.5",
-      alternateModel: "anthropic/claude-opus-4-6",
+      alternateModel: "anthropic/claude-opus-4-7",
       preflight: true,
     });
 
@@ -632,7 +632,7 @@ describe("qa cli runtime", () => {
       transportId: "qa-channel",
       providerMode: "mock-openai",
       primaryModel: "openai/gpt-5.5",
-      alternateModel: "anthropic/claude-opus-4-6",
+      alternateModel: "anthropic/claude-opus-4-7",
       scenarioIds: ["approval-turn-tool-followthrough"],
       concurrency: 1,
     });
@@ -930,7 +930,7 @@ describe("qa cli runtime", () => {
       fast: true,
       thinking: "medium",
       modelThinking: ["codex-cli/test-model=medium"],
-      judgeModel: ["openai/gpt-5.5,thinking=xhigh,fast", "anthropic/claude-opus-4-6,thinking=high"],
+      judgeModel: ["openai/gpt-5.5,thinking=xhigh,fast", "anthropic/claude-opus-4-7,thinking=high"],
       judgeTimeoutMs: 180_000,
       blindJudgeModels: true,
       concurrency: 4,
@@ -951,10 +951,10 @@ describe("qa cli runtime", () => {
         "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: false },
         "codex-cli/test-model": { thinkingDefault: "high", fastMode: true },
       },
-      judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-6"],
+      judgeModels: ["openai/gpt-5.5", "anthropic/claude-opus-4-7"],
       judgeModelOptions: {
         "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: true },
-        "anthropic/claude-opus-4-6": { thinkingDefault: "high" },
+        "anthropic/claude-opus-4-7": { thinkingDefault: "high" },
       },
       judgeTimeoutMs: 180_000,
       judgeBlindModels: true,
@@ -1285,7 +1285,7 @@ describe("qa cli runtime", () => {
       providerMode: "mock-openai",
       parityPack: "agentic",
       primaryModel: "openai/gpt-5.5",
-      alternateModel: "anthropic/claude-opus-4-6",
+      alternateModel: "anthropic/claude-opus-4-7",
     });
 
     expect(runQaSuiteFromRuntime).toHaveBeenCalledWith({
@@ -1294,7 +1294,7 @@ describe("qa cli runtime", () => {
       transportId: "qa-channel",
       providerMode: "mock-openai",
       primaryModel: "openai/gpt-5.5",
-      alternateModel: "anthropic/claude-opus-4-6",
+      alternateModel: "anthropic/claude-opus-4-7",
       fastMode: undefined,
       scenarioIds: [
         "approval-turn-tool-followthrough",
diff --git a/extensions/qa-lab/src/cli.test.ts b/extensions/qa-lab/src/cli.test.ts
index 6da79240107f..97bc91e7d274 100644
--- a/extensions/qa-lab/src/cli.test.ts
+++ b/extensions/qa-lab/src/cli.test.ts
@@ -346,9 +346,9 @@ describe("qa cli registration", () => {
       "--provider-mode",
       "live-frontier",
       "--model",
-      "openai/gpt-5.4",
+      "openai/gpt-5.5",
       "--alt-model",
-      "openai/gpt-5.4",
+      "openai/gpt-5.5",
       "--scenario",
       "slack-canary",
       "--credential-source",
@@ -360,7 +360,7 @@ describe("qa cli registration", () => {
     ]);
 
     expect(runMantisSlackDesktopSmokeCommand).toHaveBeenCalledWith({
-      alternateModel: "openai/gpt-5.4",
+      alternateModel: "openai/gpt-5.5",
       crabboxBin: "/tmp/crabbox",
       credentialRole: "maintainer",
       credentialSource: "env",
@@ -371,7 +371,7 @@ describe("qa cli registration", () => {
       leaseId: "cbx_123abc",
       machineClass: "beast",
       outputDir: ".artifacts/qa-e2e/mantis/slack-desktop",
-      primaryModel: "openai/gpt-5.4",
+      primaryModel: "openai/gpt-5.5",
       provider: "hetzner",
       providerMode: "live-frontier",
       repoRoot: "/tmp/openclaw-repo",
diff --git a/extensions/qa-lab/src/gateway-log-sentinel.test.ts b/extensions/qa-lab/src/gateway-log-sentinel.test.ts
index 8cde3a317efe..8f7d77ae9a98 100644
--- a/extensions/qa-lab/src/gateway-log-sentinel.test.ts
+++ b/extensions/qa-lab/src/gateway-log-sentinel.test.ts
@@ -16,7 +16,7 @@ describe("gateway log sentinels", () => {
         "[plugins] plugin must declare contracts.tools for: runtime_tool",
         "2026-05-13T00:00:04Z codex app-server attempt timed out after 180000ms",
         "2026-05-13T00:00:05Z codex_app_server progress stalled for run abc123",
-        "2026-05-13T00:00:06Z cron payload model openai/gpt-5.4 is not in model allowlist",
+        "2026-05-13T00:00:06Z cron payload model openai/gpt-5.5 is not in model allowlist",
         "2026-05-13T00:00:07Z OpenAI quota exceeded for live-frontier request",
       ].join("\n"),
     );
diff --git a/extensions/qa-lab/src/live-timeout.test.ts b/extensions/qa-lab/src/live-timeout.test.ts
index c696a9c9e540..fa216cbc2980 100644
--- a/extensions/qa-lab/src/live-timeout.test.ts
+++ b/extensions/qa-lab/src/live-timeout.test.ts
@@ -8,7 +8,7 @@ describe("qa live timeout policy", () => {
         {
           providerMode: "mock-openai",
           primaryModel: "anthropic/claude-sonnet-4-6",
-          alternateModel: "anthropic/claude-opus-4-6",
+          alternateModel: "anthropic/claude-opus-4-7",
         },
         30_000,
       ),
@@ -47,7 +47,7 @@ describe("qa live timeout policy", () => {
         {
           providerMode: "live-frontier",
           primaryModel: "anthropic/claude-sonnet-4-6",
-          alternateModel: "anthropic/claude-opus-4-6",
+          alternateModel: "anthropic/claude-opus-4-7",
         },
         30_000,
       ),
@@ -60,10 +60,10 @@ describe("qa live timeout policy", () => {
         {
           providerMode: "live-frontier",
           primaryModel: "anthropic/claude-sonnet-4-6",
-          alternateModel: "anthropic/claude-opus-4-6",
+          alternateModel: "anthropic/claude-opus-4-7",
         },
         30_000,
-        "anthropic/claude-opus-4-6",
+        "anthropic/claude-opus-4-7",
       ),
     ).toBe(240_000);
   });
diff --git a/extensions/qa-lab/src/providers/live-frontier/character-eval.ts b/extensions/qa-lab/src/providers/live-frontier/character-eval.ts
index 5019ff5b8dc2..89f5fa275496 100644
--- a/extensions/qa-lab/src/providers/live-frontier/character-eval.ts
+++ b/extensions/qa-lab/src/providers/live-frontier/character-eval.ts
@@ -9,7 +9,7 @@ export const QA_FRONTIER_CHARACTER_EVAL_MODELS = Object.freeze([
   "openai/gpt-5.5",
   "openai/gpt-5.2",
   "openai/gpt-5",
-  "anthropic/claude-opus-4-6",
+  "anthropic/claude-opus-4-7",
   "anthropic/claude-sonnet-4-6",
   "zai/glm-5.1",
   "moonshot/kimi-k2.5",
@@ -25,12 +25,12 @@ export const QA_FRONTIER_CHARACTER_THINKING_BY_MODEL: Readonly<Record<string, Qa
 
 export const QA_FRONTIER_CHARACTER_JUDGE_MODELS = Object.freeze([
   "openai/gpt-5.5",
-  "anthropic/claude-opus-4-6",
+  "anthropic/claude-opus-4-7",
 ]);
 
 export const QA_FRONTIER_CHARACTER_JUDGE_MODEL_OPTIONS: Readonly<
   Record<string, QaFrontierCharacterModelOptions>
 > = Object.freeze({
   "openai/gpt-5.5": { thinkingDefault: "xhigh", fastMode: true },
-  "anthropic/claude-opus-4-6": { thinkingDefault: "high" },
+  "anthropic/claude-opus-4-7": { thinkingDefault: "high" },
 });
diff --git a/extensions/qa-lab/src/providers/live-frontier/parity.ts b/extensions/qa-lab/src/providers/live-frontier/parity.ts
index 62bcd5556ce1..a0874010b6d0 100644
--- a/extensions/qa-lab/src/providers/live-frontier/parity.ts
+++ b/extensions/qa-lab/src/providers/live-frontier/parity.ts
@@ -1,2 +1,2 @@
 export const QA_FRONTIER_PARITY_CANDIDATE_LABEL = "openai/gpt-5.5";
-export const QA_FRONTIER_PARITY_BASELINE_LABEL = "anthropic/claude-opus-4-6";
+export const QA_FRONTIER_PARITY_BASELINE_LABEL = "anthropic/claude-opus-4-7";
diff --git a/extensions/qa-lab/src/providers/mock-openai/server.test.ts b/extensions/qa-lab/src/providers/mock-openai/server.test.ts
index ed6cd9dddb2e..db2f56cebbe2 100644
--- a/extensions/qa-lab/src/providers/mock-openai/server.test.ts
+++ b/extensions/qa-lab/src/providers/mock-openai/server.test.ts
@@ -2727,7 +2727,7 @@ describe("qa mock openai server", () => {
       headers: { "content-type": "application/json" },
       body: JSON.stringify({
         stream: false,
-        model: "mock-openai/gpt-5.4",
+        model: "mock-openai/gpt-5.5",
         input: [
           {
             role: "user",
@@ -2783,7 +2783,7 @@ describe("qa mock openai server", () => {
       headers: { "content-type": "application/json" },
       body: JSON.stringify({
         stream: false,
-        model: "mock-openai/gpt-5.4",
+        model: "mock-openai/gpt-5.5",
         input: [
           {
             role: "user",
@@ -2956,7 +2956,7 @@ describe("qa mock openai server", () => {
     expect(outputText(await response.json())).toBe("NO_REPLY");
   });
 
-  it("advertises Anthropic claude-opus-4-6 baseline model on /v1/models", async () => {
+  it("advertises Anthropic claude-opus-4-7 baseline model on /v1/models", async () => {
     const server = await startQaMockOpenAiServer({
       host: "127.0.0.1",
       port: 0,
@@ -2969,7 +2969,7 @@ describe("qa mock openai server", () => {
     expect(response.status).toBe(200);
     const body = (await response.json()) as { data: Array<{ id: string }> };
     const ids = body.data.map((entry) => entry.id);
-    expect(ids).toContain("claude-opus-4-6");
+    expect(ids).toContain("claude-opus-4-7");
     expect(ids).toContain("gpt-5.5");
   });
 
@@ -2986,7 +2986,7 @@ describe("qa mock openai server", () => {
       method: "POST",
       headers: { "content-type": "application/json" },
       body: JSON.stringify({
-        model: "claude-opus-4-6",
+        model: "claude-opus-4-7",
         max_tokens: 256,
         messages: [
           {
@@ -3011,7 +3011,7 @@ describe("qa mock openai server", () => {
     };
     expect(body.type).toBe("message");
     expect(body.role).toBe("assistant");
-    expect(body.model).toBe("claude-opus-4-6");
+    expect(body.model).toBe("claude-opus-4-7");
     expect(body.stop_reason).toBe("tool_use");
     const toolUseBlock = body.content.find((block) => block.type === "tool_use") as
       | { name: string; input: Record<string, unknown> }
@@ -3022,7 +3022,7 @@ describe("qa mock openai server", () => {
     const debugResponse = await fetch(`${server.baseUrl}/debug/last-request`);
     expect(debugResponse.status).toBe(200);
     const debugPayload = requireRecord(await debugResponse.json(), "debug request");
-    expect(debugPayload.model).toBe("claude-opus-4-6");
+    expect(debugPayload.model).toBe("claude-opus-4-7");
     expect(debugPayload.plannedToolName).toBe("read");
   });
 
@@ -3033,7 +3033,7 @@ describe("qa mock openai server", () => {
       method: "POST",
       headers: { "content-type": "application/json" },
       body: JSON.stringify({
-        model: "claude-opus-4-6",
+        model: "claude-opus-4-7",
         max_tokens: 256,
         tools: [
           {
@@ -3073,7 +3073,7 @@ describe("qa mock openai server", () => {
     const debugResponse = await fetch(`${server.baseUrl}/debug/last-request`);
     expect(debugResponse.status).toBe(200);
     const debugPayload = requireRecord(await debugResponse.json(), "debug request");
-    expect(debugPayload.model).toBe("claude-opus-4-6");
+    expect(debugPayload.model).toBe("claude-opus-4-7");
     expect(debugPayload.plannedToolName).toBe("sessions_spawn");
   });
 
@@ -3097,7 +3097,7 @@ describe("qa mock openai server", () => {
       method: "POST",
       headers: { "content-type": "application/json" },
       body: JSON.stringify({
-        model: "claude-opus-4-6",
+        model: "claude-opus-4-7",
         max_tokens: 256,
         messages: [
           {
@@ -3171,7 +3171,7 @@ describe("qa mock openai server", () => {
       method: "POST",
       headers: { "content-type": "application/json" },
       body: JSON.stringify({
-        model: "claude-opus-4-6",
+        model: "claude-opus-4-7",
         max_tokens: 256,
         messages: [
           {
@@ -3252,7 +3252,7 @@ describe("qa mock openai server", () => {
       method: "POST",
       headers: { "content-type": "application/json" },
       body: JSON.stringify({
-        model: "claude-opus-4-6",
+        model: "claude-opus-4-7",
         max_tokens: 256,
         stream: true,
         messages: [
@@ -3293,7 +3293,7 @@ describe("qa mock openai server", () => {
       method: "POST",
       headers: { "content-type": "application/json" },
       body: JSON.stringify({
-        model: "claude-opus-4-6",
+        model: "claude-opus-4-7",
         max_tokens: 256,
         stream: true,
         messages: [
@@ -3352,7 +3352,7 @@ describe("qa mock openai server", () => {
       method: "POST",
       headers: { "content-type": "application/json" },
       body: JSON.stringify({
-        model: "claude-opus-4-6",
+        model: "claude-opus-4-7",
         max_tokens: 256,
         stream: true,
         system: [
@@ -3395,7 +3395,7 @@ describe("qa mock openai server", () => {
       method: "POST",
       headers: { "content-type": "application/json" },
       body: JSON.stringify({
-        model: "claude-opus-4-6",
+        model: "claude-opus-4-7",
         max_tokens: 256,
         stream: true,
         system: [
@@ -3440,7 +3440,7 @@ describe("qa mock openai server", () => {
     const response = await fetch(`${server.baseUrl}/v1/messages`, {
       method: "POST",
       headers: { "content-type": "application/json" },
-      body: '{"model":"claude-opus-4-6","messages":[',
+      body: '{"model":"claude-opus-4-7","messages":[',
     });
 
     expect(response.status).toBe(400);
@@ -3453,12 +3453,12 @@ describe("qa mock openai server", () => {
     expect(body.error.message).toContain("Malformed JSON body");
   });
 
-  it("defaults empty-string Anthropic /v1/messages model to claude-opus-4-6", async () => {
+  it("defaults empty-string Anthropic /v1/messages model to claude-opus-4-7", async () => {
     // Regression for the loop-7 Copilot finding: a bare `typeof
     // body.model === "string"` check lets an empty-string model leak
     // through to `lastRequest.model` and `responseBody.model`. Empty
     // strings must be treated the same as absent and default to
-    // `"claude-opus-4-6"` so parity consumers can trust the echoed label.
+    // `"claude-opus-4-7"` so parity consumers can trust the echoed label.
     const server = await startQaMockOpenAiServer({
       host: "127.0.0.1",
       port: 0,
@@ -3483,12 +3483,12 @@ describe("qa mock openai server", () => {
     });
     expect(response.status).toBe(200);
     const body = (await response.json()) as { model: string };
-    expect(body.model).toBe("claude-opus-4-6");
+    expect(body.model).toBe("claude-opus-4-7");
 
     const debugResponse = await fetch(`${server.baseUrl}/debug/last-request`);
     expect(debugResponse.status).toBe(200);
     const debug = (await debugResponse.json()) as { model: string };
-    expect(debug.model).toBe("claude-opus-4-6");
+    expect(debug.model).toBe("claude-opus-4-7");
   });
 
   it("scripts a reasoning-only recovery sequence after a replay-safe read", async () => {
@@ -3711,9 +3711,9 @@ describe("resolveProviderVariant", () => {
   });
 
   it("tags prefix-qualified anthropic models", () => {
-    expect(resolveProviderVariant("anthropic/claude-opus-4-6")).toBe("anthropic");
-    expect(resolveProviderVariant("anthropic:claude-opus-4-6")).toBe("anthropic");
-    expect(resolveProviderVariant("claude-cli/claude-opus-4-6")).toBe("anthropic");
+    expect(resolveProviderVariant("anthropic/claude-opus-4-7")).toBe("anthropic");
+    expect(resolveProviderVariant("anthropic:claude-opus-4-7")).toBe("anthropic");
+    expect(resolveProviderVariant("claude-cli/claude-opus-4-7")).toBe("anthropic");
   });
 
   it("tags bare model names by prefix", () => {
@@ -3721,7 +3721,7 @@ describe("resolveProviderVariant", () => {
     expect(resolveProviderVariant("gpt-5.5-alt")).toBe("openai");
     expect(resolveProviderVariant("gpt-4.5")).toBe("openai");
     expect(resolveProviderVariant("o1-preview")).toBe("openai");
-    expect(resolveProviderVariant("claude-opus-4-6")).toBe("anthropic");
+    expect(resolveProviderVariant("claude-opus-4-7")).toBe("anthropic");
     expect(resolveProviderVariant("claude-sonnet-4-6")).toBe("anthropic");
   });
 
@@ -3779,7 +3779,7 @@ describe("qa mock openai server provider variant tagging", () => {
       method: "POST",
       headers: { "content-type": "application/json" },
       body: JSON.stringify({
-        model: "claude-opus-4-6",
+        model: "claude-opus-4-7",
         max_tokens: 256,
         messages: [{ role: "user", content: "Heartbeat check" }],
       }),
@@ -3789,7 +3789,7 @@ describe("qa mock openai server provider variant tagging", () => {
       model: string;
       providerVariant: string;
     };
-    expect(debug.model).toBe("claude-opus-4-6");
+    expect(debug.model).toBe("claude-opus-4-7");
     expect(debug.providerVariant).toBe("anthropic");
   });
 
diff --git a/extensions/qa-lab/src/providers/mock-openai/server.ts b/extensions/qa-lab/src/providers/mock-openai/server.ts
index 5eecb8edb270..be831461d038 100644
--- a/extensions/qa-lab/src/providers/mock-openai/server.ts
+++ b/extensions/qa-lab/src/providers/mock-openai/server.ts
@@ -81,7 +81,7 @@ export function resolveProviderVariant(model: string | undefined): MockOpenAiPro
     return "anthropic";
   }
   // Fall back to model-name prefix matching for bare model strings like
-  // `gpt-5.5` or `claude-opus-4-6`.
+  // `gpt-5.5` or `claude-opus-4-7`.
   if (/^(?:gpt-|o1-|openai-)/.test(trimmed)) {
     return "openai";
   }
@@ -2161,7 +2161,7 @@ async function buildResponsesPayload(
 //
 // The QA parity gate needs two comparable scenario runs: one against the
 // "candidate" (openai/gpt-5.5) and one against the "baseline"
-// (anthropic/claude-opus-4-6). The OpenAI mock above already dispatches all
+// (anthropic/claude-opus-4-7). The OpenAI mock above already dispatches all
 // the scenario prompt branches we care about. Rather than duplicating that
 // machinery, the /v1/messages route below translates Anthropic request
 // shapes into the shared ResponsesInputItem[] format, calls the same
@@ -2384,7 +2384,7 @@ function buildAnthropicMessageResponse(params: {
     id: `msg_mock_${Math.floor(Math.random() * 1_000_000).toString(16)}`,
     type: "message",
     role: "assistant",
-    model: params.model || "claude-opus-4-6",
+    model: params.model || "claude-opus-4-7",
     content,
     stop_reason: stopReason,
     stop_sequence: null,
@@ -2412,7 +2412,7 @@ function buildAnthropicMessageStreamEvents(params: {
         id: messageId,
         type: "message",
         role: "assistant",
-        model: params.model || "claude-opus-4-6",
+        model: params.model || "claude-opus-4-7",
         content: [],
         stop_reason: null,
         stop_sequence: null,
@@ -2511,7 +2511,7 @@ async function buildMessagesPayload(
   // which then confuses parity consumers that assume the mock always
   // echoes the real provider label. Normalize once and reuse everywhere.
   const normalizedModel =
-    typeof body.model === "string" && body.model.trim() !== "" ? body.model : "claude-opus-4-6";
+    typeof body.model === "string" && body.model.trim() !== "" ? body.model : "claude-opus-4-7";
   // Dispatch through the same scenario logic the /v1/responses route uses.
   // Preserve declared tools so route-specific adapters mirror what the
   // real provider request made available to the model.
@@ -2556,7 +2556,7 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n
           { id: "gpt-5.5-alt", object: "model" },
           { id: "gpt-image-1", object: "model" },
           { id: "text-embedding-3-small", object: "model" },
-          { id: "claude-opus-4-6", object: "model" },
+          { id: "claude-opus-4-7", object: "model" },
           { id: "claude-sonnet-4-6", object: "model" },
         ],
       });
diff --git a/extensions/qa-lab/src/providers/shared/mock-model-config.ts b/extensions/qa-lab/src/providers/shared/mock-model-config.ts
index 58e2eab15d50..308b7f23507f 100644
--- a/extensions/qa-lab/src/providers/shared/mock-model-config.ts
+++ b/extensions/qa-lab/src/providers/shared/mock-model-config.ts
@@ -71,8 +71,8 @@ function createMockAnthropicMessagesProvider(baseUrl: string): ModelProviderConf
     },
     models: [
       {
-        id: "claude-opus-4-6",
-        name: "claude-opus-4-6",
+        id: "claude-opus-4-7",
+        name: "claude-opus-4-7",
         api: "anthropic-messages",
         reasoning: false,
         input: ["text", "image"],
diff --git a/extensions/qa-lab/src/qa-gateway-config.test.ts b/extensions/qa-lab/src/qa-gateway-config.test.ts
index 1429ddd88699..42cbc0b6ba40 100644
--- a/extensions/qa-lab/src/qa-gateway-config.test.ts
+++ b/extensions/qa-lab/src/qa-gateway-config.test.ts
@@ -90,7 +90,7 @@ describe("buildQaGatewayConfig", () => {
       workspaceDir: "/tmp/qa-workspace",
       providerMode: "mock-openai",
       primaryModel: "openai/gpt-5.5",
-      alternateModel: "anthropic/claude-opus-4-6",
+      alternateModel: "anthropic/claude-opus-4-7",
     });
 
     expect(getPrimaryModel(cfg.agents?.defaults?.model)).toBe("openai/gpt-5.5");
@@ -101,7 +101,7 @@ describe("buildQaGatewayConfig", () => {
     expect(cfg.models?.providers?.anthropic?.baseUrl).toBe("http://127.0.0.1:44080");
     expect(cfg.models?.providers?.anthropic?.request).toEqual({ allowPrivateNetwork: true });
     expect(cfg.models?.providers?.anthropic?.models.map((model) => model.id)).toContain(
-      "claude-opus-4-6",
+      "claude-opus-4-7",
     );
     expect(cfg.plugins?.allow).toEqual(["acpx", "memory-core"]);
   });
diff --git a/extensions/qa-lab/src/suite-planning.test.ts b/extensions/qa-lab/src/suite-planning.test.ts
index 6a3ffa6f8af8..32b0604e8294 100644
--- a/extensions/qa-lab/src/suite-planning.test.ts
+++ b/extensions/qa-lab/src/suite-planning.test.ts
@@ -174,7 +174,7 @@ describe("qa suite planning helpers", () => {
       makeQaSuiteTestScenario("anthropic-only", {
         config: {
           requiredProvider: "anthropic",
-          requiredModel: "claude-opus-4-6",
+          requiredModel: "claude-opus-4-7",
         },
       }),
     ];
@@ -320,7 +320,7 @@ describe("qa suite planning helpers", () => {
         config: { requiredProvider: "openai", requiredModel: "gpt-5.5" },
       }),
       makeQaSuiteTestScenario("anthropic-only", {
-        config: { requiredProvider: "anthropic", requiredModel: "claude-opus-4-6" },
+        config: { requiredProvider: "anthropic", requiredModel: "claude-opus-4-7" },
       }),
       makeQaSuiteTestScenario("claude-subscription", {
         config: { requiredProvider: "claude-cli", authMode: "subscription" },
diff --git a/extensions/qa-lab/src/suite.summary-json.test.ts b/extensions/qa-lab/src/suite.summary-json.test.ts
index 632e448933a9..dead9540c7d8 100644
--- a/extensions/qa-lab/src/suite.summary-json.test.ts
+++ b/extensions/qa-lab/src/suite.summary-json.test.ts
@@ -67,12 +67,12 @@ describe("buildQaSuiteSummaryJson", () => {
   it("records an Anthropic baseline lane cleanly for parity runs", () => {
     const json = buildQaSuiteSummaryJson({
       ...baseParams,
-      primaryModel: "anthropic/claude-opus-4-6",
+      primaryModel: "anthropic/claude-opus-4-7",
       alternateModel: "anthropic/claude-sonnet-4-6",
     });
-    expect(json.run.primaryModel).toBe("anthropic/claude-opus-4-6");
+    expect(json.run.primaryModel).toBe("anthropic/claude-opus-4-7");
     expect(json.run.primaryProvider).toBe("anthropic");
-    expect(json.run.primaryModelName).toBe("claude-opus-4-6");
+    expect(json.run.primaryModelName).toBe("claude-opus-4-7");
     expect(json.run.alternateModel).toBe("anthropic/claude-sonnet-4-6");
     expect(json.run.alternateProvider).toBe("anthropic");
     expect(json.run.alternateModelName).toBe("claude-sonnet-4-6");
diff --git a/qa/scenarios/models/anthropic-opus-api-key-smoke.md b/qa/scenarios/models/anthropic-opus-api-key-smoke.md
index 21b1f993171c..d13b97c921fe 100644
--- a/qa/scenarios/models/anthropic-opus-api-key-smoke.md
+++ b/qa/scenarios/models/anthropic-opus-api-key-smoke.md
@@ -12,7 +12,7 @@ coverage:
 objective: Verify the regular Anthropic Opus lane can complete a quick chat turn using API-key auth.
 successCriteria:
   - A live-frontier run fails fast unless the selected primary provider is anthropic.
-  - The selected primary model is Anthropic Opus 4.6.
+  - The selected primary model is Anthropic Opus 4.7.
   - The QA gateway worker has an Anthropic API key available through environment auth.
   - The agent replies through the regular Anthropic provider.
 docsRefs:
@@ -24,10 +24,10 @@ codeRefs:
   - extensions/qa-lab/src/suite.ts
 execution:
   kind: flow
-  summary: Run with `pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-6 --alt-model anthropic/claude-opus-4-6 --scenario anthropic-opus-api-key-smoke`.
+  summary: Run with `pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-7 --alt-model anthropic/claude-opus-4-7 --scenario anthropic-opus-api-key-smoke`.
   config:
     requiredProvider: anthropic
-    requiredModel: claude-opus-4-6
+    requiredModel: claude-opus-4-7
     chatPrompt: "Anthropic Opus API key smoke. Reply exactly: ANTHROPIC-OPUS-API-KEY-OK"
     chatExpected: ANTHROPIC-OPUS-API-KEY-OK
 ```
diff --git a/qa/scenarios/models/anthropic-opus-setup-token-smoke.md b/qa/scenarios/models/anthropic-opus-setup-token-smoke.md
index 231403d1e7c4..a67997ef4e6f 100644
--- a/qa/scenarios/models/anthropic-opus-setup-token-smoke.md
+++ b/qa/scenarios/models/anthropic-opus-setup-token-smoke.md
@@ -12,7 +12,7 @@ coverage:
 objective: Verify the regular Anthropic Opus lane can complete a quick chat turn using setup-token auth.
 successCriteria:
   - A live-frontier run fails fast unless the selected primary provider is anthropic.
-  - The selected primary model is Anthropic Opus 4.6.
+  - The selected primary model is Anthropic Opus 4.7.
   - The QA gateway worker stages a token auth profile in the isolated agent store.
   - The agent replies through the regular Anthropic provider.
 docsRefs:
@@ -24,10 +24,10 @@ codeRefs:
   - extensions/qa-lab/src/suite.ts
 execution:
   kind: flow
-  summary: Run with `OPENCLAW_LIVE_SETUP_TOKEN_VALUE=<setup-token> pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-6 --alt-model anthropic/claude-opus-4-6 --scenario anthropic-opus-setup-token-smoke`.
+  summary: Run with `OPENCLAW_LIVE_SETUP_TOKEN_VALUE=<setup-token> pnpm openclaw qa suite --provider-mode live-frontier --model anthropic/claude-opus-4-7 --alt-model anthropic/claude-opus-4-7 --scenario anthropic-opus-setup-token-smoke`.
   config:
     requiredProvider: anthropic
-    requiredModel: claude-opus-4-6
+    requiredModel: claude-opus-4-7
     profileId: "anthropic:qa-setup-token"
     chatPrompt: "Anthropic Opus setup-token smoke. Reply exactly: ANTHROPIC-OPUS-SETUP-TOKEN-OK"
     chatExpected: ANTHROPIC-OPUS-SETUP-TOKEN-OK
diff --git a/scripts/openclaw-cross-os-release-checks.ts b/scripts/openclaw-cross-os-release-checks.ts
index e1f3c7056d54..086afd3f14a2 100644
--- a/scripts/openclaw-cross-os-release-checks.ts
+++ b/scripts/openclaw-cross-os-release-checks.ts
@@ -46,7 +46,7 @@ const providerConfig = {
     extensionId: "openai",
     secretEnv: "OPENAI_API_KEY",
     authChoice: "openai-api-key",
-    model: "openai/gpt-5.4",
+    model: "openai/gpt-5.5",
     baseUrl: "https://api.openai.com/v1",
     timeoutSeconds: CROSS_OS_AGENT_TURN_TIMEOUT_SECONDS,
   },
diff --git a/src/infra/run-node.test.ts b/src/infra/run-node.test.ts
index 30211b0e0cf1..8c8e553b0523 100644
--- a/src/infra/run-node.test.ts
+++ b/src/infra/run-node.test.ts
@@ -1047,9 +1047,9 @@ describe("run-node script", () => {
           "qa",
           "parity-report",
           "--candidate-summary",
-          ".artifacts/qa-e2e/gpt54/qa-suite-summary.json",
+          ".artifacts/qa-e2e/openai-candidate/qa-suite-summary.json",
           "--baseline-summary",
-          ".artifacts/qa-e2e/opus46/qa-suite-summary.json",
+          ".artifacts/qa-e2e/anthropic-baseline/qa-suite-summary.json",
         ],
         env: {
           ...process.env,
@@ -1068,9 +1068,9 @@ describe("run-node script", () => {
           "tsx",
           path.join(tmp, "scripts", "qa-parity-report.ts"),
           "--candidate-summary",
-          ".artifacts/qa-e2e/gpt54/qa-suite-summary.json",
+          ".artifacts/qa-e2e/openai-candidate/qa-suite-summary.json",
           "--baseline-summary",
-          ".artifacts/qa-e2e/opus46/qa-suite-summary.json",
+          ".artifacts/qa-e2e/anthropic-baseline/qa-suite-summary.json",
         ],
       ]);
     });
diff --git a/test/helpers/auto-reply/trigger-handling-test-harness.ts b/test/helpers/auto-reply/trigger-handling-test-harness.ts
index 52112ff96a45..3b1bf9bf7b0f 100644
--- a/test/helpers/auto-reply/trigger-handling-test-harness.ts
+++ b/test/helpers/auto-reply/trigger-handling-test-harness.ts
@@ -100,17 +100,17 @@ const modelCatalogMocks = getSharedMocks("openclaw.trigger-handling.model-catalo
   loadModelCatalog: vi.fn().mockResolvedValue([
     {
       provider: "anthropic",
-      id: "claude-opus-4-6",
-      name: "Claude Opus 4.5",
+      id: "claude-opus-4-7",
+      name: "Claude Opus 4.7",
       contextWindow: 200000,
     },
     {
       provider: "openrouter",
-      id: "anthropic/claude-opus-4-6",
-      name: "Claude Opus 4.5 (OpenRouter)",
+      id: "anthropic/claude-opus-4-7",
+      name: "Claude Opus 4.7 (OpenRouter)",
       contextWindow: 200000,
     },
-    { provider: "openai", id: "gpt-5.4-mini", name: "GPT-5.4 mini" },
+    { provider: "openai", id: "gpt-5.5-mini", name: "GPT-5.5 mini" },
     { provider: "openai", id: "gpt-5.5", name: "GPT-5.5" },
     { provider: "openai-codex", id: "gpt-5.5", name: "GPT-5.5 (Codex)" },
     { provider: "minimax", id: "MiniMax-M2.7", name: "MiniMax M2.7" },
@@ -284,7 +284,7 @@ export function makeCfg(home: string): OpenClawConfig {
   return withFastReplyConfig({
     agents: {
       defaults: {
-        model: { primary: "anthropic/claude-opus-4-6" },
+        model: { primary: "anthropic/claude-opus-4-7" },
         workspace: join(home, "openclaw"),
         // Test harness: avoid 1s coalescer idle sleeps that dominate trigger suites.
         blockStreamingCoalesce: { idleMs: 1 },
diff --git a/test/scripts/openclaw-cross-os-release-checks.test.ts b/test/scripts/openclaw-cross-os-release-checks.test.ts
index 3294afb54d44..2a1f7d949d2d 100644
--- a/test/scripts/openclaw-cross-os-release-checks.test.ts
+++ b/test/scripts/openclaw-cross-os-release-checks.test.ts
@@ -205,10 +205,10 @@ describe("scripts/openclaw-cross-os-release-checks", () => {
         OPENCLAW_CROSS_OS_MODEL: "openai/gpt-5.4-nano",
       })?.model,
     ).toBe("openai/gpt-5.4-nano");
-    expect(resolveProviderConfig("openai", {})?.model).toBe("openai/gpt-5.4");
+    expect(resolveProviderConfig("openai", {})?.model).toBe("openai/gpt-5.5");
   });
 
-  it("keeps release cross-OS OpenAI smoke on GPT-5.4", () => {
+  it("keeps release cross-OS OpenAI smoke on GPT-5.5", () => {
     const workflow = readFileSync(
       ".github/workflows/openclaw-cross-os-release-checks-reusable.yml",
       "utf8",
@@ -216,9 +216,9 @@ describe("scripts/openclaw-cross-os-release-checks", () => {
     const releaseChecks = readFileSync(".github/workflows/openclaw-release-checks.yml", "utf8");
 
     expect(workflow).toContain(
-      "OPENCLAW_CROSS_OS_OPENAI_MODEL: ${{ inputs.openai_model || vars.OPENCLAW_CROSS_OS_OPENAI_MODEL || 'openai/gpt-5.4' }}",
+      "OPENCLAW_CROSS_OS_OPENAI_MODEL: ${{ inputs.openai_model || vars.OPENCLAW_CROSS_OS_OPENAI_MODEL || 'openai/gpt-5.5' }}",
     );
-    expect(releaseChecks).toContain("openai_model: openai/gpt-5.4");
+    expect(releaseChecks).toContain("openai_model: openai/gpt-5.5");
   });
 
   it("keeps release smoke plugin allowlists focused on agent-turn essentials", () => {
diff --git a/test/scripts/package-acceptance-workflow.test.ts b/test/scripts/package-acceptance-workflow.test.ts
index fe5f3b017cec..d14a0132935e 100644
--- a/test/scripts/package-acceptance-workflow.test.ts
+++ b/test/scripts/package-acceptance-workflow.test.ts
@@ -413,7 +413,7 @@ describe("package artifact reuse", () => {
     expect(workflow).toContain("suite_id: native-live-src-gateway-profiles-anthropic-opus");
     expect(workflow).toContain("suite_id: native-live-src-gateway-profiles-anthropic-sonnet-haiku");
     expect(workflow).toContain("suite_group: native-live-src-gateway-profiles-anthropic");
-    expect(workflow).toContain("anthropic/claude-opus-4-7,anthropic/claude-opus-4-6");
+    expect(workflow).toContain("OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-7");
     expect(workflow).toContain("anthropic/claude-sonnet-4-6,anthropic/claude-haiku-4-5");
     expect(workflow).toMatch(
       /suite_id: native-live-src-gateway-profiles-fireworks[\s\S]*?advisory: true/u,