diff --git a/extensions/codex/src/app-server/run-attempt.test.ts b/extensions/codex/src/app-server/run-attempt.test.ts
index 881c93d215df..373dc6ac3bd4 100644
--- a/extensions/codex/src/app-server/run-attempt.test.ts
+++ b/extensions/codex/src/app-server/run-attempt.test.ts
@@ -2844,11 +2844,17 @@ describe("runCodexAppServerAttempt", () => {
     expect(report?.provider).toBe("codex");
     expect(report?.model).toBe("gpt-5.4-codex");
     expect(report?.systemPrompt.chars).toBeGreaterThan(0);
+    expect(report?.systemPrompt.hash).toMatch(/^[a-f0-9]{64}$/u);
+    expect(report?.skills.hash).toMatch(/^[a-f0-9]{64}$/u);
 
     const message = report?.tools.entries.find((tool) => tool.name === "message");
     const webSearch = report?.tools.entries.find((tool) => tool.name === "web_search");
     expect(message?.schemaChars).toBeGreaterThan(0);
+    expect(message?.summaryHash).toMatch(/^[a-f0-9]{64}$/u);
+    expect(message?.schemaHash).toMatch(/^[a-f0-9]{64}$/u);
     expect(webSearch?.schemaChars).toBe(0);
+    expect(webSearch?.summaryHash).toMatch(/^[a-f0-9]{64}$/u);
+    expect(webSearch?.schemaHash).toMatch(/^[a-f0-9]{64}$/u);
     expect(report?.tools.schemaChars).toBe(message?.schemaChars);
   });
 
@@ -6574,7 +6580,8 @@ describe("runCodexAppServerAttempt", () => {
       input?: Array<{ text?: string }>;
     };
     expect(turnStartParams.input?.[0]?.text).toBe(exactCommand);
-    expect(result.systemPromptReport?.skills).toEqual({ promptChars: 0, entries: [] });
+    expect(result.systemPromptReport?.skills).toMatchObject({ promptChars: 0, entries: [] });
+    expect(result.systemPromptReport?.skills.hash).toMatch(/^[a-f0-9]{64}$/u);
   });
 
   it("fires llm_input, llm_output, and agent_end hooks for codex turns", async () => {
diff --git a/extensions/codex/src/app-server/run-attempt.ts b/extensions/codex/src/app-server/run-attempt.ts
index 968abe24fb66..8a782d736d5e 100644
--- a/extensions/codex/src/app-server/run-attempt.ts
+++ b/extensions/codex/src/app-server/run-attempt.ts
@@ -5095,6 +5095,7 @@ function buildCodexSystemPromptReport(params: {
       chars: params.developerInstructions.length,
       projectContextChars: 0,
       nonProjectContextChars: params.developerInstructions.length,
+      hash: sha256Text(params.developerInstructions),
     },
     injectedWorkspaceFiles: buildCodexBootstrapInjectionStats({
       bootstrapFiles: params.workspaceBootstrapContext.bootstrapFiles,
@@ -5106,6 +5107,7 @@ function buildCodexSystemPromptReport(params: {
     }),
     skills: {
       promptChars: skillsPrompt.length,
+      hash: sha256Text(skillsPrompt),
       entries: buildCodexSkillReportEntries(skillsPrompt),
     },
     tools: {
@@ -5137,20 +5139,23 @@ function buildCodexToolReportEntry(tool: CodexDynamicToolSpec): CodexToolReportE
     return {
       name: tool.name,
       summaryChars: summary.length,
+      summaryHash: sha256Text(summary),
       schemaChars: 0,
+      schemaHash: stableJsonHash(null),
       propertiesCount: null,
     };
   }
   return {
     name: tool.name,
     summaryChars: summary.length,
+    summaryHash: sha256Text(summary),
     ...buildCodexToolSchemaStats(tool.inputSchema),
   };
 }
 
 function buildCodexToolSchemaStats(
   schema: JsonValue,
-): Pick<CodexToolReportEntry, "schemaChars" | "propertiesCount"> {
+): Pick<CodexToolReportEntry, "schemaChars" | "schemaHash" | "propertiesCount"> {
   const schemaChars = (() => {
     try {
       return JSON.stringify(schema).length;
@@ -5162,10 +5167,34 @@ function buildCodexToolSchemaStats(
     isJsonObject(schema) && isJsonObject(schema.properties) ? schema.properties : null;
   return {
     schemaChars,
+    schemaHash: stableJsonHash(schema),
     propertiesCount: properties ? Object.keys(properties).length : null,
   };
 }
 
+/** Returns the lowercase hex SHA-256 digest of `value`. */
+function sha256Text(value: string): string {
+  return createHash("sha256").update(value).digest("hex");
+}
+
+/** Rebuilds `value` with object keys in code-unit order (localeCompare is locale-dependent). */
+function normalizeForStableHash(value: unknown): unknown {
+  if (Array.isArray(value)) {
+    return value.map((entry) => normalizeForStableHash(entry));
+  }
+  if (value && typeof value === "object") {
+    const record = value as Record<string, unknown>;
+    const keys = Object.keys(record).toSorted();
+    return Object.fromEntries(keys.map((key) => [key, normalizeForStableHash(record[key])]));
+  }
+  return value;
+}
+
+/** Hashes the JSON text of the key-order-normalized `value`; falls back to "null" if none. */
+function stableJsonHash(value: JsonValue): string {
+  return sha256Text(JSON.stringify(normalizeForStableHash(value)) ?? "null");
+}
+
 function buildCodexBootstrapInjectionStats(params: {
   bootstrapFiles: CodexBootstrapFile[];
   injectedFiles: EmbeddedContextFile[];
diff --git a/extensions/qa-lab/confidence-profiles/codex-100.json b/extensions/qa-lab/confidence-profiles/codex-100.json
new file mode 100644
index 000000000000..05053ad13e70
--- /dev/null
+++ b/extensions/qa-lab/confidence-profiles/codex-100.json
@@ -0,0 +1,168 @@
+{
+  "version": 1,
+  "profile": "codex-100",
+  "lanes": [
+    {
+      "id": "tool-defaults-direct",
+      "title": "Tool-defaults direct runtime parity",
+      "kind": "qa-suite-summary",
+      "artifact": "tool-defaults-direct/qa-suite-summary.json",
+      "required": true,
+      "productImpact": "P2",
+      "qaImpact": "P0",
+      "issue": "https://github.com/openclaw/openclaw/issues/80319",
+      "ownerAction": "Fix product or harness before claiming the tool-defaults gate is trusted.",
+      "labels": ["qa-lab", "runtime-parity", "codex"]
+    },
+    {
+      "id": "openclaw-dynamic-tools-direct",
+      "title": "OpenClaw dynamic integration tools direct runtime parity",
+      "kind": "qa-suite-summary",
+      "artifact": "openclaw-dynamic-tools-direct/qa-suite-summary.json",
+      "required": true,
+      "productImpact": "P1",
+      "qaImpact": "P0",
+      "issue": "https://github.com/openclaw/openclaw/issues/80319",
+      "ownerAction": "Investigate any hard failure as an OpenClaw dynamic integration or QA loading regression.",
+      "labels": ["qa-lab", "runtime-parity", "openclaw-dynamic-tools"]
+    },
+    {
+      "id": "tool-defaults-searchable",
+      "title": "Tool-defaults searchable runtime parity",
+      "kind": "qa-suite-summary",
+      "artifact": "tool-defaults-searchable/qa-suite-summary.json",
+      "required": true,
+      "failureVerdict": "mock-limitation",
+      "skipBackfillLane": "openclaw-dynamic-tools-searchable-live",
+      "productImpact": "P4",
+      "qaImpact": "P2",
+      "issue": "https://github.com/openclaw/openclaw/issues/80319",
+      "ownerAction": "Keep as report-only until searchable/deferred tool modeling has no mock-only ambiguity.",
+      "labels": ["qa-lab", "runtime-parity", "searchable-tools"]
+    },
+    {
+      "id": "first-hour-20-direct",
+      "title": "First-hour 20-turn direct runtime parity",
+      "kind": "qa-suite-summary",
+      "artifact": "first-hour-20-direct/qa-suite-summary.json",
+      "required": true,
+      "skipBackfillLane": "codex-native-live",
+      "productImpact": "P1",
+      "qaImpact": "P0",
+      "ownerAction": "Triage row-by-row; do not file product bugs unless live/native proof reproduces.",
+      "labels": ["qa-lab", "runtime-parity", "first-hour"]
+    },
+    {
+      "id": "mock-token-efficiency",
+      "title": "Mock assistant-message token efficiency estimate",
+      "kind": "token-efficiency-summary",
+      "artifact": "first-hour-20-direct-report/qa-runtime-token-efficiency-summary.json",
+      "required": true,
+      "expectedTokenUsageSource": "mock-estimate",
+      "productImpact": "P4",
+      "qaImpact": "P1",
+      "ownerAction": "Fix labeling before trusting token-efficiency comparisons.",
+      "labels": ["qa-lab", "runtime-parity", "token-efficiency"]
+    },
+    {
+      "id": "fault-injection-mock",
+      "title": "Mock fault-injection runtime parity",
+      "kind": "qa-suite-summary",
+      "artifact": "fault-injection-mock/qa-suite-summary.json",
+      "required": true,
+      "skipBackfillLane": "codex-native-live",
+      "productImpact": "P2",
+      "qaImpact": "P0",
+      "ownerAction": "Treat failures as retry/recovery regressions unless evidence shows fixture drift.",
+      "labels": ["qa-lab", "runtime-parity", "fault-injection"]
+    },
+    {
+      "id": "jsonl-expanded",
+      "title": "Expanded curated JSONL replay",
+      "kind": "jsonl-replay-summary",
+      "artifact": "jsonl-expanded/qa-jsonl-replay-summary.json",
+      "required": true,
+      "productImpact": "P2",
+      "qaImpact": "P0",
+      "ownerAction": "Inspect first drift turn and transcript class before filing any product issue.",
+      "labels": ["qa-lab", "runtime-parity", "jsonl-replay"]
+    },
+    {
+      "id": "confidence-self-test",
+      "title": "Seeded confidence negative controls",
+      "kind": "self-test-summary",
+      "artifact": "confidence-self-test/qa-confidence-self-test-summary.json",
+      "required": true,
+      "productImpact": "P4",
+      "qaImpact": "P0",
+      "ownerAction": "Fix the harness before trusting any green parity result.",
+      "labels": ["qa-lab", "confidence-gate", "negative-controls"]
+    },
+    {
+      "id": "codex-native-live",
+      "title": "Codex-native live workspace capability proof",
+      "kind": "qa-suite-summary",
+      "artifact": "codex-native-live/qa-suite-summary.json",
+      "required": true,
+      "missingVerdict": "environment-blocked",
+      "missingReason": "Live/OAuth runner or OpenAI credentials were unavailable for this proof bundle.",
+      "productImpact": "P1",
+      "qaImpact": "P1",
+      "ownerAction": "Run with live-frontier OAuth before using this lane as product proof.",
+      "labels": ["qa-lab", "runtime-parity", "live-proof"]
+    },
+    {
+      "id": "first-hour-live",
+      "title": "Live first-hour capability proof",
+      "kind": "qa-suite-summary",
+      "artifact": "first-hour-live/qa-suite-summary.json",
+      "required": true,
+      "missingVerdict": "environment-blocked",
+      "missingReason": "Live/OAuth runner or OpenAI credentials were unavailable for this proof bundle.",
+      "productImpact": "P1",
+      "qaImpact": "P1",
+      "ownerAction": "Run with live-frontier OAuth before claiming live first-hour coverage.",
+      "labels": ["qa-lab", "runtime-parity", "live-proof"]
+    },
+    {
+      "id": "openclaw-dynamic-tools-searchable-live",
+      "title": "Live OpenClaw dynamic tools searchable proof",
+      "kind": "qa-suite-summary",
+      "artifact": "openclaw-dynamic-tools-searchable-live/qa-suite-summary.json",
+      "required": true,
+      "missingVerdict": "environment-blocked",
+      "missingReason": "Live/OAuth runner or OpenAI credentials were unavailable for this proof bundle.",
+      "productImpact": "P1",
+      "qaImpact": "P1",
+      "ownerAction": "Run with live-frontier OAuth before claiming production-shaped searchable OpenClaw dynamic tool coverage.",
+      "labels": ["qa-lab", "runtime-parity", "searchable-tools", "live-proof"]
+    },
+    {
+      "id": "live-token-efficiency",
+      "title": "Live assistant-message token efficiency",
+      "kind": "token-efficiency-summary",
+      "artifact": "live-token-efficiency/qa-runtime-token-efficiency-summary.json",
+      "required": true,
+      "expectedTokenUsageSource": "live-usage",
+      "missingVerdict": "environment-blocked",
+      "missingReason": "Live/OAuth runner or OpenAI credentials were unavailable for this proof bundle.",
+      "productImpact": "P3",
+      "qaImpact": "P1",
+      "ownerAction": "Run a live-frontier runtime parity summary and regenerate token efficiency.",
+      "labels": ["qa-lab", "runtime-parity", "token-efficiency"]
+    },
+    {
+      "id": "soak-100",
+      "title": "100-turn soak",
+      "kind": "qa-suite-summary",
+      "artifact": "soak-100/qa-suite-summary.json",
+      "required": true,
+      "missingVerdict": "environment-blocked",
+      "missingReason": "Scheduled/Testbox soak runner did not upload artifacts for this proof bundle.",
+      "productImpact": "P3",
+      "qaImpact": "P2",
+      "ownerAction": "Run remotely with a long timeout or record the runner budget blocker.",
+      "labels": ["qa-lab", "runtime-parity", "soak"]
+    }
+  ]
+}
diff --git a/extensions/qa-lab/src/cli.runtime.ts b/extensions/qa-lab/src/cli.runtime.ts
index b1ed377da6ae..db6bc12f3ac9 100644
--- a/extensions/qa-lab/src/cli.runtime.ts
+++ b/extensions/qa-lab/src/cli.runtime.ts
@@ -13,6 +13,12 @@ import {
 import { resolveQaParityPackScenarioIds } from "./agentic-parity.js";
 import { runQaCharacterEval, type QaCharacterModelOptions } from "./character-eval.js";
 import { resolveRepoRelativeOutputDir } from "./cli-paths.js";
+import {
+  buildQaConfidenceReport,
+  readQaConfidenceManifestFile,
+  renderQaConfidenceMarkdownReport,
+  writeQaConfidenceSelfTestArtifacts,
+} from "./confidence-report.js";
 import {
   buildQaCoverageInventory,
   findQaScenarioMatches,
@@ -786,6 +792,60 @@ export async function runQaParityReportCommand(opts: {
   }
 }
 
+/**
+ * Builds the QA confidence report for a manifest, writes the Markdown report and JSON summary
+ * to the output directory, and sets process.exitCode to 1 when the report does not pass.
+ */
+export async function runQaConfidenceReportCommand(opts: {
+  repoRoot?: string;
+  manifest: string;
+  artifactRoot?: string;
+  outputDir?: string;
+  strictZeroUnknowns?: boolean;
+  strictGlobalPass?: boolean;
+}) {
+  const root = path.resolve(opts.repoRoot ?? process.cwd());
+  const fallbackDir = () =>
+    path.join(root, ".artifacts", "qa-e2e", `confidence-${Date.now().toString(36)}`);
+  const outputDir = resolveRepoRelativeOutputDir(root, opts.outputDir) ?? fallbackDir();
+  await fs.mkdir(outputDir, { recursive: true });
+  const payload = await buildQaConfidenceReport({
+    manifest: await readQaConfidenceManifestFile(path.resolve(root, opts.manifest)),
+    artifactRoot: path.resolve(root, opts.artifactRoot ?? "."),
+    strictZeroUnknowns: opts.strictZeroUnknowns === true,
+    strictGlobalPass: opts.strictGlobalPass === true,
+  });
+  const reportPath = path.join(outputDir, "qa-confidence-report.md");
+  const summaryPath = path.join(outputDir, "qa-confidence-summary.json");
+  await fs.writeFile(reportPath, renderQaConfidenceMarkdownReport(payload), "utf8");
+  await fs.writeFile(summaryPath, `${JSON.stringify(payload, null, 2)}\n`, "utf8");
+  process.stdout.write(`QA confidence report: ${reportPath}\n`);
+  process.stdout.write(`QA confidence summary: ${summaryPath}\n`);
+  process.stdout.write(`QA confidence verdict: ${payload.pass ? "pass" : "fail"}\n`);
+  if (!payload.pass) {
+    process.exitCode = 1;
+  }
+}
+
+/** Writes the confidence self-test artifacts, prints paths and verdict; exit code 1 on fail. */
+export async function runQaConfidenceSelfTestCommand(opts: {
+  repoRoot?: string;
+  outputDir?: string;
+}) {
+  const root = path.resolve(opts.repoRoot ?? process.cwd());
+  const fallbackDir = () =>
+    path.join(root, ".artifacts", "qa-e2e", `confidence-self-test-${Date.now().toString(36)}`);
+  const outputDir = resolveRepoRelativeOutputDir(root, opts.outputDir) ?? fallbackDir();
+  const result = await writeQaConfidenceSelfTestArtifacts({ outputDir });
+  const verdict = result.summary.pass ? "pass" : "fail";
+  process.stdout.write(`QA confidence self-test report: ${result.reportPath}\n`);
+  process.stdout.write(`QA confidence self-test summary: ${result.summaryPath}\n`);
+  process.stdout.write(`QA confidence self-test verdict: ${verdict}\n`);
+  if (verdict === "fail") {
+    process.exitCode = 1;
+  }
+}
+
 export async function runQaCoverageReportCommand(opts: {
   repoRoot?: string;
   output?: string;
diff --git a/extensions/qa-lab/src/cli.ts b/extensions/qa-lab/src/cli.ts
index bc156636bfd9..d34c5460831d 100644
--- a/extensions/qa-lab/src/cli.ts
+++ b/extensions/qa-lab/src/cli.ts
@@ -72,6 +72,23 @@ async function runQaParityReport(opts: {
   await runtime.runQaParityReportCommand(opts);
 }
 
+/** Loads the QA lab CLI runtime and forwards `opts` to its confidence-report command. */
+async function runQaConfidenceReport(opts: {
+  repoRoot?: string;
+  manifest: string;
+  artifactRoot?: string;
+  outputDir?: string;
+  strictZeroUnknowns?: boolean;
+  strictGlobalPass?: boolean;
+}) {
+  await (await loadQaLabCliRuntime()).runQaConfidenceReportCommand(opts);
+}
+
+/** Loads the QA lab CLI runtime and forwards `opts` to its confidence self-test command. */
+async function runQaConfidenceSelfTest(opts: { repoRoot?: string; outputDir?: string }) {
+  await (await loadQaLabCliRuntime()).runQaConfidenceSelfTestCommand(opts);
+}
+
 async function runQaCoverageReport(opts: {
   repoRoot?: string;
   output?: string;
@@ -424,6 +441,43 @@ export function registerQaLabCli(program: Command) {
       },
     );
 
+  qa.command("confidence-report")
+    .description("Classify QA proof artifacts into a zero-unknown confidence report")
+    .requiredOption("--manifest <path>", "Confidence profile manifest JSON")
+    .option("--repo-root <path>", "Repository root to target when running from a neutral cwd")
+    .option("--artifact-root <path>", "Root directory for relative artifact paths", ".")
+    .option("--output-dir <path>", "Artifact directory for the confidence report")
+    .option(
+      "--strict-zero-unknowns",
+      "Fail unless every lane passes or has an explicit non-unknown verdict",
+      false,
+    )
+    .option(
+      "--strict-global-pass",
+      "Fail unless every lane passes with no blocked, missing, unknown, classified-fail, or unbackfilled skipped rows",
+      false,
+    )
+    .action(
+      async (opts: {
+        repoRoot?: string;
+        manifest: string;
+        artifactRoot?: string;
+        outputDir?: string;
+        strictZeroUnknowns?: boolean;
+        strictGlobalPass?: boolean;
+      }) => {
+        await runQaConfidenceReport(opts);
+      },
+    );
+
+  qa.command("confidence-self-test")
+    .description("Write seeded negative-control canaries proving the confidence gate detects drift")
+    .option("--repo-root <path>", "Repository root to target when running from a neutral cwd")
+    .option("--output-dir <path>", "Artifact directory for the confidence self-test")
+    .action(async (opts: { repoRoot?: string; outputDir?: string }) => {
+      await runQaConfidenceSelfTest(opts);
+    });
+
   qa.command("jsonl-replay")
     .description("Replay curated JSONL transcripts through the runtime parity replay harness")
     .option("--repo-root <path>", "Repository root to target when running from a neutral cwd")
diff --git a/extensions/qa-lab/src/confidence-report.test.ts b/extensions/qa-lab/src/confidence-report.test.ts
new file mode 100644
index 000000000000..47cbcdadc9f5
--- /dev/null
+++ b/extensions/qa-lab/src/confidence-report.test.ts
@@ -0,0 +1,881 @@
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+import {
+  buildQaConfidenceReport,
+  buildQaConfidenceSelfTestSummary,
+  renderQaConfidenceMarkdownReport,
+  writeQaConfidenceSelfTestArtifacts,
+  type QaConfidenceManifest,
+} from "./confidence-report.js";
+
+describe("qa confidence report", () => {
+  let tempRoot: string;
+
+  beforeEach(async () => {
+    tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-confidence-"));
+  });
+
+  afterEach(async () => {
+    await fs.rm(tempRoot, { recursive: true, force: true });
+  });
+
+  async function writeJson(relativePath: string, payload: unknown) {
+    const filePath = path.join(tempRoot, relativePath);
+    await fs.mkdir(path.dirname(filePath), { recursive: true });
+    await fs.writeFile(filePath, `${JSON.stringify(payload, null, 2)}\n`, "utf8");
+    return filePath;
+  }
+
+  it("passes strict zero-unknowns when every lane passes or has an allowed blocked verdict", async () => {
+    await writeJson("tool-defaults/qa-suite-summary.json", {
+      counts: { total: 20, passed: 18, skipped: 2, failed: 0 },
+      scenarios: [],
+    });
+    await writeJson("token/qa-runtime-token-efficiency-summary.json", {
+      status: "estimated",
+      pass: true,
+      rows: [{ scenarioId: "one", usageSource: "mock-estimate" }],
+    });
+
+    const manifest: QaConfidenceManifest = {
+      version: 1,
+      profile: "codex-100",
+      lanes: [
+        {
+          id: "tool-defaults-direct",
+          title: "Tool defaults direct",
+          kind: "qa-suite-summary",
+          artifact: "tool-defaults/qa-suite-summary.json",
+          required: true,
+        },
+        {
+          id: "mock-token-efficiency",
+          title: "Mock token efficiency",
+          kind: "token-efficiency-summary",
+          artifact: "token/qa-runtime-token-efficiency-summary.json",
+          required: true,
+          expectedTokenUsageSource: "mock-estimate",
+        },
+        {
+          id: "live-token-efficiency",
+          title: "Live token efficiency",
+          kind: "token-efficiency-summary",
+          artifact: "live/qa-runtime-token-efficiency-summary.json",
+          required: true,
+          missingVerdict: "environment-blocked",
+          missingReason: "OPENAI OAuth credentials are not available in this runner.",
+        },
+      ],
+    };
+
+    const report = await buildQaConfidenceReport({
+      manifest,
+      artifactRoot: tempRoot,
+      strictZeroUnknowns: true,
+      generatedAt: "2026-05-12T00:00:00.000Z",
+    });
+
+    expect(report.pass).toBe(true);
+    expect(report.globalPass).toBe(false);
+    expect(report.counts).toMatchObject({ passed: 2, blocked: 1, unknown: 0, failed: 0 });
+    expect(report.lanes.map((lane) => lane.verdict)).toEqual([
+      "pass",
+      "pass",
+      "environment-blocked",
+    ]);
+    expect(report.lanes[0]?.artifactPath).toBe("tool-defaults/qa-suite-summary.json");
+    expect(report.lanes[0]?.artifactPath).not.toContain(tempRoot);
+    expect(report.lanes[0]?.details).toContain("counts.skipped=2");
+    expect(renderQaConfidenceMarkdownReport(report)).toContain("Zero unknowns: yes");
+    expect(renderQaConfidenceMarkdownReport(report)).toContain("Global pass: no");
+  });
+
+  it("does not let optional lanes block strict gates", async () => {
+    await writeJson("required/qa-suite-summary.json", {
+      counts: { total: 1, passed: 1, skipped: 0, failed: 0 },
+      scenarios: [],
+    });
+
+    const report = await buildQaConfidenceReport({
+      manifest: {
+        version: 1,
+        profile: "codex-100",
+        lanes: [
+          {
+            id: "required",
+            title: "Required",
+            kind: "qa-suite-summary",
+            artifact: "required/qa-suite-summary.json",
+            required: true,
+          },
+          {
+            id: "optional-missing",
+            title: "Optional missing",
+            kind: "qa-suite-summary",
+            artifact: "optional/qa-suite-summary.json",
+            required: false,
+          },
+        ],
+      },
+      artifactRoot: tempRoot,
+      strictZeroUnknowns: true,
+      strictGlobalPass: true,
+      generatedAt: "2026-05-13T00:00:00.000Z",
+    });
+
+    expect(report.pass).toBe(true);
+    expect(report.counts).toMatchObject({ total: 1, passed: 1, unknown: 0 });
+    expect(report.failures).toEqual([]);
+    expect(report.lanes[1]).toMatchObject({ id: "optional-missing", status: "missing" });
+  });
+
+  it("fails strict global pass when any lane is blocked, missing, unknown, or classified failed", async () => {
+    await writeJson("classified/qa-suite-summary.json", {
+      counts: { total: 1, passed: 0, skipped: 0, failed: 1 },
+      scenarios: [{ name: "classified", status: "fail" }],
+    });
+    await writeJson("unknown/qa-suite-summary.json", {
+      counts: { total: 1, passed: 0, skipped: 0, failed: 1 },
+      scenarios: [{ name: "unknown", status: "fail" }],
+    });
+
+    const report = await buildQaConfidenceReport({
+      manifest: {
+        version: 1,
+        profile: "codex-100",
+        lanes: [
+          {
+            id: "blocked-live",
+            title: "Blocked live",
+            kind: "qa-suite-summary",
+            artifact: "live/qa-suite-summary.json",
+            required: true,
+            missingVerdict: "environment-blocked",
+            missingReason: "OPENAI_API_KEY missing.",
+          },
+          {
+            id: "missing-soak",
+            title: "Missing soak",
+            kind: "qa-suite-summary",
+            artifact: "soak/qa-suite-summary.json",
+            required: true,
+          },
+          {
+            id: "classified-fixture",
+            title: "Classified fixture",
+            kind: "qa-suite-summary",
+            artifact: "classified/qa-suite-summary.json",
+            required: true,
+            failureVerdict: "fixture-bug",
+          },
+          {
+            id: "unknown-failure",
+            title: "Unknown failure",
+            kind: "qa-suite-summary",
+            artifact: "unknown/qa-suite-summary.json",
+            required: true,
+          },
+        ],
+      },
+      artifactRoot: tempRoot,
+      strictZeroUnknowns: true,
+      strictGlobalPass: true,
+      generatedAt: "2026-05-12T00:00:00.000Z",
+    });
+
+    expect(report.pass).toBe(false);
+    expect(report.zeroUnknowns).toBe(false);
+    expect(report.globalPass).toBe(false);
+    expect(report.counts).toMatchObject({
+      blocked: 1,
+      missing: 1,
+      failed: 1,
+      unknown: 2,
+    });
+    expect(report.failures).toEqual([
+      "blocked-live is blocked: OPENAI_API_KEY missing.",
+      "missing-soak is missing: artifact missing and no missingVerdict was configured",
+      "classified-fixture is classified fixture-bug: qa-suite-summary counts.failed=1 counts.total=1 counts.skipped=0",
+      "unknown-failure is unclassified: qa-suite-summary counts.failed=1 counts.total=1 counts.skipped=0",
+    ]);
+  });
+
+  it("fails strict global pass for skipped suite rows until a backfill lane passes", async () => {
+    await writeJson("report-only/qa-suite-summary.json", {
+      counts: { total: 3, passed: 2, skipped: 1, failed: 0 },
+      scenarios: [],
+    });
+
+    const report = await buildQaConfidenceReport({
+      manifest: {
+        version: 1,
+        profile: "codex-100",
+        lanes: [
+          {
+            id: "report-only",
+            title: "Report-only",
+            kind: "qa-suite-summary",
+            artifact: "report-only/qa-suite-summary.json",
+            required: true,
+          },
+        ],
+      },
+      artifactRoot: tempRoot,
+      strictZeroUnknowns: true,
+      strictGlobalPass: true,
+      generatedAt: "2026-05-12T00:00:00.000Z",
+    });
+
+    expect(report.zeroUnknowns).toBe(true);
+    expect(report.globalPass).toBe(false);
+    expect(report.failures).toEqual([
+      "report-only has 1 skipped row(s) with no passing backfill lane",
+    ]);
+  });
+
+  it("infers skipped suite rows from totals and scenario status", async () => {
+    for (const [artifact, expectedDetail] of [
+      [{ counts: { total: 3, passed: 2, failed: 0 }, scenarios: [] }, "counts.skipped=1"],
+      [
+        {
+          counts: { total: 2, passed: 2, failed: 0 },
+          scenarios: [
+            { name: "passing", status: "pass" },
+            { name: "skipped", status: "skip" },
+          ],
+        },
+        "counts.skipped=1",
+      ],
+    ] as const) {
+      await writeJson("report-only/qa-suite-summary.json", artifact);
+
+      const report = await buildQaConfidenceReport({
+        manifest: {
+          version: 1,
+          profile: "codex-100",
+          lanes: [
+            {
+              id: "report-only",
+              title: "Report-only",
+              kind: "qa-suite-summary",
+              artifact: "report-only/qa-suite-summary.json",
+              required: true,
+            },
+          ],
+        },
+        artifactRoot: tempRoot,
+        strictZeroUnknowns: true,
+        strictGlobalPass: true,
+        generatedAt: "2026-05-12T00:00:00.000Z",
+      });
+
+      expect(report.globalPass).toBe(false);
+      expect(report.failures).toEqual([
+        "report-only has 1 skipped row(s) with no passing backfill lane",
+      ]);
+      expect(report.lanes[0]).toMatchObject({ skippedCount: 1 });
+      expect(report.lanes[0]?.details).toContain(expectedDetail);
+    }
+  });
+
+  it("rejects skipped token reports when a live usage source is required", async () => {
+    await writeJson("live-token/qa-runtime-token-efficiency-summary.json", {
+      status: "skipped",
+      pass: true,
+      rows: [],
+    });
+
+    const report = await buildQaConfidenceReport({
+      manifest: {
+        version: 1,
+        profile: "codex-100",
+        lanes: [
+          {
+            id: "live-token-efficiency",
+            title: "Live token efficiency",
+            kind: "token-efficiency-summary",
+            artifact: "live-token/qa-runtime-token-efficiency-summary.json",
+            required: true,
+            expectedTokenUsageSource: "live-usage",
+          },
+        ],
+      },
+      artifactRoot: tempRoot,
+      strictZeroUnknowns: true,
+      generatedAt: "2026-05-12T00:00:00.000Z",
+    });
+
+    expect(report.pass).toBe(false);
+    expect(report.lanes[0]).toMatchObject({
+      status: "unknown",
+      details: "token summary has no live-usage rows",
+    });
+  });
+
+  it("preserves partial zero-unknown mode for classified failing lanes", async () => {
+    await writeJson("classified/qa-suite-summary.json", {
+      counts: { total: 1, passed: 0, skipped: 0, failed: 1 },
+      scenarios: [{ name: "classified", status: "fail" }],
+    });
+
+    const report = await buildQaConfidenceReport({
+      manifest: {
+        version: 1,
+        profile: "codex-100",
+        lanes: [
+          {
+            id: "classified-fixture",
+            title: "Classified fixture",
+            kind: "qa-suite-summary",
+            artifact: "classified/qa-suite-summary.json",
+            required: true,
+            failureVerdict: "fixture-bug",
+          },
+        ],
+      },
+      artifactRoot: tempRoot,
+      strictZeroUnknowns: true,
+      generatedAt: "2026-05-12T00:00:00.000Z",
+    });
+
+    expect(report.pass).toBe(true);
+    expect(report.zeroUnknowns).toBe(true);
+    expect(report.globalPass).toBe(false);
+    expect(report.counts.failed).toBe(1);
+  });
+
+  it("passes strict global pass when skipped suite rows are backfilled by a passing lane", async () => {
+    await writeJson("report-only/qa-suite-summary.json", {
+      counts: { total: 3, passed: 2, skipped: 1, failed: 0 },
+      scenarios: [],
+    });
+    await writeJson("live-backfill/qa-suite-summary.json", {
+      counts: { total: 1, passed: 1, skipped: 0, failed: 0 },
+      scenarios: [],
+    });
+
+    const report = await buildQaConfidenceReport({
+      manifest: {
+        version: 1,
+        profile: "codex-100",
+        lanes: [
+          {
+            id: "report-only",
+            title: "Report-only",
+            kind: "qa-suite-summary",
+            artifact: "report-only/qa-suite-summary.json",
+            required: true,
+            skipBackfillLane: "live-backfill",
+          },
+          {
+            id: "live-backfill",
+            title: "Live backfill",
+            kind: "qa-suite-summary",
+            artifact: "live-backfill/qa-suite-summary.json",
+            required: true,
+          },
+        ],
+      },
+      artifactRoot: tempRoot,
+      strictZeroUnknowns: true,
+      strictGlobalPass: true,
+      generatedAt: "2026-05-12T00:00:00.000Z",
+    });
+
+    expect(report.pass).toBe(true);
+    expect(report.zeroUnknowns).toBe(true);
+    expect(report.globalPass).toBe(true);
+    expect(report.lanes[0]).toMatchObject({
+      skippedCount: 1,
+      skipBackfillLane: "live-backfill",
+      skipBackfilled: true,
+    });
+  });
+
+  it("classifies environment-blocking gateway sentinels without turning them into unknowns", async () => {
+    await writeJson("live/qa-suite-summary.json", {
+      counts: { total: 1, passed: 1, skipped: 0, failed: 0 }, // the suite itself reports no failures
+      gatewayLogSentinels: [
+        {
+          kind: "live-quota-or-subscription",
+          verdict: "environment-blocked", // the only sentinel in the artifact
+          owner: "environment",
+          productImpact: "P4",
+          qaImpact: "P0",
+          line: 12,
+          text: "OpenAI quota exceeded",
+        },
+      ],
+      scenarios: [],
+    });
+
+    const report = await buildQaConfidenceReport({
+      manifest: {
+        version: 1,
+        profile: "codex-100",
+        lanes: [
+          {
+            id: "first-hour-live",
+            title: "First hour live",
+            kind: "qa-suite-summary",
+            artifact: "live/qa-suite-summary.json",
+            required: true,
+          },
+        ],
+      },
+      artifactRoot: tempRoot,
+      strictZeroUnknowns: true,
+      generatedAt: "2026-05-13T00:00:00.000Z",
+    });
+
+    expect(report.pass).toBe(true); // strict zero-unknowns is satisfied: the lane is blocked, not unknown
+    expect(report.globalPass).toBe(false);
+    expect(report.counts).toMatchObject({ blocked: 1, unknown: 0 });
+    expect(report.lanes[0]).toMatchObject({
+      status: "blocked",
+      verdict: "environment-blocked",
+    });
+  });
+
+  it("does not let environment sentinels hide separate suite failures", async () => {
+    await writeJson("live/qa-suite-summary.json", {
+      counts: { total: 2, passed: 1, skipped: 0, failed: 1 }, // a real failure next to the sentinel
+      gatewayLogSentinels: [
+        {
+          kind: "live-quota-or-subscription",
+          verdict: "environment-blocked",
+          owner: "environment",
+          line: 12,
+          text: "OpenAI quota exceeded",
+        },
+      ],
+      scenarios: [
+        { name: "quota", status: "pass" },
+        { name: "unrelated-drift", status: "fail" },
+      ],
+    });
+
+    const report = await buildQaConfidenceReport({
+      manifest: {
+        version: 1,
+        profile: "codex-100",
+        lanes: [
+          {
+            id: "first-hour-live",
+            title: "First hour live",
+            kind: "qa-suite-summary",
+            artifact: "live/qa-suite-summary.json",
+            required: true,
+            missingVerdict: "environment-blocked", // set, yet the lane must not end up blocked
+          },
+        ],
+      },
+      artifactRoot: tempRoot,
+      strictZeroUnknowns: true,
+      generatedAt: "2026-05-13T00:00:00.000Z",
+    });
+
+    expect(report.pass).toBe(false);
+    expect(report.counts).toMatchObject({ blocked: 0, unknown: 1 });
+    expect(report.lanes[0]).toMatchObject({ status: "unknown" });
+    expect(report.lanes[0]?.details).toContain("suite also reports failures");
+  });
+
+  it("classifies product and plugin gateway sentinels as known failing lanes", async () => {
+    await writeJson("live/qa-suite-summary.json", {
+      counts: { total: 1, passed: 1, skipped: 0, failed: 0 },
+      scenarios: [
+        {
+          name: "plugin hook health sentinel",
+          status: "pass", // the scenario row passes; only the nested finding signals a problem
+          steps: [],
+          runtimeParity: {
+            scenarioId: "plugin-hook-health-sentinel",
+            drift: "none",
+            cells: {
+              pi: { sentinelFindings: [] },
+              codex: {
+                sentinelFindings: [ // nested several levels below the payload root
+                  {
+                    kind: "plugin-hook-failure",
+                    verdict: "qa-harness-bug",
+                    owner: "plugin",
+                    productImpact: "P1",
+                    qaImpact: "P0",
+                    line: 4,
+                    text: "before_prompt_build hook failed",
+                  },
+                ],
+              },
+            },
+          },
+        },
+      ],
+    });
+
+    const report = await buildQaConfidenceReport({
+      manifest: {
+        version: 1,
+        profile: "codex-100",
+        lanes: [
+          {
+            id: "first-hour-live",
+            title: "First hour live",
+            kind: "qa-suite-summary",
+            artifact: "live/qa-suite-summary.json",
+            required: true,
+          },
+        ],
+      },
+      artifactRoot: tempRoot,
+      strictZeroUnknowns: true,
+      generatedAt: "2026-05-13T00:00:00.000Z",
+    });
+
+    expect(report.pass).toBe(true);
+    expect(report.globalPass).toBe(false);
+    expect(report.counts).toMatchObject({ failed: 1, unknown: 0 });
+    expect(report.lanes[0]).toMatchObject({
+      status: "fail",
+      verdict: "qa-harness-bug", // matches the finding's verdict; the lane declares no failureVerdict
+    });
+  });
+
+  it("treats corrupt artifacts as unknown instead of allowed missing lanes", async () => {
+    const artifactPath = path.join(tempRoot, "live", "qa-suite-summary.json");
+    await fs.mkdir(path.dirname(artifactPath), { recursive: true });
+    await fs.writeFile(artifactPath, "{not-json", "utf8"); // file exists but is not valid JSON
+
+    const report = await buildQaConfidenceReport({
+      manifest: {
+        version: 1,
+        profile: "codex-100",
+        lanes: [
+          {
+            id: "first-hour-live",
+            title: "First hour live",
+            kind: "qa-suite-summary",
+            artifact: "live/qa-suite-summary.json",
+            required: true,
+            missingVerdict: "environment-blocked", // must not be applied to an unreadable artifact
+          },
+        ],
+      },
+      artifactRoot: tempRoot,
+      strictZeroUnknowns: true,
+      generatedAt: "2026-05-13T00:00:00.000Z",
+    });
+
+    expect(report.pass).toBe(false);
+    expect(report.counts).toMatchObject({ blocked: 0, unknown: 1 });
+    expect(report.lanes[0]).toMatchObject({
+      status: "unknown",
+    });
+    expect(report.lanes[0]?.details).toContain("artifact unreadable");
+  });
+
+  it("treats schema-invalid suite artifacts as unknown", async () => {
+    await writeJson("live/qa-suite-summary.json", {}); // valid JSON, but neither counts nor scenarios
+
+    const report = await buildQaConfidenceReport({
+      manifest: {
+        version: 1,
+        profile: "codex-100",
+        lanes: [
+          {
+            id: "first-hour-live",
+            title: "First hour live",
+            kind: "qa-suite-summary",
+            artifact: "live/qa-suite-summary.json",
+            required: true,
+          },
+        ],
+      },
+      artifactRoot: tempRoot,
+      strictZeroUnknowns: true,
+      generatedAt: "2026-05-13T00:00:00.000Z",
+    });
+
+    expect(report.pass).toBe(false);
+    expect(report.counts.unknown).toBe(1);
+    expect(report.lanes[0]?.details).toContain("missing counts.failed and scenarios[]");
+  });
+
+  it("treats empty suite artifacts as unknown", async () => {
+    await writeJson("live/qa-suite-summary.json", {
+      counts: { total: 0, passed: 0, skipped: 0, failed: 0 }, // well-formed, but nothing ran
+      scenarios: [],
+    });
+
+    const report = await buildQaConfidenceReport({
+      manifest: {
+        version: 1,
+        profile: "codex-100",
+        lanes: [
+          {
+            id: "first-hour-live",
+            title: "First hour live",
+            kind: "qa-suite-summary",
+            artifact: "live/qa-suite-summary.json",
+            required: true,
+            failureVerdict: "qa-harness-bug", // present, but the lane must still be reported as unknown
+          },
+        ],
+      },
+      artifactRoot: tempRoot,
+      strictZeroUnknowns: true,
+      generatedAt: "2026-05-13T00:00:00.000Z",
+    });
+
+    expect(report.pass).toBe(false);
+    expect(report.counts).toMatchObject({ failed: 0, unknown: 1 });
+    expect(report.lanes[0]).toMatchObject({ status: "unknown" });
+    expect(report.lanes[0]?.details).toContain("no executed scenarios");
+  });
+
+  it("treats suite count and scenario mismatches as unknown", async () => {
+    await writeJson("live/qa-suite-summary.json", {
+      counts: { total: 2, passed: 2, skipped: 0, failed: 0 }, // counts claim zero failures...
+      scenarios: [
+        { name: "passing", status: "pass" },
+        { name: "stale-failure", status: "fail" }, // ...while a scenario row reports one
+      ],
+    });
+
+    const report = await buildQaConfidenceReport({
+      manifest: {
+        version: 1,
+        profile: "codex-100",
+        lanes: [
+          {
+            id: "first-hour-live",
+            title: "First hour live",
+            kind: "qa-suite-summary",
+            artifact: "live/qa-suite-summary.json",
+            required: true,
+            failureVerdict: "qa-harness-bug",
+          },
+        ],
+      },
+      artifactRoot: tempRoot,
+      strictZeroUnknowns: true,
+      generatedAt: "2026-05-13T00:00:00.000Z",
+    });
+
+    expect(report.pass).toBe(false);
+    expect(report.counts).toMatchObject({ failed: 0, unknown: 1 });
+    expect(report.lanes[0]).toMatchObject({ status: "unknown" });
+    expect(report.lanes[0]?.details).toContain("count/scenario mismatch");
+  });
+
+  it("requires generic summary lanes to expose an explicit pass signal", async () => {
+    await writeJson("runtime/qa-runtime-parity-summary.json", {}); // empty object: nothing that could signal a pass
+
+    const report = await buildQaConfidenceReport({
+      manifest: {
+        version: 1,
+        profile: "codex-100",
+        lanes: [
+          {
+            id: "runtime-parity",
+            title: "Runtime parity",
+            kind: "runtime-parity-summary",
+            artifact: "runtime/qa-runtime-parity-summary.json",
+            required: true,
+          },
+        ],
+      },
+      artifactRoot: tempRoot,
+      strictZeroUnknowns: true,
+      generatedAt: "2026-05-13T00:00:00.000Z",
+    });
+
+    expect(report.pass).toBe(false);
+    expect(report.counts.unknown).toBe(1);
+    expect(report.lanes[0]?.details).toContain("explicit pass signal");
+  });
+
+  it("requires JSONL replay summaries to contain replayed user turns", async () => {
+    for (const [artifact, expectedDetail] of [ // each case: [payload to write, text expected in the lane details]
+      [{ transcripts: [] }, "no transcripts"],
+      [
+        { transcripts: [{ transcriptPath: "empty.jsonl", userTurnCount: 0, drift: [] }] },
+        "no replayed user turns",
+      ],
+      [
+        { transcripts: [{ transcriptPath: "missing-drift.jsonl", userTurnCount: 1 }] },
+        "missing drift array",
+      ],
+    ] as const) {
+      await writeJson("jsonl/qa-jsonl-replay-summary.json", artifact); // every case reuses the same artifact path
+
+      const report = await buildQaConfidenceReport({
+        manifest: {
+          version: 1,
+          profile: "codex-100",
+          lanes: [
+            {
+              id: "jsonl-expanded",
+              title: "Expanded JSONL replay",
+              kind: "jsonl-replay-summary",
+              artifact: "jsonl/qa-jsonl-replay-summary.json",
+              required: true,
+              failureVerdict: "fixture-bug", // present, but each case must still come out unknown
+            },
+          ],
+        },
+        artifactRoot: tempRoot,
+        strictZeroUnknowns: true,
+        generatedAt: "2026-05-13T00:00:00.000Z",
+      });
+
+      expect(report.pass).toBe(false);
+      expect(report.counts).toMatchObject({ failed: 0, unknown: 1 });
+      expect(report.lanes[0]).toMatchObject({ status: "unknown" });
+      expect(report.lanes[0]?.details).toContain(expectedDetail);
+    }
+  });
+
+  it("requires confidence self-test summaries to contain every seeded canary", async () => {
+    for (const [artifact, expectedDetail] of [ // each case: [payload to write, text expected in the lane details]
+      [{ pass: true, canaries: [] }, "no canaries"],
+      [
+        { pass: true, canaries: [{ id: "prompt-drift", detected: true }] }, // claims pass with a single canary
+        "missing expected canaries",
+      ],
+    ] as const) {
+      await writeJson("confidence-self-test/qa-confidence-self-test-summary.json", artifact);
+
+      const report = await buildQaConfidenceReport({
+        manifest: {
+          version: 1,
+          profile: "codex-100",
+          lanes: [
+            {
+              id: "confidence-self-test",
+              title: "Confidence self-test",
+              kind: "self-test-summary",
+              artifact: "confidence-self-test/qa-confidence-self-test-summary.json",
+              required: true,
+              failureVerdict: "qa-harness-bug",
+            },
+          ],
+        },
+        artifactRoot: tempRoot,
+        strictZeroUnknowns: true,
+        generatedAt: "2026-05-13T00:00:00.000Z",
+      });
+
+      expect(report.pass).toBe(false); // the payload's own pass: true is not enough
+      expect(report.counts).toMatchObject({ failed: 0, unknown: 1 });
+      expect(report.lanes[0]).toMatchObject({ status: "unknown" });
+      expect(report.lanes[0]?.details).toContain(expectedDetail);
+    }
+  });
+
+  it("fails strict zero-unknowns for an unclassified failing lane", async () => {
+    await writeJson("first-hour/qa-suite-summary.json", {
+      counts: { total: 18, passed: 17, failed: 1 },
+      scenarios: [{ name: "approval-turn-tool-followthrough", status: "fail", steps: [] }], // one failed row, matching counts.failed
+    });
+
+    const report = await buildQaConfidenceReport({
+      manifest: {
+        version: 1,
+        profile: "codex-100",
+        lanes: [
+          {
+            id: "first-hour-20-direct",
+            title: "First-hour 20 direct",
+            kind: "qa-suite-summary",
+            artifact: "first-hour/qa-suite-summary.json",
+            required: true, // note: no failureVerdict, so the failure has no classification
+          },
+        ],
+      },
+      artifactRoot: tempRoot,
+      strictZeroUnknowns: true,
+      generatedAt: "2026-05-12T00:00:00.000Z",
+    });
+
+    expect(report.pass).toBe(false);
+    expect(report.counts.unknown).toBe(1);
+    expect(report.failures[0]).toContain("first-hour-20-direct is unclassified");
+  });
+
+  it("accepts a classified failing lane without treating it as unknown", async () => {
+    await writeJson("jsonl/qa-jsonl-replay-summary.json", {
+      transcripts: [
+        {
+          transcriptPath: "curated.jsonl",
+          userTurnCount: 2,
+          drift: ["none", "tool-result-shape"], // second entry is a non-"none" drift
+          firstDriftAtTurn: 2,
+        },
+      ],
+    });
+
+    const report = await buildQaConfidenceReport({
+      manifest: {
+        version: 1,
+        profile: "codex-100",
+        lanes: [
+          {
+            id: "jsonl-expanded",
+            title: "Expanded JSONL replay",
+            kind: "jsonl-replay-summary",
+            artifact: "jsonl/qa-jsonl-replay-summary.json",
+            required: true,
+            failureVerdict: "fixture-bug", // classification the failing lane is expected to receive
+            productImpact: "P4",
+            qaImpact: "P1",
+          },
+        ],
+      },
+      artifactRoot: tempRoot,
+      strictZeroUnknowns: true,
+      generatedAt: "2026-05-12T00:00:00.000Z",
+    });
+
+    expect(report.pass).toBe(true); // a classified failure does not break strict zero-unknowns...
+    expect(report.globalPass).toBe(false); // ...but it does prevent a global pass
+    expect(report.counts.failed).toBe(1);
+    expect(report.counts.unknown).toBe(0);
+    expect(report.lanes[0]).toMatchObject({
+      status: "fail",
+      verdict: "fixture-bug",
+      productImpact: "P4",
+      qaImpact: "P1",
+    });
+  });
+
+  it("emits confidence self-test canaries for every drift class we need to catch", async () => {
+    const summary = await buildQaConfidenceSelfTestSummary("2026-05-12T00:00:00.000Z");
+
+    expect(summary.pass).toBe(true);
+    expect(summary.canaries.map((canary) => canary.id)).toEqual([ // toEqual on an array also pins the order
+      "prompt-drift",
+      "tool-description-schema-drift",
+      "runtime-tool-call-drop",
+      "tool-result-mismatch",
+      "failure-mode-drift",
+      "token-efficiency-regression",
+      "jsonl-replay-ordering-drift",
+    ]);
+    expect(summary.canaries.every((canary) => canary.detected)).toBe(true);
+  });
+
+  it("writes confidence self-test artifacts", async () => {
+    const result = await writeQaConfidenceSelfTestArtifacts({
+      outputDir: tempRoot,
+      generatedAt: "2026-05-12T00:00:00.000Z",
+    });
+
+    await expect(fs.stat(result.summaryPath)).resolves.toBeTruthy(); // stat resolving means the file exists
+    await expect(fs.stat(result.reportPath)).resolves.toBeTruthy();
+    const summary = JSON.parse(await fs.readFile(result.summaryPath, "utf8")) as { pass: boolean }; // only `pass` is inspected
+    expect(summary.pass).toBe(true);
+  });
+});
diff --git a/extensions/qa-lab/src/confidence-report.ts b/extensions/qa-lab/src/confidence-report.ts
new file mode 100644
index 000000000000..285a6606baa8
--- /dev/null
+++ b/extensions/qa-lab/src/confidence-report.ts
@@ -0,0 +1,1238 @@
+import fs from "node:fs/promises";
+import path from "node:path";
+import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
+import {
+  formatGatewayLogSentinelSummary,
+  type GatewayLogSentinelFinding,
+} from "./gateway-log-sentinel.js";
+import {
+  buildHarnessParityCell,
+  buildHarnessParityResult,
+  type HarnessParityDrift,
+  type HarnessRuntimeParityCell,
+  type RuntimeParitySystemPromptReport,
+} from "./harness-parity.js";
+import {
+  runRuntimeParityScenario,
+  type RuntimeParityCell,
+  type RuntimeParityDrift,
+  type RuntimeParityResult,
+  type RuntimeParityToolCall,
+} from "./runtime-parity.js";
+import { buildTokenEfficiencyReport } from "./token-efficiency-report.js";
+
+export const QA_CONFIDENCE_VERDICTS = [ // runtime list behind the QaConfidenceVerdict union; also used to validate strings
+  "pass",
+  "product-bug",
+  "qa-harness-bug",
+  "fixture-bug",
+  "optional-gap",
+  "mock-limitation",
+  "environment-blocked",
+] as const; // `as const` keeps the literal types so the union can be derived from it
+
+export type QaConfidenceVerdict = (typeof QA_CONFIDENCE_VERDICTS)[number]; // union of the literals in QA_CONFIDENCE_VERDICTS
+
+export type QaConfidenceLaneKind = // artifact formats a lane may declare; readLaneKind's switch lists the same literals
+  | "qa-suite-summary"
+  | "runtime-parity-summary"
+  | "harness-parity-summary"
+  | "token-efficiency-summary"
+  | "jsonl-replay-summary"
+  | "self-test-summary"
+  | "generic-pass-summary";
+
+export type QaConfidenceManifestLane = { // one lane entry of a confidence manifest, as produced by normalizeManifestLane
+  id: string; // must be unique within a manifest (checked in normalizeQaConfidenceManifest)
+  title: string;
+  kind: QaConfidenceLaneKind;
+  artifact: string; // NOTE(review): presumably resolved against the artifact root via resolveArtifactPath — confirm in the report builder
+  required: boolean; // normalizeManifestLane falls back to true when the raw value is not a boolean
+  failureVerdict?: Exclude<QaConfidenceVerdict, "pass" | "environment-blocked">; // the same exclusion is enforced at runtime
+  missingVerdict?: "environment-blocked" | "optional-gap";
+  missingReason?: string;
+  expectedTokenUsageSource?: "mock-estimate" | "live-usage"; // compared against each row's usageSource in evaluateTokenEfficiencySummary
+  skipBackfillLane?: string; // a lane id in the tests; normalization does not check that the id exists
+  productImpact?: string;
+  qaImpact?: string;
+  issue?: string;
+  ownerAction?: string;
+  labels?: string[]; // omitted by normalizeManifestLane when any raw entry is not a string
+};
+
+export type QaConfidenceManifest = { // normalized manifest returned by normalizeQaConfidenceManifest
+  version: 1; // the only version normalizeQaConfidenceManifest accepts
+  profile: string; // non-blank after normalization
+  lanes: QaConfidenceManifestLane[]; // at least one lane, ids unique, after normalization
+};
+
+export type QaConfidenceLaneStatus = "pass" | "fail" | "blocked" | "missing" | "unknown"; // statusFromPassed maps a non-passing lane to "unknown"
+
+export type QaConfidenceLaneResult = { // element type of QaConfidenceReport.lanes
+  id: string;
+  title: string;
+  kind: QaConfidenceLaneKind;
+  artifact: string;
+  artifactPath: string; // NOTE(review): presumably the resolved form of `artifact` — confirm in the report builder
+  required: boolean;
+  status: QaConfidenceLaneStatus;
+  verdict?: QaConfidenceVerdict; // statusFromPassed leaves this unset for non-passing lanes
+  details: string; // free text; the tests match substrings such as "artifact unreadable"
+  productImpact?: string;
+  qaImpact?: string;
+  issue?: string;
+  ownerAction?: string;
+  labels?: string[];
+  skippedCount?: number;
+  skipBackfillLane?: string;
+  skipBackfilled?: boolean; // expected true in the backfill test when a passing lane covers the skips
+};
+
+export type QaConfidenceReport = { // shape of the value the tests obtain from buildQaConfidenceReport
+  generatedAt: string;
+  profile: string;
+  strictZeroUnknowns: boolean;
+  strictGlobalPass: boolean;
+  pass: boolean; // can be true while globalPass is false, e.g. with a classified failing lane (see tests)
+  zeroUnknowns: boolean;
+  globalPass: boolean;
+  counts: { // the tests assert on failed / blocked / unknown
+    total: number;
+    passed: number;
+    failed: number;
+    blocked: number;
+    missing: number;
+    unknown: number;
+  };
+  failures: string[]; // messages; one test expects "<lane id> is unclassified" in the first entry
+  lanes: QaConfidenceLaneResult[]; // the tests read lanes[0] for the first manifest lane
+};
+
+export type QaConfidenceSelfTestCanary = { // one entry of QaConfidenceSelfTestSummary.canaries
+  id: string; // the ids the self-test is expected to emit are listed in QA_CONFIDENCE_SELF_TEST_CANARY_IDS
+  category:
+    | "prompt"
+    | "tool-schema"
+    | "tool-call"
+    | "tool-result"
+    | "failure-mode"
+    | "token-efficiency"
+    | "jsonl-replay";
+  detected: boolean; // the self-test test requires this to be true for every canary
+  expectedVerdict: Exclude<QaConfidenceVerdict, "pass" | "environment-blocked">; // same restriction as a lane's failureVerdict
+  details: string;
+};
+
+export type QaConfidenceSelfTestSummary = { // result type the tests read from buildQaConfidenceSelfTestSummary
+  generatedAt: string;
+  pass: boolean;
+  canaries: QaConfidenceSelfTestCanary[];
+};
+
+const QA_CONFIDENCE_SELF_TEST_CANARY_IDS = [ // same ids, in the same order, that the self-test canary test asserts
+  "prompt-drift",
+  "tool-description-schema-drift",
+  "runtime-tool-call-drop",
+  "tool-result-mismatch",
+  "failure-mode-drift",
+  "token-efficiency-regression",
+  "jsonl-replay-ordering-drift",
+] as const;
+
+/**
+ * Type guard for non-null, non-array objects.
+ *
+ * @param value Any value, typically freshly parsed JSON.
+ * @returns `true` when `value` can be indexed as a string-keyed record.
+ */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  if (value === null || Array.isArray(value)) {
+    return false;
+  }
+  return typeof value === "object";
+}
+
+/**
+ * Reads a non-blank string.
+ *
+ * @param value Candidate value of any type.
+ * @returns The trimmed text, or `undefined` when `value` is not a string or trims to "".
+ */
+function readString(value: unknown): string | undefined {
+  if (typeof value !== "string") {
+    return undefined;
+  }
+  const trimmed = value.trim();
+  return trimmed.length === 0 ? undefined : trimmed;
+}
+
+/**
+ * Reads a finite number.
+ *
+ * @param value Candidate value of any type.
+ * @returns `value` itself when it is a number other than NaN or ±Infinity, else `undefined`.
+ */
+function readNumber(value: unknown): number | undefined {
+  if (typeof value === "number" && Number.isFinite(value)) {
+    return value;
+  }
+  return undefined;
+}
+
+/**
+ * Reads a boolean without coercion.
+ *
+ * @param value Candidate value of any type.
+ * @returns `value` when it is exactly `true` or `false`, else `undefined`.
+ */
+function readBoolean(value: unknown): boolean | undefined {
+  if (typeof value !== "boolean") {
+    return undefined;
+  }
+  return value;
+}
+
+/**
+ * Reads an array made up solely of strings.
+ *
+ * @param value Candidate value of any type.
+ * @returns A new array holding the same strings, or `undefined` when `value` is not an array
+ *   or contains at least one non-string entry. An empty array yields an empty array.
+ */
+function readStringArray(value: unknown): string[] | undefined {
+  if (!Array.isArray(value)) {
+    return undefined;
+  }
+  const strings: string[] = [];
+  // Index-based walk so that a missing slot reads as undefined and rejects the array.
+  for (let index = 0; index < value.length; index += 1) {
+    const entry: unknown = value[index];
+    if (typeof entry !== "string") {
+      return undefined;
+    }
+    strings.push(entry);
+  }
+  return strings;
+}
+
+/**
+ * Type guard for gateway log sentinel findings embedded in artifacts.
+ *
+ * Only two fields are inspected: `kind` must be a non-blank string and `verdict` must be one
+ * of `QA_CONFIDENCE_VERDICTS`. No other property of the finding is validated.
+ *
+ * @param value Candidate entry of any type.
+ * @returns `true` when both checks succeed.
+ */
+function isGatewayLogSentinelFinding(value: unknown): value is GatewayLogSentinelFinding {
+  if (!isRecord(value)) {
+    return false;
+  }
+  if (readString(value.kind) === undefined) {
+    return false;
+  }
+  const verdict = readString(value.verdict);
+  return verdict !== undefined && isQaConfidenceVerdict(verdict);
+}
+
+/**
+ * Gathers every gateway log sentinel finding found anywhere inside `value`.
+ *
+ * Arrays and objects are walked depth-first. On each object, the arrays stored under
+ * `gatewayLogSentinels` and `sentinelFindings` are harvested (entries rejected by
+ * `isGatewayLogSentinelFinding` are dropped) and are not descended into; every other
+ * property is visited recursively.
+ *
+ * @param value Parsed artifact payload of any shape.
+ * @returns Findings in discovery order; empty when none are present.
+ */
+function collectGatewayLogSentinels(value: unknown): GatewayLogSentinelFinding[] {
+  const sentinelKeys: readonly string[] = ["gatewayLogSentinels", "sentinelFindings"];
+  const collected: GatewayLogSentinelFinding[] = [];
+
+  function walk(node: unknown): void {
+    if (Array.isArray(node)) {
+      for (const item of node) {
+        walk(item);
+      }
+      return;
+    }
+    if (!isRecord(node)) {
+      return;
+    }
+    // Harvest both sentinel lists first so findings on this object precede nested ones.
+    for (const key of sentinelKeys) {
+      const listed = node[key];
+      if (Array.isArray(listed)) {
+        for (const entry of listed) {
+          if (isGatewayLogSentinelFinding(entry)) {
+            collected.push(entry);
+          }
+        }
+      }
+    }
+    for (const key of Object.keys(node)) {
+      if (!sentinelKeys.includes(key)) {
+        walk(node[key]);
+      }
+    }
+  }
+
+  walk(value);
+  return collected;
+}
+
+/**
+ * Type guard that checks a string against `QA_CONFIDENCE_VERDICTS`.
+ *
+ * @param value Text to test; the comparison is exact and case-sensitive.
+ * @returns `true` when `value` equals one of the known verdict literals.
+ */
+function isQaConfidenceVerdict(value: string): value is QaConfidenceVerdict {
+  return QA_CONFIDENCE_VERDICTS.some((verdict) => verdict === value);
+}
+
+/**
+ * Reads a mandatory, non-blank string property of a manifest lane.
+ *
+ * @param record Raw lane object.
+ * @param key Property name; it is also quoted in the error message.
+ * @returns The trimmed property value.
+ * @throws Error when the property is absent, not a string, or blank.
+ */
+function readRequiredString(record: Record<string, unknown>, key: string): string {
+  const text = readString(record[key]);
+  if (text !== undefined) {
+    return text;
+  }
+  throw new Error(`confidence manifest lane missing ${key}`);
+}
+
+/**
+ * Reads an optional verdict field of a manifest lane.
+ *
+ * @param value Raw field value.
+ * @param key Field name, used only in the error message.
+ * @returns The verdict, or `undefined` when the field is absent, blank, or not a string.
+ * @throws Error when the field holds text that is not a known verdict.
+ */
+function readVerdict(value: unknown, key: string): QaConfidenceVerdict | undefined {
+  const candidate = readString(value);
+  if (candidate === undefined || isQaConfidenceVerdict(candidate)) {
+    return candidate;
+  }
+  const allowed = QA_CONFIDENCE_VERDICTS.join(", ");
+  throw new Error(`confidence manifest ${key} must be one of ${allowed}`);
+}
+
+/**
+ * Reads the mandatory `kind` of a manifest lane.
+ *
+ * @param value Raw field value.
+ * @returns The matching lane kind.
+ * @throws Error when the value is not one of the supported kinds; a value that is absent,
+ *   blank, or not a string is reported as "missing".
+ */
+function readLaneKind(value: unknown): QaConfidenceLaneKind {
+  const supportedKinds: readonly QaConfidenceLaneKind[] = [
+    "qa-suite-summary",
+    "runtime-parity-summary",
+    "harness-parity-summary",
+    "token-efficiency-summary",
+    "jsonl-replay-summary",
+    "self-test-summary",
+    "generic-pass-summary",
+  ];
+  const text = readString(value);
+  const kind = supportedKinds.find((candidate) => candidate === text);
+  if (kind === undefined) {
+    throw new Error(`unknown confidence manifest lane kind: ${text ?? "missing"}`);
+  }
+  return kind;
+}
+
+/**
+ * Validates one raw manifest lane and converts it to a `QaConfidenceManifestLane`.
+ *
+ * Optional fields are copied only when they hold a usable value: blank or non-string text
+ * is omitted, and `labels` is omitted unless every entry is a string. `required` falls back
+ * to `true` when it is not a boolean.
+ *
+ * @param value Raw lane entry from the manifest JSON.
+ * @returns The normalized lane.
+ * @throws Error when the entry is not an object, a verdict or token-usage source is not an
+ *   allowed value, the lane kind is unknown, or `id`, `title` or `artifact` is missing.
+ */
+function normalizeManifestLane(value: unknown): QaConfidenceManifestLane {
+  if (!isRecord(value)) {
+    throw new Error("confidence manifest lanes must be objects");
+  }
+
+  // Enum-like fields are validated before the required strings, so their errors surface first.
+  const failureVerdict = readVerdict(value.failureVerdict, "failureVerdict");
+  if (failureVerdict === "pass" || failureVerdict === "environment-blocked") {
+    throw new Error("confidence manifest failureVerdict must classify an actual failure");
+  }
+  const missingVerdict = readVerdict(value.missingVerdict, "missingVerdict");
+  if (
+    !(
+      missingVerdict === undefined ||
+      missingVerdict === "environment-blocked" ||
+      missingVerdict === "optional-gap"
+    )
+  ) {
+    throw new Error(
+      "confidence manifest missingVerdict must be environment-blocked or optional-gap",
+    );
+  }
+  const expectedTokenUsageSource = readString(value.expectedTokenUsageSource);
+  if (
+    !(
+      expectedTokenUsageSource === undefined ||
+      expectedTokenUsageSource === "mock-estimate" ||
+      expectedTokenUsageSource === "live-usage"
+    )
+  ) {
+    throw new Error(
+      "confidence manifest expectedTokenUsageSource must be mock-estimate or live-usage",
+    );
+  }
+
+  const lane: QaConfidenceManifestLane = {
+    id: readRequiredString(value, "id"),
+    title: readRequiredString(value, "title"),
+    kind: readLaneKind(value.kind),
+    artifact: readRequiredString(value, "artifact"),
+    required: readBoolean(value.required) ?? true,
+  };
+
+  // Optional fields are attached in a fixed order so the key order of the result stays stable.
+  if (failureVerdict !== undefined) {
+    lane.failureVerdict = failureVerdict;
+  }
+  if (missingVerdict !== undefined) {
+    lane.missingVerdict = missingVerdict;
+  }
+  const missingReason = readString(value.missingReason);
+  if (missingReason !== undefined) {
+    lane.missingReason = missingReason;
+  }
+  if (expectedTokenUsageSource !== undefined) {
+    lane.expectedTokenUsageSource = expectedTokenUsageSource;
+  }
+  const skipBackfillLane = readString(value.skipBackfillLane);
+  if (skipBackfillLane !== undefined) {
+    lane.skipBackfillLane = skipBackfillLane;
+  }
+  for (const key of ["productImpact", "qaImpact", "issue", "ownerAction"] as const) {
+    const text = readString(value[key]);
+    if (text !== undefined) {
+      lane[key] = text;
+    }
+  }
+  const labels = readStringArray(value.labels);
+  if (labels !== undefined) {
+    lane.labels = labels;
+  }
+  return lane;
+}
+
+/**
+ * Validates a raw confidence manifest and returns its normalized form.
+ *
+ * @param value Parsed manifest JSON of unknown shape.
+ * @returns A manifest with `version` 1, a non-blank `profile`, and normalized lanes.
+ * @throws Error when the value is not an object, `version` is not 1, `profile` is missing,
+ *   `lanes` is not a non-empty array, any lane is invalid, or two lanes share an id.
+ */
+export function normalizeQaConfidenceManifest(value: unknown): QaConfidenceManifest {
+  if (!isRecord(value)) {
+    throw new Error("confidence manifest must be an object");
+  }
+  if (value.version !== 1) {
+    throw new Error("confidence manifest version must be 1");
+  }
+  const profile = readString(value.profile);
+  if (profile === undefined) {
+    throw new Error("confidence manifest missing profile");
+  }
+  const rawLanes = value.lanes;
+  if (!Array.isArray(rawLanes) || rawLanes.length === 0) {
+    throw new Error("confidence manifest must include at least one lane");
+  }
+
+  // Every lane is normalized before ids are compared, so lane errors surface first.
+  const lanes = rawLanes.map((rawLane) => normalizeManifestLane(rawLane));
+  const seenIds = new Set<string>();
+  for (const { id } of lanes) {
+    if (seenIds.has(id)) {
+      throw new Error(`confidence manifest duplicate lane id: ${id}`);
+    }
+    seenIds.add(id);
+  }
+
+  return { version: 1, profile, lanes };
+}
+
+/**
+ * Loads a confidence manifest from a JSON file and normalizes it.
+ *
+ * @param filePath Path of the manifest file.
+ * @returns The normalized manifest.
+ * @throws Error with a "Could not read confidence manifest" message (original error attached
+ *   as `cause`) when the file cannot be read or parsed; validation errors from
+ *   `normalizeQaConfidenceManifest` propagate unchanged.
+ */
+export async function readQaConfidenceManifestFile(
+  filePath: string,
+): Promise<QaConfidenceManifest> {
+  let parsed: unknown;
+  try {
+    const text = await fs.readFile(filePath, "utf8");
+    parsed = JSON.parse(text);
+  } catch (error) {
+    const reason = formatErrorMessage(error);
+    throw new Error(`Could not read confidence manifest at ${filePath}: ${reason}`, {
+      cause: error,
+    });
+  }
+  return normalizeQaConfidenceManifest(parsed);
+}
+
+/**
+ * Turns a manifest artifact reference into a filesystem path.
+ *
+ * @param artifactRoot Directory that relative artifact paths are resolved against.
+ * @param artifact Artifact path from the manifest.
+ * @returns `artifact` unchanged when it is absolute, otherwise the path resolved against
+ *   `artifactRoot`.
+ */
+function resolveArtifactPath(artifactRoot: string, artifact: string): string {
+  if (path.isAbsolute(artifact)) {
+    return artifact;
+  }
+  return path.resolve(artifactRoot, artifact);
+}
+
+/**
+ * Reads a UTF-8 file and parses it as JSON.
+ *
+ * @param filePath Path of the file to read.
+ * @returns The parsed value, typed as `unknown` so callers must validate it.
+ * @throws Whatever `fs.readFile` or `JSON.parse` throws; nothing is caught here.
+ */
+async function readJsonFile(filePath: string): Promise<unknown> {
+  const text = await fs.readFile(filePath, "utf8");
+  const parsed: unknown = JSON.parse(text);
+  return parsed;
+}
+
+/**
+ * Tells whether a caught error carries the `ENOENT` error code.
+ *
+ * @param error Value caught from a failed operation.
+ * @returns `true` when `error` is an object whose `code` property equals "ENOENT".
+ */
+function isMissingFileError(error: unknown): boolean {
+  if (!isRecord(error)) {
+    return false;
+  }
+  return error.code === "ENOENT";
+}
+
+/**
+ * Maps a plain pass/fail flag to the default lane status.
+ *
+ * @param passed Whether the lane's evaluation passed.
+ * @returns Status and verdict "pass" for a passing lane; status "unknown" with no verdict
+ *   otherwise.
+ */
+function statusFromPassed(passed: boolean): Pick<QaConfidenceLaneResult, "status" | "verdict"> {
+  if (passed) {
+    return { status: "pass", verdict: "pass" };
+  }
+  return { status: "unknown" };
+}
+
+type QaConfidenceLaneEvaluation = { // return type shared by the evaluate*Summary functions
+  passed: boolean;
+  details: string; // human-readable explanation of the outcome
+  skippedCount?: number; // set by evaluateQaSuiteSummary only when the computed skip count is non-zero
+  status?: QaConfidenceLaneStatus; // explicit status; the evaluators set "unknown", "blocked" or "fail" here
+  verdict?: QaConfidenceVerdict; // set by evaluateQaSuiteSummary when gateway sentinels decide the outcome
+};
+
+function evaluateQaSuiteSummary(payload: unknown): QaConfidenceLaneEvaluation { // decision order: gateway sentinels, then counts.failed, then scenario rows
+  if (!isRecord(payload)) {
+    return {
+      passed: false,
+      status: "unknown",
+      details: "qa-suite-summary payload was not an object",
+    };
+  }
+  const counts = isRecord(payload.counts) ? payload.counts : undefined;
+  const totalCount = readNumber(counts?.total);
+  const passedCount = readNumber(counts?.passed);
+  const failedCount = readNumber(counts?.failed);
+  const scenarios = Array.isArray(payload.scenarios) ? payload.scenarios : undefined;
+  const failedScenarios = scenarios?.filter( // undefined when there is no scenarios array
+    (scenario) => isRecord(scenario) && scenario.status === "fail",
+  );
+  const skippedScenarioCount =
+    scenarios?.filter(
+      (scenario) =>
+        isRecord(scenario) && (scenario.status === "skip" || scenario.status === "skipped"),
+    ).length ?? 0;
+  const hasScenarioRows = scenarios !== undefined && scenarios.length > 0;
+  const gatewayLogSentinels = collectGatewayLogSentinels(payload); // searched recursively through the whole payload
+  if (gatewayLogSentinels.length > 0) { // any sentinel decides the outcome; the count checks below are skipped
+    const allEnvironmentBlocked = gatewayLogSentinels.every(
+      (finding) => finding.verdict === "environment-blocked",
+    );
+    const suiteHasFailures =
+      (failedCount !== undefined && failedCount > 0) || (failedScenarios?.length ?? 0) > 0;
+    if (allEnvironmentBlocked && suiteHasFailures) { // environment sentinels must not mask real failures
+      return {
+        passed: false,
+        status: "unknown",
+        details: `gateway log sentinel(s): ${formatGatewayLogSentinelSummary(
+          gatewayLogSentinels,
+        )}; suite also reports failures`,
+      };
+    }
+    const firstBlockingSentinel = // first non-environment sentinel if there is one, else the first sentinel
+      gatewayLogSentinels.find((finding) => finding.verdict !== "environment-blocked") ??
+      gatewayLogSentinels[0];
+    return {
+      passed: false,
+      status: allEnvironmentBlocked ? "blocked" : "fail",
+      verdict: allEnvironmentBlocked
+        ? "environment-blocked"
+        : (firstBlockingSentinel?.verdict ?? "product-bug"),
+      details: `gateway log sentinel(s): ${formatGatewayLogSentinelSummary(gatewayLogSentinels)}`,
+    };
+  }
+  if (failedCount !== undefined) { // counts.failed is present: decide from counts, cross-checked against scenario rows
+    if (failedCount === 0 && !(totalCount !== undefined && totalCount > 0) && !hasScenarioRows) { // no failures, but no sign anything ran
+      return {
+        passed: false,
+        status: "unknown",
+        details: "qa-suite-summary has no executed scenarios",
+      };
+    }
+    if (failedScenarios !== undefined && Math.floor(failedCount) !== failedScenarios.length) { // only checked when a scenarios array exists
+      return {
+        passed: false,
+        status: "unknown",
+        details: `qa-suite-summary count/scenario mismatch: counts.failed=${Math.max(
+          0,
+          Math.floor(failedCount),
+        )}, failed scenarios=${failedScenarios.length}`,
+      };
+    }
+    const explicitSkippedCount = readNumber(counts?.skipped);
+    const inferredSkippedCount = // total - passed - failed, available only when total and passed are both numbers
+      totalCount === undefined || passedCount === undefined
+        ? undefined
+        : Math.max(0, Math.floor(totalCount) - Math.floor(passedCount) - Math.floor(failedCount));
+    const skippedCount = Math.max( // largest of the explicit, inferred and per-row skip counts, never below 0
+      0,
+      ...[explicitSkippedCount, inferredSkippedCount, skippedScenarioCount].filter(
+        (count): count is number => count !== undefined,
+      ),
+    );
+    const shouldReportSkippedCount = explicitSkippedCount !== undefined || skippedCount > 0;
+    const skippedDetails = shouldReportSkippedCount
+      ? ` counts.skipped=${Math.max(0, Math.floor(skippedCount))}`
+      : "";
+    const totalDetails =
+      totalCount === undefined ? "" : ` counts.total=${Math.max(0, Math.floor(totalCount))}`;
+    return { // no explicit status here; only `passed` (plus skippedCount when non-zero) is reported
+      passed: failedCount === 0,
+      details: `qa-suite-summary counts.failed=${Math.max(0, Math.floor(failedCount))}${totalDetails}${skippedDetails}`,
+      ...(skippedCount === 0 ? {} : { skippedCount: Math.max(0, Math.floor(skippedCount)) }),
+    };
+  }
+  if (!Array.isArray(payload.scenarios)) { // no counts.failed from here on: scenario rows are the only evidence
+    return {
+      passed: false,
+      status: "unknown",
+      details: "qa-suite-summary missing counts.failed and scenarios[]",
+    };
+  }
+  if (payload.scenarios.length === 0) {
+    return {
+      passed: false,
+      status: "unknown",
+      details: "qa-suite-summary has no executed scenarios",
+    };
+  }
+  const fallbackFailedScenarios = payload.scenarios.filter( // same predicate as failedScenarios above
+    (scenario) => isRecord(scenario) && scenario.status === "fail",
+  );
+  return {
+    passed: fallbackFailedScenarios.length === 0,
+    details: `qa-suite-summary failed scenarios=${fallbackFailedScenarios.length}`,
+  };
+}
+
+/**
+ * Evaluates a generic summary artifact that reports its own outcome.
+ *
+ * Signals are consulted in priority order and the first one present wins:
+ * 1. boolean `pass`;
+ * 2. string `verdict` (only "pass" counts as passing);
+ * 3. string `status` ("pass", "passed", "success" or "succeeded" pass; "fail", "failed" or
+ *    "error" fail; anything else is unknown).
+ *
+ * @param payload Parsed artifact JSON of unknown shape.
+ * @returns The evaluation. `status: "unknown"` is set explicitly when the payload is not an
+ *   object, exposes none of the three signals, or carries an unrecognized `status` string.
+ */
+function evaluatePassSummary(payload: unknown): QaConfidenceLaneEvaluation {
+  if (!isRecord(payload)) {
+    // A non-object artifact carries no outcome at all, so it is flagged as unknown, the same
+    // way evaluateQaSuiteSummary reports a non-object payload, rather than as a plain failure.
+    return { passed: false, status: "unknown", details: "summary payload was not an object" };
+  }
+  const pass = readBoolean(payload.pass);
+  if (pass !== undefined) {
+    return { passed: pass, details: `summary pass=${String(pass)}` };
+  }
+  const verdict = readString(payload.verdict);
+  if (verdict) {
+    return { passed: verdict === "pass", details: `summary verdict=${verdict}` };
+  }
+  const status = readString(payload.status);
+  if (status) {
+    if (
+      status === "pass" ||
+      status === "passed" ||
+      status === "success" ||
+      status === "succeeded"
+    ) {
+      return { passed: true, details: `summary status=${status}` };
+    }
+    if (status === "fail" || status === "failed" || status === "error") {
+      return { passed: false, details: `summary status=${status}` };
+    }
+    // A status string outside both vocabularies cannot be classified either way.
+    return {
+      passed: false,
+      status: "unknown",
+      details: `summary status=${status}`,
+    };
+  }
+  return {
+    passed: false,
+    status: "unknown",
+    details: "summary did not expose an explicit pass signal",
+  };
+}
+
+function evaluateTokenEfficiencySummary( // evaluatePassSummary plus, when an expected usage source is given, a check that every row carries it.
+  payload: unknown,
+  expectedTokenUsageSource: QaConfidenceManifestLane["expectedTokenUsageSource"],
+): QaConfidenceLaneEvaluation {
+  const base = evaluatePassSummary(payload);
+  if (!base.passed || !expectedTokenUsageSource) { // A failing base result, or no expected source, is returned without inspecting rows.
+    return base;
+  }
+  if (!isRecord(payload) || !Array.isArray(payload.rows)) {
+    return {
+      passed: false,
+      details: `token summary missing rows for expected usageSource=${expectedTokenUsageSource}`,
+    };
+  }
+  if (readString(payload.status) === "skipped" || payload.rows.length === 0) {
+    return {
+      passed: false,
+      details: `token summary has no ${expectedTokenUsageSource} rows`,
+    };
+  }
+  const mismatched = payload.rows.filter( // Non-record rows count as mismatched too.
+    (row) => !isRecord(row) || row.usageSource !== expectedTokenUsageSource,
+  );
+  return {
+    passed: mismatched.length === 0,
+    details:
+      mismatched.length === 0
+        ? `token summary rows all usageSource=${expectedTokenUsageSource}`
+        : `token summary has ${mismatched.length} row(s) not labeled ${expectedTokenUsageSource}`,
+  };
+}
+
+function evaluateJsonlReplaySummary(payload: unknown): QaConfidenceLaneEvaluation { // Passes when no transcript drifted; malformed or empty summaries are reported as "unknown".
+  if (!isRecord(payload) || !Array.isArray(payload.transcripts)) {
+    return {
+      passed: false,
+      status: "unknown",
+      details: "jsonl replay summary missing transcripts array",
+    };
+  }
+  if (payload.transcripts.length === 0) {
+    return {
+      passed: false,
+      status: "unknown",
+      details: "jsonl replay summary has no transcripts",
+    };
+  }
+  let drifted = 0;
+  let replayedUserTurns = 0;
+  for (const transcript of payload.transcripts) {
+    if (!isRecord(transcript)) {
+      return {
+        passed: false,
+        status: "unknown",
+        details: "jsonl replay summary has an invalid transcript row",
+      };
+    }
+    const userTurnCount = readNumber(transcript.userTurnCount);
+    if (userTurnCount !== undefined && userTurnCount > 0) { // Only positive counts contribute to the replayed-turn total.
+      replayedUserTurns += userTurnCount;
+    }
+    const hasFirstDrift = transcript.firstDriftAtTurn !== undefined; // Any defined value, including null, marks the transcript as drifted.
+    if (!Array.isArray(transcript.drift)) {
+      return {
+        passed: false,
+        status: "unknown",
+        details: "jsonl replay transcript missing drift array",
+      };
+    }
+    if (userTurnCount !== undefined && transcript.drift.length !== userTurnCount) { // The length check is skipped when userTurnCount could not be read.
+      return {
+        passed: false,
+        status: "unknown",
+        details: "jsonl replay transcript drift count does not match userTurnCount",
+      };
+    }
+    const drift = transcript.drift;
+    const hasDrift = drift.some((entry) => entry !== "none");
+    if (hasFirstDrift || hasDrift) { // drifted counts transcripts, not individual turns.
+      drifted += 1;
+    }
+  }
+  if (replayedUserTurns === 0) { // No transcript reported a positive userTurnCount.
+    return {
+      passed: false,
+      status: "unknown",
+      details: "jsonl replay summary has no replayed user turns",
+    };
+  }
+  return {
+    passed: drifted === 0,
+    details: `jsonl replay turns=${replayedUserTurns}, drifted transcripts=${drifted}`,
+  };
+}
+
+function evaluateSelfTestSummary(payload: unknown): QaConfidenceLaneEvaluation { // Requires every id in QA_CONFIDENCE_SELF_TEST_CANARY_IDS to be present and detected.
+  if (!isRecord(payload) || !Array.isArray(payload.canaries)) {
+    return {
+      passed: false,
+      status: "unknown",
+      details: "confidence self-test summary missing canaries array",
+    };
+  }
+  if (payload.canaries.length === 0) {
+    return {
+      passed: false,
+      status: "unknown",
+      details: "confidence self-test summary has no canaries",
+    };
+  }
+  const canariesById = new Map( // Non-record rows are dropped; later rows with the same id replace earlier ones.
+    payload.canaries
+      .filter((canary): canary is Record<string, unknown> => isRecord(canary))
+      .map((canary) => [readString(canary.id), canary]),
+  );
+  const missingExpected = QA_CONFIDENCE_SELF_TEST_CANARY_IDS.filter(
+    (canaryId) => !canariesById.has(canaryId),
+  );
+  if (missingExpected.length > 0) { // An absent expected canary is "unknown", not a classified failure.
+    return {
+      passed: false,
+      status: "unknown",
+      details: `confidence self-test missing expected canaries: ${missingExpected.join(", ")}`,
+    };
+  }
+  const missed = QA_CONFIDENCE_SELF_TEST_CANARY_IDS.filter(
+    (canaryId) => canariesById.get(canaryId)?.detected !== true,
+  );
+  const pass = readBoolean(payload.pass) ?? missed.length === 0; // Falls back to the canary tally when payload.pass is not read as a boolean.
+  return {
+    passed: pass && missed.length === 0, // An explicit pass=false fails the lane even if every canary was detected.
+    details: `confidence self-test detected=${
+      QA_CONFIDENCE_SELF_TEST_CANARY_IDS.length - missed.length
+    }/${QA_CONFIDENCE_SELF_TEST_CANARY_IDS.length}`,
+  };
+}
+
+function evaluateLaneArtifact( // Routes the artifact payload to the evaluator that matches lane.kind.
+  lane: QaConfidenceManifestLane,
+  payload: unknown,
+): QaConfidenceLaneEvaluation {
+  switch (lane.kind) {
+    case "qa-suite-summary":
+      return evaluateQaSuiteSummary(payload);
+    case "runtime-parity-summary": // These three kinds share the generic pass-summary evaluation.
+    case "harness-parity-summary":
+    case "generic-pass-summary":
+      return evaluatePassSummary(payload);
+    case "token-efficiency-summary":
+      return evaluateTokenEfficiencySummary(payload, lane.expectedTokenUsageSource);
+    case "jsonl-replay-summary":
+      return evaluateJsonlReplaySummary(payload);
+    case "self-test-summary":
+      return evaluateSelfTestSummary(payload);
+    default: // lane.kind matched none of the cases above.
+      return {
+        passed: false,
+        details: `unknown confidence lane kind: ${(lane as { kind?: string }).kind ?? "missing"}`,
+      };
+  }
+}
+
+function resultForMissingLane( // Result for a lane whose artifact file is absent.
+  lane: QaConfidenceManifestLane,
+  artifactPath: string,
+): QaConfidenceLaneResult {
+  if (lane.missingVerdict) { // The manifest pre-classified what a missing artifact means for this lane.
+    return {
+      ...baseLaneResult(lane, artifactPath),
+      status: lane.missingVerdict === "environment-blocked" ? "blocked" : "fail", // Any other missing verdict maps to "fail".
+      verdict: lane.missingVerdict,
+      details: lane.missingReason ?? "artifact missing with explicit missing verdict",
+    };
+  }
+  return {
+    ...baseLaneResult(lane, artifactPath),
+    status: "missing",
+    details: "artifact missing and no missingVerdict was configured",
+  };
+}
+
+function baseLaneResult( // Fields shared by every lane result; the caller supplies status and details.
+  lane: QaConfidenceManifestLane,
+  artifactPath: string,
+): Omit<QaConfidenceLaneResult, "status" | "details"> {
+  const reportArtifactPath = path.isAbsolute(lane.artifact) // For an absolute lane.artifact only the file name of artifactPath is reported.
+    ? path.basename(artifactPath)
+    : lane.artifact;
+  return {
+    id: lane.id,
+    title: lane.title,
+    kind: lane.kind,
+    artifact: lane.artifact,
+    artifactPath: reportArtifactPath,
+    required: lane.required,
+    ...(lane.productImpact ? { productImpact: lane.productImpact } : {}), // Optional fields are copied only when truthy.
+    ...(lane.qaImpact ? { qaImpact: lane.qaImpact } : {}),
+    ...(lane.issue ? { issue: lane.issue } : {}),
+    ...(lane.ownerAction ? { ownerAction: lane.ownerAction } : {}),
+    ...(lane.labels ? { labels: lane.labels } : {}),
+    ...(lane.skipBackfillLane ? { skipBackfillLane: lane.skipBackfillLane } : {}),
+  };
+}
+
+function classifiedFailureResult( // Failure result: "fail" with lane.failureVerdict when configured, otherwise "unknown".
+  lane: QaConfidenceManifestLane,
+  artifactPath: string,
+  details: string,
+): QaConfidenceLaneResult {
+  const base = baseLaneResult(lane, artifactPath);
+  if (lane.failureVerdict) {
+    return {
+      ...base,
+      status: "fail",
+      verdict: lane.failureVerdict,
+      details,
+    };
+  }
+  return { // No failureVerdict in the manifest, so the failure stays unclassified.
+    ...base,
+    status: "unknown",
+    details,
+  };
+}
+
+function evaluatedFailureResult( // Prefers a status or verdict set by the evaluator; otherwise defers to classifiedFailureResult.
+  lane: QaConfidenceManifestLane,
+  artifactPath: string,
+  evaluated: QaConfidenceLaneEvaluation,
+): QaConfidenceLaneResult {
+  if (evaluated.status || evaluated.verdict) {
+    return {
+      ...baseLaneResult(lane, artifactPath),
+      status: evaluated.status ?? "fail", // A verdict without a status is treated as "fail".
+      ...(evaluated.verdict ? { verdict: evaluated.verdict } : {}),
+      details: evaluated.details,
+    };
+  }
+  return classifiedFailureResult(lane, artifactPath, evaluated.details);
+}
+
+async function evaluateLane( // Reads the lane's artifact JSON and converts its evaluation into a lane result.
+  lane: QaConfidenceManifestLane,
+  artifactRoot: string,
+): Promise<QaConfidenceLaneResult> {
+  const artifactPath = resolveArtifactPath(artifactRoot, lane.artifact);
+  let payload: unknown;
+  try {
+    payload = await readJsonFile(artifactPath);
+  } catch (error) {
+    if (!isMissingFileError(error)) { // Errors not recognized by isMissingFileError are reported as "unknown".
+      return {
+        ...baseLaneResult(lane, artifactPath),
+        status: "unknown",
+        details: `artifact unreadable: ${formatErrorMessage(error)}`,
+      };
+    }
+    return resultForMissingLane(lane, artifactPath);
+  }
+  const evaluated = evaluateLaneArtifact(lane, payload);
+  if (!evaluated.passed) {
+    return {
+      ...evaluatedFailureResult(lane, artifactPath, evaluated),
+      ...(evaluated.skippedCount === undefined ? {} : { skippedCount: evaluated.skippedCount }), // skippedCount is carried on failures as well as passes.
+    };
+  }
+  return {
+    ...baseLaneResult(lane, artifactPath),
+    ...statusFromPassed(true),
+    details: evaluated.details,
+    ...(evaluated.skippedCount === undefined ? {} : { skippedCount: evaluated.skippedCount }),
+  };
+}
+
+function applySkipBackfillState( // Annotates lanes that skipped rows with whether their skipBackfillLane passed.
+  lanes: readonly QaConfidenceLaneResult[],
+): QaConfidenceLaneResult[] {
+  const byId = new Map(lanes.map((lane) => [lane.id, lane]));
+  return lanes.map((lane) => {
+    if (!lane.skippedCount || lane.skippedCount <= 0 || !lane.skipBackfillLane) { // Lanes without skips or without a backfill lane are returned unchanged.
+      return lane;
+    }
+    const backfillLane = byId.get(lane.skipBackfillLane);
+    const skipBackfilled = backfillLane?.status === "pass"; // A backfill lane id that is not in the list yields false.
+    return {
+      ...lane,
+      skipBackfilled,
+      details: `${lane.details}; skipped rows backfilled by ${lane.skipBackfillLane}: ${
+        skipBackfilled ? "yes" : "no"
+      }`,
+    };
+  });
+}
+
+function countLaneResults(lanes: readonly QaConfidenceLaneResult[]): QaConfidenceReport["counts"] { // Tallies lanes by status; "missing" lanes are counted under both missing and unknown.
+  return {
+    total: lanes.length,
+    passed: lanes.filter((lane) => lane.status === "pass").length,
+    failed: lanes.filter((lane) => lane.status === "fail").length,
+    blocked: lanes.filter((lane) => lane.status === "blocked").length,
+    missing: lanes.filter((lane) => lane.status === "missing").length,
+    unknown: lanes.filter((lane) => lane.status === "unknown" || lane.status === "missing").length,
+  };
+}
+
+function failuresForLaneResults(lanes: readonly QaConfidenceLaneResult[]): string[] { // One message per lane whose status is "unknown" or "missing".
+  return lanes
+    .filter((lane) => lane.status === "unknown" || lane.status === "missing")
+    .map((lane) => `${lane.id} is unclassified: ${lane.details}`);
+}
+
+function globalFailuresForLaneResults(lanes: readonly QaConfidenceLaneResult[]): string[] { // One message per lane that is blocked, missing, unknown, failed, or has skipped rows without a passing backfill.
+  return lanes.flatMap((lane) => {
+    if (lane.status === "blocked") {
+      return [`${lane.id} is blocked: ${lane.details}`];
+    }
+    if (lane.status === "missing") {
+      return [`${lane.id} is missing: ${lane.details}`];
+    }
+    if (lane.status === "unknown") {
+      return [`${lane.id} is unclassified: ${lane.details}`];
+    }
+    if (lane.status === "fail") {
+      return [`${lane.id} is classified ${lane.verdict ?? "unclassified"}: ${lane.details}`];
+    }
+    if ((lane.skippedCount ?? 0) > 0 && lane.skipBackfilled !== true) { // Reached only for lanes in none of the statuses above.
+      return [`${lane.id} has ${lane.skippedCount} skipped row(s) with no passing backfill lane`];
+    }
+    return [];
+  });
+}
+
+export async function buildQaConfidenceReport(params: {
+  manifest: QaConfidenceManifest;
+  artifactRoot: string;
+  strictZeroUnknowns?: boolean;
+  strictGlobalPass?: boolean;
+  generatedAt?: string;
+}): Promise<QaConfidenceReport> { // Evaluates every manifest lane; counts, failures, and verdicts are derived from required lanes only.
+  // evaluateLane only reads its own artifact, so lanes run concurrently; Promise.all keeps manifest order.
+  const evaluatedLanes = await Promise.all(
+    params.manifest.lanes.map((lane) => evaluateLane(lane, params.artifactRoot)),
+  );
+  const lanes = applySkipBackfillState(evaluatedLanes);
+  const requiredLanes = lanes.filter((lane) => lane.required);
+  const counts = countLaneResults(requiredLanes);
+  const unclassifiedFailures = failuresForLaneResults(requiredLanes);
+  const globalFailures = globalFailuresForLaneResults(requiredLanes);
+  const zeroUnknowns = counts.unknown === 0;
+  const globalPass = zeroUnknowns && globalFailures.length === 0;
+  const strictZeroUnknowns = params.strictZeroUnknowns === true;
+  const strictGlobalPass = params.strictGlobalPass === true;
+  return {
+    generatedAt: params.generatedAt ?? new Date().toISOString(),
+    profile: params.manifest.profile,
+    strictZeroUnknowns,
+    strictGlobalPass,
+    pass: strictGlobalPass // strictGlobalPass takes precedence over strictZeroUnknowns.
+      ? globalPass
+      : strictZeroUnknowns
+        ? zeroUnknowns
+        : unclassifiedFailures.length === 0,
+    zeroUnknowns,
+    globalPass,
+    counts,
+    failures: strictGlobalPass ? globalFailures : unclassifiedFailures,
+    lanes, // The report lists all lanes, including non-required ones.
+  };
+}
+
+function formatVerdict(lane: QaConfidenceLaneResult): string { // Verdict label for report tables; lanes without a verdict render as "unclassified".
+  return lane.verdict ?? "unclassified";
+}
+
+function escapeTableCell(value: string): string { // Escapes "|" and collapses whitespace runs (newlines included) to one space, then trims.
+  return value.replace(/\|/gu, "\\|").replace(/\s+/gu, " ").trim();
+}
+
+export function renderQaConfidenceMarkdownReport(report: QaConfidenceReport): string { // Renders a summary list, a per-lane table, and (when present) a failures section as markdown.
+  const lines = [
+    `# OpenClaw QA Confidence Report - ${report.profile}`,
+    "",
+    `- Generated at: ${report.generatedAt}`,
+    `- Verdict: ${report.pass ? "pass" : "fail"}`,
+    `- Strict zero unknowns: ${report.strictZeroUnknowns ? "yes" : "no"}`,
+    `- Strict global pass: ${report.strictGlobalPass ? "yes" : "no"}`,
+    `- Zero unknowns: ${report.zeroUnknowns ? "yes" : "no"}`,
+    `- Global pass: ${report.globalPass ? "yes" : "no"}`,
+    `- Counts: ${report.counts.passed} pass, ${report.counts.failed} classified fail, ${report.counts.blocked} blocked, ${report.counts.unknown} unknown`,
+    "",
+    "| Lane | Status | Verdict | Product impact | QA impact | Details |",
+    "| --- | --- | --- | --- | --- | --- |",
+  ];
+  for (const lane of report.lanes) { // Manifest- and artifact-supplied strings are escaped so "|" or newlines cannot break the table row.
+    lines.push(
+      `| ${escapeTableCell(lane.id)} | ${lane.status} | ${formatVerdict(lane)} | ${escapeTableCell(lane.productImpact ?? "")} | ${escapeTableCell(lane.qaImpact ?? "")} | ${escapeTableCell(lane.details)} |`,
+    );
+  }
+  if (report.failures.length > 0) {
+    lines.push(
+      "",
+      report.strictGlobalPass ? "## Global Gate Failures" : "## Unclassified Failures", // The heading follows the gate that produced report.failures.
+      "",
+    );
+    for (const failure of report.failures) {
+      lines.push(`- ${failure}`);
+    }
+  }
+  return `${lines.join("\n")}\n`; // Output always ends with a single trailing newline.
+}
+
+function syntheticRuntimeCell( // Builds a fixed runtime cell for self-test canaries; overrides replace individual top-level fields.
+  runtime: RuntimeParityCell["runtime"],
+  overrides: Partial<HarnessRuntimeParityCell> = {},
+): HarnessRuntimeParityCell {
+  return {
+    runtime,
+    transcriptBytes: JSON.stringify({ message: { role: "assistant", content: "ok" } }), // A single serialized assistant message.
+    toolCalls: [],
+    finalText: "ok",
+    usage: {
+      inputTokens: 10,
+      outputTokens: 5,
+      totalTokens: 15,
+    },
+    wallClockMs: 10,
+    bootStateLines: [],
+    ...overrides,
+  };
+}
+
+function syntheticToolCall(overrides: Partial<RuntimeParityToolCall> = {}): RuntimeParityToolCall { // Fixed tool-call record; overrides replace individual fields.
+  return {
+    tool: "openclaw.synthetic",
+    argsHash: "args-a",
+    resultHash: "result-a",
+    ...overrides,
+  };
+}
+
+async function detectRuntimeDrift(params: { // Feeds two canned cells to runRuntimeParityScenario and checks the drift it reports.
+  scenarioId: string;
+  pi: RuntimeParityCell;
+  codex: RuntimeParityCell;
+  expectedDrift: RuntimeParityDrift;
+}): Promise<boolean> {
+  const result = await runRuntimeParityScenario({
+    scenarioId: params.scenarioId,
+    runCell: async (runtime) => ({ // No runtime is executed; the canned cell for the requested runtime is returned.
+      scenarioStatus: "pass",
+      cell: runtime === "pi" ? params.pi : params.codex,
+    }),
+  });
+  return result.drift === params.expectedDrift; // True only for the exact expected drift class.
+}
+
+function syntheticPromptReport( // Baseline prompt report for self-test canaries; callers change a hash to simulate drift.
+  overrides: Partial<RuntimeParitySystemPromptReport> = {},
+): RuntimeParitySystemPromptReport {
+  return {
+    systemPrompt: {
+      chars: 100,
+      projectContextChars: 10,
+      nonProjectContextChars: 90,
+      hash: "system-prompt-a",
+    },
+    skills: {
+      promptChars: 20,
+      hash: "skills-a",
+    },
+    tools: {
+      listChars: 30,
+      schemaChars: 40,
+      entries: [
+        {
+          name: "openclaw.synthetic",
+          summaryChars: 12,
+          summaryHash: "summary-a",
+          schemaChars: 18,
+          schemaHash: "schema-a",
+          propertiesCount: 2,
+        },
+      ],
+    },
+    ...overrides, // Shallow spread: an override replaces a whole top-level section, not individual nested fields.
+  };
+}
+
+function detectHarnessDrift(params: { // Builds two harness cells from the given prompt reports and checks the drift class of their comparison.
+  leftReport: RuntimeParitySystemPromptReport;
+  rightReport: RuntimeParitySystemPromptReport;
+  expectedDrift: HarnessParityDrift;
+}): boolean {
+  const left = buildHarnessParityCell({
+    variant: { id: "left", label: "Left" },
+    cell: syntheticRuntimeCell("pi", { systemPromptReport: params.leftReport }),
+    tokenUsageSource: "mock-estimate",
+  });
+  const right = buildHarnessParityCell({
+    variant: { id: "right", label: "Right" },
+    cell: syntheticRuntimeCell("codex", { systemPromptReport: params.rightReport }),
+    tokenUsageSource: "mock-estimate",
+  });
+  return ( // True only for the exact expected drift class.
+    buildHarnessParityResult({
+      scenarioId: "confidence-self-test",
+      left,
+      right,
+    }).drift === params.expectedDrift
+  );
+}
+
+function detectTokenEfficiencyRegression(): boolean { // Canary: a synthetic token regression must make buildTokenEfficiencyReport fail.
+  const pi = syntheticRuntimeCell("pi", {
+    usage: { inputTokens: 100, outputTokens: 20, totalTokens: 120 },
+  });
+  const codex = syntheticRuntimeCell("codex", { // Every codex usage figure is double the pi figure.
+    usage: { inputTokens: 200, outputTokens: 40, totalTokens: 240 },
+  });
+  const runtimeParity: RuntimeParityResult = {
+    scenarioId: "token-efficiency-regression",
+    cells: { pi, codex },
+    drift: "none",
+  };
+  const report = buildTokenEfficiencyReport({
+    summary: {
+      run: {
+        providerMode: "live-frontier",
+        runtimePair: ["pi", "codex"],
+      },
+      scenarios: [
+        {
+          name: "token-efficiency-regression",
+          status: "pass",
+          runtimeParity,
+        },
+      ],
+    },
+    thresholdPercent: 15,
+    generatedAt: "2026-05-12T00:00:00.000Z", // Fixed timestamp keeps the synthetic report deterministic.
+  });
+  return !report.pass && report.failures.length === 1; // Detected only when the report fails with exactly one failure.
+}
+
+function detectJsonlReplayDrift(): boolean { // Canary: a transcript with a non-"none" drift entry must not pass evaluateJsonlReplaySummary.
+  return !evaluateJsonlReplaySummary({
+    transcripts: [
+      {
+        transcriptPath: "synthetic.jsonl",
+        userTurnCount: 2,
+        drift: ["none", "tool-result-shape"], // Length matches userTurnCount, so the evaluator reaches its drift check.
+        firstDriftAtTurn: 2,
+      },
+    ],
+  }).passed;
+}
+
+export async function buildQaConfidenceSelfTestSummary( // Runs each synthetic canary through the parity, token, and replay checks and records whether it was detected.
+  generatedAt = new Date().toISOString(),
+): Promise<QaConfidenceSelfTestSummary> {
+  const promptDriftDetected = detectHarnessDrift({ // Only the system prompt hash differs from the baseline.
+    leftReport: syntheticPromptReport(),
+    rightReport: syntheticPromptReport({
+      systemPrompt: {
+        chars: 100,
+        projectContextChars: 10,
+        nonProjectContextChars: 90,
+        hash: "system-prompt-b",
+      },
+    }),
+    expectedDrift: "system-prompt",
+  });
+  const toolDescriptionDetected = detectHarnessDrift({ // Only summaryHash differs from the baseline.
+    leftReport: syntheticPromptReport(),
+    rightReport: syntheticPromptReport({
+      tools: {
+        listChars: 30,
+        schemaChars: 40,
+        entries: [
+          {
+            name: "openclaw.synthetic",
+            summaryChars: 12,
+            summaryHash: "summary-b",
+            schemaChars: 18,
+            schemaHash: "schema-a",
+            propertiesCount: 2,
+          },
+        ],
+      },
+    }),
+    expectedDrift: "tool-description",
+  });
+  const toolSchemaDetected = detectHarnessDrift({ // Only schemaHash differs from the baseline.
+    leftReport: syntheticPromptReport(),
+    rightReport: syntheticPromptReport({
+      tools: {
+        listChars: 30,
+        schemaChars: 40,
+        entries: [
+          {
+            name: "openclaw.synthetic",
+            summaryChars: 12,
+            summaryHash: "summary-a",
+            schemaChars: 18,
+            schemaHash: "schema-b",
+            propertiesCount: 2,
+          },
+        ],
+      },
+    }),
+    expectedDrift: "tool-schema",
+  });
+  const runtimeToolCallDropDetected = await detectRuntimeDrift({ // The codex cell omits the tool call that the pi cell has.
+    scenarioId: "runtime-tool-call-drop",
+    pi: syntheticRuntimeCell("pi", { toolCalls: [syntheticToolCall()] }),
+    codex: syntheticRuntimeCell("codex", { toolCalls: [] }),
+    expectedDrift: "tool-call-shape",
+  });
+  const toolResultMismatchDetected = await detectRuntimeDrift({ // Same tool call on both sides, different resultHash.
+    scenarioId: "tool-result-mismatch",
+    pi: syntheticRuntimeCell("pi", { toolCalls: [syntheticToolCall()] }),
+    codex: syntheticRuntimeCell("codex", {
+      toolCalls: [syntheticToolCall({ resultHash: "result-b" })],
+    }),
+    expectedDrift: "tool-result-shape",
+  });
+  const failureModeDriftDetected = await detectRuntimeDrift({ // Only the codex cell carries a transportErrorClass.
+    scenarioId: "failure-mode-drift",
+    pi: syntheticRuntimeCell("pi"),
+    codex: syntheticRuntimeCell("codex", { transportErrorClass: "synthetic-transport" }),
+    expectedDrift: "failure-mode",
+  });
+  const canaries: QaConfidenceSelfTestCanary[] = [
+    {
+      id: "prompt-drift",
+      category: "prompt",
+      detected: promptDriftDetected,
+      expectedVerdict: "qa-harness-bug",
+      details: "synthetic harness prompt hash changed",
+    },
+    {
+      id: "tool-description-schema-drift",
+      category: "tool-schema",
+      detected: toolDescriptionDetected && toolSchemaDetected, // One canary covers both the description and the schema check.
+      expectedVerdict: "qa-harness-bug",
+      details: "synthetic tool description/schema hash changed",
+    },
+    {
+      id: "runtime-tool-call-drop",
+      category: "tool-call",
+      detected: runtimeToolCallDropDetected,
+      expectedVerdict: "product-bug",
+      details: "synthetic runtime transcript omitted a required tool call",
+    },
+    {
+      id: "tool-result-mismatch",
+      category: "tool-result",
+      detected: toolResultMismatchDetected,
+      expectedVerdict: "product-bug",
+      details: "synthetic runtime transcript returned a mismatched tool result",
+    },
+    {
+      id: "failure-mode-drift",
+      category: "failure-mode",
+      detected: failureModeDriftDetected,
+      expectedVerdict: "product-bug",
+      details: "synthetic runtime failed with a different failure mode",
+    },
+    {
+      id: "token-efficiency-regression",
+      category: "token-efficiency",
+      detected: detectTokenEfficiencyRegression(),
+      expectedVerdict: "qa-harness-bug",
+      details: "synthetic token row exceeded the configured efficiency threshold",
+    },
+    {
+      id: "jsonl-replay-ordering-drift",
+      category: "jsonl-replay",
+      detected: detectJsonlReplayDrift(),
+      expectedVerdict: "fixture-bug",
+      details: "synthetic JSONL replay drifted after turn ordering changed",
+    },
+  ];
+  return {
+    generatedAt,
+    pass: canaries.every((canary) => canary.detected), // Passes only when every canary was detected.
+    canaries,
+  };
+}
+
+export function renderQaConfidenceSelfTestMarkdownReport( // Renders the self-test summary as a markdown header plus one table row per canary.
+  summary: QaConfidenceSelfTestSummary,
+): string {
+  const lines = [
+    "# OpenClaw QA Confidence Self-Test",
+    "",
+    `- Generated at: ${summary.generatedAt}`,
+    `- Verdict: ${summary.pass ? "pass" : "fail"}`,
+    "",
+    "| Canary | Category | Detected | Expected verdict | Details |",
+    "| --- | --- | --- | --- | --- |",
+  ];
+  for (const canary of summary.canaries) { // Only the details column is passed through escapeTableCell.
+    lines.push(
+      `| ${canary.id} | ${canary.category} | ${canary.detected ? "yes" : "no"} | ${canary.expectedVerdict} | ${escapeTableCell(canary.details)} |`,
+    );
+  }
+  return `${lines.join("\n")}\n`; // Output always ends with a single trailing newline.
+}
+
+export async function writeQaConfidenceSelfTestArtifacts(params: { // Builds the self-test summary and writes it to outputDir as a markdown report and a JSON file.
+  outputDir: string;
+  generatedAt?: string;
+}): Promise<{ reportPath: string; summaryPath: string; summary: QaConfidenceSelfTestSummary }> {
+  await fs.mkdir(params.outputDir, { recursive: true }); // Creates outputDir and any missing parent directories.
+  const summary = await buildQaConfidenceSelfTestSummary(params.generatedAt);
+  const report = renderQaConfidenceSelfTestMarkdownReport(summary);
+  const reportPath = path.join(params.outputDir, "qa-confidence-self-test-report.md");
+  const summaryPath = path.join(params.outputDir, "qa-confidence-self-test-summary.json");
+  await fs.writeFile(reportPath, report, "utf8");
+  await fs.writeFile(summaryPath, `${JSON.stringify(summary, null, 2)}\n`, "utf8"); // Two-space indented JSON with a trailing newline.
+  return { reportPath, summaryPath, summary };
+}
diff --git a/extensions/qa-lab/src/harness-parity.test.ts b/extensions/qa-lab/src/harness-parity.test.ts
new file mode 100644
index 000000000000..523108be120c
--- /dev/null
+++ b/extensions/qa-lab/src/harness-parity.test.ts
@@ -0,0 +1,284 @@
+import { describe, expect, it } from "vitest";
+import {
+  buildHarnessParityCell,
+  buildHarnessParityResult,
+  type HarnessRuntimeParityCell,
+  type HarnessVariant,
+} from "./harness-parity.js";
+import type { RuntimeId } from "./runtime-parity.js";
+import type { RuntimeParityComparisonMode } from "./runtime-tool-metadata.js";
+
+const LEFT: HarnessVariant = { id: "left", label: "Left", runtime: "pi" }; // Both variants use the "pi" runtime; only id and label differ.
+const RIGHT: HarnessVariant = { id: "right", label: "Right", runtime: "pi" };
+
+const BASE_PROMPT_REPORT = { // Baseline prompt report shared by both sides; tests override one section at a time.
+  systemPrompt: {
+    chars: 100,
+    projectContextChars: 40,
+    nonProjectContextChars: 60,
+    hash: "system-a",
+  },
+  skills: {
+    promptChars: 12,
+    hash: "skills-a",
+  },
+  tools: {
+    schemaChars: 20,
+    entries: [
+      {
+        name: "read",
+        summaryChars: 8,
+        summaryHash: "summary-a",
+        schemaChars: 20,
+        schemaHash: "schema-a",
+        propertiesCount: 1,
+      },
+    ],
+  },
+};
+
+function makeCell( // Builds a runtime cell with fixed defaults and BASE_PROMPT_REPORT; overrides replace top-level fields.
+  runtime: RuntimeId,
+  overrides: Partial<HarnessRuntimeParityCell> = {},
+): HarnessRuntimeParityCell {
+  return {
+    runtime,
+    transcriptBytes: '{"message":{"role":"assistant","content":"same"}}\n',
+    toolCalls: [],
+    finalText: "same",
+    usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
+    wallClockMs: 1,
+    bootStateLines: [],
+    systemPromptReport: BASE_PROMPT_REPORT,
+    ...overrides,
+  };
+}
+
+function classify( // Builds left and right harness cells from the overrides and returns the drift class of their comparison.
+  left: Partial<HarnessRuntimeParityCell>,
+  right: Partial<HarnessRuntimeParityCell>,
+  comparisonMode?: RuntimeParityComparisonMode,
+) {
+  return buildHarnessParityResult({
+    scenarioId: "scenario",
+    left: buildHarnessParityCell({
+      variant: LEFT,
+      cell: makeCell("pi", left),
+      tokenUsageSource: "live-usage",
+    }),
+    right: buildHarnessParityCell({
+      variant: RIGHT,
+      cell: makeCell("pi", right),
+      tokenUsageSource: "live-usage",
+    }),
+    ...(comparisonMode ? { comparisonMode } : {}), // comparisonMode is passed only when the caller supplies one.
+  }).drift;
+}
+
+describe("harness parity", () => { // Exercises buildHarnessParityCell / buildHarnessParityResult through the classify helper.
+  it("classifies prompt and tool surface drift before behavioral drift", () => {
+    expect(
+      classify(
+        {},
+        {
+          systemPromptReport: {
+            ...BASE_PROMPT_REPORT,
+            systemPrompt: { chars: 101, projectContextChars: 40, nonProjectContextChars: 61 }, // No hash supplied; only the char counts differ from the baseline.
+          },
+        },
+      ),
+    ).toBe("system-prompt");
+    expect(
+      classify(
+        {},
+        {
+          systemPromptReport: {
+            ...BASE_PROMPT_REPORT,
+            systemPrompt: { // Same char counts as the baseline; only the hash differs.
+              chars: 100,
+              projectContextChars: 40,
+              nonProjectContextChars: 60,
+              hash: "system-b",
+            },
+          },
+        },
+      ),
+    ).toBe("system-prompt");
+    expect(
+      classify(
+        {},
+        {
+          systemPromptReport: {
+            ...BASE_PROMPT_REPORT,
+            skills: { promptChars: 12, hash: "skills-b" }, // A skills hash change is expected to classify as "system-prompt".
+          },
+        },
+      ),
+    ).toBe("system-prompt");
+    expect(
+      classify(
+        {},
+        {
+          systemPromptReport: {
+            ...BASE_PROMPT_REPORT,
+            tools: {
+              schemaChars: 20,
+              entries: [
+                {
+                  name: "read",
+                  summaryChars: 8,
+                  summaryHash: "summary-b",
+                  schemaChars: 20,
+                  schemaHash: "schema-a",
+                  propertiesCount: 1,
+                },
+              ],
+            },
+          },
+        },
+      ),
+    ).toBe("tool-description");
+    expect(
+      classify(
+        {},
+        {
+          systemPromptReport: {
+            ...BASE_PROMPT_REPORT,
+            tools: {
+              schemaChars: 20,
+              entries: [
+                {
+                  name: "read",
+                  summaryChars: 8,
+                  summaryHash: "summary-a",
+                  schemaChars: 20,
+                  schemaHash: "schema-b",
+                  propertiesCount: 1,
+                },
+              ],
+            },
+          },
+        },
+      ),
+    ).toBe("tool-schema");
+  });
+
+  it("classifies behavioral harness drift", () => {
+    expect(
+      classify(
+        { toolCalls: [{ tool: "read", argsHash: "a", resultHash: "r" }] },
+        { toolCalls: [{ tool: "read", argsHash: "b", resultHash: "r" }] },
+      ),
+    ).toBe("tool-call-shape");
+    expect(
+      classify(
+        { toolCalls: [{ tool: "read", argsHash: "a", resultHash: "r1" }] },
+        { toolCalls: [{ tool: "read", argsHash: "a", resultHash: "r2" }] },
+      ),
+    ).toBe("tool-result-shape");
+    expect(classify({ finalText: "same text" }, { finalText: "different text" })).toBe("text-only");
+    expect(
+      classify(
+        {
+          transcriptBytes:
+            '{"type":"model_change","modelId":"gpt-5.5"}\n' +
+            '{"type":"thinking_level_change","thinkingLevel":"off"}\n' +
+            '{"type":"custom","customType":"model-snapshot"}\n' +
+            '{"message":{"role":"assistant","content":"same"}}\n',
+        },
+        { transcriptBytes: '{"message":{"role":"assistant","content":"same"}}\n' },
+      ),
+    ).toBe("none"); // The three extra typed lines on the left are expected not to count as drift.
+    expect(
+      classify(
+        { transcriptBytes: '{"message":{"role":"assistant"}}\n' },
+        { transcriptBytes: '{"message":{"role":"assistant"}}\n{"message":{"role":"tool"}}\n' },
+      ),
+    ).toBe("structural");
+    expect(
+      classify(
+        { transcriptBytes: '{"role":"assistant","content":"same"}\n' }, // Same check with role/content at the top level instead of under "message".
+        {
+          transcriptBytes:
+            '{"role":"assistant","content":"same"}\n{"role":"tool","content":"same"}\n',
+        },
+      ),
+    ).toBe("structural");
+    expect(classify({ runtimeErrorClass: "timeout" }, {})).toBe("failure-mode");
+  });
+
+  it("honors native workspace comparison mode for outcome-only harness proofs", () => {
+    expect(
+      classify(
+        {
+          transcriptBytes:
+            '{"message":{"role":"assistant","content":"same"}}\n' +
+            '{"message":{"role":"tool","content":"same result"}}\n',
+          toolCalls: [{ tool: "bash", argsHash: "sed-160", resultHash: "same-result" }],
+        },
+        {
+          transcriptBytes: '{"message":{"role":"assistant","content":"same"}}\n',
+          toolCalls: [{ tool: "bash", argsHash: "sed-200", resultHash: "same-result" }],
+        },
+        "codex-native-workspace",
+      ),
+    ).toBe("none");
+
+    expect(
+      classify(
+        { toolCalls: [{ tool: "bash", argsHash: "a", resultHash: "r1" }] },
+        { toolCalls: [{ tool: "bash", argsHash: "b", resultHash: "r2" }] },
+        "outcome-only",
+      ),
+    ).toBe("none"); // Both argsHash and resultHash differ, yet this mode is expected to report no drift.
+  });
+
+  it("keeps prompt and tool surface checks strict under native workspace comparison mode", () => {
+    expect(
+      classify(
+        {},
+        {
+          systemPromptReport: {
+            ...BASE_PROMPT_REPORT,
+            systemPrompt: { chars: 101, projectContextChars: 40, nonProjectContextChars: 61 },
+          },
+          toolCalls: [{ tool: "bash", argsHash: "changed", resultHash: "changed" }],
+        },
+        "codex-native-workspace",
+      ),
+    ).toBe("system-prompt");
+    expect(
+      classify(
+        {},
+        {
+          systemPromptReport: {
+            ...BASE_PROMPT_REPORT,
+            tools: {
+              schemaChars: 20,
+              entries: [{ name: "read", summaryChars: 9, schemaChars: 20, propertiesCount: 1 }], // No hashes; summaryChars is 9 where the baseline has 8.
+            },
+          },
+          toolCalls: [{ tool: "bash", argsHash: "changed", resultHash: "changed" }],
+        },
+        "outcome-only",
+      ),
+    ).toBe("tool-description");
+  });
+
+  it("labels mock token estimates separately from live usage", () => {
+    const sourceCell = makeCell("pi", {
+      usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 },
+    });
+    const cell = buildHarnessParityCell({
+      variant: LEFT,
+      cell: sourceCell,
+      tokenUsageSource: "mock-estimate",
+    });
+    const inputChars = 100 + 12 + 8 + 20 + sourceCell.transcriptBytes.length; // BASE_PROMPT_REPORT: systemPrompt.chars + skills.promptChars + summaryChars + schemaChars, plus the transcript.
+
+    expect(cell.tokenUsageSource).toBe("mock-estimate");
+    expect(cell.tokenUsage.totalTokens).toBeGreaterThan(0);
+    expect(cell.tokenUsage.inputTokens).toBe(Math.ceil(inputChars / 4)); // The expected estimate is one token per four input characters, rounded up.
+    expect(cell.promptStats.toolCount).toBe(1);
+  });
+});
diff --git a/extensions/qa-lab/src/harness-parity.ts b/extensions/qa-lab/src/harness-parity.ts
new file mode 100644
index 000000000000..bc55ae4f1853
--- /dev/null
+++ b/extensions/qa-lab/src/harness-parity.ts
@@ -0,0 +1,491 @@
+import { createHash } from "node:crypto";
+import type {
+  RuntimeId,
+  RuntimeParityCell,
+  RuntimeParityDrift,
+  RuntimeParityToolCall,
+  RuntimeParityUsage,
+} from "./runtime-parity.js";
+import type { RuntimeParityComparisonMode } from "./runtime-tool-metadata.js";
+
+export type HarnessVariant = { // one side (left or right) of a harness parity comparison
+  id: string;
+  label: string; // shown in the Markdown report title
+  runtime?: RuntimeId;
+  model?: string;
+  configPatch?: Record<string, unknown>; // not read in this module; presumably applied by the runner (confirm)
+  systemPromptOverlay?: string; // not read in this module (confirm consumer)
+  toolDescriptionOverlay?: Record<string, string>; // presumably keyed by tool name (confirm)
+};
+
+export type HarnessParityDrift = // runtime drift classes plus the prompt/tool surface drifts below
+  | RuntimeParityDrift
+  | "system-prompt" // systemPromptHash differs between the two cells
+  | "tool-description" // toolDescriptionHash differs between the two cells
+  | "tool-schema"; // toolSchemaHash differs between the two cells
+
+export type HarnessParityPromptStats = { // size counters derived from a system prompt report
+  systemPromptChars: number;
+  projectContextChars: number;
+  nonProjectContextChars: number;
+  skillPromptChars: number;
+  toolSummaryChars: number; // sum of summaryChars over all tool entries
+  toolSchemaChars: number; // taken from the report-level tools.schemaChars, not summed per entry
+  toolCount: number; // number of tool entries in the report
+};
+
+export type RuntimeParitySystemPromptReport = { // all-optional view of a system prompt report as read by this module
+  systemPrompt?: { // hashed as a whole into systemPromptHash
+    chars?: number;
+    projectContextChars?: number;
+    nonProjectContextChars?: number;
+    text?: string;
+    hash?: string;
+    contentHash?: string;
+  };
+  skills?: { // hashed together with systemPrompt into systemPromptHash
+    promptChars?: number;
+    prompt?: string;
+    hash?: string;
+    contentHash?: string;
+  };
+  tools?: {
+    listChars?: number; // part of toolSchemaHash
+    schemaChars?: number; // part of toolSchemaHash; also feeds promptStats.toolSchemaChars
+    entries?: Array<{
+      name?: string; // included in both the description hash and the schema hash
+      summary?: string;
+      summaryHash?: string;
+      summaryChars?: number;
+      schema?: unknown;
+      schemaHash?: string;
+      schemaChars?: number;
+      propertiesCount?: number;
+    }>;
+  };
+};
+
+export type HarnessRuntimeParityCell = RuntimeParityCell & { // runtime cell plus the optional prompt report used for hashing
+  systemPromptReport?: RuntimeParitySystemPromptReport;
+};
+
+export type HarnessParityCell = HarnessRuntimeParityCell & { // shape returned by buildHarnessParityCell
+  variant: HarnessVariant;
+  promptStats: HarnessParityPromptStats;
+  systemPromptHash: string; // stable hash over the report's systemPrompt and skills sections
+  toolDescriptionHash: string; // stable hash over each tool entry's name and summary fields
+  toolSchemaHash: string; // stable hash over tools.listChars/schemaChars and each entry's schema fields
+  tokenUsage: RuntimeParityUsage; // the cell's own usage or a character-based estimate, per tokenUsageSource
+  tokenUsageSource: "live-usage" | "mock-estimate";
+};
+
+export type HarnessParityResult = { // outcome of comparing two cells for one scenario
+  scenarioId: string;
+  left: HarnessParityCell;
+  right: HarnessParityCell;
+  drift: HarnessParityDrift; // first drift class that matched, or "none"
+  driftDetails?: string; // human-readable reason; unset when drift is "none"
+  promptDelta: { // each field is right minus left
+    systemPromptChars: number;
+    projectContextChars: number;
+    skillPromptChars: number;
+    toolSummaryChars: number;
+    toolSchemaChars: number;
+    toolCount: number;
+  };
+  tokenDeltaPercent: number; // change in total tokens from left to right, in percent
+  firstDriftTurn?: number; // 1-based index of the first differing transcript line; unset when drift is "none"
+};
+
+export type HarnessParityReport = { // input to renderHarnessParityMarkdownReport
+  generatedAt: string;
+  providerMode: string;
+  left: HarnessVariant;
+  right: HarnessVariant;
+  results: HarnessParityResult[]; // one table row per entry in the rendered report
+  pass: boolean; // rendered as the "pass"/"fail" verdict; computed by the caller
+  failures: string[]; // listed under "Gate Failures" when non-empty
+};
+
+function sha256(value: string) { // lowercase hex SHA-256 digest of the given string
+  return createHash("sha256").update(value).digest("hex");
+}
+
+/**
+ * Counts the JSONL rows of a transcript that carry a string role, either at
+ * the top level (`role`) or nested under `message.role`.
+ *
+ * Blank rows, rows that fail to parse as JSON, and rows without a string role
+ * contribute nothing to the count.
+ * @returns the number of rows with a string role
+ */
+function countComparableTranscriptRecords(transcriptBytes: string) {
+  const hasStringRole = (row: string): boolean => {
+    try {
+      const record = JSON.parse(row) as { message?: { role?: unknown }; role?: unknown };
+      const nestedRole = record.message ? record.message.role : undefined;
+      return typeof nestedRole === "string" || typeof record.role === "string";
+    } catch {
+      // Ignore malformed QA transcript rows and keep parity classification deterministic.
+      return false;
+    }
+  };
+  return transcriptBytes
+    .split(/\r?\n/u)
+    .map((row) => row.trim())
+    .filter((row) => row.length > 0 && hasStringRole(row)).length;
+}
+
+function normalizeForStableHash(value: unknown, seen = new WeakSet<object>()): unknown {
+  if (!value || typeof value !== "object" || seen.has(value)) {
+    return value && typeof value === "object" ? "[Circular]" : value; // primitives pass through; a revisited ancestor is cut instead of recursing forever
+  }
+  seen.add(value); // `seen` holds only the objects on the current path
+  const record = value as Record<string, unknown>;
+  const walk = (entry: unknown) => normalizeForStableHash(entry, seen);
+  const sortedKeys = () => Object.keys(record).toSorted((a, b) => a.localeCompare(b));
+  const normalized = Array.isArray(value)
+    ? value.map(walk) // arrays keep their element order
+    : Object.fromEntries(sortedKeys().map((key) => [key, walk(record[key])])); // objects get sorted keys so key order cannot change the hash
+  seen.delete(value); // repeated but non-cyclic references are still normalized in full
+  return normalized;
+}
+
+function stableHash(value: unknown) { // SHA-256 over the key-sorted JSON form of value
+  return sha256(JSON.stringify(normalizeForStableHash(value)) ?? "null"); // JSON.stringify returns undefined for e.g. an undefined input; hash "null" then
+}
+
+function readPositiveNumber(value: unknown) { // floors finite numbers above zero; every other input yields 0
+  return typeof value === "number" && Number.isFinite(value) && value > 0 ? Math.floor(value) : 0;
+}
+
+/** Flattens a prompt report into size counters; absent or non-positive values are reported as 0. */
+function buildPromptStats(report: RuntimeParitySystemPromptReport | undefined) {
+  const prompt = report?.systemPrompt;
+  const entries = Array.isArray(report?.tools?.entries) ? report.tools.entries : [];
+  const summarySizes = entries.map((entry) => readPositiveNumber(entry.summaryChars));
+  return {
+    systemPromptChars: readPositiveNumber(prompt?.chars),
+    projectContextChars: readPositiveNumber(prompt?.projectContextChars),
+    nonProjectContextChars: readPositiveNumber(prompt?.nonProjectContextChars),
+    skillPromptChars: readPositiveNumber(report?.skills?.promptChars),
+    toolSummaryChars: summarySizes.reduce((total, chars) => total + chars, 0),
+    toolSchemaChars: readPositiveNumber(report?.tools?.schemaChars),
+    toolCount: entries.length,
+  };
+}
+
+/**
+ * Character-based token estimate (4 chars per token, rounded up): input is the
+ * prompt, skills, tool summary and tool schema chars plus the transcript length;
+ * output is the final text length plus a flat 80 chars per tool call.
+ */
+function estimateUsage(
+  cell: RuntimeParityCell,
+  stats: HarnessParityPromptStats,
+): RuntimeParityUsage {
+  const surfaceChars =
+    stats.systemPromptChars + stats.skillPromptChars + stats.toolSummaryChars + stats.toolSchemaChars;
+  const inputTokens = Math.ceil((surfaceChars + cell.transcriptBytes.length) / 4);
+  const outputTokens = Math.ceil((cell.finalText.length + cell.toolCalls.length * 80) / 4);
+  return {
+    inputTokens,
+    outputTokens,
+    totalTokens: inputTokens + outputTokens,
+  };
+}
+
+function normalizeTextForParity(text: string) { // collapses each whitespace run to one space and trims both ends
+  return text.replace(/\s+/gu, " ").trim();
+}
+
+/** Describes the first count, tool or argsHash mismatch between two call lists; undefined when they match. */
+function compareToolCallShape(left: RuntimeParityToolCall[], right: RuntimeParityToolCall[]) {
+  if (left.length !== right.length) {
+    return `tool call count differs (${left.length} vs ${right.length})`;
+  }
+  for (const [position, leftCall] of left.entries()) {
+    const rightCall = right[position];
+    if (!leftCall || !rightCall) {
+      return `tool call row ${position + 1} missing`;
+    }
+    if (!(leftCall.tool === rightCall.tool && leftCall.argsHash === rightCall.argsHash)) {
+      return `tool call ${position + 1} differs (${leftCall.tool}/${leftCall.argsHash} vs ${rightCall.tool}/${rightCall.argsHash})`;
+    }
+  }
+  return undefined;
+}
+
+/** Describes the first paired call whose resultHash or errorClass differs, or undefined;
+ *  calls beyond the shorter list are not compared and a missing errorClass counts as "". */
+function compareToolResultShape(left: RuntimeParityToolCall[], right: RuntimeParityToolCall[]) {
+  const shared = Math.min(left.length, right.length);
+  for (const [position, leftCall] of left.slice(0, shared).entries()) {
+    const rightCall = right[position];
+    if (!leftCall || !rightCall) {
+      continue;
+    }
+    const sameResult = leftCall.resultHash === rightCall.resultHash;
+    const sameError = (leftCall.errorClass ?? "") === (rightCall.errorClass ?? "");
+    if (!sameResult || !sameError) {
+      return `tool result ${position + 1} differs (${leftCall.tool})`;
+    }
+  }
+  return undefined;
+}
+
+/** 1-based index of the first line at which the trimmed transcripts differ; undefined when they match. */
+function firstDriftTurn(leftTranscript: string, rightTranscript: string): number | undefined {
+  const toLines = (text: string) => (text.trim().length ? text.trim().split(/\r?\n/u) : []);
+  const leftLines = toLines(leftTranscript);
+  const rightLines = toLines(rightTranscript);
+  const longest = Math.max(leftLines.length, rightLines.length);
+  const position = Array.from({ length: longest }, (_, index) => index).find(
+    (index) => (leftLines[index] ?? "") !== (rightLines[index] ?? ""),
+  );
+  return position === undefined ? undefined : position + 1;
+}
+
+/**
+ * Wraps a runtime cell with its variant, prompt size stats, stable prompt/tool surface hashes,
+ * and the usage to compare: the cell's own for "live-usage", else a character-based estimate.
+ */
+export function buildHarnessParityCell(params: {
+  variant: HarnessVariant;
+  cell: HarnessRuntimeParityCell;
+  tokenUsageSource: HarnessParityCell["tokenUsageSource"];
+}): HarnessParityCell {
+  const report = params.cell.systemPromptReport;
+  const promptStats = buildPromptStats(report);
+  const toolEntries = Array.isArray(report?.tools?.entries) ? report.tools.entries : []; // same guard as buildPromptStats, so a malformed non-array value never reaches .map()
+  const tokenUsage =
+    params.tokenUsageSource === "live-usage"
+      ? params.cell.usage
+      : estimateUsage(params.cell, promptStats);
+  return {
+    ...params.cell,
+    variant: params.variant,
+    ...(report ? { systemPromptReport: report } : {}),
+    promptStats,
+    systemPromptHash: stableHash({
+      systemPrompt: report?.systemPrompt ?? null, // a missing section hashes as null
+      skills: report?.skills ?? null,
+    }),
+    toolDescriptionHash: stableHash(
+      toolEntries.map((entry) => ({
+        name: entry.name,
+        summary: entry.summary,
+        summaryHash: entry.summaryHash,
+        summaryChars: entry.summaryChars,
+      })),
+    ),
+    toolSchemaHash: stableHash({
+      listChars: report?.tools?.listChars,
+      schemaChars: report?.tools?.schemaChars,
+      entries: toolEntries.map((entry) => ({
+        name: entry.name,
+        schema: entry.schema,
+        schemaHash: entry.schemaHash,
+        schemaChars: entry.schemaChars,
+        propertiesCount: entry.propertiesCount,
+      })),
+    }),
+    tokenUsage,
+    tokenUsageSource: params.tokenUsageSource,
+  };
+}
+
+export function buildHarnessParityResult(params: { // classifies the first drift found between two cells, checked in priority order
+  scenarioId: string;
+  left: HarnessParityCell;
+  right: HarnessParityCell;
+  comparisonMode?: RuntimeParityComparisonMode; // "codex-native-workspace" and "outcome-only" skip tool-call and transcript-structure checks
+}): HarnessParityResult {
+  const promptDelta = { // every delta is right minus left
+    systemPromptChars:
+      params.right.promptStats.systemPromptChars - params.left.promptStats.systemPromptChars,
+    projectContextChars:
+      params.right.promptStats.projectContextChars - params.left.promptStats.projectContextChars,
+    skillPromptChars:
+      params.right.promptStats.skillPromptChars - params.left.promptStats.skillPromptChars,
+    toolSummaryChars:
+      params.right.promptStats.toolSummaryChars - params.left.promptStats.toolSummaryChars,
+    toolSchemaChars:
+      params.right.promptStats.toolSchemaChars - params.left.promptStats.toolSchemaChars,
+    toolCount: params.right.promptStats.toolCount - params.left.promptStats.toolCount,
+  };
+  const tokenDeltaPercent =
+    params.left.tokenUsage.totalTokens === 0 // avoid dividing by zero: 0% when both totals are zero, otherwise a flat 100%
+      ? params.right.tokenUsage.totalTokens === 0
+        ? 0
+        : 100
+      : ((params.right.tokenUsage.totalTokens - params.left.tokenUsage.totalTokens) /
+          params.left.tokenUsage.totalTokens) *
+        100;
+  const failDetails = // a transport failure on either side takes precedence over a runtime failure
+    params.left.transportErrorClass || params.right.transportErrorClass
+      ? "at least one harness variant hit a transport failure"
+      : params.left.runtimeErrorClass || params.right.runtimeErrorClass
+        ? "at least one harness variant hit a runtime failure"
+        : undefined;
+  if (failDetails) { // failures outrank every other drift class
+    return {
+      scenarioId: params.scenarioId,
+      left: params.left,
+      right: params.right,
+      drift: "failure-mode",
+      driftDetails: failDetails,
+      promptDelta,
+      tokenDeltaPercent,
+      firstDriftTurn: firstDriftTurn(params.left.transcriptBytes, params.right.transcriptBytes),
+    };
+  }
+  if (params.left.systemPromptHash !== params.right.systemPromptHash) { // the three surface hashes are compared in every comparison mode
+    return {
+      scenarioId: params.scenarioId,
+      left: params.left,
+      right: params.right,
+      drift: "system-prompt",
+      driftDetails: "system prompt report differs",
+      promptDelta,
+      tokenDeltaPercent,
+      firstDriftTurn: firstDriftTurn(params.left.transcriptBytes, params.right.transcriptBytes),
+    };
+  }
+  if (params.left.toolDescriptionHash !== params.right.toolDescriptionHash) {
+    return {
+      scenarioId: params.scenarioId,
+      left: params.left,
+      right: params.right,
+      drift: "tool-description",
+      driftDetails: "tool description summary shape differs",
+      promptDelta,
+      tokenDeltaPercent,
+      firstDriftTurn: firstDriftTurn(params.left.transcriptBytes, params.right.transcriptBytes),
+    };
+  }
+  if (params.left.toolSchemaHash !== params.right.toolSchemaHash) {
+    return {
+      scenarioId: params.scenarioId,
+      left: params.left,
+      right: params.right,
+      drift: "tool-schema",
+      driftDetails: "tool schema shape differs",
+      promptDelta,
+      tokenDeltaPercent,
+      firstDriftTurn: firstDriftTurn(params.left.transcriptBytes, params.right.transcriptBytes),
+    };
+  }
+  const compareToolShapes =
+    params.comparisonMode !== "codex-native-workspace" && params.comparisonMode !== "outcome-only";
+  const compareTranscriptStructure = // NOTE(review): same predicate as compareToolShapes, kept under a second name
+    params.comparisonMode !== "codex-native-workspace" && params.comparisonMode !== "outcome-only";
+
+  if (compareToolShapes) {
+    const toolCallDrift = compareToolCallShape(params.left.toolCalls, params.right.toolCalls);
+    if (toolCallDrift) {
+      return {
+        scenarioId: params.scenarioId,
+        left: params.left,
+        right: params.right,
+        drift: "tool-call-shape",
+        driftDetails: toolCallDrift,
+        promptDelta,
+        tokenDeltaPercent,
+        firstDriftTurn: firstDriftTurn(params.left.transcriptBytes, params.right.transcriptBytes),
+      };
+    }
+    const toolResultDrift = compareToolResultShape(params.left.toolCalls, params.right.toolCalls); // only reached when the call shapes matched, so both lists have the same length
+    if (toolResultDrift) {
+      return {
+        scenarioId: params.scenarioId,
+        left: params.left,
+        right: params.right,
+        drift: "tool-result-shape",
+        driftDetails: toolResultDrift,
+        promptDelta,
+        tokenDeltaPercent,
+        firstDriftTurn: firstDriftTurn(params.left.transcriptBytes, params.right.transcriptBytes),
+      };
+    }
+  }
+  const leftTranscriptRecords = countComparableTranscriptRecords(params.left.transcriptBytes);
+  const rightTranscriptRecords = countComparableTranscriptRecords(params.right.transcriptBytes);
+  if (
+    compareTranscriptStructure &&
+    (leftTranscriptRecords !== rightTranscriptRecords ||
+      (!params.left.finalText && !!params.right.finalText) || // exactly one side produced final text
+      (!!params.left.finalText && !params.right.finalText))
+  ) {
+    return {
+      scenarioId: params.scenarioId,
+      left: params.left,
+      right: params.right,
+      drift: "structural",
+      driftDetails: `transcript/final-text structure differs (${leftTranscriptRecords} message records vs ${rightTranscriptRecords} message records)`,
+      promptDelta,
+      tokenDeltaPercent,
+      firstDriftTurn: firstDriftTurn(params.left.transcriptBytes, params.right.transcriptBytes),
+    };
+  }
+  if ( // final text is compared in every mode, ignoring whitespace differences
+    normalizeTextForParity(params.left.finalText) !== normalizeTextForParity(params.right.finalText)
+  ) {
+    return {
+      scenarioId: params.scenarioId,
+      left: params.left,
+      right: params.right,
+      drift: "text-only",
+      driftDetails: "final text differs after whitespace normalization",
+      promptDelta,
+      tokenDeltaPercent,
+      firstDriftTurn: firstDriftTurn(params.left.transcriptBytes, params.right.transcriptBytes),
+    };
+  }
+  return { // no drift found: driftDetails and firstDriftTurn are omitted
+    scenarioId: params.scenarioId,
+    left: params.left,
+    right: params.right,
+    drift: "none",
+    promptDelta,
+    tokenDeltaPercent,
+  };
+}
+
+/** Renders a signed percentage with one decimal; magnitudes below 0.05 are shown as "0.0%". */
+function formatPercent(value: number) {
+  const shown = Math.abs(value) < 0.05 ? 0 : value;
+  return `${shown > 0 ? "+" : ""}${shown.toFixed(1)}%`;
+}
+
+/**
+ * Renders the parity report as Markdown: a title, run metadata, one table row
+ * per scenario, and a "Gate Failures" list when failures were recorded.
+ * The result always ends with exactly one trailing newline.
+ */
+export function renderHarnessParityMarkdownReport(report: HarnessParityReport): string {
+  // Free-text cells must not carry a raw "|" or a line break, or the table row splits.
+  const cell = (text: string) => text.replace(/\|/gu, "\\|").replace(/\r?\n/gu, " ");
+  const lines = [
+    `# OpenClaw Harness Parity - ${report.left.label} vs ${report.right.label}`,
+    "",
+    `- Generated at: ${report.generatedAt}`,
+    `- Provider mode: ${report.providerMode}`,
+    `- Verdict: ${report.pass ? "pass" : "fail"}`,
+    "",
+    "| Scenario | Drift | First drift turn | Token delta | Prompt chars delta | Tool count delta | Details |",
+    "| --- | --- | ---: | ---: | ---: | ---: | --- |",
+  ];
+  for (const result of report.results) {
+    const delta = result.promptDelta; // the "Prompt chars delta" column shows the system prompt delta only
+    lines.push(
+      `| ${cell(result.scenarioId)} | ${result.drift} | ${result.firstDriftTurn ?? ""} | ${formatPercent(
+        result.tokenDeltaPercent,
+      )} | ${delta.systemPromptChars} | ${delta.toolCount} | ${cell(result.driftDetails ?? "")} |`,
+    );
+  }
+  if (report.failures.length > 0) {
+    lines.push("", "## Gate Failures", "", ...report.failures.map((failure) => `- ${failure}`));
+  }
+  return `${lines.join("\n").trimEnd()}\n`;
+}
diff --git a/src/agents/system-prompt-report.test.ts b/src/agents/system-prompt-report.test.ts
index 0a98a032a9b7..4f5095c6503d 100644
--- a/src/agents/system-prompt-report.test.ts
+++ b/src/agents/system-prompt-report.test.ts
@@ -144,4 +144,76 @@ describe("buildSystemPromptReport", () => {
     expect(report.systemPrompt.projectContextChars).toBe(0);
     expect(report.systemPrompt.nonProjectContextChars).toBe("custom override".length);
   });
+
+  it("emits content hashes for prompt and tool parity checks", () => {
+    const file = makeBootstrapFile({ path: "/tmp/workspace/AGENTS.md" });
+    const report = buildSystemPromptReport({
+      source: "run",
+      generatedAt: 0,
+      bootstrapMaxChars: 20_000,
+      systemPrompt: "system",
+      bootstrapFiles: [file],
+      injectedFiles: [],
+      skillsPrompt: "<skill><name>docs</name></skill>",
+      tools: [
+        {
+          name: "read",
+          description: "Read files",
+          parameters: {
+            type: "object",
+            properties: { path: { type: "string" } },
+          },
+        },
+      ] as never, // cast skips the full AgentTool shape; only these fields are supplied
+    });
+    const sameLengthChangedPrompt = buildSystemPromptReport({ // second report: one-letter prompt change and no tools
+      source: "run",
+      generatedAt: 0,
+      bootstrapMaxChars: 20_000,
+      systemPrompt: "systen", // same length as "system", different content
+      bootstrapFiles: [file],
+      injectedFiles: [],
+      skillsPrompt: "<skill><name>docs</name></skill>",
+      tools: [],
+    });
+
+    expect(report.systemPrompt.hash).toMatch(/^[a-f0-9]{64}$/u); // 64 lowercase hex chars, i.e. a SHA-256 hex digest
+    expect(report.skills.hash).toMatch(/^[a-f0-9]{64}$/u);
+    expect(report.tools.entries[0]?.summaryHash).toMatch(/^[a-f0-9]{64}$/u);
+    expect(report.tools.entries[0]?.schemaHash).toMatch(/^[a-f0-9]{64}$/u);
+    expect(sameLengthChangedPrompt.systemPrompt.hash).not.toBe(report.systemPrompt.hash); // the prompt lengths are equal, so only the hash can tell the two apart
+  });
+
+  it("keeps reporting when a tool schema cannot be stringified", () => {
+    const file = makeBootstrapFile({ path: "/tmp/workspace/AGENTS.md" });
+    const circularSchema: Record<string, unknown> = {
+      type: "object",
+      properties: { count: { type: "integer" } },
+    };
+    circularSchema.self = circularSchema; // self-reference makes JSON.stringify throw on this schema
+
+    const report = buildSystemPromptReport({
+      source: "run",
+      generatedAt: 0,
+      bootstrapMaxChars: 20_000,
+      systemPrompt: "system",
+      bootstrapFiles: [file],
+      injectedFiles: [],
+      skillsPrompt: "",
+      tools: [
+        {
+          name: "broken",
+          description: "Broken schema",
+          parameters: circularSchema,
+        },
+      ] as never,
+    });
+
+    expect(report.tools.entries[0]).toMatchObject({
+      name: "broken",
+      schemaChars: 0, // expected fallback when the schema cannot be stringified
+      propertiesCount: 1, // only "count" is declared under properties
+    });
+    expect(report.tools.entries[0]?.schemaHash).toMatch(/^[a-f0-9]{64}$/u); // a hash is still produced for the circular schema
+  });
 });
diff --git a/src/agents/system-prompt-report.ts b/src/agents/system-prompt-report.ts
index 2792ec34db57..d888c74c9602 100644
--- a/src/agents/system-prompt-report.ts
+++ b/src/agents/system-prompt-report.ts
@@ -1,3 +1,4 @@
+import { createHash } from "node:crypto";
 import type { AgentTool } from "@earendil-works/pi-agent-core";
 import type { SessionSystemPromptReport } from "../config/sessions/types.js";
 import { buildBootstrapInjectionStats } from "./bootstrap-budget.js";
@@ -9,9 +10,47 @@ type ToolReportEntry = SessionSystemPromptReport["tools"]["entries"][number];
 const toolReportEntryCache = new WeakMap<AgentTool, ToolReportEntry>();
 const toolSchemaStatsCache = new WeakMap<
   object,
-  Pick<ToolReportEntry, "propertiesCount" | "schemaChars">
+  Pick<ToolReportEntry, "propertiesCount" | "schemaChars" | "schemaHash">
 >();
 
+function sha256(value: string): string { // lowercase hex SHA-256 digest of the given string
+  return createHash("sha256").update(value).digest("hex");
+}
+
+/**
+ * Returns a copy of `value` with object keys sorted at every level, so key
+ * order does not affect the serialized form. Bigints become "<digits>n" strings
+ * and an object found again on its own ancestor path becomes "[Circular]".
+ */
+function normalizeForStableHash(value: unknown, seen = new WeakSet<object>()): unknown {
+  if (typeof value === "bigint") {
+    return `${value.toString()}n`;
+  }
+  if (!value || typeof value !== "object") {
+    return value;
+  }
+  if (seen.has(value)) {
+    return "[Circular]";
+  }
+  seen.add(value);
+  const source = value as Record<string, unknown>;
+  const descend = (child: unknown) => normalizeForStableHash(child, seen);
+  const orderedKeys = () => Object.keys(source).toSorted((a, b) => a.localeCompare(b));
+  const result = Array.isArray(value)
+    ? value.map(descend)
+    : Object.fromEntries(orderedKeys().map((key) => [key, descend(source[key])]));
+  seen.delete(value); // unmark so repeated, non-cyclic references are normalized in full
+  return result;
+}
+
+function stableJsonHash(value: unknown): string { // SHA-256 over the key-sorted JSON form of value; never throws
+  try {
+    return sha256(JSON.stringify(normalizeForStableHash(value)) ?? "null"); // JSON.stringify returns undefined for e.g. an undefined input; hash "null" then
+  } catch {
+    return sha256("[unserializable]"); // any failure while normalizing or stringifying maps to this one fixed hash
+  }
+}
+
 function extractBetween(input: string, startMarker: string, endMarker: string): string {
   const start = input.indexOf(startMarker);
   if (start === -1) {
@@ -39,9 +78,9 @@ function parseSkillBlocks(skillsPrompt: string): Array<{ name: string; blockChar
 
 function buildToolSchemaStats(
   parameters: AgentTool["parameters"],
-): Pick<ToolReportEntry, "propertiesCount" | "schemaChars"> {
+): Pick<ToolReportEntry, "propertiesCount" | "schemaChars" | "schemaHash"> {
   if (!parameters || typeof parameters !== "object") {
-    return { schemaChars: 0, propertiesCount: null };
+    return { schemaChars: 0, schemaHash: stableJsonHash(null), propertiesCount: null };
   }
   const cached = toolSchemaStatsCache.get(parameters);
   if (cached) {
@@ -55,6 +94,7 @@ function buildToolSchemaStats(
         return 0;
       }
     })(),
+    schemaHash: stableJsonHash(parameters),
     propertiesCount: (() => {
       const schema = parameters as Record<string, unknown>;
       const props = typeof schema.properties === "object" ? schema.properties : null;
@@ -78,7 +118,7 @@ function buildToolsEntries(tools: AgentTool[]): SessionSystemPromptReport["tools
     const summary = tool.description?.trim() || tool.label?.trim() || "";
     const summaryChars = summary.length;
     const schemaStats = buildToolSchemaStats(tool.parameters);
-    const entry = { name, summaryChars, ...schemaStats };
+    const entry = { name, summaryChars, summaryHash: sha256(summary), ...schemaStats };
     toolReportEntryCache.set(tool, entry);
     return entry;
   });
@@ -129,6 +169,7 @@ export function buildSystemPromptReport(params: {
       chars: systemPromptChars,
       projectContextChars,
       nonProjectContextChars: Math.max(0, systemPromptChars - projectContextChars),
+      hash: sha256(params.systemPrompt),
     },
     ...(params.currentTurn ? { currentTurn: params.currentTurn } : {}),
     injectedWorkspaceFiles: buildBootstrapInjectionStats({
@@ -137,6 +178,7 @@ export function buildSystemPromptReport(params: {
     }),
     skills: {
       promptChars: params.skillsPrompt.length,
+      hash: sha256(params.skillsPrompt),
       entries: skillsEntries,
     },
     tools: {
diff --git a/src/config/sessions/types.ts b/src/config/sessions/types.ts
index cacaddbb7880..53b93121f90d 100644
--- a/src/config/sessions/types.ts
+++ b/src/config/sessions/types.ts
@@ -644,6 +644,7 @@ export type SessionSystemPromptReport = {
     chars: number;
     projectContextChars: number;
     nonProjectContextChars: number;
+    hash?: string;
   };
   currentTurn?: {
     kind?: "user_request" | "room_event";
@@ -660,6 +661,7 @@ export type SessionSystemPromptReport = {
   }>;
   skills: {
     promptChars: number;
+    hash?: string;
     entries: Array<{ name: string; blockChars: number }>;
   };
   tools: {
@@ -668,7 +670,9 @@ export type SessionSystemPromptReport = {
     entries: Array<{
       name: string;
       summaryChars: number;
+      summaryHash?: string;
       schemaChars: number;
+      schemaHash?: string;
       propertiesCount?: number | null;
     }>;
   };