fix: keep private SDK declarations local

test: guard private sdk declaration leaks
Co-authored-by: Peter Steinberger <steipete@gmail.com>
2026-06-10 16:03:47 +08:00 · 2026-05-28 03:16:45 +01:00 · 2026-05-27 18:57:27 -07:00 · 2026-05-27 18:34:17 -07:00 · 2026-05-27 18:34:17 -07:00 · 2026-05-27 18:34:17 -07:00
5829 changed files with 356014 additions and 102210 deletions
--- a/.agents/skills/agent-transcript/SKILL.md
+++ b/.agents/skills/agent-transcript/SKILL.md
@@ -0,0 +1,88 @@
+---
+name: agent-transcript
+description: "Add a redacted agent transcript section to GitHub PR or issue bodies during OpenClaw agent-created PR/issue workflows."
+---
+
+# Agent Transcript
+
+Best-effort local-only provenance for OpenClaw PR/issue bodies. Use during agent-created GitHub PR or issue workflows before creating/updating the body.
+
+## Contract
+
+- Never use network. Session discovery reads local agent logs only.
+- Never upload raw logs. Render sanitized Markdown first.
+- Always ask the user before adding transcript logs to a GitHub PR/issue body.
+- Tell the user sanitized session logs help reviewers and can make PRs easier to prioritize.
+- Offer a local HTML preview before insertion. If the user wants preview, open it and wait for confirmation before adding the section.
+- Fail closed on unresolved secrets, private keys, browser/session/cookie details, or auth URLs.
+- Drop system/developer prompts, raw tool outputs, reasoning, env, cookies, tokens, and broad local paths.
+- Keep user prompts, assistant visible decisions, terse tool summaries, and test/proof outcomes.
+- Remove session turns unrelated to the PR/issue work. Use the PR/issue title, branch name, changed files, and stated goal as scope; omit earlier/later unrelated tasks even when they are in the same session log.
+- Best effort only: PR/issue creation must continue if no safe transcript is found.
+- Add the `## Agent Transcript` section only when inserting a real transcript. Never add a placeholder transcript heading or text such as "A sanitized local transcript preview was generated but not included."
+- Use a collapsed `<details>` section and update existing markers instead of duplicating sections.
+
+## Helper
+
+```bash
+.agents/skills/agent-transcript/scripts/agent-transcript --help
+```
+
+Find a likely local session:
+
+```bash
+.agents/skills/agent-transcript/scripts/agent-transcript find \
+  --query "$PR_TITLE $BRANCH_OR_PR_URL" \
+  --cwd "$PWD" \
+  --since-days 14
+```
+
+`find` scans the 400 most recently modified local JSONL logs inside the `--since-days` window by default, across Codex, Claude, Pi, and OpenClaw agent sessions, and prints the best-scoring matches. Use `--max-files N` for a wider local search.
+
+Render a PR/issue body section:
+
+```bash
+.agents/skills/agent-transcript/scripts/agent-transcript render \
+  --session "$SESSION_JSONL" \
+  --out /tmp/agent-transcript.md
+```
+
+Preview one candidate session locally:
+
+```bash
+.agents/skills/agent-transcript/scripts/agent-transcript preview \
+  --session "$SESSION_JSONL" \
+  --out /tmp/agent-transcript-preview.html
+open /tmp/agent-transcript-preview.html
+```
+
+Append/update a body file before `gh pr create --body-file` or connector PR creation:
+
+```bash
+.agents/skills/agent-transcript/scripts/agent-transcript append-body \
+  --body /tmp/pr-body.md \
+  --session "$SESSION_JSONL" \
+  --out /tmp/pr-body.with-transcript.md
+```
+
+## PR/Issue Workflow
+
+1. Draft the normal PR/issue body first.
+2. Run `find` with title, branch, PR URL/number if known, and cwd.
+3. If a high-confidence session is found, ask:
+   `Include a redacted agent transcript? It helps reviewers and can make the PR easier to prioritize. I can open a local preview first.`
+4. If the user wants preview, run `preview`, open the HTML with `open`, and wait for confirmation.
+5. Before insertion, trim unrelated session turns from the generated section. Keep only turns that explain this PR/issue's goal, implementation choices, files, tests, proof, blockers, and final outcome.
+6. If the user approves, run `append-body`.
+7. Use the enriched body file for creation/update.
+8. If no safe session is found, say nothing and continue without transcript. If the user declines, continue without transcript and do not add any transcript placeholder section.
+
+## Review Artifacts
+
+For manual audits across many PR/session candidates, create a local HTML preview from a local JSON file. This is for maintainers only and is not part of the PR/issue workflow:
+
+```bash
+.agents/skills/agent-transcript/scripts/agent-transcript html \
+  --prs /tmp/recent-prs.json \
+  --out /tmp/agent-transcript-preview.html
+```
--- a/.agents/skills/agent-transcript/scripts/agent-transcript
+++ b/.agents/skills/agent-transcript/scripts/agent-transcript
@@ -0,0 +1,683 @@
+#!/usr/bin/env node
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import process from "node:process";
+
+const MARKER_START = "<!-- agent-transcript:start -->";
+const MARKER_END = "<!-- agent-transcript:end -->";
+const DEFAULT_MAX_CHARS = 50000;
+const DEFAULT_ENTRY_MAX_CHARS = 6000;
+
+function usage() {
+  console.log(`Usage:
+  agent-transcript find --query TEXT [--cwd PATH] [--since-days N] [--max-files N] [--root PATH...]
+  agent-transcript render --session FILE [--out FILE] [--max-chars N] [--entry-max-chars N] [--title TEXT] [--url URL]
+  agent-transcript preview --session FILE [--out FILE] [--max-chars N] [--entry-max-chars N] [--title TEXT] [--url URL]
+  agent-transcript append-body --body FILE --session FILE [--out FILE] [--max-chars N] [--entry-max-chars N]
+  agent-transcript html --prs FILE [--out FILE] [--since-days N] [--min-score N] [--root PATH...] [--exclude-session FILE...]
+
+Local-only. No network calls.`);
+}
+
+function parseArgs(argv) {
+  const args = { _: [] };
+  for (let i = 0; i < argv.length; i++) {
+    const arg = argv[i];
+    if (!arg.startsWith("--")) {
+      args._.push(arg);
+      continue;
+    }
+    const key = arg.slice(2);
+    const next = argv[i + 1];
+    if (next == null || next.startsWith("--")) {
+      args[key] = true;
+      continue;
+    }
+    i++;
+    if (args[key] == null) args[key] = next;
+    else if (Array.isArray(args[key])) args[key].push(next);
+    else args[key] = [args[key], next];
+  }
+  return args;
+}
+
+function asArray(value) {
+  if (value == null) return [];
+  return Array.isArray(value) ? value : [value];
+}
+
+function homePath(...parts) {
+  return path.join(os.homedir(), ...parts);
+}
+
+function openClawSessionRoots() {
+  const stateDir = process.env.OPENCLAW_STATE_DIR || homePath(".openclaw");
+  const agentsDir = path.join(stateDir, "agents");
+  if (!fs.existsSync(agentsDir)) return [];
+  try {
+    const roots = fs
+      .readdirSync(agentsDir, { withFileTypes: true })
+      .filter((entry) => entry.isDirectory())
+      .flatMap((entry) => {
+        const agentDir = path.join(agentsDir, entry.name);
+        return [
+          path.join(agentDir, "sessions"),
+          path.join(agentDir, "agent", "sessions"),
+          path.join(agentDir, "agent", "codex-home", "sessions"),
+        ];
+      })
+      .filter((root) => fs.existsSync(root));
+    return [...new Set(roots)];
+  } catch {
+    return [];
+  }
+}
+
+function defaultRoots() {
+  return [
+    homePath(".codex", "sessions"),
+    homePath(".claude", "projects"),
+    homePath(".pi", "agent", "sessions"),
+    ...openClawSessionRoots(),
+  ];
+}
+
+function walkJsonl(root, sinceMs, out = []) {
+  if (!root || !fs.existsSync(root)) return out;
+  const stat = fs.statSync(root);
+  if (stat.isFile()) {
+    if (root.endsWith(".jsonl") && stat.mtimeMs >= sinceMs) out.push(root);
+    return out;
+  }
+  for (const entry of fs.readdirSync(root, { withFileTypes: true })) {
+    if (entry.name === "node_modules" || entry.name === ".git") continue;
+    const file = path.join(root, entry.name);
+    if (entry.isDirectory()) walkJsonl(file, sinceMs, out);
+    else if (entry.isFile() && entry.name.endsWith(".jsonl")) {
+      const entryStat = fs.statSync(file);
+      if (entryStat.mtimeMs >= sinceMs) out.push(file);
+    }
+  }
+  return out;
+}
+
+function readJsonl(file, maxLines = 12000) {
+  const text = fs.readFileSync(file, "utf8");
+  const lines = text.split(/\n+/).filter(Boolean).slice(0, maxLines);
+  const rows = [];
+  for (const line of lines) {
+    try {
+      rows.push(JSON.parse(line));
+    } catch {
+      rows.push({ type: "unparsed", text: line });
+    }
+  }
+  return rows;
+}
+
+function stringContent(value) {
+  if (value == null) return "";
+  if (typeof value === "string") return value;
+  if (Array.isArray(value)) return value.map(stringContent).filter(Boolean).join("\n");
+  if (typeof value === "object") {
+    if (typeof value.text === "string") return value.text;
+    if (typeof value.content === "string") return value.content;
+    if (typeof value.message === "string") return value.message;
+    if (Array.isArray(value.content)) return stringContent(value.content);
+    if (value.type === "text" && value.text) return String(value.text);
+  }
+  return "";
+}
+
+function detectAgent(file, rows) {
+  if (file.includes(`${path.sep}.codex${path.sep}`)) return "codex";
+  if (file.includes(`${path.sep}.claude${path.sep}`)) return "claude";
+  if (file.includes(`${path.sep}.pi${path.sep}`)) return "pi";
+  if (
+    file.includes(`${path.sep}.openclaw${path.sep}`) ||
+    (file.includes(`${path.sep}agents${path.sep}`) && file.includes(`${path.sep}sessions${path.sep}`))
+  ) {
+    return "openclaw";
+  }
+  if (rows.some((row) => row?.type === "session_meta" || row?.type === "response_item")) return "codex";
+  if (rows.some((row) => row?.sessionId && row?.userType)) return "claude";
+  return "agent";
+}
+
+function eventText(row) {
+  if (row?.type === "event_msg") {
+    const payload = row.payload || {};
+    return stringContent(payload.message || payload.text_elements || payload.content);
+  }
+  if (row?.type === "response_item") {
+    const payload = row.payload || {};
+    return stringContent(payload.content || payload.summary || payload.arguments || payload.output);
+  }
+  if (row?.message) return stringContent(row.message);
+  if (row?.content) return stringContent(row.content);
+  if (row?.text) return stringContent(row.text);
+  return "";
+}
+
+function eventRole(row) { // Classify a log row as "user" | "assistant" | "tool" | "tool_output" | "web"; null means skip.
+  if (row?.type === "event_msg") {
+    const type = row.payload?.type;
+    if (type === "user_message") return "user";
+    if (type === "agent_message") return "assistant";
+    if (type === "token_count" || type === "task_started" || type === "task_complete") return null; // bookkeeping events
+    if (type === "web_search_end") return "web";
+  } // any other event_msg payload type falls through to the checks below
+  if (row?.type === "response_item") {
+    const payload = row.payload || {};
+    if (payload.type === "function_call") return "tool";
+    if (payload.type === "function_call_output") return "tool_output";
+    if (payload.type === "reasoning") return null; // reasoning rows are never rendered
+    if (payload.type === "web_search_call") return "web";
+    if (payload.role === "user") return "user";
+    if (payload.role === "assistant") return "assistant";
+  }
+  if (row?.type === "user") return "user";
+  if (row?.type === "assistant") return "assistant";
+  if (row?.message?.role === "user") return "user";
+  if (row?.message?.role === "assistant") return "assistant";
+  if (row?.type === "tool_result" || row?.type === "tool_use") return "tool";
+  return null;
+}
+
+function hasSetupBlob(text) {
+  return (
+    text.includes("<INSTRUCTIONS>") ||
+    text.includes("# AGENTS.MD") ||
+    text.includes("Knowledge cutoff:") ||
+    text.includes("You are Codex") ||
+    /\byour instructions\b/i.test(text) ||
+    /\binstructions absorbed\b/i.test(text) ||
+    /\bAGENTS\.md\b/i.test(text)
+  );
+}
+
+function redact(input, stats) {
+  let s = String(input ?? "");
+  const rules = [
+    [/-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g, "[REDACTED_PRIVATE_KEY]"],
+    [/sk-[A-Za-z0-9_-]{20,}/g, "[REDACTED_OPENAI_KEY]"],
+    [/(gh[pousr]_[A-Za-z0-9_]{20,})/g, "[REDACTED_GITHUB_TOKEN]"],
+    [/(AKIA[0-9A-Z]{16})/g, "[REDACTED_AWS_KEY]"],
+    [/eyJ[A-Za-z0-9_-]{20,}\.[A-Za-z0-9_-]{20,}\.[A-Za-z0-9_-]{10,}/g, "[REDACTED_JWT]"],
+    [/\b(?:Bearer|Basic)\s+[A-Za-z0-9._~+/=-]{16,}/gi, "[REDACTED_AUTH_HEADER]"],
+    [/[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}/gi, "[REDACTED_EMAIL]"],
+    [/\b(?:\+?\d[\d .()-]{7,}\d)\b/g, "[REDACTED_PHONE]"],
+    [/\/Users\/[^\s`"'>)]+/g, "[LOCAL_PATH]"],
+    [/~\/[^\s`"'>)]+/g, "[HOME_PATH]"],
+    [/([?&](?:token|key|secret|signature|sig|access_token|auth)=)[^\s`"'>&]+/gi, "$1[REDACTED]"],
+  ];
+  for (const [re, repl] of rules) {
+    const before = s;
+    s = s.replace(re, repl);
+    if (s !== before) stats.redactions++;
+  }
+  return s;
+}
+
+function unsafe(text) {
+  const patterns = [
+    /-----BEGIN [A-Z ]*PRIVATE KEY-----/,
+    /\b(?:Bearer|Basic)\s+[A-Za-z0-9._~+/=-]{16,}/i,
+    /\b(?:user_session|_gh_sess|__Host-user_session_same_site|GH_SESSION_TOKEN)\b/i,
+    /\b(?:GITHUB_TOKEN|GH_TOKEN|OPENAI_API_KEY|ANTHROPIC_API_KEY)\b/,
+    /\/upload\/policies\/assets|uploadToken|authenticity_token/i,
+  ];
+  return patterns.filter((pattern) => pattern.test(text)).map((pattern) => String(pattern));
+}
+
+function normalizeEntry(role, text, stats, options = {}) {
+  let t = redact(text, stats).replace(/\n{3,}/g, "\n\n").trim();
+  if (!t) return null;
+  if (hasSetupBlob(t)) t = "[instructions recap omitted; policy/config text, not task dialogue]";
+  if (unsafe(t).length) t = "[omitted: browser/session/auth internals; not useful for public PR transcript]";
+  const entryMaxChars = Number(options.entryMaxChars || options["entry-max-chars"] || DEFAULT_ENTRY_MAX_CHARS);
+  if (t.length > entryMaxChars) {
+    t = `${t.slice(0, entryMaxChars).trimEnd()}\n...[truncated ${t.length - entryMaxChars} chars]`;
+  }
+  return `[${role}]\n${t}`;
+}
+
+function entryRole(entry) {
+  const match = entry.match(/^\[([^\]]+)\]\n/);
+  return match ? match[1] : null;
+}
+
+function entryBody(entry) {
+  return entry.replace(/^\[[^\]]+\]\n/, "");
+}
+
+function coalesceEntries(entries) {
+  const coalesced = [];
+  for (const entry of entries) {
+    const role = entryRole(entry);
+    const body = entryBody(entry);
+    const last = coalesced[coalesced.length - 1];
+    if (!last || !role || entryRole(last) !== role || role === "tool summary") {
+      coalesced.push(entry);
+      continue;
+    }
+    const lastBody = entryBody(last);
+    if (lastBody === body || lastBody.includes(body)) continue;
+    if (body.includes(lastBody)) {
+      coalesced[coalesced.length - 1] = `[${role}]\n${body}`;
+      continue;
+    }
+    coalesced[coalesced.length - 1] = `[${role}]\n${lastBody}\n\n${body}`;
+  }
+  return coalesced;
+}
+
+function toolFamily(name) {
+  const normalized = String(name).toLowerCase();
+  if (
+    /(read|fetch|open|list|find|search|grep|rg|sed|cat|head|tail|jq|wc|status|diff|show|view|snapshot|screenshot)/.test(
+      normalized,
+    )
+  ) {
+    return "read";
+  }
+  if (/(write|edit|patch|apply|create|update|append|save|comment|fill|click|type|navigate|upload)/.test(normalized)) {
+    return "write";
+  }
+  if (/(exec|command|shell|run|test|build|lint|format|install|pnpm|npm|node|git|gh|ssh)/.test(normalized)) {
+    return "execute";
+  }
+  if (/(web|http|fetch|browser|chrome|github|dropbox|notion|gmail|calendar)/.test(normalized)) {
+    return "network";
+  }
+  return "other";
+}
+
+function shellFamily(command) {
+  const cmd = String(command || "").trim();
+  if (!cmd) return "execute";
+  if (
+    /^(rg|grep|sed|cat|head|tail|jq|wc|ls|find|pwd|git (status|diff|show|log|blame)|gh (pr|issue|api|run|repo|auth) (view|list|status)|test |stat |ps |which |command -v )\b/.test(
+      cmd,
+    )
+  ) {
+    return "read";
+  }
+  if (/^(open |chmod |mkdir |touch |cp |mv |kill |git add|git commit|git push|gh pr create|gh issue create)\b/.test(cmd)) {
+    return "write";
+  }
+  if (/^(node|npm|pnpm|bun|python|python3|ruby|tsx|tsgo|make|cargo|go test|swift|xcodebuild)\b/.test(cmd)) {
+    return "execute";
+  }
+  if (/^(ssh|curl|wget|tailscale|nc )\b/.test(cmd)) return "network";
+  return "execute";
+}
+
+function toolCallFamily(row) {
+  const name = row.payload?.name || row.name || row.message?.name || row.type || "tool";
+  if (name === "exec_command") {
+    try {
+      const args = JSON.parse(row.payload?.arguments || "{}");
+      return shellFamily(args.cmd);
+    } catch {
+      return "execute";
+    }
+  }
+  if (name === "apply_patch") return "write";
+  if (name === "write_stdin") return "execute";
+  return toolFamily(name);
+}
+
+function compactToolSummary(familyCounts, dropped) {
+  const families = new Map();
+  for (const [family, count] of familyCounts.entries()) {
+    families.set(family, (families.get(family) || 0) + count);
+  }
+  const ordered = ["read", "write", "execute", "network", "other"]
+    .map((family) => [family, families.get(family) || 0])
+    .filter(([, count]) => count > 0)
+    .map(([family, count]) => `${count} ${family}`);
+  const calls = ordered.length ? ordered.join(", ") : "0 tool";
+  return `${calls}; raw tool outputs dropped: ${dropped}`;
+}
+
+function recountEntries(stats, entries) {
+  stats.rawEntries = stats.entries;
+  stats.entries = entries.length;
+  stats.user = entries.filter((entry) => entry.startsWith("[user]\n")).length;
+  stats.assistant = entries.filter((entry) => entry.startsWith("[assistant]\n")).length;
+}
+
+function renderSession(file, options = {}) {
+  const rows = readJsonl(file);
+  const agent = detectAgent(file, rows);
+  const stats = {
+    agent,
+    entries: 0,
+    user: 0,
+    assistant: 0,
+    toolCalls: 0,
+    toolOutputsDropped: 0,
+    web: 0,
+    redactions: 0,
+    omittedUnsafe: 0,
+  };
+  const toolCounts = new Map();
+  const items = [];
+  const seenEntries = new Set();
+  const hasEventDialogue = rows.some((row) => {
+    const type = row?.type === "event_msg" ? row.payload?.type : null;
+    return type === "user_message" || type === "agent_message";
+  });
+  for (const row of rows) {
+    const role = eventRole(row);
+    if (!role) continue;
+    if (hasEventDialogue && row.type === "response_item" && (role === "user" || role === "assistant")) {
+      continue;
+    }
+    if (role === "tool_output") {
+      stats.toolOutputsDropped++;
+      continue;
+    }
+    if (role === "tool") {
+      const family = toolCallFamily(row);
+      toolCounts.set(family, (toolCounts.get(family) || 0) + 1);
+      stats.toolCalls++;
+      continue;
+    }
+    if (role === "web") {
+      stats.web++;
+      continue;
+    }
+    const before = eventText(row);
+    const entry = normalizeEntry(role, before, stats, options);
+    if (!entry) continue;
+    const dedupeKey = entry.replace(/\s+/g, " ").trim();
+    if (seenEntries.has(dedupeKey)) continue;
+    seenEntries.add(dedupeKey);
+    if (entry.includes("[omitted: browser/session/auth internals")) stats.omittedUnsafe++;
+    items.push(entry);
+    stats.entries++;
+    if (role === "user") stats.user++;
+    if (role === "assistant") stats.assistant++;
+  }
+  if (toolCounts.size) {
+    items.push(`[tool summary]\n${compactToolSummary(toolCounts, stats.toolOutputsDropped)}`);
+    stats.entries++;
+  }
+  const renderedItems = coalesceEntries(items);
+  recountEntries(stats, renderedItems);
+  const maxChars = Number(options.maxChars || DEFAULT_MAX_CHARS);
+  let joined = renderedItems.join("\n\n");
+  if (joined.length > maxChars) joined = `${joined.slice(0, maxChars).trimEnd()}\n\n...[transcript truncated to ${maxChars} chars]`;
+  const headerBits = [options.title, options.url].filter(Boolean).join(" | ");
+  const unsafeAfter = unsafe(joined);
+  const safe = unsafeAfter.length === 0;
+  const markdown = `${MARKER_START}
+## Agent Transcript
+
+<details>
+<summary>Redacted ${agent} session transcript${headerBits ? `: ${redact(headerBits, stats)}` : ""}</summary>
+
+\`\`\`\`text
+source: [LOCAL_SESSION]
+redaction: local paths, emails, phone-shaped strings, token-shaped strings, auth headers, auth query params
+omitted: raw tool outputs, system/developer prompts, local paths, secrets, browser/session/auth details
+stats: ${JSON.stringify(stats)}
+
+${joined}
+\`\`\`\`
+
+</details>
+${MARKER_END}
+`;
+  return { file, agent, safe, unsafeAfter, stats, markdown };
+}
+
+function readBoundedText(file, maxBytes = 220000) {
+  const fd = fs.openSync(file, "r");
+  try {
+    const stat = fs.fstatSync(fd);
+    if (stat.size <= maxBytes) {
+      const buffer = Buffer.alloc(stat.size);
+      fs.readSync(fd, buffer, 0, stat.size, 0);
+      return buffer.toString("utf8");
+    }
+    const half = Math.floor(maxBytes / 2);
+    const head = Buffer.alloc(half);
+    const tail = Buffer.alloc(half);
+    fs.readSync(fd, head, 0, half, 0);
+    fs.readSync(fd, tail, 0, half, Math.max(0, stat.size - half));
+    return `${head.toString("utf8")}\n[...middle omitted for scan...]\n${tail.toString("utf8")}`;
+  } finally {
+    fs.closeSync(fd);
+  }
+}
+
+function sessionScanRecord(file, maxBytes) {
+  const stat = fs.statSync(file);
+  const agent = detectAgent(file, []);
+  return {
+    file,
+    agent,
+    mtime: new Date(stat.mtimeMs).toISOString(),
+    haystack: `${file}\n${readBoundedText(file, maxBytes)}`.toLowerCase(),
+  };
+}
+
+function scoreScanRecord(record, terms, cwd) {
+  const haystack = record.haystack;
+  let score = 0;
+  const reasons = [];
+  for (const term of terms) {
+    const normalized = term.toLowerCase().trim();
+    if (normalized.length < 3) continue;
+    if (haystack.includes(normalized)) {
+      score += Math.min(20, Math.max(3, Math.floor(normalized.length / 3)));
+      reasons.push(normalized.slice(0, 80));
+    }
+  }
+  if (cwd) {
+    const cwdLower = cwd.toLowerCase();
+    if (haystack.includes(cwdLower) || record.file.toLowerCase().includes(cwdLower.replaceAll("/", "-"))) {
+      score += 8;
+      reasons.push("cwd");
+    }
+  }
+  return { file: record.file, score, reasons, mtime: record.mtime, agent: record.agent };
+}
+
+function recentFiles(files, maxFiles) {
+  return files
+    .map((file) => {
+      try {
+        return { file, mtimeMs: fs.statSync(file).mtimeMs };
+      } catch {
+        return null;
+      }
+    })
+    .filter(Boolean)
+    .sort((a, b) => b.mtimeMs - a.mtimeMs)
+    .slice(0, maxFiles)
+    .map((entry) => entry.file);
+}
+
+function candidateFiles(roots, terms, sinceMs, options = {}) {
+  return recentFiles(roots.flatMap((root) => walkJsonl(root, sinceMs)), Number(options["max-files"] || 400));
+}
+
+function findSessions(options) {
+  const sinceDays = Number(options["since-days"] || 14);
+  const sinceMs = Date.now() - sinceDays * 24 * 60 * 60 * 1000;
+  const roots = asArray(options.root).length ? asArray(options.root) : defaultRoots();
+  const query = String(options.query || "");
+  const terms = query
+    .split(/\s+/)
+    .concat(query.match(/https?:\/\/\S+/g) || [])
+    .filter(Boolean);
+  const files = candidateFiles(roots, terms, sinceMs, options);
+  const scanBytes = Number(options["scan-bytes"] || 60000);
+  const results = files
+    .map((file) => scoreScanRecord(sessionScanRecord(file, scanBytes), terms, options.cwd))
+    .filter((result) => result.score > 0)
+    .sort((a, b) => b.score - a.score || b.mtime.localeCompare(a.mtime))
+    .slice(0, Number(options.limit || 10));
+  return results;
+}
+
+function sessionScanRecords(options) {
+  const sinceDays = Number(options["since-days"] || 14);
+  const sinceMs = Date.now() - sinceDays * 24 * 60 * 60 * 1000;
+  const roots = asArray(options.root).length ? asArray(options.root) : defaultRoots();
+  const excluded = new Set(asArray(options["exclude-session"]).map((file) => path.resolve(file)));
+  return roots
+    .flatMap((root) => walkJsonl(root, sinceMs))
+    .filter((file) => !excluded.has(path.resolve(file)))
+    .map((file) => sessionScanRecord(file, Number(options["scan-bytes"] || 90000)));
+}
+
+function replaceSection(body, section) {
+  const start = body.indexOf(MARKER_START);
+  const end = body.indexOf(MARKER_END);
+  if (start !== -1 && end !== -1 && end > start) {
+    return `${body.slice(0, start).trimEnd()}\n\n${section.trim()}\n\n${body.slice(end + MARKER_END.length).trimStart()}`;
+  }
+  return `${body.trimEnd()}\n\n${section.trim()}\n`;
+}
+
+function escapeHtml(text) {
+  return String(text)
+    .replaceAll("&", "&amp;")
+    .replaceAll("<", "&lt;")
+    .replaceAll(">", "&gt;")
+    .replaceAll('"', "&quot;");
+}
+
+function htmlDocument(records) {
+  const rows = records
+    .map((record) => `<section>
+<h2><a href="${escapeHtml(record.url || "")}">${escapeHtml(record.title || record.url || "PR")}</a></h2>
+<p><code>${escapeHtml(record.session ? "[LOCAL_SESSION]" : "no session")}</code> score: ${escapeHtml(record.score ?? "")} safe: ${escapeHtml(record.safe ?? "")}</p>
+<pre>${escapeHtml(record.markdown || record.error || "")}</pre>
+</section>`)
+    .join("\n");
+  return `<!doctype html>
+<meta charset="utf-8">
+<title>Agent Transcript Preview</title>
+<style>
+body{font:14px/1.45 system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",sans-serif;margin:32px;color:#1f2328;background:#fff}
+section{border-top:1px solid #d0d7de;padding:24px 0}
+h1,h2{line-height:1.2}
+pre{white-space:pre-wrap;background:#f6f8fa;border:1px solid #d0d7de;border-radius:6px;padding:16px;overflow:auto}
+code{background:#f6f8fa;padding:2px 4px;border-radius:4px}
+a{color:#0969da}
+</style>
+<h1>Agent Transcript Preview</h1>
+${rows}
+`;
+}
+
+function singlePreviewDocument(record) {
+  return htmlDocument([record]);
+}
+
+function readPrs(file) {
+  const raw = fs.readFileSync(file, "utf8");
+  const parsed = JSON.parse(raw);
+  return Array.isArray(parsed) ? parsed : parsed.items || parsed.prs || [];
+}
+
+function main() { // CLI entry point: dispatch on the first argument; an unknown command prints usage and sets exit code 2.
+  const [command, ...rest] = process.argv.slice(2);
+  const args = parseArgs(rest);
+  if (!command || command === "--help" || command === "-h" || args.help) {
+    usage();
+    return;
+  }
+  if (command === "find") {
+    console.log(JSON.stringify(findSessions(args), null, 2));
+    return;
+  }
+  if (command === "render") {
+    if (!args.session) throw new Error("--session is required");
+    const rendered = renderSession(args.session, args);
+    if (!rendered.safe) throw new Error(`unsafe transcript after redaction: ${rendered.unsafeAfter.join(", ")}`); // fail closed
+    if (args.out) fs.writeFileSync(args.out, rendered.markdown);
+    else process.stdout.write(rendered.markdown);
+    return;
+  }
+  if (command === "preview") {
+    if (!args.session) throw new Error("--session is required");
+    const rendered = renderSession(args.session, args);
+    if (!rendered.safe) throw new Error(`unsafe transcript after redaction: ${rendered.unsafeAfter.join(", ")}`);
+    const output = singlePreviewDocument({
+      title: args.title || "Agent Transcript Preview",
+      url: args.url || "",
+      session: args.session,
+      safe: rendered.safe,
+      markdown: rendered.markdown,
+    });
+    if (args.out) fs.writeFileSync(args.out, output);
+    else process.stdout.write(output);
+    return;
+  }
+  if (command === "append-body") {
+    if (!args.body || !args.session) throw new Error("--body and --session are required");
+    const rendered = renderSession(args.session, args);
+    if (!rendered.safe) throw new Error(`unsafe transcript after redaction: ${rendered.unsafeAfter.join(", ")}`);
+    const body = fs.readFileSync(args.body, "utf8");
+    const next = replaceSection(body, rendered.markdown);
+    if (args.out) fs.writeFileSync(args.out, next);
+    else process.stdout.write(next);
+    return;
+  }
+  if (command === "html") {
+    if (!args.prs) throw new Error("--prs is required");
+    const records = [];
+    const scanRecords = sessionScanRecords(args); // sessions are scanned once and re-scored for every PR
+    const minScore = Number(args["min-score"] || 50);
+    for (const pr of readPrs(args.prs)) {
+      // NOTE(review): pr.headRefName appears twice below, so its words are scored twice — confirm this weighting is intended.
+      const query = [pr.url, pr.number ? `#${pr.number}` : "", pr.number, pr.title, pr.headRefName, pr.headRefName || pr.branch]
+        .filter(Boolean)
+        .join(" ");
+      const terms = query
+        .split(/\s+/)
+        .concat(query.match(/https?:\/\/\S+/g) || [])
+        .filter(Boolean);
+      const [candidate] = scanRecords // best-scoring record at or above minScore, if any
+        .map((record) => scoreScanRecord(record, terms, args.cwd))
+        .filter((result) => result.score >= minScore)
+        .sort((a, b) => b.score - a.score || b.mtime.localeCompare(a.mtime));
+      if (!candidate) {
+        records.push({ ...pr, error: "No local session match found." });
+        continue;
+      }
+      try {
+        // Unlike render/preview/append-body, an unsafe result is recorded with safe: false instead of being rejected.
+        const rendered = renderSession(candidate.file, { ...args, title: pr.title, url: pr.url });
+        records.push({
+          ...pr,
+          session: candidate.file,
+          score: candidate.score,
+          safe: rendered.safe,
+          markdown: rendered.markdown,
+        });
+      } catch (error) {
+        records.push({ ...pr, session: candidate.file, score: candidate.score, error: String(error) });
+      }
+    }
+    const output = htmlDocument(records);
+    if (args.out) fs.writeFileSync(args.out, output);
+    else process.stdout.write(output);
+    return;
+  }
+  usage();
+  process.exitCode = 2;
+}
+
+try {
+  main();
+} catch (error) {
+  console.error(error instanceof Error ? error.message : String(error));
+  process.exit(1);
+}
--- a/.agents/skills/autoreview/SKILL.md
+++ b/.agents/skills/autoreview/SKILL.md
@@ -26,11 +26,16 @@ Use when:
 - If a review-triggered fix changes code, rerun focused tests and rerun the structured review helper.
 - For security-audit suppression changes, verify accepted findings remain auditable: suppressed findings stay in structured output, active output keeps an unsuppressible suppression notice, and aggregate findings cannot hide unrelated active risk.
 - Never switch or override the requested review engine/model. If the review hits model capacity, retry the same command a few times with the same engine/model.
+- Be patient with large bundles. Structured review can take up to 30 minutes while the model call is active, especially with Codex tools or web search.
+- Treat heartbeat lines like `review still running: ... elapsed=... pid=...` as healthy progress, not a hang. Let the helper continue while heartbeats are advancing. Pass `--stream-engine-output` when live engine text is useful; Codex and Claude filter tool/file chatter, while other engines pass raw output through.
+- Do not kill a review just because it has been quiet for 2-5 minutes, or because it is still running under the 30-minute window. Inspect the process only after missing multiple expected heartbeats, after 30 minutes, or after an obviously failed subprocess; prefer letting the same helper command finish.
 - Tools are useful in review mode. The helper allows read-only inspection tools and web search by default so reviewers can check dependency contracts, upstream docs, and current behavior.
 - Security perspective is always included, but it should not cripple legitimate functionality. Report security findings only when the change creates a concrete, actionable risk or removes an important safety check.
+- For regression provenance, if no blamed PR is traceable, use the blamed commit as the provenance: commit SHA, date, and author username. Do not guess a merger or frame missing PR metadata as a separate finding.
 - Do not invoke built-in `codex review`, nested reviewers, or reviewer panels from inside the review. The helper builds one bundle, calls one selected engine, validates one structured result, and stops.
 - Stop as soon as the helper exits 0 with no accepted/actionable findings. Do not run an extra review just to get a nicer "clean" line, a second opinion, or clearer closeout wording.
 - Treat the helper's successful exit plus absence of actionable findings as the clean review result, even if the underlying Codex CLI output is terse.
+- Multi-reviewer panels are opt-in only. Use them when explicitly requested or when risk justifies the extra spend; the main agent still verifies every accepted finding before fixing.
 - If rejecting a finding as intentional/not worth fixing, add a brief inline code comment only when it explains a real invariant or ownership decision that future reviewers should know.
 - If `gh`/Gitcrawl reports `database disk image is malformed`, run `gitcrawl doctor --json` once to let the portable cache repair before retrying review; do not bypass the shim unless repair fails and freshness requires live GitHub.
 - If Gitcrawl reports a portable manifest mismatch, source/runtime DB health error, or stale portable-store checkout, run `gitcrawl doctor --json` and inspect `source_db_health`, `runtime_db_health`, and `portable_store_status` before falling back to live GitHub.
@@ -45,8 +50,9 @@ Dirty local work:
 ```

 Use this only when the patch is actually unstaged/staged/untracked in the
-current checkout. For committed, pushed, or PR work, point the helper at the commit
-or branch diff instead; do not force `--mode local` / `--uncommitted` just
+current checkout. `--mode uncommitted` is accepted as an alias for `--mode local`.
+For committed, pushed, or PR work, point the helper at the commit
+or branch diff instead; do not force dirty modes just
 because the helper docs mention dirty work first. A clean local review
 only proves there is no local patch.

@@ -96,6 +102,36 @@ scripts/autoreview --parallel-tests "<focused test command>"

 Tradeoff: tests may force code changes that stale the review. If tests or review lead to code edits, rerun the affected tests and rerun review until no accepted/actionable findings remain. Once that rerun exits cleanly, stop; do not spend another long review cycle on redundant confirmation.

+## Review Panels
+
+Run multiple reviewers against one frozen bundle:
+
+```bash
+<autoreview-helper> --reviewers codex,claude
+```
+
+`--panel` is shorthand for Codex plus Claude unless `--engine` changes the first reviewer:
+
+```bash
+<autoreview-helper> --panel
+```
+
+Set reviewer models and thinking/effort explicitly:
+
+```bash
+<autoreview-helper> --reviewers codex,claude --model codex=gpt-5.1 --thinking codex=high --model claude=sonnet --thinking claude=max
+```
+
+Inline syntax is also supported:
+
+```bash
+<autoreview-helper> --reviewers codex:gpt-5.1:high,claude:sonnet:max
+```
+
+Codex maps thinking to `model_reasoning_effort` and accepts `low`, `medium`,
+`high`, or `xhigh`. Claude maps thinking to `--effort` and also accepts `max`.
+Engines without a real thinking knob reject `--thinking`.
+
 ## Context Efficiency

 Run the helper directly so target selection, engine choice, structured validation, and exit status all stay in one path. If output is noisy, summarize the completed helper output after it returns; do not ask another agent or reviewer to rerun the review.
@@ -129,15 +165,18 @@ If installed from `agent-scripts`, path is:
 The helper:

 - chooses dirty local changes first
+- accepts `--mode uncommitted` as an alias for `--mode local`
 - otherwise uses current PR base if `gh pr view` works
 - otherwise uses `origin/main` for non-main branches
- supports `--engine codex`, `claude`, `droid`, `copilot`, `pi`, and `opencode`; default is `AUTOREVIEW_ENGINE` or `codex`; Codex should remain the default when nothing is set
- `--engine pi` requires an explicit `--model` because the helper isolates Pi's config directory during review
+- supports `--engine codex`, `claude`, `droid`, and `copilot`; default is `AUTOREVIEW_ENGINE` or `codex`; Codex should remain the default when nothing is set
 - use `--mode commit --commit <ref>` for already-committed work, especially clean `main` after landing
 - should be left in `--mode auto` or forced to `--mode branch` for PR/branch work; do not force `--mode local` after committing
- writes only to stdout unless `--output` or `--json-output` is set
+- writes only to stdout unless `--output` or `--json-output` is set, or engine stderr is being streamed live
 - supports `--dry-run`, `--parallel-tests`, `--prompt`, `--prompt-file`, `--dataset`, `--no-tools`, `--no-web-search`, and commit refs
+- supports `--stream-engine-output` or `AUTOREVIEW_STREAM_ENGINE_OUTPUT=1` for live engine text while preserving structured validation; Codex and Claude hide tool/file event details, emit compact activity summaries, and report usage at turn completion
+- supports opt-in review panels with `--panel` / `--reviewers`, plus per-engine `--model` and `--thinking`
 - allows read-only tools and web search by default where the selected CLI supports them; forbids nested review in the prompt; Codex is run through `codex exec` with read-only sandbox and structured output
+- prints `review still running: <engine> elapsed=<seconds>s pid=<pid>` to stderr periodically (every 60 seconds by default) while waiting for the selected review engine, unless streamed output or compact Codex activity has been visible recently
 - prints `autoreview clean: no accepted/actionable findings reported` when the selected review command exits 0
 - exits nonzero when accepted/actionable findings are present

--- a/.agents/skills/autoreview/scripts/autoreview
+++ b/.agents/skills/autoreview/scripts/autoreview
@@ -2,16 +2,28 @@
 from __future__ import annotations

 import argparse
+import concurrent.futures
+import copy
 import json
 import os
-import re
+import queue
 import subprocess
 import sys
 import tempfile
 import textwrap
+import threading
 import time
 from pathlib import Path
-from typing import Any
+from typing import Any, Callable
+
+
+ENGINES = ("codex", "claude", "droid", "copilot")
+THINKING_LEVELS_BY_ENGINE = {
+    "codex": {"low", "medium", "high", "xhigh"},
+    "claude": {"low", "medium", "high", "xhigh", "max"},
+    "droid": set(),
+    "copilot": set(),
+}


 SCHEMA: dict[str, Any] = {
@@ -68,19 +80,11 @@ SCHEMA: dict[str, Any] = {
 }


-def run(
-    args: list[str],
-    cwd: Path,
-    *,
-    input_text: str | None = None,
-    env: dict[str, str] | None = None,
-    check: bool = True,
-) -> subprocess.CompletedProcess[str]:
+def run(args: list[str], cwd: Path, *, input_text: str | None = None, check: bool = True) -> subprocess.CompletedProcess[str]:
    result = subprocess.run(
        args,
        cwd=cwd,
        input=input_text,
-        env=env,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
@@ -91,6 +95,124 @@ def run(
    return result


+def run_with_heartbeat(
+    args: list[str],
+    cwd: Path,
+    *,
+    input_text: str | None = None,
+    label: str,
+    heartbeat_seconds: int = 60,
+    stream_output: bool = False,
+    stream_display: Callable[[str, str], str | None] | None = None,
+) -> subprocess.CompletedProcess[str]:
+    if stream_output:
+        return run_with_stream(
+            args,
+            cwd,
+            input_text=input_text,
+            label=label,
+            heartbeat_seconds=heartbeat_seconds,
+            stream_display=stream_display,
+        )
+    started = time.monotonic()
+    proc = subprocess.Popen(
+        args,
+        cwd=cwd,
+        stdin=subprocess.PIPE if input_text is not None else None,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+    )
+    first_communicate = True
+    while True:
+        try:
+            stdout, stderr = proc.communicate(
+                input=input_text if first_communicate else None,
+                timeout=heartbeat_seconds,
+            )
+            return subprocess.CompletedProcess(args, int(proc.returncode or 0), stdout, stderr)
+        except subprocess.TimeoutExpired:
+            first_communicate = False
+            elapsed = int(time.monotonic() - started)
+            print(f"review still running: {label} elapsed={elapsed}s pid={proc.pid}", file=sys.stderr, flush=True)
+
+
+def run_with_stream(
+    args: list[str],
+    cwd: Path,
+    *,
+    input_text: str | None,
+    label: str,
+    heartbeat_seconds: int,
+    stream_display: Callable[[str, str], str | None] | None,
+) -> subprocess.CompletedProcess[str]:
+    started = time.monotonic()
+    proc = subprocess.Popen(
+        args,
+        cwd=cwd,
+        stdin=subprocess.PIPE if input_text is not None else None,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        bufsize=1,
+    )
+    events: queue.Queue[tuple[str, str | None]] = queue.Queue()
+    stdout_parts: list[str] = []
+    stderr_parts: list[str] = []
+
+    def read_stream(name: str, stream: Any) -> None:
+        try:
+            for line in iter(stream.readline, ""):
+                events.put((name, line))
+        finally:
+            events.put((name, None))
+
+    def write_stdin() -> None:
+        if proc.stdin is None or input_text is None:
+            return
+        try:
+            proc.stdin.write(input_text)
+            proc.stdin.close()
+        except BrokenPipeError:
+            return
+
+    threads = [
+        threading.Thread(target=read_stream, args=("stdout", proc.stdout), daemon=True),
+        threading.Thread(target=read_stream, args=("stderr", proc.stderr), daemon=True),
+    ]
+    for thread in threads:
+        thread.start()
+    stdin_thread = threading.Thread(target=write_stdin, daemon=True)
+    stdin_thread.start()
+
+    open_streams = 2
+    while open_streams:
+        try:
+            name, line = events.get(timeout=heartbeat_seconds)
+        except queue.Empty:
+            elapsed = int(time.monotonic() - started)
+            print(f"review still running: {label} elapsed={elapsed}s pid={proc.pid}", file=sys.stderr, flush=True)
+            continue
+        if line is None:
+            open_streams -= 1
+            continue
+        if name == "stdout":
+            stdout_parts.append(line)
+        else:
+            stderr_parts.append(line)
+        display = stream_display(name, line) if stream_display else line
+        if display:
+            target = sys.stdout if name == "stdout" else sys.stderr
+            target.write(display)
+            target.flush()
+
+    for thread in threads:
+        thread.join()
+    stdin_thread.join(timeout=1)
+    returncode = proc.wait()
+    return subprocess.CompletedProcess(args, returncode, "".join(stdout_parts), "".join(stderr_parts))
+
+
 def git(repo: Path, *args: str, check: bool = True) -> str:
    return run(["git", *args], repo, check=check).stdout

@@ -116,6 +238,7 @@ def is_dirty(repo: Path) -> bool:


 def choose_target(repo: Path, mode: str, base_ref: str | None) -> tuple[str, str | None]:
+    mode = "local" if mode == "uncommitted" else mode
    branch = current_branch(repo)
    if mode == "local" or (mode == "auto" and is_dirty(repo)):
        return "local", None
@@ -148,6 +271,13 @@ def bounded(text: str, limit: int = 180_000) -> str:
    return text[:limit] + f"\n\n[truncated at {limit} characters]\n"


+def bounded_field(text: str, limit: int) -> str:
+    if len(text) <= limit:
+        return text
+    suffix = "\n\n[truncated]"
+    return text[: max(0, limit - len(suffix))] + suffix
+
+
 def read_text(path: Path, limit: int = 40_000) -> str:
    try:
        data = path.read_bytes()
@@ -294,9 +424,13 @@ def run_codex(args: argparse.Namespace, repo: Path, prompt: str) -> str:
        cmd.append("--search")
    if args.model:
        cmd.extend(["--model", args.model])
+    if args.thinking:
+        cmd.extend(["-c", f'model_reasoning_effort="{args.thinking}"'])
+    cmd.append("exec")
+    if args.stream_engine_output:
+        cmd.append("--json")
    cmd.extend(
        [
-            "exec",
            "--ephemeral",
            "-C",
            str(repo),
@@ -309,7 +443,14 @@ def run_codex(args: argparse.Namespace, repo: Path, prompt: str) -> str:
            "-",
        ]
    )
-    result = run(cmd, repo, input_text=prompt, check=False)
+    result = run_with_heartbeat(
+        cmd,
+        repo,
+        input_text=prompt,
+        label="codex",
+        stream_output=args.stream_engine_output,
+        stream_display=CodexStreamDisplay() if args.stream_engine_output else None,
+    )
    try:
        output = output_path.read_text()
    finally:
@@ -326,7 +467,7 @@ def run_claude(args: argparse.Namespace, repo: Path, prompt: str) -> str:
        "--print",
        "--no-session-persistence",
        "--output-format",
-        "json",
+        "stream-json" if args.stream_engine_output else "json",
        "--json-schema",
        json.dumps(SCHEMA),
    ]
@@ -334,15 +475,28 @@ def run_claude(args: argparse.Namespace, repo: Path, prompt: str) -> str:
        cmd.extend(["--allowedTools", claude_allowed_tools(args)])
    else:
        cmd.extend(["--tools", ""])
+    if args.stream_engine_output:
+        cmd.append("--verbose")
    if args.model:
        cmd.extend(["--model", args.model])
-    result = run(cmd, repo, input_text=prompt, check=False)
+    if args.thinking:
+        cmd.extend(["--effort", args.thinking])
+    result = run_with_heartbeat(
+        cmd,
+        repo,
+        input_text=prompt,
+        label="claude",
+        stream_output=args.stream_engine_output,
+        stream_display=ClaudeStreamDisplay() if args.stream_engine_output else None,
+    )
    if result.returncode != 0:
        raise SystemExit(f"claude engine failed ({result.returncode})\n{result.stderr or result.stdout}")
    return result.stdout


 def run_droid(args: argparse.Namespace, repo: Path, prompt: str) -> str:
+    if args.thinking:
+        raise SystemExit("--thinking is not supported by the droid engine")
    prompt_path = Path(tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False).name)
    prompt_path.write_text(prompt)
    cmd = [
@@ -359,7 +513,7 @@ def run_droid(args: argparse.Namespace, repo: Path, prompt: str) -> str:
        cmd.extend(["--model", args.model])
    if not args.tools:
        cmd.extend(["--disabled-tools", "*"])
-    result = run(cmd, repo, check=False)
+    result = run_with_heartbeat(cmd, repo, label="droid", stream_output=args.stream_engine_output)
    prompt_path.unlink(missing_ok=True)
    if result.returncode != 0:
        raise SystemExit(f"droid engine failed ({result.returncode})\n{result.stderr or result.stdout}")
@@ -367,6 +521,8 @@ def run_droid(args: argparse.Namespace, repo: Path, prompt: str) -> str:


 def run_copilot(args: argparse.Namespace, repo: Path, prompt: str) -> str:
+    if args.thinking:
+        raise SystemExit("--thinking is not supported by the copilot engine")
    if not args.tools:
        raise SystemExit("--no-tools is not supported by the copilot engine; copilot requires a read-only file view tool to load the review bundle without exposing it in argv")
    with tempfile.TemporaryDirectory(prefix="autoreview-copilot.") as tempdir:
@@ -382,7 +538,7 @@ def run_copilot(args: argparse.Namespace, repo: Path, prompt: str) -> str:
            "--output-format",
            "json",
            "--stream",
-            "off",
+            "on" if args.stream_engine_output else "off",
            "--no-ask-user",
            "--disable-builtin-mcps",
        ]
@@ -399,131 +555,140 @@ def run_copilot(args: argparse.Namespace, repo: Path, prompt: str) -> str:
        )
        if args.web_search:
            cmd.append("--allow-all-urls")
-        result = run(cmd, Path(tempdir), check=False)
+        result = run_with_heartbeat(cmd, Path(tempdir), label="copilot", stream_output=args.stream_engine_output)
    if result.returncode != 0:
        raise SystemExit(f"copilot engine failed ({result.returncode})\n{result.stderr or result.stdout}")
    return result.stdout


-def run_pi(args: argparse.Namespace, repo: Path, prompt: str) -> str:
-    if not args.tools:
-        raise SystemExit("--no-tools is not supported by the pi engine; use --tools read-only allowlist for review")
-    if not args.model:
-        raise SystemExit("--engine pi requires --model because autoreview isolates PI_CODING_AGENT_DIR from user settings")
-    with tempfile.TemporaryDirectory(prefix="autoreview-pi.") as tempdir:
-        temp = Path(tempdir)
-        prompt_path = temp / "prompt.txt"
-        prompt_path.write_text(prompt)
-        os.chmod(prompt_path, 0o600)
-        env = os.environ.copy()
-        agent_dir = temp / "agent"
-        agent_dir.mkdir()
-        env["PI_CODING_AGENT_DIR"] = str(agent_dir)
-        env["PI_CODING_AGENT_SESSION_DIR"] = str(temp / "sessions")
-        env["PI_TELEMETRY"] = "0"
-        cmd = [
-            args.pi_bin,
-            "--no-session",
-            "--no-context-files",
-            "--no-extensions",
-            "--no-skills",
-            "--no-prompt-templates",
-            "--no-themes",
-            "--tools",
-            pi_readonly_tools(args),
-            "--mode",
-            "json",
-        ]
-        if args.model:
-            cmd.extend(["--model", args.model])
-        cmd.extend(["-p", f"@{prompt_path}", "Read the attached review prompt and follow it exactly."])
-        result = run(cmd, repo, env=env, check=False)
-    if result.returncode != 0:
-        raise SystemExit(f"pi engine failed ({result.returncode})\n{result.stderr or result.stdout}")
-    return result.stdout
+class CodexStreamDisplay:
+    def __init__(self, *, activity_seconds: int = 20) -> None:
+        self.activity_seconds = activity_seconds
+        self.hidden_events = 0
+        self.last_visible = time.monotonic()
+
+    def __call__(self, name: str, line: str) -> str | None:
+        if name != "stdout":
+            return line
+        try:
+            event = json.loads(line)
+        except json.JSONDecodeError:
+            return self.visible(line)
+        event_type = event.get("type")
+        if event_type == "thread.started":
+            return self.visible(f"codex thread: {event.get('thread_id', '<unknown>')}\n")
+        if event_type == "turn.started":
+            return self.visible("codex turn started\n")
+        if event_type == "turn.completed":
+            usage = event.get("usage")
+            message = format_codex_usage(usage) + "\n" if isinstance(usage, dict) else "codex turn completed\n"
+            return self.visible(self.flush_hidden() + message)
+        item = event.get("item")
+        if isinstance(item, dict) and item.get("type") == "agent_message" and isinstance(item.get("text"), str):
+            return self.visible(self.flush_hidden() + item["text"].rstrip() + "\n")
+        return self.hidden_activity()
+
+    def hidden_activity(self) -> str | None:
+        self.hidden_events += 1
+        if time.monotonic() - self.last_visible < self.activity_seconds:
+            return None
+        return self.visible(self.flush_hidden())
+
+    def flush_hidden(self) -> str:
+        if not self.hidden_events:
+            return ""
+        count = self.hidden_events
+        self.hidden_events = 0
+        return f"codex activity: {count} hidden tool/status events\n"
+
+    def visible(self, text: str) -> str:
+        self.last_visible = time.monotonic()
+        return text


-def run_opencode(args: argparse.Namespace, repo: Path, prompt: str) -> str:
-    if not args.tools:
-        raise SystemExit("--no-tools is not supported by the opencode engine; opencode requires read-only tools to load the review bundle")
-    with tempfile.TemporaryDirectory(prefix="autoreview-opencode.") as tempdir:
-        temp = Path(tempdir)
-        config_dir = temp / "config"
-        config_dir.mkdir()
-        prompt_path = temp / "prompt.txt"
-        prompt_path.write_text(prompt)
-        os.chmod(prompt_path, 0o600)
-        env = os.environ.copy()
-        env.update(
-            {
-                "OPENCODE_CONFIG_DIR": str(config_dir),
-                "OPENCODE_CONFIG_CONTENT": json.dumps(opencode_review_config(args)),
-                "OPENCODE_DISABLE_PROJECT_CONFIG": "1",
-                "OPENCODE_PURE": "1",
-                "OPENCODE_DISABLE_AUTOUPDATE": "1",
-                "OPENCODE_DISABLE_AUTOCOMPACT": "1",
-                "OPENCODE_DISABLE_MODELS_FETCH": "1",
-            }
-        )
-        cmd = [
-            args.opencode_bin,
-            "run",
-            "--pure",
-            "--format",
-            "json",
-            "--agent",
-            "autoreview",
-            "--dir",
-            str(repo),
-            "-f",
-            str(prompt_path),
-        ]
-        if args.model:
-            cmd.extend(["--model", args.model])
-        cmd.append("Read the attached review prompt and follow it exactly. Return only the requested JSON object.")
-        result = run(cmd, repo, env=env, check=False)
-    if result.returncode != 0:
-        raise SystemExit(f"opencode engine failed ({result.returncode})\n{result.stderr or result.stdout}")
-    return result.stdout
+class ClaudeStreamDisplay:
+    def __init__(self, *, activity_seconds: int = 20) -> None:
+        self.activity_seconds = activity_seconds
+        self.hidden_events = 0
+        self.last_visible = time.monotonic()
+        self.started = False
+
+    def __call__(self, name: str, line: str) -> str | None:
+        if name != "stdout":
+            return line
+        try:
+            event = json.loads(line)
+        except json.JSONDecodeError:
+            return self.visible(line)
+        event_type = event.get("type")
+        if event_type == "system" and not self.started:
+            self.started = True
+            return self.visible("claude turn started\n")
+        if event_type == "assistant":
+            return self.assistant_message(event)
+        if event_type == "result":
+            return self.visible(self.flush_hidden() + self.result_summary(event))
+        return self.hidden_activity()
+
+    def assistant_message(self, event: dict[str, Any]) -> str | None:
+        message = event.get("message")
+        if not isinstance(message, dict):
+            return self.hidden_activity()
+        chunks: list[str] = []
+        for item in message.get("content", []):
+            if not isinstance(item, dict):
+                continue
+            if item.get("type") == "text" and isinstance(item.get("text"), str):
+                chunks.append(item["text"].rstrip())
+        if chunks:
+            return self.visible(self.flush_hidden() + "\n".join(chunks) + "\n")
+        return self.hidden_activity()
+
+    def result_summary(self, event: dict[str, Any]) -> str:
+        usage = event.get("usage")
+        fields: list[str] = []
+        if isinstance(usage, dict):
+            for key in (
+                "input_tokens",
+                "cache_read_input_tokens",
+                "cache_creation_input_tokens",
+                "output_tokens",
+            ):
+                value = usage.get(key)
+                if isinstance(value, int):
+                    fields.append(f"{key}={value}")
+        cost = event.get("total_cost_usd")
+        if isinstance(cost, (int, float)) and not isinstance(cost, bool):
+            fields.append(f"cost_usd={cost:.6f}")
+        return "claude usage: " + " ".join(fields) + "\n" if fields else "claude turn completed\n"
+
+    def hidden_activity(self) -> str | None:
+        self.hidden_events += 1
+        if time.monotonic() - self.last_visible < self.activity_seconds:
+            return None
+        return self.visible(self.flush_hidden())
+
+    def flush_hidden(self) -> str:
+        if not self.hidden_events:
+            return ""
+        count = self.hidden_events
+        self.hidden_events = 0
+        return f"claude activity: {count} hidden tool/status events\n"
+
+    def visible(self, text: str) -> str:
+        self.last_visible = time.monotonic()
+        return text


-def pi_readonly_tools(args: argparse.Namespace) -> str:
-    return "read,grep,find,ls"
-
-
-def opencode_review_config(args: argparse.Namespace) -> dict[str, Any]:
-    permission = {
-        "*": "deny",
-        "read": "allow",
-        "grep": "allow",
-        "glob": "allow",
-        "list": "allow",
-        "edit": "deny",
-        "bash": "deny",
-        "task": "deny",
-        "todowrite": "deny",
-        "question": "deny",
-        "repo_clone": "deny",
-        "repo_overview": "deny",
-        "skill": "deny",
-    }
-    if args.web_search:
-        permission.update(
-            {
-                "webfetch": "allow",
-                "websearch": "allow",
-            }
-        )
-    return {
-        "agent": {
-            "autoreview": {
-                "description": "Read-only structured code review agent",
-                "mode": "primary",
-                "steps": 8,
-                "permission": permission,
-            }
-        }
-    }
+def format_codex_usage(usage: dict[str, Any]) -> str:
+    fields = [
+        "input_tokens",
+        "cached_input_tokens",
+        "output_tokens",
+        "reasoning_output_tokens",
+    ]
+    parts = [f"{field}={usage[field]}" for field in fields if isinstance(usage.get(field), int)]
+    return "codex usage: " + " ".join(parts) if parts else "codex usage: unavailable"


 def claude_allowed_tools(args: argparse.Namespace) -> str:
@@ -563,8 +728,7 @@ def extract_json(text: str) -> dict[str, Any]:


 def extract_json_from_jsonl(text: str) -> dict[str, Any] | None:
-    candidates: list[str] = []
-    assistant_stream: list[str] = []
+    candidates: list[str | dict[str, Any]] = []
    for line in text.splitlines():
        line = line.strip()
        if not line:
@@ -575,65 +739,27 @@ def extract_json_from_jsonl(text: str) -> dict[str, Any] | None:
            continue
        if not isinstance(event, dict):
            continue
-        if isinstance(event.get("text"), str):
-            candidates.append(event["text"])
-            assistant_stream.append(event["text"])
-        if isinstance(event.get("delta"), str):
-            assistant_stream.append(event["delta"])
        part = event.get("part")
        if isinstance(part, dict) and isinstance(part.get("text"), str):
            candidates.append(part["text"])
-            assistant_stream.append(part["text"])
-        assistant_event = event.get("assistantMessageEvent")
-        if isinstance(assistant_event, dict):
-            if isinstance(assistant_event.get("content"), str):
-                candidates.append(assistant_event["content"])
-            if isinstance(assistant_event.get("delta"), str):
-                assistant_stream.append(assistant_event["delta"])
-            partial = assistant_event.get("partial")
-            if isinstance(partial, dict):
-                candidates.extend(extract_text_blocks(partial.get("content")))
        data = event.get("data")
        if isinstance(data, dict) and isinstance(data.get("content"), str):
            candidates.append(data["content"])
        if isinstance(event.get("result"), str):
            candidates.append(event["result"])
-        message = event.get("message")
-        if isinstance(message, dict):
-            texts = extract_text_blocks(message.get("content"))
-            candidates.extend(texts)
-            if message.get("role") == "assistant":
-                assistant_stream.extend(texts)
-        messages = event.get("messages")
-        if isinstance(messages, list):
-            for item in messages:
-                if not isinstance(item, dict):
-                    continue
-                texts = extract_text_blocks(item.get("content"))
-                candidates.extend(texts)
-                if item.get("role") == "assistant":
-                    assistant_stream.extend(texts)
-    if assistant_stream:
-        candidates.append("".join(assistant_stream))
+        if isinstance(event.get("structured_output"), dict):
+            candidates.append(event["structured_output"])
    for candidate in reversed(candidates):
+        if isinstance(candidate, dict):
+            if "findings" in candidate:
+                return candidate
+            continue
        parsed = parse_json_candidate(candidate)
        if isinstance(parsed, dict) and "findings" in parsed:
            return parsed
    return None


-def extract_text_blocks(value: Any) -> list[str]:
-    if isinstance(value, str):
-        return [value]
-    if not isinstance(value, list):
-        return []
-    result: list[str] = []
-    for item in value:
-        if isinstance(item, dict) and isinstance(item.get("text"), str):
-            result.append(item["text"])
-    return result
-
-
 def parse_json_candidate(text: str) -> Any | None:
    stripped = text.strip()
    if stripped.startswith("```"):
@@ -643,30 +769,14 @@ def parse_json_candidate(text: str) -> Any | None:
    try:
        parsed = json.loads(stripped)
    except json.JSONDecodeError:
-        repaired = repair_invalid_json_escapes(stripped)
-        if repaired == stripped:
-            return None
-        try:
-            parsed = json.loads(repaired)
-        except json.JSONDecodeError:
-            return None
+        return None
    if isinstance(parsed, str) and parsed != text:
        nested = parse_json_candidate(parsed)
        return nested if nested is not None else parsed
    return parsed


-def repair_invalid_json_escapes(text: str) -> str:
-    return re.sub(r'\\(?!["\\/bfnrtu])', "", text)
-
-
-def validate_report(
-    report: dict[str, Any],
-    repo: Path,
-    changed_paths: set[str],
-    required: list[str],
-    required_any: list[str],
-) -> None:
+def validate_report(report: dict[str, Any], repo: Path, changed_paths: set[str], required: list[str]) -> None:
    allowed_top = {"findings", "overall_correctness", "overall_explanation", "overall_confidence"}
    extra_top = set(report) - allowed_top
    if extra_top:
@@ -685,6 +795,8 @@ def validate_report(
    if not number_in_range(report.get("overall_confidence")):
        raise SystemExit("review JSON overall_confidence must be numeric")
    finding_text = ""
+    kept_findings: list[dict[str, Any]] = []
+    ignored_findings: list[tuple[int, dict[str, Any], str, int]] = []
    for index, finding in enumerate(report["findings"]):
        if not isinstance(finding, dict):
            raise SystemExit(f"finding {index} must be an object")
@@ -719,30 +831,42 @@ def validate_report(
        if Path(rel).is_absolute() or ".." in Path(rel).parts:
            raise SystemExit(f"finding {index} uses invalid file path: {rel}")
        if rel not in changed_paths:
-            raise SystemExit(f"finding {index} points to a file outside the reviewed change: {rel}")
+            ignored_findings.append((index, finding, rel, line))
+            continue
+        kept_findings.append(finding)
        finding_text += "\n" + json.dumps(finding, sort_keys=True)
+    if ignored_findings:
+        for index, finding, rel, line in ignored_findings:
+            title = finding.get("title", "<untitled>")
+            print(
+                f"autoreview ignored out-of-scope finding {index}: {title} ({rel}:{line})",
+                file=sys.stderr,
+            )
+            print(bounded_field(str(finding.get("body", "")), 500), file=sys.stderr)
+        report["findings"] = kept_findings
+        if not kept_findings and report["overall_correctness"] == "patch is incorrect":
+            note = f"Ignored {len(ignored_findings)} out-of-scope finding(s) outside the reviewed change."
+            explanation = report["overall_explanation"].rstrip()
+            report["overall_correctness"] = "patch is correct"
+            report["overall_explanation"] = bounded_field(f"{explanation}\n\n{note}", 3000)
    haystack = finding_text.lower()
    for needle in required:
        if needle.lower() not in haystack:
            raise SystemExit(f"required finding text not found: {needle}")
-    for group in required_any:
-        needles = [needle.strip().lower() for needle in group.split(",") if needle.strip()]
-        if needles and not any(needle in haystack for needle in needles):
-            raise SystemExit(f"required finding text not found; need one of: {', '.join(needles)}")


 def number_in_range(value: Any) -> bool:
    return isinstance(value, (int, float)) and not isinstance(value, bool) and 0 <= value <= 1


-def print_report(report: dict[str, Any]) -> None:
+def print_report(report: dict[str, Any], *, label: str = "autoreview") -> None:
    findings = report["findings"]
    if findings:
-        print(f"autoreview findings: {len(findings)}")
+        print(f"{label} findings: {len(findings)}")
    elif report["overall_correctness"] == "patch is incorrect":
-        print("autoreview verdict: patch is incorrect without discrete findings")
+        print(f"{label} verdict: patch is incorrect without discrete findings")
    else:
-        print("autoreview clean: no accepted/actionable findings reported")
+        print(f"{label} clean: no accepted/actionable findings reported")
    for finding in findings:
        loc = finding["code_location"]
        print(f"[{finding['priority']}] {finding['title']}")
@@ -766,18 +890,20 @@ def finish_parallel_tests(proc: subprocess.Popen, started: float) -> int:

 def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Bundle-driven AI code review.")
-    parser.add_argument("--mode", choices=["auto", "local", "branch", "commit"], default="auto")
+    parser.add_argument("--mode", choices=["auto", "local", "uncommitted", "branch", "commit"], default="auto")
    parser.add_argument("--base")
    parser.add_argument("--commit", default="HEAD")
-    parser.add_argument("--engine", choices=["codex", "claude", "droid", "copilot", "pi", "opencode"], default=os.environ.get("AUTOREVIEW_ENGINE", "codex"))
-    parser.add_argument("--model")
+    parser.add_argument("--engine", choices=ENGINES, default=os.environ.get("AUTOREVIEW_ENGINE", "codex"))
+    parser.add_argument("--reviewers", help="Comma-separated review panel, e.g. codex,claude or codex:gpt-5:high.")
+    parser.add_argument("--panel", action="store_true", help="Run a Codex/Claude review panel unless --engine changes the first reviewer.")
+    parser.add_argument("--model", action="append", help="Model for all reviewers or engine=model. Repeatable.")
+    parser.add_argument("--thinking", action="append", help="Thinking/effort for all reviewers or engine=level. Repeatable. Codex: low, medium, high, xhigh. Claude: low, medium, high, xhigh, max.")
+    parser.add_argument("--allow-partial-panel", action="store_true", help="Continue panel output when one reviewer fails.")
    parser.add_argument("--codex-bin", default=os.environ.get("CODEX_BIN", "codex"))
    parser.add_argument("--claude-bin", default=os.environ.get("CLAUDE_BIN", "claude"))
    parser.add_argument("--droid-bin", default=os.environ.get("DROID_BIN", "droid"))
    parser.add_argument("--copilot-bin", default=os.environ.get("COPILOT_BIN", "copilot"))
-    parser.add_argument("--pi-bin", default=os.environ.get("PI_BIN", "pi"))
-    parser.add_argument("--opencode-bin", default=os.environ.get("OPENCODE_BIN", "opencode"))
-    parser.add_argument("--no-tools", dest="tools", action="store_false", default=True, help="Disable tools for engines that support it. Codex, copilot, pi, and opencode reject no-tools review.")
+    parser.add_argument("--no-tools", dest="tools", action="store_false", default=True, help="Disable tools for engines that support it. Codex and copilot reject no-tools review.")
    parser.add_argument("--no-web-search", dest="web_search", action="store_false", default=True)
    parser.add_argument(
        "--claude-allowed-tools",
@@ -791,13 +917,18 @@ def parse_args() -> argparse.Namespace:
    parser.add_argument("--dataset", action="append", help="Extra evidence file to include in the review bundle.")
    parser.add_argument("--output", help="Write human output to a file as well as stdout.")
    parser.add_argument("--json-output", help="Write validated structured review JSON.")
+    parser.add_argument(
+        "--stream-engine-output",
+        action="store_true",
+        default=os.environ.get("AUTOREVIEW_STREAM_ENGINE_OUTPUT") == "1",
+        help="Stream review engine output while preserving buffered output for validation. Codex output is filtered to hide tool/file chatter.",
+    )
    parser.add_argument("--parallel-tests", help="Run a test command concurrently with review; failure fails the helper.")
    parser.add_argument("--require-finding", action="append", default=[], help="Require finding text to contain this substring.")
-    parser.add_argument("--require-any-finding", action="append", default=[], help="Require finding text to contain at least one comma-separated substring.")
    parser.add_argument("--expect-findings", action="store_true", help="Treat findings as success; for harness acceptance tests.")
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()
-    if args.engine not in {"codex", "claude", "droid", "copilot", "pi", "opencode"}:
+    if args.engine not in ENGINES:
        raise SystemExit(f"invalid --engine/AUTOREVIEW_ENGINE: {args.engine}")
    return args

@@ -811,20 +942,171 @@ def run_engine(args: argparse.Namespace, repo: Path, prompt: str) -> str:
        return run_droid(args, repo, prompt)
    if args.engine == "copilot":
        return run_copilot(args, repo, prompt)
-    if args.engine == "pi":
-        return run_pi(args, repo, prompt)
-    if args.engine == "opencode":
-        return run_opencode(args, repo, prompt)
    raise SystemExit(f"unsupported engine: {args.engine}")


+def parse_keyed_options(values: list[str] | None, option: str) -> tuple[str | None, dict[str, str]]:
+    """Split a repeatable CLI option into a global value and per-engine overrides.
+
+    Each entry is either a bare value, which becomes the single global value,
+    or ``engine=value``, which is stored for that engine only.
+
+    Args:
+        values: Raw option values as collected by argparse, or None when the
+            option was not given.
+        option: Option name without the leading dashes; used in error text.
+
+    Returns:
+        ``(global_value, per_engine)`` where ``global_value`` is None when no
+        bare value was supplied.
+
+    Raises:
+        SystemExit: on an empty entry, an engine not in ENGINES, an empty
+            per-engine value, or a duplicate global/per-engine value.
+    """
+    shared: str | None = None
+    overrides: dict[str, str] = {}
+    for entry in values or []:
+        text = entry.strip()
+        if not text:
+            raise SystemExit(f"--{option} cannot be empty")
+        if "=" not in text:
+            # Bare value: applies to every reviewer, allowed only once.
+            if shared is not None:
+                raise SystemExit(f"--{option} global value specified more than once")
+            shared = text
+            continue
+        # Only the first "=" separates engine from value; later ones stay in the value.
+        engine, _, engine_value = text.partition("=")
+        engine = engine.strip()
+        engine_value = engine_value.strip()
+        if engine not in ENGINES:
+            raise SystemExit(f"--{option} uses unknown engine: {engine}")
+        if not engine_value:
+            raise SystemExit(f"--{option} for {engine} cannot be empty")
+        if engine in overrides:
+            raise SystemExit(f"--{option} specified more than once for {engine}")
+        overrides[engine] = engine_value
+    return shared, overrides
+
+
+def parse_reviewer_token(token: str) -> tuple[str, str | None, str | None]:
+    """Parse one ``engine[:model[:thinking]]`` reviewer spec.
+
+    Returns:
+        ``(engine, model, thinking)``; ``model`` and ``thinking`` are None when
+        the corresponding field is missing or blank.
+
+    Raises:
+        SystemExit: when the spec has more than three fields, the engine field
+            is blank, or the engine is not in ENGINES.
+    """
+    fields = [field.strip() for field in token.split(":")]
+    if not fields[0] or len(fields) > 3:
+        raise SystemExit(f"invalid reviewer spec: {token}")
+    # Pad with blanks so the optional model/thinking slots always exist.
+    engine, model, thinking = (fields + ["", ""])[:3]
+    if engine not in ENGINES:
+        raise SystemExit(f"unknown reviewer engine: {engine}")
+    return engine, model or None, thinking or None
+
+
+def reviewer_args(args: argparse.Namespace) -> list[argparse.Namespace]:
+    """Expand the parsed CLI arguments into one namespace per reviewer.
+
+    Reviewer selection, first match wins:
+      1. ``--reviewers``: comma-separated ``engine[:model[:thinking]]`` specs;
+         the single token ``all`` expands to every entry in ENGINES.
+      2. ``--panel``: ``--engine`` first, then codex and claude unless already
+         listed.
+      3. Otherwise only ``--engine``.
+
+    Model and thinking are resolved per reviewer in this order: inline spec
+    value, ``engine=value`` option, global option value.
+
+    Returns:
+        Shallow copies of ``args`` whose ``engine``, ``model`` and ``thinking``
+        attributes hold the resolved values for that reviewer.
+
+    Raises:
+        SystemExit: when ``--reviewers`` names no reviewer, a reviewer engine
+            is listed twice, or a thinking level is not valid for its engine
+            (in addition to errors raised while parsing the options/specs).
+    """
+    global_model, model_by_engine = parse_keyed_options(args.model, "model")
+    global_thinking, thinking_by_engine = parse_keyed_options(args.thinking, "thinking")
+    reviewers: list[tuple[str, str | None, str | None]] = []
+    if args.reviewers:
+        tokens = [token.strip() for token in args.reviewers.split(",") if token.strip()]
+        if not tokens:
+            # A value such as "," or " " is truthy but names nobody. Without
+            # this guard an empty reviewer list reaches run_panel, where
+            # ThreadPoolExecutor(max_workers=0) raises ValueError.
+            raise SystemExit("--reviewers must name at least one reviewer")
+        if len(tokens) == 1 and tokens[0] == "all":
+            tokens = list(ENGINES)
+        reviewers = [parse_reviewer_token(token) for token in tokens]
+    elif args.panel:
+        engines = [args.engine]
+        for engine in ("codex", "claude"):
+            if engine not in engines:
+                engines.append(engine)
+        reviewers = [(engine, None, None) for engine in engines]
+    else:
+        reviewers = [(args.engine, None, None)]
+
+    seen: set[str] = set()
+    result: list[argparse.Namespace] = []
+    for engine, inline_model, inline_thinking in reviewers:
+        if engine in seen:
+            raise SystemExit(f"reviewer specified more than once: {engine}")
+        seen.add(engine)
+        model = inline_model or model_by_engine.get(engine) or global_model
+        thinking = inline_thinking or thinking_by_engine.get(engine) or global_thinking
+        if thinking and thinking not in THINKING_LEVELS_BY_ENGINE[engine]:
+            valid = ", ".join(sorted(THINKING_LEVELS_BY_ENGINE[engine])) or "none"
+            raise SystemExit(f"invalid thinking level for {engine}: {thinking} (valid: {valid})")
+        # Shallow copy: only the three per-reviewer attributes are replaced.
+        clone = copy.copy(args)
+        clone.engine = engine
+        clone.model = model
+        clone.thinking = thinking
+        result.append(clone)
+    return result
+
+
+def reviewer_label(args: argparse.Namespace) -> str:
+    """Return a display label: the engine, then ``model=...`` and ``thinking=...`` when set."""
+    extras = [
+        f"{name}={value}"
+        for name, value in (("model", args.model), ("thinking", args.thinking))
+        if value
+    ]
+    return " ".join([args.engine, *extras])
+
+
+def run_reviewer(args: argparse.Namespace, repo: Path, prompt: str, changed_paths: set[str], required: list[str]) -> dict[str, Any]:
+    """Run one review engine and return its report after extraction and validation."""
+    report = extract_json(run_engine(args, repo, prompt))
+    validate_report(report, repo, changed_paths, required)
+    return report
+
+
+def merge_panel_reports(reports: list[tuple[str, dict[str, Any]]]) -> dict[str, Any]:
+    """Combine labelled reviewer reports into a single panel report.
+
+    Findings are de-duplicated on (file path, line, category, title with case
+    and whitespace normalized); the first occurrence in ``reports`` order wins
+    and its body is prefixed with the reviewer label. The panel verdict is
+    "patch is incorrect" when any finding survives or any reviewer said so.
+    The confidence is the highest reviewer confidence (0.5 for no reports).
+    """
+    merged_findings: list[dict[str, Any]] = []
+    fingerprints: set[tuple[str, int, str, str]] = set()
+    for label, report in reports:
+        for finding in report["findings"]:
+            where = finding["code_location"]
+            fingerprint = (
+                where["file_path"],
+                where["line"],
+                finding["category"],
+                " ".join(finding["title"].lower().split()),
+            )
+            if fingerprint not in fingerprints:
+                fingerprints.add(fingerprint)
+                # Copy so the reviewer's own report is left untouched.
+                entry = copy.deepcopy(finding)
+                entry["body"] = bounded_field(f"Reviewer: {label}\n\n{entry['body']}", 2000)
+                merged_findings.append(entry)
+    if merged_findings or any(report["overall_correctness"] == "patch is incorrect" for _, report in reports):
+        verdict = "patch is incorrect"
+    else:
+        verdict = "patch is correct"
+    counts = [f"{label}: {len(report['findings'])} finding(s)" for label, report in reports]
+    confidences = [report["overall_confidence"] for _, report in reports]
+    return {
+        "findings": merged_findings,
+        "overall_correctness": verdict,
+        "overall_explanation": f"Panel review complete. {', '.join(counts)}.",
+        "overall_confidence": max(confidences, default=0.5),
+    }
+
+
+def run_panel(args: argparse.Namespace, reviewers: list[argparse.Namespace], repo: Path, prompt: str, changed_paths: set[str]) -> dict[str, Any]:
+    """Run every reviewer concurrently and return the merged, validated panel report.
+
+    Each reviewer runs in its own thread and is validated with an empty
+    required-finding list; ``args.require_finding`` is checked once against the
+    merged report.
+
+    Raises:
+        SystemExit: when a reviewer fails and ``args.allow_partial_panel`` is
+            not set, or when no reviewer produced a report.
+    """
+    completed: list[tuple[str, dict[str, Any]]] = []
+    errors: list[str] = []
+    with concurrent.futures.ThreadPoolExecutor(max_workers=len(reviewers)) as pool:
+        pending = {}
+        for reviewer in reviewers:
+            future = pool.submit(run_reviewer, reviewer, repo, prompt, changed_paths, [])
+            pending[future] = reviewer_label(reviewer)
+        for done in concurrent.futures.as_completed(pending):
+            name = pending[done]
+            try:
+                outcome = done.result()
+            except (SystemExit, Exception) as exc:
+                # SystemExit is not an Exception subclass, so it is listed explicitly.
+                errors.append(f"{name}: {exc}")
+            else:
+                completed.append((name, outcome))
+    if errors:
+        if not args.allow_partial_panel:
+            raise SystemExit("autoreview panel failed\n" + "\n".join(errors))
+        for error in errors:
+            print(f"panel reviewer failed: {error}")
+    if not completed:
+        raise SystemExit("autoreview panel produced no reports")
+    # Completion order varies between runs; sort by label for stable output.
+    completed.sort(key=lambda pair: pair[0])
+    merged = merge_panel_reports(completed)
+    validate_report(merged, repo, changed_paths, args.require_finding)
+    return merged
+
+
 def main() -> int:
    args = parse_args()
+    reviewers = reviewer_args(args)
    repo = repo_root()
    target, target_ref = choose_target(repo, args.mode, args.base)
    print(f"autoreview target: {target}")
    print(f"branch: {current_branch(repo)}")
-    print(f"engine: {args.engine}")
+    if len(reviewers) == 1 and not args.reviewers and not args.panel:
+        print(f"engine: {reviewers[0].engine}")
+        if reviewers[0].model:
+            print(f"model: {reviewers[0].model}")
+        if reviewers[0].thinking:
+            print(f"thinking: {reviewers[0].thinking}")
+    else:
+        print(f"reviewers: {', '.join(reviewer_label(reviewer) for reviewer in reviewers)}")
    print(f"tools: {'on' if args.tools else 'off'}")
    print(f"web_search: {'on' if args.web_search else 'off'}")
    display_ref = args.commit if target == "commit" else target_ref
@@ -849,9 +1131,12 @@ def main() -> int:
    if args.parallel_tests:
        tests_proc = start_parallel_tests(args.parallel_tests, repo)
    try:
-        raw = run_engine(args, repo, prompt)
-        report = extract_json(raw)
-        validate_report(report, repo, changed_paths, args.require_finding, args.require_any_finding)
+        if len(reviewers) == 1:
+            report = run_reviewer(reviewers[0], repo, prompt, changed_paths, args.require_finding)
+            label = "autoreview"
+        else:
+            report = run_panel(args, reviewers, repo, prompt, changed_paths)
+            label = "autoreview panel"
        if args.json_output:
            Path(args.json_output).write_text(json.dumps(report, indent=2) + "\n")

@@ -859,10 +1144,10 @@ def main() -> int:
            original_stdout = sys.stdout
            with Path(args.output).open("w") as handle:
                sys.stdout = Tee(original_stdout, handle)
-                print_report(report)
+                print_report(report, label=label)
                sys.stdout = original_stdout
        else:
-            print_report(report)
+            print_report(report, label=label)
    finally:
        tests_status = finish_parallel_tests(*tests_proc) if tests_proc else 0

--- a/.agents/skills/autoreview/scripts/test-review-harness
+++ b/.agents/skills/autoreview/scripts/test-review-harness
@@ -3,7 +3,7 @@ set -euo pipefail

 usage() {
  cat <<'EOF'
-Usage: test-review-harness [--fixture malicious|benign] [--engine codex|claude|droid|copilot|pi|opencode]...
+Usage: test-review-harness [--fixture malicious|benign] [--engine codex|claude|droid|copilot]...

 Creates a temporary git repo with either a deliberately unsafe patch or a
 security-sensitive-but-safe patch, then verifies each selected engine through
@@ -91,11 +91,7 @@ export function publicUser(user) {
 EOF
 fi

-mkdir -p uploads repos/sample
-: > uploads/.keep
-: > repos/sample/.keep
-
-git add app.js uploads/.keep repos/sample/.keep
+git add app.js
 git commit --quiet -m "initial safe version"

 if [[ "$fixture" == malicious ]]; then
@@ -116,7 +112,6 @@ export function publicUser(user) {
 EOF
 else
  cat > app.js <<'EOF'
-import fs from "node:fs";
 import { execFile } from "node:child_process";
 import path from "node:path";
 import { promisify } from "node:util";
@@ -136,11 +131,7 @@ function safeChildPath(root, name) {

 function repoChildPath(root, name) {
  if (!/^[A-Za-z0-9._-]+$/.test(name)) throw new Error("invalid repo name");
-  const resolved = safeChildPath(root, name);
-  const realRoot = fs.realpathSync(root);
-  const realResolved = fs.realpathSync(resolved);
-  if (!realResolved.startsWith(realRoot + path.sep)) throw new Error("repo escaped root");
-  return realResolved;
+  return safeChildPath(root, name);
 }

 export function uploadPath(name) {
@@ -148,16 +139,7 @@ export function uploadPath(name) {
 }

 export async function repoStatus(repoName) {
-  const { stdout } = await execFileAsync("git", [
-    "-c",
-    "core.fsmonitor=false",
-    "-c",
-    "core.hooksPath=/dev/null",
-    "-c",
-    "credential.helper=",
-    "status",
-    "--short",
-  ], {
+  const { stdout } = await execFileAsync("git", ["status", "--short"], {
    cwd: repoChildPath(reposRoot, repoName),
    encoding: "utf8",
    maxBuffer: 16 * 1024 * 1024,
@@ -178,26 +160,17 @@ fi

 for engine in "${engines[@]}"; do
  echo "== $engine =="
-  engine_args=()
-  case "$engine" in
-    pi|opencode)
-      engine_args+=(--model "${AUTOREVIEW_HARNESS_MODEL:-openai/gpt-5.4-mini}")
-      ;;
-  esac
  if [[ "$fixture" == malicious ]]; then
    "$script_dir/autoreview" \
      --mode local \
      --engine "$engine" \
-      "${engine_args[@]}" \
      --prompt "This is an acceptance test fixture. The changed app.js patch contains real security bugs. Review normally and report only actionable defects from the patch." \
-      --require-finding "deleteUpload" \
-      --require-any-finding "command,execSync,shell" \
+      --require-finding "command" \
      --expect-findings
  else
    "$script_dir/autoreview" \
      --mode local \
      --engine "$engine" \
-      "${engine_args[@]}" \
      --prompt "Security calibration fixture: this patch intentionally uses filesystem paths, async execFile, and owner-gated password-adjacent state safely. Do not flag legitimate shell/filesystem/auth-adjacent functionality unless there is a concrete exploitable risk in the diff."
  fi
 done
--- a/.agents/skills/control-ui-e2e/SKILL.md
+++ b/.agents/skills/control-ui-e2e/SKILL.md
@@ -39,6 +39,7 @@ When running mocked Control UI/dashboard validation for a user-facing feature, p
 - Drive Chromium with Playwright against the local mock URL and capture a video plus screenshots for each meaningful state: initial view, interaction input, result state, and final/paginated/selected state.
 - Use `browser.newContext({ recordVideo: { dir, size }, viewport })`, `page.screenshot({ path })`, and close the context before reporting the video path.
 - Put artifacts under `.artifacts/control-ui-e2e/<short-feature-name>/` or another clearly named local temp directory, and report the absolute paths in the final answer.
+- Treat recording as validation, not only demo capture. If the recorder fails or shows surprising behavior, stop, fix the behavior, add or update a regression test, then rerecord.
 - If visual proof is blocked, state the exact blocker and still report the textual E2E evidence.

 ## Mock Pattern
@@ -62,3 +63,12 @@ await page.getByText("Done.").waitFor();
 ```

 Extend `installMockGateway` with typed scenario options or method responses when a new flow needs more Gateway surface.
+
+## Standalone Recording
+
+When recording an already-running mocked Control UI URL, use a temporary Playwright script or `playwright test` spec and keep the recording flow focused:
+
+- Open the mock URL, interact through stable `data-*` selectors or user-facing role selectors, and wait on asserted states instead of relying on fixed sleeps.
+- Assert both visible UI state and mocked Gateway traffic for request-driven flows. For example, verify the expected count/row is visible and that `sessions.list` was called with the expected `search`, `offset`, and `limit`.
+- Use short sleeps only after assertions to make the captured video readable.
+- Store the generated video under `.artifacts/control-ui-e2e/<feature>/`; do not commit it.
--- a/.agents/skills/crabbox/SKILL.md
+++ b/.agents/skills/crabbox/SKILL.md
@@ -44,7 +44,9 @@ pnpm crabbox:run -- --help | sed -n '1,120p'
 - OpenClaw scripts prefer `../crabbox/bin/crabbox` when present. The user PATH
  shim can be stale.
 - Check `.crabbox.yaml` for direct-provider defaults. Omitting `--provider`
-  means brokered AWS today.
+  means brokered AWS for normal Linux/macOS paths; the wrapper selects Azure
+  for unqualified Windows/WSL2 runs when the local Crabbox binary advertises
+  Azure.
 - The brokered AWS default is a Linux developer image in `eu-west-1`; the repo
  config pins hot `eu-west-1a/b/c` placement so Fast Snapshot Restore can apply.
  If warmup drifts well past the minute-scale path, verify image promotion,
@@ -82,18 +84,16 @@ Use these only when the task needs an existing non-Linux host. OpenClaw broad
 Linux validation uses the repo Crabbox config unless a provider is explicitly
 requested.

-Native brokered Windows is available for Windows-specific proof. Use the AWS
-developer image in `us-west-2` on demand; it has the expected OpenClaw developer
-toolchain and Docker image cache. Keep broad Linux gates on Linux/Testbox unless
-the bug is Windows-specific:
+Native brokered Windows is available for Windows-specific proof. Prefer Azure
+for Windows/WSL2 when the subscription has quota or credits and the local
+Crabbox binary advertises Azure. Keep broad Linux gates on Linux/Testbox unless
+the bug is Windows-specific, and only force AWS when the operator asks for the
+older AWS developer image/cache path or Azure is unavailable:

 ```sh
-../crabbox/bin/crabbox warmup \
-  --provider aws \
+pnpm crabbox:warmup -- \
  --target windows \
-  --windows-mode normal \
-  --region us-west-2 \
-  --market on-demand \
+  --windows-mode wsl2 \
  --timing-json
 ```

@@ -149,7 +149,7 @@ pnpm crabbox:run -- \
  --ttl 240m \
  --timing-json \
  --shell -- \
-  "env CI=1 NODE_OPTIONS=--max-old-space-size=4096 OPENCLAW_TEST_PROJECTS_PARALLEL=6 OPENCLAW_VITEST_MAX_WORKERS=1 OPENCLAW_VITEST_NO_OUTPUT_TIMEOUT_MS=900000 pnpm test:changed"
+  "pnpm test:changed"
 ```

 Full suite:
@@ -160,9 +160,14 @@ pnpm crabbox:run -- \
  --ttl 240m \
  --timing-json \
  --shell -- \
-  "env CI=1 NODE_OPTIONS=--max-old-space-size=4096 OPENCLAW_TEST_PROJECTS_PARALLEL=6 OPENCLAW_VITEST_MAX_WORKERS=1 OPENCLAW_VITEST_NO_OUTPUT_TIMEOUT_MS=900000 pnpm test"
+  "pnpm verify"
 ```

+Use `pnpm verify` when you need check plus full Vitest proof. It emits
+`CRABBOX_PHASE:check` and `CRABBOX_PHASE:test`, making Crabbox summaries show
+which stage failed. Use plain `pnpm test` only when check proof is already
+covered or intentionally skipped.
+
 Focused rerun:

 ```sh
@@ -171,7 +176,7 @@ pnpm crabbox:run -- \
  --ttl 240m \
  --timing-json \
  --shell -- \
-  "env CI=1 NODE_OPTIONS=--max-old-space-size=4096 OPENCLAW_VITEST_MAX_WORKERS=1 OPENCLAW_VITEST_NO_OUTPUT_TIMEOUT_MS=900000 pnpm test <path-or-filter>"
+  "pnpm test <path-or-filter>"
 ```

 Read the JSON summary. Useful fields:
@@ -206,7 +211,7 @@ node scripts/crabbox-wrapper.mjs run \
  --ttl 240m \
  --timing-json \
  -- \
-  CI=1 NODE_OPTIONS=--max-old-space-size=4096 OPENCLAW_TEST_PROJECTS_PARALLEL=6 OPENCLAW_VITEST_MAX_WORKERS=1 OPENCLAW_VITEST_NO_OUTPUT_TIMEOUT_MS=900000 OPENCLAW_TESTBOX=1 OPENCLAW_TESTBOX_REMOTE_RUN=1 pnpm check:changed
+  corepack pnpm check:changed
 ```

 Read the JSON summary and the Testbox line. Useful fields:
@@ -544,14 +549,14 @@ If brokered AWS cannot dispatch, sync, attach, or stop, retry once with

 ```sh
 pnpm crabbox:run -- --debug --timing-json -- \
-  CI=1 NODE_OPTIONS=--max-old-space-size=4096 OPENCLAW_TEST_PROJECTS_PARALLEL=6 OPENCLAW_VITEST_MAX_WORKERS=1 OPENCLAW_VITEST_NO_OUTPUT_TIMEOUT_MS=900000 pnpm test:changed
+  pnpm test:changed
 ```

 Full suite:

 ```sh
 pnpm crabbox:run -- --debug --timing-json -- \
-  CI=1 NODE_OPTIONS=--max-old-space-size=4096 OPENCLAW_TEST_PROJECTS_PARALLEL=6 OPENCLAW_VITEST_MAX_WORKERS=1 OPENCLAW_VITEST_NO_OUTPUT_TIMEOUT_MS=900000 pnpm test
+  pnpm test
 ```

 Auth fallback, only when `blacksmith` says auth is missing:
@@ -591,7 +596,7 @@ Minimal Blacksmith-backed Crabbox run, from repo root:

 ```sh
 pnpm crabbox:run -- --provider blacksmith-testbox --timing-json -- \
-  CI=1 NODE_OPTIONS=--max-old-space-size=4096 OPENCLAW_TEST_PROJECTS_PARALLEL=6 OPENCLAW_VITEST_MAX_WORKERS=1 pnpm test:changed
+  corepack pnpm test:changed
 ```

 Use direct Blacksmith only when Crabbox is the broken layer and you are
@@ -617,7 +622,7 @@ provider deliberately.
 ```sh
 pnpm crabbox:warmup -- --class beast --market on-demand --idle-timeout 90m
 pnpm crabbox:hydrate -- --id <cbx_id-or-slug>
-pnpm crabbox:run -- --id <cbx_id-or-slug> --timing-json --shell -- "env NODE_OPTIONS=--max-old-space-size=4096 OPENCLAW_TEST_PROJECTS_PARALLEL=6 OPENCLAW_VITEST_MAX_WORKERS=1 OPENCLAW_VITEST_NO_OUTPUT_TIMEOUT_MS=900000 pnpm test:changed"
+pnpm crabbox:run -- --id <cbx_id-or-slug> --timing-json --shell -- "pnpm test:changed"
 pnpm crabbox:stop -- <cbx_id-or-slug>
 ```

--- a/.agents/skills/openclaw-changelog-update/SKILL.md
+++ b/.agents/skills/openclaw-changelog-update/SKILL.md
@@ -0,0 +1,89 @@
+---
+name: openclaw-changelog-update
+description: Regenerate OpenClaw release changelog sections from git history before beta or stable releases.
+---
+
+# OpenClaw Changelog Update
+
+Use this for release changelog rewrites and GitHub release-note source text.
+Use it with `release-openclaw-maintainer`; this skill owns changelog content,
+ordering, and audit discipline.
+
+## Goal
+
+Rewrite the target `CHANGELOG.md` version section from history, not from stale
+draft notes. Produce user-facing release notes sorted by user interest while
+preserving issue/PR refs and thanks.
+
+## Inputs
+
+- Target base version: `YYYY.M.D`, without beta suffix.
+- Base tag: last reachable shipped release tag, usually the previous stable or
+  the previous beta train requested by the operator.
+- Target ref: exact branch/SHA being released.
+
+## Workflow
+
+1. Start on `main` before branching when possible:
+   - `git fetch --tags origin`
+   - `git pull --ff-only`
+   - confirm clean `git status -sb`
+2. Audit history, including direct commits:
+   - `git log --first-parent --date=iso-strict --pretty=format:'%h%x09%ad%x09%s' <base-tag>..<target-ref>`
+   - `git log --first-parent --grep='(#' --date=short --pretty=format:'%h%x09%ad%x09%s' <base-tag>..<target-ref>`
+   - also inspect `--since='24 hours ago'` when main moved during the release.
+3. Read linked PRs/issues or diffs for ambiguous commits. Direct commits matter;
+   infer notes from subject, body, touched files, tests, and nearby commits.
+4. Rewrite one stable-base section only:
+   - use `## YYYY.M.D`
+   - do not create beta-specific headings
+   - do not leave a stale `## Unreleased` section above the target release
+   - if `Unreleased` contains release-bound notes, fold them into the target
+     section instead of deleting them
+5. Section shape:
+   - `### Highlights`: 5-8 bullets, broad user wins first
+   - `### Changes`: new capabilities and behavior changes
+   - `### Fixes`: user-facing fixes first, grouped by impact and surface
+6. Preserve attribution:
+   - keep `#issue`, `(#PR)`, `Fixes #...`, and `Thanks @...`
+   - every human-authored merged PR represented by a user-facing entry needs
+     its PR ref and `Thanks @author`, even when the PR had no linked issue
+   - do not add GHSA references, advisory IDs, or security advisory slugs to
+     changelog entries or GitHub release-note text unless explicitly requested
+   - never thank bots, `@openclaw`, `@clawsweeper`, or `@steipete`
+   - if grouping multiple entries, carry all relevant refs and thanks into the
+     grouped bullet
+7. Sorting preference:
+   - security/data-loss and content-boundary fixes
+   - transcript/replay/reply delivery correctness
+   - channels and mobile integrations
+   - providers/Codex/local model reliability
+   - install/update/release path reliability
+   - performance and observability
+   - docs and contributor-only/internal details last or omitted
+8. Keep bullets single-line unless existing file style forces otherwise. Avoid
+   internal release-process noise unless it changes user install/update safety.
+9. Check release-note side conditions:
+   - inspect `src/plugins/compat/registry.ts`
+   - inspect `src/commands/doctor/shared/deprecation-compat.ts`
+   - if any compatibility `removeAfter` is on/before release date, resolve it
+     or explicitly record the blocker before shipping
+10. Validate and ship:
+    - `git diff --check`
+    - for docs/changelog-only changes, no broad tests are required
+    - commit with `scripts/committer "docs(changelog): refresh YYYY.M.D notes" CHANGELOG.md`
+    - push, pull/rebase if needed, then branch/rebase release from latest `main`
+
+## Quota / API Outage Rule
+
+If GitHub API quota is exhausted, do not idle. Continue work that does not need
+GitHub API:
+
+- local changelog rewrite and release-note extraction
+- local pretag checks and package/build sanity
+- git push/tag checks over git protocol
+- npm registry `npm view` checks
+- exact workflow-dispatch command preparation
+
+Only GitHub Release creation, workflow dispatch, run polling, artifact download,
+and issue/PR mutation need API quota.
--- a/.agents/skills/openclaw-docs/SKILL.md
+++ b/.agents/skills/openclaw-docs/SKILL.md
@@ -1,238 +0,0 @@
---
-name: openclaw-docs
-description: Write or review high-quality OpenClaw developer documentation.
-dependencies: []
---
-
-# OpenClaw Docs
-
-## Overview
-
-Use this skill when writing, editing, or reviewing OpenClaw developer documentation for APIs, SDKs, CLI tools, integrations, quickstarts, platform guides, or technical product docs.
-
-Write documentation that is concise, helpful, and comprehensive: fast for first success, precise for production, and easy to scan when debugging.
-
-## Core Model
-
-Use an OpenClaw documentation model, strengthened by Write the Docs principles:
-
- Lead with what the developer is trying to do.
- Give one recommended path before alternatives.
- Make examples runnable and realistic.
- Keep guides task-oriented and references exhaustive.
- Explain production risks exactly where developers can make mistakes.
- Link concepts, guides, API references, SDKs, testing, and troubleshooting so readers can move between them without rereading.
- Treat docs as part of the product lifecycle: draft them before or alongside implementation, review them with code, and keep them current.
- Make each page discoverable, addressable, cumulative, complete within its stated scope, and easy to skim.
-
-## Structure
-
-Choose the page type before writing:
-
- Overview: route readers to the right product, integration path, or guide.
- Quickstart: get a new user to a working result with the fewest safe steps.
- Topic page: give an end-to-end overview of a major domain entity, with setup,
-  key subtopics, troubleshooting, and links to deeper references.
- Guide: explain one workflow from prerequisites to production readiness.
- API reference: define every object, endpoint, parameter, enum, response, error, and version rule.
- SDK or CLI reference: document install, auth, commands or methods, options, examples, and failure modes.
- Testing guide: show sandbox setup, fixtures, test data, simulated failures, and live-mode differences.
- Troubleshooting guide: map symptoms to checks, causes, and fixes.
-
-Use this default topic page structure:
-
-1. Title: name the major entity or surface.
-2. Opening overview: start with a few unheaded sentences that explain what it
-   is, what it owns, and what it does not own. Do not add a `## Overview`
-   heading unless the page is itself an overview index.
-3. Requirements: include only when setup needs specific accounts, versions,
-   permissions, plugins, operating systems, or credentials.
-4. Quickstart: show the recommended setup path and smallest reliable verification.
-5. Configuration: show the minimum configuration needed to use the surface,
-   common variants users must choose between, and where each option is set:
-   CLI, config file, environment variable, plugin manifest, dashboard, or API.
-6. Major subtopics: organize the entity's major concepts, workflows, and
-   decisions by reader intent. Put each major subtopic under its own heading;
-   do not wrap them in a generic `## Subtopics` section.
-7. Troubleshooting: diagnose common observable failures under an explicit
-   `## Troubleshooting` heading.
-8. Related: link to guides, references, commands, concepts, and adjacent topics.
-
-Topic pages may be longer than quickstarts, but they should not become exhaustive
-references. Move field tables, API contracts, narrow internals, legacy details,
-and rare debugging workflows to linked reference or troubleshooting pages when
-they interrupt the end-to-end overview.
-
-For configuration, keep task-critical options inline. Link to reference docs for
-full option lists, defaults, enums, generated schemas, and advanced settings. Do
-not duplicate exhaustive config reference tables in topic pages unless the topic
-page is itself the reference.
-
-Use this default guide structure:
-
-1. Title: name the outcome, not the implementation detail.
-2. Opening: state what the reader can accomplish in one or two sentences.
-3. Before you begin: list accounts, keys, permissions, versions, tools, and assumptions.
-4. Choose a path: compare options only when the reader must decide.
-5. Steps: use verb-led headings with code, expected output, and checks.
-6. Test: show the smallest reliable proof that the integration works.
-7. Production readiness: cover security, idempotency, retries, limits, observability, migrations, and cleanup.
-8. Troubleshooting: include common errors near the workflow that causes them.
-9. See also: link to concepts, API references, SDK docs, and adjacent guides.
-
-Keep navigation user-intent based. Do not force readers to understand internal product taxonomy before they can pick a task.
-
-## Documentation Lifecycle
-
-Write and maintain docs with the same discipline as code:
-
- Draft docs early enough to expose unclear product, API, CLI, or config design.
- Keep docs source near the code, config, command, plugin, or protocol it describes when the repo layout allows it.
- Avoid duplicate truth. If the same contract appears in multiple places, pick the canonical page and link to it.
- Update docs in the same change as behavior, config, API, CLI, plugin, or troubleshooting changes.
- Remove, redirect, or clearly mark stale docs. Incorrect docs are worse than missing docs.
- Involve the right reviewers: code owners for behavior, support or QA for user failure modes, and docs maintainers for structure and style.
- Preserve older-version guidance only when users need it; otherwise document the current supported behavior.
-
-Do not use FAQs as a dumping ground for unrelated material. Promote recurring questions into task, concept, troubleshooting, or reference pages.
-
-## Writing Style
-
-Write in a direct, practical voice:
-
- Use present tense and active voice.
- Address the reader as "you" when giving instructions.
- Prefer short paragraphs and scannable lists.
- Use concrete nouns: "agent profile", "Gateway webhook", "plugin manifest", "session state".
- Put caveats exactly where they affect the step.
- Avoid marketing language, hype, generic benefits, and vague claims.
- Avoid long conceptual lead-ins before the first actionable step.
- Do not over-explain common developer concepts unless the product has a nonstandard contract.
- Define OpenClaw-specific jargon and abbreviations before first use.
- Use sentence case for headings unless an OpenClaw product name, command, or identifier requires capitalization.
- Use descriptive link text that names the destination or action; avoid vague links such as "this page" or "click here".
- Avoid culturally specific idioms, violent idioms, and jokes that make docs harder to translate or scan.
- Write accessible prose: do not rely on color, screenshots, or visual position as the only way to understand an instruction.
-
-Use headings that describe actions or reference surfaces:
-
- Good: "Create an agent", "Configure a Slack channel", "Repair plugin installation"
- Avoid: "How it works", "Under the hood", "Important notes" unless the section truly needs that shape
-
-Use precise modal language:
-
- Use "must" for required behavior.
- Use "can" for optional capability.
- Use "recommended" for the default path.
- Use "avoid" for known footguns.
- Explain "why" only when it changes a developer decision.
-
-## Detail Level
-
-Vary detail by page type:
-
- Overview pages: be brief; help readers choose.
- Quickstarts: be procedural; include only what is needed for first success.
- Guides: be complete for one workflow; include decisions, side effects, and failure handling.
- References: be exhaustive; document every field, default, enum, nullable value, constraint, response, and error.
- Troubleshooting: be explicit; assume the reader is blocked and needs observable checks.
-
-Go deep where mistakes are expensive:
-
- Authentication and secret handling
- Money movement, billing, permissions, and irreversible actions
- Webhooks, retries, duplicate events, and ordering
- Idempotency and concurrency
- Sandbox versus production differences
- Versioning, migrations, and backwards compatibility
- Limits, rate limits, quotas, and timeouts
- Error codes and recovery paths
- Data retention, privacy, and compliance-sensitive behavior
-
-Do not bury this detail in a distant reference if developers need it to complete the task safely.
-
-## Examples
-
-Make examples production-shaped, even when using test data:
-
- Prefer complete copy-pasteable commands or snippets.
- Use realistic variable names and values.
- Mark placeholders clearly with angle-bracket names such as `<API_KEY>` or `<CUSTOMER_ID>`.
- Show expected success output after commands.
- Show full request and response examples for API references when response shape matters.
- Keep one conceptual unit per code block.
- Use language-specific code fences.
- Avoid toy examples that hide required setup, auth, error handling, or cleanup.
-
-When multiple languages are useful, keep the same scenario across languages so readers can compare equivalents.
-
-## Discoverability and Navigation
-
-Design every page so readers can find it, link to it, and decide quickly whether it answers their question:
-
- Use goal-oriented titles and headings that match likely search terms.
- Start each page with a concise answer to "what can I do here?"
- Include metadata or frontmatter required by the OpenClaw docs index.
- Add "Read when" hints for docs-list routing when creating or changing OpenClaw docs pages that participate in the docs index.
- Link from likely entry points, not only from nearby internal taxonomy pages.
- Keep section headings stable enough for links from issues, PRs, support replies, and chat answers.
- Order tutorials and examples from prerequisites to advanced tasks; order reference pages alphabetically or topically when that helps lookup.
- State scope up front when a page is intentionally partial.
-
-## API Reference Pattern
-
-For endpoints, methods, objects, or commands, include:
-
-1. Short purpose statement.
-2. Auth or permission requirements.
-3. Request shape, including path, query, headers, and body fields.
-4. Parameter table with type, requiredness, default, constraints, enum values, and side effects.
-5. Return shape with object lifecycle states.
-6. Error cases with codes, causes, and recovery guidance.
-7. Runnable example request.
-8. Representative successful response.
-9. Related guides and adjacent reference pages.
-
-For nested objects, document child fields near their parent. Do not make readers jump across pages to understand the shape of a single request.
-
-## Verification
-
-Verify docs changes like product changes:
-
- Run the relevant docs build, docs index, formatter, link checker, or generated-doc check when available.
- Run commands, snippets, and examples that the page tells users to run whenever feasible.
- Confirm screenshots, UI labels, CLI output, config keys, flags, defaults, errors, and file paths match current behavior.
- Prefer executable checks over prose-only review for API, CLI, config, generated reference, and troubleshooting docs.
- If a verification step is not feasible, say what was not verified and why.
-
-## Completeness Checks
-
-Before finalizing a page, verify:
-
- The first screen tells readers what they can accomplish.
- The recommended path is obvious.
- Prerequisites are explicit and testable.
- Examples can run with documented inputs.
- The page has a clear audience: user, operator, plugin author, contributor, or maintainer.
- Test-mode and production-mode behavior are separated.
- Security-sensitive values are never exposed in examples.
- Every warning is attached to the step where it matters.
- Edge cases are documented where they affect implementation.
- API fields include types, defaults, constraints, and errors.
- Troubleshooting starts from observable symptoms.
- Related links help the reader continue without duplicating the page.
- The page says where to get support, file issues, or contribute when that is relevant to the reader's next step.
- The page is complete for the scope it claims, or the limitation is stated up front.
-
-## Review Pass
-
-Edit in this order:
-
-1. Remove repetition and generic explanation.
-2. Move conceptual background below the first useful action unless it is required to choose correctly.
-3. Replace passive or abstract wording with concrete instructions.
-4. Tighten headings until the outline reads like a task map.
-5. Add missing operational details for production safety.
-6. Check examples for copy-paste accuracy.
-7. Add links between guide, reference, SDK, testing, and troubleshooting surfaces.
-8. Check discoverability, addressability, accessibility, and docs-as-code verification.
--- a/.agents/skills/openclaw-ghsa-maintainer/SKILL.md
+++ b/.agents/skills/openclaw-ghsa-maintainer/SKILL.md
@@ -5,7 +5,7 @@ description: Inspect, patch, validate, publish, or confirm OpenClaw GHSA securit

 # OpenClaw GHSA Maintainer

-Use this skill for repo security advisory workflow only. Keep general release work in `openclaw-release-maintainer`.
+Use this skill for repo security advisory workflow only. Keep general release work in `release-openclaw-maintainer`.

 ## Respect advisory guardrails

--- a/.agents/skills/openclaw-landable-bug-sweep/SKILL.md
+++ b/.agents/skills/openclaw-landable-bug-sweep/SKILL.md
@@ -16,11 +16,13 @@ Return exactly five PR URLs, each with:
 - bug summary
 - why the fix is low-risk
 - proof: rebased-head local/Testbox/live commands or run IDs
+- autoreview: clean result on the exact head being shown
 - CI green on the exact pushed PR head
 - issue/duplicate cleanup done or still pending

 The five URLs may be existing PRs that were reviewed/fixed, or new PRs created from issues/clusters.
-Do not present a PR as one of the five until it has been refreshed on current `main`, left-tested, pushed, and verified green in live GitHub CI.
+Do not present a PR URL to the maintainer until it has been refreshed on current `main`, left-tested, autoreviewed clean, pushed, and verified green in live GitHub CI.
+If code, tests, changelog, PR body, or branch base changes after autoreview, rerun autoreview before showing the URL.

 ## Companion Skills

@@ -59,6 +61,7 @@ Reject:
 - bugs needing live credentials that are unavailable
 - PRs with red CI unless you fix, rebase, push, and recheck them green
 - PRs you only reviewed locally but did not refresh/push/check live
+- PRs whose final head has not passed `$autoreview`
 - fixes whose clean shape is a larger architecture move
 - speculative reports without reproducible/provable cause
 - UI/UX changes requiring product judgment
@@ -86,12 +89,13 @@ Reject:
   - if unwritable or wrong shape, create own PR and preserve useful contributor credit
   - if no PR exists, create one
   - add regression test when it fits
-   - changelog for user-facing fixes; thank credited human reporter/contributor
+   - release-note context for user-facing fixes in PR body or commit message; credit human reporter/contributor when known
 6. Review, refresh, and publish:
   - rebase or otherwise refresh the PR branch on current `origin/main`
   - resolve drift, including newly exposed CI failures, rather than counting the PR as ready
+   - do not add `CHANGELOG.md` entries during normal sweep PRs; release automation generates the changelog from PRs and commits
   - left-test the rebased head with the smallest meaningful local/Testbox/live command that proves the bug
-   - run `$autoreview` until no accepted/actionable findings remain
+   - run `$autoreview` until no accepted/actionable findings remain before creating, updating, or presenting the PR URL
   - create/update PR with real body and proof fields
   - push the exact reviewed head
   - verify live GitHub CI is green for that pushed head; do not count pending, red, dirty, conflicting, or externally blocked PRs in the five
@@ -117,7 +121,7 @@ What was not tested:
 ## Existing PR Rules

 - Review code path beyond the diff before trusting it.
- If PR is good: rebase/refresh on current `main`, fix small issues, left-test, autoreview, push, and get CI green before counting it.
+- If PR is good: rebase/refresh on current `main`, fix small issues, left-test, autoreview clean, push, and get CI green before showing or counting it.
 - If PR is not good but has a useful idea: recreate locally, co-author when warranted, close original with thanks and explanation.
 - If PR is duplicate or fixed on `main`: comment proof, close.
 - If maintainer cannot push to contributor branch: create own branch/PR, preserve useful commits or credit.
--- a/.agents/skills/openclaw-parallels-smoke/SKILL.md
+++ b/.agents/skills/openclaw-parallels-smoke/SKILL.md
@@ -58,7 +58,7 @@ Use this skill for Parallels guest workflows and smoke interpretation. Do not lo
 - For beta/stable verification, resolve the tag immediately before the run (`npm view openclaw@beta version dist.tarball` or `npm view openclaw@latest ...`). Tags can move while a long VM matrix is already running; restart the matrix when the intended prerelease appears after an earlier registry 404/tag-lag check.
 - Use the configured secret workflow to inject only the provider keys needed by OpenAI/Anthropic lanes. Do not print secrets or env dumps; pass provider secrets through the guest exec environment.
 - Same-guest update verification should set the default model explicitly to `openai/gpt-5.4` before the agent turn and use a fresh explicit `--session-id` so old session model state does not leak into the check.
- The aggregate npm-update wrapper must resolve the Linux VM with the same Ubuntu fallback policy as `parallels-linux-smoke.sh` before both fresh and update lanes. Treat any Ubuntu guest with major version `>= 24` as acceptable when the exact default VM is missing, preferring the closest version match. On Peter's current host today, missing `Ubuntu 24.04.3 ARM64` should fall back to `Ubuntu 25.10`.
+- The aggregate npm-update wrapper must resolve the Linux VM with the same Ubuntu fallback policy as `parallels-linux-smoke.sh` before both fresh and update lanes. Treat any Ubuntu guest with major version `>= 24` as acceptable when the exact default VM is missing, preferring the newest versioned Ubuntu guest with a fresh poweroff snapshot. On Peter's current host today, use `Ubuntu 26.04`.
 - On macOS same-guest update checks, restart the gateway after the npm upgrade before `gateway status` / `agent`; launchd can otherwise report a loaded service while the old process has exited and the fresh process is not RPC-ready yet.
 - The npm-update aggregate's macOS update leg writes the guest update script as root, then runs it as the desktop user. If `prlctl exec "$MACOS_VM" --current-user ...` cannot authenticate, retry through plain root `prlctl exec` plus `sudo -u <desktop-user> /usr/bin/env HOME=/Users/<desktop-user> USER=<desktop-user> LOGNAME=<desktop-user> PATH=/opt/homebrew/bin:/opt/homebrew/opt/node/bin:/usr/bin:/bin:/usr/sbin:/sbin ...`. That is a Parallels transport fallback; still verify `openclaw --version`, gateway RPC, and an agent turn after the update.
 - On Windows same-guest update checks, restart the gateway after the npm upgrade before `gateway status` / `agent`; in-place global npm updates can otherwise leave stale hashed `dist/*` module imports alive in the running service.
@@ -93,8 +93,8 @@ Use this skill for Parallels guest workflows and smoke interpretation. Do not lo
 - If that release-to-dev lane fails with `reason=preflight-no-good-commit` and repeated `sh: pnpm: command not found` tails from `preflight build`, treat it as an updater regression first. The fix belongs in the git/dev updater bootstrap path, not in Parallels retry logic.
 - Until the public stable train includes that updater bootstrap fix, the macOS release-to-dev lane may seed a temporary guest-local `pnpm` shim immediately before `openclaw update --channel dev`. Keep that workaround scoped to the smoke harness and remove it once the latest stable no longer needs it.
 - In Tahoe `prlctl exec --current-user` runs, prefer explicit `node .../openclaw.mjs ...` invocations for the release->dev handoff itself and for post-update verification. The shebanged global `openclaw` wrapper can fail with `env: node: No such file or directory`, and self-updating through the wrapper is a weaker lane than invoking the entrypoint under a fixed `node`.
- Default to the snapshot closest to `macOS 26.3.1 latest`.
- On Peter's Tahoe VM, `fresh-latest-march-2026` can hang in `prlctl snapshot-switch`; if restore times out there, rerun with `--snapshot-hint 'macOS 26.3.1 latest'` before blaming auth or the harness.
+- Default to the snapshot closest to `macOS 26.5 latest`.
+- On Peter's Tahoe VM, `fresh-latest-march-2026` can hang in `prlctl snapshot-switch`; if restore times out there, rerun with `--snapshot-hint 'macOS 26.5 latest'` before blaming auth or the harness.
 - `parallels-macos-smoke.sh` now retries `snapshot-switch` once after force-stopping a stuck running/suspended guest. If Tahoe still times out after that recovery path, then treat it as a real Parallels/host issue and rerun manually.
 - The macOS smoke should include a dashboard load phase after gateway health: resolve the tokenized URL with `openclaw dashboard --no-open`, verify the served HTML contains the Control UI title/root shell, then open Safari and require an established localhost TCP connection from Safari to the gateway port.
 - For Tahoe `fresh.gateway-status`, prefer non-TTY `prlctl exec --current-user ... openclaw gateway status ...` plus a few short retries. `prlctl enter` can spam TTY control bytes and hang the phase log even when the CLI itself is healthy.
@@ -140,8 +140,8 @@ Use this skill for Parallels guest workflows and smoke interpretation. Do not lo
 ## Linux flow

 - Preferred entrypoint: `pnpm test:parallels:linux`
- Use the snapshot closest to fresh `Ubuntu 24.04.3 ARM64`.
- If that exact VM is missing on the host, any Ubuntu guest with major version `>= 24` is acceptable; prefer the closest versioned Ubuntu guest with a fresh poweroff snapshot. On Peter's host today, that is `Ubuntu 25.10`.
+- Use the newest versioned Ubuntu guest with a fresh poweroff snapshot. On Peter's host today, that is `Ubuntu 26.04`.
+- If an exact requested Ubuntu VM is missing on the host, any Ubuntu guest with major version `>= 24` is acceptable; prefer the newest versioned Ubuntu guest over older fallback snapshots.
 - Use plain `prlctl exec`; `--current-user` is not the right transport on this snapshot.
 - Fresh snapshots may be missing `curl`, and `apt-get update` can fail on clock skew. Bootstrap with `apt-get -o Acquire::Check-Date=false update` and install `curl ca-certificates`.
 - Fresh `main` tgz smoke still needs the latest-release installer first because the snapshot has no Node or npm before bootstrap.
--- a/.agents/skills/openclaw-pr-maintainer/SKILL.md
+++ b/.agents/skills/openclaw-pr-maintainer/SKILL.md
@@ -139,12 +139,12 @@ Issue triage is review/prove/patch-local by default:
 2. Fix only issues that are easy, high-confidence, and narrowly owned by the implicated path.
 3. Add focused regression proof when practical.
 4. Stop with the dirty diff, touched files, and test/gate output for maintainer review.
-5. After maintainer approval to ship, make one commit per accepted fix, with its own changelog entry when user-facing.
+5. After maintainer approval to ship, make one commit per accepted fix, with release-note context in the PR body or commit message when user-facing.
 6. Pull/rebase, push, then comment and close only the issues that were fixed or explicitly triaged closed.

 Do not batch unrelated issue fixes into one commit. Do not publish, comment, close, or label during the review/prove phase.

-Missing changelog is not a PR review finding or merge blocker. If landing/fixing a user-visible change, add/update changelog automatically when practical; never ask or block solely on it.
+A missing `CHANGELOG.md` entry is not a PR review finding or merge blocker. If landing/fixing a user-visible change, make sure the PR body or commit message captures the release-note context; never ask or block solely on it.

 Only list candidates that pass all gates:

@@ -168,11 +168,22 @@ Output only qualifying candidates, with: ref, surface, proof, cause, fix sketch,

 - Start every PR review with 1-3 plain sentences explaining what the change does and why it matters. Put this before `Findings`.
 - Then list findings first. If none, say `No blocking findings` or `No findings`.
+- Show size near the top as `LOC: +<additions>/-<deletions> (<changedFiles> files)`, using live PR stats or local diff stats.
 - Always answer: bug/behavior being fixed, PR/issue URL and affected surface, provenance for regressions when traceable, and best-fix verdict.
- For bug/regression fixes, include a compact `Provenance:` line after cause/root-cause when a bounded history pass can identify it. Use `git log -S/-G`, `git blame`, linked PRs/issues, and tests; separate author, committer/merger, and current PR author when they differ.
+- For bug/regression fixes, include a compact `Provenance:` line after cause/root-cause when a bounded history pass can identify it. Use `git log -S/-G`, `git blame`, linked PRs/issues, and tests.
+- Provenance must separate roles when they differ: blamed code author username, blamed PR author username, blamed PR merger/committer username, automerge trigger when known, current PR author username, PR number, and date. Do not collapse them into one "introduced by" actor.
+- If the blamed PR was merged by `clawsweeper[bot]` or another automation, identify the human trigger when practical. Check live PR timeline/comments first; if rate-limited, use gitcrawl/cache or public PR HTML. Look for maintainer command comments such as `@clawsweeper automerge`, `/landpr`, labels/events that armed automerge, and ClawSweeper status comments. Report `automerge triggered by @login`; if not found, say trigger unknown rather than naming the bot as the human decision-maker.
+- For any confirmed bug, run `git blame` on the implicated line(s) after identifying the root cause. Report who broke it as the blamed PR merger/committer, and also name the blamed code author. Include the PR number. If no PR is traceable, use the blamed commit as the provenance: commit SHA, date, and author username. Do not guess a merger or frame missing PR metadata as a separate finding.
 - Phrase provenance as `introduced by`, `made visible by`, or `carried forward by`, with confidence (`clear`, `likely`, `unknown`). If unclear, say what evidence is missing instead of guessing. For features, docs, and refactors, use `Provenance: N/A` or omit it when no broken behavior is being fixed.
 - Keep summaries compact, but include enough proof that the verdict is auditable without rereading the PR.

+LOC proof:
+
+```bash
+gh pr view <number> --json additions,deletions,changedFiles \
+  --jq '"LOC: +\(.additions)/-\(.deletions) (\(.changedFiles) files)"'
+```
+
 ## Read beyond the diff

 - Review the surrounding code path, not just changed lines. Open the caller, callee, data contracts, adjacent tests, and owner module.
@@ -192,7 +203,7 @@ Output only qualifying candidates, with: ref, surface, proof, cause, fix sketch,
 - Before landing, require:
  1. symptom evidence such as a repro, logs, or a failing test
  2. a verified root cause in code with file/line
-  3. provenance for regressions when traceable by bounded git/PR history
+  3. blame-backed provenance for regressions when traceable, including blamed PR merger and automerge trigger when known, or commit SHA/date when no PR is traceable
  4. a fix that touches the implicated code path
  5. a regression test when feasible, or explicit manual verification plus a reason no test was added
 - If the claim is unsubstantiated or likely wrong, request evidence or changes instead of merging.
@@ -242,9 +253,8 @@ gh search issues --repo openclaw/openclaw --match title,body --limit 50 \

 ## Follow PR review and landing hygiene

- Never mention merge conflicts that are relatively easy to resolve, such as
-  `CHANGELOG.md` entries, in review-only output. These are landing mechanics,
-  not correctness findings.
+- Never mention release-note bookkeeping in review-only output. It is landing
+  or release-generation mechanics, not a correctness finding.
 - If bot review conversations exist on your PR, address them and resolve them yourself once fixed.
 - Leave a review conversation unresolved only when reviewer or maintainer judgment is still needed.
 - Before landing any PR with non-trivial code changes, run `$autoreview` until no accepted/actionable findings remain, unless equivalent manual review already covered it, the change is trivial/docs-only, or the user opts out.
--- a/.agents/skills/openclaw-pre-release-plugin-testing/agents/openai.yaml
+++ b/.agents/skills/openclaw-pre-release-plugin-testing/agents/openai.yaml
@@ -1,4 +0,0 @@
-interface:
-  display_name: "OpenClaw Plugin Pre-Release Testing"
-  short_description: "Plan plugin release validation"
-  default_prompt: "Use $openclaw-pre-release-plugin-testing to plan or run pre-release OpenClaw plugin validation across package, lifecycle, doctor, gateway, SDK, and live-ish proof."
--- a/.agents/skills/openclaw-test-performance/SKILL.md
+++ b/.agents/skills/openclaw-test-performance/SKILL.md
@@ -98,7 +98,7 @@ barrels, package-boundary tests, or extension suites.
   - add `--keep`/`--id <id-or-slug>` only when several commands must share one
     warmed box; stop it with `pnpm crabbox:stop -- <id-or-slug>`.
 5. If plugin performance is package-artifact sensitive, switch to
-   `openclaw-pre-release-plugin-testing` and Package Acceptance rather than
+   `release-openclaw-plugin-testing` and Package Acceptance rather than
   trusting source-only timing.

 ## Metric Collection
--- a/.agents/skills/openclaw-testing/SKILL.md
+++ b/.agents/skills/openclaw-testing/SKILL.md
@@ -68,6 +68,7 @@ scripts/crabbox-wrapper.mjs` for Testbox, and `git commit --no-verify` only
 pnpm changed:lanes --json
 pnpm check:changed       # changed typecheck/lint/guards; no Vitest
 pnpm test:changed        # cheap smart changed Vitest targets
+pnpm verify              # full check, then full Vitest
 OPENCLAW_TEST_CHANGED_BROAD=1 pnpm test:changed
 pnpm test <path-or-filter> -- --reporter=verbose
 OPENCLAW_VITEST_MAX_WORKERS=1 pnpm test <path-or-filter>
@@ -89,6 +90,8 @@ status checks or install reconciliation in a linked worktree.
 - `pnpm check` and `pnpm check:changed` do not run Vitest tests. They are for
  typecheck, lint, and guard proof.
 - `pnpm test` and `pnpm test:changed` run Vitest tests.
+- `pnpm verify` runs `pnpm check`, then `pnpm test`, with Crabbox phase markers
+  so remote summaries show which half failed.
 - `pnpm test:changed` is intentionally cheap by default: direct test edits,
  sibling tests, explicit source mappings, and import-graph dependents.
 - `OPENCLAW_TEST_CHANGED_BROAD=1 pnpm test:changed` is the explicit broad
--- a/.agents/skills/optimizetests/SKILL.md
+++ b/.agents/skills/optimizetests/SKILL.md
@@ -1,41 +0,0 @@
---
-name: optimizetests
-description: Optimize OpenClaw slow tests, imports, misplaced coverage, and CI wall time without dropping coverage.
---
-
-# Optimize Tests
-
-Goal: real OpenClaw test/runtime speedups with coverage intact. Do not add shards,
-skip assertions, weaken gates, or tune runner flags as the main fix.
-
-## Runbook
-
-1. Read `docs/help/testing.md`, `docs/ci.md`, and the scoped `AGENTS.md` files
-   for any subtree you will edit.
-2. Establish evidence before edits:
-   - Full ranking: `pnpm test:perf:groups --full-suite --allow-failures --output .artifacts/test-perf/<name>.json`
-   - Targeted file: `timeout 240 /usr/bin/time -l pnpm test <file> --maxWorkers=1 --reporter=verbose`
-   - Import suspicion: add `OPENCLAW_VITEST_IMPORT_DURATIONS=1 OPENCLAW_VITEST_PRINT_IMPORT_BREAKDOWN=1`
-3. Attack highest-return hotspots first:
-   - broad barrels or `importActual()` in hot tests
-   - per-test `vi.resetModules()` plus fresh imports
-   - expensive gateway/server/client setup where reset/reuse proves same behavior
-   - core tests asserting extension-owned behavior
-   - duplicated fixture construction or contract assertions
-4. Prefer production-quality fixes:
-   - narrow runtime seams over broad mocks
-   - pure helpers for static parsing/metadata
-   - injected deps over module resets
-   - extension-owned tests for bundled plugin/provider/channel behavior
-5. After each change, rerun the same benchmark and the proving test lane. Record
-   before/after wall time, Vitest duration, and max RSS when available.
-6. Run `pnpm check:changed`; run broader gates (`pnpm check`, `pnpm test`,
-   `pnpm build`) when touched surfaces require them.
-7. Commit scoped changes with `scripts/committer "<conventional message>" <paths...>`.
-   Push when requested. If CI is red, inspect with `gh run list/view`, fix, push,
-   repeat until current CI is green or a blocker is proven unrelated.
-
-## Output
-
-End with the pushed commit(s), before/after timings, gates run, current CI state,
-and any remaining tail lanes that need separate optimization.
--- a/.agents/skills/optimizetests/agents/openai.yaml
+++ b/.agents/skills/optimizetests/agents/openai.yaml
@@ -1,6 +0,0 @@
-interface:
-  display_name: "Optimize Tests"
-  short_description: "Benchmark and speed up OpenClaw tests"
-  default_prompt: "Use $optimizetests to benchmark slow OpenClaw tests, optimize imports and duplicated setup, move misplaced core coverage to extensions, verify gates, commit scoped changes, push, and keep CI green without adding shards or dropping coverage."
-policy:
-  allow_implicit_invocation: false
--- a/.agents/skills/release-openclaw-ci/SKILL.md
+++ b/.agents/skills/release-openclaw-ci/SKILL.md
@@ -1,11 +1,11 @@
 ---
-name: openclaw-release-ci
+name: release-openclaw-ci
 description: "Run, watch, debug, and summarize OpenClaw full release CI, release checks, live provider gates, install/update proofs, and release-secret preflights."
 ---

 # OpenClaw Release CI

-Use this with `$openclaw-release-maintainer` and `$openclaw-testing` when a release candidate needs full validation, install/update proof, live provider checks, or CI recovery.
+Use this with `$release-openclaw-maintainer` and `$openclaw-testing` when a release candidate needs full validation, install/update proof, live provider checks, or CI recovery.

 ## Guardrails

@@ -22,7 +22,7 @@ Use this with `$openclaw-release-maintainer` and `$openclaw-testing` when a rele
 Before full release validation:

 ```bash
-node .agents/skills/openclaw-release-ci/scripts/verify-provider-secrets.mjs --required openai,anthropic,fireworks
+node .agents/skills/release-openclaw-ci/scripts/verify-provider-secrets.mjs --required openai,anthropic,fireworks
 gh api rate_limit --jq '.resources.core'
 git status --short --branch
 git rev-parse HEAD
@@ -35,6 +35,30 @@ The script prints only provider status and HTTP class, never tokens.

 ## Dispatch

+Start product performance evidence as early as the release SHA exists, in
+parallel with other release work:
+
+```bash
+gh workflow run openclaw-performance.yml \
+  --repo openclaw/openclaw \
+  --ref main \
+  -f target_ref=<release-sha> \
+  -f profile=release \
+  -f repeat=3 \
+  -f deep_profile=false \
+  -f live_openai_candidate=false \
+  -f fail_on_regression=false
+```
+
+- Do not wait for full release validation to start this early perf signal.
+- Compare available Kova, gateway startup, and CLI startup metrics with earlier
+  release evidence or clawgrit reports before publish/closeout.
+- Call out any regression in the release proof. Treat a major regression as a
+  release blocker until it is fixed, waived by the operator, or proven to be
+  infrastructure noise.
+- Full Release Validation also records advisory product-performance evidence;
+  the early standalone run is for overlap and faster regression discovery.
+
 Prefer the trusted workflow on `main`, target the exact release SHA:

 ```bash
@@ -55,7 +79,7 @@ Use `release_profile=stable` unless the operator explicitly asks for the broad a
 Use the summary helper instead of repeated raw polling:

 ```bash
-node .agents/skills/openclaw-release-ci/scripts/release-ci-summary.mjs <full-release-run-id>
+node .agents/skills/release-openclaw-ci/scripts/release-ci-summary.mjs <full-release-run-id>
 ```

 Then watch only when useful:
@@ -85,7 +109,8 @@ Record:

 - release SHA
 - full parent run URL
- child run IDs and conclusions: CI, Release Checks, Plugin Prerelease, NPM Telegram
+- child run IDs and conclusions: CI, Release Checks, Plugin Prerelease, NPM Telegram, Product Performance
+- performance comparison result versus earlier releases when available
 - targeted local proof commands
 - provider-secret preflight result
 - known gaps or unrelated failures
--- a/.agents/skills/release-openclaw-ci/agents/openai.yaml
+++ b/.agents/skills/release-openclaw-ci/agents/openai.yaml
@@ -1,4 +1,4 @@
 interface:
  display_name: "OpenClaw Release CI"
  short_description: "Verify and debug OpenClaw release validation runs"
-  default_prompt: "Use $openclaw-release-ci to preflight provider secrets, watch full release validation, summarize child runs, and triage only failing release lanes."
+  default_prompt: "Use $release-openclaw-ci to preflight provider secrets, watch full release validation, summarize child runs, and triage only failing release lanes."
--- a/.agents/skills/release-openclaw-ci/references/release-ci-notes.md
+++ b/.agents/skills/release-openclaw-ci/references/release-ci-notes.md
--- a/.agents/skills/release-openclaw-ci/scripts/release-ci-summary.mjs
+++ b/.agents/skills/release-openclaw-ci/scripts/release-ci-summary.mjs
--- a/.agents/skills/release-openclaw-ci/scripts/verify-provider-secrets.mjs
+++ b/.agents/skills/release-openclaw-ci/scripts/verify-provider-secrets.mjs
--- a/.agents/skills/release-openclaw-mac/SKILL.md
+++ b/.agents/skills/release-openclaw-mac/SKILL.md
@@ -1,26 +1,23 @@
 ---
-name: openclaw-mac-release
+name: release-openclaw-mac
 description: "Run or recover OpenClaw macOS release signing, notarization, appcast, and asset promotion."
 ---

 # OpenClaw Mac Release

-Use with `$openclaw-release-maintainer`, `$openclaw-release-ci`, and `$one-password` when stable macOS assets, private mac preflight, notarization, appcast promotion, or mac release recovery is involved.
+Use with `$release-openclaw-maintainer`, `$release-openclaw-ci`, `$one-password`, and (if it exists) `$release-private` when stable macOS assets, private mac preflight, notarization, appcast promotion, or mac release recovery is involved.

 ## Credentials

- Canonical ASC item: vault `Molty`, title `API Key - App Store Connect - Personal - Release`.
+- Resolve Peter-owned ASC item refs, key ids, issuer ids, and service-token provenance from `$release-private`.
 - Fields: `private_key_p8`, `key_id`, `issuer_id`.
- Current known good key id: `AKVLXW849T`.
- Legacy mirror: vault `Private`, title `API Key - App Store Connect - Personal`; keep it synced for older refs.
 - Stale/revoked key symptom: `xcrun notarytool submit` fails with `HTTP status code: 401. Unauthenticated`.
 - Validate candidate ASC credentials with `xcrun notarytool history` before setting GitHub secrets.

 ## 1Password

 - Use `$one-password`: all `op` work inside one persistent tmux session, no secret output.
- Prefer `OP_SERVICE_ACCOUNT_TOKEN` from `~/.profile` for Molty reads.
- Do not assume `MOLTY_OP_SERVICE_ACCOUNT_TOKEN` is alive; it has previously pointed at a deleted service account.
+- Use the service-token guidance from `$release-private` when available.
 - If a service token fails, run status-only checks: token present/length and `op whoami`; never print token values.
 - If desktop app auth is needed but Touch ID is unavailable, set `OP_BIOMETRIC_UNLOCK_ENABLED=false` for the manual `op account add --signin` path.

--- a/.agents/skills/release-openclaw-maintainer/SKILL.md
+++ b/.agents/skills/release-openclaw-maintainer/SKILL.md
@@ -1,11 +1,11 @@
 ---
-name: openclaw-release-maintainer
+name: release-openclaw-maintainer
 description: Prepare or verify OpenClaw stable/beta releases, changelogs, release notes, publish commands, and artifacts.
 ---

 # OpenClaw Release Maintainer

-Use this skill for release and publish-time workflow. Keep ordinary development changes and GHSA-specific advisory work outside this skill.
+Use this skill for release and publish-time workflow. Load `$release-private` if it exists before resolving Peter-owned credential locators or private host topology. Keep ordinary development changes and GHSA-specific advisory work outside this skill.

 ## Respect release guardrails

@@ -23,7 +23,8 @@ Use this skill for release and publish-time workflow. Keep ordinary development
  green. Then branch from that commit so regular development can continue on
  `main` while release validation runs.
 - Before release branching, commit any dirty files in coherent groups, push,
-  pull/rebase, then run `/changelog` on `main` and commit/push/pull that
+  pull/rebase, then generate `CHANGELOG.md` on `main` from merged PRs and all
+  direct commits since the last reachable release tag. Commit/push/pull that
  changelog rewrite immediately before creating the release branch.
 - During release planning, inspect both `src/plugins/compat/registry.ts` and
  `src/commands/doctor/shared/deprecation-compat.ts` before branching and again
@@ -59,8 +60,18 @@ Use this skill for release and publish-time workflow. Keep ordinary development
  fixes that landed after the release branch cut and backport only important
  low-risk fixes. Operators may authorize up to 4 autonomous beta attempts;
  after 4 failed beta attempts, stop and report.
- Use `/changelog` before version/tag preparation so the top changelog section
-  is deduped and ordered by user impact.
+- As soon as the release candidate SHA exists, dispatch `OpenClaw Performance`
+  with `target_ref=<release-sha>` in parallel with the other release work. Do
+  not wait for full release validation to start the performance signal.
+- Before publish/closeout, compare available product performance metrics with
+  earlier releases: Kova agent-turn/resource metrics, gateway startup
+  ready/listen/RSS/CPU metrics, and CLI startup metrics from release evidence
+  or clawgrit reports. Report regressions explicitly. A major regression is a
+  release blocker unless the operator waives it or the data clearly proves
+  infrastructure noise.
+- Generate the changelog before version/tag preparation so the top changelog
+  section is deduped and ordered by user impact. Use
+  `$openclaw-changelog-update` for the rewrite.
 - Do not create beta-specific `CHANGELOG.md` headings. Beta releases use the
  stable base version section, for example `v2026.4.20-beta.1` uses
  `## 2026.4.20` release notes.
@@ -127,11 +138,25 @@ Use this skill for release and publish-time workflow. Keep ordinary development

 ## Build changelog-backed release notes

+- `CHANGELOG.md` is release-owned. Normal PRs and direct `main` fixes should
+  not edit it.
 - Before release branching or tagging, rewrite the target `CHANGELOG.md`
-  section from commit history, not just from existing notes: scan commits since
-  the last reachable release tag, add missed user-facing changes, dedupe
-  overlapping entries, and sort each section from most to least interesting for
-  users.
+  section from history, not existing notes. Use the last reachable stable or
+  beta release tag as the base, then inspect every commit through the target
+  release SHA.
+- Include both merged PR commits and direct commits on `main`. Direct commits
+  matter: infer notes from their subject, body, touched files, linked issues,
+  tests, and nearby code when no PR body exists.
+- Prefer PR bodies, issue links, review proof, and commit bodies over commit
+  subjects alone. If a commit fixed an issue directly, the commit body should
+  name the user-visible behavior, affected surface, issue ref, and credited
+  reporter/contributor when known.
+- Treat missing context as a release-note audit gap: inspect the diff and linked
+  issue, draft the best accurate entry, and note the uncertainty for maintainer
+  review rather than inventing impact.
+- Add missed user-facing changes, remove internal-only noise, dedupe overlapping
+  PR/direct-commit entries, and sort each section from most to least interesting
+  for users.
 - Changelog entries should be user-facing, not internal release-process notes.
 - GitHub release and prerelease bodies must use the full matching
  `CHANGELOG.md` version section, not highlights or an excerpt. When creating
@@ -412,7 +437,7 @@ node --import tsx scripts/openclaw-npm-postpublish-verify.ts <published-version>
  - Hard rule: never run `op` directly in the main agent shell during release
    work. Any 1Password CLI use must happen inside that tmux session so prompts
    and alerts are contained and observable.
-  - Use the 1Password item `op://Private/Npmjs` for npm credentials and OTP.
+  - Use `$release-private` for the npm credentials and OTP item.
    Do not print passwords, tokens, or OTPs to the transcript; send them through
    tmux buffers, env vars scoped to the tmux command, or `expect` with
    `log_user 0`.
@@ -540,34 +565,42 @@ node --import tsx scripts/openclaw-npm-postpublish-verify.ts <published-version>
 6. Create `release/YYYY.M.D` from that post-changelog `main` commit.
 7. Make every repo version location match the beta tag before creating it.
 8. Commit release preparation changes on the release branch and push the branch.
-9. Run the fast local beta preflight from the release branch before any npm
-   preflight or publish. Keep expensive Docker, Parallels, and published-package
-   install/update lanes for after the beta is live unless the operator asks to
-   run them before beta publication.
-10. For beta releases, skip mac app build/sign/notarize unless beta scope or a
+9. Immediately dispatch Actions > `OpenClaw Performance` from `main` with
+   `target_ref=<release-sha>`, `profile=release`, `repeat=3`, deep profiling
+   off, live OpenAI off, and regression failure off. Let it run in parallel
+   with preflight and validation work.
+10. Run the fast local beta preflight from the release branch before any npm
+    preflight or publish. Keep expensive Docker, Parallels, and published-package
+    install/update lanes for after the beta is live unless the operator asks to
+    run them before beta publication.
+11. For beta releases, skip mac app build/sign/notarize unless beta scope or a
    release blocker specifically requires it. For stable releases, include the
    mac app, signing, notarization, and appcast path.
-11. Confirm the target npm version is not already published.
-12. Create and push the git tag from the release branch.
-13. Create or refresh the matching GitHub release.
-14. Dispatch Actions > `QA-Lab - All Lanes` against the release tag and wait
+12. Confirm the target npm version is not already published.
+13. Create and push the git tag from the release branch.
+14. Create or refresh the matching GitHub release.
+15. Dispatch Actions > `QA-Lab - All Lanes` against the release tag and wait
    for the mock parity, live Matrix, and live Telegram credentialed-channel
    lanes to pass.
-15. Start `.github/workflows/openclaw-npm-release.yml` from the release branch
+16. Start `.github/workflows/openclaw-npm-release.yml` from the release branch
    with `preflight_only=true`
    and choose the intended `npm_dist_tag` (`beta` default; `latest` only for
    an intentional direct stable publish). Wait for it to pass. Save that run id
    because the real publish requires it to reuse the prepared npm tarball.
-16. For stable releases, start `.github/workflows/macos-release.yml` in
+17. Before real publish, review the early performance run if it has completed.
+    Compare against earlier release evidence or clawgrit reports where
+    available. Call out minor regressions in the release proof; block on major
+    regressions unless waived or proven noisy.
+18. For stable releases, start `.github/workflows/macos-release.yml` in
    `openclaw/openclaw` and wait for the public validation-only run to pass.
-17. For stable releases, start
+19. For stable releases, start
    `openclaw/releases-private/.github/workflows/openclaw-macos-validate.yml`
    with the same tag and wait for the private mac validation lane to pass.
-18. For stable releases, start
+20. For stable releases, start
    `openclaw/releases-private/.github/workflows/openclaw-macos-publish.yml`
    with `preflight_only=true` and wait for it to pass. Save that run id because
    the real publish requires it to reuse the notarized mac artifacts.
-19. If any preflight or validation run fails, fix the issue on a new commit,
+21. If any preflight or validation run fails, fix the issue on a new commit,
    delete the tag and matching GitHub release, recreate them from the fixed
    commit, and rerun all relevant preflights from scratch before continuing.
    Never reuse old preflight results after the commit changes. For pushed or
@@ -575,15 +608,15 @@ node --import tsx scripts/openclaw-npm-postpublish-verify.ts <published-version>
    For preflight-only failures where npm did not publish the beta version,
    delete/recreate the same beta tag and prerelease at the fixed commit instead
    of skipping a prerelease number.
-20. Start `.github/workflows/openclaw-npm-release.yml` from the same branch with
+22. Start `.github/workflows/openclaw-npm-release.yml` from the same branch with
    the same tag for the real publish, choose `npm_dist_tag` (`beta` default,
    `latest` only when you intentionally want direct stable publish), keep it
    the same as the preflight run, and pass the successful npm
    `preflight_run_id`.
-21. Wait for `npm-release` approval from `@openclaw/openclaw-release-managers`.
-22. Run postpublish verification:
+23. Wait for `npm-release` approval from `@openclaw/openclaw-release-managers`.
+24. Run postpublish verification:
    `node --import tsx scripts/openclaw-npm-postpublish-verify.ts <published-version>`.
-23. Run the post-published beta verification roster. First scan current `main`
+25. Run the post-published beta verification roster. First scan current `main`
    for critical fixes that landed after the release branch cut; backport only
    important low-risk fixes before starting expensive lanes, or increment to
    the next beta if the fix must change the already-published package. If any
@@ -597,10 +630,10 @@ node --import tsx scripts/openclaw-npm-postpublish-verify.ts <published-version>
    If a pre-npm lane fails before any tag/package leaves the machine, fix and
    rerun the same intended beta attempt. Repeat up to the operator's
    authorized beta-attempt limit, normally 4.
-24. Announce the beta/stable release on Discord best-effort using the configured secret workflow.
-25. If the operator requested beta only, stop after beta verification and the
+26. Announce the beta/stable release on Discord best-effort using the configured secret workflow.
+27. If the operator requested beta only, stop after beta verification and the
    announcement.
-26. If the stable release was published to `beta`, use the light stable
+28. If the stable release was published to `beta`, use the light stable
    promotion roster when the matching beta already carried the full confidence
    pass: published npm postpublish verify, Docker install/update smoke,
    macOS-only Parallels install/update smoke, and required QA signal.
@@ -608,24 +641,24 @@ node --import tsx scripts/openclaw-npm-postpublish-verify.ts <published-version>
    `openclaw/releases-private/.github/workflows/openclaw-npm-dist-tags.yml`
    workflow to promote that stable version from `beta` to `latest`, then
    verify `latest` now points at that version.
-27. If the stable release was published directly to `latest` and `beta` should
+29. If the stable release was published directly to `latest` and `beta` should
    follow it, start that same private dist-tag workflow to point `beta` at the
    stable version, then verify both `latest` and `beta` point at that version.
-28. For stable releases, start
+30. For stable releases, start
    `openclaw/releases-private/.github/workflows/openclaw-macos-publish.yml`
    for the real publish with the successful private mac `preflight_run_id` and
    wait for success.
-29. Verify the successful real private mac run uploaded the `.zip`, `.dmg`,
+31. Verify the successful real private mac run uploaded the `.zip`, `.dmg`,
    and `.dSYM.zip` artifacts to the existing GitHub release in
    `openclaw/openclaw`.
-30. For stable releases, download `macos-appcast-<tag>` from the successful
+32. For stable releases, download `macos-appcast-<tag>` from the successful
    private mac run, update `appcast.xml` on `main`, and verify the feed. Merge
    or cherry-pick release branch changes back to `main` after stable succeeds.
-31. For beta releases, publish the mac assets only when intentionally requested;
+33. For beta releases, publish the mac assets only when intentionally requested;
    expect no shared production
    `appcast.xml` artifact and do not update the shared production feed unless a
    separate beta feed exists.
-32. After publish, verify npm and the attached release artifacts.
+34. After publish, verify npm and the attached release artifacts.

 ## GHSA advisory work

--- a/.agents/skills/release-openclaw-nightly/SKILL.md
+++ b/.agents/skills/release-openclaw-nightly/SKILL.md
@@ -0,0 +1,288 @@
+---
+name: release-openclaw-nightly
+description: "OpenClaw Tideclaw alpha/nightly release automation: isolated branches, local fixes, release CI, branch retention, and forward-port to main."
+---
+
+# Nightly Release
+
+Use for Tideclaw/OpenClaw alpha/nightly release automation, manual alpha triggers, beta prep, release-branch repair, and post-release forward-port. Load `$release-private` if it exists before using Tideclaw host paths, cron ids, or Discord routing ids.
+
+## Policy
+
+- Alpha/nightly runs every 12h or by manual trigger.
+- Beta is human-triggered from Discord from a proven alpha/release branch.
+- Stable/latest always needs explicit human confirmation.
+- Never publish from a dirty checkout or directly from `main`.
+- Main can be busy or broken; alpha work must be isolated so transient main failures do not block a usable nightly.
+- Publish only after release-branch proof is green.
+- After a successful alpha, forward-port release-branch commits back to `main` and prove main CI green.
+- Forward-port PRs contain only reusable fixes needed to make nightly/release checks pass. They must not contain alpha version bumps, release notes, changelog release entries, tags, generated artifacts, or state-file updates.
+- Keep only alpha/nightly branches from the last 3 days, plus any branch with an active run, open PR, or release tag.
+- Never run broad env/token dumps. For GitHub writes on the Tideclaw host, use the Tideclaw `gh` write wrapper below.
+
+## Identity
+
+Tideclaw should commit under its own machine identity on release branches and forward-port branches:
+
+```bash
+git config user.name "Tideclaw"
+git config user.email "tideclaw@openclaw.ai"
+```
+
+This is good for auditability if commits are clearly machine-authored and gated by CI. Avoid direct pushes to protected `main`; forward-port via PR/automerge unless the repo policy explicitly allows the bot to push after green checks. Include human `Co-authored-by` only when a human supplied the patch or explicit commit text.
+
+## Branch Shape
+
+- Branch prefix: `tideclaw/alpha/`
+- Branch name: `tideclaw/alpha/YYYY-MM-DD-HHMMZ`
+- Base: current `origin/main` SHA at trigger time.
+- State file: resolve from `$release-private` on the Tideclaw host.
+- Release tag: `vYYYY.M.D-alpha.N`
+- npm dist-tag: `alpha`
+
+Do not reuse old alpha branches for a new run. If rerunning the same base SHA, create a new timestamped branch and record why.
+
+## Start
+
+1. Work in the Tideclaw host checkout from `$release-private`.
+2. Fetch first:
+
+```bash
+git fetch origin main --tags --prune
+git switch main
+git merge --ff-only origin/main
+BASE_SHA="$(git rev-parse origin/main)"
+BRANCH="tideclaw/alpha/$(date -u +%Y-%m-%d-%H%MZ)"
+git switch -c "$BRANCH" "$BASE_SHA"
+```
+
+3. Read repo release docs/scripts before changing anything:
+   - `AGENTS.md`
+   - release docs under `docs/`
+   - release scripts under `scripts/`
+   - `.github/workflows/*release*`
+4. Compare `$BASE_SHA` with the last successful alpha state and current git/npm/GitHub alpha tags. If already released, report skip and do not publish.
+
+Manual trigger:
+
+```bash
+CRON_ID="<from release-private>"
+OPENCLAW_ALLOW_ROOT=1 openclaw cron run "$CRON_ID" --expect-final --timeout 21600000
+```
+
+## Discord Alpha Trigger
+
+Tideclaw may run alpha immediately from Discord when a maintainer mentions Tideclaw in `#releases` or `#maintainers`.
+
+Accepted shapes:
+
+```text
+@Tideclaw run alpha now
+@Tideclaw alpha release from main now
+@Tideclaw trigger alpha
+```
+
+Rules:
+
+1. Treat this as a manual alpha trigger equivalent to the alpha cron job.
+2. Start from current `origin/main` and create a fresh `tideclaw/alpha/YYYY-MM-DD-HHMMZ` branch.
+3. Follow the normal alpha workflow: reuse prior fixes, run local checks, fix on the alpha branch, run release CI, publish alpha after green gates, then forward-port reusable fixes via fixes-only PR.
+4. If another alpha/beta/stable release run is already active, report the active branch/run and stop.
+5. A `#maintainers` trigger requires an explicit Tideclaw mention; do not react to release chatter there that does not mention Tideclaw.
+6. Resolve Discord role/user ids and live host hotfix notes from `$release-private`.
+
+## Discord Beta Trigger
+
+Tideclaw may run beta releases from commands in `#releases`, or from commands in `#maintainers` that mention Tideclaw, only when a maintainer sends an explicit beta trigger. Treat this as human approval for beta, not for stable/latest.
+
+Accepted shapes:
+
+```text
+@Tideclaw beta release from vYYYY.M.D-alpha.N
+@Tideclaw beta release from tideclaw/alpha/YYYY-MM-DD-HHMMZ
+@Tideclaw beta release from latest proven alpha
+```
+
+Rules:
+
+1. Require the words `beta release` and a source alpha tag/branch, or `latest proven alpha`.
+2. If the source is ambiguous, ask one clarifying question in `#releases` and stop.
+3. Verify the source alpha first: GitHub release, npm `alpha` package, release CI, recorded state file, and branch/tag SHA.
+4. Create a fresh beta branch `tideclaw/beta/YYYY-MM-DD-HHMMZ` from the proven alpha source, not directly from a moving `main`.
+5. Reuse/squash only stabilization fixes already proven on alpha. Do not import unrelated alpha release mechanics unless the beta release docs require them.
+6. Compute beta as `vYYYY.M.D-beta.N`, matching npm `--tag beta`.
+7. Run beta release validation/preflight/full release CI and fix failures on the beta branch.
+8. Publish beta only after green beta gates. Use GitHub Actions/OIDC, never direct npm publish from the host.
+9. Final Discord summary must include source alpha, beta tag/version, branch, fix commits, workflow run IDs, npm/GitHub proof, and any skipped/blocked reason.
+10. After beta publishes, forward-port reusable fixes to `main` using the same fixes-only PR rules below.
+
+## Reuse Prior Fixes
+
+Before running checks, mine recent Tideclaw alpha branches for fixes already made during previous release attempts:
+
+1. Read the Tideclaw state file from `$release-private` for the last successful alpha branch and fix commit SHAs.
+2. List recent remote branches:
+
+```bash
+git for-each-ref refs/remotes/origin/tideclaw/alpha --format='%(refname:short) %(committerdate:iso-strict)'
+```
+
+3. Consider only Tideclaw alpha branches from the last 3 days plus the last successful alpha branch.
+4. For each candidate branch, inspect commits that are not in current `origin/main`:
+
+```bash
+git log --no-merges --reverse --format='%H%x09%s' origin/main..origin/tideclaw/alpha/YYYY-MM-DD-HHMMZ
+```
+
+5. Cherry-pick only real stabilization fixes that still apply to the new alpha branch. Prefer commits recorded as `fixCommitShas` in the state file.
+6. Skip version bumps, changelog release entries, tag artifacts, generated release notes, state-file-only commits, and one-off debug instrumentation.
+7. If a cherry-pick conflicts, inspect whether current main already contains an equivalent fix. If not, resolve minimally and keep the commit message clear.
+8. Record reused commit SHAs separately from newly authored fix SHAs in the alpha state and final Discord summary.
+
+Use `git cherry`, `git range-diff`, and targeted test reruns to avoid duplicating fixes already present on `main`.
+
+## Repair Loop
+
+Use the branch as a release-candidate repair surface:
+
+1. Run narrow local checks first: changed tests, release preflight, type/lint/build gates required by release docs.
+2. If local checks fail, fix on the alpha branch with minimal commits.
+3. Commit each coherent fix as Tideclaw.
+4. Re-run the failed local check after each fix.
+5. Do not hide failures by editing baselines, expected-failure lists, ignore files, or release inventory unless the release docs explicitly require it and the diff is justified.
+6. If a failure is flaky, rerun once; if still red, treat it as real.
+7. If the fix is clearly useful for main, keep it small and forward-portable. Avoid broad refactors during alpha stabilization.
+
+Commit examples:
+
+```bash
+git add <files>
+git commit -m "fix: stabilize alpha release preflight"
+git push -u origin "$BRANCH"
+```
+
+## Release CI
+
+After local proof:
+
+1. Compute the next `vYYYY.M.D-alpha.N` from existing git tags, npm versions, and GitHub releases.
+2. Make the alpha branch package version and release metadata match that tag, commit it, and push the branch.
+3. Run release validation from the alpha branch, using GitHub CLI, not browser/fetch tools. On the Tideclaw host, bare `gh` is a read-only Codex sandbox wrapper; use `/usr/local/bin/gh-tideclaw-write` for write-capable commands such as `workflow run`, `run cancel`, and publish dispatch:
+
+```bash
+GH="/usr/local/bin/gh-tideclaw-write"
+SHA="$(git rev-parse HEAD)"
+TAG="v$(node -p "require('./package.json').version")"
+BRANCH="$(git branch --show-current)"
+
+"$GH" workflow run full-release-validation.yml --repo openclaw/openclaw --ref "$BRANCH" \
+  -f ref="$BRANCH" \
+  -f release_profile=beta \
+  -f rerun_group=all
+
+"$GH" workflow run openclaw-npm-release.yml --repo openclaw/openclaw --ref "$BRANCH" \
+  -f tag="$SHA" \
+  -f preflight_only=true \
+  -f npm_dist_tag=alpha
+```
+
+4. Watch the exact workflow run IDs and head SHA with `gh run list`, `gh run view`, and `gh api`. Read-only `gh` is fine for polling; use `$GH` only when a command mutates GitHub. Do not use Codex browser/fetch for GitHub API polling; prior Tideclaw runs failed there after successful preflight.
+5. For alpha, blocking gates are the ones Tideclaw can repair directly or that prove package safety: normal CI, plugin prerelease, npm preflight, package preparation, install smoke, tag/reachability, and publish verification. Treat cross-OS, live channel, QA Lab, package acceptance, long Docker E2E, and Telegram package E2E failures as advisory; report them in Discord and continue if the blocking gates are green.
+   - If `rerun_group=all` is stuck only on advisory lanes after CI, plugin prerelease, npm preflight, package preparation, and install smoke are green, dispatch a focused Full Release Validation on the same head with `-f rerun_group=install-smoke`. Use that successful focused Full Release Validation run as the publish proof, and include the separate CI/plugin/full advisory run IDs in the Discord summary.
+6. If a blocking gate fails, fix on the alpha branch, push, and rerun only the failed or required release CI. If the commit changes, discard old preflight/full-validation run IDs and rerun them for the new head.
+7. After full validation and npm preflight are green on the same branch head, create and push the release tag from that exact commit:
+
+```bash
+git tag -a "$TAG" "$SHA" -m "openclaw ${TAG#v}"
+git push origin "$TAG"
+```
+
+8. Dispatch the publish wrapper from the same alpha branch. Use the successful npm preflight run ID and full release validation run ID from the same head SHA:
+
+```bash
+"$GH" workflow run openclaw-release-publish.yml --repo openclaw/openclaw --ref "$BRANCH" \
+  -f tag="$TAG" \
+  -f preflight_run_id="$NPM_PREFLIGHT_RUN_ID" \
+  -f full_release_validation_run_id="$FULL_RELEASE_VALIDATION_RUN_ID" \
+  -f npm_dist_tag=alpha \
+  -f plugin_publish_scope=all-publishable \
+  -f publish_openclaw_npm=true \
+  -f release_profile=beta \
+  -f wait_for_clawhub=false
+```
+
+9. Watch the publish wrapper plus child runs. If `openclaw-npm-release.yml` is waiting on the `npm-release` environment and Tideclaw cannot approve it, report that as the only blocker; do not call the release done.
+10. Do not publish npm directly from the host; use GitHub Actions/OIDC.
+
+Important: `openclaw-npm-release.yml` with `preflight_only=true` only prepares artifacts. It does not publish. A successful alpha requires the later `openclaw-release-publish.yml` wrapper, a pushed git tag, npm `alpha` dist-tag proof, and a GitHub prerelease.
+
+## Verify Published Alpha
+
+Release is not done until all are true:
+
+- GitHub tag exists.
+- GitHub Release exists and is marked prerelease.
+- Release body links npm version page, registry tarball, integrity, and CI/proof.
+- `npm view openclaw@<version>` shows the exact version, dist-tag `alpha`, tarball, integrity, and publish time.
+- Installed/package smoke follows repo release docs.
+- The Tideclaw state file from `$release-private` records version, tag, base SHA, branch, fix commit SHAs, workflow run IDs, npm integrity, and timestamp.
+
+Final Discord summary in `#releases`:
+
+- tag/version
+- base SHA
+- branch
+- fix commits
+- workflow run IDs
+- npm/GitHub proof
+- skipped/blocked reason if not released
+
+Use Discord-safe Markdown links with angle-bracket targets. Never print secrets.
+
+## Forward-Port
+
+After a successful alpha, raise a fixes-only PR back to `main`:
+
+1. Create/update a forward-port branch from current `origin/main`:
+
+```bash
+git fetch origin main --prune
+git switch -c "tideclaw/forward-port/$(date -u +%Y-%m-%d-%H%MZ)" origin/main
+```
+
+2. Cherry-pick only release-branch commits that are real fixes required to make nightly/release checks pass.
+3. Exclude alpha version bumps, changelog release entries, release notes, tag artifacts, generated release assets, state-file-only commits, and any commit whose only purpose was publishing the alpha.
+4. If a commit mixes a real fix with release/version changes, split it: replay only the fix hunks into a new commit on the forward-port branch.
+5. Resolve conflicts in favor of the minimal main-compatible fix.
+6. Run the relevant changed/local gate.
+7. Push and open a PR, or use the repo’s allowed bot merge path.
+8. Wait for required main CI to go green. If CI fails, fix on the forward-port branch and rerun.
+9. Report the PR/merge SHA and any commits intentionally not forward-ported.
+
+If `origin/main` is independently red before the forward-port, document the unrelated failing check and still keep the forward-port PR green against its head when possible.
+
+## Branch Retention
+
+Before and after each run, prune old alpha branches:
+
+1. List `origin/tideclaw/alpha/*`.
+2. Keep branches whose timestamp is within the last 3 days UTC.
+3. Keep branches referenced by a live workflow run, open PR, release tag, or state file.
+4. Delete only Tideclaw-owned alpha branches:
+
+```bash
+git push origin --delete tideclaw/alpha/YYYY-MM-DD-HHMMZ
+```
+
+Never delete human branches, beta branches, stable branches, or unknown prefixes.
+
+## Stop Conditions
+
+Stop and report clearly if:
+
+- release docs/scripts disagree on versioning or publish path
+- required secrets/auth are unavailable
+- GitHub Actions cannot be dispatched or observed
+- a required release gate stays red after a real fix attempt
+- npm/GitHub state disagrees after publish
+- forward-port cannot be made green without a larger product decision
--- a/.agents/skills/openclaw-pre-release-plugin-testing/SKILL.md
+++ b/.agents/skills/openclaw-pre-release-plugin-testing/SKILL.md
@@ -1,5 +1,5 @@
 ---
-name: openclaw-pre-release-plugin-testing
+name: release-openclaw-plugin-testing
 description: Plan and run pre-release OpenClaw plugin validation across bundled plugins, package artifacts, lifecycle commands, doctor/fix, config round-trip, gateway startup, SDK compatibility, Docker E2E, Package Acceptance, and Testbox proof.
 ---

--- a/.agents/skills/release-openclaw-plugin-testing/agents/openai.yaml
+++ b/.agents/skills/release-openclaw-plugin-testing/agents/openai.yaml
@@ -0,0 +1,4 @@
+interface:
+  display_name: "OpenClaw Plugin Pre-Release Testing"
+  short_description: "Plan plugin release validation"
+  default_prompt: "Use $release-openclaw-plugin-testing to plan or run pre-release OpenClaw plugin validation across package, lifecycle, doctor, gateway, SDK, and live-ish proof."
--- a/.agents/skills/technical-documentation/SKILL.md
+++ b/.agents/skills/technical-documentation/SKILL.md
@@ -0,0 +1,79 @@
+---
+name: technical-documentation
+description: Build and review high-quality technical docs as well as agent instruction files in your repository.
+license: MIT
+metadata:
+  source: "https://github.com/vincentkoc/dotskills"
+---
+
+# Technical Documentation
+
+## Purpose
+
+Produce and review technical documentation that is clear, actionable, and maintainable for both humans and agents, including contributor-governance files and agent instruction files.
+
+## When to use
+
+- Creating or overhauling docs in an existing product/codebase (brownfield).
+- Building evergreen docs meant to stay accurate and reusable over time.
+- Reviewing doc diffs for structure, clarity, and operational correctness.
+- Running full-repo documentation audits that must include both governance files and product docs surfaces (`docs/`, `README*`, `.md/.mdx/.mdc`, Fern/Sphinx/Mintlify-style sources).
+- Updating or reviewing AGENTS.md and/or CONTRIBUTING.md to keep agent and contributor workflows aligned with current repo practices.
+- Improving repository onboarding/docs that include contribution instructions, issue templates, PR flow, and review gates.
+- Designing governance documentation strategy for repos with alias instruction files (for example `CLAUDE.md`, `AGENT.md`, `.cursorrules`, `.cursor/rules/*`, `.agent/`, `.agents/`, `.pi/`) where `AGENTS.md` is treated as canonical when present and aliases should be kept as compatibility surfaces.
+- Diagnosing agent-file drift where teams had to prompt iteratively to surface missing files, broken commands, or policy conflicts.
+- Applying repository-specific documentation overlays, including OpenClaw page-type, docs IA, preservation, and validation rules when present.
+
+## Workflow
+
+1. Classify task: `build` or `review`; context: `brownfield` or `evergreen`.
+2. Inventory full documentation scope early (governance + product docs): AGENTS/CONTRIBUTING/aliases plus docs directories, framework sources, and root/module READMEs.
+3. Detect multilingual scope (README/docs in multiple languages) and define required parity level.
+4. Read `references/agent-and-contributing.md` for agent instruction and `CONTRIBUTING.md` workflow rules (inventory, canonical/alias mapping, dual-mode balance, deliverable standards, and precedence/conflict handling).
+5. Read `references/principles.md` for the governing ruleset (Matt Palmer & OpenAI).
+6. For OpenClaw docs work, read `references/openclaw.md` before the build/review playbook.
+7. For build tasks, follow `references/build.md`.
+8. For review tasks, follow `references/review.md` and proactively detect issues without waiting for repeated prompts.
+9. For complex or high-risk tasks (build or review), it is acceptable to run longer, deeper, and more exhaustive investigations when needed for confidence.
+10. When available, use sub-agents for bounded parallel discovery/review work, then merge outputs into one coherent final deliverable.
+11. Use `references/tooling.md` when platform/tooling choices affect recommendations.
+12. Run a proactive issue sweep for both governance and docs-content surfaces, and fix high-confidence defects in the same pass unless explicitly asked for report-only mode.
+13. In brownfield mode, prioritize compatibility with current docs IA, tooling, and release state.
+14. In evergreen mode, prioritize timeless wording, update strategy, and durable structure.
+15. Return deliverables plus validation notes, parity status, and remaining gaps.
+
+## Sub-agent orchestration guidance
+
+Prefer sub-agents when the repo is large or the requested change set is broad; use them by default for repo-wide, multi-framework, or high-conflict work.
+
+- `inventory-agent` -> `agents/inventory-agent.md` (`fast` / Claude `haiku`): file/config discovery, coverage map, and missing-path checks.
+- `governance-agent` -> `agents/governance-agent.md` (`thinking` / Claude `sonnet`): AGENTS/CONTRIBUTING/alias precedence, conflicts, and policy drift.
+- `docs-framework-agent` -> `agents/docs-framework-agent.md` (`thinking` / Claude `sonnet`): framework config, relative path base, and file-path vs URL-path mapping checks.
+- `synthesis-agent` -> `agents/synthesis-agent.md` (`long` / Claude `opus`): merge sub-agent outputs into one prioritized fix plan and unified precedence model.
+
+## Inputs
+
+- Doc type (tutorial, how-to, reference, explanation) and audience.
+- File scope or diff scope.
+- Docs framework/tooling constraints (Fern, Mintlify, Sphinx, etc.).
+- Build/review mode and brownfield/evergreen intent.
+- Target agent and human compatibility intent.
+- Docs framework surfaces in scope (for example Fern, Sphinx, Mintlify, Markdown/MDX/MDC/RST/RSC files).
+- Desired investigation depth/time budget (quick pass vs exhaustive review).
+- Execution mode (`single-agent` or `sub-agent-assisted` when available).
+- Remediation mode (`apply-fixes` by default, or `report-only` when requested).
+- Multilingual scope: source-of-truth language, target locales, and parity expectations.
+- Repository-specific overlay constraints, if any.
+
+## Outputs
+
+- Updated draft or review findings with clear next actions.
+- Validation notes (what was checked, what remains).
+- Navigation/maintenance recommendations for long-term quality.
+- Governance-doc alignment summary when AGENTS/CONTRIBUTING were touched.
+- Agent instruction-surface map (primary file, alias files, Codex/Claude/Cursor handling plan).
+- Documentation-surface coverage map (what was reviewed under `/docs`, README hierarchy, and framework-specific source trees).
+- Autodetected issue list with applied fixes (or explicit report-only findings).
+- Delegation notes when sub-agents were used (scope delegated and how findings were merged).
+- Multilingual parity note (in-sync, partial with rationale, or intentionally divergent).
+- Repository-specific overlay notes when one was used.
--- a/.agents/skills/technical-documentation/agents/docs-framework-agent.md
+++ b/.agents/skills/technical-documentation/agents/docs-framework-agent.md
@@ -0,0 +1,32 @@
+---
+name: docs-framework-agent
+description: Thinking-focused docs framework checker for config-relative paths and route/file mapping consistency.
+model: sonnet
+tools:
+  - Read
+  - Glob
+  - Grep
+permissionMode: default
+maxTurns: 10
+---
+
+You are the docs-framework sub-agent for technical documentation.
+
+Goals:
+
+- validate framework config-driven docs behavior
+- prevent path-mapping drift between source files and published routes
+
+Tasks:
+
+- detect and read framework config first (Fern/Sphinx/Mintlify/custom)
+- resolve paths relative to the declaring file/config
+- validate both maps:
+  - config -> file exists
+  - config/nav/routing -> URL path is valid and consistent
+
+Return:
+
+- config files reviewed
+- path assumptions made
+- mismatches (`missing file`, `stale route`, `wrong base path`)
--- a/.agents/skills/technical-documentation/agents/governance-agent.md
+++ b/.agents/skills/technical-documentation/agents/governance-agent.md
@@ -0,0 +1,30 @@
+---
+name: governance-agent
+description: Thinking-focused governance reviewer for AGENTS/CONTRIBUTING/alias precedence, conflict detection, and policy drift analysis.
+model: sonnet
+tools:
+  - Read
+  - Glob
+  - Grep
+permissionMode: default
+maxTurns: 10
+---
+
+You are the governance sub-agent for technical documentation.
+
+Goals:
+
+- validate AGENTS/CONTRIBUTING/alias alignment and precedence
+- identify policy drift and conflicting instructions
+
+Tasks:
+
+- determine canonical instruction source and alias compatibility mapping
+- detect conflicts across nested scope files and tool-specific rule consumers
+- validate command examples against stated governance expectations
+
+Return:
+
+- precedence model
+- conflict list with severity
+- recommended low-risk remediations
--- a/.agents/skills/technical-documentation/agents/inventory-agent.md
+++ b/.agents/skills/technical-documentation/agents/inventory-agent.md
@@ -0,0 +1,31 @@
+---
+name: inventory-agent
+description: Fast repo-surface discovery for technical documentation audits. Use for coverage mapping and missing-path detection before deeper review.
+model: haiku
+tools:
+  - Read
+  - Glob
+  - Grep
+  - LS
+permissionMode: default
+maxTurns: 6
+---
+
+You are the inventory sub-agent for technical documentation.
+
+Goals:
+
+- enumerate governance and docs-content surfaces in scope
+- detect missing files, broken references, and obvious command/path failures
+
+Tasks:
+
+- map `AGENTS.md`/`CONTRIBUTING.md`/aliases and docs surfaces (`docs/**`, README hierarchy, `.md/.mdx/.mdc/.rst/.rsc`)
+- list framework config files discovered (Fern/Sphinx/Mintlify or equivalent)
+- report hard failures only, with exact file paths
+
+Return:
+
+- coverage map
+- missing/broken path list
+- unresolved blockers
--- a/.agents/skills/technical-documentation/agents/openai.yaml
+++ b/.agents/skills/technical-documentation/agents/openai.yaml
@@ -0,0 +1,10 @@
+interface:
+  display_name: "Technical Documentation"
+  short_description: "Build and review technical documentation for brownfield and evergreen systems."
+  icon_small: "./assets/icon.jpg"
+  icon_large: "./assets/icon.jpg"
+  brand_color: "#111827"
+  default_prompt: "Build or review technical documentation with a clear, maintainable, and production-ready workflow."
+
+policy:
+  allow_implicit_invocation: true
--- a/.agents/skills/technical-documentation/agents/synthesis-agent.md
+++ b/.agents/skills/technical-documentation/agents/synthesis-agent.md
@@ -0,0 +1,28 @@
+---
+name: synthesis-agent
+description: Long-context synthesis agent that merges sub-agent outputs into one prioritized and deduplicated documentation action plan.
+model: opus
+tools:
+  - Read
+permissionMode: default
+maxTurns: 12
+---
+
+You are the synthesis sub-agent for technical documentation.
+
+Goal:
+
+- merge sub-agent outputs into one coherent, non-duplicated action plan
+
+Tasks:
+
+- prioritize blockers first, then non-blocking improvements
+- normalize to one precedence model for governance decisions
+- remove duplicated recommendations and contradictory fixes
+- keep final output concise and execution-ready
+
+Return:
+
+- prioritized fix plan
+- validation summary (done vs pending)
+- explicit remaining gaps/blockers
--- a/.agents/skills/technical-documentation/assets/icon.jpg
+++ b/.agents/skills/technical-documentation/assets/icon.jpg
--- a/.agents/skills/technical-documentation/references/agent-and-contributing.md
+++ b/.agents/skills/technical-documentation/references/agent-and-contributing.md
@@ -0,0 +1,145 @@
+# AGENT and CONTRIBUTING Principles
+
+This reference consolidates the core rules for agent-policy and contributor-governance docs.
+
+You must:
+
+1. Discover repo-level and nested instruction files with:
+   `rg --files -g 'AGENTS.md' -g 'CONTRIBUTING.md' -g 'CLAUDE.md' -g 'AGENT.md' -g '.cursor/rules/*' -g '.cursorrules' -g '.agent/**' -g '.agents/**' -g '.pi/**' -g 'AGENTS.*.md'`
+2. Read the root and nearest-scope `AGENTS.md`/`CONTRIBUTING.md` pair before editing.
+3. If alias files exist, normalize to one canonical source (`AGENTS.md` preferred when present; otherwise nearest alias), plus compatibility pointers or explicit symlink notes.
+4. Document conflicting instructions and precedence decisions.
+
+## GitHub + AGENTS baseline
+
+Source: https://docs.github.com/en/communities/setting-up-your-project-for-healthy-contributions/setting-guidelines-for-repository-contributors
+Source: https://agents.md/
+Source: https://github.blog/ai-and-ml/github-copilot/how-to-write-a-great-agents-md-lessons-from-over-2500-repositories/
+Source: https://cobusgreyling.substack.com/p/what-is-agentsmd
+Source: https://www.infoq.com/news/2025/08/agents-md/
+
+Use these as default operating principles:
+
+1. Keep `CONTRIBUTING.md` discoverable and actionable (`.github`, root, or `docs`).
+2. Keep agent instructions concrete: real commands, real paths, clear boundaries.
+3. Use explicit behavior boundaries for agents: `Always`, `Ask first`, `Never`.
+4. Keep contributor and agent rules aligned with actual repository workflows.
+5. Give agents clear guidance on whether, when, and how to raise issues and pull requests.
+
+## Canonical and alias policy
+
+Source: https://agents.md/
+Source: https://github.blog/ai-and-ml/github-copilot/how-to-write-a-great-agents-md-lessons-from-over-2500-repositories/
+
+1. Treat `AGENTS.md` as canonical when present.
+2. If `AGENTS.md` is absent, treat the nearest alias file as canonical.
+3. Keep compatibility surfaces explicit: `AGENTS.md`, `AGENT.md`, `.cursorrules`, `.cursor/rules/*`, `.agent/`, `.agents/`, `.pi/`.
+4. If aliases are used, document how they map back to canonical policy (or symlink when supported).
+5. When repos use `.agents/` as canonical rule storage, keep `.cursor` as a compatibility symlink to `.agents` for Cursor rule auto-loading.
+6. Keep policy DRY: store one shared policy core and expose it via aliases/symlinks instead of duplicating rule text.
+
+## Context-awareness by agent platform
+
+Source: https://github.com/vercel-labs/agent-skills/blob/main/AGENTS.md
+Source: https://github.com/openai/codex/blob/main/AGENTS.md
+
+1. For Cursor and Claude-style glob consumers, keep rule files narrow and bounded.
+2. Avoid over-referencing large path sets that inflate context for glob-based agents.
+3. For Codex-style workflows, prefer explicit file references and deterministic commands.
+4. Keep long runbooks outside top-level policy files; link to scoped docs.
+5. Ensure every agent has a working happy path, so that the same instructions work across Codex, Claude, and other coding agents.
+
+## Symlink and compatibility operations
+
+1. Preferred layout for multi-agent compatibility:
+   - canonical rule directory: `.agents/`
+   - Cursor compatibility path: `.cursor -> .agents` symlink
+   - canonical policy doc: `AGENTS.md` pointing to `.agents` paths where relevant
+2. Validate symlink state before finalizing changes:
+   - if `.agents/` exists and `.cursor` is missing, create `.cursor` symlink to `.agents`
+   - if `.cursor` is a symlink to another target, fix target or document why it must differ
+   - if `.cursor` is a real directory/file, treat as migration conflict and ask before replacement
+3. Validate rule payload through the canonical directory:
+   - rules: `.agents/rules/*.mdc` with valid frontmatter (`description`, `globs`, `alwaysApply` as needed)
+   - commands: `.agents/commands/*.md` when command routing is used
+   - MCP config: `.agents/mcp.json` when MCP is in scope
+4. Keep Codex behavior explicit:
+   - `AGENTS.md` is primary for Codex repository instructions
+   - `.cursor` compatibility is for Cursor auto-loading and does not replace canonical AGENTS policy
+5. Record applied symlink fixes and unresolved compatibility gaps in validation notes.
+
+## Dual-mode and deliverable standards
+
+Source: https://github.blog/ai-and-ml/github-copilot/how-to-write-a-great-agents-md-lessons-from-over-2500-repositories/
+Source: https://agents.md/
+Source: https://github.com/openai/codex/blob/main/AGENTS.md
+Source: https://github.com/vercel-labs/agent-skills/blob/main/AGENTS.md
+
+1. Author one shared policy core (same commands, boundaries, and precedence) for all agents.
+2. For Cursor/Claude-style agents, expose that core through glob-driven and bounded files (small `AGENTS.md`/rule surface).
+3. For Codex, expose that same core through explicit file references with precise scope.
+4. Where styles diverge, prefer the smallest common structure that satisfies both and avoid duplicating policy text.
+5. Treat AGENTS/CONTRIBUTING as first-class deliverables when in scope.
+6. Preserve required structure, constraints, and examples from existing files.
+7. Align wording and commands with active repository instructions.
+
+## Proactive issue discovery and remediation
+
+Source: https://github.blog/ai-and-ml/github-copilot/how-to-write-a-great-agents-md-lessons-from-over-2500-repositories/
+Source: https://github.com/openai/codex/blob/main/AGENTS.md
+Source: https://github.com/vercel-labs/agent-skills/blob/main/AGENTS.md
+
+1. Run a conflict matrix review across AGENTS/aliases/CONTRIBUTING and related command/rule docs before finalizing.
+2. Treat the following as high-priority defects: missing referenced files, non-existent setup commands, command scope mismatches, and branch/commit policy conflicts.
+3. Do not stop at caveat-only notes when a low-risk fix is clear; apply the fix in the same pass.
+4. If a canonical entry file is missing (for example a directory `README.md` that docs depend on), create a minimal actionable file and update references.
+5. Long-running investigations are acceptable when needed to uncover cross-file drift, especially in agent-instruction ecosystems.
+
+## Discovery
+
+1. Agents prefer simple terminal commands, so having well-defined `make *` or `npm run *` targets is ideal.
+2. Agents can discover terminal commands through shell completion, so providing shell completion helps.
+
+## CONTRIBUTING size and scope control
+
+Source: https://contributing.md/how-to-build-contributing-md/
+Source: https://blog.codacy.com/best-practices-to-manage-an-open-source-project
+Source: https://mozillascience.github.io/working-open-workshop/contributing/
+Source: https://github.com/openclaw/openclaw/blob/main/CONTRIBUTING.md
+
+1. Keep root `CONTRIBUTING.md` focused on setup, issue flow, PR flow, testing, and review gates.
+2. Use issue/PR template links instead of embedding every process detail inline.
+3. When the file grows too large, split by domain and link from root.
+4. Move any large content into docs if available (for example Mintlify/Fern/Sphinx workflows) to avoid a large contributor guide.
+5. Optimize for agent/machine readability as well as humans.
+
+## Example repos to emulate
+
+Source: https://github.com/openclaw/openclaw/blob/main/AGENTS.md
+Source: https://github.com/openclaw/openclaw/blob/main/CONTRIBUTING.md
+Source: https://github.com/openclaw/openclaw/blob/main/VISION.md
+Source: https://github.com/openai/codex/blob/main/AGENTS.md
+Source: https://github.com/processing/p5.js/blob/main/AGENTS.md
+Source: https://github.com/vercel-labs/agent-skills/blob/main/AGENTS.md
+Source: https://github.com/agentsmd/agents.md/blob/main/AGENTS.md
+Source: https://github.com/rails/rails/blob/main/CONTRIBUTING.md
+Source: https://github.com/kubernetes/kubernetes/blob/master/CONTRIBUTING.md
+Source: https://github.com/atom/atom/blob/master/CONTRIBUTING.md
+Source: https://github.com/github/docs/blob/main/CONTRIBUTING.md
+Source: https://github.com/facebook/react/blob/main/CONTRIBUTING.md
+
+1. OpenClaw: strong real-world alias policy and AGENTS/CONTRIBUTING/VISION cohesion.
+2. OpenAI Codex: strict command discipline and explicit scope control.
+3. p5.js: explicit AI-policy guardrails in agent instructions.
+4. Vercel + agentsmd spec: compact, context-efficient AGENTS patterns.
+5. Rails/Kubernetes/Atom/GitHub Docs/React: contributor guidance patterns at different project scales.
+
+## Practical merge policy
+
+When these rules conflict:
+
+1. Preserve contributor and reader task success first.
+2. Preserve instruction clarity and unambiguous boundaries second.
+3. Preserve long-term maintainability and context-efficiency third.
+4. Add extra agent optimization only if it does not reduce human clarity or there is an explicit need.
+5. Use your judgement as the expert.
--- a/.agents/skills/technical-documentation/references/build.md
+++ b/.agents/skills/technical-documentation/references/build.md
@@ -0,0 +1,116 @@
+# Build Docs Playbook
+
+Read `principles.md` first, then follow this execution flow.
+
+## 1. Detect and align agent instruction and governance instructions
+
+- Use `references/agent-and-contributing.md` as the source of truth for inventory, canonical/alias mapping, and precedence/conflict handling.
+- Apply the symlink compatibility policy when in scope (`.agents` canonical directory with `.cursor` compatibility symlink when required by tooling).
+- Long-running and extensive build investigations are acceptable when needed to resolve ambiguous or conflicting documentation sources.
+- When available, use sub-agents for bounded parallel inventory/cross-check tasks and merge results into one canonical decision set.
+- Capture required constraints before writing:
+  - nested-agent rules, command/test requirements, PR workflow, and style checks.
+- Use the same command and validation expectations in proposed snippets and examples.
+
+## 2. Inventory product documentation surfaces (not governance only)
+
+- For repo-wide builds, include docs content surfaces in addition to AGENTS/CONTRIBUTING.
+- Inventory docs files and frameworks in scope (examples): `README*.md`, `docs/**`, `**/*.md`, `**/*.mdx`, `**/*.mdc`, `**/*.rst`, `**/*.rsc`, Fern/Mintlify config, Sphinx `conf.py`.
+- Build a coverage map before drafting so governance and product docs are both represented.
+- If scope is ambiguous, default to broader docs discovery first, then narrow intentionally.
+
+## 3. Framework config and path mapping rules
+
+- Detect framework/config first (for example Fern config, Sphinx `conf.py`, Mintlify config, or equivalent).
+- Resolve every referenced path relative to the file/config that declares it, not assumed repo root.
+- Treat filesystem paths and published URL routes as separate mappings; do not infer one from the other without config evidence.
+- Validate both layers:
+  - config -> file exists on disk
+  - config/nav/routing -> URL path is consistent and reachable
+- Record path-mapping assumptions and mismatches in handoff (`missing file`, `stale route`, `wrong base path`).
+
+## 4. Define intent and success
+
+- Audience, prerequisites, and job-to-be-done.
+- Expected reader outcome immediately after completion.
+- Doc type: tutorial, how-to, reference, explanation.
+- Success criteria: what must be true after publish.
+
+## 5. Build structure before prose
+
+- Follow the funnel: what/why, quickstart, next steps.
+- Keep headings informative and scannable.
+- Open each section with the takeaway sentence.
+- Add decision points with concrete branch guidance.
+- For OpenClaw docs work, choose a page type from `references/openclaw.md` before drafting.
+- Keep task-critical OpenClaw configuration inline; link exhaustive defaults, enums, schemas, generated references, and rare debugging workflows.
+
+## 6. Build AGENTS.md and CONTRIBUTING.md intentionally
+
+- Keep AGENTS.md structure consistent with `agents.md` ecosystem patterns:
+  - include YAML frontmatter when present in repo style (`name`, `description`).
+  - state persona scope and explicit instruction boundaries: `Always`, `Ask first`, `Never`.
+  - include concrete commands and representative code examples.
+- For CONTRIBUTING.md, prioritize issue triage flow, PR expectations, setup/test commands, and review gates.
+- Add `Code of Conduct`, `Testing`, `Local checks`, and `PR expectations` sections when missing but required by the repo.
+- If CONTRIBUTING.md is becoming too large, split by scope into linked docs (for example, framework/tool-specific setup and release workflows) and keep the root file as a concise entry point.
+- Keep cross-file consistency: links from CONTRIBUTING.md to AGENTS.md (and vice versa) should be accurate and non-circular.
+- If multiple AGENTS.md files exist, document the directory-level scope and avoid conflicting advice.
+- If a required canonical entry file is missing (for example referenced `README.md` under a major directory), create the file in the same pass instead of adding a caveat-only note.
+- For new entry files, keep them minimal and actionable: purpose, prerequisites, concrete run commands, and pointers to deeper docs.
+
+## 7. Keep agent context tight
+
+- Author once, expose twice:
+  - keep one shared policy core and avoid duplicating guidance in separate agent-specific files.
+  - publish that core through bounded glob-friendly files for Cursor/Claude plus explicit path references for Codex.
+- For Cursor and Claude-style agents, avoid broad references. Use minimal globbing and narrow rule files that each serve one concern (for example, repo-wide setup, test rules, security checks).
+- Keep AGENTS and alias files short-to-medium; move detailed runbooks to linked docs.
+- For Codex, prefer explicit file references and concrete paths for exact reuse.
+- Leave out unrelated historical or process details to avoid token/context drift during future tool reads.
+
+## 8. Brownfield build mode
+
+- Match existing terminology, navigation, and component patterns.
+- Preserve existing IA unless there is a documented migration plan.
+- For rewrites, include a migration note from old to new paths.
+- Prefer smallest safe change set that improves utility.
+
+## 9. Evergreen build mode
+
+- Prefer stable concepts over release-tied narrative.
+- Isolate volatile details under clearly marked version sections.
+- Include maintenance signals: owners, refresh triggers, stale criteria.
+- Include lifecycle notes: deprecation and replacement paths.
+
+## 10. Writing constraints
+
+- Use precise language and short, imperative instructions.
+- Keep code examples copy-ready and self-contained.
+- Include common failure modes and safe defaults.
+- Avoid placeholder guidance that cannot be executed.
+
+## 11. Agent and automation readiness
+
+- Keep key facts in text (not image-only).
+- Prefer structured lists/tables when choices matter.
+- Add links and anchors that allow deterministic navigation.
+- Document what can be checked automatically in CI.
+
+## 12. Build validation
+
+- Validate commands and snippets where possible.
+- Verify links and references in changed sections.
+- Run a reference existence sweep for every path/command you introduced.
+- Verify docs-framework consistency when in scope (for example Sphinx/Fern config and referenced doc paths).
+- For OpenClaw docs work, apply the validation checklist in `references/openclaw.md`.
+
+## 13. Multilingual parity mode (when applicable)
+
+- Pick one source-of-truth language for technical accuracy and release timing.
+- Define parity target: full parity, staged parity, or intentional divergence per section.
+- Keep structure aligned across locales (headings, anchors, section order) when possible.
+- Preserve command/code correctness first; localize explanatory text second.
+- If parity is not feasible, add a visible note with missing scope and expected sync window.
+- Run a locale parity check for changed sections (added/removed steps, warnings, prerequisites).
+- Record unresolved checks explicitly in handoff.
--- a/.agents/skills/technical-documentation/references/openclaw.md
+++ b/.agents/skills/technical-documentation/references/openclaw.md
@@ -0,0 +1,128 @@
+# OpenClaw Documentation Overlay
+
+Use this reference only for OpenClaw docs work. It layers OpenClaw-specific page
+types, navigation, preservation, and validation rules on top of the general
+technical-documentation skill.
+
+## Reader Model
+
+- Lead with the task the reader is trying to complete.
+- Give one recommended path before alternatives.
+- Keep main docs focused on the common path; move dense contracts and rare
+  debugging detail to linked reference or troubleshooting pages.
+- Explain production risks exactly where the reader can make the mistake.
+- Link concepts, guides, references, CLI pages, SDK docs, testing, and
+  troubleshooting so readers can continue without rereading.
+
+## Page Types
+
+Choose the page type before writing or reviewing:
+
+- Overview: route readers to the right product area, integration path, or guide.
+- Quickstart: get a new user to a working result with the fewest safe steps.
+- Topic page: explain a major OpenClaw entity or surface end to end.
+- Guide: walk through one workflow from prerequisites to production readiness.
+- API/SDK/CLI reference: define every object, method, command, option, response,
+  error, enum, default, and version rule in scope.
+- Testing guide: show sandbox setup, fixtures, simulated failures, and live-mode
+  differences.
+- Troubleshooting guide: map observable symptoms to checks, causes, and fixes.
+- Governance file: keep agent/contributor policy concrete, scoped, and aligned
+  with current OpenClaw repo behavior.
+
+## Topic Pages
+
+Use this shape for major-entity pages:
+
+1. Title naming the entity or surface.
+2. Unheaded opening that says what it is, what it owns, and what it does not own.
+3. Requirements, only when setup needs accounts, versions, permissions, plugins,
+   operating systems, or credentials.
+4. Quickstart with the recommended path and smallest reliable verification.
+5. Configuration with task-critical options inline and exhaustive details linked
+   to reference docs.
+6. Major subtopics organized by reader intent, not under a generic "Subtopics"
+   heading.
+7. Troubleshooting with observable failures and concrete checks.
+8. Related links to guides, references, commands, concepts, and adjacent topics.
+
+## Guides
+
+Use this shape for workflow pages:
+
+1. Title naming the outcome, not the implementation detail.
+2. Opening that states what the reader can accomplish.
+3. Before you begin: accounts, keys, permissions, versions, tools, and
+   assumptions.
+4. Choose a path, only when the reader must decide.
+5. Steps with verb-led headings, commands, expected output, and checks.
+6. Test with the smallest reliable proof that the workflow works.
+7. Production readiness: security, retries, limits, observability, migrations,
+   and cleanup.
+8. Troubleshooting near the workflow that causes the failures.
+9. See also links to concepts, references, SDK docs, and adjacent guides.
+
+## Docs IA And Navigation
+
+- Read `docs/docs.json` before navigation changes.
+- Keep topic pages and common workflows on the main reader path.
+- Put exhaustive contracts, generated references, maintainer-only detail, and
+  support material under `Reference` or another clearly scoped support page.
+- Keep generated `plugins/reference/*` children and redirect-only pages out of
+  visible navigation unless explicitly required.
+- For moved pages, include a keep/drop/move/destination matrix in the handoff.
+- Add "Read when" hints for docs-list routing when creating or changing pages
+  that participate in the docs index.
+
+## Source-Backed Content
+
+- CLI docs must match current flags, output, errors, and examples.
+- API/SDK docs must include fields, defaults, enum values, constraints, nullable
+  behavior, lifecycle states, errors, and recovery guidance.
+- Config docs must align exported types, schema/help output, metadata, baselines,
+  and current docs.
+- Dependency-backed behavior must be verified from upstream docs, source, or
+  types before documenting defaults, timing, errors, or API behavior.
+- Separate current behavior, shipped behavior, planned behavior, and maintainer
+  intent.
+
+## Examples
+
+- Prefer complete copy-pasteable commands and snippets.
+- Use realistic variable names and values.
+- Mark placeholders with angle-bracket names such as `<API_KEY>`.
+- Show expected success output when it helps verification.
+- Keep one conceptual unit per code block and use language-specific fences.
+- Avoid examples that hide setup, auth, error handling, or cleanup.
+- Never expose real secrets, live config, phone numbers, private videos, or
+  credentials.
+
+## Preservation Reviews
+
+For rewrites or splits:
+
+- Identify source units before rewriting: headings, paragraphs, tables, examples,
+  CLI/API contracts, warnings, and troubleshooting facts.
+- Map each retained unit to a destination page or section.
+- Do not treat a broad "covered" row as proof for dense source material; use
+  line- or claim-level evidence when the source unit is dense.
+- For dropped content, state whether it is obsolete, duplicated elsewhere,
+  unsupported, or moved to a reference/support page.
+- When a docs-audit artifact is used, verify it is mapped audit data with
+  non-empty `mappings[]`, not only inventory or reindexed JSON.
+
+## Validation
+
+Choose the narrowest proof that covers the touched surface:
+
+- `pnpm docs:list`
+- `pnpm docs:check-mdx`
+- `pnpm docs:check-links`
+- `pnpm docs:check-i18n-glossary`
+- `pnpm format:docs:check` or `pnpm lint:docs`
+- `git diff --check`
+- generated-doc or inventory checks when generated references, plugin catalogs,
+  labeler, or docs scripts changed
+- behavior tests or command probes when docs claim runtime behavior
+
+If proof is blocked, say exactly which command was not run and why.
--- a/.agents/skills/technical-documentation/references/principles.md
+++ b/.agents/skills/technical-documentation/references/principles.md
@@ -0,0 +1,54 @@
+# Documentation Principles
+
+This reference consolidates the core rules used by this skill.
+
+## Matt Palmer: 8 rules for better docs
+
+Source: https://mattpalmer.io/posts/2025/10/8-rules-for-better-docs/
+
+Use these as default operating principles:
+
+1. Write for humans, optimize for agents.
+2. Start with a funnel: what/why, quickstart, next steps.
+3. Use Diataxis to scaffold content.
+4. Write with AI, but structure for agents.
+5. Offload routine docs operations to background agents.
+6. Automate quality with CI.
+7. Automate scaffolding and repetitive workflow tasks.
+8. Make contribution easy and visible.
+
+## OpenAI cookbook: what makes documentation good
+
+Source: https://cookbook.openai.com/articles/what_makes_documentation_good
+
+Key quality constraints:
+
+- Prefer specific and accurate terminology over niche jargon.
+- Keep examples self-contained and minimize dependencies.
+- Prioritize high-value topics over edge-case depth.
+- Do not teach unsafe patterns (for example, exposed secrets).
+- Open with context that helps readers orient quickly.
+- Apply empathy and override rigid rules when it clearly improves outcomes.
+
+## Practical merge policy
+
+When these rules conflict:
+
+1. Preserve reader task success first.
+2. Preserve structural clarity second.
+3. Preserve long-term maintainability third.
+4. Add agent optimization only if it does not reduce human clarity.
+
+For agent-instructions and contributor-governance specifics (AGENTS/aliases/CONTRIBUTING), use `references/agent-and-contributing.md` as the detailed additional source of truth.
+
+When the target repo or request is OpenClaw-specific, layer `references/openclaw.md` on top of these general rules. Otherwise ignore that repo-specific overlay.
+
+## Execution policy for this skill
+
+- Long-running and extensive investigations are allowed for both build and review work when needed to resolve ambiguity or cross-file drift.
+- Use sub-agents when available for bounded parallel discovery, verification, or cross-source comparison.
+- Keep one merged outcome: sub-agent outputs must be normalized into a single consistent recommendation/fix set.
+
+## Multilingual parity rule
+
+When docs exist in multiple languages, target cross-locale parity for task-critical content (steps, warnings, prerequisites, and limits). If full parity is not possible, publish explicit parity status and sync intent.
--- a/.agents/skills/technical-documentation/references/review.md
+++ b/.agents/skills/technical-documentation/references/review.md
@@ -0,0 +1,121 @@
+# Review Docs Playbook
+
+Read `principles.md` first, then apply this checklist.
+
+## 1. Scope and classification
+
+- Identify doc type and target audience.
+- Confirm brownfield vs evergreen intent.
+- Confirm expected outcome for the reader.
+- For full-repo reviews, explicitly include both governance surfaces and product-doc surfaces (`docs/`, README trees, `.md/.mdx/.mdc`, `.rst/.rsc`, framework docs configs).
+- For OpenClaw docs reviews, apply `references/openclaw.md` for page type, docs IA, preservation, examples, and validation checks.
+
+## 2. Investigation behavior
+
+- Proactively find issues and risks without waiting for repeated prompts.
+- If there are signals of deeper problems, continue investigation beyond the first pass.
+- Long-running and extensive investigations are acceptable when needed for confidence and correctness.
+- When available, use sub-agents for bounded parallel discovery (for example file-inventory, command validation, or cross-doc consistency checks), then merge to one final issue set.
+- When no issues are found, state that explicitly and call out residual risks or validation gaps.
+- Default to `apply-fixes` for high-confidence documentation defects unless the user explicitly requests `report-only`.
+- Do not stop at AGENTS/CONTRIBUTING checks when the task is documentation-wide; continue into docs-content and docs-framework surfaces.
+
+## 3. Governance surface review
+
+- Use `references/agent-and-contributing.md` as the source of truth for inventory, canonical/alias mapping, and precedence/conflict handling.
+
+For AGENTS.md:
+- confirm persona intent, scope, and command/tool boundaries are explicit.
+- check frontmatter style matches repo conventions when present.
+- ensure `Always`, `Ask first`, and `Never` boundaries are present when expected.
+- require concrete command examples and repo-specific paths to avoid ambiguity.
+
+For CONTRIBUTING.md:
+
+- verify issue/PR workflow is complete and actionable.
+- ensure local setup, lint/test commands, and review criteria are accurate.
+- ensure governance does not conflict with nested AGENTS instructions.
+- flag oversized files that should be split into linked section docs (for example tool-specific setup and release docs).
+
+For agent-platform awareness:
+
+- confirm references are minimal and scoped for Cursor/Claude glob behavior.
+- confirm Codex-facing guidance uses explicit file references.
+- confirm both surfaces represent the same shared policy core (commands, boundaries, and precedence), not divergent guidance.
+- audit `.agents`/`.cursor` compatibility behavior:
+  - verify canonical rule directory and symlink state match repo policy
+  - verify symlink target integrity and platform/tooling expectations
+  - verify AGENTS policy references remain canonical for Codex even when `.cursor` compatibility exists
+- check for context bloat from duplicated policy statements across agent and contributor files.
+- check for conflicting rules, skills, and agent instructions.
+- check for conflicts between agent instructions and the codebase.
+- check for broken or missing referenced files (for example README/index files named as canonical entry points).
+- check for setup/command drift (for example non-existent install commands, root-level commands that should be module-scoped).
+
+## 4. Product documentation surface review
+
+- Verify docs IA coverage across root/module `README*` files and `docs/**` trees.
+- Review framework-native docs sources in scope (for example Fern, Mintlify, Sphinx, MkDocs) and ensure guidance matches actual source-of-truth files.
+- Check `.md/.mdx/.mdc/.rst/.rsc` for stale commands, missing prerequisites, and broken cross-links.
+- Confirm referenced doc paths and anchors exist.
+- Flag docs that should be split/merged to improve discoverability and maintenance.
+- For OpenClaw docs, check `docs/docs.json`, docs-list routing hints, main path versus `Reference` placement, and generated-reference visibility.
+- For OpenClaw rewrites or page splits, require source-backed keep/drop/move/destination coverage for important claims, warnings, examples, commands, fields, and troubleshooting facts.
+
+## 5. Framework config and path mapping checks
+
+- Detect and read framework config first (for example Fern config, Sphinx `conf.py`, Mintlify config, or equivalent).
+- Resolve path references relative to the declaring file/config.
+- Treat filesystem paths and published URL routes as separate maps; verify both.
+- Flag path-map drift explicitly (`missing file`, `stale route`, `wrong base path`).
+
+## 6. Structural review
+
+- Funnel check: what/why, quickstart, next steps.
+- Validate heading flow and navigation discoverability.
+- Flag critical content trapped in images or buried sections.
+- Check Diataxis alignment and split mixed-purpose sections.
+- For OpenClaw docs, confirm the content matches an explicit page type from `references/openclaw.md`.
+
+## 7. Writing quality review
+
+- Check for concise, scannable paragraphs.
+- Remove ambiguous pronouns and undefined terms.
+- Verify examples are executable and scoped correctly.
+- Verify tone is directive, technical, and non-hand-wavy.
+
+## 8. Brownfield review mode
+
+- Verify compatibility with existing docs IA and conventions.
+- Verify anchors, redirects, and cross-doc links remain valid.
+- Flag regressions in onboarding and task completion paths.
+- Ensure changed terminology is intentionally propagated.
+
+## 9. Evergreen review mode
+
+- Flag date-stamped or brittle wording without version scope.
+- Check ownership and refresh signals are present.
+- Ensure recommendations remain valid after routine product evolution.
+- Flag missing deprecation/migration guidance.
+
+## 10. Tooling and platform review
+
+Read `tooling.md` if platform fit is uncertain.
+
+- Check whether content uses platform primitives effectively.
+- Flag structure that fights the chosen docs platform.
+- Recommend targeted platform-aware improvements.
+
+## 11. Multilingual parity review (when applicable)
+
+- Confirm declared source-of-truth language and expected parity policy.
+- Compare changed sections across locales for step/order/warning drift.
+- Flag missing updates to prerequisites, version notes, limits, and safety guidance.
+- Allow intentional divergence only when rationale is explicit and user-impact is low.
+- Require a reader-visible status note when locale parity is partial.
+
+## 12. Output format
+
+1. Blocking issues (file + required fix)
+2. Non-blocking improvements
+3. Validation notes (done vs pending)
--- a/.agents/skills/technical-documentation/references/tooling.md
+++ b/.agents/skills/technical-documentation/references/tooling.md
@@ -0,0 +1,32 @@
+# Documentation Tooling Guide
+
+Source: https://www.mintlify.com/blog/top-7-api-documentation-tools-of-2025
+
+Use this file when deciding build/review expectations for doc platforms.
+
+## Tool-selection checkpoints
+
+- Existing stack lock-in: do not force migration for minor gains.
+- API workflow depth: generated references, OpenAPI support, testability.
+- Collaboration model: docs-as-code, review workflow, versioning.
+- Runtime quality: search, navigation, and copy-ready code snippets.
+- AI readiness: structured content, stable URLs, and a machine-friendly yet human-readable layout.
+- Human readiness: reading complexity, reading UX, navigation depth, and minimal jargon.
+
+## Apply in brownfield mode
+
+- Prioritize compatibility with the current platform.
+- Use available components and style conventions before introducing new patterns.
+- Propose migration only when current constraints block critical outcomes.
+
+## Apply in evergreen mode
+
+- Favor platforms and templates that make routine updates low-friction.
+- Standardize section templates to reduce drift.
+- Capture ownership, update cadence, and stale-content detection rules.
+
+## Review implications
+
+- Check whether content uses platform primitives correctly (tabs, callouts, endpoint blocks).
+- Flag docs that are technically correct but hard to scan in the chosen platform.
+- Recommend platform-specific improvements only when they reduce cognitive load.
--- a/.crabbox.yaml
+++ b/.crabbox.yaml
@@ -18,6 +18,10 @@ capacity:
    - us-west-2
 actions:
  workflow: .github/workflows/crabbox-hydrate.yml
+  # Default AWS hydration uses local Actions replay. Use
+  # `crabbox actions hydrate --github-runner --job hydrate-github` when the
+  # hydrate job needs GitHub secrets, or `--github-runner --job
+  # hydrate-windows-daemon` for focused native Windows daemon proof.
  job: hydrate
  ref: main
  runnerLabels:
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,3 +1,6 @@
 * text=auto eol=lf
 CLAUDE.md -text
 src/gateway/server-methods/CLAUDE.md -text
+ui/src/i18n/.i18n/* linguist-generated
+ui/src/i18n/locales/*.ts linguist-generated
+ui/src/i18n/locales/en.ts -linguist-generated
--- a/.github/actions/docker-e2e-plan/action.yml
+++ b/.github/actions/docker-e2e-plan/action.yml
@@ -123,14 +123,14 @@ runs:
      shell: bash
      run: |
        set -euo pipefail
-        docker pull "${OPENCLAW_DOCKER_E2E_BARE_IMAGE}"
+        bash scripts/ci-docker-pull-retry.sh "${OPENCLAW_DOCKER_E2E_BARE_IMAGE}"

    - name: Pull shared functional Docker E2E image
      if: inputs.hydrate-artifacts == 'true' && steps.plan.outputs.needs_functional_image == '1'
      shell: bash
      run: |
        set -euo pipefail
-        docker pull "${OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE}"
+        bash scripts/ci-docker-pull-retry.sh "${OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE}"

    - name: Validate Docker E2E credentials
      if: inputs.hydrate-artifacts == 'true'
@@ -140,13 +140,33 @@ runs:
      run: |
        set -euo pipefail
        credentials=",$CREDENTIALS,"
-        if [[ "$credentials" == *",openai,"* ]]; then
-          [[ -n "${OPENAI_API_KEY:-}" ]] || {
-            echo "OPENAI_API_KEY is required for selected Docker E2E lanes." >&2
-            exit 1
-          }
-        fi
-        if [[ "$credentials" == *",anthropic,"* && -z "${ANTHROPIC_API_TOKEN:-}" && -z "${ANTHROPIC_API_KEY:-}" ]]; then
-          echo "ANTHROPIC_API_TOKEN or ANTHROPIC_API_KEY is required for selected Docker E2E lanes." >&2
+        require_any() { # usage: require_any LABEL VAR...; returns 0 when any named env var is non-empty
+          local label="$1"
+          shift
+          local key
+          for key in "$@"; do
+            if [[ -n "${!key:-}" ]]; then # ${!key} is indirect expansion: the value of the variable named by $key
+              return 0
+            fi
+          done
+          echo "Missing credential for ${label}: expected one of $*" >&2
          exit 1
+        }
+        if [[ "$credentials" == *",openai,"* ]]; then
+          require_any OpenAI OPENAI_API_KEY
+        fi
+        if [[ "$credentials" == *",codex,"* ]]; then
+          require_any Codex OPENCLAW_CODEX_AUTH_JSON
+        fi
+        if [[ "$credentials" == *",anthropic,"* ]]; then
+          require_any Anthropic ANTHROPIC_API_TOKEN ANTHROPIC_API_KEY OPENCLAW_CLAUDE_CREDENTIALS_JSON OPENCLAW_CLAUDE_JSON
+        fi
+        if [[ "$credentials" == *",factory,"* ]]; then
+          require_any Factory FACTORY_API_KEY
+        fi
+        if [[ "$credentials" == *",gemini,"* ]]; then
+          require_any Gemini GEMINI_API_KEY GOOGLE_API_KEY OPENCLAW_GEMINI_SETTINGS_JSON
+        fi
+        if [[ "$credentials" == *",opencode,"* ]]; then
+          require_any OpenCode OPENCODE_API_KEY OPENCODE_ZEN_API_KEY
        fi
--- a/.github/actions/setup-node-env/action.yml
+++ b/.github/actions/setup-node-env/action.yml
@@ -26,11 +26,23 @@ inputs:
 runs:
  using: composite
  steps:
+    - name: Normalize container toolcache
+      shell: bash
+      run: |
+        set -euo pipefail
+        if [[ -d /__t && ! -e /opt/hostedtoolcache ]]; then
+          mkdir -p /opt
+          ln -s /__t /opt/hostedtoolcache
+        fi
+
    - name: Setup Node.js
-      uses: actions/setup-node@v6
-      with:
-        node-version: ${{ inputs.node-version }}
-        check-latest: false
+      shell: bash
+      env:
+        REQUESTED_NODE_VERSION: ${{ inputs.node-version }}
+      run: |
+        set -euo pipefail
+        source "$GITHUB_ACTION_PATH/../setup-pnpm-store-cache/ensure-node.sh"
+        openclaw_ensure_node "$REQUESTED_NODE_VERSION"

    - name: Setup pnpm
      uses: ./.github/actions/setup-pnpm-store-cache
@@ -40,9 +52,10 @@ runs:

    - name: Setup Bun
      if: inputs.install-bun == 'true'
-      uses: oven-sh/setup-bun@v2.2.0
-      with:
-        bun-version: "1.3.13"
+      shell: bash
+      run: |
+        set -euo pipefail
+        npm install -g bun@1.3.14

    - name: Runtime versions
      shell: bash
--- a/.github/actions/setup-pnpm-store-cache/action.yml
+++ b/.github/actions/setup-pnpm-store-cache/action.yml
@@ -14,7 +14,7 @@ inputs:
    required: false
    default: ""
  use-actions-cache:
-    description: Whether pnpm/action-setup should cache the pnpm store.
+    description: Whether actions/cache should cache the pnpm store.
    required: false
    default: "true"
 outputs:
@@ -47,12 +47,48 @@ runs:
        openclaw_ensure_node "$requested_node"

    - name: Setup pnpm from packageManager
-      uses: pnpm/action-setup@0e279bb959325dab635dd2c09392533439d90093
+      shell: bash
+      env:
+        COREPACK_ENABLE_DOWNLOAD_PROMPT: "0"
+        PACKAGE_MANAGER_FILE: ${{ inputs.package-manager-file }}
+      run: |
+        set -euo pipefail
+        package_manager="$(node -e "const fs = require('node:fs'); const path = require('node:path'); const pkg = JSON.parse(fs.readFileSync(path.resolve(process.argv[1]), 'utf8')); process.stdout.write(pkg.packageManager || '')" "$PACKAGE_MANAGER_FILE")"
+        case "$package_manager" in
+          pnpm@*) ;;
+          *)
+            echo "::error::Expected packageManager to pin pnpm, got '${package_manager:-<empty>}'"
+            exit 1
+            ;;
+        esac
+        corepack enable
+        for attempt in 1 2 3; do
+          if corepack prepare "$package_manager" --activate; then
+            exit 0
+          fi
+          sleep $((attempt * 5))
+        done
+        corepack prepare "$package_manager" --activate
+
+    - name: Resolve pnpm store path
+      id: pnpm-store
+      if: ${{ inputs.use-actions-cache == 'true' && runner.os != 'Windows' }}
+      shell: bash
+      run: |
+        set -euo pipefail
+        store_path="$(pnpm store path --silent)"
+        node -e "require('node:fs').mkdirSync(process.argv[1], { recursive: true })" "$store_path"
+        echo "path=$store_path" >> "$GITHUB_OUTPUT"
+
+    - name: Restore pnpm store cache
+      if: ${{ inputs.use-actions-cache == 'true' && runner.os != 'Windows' }}
+      uses: actions/cache@v5
      with:
-        package_json_file: ${{ inputs.package-manager-file }}
-        run_install: false
-        cache: ${{ inputs.use-actions-cache }}
-        cache_dependency_path: ${{ inputs.lockfile-path }}
+        path: ${{ steps.pnpm-store.outputs.path }}
+        key: pnpm-store-${{ runner.os }}-${{ inputs.node-version }}-${{ hashFiles(inputs.lockfile-path) }}
+        restore-keys: |
+          pnpm-store-${{ runner.os }}-${{ inputs.node-version }}-
+          pnpm-store-${{ runner.os }}-

    - name: Record pnpm version
      id: pnpm-version
--- a/.github/actions/setup-pnpm-store-cache/ensure-node.sh
+++ b/.github/actions/setup-pnpm-store-cache/ensure-node.sh
@@ -8,7 +8,10 @@ openclaw_node_version_matches() {
  fi
  case "$requested" in
    *x)
-      [[ "${actual%%.*}" == "${requested%%.*}" ]]
+      [[ "${actual%%.*}" == "${requested%%.*}" ]] || return 1
+      if [[ "${requested%%.*}" == "22" ]]; then
+        openclaw_node_version_at_least "$actual" "22.19.0"
+      fi
      ;;
    *.*.*)
      [[ "$actual" == "$requested" ]]
@@ -22,15 +25,47 @@ openclaw_node_version_matches() {
  esac
 }

+openclaw_node_version_at_least() {
+  # Status 0 when dotted version $1 is >= dotted version $2, non-zero otherwise.
+  local have="$1" want="$2"
+  local have_major have_minor have_patch want_major want_minor want_patch
+  IFS=. read -r have_major have_minor have_patch <<< "$have"
+  IFS=. read -r want_major want_minor want_patch <<< "$want"
+  # A missing minor or patch component compares as 0.
+  : "${have_minor:=0}" "${have_patch:=0}"
+  : "${want_minor:=0}" "${want_patch:=0}"
+  # Walk components from most to least significant; the first difference decides.
+  if (( have_major > want_major )); then
+    return 0
+  elif (( have_major < want_major )); then
+    return 1
+  elif (( have_minor > want_minor )); then
+    return 0
+  elif (( have_minor < want_minor )); then
+    return 1
+  fi
+  (( have_patch >= want_patch ))
+}
+
 openclaw_active_node_version() {
  node -p 'process.versions.node' 2>/dev/null || true
 }

 openclaw_prepend_node_bin() {
  local node_bin_dir="$1"
-  export PATH="$node_bin_dir:$PATH"
+  local github_path_dir="${2:-$node_bin_dir}"
+  local shell_node_bin_dir="$node_bin_dir"
+  if command -v cygpath >/dev/null 2>&1; then
+    shell_node_bin_dir="$(cygpath -u "$node_bin_dir" 2>/dev/null || printf '%s' "$node_bin_dir")"
+  fi
+  export PATH="$shell_node_bin_dir:$PATH"
  if [[ -n "${GITHUB_PATH:-}" ]]; then
-    echo "$node_bin_dir" >> "$GITHUB_PATH"
+    local github_node_bin_dir="$github_path_dir"
+    if [[ $# -lt 2 ]] && command -v cygpath >/dev/null 2>&1; then
+      # No explicit GITHUB_PATH dir was passed: derive a Windows-form path (cygpath -w), else keep the shell form.
+      github_node_bin_dir="$(cygpath -w "$shell_node_bin_dir" 2>/dev/null || printf '%s' "$shell_node_bin_dir")"
+    fi
+    echo "$github_node_bin_dir" >> "$GITHUB_PATH"
  fi
  hash -r
 }
@@ -43,11 +78,15 @@ openclaw_find_toolcache_node() {
    "${RUNNER_TOOL_CACHE:-}" \
    "${AGENT_TOOLSDIRECTORY:-}" \
    "${ACTIONS_RUNNER_TOOL_CACHE:-}" \
+    "${OPENCLAW_CONTAINER_TOOL_CACHE:-/__t}" \
    "/opt/hostedtoolcache" \
    "/home/runner/_work/_tool" \
    "/Users/runner/hostedtoolcache" \
    "/c/hostedtoolcache/windows"
  do
+    if [[ ! -d "$root" && "$root" == *\\* ]] && command -v cygpath >/dev/null 2>&1; then
+      root="$(cygpath -u "$root" 2>/dev/null || printf '%s' "$root")"
+    fi
    if [[ -d "$root/node" ]]; then
      roots+=("$root/node")
    elif [[ "$(basename "$root")" == "node" && -d "$root" ]]; then
@@ -68,6 +107,92 @@ openclaw_find_toolcache_node() {
  return 1
 }

+openclaw_resolve_node_download_version() {
+  # Print the concrete "vX.Y.Z" release to download for $1; non-zero status when none resolves.
+  local requested_node="$1"
+  if [[ "$requested_node" =~ ^v?[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
+    printf 'v%s\n' "${requested_node#v}"
+    return 0
+  fi
+  # Partial request (22, 22.x, 22.19): take the first index.json entry whose version has that prefix.
+  local prefix="${requested_node#v}"
+  prefix="v${prefix%%[xX]*}"
+  [[ "$prefix" == *. ]] || prefix="${prefix}."
+  curl -fsSL https://nodejs.org/dist/index.json |
+    OPENCLAW_NODE_PREFIX="$prefix" python3 -c 'import json, os, sys
+prefix = os.environ["OPENCLAW_NODE_PREFIX"]
+matches = [item.get("version", "") for item in json.load(sys.stdin)]
+matches = [version for version in matches if version.startswith(prefix)]
+if not matches:
+    sys.exit("no Node release matches " + prefix)  # exit status 1, message on stderr
+print(matches[0])
+'
+}
+
+openclaw_node_download_platform() {
+  # Print the nodejs.org dist platform suffix for this host; return 1 if the host is unsupported.
+  local kernel machine target
+  kernel="$(uname -s)"
+  machine="$(uname -m)"
+  case "${kernel}:${machine}" in
+    Linux:x86_64) target='linux-x64' ;;
+    Linux:aarch64 | Linux:arm64) target='linux-arm64' ;;
+    Darwin:x86_64) target='darwin-x64' ;;
+    Darwin:arm64) target='darwin-arm64' ;;
+    # MINGW*/MSYS*/CYGWIN* kernel names map to the Windows builds.
+    MINGW*:x86_64 | MSYS*:x86_64 | CYGWIN*:x86_64 | MINGW*:AMD64 | MSYS*:AMD64 | CYGWIN*:AMD64) target='win-x64' ;;
+    MINGW*:aarch64 | MINGW*:arm64 | MSYS*:aarch64 | MSYS*:arm64 | CYGWIN*:aarch64 | CYGWIN*:arm64) target='win-arm64' ;;
+    *)
+      return 1 ;;
+  esac
+  printf '%s\n' "$target"
+}
+
+openclaw_download_node() {
+  # Download Node for "$1" from nodejs.org into the runner temp dir and prepend it to PATH.
+  # Each step returns 1 on failure: the caller runs this as `... || true`, which disables errexit in here.
+  local requested_node="$1" version platform archive_url install_root temp_root
+  version="$(openclaw_resolve_node_download_version "$requested_node")" && [[ -n "$version" ]] || return 1
+  platform="$(openclaw_node_download_platform)" || return 1
+  temp_root="${RUNNER_TEMP:-/tmp}"
+  if command -v cygpath >/dev/null 2>&1; then
+    temp_root="$(cygpath -u "$temp_root" 2>/dev/null || printf '%s\n' "$temp_root")"
+  fi
+  install_root="${temp_root}/openclaw-node-${version}-${platform}"
+  if [[ "$platform" == win-* ]]; then
+    local archive_path ps_archive_path ps_install_root ps_bin_dir node_bin_dir
+    archive_path="${temp_root}/node-${version}-${platform}.zip"
+    archive_url="https://nodejs.org/dist/${version}/node-${version}-${platform}.zip"
+    rm -rf "$install_root" && mkdir -p "$install_root" || return 1
+    echo "Downloading Node ${version} from ${archive_url}"
+    curl -fsSL -o "$archive_path" "$archive_url" || return 1
+    ps_archive_path="$archive_path"
+    ps_install_root="$install_root"
+    if command -v cygpath >/dev/null 2>&1; then
+      ps_archive_path="$(cygpath -w "$archive_path")"
+      ps_install_root="$(cygpath -w "$install_root")"
+    fi
+    ps_bin_dir="$ps_install_root\\node-${version}-${platform}"
+    node_bin_dir="$install_root/node-${version}-${platform}"
+    if command -v pwsh >/dev/null 2>&1; then
+      pwsh -NoLogo -NoProfile -Command "Expand-Archive -LiteralPath '${ps_archive_path}' -DestinationPath '${ps_install_root}' -Force" || return 1
+      openclaw_prepend_node_bin "$node_bin_dir" "$ps_bin_dir"
+    elif command -v powershell.exe >/dev/null 2>&1; then
+      powershell.exe -NoLogo -NoProfile -Command "Expand-Archive -LiteralPath '${ps_archive_path}' -DestinationPath '${ps_install_root}' -Force" || return 1
+      openclaw_prepend_node_bin "$node_bin_dir" "$ps_bin_dir"
+    else
+      unzip -q "$archive_path" -d "$install_root" || return 1
+      openclaw_prepend_node_bin "$node_bin_dir"
+    fi
+  else
+    archive_url="https://nodejs.org/dist/${version}/node-${version}-${platform}.tar.xz"
+    mkdir -p "$install_root" || return 1
+    echo "Downloading Node ${version} from ${archive_url}"
+    curl -fsSL "$archive_url" | tar -xJ -C "$install_root" --strip-components=1 || return 1
+    openclaw_prepend_node_bin "$install_root/bin"
+  fi
+}
+
 openclaw_ensure_node() {
  local requested_node="${1:-}"
  requested_node="${requested_node#v}"
@@ -86,6 +211,8 @@ openclaw_ensure_node() {
  if [[ -n "$node_bin" ]]; then
    echo "Using Node $("$node_bin" -p 'process.versions.node') from $node_bin"
    openclaw_prepend_node_bin "$(dirname "$node_bin")"
+  else
+    openclaw_download_node "$requested_node" || true
  fi

  active_node_version="$(openclaw_active_node_version)"
--- a/.github/codeql/codeql-agent-runtime-boundary-critical-quality.yml
+++ b/.github/codeql/codeql-agent-runtime-boundary-critical-quality.yml
@@ -17,7 +17,8 @@ paths:
  - src/acp/control-plane
  - src/agents/command
  - src/agents/cli-runner
-  - src/agents/pi-embedded-runner
+  - src/agents/embedded-agent-runner
+  - src/agents/sessions
  - src/agents/tools
  - src/agents/*completion*.ts
  - src/agents/*transport*.ts
--- a/.github/codeql/codeql-core-auth-secrets-critical-quality.yml
+++ b/.github/codeql/codeql-core-auth-secrets-critical-quality.yml
@@ -22,6 +22,8 @@ paths:
  - src/agents/sandbox
  - src/agents/sandbox.ts
  - src/agents/sandbox-*.ts
+  - src/agents/sessions/*auth*.ts
+  - src/agents/sessions/**/*auth*.ts
  - src/cron/service/jobs.ts
  - src/cron/stagger.ts
  - src/gateway/*auth*.ts
--- a/.github/codeql/codeql-mcp-process-tool-boundary-critical-security.yml
+++ b/.github/codeql/codeql-mcp-process-tool-boundary-critical-security.yml
@@ -24,14 +24,15 @@ paths:
  - src/agents/openclaw-plugin-tools.ts
  - src/agents/openclaw-tools.runtime.ts
  - src/agents/openclaw-tools.registration.ts
-  - src/agents/pi-tool-definition-adapter.ts
-  - src/agents/pi-tools.abort.ts
-  - src/agents/pi-tools.before-tool-call*.ts
-  - src/agents/pi-tools.host-edit.ts
-  - src/agents/pi-tools-parameter-schema.ts
-  - src/agents/pi-embedded-runner/effective-tool-policy.ts
-  - src/agents/pi-embedded-runner/tool-name-allowlist.ts
-  - src/agents/pi-embedded-runner/tool-schema-runtime.ts
+  - src/agents/agent-tool-definition-adapter.ts
+  - src/agents/agent-tools.abort.ts
+  - src/agents/agent-tools.before-tool-call*.ts
+  - src/agents/agent-tools.read.ts
+  - src/agents/agent-tools-parameter-schema.ts
+  - src/agents/sessions/tools/**
+  - src/agents/embedded-agent-runner/effective-tool-policy.ts
+  - src/agents/embedded-agent-runner/tool-name-allowlist.ts
+  - src/agents/embedded-agent-runner/tool-schema-runtime.ts
  - src/agents/tools/gateway-tool.ts
  - src/agents/tools/message-tool.ts
  - src/agents/tools/sessions-send-tool.ts
--- a/.github/codex/prompts/docs-agent.md
+++ b/.github/codex/prompts/docs-agent.md
@@ -12,7 +12,7 @@ Hard limits:
 - Do not change production code, tests, package metadata, generated baselines, lockfiles, or CI config.
 - Keep changes minimal and factual.
 - Use "plugin/plugins" in user-facing docs/UI/changelog; `extensions/` is only the internal workspace layout.
- Do not add a changelog entry unless the docs update describes a user-facing behavior/API change from the triggering commit.
+- Do not add `CHANGELOG.md` entries during normal docs work. Capture user-facing release-note context in the PR body or commit message instead.

 Allowed paths:

--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -10,6 +10,11 @@
          - "extensions/file-transfer/**"
          - "docs/nodes/index.md"
          - "docs/plugins/sdk-runtime.md"
+"plugin: pixverse":
+  - changed-files:
+      - any-glob-to-any-file:
+          - "extensions/pixverse/**"
+          - "docs/providers/pixverse.md"
 "channel: discord":
  - changed-files:
      - any-glob-to-any-file:
@@ -36,6 +41,12 @@
      - any-glob-to-any-file:
          - "extensions/google-meet/**"
          - "docs/plugins/google-meet.md"
+"plugin: meeting-notes":
+  - changed-files:
+      - any-glob-to-any-file:
+          - "extensions/meeting-notes/**"
+          - "docs/plugins/meeting-notes.md"
+          - "src/meeting-notes/**"
 "plugin: migrate-hermes":
  - changed-files:
      - any-glob-to-any-file:
@@ -485,6 +496,7 @@
  - changed-files:
      - any-glob-to-any-file:
          - "extensions/diffs/**"
+          - "extensions/diffs-language-pack/**"
 "extensions: elevenlabs":
  - changed-files:
      - any-glob-to-any-file:
--- a/.github/package-trusted-sources.json
+++ b/.github/package-trusted-sources.json
@@ -0,0 +1,4 @@
+{
+  "schemaVersion": 1,
+  "sources": {}
+}
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -1,165 +1,132 @@
 ## Summary

-Describe the problem and fix in 2–5 bullets:
+What problem does this PR solve?
+
+
+Why does this matter now?
+
+
+What is the intended outcome?
+
+
+What is intentionally out of scope?
+
+
+What does success look like?
+
+
+What should reviewers focus on?
+
+<details>
+<summary>Summary guidance</summary>
+
+This PR description is the contributor's durable explanation of the change. Write it for human maintainers first; ClawSweeper and Barnacle use the same text to understand intent, proof, risk, and current review state.
+
+Describe the intent and outcome in 2-5 bullets. Avoid restating the diff; reviewers and bots can read the changed files.

 If this PR fixes a plugin beta-release blocker, title it `fix(<plugin-id>): beta blocker - <summary>` and link the matching `Beta blocker: <plugin-name> - <summary>` issue labeled `beta-blocker`. Contributors cannot label PRs, so the title is the PR-side signal for maintainers and automation.

- Problem:
- Solution:
- What changed:
- What did NOT change (scope boundary):
+</details>

-## Motivation
+## Linked context

-Explain why this change should exist now. Link it to the user pain, failure mode, maintainer need, or product goal. If this is purely mechanical, write `N/A`.
+Which issue does this close?

-
+Closes #

-## Change Type (select all)
+Which issues, PRs, or discussions are related?

- [ ] Bug fix
- [ ] Feature
- [ ] Refactor required for the fix
- [ ] Docs
- [ ] Security hardening
- [ ] Chore/infra
+Related #

-## Scope (select all touched areas)
+Was this requested by a maintainer or owner?

- [ ] Gateway / orchestration
- [ ] Skills / tool execution
- [ ] Auth / tokens
- [ ] Memory / storage
- [ ] Integrations
- [ ] API / contracts
- [ ] UI / DX
- [ ] CI/CD / infra
+<details>
+<summary>Linked context guidance</summary>

-## Linked Issue/PR
+Link the issue, PR, discussion, maintainer request, or owner request that explains why this PR should exist. Maintainer context helps reviewers and automation distinguish intended work from drive-by churn.

- Closes #
- Related #
- [ ] This PR fixes a bug or regression
+</details>

 ## Real behavior proof (required for external PRs)

-External contributors must show after-fix evidence from a real OpenClaw setup. Unit tests, mocks, lint, typechecks, snapshots, and CI are supplemental only. Screenshots are encouraged even for CLI, console, text, or log changes; terminal screenshots and copied live output count. Be mindful of private information like IP addresses, API keys, phone numbers, non-public endpoints, or other private details when providing evidence.
-
 - Behavior or issue addressed:
 - Real environment tested:
 - Exact steps or command run after this patch:
 - Evidence after fix (screenshot, recording, terminal capture, console output, redacted runtime log, linked artifact, or copied live output):
 - Observed result after fix:
 - What was not tested:
+- Proof limitations or environment constraints:
 - Before evidence (optional but encouraged):

-## Root Cause (if applicable)
+<details>
+<summary>Real behavior proof guidance</summary>

-For bug fixes or regressions, explain why this happened, not just what changed. Otherwise write `N/A`. If the cause is unclear, write `Unknown`.
+External contributors must show after-fix evidence from a real OpenClaw setup. Unit tests, mocks, lint, typechecks, snapshots, and CI are supplemental only.

- Root cause:
- Missing detection / guardrail:
- Contributing context (if known):
+Screenshots are encouraged even for CLI, console, text, or log changes. Terminal screenshots, copied live output, redacted runtime logs, recordings, and linked artifacts count.

-## Regression Test Plan (if applicable)
+If your environment cannot produce the ideal proof, explain that under `Proof limitations or environment constraints` so reviewers and ClawSweeper can direct the next step properly.

-For bug fixes or regressions, name the smallest reliable test coverage that should catch this. Otherwise write `N/A`.
+Be mindful of private information like IP addresses, API keys, phone numbers, non-public endpoints, or other private details when providing evidence.

- Coverage level that should have caught this:
-  - [ ] Unit test
-  - [ ] Seam / integration test
-  - [ ] End-to-end test
-  - [ ] Existing coverage already sufficient
- Target test or file:
- Scenario the test should lock in:
- Why this is the smallest reliable guardrail:
- Existing test that already covers this (if any):
- If no new test is added, why not:
+</details>

-## User-visible / Behavior Changes
+## Tests and validation

-List user-visible changes (including defaults/config).  
-If none, write `None`.
+Which commands did you run?

-## Diagram (if applicable)

-For UI changes or non-trivial logic flows, include a small ASCII diagram reviewers can scan quickly. Otherwise write `N/A`.
+What regression coverage was added or updated?

-```text
-Before:
-[user action] -> [old state]

-After:
-[user action] -> [new state] -> [result]
-```
+What failed before this fix, if known?

-## Security Impact (required)

- New permissions/capabilities? (`Yes/No`)
- Secrets/tokens handling changed? (`Yes/No`)
- New/changed network calls? (`Yes/No`)
- Command/tool execution surface changed? (`Yes/No`)
- Data access scope changed? (`Yes/No`)
- If any `Yes`, explain risk + mitigation:
+If no test was added, why not?

-## Repro + Verification
+<details>
+<summary>Testing guidance</summary>

-### Environment
+List the focused commands you ran, not every incidental check. CI is useful supporting evidence, but external PRs still need the real behavior proof described above whenever behavior changes.

- OS:
- Runtime/container:
- Model/provider:
- Integration/channel (if any):
- Relevant config (redacted):
+</details>

-### Steps
+## Risk checklist

-1.
-2.
-3.
+Did user-visible behavior change? (`Yes/No`)

-### Expected

-
+Did config, environment, or migration behavior change? (`Yes/No`)

-### Actual

-
+Did security, auth, secrets, network, or tool execution behavior change? (`Yes/No`)

-## Evidence

-Attach at least one:
+What is the highest-risk area?

- [ ] Failing test/log before + passing after
- [ ] Trace/log snippets
- [ ] Screenshot/recording
- [ ] Perf numbers (if relevant)

-## Human Verification (required)
+How is that risk mitigated?

-What you personally verified (not just CI), and how:
+<details>
+<summary>Risk guidance</summary>

- Verified scenarios:
- Edge cases checked:
- What you did **not** verify:
+Use this for author judgment that is not obvious from the diff. ClawSweeper can see touched files, but it cannot know which behavior you think is risky, why the risk is acceptable, or what mitigation reviewers should verify.

-## Review Conversations
+</details>

- [ ] I replied to or resolved every bot review conversation I addressed in this PR.
- [ ] I left unresolved only the conversations that still need reviewer or maintainer judgment.
+## Current review state

-If a bot review conversation is addressed by this PR, resolve that conversation yourself. Do not leave bot review conversation cleanup for maintainers.
+What is the next action?

-## Compatibility / Migration

- Backward compatible? (`Yes/No`)
- Config/env changes? (`Yes/No`)
- Migration needed? (`Yes/No`)
- If yes, exact upgrade steps:
+What is still waiting on author, maintainer, CI, or external proof?

-## Risks and Mitigations

-List only real risks for this PR. Add/remove entries as needed. If none, write `None`.
+Which bot or reviewer comments were addressed?

- Risk:
-  - Mitigation:
+<details>
+<summary>Review state guidance</summary>
+
+Keep this as the durable state for review progress. If useful information appears in comments, fold the current next action or blocker back here so maintainers and ClawSweeper do not need to reconstruct state from comment history.
+
+</details>
--- a/.github/workflows/ci-build-artifacts-testbox.yml
+++ b/.github/workflows/ci-build-artifacts-testbox.yml
@@ -41,6 +41,10 @@ jobs:
          set -euo pipefail

          workdir="$GITHUB_WORKSPACE"
+          if [[ -z "$CHECKOUT_TOKEN" ]]; then
+            echo "checkout token is missing" >&2
+            exit 1
+          fi
          auth_header="$(printf 'x-access-token:%s' "$CHECKOUT_TOKEN" | base64 | tr -d '\n')"

          reset_checkout_dir() {
@@ -57,9 +61,9 @@ jobs:
            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}"
            git -C "$workdir" config gc.auto 0

-            timeout --signal=TERM 30s git -C "$workdir" \
+            timeout --signal=TERM --kill-after=10s 30s git -C "$workdir" \
              -c protocol.version=2 \
-              -c "http.https://github.com/.extraheader=AUTHORIZATION: basic ${auth_header}" \
+              -c "http.extraheader=AUTHORIZATION: basic ${auth_header}" \
              fetch --no-tags --prune --no-recurse-submodules --depth=1 origin \
              "+${CHECKOUT_SHA}:refs/remotes/origin/ci-target" || return 1

@@ -187,12 +191,15 @@ jobs:
          git fetch --no-tags --depth=50 origin "+refs/heads/main:refs/remotes/origin/main"

          node_bin="$(dirname "$(node -p 'process.execPath')")"
-          pnpm_bin="$(command -v pnpm)"
          sudo ln -sf "$node_bin/node" /usr/local/bin/node
          sudo ln -sf "$node_bin/npm" /usr/local/bin/npm
          sudo ln -sf "$node_bin/npx" /usr/local/bin/npx
          sudo ln -sf "$node_bin/corepack" /usr/local/bin/corepack
-          sudo ln -sf "$pnpm_bin" /usr/local/bin/pnpm
+          sudo tee /usr/local/bin/pnpm >/dev/null <<'PNPM'
+          #!/usr/bin/env bash
+          exec /usr/local/bin/corepack pnpm "$@"
+          PNPM
+          sudo chmod 0755 /usr/local/bin/pnpm

      - name: Hydrate Testbox provider env helper
        shell: bash
@@ -222,6 +229,6 @@ jobs:

      - name: Run Testbox
        uses: useblacksmith/run-testbox@5ca05834db1d3813554d1dd109e5f2087a8d7cbc
-        if: always()
+        if: success()
        env:
          FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
--- a/.github/workflows/ci-check-testbox.yml
+++ b/.github/workflows/ci-check-testbox.yml
@@ -39,6 +39,10 @@ jobs:
          set -euo pipefail

          workdir="$GITHUB_WORKSPACE"
+          if [[ -z "$CHECKOUT_TOKEN" ]]; then
+            echo "checkout token is missing" >&2
+            exit 1
+          fi
          auth_header="$(printf 'x-access-token:%s' "$CHECKOUT_TOKEN" | base64 | tr -d '\n')"

          reset_checkout_dir() {
@@ -55,9 +59,9 @@ jobs:
            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}"
            git -C "$workdir" config gc.auto 0

-            timeout --signal=TERM 30s git -C "$workdir" \
+            timeout --signal=TERM --kill-after=10s 30s git -C "$workdir" \
              -c protocol.version=2 \
-              -c "http.https://github.com/.extraheader=AUTHORIZATION: basic ${auth_header}" \
+              -c "http.extraheader=AUTHORIZATION: basic ${auth_header}" \
              fetch --no-tags --prune --no-recurse-submodules --depth=1 origin \
              "+${CHECKOUT_SHA}:refs/remotes/origin/ci-target" || return 1

@@ -88,12 +92,15 @@ jobs:
          git fetch --no-tags --depth=50 origin "+refs/heads/main:refs/remotes/origin/main"

          node_bin="$(dirname "$(node -p 'process.execPath')")"
-          pnpm_bin="$(command -v pnpm)"
          sudo ln -sf "$node_bin/node" /usr/local/bin/node
          sudo ln -sf "$node_bin/npm" /usr/local/bin/npm
          sudo ln -sf "$node_bin/npx" /usr/local/bin/npx
          sudo ln -sf "$node_bin/corepack" /usr/local/bin/corepack
-          sudo ln -sf "$pnpm_bin" /usr/local/bin/pnpm
+          sudo tee /usr/local/bin/pnpm >/dev/null <<'PNPM'
+          #!/usr/bin/env bash
+          exec /usr/local/bin/corepack pnpm "$@"
+          PNPM
+          sudo chmod 0755 /usr/local/bin/pnpm

      - name: Hydrate Testbox provider env helper
        shell: bash
@@ -103,6 +110,7 @@ jobs:
          ANTHROPIC_API_TOKEN: ${{ secrets.ANTHROPIC_API_TOKEN }}
          CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }}
          DEEPINFRA_API_KEY: ${{ secrets.DEEPINFRA_API_KEY }}
+          FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
@@ -123,7 +131,6 @@ jobs:

      - name: Run Testbox
        uses: useblacksmith/run-testbox@5ca05834db1d3813554d1dd109e5f2087a8d7cbc
-        if: always()
-        continue-on-error: true
+        if: success()
        env:
          FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -76,13 +76,24 @@ jobs:
      android_matrix: ${{ steps.manifest.outputs.android_matrix }}
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
-        with:
-          ref: ${{ inputs.target_ref || github.sha }}
-          fetch-depth: 1
-          fetch-tags: false
-          persist-credentials: false
-          submodules: false
+        env:
+          CHECKOUT_REPO: ${{ github.repository }}
+          CHECKOUT_REF: ${{ inputs.target_ref || github.sha }}
+          CHECKOUT_FALLBACK_REF: ${{ github.sha }}
+          GITHUB_EVENT_NAME: ${{ github.event_name }}
+        run: |
+          set -euo pipefail
+          git init "$GITHUB_WORKSPACE"
+          git -C "$GITHUB_WORKSPACE" config gc.auto 0
+          git -C "$GITHUB_WORKSPACE" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
+          if ! git -C "$GITHUB_WORKSPACE" fetch --no-tags --depth=1 origin "+${CHECKOUT_REF}:refs/remotes/origin/checkout"; then
+            if [ "$GITHUB_EVENT_NAME" != "workflow_dispatch" ] || [ "$CHECKOUT_REF" = "$CHECKOUT_FALLBACK_REF" ]; then
+              exit 1
+            fi
+            echo "::warning::workflow_dispatch target_ref '$CHECKOUT_REF' is unavailable; falling back to head SHA '$CHECKOUT_FALLBACK_REF'"
+            git -C "$GITHUB_WORKSPACE" fetch --no-tags --depth=1 origin "+${CHECKOUT_FALLBACK_REF}:refs/remotes/origin/checkout"
+          fi
+          git -C "$GITHUB_WORKSPACE" checkout --detach refs/remotes/origin/checkout

      - name: Resolve checkout SHA
        id: checkout_ref
@@ -199,6 +210,7 @@ jobs:
          if (runNodeFull) {
            checksFastCoreTasks.push(
              { check_name: "checks-fast-bundled-protocol", runtime: "node", task: "bundled-protocol" },
+              { check_name: "checks-fast-bun-launcher", runtime: "bun", task: "bun-launcher" },
            );
          } else {
            if (runNodeFastCiRouting) {
@@ -299,13 +311,24 @@ jobs:
      PRE_COMMIT_HOME: .cache/pre-commit-security-fast
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
-        with:
-          ref: ${{ inputs.target_ref || github.sha }}
-          fetch-depth: 1
-          fetch-tags: false
-          persist-credentials: false
-          submodules: false
+        env:
+          CHECKOUT_REPO: ${{ github.repository }}
+          CHECKOUT_REF: ${{ inputs.target_ref || github.sha }}
+          CHECKOUT_FALLBACK_REF: ${{ github.sha }}
+          GITHUB_EVENT_NAME: ${{ github.event_name }}
+        run: |
+          set -euo pipefail
+          git init "$GITHUB_WORKSPACE"
+          git -C "$GITHUB_WORKSPACE" config gc.auto 0
+          git -C "$GITHUB_WORKSPACE" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
+          if ! git -C "$GITHUB_WORKSPACE" fetch --no-tags --depth=1 origin "+${CHECKOUT_REF}:refs/remotes/origin/checkout"; then
+            if [ "$GITHUB_EVENT_NAME" != "workflow_dispatch" ] || [ "$CHECKOUT_REF" = "$CHECKOUT_FALLBACK_REF" ]; then
+              exit 1
+            fi
+            echo "::warning::workflow_dispatch target_ref '$CHECKOUT_REF' is unavailable; falling back to head SHA '$CHECKOUT_FALLBACK_REF'"
+            git -C "$GITHUB_WORKSPACE" fetch --no-tags --depth=1 origin "+${CHECKOUT_FALLBACK_REF}:refs/remotes/origin/checkout"
+          fi
+          git -C "$GITHUB_WORKSPACE" checkout --detach refs/remotes/origin/checkout

      - name: Ensure security base commit
        if: github.event_name != 'workflow_dispatch'
@@ -335,22 +358,20 @@ jobs:
          fi
          echo "PRE_COMMIT_CONFIG_PATH=$trusted_config" >> "$GITHUB_ENV"

-      - name: Setup Python
+      - name: Resolve Python runtime
        id: setup-python
-        uses: actions/setup-python@v6
-        with:
-          python-version: "3.12"
-
-      - name: Restore pre-commit cache
-        uses: actions/cache@v5
-        with:
-          path: .cache/pre-commit-security-fast
-          key: pre-commit-security-fast-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('.pre-commit-config.yaml') }}
-          restore-keys: |
-            pre-commit-security-fast-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-
+        run: |
+          set -euo pipefail
+          python3 --version
+          version="$(python3 - <<'PY'
+          import platform
+          print(platform.python_version())
+          PY
+          )"
+          echo "python-version=${version}" >> "$GITHUB_OUTPUT"

      - name: Install pre-commit
-        run: python -m pip install --disable-pip-version-check pre-commit==4.2.0
+        run: python3 -m pip install --disable-pip-version-check pre-commit==4.2.0

      - name: Detect committed private keys
        run: pre-commit run --config "${PRE_COMMIT_CONFIG_PATH:-.pre-commit-config.yaml}" --all-files detect-private-key
@@ -383,10 +404,12 @@ jobs:
          pre-commit run --config "${PRE_COMMIT_CONFIG_PATH:-.pre-commit-config.yaml}" zizmor --files "${workflow_files[@]}"

      - name: Setup Node.js
-        uses: actions/setup-node@v6
-        with:
-          node-version: "24.x"
-          check-latest: false
+        env:
+          REQUESTED_NODE_VERSION: "24.x"
+        run: |
+          set -euo pipefail
+          source .github/actions/setup-pnpm-store-cache/ensure-node.sh
+          openclaw_ensure_node "$REQUESTED_NODE_VERSION"

      - name: Audit production dependencies
        run: node scripts/pre-commit/pnpm-audit-prod.mjs --audit-level=high
@@ -411,13 +434,10 @@ jobs:
        env:
          CHECKOUT_REPO: ${{ github.repository }}
          CHECKOUT_SHA: ${{ needs.preflight.outputs.checkout_revision }}
-          CHECKOUT_TOKEN: ${{ github.token }}
        run: |
          set -euo pipefail

          workdir="$GITHUB_WORKSPACE"
-          auth_header="$(printf 'x-access-token:%s' "$CHECKOUT_TOKEN" | base64 | tr -d '\n')"
-
          reset_checkout_dir() {
            mkdir -p "$workdir"
            find "$workdir" -mindepth 1 -maxdepth 1 -exec rm -rf {} +
@@ -429,12 +449,11 @@ jobs:
            reset_checkout_dir
            git init "$workdir" >/dev/null
            git config --global --add safe.directory "$workdir"
-            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}"
+            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
            git -C "$workdir" config gc.auto 0

-            timeout --signal=TERM 30s git -C "$workdir" \
+            timeout --signal=TERM --kill-after=10s 30s git -C "$workdir" \
              -c protocol.version=2 \
-              -c "http.https://github.com/.extraheader=AUTHORIZATION: basic ${auth_header}" \
              fetch --no-tags --prune --no-recurse-submodules --depth=1 origin \
              "+${CHECKOUT_SHA}:refs/remotes/origin/ci-target" || return 1

@@ -516,7 +535,24 @@ jobs:
        run: pnpm test:build:singleton

      - name: Check CLI startup memory
-        run: pnpm test:startup:memory
+        shell: bash
+        run: |
+          set +e
+          pnpm test:startup:memory
+          status=$?
+          if [[ -f .artifacts/startup-memory/summary.md ]]; then
+            cat .artifacts/startup-memory/summary.md >> "$GITHUB_STEP_SUMMARY"
+          fi
+          exit "$status"
+
+      - name: Upload startup memory report
+        if: always()
+        uses: actions/upload-artifact@v7
+        with:
+          name: startup-memory
+          path: .artifacts/startup-memory/
+          if-no-files-found: ignore
+          retention-days: 7

      - name: Run built artifact checks
        id: built_artifact_checks
@@ -622,13 +658,10 @@ jobs:
        env:
          CHECKOUT_REPO: ${{ github.repository }}
          CHECKOUT_SHA: ${{ needs.preflight.outputs.checkout_revision }}
-          CHECKOUT_TOKEN: ${{ github.token }}
        run: |
          set -euo pipefail

          workdir="$GITHUB_WORKSPACE"
-          auth_header="$(printf 'x-access-token:%s' "$CHECKOUT_TOKEN" | base64 | tr -d '\n')"
-
          reset_checkout_dir() {
            mkdir -p "$workdir"
            find "$workdir" -mindepth 1 -maxdepth 1 -exec rm -rf {} +
@@ -640,12 +673,11 @@ jobs:
            reset_checkout_dir
            git init "$workdir" >/dev/null
            git config --global --add safe.directory "$workdir"
-            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}"
+            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
            git -C "$workdir" config gc.auto 0

-            timeout --signal=TERM 30s git -C "$workdir" \
+            timeout --signal=TERM --kill-after=10s 30s git -C "$workdir" \
              -c protocol.version=2 \
-              -c "http.https://github.com/.extraheader=AUTHORIZATION: basic ${auth_header}" \
              fetch --no-tags --prune --no-recurse-submodules --depth=1 origin \
              "+${CHECKOUT_SHA}:refs/remotes/origin/ci-target" || return 1

@@ -668,7 +700,7 @@ jobs:
      - name: Setup Node environment
        uses: ./.github/actions/setup-node-env
        with:
-          install-bun: "false"
+          install-bun: ${{ matrix.task == 'bun-launcher' && 'true' || 'false' }}

      - name: Run ${{ matrix.task }} (${{ matrix.runtime }})
        env:
@@ -689,6 +721,9 @@ jobs:
            ci-routing)
              pnpm test src/commands/status.scan-result.test.ts src/scripts/ci-changed-scope.test.ts test/scripts/test-projects.test.ts
              ;;
+            bun-launcher)
+              OPENCLAW_TEST_BUN_LAUNCHER=1 pnpm test test/openclaw-launcher.e2e.test.ts
+              ;;
            *)
              echo "Unsupported checks-fast task: $TASK" >&2
              exit 1
@@ -712,13 +747,10 @@ jobs:
        env:
          CHECKOUT_REPO: ${{ github.repository }}
          CHECKOUT_SHA: ${{ needs.preflight.outputs.checkout_revision }}
-          CHECKOUT_TOKEN: ${{ github.token }}
        run: |
          set -euo pipefail

          workdir="$GITHUB_WORKSPACE"
-          auth_header="$(printf 'x-access-token:%s' "$CHECKOUT_TOKEN" | base64 | tr -d '\n')"
-
          reset_checkout_dir() {
            mkdir -p "$workdir"
            find "$workdir" -mindepth 1 -maxdepth 1 -exec rm -rf {} +
@@ -730,12 +762,11 @@ jobs:
            reset_checkout_dir
            git init "$workdir" >/dev/null
            git config --global --add safe.directory "$workdir"
-            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}"
+            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
            git -C "$workdir" config gc.auto 0

-            timeout --signal=TERM 30s git -C "$workdir" \
+            timeout --signal=TERM --kill-after=10s 30s git -C "$workdir" \
              -c protocol.version=2 \
-              -c "http.https://github.com/.extraheader=AUTHORIZATION: basic ${auth_header}" \
              fetch --no-tags --prune --no-recurse-submodules --depth=1 origin \
              "+${CHECKOUT_SHA}:refs/remotes/origin/ci-target" || return 1

@@ -796,13 +827,10 @@ jobs:
        env:
          CHECKOUT_REPO: ${{ github.repository }}
          CHECKOUT_SHA: ${{ needs.preflight.outputs.checkout_revision }}
-          CHECKOUT_TOKEN: ${{ github.token }}
        run: |
          set -euo pipefail

          workdir="$GITHUB_WORKSPACE"
-          auth_header="$(printf 'x-access-token:%s' "$CHECKOUT_TOKEN" | base64 | tr -d '\n')"
-
          reset_checkout_dir() {
            mkdir -p "$workdir"
            find "$workdir" -mindepth 1 -maxdepth 1 -exec rm -rf {} +
@@ -814,12 +842,11 @@ jobs:
            reset_checkout_dir
            git init "$workdir" >/dev/null
            git config --global --add safe.directory "$workdir"
-            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}"
+            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
            git -C "$workdir" config gc.auto 0

-            timeout --signal=TERM 30s git -C "$workdir" \
+            timeout --signal=TERM --kill-after=10s 30s git -C "$workdir" \
              -c protocol.version=2 \
-              -c "http.https://github.com/.extraheader=AUTHORIZATION: basic ${auth_header}" \
              fetch --no-tags --prune --no-recurse-submodules --depth=1 origin \
              "+${CHECKOUT_SHA}:refs/remotes/origin/ci-target" || return 1

@@ -877,13 +904,10 @@ jobs:
        env:
          CHECKOUT_REPO: ${{ github.repository }}
          CHECKOUT_SHA: ${{ needs.preflight.outputs.checkout_revision }}
-          CHECKOUT_TOKEN: ${{ github.token }}
        run: |
          set -euo pipefail

          workdir="$GITHUB_WORKSPACE"
-          auth_header="$(printf 'x-access-token:%s' "$CHECKOUT_TOKEN" | base64 | tr -d '\n')"
-
          reset_checkout_dir() {
            mkdir -p "$workdir"
            find "$workdir" -mindepth 1 -maxdepth 1 -exec rm -rf {} +
@@ -895,12 +919,11 @@ jobs:
            reset_checkout_dir
            git init "$workdir" >/dev/null
            git config --global --add safe.directory "$workdir"
-            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}"
+            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
            git -C "$workdir" config gc.auto 0

-            timeout --signal=TERM 30s git -C "$workdir" \
+            timeout --signal=TERM --kill-after=10s 30s git -C "$workdir" \
              -c protocol.version=2 \
-              -c "http.https://github.com/.extraheader=AUTHORIZATION: basic ${auth_header}" \
              fetch --no-tags --prune --no-recurse-submodules --depth=1 origin \
              "+${CHECKOUT_SHA}:refs/remotes/origin/ci-target" || return 1

@@ -956,13 +979,10 @@ jobs:
        env:
          CHECKOUT_REPO: ${{ github.repository }}
          CHECKOUT_SHA: ${{ needs.preflight.outputs.checkout_revision }}
-          CHECKOUT_TOKEN: ${{ github.token }}
        run: |
          set -euo pipefail

          workdir="$GITHUB_WORKSPACE"
-          auth_header="$(printf 'x-access-token:%s' "$CHECKOUT_TOKEN" | base64 | tr -d '\n')"
-
          reset_checkout_dir() {
            mkdir -p "$workdir"
            find "$workdir" -mindepth 1 -maxdepth 1 -exec rm -rf {} +
@@ -974,12 +994,11 @@ jobs:
            reset_checkout_dir
            git init "$workdir" >/dev/null
            git config --global --add safe.directory "$workdir"
-            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}"
+            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
            git -C "$workdir" config gc.auto 0

-            timeout --signal=TERM 30s git -C "$workdir" \
+            timeout --signal=TERM --kill-after=10s 30s git -C "$workdir" \
              -c protocol.version=2 \
-              -c "http.https://github.com/.extraheader=AUTHORIZATION: basic ${auth_header}" \
              fetch --no-tags --prune --no-recurse-submodules --depth=1 origin \
              "+${CHECKOUT_SHA}:refs/remotes/origin/ci-target" || return 1

@@ -1082,13 +1101,10 @@ jobs:
        env:
          CHECKOUT_REPO: ${{ github.repository }}
          CHECKOUT_SHA: ${{ needs.preflight.outputs.checkout_revision }}
-          CHECKOUT_TOKEN: ${{ github.token }}
        run: |
          set -euo pipefail

          workdir="$GITHUB_WORKSPACE"
-          auth_header="$(printf 'x-access-token:%s' "$CHECKOUT_TOKEN" | base64 | tr -d '\n')"
-
          reset_checkout_dir() {
            mkdir -p "$workdir"
            find "$workdir" -mindepth 1 -maxdepth 1 -exec rm -rf {} +
@@ -1100,12 +1116,11 @@ jobs:
            reset_checkout_dir
            git init "$workdir" >/dev/null
            git config --global --add safe.directory "$workdir"
-            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}"
+            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
            git -C "$workdir" config gc.auto 0

-            timeout --signal=TERM 30s git -C "$workdir" \
+            timeout --signal=TERM --kill-after=10s 30s git -C "$workdir" \
              -c protocol.version=2 \
-              -c "http.https://github.com/.extraheader=AUTHORIZATION: basic ${auth_header}" \
              fetch --no-tags --prune --no-recurse-submodules --depth=1 origin \
              "+${CHECKOUT_SHA}:refs/remotes/origin/ci-target" || return 1

@@ -1216,13 +1231,10 @@ jobs:
        env:
          CHECKOUT_REPO: ${{ github.repository }}
          CHECKOUT_SHA: ${{ needs.preflight.outputs.checkout_revision }}
-          CHECKOUT_TOKEN: ${{ github.token }}
        run: |
          set -euo pipefail

          workdir="$GITHUB_WORKSPACE"
-          auth_header="$(printf 'x-access-token:%s' "$CHECKOUT_TOKEN" | base64 | tr -d '\n')"
-
          reset_checkout_dir() {
            mkdir -p "$workdir"
            find "$workdir" -mindepth 1 -maxdepth 1 -exec rm -rf {} +
@@ -1234,12 +1246,11 @@ jobs:
            reset_checkout_dir
            git init "$workdir" >/dev/null
            git config --global --add safe.directory "$workdir"
-            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}"
+            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
            git -C "$workdir" config gc.auto 0

-            timeout --signal=TERM 30s git -C "$workdir" \
+            timeout --signal=TERM --kill-after=10s 30s git -C "$workdir" \
              -c protocol.version=2 \
-              -c "http.https://github.com/.extraheader=AUTHORIZATION: basic ${auth_header}" \
              fetch --no-tags --prune --no-recurse-submodules --depth=1 origin \
              "+${CHECKOUT_SHA}:refs/remotes/origin/ci-target" || return 1

@@ -1369,13 +1380,10 @@ jobs:
        env:
          CHECKOUT_REPO: ${{ github.repository }}
          CHECKOUT_SHA: ${{ needs.preflight.outputs.checkout_revision }}
-          CHECKOUT_TOKEN: ${{ github.token }}
        run: |
          set -euo pipefail

          workdir="$GITHUB_WORKSPACE"
-          auth_header="$(printf 'x-access-token:%s' "$CHECKOUT_TOKEN" | base64 | tr -d '\n')"
-
          reset_checkout_dir() {
            mkdir -p "$workdir"
            find "$workdir" -mindepth 1 -maxdepth 1 -exec rm -rf {} +
@@ -1387,12 +1395,11 @@ jobs:
            reset_checkout_dir
            git init "$workdir" >/dev/null
            git config --global --add safe.directory "$workdir"
-            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}"
+            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
            git -C "$workdir" config gc.auto 0

-            timeout --signal=TERM 30s git -C "$workdir" \
+            timeout --signal=TERM --kill-after=10s 30s git -C "$workdir" \
              -c protocol.version=2 \
-              -c "http.https://github.com/.extraheader=AUTHORIZATION: basic ${auth_header}" \
              fetch --no-tags --prune --no-recurse-submodules --depth=1 origin \
              "+${CHECKOUT_SHA}:refs/remotes/origin/ci-target" || return 1

@@ -1418,12 +1425,13 @@ jobs:
          install-bun: "false"

      - name: Checkout ClawHub docs source
-        uses: actions/checkout@v6
-        with:
-          repository: openclaw/clawhub
-          path: clawhub-source
-          fetch-depth: 1
-          persist-credentials: false
+        run: |
+          set -euo pipefail
+          git init clawhub-source
+          git -C clawhub-source config gc.auto 0
+          git -C clawhub-source remote add origin "https://github.com/openclaw/clawhub.git"
+          git -C clawhub-source fetch --no-tags --depth=1 origin "+HEAD:refs/remotes/origin/checkout"
+          git -C clawhub-source checkout --detach refs/remotes/origin/checkout

      - name: Check docs
        env:
@@ -1439,11 +1447,16 @@ jobs:
    timeout-minutes: 20
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
-        with:
-          ref: ${{ needs.preflight.outputs.checkout_revision }}
-          persist-credentials: false
-          submodules: false
+        env:
+          CHECKOUT_REPO: ${{ github.repository }}
+          CHECKOUT_SHA: ${{ needs.preflight.outputs.checkout_revision }}
+        run: |
+          set -euo pipefail
+          git init "$GITHUB_WORKSPACE"
+          git -C "$GITHUB_WORKSPACE" config gc.auto 0
+          git -C "$GITHUB_WORKSPACE" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
+          git -C "$GITHUB_WORKSPACE" fetch --no-tags --depth=1 origin "+${CHECKOUT_SHA}:refs/remotes/origin/checkout"
+          git -C "$GITHUB_WORKSPACE" checkout --detach refs/remotes/origin/checkout

      - name: Setup Python
        uses: actions/setup-python@v6
@@ -1482,11 +1495,16 @@ jobs:
      matrix: ${{ fromJson(needs.preflight.outputs.checks_windows_matrix) }}
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
-        with:
-          ref: ${{ needs.preflight.outputs.checkout_revision }}
-          persist-credentials: false
-          submodules: false
+        env:
+          CHECKOUT_REPO: ${{ github.repository }}
+          CHECKOUT_SHA: ${{ needs.preflight.outputs.checkout_revision }}
+        run: |
+          set -euo pipefail
+          git init "$GITHUB_WORKSPACE"
+          git -C "$GITHUB_WORKSPACE" config gc.auto 0
+          git -C "$GITHUB_WORKSPACE" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
+          git -C "$GITHUB_WORKSPACE" fetch --no-tags --depth=1 origin "+${CHECKOUT_SHA}:refs/remotes/origin/checkout"
+          git -C "$GITHUB_WORKSPACE" checkout --detach refs/remotes/origin/checkout

      - name: Try to exclude workspace from Windows Defender (best-effort)
        shell: pwsh
@@ -1508,15 +1526,17 @@ jobs:
          }

      - name: Setup Node.js
-        uses: actions/setup-node@v6
-        with:
-          node-version: 24.x
-          check-latest: false
+        env:
+          REQUESTED_NODE_VERSION: "22.x"
+        run: |
+          set -euo pipefail
+          source .github/actions/setup-pnpm-store-cache/ensure-node.sh
+          openclaw_ensure_node "$REQUESTED_NODE_VERSION"

      - name: Setup pnpm
        uses: ./.github/actions/setup-pnpm-store-cache
        with:
-          node-version: 24.x
+          node-version: 22.x

      - name: Runtime versions
        run: |
@@ -1575,11 +1595,16 @@ jobs:
      matrix: ${{ fromJson(needs.preflight.outputs.macos_node_matrix) }}
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
-        with:
-          ref: ${{ needs.preflight.outputs.checkout_revision }}
-          persist-credentials: false
-          submodules: false
+        env:
+          CHECKOUT_REPO: ${{ github.repository }}
+          CHECKOUT_SHA: ${{ needs.preflight.outputs.checkout_revision }}
+        run: |
+          set -euo pipefail
+          git init "$GITHUB_WORKSPACE"
+          git -C "$GITHUB_WORKSPACE" config gc.auto 0
+          git -C "$GITHUB_WORKSPACE" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
+          git -C "$GITHUB_WORKSPACE" fetch --no-tags --depth=1 origin "+${CHECKOUT_SHA}:refs/remotes/origin/checkout"
+          git -C "$GITHUB_WORKSPACE" checkout --detach refs/remotes/origin/checkout

      - name: Setup Node environment
        uses: ./.github/actions/setup-node-env
@@ -1616,11 +1641,16 @@ jobs:
    timeout-minutes: 20
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
-        with:
-          ref: ${{ needs.preflight.outputs.checkout_revision }}
-          persist-credentials: false
-          submodules: false
+        env:
+          CHECKOUT_REPO: ${{ github.repository }}
+          CHECKOUT_SHA: ${{ needs.preflight.outputs.checkout_revision }}
+        run: |
+          set -euo pipefail
+          git init "$GITHUB_WORKSPACE"
+          git -C "$GITHUB_WORKSPACE" config gc.auto 0
+          git -C "$GITHUB_WORKSPACE" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
+          git -C "$GITHUB_WORKSPACE" fetch --no-tags --depth=1 origin "+${CHECKOUT_SHA}:refs/remotes/origin/checkout"
+          git -C "$GITHUB_WORKSPACE" checkout --detach refs/remotes/origin/checkout

      - name: Install XcodeGen / SwiftLint / SwiftFormat
        run: brew install xcodegen swiftlint swiftformat
@@ -1720,13 +1750,10 @@ jobs:
        env:
          CHECKOUT_REPO: ${{ github.repository }}
          CHECKOUT_SHA: ${{ needs.preflight.outputs.checkout_revision }}
-          CHECKOUT_TOKEN: ${{ github.token }}
        run: |
          set -euo pipefail

          workdir="$GITHUB_WORKSPACE"
-          auth_header="$(printf 'x-access-token:%s' "$CHECKOUT_TOKEN" | base64 | tr -d '\n')"
-
          reset_checkout_dir() {
            mkdir -p "$workdir"
            find "$workdir" -mindepth 1 -maxdepth 1 -exec rm -rf {} +
@@ -1738,12 +1765,11 @@ jobs:
            reset_checkout_dir
            git init "$workdir" >/dev/null
            git config --global --add safe.directory "$workdir"
-            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}"
+            git -C "$workdir" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
            git -C "$workdir" config gc.auto 0

-            timeout --signal=TERM 30s git -C "$workdir" \
+            timeout --signal=TERM --kill-after=10s 30s git -C "$workdir" \
              -c protocol.version=2 \
-              -c "http.https://github.com/.extraheader=AUTHORIZATION: basic ${auth_header}" \
              fetch --no-tags --prune --no-recurse-submodules --depth=1 origin \
              "+${CHECKOUT_SHA}:refs/remotes/origin/ci-target" || return 1

--- a/.github/workflows/codeql-critical-quality.yml
+++ b/.github/workflows/codeql-critical-quality.yml
@@ -71,7 +71,9 @@ on:
      - "src/acp/control-plane/**"
      - "src/agents/cli-runner/**"
      - "src/agents/command/**"
-      - "src/agents/pi-embedded-runner/**"
+      - "src/agents/embedded-agent-runner/**"
+      - "src/agents/sessions/**"
+      - "src/agents/sessions/tools/**"
      - "src/agents/tools/**"
      - "src/agents/*completion*.ts"
      - "src/agents/*transport*.ts"
@@ -222,7 +224,15 @@ jobs:
                  network_runtime=true
                  session_diagnostics=true
                  ;;
-                src/acp/control-plane/*|src/agents/cli-runner/*|src/agents/command/*|src/agents/pi-embedded-runner/*|src/agents/tools/*|src/agents/*completion*.ts|src/agents/*transport*.ts|src/agents/model-*.ts|src/agents/openclaw-tools*.ts|src/agents/provider-*.ts|src/agents/session*.ts|src/agents/tool-call*.ts|src/auto-reply/reply/agent-runner*.ts|src/auto-reply/reply/commands*.ts|src/auto-reply/reply/directive-handling*.ts|src/auto-reply/reply/dispatch-*.ts|src/auto-reply/reply/get-reply-run*.ts|src/auto-reply/reply/provider-dispatcher*.ts|src/auto-reply/reply/queue*.ts|src/auto-reply/reply/reply-run-registry*.ts|src/auto-reply/reply/session*.ts)
+                src/agents/sessions/tools/*)  # must stay above the broad src/agents/sessions/* arm: case runs only the first matching arm
+                  agent=true
+                  mcp_process=true
+                  ;;
+                src/agents/sessions/*auth*.ts|src/agents/sessions/**/*auth*.ts)  # NOTE(review): an *auth*.ts under sessions/tools/ matches the arm above first and never sets core_auth_secrets - confirm intended
+                  agent=true
+                  core_auth_secrets=true
+                  ;;
+                src/acp/control-plane/*|src/agents/cli-runner/*|src/agents/command/*|src/agents/embedded-agent-runner/*|src/agents/sessions/*|src/agents/tools/*|src/agents/*completion*.ts|src/agents/*transport*.ts|src/agents/model-*.ts|src/agents/openclaw-tools*.ts|src/agents/provider-*.ts|src/agents/session*.ts|src/agents/tool-call*.ts|src/auto-reply/reply/agent-runner*.ts|src/auto-reply/reply/commands*.ts|src/auto-reply/reply/directive-handling*.ts|src/auto-reply/reply/dispatch-*.ts|src/auto-reply/reply/get-reply-run*.ts|src/auto-reply/reply/provider-dispatcher*.ts|src/auto-reply/reply/queue*.ts|src/auto-reply/reply/reply-run-registry*.ts|src/auto-reply/reply/session*.ts)
                  agent=true
                  ;;
                src/auto-reply/reply/post-compaction-context.ts|src/auto-reply/reply/queue/*|src/auto-reply/reply/startup-context.ts|src/commands/doctor-session-*.ts|src/commands/session-store-targets.ts|src/commands/sessions*.ts|src/infra/diagnostic-*.ts|src/infra/diagnostics-timeline.ts|src/infra/session-delivery-queue*.ts|src/logging/diagnostic*.ts)
--- a/.github/workflows/crabbox-hydrate.yml
+++ b/.github/workflows/crabbox-hydrate.yml
@@ -41,6 +41,7 @@ env:
 jobs:
  hydrate:
    name: hydrate
+    if: ${{ inputs.crabbox_job != 'hydrate-github' && inputs.crabbox_job != 'hydrate-windows-daemon' }}
    runs-on: [self-hosted, "${{ inputs.crabbox_runner_label }}"]
    timeout-minutes: 120
    steps:
@@ -48,13 +49,89 @@ jobs:
        with:
          ref: ${{ inputs.ref || github.ref }}

-      - name: Setup Node environment
-        uses: ./.github/actions/setup-node-env
+      - name: Setup Node.js
+        uses: actions/setup-node@v6
        with:
-          install-bun: "false"
-          use-actions-cache: "false"
+          node-version: "24"

-      - name: Prepare Crabbox shell
+      - name: Setup pnpm and dependencies
+        shell: bash
+        env:
+          CI: "true"
+        run: |
+          set -euo pipefail
+
+          export XDG_CACHE_HOME="${XDG_CACHE_HOME:-$RUNNER_TEMP/cache}"
+          export COREPACK_HOME="${COREPACK_HOME:-$XDG_CACHE_HOME/corepack}"
+          export PNPM_HOME="${PNPM_HOME:-$RUNNER_TEMP/pnpm-home}"
+          mkdir -p "$XDG_CACHE_HOME" "$COREPACK_HOME" "$PNPM_HOME"
+          export PATH="$PNPM_HOME:$PATH"
+          {
+            echo "XDG_CACHE_HOME=$XDG_CACHE_HOME"
+            echo "COREPACK_HOME=$COREPACK_HOME"
+            echo "PNPM_HOME=$PNPM_HOME"
+          } >> "$GITHUB_ENV"
+
+          package_manager="$(node -e "const fs = require('node:fs'); const pkg = JSON.parse(fs.readFileSync('package.json', 'utf8')); process.stdout.write(pkg.packageManager || '')")"
+          case "$package_manager" in
+            pnpm@*) ;;
+            *)
+              echo "::error::Expected packageManager to pin pnpm, got '${package_manager:-<empty>}'"
+              exit 1
+              ;;
+          esac
+          corepack enable --install-directory "$PNPM_HOME"
+          for attempt in 1 2 3; do
+            if corepack prepare "$package_manager" --activate; then
+              break
+            fi
+            if [ "$attempt" = 3 ]; then
+              corepack prepare "$package_manager" --activate
+            fi
+            sleep $((attempt * 5))
+          done
+          node_bin="$(dirname "$(node -p 'process.execPath')")"
+          echo "NODE_BIN=$node_bin" >> "$GITHUB_ENV"
+          echo "$node_bin" >> "$GITHUB_PATH"
+          export PATH="$node_bin:$PATH"
+
+          node -v
+          npm -v
+          pnpm -v
+
+          install_args=(
+            install
+            --prefer-offline
+            --ignore-scripts=false
+            --config.engine-strict=false
+            --config.enable-pre-post-scripts=true
+            --config.side-effects-cache=true
+            --frozen-lockfile
+          )
+          append_pnpm_option_arg() {
+            local env_name="$1"
+            local option_name="$2"
+            local value="${!env_name-}"
+            if [ -n "$value" ]; then
+              install_args+=("--${option_name}=${value}")
+            fi
+          }
+          append_pnpm_option_arg PNPM_CONFIG_CHILD_CONCURRENCY child-concurrency
+          append_pnpm_option_arg PNPM_CONFIG_MODULES_DIR modules-dir
+          append_pnpm_option_arg PNPM_CONFIG_NETWORK_CONCURRENCY network-concurrency
+          append_pnpm_option_arg PNPM_CONFIG_VIRTUAL_STORE_DIR virtual-store-dir
+          if [ -n "${PNPM_CONFIG_MODULES_DIR:-}" ]; then
+            mkdir -p "$PNPM_CONFIG_MODULES_DIR"
+            ln -sfn . "$PNPM_CONFIG_MODULES_DIR/node_modules"
+          fi
+          pnpm "${install_args[@]}" || pnpm "${install_args[@]}"
+          if [ -n "${PNPM_CONFIG_MODULES_DIR:-}" ]; then
+            rm -rf node_modules
+            ln -sfn "$PNPM_CONFIG_MODULES_DIR" node_modules
+            ln -sfn . "$PNPM_CONFIG_MODULES_DIR/node_modules"
+          fi
+
+      - name: Fetch main ref
        shell: bash
        run: |
          set -euo pipefail
@@ -63,6 +140,11 @@ jobs:
            git fetch --no-tags --depth=50 origin "+refs/heads/main:refs/remotes/origin/main"
          fi

+      - name: Prepare Crabbox shell
+        shell: bash
+        run: |
+          set -euo pipefail
+
          node_bin="$(dirname "$(node -p 'process.execPath')")"
          sudo ln -sf "$node_bin/node" /usr/local/bin/node
          sudo ln -sf "$node_bin/npm" /usr/local/bin/npm
@@ -81,7 +163,13 @@ jobs:

          if ! command -v docker >/dev/null 2>&1; then
            echo "docker not found; installing fallback engine"
-            curl -fsSL https://get.docker.com | sudo sh
+            curl --fail --show-error --location --retry-all-errors \
+              --connect-timeout "${OPENCLAW_CRABBOX_HYDRATE_DOWNLOAD_CONNECT_TIMEOUT_SECONDS:-15}" \
+              --max-time "${OPENCLAW_CRABBOX_HYDRATE_DOWNLOAD_TIMEOUT_SECONDS:-300}" \
+              --retry "${OPENCLAW_CRABBOX_HYDRATE_DOWNLOAD_RETRIES:-3}" \
+              --retry-delay "${OPENCLAW_CRABBOX_HYDRATE_DOWNLOAD_RETRY_DELAY_SECONDS:-5}" \
+              --output "$RUNNER_TEMP/get-docker.sh" https://get.docker.com  # download to a file: curl resets a file between retries but cannot un-send bytes already written to a pipe
+            sudo sh "$RUNNER_TEMP/get-docker.sh"  # runs only after curl succeeded (set -e), so sh never sees a partial or duplicated script
          fi

          if command -v systemctl >/dev/null 2>&1; then
@@ -106,7 +194,12 @@ jobs:
            esac
            buildx_version="${DOCKER_BUILDX_VERSION:-v0.15.1}"
            mkdir -p "$HOME/.docker/cli-plugins"
-            curl -fsSL \
+            curl --fail --show-error --location \
+              --connect-timeout "${OPENCLAW_CRABBOX_HYDRATE_DOWNLOAD_CONNECT_TIMEOUT_SECONDS:-15}" \
+              --max-time "${OPENCLAW_CRABBOX_HYDRATE_DOWNLOAD_TIMEOUT_SECONDS:-300}" \
+              --retry "${OPENCLAW_CRABBOX_HYDRATE_DOWNLOAD_RETRIES:-3}" \
+              --retry-delay "${OPENCLAW_CRABBOX_HYDRATE_DOWNLOAD_RETRY_DELAY_SECONDS:-5}" \
+              --retry-all-errors \
              "https://github.com/docker/buildx/releases/download/${buildx_version}/buildx-${buildx_version}.linux-${buildx_arch}" \
              -o "$HOME/.docker/cli-plugins/docker-buildx"
            chmod 0755 "$HOME/.docker/cli-plugins/docker-buildx"
@@ -128,28 +221,6 @@ jobs:

      - name: Hydrate provider env helper
        shell: bash
-        env:
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-          ANTHROPIC_API_KEY_OLD: ${{ secrets.ANTHROPIC_API_KEY_OLD }}
-          ANTHROPIC_API_TOKEN: ${{ secrets.ANTHROPIC_API_TOKEN }}
-          CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }}
-          DEEPINFRA_API_KEY: ${{ secrets.DEEPINFRA_API_KEY }}
-          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
-          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
-          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
-          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
-          KIMI_API_KEY: ${{ secrets.KIMI_API_KEY }}
-          MINIMAX_API_KEY: ${{ secrets.MINIMAX_API_KEY }}
-          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
-          MOONSHOT_API_KEY: ${{ secrets.MOONSHOT_API_KEY }}
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
-          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
-          QWEN_API_KEY: ${{ secrets.QWEN_API_KEY }}
-          TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
-          XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
-          ZAI_API_KEY: ${{ secrets.ZAI_API_KEY }}
-          Z_AI_API_KEY: ${{ secrets.Z_AI_API_KEY }}
        run: bash scripts/ci-hydrate-testbox-env.sh

      - name: Mark Crabbox ready
@@ -179,7 +250,401 @@ jobs:
            fi
          }
          {
-            for key in CI GITHUB_ACTIONS GITHUB_WORKSPACE GITHUB_REPOSITORY GITHUB_RUN_ID GITHUB_RUN_NUMBER GITHUB_RUN_ATTEMPT GITHUB_REF GITHUB_REF_NAME GITHUB_SHA GITHUB_EVENT_NAME GITHUB_ACTOR RUNNER_OS RUNNER_ARCH RUNNER_TEMP RUNNER_TOOL_CACHE PNPM_CONFIG_CHILD_CONCURRENCY PNPM_CONFIG_MODULES_DIR PNPM_CONFIG_NETWORK_CONCURRENCY PNPM_CONFIG_STORE_DIR PNPM_CONFIG_VERIFY_DEPS_BEFORE_RUN PNPM_CONFIG_VIRTUAL_STORE_DIR; do
+            for key in CI GITHUB_ACTIONS GITHUB_WORKSPACE GITHUB_REPOSITORY GITHUB_RUN_ID GITHUB_RUN_NUMBER GITHUB_RUN_ATTEMPT GITHUB_REF GITHUB_REF_NAME GITHUB_SHA GITHUB_EVENT_NAME GITHUB_ACTOR RUNNER_OS RUNNER_ARCH RUNNER_TEMP RUNNER_TOOL_CACHE XDG_CACHE_HOME COREPACK_HOME NODE_BIN PNPM_HOME PNPM_CONFIG_CHILD_CONCURRENCY PNPM_CONFIG_MODULES_DIR PNPM_CONFIG_NETWORK_CONCURRENCY PNPM_CONFIG_STORE_DIR PNPM_CONFIG_VERIFY_DEPS_BEFORE_RUN PNPM_CONFIG_VIRTUAL_STORE_DIR PATH; do
+              write_export "$key"
+            done
+          } > "${env_file}.tmp"
+          mv "${env_file}.tmp" "$env_file"
+          {
+            echo "# Docker containers visible from the hydrated runner"
+            docker ps --format '{{.Names}}\t{{.Image}}\t{{.Ports}}' 2>/dev/null || true
+          } > "${services_file}.tmp"
+          mv "${services_file}.tmp" "$services_file"
+          tmp="${state}.tmp"
+          {
+            echo "WORKSPACE=${GITHUB_WORKSPACE}"
+            echo "RUN_ID=${GITHUB_RUN_ID}"
+            echo "JOB=${job}"
+            echo "ENV_FILE=${env_file}"
+            echo "SERVICES_FILE=${services_file}"
+            echo "READY_AT=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+          } > "$tmp"
+          mv "$tmp" "$state"
+
+      - name: Keep Crabbox job alive  # blocks until the keep-alive deadline passes or the .stop marker file appears
+        shell: bash
+        env:
+          CRABBOX_ID: ${{ inputs.crabbox_id }}
+          CRABBOX_KEEP_ALIVE_MINUTES: ${{ inputs.crabbox_keep_alive_minutes }}
+        run: |
+          set -euo pipefail
+          case "$CRABBOX_ID" in
+            ''|*[!A-Za-z0-9._-]*)  # empty, or contains a character outside A-Za-z0-9._- (the id is used in a file path below)
+              echo "Invalid crabbox_id" >&2
+              exit 2
+              ;;
+          esac
+          minutes="${CRABBOX_KEEP_ALIVE_MINUTES}"
+          case "$minutes" in
+            ''|*[!0-9]*) minutes=90 ;;  # fall back to 90 when the input is empty or not all digits
+          esac
+          stop="$HOME/.crabbox/actions/${CRABBOX_ID}.stop"  # marker file whose presence ends the wait early
+          deadline=$(( $(date +%s) + 10#$minutes * 60 ))  # 10# forces decimal: without it "08"/"09" abort as invalid octal and "010" means 8
+          while [ "$(date +%s)" -lt "$deadline" ]; do
+            if [ -f "$stop" ]; then
+              exit 0
+            fi
+            sleep 15  # poll interval for the stop marker
+          done
+
+  hydrate-windows-daemon:
+    name: hydrate-windows-daemon
+    if: ${{ inputs.crabbox_job == 'hydrate-windows-daemon' }}
+    runs-on: [self-hosted, "${{ inputs.crabbox_runner_label }}"]
+    timeout-minutes: 120
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          ref: ${{ inputs.ref || github.ref }}
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v6
+        with:
+          node-version: "24"
+
+      - name: Fetch main ref
+        shell: powershell
+        run: |
+          $ErrorActionPreference = "Stop"
+
+          if (git rev-parse --is-inside-work-tree 2>$null) {
+            git fetch --no-tags --depth=50 origin "+refs/heads/main:refs/remotes/origin/main"
+          }
+
+      - name: Setup pnpm and dependencies
+        shell: powershell
+        env:
+          CI: "true"
+          COREPACK_ENABLE_DOWNLOAD_PROMPT: "0"
+        run: |
+          $ErrorActionPreference = "Stop"
+
+          $workspace = (Get-Location).Path
+          $cacheRoot = if ($env:RUNNER_TEMP) { $env:RUNNER_TEMP } else { [System.IO.Path]::GetTempPath() }
+          $env:XDG_CACHE_HOME = Join-Path $cacheRoot "cache"
+          $env:COREPACK_HOME = Join-Path $env:XDG_CACHE_HOME "corepack"
+          $env:PNPM_HOME = Join-Path $cacheRoot "pnpm-home"
+          $env:PNPM_CONFIG_STORE_DIR = Join-Path $cacheRoot "openclaw-pnpm-store"
+          $env:PNPM_CONFIG_MODULES_DIR = Join-Path $workspace "node_modules"
+          $env:PNPM_CONFIG_VIRTUAL_STORE_DIR = Join-Path $workspace "node_modules\.pnpm"
+          $env:PNPM_CONFIG_CHILD_CONCURRENCY = "4"
+          $env:PNPM_CONFIG_NETWORK_CONCURRENCY = "8"
+          $env:PNPM_CONFIG_VERIFY_DEPS_BEFORE_RUN = "false"
+          $env:PNPM_CONFIG_SIDE_EFFECTS_CACHE = "false"
+          function Add-GitHubCommandLine([string]$Path, [string]$Value) {
+            $Value | Out-File -FilePath $Path -Encoding utf8 -Append
+          }
+          New-Item -ItemType Directory -Force `
+            $env:XDG_CACHE_HOME, `
+            $env:COREPACK_HOME, `
+            $env:PNPM_HOME, `
+            $env:PNPM_CONFIG_STORE_DIR | Out-Null
+          $env:PATH = "$env:PNPM_HOME;$env:PATH"
+          @(
+            "XDG_CACHE_HOME=$env:XDG_CACHE_HOME"
+            "COREPACK_HOME=$env:COREPACK_HOME"
+            "PNPM_HOME=$env:PNPM_HOME"
+            "PNPM_CONFIG_STORE_DIR=$env:PNPM_CONFIG_STORE_DIR"
+            "PNPM_CONFIG_MODULES_DIR=$env:PNPM_CONFIG_MODULES_DIR"
+            "PNPM_CONFIG_VIRTUAL_STORE_DIR=$env:PNPM_CONFIG_VIRTUAL_STORE_DIR"
+            "PNPM_CONFIG_CHILD_CONCURRENCY=$env:PNPM_CONFIG_CHILD_CONCURRENCY"
+            "PNPM_CONFIG_NETWORK_CONCURRENCY=$env:PNPM_CONFIG_NETWORK_CONCURRENCY"
+            "PNPM_CONFIG_VERIFY_DEPS_BEFORE_RUN=$env:PNPM_CONFIG_VERIFY_DEPS_BEFORE_RUN"
+            "PNPM_CONFIG_SIDE_EFFECTS_CACHE=$env:PNPM_CONFIG_SIDE_EFFECTS_CACHE"
+          ) | ForEach-Object { Add-GitHubCommandLine $env:GITHUB_ENV $_ }
+          Add-GitHubCommandLine $env:GITHUB_PATH $env:PNPM_HOME
+
+          $packageManager = (Get-Content package.json -Raw | ConvertFrom-Json).packageManager
+          if (-not $packageManager -or -not $packageManager.StartsWith("pnpm@")) {
+            Write-Error "Expected packageManager to pin pnpm, got '$packageManager'"
+          }
+          corepack enable --install-directory $env:PNPM_HOME
+          for ($attempt = 1; $attempt -le 3; $attempt++) {
+            corepack prepare $packageManager --activate
+            if ($LASTEXITCODE -eq 0) {
+              break
+            }
+            if ($attempt -eq 3) {
+              exit $LASTEXITCODE
+            }
+            Start-Sleep -Seconds ($attempt * 5)
+          }
+          $nodeBin = Split-Path -Parent (node -p "process.execPath")
+          Add-GitHubCommandLine $env:GITHUB_ENV "NODE_BIN=$nodeBin"
+          Add-GitHubCommandLine $env:GITHUB_PATH $nodeBin
+          $env:PATH = "$nodeBin;$env:PATH"
+
+          node -v
+          npm -v
+          pnpm -v
+
+          $installArgs = @(
+            "install",
+            "--filter",
+            "openclaw",
+            "--prefer-offline",
+            "--ignore-scripts=true",
+            "--config.engine-strict=false",
+            "--config.enable-pre-post-scripts=false",
+            "--config.side-effects-cache=false",
+            "--frozen-lockfile",
+            "--child-concurrency=$env:PNPM_CONFIG_CHILD_CONCURRENCY",
+            "--modules-dir=$env:PNPM_CONFIG_MODULES_DIR",
+            "--network-concurrency=$env:PNPM_CONFIG_NETWORK_CONCURRENCY",
+            "--store-dir=$env:PNPM_CONFIG_STORE_DIR",
+            "--virtual-store-dir=$env:PNPM_CONFIG_VIRTUAL_STORE_DIR"
+          )
+          pnpm @installArgs
+          if ($LASTEXITCODE -ne 0) {
+            exit $LASTEXITCODE
+          }
+          $corepackShimDir = Join-Path $nodeBin "node_modules\corepack\shims"
+          if (Test-Path $corepackShimDir) {
+            $env:PNPM_HOME = $corepackShimDir
+            Add-GitHubCommandLine $env:GITHUB_ENV "PNPM_HOME=$env:PNPM_HOME"
+            Add-GitHubCommandLine $env:GITHUB_PATH $env:PNPM_HOME
+          }
+
+      - name: Mark Crabbox ready
+        shell: powershell
+        env:
+          CRABBOX_ID: ${{ inputs.crabbox_id }}
+          CRABBOX_JOB: ${{ inputs.crabbox_job }}
+        run: |
+          $ErrorActionPreference = "Stop"
+          $job = if ($env:CRABBOX_JOB) { $env:CRABBOX_JOB } else { "hydrate-windows-daemon" }
+          if (-not $env:CRABBOX_ID -or $env:CRABBOX_ID -notmatch '^[A-Za-z0-9._-]+$') {
+            Write-Error "Invalid crabbox_id"
+          }
+          $actionsRoot = Join-Path $HOME ".crabbox\actions"
+          New-Item -ItemType Directory -Force $actionsRoot | Out-Null
+          $state = Join-Path $actionsRoot "$env:CRABBOX_ID.env"
+          $envFile = Join-Path $actionsRoot "$env:CRABBOX_ID.env.ps1"
+          $servicesFile = Join-Path $actionsRoot "$env:CRABBOX_ID.services"
+          $keys = @(
+            "CI", "GITHUB_ACTIONS", "GITHUB_WORKSPACE", "GITHUB_REPOSITORY",
+            "GITHUB_RUN_ID", "GITHUB_RUN_NUMBER", "GITHUB_RUN_ATTEMPT",
+            "GITHUB_REF", "GITHUB_REF_NAME", "GITHUB_SHA", "GITHUB_EVENT_NAME",
+            "GITHUB_ACTOR", "RUNNER_OS", "RUNNER_ARCH", "RUNNER_TEMP",
+            "RUNNER_TOOL_CACHE", "XDG_CACHE_HOME", "COREPACK_HOME", "NODE_BIN",
+            "PNPM_HOME", "PNPM_CONFIG_CHILD_CONCURRENCY", "PNPM_CONFIG_MODULES_DIR",
+            "PNPM_CONFIG_NETWORK_CONCURRENCY", "PNPM_CONFIG_STORE_DIR",
+            "PNPM_CONFIG_VERIFY_DEPS_BEFORE_RUN", "PNPM_CONFIG_VIRTUAL_STORE_DIR",
+            "PNPM_CONFIG_SIDE_EFFECTS_CACHE", "PATH"
+          )
+          $envLines = foreach ($key in $keys) {
+            $value = [Environment]::GetEnvironmentVariable($key)
+            if ($value) {
+              "$key=$value"
+            }
+          }
+          $utf8NoBom = [System.Text.UTF8Encoding]::new($false)
+          [System.IO.File]::WriteAllLines("$envFile.tmp", $envLines, $utf8NoBom)
+          Move-Item -Force "$envFile.tmp" $envFile
+          [System.IO.File]::WriteAllLines(
+            "$servicesFile.tmp",
+            @("# Docker containers visible from the hydrated runner", "docker not available on native Windows hydration"),
+            $utf8NoBom
+          )
+          Move-Item -Force "$servicesFile.tmp" $servicesFile
+          $stateLines = @(
+            "WORKSPACE=$env:GITHUB_WORKSPACE",
+            "RUN_ID=$env:GITHUB_RUN_ID",
+            "JOB=$job",
+            "ENV_FILE=$envFile",
+            "SERVICES_FILE=$servicesFile",
+            "READY_AT=$((Get-Date).ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ssZ"))"
+          )
+          [System.IO.File]::WriteAllLines("$state.tmp", $stateLines, $utf8NoBom)
+          Move-Item -Force "$state.tmp" $state
+
+      - name: Keep Crabbox job alive  # blocks until the keep-alive deadline passes or the .stop marker file appears
+        shell: powershell
+        env:
+          CRABBOX_ID: ${{ inputs.crabbox_id }}
+          CRABBOX_KEEP_ALIVE_MINUTES: ${{ inputs.crabbox_keep_alive_minutes }}
+        run: |
+          $ErrorActionPreference = "Stop"  # turns the Write-Error below into a terminating error
+          if (-not $env:CRABBOX_ID -or $env:CRABBOX_ID -notmatch '^[A-Za-z0-9._-]+$') {
+            Write-Error "Invalid crabbox_id"
+          }
+          $minutes = 90  # fallback used when the input is empty or not all digits
+          if ($env:CRABBOX_KEEP_ALIVE_MINUTES -match '^[0-9]+$') {
+            $minutes = [int]$env:CRABBOX_KEEP_ALIVE_MINUTES
+          }
+          $stop = Join-Path $HOME ".crabbox\actions\$env:CRABBOX_ID.stop"  # marker file whose presence ends the wait early
+          $deadline = (Get-Date).AddMinutes($minutes)
+          while ((Get-Date) -lt $deadline) {
+            if (Test-Path $stop) {
+              exit 0
+            }
+            Start-Sleep -Seconds 15  # poll interval for the stop marker
+          }
+
+  hydrate-github:
+    name: hydrate-github
+    if: ${{ inputs.crabbox_job == 'hydrate-github' }}
+    runs-on: [self-hosted, "${{ inputs.crabbox_runner_label }}"]
+    timeout-minutes: 120
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          ref: ${{ inputs.ref || github.ref }}
+
+      - name: Setup Node environment
+        uses: ./.github/actions/setup-node-env
+        with:
+          install-bun: "false"
+          use-actions-cache: "false"
+
+      - name: Prepare Crabbox shell
+        shell: bash
+        run: |
+          set -euo pipefail
+
+          if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+            git fetch --no-tags --depth=50 origin "+refs/heads/main:refs/remotes/origin/main"
+          fi
+
+          node_bin="$(dirname "$(node -p 'process.execPath')")"
+          sudo ln -sf "$node_bin/node" /usr/local/bin/node
+          sudo ln -sf "$node_bin/npm" /usr/local/bin/npm
+          sudo ln -sf "$node_bin/npx" /usr/local/bin/npx
+          sudo ln -sf "$node_bin/corepack" /usr/local/bin/corepack
+          sudo tee /usr/local/bin/pnpm >/dev/null <<'PNPM'
+          #!/usr/bin/env bash
+          exec /usr/local/bin/corepack pnpm "$@"
+          PNPM
+          sudo chmod 0755 /usr/local/bin/pnpm
+
+      - name: Ensure Docker is running
+        shell: bash
+        run: |
+          set -euo pipefail
+
+          if ! command -v docker >/dev/null 2>&1; then  # install an engine only when no docker CLI is on PATH
+            echo "docker not found; installing fallback engine"
+            curl --fail --show-error --location \
+              --connect-timeout "${OPENCLAW_CRABBOX_HYDRATE_DOWNLOAD_CONNECT_TIMEOUT_SECONDS:-15}" \
+              --max-time "${OPENCLAW_CRABBOX_HYDRATE_DOWNLOAD_TIMEOUT_SECONDS:-300}" \
+              --retry "${OPENCLAW_CRABBOX_HYDRATE_DOWNLOAD_RETRIES:-3}" \
+              --retry-delay "${OPENCLAW_CRABBOX_HYDRATE_DOWNLOAD_RETRY_DELAY_SECONDS:-5}" \
+              --retry-all-errors \
+              https://get.docker.com | sudo sh  # with pipefail a failed download fails the step
+          fi
+
+          if command -v systemctl >/dev/null 2>&1; then
+            sudo systemctl start docker || true  # best effort; start failures are ignored here
+          elif command -v service >/dev/null 2>&1; then
+            sudo service docker start || true  # same best-effort start via the service wrapper
+          fi
+
+          if [ -S /var/run/docker.sock ]; then
+            sudo usermod -aG docker "$USER" || true
+            # The runner process keeps its original groups; grant this
+            # ephemeral runner session access without requiring a relogin.
+            sudo chmod 666 /var/run/docker.sock
+          fi
+
+          if ! docker buildx version >/dev/null 2>&1; then  # download the buildx CLI plugin only when it is missing
+            arch="$(uname -m)"
+            case "$arch" in
+              aarch64|arm64) buildx_arch=arm64 ;;
+              x86_64|amd64) buildx_arch=amd64 ;;
+              *) echo "unsupported buildx arch: $arch" >&2; exit 2 ;;
+            esac
+            buildx_version="${DOCKER_BUILDX_VERSION:-v0.15.1}"  # overridable through DOCKER_BUILDX_VERSION
+            mkdir -p "$HOME/.docker/cli-plugins"
+            curl --fail --show-error --location \
+              --connect-timeout "${OPENCLAW_CRABBOX_HYDRATE_DOWNLOAD_CONNECT_TIMEOUT_SECONDS:-15}" \
+              --max-time "${OPENCLAW_CRABBOX_HYDRATE_DOWNLOAD_TIMEOUT_SECONDS:-300}" \
+              --retry "${OPENCLAW_CRABBOX_HYDRATE_DOWNLOAD_RETRIES:-3}" \
+              --retry-delay "${OPENCLAW_CRABBOX_HYDRATE_DOWNLOAD_RETRY_DELAY_SECONDS:-5}" \
+              --retry-all-errors \
+              "https://github.com/docker/buildx/releases/download/${buildx_version}/buildx-${buildx_version}.linux-${buildx_arch}" \
+              -o "$HOME/.docker/cli-plugins/docker-buildx"
+            chmod 0755 "$HOME/.docker/cli-plugins/docker-buildx"
+          fi
+
+          docker version  # not guarded by "|| true": a failure here fails the step
+          docker buildx version  # likewise required
+          docker compose version || true  # compose is optional; its absence is tolerated
+
+      - name: Ensure SSH is available
+        shell: bash
+        run: |
+          set -euo pipefail
+          if command -v systemctl >/dev/null 2>&1; then
+            sudo systemctl start ssh || sudo systemctl start sshd || true
+          elif command -v service >/dev/null 2>&1; then
+            sudo service ssh start || sudo service sshd start || true
+          fi
+
+      - name: Hydrate provider env helper
+        shell: bash
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          ANTHROPIC_API_KEY_OLD: ${{ secrets.ANTHROPIC_API_KEY_OLD }}
+          ANTHROPIC_API_TOKEN: ${{ secrets.ANTHROPIC_API_TOKEN }}
+          CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }}
+          DEEPINFRA_API_KEY: ${{ secrets.DEEPINFRA_API_KEY }}
+          FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
+          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
+          KIMI_API_KEY: ${{ secrets.KIMI_API_KEY }}
+          MINIMAX_API_KEY: ${{ secrets.MINIMAX_API_KEY }}
+          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
+          MOONSHOT_API_KEY: ${{ secrets.MOONSHOT_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
+          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
+          QWEN_API_KEY: ${{ secrets.QWEN_API_KEY }}
+          TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
+          XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
+          ZAI_API_KEY: ${{ secrets.ZAI_API_KEY }}
+          Z_AI_API_KEY: ${{ secrets.Z_AI_API_KEY }}
+        run: bash scripts/ci-hydrate-testbox-env.sh
+
+      - name: Mark Crabbox ready
+        shell: bash
+        env:
+          CRABBOX_ID: ${{ inputs.crabbox_id }}
+          CRABBOX_JOB: ${{ inputs.crabbox_job }}
+        run: |
+          set -euo pipefail
+          job="${CRABBOX_JOB}"
+          if [ -z "$job" ]; then job=hydrate-github; fi
+          case "$CRABBOX_ID" in
+            ''|*[!A-Za-z0-9._-]*)
+              echo "Invalid crabbox_id" >&2
+              exit 2
+              ;;
+          esac
+          mkdir -p "$HOME/.crabbox/actions"
+          state="$HOME/.crabbox/actions/${CRABBOX_ID}.env"
+          env_file="$HOME/.crabbox/actions/${CRABBOX_ID}.env.sh"
+          services_file="$HOME/.crabbox/actions/${CRABBOX_ID}.services"
+          write_export() {
+            key="$1"
+            value="${!key-}"
+            if [ -n "$value" ]; then
+              printf 'export %s=%q\n' "$key" "$value"
+            fi
+          }
+          {
+            for key in CI GITHUB_ACTIONS GITHUB_WORKSPACE GITHUB_REPOSITORY GITHUB_RUN_ID GITHUB_RUN_NUMBER GITHUB_RUN_ATTEMPT GITHUB_REF GITHUB_REF_NAME GITHUB_SHA GITHUB_EVENT_NAME GITHUB_ACTOR RUNNER_OS RUNNER_ARCH RUNNER_TEMP RUNNER_TOOL_CACHE NODE_BIN PNPM_HOME PNPM_CONFIG_CHILD_CONCURRENCY PNPM_CONFIG_MODULES_DIR PNPM_CONFIG_NETWORK_CONCURRENCY PNPM_CONFIG_STORE_DIR PNPM_CONFIG_VERIFY_DEPS_BEFORE_RUN PNPM_CONFIG_VIRTUAL_STORE_DIR PATH; do
              write_export "$key"
            done
          } > "${env_file}.tmp"
--- a/.github/workflows/docker-release.yml
+++ b/.github/workflows/docker-release.yml
@@ -162,6 +162,50 @@ jobs:
          provenance: mode=max
          push: true

+      - name: Smoke test amd64 runtime workspace templates
+        shell: bash
+        env:
+          IMAGE_REFS: ${{ steps.tags.outputs.value }}
+        run: |
+          set -euo pipefail
+          mapfile -t image_refs <<< "${IMAGE_REFS}"
+          image_ref="${image_refs[0]}"  # only the first resolved ref is smoke tested
+          if [[ -z "${image_ref}" ]]; then
+            echo "::error::No amd64 image ref resolved for runtime template smoke"
+            exit 1
+          fi
+          docker run --rm --entrypoint /bin/sh "${image_ref}" -lc '
+            set -eu
+            test -f /app/src/agents/templates/HEARTBEAT.md || { echo "Missing bundled template: /app/src/agents/templates/HEARTBEAT.md" >&2; exit 1; }
+            temp_root="$(mktemp -d)"
+            trap "rm -rf \"${temp_root}\"" EXIT
+            mkdir -p "${temp_root}/home" "${temp_root}/cwd"
+            cd "${temp_root}/cwd"
+            set +e  # tolerate a non-zero agent exit (--timeout 1); the status is only reported after the checks
+            HOME="${temp_root}/home" \
+            USERPROFILE="${temp_root}/home" \
+            OPENCLAW_HOME="${temp_root}/home" \
+            OPENCLAW_NO_ONBOARD=1 \
+            OPENCLAW_SUPPRESS_NOTES=1 \
+            OPENCLAW_DISABLE_BUNDLED_PLUGINS=1 \
+            OPENCLAW_DISABLE_BUNDLED_ENTRY_SOURCE_FALLBACK=1 \
+            AWS_EC2_METADATA_DISABLED=true \
+            AWS_SHARED_CREDENTIALS_FILE="${temp_root}/home/.aws/credentials" \
+            AWS_CONFIG_FILE="${temp_root}/home/.aws/config" \
+              node /app/openclaw.mjs agent --message "workspace bootstrap smoke" --session-id "workspace-bootstrap-smoke" --local --timeout 1 --json \
+              >"${temp_root}/out.log" 2>&1
+            status="$?"
+            set -e
+            if grep -F "Missing workspace template:" "${temp_root}/out.log"; then
+              cat "${temp_root}/out.log"
+              exit 1
+            fi
+            test -f "${temp_root}/home/.openclaw/workspace/HEARTBEAT.md" || { echo "Workspace bootstrap did not create HEARTBEAT.md" >&2; cat "${temp_root}/out.log"; exit 1; }
+            if [ "${status}" -ne 0 ]; then
+              cat "${temp_root}/out.log"
+            fi
+          '
+
  # Build arm64 image. Default and slim tags point to the same slim runtime.
  build-arm64:
    needs: [approve_manual_backfill]
@@ -260,6 +304,50 @@ jobs:
          provenance: mode=max
          push: true

+      - name: Smoke test arm64 runtime workspace templates
+        shell: bash
+        env:
+          IMAGE_REFS: ${{ steps.tags.outputs.value }}
+        run: |
+          set -euo pipefail
+          mapfile -t image_refs <<< "${IMAGE_REFS}"
+          image_ref="${image_refs[0]}"  # only the first resolved ref is smoke tested
+          if [[ -z "${image_ref}" ]]; then
+            echo "::error::No arm64 image ref resolved for runtime template smoke"
+            exit 1
+          fi
+          docker run --rm --entrypoint /bin/sh "${image_ref}" -lc '
+            set -eu
+            test -f /app/src/agents/templates/HEARTBEAT.md || { echo "Missing bundled template: /app/src/agents/templates/HEARTBEAT.md" >&2; exit 1; }
+            temp_root="$(mktemp -d)"
+            trap "rm -rf \"${temp_root}\"" EXIT
+            mkdir -p "${temp_root}/home" "${temp_root}/cwd"
+            cd "${temp_root}/cwd"
+            set +e  # tolerate a non-zero agent exit (--timeout 1); the status is only reported after the checks
+            HOME="${temp_root}/home" \
+            USERPROFILE="${temp_root}/home" \
+            OPENCLAW_HOME="${temp_root}/home" \
+            OPENCLAW_NO_ONBOARD=1 \
+            OPENCLAW_SUPPRESS_NOTES=1 \
+            OPENCLAW_DISABLE_BUNDLED_PLUGINS=1 \
+            OPENCLAW_DISABLE_BUNDLED_ENTRY_SOURCE_FALLBACK=1 \
+            AWS_EC2_METADATA_DISABLED=true \
+            AWS_SHARED_CREDENTIALS_FILE="${temp_root}/home/.aws/credentials" \
+            AWS_CONFIG_FILE="${temp_root}/home/.aws/config" \
+              node /app/openclaw.mjs agent --message "workspace bootstrap smoke" --session-id "workspace-bootstrap-smoke" --local --timeout 1 --json \
+              >"${temp_root}/out.log" 2>&1
+            status="$?"
+            set -e
+            if grep -F "Missing workspace template:" "${temp_root}/out.log"; then
+              cat "${temp_root}/out.log"
+              exit 1
+            fi
+            test -f "${temp_root}/home/.openclaw/workspace/HEARTBEAT.md" || { echo "Workspace bootstrap did not create HEARTBEAT.md" >&2; cat "${temp_root}/out.log"; exit 1; }
+            if [ "${status}" -ne 0 ]; then
+              cat "${temp_root}/out.log"
+            fi
+          '
+
  # Create multi-platform manifests
  create-manifest:
    needs: [approve_manual_backfill, build-amd64, build-arm64]
--- a/.github/workflows/full-release-validation.yml
+++ b/.github/workflows/full-release-validation.yml
@@ -58,6 +58,7 @@ on:
          - qa-parity
          - qa-live
          - npm-telegram
+          - performance
      live_suite_filter:
        description: Optional exact live/E2E suite id, or comma-separated QA live lanes such as qa-live-matrix,qa-live-telegram; blank runs all selected live suites
        required: false
@@ -134,7 +135,7 @@ jobs:
          ref: ${{ github.ref_name }}
          path: workflow
          fetch-depth: 1
-          persist-credentials: false
+          persist-credentials: true
          submodules: false

      - name: Resolve target SHA
@@ -181,6 +182,11 @@ jobs:
            else
              echo "- Normal CI: skipped by rerun group"
            fi
+            if [[ "$RERUN_GROUP" == "all" || "$RERUN_GROUP" == "performance" ]]; then
+              echo "- Product performance: \`OpenClaw Performance\` with \`target_ref=${TARGET_SHA}\`"
+            else
+              echo "- Product performance: skipped by rerun group"
+            fi
            if [[ "$RERUN_GROUP" == "all" || "$RERUN_GROUP" == "plugin-prerelease" ]]; then
              echo "- Plugin prerelease: \`Plugin Prerelease\` with \`target_ref=${TARGET_SHA}\`"
            else
@@ -219,7 +225,7 @@ jobs:
          } >> "$GITHUB_STEP_SUMMARY"

  docker_runtime_assets_preflight:
-    name: Verify Docker runtime-assets prune path
+    name: Verify Docker runtime image assets
    needs: [resolve_target]
    if: inputs.rerun_group == 'all'
    runs-on: ubuntu-24.04
@@ -232,18 +238,61 @@ jobs:
        with:
          ref: ${{ needs.resolve_target.outputs.sha }}
          fetch-depth: 1
-          persist-credentials: false
+          persist-credentials: true

      - name: Verify Docker runtime-assets prune path
        env:
          DOCKER_BUILDKIT: "1"
        run: |
          set -euo pipefail
-          timeout --foreground --kill-after=30s 35m docker build \
+          timeout --kill-after=30s 35m docker build \
            --target runtime-assets \
-            --build-arg OPENCLAW_EXTENSIONS="matrix" \
+            --build-arg OPENCLAW_EXTENSIONS="diagnostics-otel,codex" \
            .

+      - name: Build and smoke test final Docker runtime image
+        env:
+          DOCKER_BUILDKIT: "1"
+          TARGET_SHA: ${{ needs.resolve_target.outputs.sha }}
+        run: |
+          set -euo pipefail
+          image_ref="openclaw-release-runtime-smoke:${TARGET_SHA}"  # tag for the image built just below
+          timeout --kill-after=30s 35m docker build \
+            --build-arg OPENCLAW_EXTENSIONS="diagnostics-otel,codex" \
+            -t "${image_ref}" \
+            .
+          docker run --rm --entrypoint /bin/sh "${image_ref}" -lc '
+            set -eu
+            test -f /app/src/agents/templates/HEARTBEAT.md || { echo "Missing bundled template: /app/src/agents/templates/HEARTBEAT.md" >&2; exit 1; }
+            temp_root="$(mktemp -d)"
+            trap "rm -rf \"${temp_root}\"" EXIT
+            mkdir -p "${temp_root}/home" "${temp_root}/cwd"
+            cd "${temp_root}/cwd"
+            set +e  # tolerate a non-zero agent exit (--timeout 1); the status is only reported after the checks
+            HOME="${temp_root}/home" \
+            USERPROFILE="${temp_root}/home" \
+            OPENCLAW_HOME="${temp_root}/home" \
+            OPENCLAW_NO_ONBOARD=1 \
+            OPENCLAW_SUPPRESS_NOTES=1 \
+            OPENCLAW_DISABLE_BUNDLED_PLUGINS=1 \
+            OPENCLAW_DISABLE_BUNDLED_ENTRY_SOURCE_FALLBACK=1 \
+            AWS_EC2_METADATA_DISABLED=true \
+            AWS_SHARED_CREDENTIALS_FILE="${temp_root}/home/.aws/credentials" \
+            AWS_CONFIG_FILE="${temp_root}/home/.aws/config" \
+              node /app/openclaw.mjs agent --message "workspace bootstrap smoke" --session-id "workspace-bootstrap-smoke" --local --timeout 1 --json \
+              >"${temp_root}/out.log" 2>&1
+            status="$?"
+            set -e
+            if grep -F "Missing workspace template:" "${temp_root}/out.log"; then
+              cat "${temp_root}/out.log"
+              exit 1
+            fi
+            test -f "${temp_root}/home/.openclaw/workspace/HEARTBEAT.md" || { echo "Workspace bootstrap did not create HEARTBEAT.md" >&2; cat "${temp_root}/out.log"; exit 1; }
+            if [ "${status}" -ne 0 ]; then
+              cat "${temp_root}/out.log"
+            fi
+          '
+
  normal_ci:
    name: Run normal full CI
    needs: [resolve_target, docker_runtime_assets_preflight]
@@ -270,9 +319,31 @@ jobs:
            shift

            local before_json dispatch_output run_id status conclusion url poll_count
-            before_json="$(gh run list --workflow "$workflow" --event workflow_dispatch --limit 100 --json databaseId --jq '[.[].databaseId]')"
+            gh_with_retry() {  # run "gh <args>"; up to 6 attempts when the error text matches the retryable list below
+              local output status attempt
+              for attempt in 1 2 3 4 5 6; do
+                set +e  # capture the exit status instead of aborting under errexit
+                output="$(gh "$@" 2>&1)"  # stderr is folded into the captured output
+                status=$?
+                set -e
+                if [[ "$status" -eq 0 ]]; then
+                  printf '%s\n' "$output"  # success: forward the captured output on stdout
+                  return 0
+                fi
+                if [[ "$output" == *"Bad credentials"* || "$output" == *"HTTP 401"* || "$output" == *"secondary rate limit"* || "$output" == *"API rate limit"* || "$output" == *"Sorry. Your account was suspended"* ]]; then
+                  echo "::warning::gh $* failed on attempt ${attempt}: ${output}" >&2
+                  sleep $((attempt * 10))  # linear backoff: 10s, 20s, ... 60s
+                  continue
+                fi
+                printf '%s\n' "$output" >&2  # any other failure is reported and returned immediately
+                return "$status"
+              done
+              printf '%s\n' "$output" >&2  # attempts exhausted: surface the last error and status
+              return "$status"
+            }
+            before_json="$(gh_with_retry run list --workflow "$workflow" --event workflow_dispatch --limit 100 --json databaseId --jq '[.[].databaseId]')"

-            dispatch_output="$(gh workflow run "$workflow" --ref "$CHILD_WORKFLOW_REF" "$@" 2>&1)"
+            dispatch_output="$(gh_with_retry workflow run "$workflow" --ref "$CHILD_WORKFLOW_REF" "$@")"
            printf '%s\n' "$dispatch_output"
            run_id="$(
              printf '%s\n' "$dispatch_output" |
@@ -283,7 +354,7 @@ jobs:
            if [[ -z "$run_id" ]]; then
              for _ in $(seq 1 60); do
                run_id="$(
-                  BEFORE_IDS="$before_json" gh run list --workflow "$workflow" --event workflow_dispatch --limit 50 --json databaseId,createdAt \
+                  BEFORE_IDS="$before_json" gh_with_retry run list --workflow "$workflow" --event workflow_dispatch --limit 50 --json databaseId,createdAt \
                    --jq 'map(select(.databaseId as $id | (env.BEFORE_IDS | fromjson | index($id) | not))) | sort_by(.createdAt) | reverse | .[0].databaseId // empty'
                )"
                if [[ -n "$run_id" ]]; then
@@ -301,6 +372,14 @@ jobs:
            echo "Dispatched ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
            echo "run_id=${run_id}" >> "$GITHUB_OUTPUT"

+            fetch_child_run_json() {  # print the REST API JSON for the run in ${run_id}, via gh_with_retry
+              gh_with_retry api "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
+            }
+
+            fetch_child_jobs() {  # print each job object of that run (paginated, 100 per page), one JSON value per job
+              gh_with_retry api --paginate "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}/jobs?per_page=100" --jq '.jobs[]'
+            }
+
            cancel_child() {
              if [[ -n "${run_id:-}" ]]; then
                echo "Cancelling child workflow ${workflow}: ${run_id}" >&2
@@ -311,26 +390,26 @@ jobs:

            poll_count=0
            while true; do
-              status="$(gh run view "$run_id" --json status --jq '.status')"
+              status="$(fetch_child_run_json | jq -r '.status')"
              if [[ "$status" == "completed" ]]; then
                break
              fi
              poll_count=$((poll_count + 1))
              if (( poll_count % 10 == 0 )); then
                echo "Still waiting on ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
-                gh run view "$run_id" --json jobs --jq '.jobs[] | select(.status != "completed") | {name, status, url}' || true
+                fetch_child_jobs | jq 'select(.status != "completed") | {name, status, url: .html_url}' || true
              fi
              sleep 30
            done
            trap - EXIT INT TERM

-            conclusion="$(gh run view "$run_id" --json conclusion --jq '.conclusion')"
-            url="$(gh run view "$run_id" --json url --jq '.url')"
+            conclusion="$(fetch_child_run_json | jq -r '.conclusion // ""')"
+            url="$(fetch_child_run_json | jq -r '.html_url')"
            echo "${workflow} finished with ${conclusion}: ${url}"
            echo "url=${url}" >> "$GITHUB_OUTPUT"
            echo "conclusion=${conclusion}" >> "$GITHUB_OUTPUT"
            if [[ "$conclusion" != "success" ]]; then
-              gh run view "$run_id" --json jobs --jq '.jobs[] | select(.conclusion != "success" and .conclusion != "skipped") | {name, conclusion, url}' || true
+              fetch_child_jobs | jq 'select(.conclusion != "success" and .conclusion != "skipped") | {name, conclusion, url: .html_url}' || true
              exit 1
            fi
          }
@@ -370,9 +449,31 @@ jobs:
            shift

            local before_json dispatch_output run_id status conclusion url poll_count
-            before_json="$(gh run list --workflow "$workflow" --event workflow_dispatch --limit 100 --json databaseId --jq '[.[].databaseId]')"
+            gh_with_retry() {  # run "gh <args>"; up to 6 attempts when the error text matches the retryable list below
+              local output status attempt
+              for attempt in 1 2 3 4 5 6; do
+                set +e  # capture the exit status instead of aborting under errexit
+                output="$(gh "$@" 2>&1)"  # stderr is folded into the captured output
+                status=$?
+                set -e
+                if [[ "$status" -eq 0 ]]; then
+                  printf '%s\n' "$output"  # success: forward the captured output on stdout
+                  return 0
+                fi
+                if [[ "$output" == *"Bad credentials"* || "$output" == *"HTTP 401"* || "$output" == *"secondary rate limit"* || "$output" == *"API rate limit"* || "$output" == *"Sorry. Your account was suspended"* ]]; then
+                  echo "::warning::gh $* failed on attempt ${attempt}: ${output}" >&2
+                  sleep $((attempt * 10))  # linear backoff: 10s, 20s, ... 60s
+                  continue
+                fi
+                printf '%s\n' "$output" >&2  # any other failure is reported and returned immediately
+                return "$status"
+              done
+              printf '%s\n' "$output" >&2  # attempts exhausted: surface the last error and status
+              return "$status"
+            }
+            before_json="$(gh_with_retry run list --workflow "$workflow" --event workflow_dispatch --limit 100 --json databaseId --jq '[.[].databaseId]')"

-            dispatch_output="$(gh workflow run "$workflow" --ref "$CHILD_WORKFLOW_REF" "$@" 2>&1)"
+            dispatch_output="$(gh_with_retry workflow run "$workflow" --ref "$CHILD_WORKFLOW_REF" "$@")"
            printf '%s\n' "$dispatch_output"
            run_id="$(
              printf '%s\n' "$dispatch_output" |
@@ -383,7 +484,7 @@ jobs:
            if [[ -z "$run_id" ]]; then
              for _ in $(seq 1 60); do
                run_id="$(
-                  BEFORE_IDS="$before_json" gh run list --workflow "$workflow" --event workflow_dispatch --limit 50 --json databaseId,createdAt \
+                  BEFORE_IDS="$before_json" gh_with_retry run list --workflow "$workflow" --event workflow_dispatch --limit 50 --json databaseId,createdAt \
                    --jq 'map(select(.databaseId as $id | (env.BEFORE_IDS | fromjson | index($id) | not))) | sort_by(.createdAt) | reverse | .[0].databaseId // empty'
                )"
                if [[ -n "$run_id" ]]; then
@@ -401,6 +502,14 @@ jobs:
            echo "Dispatched ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
            echo "run_id=${run_id}" >> "$GITHUB_OUTPUT"

+            fetch_child_run_json() {  # print the REST API JSON for the run in ${run_id}, via gh_with_retry
+              gh_with_retry api "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
+            }
+
+            fetch_child_jobs() {  # print each job object of that run (paginated, 100 per page), one JSON value per job
+              gh_with_retry api --paginate "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}/jobs?per_page=100" --jq '.jobs[]'
+            }
+
            cancel_child() {
              if [[ -n "${run_id:-}" ]]; then
                echo "Cancelling child workflow ${workflow}: ${run_id}" >&2
@@ -411,26 +520,26 @@ jobs:

            poll_count=0
            while true; do
-              status="$(gh run view "$run_id" --json status --jq '.status')"
+              status="$(fetch_child_run_json | jq -r '.status')"
              if [[ "$status" == "completed" ]]; then
                break
              fi
              poll_count=$((poll_count + 1))
              if (( poll_count % 10 == 0 )); then
                echo "Still waiting on ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
-                gh run view "$run_id" --json jobs --jq '.jobs[] | select(.status != "completed") | {name, status, url}' || true
+                fetch_child_jobs | jq 'select(.status != "completed") | {name, status, url: .html_url}' || true
              fi
              sleep 30
            done
            trap - EXIT INT TERM

-            conclusion="$(gh run view "$run_id" --json conclusion --jq '.conclusion')"
-            url="$(gh run view "$run_id" --json url --jq '.url')"
+            conclusion="$(fetch_child_run_json | jq -r '.conclusion // ""')"
+            url="$(fetch_child_run_json | jq -r '.html_url')"
            echo "${workflow} finished with ${conclusion}: ${url}"
            echo "url=${url}" >> "$GITHUB_OUTPUT"
            echo "conclusion=${conclusion}" >> "$GITHUB_OUTPUT"
            if [[ "$conclusion" != "success" ]]; then
-              gh run view "$run_id" --json jobs --jq '.jobs[] | select(.conclusion != "success" and .conclusion != "skipped") | {name, conclusion, url}' || true
+              fetch_child_jobs | jq 'select(.conclusion != "success" and .conclusion != "skipped") | {name, conclusion, url: .html_url}' || true
              exit 1
            fi
          }
@@ -480,9 +589,31 @@ jobs:
            shift

            local before_json dispatch_output run_id status conclusion url poll_count run_json
-            before_json="$(gh run list --workflow "$workflow" --event workflow_dispatch --limit 100 --json databaseId --jq '[.[].databaseId]')"
+            gh_with_retry() {  # run "gh <args>"; up to 6 attempts when the error text matches the retryable list below
+              local output status attempt
+              for attempt in 1 2 3 4 5 6; do
+                set +e  # capture the exit status instead of aborting under errexit
+                output="$(gh "$@" 2>&1)"  # stderr is folded into the captured output
+                status=$?
+                set -e
+                if [[ "$status" -eq 0 ]]; then
+                  printf '%s\n' "$output"  # success: forward the captured output on stdout
+                  return 0
+                fi
+                if [[ "$output" == *"Bad credentials"* || "$output" == *"HTTP 401"* || "$output" == *"secondary rate limit"* || "$output" == *"API rate limit"* || "$output" == *"Sorry. Your account was suspended"* ]]; then
+                  echo "::warning::gh $* failed on attempt ${attempt}: ${output}" >&2
+                  sleep $((attempt * 10))  # linear backoff: 10s, 20s, ... 60s
+                  continue
+                fi
+                printf '%s\n' "$output" >&2  # any other failure is reported and returned immediately
+                return "$status"
+              done
+              printf '%s\n' "$output" >&2  # attempts exhausted: surface the last error and status
+              return "$status"
+            }
+            before_json="$(gh_with_retry run list --workflow "$workflow" --event workflow_dispatch --limit 100 --json databaseId --jq '[.[].databaseId]')"

-            dispatch_output="$(gh workflow run "$workflow" --ref "$CHILD_WORKFLOW_REF" "$@" 2>&1)"
+            dispatch_output="$(gh_with_retry workflow run "$workflow" --ref "$CHILD_WORKFLOW_REF" "$@")"
            printf '%s\n' "$dispatch_output"
            run_id="$(
              printf '%s\n' "$dispatch_output" |
@@ -493,7 +624,7 @@ jobs:
            if [[ -z "$run_id" ]]; then
              for _ in $(seq 1 60); do
                run_id="$(
-                  BEFORE_IDS="$before_json" gh run list --workflow "$workflow" --event workflow_dispatch --limit 50 --json databaseId,createdAt \
+                  BEFORE_IDS="$before_json" gh_with_retry run list --workflow "$workflow" --event workflow_dispatch --limit 50 --json databaseId,createdAt \
                    --jq 'map(select(.databaseId as $id | (env.BEFORE_IDS | fromjson | index($id) | not))) | sort_by(.createdAt) | reverse | .[0].databaseId // empty'
                )"
                if [[ -n "$run_id" ]]; then
@@ -511,6 +642,14 @@ jobs:
            echo "Dispatched ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
            echo "run_id=${run_id}" >> "$GITHUB_OUTPUT"

+            fetch_child_run_json() {  # print the REST API JSON for the run in ${run_id}, via gh_with_retry
+              gh_with_retry api "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
+            }
+
+            fetch_child_jobs() {  # print each job object of that run (paginated, 100 per page), one JSON value per job
+              gh_with_retry api --paginate "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}/jobs?per_page=100" --jq '.jobs[]'
+            }
+
            release_check_blocking_job() {
              case "$1" in
                "resolve_target" | \
@@ -561,20 +700,25 @@ jobs:

            poll_count=0
            while true; do
-              status="$(gh run view "$run_id" --json status --jq '.status')"
+              status="$(fetch_child_run_json | jq -r '.status')"
              if [[ "$status" == "completed" ]]; then
                break
              fi
              poll_count=$((poll_count + 1))
              if (( poll_count % 10 == 0 )); then
                echo "Still waiting on ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
-                gh run view "$run_id" --json jobs --jq '.jobs[] | select(.status != "completed") | {name, status, url}' || true
+                fetch_child_jobs | jq 'select(.status != "completed") | {name, status, url: .html_url}' || true
              fi
              sleep 30
            done
            trap - EXIT INT TERM

-            run_json="$(gh run view "$run_id" --json conclusion,url,jobs)"
+            jobs_json="$(fetch_child_jobs | jq -s '{jobs: [.[] | {name, conclusion, url: .html_url}]}')"
+            run_json="$(
+              jq -s '.[0] + .[1]' \
+                <(fetch_child_run_json | jq '{conclusion: (.conclusion // ""), url: .html_url}') \
+                <(printf '%s\n' "$jobs_json")
+            )"
            conclusion="$(jq -r '.conclusion' <<< "$run_json")"
            url="$(jq -r '.url' <<< "$run_json")"
            echo "${workflow} finished with ${conclusion}: ${url}"
@@ -669,7 +813,7 @@ jobs:
      - name: Checkout trusted workflow ref
        uses: actions/checkout@v6
        with:
-          persist-credentials: false
+          persist-credentials: true
          ref: ${{ github.ref_name }}
          fetch-depth: 0

@@ -747,7 +891,30 @@ jobs:
        run: |
          set -euo pipefail

-          before_json="$(gh run list --workflow npm-telegram-beta-e2e.yml --event workflow_dispatch --limit 100 --json databaseId --jq '[.[].databaseId]')"
+          gh_with_retry() {  # run "gh <args>"; up to 6 attempts when the error text matches the retryable list below
+            local output status attempt
+            for attempt in 1 2 3 4 5 6; do
+              set +e  # capture the exit status instead of aborting under errexit
+              output="$(gh "$@" 2>&1)"  # stderr is folded into the captured output
+              status=$?
+              set -e
+              if [[ "$status" -eq 0 ]]; then
+                printf '%s\n' "$output"  # success: forward the captured output on stdout
+                return 0
+              fi
+              if [[ "$output" == *"Bad credentials"* || "$output" == *"HTTP 401"* || "$output" == *"secondary rate limit"* || "$output" == *"API rate limit"* || "$output" == *"Sorry. Your account was suspended"* ]]; then
+                echo "::warning::gh $* failed on attempt ${attempt}: ${output}" >&2
+                sleep $((attempt * 10))  # linear backoff: 10s, 20s, ... 60s
+                continue
+              fi
+              printf '%s\n' "$output" >&2  # any other failure is reported and returned immediately
+              return "$status"
+            done
+            printf '%s\n' "$output" >&2  # attempts exhausted: surface the last error and status
+            return "$status"
+          }
+
+          before_json="$(gh_with_retry run list --workflow npm-telegram-beta-e2e.yml --event workflow_dispatch --limit 100 --json databaseId --jq '[.[].databaseId]')"

          args=(-f package_spec="${PACKAGE_SPEC:-openclaw@beta}" -f harness_ref="$TARGET_SHA" -f provider_mode="$PROVIDER_MODE")
          if [[ -z "${PACKAGE_SPEC// }" ]]; then
@@ -765,12 +932,12 @@ jobs:
            args+=(-f scenario="$SCENARIO")
          fi

-          gh workflow run npm-telegram-beta-e2e.yml --ref "$CHILD_WORKFLOW_REF" "${args[@]}"
+          gh_with_retry workflow run npm-telegram-beta-e2e.yml --ref "$CHILD_WORKFLOW_REF" "${args[@]}"

          run_id=""
          for _ in $(seq 1 60); do
            run_id="$(
-              BEFORE_IDS="$before_json" gh run list --workflow npm-telegram-beta-e2e.yml --event workflow_dispatch --limit 50 --json databaseId,createdAt \
+              BEFORE_IDS="$before_json" gh_with_retry run list --workflow npm-telegram-beta-e2e.yml --event workflow_dispatch --limit 50 --json databaseId,createdAt \
                --jq 'map(select(.databaseId as $id | (env.BEFORE_IDS | fromjson | index($id) | not))) | sort_by(.createdAt) | reverse | .[0].databaseId // empty'
            )"
            if [[ -n "$run_id" ]]; then
@@ -797,32 +964,150 @@ jobs:

          poll_count=0
          while true; do
-            status="$(gh run view "$run_id" --json status --jq '.status')"
+            status="$(gh_with_retry run view "$run_id" --json status --jq '.status')"
            if [[ "$status" == "completed" ]]; then
              break
            fi
            poll_count=$((poll_count + 1))
            if (( poll_count % 10 == 0 )); then
              echo "Still waiting on npm-telegram-beta-e2e.yml: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
-              gh run view "$run_id" --json jobs --jq '.jobs[] | select(.status != "completed") | {name, status, url}' || true
+              gh_with_retry run view "$run_id" --json jobs --jq '.jobs[] | select(.status != "completed") | {name, status, url}' || true
            fi
            sleep 30
          done
          trap - EXIT INT TERM

-          conclusion="$(gh run view "$run_id" --json conclusion --jq '.conclusion')"
-          url="$(gh run view "$run_id" --json url --jq '.url')"
+          conclusion="$(gh_with_retry run view "$run_id" --json conclusion --jq '.conclusion')"
+          url="$(gh_with_retry run view "$run_id" --json url --jq '.url')"
          echo "npm-telegram-beta-e2e.yml finished with ${conclusion}: ${url}"
          echo "url=${url}" >> "$GITHUB_OUTPUT"
          echo "conclusion=${conclusion}" >> "$GITHUB_OUTPUT"
          if [[ "$conclusion" != "success" ]]; then
-            gh run view "$run_id" --json jobs --jq '.jobs[] | select(.conclusion != "success" and .conclusion != "skipped") | {name, conclusion, url}' || true
+            gh_with_retry run view "$run_id" --json jobs --jq '.jobs[] | select(.conclusion != "success" and .conclusion != "skipped") | {name, conclusion, url}' || true
            exit 1
          fi

+  performance:
+    name: Run product performance evidence
+    needs: [resolve_target, docker_runtime_assets_preflight]
+    if: ${{ always() && needs.resolve_target.result == 'success' && contains(fromJSON('["all","performance"]'), inputs.rerun_group) && (inputs.rerun_group != 'all' || needs.docker_runtime_assets_preflight.result == 'success') }}
+    runs-on: ubuntu-24.04
+    timeout-minutes: 120
+    outputs:
+      run_id: ${{ steps.dispatch.outputs.run_id }}
+      url: ${{ steps.dispatch.outputs.url }}
+      conclusion: ${{ steps.dispatch.outputs.conclusion }}
+    steps:
+      - name: Dispatch and monitor OpenClaw Performance
+        id: dispatch
+        env:
+          GH_TOKEN: ${{ github.token }}
+          TARGET_SHA: ${{ needs.resolve_target.outputs.sha }}
+          CHILD_WORKFLOW_REF: ${{ github.ref_name }}
+        run: |
+          set -euo pipefail
+
+          gh_with_retry() {
+            local output status attempt
+            for attempt in 1 2 3 4 5 6; do
+              set +e
+              output="$(gh "$@" 2>&1)"
+              status=$?
+              set -e
+              if [[ "$status" -eq 0 ]]; then
+                printf '%s\n' "$output"
+                return 0
+              fi
+              if [[ "$output" == *"Bad credentials"* || "$output" == *"HTTP 401"* || "$output" == *"secondary rate limit"* || "$output" == *"API rate limit"* || "$output" == *"Sorry. Your account was suspended"* ]]; then
+                echo "::warning::gh $* failed on attempt ${attempt}: ${output}" >&2
+                sleep $((attempt * 10))
+                continue
+              fi
+              printf '%s\n' "$output" >&2
+              return "$status"
+            done
+            printf '%s\n' "$output" >&2
+            return "$status"
+          }
+
+          {
+            echo "### Product performance"
+            echo
+            echo "- Target SHA: \`${TARGET_SHA}\`"
+            echo "- Profile: \`release\`"
+            echo "- Repeat: \`3\`"
+            echo "- Deep profile: \`false\`"
+            echo "- Live OpenAI candidate: \`false\`"
+            echo "- Release impact: advisory"
+          } >> "$GITHUB_STEP_SUMMARY"
+
+          before_json="$(gh_with_retry run list --workflow openclaw-performance.yml --event workflow_dispatch --limit 100 --json databaseId --jq '[.[].databaseId]')"
+
+          gh_with_retry workflow run openclaw-performance.yml \
+            --ref "$CHILD_WORKFLOW_REF" \
+            -f target_ref="$TARGET_SHA" \
+            -f profile=release \
+            -f repeat=3 \
+            -f deep_profile=false \
+            -f live_openai_candidate=false \
+            -f fail_on_regression=false
+
+          run_id=""
+          for _ in $(seq 1 60); do
+            run_id="$(
+              BEFORE_IDS="$before_json" gh_with_retry run list --workflow openclaw-performance.yml --event workflow_dispatch --limit 50 --json databaseId,createdAt \
+                --jq 'map(select(.databaseId as $id | (env.BEFORE_IDS | fromjson | index($id) | not))) | sort_by(.createdAt) | reverse | .[0].databaseId // empty'
+            )"
+            if [[ -n "$run_id" ]]; then
+              break
+            fi
+            sleep 5
+          done
+
+          if [[ -z "$run_id" ]]; then
+            echo "::warning::Could not find dispatched run for openclaw-performance.yml."
+            exit 0
+          fi
+
+          echo "Dispatched openclaw-performance.yml: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
+          echo "run_id=${run_id}" >> "$GITHUB_OUTPUT"
+
+          cancel_child() {
+            if [[ -n "${run_id:-}" ]]; then
+              echo "Cancelling child workflow openclaw-performance.yml: ${run_id}" >&2
+              gh run cancel "$run_id" >/dev/null 2>&1 || true
+            fi
+          }
+          trap cancel_child EXIT INT TERM
+
+          poll_count=0
+          while true; do
+            status="$(gh_with_retry run view "$run_id" --json status --jq '.status')"
+            if [[ "$status" == "completed" ]]; then
+              break
+            fi
+            poll_count=$((poll_count + 1))
+            if (( poll_count % 10 == 0 )); then
+              echo "Still waiting on openclaw-performance.yml: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
+              gh_with_retry run view "$run_id" --json jobs --jq '.jobs[] | select(.status != "completed") | {name, status, url}' || true
+            fi
+            sleep 30
+          done
+          trap - EXIT INT TERM
+
+          conclusion="$(gh_with_retry run view "$run_id" --json conclusion --jq '.conclusion')"
+          url="$(gh_with_retry run view "$run_id" --json url --jq '.url')"
+          echo "openclaw-performance.yml finished with ${conclusion}: ${url}"
+          echo "url=${url}" >> "$GITHUB_OUTPUT"
+          echo "conclusion=${conclusion}" >> "$GITHUB_OUTPUT"
+          if [[ "$conclusion" != "success" ]]; then
+            echo "::warning::OpenClaw Performance is advisory and ended with ${conclusion}: ${url}"
+            gh_with_retry run view "$run_id" --json jobs --jq '.jobs[] | select(.conclusion != "success" and .conclusion != "skipped") | {name, conclusion, url}' || true
+          fi
+
  summary:
    name: Verify full validation
-    needs: [resolve_target, docker_runtime_assets_preflight, normal_ci, plugin_prerelease, release_checks, npm_telegram]
+    needs: [resolve_target, docker_runtime_assets_preflight, normal_ci, plugin_prerelease, release_checks, npm_telegram, performance]
    if: always()
    runs-on: ubuntu-24.04
    timeout-minutes: 5
@@ -834,10 +1119,12 @@ jobs:
          PLUGIN_PRERELEASE_RUN_ID: ${{ needs.plugin_prerelease.outputs.run_id }}
          RELEASE_CHECKS_RUN_ID: ${{ needs.release_checks.outputs.run_id }}
          NPM_TELEGRAM_RUN_ID: ${{ needs.npm_telegram.outputs.run_id }}
+          PERFORMANCE_RUN_ID: ${{ needs.performance.outputs.run_id }}
          NORMAL_CI_RESULT: ${{ needs.normal_ci.result }}
          PLUGIN_PRERELEASE_RESULT: ${{ needs.plugin_prerelease.result }}
          RELEASE_CHECKS_RESULT: ${{ needs.release_checks.result }}
          NPM_TELEGRAM_RESULT: ${{ needs.npm_telegram.result }}
+          PERFORMANCE_RESULT: ${{ needs.performance.result }}
          DOCKER_RUNTIME_ASSETS_PREFLIGHT_RESULT: ${{ needs.docker_runtime_assets_preflight.result }}
          RERUN_GROUP: ${{ inputs.rerun_group }}
          TARGET_SHA: ${{ needs.resolve_target.outputs.sha }}
@@ -845,6 +1132,29 @@ jobs:
        run: |
          set -euo pipefail

+          gh_with_retry() {
+            local output status attempt
+            for attempt in 1 2 3 4 5 6; do
+              set +e
+              output="$(gh "$@" 2>&1)"
+              status=$?
+              set -e
+              if [[ "$status" -eq 0 ]]; then
+                printf '%s\n' "$output"
+                return 0
+              fi
+              if [[ "$output" == *"Bad credentials"* || "$output" == *"HTTP 401"* || "$output" == *"secondary rate limit"* || "$output" == *"API rate limit"* || "$output" == *"Sorry. Your account was suspended"* ]]; then
+                echo "::warning::gh $* failed on attempt ${attempt}: ${output}" >&2
+                sleep $((attempt * 10))
+                continue
+              fi
+              printf '%s\n' "$output" >&2
+              return "$status"
+            done
+            printf '%s\n' "$output" >&2
+            return "$status"
+          }
+
          release_check_blocking_job() {
            case "$1" in
              "resolve_target" | \
@@ -901,7 +1211,7 @@ jobs:
            fi

            local run_json status conclusion url attempt head_sha
-            run_json="$(gh run view "$run_id" --json status,conclusion,url,attempt,headSha,jobs)"
+            run_json="$(gh_with_retry run view "$run_id" --json status,conclusion,url,attempt,headSha,jobs)"
            status="$(jq -r '.status' <<< "$run_json")"
            conclusion="$(jq -r '.conclusion' <<< "$run_json")"
            url="$(jq -r '.url' <<< "$run_json")"
@@ -948,7 +1258,7 @@ jobs:
              fi

              local run_json row
-              run_json="$(gh run view "$run_id" --json status,conclusion,url,createdAt,updatedAt,headSha)"
+              run_json="$(gh_with_retry run view "$run_id" --json status,conclusion,url,createdAt,updatedAt,headSha)"
              row="$(
                jq -r --arg label "$label" '
                  def ts: fromdateiso8601;
@@ -970,6 +1280,7 @@ jobs:
            append_child_row "plugin_prerelease" "$PLUGIN_PRERELEASE_RUN_ID" "$PLUGIN_PRERELEASE_RESULT"
            append_child_row "release_checks" "$RELEASE_CHECKS_RUN_ID" "$RELEASE_CHECKS_RESULT"
            append_child_row "npm_telegram" "$NPM_TELEGRAM_RUN_ID" "$NPM_TELEGRAM_RESULT"
+            append_child_row "product_performance" "$PERFORMANCE_RUN_ID" "$PERFORMANCE_RESULT"
          }

          summarize_child_timing() {
@@ -983,7 +1294,7 @@ jobs:
              echo
              echo "### Slowest jobs: ${label}"
              echo
-              gh run view "$run_id" --json jobs --jq '
+              gh_with_retry run view "$run_id" --json jobs --jq '
                def ts: fromdateiso8601;
                "| Job | Result | Minutes |",
                "| --- | --- | ---: |",
@@ -1000,7 +1311,7 @@ jobs:
              echo
              echo "### Longest queues: ${label}"
              echo
-              gh api --paginate "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}/jobs?per_page=100" --jq ".jobs[] | @json" | jq -sr '
+              gh_with_retry api --paginate "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}/jobs?per_page=100" --jq ".jobs[] | @json" | jq -sr '
                def ts: fromdateiso8601;
                "| Job | Result | Queue minutes | Run minutes |",
                "| --- | --- | ---: | ---: |",
@@ -1029,7 +1340,7 @@ jobs:
            fi

            local run_json status conclusion artifacts_json
-            run_json="$(gh run view "$run_id" --json status,conclusion,url,jobs)"
+            run_json="$(gh_with_retry run view "$run_id" --json status,conclusion,url,jobs)"
            status="$(jq -r '.status' <<< "$run_json")"
            conclusion="$(jq -r '.conclusion' <<< "$run_json")"
            if [[ "$status" == "completed" && "$conclusion" == "success" ]]; then
@@ -1052,7 +1363,7 @@ jobs:
              echo
              echo "Artifacts:"
              artifacts_json="$(
-                gh api "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}/artifacts?per_page=100" 2>/dev/null || true
+                gh_with_retry api "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}/artifacts?per_page=100" 2>/dev/null || true
              )"
              if [[ -n "${artifacts_json// }" ]]; then
                jq -r '
@@ -1128,6 +1439,7 @@ jobs:
          summarize_child_timing "plugin_prerelease" "$PLUGIN_PRERELEASE_RUN_ID"
          summarize_child_timing "release_checks" "$RELEASE_CHECKS_RUN_ID"
          summarize_child_timing "npm_telegram" "$NPM_TELEGRAM_RUN_ID"
+          summarize_child_timing "product_performance" "$PERFORMANCE_RUN_ID"

          if [[ "$failed" != "0" ]]; then
            summarize_failed_child "normal_ci" "$NORMAL_CI_RUN_ID"
@@ -1225,6 +1537,7 @@ jobs:
          PLUGIN_PRERELEASE_RUN_ID: ${{ needs.plugin_prerelease.outputs.run_id }}
          RELEASE_CHECKS_RUN_ID: ${{ needs.release_checks.outputs.run_id }}
          NPM_TELEGRAM_RUN_ID: ${{ needs.npm_telegram.outputs.run_id }}
+          PERFORMANCE_RUN_ID: ${{ needs.performance.outputs.run_id }}
        run: |
          set -euo pipefail
          manifest_dir="${RUNNER_TEMP}/full-release-validation"
@@ -1243,6 +1556,7 @@ jobs:
            --arg pluginPrereleaseRunId "$PLUGIN_PRERELEASE_RUN_ID" \
            --arg releaseChecksRunId "$RELEASE_CHECKS_RUN_ID" \
            --arg npmTelegramRunId "$NPM_TELEGRAM_RUN_ID" \
+            --arg performanceRunId "$PERFORMANCE_RUN_ID" \
            '{
              version: 1,
              workflowName: $workflowName,
@@ -1258,7 +1572,8 @@ jobs:
                normalCi: $normalCiRunId,
                pluginPrerelease: $pluginPrereleaseRunId,
                releaseChecks: $releaseChecksRunId,
-                npmTelegram: $npmTelegramRunId
+                npmTelegram: $npmTelegramRunId,
+                productPerformance: $performanceRunId
              }
            }' > "${manifest_dir}/full-release-validation-manifest.json"

--- a/.github/workflows/install-smoke.yml
+++ b/.github/workflows/install-smoke.yml
@@ -109,6 +109,7 @@ jobs:
        uses: actions/checkout@v6
        with:
          ref: ${{ inputs.ref || github.ref }}
+          persist-credentials: false

      - name: Set up Blacksmith Docker Builder
        uses: useblacksmith/setup-docker-builder@722e97d12b1d06a961800dd6c05d79d951ad3c80 # v1
@@ -120,7 +121,7 @@ jobs:
      # builder stalls; an explicit buildx invocation fails closed instead.
      - name: Build root Dockerfile smoke image
        run: |
-          timeout 45m docker buildx build \
+          timeout --kill-after=30s 45m docker buildx build \
            --progress=plain \
            --load \
            --build-arg OPENCLAW_EXTENSIONS=matrix \
@@ -131,7 +132,7 @@ jobs:

      - name: Run root Dockerfile CLI smoke
        run: |
-          docker run --rm --entrypoint sh openclaw-dockerfile-smoke:local -lc '
+          timeout --kill-after=30s 20m docker run --rm --entrypoint sh openclaw-dockerfile-smoke:local -lc '
            which openclaw &&
            openclaw --version &&
            node -e "
@@ -162,7 +163,7 @@ jobs:

      - name: Smoke test Dockerfile with matrix extension build arg
        run: |
-          docker run --rm --entrypoint sh openclaw-ext-smoke:local -lc '
+          timeout --kill-after=30s 20m docker run --rm --entrypoint sh openclaw-ext-smoke:local -lc '
            which openclaw &&
            openclaw --version &&
            node -e "
@@ -219,6 +220,7 @@ jobs:
        uses: actions/checkout@v6
        with:
          ref: ${{ inputs.ref || github.ref }}
+          persist-credentials: false

      - name: Log in to GHCR
        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
@@ -233,7 +235,7 @@ jobs:
          IMAGE_REF: ${{ needs.preflight.outputs.dockerfile_image }}
        run: |
          set -euo pipefail
-          if timeout 180s docker pull "$IMAGE_REF"; then
+          if timeout --kill-after=30s 180s docker pull "$IMAGE_REF"; then
            echo "exists=true" >> "$GITHUB_OUTPUT"
            echo "Using existing root Dockerfile smoke image: \`$IMAGE_REF\`" >> "$GITHUB_STEP_SUMMARY"
          else
@@ -254,7 +256,7 @@ jobs:
        env:
          IMAGE_REF: ${{ needs.preflight.outputs.dockerfile_image }}
        run: |
-          timeout 45m docker buildx build \
+          timeout --kill-after=30s 45m docker buildx build \
            --progress=plain \
            --push \
            --build-arg OPENCLAW_EXTENSIONS=matrix \
@@ -290,6 +292,7 @@ jobs:
        uses: actions/checkout@v6
        with:
          ref: ${{ inputs.ref || github.ref }}
+          persist-credentials: false

      - name: Run QR package install smoke
        env:
@@ -305,6 +308,7 @@ jobs:
        uses: actions/checkout@v6
        with:
          ref: ${{ inputs.ref || github.ref }}
+          persist-credentials: false

      - name: Log in to GHCR
        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
@@ -316,13 +320,13 @@ jobs:
      - name: Pull root Dockerfile smoke image
        env:
          IMAGE_REF: ${{ needs.root_dockerfile_image.outputs.image_ref }}
-        run: timeout 600s docker pull "$IMAGE_REF"
+        run: timeout --kill-after=30s 600s docker pull "$IMAGE_REF"

      - name: Run root Dockerfile CLI smoke
        env:
          IMAGE_REF: ${{ needs.root_dockerfile_image.outputs.image_ref }}
        run: |
-          docker run --rm --entrypoint sh "$IMAGE_REF" -lc '
+          timeout --kill-after=30s 20m docker run --rm --entrypoint sh "$IMAGE_REF" -lc '
            which openclaw &&
            openclaw --version &&
            node -e "
@@ -355,7 +359,7 @@ jobs:
        env:
          IMAGE_REF: ${{ needs.root_dockerfile_image.outputs.image_ref }}
        run: |
-          docker run --rm --entrypoint sh "$IMAGE_REF" -lc '
+          timeout --kill-after=30s 20m docker run --rm --entrypoint sh "$IMAGE_REF" -lc '
            which openclaw &&
            openclaw --version &&
            node -e "
@@ -410,6 +414,7 @@ jobs:
        uses: actions/checkout@v6
        with:
          ref: ${{ inputs.ref || github.ref }}
+          persist-credentials: false

      - name: Log in to GHCR
        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
@@ -421,7 +426,7 @@ jobs:
      - name: Pull root Dockerfile smoke image
        env:
          IMAGE_REF: ${{ needs.root_dockerfile_image.outputs.image_ref }}
-        run: timeout 600s docker pull "$IMAGE_REF"
+        run: timeout --kill-after=30s 600s docker pull "$IMAGE_REF"

      - name: Set up Blacksmith Docker Builder
        uses: useblacksmith/setup-docker-builder@722e97d12b1d06a961800dd6c05d79d951ad3c80 # v1
@@ -430,7 +435,7 @@ jobs:

      - name: Build installer smoke image
        run: |
-          timeout 20m docker buildx build \
+          timeout --kill-after=30s 20m docker buildx build \
            --progress=plain \
            --load \
            -t openclaw-install-smoke:local \
@@ -439,7 +444,7 @@ jobs:

      - name: Build installer non-root image
        run: |
-          timeout 20m docker buildx build \
+          timeout --kill-after=30s 20m docker buildx build \
            --progress=plain \
            --load \
            -t openclaw-install-nonroot:local \
@@ -454,10 +459,10 @@ jobs:

      - name: Run installer docker tests
        env:
-          OPENCLAW_INSTALL_URL: https://openclaw.ai/install.sh
-          OPENCLAW_INSTALL_CLI_URL: https://openclaw.ai/install-cli.sh
+          OPENCLAW_INSTALL_URL: file:///tmp/openclaw-install.sh
+          OPENCLAW_INSTALL_CLI_URL: file:///tmp/openclaw-install-cli.sh
          OPENCLAW_NO_ONBOARD: "1"
-          OPENCLAW_INSTALL_SMOKE_SKIP_CLI: "1"
+          OPENCLAW_INSTALL_SMOKE_SKIP_CLI: "0"
          OPENCLAW_INSTALL_SMOKE_SKIP_IMAGE_BUILD: "1"
          OPENCLAW_INSTALL_NONROOT_SKIP_IMAGE_BUILD: "1"
          OPENCLAW_INSTALL_SMOKE_SKIP_NONROOT: "0"
@@ -468,6 +473,24 @@ jobs:
          OPENCLAW_INSTALL_SMOKE_UPDATE_SKIP_LOCAL_BUILD: "1"
        run: bash scripts/test-install-sh-docker.sh

+      - name: Run Rocky Linux installer smoke
+        run: |
+          timeout --kill-after=30s 20m docker run --rm \
+            -e OPENCLAW_NO_ONBOARD=1 \
+            -e OPENCLAW_NO_PROMPT=1 \
+            -v "$PWD/scripts/install.sh:/tmp/install.sh:ro" \
+            rockylinux:9@sha256:d7be1c094cc5845ee815d4632fe377514ee6ebcf8efaed6892889657e5ddaaa6 \
+            bash -lc 'dnf install -y -q ca-certificates tar gzip xz findutils which sudo >/dev/null && bash /tmp/install.sh --install-method npm --version latest --no-onboard --no-prompt --verify && openclaw --version'
+
+      - name: Run Rocky Linux CLI installer smoke
+        run: |
+          timeout --kill-after=30s 20m docker run --rm \
+            -e OPENCLAW_NO_ONBOARD=1 \
+            -e OPENCLAW_NO_PROMPT=1 \
+            -v "$PWD/scripts/install-cli.sh:/tmp/install-cli.sh:ro" \
+            rockylinux:9@sha256:d7be1c094cc5845ee815d4632fe377514ee6ebcf8efaed6892889657e5ddaaa6 \
+            bash -lc 'dnf install -y -q ca-certificates tar gzip xz findutils which sudo >/dev/null && bash /tmp/install-cli.sh --prefix /tmp/openclaw-cli --version latest --no-onboard && /tmp/openclaw-cli/bin/openclaw --version'
+
  bun_global_install_smoke:
    needs: [preflight, root_dockerfile_image]
    if: needs.preflight.outputs.run_full_install_smoke == 'true' && needs.preflight.outputs.run_bun_global_install_smoke == 'true'
@@ -477,6 +500,7 @@ jobs:
        uses: actions/checkout@v6
        with:
          ref: ${{ inputs.ref || github.ref }}
+          persist-credentials: false

      - name: Log in to GHCR
        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
@@ -488,7 +512,7 @@ jobs:
      - name: Pull root Dockerfile smoke image
        env:
          IMAGE_REF: ${{ needs.root_dockerfile_image.outputs.image_ref }}
-        run: timeout 600s docker pull "$IMAGE_REF"
+        run: timeout --kill-after=30s 600s docker pull "$IMAGE_REF"

      - name: Setup Node environment for Bun smoke
        uses: ./.github/actions/setup-node-env
@@ -515,6 +539,7 @@ jobs:
        uses: actions/checkout@v6
        with:
          ref: ${{ inputs.ref || github.ref }}
+          persist-credentials: false

      - name: Set up Blacksmith Docker Builder
        uses: useblacksmith/setup-docker-builder@722e97d12b1d06a961800dd6c05d79d951ad3c80 # v1
--- a/.github/workflows/mantis-slack-desktop-smoke.yml
+++ b/.github/workflows/mantis-slack-desktop-smoke.yml
@@ -17,6 +17,11 @@ on:
        required: true
        default: slack-canary
        type: string
+      approval_checkpoints:
+        description: Run native Slack approval checkpoint mode instead of gateway setup
+        required: false
+        default: false
+        type: boolean
      keep_vm:
        description: Keep the desktop lease open after a passing run
        required: false
@@ -30,6 +35,14 @@ on:
        options:
          - aws
          - hetzner
+      crabbox_market:
+        description: Crabbox capacity market for AWS leases
+        required: false
+        default: on-demand
+        type: choice
+        options:
+          - on-demand
+          - spot
      crabbox_lease_id:
        description: Optional existing Crabbox desktop/browser lease id or slug to reuse
        required: false
@@ -227,9 +240,11 @@ jobs:
          CRABBOX_ACCESS_CLIENT_SECRET: ${{ secrets.CRABBOX_ACCESS_CLIENT_SECRET }}
          CRABBOX_LEASE_ID: ${{ inputs.crabbox_lease_id }}
          CRABBOX_PROVIDER: ${{ inputs.crabbox_provider }}
+          CRABBOX_MARKET: ${{ inputs.crabbox_market }}
          KEEP_VM: ${{ inputs.keep_vm }}
          HYDRATE_MODE: ${{ inputs.hydrate_mode }}
          SCENARIO_ID: ${{ inputs.scenario_id }}
+          APPROVAL_CHECKPOINTS: ${{ inputs.approval_checkpoints }}
        shell: bash
        run: |
          set -euo pipefail
@@ -250,6 +265,15 @@ jobs:
          require_var OPENCLAW_QA_CONVEX_SITE_URL
          require_var OPENCLAW_QA_CONVEX_SECRET_CI
          require_var CRABBOX_COORDINATOR_TOKEN
+          if [[ -z "${CRABBOX_LEASE_ID:-}" && "$CRABBOX_PROVIDER" == "aws" ]]; then # fresh AWS lease only: derive this runner's /32 for SSH ingress
+            runner_ip="$(curl -fsS --retry 2 --max-time 10 https://checkip.amazonaws.com | tr -d '[:space:]' || true)" # '|| true': let the check below report a failed lookup instead of errexit/pipefail aborting here
+            if [[ ! "$runner_ip" =~ ^[0-9]{1,3}(\.[0-9]{1,3}){3}$ ]]; then # an empty or non-IPv4 reply must never be turned into an ingress CIDR
+              echo "Could not resolve GitHub runner public IPv4 for AWS SSH ingress." >&2
+              exit 1
+            fi
+            export CRABBOX_AWS_SSH_CIDRS="${runner_ip}/32"
+            echo "Using AWS SSH CIDR ${CRABBOX_AWS_SSH_CIDRS}"
+          fi

          candidate_repo="$(pwd)/.artifacts/qa-e2e/mantis/slack-desktop-smoke-worktrees/candidate"
          output_rel=".artifacts/qa-e2e/mantis/slack-desktop-smoke"
@@ -265,6 +289,22 @@ jobs:
          else
            keep_args=(--no-keep-lease)
          fi
+          market_args=()
+          if [[ -n "${CRABBOX_MARKET:-}" ]]; then
+            market_args=(--market "$CRABBOX_MARKET")
+          fi
+          gateway_args=(--gateway-setup)
+          approval_args=()
+          scenario_args=(--scenario "$SCENARIO_ID")
+          scenario_label="$SCENARIO_ID"
+          if [[ "$APPROVAL_CHECKPOINTS" == "true" ]]; then
+            approval_args=(--approval-checkpoints)
+            gateway_args=()
+            if [[ -z "${SCENARIO_ID:-}" || "$SCENARIO_ID" == "slack-canary" || "$SCENARIO_ID" == "approval-checkpoints" ]]; then
+              scenario_args=()
+              scenario_label="approval-checkpoints"
+            fi
+          fi

          set +e
          pnpm openclaw qa mantis slack-desktop-smoke \
@@ -274,7 +314,7 @@ jobs:
            --class standard \
            --idle-timeout 45m \
            --ttl 120m \
-            --gateway-setup \
+            "${gateway_args[@]}" \
            --credential-source convex \
            --credential-role ci \
            --provider-mode live-frontier \
@@ -282,7 +322,9 @@ jobs:
            --model openai/gpt-5.5 \
            --alt-model openai/gpt-5.5 \
            --fast \
-            --scenario "$SCENARIO_ID" \
+            "${scenario_args[@]}" \
+            "${approval_args[@]}" \
+            "${market_args[@]}" \
            "${keep_args[@]}" \
            "${lease_args[@]}"
          mantis_exit=$?
@@ -312,27 +354,81 @@ jobs:

          status="$(jq -r '.status' "$root/mantis-slack-desktop-smoke-summary.json")"
          screenshot_required=false
+          desktop_capture_inline=true
          if [[ "$status" == "pass" ]]; then
            screenshot_required=true
          fi
+          evidence_summary="Mantis ran Slack QA inside a Crabbox Linux VNC desktop, started an OpenClaw Slack gateway in that VM, opened Slack Web in the visible browser, and captured screenshot/video evidence."
+          expected_result="Slack QA and VM gateway setup pass"
+          checkpoint_artifacts='[]'
+          checkpoint_required=false
+          if [[ "$APPROVAL_CHECKPOINTS" == "true" ]]; then
+            evidence_summary="Mantis ran Slack native approval QA inside a Crabbox Linux VNC desktop, rendered pending/resolved approval checkpoints from the Slack API messages, and stored Slack QA artifacts."
+            expected_result="Slack native exec and plugin approval checkpoints pass"
+            screenshot_required=false
+            desktop_capture_inline=false
+            if [[ "$status" == "pass" ]]; then
+              checkpoint_required=true
+            fi
+            checkpoint_scenarios=()
+            if [[ "$scenario_label" == "approval-checkpoints" ]]; then
+              checkpoint_scenarios=("slack-approval-exec-native" "slack-approval-plugin-native")
+            else
+              checkpoint_scenarios=("$scenario_label")
+            fi
+            checkpoint_scenarios_json="$(printf '%s\n' "${checkpoint_scenarios[@]}" | jq -R . | jq -s .)"
+            checkpoint_artifacts="$(
+              jq -n \
+                --argjson checkpoint_required "$checkpoint_required" \
+                --argjson scenario_ids "$checkpoint_scenarios_json" \
+                '
+                  def scenario_kind($id):
+                    if $id == "slack-approval-exec-native" then "exec"
+                    elif $id == "slack-approval-plugin-native" then "plugin"
+                    else error("unsupported approval checkpoint scenario: \($id)")
+                    end;
+                  def scenario_title($id):
+                    if scenario_kind($id) == "exec" then "Exec" else "Plugin" end;
+                  [
+                    $scenario_ids[] as $id
+                    | ["pending", "resolved"][] as $state
+                    | {
+                        kind: "desktopScreenshot",
+                        lane: "candidate",
+                        label: "\(scenario_title($id)) approval \($state) checkpoint",
+                        path: "approval-checkpoints/\($id)-\($state).png",
+                        targetPath: "approval-checkpoints/\($id)-\($state).png",
+                        alt: "Rendered Slack \(scenario_kind($id)) approval \($state) checkpoint",
+                        width: 720,
+                        inline: true,
+                        required: $checkpoint_required
+                      }
+                  ]
+                '
+            )"
+          fi
          jq -n \
            --arg status "$status" \
            --arg candidate_sha "${{ needs.validate_ref.outputs.candidate_revision }}" \
-            --arg scenario "$SCENARIO_ID" \
+            --arg scenario "$scenario_label" \
+            --arg summary "$evidence_summary" \
+            --arg expected "$expected_result" \
+            --argjson checkpoint_artifacts "$checkpoint_artifacts" \
            --argjson screenshot_required "$screenshot_required" \
+            --argjson desktop_capture_inline "$desktop_capture_inline" \
            '{
              schemaVersion: 1,
              id: "slack-desktop-smoke",
              title: "Mantis Slack Desktop Smoke QA",
-              summary: "Mantis ran Slack QA inside a Crabbox Linux VNC desktop, started an OpenClaw Slack gateway in that VM, opened Slack Web in the visible browser, and captured screenshot/video evidence.",
+              summary: $summary,
              scenario: $scenario,
              comparison: {
-                candidate: { sha: $candidate_sha, expected: "Slack QA and VM gateway setup pass", status: $status, fixed: ($status == "pass") },
+                candidate: { sha: $candidate_sha, expected: $expected, status: $status, fixed: ($status == "pass") },
                pass: ($status == "pass")
              },
-              artifacts: [
-                { kind: "desktopScreenshot", lane: "candidate", label: "Slack desktop/VNC browser", path: "slack-desktop-smoke.png", targetPath: "slack-desktop.png", alt: "Slack Web desktop screenshot from the Mantis VM", width: 720, inline: true, required: $screenshot_required },
-                { kind: "motionPreview", lane: "candidate", label: "Slack motion preview", path: "slack-desktop-smoke-preview.gif", targetPath: "slack-desktop-preview.gif", alt: "Animated Slack desktop preview", width: 720, inline: true, required: false },
+              artifacts: ([
+                { kind: "desktopScreenshot", lane: "candidate", label: "Slack desktop/VNC browser", path: "slack-desktop-smoke.png", targetPath: "slack-desktop.png", alt: "Slack Web desktop screenshot from the Mantis VM", width: 720, inline: $desktop_capture_inline, required: $screenshot_required },
+                { kind: "motionPreview", lane: "candidate", label: "Slack motion preview", path: "slack-desktop-smoke-preview.gif", targetPath: "slack-desktop-preview.gif", alt: "Animated Slack desktop preview", width: 720, inline: $desktop_capture_inline, required: false },
                { kind: "motionClip", lane: "candidate", label: "Slack change MP4", path: "slack-desktop-smoke-change.mp4", targetPath: "slack-desktop-change.mp4", required: false },
                { kind: "fullVideo", lane: "candidate", label: "Slack desktop MP4", path: "slack-desktop-smoke.mp4", targetPath: "slack-desktop.mp4", required: false },
                { kind: "metadata", lane: "run", label: "Slack desktop summary", path: "mantis-slack-desktop-smoke-summary.json", targetPath: "summary.json" },
@@ -340,7 +436,7 @@ jobs:
                { kind: "metadata", lane: "run", label: "Slack command log", path: "slack-desktop-command.log", targetPath: "slack-desktop-command.log", required: false },
                { kind: "metadata", lane: "run", label: "Slack preview metadata", path: "slack-desktop-smoke-preview.json", targetPath: "slack-desktop-preview.json", required: false },
                { kind: "metadata", lane: "run", label: "Slack error", path: "error.txt", targetPath: "error.txt", required: false }
-              ]
+              ] + $checkpoint_artifacts)
            }' > "$root/mantis-evidence.json"

          cat "$root/mantis-slack-desktop-smoke-report.md" >> "$GITHUB_STEP_SUMMARY"
--- a/.github/workflows/mantis-telegram-desktop-proof.yml
+++ b/.github/workflows/mantis-telegram-desktop-proof.yml
@@ -48,6 +48,7 @@ env:
  OPENCLAW_BUILD_PRIVATE_QA: "1"
  OPENCLAW_ENABLE_PRIVATE_QA_CLI: "1"
  CRABBOX_REF: main
+  CRABBOX_CAPACITY_REGIONS: eu-west-1,eu-west-2,eu-central-1,us-east-1,us-west-2
  MANTIS_OUTPUT_DIR: .artifacts/qa-e2e/mantis/telegram-desktop-proof

 jobs:
@@ -422,7 +423,7 @@ jobs:
          {
            printf '%s\n' 'Defaults env_keep += "CODEX_HOME CODEX_INTERNAL_ORIGINATOR_OVERRIDE"'
            printf '%s\n' 'Defaults env_keep += "BASELINE_REF BASELINE_SHA CANDIDATE_REF CANDIDATE_SHA"'
-            printf '%s\n' 'Defaults env_keep += "CRABBOX_ACCESS_CLIENT_ID CRABBOX_ACCESS_CLIENT_SECRET CRABBOX_COORDINATOR CRABBOX_COORDINATOR_TOKEN CRABBOX_LEASE_ID CRABBOX_PROVIDER"'
+            printf '%s\n' 'Defaults env_keep += "CRABBOX_ACCESS_CLIENT_ID CRABBOX_ACCESS_CLIENT_SECRET CRABBOX_COORDINATOR CRABBOX_COORDINATOR_TOKEN CRABBOX_LEASE_ID CRABBOX_PROVIDER CRABBOX_CAPACITY_REGIONS"'
            printf '%s\n' 'Defaults env_keep += "GH_TOKEN MANTIS_CANDIDATE_TRUST MANTIS_INSTRUCTIONS MANTIS_OUTPUT_DIR MANTIS_PR_NUMBER"'
            printf '%s\n' 'Defaults env_keep += "OPENCLAW_BUILD_PRIVATE_QA OPENCLAW_ENABLE_PRIVATE_QA_CLI OPENCLAW_QA_CONVEX_SECRET_CI OPENCLAW_QA_CONVEX_SITE_URL OPENCLAW_QA_CREDENTIAL_OWNER_ID OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN"'
            printf '%s\n' 'Defaults env_keep += "OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT OPENCLAW_TELEGRAM_USER_PROOF_CMD"'
@@ -451,6 +452,7 @@ jobs:
          CRABBOX_ACCESS_CLIENT_SECRET: ${{ secrets.CRABBOX_ACCESS_CLIENT_SECRET }}
          CRABBOX_COORDINATOR: ${{ secrets.CRABBOX_COORDINATOR || secrets.OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR }}
          CRABBOX_COORDINATOR_TOKEN: ${{ secrets.CRABBOX_COORDINATOR_TOKEN || secrets.OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN }}
+          CRABBOX_CAPACITY_REGIONS: ${{ env.CRABBOX_CAPACITY_REGIONS }}
          CRABBOX_LEASE_ID: ${{ needs.resolve_request.outputs.lease_id }}
          CRABBOX_PROVIDER: ${{ needs.resolve_request.outputs.crabbox_provider }}
          GH_TOKEN: ${{ github.token }}
@@ -492,8 +494,11 @@ jobs:
            exit 0
          fi
          status=0
-          mapfile -d '' session_files < <(sudo find .artifacts/qa-e2e -path '*/telegram-user-crabbox/*/session.json' -type f -print0)
+          mapfile -d '' session_files < <(sudo find .artifacts/qa-e2e -name session.json -type f -print0)
          for session_file in "${session_files[@]}"; do
+            if ! sudo -u codex node -e 'const fs = require("fs"); const session = JSON.parse(fs.readFileSync(process.argv[1], "utf8")); process.exit(session.command === "telegram-user-crabbox-session" ? 0 : 1);' "$session_file"; then
+              continue
+            fi
            lease_file="${session_file%/session.json}/.session/lease.json"
            if [[ ! -f "$lease_file" ]]; then
              continue
@@ -508,8 +513,11 @@ jobs:
              status=1
            fi
          done
-          mapfile -d '' lease_files < <(sudo find .artifacts/qa-e2e -path '*/telegram-user-crabbox/*/.session/lease.json' -type f -print0)
+          mapfile -d '' lease_files < <(sudo find .artifacts/qa-e2e -path '*/.session/lease.json' -type f -print0)
          for lease_file in "${lease_files[@]}"; do
+            if ! sudo -u codex node -e 'const fs = require("fs"); const lease = JSON.parse(fs.readFileSync(process.argv[1], "utf8")); process.exit(lease.kind === "telegram-user" ? 0 : 1);' "$lease_file"; then
+              continue
+            fi
            if ! sudo -u codex env \
              OPENCLAW_QA_CONVEX_SECRET_CI="$OPENCLAW_QA_CONVEX_SECRET_CI" \
              OPENCLAW_QA_CONVEX_SITE_URL="$OPENCLAW_QA_CONVEX_SITE_URL" \
--- a/.github/workflows/openclaw-cross-os-release-checks-reusable.yml
+++ b/.github/workflows/openclaw-cross-os-release-checks-reusable.yml
@@ -338,7 +338,7 @@ jobs:
          ref: ${{ steps.workflow_ref.outputs.value }}
          path: workflow
          fetch-depth: 1
-          persist-credentials: false
+          persist-credentials: false # keep the checkout token out of the local git config, matching the other release workflows

      - name: Checkout public source ref
        if: inputs.candidate_artifact_name == ''
@@ -348,7 +348,7 @@ jobs:
          ref: ${{ inputs.ref }}
          path: source
          fetch-depth: 0
-          persist-credentials: false
+          persist-credentials: false # public ref: do not leave the checkout token in the source tree's git config
          submodules: recursive

      - name: Setup Node.js
@@ -537,7 +537,7 @@ jobs:
          ref: ${{ needs.prepare.outputs.workflow_ref }}
          path: workflow
          fetch-depth: 1
-          persist-credentials: false
+          persist-credentials: false # keep the checkout token out of the local git config for the steps that follow

      - name: Setup Node.js
        uses: actions/setup-node@v6
@@ -553,6 +553,15 @@ jobs:
          use-actions-cache: "false"

      - name: Download candidate artifact
+        id: download_candidate
+        continue-on-error: true
+        uses: actions/download-artifact@v8
+        with:
+          name: openclaw-cross-os-release-checks-candidate-${{ github.run_id }}
+          path: ${{ runner.temp }}/openclaw-cross-os-release-checks/candidate
+
+      - name: Retry candidate artifact download
+        if: ${{ steps.download_candidate.outcome == 'failure' }}
        uses: actions/download-artifact@v8
        with:
          name: openclaw-cross-os-release-checks-candidate-${{ github.run_id }}
@@ -560,11 +569,38 @@ jobs:

      - name: Download baseline artifact
        if: ${{ matrix.suite == 'packaged-upgrade' }}
+        id: download_baseline
+        continue-on-error: true
        uses: actions/download-artifact@v8
        with:
          name: openclaw-cross-os-release-checks-baseline-${{ github.run_id }}
          path: ${{ runner.temp }}/openclaw-cross-os-release-checks/baseline

+      - name: Retry baseline artifact download
+        if: ${{ matrix.suite == 'packaged-upgrade' && steps.download_baseline.outcome == 'failure' }}
+        uses: actions/download-artifact@v8
+        with:
+          name: openclaw-cross-os-release-checks-baseline-${{ github.run_id }}
+          path: ${{ runner.temp }}/openclaw-cross-os-release-checks/baseline
+
+      - name: Verify release-check inputs
+        shell: bash
+        env:
+          CANDIDATE_TGZ: ${{ runner.temp }}/openclaw-cross-os-release-checks/candidate/${{ needs.prepare.outputs.candidate_file_name }}
+          BASELINE_TGZ: ${{ runner.temp }}/openclaw-cross-os-release-checks/baseline/${{ needs.prepare.outputs.baseline_file_name }}
+          OUTPUT_DIR: ${{ runner.temp }}/openclaw-cross-os-release-checks/${{ matrix.artifact_name }}-${{ matrix.suite }}
+          SUITE: ${{ matrix.suite }}
+        run: |
+          mkdir -p "${OUTPUT_DIR}"
+          if [[ ! -f "${CANDIDATE_TGZ}" ]]; then
+            echo "::error::candidate artifact missing: ${CANDIDATE_TGZ}"
+            exit 1
+          fi
+          if [[ "${SUITE}" == "packaged-upgrade" ]] && [[ ! -f "${BASELINE_TGZ}" ]]; then
+            echo "::error::baseline artifact missing: ${BASELINE_TGZ}"
+            exit 1
+          fi
+
      - name: Run cross-OS release checks
        shell: bash
        env:
@@ -615,7 +651,8 @@ jobs:
          if [[ -f "${SUMMARY_PATH}" ]]; then
            cat "${SUMMARY_PATH}" >> "$GITHUB_STEP_SUMMARY"
          else
-            echo "No summary generated." >> "$GITHUB_STEP_SUMMARY"
+            mkdir -p "$(dirname "${SUMMARY_PATH}")"
+            echo "No summary generated." | tee "${SUMMARY_PATH}" >> "$GITHUB_STEP_SUMMARY"
          fi

      - name: Upload release-check artifacts
--- a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
+++ b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
@@ -102,6 +102,11 @@ on:
          - beta
          - stable
          - full
+      use_github_hosted_runners:
+        description: Use GitHub-hosted runners instead of Blacksmith runners
+        required: false
+        default: false
+        type: boolean
      advisory:
        description: Treat failures as advisory for the caller
        required: false
@@ -208,6 +213,11 @@ on:
        required: false
        default: stable
        type: string
+      use_github_hosted_runners:
+        description: Use GitHub-hosted runners instead of Blacksmith runners
+        required: false
+        default: true
+        type: boolean
    secrets:
      OPENAI_API_KEY:
        required: false
@@ -219,6 +229,8 @@ on:
        required: false
      ANTHROPIC_API_TOKEN:
        required: false
+      FACTORY_API_KEY:
+        required: false
      BYTEPLUS_API_KEY:
        required: false
      CEREBRAS_API_KEY:
@@ -472,7 +484,7 @@ jobs:
    needs: validate_selected_ref
    if: inputs.include_live_suites && !inputs.live_models_only && (inputs.live_suite_filter == '' || inputs.live_suite_filter == 'live-cache')
    continue-on-error: ${{ inputs.advisory }}
-    runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }}
+    runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }}
    timeout-minutes: 20
    env:
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -522,7 +534,7 @@ jobs:
    needs: validate_selected_ref
    if: inputs.include_repo_e2e && inputs.live_suite_filter == ''
    continue-on-error: ${{ inputs.advisory }}
-    runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }}
+    runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }}
    timeout-minutes: ${{ inputs.release_test_profile == 'full' && 90 || 60 }}
    env:
      OPENCLAW_VITEST_MAX_WORKERS: "2"
@@ -554,7 +566,7 @@ jobs:
    needs: validate_selected_ref
    if: inputs.include_repo_e2e && (inputs.live_suite_filter == '' || inputs.live_suite_filter == 'openshell-e2e')
    continue-on-error: ${{ inputs.advisory }}
-    runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }}
+    runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }}
    timeout-minutes: ${{ matrix.timeout_minutes }}
    strategy:
      fail-fast: false
@@ -628,7 +640,7 @@ jobs:
    if: inputs.include_release_path_suites && inputs.docker_lanes == ''
    name: Docker E2E (${{ matrix.label }})
    continue-on-error: ${{ inputs.advisory }}
-    runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }}
+    runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }}
    timeout-minutes: ${{ matrix.timeout_minutes }}
    strategy:
      fail-fast: false
@@ -696,6 +708,7 @@ jobs:
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
      ANTHROPIC_API_TOKEN: ${{ secrets.ANTHROPIC_API_TOKEN }}
      ANTHROPIC_API_KEY_OLD: ${{ secrets.ANTHROPIC_API_KEY_OLD }}
+      FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
      BYTEPLUS_API_KEY: ${{ secrets.BYTEPLUS_API_KEY }}
      CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }}
      DEEPINFRA_API_KEY: ${{ secrets.DEEPINFRA_API_KEY }}
@@ -756,6 +769,7 @@ jobs:
        if: contains(matrix.profiles, inputs.release_test_profile)
        uses: actions/checkout@v6
        with:
+          persist-credentials: false
          ref: ${{ needs.validate_selected_ref.outputs.selected_sha }}
          fetch-depth: 1

@@ -763,17 +777,17 @@ jobs:
        if: contains(matrix.profiles, inputs.release_test_profile)
        uses: actions/checkout@v6
        with:
+          persist-credentials: false
          ref: ${{ github.sha }}
          fetch-depth: 1
          path: .release-harness

      - name: Log in to GHCR for shared Docker E2E image
        if: contains(matrix.profiles, inputs.release_test_profile)
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ github.token }}
+        run: bash .release-harness/scripts/ci-docker-login-ghcr.sh
+        env:
+          GHCR_USERNAME: ${{ github.actor }}
+          GITHUB_TOKEN: ${{ github.token }}

      - name: Setup Node environment
        if: contains(matrix.profiles, inputs.release_test_profile)
@@ -841,15 +855,35 @@ jobs:
        run: |
          set -euo pipefail
          credentials=",$CREDENTIALS,"
-          if [[ "$credentials" == *",openai,"* ]]; then
-            [[ -n "${OPENAI_API_KEY:-}" ]] || {
-              echo "OPENAI_API_KEY is required for selected Docker E2E lanes." >&2
-              exit 1
-            }
-          fi
-          if [[ "$credentials" == *",anthropic,"* && -z "${ANTHROPIC_API_TOKEN:-}" && -z "${ANTHROPIC_API_KEY:-}" ]]; then
-            echo "ANTHROPIC_API_TOKEN or ANTHROPIC_API_KEY is required for selected Docker E2E lanes." >&2
+          require_any() {
+            local label="$1"
+            shift
+            local key
+            for key in "$@"; do
+              if [[ -n "${!key:-}" ]]; then
+                return 0
+              fi
+            done
+            echo "Missing credential for ${label}: expected one of $*" >&2
            exit 1
+          }
+          if [[ "$credentials" == *",openai,"* ]]; then
+            require_any OpenAI OPENAI_API_KEY
+          fi
+          if [[ "$credentials" == *",codex,"* ]]; then
+            require_any Codex OPENCLAW_CODEX_AUTH_JSON
+          fi
+          if [[ "$credentials" == *",anthropic,"* ]]; then
+            require_any Anthropic ANTHROPIC_API_TOKEN ANTHROPIC_API_KEY OPENCLAW_CLAUDE_CREDENTIALS_JSON OPENCLAW_CLAUDE_JSON
+          fi
+          if [[ "$credentials" == *",factory,"* ]]; then
+            require_any Factory FACTORY_API_KEY
+          fi
+          if [[ "$credentials" == *",gemini,"* ]]; then
+            require_any Gemini GEMINI_API_KEY GOOGLE_API_KEY OPENCLAW_GEMINI_SETTINGS_JSON
+          fi
+          if [[ "$credentials" == *",opencode,"* ]]; then
+            require_any OpenCode OPENCODE_API_KEY OPENCODE_ZEN_API_KEY
          fi

      - name: Run Docker E2E chunk
@@ -897,7 +931,7 @@ jobs:
    needs: validate_selected_ref
    if: inputs.docker_lanes != ''
    continue-on-error: ${{ inputs.advisory }}
-    runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-4vcpu-ubuntu-2404' }}
+    runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-4vcpu-ubuntu-2404' }}
    timeout-minutes: 5
    outputs:
      groups_json: ${{ steps.groups.outputs.groups_json }}
@@ -905,6 +939,7 @@ jobs:
      - name: Checkout trusted release harness
        uses: actions/checkout@v6
        with:
+          persist-credentials: false
          ref: ${{ github.sha }}
          fetch-depth: 1

@@ -925,7 +960,7 @@ jobs:
    if: inputs.docker_lanes != ''
    name: Docker E2E targeted lanes (${{ matrix.group.label }})
    continue-on-error: ${{ inputs.advisory }}
-    runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }}
+    runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }}
    timeout-minutes: 60
    strategy:
      fail-fast: false
@@ -937,6 +972,7 @@ jobs:
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
      ANTHROPIC_API_TOKEN: ${{ secrets.ANTHROPIC_API_TOKEN }}
      ANTHROPIC_API_KEY_OLD: ${{ secrets.ANTHROPIC_API_KEY_OLD }}
+      FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
      BYTEPLUS_API_KEY: ${{ secrets.BYTEPLUS_API_KEY }}
      CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }}
      DEEPINFRA_API_KEY: ${{ secrets.DEEPINFRA_API_KEY }}
@@ -995,22 +1031,23 @@ jobs:
      - name: Checkout selected ref
        uses: actions/checkout@v6
        with:
+          persist-credentials: false
          ref: ${{ needs.validate_selected_ref.outputs.selected_sha }}
          fetch-depth: 1

      - name: Checkout trusted release harness
        uses: actions/checkout@v6
        with:
+          persist-credentials: false
          ref: ${{ github.sha }}
          fetch-depth: 1
          path: .release-harness

      - name: Log in to GHCR for shared Docker E2E image
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ github.token }}
+        run: bash .release-harness/scripts/ci-docker-login-ghcr.sh
+        env:
+          GHCR_USERNAME: ${{ github.actor }}
+          GITHUB_TOKEN: ${{ github.token }}

      - name: Setup Node environment
        uses: ./.github/actions/setup-node-env
@@ -1078,15 +1115,35 @@ jobs:
        run: |
          set -euo pipefail
          credentials=",$CREDENTIALS,"
-          if [[ "$credentials" == *",openai,"* ]]; then
-            [[ -n "${OPENAI_API_KEY:-}" ]] || {
-              echo "OPENAI_API_KEY is required for selected Docker E2E lanes." >&2
-              exit 1
-            }
-          fi
-          if [[ "$credentials" == *",anthropic,"* && -z "${ANTHROPIC_API_TOKEN:-}" && -z "${ANTHROPIC_API_KEY:-}" ]]; then
-            echo "ANTHROPIC_API_TOKEN or ANTHROPIC_API_KEY is required for selected Docker E2E lanes." >&2
+          require_any() {
+            local label="$1"
+            shift
+            local key
+            for key in "$@"; do
+              if [[ -n "${!key:-}" ]]; then
+                return 0
+              fi
+            done
+            echo "Missing credential for ${label}: expected one of $*" >&2
            exit 1
+          }
+          if [[ "$credentials" == *",openai,"* ]]; then
+            require_any OpenAI OPENAI_API_KEY
+          fi
+          if [[ "$credentials" == *",codex,"* ]]; then
+            require_any Codex OPENCLAW_CODEX_AUTH_JSON
+          fi
+          if [[ "$credentials" == *",anthropic,"* ]]; then
+            require_any Anthropic ANTHROPIC_API_TOKEN ANTHROPIC_API_KEY OPENCLAW_CLAUDE_CREDENTIALS_JSON OPENCLAW_CLAUDE_JSON
+          fi
+          if [[ "$credentials" == *",factory,"* ]]; then
+            require_any Factory FACTORY_API_KEY
+          fi
+          if [[ "$credentials" == *",gemini,"* ]]; then
+            require_any Gemini GEMINI_API_KEY GOOGLE_API_KEY OPENCLAW_GEMINI_SETTINGS_JSON
+          fi
+          if [[ "$credentials" == *",opencode,"* ]]; then
+            require_any OpenCode OPENCODE_API_KEY OPENCODE_ZEN_API_KEY
          fi

      - name: Run targeted Docker E2E lanes
@@ -1135,7 +1192,7 @@ jobs:
    if: inputs.include_openwebui && !inputs.include_release_path_suites && inputs.docker_lanes == ''
    name: Docker E2E (openwebui)
    continue-on-error: ${{ inputs.advisory }}
-    runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }}
+    runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }}
    timeout-minutes: 60
    env:
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -1162,11 +1219,10 @@ jobs:
          path: .release-harness

      - name: Log in to GHCR for shared Docker E2E image
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ github.token }}
+        run: bash .release-harness/scripts/ci-docker-login-ghcr.sh
+        env:
+          GHCR_USERNAME: ${{ github.actor }}
+          GITHUB_TOKEN: ${{ github.token }}

      - name: Setup Node environment
        uses: ./.github/actions/setup-node-env
@@ -1262,7 +1318,7 @@ jobs:
    needs: validate_selected_ref
    if: inputs.include_release_path_suites || inputs.include_openwebui || inputs.docker_lanes != ''
    continue-on-error: ${{ inputs.advisory }}
-    runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }}
+    runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }}
    timeout-minutes: ${{ inputs.release_test_profile == 'full' && 90 || 60 }}
    permissions:
      actions: read
@@ -1421,11 +1477,10 @@ jobs:

      - name: Log in to GHCR
        if: steps.plan.outputs.needs_e2e_image == '1'
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ github.token }}
+        run: bash .release-harness/scripts/ci-docker-login-ghcr.sh
+        env:
+          GHCR_USERNAME: ${{ github.actor }}
+          GITHUB_TOKEN: ${{ github.token }}

      - name: Check existing shared Docker E2E images
        id: image_exists
@@ -1506,7 +1561,7 @@ jobs:
    needs: validate_selected_ref
    if: inputs.include_live_suites && (inputs.live_suite_filter == '' || startsWith(inputs.live_suite_filter, 'live-') || startsWith(inputs.live_suite_filter, 'docker-live-models'))
    continue-on-error: ${{ inputs.advisory }}
-    runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }}
+    runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }}
    timeout-minutes: 60
    permissions:
      contents: read
@@ -1536,11 +1591,10 @@ jobs:
          echo "Shared live-test image: \`${live_image}\`" >> "$GITHUB_STEP_SUMMARY"

      - name: Log in to GHCR
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ github.token }}
+        run: bash scripts/ci-docker-login-ghcr.sh
+        env:
+          GHCR_USERNAME: ${{ github.actor }}
+          GITHUB_TOKEN: ${{ github.token }}

      - name: Check existing shared live-test image
        id: image_exists
@@ -1580,7 +1634,7 @@ jobs:
    needs: [validate_selected_ref, prepare_live_test_image]
    if: inputs.include_live_suites && inputs.live_model_providers == '' && (inputs.live_suite_filter == '' || inputs.live_suite_filter == 'docker-live-models')
    continue-on-error: ${{ inputs.advisory }}
-    runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }}
+    runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }}
    timeout-minutes: 45
    strategy:
      fail-fast: false
@@ -1682,11 +1736,10 @@ jobs:

      - name: Log in to GHCR
        if: contains(matrix.profiles, inputs.release_test_profile)
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ github.token }}
+        run: bash .release-harness/scripts/ci-docker-login-ghcr.sh
+        env:
+          GHCR_USERNAME: ${{ github.actor }}
+          GITHUB_TOKEN: ${{ github.token }}

      - name: Validate provider credential
        if: contains(matrix.profiles, inputs.release_test_profile)
@@ -1732,7 +1785,7 @@ jobs:
    needs: [validate_selected_ref, prepare_live_test_image]
    if: inputs.include_live_suites && inputs.live_model_providers != '' && (inputs.live_suite_filter == '' || inputs.live_suite_filter == 'docker-live-models')
    continue-on-error: ${{ inputs.advisory }}
-    runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }}
+    runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }}
    timeout-minutes: 45
    env:
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -1804,7 +1857,6 @@ jobs:
          normalize_provider() {
            local value="${1,,}"
            case "$value" in
-              z.ai|z-ai) echo "zai" ;;
              opencode|opencode-go) echo "opencode-go" ;;
              open-router|openrouter) echo "openrouter" ;;
              *) echo "$value" ;;
@@ -1857,11 +1909,10 @@ jobs:
        run: bash scripts/ci-hydrate-live-auth.sh

      - name: Log in to GHCR
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ github.token }}
+        run: bash .release-harness/scripts/ci-docker-login-ghcr.sh
+        env:
+          GHCR_USERNAME: ${{ github.actor }}
+          GITHUB_TOKEN: ${{ github.token }}

      - name: Validate provider credentials
        shell: bash
@@ -1907,7 +1958,7 @@ jobs:
    needs: validate_selected_ref
    if: inputs.include_live_suites && !inputs.live_models_only && (inputs.live_suite_filter == '' || (startsWith(inputs.live_suite_filter, 'native-live-') && !startsWith(inputs.live_suite_filter, 'native-live-extensions-media') && inputs.live_suite_filter != 'native-live-extensions-a-k'))
    continue-on-error: ${{ inputs.advisory }}
-    runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }}
+    runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }}
    timeout-minutes: ${{ matrix.timeout_minutes }}
    strategy:
      fail-fast: false
@@ -1935,7 +1986,7 @@ jobs:
          - suite_id: native-live-src-gateway-profiles-anthropic-opus
            suite_group: native-live-src-gateway-profiles-anthropic
            label: Native live gateway profiles Anthropic Opus
-            command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-7 node .release-harness/scripts/test-live-shard.mjs native-live-src-gateway-profiles
+            command: OPENCLAW_LIVE_GATEWAY_THINKING=low OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-7 node .release-harness/scripts/test-live-shard.mjs native-live-src-gateway-profiles
            timeout_minutes: 30
            profile_env_only: false
            advisory: true
@@ -1943,7 +1994,7 @@ jobs:
          - suite_id: native-live-src-gateway-profiles-anthropic-sonnet-haiku
            suite_group: native-live-src-gateway-profiles-anthropic
            label: Native live gateway profiles Anthropic Sonnet/Haiku
-            command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-sonnet-4-6,anthropic/claude-haiku-4-5 node .release-harness/scripts/test-live-shard.mjs native-live-src-gateway-profiles
+            command: OPENCLAW_LIVE_GATEWAY_THINKING=low OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-sonnet-4-6,anthropic/claude-haiku-4-5 node .release-harness/scripts/test-live-shard.mjs native-live-src-gateway-profiles
            timeout_minutes: 30
            profile_env_only: false
            advisory: true
@@ -2209,6 +2260,7 @@ jobs:
        env:
          OPENCLAW_LIVE_COMMAND: ${{ matrix.command }}
          OPENCLAW_LIVE_SUITE_ADVISORY: ${{ matrix.advisory }}
+        shell: bash
        run: |
          set +e
          bash .release-harness/scripts/ci-live-command-retry.sh
@@ -2228,7 +2280,7 @@ jobs:
    needs: [validate_selected_ref, prepare_live_test_image]
    if: inputs.include_live_suites && !inputs.live_models_only && (inputs.live_suite_filter == '' || startsWith(inputs.live_suite_filter, 'live-'))
    continue-on-error: ${{ inputs.advisory }}
-    runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }}
+    runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }}
    timeout-minutes: ${{ matrix.timeout_minutes }}
    strategy:
      fail-fast: false
@@ -2236,49 +2288,49 @@ jobs:
        include:
          - suite_id: live-gateway-docker
            label: Docker live gateway OpenAI
-            command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=openai OPENCLAW_LIVE_GATEWAY_MAX_MODELS=2 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=30000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=60000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 25m bash .release-harness/scripts/test-live-gateway-models-docker.sh
-            timeout_minutes: 30
+            command: OPENCLAW_LIVE_GATEWAY_THINKING=low OPENCLAW_LIVE_GATEWAY_PROVIDERS=openai OPENCLAW_LIVE_GATEWAY_MODELS=openai/gpt-5.5 OPENCLAW_LIVE_GATEWAY_MAX_MODELS=1 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=90000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=600000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 35m bash .release-harness/scripts/test-live-gateway-models-docker.sh
+            timeout_minutes: 40
            profile_env_only: false
            profiles: beta minimum stable full
          - suite_id: live-gateway-anthropic-docker
            label: Docker live gateway Anthropic
-            command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MAX_MODELS=2 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=30000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=60000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 25m bash .release-harness/scripts/test-live-gateway-models-docker.sh
-            timeout_minutes: 30
+            command: OPENCLAW_LIVE_GATEWAY_THINKING=low OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-sonnet-4-6,anthropic/claude-haiku-4-5 OPENCLAW_LIVE_GATEWAY_MAX_MODELS=2 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=90000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=600000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 35m bash .release-harness/scripts/test-live-gateway-models-docker.sh
+            timeout_minutes: 40
            profile_env_only: false
            profiles: stable full
          - suite_id: live-gateway-google-docker
            label: Docker live gateway Google
-            command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=google OPENCLAW_LIVE_GATEWAY_MODELS=google/gemini-3.1-pro-preview,google/gemini-3-flash-preview OPENCLAW_LIVE_GATEWAY_MAX_MODELS=2 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=30000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=60000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 25m bash .release-harness/scripts/test-live-gateway-models-docker.sh
-            timeout_minutes: 30
+            command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=google OPENCLAW_LIVE_GATEWAY_MODELS=google/gemini-3.1-pro-preview,google/gemini-3-flash-preview OPENCLAW_LIVE_GATEWAY_MAX_MODELS=2 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=90000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=180000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 35m bash .release-harness/scripts/test-live-gateway-models-docker.sh
+            timeout_minutes: 40
            profile_env_only: false
            profiles: stable full
          - suite_id: live-gateway-minimax-docker
            label: Docker live gateway MiniMax
-            command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=minimax,minimax-portal OPENCLAW_LIVE_GATEWAY_MAX_MODELS=1 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=30000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=60000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 25m bash .release-harness/scripts/test-live-gateway-models-docker.sh
-            timeout_minutes: 30
+            command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=minimax,minimax-portal OPENCLAW_LIVE_GATEWAY_MAX_MODELS=1 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=90000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=180000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 35m bash .release-harness/scripts/test-live-gateway-models-docker.sh
+            timeout_minutes: 40
            profile_env_only: false
            profiles: stable full
          - suite_id: live-gateway-advisory-docker-deepseek-fireworks
            suite_group: live-gateway-advisory-docker
            label: Docker live gateway advisory DeepSeek/Fireworks
-            command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=deepseek,fireworks OPENCLAW_LIVE_GATEWAY_MAX_MODELS=2 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=30000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=60000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 25m bash .release-harness/scripts/test-live-gateway-models-docker.sh
-            timeout_minutes: 30
+            command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=deepseek,fireworks OPENCLAW_LIVE_GATEWAY_MAX_MODELS=2 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=90000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=180000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 35m bash .release-harness/scripts/test-live-gateway-models-docker.sh
+            timeout_minutes: 40
            profile_env_only: false
            advisory: true
            profiles: full
          - suite_id: live-gateway-advisory-docker-opencode-openrouter
            suite_group: live-gateway-advisory-docker
            label: Docker live gateway advisory OpenCode/OpenRouter
-            command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=opencode-go,openrouter OPENCLAW_LIVE_GATEWAY_MAX_MODELS=2 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=30000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=60000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 25m bash .release-harness/scripts/test-live-gateway-models-docker.sh
-            timeout_minutes: 30
+            command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=opencode-go,openrouter OPENCLAW_LIVE_GATEWAY_MAX_MODELS=2 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=90000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=180000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 35m bash .release-harness/scripts/test-live-gateway-models-docker.sh
+            timeout_minutes: 40
            profile_env_only: false
            advisory: true
            profiles: full
          - suite_id: live-gateway-advisory-docker-xai-zai
            suite_group: live-gateway-advisory-docker
            label: Docker live gateway advisory xAI/Z.ai
-            command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=xai,zai OPENCLAW_LIVE_GATEWAY_MAX_MODELS=2 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=30000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=60000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 25m bash .release-harness/scripts/test-live-gateway-models-docker.sh
-            timeout_minutes: 30
+            command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=xai,zai OPENCLAW_LIVE_GATEWAY_MAX_MODELS=2 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=90000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=180000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --foreground --kill-after=30s 35m bash .release-harness/scripts/test-live-gateway-models-docker.sh
+            timeout_minutes: 40
            profile_env_only: false
            advisory: true
            profiles: full
@@ -2386,11 +2438,10 @@ jobs:

      - name: Log in to GHCR
        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id || (inputs.live_suite_filter == 'live-gateway-advisory-docker' && startsWith(matrix.suite_id, 'live-gateway-advisory-docker-')))
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ github.token }}
+        run: bash .release-harness/scripts/ci-docker-login-ghcr.sh
+        env:
+          GHCR_USERNAME: ${{ github.actor }}
+          GITHUB_TOKEN: ${{ github.token }}

      - name: Configure suite-specific env
        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id || (inputs.live_suite_filter == 'live-gateway-advisory-docker' && startsWith(matrix.suite_id, 'live-gateway-advisory-docker-')))
@@ -2428,6 +2479,7 @@ jobs:
        env:
          OPENCLAW_LIVE_COMMAND: ${{ matrix.command }}
          OPENCLAW_LIVE_SUITE_ADVISORY: ${{ matrix.advisory }}
+        shell: bash
        run: |
          set +e
          bash .release-harness/scripts/ci-live-command-retry.sh
@@ -2447,7 +2499,7 @@ jobs:
    needs: validate_selected_ref
    if: inputs.include_live_suites && !inputs.live_models_only && (inputs.live_suite_filter == '' || startsWith(inputs.live_suite_filter, 'native-live-extensions-media') || inputs.live_suite_filter == 'native-live-extensions-a-k')
    continue-on-error: ${{ inputs.advisory }}
-    runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }}
+    runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }}
    container:
      image: ghcr.io/openclaw/openclaw-live-media-runner:ubuntu-24.04
      credentials:
@@ -2615,6 +2667,7 @@ jobs:
        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id || (inputs.live_suite_filter == 'native-live-extensions-media-video' && startsWith(matrix.suite_id, 'native-live-extensions-media-video-')))
        env:
          OPENCLAW_LIVE_SUITE_ADVISORY: ${{ matrix.advisory }}
+        shell: bash
        run: |
          set +e
          ${{ matrix.command }}
--- a/.github/workflows/openclaw-npm-release.yml
+++ b/.github/workflows/openclaw-npm-release.yml
@@ -35,7 +35,7 @@ on:
          - latest

 concurrency:
-  group: openclaw-npm-release-${{ github.event_name == 'workflow_dispatch' && format('{0}-{1}', inputs.tag, inputs.npm_dist_tag) || github.ref }}
+  group: ${{ github.event_name == 'workflow_dispatch' && inputs.preflight_only && format('openclaw-npm-release-{0}-{1}-preflight', inputs.tag, inputs.npm_dist_tag) || github.event_name == 'workflow_dispatch' && format('openclaw-npm-release-{0}-{1}-publish-{2}', inputs.tag, inputs.npm_dist_tag, github.run_id) || format('openclaw-npm-release-{0}', github.ref) }}
  cancel-in-progress: ${{ github.event_name == 'workflow_dispatch' && inputs.preflight_only && inputs.npm_dist_tag == 'alpha' }}

 env:
@@ -390,6 +390,8 @@ jobs:

      - name: Require preflight artifact promotion on real publish
        env:
+          RELEASE_TAG: ${{ inputs.tag }}
+          RELEASE_NPM_DIST_TAG: ${{ inputs.npm_dist_tag }}
          PREFLIGHT_RUN_ID: ${{ inputs.preflight_run_id }}
          FULL_RELEASE_VALIDATION_RUN_ID: ${{ inputs.full_release_validation_run_id }}
          RELEASE_PUBLISH_RUN_ID: ${{ inputs.release_publish_run_id }}
@@ -400,8 +402,12 @@ jobs:
            exit 1
          fi
          if [[ -z "${FULL_RELEASE_VALIDATION_RUN_ID}" ]]; then
-            echo "Real publish requires full_release_validation_run_id from a successful Full Release Validation run." >&2
-            exit 1
+            if [[ "${RELEASE_TAG}" == *"-beta."* && "${RELEASE_NPM_DIST_TAG}" == "beta" ]]; then
+              echo "::warning::Beta publish is proceeding from npm preflight only; full release validation remains required before stable/latest promotion."
+            else
+              echo "Real publish requires full_release_validation_run_id from a successful Full Release Validation run." >&2
+              exit 1
+            fi
          fi
          if [[ -z "${RELEASE_PUBLISH_RUN_ID// }" && "${GITHUB_ACTOR}" == "github-actions[bot]" ]]; then
            echo "Workflow-dispatched real publish requires release_publish_run_id from the approved OpenClaw Release Publish workflow." >&2
@@ -511,21 +517,20 @@ jobs:
        env:
          GH_TOKEN: ${{ github.token }}
          PREFLIGHT_RUN_ID: ${{ inputs.preflight_run_id }}
-          EXPECTED_PREFLIGHT_BRANCH: ${{ github.ref_name }}
        run: |
          set -euo pipefail
          RUN_JSON="$(gh run view "$PREFLIGHT_RUN_ID" --repo "$GITHUB_REPOSITORY" --json workflowName,headBranch,event,conclusion,url)"
-          printf '%s' "$RUN_JSON" | node -e 'const fs = require("node:fs"); const run = JSON.parse(fs.readFileSync(0, "utf8")); const checks = [["workflowName", "OpenClaw NPM Release"], ["headBranch", process.env.EXPECTED_PREFLIGHT_BRANCH], ["event", "workflow_dispatch"], ["conclusion", "success"]]; for (const [key, expected] of checks) { if (run[key] !== expected) { console.error(`Referenced npm preflight run ${process.env.PREFLIGHT_RUN_ID} must have ${key}=${expected}, got ${run[key] ?? "<missing>"}.`); process.exit(1); } } console.log(`Using npm preflight run ${process.env.PREFLIGHT_RUN_ID}: ${run.url}`);'
+          printf '%s' "$RUN_JSON" | node -e 'const fs = require("node:fs"); const run = JSON.parse(fs.readFileSync(0, "utf8")); const checks = [["workflowName", "OpenClaw NPM Release"], ["event", "workflow_dispatch"], ["conclusion", "success"]]; for (const [key, expected] of checks) { if (run[key] !== expected) { console.error(`Referenced npm preflight run ${process.env.PREFLIGHT_RUN_ID} must have ${key}=${expected}, got ${run[key] ?? "<missing>"}.`); process.exit(1); } } console.log(`Using npm preflight run ${process.env.PREFLIGHT_RUN_ID} from ${run.headBranch}: ${run.url}`);'

      - name: Verify full release validation run metadata
+        if: ${{ inputs.full_release_validation_run_id != '' }}
        env:
          GH_TOKEN: ${{ github.token }}
          FULL_RELEASE_VALIDATION_RUN_ID: ${{ inputs.full_release_validation_run_id }}
-          EXPECTED_WORKFLOW_BRANCH: ${{ github.ref_name }}
        run: |
          set -euo pipefail
          RUN_JSON="$(gh run view "$FULL_RELEASE_VALIDATION_RUN_ID" --repo "$GITHUB_REPOSITORY" --json workflowName,headBranch,event,status,conclusion,url)"
-          printf '%s' "$RUN_JSON" | node -e 'const fs = require("node:fs"); const run = JSON.parse(fs.readFileSync(0, "utf8")); const checks = [["workflowName", "Full Release Validation"], ["headBranch", process.env.EXPECTED_WORKFLOW_BRANCH], ["event", "workflow_dispatch"], ["status", "completed"], ["conclusion", "success"]]; for (const [key, expected] of checks) { if (run[key] !== expected) { console.error(`Referenced full release validation run ${process.env.FULL_RELEASE_VALIDATION_RUN_ID} must have ${key}=${expected}, got ${run[key] ?? "<missing>"}.`); process.exit(1); } } console.log(`Using full release validation run ${process.env.FULL_RELEASE_VALIDATION_RUN_ID}: ${run.url}`);'
+          printf '%s' "$RUN_JSON" | node -e 'const fs = require("node:fs"); const run = JSON.parse(fs.readFileSync(0, "utf8")); const checks = [["workflowName", "Full Release Validation"], ["event", "workflow_dispatch"], ["status", "completed"], ["conclusion", "success"]]; for (const [key, expected] of checks) { if (run[key] !== expected) { console.error(`Referenced full release validation run ${process.env.FULL_RELEASE_VALIDATION_RUN_ID} must have ${key}=${expected}, got ${run[key] ?? "<missing>"}.`); process.exit(1); } } console.log(`Using full release validation run ${process.env.FULL_RELEASE_VALIDATION_RUN_ID} from ${run.headBranch}: ${run.url}`);'

      - name: Download prepared npm tarball
        env:
@@ -581,6 +586,7 @@ jobs:
          download_preflight_artifact

      - name: Download full release validation manifest
+        if: ${{ inputs.full_release_validation_run_id != '' }}
        uses: actions/download-artifact@v8
        with:
          name: full-release-validation-${{ inputs.full_release_validation_run_id }}
@@ -646,6 +652,7 @@ jobs:
          fi

      - name: Verify full release validation target
+        if: ${{ inputs.full_release_validation_run_id != '' }}
        run: |
          set -euo pipefail
          EXPECTED_RELEASE_SHA="$(git rev-parse HEAD)"
--- a/.github/workflows/openclaw-performance.yml
+++ b/.github/workflows/openclaw-performance.yml
@@ -307,7 +307,36 @@ jobs:
            exit 1
          fi
          report_md="${report_json%.json}.md"
+          effective_status="$status" # starts as the raw status; reset to 0 below only for a tolerated PARTIAL verdict
+          if [[ "$FAIL_ON_REGRESSION" == "true" && "$status" != "0" ]]; then
+            if REPORT_JSON="$report_json" node <<'NODE'
+          const fs = require("node:fs");
+          const report = JSON.parse(fs.readFileSync(process.env.REPORT_JSON, "utf8"));
+          const statuses = report.summary?.statuses ?? {}; // NOTE(review): a missing summary.statuses becomes {} and so yields no non-PASS entries
+          const nonPassStatuses = Object.entries(statuses)
+            .filter(([status, count]) => status !== "PASS" && Number(count) > 0); // entries other than PASS with a positive count
+          const baselineRegressionCount =
+            Number(report.baseline?.comparison?.regressionCount ?? report.gate?.baseline?.regressionCount ?? 0); // first value present, else 0
+          const gate = report.gate;
+          const toleratedPartial = // true only when all four conditions below hold
+            gate?.verdict === "PARTIAL" &&
+            Number(gate.blockingCount ?? 0) === 0 &&
+            baselineRegressionCount === 0 &&
+            nonPassStatuses.length === 0;
+          if (!toleratedPartial) {
+            process.exit(1); // non-zero exit skips the then-branch, so effective_status keeps the original status
+          }
+          NODE
+            then
+              effective_status=0
+              {
+                echo "Kova returned a partial release-gate verdict for filtered performance coverage, but all selected scenarios passed and no baseline regression was reported."
+                echo
+              } >> "$GITHUB_STEP_SUMMARY"
+            fi
+          fi
          echo "status=$status" >> "$GITHUB_OUTPUT"
+          echo "effective_status=$effective_status" >> "$GITHUB_OUTPUT"
          echo "report_json=$report_json" >> "$GITHUB_OUTPUT"
          echo "report_md=$report_md" >> "$GITHUB_OUTPUT"

@@ -344,8 +373,43 @@ jobs:
          EOF
          cat "$summary_path" >> "$GITHUB_STEP_SUMMARY"

-          if [[ "$FAIL_ON_REGRESSION" == "true" && "$status" != "0" ]]; then
-            exit "$status"
+          if [[ "$FAIL_ON_REGRESSION" == "true" && "$effective_status" != "0" ]]; then
+            exit "$effective_status"
+          fi
+
+      - name: Fetch previous source performance baseline
+        if: ${{ steps.lane.outputs.run == 'true' && matrix.lane == 'mock-provider' && steps.clawgrit.outputs.present == 'true' }}
+        env:
+          CLAWGRIT_REPORTS_TOKEN: ${{ secrets.CLAWGRIT_REPORTS_TOKEN }}
+        shell: bash
+        run: |
+          set -euo pipefail
+          reports_root=".artifacts/clawgrit-baseline"
+          mkdir -p "$reports_root"
+          git -C "$reports_root" init -b main
+          git -C "$reports_root" remote add origin "https://github.com/openclaw/clawgrit-reports.git" # token-free URL: the secret must never be written to .git/config under .artifacts
+          if ! git -C "$reports_root" -c "http.https://github.com/.extraheader=AUTHORIZATION: basic $(printf 'x-access-token:%s' "$CLAWGRIT_REPORTS_TOKEN" | base64 | tr -d '\n')" fetch --depth=1 origin main; then # one-shot auth header, not persisted
+            echo "No previous source performance baseline could be fetched." >> "$GITHUB_STEP_SUMMARY"
+            exit 0 # best-effort: a failed fetch ends this step successfully without a baseline
+          fi
+          git -C "$reports_root" checkout -B main FETCH_HEAD
+          ref_slug="$(printf '%s' "${TESTED_REF}" | tr -c 'A-Za-z0-9._-' '-')" # every character outside A-Za-z0-9._- becomes '-'
+          pointer="${reports_root}/openclaw-performance/${ref_slug}/latest-mock-provider.json"
+          if [[ ! -f "$pointer" ]]; then
+            echo "No previous source performance baseline exists for ${TESTED_REF}." >> "$GITHUB_STEP_SUMMARY"
+            exit 0 # best-effort: no pointer file for this ref
+          fi
+          if ! latest_path="$(node -e "const fs=require('node:fs'); const data=JSON.parse(fs.readFileSync(process.argv[1],'utf8')); const value=String(data.path || ''); if (!/^openclaw-performance\\/[A-Za-z0-9._-]+\\/[0-9]+-[0-9]+\\/mock-provider$/u.test(value)) process.exit(1); process.stdout.write(value);" "$pointer")"; then
+            echo "Previous source performance baseline pointer is invalid." >> "$GITHUB_STEP_SUMMARY"
+            exit 0 # best-effort: pointer JSON unreadable or its path failed the pattern check
+          fi
+          baseline_source="${reports_root}/${latest_path}/source"
+          if [[ -d "$baseline_source" ]]; then
+            baseline_source="$(realpath "$baseline_source")" # absolute path, exported to later steps through GITHUB_ENV
+            echo "SOURCE_PERF_BASELINE_DIR=$baseline_source" >> "$GITHUB_ENV"
+            echo "Using source performance baseline: ${latest_path}/source" >> "$GITHUB_STEP_SUMMARY"
+          else
+            echo "Previous source performance baseline has no source directory." >> "$GITHUB_STEP_SUMMARY"
          fi

      - name: Run OpenClaw source performance probes
@@ -359,7 +423,7 @@ jobs:
          fi

          mkdir -p "$SOURCE_PERF_DIR/mock-hello"
-          if ! node -e "const fs=require('node:fs'); const scripts=require('./package.json').scripts||{}; process.exit(scripts['test:gateway:cpu-scenarios'] && scripts.openclaw && fs.existsSync('scripts/bench-cli-startup.ts') ? 0 : 1)"; then
+          if ! node -e "const fs=require('node:fs'); const scripts=require('./package.json').scripts||{}; process.exit(scripts['test:gateway:cpu-scenarios'] && scripts['test:extensions:memory'] && scripts.openclaw && fs.existsSync('scripts/bench-cli-startup.ts') && fs.existsSync('scripts/profile-extension-memory.mjs') ? 0 : 1)"; then
            cat > "$SOURCE_PERF_DIR/index.md" <<EOF
          # OpenClaw Source Performance

@@ -371,7 +435,7 @@ jobs:

          - Tested ref: ${TESTED_REF}
          - Tested SHA: ${TESTED_SHA}
-          - Required scripts: test:gateway:cpu-scenarios, openclaw, scripts/bench-cli-startup.ts
+          - Required scripts: test:gateway:cpu-scenarios, test:extensions:memory, openclaw, scripts/bench-cli-startup.ts, scripts/profile-extension-memory.mjs
          EOF
            cat "$SOURCE_PERF_DIR/index.md" >> "$GITHUB_STEP_SUMMARY"
            exit 0
@@ -391,6 +455,9 @@ jobs:
            --startup-case fiftyPlugins \
            --startup-case fiftyStartupLazyPlugins

+          pnpm test:extensions:memory \
+            -- --json "$SOURCE_PERF_DIR/extension-memory.json"
+
          for run_index in $(seq 1 "$source_runs"); do
            run_dir="$SOURCE_PERF_DIR/mock-hello/run-$(printf '%03d' "$run_index")"
            pnpm openclaw qa suite \
@@ -460,9 +527,13 @@ jobs:
          cleanup_gateway
          trap - EXIT

-          node "$PERFORMANCE_HELPER_DIR/scripts/openclaw-performance-source-summary.mjs" \
+          summary_args=(node "$PERFORMANCE_HELPER_DIR/scripts/openclaw-performance-source-summary.mjs" \
            --source-dir "$SOURCE_PERF_DIR" \
-            --output "$SOURCE_PERF_DIR/index.md"
+            --output "$SOURCE_PERF_DIR/index.md")
+          if [[ -n "${SOURCE_PERF_BASELINE_DIR:-}" && -d "$SOURCE_PERF_BASELINE_DIR" ]]; then
+            summary_args+=(--baseline-source-dir "$SOURCE_PERF_BASELINE_DIR")
+          fi
+          "${summary_args[@]}"

          cat "$SOURCE_PERF_DIR/index.md" >> "$GITHUB_STEP_SUMMARY"

--- a/.github/workflows/openclaw-release-checks.yml
+++ b/.github/workflows/openclaw-release-checks.yml
@@ -191,11 +191,21 @@ jobs:
        working-directory: source
        env:
          RELEASE_REF: ${{ inputs.ref }}
+          GITHUB_TOKEN: ${{ github.token }}
        run: |
          set -euo pipefail
          SELECTED_SHA="$(git rev-parse HEAD)"
-          git fetch --no-tags origin '+refs/heads/*:refs/remotes/origin/*'
-          git fetch --tags origin '+refs/tags/*:refs/tags/*'
+          git_fetch_with_checkout_auth() { # runs git fetch "$@", adding GitHub auth only when none is configured
+            if git config --get-all http.https://github.com/.extraheader >/dev/null; then # an extraheader for github.com already exists
+              git fetch "$@"
+              return
+            fi
+            local auth_header
+            auth_header="$(printf 'x-access-token:%s' "$GITHUB_TOKEN" | base64 | tr -d '\n')" # base64 of x-access-token:<token>, newlines stripped
+            git -c "http.https://github.com/.extraheader=AUTHORIZATION: basic ${auth_header}" fetch "$@" # -c applies to this invocation only
+          }
+          git_fetch_with_checkout_auth --no-tags origin '+refs/heads/*:refs/remotes/origin/*'
+          git_fetch_with_checkout_auth --tags origin '+refs/tags/*:refs/tags/*'

          if git tag --points-at "${SELECTED_SHA}" | grep -Eq '^v'; then
            exit 0
@@ -238,6 +248,7 @@ jobs:
        env:
          SELECTED_SHA: ${{ steps.ref.outputs.sha }}
          WORKFLOW_REF: ${{ github.ref }}
+          GITHUB_TOKEN: ${{ github.token }}
        run: |
          set -euo pipefail
          if [[ ! "${WORKFLOW_REF}" =~ ^refs/heads/tideclaw/alpha/[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{4}Z$ ]]; then
@@ -245,7 +256,16 @@ jobs:
            exit 1
          fi
          alpha_branch="${WORKFLOW_REF#refs/heads/}"
-          git fetch --no-tags origin "+refs/heads/${alpha_branch}:refs/remotes/origin/${alpha_branch}"
+          git_fetch_with_checkout_auth() { # runs git fetch "$@", adding GitHub auth only when none is configured
+            if git config --get-all http.https://github.com/.extraheader >/dev/null; then # an extraheader for github.com already exists
+              git fetch "$@"
+              return
+            fi
+            local auth_header
+            auth_header="$(printf 'x-access-token:%s' "$GITHUB_TOKEN" | base64 | tr -d '\n')" # base64 of x-access-token:<token>, newlines stripped
+            git -c "http.https://github.com/.extraheader=AUTHORIZATION: basic ${auth_header}" fetch "$@" # -c applies to this invocation only
+          }
+          git_fetch_with_checkout_auth --no-tags origin "+refs/heads/${alpha_branch}:refs/remotes/origin/${alpha_branch}"
          if ! git merge-base --is-ancestor "${SELECTED_SHA}" "refs/remotes/origin/${alpha_branch}"; then
            echo "Alpha release target ${SELECTED_SHA} must be reachable from ${alpha_branch}." >&2
            exit 1
@@ -474,7 +494,7 @@ jobs:
      - name: Checkout trusted workflow ref
        uses: actions/checkout@v6
        with:
-          persist-credentials: false
+          persist-credentials: true
          ref: ${{ github.ref_name }}
          fetch-depth: 0

@@ -596,6 +616,7 @@ jobs:
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
      ANTHROPIC_API_KEY_OLD: ${{ secrets.ANTHROPIC_API_KEY_OLD }}
      ANTHROPIC_API_TOKEN: ${{ secrets.ANTHROPIC_API_TOKEN }}
+      FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
      BYTEPLUS_API_KEY: ${{ secrets.BYTEPLUS_API_KEY }}
      CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }}
      DEEPINFRA_API_KEY: ${{ secrets.DEEPINFRA_API_KEY }}
@@ -688,6 +709,7 @@ jobs:
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
      ANTHROPIC_API_KEY_OLD: ${{ secrets.ANTHROPIC_API_KEY_OLD }}
      ANTHROPIC_API_TOKEN: ${{ secrets.ANTHROPIC_API_TOKEN }}
+      FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
      BYTEPLUS_API_KEY: ${{ secrets.BYTEPLUS_API_KEY }}
      CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }}
      DEEPINFRA_API_KEY: ${{ secrets.DEEPINFRA_API_KEY }}
@@ -763,7 +785,7 @@ jobs:
      - name: Checkout selected ref
        uses: actions/checkout@v6
        with:
-          persist-credentials: false
+          persist-credentials: true
          ref: ${{ needs.resolve_target.outputs.revision }}
          fetch-depth: 1

@@ -834,7 +856,7 @@ jobs:
      - name: Checkout selected ref
        uses: actions/checkout@v6
        with:
-          persist-credentials: false
+          persist-credentials: true
          ref: ${{ needs.resolve_target.outputs.revision }}
          fetch-depth: 1

@@ -899,7 +921,7 @@ jobs:
      - name: Checkout selected ref
        uses: actions/checkout@v6
        with:
-          persist-credentials: false
+          persist-credentials: true
          ref: ${{ needs.resolve_target.outputs.revision }}
          fetch-depth: 1

@@ -924,7 +946,7 @@ jobs:
            --concurrency "${QA_PARITY_CONCURRENCY}" \
            --model "${OPENCLAW_CI_OPENAI_MODEL}" \
            --alt-model "openai/gpt-5.5-alt" \
-            --runtime-pair pi,codex \
+            --runtime-pair openclaw,codex \
            --output-dir ".artifacts/qa-e2e/runtime-parity"

      - name: Run standard runtime parity tier
@@ -937,7 +959,7 @@ jobs:
            --concurrency "${QA_PARITY_CONCURRENCY}" \
            --model "${OPENCLAW_CI_OPENAI_MODEL}" \
            --alt-model "openai/gpt-5.5-alt" \
-            --runtime-pair pi,codex \
+            --runtime-pair openclaw,codex \
            --output-dir ".artifacts/qa-e2e/runtime-parity-standard"

      - name: Run soak runtime parity tier
@@ -951,7 +973,7 @@ jobs:
            --concurrency "${QA_PARITY_CONCURRENCY}" \
            --model "${OPENCLAW_CI_OPENAI_MODEL}" \
            --alt-model "openai/gpt-5.5-alt" \
-            --runtime-pair pi,codex \
+            --runtime-pair openclaw,codex \
            --output-dir ".artifacts/qa-e2e/runtime-parity-soak"

      - name: Generate runtime parity report
@@ -1014,7 +1036,7 @@ jobs:
      - name: Checkout selected ref
        uses: actions/checkout@v6
        with:
-          persist-credentials: false
+          persist-credentials: true
          ref: ${{ needs.resolve_target.outputs.revision }}
          fetch-depth: 1

@@ -1066,7 +1088,7 @@ jobs:
      - name: Checkout selected ref
        uses: actions/checkout@v6
        with:
-          persist-credentials: false
+          persist-credentials: true
          ref: ${{ needs.resolve_target.outputs.revision }}
          fetch-depth: 1

@@ -1145,7 +1167,7 @@ jobs:
      - name: Checkout selected ref
        uses: actions/checkout@v6
        with:
-          persist-credentials: false
+          persist-credentials: true
          ref: ${{ needs.resolve_target.outputs.revision }}
          fetch-depth: 1

@@ -1240,7 +1262,7 @@ jobs:
      - name: Checkout selected ref
        uses: actions/checkout@v6
        with:
-          persist-credentials: false
+          persist-credentials: true
          ref: ${{ needs.resolve_target.outputs.revision }}
          fetch-depth: 1

@@ -1338,7 +1360,7 @@ jobs:
      - name: Checkout selected ref
        uses: actions/checkout@v6
        with:
-          persist-credentials: false
+          persist-credentials: true
          ref: ${{ needs.resolve_target.outputs.revision }}
          fetch-depth: 1

@@ -1433,7 +1455,7 @@ jobs:
      - name: Checkout selected ref
        uses: actions/checkout@v6
        with:
-          persist-credentials: false
+          persist-credentials: true
          ref: ${{ needs.resolve_target.outputs.revision }}
          fetch-depth: 1

--- a/.github/workflows/openclaw-release-publish.yml
+++ b/.github/workflows/openclaw-release-publish.yml
@@ -265,7 +265,7 @@ jobs:
        run: |
          set -euo pipefail
          RUN_JSON="$(gh run view "$FULL_RELEASE_VALIDATION_RUN_ID" --repo "$GITHUB_REPOSITORY" --json workflowName,headBranch,event,status,conclusion,url)"
-          printf '%s' "$RUN_JSON" | node -e 'const fs = require("node:fs"); const run = JSON.parse(fs.readFileSync(0, "utf8")); const checks = [["workflowName", "Full Release Validation"], ["headBranch", process.env.EXPECTED_WORKFLOW_BRANCH], ["event", "workflow_dispatch"], ["status", "completed"], ["conclusion", "success"]]; for (const [key, expected] of checks) { if (run[key] !== expected) { console.error(`Referenced full release validation run ${process.env.FULL_RELEASE_VALIDATION_RUN_ID} must have ${key}=${expected}, got ${run[key] ?? "<missing>"}.`); process.exit(1); } } console.log(`Using full release validation run ${process.env.FULL_RELEASE_VALIDATION_RUN_ID}: ${run.url}`);'
+          printf '%s' "$RUN_JSON" | node -e 'const fs = require("node:fs"); const run = JSON.parse(fs.readFileSync(0, "utf8")); const checks = [["workflowName", "Full Release Validation"], ["event", "workflow_dispatch"], ["status", "completed"], ["conclusion", "success"]]; for (const [key, expected] of checks) { if (run[key] !== expected) { console.error(`Referenced full release validation run ${process.env.FULL_RELEASE_VALIDATION_RUN_ID} must have ${key}=${expected}, got ${run[key] ?? "<missing>"}.`); process.exit(1); } } const allowedBranches = new Set(["main", process.env.EXPECTED_WORKFLOW_BRANCH].filter(Boolean)); if (!allowedBranches.has(run.headBranch)) { console.error(`Referenced full release validation run ${process.env.FULL_RELEASE_VALIDATION_RUN_ID} must have headBranch in ${[...allowedBranches].join(", ")}, got ${run.headBranch ?? "<missing>"}.`); process.exit(1); } console.log(`Using full release validation run ${process.env.FULL_RELEASE_VALIDATION_RUN_ID}: ${run.url}`);'

          manifest="${RUNNER_TEMP}/full-release-validation-manifest/full-release-validation-manifest.json"
          if [[ ! -f "$manifest" ]]; then
--- a/.github/workflows/openclaw-scheduled-live-checks.yml
+++ b/.github/workflows/openclaw-scheduled-live-checks.yml
@@ -38,6 +38,7 @@ jobs:
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
      ANTHROPIC_API_KEY_OLD: ${{ secrets.ANTHROPIC_API_KEY_OLD }}
      ANTHROPIC_API_TOKEN: ${{ secrets.ANTHROPIC_API_TOKEN }}
+      FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
      BYTEPLUS_API_KEY: ${{ secrets.BYTEPLUS_API_KEY }}
      CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }}
      DEEPINFRA_API_KEY: ${{ secrets.DEEPINFRA_API_KEY }}
--- a/.github/workflows/opengrep-precise.yml
+++ b/.github/workflows/opengrep-precise.yml
@@ -44,7 +44,7 @@ jobs:
        uses: actions/checkout@v6
        with:
          ref: ${{ github.sha }}
-          fetch-depth: 1
+          fetch-depth: 0
          fetch-tags: false
          persist-credentials: false
          submodules: false
--- a/.github/workflows/package-acceptance.yml
+++ b/.github/workflows/package-acceptance.yml
@@ -17,6 +17,7 @@ on:
          - npm
          - ref
          - url
+          - trusted-url
          - artifact
      package_ref:
        description: Trusted package source ref when source=ref
@@ -29,12 +30,17 @@ on:
        default: openclaw@beta
        type: string
      package_url:
-        description: HTTPS .tgz URL when source=url
+        description: HTTPS .tgz URL when source=url or source=trusted-url
        required: false
        default: ""
        type: string
      package_sha256:
-        description: Expected package SHA-256; required for source=url
+        description: Expected package SHA-256; required for source=url or source=trusted-url
+        required: false
+        default: ""
+        type: string
+      trusted_source_id:
+        description: Named trusted source policy when source=trusted-url
        required: false
        default: ""
        type: string
@@ -111,7 +117,7 @@ on:
        default: main
        type: string
      source:
-        description: "Package candidate source: npm, ref, url, or artifact"
+        description: "Package candidate source: npm, ref, url, trusted-url, or artifact"
        required: true
        type: string
      package_ref:
@@ -125,12 +131,17 @@ on:
        default: openclaw@beta
        type: string
      package_url:
-        description: HTTPS .tgz URL when source=url
+        description: HTTPS .tgz URL when source=url or source=trusted-url
        required: false
        default: ""
        type: string
      package_sha256:
-        description: Expected package SHA-256; required for source=url
+        description: Expected package SHA-256; required for source=url or source=trusted-url
+        required: false
+        default: ""
+        type: string
+      trusted_source_id:
+        description: Named trusted source policy when source=trusted-url
        required: false
        default: ""
        type: string
@@ -180,6 +191,8 @@ on:
        default: ""
        type: string
    secrets:
+      OPENCLAW_TRUSTED_PACKAGE_TOKEN:
+        required: false
      OPENAI_API_KEY:
        required: false
      OPENAI_BASE_URL:
@@ -190,6 +203,8 @@ on:
        required: false
      ANTHROPIC_API_TOKEN:
        required: false
+      FACTORY_API_KEY:
+        required: false
      BYTEPLUS_API_KEY:
        required: false
      CEREBRAS_API_KEY:
@@ -353,6 +368,8 @@ jobs:
          PACKAGE_SPEC: ${{ inputs.package_spec }}
          PACKAGE_URL: ${{ inputs.package_url }}
          PACKAGE_SHA256: ${{ inputs.package_sha256 }}
+          TRUSTED_SOURCE_ID: ${{ inputs.trusted_source_id }}
+          OPENCLAW_TRUSTED_PACKAGE_TOKEN: ${{ secrets.OPENCLAW_TRUSTED_PACKAGE_TOKEN }}
        shell: bash
        run: |
          set -euo pipefail
@@ -367,6 +384,7 @@ jobs:
            --package-spec "$PACKAGE_SPEC" \
            --package-url "$PACKAGE_URL" \
            --package-sha256 "$PACKAGE_SHA256" \
+            --trusted-source-id "$TRUSTED_SOURCE_ID" \
            --artifact-dir "${artifact_dir:-.}" \
            --output-dir .artifacts/docker-e2e-package \
            --output-name openclaw-current.tgz \
@@ -488,6 +506,7 @@ jobs:
          PACKAGE_SHA256: ${{ steps.resolve.outputs.sha256 }}
          PACKAGE_VERSION: ${{ steps.resolve.outputs.package_version }}
          PACKAGE_REF: ${{ inputs.package_ref }}
+          TRUSTED_SOURCE_ID: ${{ inputs.trusted_source_id }}
          SOURCE: ${{ inputs.source }}
          SUITE_PROFILE: ${{ inputs.suite_profile }}
          WORKFLOW_REF: ${{ inputs.workflow_ref }}
@@ -504,6 +523,9 @@ jobs:
            if [[ "${SOURCE}" == "ref" ]]; then
              echo "- Package ref: \`${PACKAGE_REF}\`"
            fi
+            if [[ "${SOURCE}" == "trusted-url" ]]; then
+              echo "- Trusted source: \`${TRUSTED_SOURCE_ID}\`"
+            fi
            echo "- Version: \`${PACKAGE_VERSION}\`"
            echo "- SHA-256: \`${PACKAGE_SHA256}\`"
            echo "- Profile: \`${SUITE_PROFILE}\`"
@@ -541,6 +563,11 @@ jobs:
  docker_acceptance:
    name: Docker product acceptance
    needs: [resolve_package, package_integrity]
+    permissions:
+      actions: read
+      contents: read
+      packages: write
+      pull-requests: read
    uses: ./.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
    with:
      advisory: ${{ inputs.advisory }}
@@ -561,6 +588,7 @@ jobs:
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
      ANTHROPIC_API_KEY_OLD: ${{ secrets.ANTHROPIC_API_KEY_OLD }}
      ANTHROPIC_API_TOKEN: ${{ secrets.ANTHROPIC_API_TOKEN }}
+      FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
      BYTEPLUS_API_KEY: ${{ secrets.BYTEPLUS_API_KEY }}
      CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }}
      DEEPINFRA_API_KEY: ${{ secrets.DEEPINFRA_API_KEY }}
--- a/.github/workflows/plugin-prerelease.yml
+++ b/.github/workflows/plugin-prerelease.yml
@@ -52,7 +52,7 @@ jobs:
          ref: ${{ inputs.target_ref }}
          fetch-depth: 1
          fetch-tags: false
-          persist-credentials: false
+          persist-credentials: true
          submodules: false

      - name: Build plugin prerelease manifest
@@ -221,7 +221,7 @@ jobs:
          ref: ${{ needs.preflight.outputs.checkout_revision }}
          fetch-depth: 1
          fetch-tags: false
-          persist-credentials: false
+          persist-credentials: true
          submodules: false

      - name: Setup Node environment
@@ -257,7 +257,7 @@ jobs:
          ref: ${{ needs.preflight.outputs.checkout_revision }}
          fetch-depth: 1
          fetch-tags: false
-          persist-credentials: false
+          persist-credentials: true
          submodules: false

      - name: Setup Node environment
@@ -330,7 +330,7 @@ jobs:
          ref: ${{ needs.preflight.outputs.checkout_revision }}
          fetch-depth: 1
          fetch-tags: false
-          persist-credentials: false
+          persist-credentials: true
          submodules: false

      - name: Setup Node environment
@@ -344,7 +344,7 @@ jobs:
          OPENCLAW_EXTENSION_BATCH_PARALLEL: 2
          OPENCLAW_VITEST_MAX_WORKERS: 1
          OPENCLAW_EXTENSION_BATCH: ${{ matrix.extensions_csv }}
-        run: pnpm test:extensions:batch -- "$OPENCLAW_EXTENSION_BATCH"
+        run: pnpm test:extensions:batch "$OPENCLAW_EXTENSION_BATCH" -- --exclude extensions/codex/src/app-server/run-attempt.test.ts

  plugin-prerelease-inspector:
    permissions:
@@ -362,7 +362,7 @@ jobs:
          ref: ${{ needs.preflight.outputs.checkout_revision }}
          fetch-depth: 1
          fetch-tags: false
-          persist-credentials: false
+          persist-credentials: true
          submodules: false

      - name: Setup Node environment
--- a/.github/workflows/qa-live-transports-convex.yml
+++ b/.github/workflows/qa-live-transports-convex.yml
@@ -289,7 +289,7 @@ jobs:
            --concurrency "${QA_PARITY_CONCURRENCY}" \
            --model "${OPENCLAW_CI_OPENAI_MODEL}" \
            --alt-model "${OPENCLAW_CI_OPENAI_MODEL}" \
-            --runtime-pair pi,codex \
+            --runtime-pair openclaw,codex \
            --fast \
            --allow-failures \
            --output-dir "${output_dir}/runtime-suite"
--- a/.github/workflows/sandbox-common-smoke.yml
+++ b/.github/workflows/sandbox-common-smoke.yml
@@ -42,7 +42,7 @@ jobs:
        run: |
          set -euo pipefail

-          docker build -t openclaw-sandbox-smoke-base:bookworm-slim - <<'EOF'
+          timeout --kill-after=30s 5m docker build -t openclaw-sandbox-smoke-base:bookworm-slim - <<'EOF'
          FROM debian:bookworm-slim
          RUN useradd --create-home --shell /bin/bash sandbox
          USER sandbox
@@ -63,5 +63,5 @@ jobs:
            FINAL_USER=sandbox \
            scripts/sandbox-common-setup.sh

-          u="$(docker run --rm openclaw-sandbox-common-smoke:bookworm-slim sh -lc 'id -un')"
+          u="$(timeout --kill-after=30s 2m docker run --rm openclaw-sandbox-common-smoke:bookworm-slim sh -lc 'id -un')"
          test "$u" = "sandbox"
--- a/.github/workflows/tui-pty.yml
+++ b/.github/workflows/tui-pty.yml
@@ -38,4 +38,4 @@ jobs:
          install-bun: "false"

      - name: Run TUI PTY tests
-        run: timeout 120s node scripts/run-vitest.mjs run --config test/vitest/vitest.tui-pty.config.ts
+        run: timeout --kill-after=30s 120s node scripts/run-vitest.mjs run --config test/vitest/vitest.tui-pty.config.ts
--- a/.github/workflows/website-installer-sync.yml
+++ b/.github/workflows/website-installer-sync.yml
@@ -75,14 +75,14 @@ jobs:

      - name: install.sh in Docker
        run: |
-          docker run --rm \
+          timeout --kill-after=30s 20m docker run --rm \
            -v "$PWD/scripts/install.sh:/tmp/install.sh:ro" \
            node:24-bookworm-slim \
            bash -lc 'bash /tmp/install.sh --version latest && openclaw --version'

      - name: install-cli.sh in Docker
        run: |
-          docker run --rm \
+          timeout --kill-after=30s 20m docker run --rm \
            -e OPENCLAW_NO_ONBOARD=1 \
            -e OPENCLAW_NO_PROMPT=1 \
            -v "$PWD/scripts/install-cli.sh:/tmp/install-cli.sh:ro" \
--- a/.github/workflows/workflow-sanity.yml
+++ b/.github/workflows/workflow-sanity.yml
@@ -26,7 +26,16 @@ jobs:
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        env:
+          CHECKOUT_REPO: ${{ github.repository }}
+          CHECKOUT_SHA: ${{ github.sha }}
+        run: |
+          set -euo pipefail
+          git init "$GITHUB_WORKSPACE"
+          git -C "$GITHUB_WORKSPACE" config gc.auto 0
+          git -C "$GITHUB_WORKSPACE" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
+          git -C "$GITHUB_WORKSPACE" fetch --no-tags --depth=1 origin "+${CHECKOUT_SHA}:refs/remotes/origin/checkout"
+          git -C "$GITHUB_WORKSPACE" checkout --detach refs/remotes/origin/checkout

      - name: Fail on tabs in workflow files
        run: |
@@ -58,7 +67,16 @@ jobs:
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        env:
+          CHECKOUT_REPO: ${{ github.repository }}
+          CHECKOUT_SHA: ${{ github.sha }}
+        run: |
+          set -euo pipefail
+          git init "$GITHUB_WORKSPACE"
+          git -C "$GITHUB_WORKSPACE" config gc.auto 0
+          git -C "$GITHUB_WORKSPACE" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
+          git -C "$GITHUB_WORKSPACE" fetch --no-tags --depth=1 origin "+${CHECKOUT_SHA}:refs/remotes/origin/checkout"
+          git -C "$GITHUB_WORKSPACE" checkout --detach refs/remotes/origin/checkout

      - name: Install actionlint
        shell: bash
@@ -90,7 +108,16 @@ jobs:
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout
-        uses: actions/checkout@v6
+        env:
+          CHECKOUT_REPO: ${{ github.repository }}
+          CHECKOUT_SHA: ${{ github.sha }}
+        run: |
+          set -euo pipefail
+          git init "$GITHUB_WORKSPACE"
+          git -C "$GITHUB_WORKSPACE" config gc.auto 0
+          git -C "$GITHUB_WORKSPACE" remote add origin "https://github.com/${CHECKOUT_REPO}.git"
+          git -C "$GITHUB_WORKSPACE" fetch --no-tags --depth=1 origin "+${CHECKOUT_SHA}:refs/remotes/origin/checkout"
+          git -C "$GITHUB_WORKSPACE" checkout --detach refs/remotes/origin/checkout

      - name: Setup Node environment
        uses: ./.github/actions/setup-node-env
--- a/.gitignore
+++ b/.gitignore
@@ -128,7 +128,8 @@ mantis/
 !.agents/skills/control-ui-e2e/**
 !.agents/skills/gitcrawl/
 !.agents/skills/gitcrawl/**
-!.agents/skills/openclaw-docs/**
+!.agents/skills/technical-documentation/
+!.agents/skills/technical-documentation/**
 !.agents/skills/openclaw-refactor-docs/
 !.agents/skills/openclaw-refactor-docs/**
 !.agents/skills/openclaw-debugging/
@@ -167,6 +168,8 @@ mantis/
 !.agents/skills/tag-duplicate-prs-issues/**
 !.agents/skills/autoreview/
 !.agents/skills/autoreview/**
+.agents/skills/**/__pycache__/
+.agents/skills/**/*.py[cod]

 # Agent credentials and memory (NEVER COMMIT)
 /memory/
@@ -246,6 +249,7 @@ extensions/qa-lab/web/dist/
 # Generated bundled plugin runtime dependency manifests
 extensions/**/.openclaw-runtime-deps.json
 extensions/**/.openclaw-runtime-deps-stamp.json
+extensions/diffs-language-pack/assets/viewer-runtime.js

 # Output dir for scripts/run-opengrep.sh (local opengrep scans)
 /.opengrep-out/
--- a/.oxfmtrc.jsonc
+++ b/.oxfmtrc.jsonc
@@ -30,6 +30,7 @@
    "docker-compose.yml",
    "dist/",
    "docs/_layouts/",
+    "extensions/diffs/assets/viewer-runtime.js",
    "**/*.json",
    "node_modules/",
    "patches/",
--- a/.oxlintrc.json
+++ b/.oxlintrc.json
@@ -182,6 +182,7 @@
    "dist-runtime/",
    "docs/_layouts/",
    "extensions/diffs/assets/viewer-runtime.js",
+    "extensions/diffs-language-pack/assets/viewer-runtime.js",
    "node_modules/",
    "patches/",
    "pnpm-lock.yaml",
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -17,12 +17,34 @@ Skills own workflows; root owns hard policy and routing.
 - New channel/plugin/app/doc surface: update `.github/labeler.yml` + GH labels.
 - New `AGENTS.md`: add sibling `CLAUDE.md` symlink; edit `AGENTS.md` only.

+## ClawSweeper Review Policy
+
+- OpenClaw-specific review rules live here; generic ClawSweeper prompts stay repo-agnostic.
+- ClawSweeper-owned schema, labels, close reasons, protected-label gates, maintainer-item gates, and mutation rules live in `openclaw/clawsweeper`.
+- Review workers read this full root `AGENTS.md` before judging; no reliance on search snippets, `head`, partial ranges, local excerpts, or truncated copies. Then read every scoped `AGENTS.md` that owns touched paths.
+- Optional integrations, providers, channels, skill bundles, MCP surfaces, and service workflows route to plugins, ClawHub, or owner repos when current seams suffice. Keep core items for missing core/plugin APIs, bundled regressions, security/core hardening, or maintainer product decisions.
+- Plugin APIs, provider routing, auth/session state, persisted preferences, config loading, config/default additions, migrations, setup, startup checks, and fallback behavior are compatibility/upgrade-sensitive. Treat config breaks, new config/default surfaces, removed fallbacks, fail-closed changes, stricter validation, or new operator action as merge risk even with green CI when they can affect existing users, upgrades, provider/plugin behavior, or maintainer operations.
+- For PRs that add, remove, or change config/default surfaces with possible compatibility, upgrade, provider/plugin, operator, setup, startup, or fallback impact, ClawSweeper review should emit a `reviewMetrics` entry when practical. The metric should name the count and direction of the changes, such as added, changed, or removed config/default surfaces, and explain why the metric matters before merge. When the metric indicates concrete merge risk, also surface the concern in `risks`, use `mergeRiskLabels` when the risk matches the label rubric, make `bestSolution` name the desired pre-merge state, and ensure `labelJustifications` explain the specific reason rather than restating the label.
+- Review whole decision surfaces, not only the touched runtime, provider, channel, harness, plugin seam, or context path. Check sibling Codex/Pi-style runtimes, provider/model routing, channel delivery, gateway/protocol, plugin SDK, and context-management paths when relevant.
+- One-sided fixes need sibling-surface proof, an explanation for why siblings are unaffected, or explicit follow-up work.
+- Changelog findings: see Docs / Changelog.
+- Public ClawSweeper comments prefer `https://docs.openclaw.ai/...` when a public docs page exists; structured evidence still cites repo files, lines, SHAs.
+- Findings need current source, shipped/current behavior, tests/CI evidence, and dependency contract proof when dependency-backed behavior is involved. Validation is judged against touched and sibling surfaces plus this file's commands; real behavior proof matters for user-visible changes, with Telegram/Desktop proof for Telegram-visible behavior when feasible.
+- Prefer findings for concrete behavior regressions, missing changed-surface proof, owner-boundary violations, security/API contract issues, or docs/config mismatches.
+- Do not file findings for repo policy preference when changed code follows the relevant scoped guide and no user-visible, runtime, security, or maintainer-risk impact is shown.
+
 ## Map

 - Core TS: `src/`, `ui/`, `packages/`; plugins: `extensions/`; SDK: `src/plugin-sdk/*`; channels: `src/channels/*`; loader: `src/plugins/*`; protocol: `src/gateway/protocol/*`; docs/apps: `docs/`, `apps/`.
 - Installers: sibling `../openclaw.ai`.
 - Scoped guides: `extensions/`, `src/{plugin-sdk,channels,plugins,gateway,gateway/protocol,agents}/`, `test/helpers*/`, `docs/`, `ui/`, `scripts/`.

+## Docs
+
+- Source docs: `docs/**`; publish repo: `openclaw/docs`; host: `https://docs.openclaw.ai`.
+- Flow: source -> `docs-sync-publish.yml` -> mirror build -> R2 -> Worker router.
+- Docs AI: `openclaw/ask-molty`; see its `AGENTS.md`.
+
 ## Architecture

 - Core stays plugin-agnostic. No bundled ids/defaults/policy in core when manifest/registry/capability contracts work.
@@ -34,16 +56,28 @@ Skills own workflows; root owns hard policy and routing.
 - Internal bundled plugins ship in core dist; bundled-only facade loader ok only for them.
 - External official plugins own package/deps and are excluded from core dist; core uses registry-aware `facade-runtime` or generic contracts.
 - Externalizing a bundled plugin: update package excludes, official catalogs, docs, tests, and prove core runtime paths resolve installed plugin roots before root-dep removal.
-- Legacy config repair belongs in `openclaw doctor --fix`, not startup/load-time core migrations. Runtime paths use canonical contracts.
+- Runtime reads canonical config only. No silent compat for old/malformed config keys. If a config change invalidates existing files, add a matching `openclaw doctor --fix` migration. Core/auth config repairs live in core doctor; plugin-owned config repairs live in that plugin's doctor contract (`legacyConfigRules` / `normalizeCompatibilityConfig`).
 - Fix shape: default to clean bounded refactor, not smallest patch. Move ownership to right boundary; delete stale abstractions, duplicate policy, dead branches, wrappers, fallback stacks.
+- Fix observed local failures with generic product rules; do not hardcode names, ids, log phrases, or user examples in prod code unless they are an explicit contract.
+- Tests may use observed examples, but prod literals need a short contract reason.
+- Compatibility is opt-in. "Shipped" means reachable from a release Git tag; main/GitHub/PR/unreleased code is not shipped.
+- Refactor default: one canonical path. Delete the old path unless user explicitly wants compat or the shipped public contract is obvious and cited.
+- Keep old behavior only for an explicit public API/config/plugin SDK/data contract, tagged upgrade path, security/migration boundary, dependency contract, or observed prod state.
+- If unsure, ask before preserving compat. Do not keep aliases, shims, fallback stacks, stale names, or obsolete tests just in case.
+- Tests alone do not make internals contracts. If compat stays, name the contract and migration/removal plan in code, test, or PR.
 - Lean code is a goal. No internal shims, aliases, legacy names, broad fallbacks, or defensive branches just to reduce diff or handle unrealistic edge cases.
-- Handle real production states, shipped upgrade paths, security boundaries, and dependency contracts. Public/hostile/observed malformed input gets care; hypothetical malformed input does not.
-- Public plugin SDK/API is the compat exception. New API first, old path only via named compat/deprecation metadata, docs, warnings when useful, tests for old+new, planned removal.
+- Handle real production states, tagged upgrade paths, security boundaries, and dependency contracts. Public/hostile/observed malformed input gets care; hypothetical malformed input does not.
+- Deprecate shipped public contracts only.
+- Plugin SDK exception: shipped external API gets new API first plus named compat/deprecation, small tests/docs if useful, removal plan.
 - Migrate internal/bundled callers to modern API in the same change. Do not let internal compat become permanent architecture.
 - Channels are implementation under `src/channels/**`; plugin authors get SDK seams. Providers own auth/catalog/runtime hooks; core owns generic loop.
 - Hot paths should carry prepared facts forward: provider id, model ref, channel id, target, capability family, attachment class. Do not rediscover with broad plugin/provider/channel/capability loaders.
 - Do not fix repeated request-time discovery with scattered caches. Move the canonical fact earlier; reuse prepared runtime objects; delete duplicate lookup branches.
-- Inline code comments: brief notes for tricky, bug-prone, or previously buggy logic.
+- Gateway/plugin metadata is process-stable: installs, manifests, catalogs, generated paths, bundled metadata. Changes require restart or explicit owner reload/install/doctor flow.
+- Runtime hot paths: no freshness polling (`stat`/`realpath`/JSON reread/hash). Reuse current snapshots, install records, discovery, lookup tables, root scopes, resolved paths.
+- Process-local metadata caches ok when lifecycle-owned and bounded/single-slot. Freshness exceptions need named owner + tests.
+- Inline comments: preserve reviewer context at the code site. Use for cross-path/state invariants, platform/dependency caps, deterministic ordering, compact encoded state, lifecycle ordering, ownership boundaries, session/id adoption, queue-depth symmetry, fallbacks, or intentional caller differences.
+- Comment shape: 1-3 short lines; state why the branch/helper exists, what contract it protects, and the bad outcome if removed. Cite nearby constants/helpers when useful. No syntax narration, PR/user-specific lore, or obvious mechanics.
 - Gateway protocol changes: additive first; incompatible needs versioning/docs/client follow-through.
 - Protocol version bumps: explicit owner confirmation only; never automatic/generated.
 - Config contract: exported types, schema/help, metadata, baselines, docs aligned. Retired public keys stay retired; compat in raw migration/doctor only.
@@ -55,7 +89,6 @@ Skills own workflows; root owns hard policy and routing.
 - Runtime: Node 22.19+; Node 24 recommended. Keep Node + Bun paths working.
 - Package manager/runtime: repo defaults only. No swaps without approval.
 - Install: `pnpm install` (keep Bun lock/patches aligned if touched).
-- Sharp/Homebrew libvips source-build fail: `SHARP_IGNORE_GLOBAL_LIBVIPS=1 pnpm install`.
 - CLI: `pnpm openclaw ...` or `pnpm dev`; build: `pnpm build`.
 - Tests in a normal source checkout: `pnpm test <path-or-filter> [vitest args...]`, `pnpm test:changed`, `pnpm test:serial`, `pnpm test:coverage`; never raw `vitest`.
 - Tests in a Codex worktree or linked/sparse checkout: avoid direct local `pnpm test*`; use `node scripts/run-vitest.mjs <path-or-filter>` for tiny explicit-file proof, or Crabbox/Testbox for anything broader.
@@ -92,7 +125,6 @@ Skills own workflows; root owns hard policy and routing.
 - Do not leave associated issues open for hypothetical future repros. Close with rationale; ask for a new issue or reopen only if concrete new evidence appears. Close comment states: decision, why, supported alternative, and what evidence would change the decision.
 - PR review answer: bug/behavior, URL(s), affected surface, provenance for regressions when traceable, best-fix judgment, evidence from code/tests/CI/current or shipped behavior.
 - Issue/PR final answer: last line is the full GitHub URL.
-- Changelog: PR landings/fixes need one unless pure test/internal. Do not mention missing changelog as a review finding; Codex handles it during fix/landing.
 - PR verification: before merge, post exact local commands, CI/Testbox run IDs, before/after proof when used, and known proof gaps.
 - Issue fixed on `main` with proof: comment proof + commit/PR, then close.
 - After landing or requested close/sweep: search duplicates; comment proof + canonical commit/PR/release before closing.
@@ -100,8 +132,10 @@ Skills own workflows; root owns hard policy and routing.
 - `ship` that fixes an issue: after push, comment proof + commit link, then close the issue.
 - GH comments with backticks, `$`, or shell snippets: use heredoc/body file, not inline double-quoted `--body`.
 - PR create: real body required. Include Summary + Verification; mention refs, behavior, and proof.
+- PR create/refresh: keep PR branches takeover-ready. Use a branch maintainers can push to, or for fork PRs ensure `maintainer_can_modify` / GitHub's `Allow edits by maintainers` is enabled unless explicitly told otherwise or GitHub's Actions/secrets warning makes that unsafe.
+- GitHub issue/PR create: read `$agent-transcript`; ask about sanitized transcript logs when available.
 - Real behavior proof section is parsed. Use exact `field: value` labels: `Behavior addressed`, `Real environment tested`, `Exact steps or command run after this patch`, `Evidence after fix`, `Observed result after fix`, `What was not tested`.
-- PR artifacts/screenshots: attach to PR/comment/external artifact store. Do not commit `.github/pr-assets`.
+- PR artifacts/screenshots: attach to PR/comment/external artifact store. Never push screenshots, videos, proof images, or proof assets to OpenClaw or any product repo branch, including temp artifact branches. Use Crabbox artifact publishing plus the manifest URL. Do not commit `.github/pr-assets`.
 - CI polling: exact SHA, relevant checks only, minimal fields. Skip routine noise (`Auto response`, `Labeler`, docs agents, performance/stale). Logs only after failure/completion or concrete need.
 - Maintainers: may skip/ignore `Real behavior proof` when local tests or Crabbox verified behavior; record proof in PR verification.
 - `/landpr`: use `~/.codex/prompts/landpr.md`; do not idle on `auto-response` or `check-docs`.
@@ -112,14 +146,27 @@ Skills own workflows; root owns hard policy and routing.
 - No `@ts-nocheck`. Lint suppressions only intentional + explained.
 - External boundaries: prefer `zod` or existing schema helpers.
 - Runtime branching: discriminated unions/closed codes over freeform strings. Avoid semantic sentinels (`?? 0`, empty object/string).
+- Cross-function state: when valid combos matter, return a closed mode/result shape. Avoid parallel nullable fields or derived booleans that callers must keep in sync; make impossible states unrepresentable.
 - Formatter-friendly shape: when oxfmt explodes an expression vertically, extract named booleans, payloads, or small helpers. Do not change width or use format-ignore for local compactness.
 - Calls should be boring: complex decisions happen above; call args/object fields are names, literals, or simple property reads.
 - Prefer early returns over nested condition pyramids. Split code into gather -> normalize -> decide -> act.
 - Use named intermediates only for domain meaning or readability; avoid temp-variable soup.
+- Code size matters. Prefer small clear code; maintainability includes not growing LOC without payoff.
+- Refactors should delete about as much local complexity as they add. If LOC grows, the new ownership/API needs to clearly pay for it.
+- Before adding helpers/files, check whether existing code can absorb the behavior with less new surface.
+- Keep APIs narrow: export only current caller needs; keep types/helpers local by default.
+- Return the smallest useful shape. Avoid broad result objects, flags, metadata unless callers use them.
+- Avoid adapter layers that only rename fields. Move real responsibility or leave code local.
+- Inline simple one-use objects/spreads when clearer. Extract only when it removes duplication or hard logic.
+- Tests prove behavior/regressions, not every internal branch.
+- For non-trivial refactors, check `git diff --numstat` before closeout. If LOC grew, trim or explain why.
+- Prefer existing narrow helpers over repeated casts/guards. Add local helpers when 2+ nearby call sites share real boundary logic.
+- Prefer ctor parameter properties for injected deps/config. Do not ban them for erasable-syntax purity.
+- Prefer `satisfies` for registries/config maps; derive types from schemas when a runtime schema already exists.
+- Table-drive repetitive tests when it reduces code and keeps failure names clear.
 - Dynamic import: no static+dynamic import for same prod module. Use `*.runtime.ts` lazy boundary. After edits: `pnpm build`; check `[INEFFECTIVE_DYNAMIC_IMPORT]`.
 - Cycles: keep `pnpm check:import-cycles` + architecture/madge green.
 - Classes: no prototype mixins/mutations. Prefer inheritance/composition. Tests prefer per-instance stubs.
-- Comments: brief, only non-obvious logic.
 - Split files around ~700 LOC when clarity/testability improves.
 - Naming: **OpenClaw** product/docs; `openclaw` CLI/package/path/config.
 - English: American spelling.
@@ -138,12 +185,12 @@ Skills own workflows; root owns hard policy and routing.

 ## Docs / Changelog

-- Use `$openclaw-docs` for docs writing/review. Docs change with behavior/API.
+- Use `$technical-documentation` for docs writing/review. Docs change with behavior/API.
 - Codex harness upgrade (`extensions/codex/package.json` `@openai/codex`): refresh `docs/plugins/codex-harness.md` model snapshot from the new harness `model/list`.
 - Docs final answers: include relevant full `https://docs.openclaw.ai/...` URL(s). If issue/PR work too, GitHub URL last.
-- Changelog entries: active version `### Changes`/`### Fixes`; single-line bullets only.
-- Contributor PR authors should not edit `CHANGELOG.md`; maintainer/AI adds entries during landing/merge.
-- Contributor-facing changelog entries thank credited human `@author`. Never thank bots, `@openclaw`, `@clawsweeper`, or `@steipete`; if unknown, omit thanks.
+- `CHANGELOG.md`: release-owned. Do not edit for normal PRs, direct `main` fixes, or `ship it`; only explicit release/changelog generation may rewrite it. Do not ask contributors/agents for changelog edits.
+- User-facing `fix`/`feat`/`perf`: put release-note context in PR body, squash message, or direct commit: behavior, surface, issue/PR refs, credited human author/reporter.
+- Release generation: derive `CHANGELOG.md` from merged PRs + all direct `main` commits. Entries: active `### Changes`/`### Fixes`, single-line, thank credited humans; never thank bots/forbidden handles: `@openclaw`, `@clawsweeper`, `@codex`, `@steipete`.

 ## Git

@@ -152,7 +199,7 @@ Skills own workflows; root owns hard policy and routing.
 - No manual stash/autostash unless explicit. No branch/worktree changes unless requested.
 - `main`: no merge commits; rebase on latest `origin/main` before push. After one green run plus clean rebase sanity, do not chase moving `main` with repeated full gates.
 - User says `commit`: your changes only. `commit all`: all changes in grouped chunks. `push`: may `git pull --rebase` first.
- User says `ship it`: changelog if needed, commit intended changes, pull --rebase, push.
+- User says `ship it`: commit intended changes, pull --rebase, push.
 - Do not delete/rename unexpected files; ask if blocking, else ignore.
 - Bulk PR close/reopen >5: ask with count/scope.

@@ -163,7 +210,8 @@ Skills own workflows; root owns hard policy and routing.
 - Dependency patches/overrides/vendor changes need explicit approval. `pnpm-workspace.yaml` patched dependencies use exact versions only.
 - Lockfiles/shrinkwrap are security surface: review `pnpm-lock.yaml`, `npm-shrinkwrap.json`, `package-lock.json`; root/plugin npm packages ship shrinkwrap, not package-lock.
 - Carbon pins owner-only: do not change `@buape/carbon` unless Shadow (`@thewilloftheshadow`, verified by `gh`) asks.
- Releases/publish/version bumps need explicit approval. Use `$openclaw-release-maintainer`.
+- Releases/publish/version bumps need explicit approval. Use `$release-openclaw-maintainer`.
+- Backport means apply to newest open `release/` branch unless user names another target.
 - GHSA/advisories: `$openclaw-ghsa-maintainer` / `$security-triage`. Secret scanning: `$openclaw-secret-scanning-maintainer`.
 - Beta tag/version match: `vYYYY.M.D-beta.N` -> npm `YYYY.M.D-beta.N --tag beta`.

@@ -174,7 +222,7 @@ Skills own workflows; root owns hard policy and routing.
 - SwiftUI: Observation (`@Observable`, `@Bindable`) over new `ObservableObject`.
 - Mac gateway: dev watch = `pnpm gateway:watch`; managed installs = `openclaw gateway restart/status --deep`; logs = `./scripts/clawlog.sh`. No launchd/ad-hoc tmux.
 - Mac app permission testing: stable app path + real signing identity required. No `--no-sign`, `SIGN_IDENTITY=-`, or raw debug binary; TCC prompts/listing won't stick.
- Version bump surfaces live in `$openclaw-release-maintainer`.
+- Version bump surfaces live in `$release-openclaw-maintainer`.
 - Parallels: `$openclaw-parallels-smoke`; Discord roundtrip: `$parallels-discord-roundtrip`.
 - Crabbox/WebVNC human demos: keep remote desktop visible/windowed; no fullscreen remote browser unless video/capture-style output.
 - ClawSweeper ops: `$clawsweeper`. Deployed hook sessions may post one concise `#clawsweeper` note only when surprising/actionable/risky; if using message tool, reply exactly `NO_REPLY`.
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,10 +2,277 @@

 Docs: https://docs.openclaw.ai

+## 2026.5.28
+
+### Highlights
+
+- Agent and Codex runtime recovery is steadier: subagents keep cwd/workspace separation, hook context stays prompt-local, session locks release on timeout abort, stale restart continuations are avoided, and Codex app-server/helper failures no longer tear down shared runtime state. (#87218, #86875, #87409, #87399, #87375)
+- Channel delivery and session identity got safer across outbound plugin hooks, Matrix room ids, iMessage reactions/approvals, Slack final replies, Discord recovered tool warnings, and Microsoft Teams service URL trust checks. (#73706, #75670, #87366, #87451, #87334)
+- CLI, auth, doctor, and provider paths fail faster and recover more clearly: malformed numeric/version options are rejected, OAuth and local service startup requests are bounded, legacy `api_key` auth profiles migrate to canonical form, and restart guidance is actionable. (#87398, #86281, #87361)
+- Plugin and Gateway hot paths do less repeated work while preserving cache correctness for install records, config JSON parsing, tool search catalogs, session stores, manifest model rows, auto-enabled plugin config, browser tokens, and viewer assets. (#86699)
+- Release, QA, and E2E validation now bound more log, artifact, harness, and cross-OS waits so failing lanes produce proof instead of hanging or false-greening.
+
+### Changes
+
+- Status: show active subagent details in status output.
+- Diffs: split the default language pack and expand default Diffs language coverage while keeping the host floor aligned. (#87370, #87372) Thanks @RomneyDa.
+- ClawHub: add plugin display names plus skill verification and trust surfaces. (#87354, #86699) Thanks @thewilloftheshadow and @Patrick-Erichsen.
+- Docs: clarify Codex computer-use setup, paste-token stdin auth setup, macOS gateway sleep troubleshooting, native Codex hook relay recovery, container model auth, install deployment cards, device-token admin gating, and backport targets. (#87313, #63050) Thanks @bdjben, @liaoandi, and @thewilloftheshadow.
+
+### Fixes
+
+- Agents/Codex: keep spawned agent cwd/workspace state separated, keep hook context prompt-local, release session locks on timeout abort, avoid session event queue self-wait, preserve shared app-server state across startup or helper failures, keep native hook relay alive across restarts, route workspace memory through tools, resolve Codex runtime models first, report quarantined dynamic tools, format `skills` command output, and bound compaction/steering retries. (#87218, #86875, #86123, #87399, #87375, #87383, #87400) Thanks @mbelinky, @Alix-007, @luoyanglang, @yetval, and @sjf.
+- Channels: thread canonical session keys into outbound hooks, preserve Matrix room-id case, keep fallback tool warnings mention-inert, retain delivered Slack final replies during late cleanup, continue iMessage polling after denied reactions, suppress duplicate native exec approvals, preserve Telegram SecretRef prompt config, suppress Discord recovered tool warnings, and block untrusted Teams service URLs. (#73706, #75670, #87366, #87451, #87334) Thanks @zeroaltitude, @lukeboyett, @xiaotian, and @eleqtrizit.
+- CLI/auth/doctor/providers: reject malformed numeric/timeout/subcommand-version inputs, wait for respawn child shutdown, bound Codex and GitHub Copilot OAuth/token requests, warm provider auth off the main thread, honor Codex response timeouts, bound local service startup, resolve GPT-5.5 without cached catalog, migrate legacy memory auto-provider config, rewrite non-canonical `api_key` auth profiles, and make doctor restart follow-ups actionable. (#87398, #86281, #87361) Thanks @Patrick-Erichsen, @samzong, @giodl73-repo, and @alkor2000.
+- Gateway/security/session state: expire browser tokens after auth rotation, scope assistant idempotency dedupe, drain probe client closes, avoid stale restart continuation reuse, preserve retry-after fallbacks, bound webchat image and artifact transcript scans, include seconds in inbound metadata timestamps, and evict current plugin-state namespaces at row caps.
+- Performance: trust install-record caches between reloads, prefer native JSON parsing, reuse unchanged tool-search catalogs, skip unchanged store serialization, add precomputed session patch writers, reduce store clone allocations, cache manifest model catalog rows and auto-enabled plugin config, and slim current metadata identity caches.
+- Docker/release/QA: package runtime workspace templates; stream cross-OS served artifacts; preserve sparse Crabbox run artifacts; bound OpenClaw instance logs, plugin gauntlet relay logs, MCP channel buffers, kitchen-sink scans, agent-turn assertions, and release scenario logs; and keep release/google live guards current.
+
+## 2026.5.27
+
+### Highlights
+
+- Safer local/runtime boundaries: OpenClaw now rejects unsafe command wrappers, malformed CLI numeric options, unsafe Node runtime env overrides, no-auth Tailscale exposure, and non-admin device-role pairing approvals before they can affect live runs. (#87308, #87305, #87292, #87146)
+- Matrix and auto-reply delivery are steadier: mention previews stay inert, final mention replies deliver normally, shared-DM notices are awaited, MXID parsing ignores filenames, and reasoning-prefixed `NO_REPLY` responses stay suppressed.
+- Provider and agent reliability improved across OpenAI-compatible embeddings, cached token usage, Anthropic/Codex/Claude runtime state, unsupported tool-schema quarantine, heartbeat templates, and session fallback errors. (#85269, #82062, #85416, #86855)
+- Plugin and package release paths got tighter: Pixverse ships as an external video plugin with region selection, package exclusions and shrinkwrap inventory match the published npm shape, and release/package smoke commands fail bounded instead of hanging.
+- Gateway hot paths do less rediscovery by reusing current plugin metadata fingerprints, stable plugin index fingerprints, read-only session metadata, active working stores, status fast paths, and auth/env snapshots. (#86439)
+
+### Changes
+
+- Memory: add a core OpenAI-compatible embedding provider for local and hosted OpenAI-style endpoints, with config, doctor, and docs support. (#85269) Thanks @dutifulbob.
+- Plugin SDK: mark memory-specific embedding provider registration as deprecated compatibility and surface non-bundled usage in plugin compatibility diagnostics. (#85072) Thanks @mbelinky.
+- Pixverse: add video generation provider support, API region selection, and external plugin publishing.
+- Plugins: expose approval action metadata for plugin-driven approval surfaces.
+
+### Fixes
+
+- Security/CLI/runtime: harden hostname normalization for repeated trailing dots, block side-effecting command wrappers, reject unsafe Node runtime env overrides, reject loose numeric CLI and gateway options, require admin approval for node device-role pairing, and reject no-auth Tailscale exposure. (#87305, #87292, #87308, #87146) Thanks @pgondhi987.
+- Doctor: validate runtime tool schemas for every configured embedded agent while skipping ACP-only profiles, so bad non-default plugin or MCP tools are reported before assistant turns.
+- Telegram: route `sendMessage` action replies through durable outbound delivery so completed agent responses remain retryable when the gateway send path times out. (#87261) Thanks @mbelinky.
+- Matrix/auto-reply: keep draft previews mention-inert, preserve final mention delivery, send mention finals normally, await shared DM notices, ignore filename-embedded MXIDs, and suppress reasoning-prefixed `NO_REPLY` responses.
+- Agents/providers: add OpenAI-compatible cache retention, forward cached token usage in chat completions, preserve runtime context before active user turns, strip stale Anthropic thinking, load Claude CLI OAuth for Pi auth profiles, avoid false Codex runtime live switches, and quarantine unsupported tool schemas. (#82062, #87167, #86855)
+- Gateway/performance: cache plugin metadata fingerprints and stable plugin index fingerprints, borrow read-only session metadata safely, keep the active session working store hot, keep status on a bounded fast path, and preserve model auth profile suffixes. (#86439)
+- Package/install/release: align npm package exclusions and inventory, omit unpacked test helpers, skip Homebrew until macOS packages need it, cap tsdown heap in containers, bound install/release smoke waits, and harden post-publish verification.
+- Codex/Auth: bound ChatGPT OAuth token exchange and refresh requests, and honor cancellation across Codex and Anthropic OAuth login flows.
+- QA/E2E/CI: bound Telegram, kitchen-sink, Open WebUI, ClawHub, MCP, Discord, realtime, labeler, and GitHub API waits; fail empty explicit test, live-media, gateway CPU, plugin gauntlet, and beta-smoke runs instead of false-greening.
+- Agents/Codex: keep spawned agent bootstrap files rooted in the agent workspace while running task commands, transcripts, and compaction from the requested cwd. (#87218) Thanks @mbelinky.
+
+## 2026.5.26
+
+### Highlights
+
+- Faster Gateway and replies: startup avoids repeated plugin, channel, session, usage-cost, warning, scheduled-service, and filesystem scans; visible replies separate user-facing sends from slower follow-up work; Gateway runtime/session caches churn less under load.
+- Transcripts are core: transcript-backed meeting summaries, source-provider chunks, cleaned user turns, media provenance, Codex mirrors, WebChat replies, and CLI/TUI replay now share a single, more reliable transcript path.
+- More channels are production-ready: Telegram keeps typing/progress context and forum topics, iMessage handles attachment roots, remote media staging, and duplicate local Messages sources, WhatsApp restores group/media behavior, Discord improves voice playback and model picking, and Signal/iMessage/WhatsApp get reaction approvals.
+- Better voice and Talk: realtime Talk runs can be inspected, steered, cancelled, or followed up from Web UI and Discord voice; wake-name handling is more tolerant without letting ambient speech trigger agents.
+- Safer content boundaries: Browser snapshot reads honor SSRF policy, system-event text cannot spoof nested prompt markers, fetched file text is wrapped as external content, ClickClack inbound sender allowlists run before agent dispatch, stale device tokens are rejected, and serialized tool-call text is scrubbed from replies.
+- Providers, Codex, and local models are steadier: named auth profiles, OpenAI sampling params, Codex app-server resume/timeout/usage-limit recovery, dynamic tool-schema guards, xAI usage-limit surfacing, Ollama top-p normalization, and local approval resolution reduce provider-specific dead ends.
+- More reliable install/update/release paths: Alpine installs, trusted runtime fallback roots, stable update channels, Docker/package timeouts, Windows Scheduled Tasks, Windows/macOS proof lanes, Testbox/Crabbox delegation, plugin publish checks, and macOS runner bootstraps all got hardened.
+- Better observability: Activity tab, gateway secret-prep traces, tool/model stream progress, explicit fast-mode status, systemd Gateway hygiene, OpenTelemetry LLM spans, release performance evidence, and richer telemetry signals make failures easier to inspect.
+
+### Changes
+
+- Transcripts: add core transcript capture and source-provider support for transcript-backed meeting summaries, including the renamed Transcripts docs, CLI surface, source-provider chunks, and cleaned user-turn persistence.
+- Auth: add named model login profiles and supported credential migration for Hermes, OpenCode, and Codex auth profiles, with explicit opt-out and non-interactive controls. (#85667) Thanks @fuller-stack-dev.
+- Diagnostics: trace gateway secret preparation, classify skill/tool usage, surface model stream progress, add OpenTelemetry LLM content spans, and expose alertable telemetry for blocked tools, failover, stale sessions, liveness, oversized payloads, and webhook ingress. (#83019, #80370, #86191)
+- Channels: add Signal reaction approvals, iMessage thumb approval reactions, and WhatsApp thumb approval reaction support so mobile approval flows work without textual `/approve` commands. (#85894, #85952, #85477)
+- Agents/API: forward OpenAI sampling params through the Gateway and expose estimated context-budget status for active agent runs. (#84094)
+- TUI/status: queue prompts submitted while an agent is busy and show explicit fast-mode state plus richer systemd Gateway hygiene in status output. (#86722, #87115, #86976)
+- Exec approvals: hide durable approval actions that are unavailable for the current prompt and keep approval runtime tokens local-only so stale prompts cannot offer misleading controls. (#86270, #86359)
+- Plugin SDK: add reaction approval helpers and keep diagnostic event root exports discoverable across function-name and alias-bound module graphs. (#86735, #87084)
+- Android/iOS: add the Android pair-new-gateway action and improve mobile Talk mode surfaces, including iOS realtime Talk mode and Android offline voice/gateway recovery. (#86798, #86355) Thanks @ngutman.
+- Performance: cache plugin metadata snapshots, package realpaths, stable gateway metadata, model cost indexes, channel resolution, usage-cost indexes, and session/auth hot-path facts so common Gateway and reply paths do less rediscovery. (#84649, #85843, #86517, #86678)
+- Voice: expose shared realtime turn-context tracking through the realtime voice SDK and reuse it for Discord speaker attribution and wake-name context recovery.
+- Voice: reuse shared realtime output activity tracking in Google Meet command and node audio bridges, including recent-output checks for local barge-in detection.
+- Voice: expose shared realtime output activity tracking through the realtime voice SDK and reuse it for Discord playback activity and barge-in decisions.
+- Voice: expose shared realtime consult question matching, speakable-result extraction, and alias-aware forced-consult coordination through the realtime voice SDK, then reuse it in Gateway Talk, Voice Call, and Discord voice paths.
+- Voice: share activation-name matching and consult-transcript screening through the realtime voice SDK so Discord, browser voice, and meeting surfaces can reuse one implementation.
+- Cron: default `cron.maxConcurrentRuns` to 8 so scheduled automations and their isolated agent turns can make progress in parallel without explicit configuration.
+- QA-Lab: add `qa coverage --match <query>` so focused proof selection can discover matching scenarios from existing metadata before running live or remote lanes.
+- Discord/model picker: surface an alpha-bucket select (e.g. `A–G (12) · H–N (18) · O–Z (5)`) when the provider list or a provider's model list exceeds 25 items, so configs with `provider/*` wildcards stay one click from the right page instead of paginating through prev/next; falls back to numeric chunks when every item shares the same first letter. (#86181) Thanks @rendrag-git.
+- Control UI: add an ephemeral Activity tab for sanitized live tool activity summaries without persisting raw telemetry. Fixes #12831. Thanks @BunsDev.
+- Build: include `ui:build` in the `full` and `ciArtifacts` profiles of `scripts/build-all.mjs` so `pnpm build` always rebuilds `dist/control-ui` after `tsdown` cleans `dist`, removing the second-command requirement and the missing-asset failure mode for source/runtime installs and CI artifact uploads. (#85206)
+- iOS: improve Talk mode with direct realtime voice sessions, compact toolbar status, and responsive voice waveform feedback. (#86355) Thanks @ngutman.
+- Media: replace the Sharp image backend with Rastermill for metadata, resizing, EXIF orientation, and PNG alpha-preserving optimization so OpenClaw no longer installs Sharp or the WhatsApp Jimp fallback for image processing. (#86437)
+- Codex: update the bundled Codex CLI to 0.134.0 and keep native compaction disabled for budget-triggered app-server turns so OpenClaw owns the recovery boundary. (#86772)
+
+### Fixes
+
+- Memory/security: reject prompt-like text submitted through the explicit `memory_store` tool before embedding or storage, matching the existing auto-capture prompt-injection filter. (#87142)
+- Gateway/security: enable the default auth rate limiter for remote non-browser and HTTP gateway auth failures when `gateway.auth.rateLimit` is unset, while preserving the loopback exemption. (#87148)
+- Prompt hardening: route untrusted group prompt metadata through sanitized untrusted structured context while preserving trusted operator-configured group system prompts and aligning the plugin SDK docs/test helpers. (#87144)
+- Security/content boundaries: validate Browser snapshot tab URLs against SSRF policy before ChromeMCP or direct CDP reads, sanitize queued system-event text so untrusted plugin/channel labels cannot spoof nested prompt markers, wrap fetched file text and metadata as external content, apply ClickClack `allowFrom` sender allowlists before agent dispatch, reject RPCs from invalidated device-token clients during rotation, require staged sandbox media refs, and scrub serialized tool-call text from replies. (#78526, #87094, #87062, #83741, #70707, #86924) Thanks @zsxsoft, @ttzero25, and @mmaps.
+- Transcripts/user turns: persist CLI, WebChat, media, follow-up, hook, and Codex-mirror user turns to the admitted session target; keep cleaned transcript text, inline image routing, provenance metadata, replay hooks, and fallback paths idempotent when runtimes fail or restart.
+- TUI/status/onboarding/UI: queue busy TUI prompts instead of dropping them, preserve the configured default model during onboarding, show failed tool results as errors, show config-open failures in Control UI, keep status JSON plugin scans healthy, preserve xAI usage-limit errors locally, and expose explicit fast-mode/systemd state. (#86722, #87000, #85786, #87108, #87001, #86614, #87115, #86976)
+- Plugin commands/SDK: preserve plugin LLM command auth, bind native plugin command dispatch to the host agent's LLM auth, keep `onDiagnosticEvent` exports discoverable through `Function.name`, stabilize diagnostic event root aliases, correlate pathless read diagnostics, suppress transient runner failures in channel command paths, and repair local approval resolution. (#85936, #87084, #86977, #87069, #86771)
+- Codex/providers: keep WebChat delivery hints out of user prompts, avoid false queued-terminal idle timeouts, share the native hook relay registry, quarantine unsupported dynamic tool schemas, preserve Claude resumed-session system prompts, normalize greedy Ollama `top_p`, preserve per-agent thinking defaults for ingress runs, and avoid native compaction takeover on budget-triggered Codex turns. (#87096, #73950, #87049, #86689, #86772)
+- Gateway/perf/release: reuse startup-warning metadata and prepared auth stores, avoid cloning live-switch and lifecycle session caches on read paths, defer warning and scheduled-service fallback imports, trim Gateway session/startup/runtime CPU churn, skip duplicate turn session touches, stop chat timeout fallback cascades, drop stale subagent announce history, bound benchmark/watch/kitchen-sink teardown waits, bound macOS/package/onboarding/plugin smoke commands, bound install finalization probes, resolve Parallels npm-update commands from guest `PATH`, and bootstrap raw AWS macOS Node/pnpm commands through `/usr/bin/env`. (#86997)
+- Reply/perf: reduce visible reply delivery latency by preserving Telegram typing/progress context, lazy-loading slash-command startup metadata, avoiding hot-path model hydration, flag-gating Codex profiler timing, deferring context compaction maintenance, and tracking delivery timing. (#86989, #86990, #86991, #86992, #86993, #86994) Thanks @keshavbotagent.
+- Reply/source delivery: keep TUI, Control UI, media, TTS, transcript, and Codex source-reply finals live without duplicate terminal events or stale replay artifacts.
+- Agents/replay: repair legacy tool results before replay, preserve `sessions_spawn` transcript payloads, restore current guard checks, stage sandboxed workspace media, and keep duplicate transcripts tool display metadata from reappearing. (#82203, #86934, #87025) Thanks @martingarramon, @vincentkoc, and @joshavant.
+- Agents/sessions: handle active-fallback failures in `sessions_send` so fallback routing reports the real failure and does not leave callers with an ambiguous dropped send. (#86638)
+- Agents/hooks/subagents: enforce default hook agent allowlists, recover failed subagent lifecycle completions, and keep node task lifecycle cleanup from closing the Gateway listener. (#86101)
+- Codex: project newer OpenClaw chat history into resumed app-server threads and keep Codex turn timeouts inside the Codex runtime boundary so timeouts do not poison shared app-server clients or fall through to unrelated provider fallback. (#86677, #86476) Thanks @TurboTheTurtle and @pashpashpash.
+- Config/doctor/update: narrow profiled tool-section doctor repair, keep runtime-injected legacy web-search provider config out of user-authored config validation, and keep prerelease tags excluded from stable updater resolution. (#87030, #86818, #86559) Thanks @joshavant, @luoyanglang, and @stevenepalmer.
+- Doctor/runtime: validate active bundled MCP tool schemas through the same runtime projection path so unsupported MCP input schemas are reported and quarantined instead of poisoning assistant startup.
+- CLI/Windows: add a Windows-only stack-size respawn for stack-heavy startup paths, default CLI logs to local timestamps, and validate timeout/banner TTY state more strictly. (#87031, #85387) Thanks @giodl73-repo and @vincentkoc.
+- Locking/security: require owner identity proof before stale plugin lock removal, memoize session lock owner arguments, and avoid writing default exec approval stores unless policy state actually changed. (#86814, #86964) Thanks @Alix-007 and @vincentkoc.
+- Install/release: bound Docker package build, inventory, pack, and tarball preparation with process-group timeouts; pin shrinkwrap patch drift to the pnpm lock; harden macOS restart and dSYM packaging; and run release Docker/live timeout wrappers in the foreground so child processes cannot wedge gates.
+- QA/Telegram: bound Telegram user credential tar and broker calls so live proof setup fails with a timeout instead of waiting for the outer Crabbox job deadline.
+- QA/Tool Search: bound gateway E2E HTTP probes, run only the fixture plugin, and clean up temporary fixture trees after the compact tool-catalog proof completes.
+- Telegram/network: treat `ENETDOWN` as a transient pre-connect network failure so Telegram sends, gateway unhandled-rejection handling, and cron network retries follow the same recovery path as sibling network outages. (#86762) Thanks @TurboTheTurtle.
+- Telegram: preserve inbound text entities, overlapping DM replies, account topic cache sidecars, outbound reply context, targeted bot-command mentions, durable group retry targets, forum topic names, and native progress callbacks. (#83873, #85361, #85555, #85656, #85709, #86299, #86553) Thanks @SebTardif, @luoyanglang, and @neeravmakwana.
+- iMessage: read image attachments from local Messages attachment roots, dedupe duplicate local Messages-source accounts, seed direct DM history, fix image/group media attachment commands, advance catchup cursors after live handling, and keep slash-command acknowledgements in the source conversation. (#82642, #85475, #86569, #86705, #86706, #86770) Thanks @homer-byte, @TurboTheTurtle, @swang430, and @OmarShahine.
+- WhatsApp/QQ/Twitch/IRC/Slack: restore WhatsApp ack identity and group-drop warnings, make QQ Bot media respect `OPENCLAW_HOME`, serialize Twitch auth disconnects, store IRC channel routes canonically, and keep Slack downloaded files out of reply media. (#83833, #85309, #85777, #85794, #85906, #86318, #86697) Thanks @sliverp, @neeravmakwana, and @Kailigithub.
+- Discord/voice: improve voice playback and wake replies, bucket large model picker menus, merge media captions into one message, route metadata through configured proxies, restore numeric channel sends, suppress self-reply echoes, and tighten wake matching without breaking fuzzy wake phrases. (#80227, #86238, #86487, #86571, #86595, #86601)
+- Codex: preserve native web-search metadata, keep oversized native thread reuse, bridge CLI API-key auth into the app server, preserve sandbox bootstrap path style, recover context-window prompt errors, honor yolo approval policy, disable native thread personality, and route compaction through Codex auth. (#85378, #85542, #85891, #85909, #86408)
+- Agents/runtime: enforce session lock max-hold reclaim, release embedded-attempt locks on all exits, treat aborted subagent runs as terminal, avoid runtime model hydration on hot paths, disclose scoped session list counts, derive overflow budgets from provider errors, and keep fallback errors scoped to the active model candidate. (#70473, #85764, #86014, #86134, #86427, #86944) Thanks @openperf, @fuller-stack-dev, @zhangguiping-xydt, and @ferminquant.
+- Config/update/doctor: retry config recovery after failed backup restore, skip shell env fallback on Windows, exclude prerelease tags from the stable git channel, support deep config edits, warn instead of aborting on unreadable cron stores, prune stale bundled plugin paths, and avoid duplicate restart prompts when the Gateway is already healthy. (#85739, #85787, #86060, #86260, #86384, #86533) Thanks @liaoyl830.
+- Install/release: support Alpine CLI installs and runtime floors, prefer trusted startup argv runtime fallback roots, reject stale CLI node runtimes, avoid npm `min-release-age` installer failures, bound npm/package/Docker install phases, restore config parent ownership in Docker, seed Docker lockfile package tarballs before prune, make release/plugin prerelease checks fail closed instead of hanging or false-greening, and use host-visible Crabbox local work roots for Docker-backed proof. (#85491)
+- Windows daemon: keep Scheduled Task gateway launches running on battery power and avoid workgroup-machine prompts for a domain user during task installation. (#59299)
+- Security: avoid printing Gateway tokens in Docker, validate plugin model-pattern regexes safely, escape transcript metadata field names, harden session allowlist glob matching, audit Claude permission overrides under YOLO, and require explicit allow for ACP auto approvals. (#85849, #85934, #86046, #86557)
+- Media/images: replace Sharp with Rastermill, keep EXIF normalization best-effort, normalize HEIC/HEIF before image descriptions, route Codex image API keys through OpenAI, preserve image compression metadata, and auto-scale live tool result caps. (#85776, #86037, #86437, #86857, #86923)
+- Memory: prevent semantic vector indexes from silently degrading when embeddings are unavailable, stop doctor OOMs on large session stores, preserve sidecar hooks/artifacts, write fallback dream diaries, use CJK-aware dreaming dedupe, and avoid per-file watcher FD fan-out. (#80613, #82928, #85060, #85704, #85967, #86701) Thanks @brokemac79, @openperf, and @yaaboo-gif.
+- Agents/sessions: include visibility metadata on restricted `sessions_list` results so scoped counts are clearly reported without widening access or exposing hidden-session counts. (#86944) Thanks @ferminquant.
+- Gateway/DNS: validate wide-area discovery domains before deriving zone paths or writing zone files, so invalid `discovery.wideArea.domain` and `dns setup --domain` values fail with a DNS-name diagnostic instead of falling through to unrelated configuration errors. Thanks @mmaps.
+- Agents/BTW: route fallback side-question streams through the embedded stream resolver so Anthropic-compatible MiniMax requests use the same capped transport as normal chat. (#86312) Thanks @neeravmakwana.
+- Telegram: treat `/command@TargetBot` bot-command entities as explicit mentions for the addressed bot so `requireMention` groups no longer drop targeted commands or captions. Fixes #84462. (#86553) Thanks @luoyanglang.
+- CI: bound Docker/Bash E2E tarball npm installs with `OPENCLAW_E2E_NPM_INSTALL_TIMEOUT` so package, onboarding, plugin, and upgrade lanes fail instead of hanging on a stuck npm install.
+- CI: fail Parallels npm-update smoke jobs after the guest command timeout and cleanup backstop instead of only logging a timeout line.
+- CI: bound kitchen-sink RPC HTTP probes so stalled gateway readiness or response bodies fail and retry instead of wedging the walker.
+- CI: bound Telegram user Crabbox proof Bot API calls so stalled Telegram responses fail instead of wedging credential and desktop proof cleanup.
+- CI: bound MCP channel stdio client initialization so Docker channel proof fails and closes the bridge transport instead of waiting for the outer job timeout.
+- CI: keep `OPENCLAW_TESTBOX=1 pnpm check:changed` delegating to Blacksmith Testbox through Crabbox without forwarding local Testbox or worker env into the remote command.
+- CI: send KILL after the TERM grace period for manual checkout fetch timeouts so stuck Testbox and workflow checkout retries cannot hang behind a wedged `git fetch`.
+- CI: send KILL after the TERM grace period for Bun global install smoke command timeouts so trapped `openclaw` child processes cannot wedge the scheduled install smoke.
+- iMessage: thread current channel/account inbound attachment roots into the image tool so iMessage-saved attachments under `~/Library/Messages/Attachments` (including the wildcard `/Users/*/Library/Messages/Attachments` root) are read through the existing inbound path policy instead of being rejected as `path-not-allowed`. Literal `localRoots` stays workspace-scoped. Fixes #30170. (#86569)
+- QQ Bot: respect `OPENCLAW_HOME` for outbound media path resolution so `<qqmedia>` sends no longer silently fail when `HOME` and `OPENCLAW_HOME` differ (Docker / multi-user hosts). Persisted QQ Bot data (sessions, known users, refs) stays anchored on the OS home for upgrade compatibility. Fixes #83562. Thanks @sliverp.
+- Update: report the primary malformed `openclaw.extensions` payload error without adding a duplicate missing-main diagnostic. (#86596) Thanks @ferminquant.
+- Control UI: keep host-local Markdown file paths inert while preserving app-relative links. (#86620) Thanks @BryanTegomoh.
+- Gateway: dampen repeated unauthenticated device-required probes per URL while preserving explicit-auth and paired recovery paths. (#86575) Thanks @ferminquant.
+- IRC: store inbound channel routes with the canonical `channel:#name` target and join transient channel sends before writing. (#85906) Thanks @Kailigithub.
+- Usage: surface unknown all-zero model pricing as missing cost entries instead of a confident `$0` total. (#85882) Thanks @MichaelZelbel.
+- Agents/Codex: honor yolo app-server approval policy only for the full `never` plus `danger-full-access` case. (#85909) Thanks @earlvanze.
+- Gateway/Gmail: clear Gmail watcher renewal intervals on re-entry so hot reloads do not leak lifecycle timers. (#82947) Thanks @SebTardif.
+- Logging: exit cleanly on broken stdout/stderr pipes without masking existing failure exit codes. (#80059) Thanks @pavelzak.
+- Gateway/security: escape transcript metadata field names while extracting oversized session line prefixes. (#85934) Thanks @SebTardif.
+- Plugins/security: validate manifest model pattern regexes with the safe-regex compiler so unsafe patterns are ignored before matching. (#86046) Thanks @SebTardif.
+- Discord: route gateway metadata REST lookups through the configured Discord proxy so proxied accounts do not fall back to direct `discord.com` connections before opening the WebSocket. Fixes #80227. Thanks @Clivilwalker.
+- Agents/media: hydrate current-turn image attachments from filename-derived MIME types so active vision can see generated or forwarded images whose source omitted an image content type. (#84812) Thanks @marchpure.
+- Agents/fs: point workspace-only scratch-path guidance at in-workspace temp directories while keeping host-root writes rejected by the tool guard. (#86501) Thanks @tianxiaochannel-oss88.
+- Agents/media: keep async cron media completions scoped to their run session while preserving direct delivery for stale generated-media success and failure notifications. (#86529) Thanks @ai-hpc.
+- Gateway: emit plugin `session_end`/`session_start` hooks when `agent.send` rotates or replaces a session id, keeping hook lifecycle state aligned with `sessions.changed` notifications. Fixes #83507. (#85875) Thanks @brokemac79.
+- OpenShell/SSH: reject malformed generated exec commands before sandbox/session setup so unresolved workflow placeholders fail fast instead of reaching the remote shell. Fixes #72373. Thanks @brokemac79.
+- Google: stop normalizing `gemini-3.1-flash-lite` to the retired preview endpoint and update Flash Lite alias guidance to the GA model id. Fixes #86151. (#86240) Thanks @SebTardif.
+- Installer: make Alpine apk installs cover Git, verify the Node runtime floor, try `nodejs-current`, and report Alpine version guidance when repositories only provide older Node packages.
+- Agents/status: prefer the active Claude CLI OAuth auth label over an unused Anthropic env API-key label for equivalent runtime aliases. Fixes #80184. (#86570) Thanks @brokemac79.
+- Agents/media: send direct fallback for generated media still missing after an active requester wake fails. (#85489) Thanks @fuller-stack-dev.
+- Agents: derive overflow compaction budgets from provider-reported and synthetic over-budget token counts so confirmed context overflows compact before retrying. (#70473) Thanks @fuller-stack-dev.
+- Agents/Codex: recover Codex context-window prompt errors through overflow compaction and surface reset guidance when recovery is exhausted. (#85542) Thanks @fuller-stack-dev.
+- Agents/Codex: allow Codex app-server runs to bootstrap from `CODEX_API_KEY` or `OPENAI_API_KEY` when no Codex auth profile is configured.
+- Agents/Codex: keep selected Codex runtime routing on OpenAI-Codex while preserving direct OpenAI API-key compaction fallback. (#86408) Thanks @funmerlin and @VACInc.
+- Agent transcript: include OpenClaw agent session logs when finding local transcript candidates.
+- Crabbox: bootstrap raw AWS macOS shell commands wrapped in absolute `time` paths so RSS probes can run Node and pnpm on fresh macOS runners.
+- Crabbox: bootstrap raw AWS macOS shell commands even when setup statements precede Node or pnpm usage.
+- TUI/local: skip unnecessary secret resolution, gateway model catalog loading, bootstrap, and skill scans in explicit local-model runs so startup reaches the model request faster.
+- Sessions/doctor: load large session stores without clone amplification during read-only doctor checks and reclaim stale `sessions.json.*.tmp` sidecars. Fixes #56827. Thanks @openperf.
+- Tests: clean successful plugin gateway gauntlet isolated temp roots while keeping an explicit preservation switch for failed/debug runs.
+- Plugins/perf: reuse derived plugin metadata snapshots for the lifetime of the process so reply-time skill setup no longer rescans plugin metadata on every turn.
+- Discord/OpenAI voice: keep wake-name master consults using the current speaker context after ignored ambient transcripts and shorten the default capture silence grace.
+- Doctor: skip redundant Gateway restart prompts when a recent supervisor restart leaves the Gateway healthy. Fixes #86518. (#86533) Thanks @liaoyl830.
+- Cron: restore suspended cron lanes to the configured/default concurrency instead of falling back to one after quota or circuit-breaker auto-resume.
+- Gateway: keep session-only Control UI tool-start mirrors flowing during diagnostic queue pressure instead of silently dropping non-terminal tool updates.
+- Agents/memory: return optional not-found context for missing date-only daily memory reads instead of logging benign first-run `ENOENT` failures. Fixes #82928. Thanks @galiniliev.
+- Discord: merge streamed text captions into following media block replies so captions and attachments send as one message. (#86487) Thanks @neeravmakwana.
+- Gateway: avoid sending duplicate tool-event frames to Control UI connections that are subscribed by both run and session.
+- Discord/OpenAI voice: accept broader edge-position fuzzy wake-name transcripts while keeping ambient speech gated.
+- Discord/OpenAI voice: accept longer leading wake-name mistranscripts such as "Open Club" for OpenClaw.
+- Agents/OpenAI-compatible: stop ModelStudio-compatible chat requests before sending system/tool-only payloads that have no usable user or assistant turn. (#86177) Thanks @TurboTheTurtle.
+- Gateway/plugins: reuse plugin package realpath checks while building installed plugin indexes so startup avoids repeated filesystem resolution work.
+- Kilo Gateway: send string `stop` sequences as arrays so Kilo accepts OpenAI-compatible chat completions. (#86461) Thanks @SebTardif.
+- Discord/OpenAI voice: accept leading fuzzy wake-name transcripts such as "Monty" or "Moti" for a Molty agent while keeping ambient speech gated.
+- Media understanding: convert HEIC and HEIF images to JPEG before image description providers run so iPhone photos work in direct and configured image-description flows. (#86037)
+- Agents: release embedded-attempt session locks from outer teardown so post-prompt exceptions cannot wedge later requests behind `SessionWriteLockTimeoutError`. Fixes #86014. Thanks @openperf.
+- Discord/OpenAI voice: rotate Realtime sessions at provider max duration without logging the expected session-expiry event as an error.
+- Sessions: skip metadata-only entries during QMD-slugified session lookup so one incomplete row does not block transcript hit resolution. (#86327) Thanks @abnershang.
+- Agents/media: derive bundled plugin local-media trust from plugin tool metadata instead of importing the full plugin registry on subscription paths. (#84409) Thanks @samzong.
+- Image tool: keep config-backed custom-provider API keys usable for auto-discovered vision models, including deferred image-tool execution without env keys or auth profiles. (#85733)
+- Memory/local embeddings: run local GGUF embeddings in an isolated worker sidecar and degrade to configured fallback or keyword search on worker failure so native embedding crashes do not take down the Gateway. (#85348) Thanks @osolmaz.
+- Gateway: clear the runtime config snapshot before `SIGUSR1` in-process restarts so config changes survive the next gateway loop. (#86388) Thanks @XuZehan-iCenter.
+- Models: show OAuth delegation markers as configured `models.json` auth while keeping runtime route usability checks strict. (#86378) Thanks @rohitjavvadi.
+- Cron: seed active scheduled and manual cron task rows with a progress summary so status surfaces do not look blank while jobs run. (#86313) Thanks @ferminquant.
+- Cron: preserve unsupported persisted cron payload rows during routine store writes while keeping those rows non-runnable. Fixes #84922. (#86415) Thanks @IWhatsskill.
+- Updater: exclude prerelease git tags from stable channel resolution so source updates do not check out newer alpha/rc/preview/canary tags. (#86260) Thanks @stevenepalmer.
+- Security/Audit: flag webhook `hooks.token` reuse of active Gateway password auth in `openclaw security audit` while keeping password-mode startup compatibility. (#84338) Thanks @coygeek.
+- QQ Bot: derive the outbound reply watchdog from configured agent and provider timeouts so slow local model replies are not cut off at five minutes. Fixes #85267. (#85271) Thanks @SymbolStar.
+- Agents/heartbeat: stop heartbeat turns after the first valid `heartbeat_respond` so repeated response loops do not burn tokens. (#86357) Thanks @udaymanish6.
+- Tasks: keep retained lost tasks out of default status health counts, explain their cleanup window during maintenance, and prune lost task records after 24 hours instead of the general 7-day terminal retention.
+- Memory-core: keep REM dreaming focused on live light-staged memories and mark staged entries as considered so old recall history no longer dominates fresh candidates. (#86302) Thanks @SebTardif.
+- Memory: abort sync instead of downgrading an existing semantic vector index to FTS-only when the configured embedding provider is temporarily unavailable. (#85704) Thanks @yaaboo-gif.
+- Telegram: propagate forum topic names through the account-scoped topic cache for native command context and topic create/edit actions. (#86299) Thanks @SebTardif.
+- Slack: keep downloaded read-only files out of reply media so Slack file reads do not echo files back to the conversation. (#86318) Thanks @neeravmakwana.
+- Cron: accept leading-plus relative durations such as `+5m` for one-shot `--at` schedules. (#86341) Thanks @mushuiyu886.
+- Agents/media: preserve async-started media tool metadata so background generation starts no longer surface generic incomplete-turn warnings while replay stays unsafe. (#85933) Thanks @fuller-stack-dev.
+- Docker E2E: dedupe scheduler lane resources so npm/service package lanes are not over-counted and serialized unnecessarily.
+- QA/diagnostics: add a collector-backed OpenTelemetry smoke lane, make the OTLP payload leak check scenario-aware, and keep source QA builds from failing on optional dependency imports resolved through pnpm's temp module path.
+- Crabbox: bootstrap Git metadata for sparse remote changed gates so raw synced workspaces can run `pnpm check:changed` from the intended diff.
+- xAI/LM Studio: avoid buffering ordinary bracketed or `final` prose until stream completion while watching for plain-text tool-call fallbacks.
+- Doctor: warn and continue when the cron job store exists but cannot be read so later health checks still run. Fixes #86102. (#86384) Thanks @1052326311.
+- Discord: suppress a bot's previous reply body and referenced media from prompt context when a user replies to that bot message, while keeping reply metadata for routing. (#86238) Thanks @fuller-stack-dev.
+- Discord: restore bare numeric channel IDs for outbound message-tool sends while keeping explicit DM targets unambiguous. (#86571) Thanks @joshavant.
+- Docker E2E: avoid rebuilding the Control UI twice while preparing the shared OpenClaw package tarball for package-backed scenario runs.
+- Tests: avoid rebuilding the Control UI twice during the installer Docker smoke now that `pnpm build` includes `ui:build`.
+- Tests: give QA config mutation RPCs enough native Windows budget to finish gateway config writes and restart settle after hot scenario runs.
+- Tests: keep the gateway restart-inflight QA scenario focused on restart recovery on native Windows by allowing expected embedded prompt handoff errors and using the Windows-safe timeout budget.
+- QA-Lab: make the synthetic OpenAI provider honor generic `reply exactly:` directives after required kickoff reads so restart-recovery scenarios do not fall through to generic repo-summary prose.
+- Gateway: abort active `agent` RPC runs during forced restart shutdown so stale in-process turns cannot keep writing a session after the Gateway lifecycle restarts.
+- Crabbox: sync clean sparse worktrees through a temporary full checkout even when reusing an existing lease so tracked build-time files are not omitted.
+- Build: route `scripts/ui.js` through the shared pnpm runner and keep Control UI chunking helpers in sparse-included source so native Windows Corepack builds can produce `dist/control-ui`.
+- Tests: give the memory fallback QA scenario enough turn budget to exercise native Windows gateway runs instead of failing on the client timeout while the mock agent is still dispatching.
+- Tests: collect QA gateway CPU/RSS metrics on native Windows and give the channel baseline enough turn budget to report slow gateway runs instead of timing out before proof.
+- Install/update: bypass npm `min-release-age` policies with `--min-release-age=0` instead of `--before` so hosted installers keep working on npm versions that reject the combined config. (#84749) Thanks @TeodoroRodrigo.
+- Diagnostics: reclaim wedged session lanes when stale active-run bookkeeping blocks queued work despite no forward progress. Fixes #85639. Thanks @openperf.
+- WebChat: keep message-tool replies visible in the chat while still summarizing internal tool results for the model. Fixes #86347. Thanks @shakkernerd.
+- Gateway/perf: fail startup benchmark samples when the Gateway process exits before benchmark teardown, including signal deaths after readiness probes.
+- Gateway/perf: fail restart benchmark samples when the Gateway exits before benchmark teardown, including clean exits and signal deaths after successful restart probes.
+- Agents/tests: keep model catalog visibility on static selection helpers so catalog visibility checks avoid the broad model-selection barrel import.
+- Agents/commitments: serialize commitment store load-modify-save writes so concurrent heartbeat and CLI updates no longer lose dismissal, sent, or attempt state. (#81153) Thanks @ai-hpc.
+- xAI/LM Studio: promote plain-text tool-call fallbacks into structured tool calls and strip leaked internal tool syntax before user-facing delivery. (#86222) Thanks @fuller-stack-dev.
+- CLI: suppress benign self-update version-skew warnings during package post-update finalization.
+- Gateway/perf: tighten restart and startup benchmark failure handling so long profiling runs, failed probes, and fresh Linux runners no longer produce false passing or `n/a` results.
+- Checks: keep intentional Knip unused-file findings optional so full CI and sparse proof workspaces stay aligned.
+- Docker: restore writable `~/.config` in runtime images. Fixes #85968. Thanks @hkoessler and @Bartok9.
+- Plugin SDK: keep legacy root diagnostic subscriptions connected when built plugin SDK aliases resolve diagnostic helpers through a separate module graph.
+- Diagnostics: export alertable OTel and Prometheus signals for blocked tools, model failover, stale sessions, liveness warnings, oversized payloads, and webhook ingress while fixing shared OTLP endpoints with query strings.
+- Tests: normalize macOS canonical temp paths in exec allowlists, fs-safe trash assertions, installed plugin matching, Telegram topic-name stores, and built ACPX MCP server expectations so native macOS proof runners cover the intended behavior.
+- Codex/app-server: preserve message-tool-only source reply delivery mode on active runs so sub-agent completion wakeups can steer the active Codex turn instead of being rejected. (#86287) Thanks @ferminquant.
+- Tests: sample the Windows kitchen-sink RPC gateway directly and serialize RSS probes so native runs keep the memory guard active.
+- Tests: normalize bundled plugin lifecycle probe paths and state-root lookup so native Windows release sweeps accept valid packaged plugin installs.
+- Agents/Claude CLI: route live native Bash permission requests through OpenClaw exec policy so Claude turns no longer stall on `control_request`, and document that OpenClaw exec policy is authoritative. Fixes #80819. (#86330, from #81971) Thanks @guthirry and @sallyom.
+- Security audit: warn when YOLO OpenClaw exec policy overrides a restrictive raw Claude `--permission-mode` for managed live sessions. (#86557) Thanks @sallyom.
+- Config: keep benign legacy metadata write anomalies out of default doctor and config command output while preserving explicit anomaly logging for diagnostics.
+- Codex: log when implicit app-server `never` approvals are promoted for OpenClaw tool policy, including whether the trigger was a `before_tool_call` hook or trusted tool policy.
+- Codex harness: make subscription usage-limit errors without reset times explain that OpenClaw cannot determine the reset and advise users to wait until Codex is available, use another Codex account, or switch to another configured model/provider. Thanks @amknight.
+- Google Vertex: support production ADC modes such as Workload Identity Federation, service-account credentials, and metadata-server ADC for the native Vertex transport. (#83971) Thanks @damianFelixPago.
+- Telegram: route normal `[telegram][diag]` polling diagnostics through `runtime.log` while keeping non-diag warnings and persistence failures on `runtime.error`, so healthy polling startup no longer looks like an error. Fixes #82957. (#82958) Thanks @galiniliev.
+- Providers/Ollama: strip inline Kimi cloud reasoning prefixes from streamed and final visible replies while keeping ordinary Kimi answers append-only. (#86286) Thanks @jason-allen-oneal.
+
+- Gateway: require Talk secret authority before setup-code handoff can include Talk secrets. (#85690) Thanks @ngutman.
+- Agents: keep fallback error reporting scoped to the active model candidate so stale prior-provider quota/auth text is not reported for later fallback attempts. (#86134) Thanks @zhangguiping-xydt.
+- iMessage: dedupe watcher startup when `channels.imessage.accounts` lists both `default` and a named account that point at the same local Messages source, so the gateway no longer spawns two `imsg rpc` processes or doubles inbound replies. The dedupe is scoped to watcher startup, leaving duplicate accounts addressable for outbound sends, status, and capability listings, and `openclaw doctor` flags the redundant account with a rebinding hint. Fixes #65141. (#86705) Thanks @swang430.
+
 ## 2026.5.22

 ### Changes

+- Gateway/perf: reuse process-stable channel catalog reads, avoid repeated bundled-channel boundary checks, and rotate gateway watch CPU profiles so benchmark runs do not accumulate unbounded artifacts.
+- Gateway/perf: reuse immutable plugin metadata snapshots across startup, config, model, channel, setup, and secret metadata readers so hot paths avoid repeated plugin file stats and manifest registry reloads.
+- Gateway/perf: lazy-load startup-idle plugin work, core gateway method handlers, and the embedded ACPX runtime so Gateway health and ready signals no longer wait on unused handler trees or ACPX probes.
+- Gateway/perf: cache plugin SDK public-surface alias maps and skip irrelevant macOS Linuxbrew PATH probes so Gateway startup avoids repeated filesystem walks and slow missing-directory stats.
+- Transcripts: add the initial transcript capture and source-provider foundation, including auto-start capture config, manual transcript imports, read-only transcript access, and Discord voice as the first live source.
+- Docs/channels/config: add Signal `configPath`, Telegram wildcard topic defaults, local-time backup archive names, Termux home fallback, include-path validation, secret-scanner-safe placeholder guidance, Gemini CLI/Antigravity media guidance, and macOS VM auto-login guidance. Thanks @NorseGaud, @yudistiraashadi, @huangqian8, @VibhorGautam, @maweibin, @tianxingleo, @IgnacioPro, and @xzcxzcyy-claw.
+- Docs: clarify model-usage portability, Codex migration prerequisites, status bootstrap wording, thread-bound subagent limits, hook ownership, and config-preserving safety guidance. Thanks @aniruddhaadak80, @leno23, @TomDjerry, @matthewxmurphy, @vincentkoc, and @stablegenius49.
 - Docs: clarify README onboarding and Gateway startup paths, WhatsApp QR/408 recovery, cron output language prompts, skill advanced features, gateway upstream 403 troubleshooting, and plugin fallback override guidance. Thanks @deepujain, @Zacxxx, @Jah-yee, @neyric, @usimic, @Renu-Cybe, @BigUncle, and @SeashoreShi.
 - Docs: clarify context-pruning ratio bounds, local dashboard recovery, CLI env markers, remote onboarding token behavior, and Peekaboo Bridge permissions for subprocess agents. Thanks @ayesha-aziz123, @dishraters, @hougangdev, and @brandonlipman.
 - Docs: clarify browser CDP diagnostics, Plugin SDK allowlist imports, status-reaction timing defaults, queue steering behavior, limited-tool troubleshooting, cron HEARTBEAT handling, Telegram multi-agent groups, Bitwarden SecretRef setup, and EasyRunner deployments. Thanks @Quratulain-bilal, @mbelinky, @Mickey-, @vancece, @xenouzik, @posigit, @surlymochan, @janaka, and @choiking.
@@ -17,16 +284,17 @@ Docs: https://docs.openclaw.ai
 - Media understanding: stop auto-probing Gemini CLI and use Antigravity CLI only as a lower-priority image/video fallback after configured provider APIs.
 - Agents/subagents: limit default sub-agent bootstrap context to `AGENTS.md` and `TOOLS.md`, keeping persona, identity, user, memory, heartbeat, and setup files out of delegated workers by default. (#85283) Thanks @100yenadmin.
 - Maintainer skills: exclude plugin SDK/API boundary work from `openclaw-landable-bug-sweep` so bugbash sweeps stay focused on small paper-cut fixes.
+- QA-Lab/diagnostics: extend the OpenTelemetry smoke harness to prove trace, metric, and log export, and add first-class Prometheus and observability smoke aliases.
 - Plugin SDK: add a generic channel-message poll sender so channel plugins can expose poll delivery without depending on channel-specific SDK facades.
 - Crabbox: keep the local wrapper's provider validation synced with the installed Crabbox binary while preserving supported aliases such as `docker` and `blacksmith`. (#85302) Thanks @hxy91819.
 - Maintainer skills: add `openclaw-landable-bug-sweep` for producing five small, reviewed, CI-green OpenClaw bugfix PRs from issue/PR sweeps.
 - Control UI/chat: add search and Load More pagination to the chat session picker, keeping initial session loads bounded while making older conversations reachable. (#85237) Thanks @amknight.
 - CLI/onboarding: start classic onboarding when bare `openclaw` runs before an authored config exists, while keeping configured installs on Crestodian. (#72343) Thanks @fuller-stack-dev.
+- Agents/runtime: internalize the former Pi agent runtime into OpenClaw, remove legacy package dependencies, and keep Pi-named SDK aliases only as deprecated plugin compatibility.
 - Discord: allow configuring a bounded `agentComponents.ttlMs` callback registry lifetime for long-running component workflows, with per-account overrides and a 24-hour cap. (#84189) Thanks @100menotu001.
 - xAI/Grok: reuse xAI OAuth auth profiles for Grok `web_search`, thread active-agent auth through web search, add Grok model aliases, and let media providers declare default operation timeouts. (#85182) Thanks @fuller-stack-dev.
 - Plugin SDK: add row-level session workflow helpers and deprecate `loadSessionStore` so plugins can read and patch sessions without depending on the legacy whole-store shape. (#84693) Thanks @efpiva.
 - Gateway/plugins: reuse a compatible Gateway startup plugin registry during dispatch so safe plugin dispatches avoid redundant registry loading. (#84324) Thanks @ai-hpc.
- Control UI/debugging: add an explicit source-only Traces view for local LLM request debugging, including full prompt and tool payload capture behind `OPENCLAW_DEV_EXTENDED_TRACING`. Thanks @amknight.
 - Plugins/SDK: add a general `embeddingProviders` capability contract and registration API so embeddings can become a reusable provider surface outside memory-specific adapters.
 - Dependencies: refresh provider, plugin, UI, and tooling packages, update `protobufjs` to 8.4.0 to clear the current npm advisory, and carry the Claude ACP completion patch forward to `@agentclientprotocol/claude-agent-acp` 0.36.1.
 - Agents/tools: remove the old sender-owner tool gating path so configured tools stay visible for trusted sessions while command and channel-action auth still carry real sender identity.
@@ -48,13 +316,60 @@ Docs: https://docs.openclaw.ai

 ### Fixes

+- WebChat: summarize internal message-tool source replies so tool cards no longer duplicate the visible reply body. (#84773) Thanks @jason-allen-oneal.
+- Gateway: preserve deferred lifecycle-error cleanup across later non-terminal events so provider timeouts can persist failed session state instead of leaving sessions stuck running. Fixes #63819. (#85256) Thanks @samzong.
+- Agents/subagents: report tool-only child progress during timeout summaries instead of showing no visible output.
+- Telegram/ACP: preserve explicit `:topic:` conversation suffixes when inbound ACP targets do not carry a separate thread id.
+- Browser/proxy: bypass the managed proxy for the exact local managed Chrome CDP readiness and DevTools WebSocket endpoints, so `openclaw browser start` works when the operator proxy blocks loopback egress. (#83255) Thanks @lightcap.
+- Ollama: bypass the managed proxy for configured local embedding origins while keeping SSRF guardrails on unconfigured targets. Thanks @Kaspre.
+- OpenAI/images: route Codex API-key image generation through the native OpenAI Images API instead of the Codex OAuth streaming backend, avoiding 401s from valid API keys.
+- Agents/OpenAI completions: omit empty tool payload fields for proxy-like OpenAI-compatible endpoints so strict vLLM-style servers accept tool-free turns. (#85835) Thanks @rendrag-git.
+- Checks/Windows: route full `pnpm check` stage commands through the managed child runner so Windows avoids Node shell-argv deprecation warnings there too.
+- Checks/Windows: run managed child commands through explicit `cmd.exe` wrapping instead of Node shell mode with argv, avoiding Node 24 subprocess deprecation warnings during changed checks.
+- Gateway: omit internal stream-error placeholder entries from agent prompt history so failed assistant turns are not replayed as model-authored text. (#85652) Thanks @anyech.
+- Sessions: enforce the session write-lock max-hold policy during lock acquisition so long-held locks can be reclaimed before the stale-lock window. (#85764) Thanks @njuboy11.
+- Models: prune retired Groq, GitHub Copilot, OpenAI, xAI, and old Claude catalog entries, with doctor migration to upgrade existing configs to current provider refs.
+- Doctor/update: recognize junction-backed source checkouts as git installs by comparing canonical paths before showing package-manager update guidance. Fixes #82215. Thanks @igormf.
+- Channels: honor `/verbose on` for tool/progress summaries across direct chats, groups, channels, and forum topics while preserving quiet default behavior. (#85488) Thanks @kurplunkin.
+- CLI/skills: show an all-ready note with next-step commands when skill setup has no missing dependencies to install. (#85032) Thanks @aniruddhaadak80.
+- Microsoft Foundry: route DeepSeek V4 Pro and Flash models through the Foundry Responses API while keeping older DeepSeek models on their existing path. (#85549) Thanks @roslinmahmud.
+- Status/usage: show configured cost estimates for AWS SDK models in full usage output while keeping token-only usage replies cost-free. (#85619) Thanks @ItsOtherMauridian.
+- Agents/OpenAI Responses: retry non-visible reasoning-only turns for OpenAI Responses API families instead of treating them as empty failed turns. (#85603) Thanks @SebTardif.
+- Directive tags: preserve message and content-part object identity when display stripping makes no directive-tag changes. (#85682) Thanks @willamhou.
+- Telegram: send local `path`/`filePath` and structured attachment media from `sendMessage` actions instead of dropping them or sending text-only messages. (#85219) Thanks @keshavbotagent.
+- Sessions/status: show the estimated context budget when fresh provider usage is unavailable and clear stale estimates across session resets and compaction boundaries. (#84830) Thanks @giodl73-repo.
+- Gateway/config: pin relative `OPENCLAW_STATE_DIR` overrides to an absolute path at startup so later working-directory changes cannot retarget gateway state. (#52264) Thanks @PerfectPan.
+- Release/package: run npm release, prepublish, and postpublish verification through Windows-safe npm command shims so native Windows checks can execute `npm.cmd` instead of treating it as a binary.
+- Agents/harness: pass CLI runtime aliases through harness selection so provider-owned CLI aliases no longer get rejected before reaching the right runtime. (#85631) Thanks @potterdigital.
+- Secrets: show the irreversible apply warning after interactive `secrets configure` confirmation so confirmed migrations still get the final safety prompt. (#85638) Thanks @alkor2000.
+- Agents/CLI output: ignore cumulative Claude `stream-json` result usage when assistant usage events are present, preventing inflated cache-read accounting. (#85625) Thanks @zhouhe-xydt.
+- CLI: keep `waitForever()` alive by leaving its keep-alive interval ref'd so the public helper no longer exits immediately with Node's unsettled-await code. (#85694) Thanks @m1qaweb.
+- Agents/bootstrap: guard bootstrap name checks against missing file names so malformed bootstrap entries warn and truncate instead of crashing. Fixes #85523. (#85615) Thanks @zhouhe-xydt.
+- CLI/tasks: reject partially numeric `openclaw tasks audit --limit` values so audit limits must be real positive integers instead of accepting strings like `5abc`. (#84901) Thanks @jbetala7.
+- Status/diagnostics: bound deep Docker audit probes so `openclaw status --deep` reports slow container checks instead of hanging behind unbounded inspection. (#85476) Thanks @giodl73-repo.
+- Providers/Anthropic: migrate 1M context handling to GA-capable Claude 4.x models by sizing eligible models at 1M without the retired `context-1m-2025-08-07` beta, ignoring that retired beta in older configs, and preserving OAuth-required Anthropic beta headers. (#45613) Thanks @haoyu-haoyu.
+- Cron/Telegram: parse forum-topic delivery targets through the Telegram plugin instead of cron core, including `:topic:` and `:topicId` forms for announce delivery. Thanks @etticat.
+- Twitch: keep stale message-handler cleanup callbacks from removing newer handler registrations for the same account, preserving inbound message delivery after reconnects. Fixes #83888. (#85425) Thanks @alkor2000.
+- Memory/LanceDB: expose public memory artifacts through the active memory provider bridge so memory-wiki imports durable memory files, daily notes, dream reports, and event logs without depending on memory-core internals. Fixes #83604. (#85060) Thanks @brokemac79.
+- Crabbox: keep AWS hydration compatible with local Actions replay by inlining the hydrate workflow's Node/pnpm setup instead of invoking repo-local composite actions.
+- Agents/subagents: simplify native sub-agent completion handoff so children report their latest visible assistant result to the requester without using `message`, while keeping parent-owned message-tool delivery policy intact. Fixes #85070. (#85089) Thanks @brokemac79.
+- Docker setup: stop printing the Gateway bearer token in setup logs and printed follow-up commands.
+- Agents: let embedded compaction fallback retries proceed when PI-compatible candidates do not need agent harness plugin preparation.
+- Agents/tools: honor configured custom provider API keys when deciding whether media, image-generation, video-generation, music-generation, and PDF tools are available. (#85570)
+- StepFun: stop advertising stale generic API key auth choices so onboarding only offers runtime-backed Standard and Step Plan choices.
+- Diagnostics: keep OpenTelemetry log bodies behind explicit content capture and scrub scoped agent-session keys from OpenTelemetry and Prometheus labels while preserving bounded queue-lane prefixes.
+- Windows installer: fail Git checkout installs when `pnpm install` or `pnpm build` fails instead of writing a wrapper to a missing CLI build.
+- Sessions: surface previous-transcript archive failures during `/new` rotation so disk rename errors are logged instead of silently hiding stranded transcript files. Fixes #81984. (#85586, from #82081) Thanks @0xghost42.
+- TUI/agents: mirror internal-ui message-tool replies into final chat output so message-tool-only agents remain visible in `openclaw tui`. Fixes #85538. Thanks @danpolasek.
 - Agents: keep parallel OpenAI-compatible tool-call deltas in separate argument buffers so interleaved tool calls no longer corrupt streamed arguments. (#82263) Thanks @luna-system.
 - Memory/doctor: report missing or unusable QMD workspace directories as workspace failures instead of generic binary failures. (#63167) Thanks @sercada.
 - Debug proxy: record CONNECT client-socket errors and destroy the paired upstream socket so abrupt client disconnects no longer leak tunnel resources. (#82444) Thanks @SebTardif.
 - Diffs: continue hydrating later diff cards when one card fails so a single broken card no longer blanks the whole diff viewer. (#84775) Thanks @cosmopolitan033.
 - Mac app: use the native settings sidebar window chrome so the sidebar toggle stays on the left and content no longer clips under oversized titlebar padding.
+- QA-Lab/Codex: bundle auth/plugin fixture imports for flow scenarios and let terminal async media tools end Codex app-server turns without timing out. (#80397, refs #80323) Thanks @100yenadmin.
 - Gateway/agents: preserve fresh session overrides and metadata when stale cached agent-session entries race with store updates, so subagent model/provider overrides and routing policy survive concurrent writes. (#19328) Thanks @CodeReclaimers.
 - Control UI/chat: keep chat session search inline with the session selector so the header no longer shows a duplicate standalone search row.
+- Control UI/chat: collapse focused-mode header chrome and suppress hidden-header scroll updates so focus mode no longer jumps while scrolling. Thanks @amknight.
 - Codex app-server: restart the native app-server and retry once when server-side compaction times out, so preflight compaction stalls recover instead of failing every dispatch. (#85500)
 - Restore Control UI gateway token pairing [AI]. (#85459) Thanks @pgondhi987.
 - OpenAI video: honor configured provider request private-network opt-in for local/custom video endpoints so explicitly trusted mock and self-hosted providers are not blocked. Thanks @shakkernerd.
@@ -108,6 +423,7 @@ Docs: https://docs.openclaw.ai
 - Providers/Gemini: strip fractional seconds from web-search time range filters so Gemini accepts freshness-bound search requests. (#85071) Thanks @Noerr.
 - OpenAI Codex: preserve image input support for sparse `openai-codex/gpt-5.5` catalog rows. (#85095) Thanks @sercada.
 - CLI/models: add a piped or pasted API-key path for OpenAI Codex auth and warn when API keys are pasted into token-mode auth. (#85533) Thanks @joshavant.
+- Telegram: dead-letter missing-harness isolated ingress failures so a poisoned spooled update no longer blocks later same-lane messages. Fixes #85470. (#85605) Thanks @joshavant.
 - Plugins/discovery: strip `-plugin` package suffixes when deriving plugin id hints so package names line up with manifest ids. (#85170) Thanks @JulyanXu.
 - Tlon: stop advertising a non-existent agent tool contract in the plugin manifest.
 - Telegram: preserve fenced code block languages through Markdown rendering so Telegram receives `language-*` code classes. (#85209) Thanks @leno23.
@@ -144,6 +460,7 @@ Docs: https://docs.openclaw.ai
 - Channels/message tool: resolve configured external channel plugins during in-agent channel selection, so `openclaw agent --local` message-tool sends no longer report an available channel as unavailable. (#85022) Thanks @Kaspre.
 - Agents/heartbeat: honor group/channel `message_tool` visible-reply policy and model-specific Codex runtime config for scheduled heartbeat runs, so failed internal tool output stays private. Fixes #85310. (#85357) Thanks @neeravmakwana.
 - Gateway/ACP: close child ACP sessions spawned via `sessions_spawn` when their parent session is reset or deleted, instead of leaving orphaned `claude-agent-acp` processes that accumulate and exhaust memory. Fixes #68916. (#85190) Thanks @openperf.
+- Codex app-server: block native execution paths when OpenClaw exec resolves to a node host while preserving the first-party CLI node binding path. Fixes #85012. (#85534) Thanks @joshavant.
 - Diagnostics: bound cleanup timeout detail logs, emit drop summaries when async diagnostic bursts exceed the queue cap, and surface async queue drops through diagnostic telemetry.
 - Agents/subagents: surface blocked child-run completions as errors instead of successful subagent finishes. (#80886) Thanks @TurboTheTurtle.
 - Context engines: fail closed with a descriptive error when the selected agent runtime cannot satisfy declared context-engine host requirements.
@@ -230,6 +547,8 @@ Docs: https://docs.openclaw.ai

 ### Fixes

+- Agents: validate a forced plugin harness against the candidate provider/model before pinning it, so unsupported fallback-chain candidates fail with a clear harness error instead of producing a late `Model provider X not found` from the underlying harness. Codex harness `supports()` now also accepts the canonical `openai` and `openai-codex` routing ids so documented Codex configs keep working. Thanks @cathrynlavery.
+- Control UI/WebChat: keep selected external-channel sessions live by mirroring Codex prompts at turn start, streaming hidden runs only to exact selected-session subscribers, and deduplicating accumulated stream snapshots around tool cards. Fixes #83528, #82611, refs #83949. Thanks @BunsDev.
 - CLI/tasks: include stale-running task maintenance decisions in `openclaw tasks maintenance --json` so retained and reconcile candidates explain backing-session, cron, CLI, and wedged-subagent state. (#84691) Thanks @efpiva.
 - Codex app-server: keep system-prompt reports working when bootstrap hooks provide workspace files with only a path and content, so hook-supplied SOUL/IDENTITY/TOOLS/USER context still reports injected characters correctly. (#84736) Thanks @JARVIS-Glasses.
 - Providers/MiniMax music: stop advertising `durationSeconds` control and remove prompt-injected duration hints, so `music_generate` reports MiniMax duration as an unsupported override instead of suggesting MiniMax can enforce track length. Fixes #84508. Thanks @neeravmakwana.
@@ -297,6 +616,7 @@ Docs: https://docs.openclaw.ai
 - Agents/Codex: keep encrypted Responses reasoning replay provenance-bound so stale mirrored Codex transcripts drop invalid encrypted content before request assembly while preserving matching same-session replay. Fixes #83836. (#84367) Thanks @joshavant.
 - Agents/subagents: skip stale embedded-run wake probes for dormant completion requesters, so late subagent completions go straight to requester-agent/direct handoff instead of producing `reason=no_active_run` queue noise. (#82964) Thanks @galiniliev.
 - CLI: retry config snapshot reads after a transient failure so one rejected read no longer poisons later commands in the same process. (#83931) Thanks @honor2030.
+- TUI: handle German-layout Kitty keyboard input by ignoring printable release events and accepting AltGr-produced printable characters such as `@` and `€`. Fixes #48897.
 - Media: decode URL path basenames before using them as remote media fallback filenames, so files like `My%20Report.pdf` are surfaced as `My Report.pdf`. Fixes #84050. (#84052) Thanks @jbetala7.
 - WhatsApp: clarify inbound group diagnostics so observed but unregistered groups point to `channels.whatsapp.groups` without changing routing or sender authorization. (#83846) Thanks @neeravmakwana.
 - WhatsApp: drain pending outbound deliveries on a 30s periodic timer in addition to the reconnect handler, so messages enqueued while the provider is already connected no longer wait for the next reconnect to send. (#79083) Thanks @Oviemudiaga.
@@ -365,6 +685,8 @@ Docs: https://docs.openclaw.ai
 - CLI: reject explicit port numbers above 65535 before they reach Gateway or Node bind paths. Fixes #83900. (#84008) Thanks @hclsys.
 - Codex app-server: preserve plugin tool auth profiles when Codex owns model transport so OpenClaw dynamic tools can resolve their provider credentials. (#83603) Thanks @rubencu.
 - Memory/search: scan the JS-side fallback vector path (used when the sqlite-vec index is unavailable or has a mismatched dimension) in bounded rowid batches and yield to the event loop between batches so large chunk tables can no longer pin the Node.js main thread for multi-second windows. Also keeps the SQL prepared statement rooted in a local so node:sqlite cannot finalize it mid-scan under heap pressure. Fixes #81172. Thanks @dev23xyz-oss.
+- Telegram: preserve inbound bold, italic, code, preformatted, strikethrough, underline, spoiler, and text-link entities as markdown in the agent-facing prompt body. Fixes #52859.
+- Backup: dereference hardlinks during archive creation and reject unsafe hardlink targets during verification so archives that pass `backup verify` do not fail broad extraction on macOS tar. Fixes #54242. Thanks @jason-allen-oneal.
 - Memory Wiki: preserve fs-safe diagnostics when bridge source page writes fail for non-symlink filesystem safety reasons, so directory collisions are reported with the underlying error code. (#83776) Thanks @TurboTheTurtle.
 - Telegram: keep forum topics from blocking sibling topic traffic by routing inbound serialization, media/text buffers, and account API queues on topic-aware lanes. (#83829)
 - Telegram: keep queued forum-topic follow-up messages from inheriting superseded source abort signals, so later same-topic user turns can still run and reply after an active turn is replaced. (#83827) Thanks @VACInc.
@@ -1591,6 +1913,7 @@ Docs: https://docs.openclaw.ai
 - Agents/read tool: treat positive offsets beyond EOF as empty ranges instead of surfacing the upstream read error, so stale pagination cursors no longer crash tool calls while unrelated read failures still fail loud. Fixes #62466. (#75536) Thanks @vyctorbrzezowski.
 - Google/Gemini: normalize retired Gemini 3 Pro Preview refs left in Google API-key onboarding model allowlists and fallbacks, so setup-emitted config keeps testing `google/gemini-3.1-pro-preview` instead of `google/gemini-3-pro-preview`.
 - Telegram/context: bound selected topic context to the active session so messages from before `/new` or `/reset` are not replayed into later turns. (#80848) Thanks @VACInc.
+- Docs/providers/openai: clarify that OpenAI Realtime voice goes through the OpenAI Platform Realtime API and requires Platform credits — Codex/ChatGPT subscription quota does not cover this route. Fixes #76498. Thanks @lonexreb.
 - Google/Gemini: normalize retired nested Gemini 3 Pro Preview ids when resolving exact configured proxy-provider refs, so `kilocode/google/gemini-3-pro-preview` resolves to `kilocode/google/gemini-3.1-pro-preview` for Gemini 3.1 testing.
 - CLI: strip generic OSC terminal escape payloads from sanitized output fields, preventing clipboard/title escape bodies from leaking into commitment tables and other terminal-safe text. Thanks @shakkernerd.
 - Codex app-server: match connector-backed plugin approval elicitations by stable connector id so enabled destructive actions no longer fall through to display-name-only rejection.
@@ -1835,6 +2158,7 @@ Docs: https://docs.openclaw.ai
 - Telegram/groups: include the recent local chat window and nearby reply-target window as generic inbound context so stale reply ancestry does not overshadow the live group conversation.
 - Plugins/Nix: allow externally configured plugin roots under `/nix/store` to load in `OPENCLAW_NIX_MODE=1` while keeping normal external plugin hardlink rejection unchanged. Thanks @joshp123.
 - Nextcloud Talk: include the required bot `response` feature in setup, explain missing `--feature response` on rejected sends, and surface missing response capability in doctor/status checks. Fixes #78935. (#79657) Thanks @joshavant.
+- Cron/diagnostics: emit the existing `message.queued`, `session.state` (processing/idle), and `message.processed` lifecycle events for isolated-cron agent turns in `runCronIsolatedAgentTurn`, matching the dispatch and embedded-runner paths so subscribers (diagnostics OTLP, OTel exporters, custom observability plugins) get per-run session attribution instead of bucketing isolated cron LLM calls under static fallback ids. Events are gated on `isDiagnosticsEnabled(cfg)` so the documented `diagnostics.enabled: false` master toggle continues to silence the recorder. (#79214) Thanks @arniesaha.
 - fix(discord): gate user allowlist name resolution [AI]. (#79002) Thanks @pgondhi987.
 - fix(msteams): gate startup user allowlist resolution [AI]. (#79003) Thanks @pgondhi987.
 - Infra/fetch-timeout: pass `operation` and `url` context to `buildTimeoutAbortSignal` from the music-generate reference fetch and the Matrix guarded redirect transport, so the `fetch timeout reached; aborting operation` warning carries actionable structured fields instead of a bare line. Fixes #79195. Thanks @pandadev66.
@@ -3169,6 +3493,7 @@ Docs: https://docs.openclaw.ai
 - CLI/plugins: refresh persisted plugin registry policy in place for `plugins enable` and `plugins disable`, so routine toggles no longer rebuild and hash every plugin source when the target is already indexed. Thanks @vincentkoc.
 - Windows/install: run npm from a writable installer temp directory and pin the Bedrock runtime dependency below a Windows ARM Node 24 npm resolver failure, so global OpenClaw installs no longer fail before onboarding. Thanks @mariozechner.
 - CLI/plugins: scope install and enable slot selection to the selected plugin manifest/runtime fallback, so plugin installs no longer load every plugin runtime or broad status snapshot just to update memory/context slots. Thanks @vincentkoc.
+- Browser/snapshot: propagate the configured snapshot timeout through the agent tool, Chrome MCP, and Playwright snapshot paths so snapshot actions honor the requested deadline instead of hanging. Fixes #72934. Thanks @masatohoshino.
 - Plugins/TTS: keep bundled speech-provider discovery available on cold package Gateway paths and add bundled plugin matrix runtime probes for health, readiness, RPC, TTS discovery, and post-ready runtime-deps watchdog coverage. Refs #75283. Thanks @vincentkoc.
 - Google Meet/Twilio: show delegated voice call ID, DTMF, and intro-greeting state in `googlemeet doctor`, and avoid claiming DTMF was sent when no Meet PIN sequence was configured. Refs #72478. Thanks @DougButdorf.
 - Plugins/tools: prefer built bundled plugin code during tool discovery and skip channel runtime hydration while preserving companion provider registrations, reducing per-run plugin-tool prep cost without dropping executable plugin tools. Fixes #75290. Thanks @thanos-openclaw.
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -107,6 +107,7 @@ For coordinated change sets that genuinely need more than 20 PRs, join the **#cl

 - Test locally with your OpenClaw instance
 - External PRs must include a filled **Real behavior proof** section in the PR body. Show the real setup you tested, the exact command or steps you ran after the patch, after-fix evidence, the observed result, and anything you did not test. Screenshots, recordings, terminal screenshots, console output, copied live output, linked artifacts, and redacted runtime logs all count. Unit tests, mocks, snapshots, lint, typechecks, and CI are useful but do not satisfy this requirement by themselves. Maintainers may apply `proof: override` only when the proof gate should not apply.
+- Keep PRs takeover-ready: open them from a branch maintainers can push to. For fork PRs, leave GitHub's **Allow edits by maintainers** option enabled so maintainers can finish urgent fixes, changelog entries, or merge prep when needed. If GitHub shows **Allow edits and access to secrets by maintainers**, enable it only when that workflow/secrets access is acceptable and say so in the PR.
 - Do not edit `CHANGELOG.md` in contributor PRs. Maintainers or ClawSweeper add the changelog entry when landing user-facing changes.
 - Run tests: `pnpm build && pnpm check && pnpm test`
 - For iterative local commits, `scripts/committer --fast "message" <files...>` passes `FAST_COMMIT=1` through to the pre-commit hook so it skips the repo-wide `pnpm check`. Only use it when you've already run equivalent targeted validation for the touched surface.
--- a/Dockerfile
+++ b/Dockerfile
@@ -60,7 +60,7 @@ COPY package.json pnpm-lock.yaml pnpm-workspace.yaml .npmrc ./
 COPY openclaw.mjs ./
 COPY ui/package.json ./ui/package.json
 COPY patches ./patches
-COPY scripts/postinstall-bundled-plugins.mjs scripts/preinstall-package-manager-warning.mjs scripts/npm-runner.mjs scripts/windows-cmd-helpers.mjs ./scripts/
+COPY scripts/postinstall-bundled-plugins.mjs scripts/preinstall-package-manager-warning.mjs scripts/npm-runner.mjs scripts/windows-cmd-helpers.mjs scripts/prepare-git-hooks.mjs ./scripts/
 COPY scripts/lib/package-dist-imports.mjs ./scripts/lib/package-dist-imports.mjs

 COPY --from=workspace-deps /out/packages/ ./packages/
@@ -178,6 +178,7 @@ COPY --from=runtime-assets --chown=node:node /app/package.json .
 COPY --from=runtime-assets --chown=node:node /app/pnpm-workspace.yaml .
 COPY --from=runtime-assets --chown=node:node /app/patches ./patches
 COPY --from=runtime-assets --chown=node:node /app/openclaw.mjs .
+COPY --from=runtime-assets --chown=node:node /app/src/agents/templates ./src/agents/templates
 COPY --from=runtime-assets --chown=node:node /app/${OPENCLAW_BUNDLED_PLUGIN_DIR} ./${OPENCLAW_BUNDLED_PLUGIN_DIR}
 COPY --from=runtime-assets --chown=node:node /app/skills ./skills
 COPY --from=runtime-assets --chown=node:node /app/docs ./docs
@@ -287,12 +288,17 @@ RUN ln -sf /app/openclaw.mjs /usr/local/bin/openclaw \

 # Pre-create default named-volume mount points so first-run Docker volumes copy
 # node ownership from the image instead of starting as root-owned directories.
-RUN install -d -m 0700 -o node -g node \
+# NOTE: create /home/node/.config with node ownership first. `install -d`
+# applies -o/-g/-m only to the final path component; missing parent directories
+# get default attributes, which would leave /home/node/.config root:root (issue #85968).
+RUN install -d -m 0755 -o node -g node /home/node/.config && \
+    install -d -m 0700 -o node -g node \
      /home/node/.openclaw \
      /home/node/.openclaw/workspace \
      /home/node/.config/openclaw && \
    stat -c '%U:%G %a' /home/node/.openclaw | grep -qx 'node:node 700' && \
    stat -c '%U:%G %a' /home/node/.openclaw/workspace | grep -qx 'node:node 700' && \
+    stat -c '%U:%G %a' /home/node/.config | grep -qx 'node:node 755' && \
    stat -c '%U:%G %a' /home/node/.config/openclaw | grep -qx 'node:node 700'

 ENV NODE_ENV=production
--- a/LICENSE
+++ b/LICENSE
@@ -19,3 +19,6 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
+
+Third-party notices for incorporated or adapted code are recorded in
+THIRD_PARTY_NOTICES.md.
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ If you want a personal, single-user assistant that feels local, fast, and always

 Supported channels include: WhatsApp, Telegram, Slack, Discord, Google Chat, Signal, iMessage, IRC, Microsoft Teams, Matrix, Feishu, LINE, Mattermost, Nextcloud Talk, Nostr, Synology Chat, Tlon, Twitch, Zalo, Zalo Personal, WeChat, QQ, WebChat.

-[Website](https://openclaw.ai) · [Docs](https://docs.openclaw.ai) · [Vision](VISION.md) · [DeepWiki](https://deepwiki.com/openclaw/openclaw) · [Getting Started](https://docs.openclaw.ai/start/getting-started) · [Updating](https://docs.openclaw.ai/install/updating) · [Showcase](https://docs.openclaw.ai/start/showcase) · [FAQ](https://docs.openclaw.ai/help/faq) · [Onboarding](https://docs.openclaw.ai/start/wizard) · [Nix](https://github.com/openclaw/nix-openclaw) · [Docker](https://docs.openclaw.ai/install/docker) · [Discord](https://discord.gg/clawd)
+[Website](https://openclaw.ai) · [Docs](https://docs.openclaw.ai) · [Vision](VISION.md) · [Third-party notices](THIRD_PARTY_NOTICES.md) · [DeepWiki](https://deepwiki.com/openclaw/openclaw) · [Getting Started](https://docs.openclaw.ai/start/getting-started) · [Updating](https://docs.openclaw.ai/install/updating) · [Showcase](https://docs.openclaw.ai/start/showcase) · [FAQ](https://docs.openclaw.ai/help/faq) · [Onboarding](https://docs.openclaw.ai/start/wizard) · [Nix](https://github.com/openclaw/nix-openclaw) · [Docker](https://docs.openclaw.ai/install/docker) · [Discord](https://discord.gg/clawd)

 New install? Start here: [Getting started](https://docs.openclaw.ai/start/getting-started)

@@ -306,7 +306,7 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines, maintainers, and how to s
 AI/vibe-coded PRs welcome! 🤖

 Special thanks to [Mario Zechner](https://mariozechner.at/) for his support and for
-[pi-mono](https://github.com/badlogic/pi-mono).
+[pi-mono](https://github.com/earendil-works/pi-mono).
 Special thanks to Adam Doppelt for the lobster.bot domain.

 Thanks to all clawtributors:
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -98,7 +98,7 @@ These are frequently reported but are typically closed with no code change:
 - Reports that treat `POST /tools/invoke` under shared-secret bearer auth (`gateway.auth.mode="token"` or `"password"`) as a narrower per-request/per-scope authorization surface. That endpoint is designed as the same trusted-operator HTTP boundary: shared-secret bearer auth is full operator access there, narrower `x-openclaw-scopes` values do not reduce that path, and owner-only tool policy follows the shared-secret operator contract.
 - Reports that only show differences in heuristic detection/parity (for example obfuscation-pattern detection on one exec path but not another, such as `node.invoke -> system.run` parity gaps) without demonstrating bypass of auth, approvals, allowlist enforcement, sandboxing, or other documented trust boundaries.
 - Reports that only show an ACP tool can indirectly execute, mutate, orchestrate sessions, or reach another tool/runtime without demonstrating bypass of ACP prompt/approval, allowlist enforcement, sandboxing, or another documented trust boundary. ACP silent approval is intentionally limited to narrow readonly classes; parity-only indirect-command findings are hardening, not vulnerabilities.
-- Reports that only show untrusted media bytes reaching a maintained native decoder dependency (for example Sharp/libvips/libheif) without proving the shipped dependency version is vulnerable and demonstrating crash, memory corruption, data exposure, or a boundary bypass through OpenClaw. JavaScript header sniffing and image dimension fast-paths are preflight/UX checks, not the security boundary for native decoder correctness.
+- Reports that only show untrusted media bytes reaching a maintained native decoder dependency (for example image codec libraries such as libheif) without proving the shipped dependency version is vulnerable and demonstrating crash, memory corruption, data exposure, or a boundary bypass through OpenClaw. JavaScript header sniffing and image dimension fast-paths are preflight/UX checks, not the security boundary for native decoder correctness.
 - Reports whose only impact is transient extra memory, CPU, or allocation work from decoding, base64 expansion, media transcoding, serialization, or other format conversion after the input was already accepted under OpenClaw's configured size/trust limits, including base64 decode-before-size-estimate findings. These are performance issues, not vulnerabilities, unless the report demonstrates unauthenticated amplification, bypass of configured limits, crash/process termination, persistent resource exhaustion, data exposure, or another documented boundary bypass.
 - ReDoS/DoS claims that require trusted operator configuration input (for example catastrophic regex in `sessionFilter` or `logging.redactPatterns`) without a trust-boundary bypass.
 - Archive/install extraction claims that require pre-existing local filesystem priming in trusted state (for example planting symlink/hardlink aliases under destination directories such as skills/tools paths) without showing an untrusted path that can create/control that primitive.
--- a/THIRD_PARTY_NOTICES.md
+++ b/THIRD_PARTY_NOTICES.md
@@ -0,0 +1,37 @@
+# Third-party notices
+
+This file records third-party notices for code or substantial implementation
+portions incorporated into OpenClaw source, beyond normal package-manager
+dependency metadata.
+
+## Pi / pi-mono
+
+Portions of OpenClaw were adapted from Pi / pi-mono, and OpenClaw also depends
+on `@earendil-works/pi-tui` for terminal UI rendering.
+
+- Upstream: https://github.com/earendil-works/pi-mono
+- Package family: `@earendil-works/pi-*`
+- License: MIT
+- Copyright: Copyright (c) 2025 Mario Zechner
+
+MIT License
+
+Copyright (c) 2025 Mario Zechner
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/VISION.md
+++ b/VISION.md
@@ -38,6 +38,15 @@ Contribution rules:
 - Do not open large batches of tiny PRs at once; each PR has review cost.
 - For very small related fixes, grouping into one focused PR is encouraged.

+Configuration compatibility:
+
+OpenClaw runtime code reads the current configuration schema only.
+We do not keep long-lived aliases or compatibility branches that silently accept old, renamed, or malformed config keys.
+
+When a config change makes existing user config invalid, the same change needs a doctor migration.
+`openclaw doctor --fix` should detect the old shape, explain it, back it up when needed, and rewrite it to the canonical format.
+Core-owned config and auth state are repaired in core doctor code; plugin-owned config is repaired by that plugin's doctor contract.
+
 ## Security

 Security in OpenClaw is a deliberate tradeoff: strong defaults without killing capability.
--- a/appcast.xml
+++ b/appcast.xml
--- a/apps/android/app/build.gradle.kts
+++ b/apps/android/app/build.gradle.kts
@@ -65,8 +65,8 @@ android {
    applicationId = "ai.openclaw.app"
    minSdk = 31
    targetSdk = 36
-    versionCode = 2026052200
-    versionName = "2026.5.22"
+    versionCode = 2026052801
+    versionName = "2026.5.28"
    ndk {
      // Support all major ABIs — native libs are tiny (~47 KB per ABI)
      abiFilters += listOf("armeabi-v7a", "arm64-v8a", "x86", "x86_64")
--- a/apps/android/app/src/debug/AndroidManifest.xml
+++ b/apps/android/app/src/debug/AndroidManifest.xml
@@ -0,0 +1,14 @@
+<manifest xmlns:android="http://schemas.android.com/apk/res/android">
+    <application>
+        <receiver
+            android:name=".VoiceE2eReceiver"
+            android:exported="true">
+            <intent-filter>
+                <action android:name="ai.openclaw.app.debug.RUN_VOICE_E2E" />
+            </intent-filter>
+        </receiver>
+        <service
+            android:name=".VoiceE2eService"
+            android:exported="false" />
+    </application>
+</manifest>
--- a/apps/android/app/src/debug/java/ai/openclaw/app/VoiceE2eReceiver.kt
+++ b/apps/android/app/src/debug/java/ai/openclaw/app/VoiceE2eReceiver.kt
@@ -0,0 +1,195 @@
+package ai.openclaw.app
+
+import android.app.Service
+import android.content.BroadcastReceiver
+import android.content.Context
+import android.content.Intent
+import android.os.IBinder
+import android.util.Base64
+import android.util.Log
+import kotlinx.coroutines.CoroutineScope
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.SupervisorJob
+import kotlinx.coroutines.cancel
+import kotlinx.coroutines.delay
+import kotlinx.coroutines.launch
+import kotlinx.coroutines.withTimeout
+import kotlinx.serialization.json.JsonNull
+import kotlinx.serialization.json.JsonPrimitive
+import kotlinx.serialization.json.buildJsonObject
+import java.io.File
+
+private const val tag = "VoiceE2E"
+private const val resultFileName = "voice_e2e_result.json"
+
+/**
+ * Broadcast entry point of the voice E2E harness (debug source set).
+ *
+ * Each received broadcast is forwarded to [VoiceE2eService]; the broadcast's extras are copied onto
+ * the service intent so the service reads the same parameters.
+ */
+class VoiceE2eReceiver : BroadcastReceiver() {
+  override fun onReceive(
+    context: Context,
+    intent: Intent,
+  ) {
+    val serviceIntent = Intent(context, VoiceE2eService::class.java).putExtras(intent)
+    context.startService(serviceIntent)
+  }
+}
+
+/**
+ * Service that runs one voice E2E command per start request (debug source set).
+ *
+ * The command is described by the start intent's extras (string extras may also be supplied
+ * Base64-encoded, see `getDecodedStringExtra`):
+ * - `mode`: `stop`, `connect`, or a mode understood by `NodeRuntime.runVoiceE2e`; defaults to `both`.
+ * - `noConnect`, `connectTimeoutMs`, `host`, `port`, `tls`, `token`, `bootstrapToken`, `password`:
+ *   gateway connection setup.
+ * - `transcript`, `realtimeAssistant`, `timeoutMs`: inputs for the voice run.
+ *
+ * The outcome is written as JSON to `voice_e2e_result.json` in the cache directory and logged with a
+ * `PASS` or `FAIL` prefix.
+ */
+class VoiceE2eService : Service() {
+  private val serviceScope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
+
+  override fun onBind(intent: Intent?): IBinder? = null
+
+  /** Launches the command on [serviceScope] and stops this start request once it finishes. */
+  override fun onStartCommand(
+    intent: Intent?,
+    flags: Int,
+    startId: Int,
+  ): Int {
+    val command = intent ?: return START_NOT_STICKY
+    serviceScope.launch {
+      try {
+        runCommand(command)
+      } finally {
+        stopSelf(startId)
+      }
+    }
+    return START_NOT_STICKY
+  }
+
+  override fun onDestroy() {
+    serviceScope.cancel()
+    super.onDestroy()
+  }
+
+  /**
+   * Executes the command described by [intent] and records its outcome.
+   *
+   * Failures (including timeouts) are written to the result file and logged instead of being
+   * propagated. Genuine coroutine cancellation is recorded the same way and then rethrown.
+   */
+  private suspend fun runCommand(intent: Intent) {
+    try {
+      val app = applicationContext as NodeApp
+      val runtime = app.ensureRuntime()
+      val requestedMode = stringExtraOrDefault(intent, name = "mode", default = "both")
+      // Match case-insensitively, like the normalization NodeRuntime.runVoiceE2e applies to its mode.
+      val mode = requestedMode.lowercase()
+      if (mode == "stop") {
+        runtime.cancelMicCapture()
+        runtime.setTalkModeEnabled(false)
+        reportPass("""{"ok":true,"mode":"stop"}""")
+        return
+      }
+
+      val connect = !intent.getBooleanExtra("noConnect", false)
+      val connectTimeoutMs = intent.getLongExtra("connectTimeoutMs", 20_000L)
+      if (connect) {
+        configureGateway(runtime = runtime, intent = intent)
+      }
+      if (connect || !runtime.isConnected.value) {
+        awaitGateway(runtime = runtime, timeoutMs = connectTimeoutMs)
+      }
+
+      startActivity(
+        Intent(actionOpenVoiceE2e)
+          .setClass(this, MainActivity::class.java)
+          .addFlags(Intent.FLAG_ACTIVITY_NEW_TASK or Intent.FLAG_ACTIVITY_SINGLE_TOP or Intent.FLAG_ACTIVITY_CLEAR_TOP),
+      )
+
+      if (mode == "connect") {
+        reportPass("""{"ok":true,"mode":"connect","connected":true}""")
+        return
+      }
+
+      val transcript =
+        stringExtraOrDefault(
+          intent,
+          name = "transcript",
+          default = "Reply exactly: Android voice e2e normal path ok.",
+        )
+      val realtimeReply =
+        stringExtraOrDefault(
+          intent,
+          name = "realtimeAssistant",
+          default = "Android realtime voice e2e relay path ok.",
+        )
+      val timeoutMs = intent.getLongExtra("timeoutMs", 60_000L)
+      val result =
+        runtime.runVoiceE2e(
+          // Pass the mode as requested so an "unknown mode" error echoes the caller's input.
+          mode = requestedMode,
+          transcript = transcript,
+          realtimeAssistantText = realtimeReply,
+          timeoutMs = timeoutMs,
+        )
+      reportPass(encodeResult(result))
+    } catch (err: Throwable) {
+      val resultJson =
+        buildJsonObject {
+          put("ok", JsonPrimitive(false))
+          put("error", JsonPrimitive(err.message ?: err::class.java.simpleName))
+        }.toString()
+      writeResult(resultJson)
+      Log.e(tag, "FAIL $resultJson", err)
+      // A withTimeout expiry is an ordinary failure of this command. Any other cancellation must
+      // keep propagating so the coroutine still completes as cancelled.
+      val cancelled =
+        err is kotlinx.coroutines.CancellationException &&
+          err !is kotlinx.coroutines.TimeoutCancellationException
+      if (cancelled) {
+        throw err
+      }
+    }
+  }
+
+  /** Writes a successful result and logs it with the `PASS` prefix. */
+  private fun reportPass(resultJson: String) {
+    writeResult(resultJson)
+    Log.i(tag, "PASS $resultJson")
+  }
+
+  /** Returns the trimmed (optionally Base64-encoded) string extra [name], or [default] when absent or empty. */
+  private fun stringExtraOrDefault(
+    intent: Intent,
+    name: String,
+    default: String,
+  ): String =
+    intent
+      .getDecodedStringExtra(name)
+      ?.trim()
+      .orEmpty()
+      .ifEmpty { default }
+
+  /** Applies the manual gateway settings carried by [intent] to [runtime] and starts a manual connect. */
+  private fun configureGateway(
+    runtime: NodeRuntime,
+    intent: Intent,
+  ) {
+    val host = stringExtraOrDefault(intent, name = "host", default = "127.0.0.1")
+    val port = intent.getIntExtra("port", 18789)
+    runtime.setManualEnabled(true)
+    runtime.setManualHost(host)
+    runtime.setManualPort(port)
+    runtime.setManualTls(intent.getBooleanExtra("tls", false))
+    runtime.setGatewayToken(intent.getDecodedStringExtra("token").orEmpty())
+    runtime.setGatewayBootstrapToken(intent.getDecodedStringExtra("bootstrapToken").orEmpty())
+    runtime.setGatewayPassword(intent.getDecodedStringExtra("password").orEmpty())
+    runtime.setOnboardingCompleted(true)
+    runtime.connectManual()
+  }
+
+  /** Polls every 100 ms until [runtime] reports a connection; times out after [timeoutMs]. */
+  private suspend fun awaitGateway(
+    runtime: NodeRuntime,
+    timeoutMs: Long,
+  ) {
+    withTimeout(timeoutMs) {
+      while (!runtime.isConnected.value) {
+        delay(100L)
+      }
+    }
+  }
+
+  /** Serializes a successful run; slices that were not executed are encoded as JSON null. */
+  private fun encodeResult(result: NodeRuntime.VoiceE2eResult): String =
+    buildJsonObject {
+      put("ok", JsonPrimitive(true))
+      put("normal", result.normal?.let(::encodeSlice) ?: JsonNull)
+      put("realtime", result.realtime?.let(::encodeSlice) ?: JsonNull)
+    }.toString()
+
+  /** Serializes one slice; missing texts are encoded as JSON null. */
+  private fun encodeSlice(slice: NodeRuntime.VoiceE2eSliceResult) =
+    buildJsonObject {
+      put("mode", JsonPrimitive(slice.mode))
+      put("status", JsonPrimitive(slice.status))
+      put("userText", slice.userText?.let(::JsonPrimitive) ?: JsonNull)
+      put("assistantText", slice.assistantText?.let(::JsonPrimitive) ?: JsonNull)
+    }
+
+  /** Overwrites the result file in the cache directory with [json]. */
+  private fun writeResult(json: String) {
+    File(cacheDir, resultFileName).writeText(json)
+  }
+}
+
+/**
+ * Reads the string extra [name], preferring its Base64-encoded variant.
+ *
+ * When the extra `${name}Base64` is present and not blank, it is Base64-decoded and interpreted as
+ * UTF-8; otherwise the plain extra [name] is returned (possibly `null`).
+ *
+ * @throws IllegalArgumentException if the Base64 variant is present but cannot be decoded; the
+ *   message names the offending extra.
+ */
+private fun Intent.getDecodedStringExtra(name: String): String? {
+  val encodedKey = "${name}Base64"
+  val encoded = getStringExtra(encodedKey)
+  if (encoded.isNullOrBlank()) {
+    return getStringExtra(name)
+  }
+  val decoded =
+    try {
+      Base64.decode(encoded, Base64.NO_WRAP)
+    } catch (err: IllegalArgumentException) {
+      // Keep the exception type but say which extra was malformed.
+      throw IllegalArgumentException("invalid Base64 in extra '$encodedKey'", err)
+    }
+  return String(decoded, Charsets.UTF_8)
+}
--- a/apps/android/app/src/main/java/ai/openclaw/app/AssistantLaunch.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/AssistantLaunch.kt
@@ -3,6 +3,7 @@ package ai.openclaw.app
 import android.content.Intent

 const val actionAskOpenClaw = "ai.openclaw.app.action.ASK_OPENCLAW"
+const val actionOpenVoiceE2e = "ai.openclaw.app.debug.OPEN_VOICE_E2E"
 const val extraAssistantPrompt = "prompt"

 enum class HomeDestination {
@@ -19,6 +20,14 @@ data class AssistantLaunchRequest(
  val autoSend: Boolean,
 )

+/**
+ * Maps a launch intent to the home destination it requests.
+ *
+ * Returns [HomeDestination.Voice] only in debug builds and only for the [actionOpenVoiceE2e] action;
+ * every other intent (including `null` or one without an action) yields `null`.
+ */
+fun parseHomeDestinationIntent(intent: Intent?): HomeDestination? {
+  val requestedAction = intent?.action
+  val opensVoiceE2e = BuildConfig.DEBUG && requestedAction == actionOpenVoiceE2e
+  return if (opensVoiceE2e) HomeDestination.Voice else null
+}
+
 fun parseAssistantLaunchIntent(intent: Intent?): AssistantLaunchRequest? {
  val action = intent?.action ?: return null
  return when (action) {
--- a/apps/android/app/src/main/java/ai/openclaw/app/MainActivity.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/MainActivity.kt
@@ -79,6 +79,10 @@ class MainActivity : ComponentActivity() {
  }

  private fun handleAssistantIntent(intent: android.content.Intent?) {
+    parseHomeDestinationIntent(intent)?.let { destination ->
+      viewModel.requestHomeDestination(destination)
+      return
+    }
    val request = parseAssistantLaunchIntent(intent) ?: return
    viewModel.handleAssistantLaunch(request)
  }
--- a/apps/android/app/src/main/java/ai/openclaw/app/MainViewModel.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/MainViewModel.kt
@@ -32,6 +32,8 @@ class MainViewModel(
  private var foreground = true
  private val _requestedHomeDestination = MutableStateFlow<HomeDestination?>(null)
  val requestedHomeDestination: StateFlow<HomeDestination?> = _requestedHomeDestination
+  private val _startOnboardingAtGatewaySetup = MutableStateFlow(false)
+  val startOnboardingAtGatewaySetup: StateFlow<Boolean> = _startOnboardingAtGatewaySetup
  private val _chatDraft = MutableStateFlow<String?>(null)
  val chatDraft: StateFlow<String?> = _chatDraft
  private val _pendingAssistantAutoSend = MutableStateFlow<String?>(null)
@@ -159,6 +161,7 @@ class MainViewModel(
  val chatSessionKey: StateFlow<String> = runtimeState(initial = "main") { it.chatSessionKey }
  val chatSessionId: StateFlow<String?> = runtimeState(initial = null) { it.chatSessionId }
  val chatMessages: StateFlow<List<ChatMessage>> = runtimeState(initial = emptyList()) { it.chatMessages }
+  val chatHistoryLoading: StateFlow<Boolean> = runtimeState(initial = false) { it.chatHistoryLoading }
  val chatError: StateFlow<String?> = runtimeState(initial = null) { it.chatError }
  val chatHealthOk: StateFlow<Boolean> = runtimeState(initial = false) { it.chatHealthOk }
  val chatThinkingLevel: StateFlow<String> = runtimeState(initial = "off") { it.chatThinkingLevel }
@@ -262,6 +265,17 @@ class MainViewModel(
    prefs.setOnboardingCompleted(value)
  }

+  /**
+   * Starts pairing with a new gateway.
+   *
+   * Disconnects the current runtime (if one exists), calls `resetGatewaySetupAuth()`, raises the
+   * [startOnboardingAtGatewaySetup] flag, and finally marks onboarding as not completed.
+   */
+  fun pairNewGateway() {
+    runtimeRef.value?.disconnect()
+    resetGatewaySetupAuth()
+    // The flag is raised before onboarding is marked incomplete.
+    // NOTE(review): presumably so the onboarding UI already sees it when it reappears; confirm.
+    _startOnboardingAtGatewaySetup.value = true
+    prefs.setOnboardingCompleted(false)
+  }
+
+  /** Resets [startOnboardingAtGatewaySetup] to `false`. */
+  fun clearGatewaySetupStartRequest() {
+    _startOnboardingAtGatewaySetup.value = false
+  }
+
  fun setCanvasDebugStatusEnabled(value: Boolean) {
    prefs.setCanvasDebugStatusEnabled(value)
  }
@@ -316,6 +330,10 @@ class MainViewModel(
    _requestedHomeDestination.value = null
  }

+  /** Publishes [destination] through [requestedHomeDestination]. */
+  fun requestHomeDestination(destination: HomeDestination) {
+    _requestedHomeDestination.value = destination
+  }
+
  fun clearChatDraft() {
    _chatDraft.value = null
  }
--- a/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt
@@ -47,6 +47,7 @@ import ai.openclaw.app.protocol.OpenClawCanvasA2UIAction
 import ai.openclaw.app.voice.MicCaptureManager
 import ai.openclaw.app.voice.TalkModeManager
 import ai.openclaw.app.voice.VoiceConversationEntry
+import ai.openclaw.app.voice.VoiceConversationRole
 import android.Manifest
 import android.content.Context
 import android.content.pm.PackageManager
@@ -64,6 +65,7 @@ import kotlinx.coroutines.flow.asStateFlow
 import kotlinx.coroutines.flow.combine
 import kotlinx.coroutines.flow.distinctUntilChanged
 import kotlinx.coroutines.launch
+import kotlinx.coroutines.withTimeout
 import kotlinx.serialization.Serializable
 import kotlinx.serialization.json.Json
 import kotlinx.serialization.json.JsonArray
@@ -256,6 +258,18 @@ class NodeRuntime(
    val previousFingerprintSha256: String? = null,
  )

+  /**
+   * Outcome of one voice E2E slice.
+   *
+   * @property mode slice identifier (`"normal"` or `"realtime"` as produced by the E2E runners)
+   * @property status status text captured when the slice finished
+   * @property userText text of the last user conversation entry, or `null` if there was none
+   * @property assistantText text of the last assistant conversation entry, or `null` if there was none
+   */
+  data class VoiceE2eSliceResult(
+    val mode: String,
+    val status: String,
+    val userText: String?,
+    val assistantText: String?,
+  )
+
+  /**
+   * Combined outcome of a voice E2E run.
+   *
+   * @property normal result of the normal slice, or `null` when that slice was not requested
+   * @property realtime result of the realtime slice, or `null` when that slice was not requested
+   */
+  data class VoiceE2eResult(
+    val normal: VoiceE2eSliceResult?,
+    val realtime: VoiceE2eSliceResult?,
+  )
+
  private val _isConnected = MutableStateFlow(false)
  val isConnected: StateFlow<Boolean> = _isConnected.asStateFlow()
  private val _nodeConnected = MutableStateFlow(false)
@@ -490,7 +504,6 @@ class NodeRuntime(
      scope = scope,
      session = operatorSession,
      json = json,
-      supportsChatSubscribe = false,
    ).also {
      it.applyMainSessionKey(_mainSessionKey.value)
    }
@@ -502,8 +515,7 @@ class NodeRuntime(
        context = appContext,
        scope = scope,
        session = operatorSession,
-        supportsChatSubscribe = false,
-        isConnected = { operatorConnected },
+        isConnected = { _isConnected.value },
        onBeforeSpeak = { micCapture.pauseForTts() },
        onAfterSpeak = { micCapture.resumeAfterTts() },
      ).also { speaker ->
@@ -610,8 +622,7 @@ class NodeRuntime(
      context = appContext,
      scope = scope,
      session = operatorSession,
-      supportsChatSubscribe = true,
-      isConnected = { operatorConnected },
+      isConnected = { _isConnected.value },
      onBeforeSpeak = { micCapture.pauseForTts() },
      onAfterSpeak = { micCapture.resumeAfterTts() },
      onStoppedByRelay = { finishTalkModeAfterRelayClose() },
@@ -858,6 +869,7 @@ class NodeRuntime(
  val chatSessionKey: StateFlow<String> = chat.sessionKey
  val chatSessionId: StateFlow<String?> = chat.sessionId
  val chatMessages: StateFlow<List<ChatMessage>> = chat.messages
+  val chatHistoryLoading: StateFlow<Boolean> = chat.historyLoading
  val chatError: StateFlow<String?> = chat.errorText
  val chatHealthOk: StateFlow<Boolean> = chat.healthOk
  val chatThinkingLevel: StateFlow<String> = chat.thinkingLevel
@@ -1150,7 +1162,7 @@ class NodeRuntime(
    NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.TalkMode)
    talkMode.ttsOnAllResponses = true
    talkMode.setPlaybackEnabled(speakerEnabled.value)
-    talkMode.ensureChatSubscribed()
+    talkMode.refreshConfig()
    externalAudioCaptureActive.value = true
  }

@@ -1189,6 +1201,115 @@ class NodeRuntime(
    talkMode.setPlaybackEnabled(value)
  }

+  /**
+   * Runs the voice E2E slices selected by [mode] with the speaker temporarily disabled.
+   *
+   * [mode] is trimmed and lowercased; an empty value means `both`. `normal`/`dictation` select the
+   * normal slice, `realtime`/`talk` the realtime slice, and `both` runs the normal slice followed by
+   * the realtime slice. The previous speaker setting is always restored, and the active voice session
+   * is stopped when the run does not complete.
+   *
+   * @param transcript user text fed into each slice
+   * @param realtimeAssistantText assistant text used by the realtime slice
+   * @param timeoutMs timeout handed to each slice
+   * @throws IllegalStateException outside debug builds, without a gateway connection, or without
+   *   microphone permission
+   * @throws IllegalArgumentException when [mode] selects no slice
+   */
+  suspend fun runVoiceE2e(
+    mode: String,
+    transcript: String,
+    realtimeAssistantText: String,
+    timeoutMs: Long,
+  ): VoiceE2eResult {
+    check(BuildConfig.DEBUG) { "voice e2e is debug-only" }
+    check(_isConnected.value) { "gateway not connected" }
+    check(hasRecordAudioPermission()) { "microphone permission missing" }
+
+    val requested = mode.trim().lowercase().ifEmpty { "both" }
+    val runNormal = requested in setOf("both", "normal", "dictation")
+    val runRealtime = requested in setOf("both", "realtime", "talk")
+    require(runNormal || runRealtime) { "unknown voice e2e mode: $mode" }
+
+    val speakerWasEnabled = speakerEnabled.value
+    setSpeakerEnabled(false)
+    var finished = false
+    try {
+      val normalSlice =
+        if (runNormal) {
+          runNormalVoiceE2e(transcript = transcript, timeoutMs = timeoutMs)
+        } else {
+          null
+        }
+      val realtimeSlice =
+        if (runRealtime) {
+          runRealtimeVoiceE2e(
+            transcript = transcript,
+            assistantText = realtimeAssistantText,
+            timeoutMs = timeoutMs,
+          )
+        } else {
+          null
+        }
+      val outcome = VoiceE2eResult(normal = normalSlice, realtime = realtimeSlice)
+      finished = true
+      return outcome
+    } finally {
+      // Only an aborted run leaves a voice session behind that needs stopping.
+      if (!finished) {
+        stopActiveVoiceSession()
+      }
+      setSpeakerEnabled(speakerWasEnabled)
+    }
+  }
+
+  /**
+   * Runs the normal (manual mic) slice: submits [transcript] as a transcribed message and waits up to
+   * [timeoutMs] for a non-streaming assistant entry in the mic-capture conversation.
+   */
+  private suspend fun runNormalVoiceE2e(
+    transcript: String,
+    timeoutMs: Long,
+  ): VoiceE2eSliceResult {
+    stopActiveVoiceSession()
+    setVoiceCaptureMode(VoiceCaptureMode.ManualMic)
+    micCapture.submitTranscribedMessage(transcript)
+    awaitVoiceConversation(timeoutMs = timeoutMs) {
+      micCapture.conversation.value.any { entry ->
+        entry.role == VoiceConversationRole.Assistant && !entry.isStreaming
+      }
+    }
+    val snapshot = micCapture.conversation.value
+    val finalStatus = micCapture.statusText.value
+    val lastUser = snapshot.lastOrNull { entry -> entry.role == VoiceConversationRole.User }
+    val lastAssistant = snapshot.lastOrNull { entry -> entry.role == VoiceConversationRole.Assistant }
+    return VoiceE2eSliceResult(
+      mode = "normal",
+      status = finalStatus,
+      userText = lastUser?.text,
+      assistantText = lastAssistant?.text,
+    )
+  }
+
+  /**
+   * Runs the realtime (talk mode) slice: drives one E2E realtime turn with [transcript] and
+   * [assistantText], then waits up to [timeoutMs] until the talk-mode conversation holds both a
+   * non-streaming user entry and a non-streaming assistant entry.
+   */
+  private suspend fun runRealtimeVoiceE2e(
+    transcript: String,
+    assistantText: String,
+    timeoutMs: Long,
+  ): VoiceE2eSliceResult {
+    stopActiveVoiceSession()
+    setVoiceCaptureMode(VoiceCaptureMode.TalkMode)
+    talkMode.runE2eRealtimeTurn(
+      userText = transcript,
+      assistantText = assistantText,
+      timeoutMs = timeoutMs,
+    )
+    awaitVoiceConversation(timeoutMs = timeoutMs) {
+      val current = talkMode.conversation.value
+      val userSettled = current.any { entry -> entry.role == VoiceConversationRole.User && !entry.isStreaming }
+      userSettled && current.any { entry -> entry.role == VoiceConversationRole.Assistant && !entry.isStreaming }
+    }
+    val snapshot = talkMode.conversation.value
+    val finalStatus = talkMode.statusText.value
+    val lastUser = snapshot.lastOrNull { entry -> entry.role == VoiceConversationRole.User }
+    val lastAssistant = snapshot.lastOrNull { entry -> entry.role == VoiceConversationRole.Assistant }
+    return VoiceE2eSliceResult(
+      mode = "realtime",
+      status = finalStatus,
+      userText = lastUser?.text,
+      assistantText = lastAssistant?.text,
+    )
+  }
+
+  /** Polls [ready] every 100 ms until it returns `true`; times out after [timeoutMs]. */
+  private suspend fun awaitVoiceConversation(
+    timeoutMs: Long,
+    ready: () -> Boolean,
+  ) {
+    withTimeout(timeoutMs) {
+      while (true) {
+        if (ready()) break
+        delay(100L)
+      }
+    }
+  }
+
  private fun setVoiceCaptureMode(
    mode: VoiceCaptureMode,
    persistManualMic: Boolean = true,
@@ -1222,7 +1343,7 @@ class NodeRuntime(
        }
        // Tapping mic on interrupts any active TTS (barge-in).
        stopVoicePlayback()
-        scope.launch { talkMode.ensureChatSubscribed() }
+        scope.launch { talkMode.refreshConfig() }
        micCapture.setMicEnabled(true)
        externalAudioCaptureActive.value = true
      }
@@ -1235,7 +1356,7 @@ class NodeRuntime(
        NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.TalkMode)
        talkMode.ttsOnAllResponses = true
        talkMode.setPlaybackEnabled(speakerEnabled.value)
-        scope.launch { talkMode.ensureChatSubscribed() }
+        scope.launch { talkMode.refreshConfig() }
        talkMode.setEnabled(true)
        externalAudioCaptureActive.value = true
      }
@@ -1446,7 +1567,7 @@ class NodeRuntime(
    endpoint: GatewayEndpoint,
    auth: GatewayConnectAuth,
  ) {
-    if (operatorConnected || operatorStatusText == "Connecting…") {
+    if (operatorConnected) {
      return
    }
    val operatorAuth =
--- a/apps/android/app/src/main/java/ai/openclaw/app/chat/ChatController.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/chat/ChatController.kt
@@ -17,12 +17,12 @@ import kotlinx.serialization.json.JsonPrimitive
 import kotlinx.serialization.json.buildJsonObject
 import java.util.UUID
 import java.util.concurrent.ConcurrentHashMap
+import java.util.concurrent.atomic.AtomicLong

 class ChatController(
  private val scope: CoroutineScope,
  private val session: GatewaySession,
  private val json: Json,
-  private val supportsChatSubscribe: Boolean,
 ) {
  private var appliedMainSessionKey = "main"
  private val _sessionKey = MutableStateFlow("main")
@@ -34,6 +34,9 @@ class ChatController(
  private val _messages = MutableStateFlow<List<ChatMessage>>(emptyList())
  val messages: StateFlow<List<ChatMessage>> = _messages.asStateFlow()

+  private val _historyLoading = MutableStateFlow(false)
+  val historyLoading: StateFlow<Boolean> = _historyLoading.asStateFlow()
+
  private val _errorText = MutableStateFlow<String?>(null)
  val errorText: StateFlow<String?> = _errorText.asStateFlow()

@@ -60,25 +63,27 @@ class ChatController(
  private val pendingRunTimeoutJobs = ConcurrentHashMap<String, Job>()
  private val optimisticMessagesByRunId = LinkedHashMap<String, ChatMessage>()
  private val pendingRunTimeoutMs = 120_000L
+  private val historyLoadGeneration = AtomicLong(0)

  private var lastHealthPollAtMs: Long? = null

  fun onDisconnected(message: String) {
    _healthOk.value = false
-    // Not an error; keep connection status in the UI pill.
    _errorText.value = null
    clearPendingRuns()
    pendingToolCallsById.clear()
    publishPendingToolCalls()
    _streamingAssistantText.value = null
+    _historyLoading.value = false
    _sessionId.value = null
  }

  fun load(sessionKey: String) {
    val key = normalizeRequestedSessionKey(sessionKey)
-    _sessionKey.value = key
-    optimisticMessagesByRunId.clear()
-    scope.launch { bootstrap(forceHealth = true, refreshSessions = true) }
+    val generation = beginHistoryLoad(key, clearMessages = key != _sessionKey.value)
+    scope.launch {
+      bootstrap(sessionKey = key, generation = generation, forceHealth = true, refreshSessions = true)
+    }
  }

  fun applyMainSessionKey(mainSessionKey: String) {
@@ -92,12 +97,23 @@ class ChatController(
      )
    appliedMainSessionKey = nextState.appliedMainSessionKey
    if (_sessionKey.value == nextState.currentSessionKey) return
-    _sessionKey.value = nextState.currentSessionKey
-    scope.launch { bootstrap(forceHealth = true, refreshSessions = true) }
+    val generation = beginHistoryLoad(nextState.currentSessionKey, clearMessages = true)
+    scope.launch {
+      bootstrap(
+        sessionKey = nextState.currentSessionKey,
+        generation = generation,
+        forceHealth = true,
+        refreshSessions = true,
+      )
+    }
  }

  fun refresh() {
-    scope.launch { bootstrap(forceHealth = true, refreshSessions = true) }
+    val key = normalizeRequestedSessionKey(_sessionKey.value)
+    val generation = beginHistoryLoad(key, clearMessages = false)
+    scope.launch {
+      bootstrap(sessionKey = key, generation = generation, forceHealth = true, refreshSessions = true)
+    }
  }

  fun refreshSessions(limit: Int? = null) {
@@ -114,11 +130,30 @@ class ChatController(
    val key = normalizeRequestedSessionKey(sessionKey)
    if (key.isEmpty()) return
    if (key == _sessionKey.value) return
+    val generation = beginHistoryLoad(key, clearMessages = true)
+    scope.launch {
+      bootstrap(sessionKey = key, generation = generation, forceHealth = true, refreshSessions = false)
+    }
+  }
+
+  private fun beginHistoryLoad(
+    key: String,
+    clearMessages: Boolean,
+  ): Long {
+    val generation = historyLoadGeneration.incrementAndGet()
    _sessionKey.value = key
-    optimisticMessagesByRunId.clear()
-    // Keep the thread switch path lean: history + health are needed immediately,
-    // but the session list is usually unchanged and can refresh on explicit pull-to-refresh.
-    scope.launch { bootstrap(forceHealth = true, refreshSessions = false) }
+    _errorText.value = null
+    _healthOk.value = false
+    clearPendingRuns()
+    pendingToolCallsById.clear()
+    publishPendingToolCalls()
+    _streamingAssistantText.value = null
+    _sessionId.value = null
+    _historyLoading.value = true
+    if (clearMessages) {
+      _messages.value = emptyList()
+    }
+    return generation
  }

  private fun normalizeRequestedSessionKey(sessionKey: String): String {
@@ -289,27 +324,22 @@ class ChatController(
  }

  private suspend fun bootstrap(
+    sessionKey: String,
+    generation: Long,
    forceHealth: Boolean,
    refreshSessions: Boolean,
  ) {
-    _errorText.value = null
-    _healthOk.value = false
-    clearPendingRuns()
-    pendingToolCallsById.clear()
-    publishPendingToolCalls()
-    _streamingAssistantText.value = null
-    _sessionId.value = null
-
-    val key = _sessionKey.value
    try {
-      if (supportsChatSubscribe) {
-        session.sendNodeEvent("chat.subscribe", """{"sessionKey":"$key"}""")
-      }
-
-      val historyJson = session.request("chat.history", """{"sessionKey":"$key"}""")
-      val history = parseHistory(historyJson, sessionKey = key, previousMessages = _messages.value)
+      val historyJson =
+        session.request(
+          "chat.history",
+          buildJsonObject { put("sessionKey", JsonPrimitive(sessionKey)) }.toString(),
+        )
+      if (!isCurrentHistoryLoad(sessionKey, _sessionKey.value, generation, historyLoadGeneration.get())) return
+      val history = parseHistory(historyJson, sessionKey = sessionKey, previousMessages = _messages.value)
      _messages.value = mergeOptimisticMessages(incoming = history.messages, optimistic = optimisticMessagesByRunId.values)
      _sessionId.value = history.sessionId
+      _historyLoading.value = false
      history.thinkingLevel
        ?.trim()
        ?.takeIf { it.isNotEmpty() }
@@ -320,7 +350,9 @@ class ChatController(
        fetchSessions(limit = 50)
      }
    } catch (err: Throwable) {
+      if (!isCurrentHistoryLoad(sessionKey, _sessionKey.value, generation, historyLoadGeneration.get())) return
      _errorText.value = err.message
+      _historyLoading.value = false
    }
  }

@@ -387,9 +419,29 @@ class ChatController(
        _streamingAssistantText.value = null
        scope.launch {
          try {
+            val currentSessionKey = _sessionKey.value
+            val currentGeneration = historyLoadGeneration.get()
            val historyJson =
-              session.request("chat.history", """{"sessionKey":"${_sessionKey.value}"}""")
-            val history = parseHistory(historyJson, sessionKey = _sessionKey.value, previousMessages = _messages.value)
+              session.request(
+                "chat.history",
+                buildJsonObject { put("sessionKey", JsonPrimitive(currentSessionKey)) }.toString(),
+              )
+            if (
+              !isCurrentHistoryLoad(
+                currentSessionKey,
+                _sessionKey.value,
+                currentGeneration,
+                historyLoadGeneration.get(),
+              )
+            ) {
+              return@launch
+            }
+            val history =
+              parseHistory(
+                historyJson,
+                sessionKey = currentSessionKey,
+                previousMessages = _messages.value,
+              )
            _messages.value = mergeOptimisticMessages(incoming = history.messages, optimistic = optimisticMessagesByRunId.values)
            _sessionId.value = history.sessionId
            history.thinkingLevel
@@ -527,7 +579,7 @@ class ChatController(
      array.mapNotNull { item ->
        val obj = item.asObjectOrNull() ?: return@mapNotNull null
        val role = obj["role"].asStringOrNull() ?: return@mapNotNull null
-        val content = obj["content"].asArrayOrNull()?.mapNotNull(::parseMessageContent) ?: emptyList()
+        val content = obj["content"].asArrayOrNull()?.mapNotNull(::parseChatMessageContent) ?: emptyList()
        val ts = obj["timestamp"].asLongOrNull()
        ChatMessage(
          id = UUID.randomUUID().toString(),
@@ -545,21 +597,6 @@ class ChatController(
    )
  }

-  private fun parseMessageContent(el: JsonElement): ChatMessageContent? {
-    val obj = el.asObjectOrNull() ?: return null
-    val type = obj["type"].asStringOrNull() ?: "text"
-    return if (type == "text") {
-      ChatMessageContent(type = "text", text = obj["text"].asStringOrNull())
-    } else {
-      ChatMessageContent(
-        type = type,
-        mimeType = obj["mimeType"].asStringOrNull(),
-        fileName = obj["fileName"].asStringOrNull(),
-        base64 = obj["content"].asStringOrNull(),
-      )
-    }
-  }
-
  private fun parseSessions(jsonString: String): List<ChatSessionEntry> {
    val root = json.parseToJsonElement(jsonString).asObjectOrNull() ?: return emptyList()
    val sessions = root["sessions"].asArrayOrNull() ?: return emptyList()
@@ -593,6 +630,34 @@ class ChatController(
    }
 }

+/**
+ * Tells whether a finished history request may still be applied.
+ *
+ * A request is current only when the session key it was issued for equals the key selected now and
+ * its generation equals the active generation.
+ */
+internal fun isCurrentHistoryLoad(
+  requestedSessionKey: String,
+  currentSessionKey: String,
+  requestGeneration: Long,
+  activeGeneration: Long,
+): Boolean {
+  if (requestedSessionKey != currentSessionKey) return false
+  return requestGeneration == activeGeneration
+}
+
+/**
+ * Converts one JSON content item into a [ChatMessageContent].
+ *
+ * - `text`, `input_text`, `output_text` (or a missing `type`) become a `text` part whose text is
+ *   taken from `text`, falling back to `content`.
+ * - `image` becomes an `image` part; a blank `content` payload is treated as absent.
+ * - Non-object elements and any other type yield `null`.
+ */
+internal fun parseChatMessageContent(el: JsonElement): ChatMessageContent? {
+  val fields = el.asObjectOrNull() ?: return null
+  val kind = fields["type"].asStringOrNull() ?: "text"
+  if (kind == "text" || kind == "input_text" || kind == "output_text") {
+    val body = fields["text"].asStringOrNull() ?: fields["content"].asStringOrNull()
+    return ChatMessageContent(type = "text", text = body)
+  }
+  if (kind != "image") {
+    return null
+  }
+  val payload = fields["content"].asStringOrNull()?.takeIf { it.isNotBlank() }
+  return ChatMessageContent(
+    type = "image",
+    mimeType = fields["mimeType"].asStringOrNull(),
+    fileName = fields["fileName"].asStringOrNull(),
+    base64 = payload,
+  )
+}
+
 internal data class MainSessionState(
  val currentSessionKey: String,
  val appliedMainSessionKey: String,
--- a/Show More
+++ b/Show More