fix(line): canonicalize trailing-slash webhook paths (#91649)

* fix(line): canonicalize trailing-slash webhook paths
* fix(clownfish): address review for clawsweeper-commit-openclaw-openclaw-4cf228466770 (1)

---------

Co-authored-by: openclaw-clownfish[bot] <280122609+openclaw-clownfish[bot]@users.noreply.github.com>
fix(microsoft-foundry): repair CI validation issues
2026-06-09 15:31:18 +08:00 · 2026-06-09 16:26:16 +09:00 · 2026-06-09 15:45:19 +09:00 · 2026-06-09 15:42:13 +09:00 · 2026-06-09 15:34:29 +09:00 · 2026-06-09 15:34:29 +09:00
942 changed files with 37578 additions and 91116 deletions
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -293,6 +293,10 @@
  - changed-files:
      - any-glob-to-any-file:
          - "extensions/lobster/**"
+"extensions: llama-cpp":
+  - changed-files:
+      - any-glob-to-any-file:
+          - "extensions/llama-cpp/**"
 "extensions: memory-core":
  - changed-files:
      - any-glob-to-any-file:
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -2093,7 +2093,7 @@ jobs:
        uses: actions/cache@v5
        with:
          path: ~/.android-sdk
-          key: ${{ runner.os }}-android-sdk-v1-cmdline-12266719-platform-36-build-tools-36.0.0
+          key: ${{ runner.os }}-android-sdk-v1-cmdline-14742923-platform-37.0-build-tools-36.0.0
          restore-keys: |
            ${{ runner.os }}-android-sdk-v1-

@@ -2101,7 +2101,7 @@ jobs:
        run: |
          set -euo pipefail
          ANDROID_SDK_ROOT="$HOME/.android-sdk"
-          CMDLINE_TOOLS_VERSION="12266719"
+          CMDLINE_TOOLS_VERSION="14742923"
          ARCHIVE="commandlinetools-linux-${CMDLINE_TOOLS_VERSION}_latest.zip"
          URL="https://dl.google.com/android/repository/${ARCHIVE}"

@@ -2123,7 +2123,7 @@ jobs:
          yes | sdkmanager --sdk_root="${ANDROID_SDK_ROOT}" --licenses >/dev/null
          sdkmanager --sdk_root="${ANDROID_SDK_ROOT}" --install \
            "platform-tools" \
-            "platforms;android-36" \
+            "platforms;android-37.0" \
            "build-tools;36.0.0"

      - name: Run Android ${{ matrix.task }}
--- a/.github/workflows/codeql-android-critical-security.yml
+++ b/.github/workflows/codeql-android-critical-security.yml
@@ -35,7 +35,7 @@ jobs:
          java-version: "21"

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          languages: java-kotlin
          build-mode: manual
@@ -46,6 +46,6 @@ jobs:
        run: ./gradlew --no-daemon :app:assemblePlayDebug

      - name: Analyze
-        uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          category: "/codeql-critical-security/android"
--- a/.github/workflows/codeql-critical-quality.yml
+++ b/.github/workflows/codeql-critical-quality.yml
@@ -342,13 +342,13 @@ jobs:
          submodules: false

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          languages: javascript-typescript
          config-file: ./.github/codeql/codeql-core-auth-secrets-critical-quality.yml

      - name: Analyze
-        uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          category: "/codeql-critical-quality/core-auth-secrets"

@@ -365,13 +365,13 @@ jobs:
          submodules: false

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          languages: javascript-typescript
          config-file: ./.github/codeql/codeql-config-boundary-critical-quality.yml

      - name: Analyze
-        uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          category: "/codeql-critical-quality/config-boundary"

@@ -388,13 +388,13 @@ jobs:
          submodules: false

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          languages: javascript-typescript
          config-file: ./.github/codeql/codeql-gateway-runtime-boundary-critical-quality.yml

      - name: Analyze
-        uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          category: "/codeql-critical-quality/gateway-runtime-boundary"

@@ -411,13 +411,13 @@ jobs:
          submodules: false

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          languages: javascript-typescript
          config-file: ./.github/codeql/codeql-channel-runtime-boundary-critical-quality.yml

      - name: Analyze
-        uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          category: "/codeql-critical-quality/channel-runtime-boundary"

@@ -460,7 +460,7 @@ jobs:

      - name: Initialize CodeQL
        if: ${{ github.event_name != 'pull_request' }}
-        uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          languages: javascript-typescript
          config-file: ./.github/codeql/codeql-network-runtime-boundary-critical-quality.yml
@@ -468,7 +468,7 @@ jobs:
      - name: Analyze
        id: analyze
        if: ${{ github.event_name != 'pull_request' }}
-        uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          output: sarif-results
          category: "/codeql-critical-quality/network-runtime-boundary"
@@ -518,13 +518,13 @@ jobs:
          submodules: false

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          languages: javascript-typescript
          config-file: ./.github/codeql/codeql-agent-runtime-boundary-critical-quality.yml

      - name: Analyze
-        uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          category: "/codeql-critical-quality/agent-runtime-boundary"

@@ -541,13 +541,13 @@ jobs:
          submodules: false

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          languages: javascript-typescript
          config-file: ./.github/codeql/codeql-mcp-process-runtime-boundary-critical-quality.yml

      - name: Analyze
-        uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          category: "/codeql-critical-quality/mcp-process-runtime-boundary"

@@ -564,13 +564,13 @@ jobs:
          submodules: false

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          languages: javascript-typescript
          config-file: ./.github/codeql/codeql-memory-runtime-boundary-critical-quality.yml

      - name: Analyze
-        uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          category: "/codeql-critical-quality/memory-runtime-boundary"

@@ -587,13 +587,13 @@ jobs:
          submodules: false

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          languages: javascript-typescript
          config-file: ./.github/codeql/codeql-session-diagnostics-boundary-critical-quality.yml

      - name: Analyze
-        uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          category: "/codeql-critical-quality/session-diagnostics-boundary"

@@ -610,13 +610,13 @@ jobs:
          submodules: false

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          languages: javascript-typescript
          config-file: ./.github/codeql/codeql-plugin-sdk-reply-runtime-critical-quality.yml

      - name: Analyze
-        uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          category: "/codeql-critical-quality/plugin-sdk-reply-runtime"

@@ -633,13 +633,13 @@ jobs:
          submodules: false

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          languages: javascript-typescript
          config-file: ./.github/codeql/codeql-provider-runtime-boundary-critical-quality.yml

      - name: Analyze
-        uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          category: "/codeql-critical-quality/provider-runtime-boundary"

@@ -655,13 +655,13 @@ jobs:
          submodules: false

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          languages: javascript-typescript
          config-file: ./.github/codeql/codeql-ui-control-plane-critical-quality.yml

      - name: Analyze
-        uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          category: "/codeql-critical-quality/ui-control-plane"

@@ -677,13 +677,13 @@ jobs:
          submodules: false

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          languages: javascript-typescript
          config-file: ./.github/codeql/codeql-web-media-runtime-boundary-critical-quality.yml

      - name: Analyze
-        uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          category: "/codeql-critical-quality/web-media-runtime-boundary"

@@ -700,13 +700,13 @@ jobs:
          submodules: false

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          languages: javascript-typescript
          config-file: ./.github/codeql/codeql-plugin-boundary-critical-quality.yml

      - name: Analyze
-        uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          category: "/codeql-critical-quality/plugin-boundary"

@@ -723,12 +723,12 @@ jobs:
          submodules: false

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          languages: javascript-typescript
          config-file: ./.github/codeql/codeql-plugin-sdk-package-contract-critical-quality.yml

      - name: Analyze
-        uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          category: "/codeql-critical-quality/plugin-sdk-package-contract"
--- a/.github/workflows/codeql-macos-critical-security.yml
+++ b/.github/workflows/codeql-macos-critical-security.yml
@@ -35,7 +35,7 @@ jobs:
          swift --version

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          languages: swift
          build-mode: manual
@@ -46,7 +46,7 @@ jobs:

      - name: Analyze
        id: analyze
-        uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          output: sarif-results
          upload: failure-only
@@ -83,7 +83,7 @@ jobs:
          done

      - name: Upload filtered SARIF
-        uses: github/codeql-action/upload-sarif@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/upload-sarif@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          sarif_file: sarif-results-filtered
          category: "/codeql-critical-security/macos"
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -101,12 +101,12 @@ jobs:
            .github/codeql

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          languages: ${{ matrix.language }}
          config-file: ${{ matrix.config_file }}

      - name: Analyze
-        uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4
+        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4
        with:
          category: "/codeql-security-high/${{ matrix.category }}"
--- a/.github/workflows/docker-release.yml
+++ b/.github/workflows/docker-release.yml
@@ -88,11 +88,30 @@ jobs:
          ref: ${{ github.event_name == 'workflow_dispatch' && format('refs/tags/{0}', inputs.tag) || github.ref }}
          fetch-depth: 0

+      - name: Pre-pull BuildKit image
+        shell: bash
+        env:
+          BUILDKIT_IMAGE: moby/buildkit:buildx-stable-1
+        run: |
+          set -euo pipefail
+          for attempt in 1 2 3 4; do
+            if docker pull "${BUILDKIT_IMAGE}"; then
+              exit 0
+            fi
+            if [[ "${attempt}" == "4" ]]; then
+              echo "::error::Failed to pull ${BUILDKIT_IMAGE} after ${attempt} attempts"
+              exit 1
+            fi
+            sleep_seconds=$((attempt * 10))
+            echo "BuildKit image pull failed; retrying in ${sleep_seconds}s (${attempt}/4)."
+            sleep "${sleep_seconds}"
+          done
+
      - name: Set up Docker Builder
        uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4

      - name: Login to GitHub Container Registry
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
+        uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.repository_owner }}
@@ -279,11 +298,30 @@ jobs:
          ref: ${{ github.event_name == 'workflow_dispatch' && format('refs/tags/{0}', inputs.tag) || github.ref }}
          fetch-depth: 0

+      - name: Pre-pull BuildKit image
+        shell: bash
+        env:
+          BUILDKIT_IMAGE: moby/buildkit:buildx-stable-1
+        run: |
+          set -euo pipefail
+          for attempt in 1 2 3 4; do
+            if docker pull "${BUILDKIT_IMAGE}"; then
+              exit 0
+            fi
+            if [[ "${attempt}" == "4" ]]; then
+              echo "::error::Failed to pull ${BUILDKIT_IMAGE} after ${attempt} attempts"
+              exit 1
+            fi
+            sleep_seconds=$((attempt * 10))
+            echo "BuildKit image pull failed; retrying in ${sleep_seconds}s (${attempt}/4)."
+            sleep "${sleep_seconds}"
+          done
+
      - name: Set up Docker Builder
        uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4

      - name: Login to GitHub Container Registry
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
+        uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.repository_owner }}
@@ -468,7 +506,7 @@ jobs:
          fetch-depth: 0

      - name: Login to GitHub Container Registry
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
+        uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.repository_owner }}
@@ -561,11 +599,30 @@ jobs:
        with:
          fetch-depth: 1

+      - name: Pre-pull BuildKit image
+        shell: bash
+        env:
+          BUILDKIT_IMAGE: moby/buildkit:buildx-stable-1
+        run: |
+          set -euo pipefail
+          for attempt in 1 2 3 4; do
+            if docker pull "${BUILDKIT_IMAGE}"; then
+              exit 0
+            fi
+            if [[ "${attempt}" == "4" ]]; then
+              echo "::error::Failed to pull ${BUILDKIT_IMAGE} after ${attempt} attempts"
+              exit 1
+            fi
+            sleep_seconds=$((attempt * 10))
+            echo "BuildKit image pull failed; retrying in ${sleep_seconds}s (${attempt}/4)."
+            sleep "${sleep_seconds}"
+          done
+
      - name: Set up Docker Builder
        uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4

      - name: Login to GitHub Container Registry
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
+        uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.repository_owner }}
--- a/.github/workflows/install-smoke.yml
+++ b/.github/workflows/install-smoke.yml
@@ -223,7 +223,7 @@ jobs:
          persist-credentials: false

      - name: Log in to GHCR
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
+        uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
@@ -311,7 +311,7 @@ jobs:
          persist-credentials: false

      - name: Log in to GHCR
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
+        uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
@@ -417,7 +417,7 @@ jobs:
          persist-credentials: false

      - name: Log in to GHCR
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
+        uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
@@ -503,7 +503,7 @@ jobs:
          persist-credentials: false

      - name: Log in to GHCR
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
+        uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
--- a/.github/workflows/live-media-runner-image.yml
+++ b/.github/workflows/live-media-runner-image.yml
@@ -29,7 +29,7 @@ jobs:
        uses: actions/checkout@v6

      - name: Login to GHCR
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
+        uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
--- a/.github/workflows/mantis-discord-smoke.yml
+++ b/.github/workflows/mantis-discord-smoke.yml
@@ -37,7 +37,7 @@ jobs:
    steps:
      - name: Require maintainer-level repository access
        id: permission
-        uses: actions/github-script@v8
+        uses: actions/github-script@v9
        with:
          script: |
            const allowed = new Set(["admin", "maintain", "write"]);
--- a/.github/workflows/mantis-discord-status-reactions.yml
+++ b/.github/workflows/mantis-discord-status-reactions.yml
@@ -56,7 +56,7 @@ jobs:
    steps:
      - name: Require maintainer-level repository access
        id: permission
-        uses: actions/github-script@v8
+        uses: actions/github-script@v9
        with:
          script: |
            const allowed = new Set(["admin", "maintain", "write"]);
@@ -91,7 +91,7 @@ jobs:
    steps:
      - name: Resolve refs and target PR
        id: resolve
-        uses: actions/github-script@v8
+        uses: actions/github-script@v9
        with:
          script: |
            const defaultBaseline = "0bf06e953fdda290799fc9fb9244a8f67fdae593";
@@ -581,7 +581,7 @@ jobs:
      issues: write
    steps:
      - name: Remove workflow eyes reaction
-        uses: actions/github-script@v8
+        uses: actions/github-script@v9
        with:
          script: |
            const { owner, repo } = context.repo;
--- a/.github/workflows/mantis-discord-thread-attachment.yml
+++ b/.github/workflows/mantis-discord-thread-attachment.yml
@@ -56,7 +56,7 @@ jobs:
    steps:
      - name: Require maintainer-level repository access
        id: permission
-        uses: actions/github-script@v8
+        uses: actions/github-script@v9
        with:
          script: |
            const allowed = new Set(["admin", "maintain", "write"]);
@@ -91,7 +91,7 @@ jobs:
    steps:
      - name: Resolve refs and target PR
        id: resolve
-        uses: actions/github-script@v8
+        uses: actions/github-script@v9
        with:
          script: |
            const defaultBaseline = "synthetic-reverted-thread-filepath-fix";
@@ -603,7 +603,7 @@ jobs:
      issues: write
    steps:
      - name: Remove workflow eyes reaction
-        uses: actions/github-script@v8
+        uses: actions/github-script@v9
        with:
          script: |
            const { owner, repo } = context.repo;
--- a/.github/workflows/mantis-slack-desktop-smoke.yml
+++ b/.github/workflows/mantis-slack-desktop-smoke.yml
@@ -81,7 +81,7 @@ jobs:
    steps:
      - name: Require maintainer-level repository access
        id: permission
-        uses: actions/github-script@v8
+        uses: actions/github-script@v9
        with:
          script: |
            const allowed = new Set(["admin", "maintain", "write"]);
@@ -180,7 +180,7 @@ jobs:
        run: pnpm build

      - name: Cache Mantis candidate pnpm store
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: |
            ~/.local/share/pnpm/store
--- a/.github/workflows/mantis-telegram-desktop-proof.yml
+++ b/.github/workflows/mantis-telegram-desktop-proof.yml
@@ -79,7 +79,7 @@ jobs:
    steps:
      - name: Require maintainer-level repository access
        id: permission
-        uses: actions/github-script@v8
+        uses: actions/github-script@v9
        with:
          script: |
            if (context.eventName === "pull_request_target") {
@@ -125,7 +125,7 @@ jobs:
    steps:
      - name: Resolve refs and target PR
        id: resolve
-        uses: actions/github-script@v8
+        uses: actions/github-script@v9
        with:
          script: |
            const eventName = context.eventName;
@@ -709,7 +709,7 @@ jobs:
      issues: write
    steps:
      - name: Remove workflow eyes reaction
-        uses: actions/github-script@v8
+        uses: actions/github-script@v9
        with:
          script: |
            const { owner, repo } = context.repo;
--- a/.github/workflows/mantis-telegram-live.yml
+++ b/.github/workflows/mantis-telegram-live.yml
@@ -68,7 +68,7 @@ jobs:
    steps:
      - name: Require maintainer-level repository access
        id: permission
-        uses: actions/github-script@v8
+        uses: actions/github-script@v9
        with:
          script: |
            const allowed = new Set(["admin", "maintain", "write"]);
@@ -105,7 +105,7 @@ jobs:
    steps:
      - name: Resolve refs and target PR
        id: resolve
-        uses: actions/github-script@v8
+        uses: actions/github-script@v9
        with:
          script: |
            const eventName = context.eventName;
@@ -327,7 +327,7 @@ jobs:
        run: pnpm build

      - name: Cache Mantis candidate pnpm store
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: |
            ~/.local/share/pnpm/store
@@ -573,7 +573,7 @@ jobs:
      issues: write
    steps:
      - name: Remove workflow eyes reaction
-        uses: actions/github-script@v8
+        uses: actions/github-script@v9
        with:
          script: |
            const { owner, repo } = context.repo;
--- a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
+++ b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
@@ -1503,31 +1503,66 @@ jobs:

      - name: Build and push bare Docker E2E image
        if: steps.plan.outputs.needs_bare_image == '1' && steps.image_exists.outputs.bare_exists != '1'
-        uses: useblacksmith/build-push-action@fb9e3e6a9299c78462bfadd0d93352c316adc9b8 # v2
-        with:
-          context: .
-          file: ./scripts/e2e/Dockerfile
-          target: bare
-          platforms: linux/amd64
-          tags: ${{ steps.image.outputs.bare_image }}
-          sbom: true
-          provenance: mode=max
-          push: true
+        shell: bash
+        env:
+          IMAGE_REF: ${{ steps.image.outputs.bare_image }}
+        run: |
+          set -euo pipefail
+          build_cmd=(
+            docker buildx build
+            --file ./scripts/e2e/Dockerfile
+            --target bare
+            --platform linux/amd64
+            --tag "$IMAGE_REF"
+            --sbom=true
+            --provenance=mode=max
+            --push
+            .
+          )
+          for attempt in 1 2 3 4; do
+            if "${build_cmd[@]}"; then
+              exit 0
+            fi
+            if [[ "$attempt" == "4" ]]; then
+              echo "::error::Failed to build Docker E2E bare image after ${attempt} attempts"
+              exit 1
+            fi
+            sleep_seconds=$((attempt * 20))
+            echo "Docker E2E bare image build failed; retrying in ${sleep_seconds}s (${attempt}/4)."
+            sleep "$sleep_seconds"
+          done

      - name: Build and push functional Docker E2E image
        if: steps.plan.outputs.needs_functional_image == '1' && steps.image_exists.outputs.functional_exists != '1'
-        uses: useblacksmith/build-push-action@fb9e3e6a9299c78462bfadd0d93352c316adc9b8 # v2
-        with:
-          context: .
-          file: ./scripts/e2e/Dockerfile
-          target: functional
-          build-contexts: |
-            openclaw_package=.artifacts/docker-e2e-package
-          platforms: linux/amd64
-          tags: ${{ steps.image.outputs.functional_image }}
-          sbom: true
-          provenance: mode=max
-          push: true
+        shell: bash
+        env:
+          IMAGE_REF: ${{ steps.image.outputs.functional_image }}
+        run: |
+          set -euo pipefail
+          build_cmd=(
+            docker buildx build
+            --file ./scripts/e2e/Dockerfile
+            --target functional
+            --build-context openclaw_package=.artifacts/docker-e2e-package
+            --platform linux/amd64
+            --tag "$IMAGE_REF"
+            --sbom=true
+            --provenance=mode=max
+            --push
+            .
+          )
+          for attempt in 1 2 3 4; do
+            if "${build_cmd[@]}"; then
+              exit 0
+            fi
+            if [[ "$attempt" == "4" ]]; then
+              echo "::error::Failed to build Docker E2E functional image after ${attempt} attempts"
+              exit 1
+            fi
+            sleep_seconds=$((attempt * 20))
+            echo "Docker E2E functional image build failed; retrying in ${sleep_seconds}s (${attempt}/4)."
+            sleep "$sleep_seconds"
+          done

  prepare_live_test_image:
    needs: validate_selected_ref
--- a/.github/workflows/opengrep-precise-full.yml
+++ b/.github/workflows/opengrep-precise-full.yml
@@ -53,7 +53,7 @@ jobs:
          scripts/run-opengrep.sh --sarif --error

      - name: Upload SARIF to GitHub Code Scanning
-        uses: github/codeql-action/upload-sarif@v4.36.1
+        uses: github/codeql-action/upload-sarif@v4.36.2
        # Only upload if the scan actually produced a SARIF file.
        if: always() && hashFiles('.opengrep-out/precise.sarif') != ''
        with:
--- a/.github/workflows/opengrep-precise.yml
+++ b/.github/workflows/opengrep-precise.yml
@@ -84,7 +84,7 @@ jobs:
          scripts/run-opengrep.sh --changed --sarif --error

      - name: Upload SARIF to GitHub Code Scanning
-        uses: github/codeql-action/upload-sarif@v4.36.1
+        uses: github/codeql-action/upload-sarif@v4.36.2
        # Only upload if the scan actually produced a SARIF file.
        if: always() && hashFiles('.opengrep-out/precise.sarif') != ''
        with:
--- a/.github/workflows/plugin-clawhub-release.yml
+++ b/.github/workflows/plugin-clawhub-release.yml
@@ -24,6 +24,11 @@ on:
        description: Approved OpenClaw Release Publish workflow run id
        required: false
        type: string
+      dry_run:
+        description: Validate the full ClawHub artifact handoff without publishing.
+        required: false
+        default: false
+        type: boolean

 concurrency:
  group: plugin-clawhub-release-${{ github.event_name == 'workflow_dispatch' && inputs.ref || github.sha }}
@@ -35,7 +40,7 @@ env:
  CLAWHUB_REGISTRY: "https://clawhub.ai"
  CLAWHUB_REPOSITORY: "openclaw/clawhub"
  # Pinned to a reviewed ClawHub commit so release behavior stays reproducible.
-  CLAWHUB_REF: "facf20ceb6cc459e2872d941e71335a784bbc55c"
+  CLAWHUB_REF: "c9bb13023598dcc547fdf4a93b9d42512b8c8854"

 jobs:
  preview_plugins_clawhub:
@@ -56,12 +61,6 @@ jobs:
          ref: ${{ github.ref }}
          fetch-depth: 0

-      - name: Setup Node environment
-        uses: ./.github/actions/setup-node-env
-        with:
-          node-version: ${{ env.NODE_VERSION }}
-          install-bun: "false"
-
      - name: Resolve checked-out ref
        id: ref
        env:
@@ -107,6 +106,12 @@ jobs:
          echo "Plugin ClawHub publishes must target a commit reachable from main, release/*, or the matching Tideclaw alpha branch." >&2
          exit 1

+      - name: Setup Node environment
+        uses: ./.github/actions/setup-node-env
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          install-bun: "false"
+
      - name: Validate publishable plugin metadata
        env:
          PUBLISH_SCOPE: ${{ github.event_name == 'workflow_dispatch' && inputs.publish_scope || '' }}
@@ -326,15 +331,12 @@ jobs:
          PACKAGE_DIR: ${{ matrix.plugin.packageDir }}
        run: bash scripts/plugin-clawhub-publish.sh --dry-run "${PACKAGE_DIR}"

-  publish_plugins_clawhub:
+  pack_plugins_clawhub_artifacts:
    needs: [preview_plugins_clawhub, preview_plugin_pack, validate_release_publish_approval]
    if: github.event_name == 'workflow_dispatch' && needs.preview_plugins_clawhub.outputs.has_candidates == 'true'
    runs-on: ubuntu-latest
-    environment: clawhub-plugin-release
    permissions:
-      actions: read
      contents: read
-      id-token: write
    strategy:
      fail-fast: false
      max-parallel: 32
@@ -407,73 +409,7 @@ jobs:
          chmod +x "$RUNNER_TEMP/clawhub"
          echo "$RUNNER_TEMP" >> "$GITHUB_PATH"

-      - name: Write ClawHub token config
-        env:
-          CLAWHUB_TOKEN: ${{ secrets.CLAWHUB_TOKEN }}
-          CLAWHUB_REGISTRY: ${{ env.CLAWHUB_REGISTRY }}
-        run: |
-          set -euo pipefail
-          if [[ -z "${CLAWHUB_TOKEN}" ]]; then
-            echo "No CLAWHUB_TOKEN secret configured; publish will rely on GitHub OIDC trusted publishing."
-            exit 0
-          fi
-          node --input-type=module <<'EOF'
-          import { writeFileSync } from "node:fs";
-          import { join } from "node:path";
-
-          const path = join(process.env.RUNNER_TEMP, "clawhub-config.json");
-          writeFileSync(
-            path,
-            `${JSON.stringify(
-              {
-                registry: process.env.CLAWHUB_REGISTRY,
-                token: process.env.CLAWHUB_TOKEN,
-              },
-              null,
-              2,
-            )}\n`,
-          );
-          console.log(path);
-          EOF
-          echo "CLAWHUB_CONFIG_PATH=${RUNNER_TEMP}/clawhub-config.json" >> "$GITHUB_ENV"
-
-      - name: Check ClawHub package version
-        id: clawhub_package_version
-        env:
-          PACKAGE_NAME: ${{ matrix.plugin.packageName }}
-          PACKAGE_VERSION: ${{ matrix.plugin.version }}
-          CLAWHUB_REGISTRY: ${{ env.CLAWHUB_REGISTRY }}
-        run: |
-          set -euo pipefail
-          encoded_name="$(node -e 'console.log(encodeURIComponent(process.env.PACKAGE_NAME ?? ""))')"
-          encoded_version="$(node -e 'console.log(encodeURIComponent(process.env.PACKAGE_VERSION ?? ""))')"
-          url="${CLAWHUB_REGISTRY%/}/api/v1/packages/${encoded_name}/versions/${encoded_version}"
-          status=""
-          for attempt in $(seq 1 8); do
-            status="$(curl --silent --show-error --output /dev/null --write-out '%{http_code}' "${url}")"
-            if [[ "${status}" == "404" || "${status}" =~ ^2 ]]; then
-              break
-            fi
-            if [[ "${status}" == "429" || "${status}" =~ ^5 ]]; then
-              echo "ClawHub availability check returned ${status} for ${PACKAGE_NAME}@${PACKAGE_VERSION}; retrying (${attempt}/8)."
-              sleep 60
-              continue
-            fi
-            break
-          done
-          if [[ "${status}" =~ ^2 ]]; then
-            echo "${PACKAGE_NAME}@${PACKAGE_VERSION} is already published on ClawHub."
-            echo "already_published=true" >> "$GITHUB_OUTPUT"
-            exit 0
-          fi
-          if [[ "${status}" != "404" ]]; then
-            echo "Unexpected ClawHub response (${status}) for ${PACKAGE_NAME}@${PACKAGE_VERSION}."
-            exit 1
-          fi
-          echo "already_published=false" >> "$GITHUB_OUTPUT"
-
-      - name: Publish
-        if: steps.clawhub_package_version.outputs.already_published != 'true'
+      - name: Pack ClawHub package artifact
        env:
          CLAWHUB_REGISTRY: ${{ env.CLAWHUB_REGISTRY }}
          SOURCE_REPO: ${{ github.repository }}
@@ -481,8 +417,65 @@ jobs:
          SOURCE_REF: ${{ github.ref }}
          PACKAGE_TAG: ${{ matrix.plugin.publishTag }}
          PACKAGE_DIR: ${{ matrix.plugin.packageDir }}
-        run: bash scripts/plugin-clawhub-publish.sh --publish "${PACKAGE_DIR}"
+          OPENCLAW_CLAWHUB_PACK_OUTPUT_DIR: ${{ runner.temp }}/clawhub-package-artifact
+        run: bash scripts/plugin-clawhub-publish.sh --pack "${PACKAGE_DIR}"

+      - name: Upload ClawHub package artifact
+        uses: actions/upload-artifact@v7
+        with:
+          name: ${{ matrix.plugin.artifactName }}
+          path: ${{ runner.temp }}/clawhub-package-artifact/*.tgz
+          if-no-files-found: error
+          retention-days: 7
+
+  approve_plugin_clawhub_release:  # Gate job: its only step is an echo; publish_plugins_clawhub checks this job's result.
+    needs: [preview_plugins_clawhub, pack_plugins_clawhub_artifacts]
+    if: github.event_name == 'workflow_dispatch' && inputs.dry_run != true && needs.preview_plugins_clawhub.outputs.has_candidates == 'true'  # Not run for dry runs.
+    runs-on: ubuntu-latest
+    environment: clawhub-plugin-release  # NOTE(review): presumably this environment carries the approval protection rule; confirm in repo settings.
+    permissions: {}  # Empty map: the job token gets no permissions.
+    steps:
+      - name: Approve ClawHub package publish
+        run: echo "ClawHub package publish approved."
+
+  # Publishes (or, for dry runs, validates) each packed plugin artifact through
+  # the pinned ClawHub reusable workflow, one matrix entry per plugin.
+  publish_plugins_clawhub:
+    needs: [preview_plugins_clawhub, pack_plugins_clawhub_artifacts, approve_plugin_clawhub_release]
+    # A status-check function is required here: dry runs skip the approval job,
+    # and the implicit success() check would then skip this job as well.
+    # Use !cancelled() rather than always() so cancelling the workflow run
+    # still prevents a pending publish from starting. The expression is wrapped
+    # in ${{ }} because a plain YAML scalar may not begin with "!".
+    if: ${{ !cancelled() && github.event_name == 'workflow_dispatch' && needs.preview_plugins_clawhub.outputs.has_candidates == 'true' && needs.pack_plugins_clawhub_artifacts.result == 'success' && (inputs.dry_run == true || needs.approve_plugin_clawhub_release.result == 'success') }}
+    permissions:
+      actions: read
+      contents: read
+      id-token: write
+    strategy:
+      fail-fast: false
+      max-parallel: 32
+      matrix:
+        plugin: ${{ fromJson(needs.preview_plugins_clawhub.outputs.matrix) }}
+    # `uses:` cannot contain expressions, so this SHA is a literal; keep it in
+    # sync with CLAWHUB_REF in the workflow-level env block.
+    uses: openclaw/clawhub/.github/workflows/package-publish.yml@c9bb13023598dcc547fdf4a93b9d42512b8c8854
+    with:
+      dry_run: ${{ inputs.dry_run }}
+      json: true
+      package_artifact_name: ${{ matrix.plugin.artifactName }}
+      # The env context is not available in `with:` of a reusable-workflow
+      # call, so the registry URL is repeated literally; keep it in sync with
+      # CLAWHUB_REGISTRY.
+      registry: https://clawhub.ai
+      site: https://clawhub.ai
+      source_repo: ${{ github.repository }}
+      source_commit: ${{ needs.preview_plugins_clawhub.outputs.ref_revision }}
+      source_ref: ${{ github.ref }}
+      tags: ${{ matrix.plugin.publishTag }}
+    secrets:
+      clawhub_token: ${{ secrets.CLAWHUB_TOKEN }}
+
+  verify_published_clawhub_package:
+    needs: [preview_plugins_clawhub, publish_plugins_clawhub]
+    if: github.event_name == 'workflow_dispatch' && inputs.dry_run != true && needs.preview_plugins_clawhub.outputs.has_candidates == 'true'
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    strategy:
+      fail-fast: false
+      max-parallel: 32
+      matrix:
+        plugin: ${{ fromJson(needs.preview_plugins_clawhub.outputs.matrix) }}
+    steps:
      - name: Verify published ClawHub package
        env:
          CLAWHUB_REGISTRY: ${{ env.CLAWHUB_REGISTRY }}
--- a/.github/workflows/qa-live-transports-convex.yml
+++ b/.github/workflows/qa-live-transports-convex.yml
@@ -65,7 +65,7 @@ jobs:
    steps:
      - name: Require maintainer-level repository access
        id: permission
-        uses: actions/github-script@v8
+        uses: actions/github-script@v9
        with:
          script: |
            if (context.eventName === "schedule") {
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -39,6 +39,7 @@ Docs: https://docs.openclaw.ai
 - Cron/update/service env: doctor config preflight now migrates legacy cron JSON stores into SQLite before runtime reads, service env planning skips unresolved placeholders that would mask state-dir `.env` values, and session transcript rewrites keep registry markers/discriminants consistent. (#90072, #90208, #90277, #90488) Thanks @MonkeyLeeT and @sallyom.
 - Security/config/tooling: guard MCP HTTP redirects, protect global agent config defaults, and keep release/test/tooling proof failures bounded and explicit. (#89732, #90145)
 - Channels: WhatsApp restarts when per-account config changes, bounds background startup waits, closes failed sockets, and preserves reconnect behavior; Mattermost slash commands keep their state on `globalThis`; Feishu streaming cards preserve full merged content; voice-call tracks Twilio streams after connect; ClickClack reply tools respect `toolsAllow`. (#87951, #87965, #90486, #68113, #90534, #90181, #90607, #89500) Thanks @MukundaKatta, @mcaxtr, @infoanton, @mushuiyu886, and @sahibzada-allahyar.
+- Feishu: retry transient send rate-limit errors (HTTP 429, per-chat code 230020, tenant-level code 11232) with linear backoff, including SDK responses that fulfill with rate-limit bodies instead of throwing, and route streaming-card sends through the retry wrapper. (#89659) Thanks @ladygege.
 - Release/CI/E2E: main CI guard drift, PR merge diff scoping, live Docker credential staging, base-image qualification, installer Docker classification, Playwright dependency install recovery, API-key auth for Codex live Docker lanes, Parallels option terminators, and JSON-mode progress handling are tighter so release proof fails cleaner. (#90532, #90287, #90058) Thanks @RomneyDa, @hxy91819, and @mrunalp.
 - Release/CI/E2E: Docker E2E and live Docker harness runs now apply default memory, CPU, and process ceilings while preserving explicit per-lane overrides.
 - Release/CI/E2E: plugin lifecycle matrix resource sampling now fails phases that exceed RSS, wall-clock, or CPU ceilings instead of only logging the measurements.
--- a/apps/android/app/build.gradle.kts
+++ b/apps/android/app/build.gradle.kts
@@ -41,7 +41,7 @@ plugins {

 android {
  namespace = "ai.openclaw.app"
-  compileSdk = 36
+  compileSdk = 37

  // Release signing is local-only; keep the keystore path and passwords out of the repo.
  signingConfigs {
--- a/apps/android/app/src/main/java/ai/openclaw/app/gateway/GatewayDiscovery.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/gateway/GatewayDiscovery.kt
@@ -49,6 +49,19 @@ import java.util.concurrent.Executors
 import kotlin.coroutines.resume
 import kotlin.coroutines.resumeWithException

+private fun createDnsResolver(context: Context): DnsResolver = // Picks the resolver factory by runtime SDK level.
+  if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.CINNAMON_BUN) {
+    createContextDnsResolver(context)
+  } else {
+    createLegacyDnsResolver()
+  }
+
+@TargetApi(Build.VERSION_CODES.CINNAMON_BUN) // Only reached behind the SDK_INT check in createDnsResolver.
+private fun createContextDnsResolver(context: Context): DnsResolver = DnsResolver(context, null) // NOTE(review): meaning of the null second argument is not visible here; confirm against the platform API.
+
+@Suppress("DEPRECATION") // getInstance() is the fallback path used below CINNAMON_BUN.
+private fun createLegacyDnsResolver(): DnsResolver = DnsResolver.getInstance()
+
 /**
 * Watches local DNS-SD and optional wide-area DNS-SD for reachable OpenClaw gateways.
 */
@@ -58,7 +71,7 @@ class GatewayDiscovery(
 ) {
  private val nsd = context.getSystemService(NsdManager::class.java)
  private val connectivity = context.getSystemService(ConnectivityManager::class.java)
-  private val dns = DnsResolver.getInstance()
+  private val dns = createDnsResolver(context)
  private val serviceType = "_openclaw-gw._tcp."
  private val wideAreaDomain = System.getenv("OPENCLAW_WIDE_AREA_DOMAIN")
  private val logTag = "OpenClaw/GatewayDiscovery"
--- a/apps/android/benchmark/build.gradle.kts
+++ b/apps/android/benchmark/build.gradle.kts
@@ -5,7 +5,7 @@ plugins {

 android {
  namespace = "ai.openclaw.app.benchmark"
-  compileSdk = 36
+  compileSdk = 37

  defaultConfig {
    minSdk = 31
--- a/apps/android/gradle/libs.versions.toml
+++ b/apps/android/gradle/libs.versions.toml
@@ -4,7 +4,7 @@ androidx-activity = "1.13.0"
 androidx-benchmark = "1.4.1"
 androidx-camera = "1.6.0"
 androidx-compose-bom = "2026.05.01"
-androidx-core = "1.18.0"
+androidx-core = "1.19.0"
 androidx-exifinterface = "1.4.2"
 androidx-lifecycle = "2.10.0"
 androidx-security = "1.1.0"
@@ -19,7 +19,7 @@ junit = "4.13.2"
 junit-vintage = "6.1.0"
 kotest = "6.1.11"
 ktlint-gradle = "14.2.0"
-kotlin = "2.3.21"
+kotlin = "2.4.0"
 material = "1.14.0"
 okhttp = "5.3.2"
 play-services-code-scanner = "16.1.0"
--- a/apps/ios/Sources/Design/AgentProModels.swift
+++ b/apps/ios/Sources/Design/AgentProModels.swift
@@ -329,6 +329,13 @@ struct AgentConfigLite: Decodable {
 struct ConfigPatchParams: Encodable {
     let raw: String
     let baseHash: String
+    let replacePaths: [String]? // NOTE(review): presumably config paths to replace rather than merge; confirm against the gateway's config patch schema.
+
+    init(raw: String, baseHash: String, replacePaths: [String]? = nil) { // replacePaths defaults to nil, so two-argument call sites keep compiling.
+        self.raw = raw
+        self.baseHash = baseHash
+        self.replacePaths = replacePaths
+    }
 }

 enum SkillMutationError: LocalizedError {
--- a/apps/ios/Sources/Design/AgentProTab+Skills.swift
+++ b/apps/ios/Sources/Design/AgentProTab+Skills.swift
@@ -621,7 +621,10 @@ extension AgentProTab {
            }

            let raw = try Self.agentSkillsPatchRaw(agentId: self.activeAgentID, skills: skills)
-            let params = ConfigPatchParams(raw: raw, baseHash: baseHash)
+            let params = ConfigPatchParams(
+                raw: raw,
+                baseHash: baseHash,
+                replacePaths: ["agents.list[].skills"])
            let data = try JSONEncoder().encode(params)
            guard let json = String(data: data, encoding: .utf8) else {
                throw SkillMutationError.invalidPatchPayload
--- a/apps/macos/Package.resolved
+++ b/apps/macos/Package.resolved
@@ -1,5 +1,5 @@
 {
-  "originHash" : "a88730a64ccb5fd092108256c37d6c80bc7b92a5b6b563d83a9a26988550234d",
+  "originHash" : "035a4fe955164c62c1628de75f6437a14443a947eea2a1b0176ba484d6fde6f8",
  "pins" : [
    {
      "identity" : "axorcist",
@@ -42,8 +42,8 @@
      "kind" : "remoteSourceControl",
      "location" : "https://github.com/steipete/Peekaboo.git",
      "state" : {
-        "revision" : "faf843032772c2074d834b931911bf0002704136",
-        "version" : "3.3.0"
+        "revision" : "3a56ed2aa769bfefb5a78722dfce3c34088cfba1",
+        "version" : "3.4.0"
      }
    },
    {
--- a/apps/macos/Package.swift
+++ b/apps/macos/Package.swift
@@ -19,7 +19,7 @@ let package = Package(
        .package(url: "https://github.com/swiftlang/swift-subprocess.git", from: "0.4.0"),
        .package(url: "https://github.com/apple/swift-log.git", from: "1.10.1"),
        .package(url: "https://github.com/sparkle-project/Sparkle", from: "2.9.0"),
-        .package(url: "https://github.com/steipete/Peekaboo.git", exact: "3.3.0"),
+        .package(url: "https://github.com/steipete/Peekaboo.git", exact: "3.4.0"),
        .package(path: "../shared/OpenClawKit"),
        .package(path: "../swabble"),
    ],
--- a/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift
+++ b/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift
@@ -2773,6 +2773,7 @@ public struct ConfigPatchParams: Codable, Sendable {
    public let deliverycontext: [String: AnyCodable]?
    public let note: String?
    public let restartdelayms: Int?
+    public let replacepaths: [String]?

    public init(
        raw: String,
@@ -2780,7 +2781,8 @@ public struct ConfigPatchParams: Codable, Sendable {
        sessionkey: String?,
        deliverycontext: [String: AnyCodable]?,
        note: String?,
-        restartdelayms: Int?)
+        restartdelayms: Int?,
+        replacepaths: [String]?)
    {
        self.raw = raw
        self.basehash = basehash
@@ -2788,6 +2790,7 @@ public struct ConfigPatchParams: Codable, Sendable {
        self.deliverycontext = deliverycontext
        self.note = note
        self.restartdelayms = restartdelayms
+        self.replacepaths = replacepaths
    }

    private enum CodingKeys: String, CodingKey {
@@ -2797,6 +2800,7 @@ public struct ConfigPatchParams: Codable, Sendable {
        case deliverycontext = "deliveryContext"
        case note
        case restartdelayms = "restartDelayMs"
+        case replacepaths = "replacePaths"
    }
 }

--- a/config/knip.config.ts
+++ b/config/knip.config.ts
@@ -217,6 +217,16 @@ const config = {
      entry: ["index.js!", "scripts/postinstall.js!"],
      project: ["index.js!", "scripts/**/*.js!"],
    },
+    [`${BUNDLED_PLUGIN_ROOT_DIR}/llama-cpp`]: {
+      entry: bundledPluginEntries,
+      project: ["index.ts!", "src/**/*.{js,mjs,ts}!"],
+      ignoreDependencies: [
+        // The provider resolves node-llama-cpp from its own package at runtime
+        // so local embeddings use the plugin-owned native dependency.
+        "node-llama-cpp",
+        ...bundledPluginIgnoredRuntimeDependencies,
+      ],
+    },
    [`${BUNDLED_PLUGIN_ROOT_DIR}/*`]: {
      // Bundled plugins often load their public surface via string specifiers in
      // `index.ts` contracts, so Knip needs these convention-based entry files.
--- a/docs/.i18n/glossary.zh-CN.json
+++ b/docs/.i18n/glossary.zh-CN.json
--- a/docs/cli/browser.md
+++ b/docs/cli/browser.md
@@ -194,11 +194,14 @@ openclaw browser select <ref> OptionA OptionB
 openclaw browser fill --fields '[{"ref":"1","value":"Ada"}]'
 openclaw browser wait --text "Done"
 openclaw browser evaluate --fn '(el) => el.textContent' --ref <ref>
+openclaw browser evaluate --fn 'const title = document.title; return title;'
 openclaw browser evaluate --timeout-ms 30000 --fn 'async () => { await window.ready; return true; }'
 ```

-Use `evaluate --timeout-ms <ms>` when the page-side function may need longer
-than the default evaluate timeout.
+`evaluate --fn` accepts a function source, an expression, or a statement body.
+Statement bodies are wrapped as async functions, so use `return` for the value
+you want back. Use `evaluate --timeout-ms <ms>` when the page-side function may
+need longer than the default evaluate timeout.

 Action responses return the current raw `targetId` after action-triggered page
 replacement when OpenClaw can prove the replacement tab. Scripts should still
--- a/docs/concepts/context.md
+++ b/docs/concepts/context.md
@@ -21,7 +21,7 @@ Context is _not the same thing_ as "memory": memory can be stored on disk and re

 - `/status` → quick "how full is my window?" view + session settings.
 - `/context list` → what's injected + rough sizes (per file + totals).
- `/context detail` → deeper breakdown: per-file, per-tool schema sizes, per-skill entry sizes, and system prompt size.
+- `/context detail` → deeper breakdown: per-file, per-tool schema sizes, per-skill entry sizes, system prompt size, and compactable transcript message counts.
 - `/context map` → WinDirStat-style treemap image of the current session's tracked context contributors.
 - `/usage tokens` → append per-reply usage footer to normal replies.
 - `/compact` → summarize older history into a compact entry to free window space.
@@ -179,7 +179,7 @@ pluggable interface, lifecycle hooks, and configuration.
 - `System prompt (run)` = captured from the last embedded (tool-capable) run and persisted in the session store.
 - `System prompt (estimate)` = computed on the fly when no run report exists (or when running via a CLI backend that doesn't generate the report).

-Either way, it reports sizes and top contributors; it does **not** dump the full system prompt or tool schemas.
+Either way, it reports sizes and top contributors; it does **not** dump the full system prompt or tool schemas. In detailed mode, it also compares the session transcript with the same real-conversation message predicate used by compaction, so high prompt/cache usage is easier to distinguish from compactable conversation history.

 ## Related

--- a/docs/concepts/memory-builtin.md
+++ b/docs/concepts/memory-builtin.md
@@ -39,9 +39,12 @@ To set a provider explicitly:

 Without an embedding provider, only keyword search is available.

-To force the built-in local embedding provider, install the optional
-`node-llama-cpp` runtime package next to OpenClaw, then point `local.modelPath`
-at a GGUF file:
+To force local GGUF embeddings, install the official llama.cpp provider plugin,
+then point `local.modelPath` at a GGUF file:
+
+```bash
+openclaw plugins install @openclaw/llama-cpp-provider
+```

 ```json5
 {
@@ -67,7 +70,7 @@ at a GGUF file:
 | DeepInfra         | `deepinfra`         | Default: `BAAI/bge-m3`              |
 | Gemini            | `gemini`            | Supports multimodal (image + audio) |
 | GitHub Copilot    | `github-copilot`    | Uses Copilot subscription           |
-| Local             | `local`             | Optional `node-llama-cpp` runtime   |
+| Local             | `local`             | `@openclaw/llama-cpp-provider`      |
 | Mistral           | `mistral`           |                                     |
 | Ollama            | `ollama`            | Local/self-hosted                   |
 | OpenAI            | `openai`            | Default: `text-embedding-3-small`   |
--- a/docs/concepts/memory-qmd.md
+++ b/docs/concepts/memory-qmd.md
@@ -15,7 +15,7 @@ binary, and can index content beyond your workspace memory files.
 - **Reranking and query expansion** for better recall.
 - **Index extra directories** -- project docs, team notes, anything on disk.
 - **Index session transcripts** -- recall earlier conversations.
- **Fully local** -- runs with the optional node-llama-cpp runtime package and
+- **Fully local** -- runs with the official llama.cpp provider plugin and
  auto-downloads GGUF models.
 - **Automatic fallback** -- if QMD is unavailable, OpenClaw falls back to the
  builtin engine seamlessly.
--- a/docs/concepts/memory-search.md
+++ b/docs/concepts/memory-search.md
@@ -32,7 +32,8 @@ For multi-endpoint setups with memory-specific providers, `provider` can also
 be a custom `models.providers.<id>` entry, such as `ollama-5080`, when that
 provider sets `api: "ollama"` or another memory embedding adapter owner.

-For local embeddings with no API key, set `provider: "local"`. Source checkouts
+For local embeddings with no API key, install
+`@openclaw/llama-cpp-provider` and set `provider: "local"`. Source checkouts
 may still require native build approval: `pnpm approve-builds` then
 `pnpm rebuild node-llama-cpp`.

--- a/docs/concepts/openclaw-sdk.md
+++ b/docs/concepts/openclaw-sdk.md
@@ -1,323 +0,0 @@
---
-summary: "Public OpenClaw App SDK for external apps, scripts, dashboards, CI jobs, and IDE extensions"
-title: "OpenClaw App SDK"
-sidebarTitle: "App SDK"
-read_when:
-  - You are building an external app, script, dashboard, CI job, or IDE extension that talks to OpenClaw
-  - You are choosing between the App SDK and the Plugin SDK
-  - You are integrating with Gateway agent runs, sessions, events, approvals, models, or tools
---
-
-The **OpenClaw App SDK** is the public client API for apps outside the
-OpenClaw process. Use `@openclaw/sdk` when a script, dashboard, CI job, IDE
-extension, or other external app wants to connect to the Gateway, start agent
-runs, stream events, wait for results, cancel work, or inspect Gateway
-resources.
-
-<Note>
-  The App SDK is different from the [Plugin SDK](/plugins/sdk-overview).
-  `@openclaw/sdk` talks to the Gateway from outside OpenClaw.
-  `openclaw/plugin-sdk/*` is only for plugins that run inside OpenClaw and
-  register providers, channels, tools, hooks, or trusted runtimes.
-</Note>
-
-## What ships today
-
-`@openclaw/sdk` ships with:
-
-| Surface                   | Status  | What it does                                                                      |
-| ------------------------- | ------- | --------------------------------------------------------------------------------- |
-| `OpenClaw`                | Ready   | Main client entry point. Owns transport, connection, requests, and events.        |
-| `GatewayClientTransport`  | Ready   | WebSocket transport backed by the Gateway client.                                 |
-| `oc.agents`               | Ready   | Lists, creates, updates, deletes, and gets agent handles.                         |
-| `Agent.run()`             | Ready   | Starts a Gateway `agent` run and returns a `Run`.                                 |
-| `oc.runs`                 | Ready   | Creates, gets, waits for, cancels, and streams runs.                              |
-| `Run.events()`            | Ready   | Streams normalized per-run events with replay for fast runs.                      |
-| `Run.wait()`              | Ready   | Calls `agent.wait` and returns a stable `RunResult`.                              |
-| `Run.cancel()`            | Ready   | Calls `sessions.abort` by run id, with session key when available.                |
-| `oc.sessions`             | Ready   | Creates, resolves, sends to, patches, compacts, and gets session handles.         |
-| `Session.send()`          | Ready   | Calls `sessions.send` and returns a `Run`.                                        |
-| `oc.tasks`                | Ready   | Lists, reads, and cancels Gateway task ledger entries.                            |
-| `oc.models`               | Ready   | Calls `models.list` and the current `models.authStatus` status RPC.               |
-| `oc.tools`                | Ready   | Lists, scopes, and invokes Gateway tools through the policy pipeline.             |
-| `oc.artifacts`            | Ready   | Lists, gets, and downloads Gateway transcript artifacts.                          |
-| `oc.approvals`            | Ready   | Lists and resolves exec approvals through Gateway approval RPCs.                  |
-| `oc.environments`         | Partial | Lists Gateway-local and node environment candidates; create/delete are not wired. |
-| `oc.rawEvents()`          | Ready   | Exposes raw Gateway events for advanced consumers.                                |
-| `normalizeGatewayEvent()` | Ready   | Converts raw Gateway events into the stable SDK event shape.                      |
-
-The SDK also exports the core types used by those surfaces:
-`AgentRunParams`, `RunResult`, `RunStatus`, `OpenClawEvent`,
-`OpenClawEventType`, `GatewayEvent`, `OpenClawTransport`,
-`GatewayRequestOptions`, `SessionCreateParams`, `SessionSendParams`,
-`ArtifactSummary`, `ArtifactQuery`, `ArtifactsListResult`,
-`ArtifactsGetResult`, `ArtifactsDownloadResult`,
-`TaskSummary`, `TaskStatus`, `TasksListParams`, `TasksListResult`,
-`TasksGetResult`, `TasksCancelResult`, `RuntimeSelection`,
-`EnvironmentSelection`, `WorkspaceSelection`, `ApprovalMode`, and related
-result types.
-
-## Connect to a Gateway
-
-Create a client with an explicit Gateway URL, or inject a custom transport for
-tests and embedded app runtimes.
-
-```typescript
-import { OpenClaw } from "@openclaw/sdk";
-
-const oc = new OpenClaw({
-  url: "ws://127.0.0.1:18789",
-  token: process.env.OPENCLAW_GATEWAY_TOKEN,
-  requestTimeoutMs: 30_000,
-});
-
-await oc.connect();
-```
-
-`new OpenClaw({ gateway: "ws://..." })` is equivalent to `url`. The
-`gateway: "auto"` option is accepted by the constructor, but automatic Gateway
-discovery is not a separate SDK feature yet; pass `url` when the app does not
-already know how to discover the Gateway.
-
-For tests, pass an object that implements `OpenClawTransport`:
-
-```typescript
-const oc = new OpenClaw({
-  transport: {
-    async request(method, params) {
-      return { method, params };
-    },
-    async *events() {},
-  },
-});
-```
-
-## Run an agent
-
-Use `oc.agents.get(id)` when the app wants an agent handle, then call
-`agent.run()`.
-
-```typescript
-const agent = await oc.agents.get("main");
-
-const run = await agent.run({
-  input: "Review this pull request and suggest the smallest safe fix.",
-  model: "openai/gpt-5.5",
-  sessionKey: "main",
-  timeoutMs: 30_000,
-});
-
-for await (const event of run.events()) {
-  const data = event.data as { delta?: unknown };
-  if (event.type === "assistant.delta" && typeof data.delta === "string") {
-    process.stdout.write(data.delta);
-  }
-}
-
-const result = await run.wait({ timeoutMs: 120_000 });
-console.log(result.status);
-```
-
-Provider-qualified model refs such as `openai/gpt-5.5` are split into Gateway
-`provider` and `model` overrides. `timeoutMs` stays milliseconds in the SDK and
-is converted to Gateway timeout seconds for the `agent` RPC.
-
-`run.wait()` uses the Gateway `agent.wait` RPC. A wait deadline that expires
-while the run is still active returns `status: "accepted"` instead of pretending
-the run itself timed out. Runtime timeouts, aborted runs, and cancelled runs are
-normalized into `timed_out` or `cancelled`.
-
-## Create and reuse sessions
-
-Use sessions when the app wants durable transcript state.
-
-```typescript
-const session = await oc.sessions.create({
-  agentId: "main",
-  label: "release-review",
-});
-
-const run = await session.send("Prepare release notes from the current diff.");
-await run.wait();
-```
-
-`Session.send()` calls `sessions.send` and returns a `Run`. Session handles also
-support:
-
-```typescript
-await session.abort(run.id);
-await session.patch({ label: "renamed-session" });
-await session.compact({ maxLines: 200 });
-```
-
-## Stream events
-
-The SDK normalizes raw Gateway events into a stable `OpenClawEvent` envelope:
-
-```typescript
-type OpenClawEvent = {
-  version: 1;
-  id: string;
-  ts: number;
-  type: OpenClawEventType;
-  runId?: string;
-  sessionId?: string;
-  sessionKey?: string;
-  taskId?: string;
-  agentId?: string;
-  data: unknown;
-  raw?: GatewayEvent;
-};
-```
-
-Common event types include:
-
-| Event type            | Source Gateway event                        |
-| --------------------- | ------------------------------------------- |
-| `run.started`         | `agent` lifecycle start                     |
-| `run.completed`       | `agent` lifecycle end                       |
-| `run.failed`          | `agent` lifecycle error                     |
-| `run.cancelled`       | Aborted/cancelled lifecycle end             |
-| `run.timed_out`       | Timeout lifecycle end                       |
-| `assistant.delta`     | Assistant streaming delta                   |
-| `assistant.message`   | Assistant message                           |
-| `thinking.delta`      | Thinking or plan stream                     |
-| `tool.call.started`   | Tool/item/command start                     |
-| `tool.call.delta`     | Tool/item/command update                    |
-| `tool.call.completed` | Tool/item/command completion                |
-| `tool.call.failed`    | Tool/item/command failure or blocked status |
-| `approval.requested`  | Exec or plugin approval request             |
-| `approval.resolved`   | Exec or plugin approval resolution          |
-| `session.created`     | `sessions.changed` create                   |
-| `session.updated`     | `sessions.changed` update                   |
-| `session.compacted`   | `sessions.changed` compaction               |
-| `task.updated`        | Task update events                          |
-| `artifact.updated`    | Patch stream events                         |
-| `raw`                 | Any event without a stable SDK mapping yet  |
-
-`Run.events()` filters events to one run id and replays already-seen events for
-fast runs. That means the documented flow is safe:
-
-```typescript
-const run = await agent.run("Summarize the latest session.");
-
-for await (const event of run.events()) {
-  if (event.type === "run.completed") {
-    break;
-  }
-}
-```
-
-For app-wide streams, use `oc.events()`. For raw Gateway frames, use
-`oc.rawEvents()`.
-
-## Models, tools, artifacts, and approvals
-
-Model helpers map to current Gateway methods:
-
-```typescript
-await oc.models.list();
-await oc.models.status({ probe: false }); // calls models.authStatus
-```
-
-Tool helpers expose the Gateway catalog, effective tool view, and direct
-Gateway tool invocation. `oc.tools.invoke()` returns a typed envelope instead
-of throwing for policy or approval refusals.
-
-```typescript
-await oc.tools.list();
-await oc.tools.effective({ sessionKey: "main" });
-await oc.tools.invoke("tool-name", {
-  args: { input: "value" },
-  sessionKey: "main",
-  confirm: false,
-  idempotencyKey: "tool-call-1",
-});
-```
-
-Artifact helpers expose the Gateway artifact projection for session, run, or
-task context. Each call requires one explicit `sessionKey`, `runId`, or
-`taskId` scope:
-
-```typescript
-const { artifacts } = await oc.artifacts.list({ sessionKey: "main" });
-const first = artifacts[0];
-
-if (first) {
-  const { artifact } = await oc.artifacts.get(first.id, { sessionKey: "main" });
-  const download = await oc.artifacts.download(artifact.id, { sessionKey: "main" });
-  console.log(download.encoding, download.url);
-}
-```
-
-Approval helpers use the exec approval RPCs:
-
-```typescript
-const approvals = await oc.approvals.list();
-await oc.approvals.respond("approval-id", { decision: "approve" });
-```
-
-Task helpers use the durable task ledger that also backs `openclaw tasks`:
-
-```typescript
-const tasks = await oc.tasks.list({ status: "running", sessionKey: "agent:main:main" });
-const task = await oc.tasks.get(tasks.tasks[0].id);
-await oc.tasks.cancel(task.task.id, { reason: "user stopped task" });
-```
-
-Environment helpers expose read-only Gateway-local and node discovery:
-
-```typescript
-const { environments } = await oc.environments.list();
-await oc.environments.status(environments[0].id);
-```
-
-## Explicitly unsupported today
-
-The SDK includes names for the product model we want, but it does not silently
-pretend Gateway RPCs exist. These calls currently throw explicit unsupported
-errors:
-
-```typescript
-await oc.environments.create({});
-await oc.environments.delete("environment-id");
-```
-
-Per-run `workspace`, `runtime`, `environment`, and `approvals` fields are typed
-as future shape, but the current Gateway does not support those overrides on
-the `agent` RPC. If callers pass them, the SDK throws before submitting the run
-so work does not accidentally execute with default workspace, runtime,
-environment, or approval behavior.
-
-## App SDK vs Plugin SDK
-
-Use the App SDK when code lives outside OpenClaw:
-
-- Node scripts that start or observe agent runs
-- CI jobs that call a Gateway
-- dashboards and admin panels
-- IDE extensions
-- external bridges that do not need to become channel plugins
-- integration tests with fake or real Gateway transports
-
-Use the Plugin SDK when code runs inside OpenClaw:
-
-- provider plugins
-- channel plugins
-- tool or lifecycle hooks
-- agent harness plugins
-- trusted runtime helpers
-
-App SDK code should import from `@openclaw/sdk`. Plugin code should import from
-documented `openclaw/plugin-sdk/*` subpaths. Do not mix the two contracts.
-
-## Related
-
-- [OpenClaw App SDK API design](/reference/openclaw-sdk-api-design)
-- [Gateway RPC reference](/reference/rpc)
-- [Agent loop](/concepts/agent-loop)
-- [Agent runtimes](/concepts/agent-runtimes)
-- [Sessions](/concepts/session)
-- [Background tasks](/automation/tasks)
-- [ACP agents](/tools/acp-agents)
-- [Plugin SDK overview](/plugins/sdk-overview)
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -60,6 +60,14 @@
      "source": "/install/migrating-matrix",
      "destination": "/channels/matrix-migration"
    },
+    {
+      "source": "/concepts/openclaw-sdk",
+      "destination": "/gateway/external-apps"
+    },
+    {
+      "source": "/reference/openclaw-sdk-api-design",
+      "destination": "/gateway/external-apps"
+    },
    {
      "source": "/mcp",
      "destination": "/cli/mcp"
@@ -1241,6 +1249,7 @@
                  "plugins/admin-http-rpc",
                  "plugins/voice-call",
                  "plugins/memory-wiki",
+                  "plugins/llama-cpp",
                  "plugins/memory-lancedb",
                  "plugins/oc-path",
                  "plugins/zalouser"
@@ -1741,8 +1750,7 @@
                "group": "RPC and API",
                "pages": [
                  "reference/rpc",
-                  "concepts/openclaw-sdk",
-                  "reference/openclaw-sdk-api-design",
+                  "gateway/external-apps",
                  "reference/code-mode",
                  "reference/device-models"
                ]
--- a/docs/gateway/config-tools.md
+++ b/docs/gateway/config-tools.md
@@ -20,12 +20,12 @@ sidebarTitle: "Tools and custom providers"
 Local onboarding defaults new local configs to `tools.profile: "coding"` when unset (existing explicit profiles are preserved).
 </Note>

-| Profile     | Includes                                                                                                                        |
-| ----------- | ------------------------------------------------------------------------------------------------------------------------------- |
-| `minimal`   | `session_status` only                                                                                                           |
-| `coding`    | `group:fs`, `group:runtime`, `group:web`, `group:sessions`, `group:memory`, `cron`, `image`, `image_generate`, `video_generate` |
-| `messaging` | `group:messaging`, `sessions_list`, `sessions_history`, `sessions_send`, `session_status`                                       |
-| `full`      | No restriction (same as unset)                                                                                                  |
+| Profile     | Includes                                                                                                                                          |
+| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `minimal`   | `session_status` only                                                                                                                             |
+| `coding`    | `group:fs`, `group:runtime`, `group:web`, `group:sessions`, `group:memory`, `cron`, `image`, `image_generate`, `skill_workshop`, `video_generate` |
+| `messaging` | `group:messaging`, `sessions_list`, `sessions_history`, `sessions_send`, `session_status`                                                         |
+| `full`      | No restriction (same as unset)                                                                                                                    |

 ### Tool groups

--- a/docs/gateway/configuration.md
+++ b/docs/gateway/configuration.md
@@ -601,7 +601,8 @@ For tooling that writes config over the gateway API, prefer this flow:
  summaries)
 - `config.get` to fetch the current snapshot plus `hash`
 - `config.patch` for partial updates (JSON merge patch: objects merge, `null`
-  deletes, arrays replace)
+  deletes, arrays replace; a replacement that would remove entries must be
+  listed in `replacePaths`)
 - `config.apply` only when you intend to replace the entire config
 - `update.run` for explicit self-update plus restart; include `continuationMessage` when the post-restart session should run one follow-up turn
 - `update.status` to inspect the latest update restart sentinel and verify the running version after a restart
@@ -633,6 +634,14 @@ Both `config.apply` and `config.patch` accept `raw`, `baseHash`, `sessionKey`,
 `note`, and `restartDelayMs`. `baseHash` is required for both methods when a
 config already exists.

+`config.patch` also accepts `replacePaths`, an array of config paths whose array
+replacement is intentional. If a patch would delete an existing array or replace
+it with fewer entries, the Gateway rejects the write unless that exact path
+appears in `replacePaths`; nested arrays under array entries use `[]`, such as
+`agents.list[].skills`. This prevents truncated `config.get` snapshots from
+silently clobbering routing or allowlist arrays. Use `config.apply` when you
+intend to replace the full config.
+
 ## Environment variables

 OpenClaw reads env vars from the parent process plus:
--- a/docs/gateway/external-apps.md
+++ b/docs/gateway/external-apps.md
@@ -0,0 +1,86 @@
+---
+summary: "Current integration path for external apps, scripts, dashboards, CI jobs, and IDE extensions"
+title: "Gateway integrations for external apps"
+sidebarTitle: "External apps"
+read_when:
+  - You are building an external app, script, dashboard, CI job, or IDE extension that talks to OpenClaw
+  - You are choosing between Gateway RPC and the Plugin SDK
+  - You are integrating with Gateway agent runs, sessions, events, approvals, models, or tools
+---
+
+External apps should talk to OpenClaw through the Gateway protocol today. Use
+Gateway WebSocket and RPC methods when a script, dashboard, CI job, IDE
+extension, or another process wants to start agent runs, stream events, wait for
+results, cancel work, or inspect Gateway resources.
+
+<Warning>
+  There is no public npm client package yet. Do not add OpenClaw client package
+  names as application dependencies until release notes announce a published
+  package and this page includes install instructions.
+</Warning>
+
+<Note>
+  This page is for code outside the OpenClaw process. Plugin code that runs
+  inside OpenClaw should use documented `openclaw/plugin-sdk/*` subpaths instead.
+</Note>
+
+## What is available today
+
+| Surface                                 | Status | Use it for                                                                                    |
+| --------------------------------------- | ------ | --------------------------------------------------------------------------------------------- |
+| [Gateway protocol](/gateway/protocol)   | Ready  | WebSocket transport, connect handshake, auth scopes, protocol versioning, and events.         |
+| [Gateway RPC reference](/reference/rpc) | Ready  | Current Gateway methods for agents, sessions, tasks, models, tools, artifacts, and approvals. |
+| [`openclaw agent`](/cli/agent)          | Ready  | One-shot script integration when shelling out to the CLI is enough.                           |
+| [`openclaw message`](/cli/message)      | Ready  | Sending messages or channel actions from scripts.                                             |
+
+The source tree contains internal package work for a future client library, but
+that is not a public install surface. Treat it as a preview implementation
+detail until the packages are published and versioned.
+
+## Recommended path
+
+1. Run or discover a Gateway.
+2. Connect over the [Gateway protocol](/gateway/protocol).
+3. Call documented RPC methods from the [Gateway RPC reference](/reference/rpc).
+4. Pin the OpenClaw version you test against.
+5. Recheck the RPC reference when upgrading OpenClaw.
+
+For agent runs, start with the `agent` RPC and pair it with `agent.wait` when
+you need a terminal result. For durable conversation state, use the `sessions.*`
+methods. For UI integrations, subscribe to Gateway events and render only the
+event families your app understands.
+
+## App code vs plugin code
+
+Use Gateway RPC when code lives outside OpenClaw:
+
+- Node scripts that start or observe agent runs
+- CI jobs that call a Gateway
+- dashboards and admin panels
+- IDE extensions
+- external bridges that do not need to become channel plugins
+- integration tests with fake or real Gateway transports
+
+Use the Plugin SDK when code runs inside OpenClaw:
+
+- provider plugins
+- channel plugins
+- tool or lifecycle hooks
+- agent harness plugins
+- trusted runtime helpers
+
+External apps should not import `openclaw/plugin-sdk/*`; those subpaths are for
+plugins loaded by OpenClaw.
+
+## Related
+
+- [Gateway protocol](/gateway/protocol)
+- [Gateway RPC reference](/reference/rpc)
+- [CLI agent command](/cli/agent)
+- [CLI message command](/cli/message)
+- [Agent loop](/concepts/agent-loop)
+- [Agent runtimes](/concepts/agent-runtimes)
+- [Sessions](/concepts/session)
+- [Background tasks](/automation/tasks)
+- [ACP agents](/tools/acp-agents)
+- [Plugin SDK overview](/plugins/sdk-overview)
--- a/docs/gateway/protocol.md
+++ b/docs/gateway/protocol.md
@@ -405,7 +405,9 @@ enumeration of `src/gateway/server-methods/*.ts`.
    - `secrets.resolve` resolves command-target secret assignments for a specific command/target set.
    - `config.get` returns the current config snapshot and hash.
    - `config.set` writes a validated config payload.
-    - `config.patch` merges a partial config update.
+    - `config.patch` merges a partial config update. Destructive array
+      replacement requires the affected path in `replacePaths`; nested arrays
+      under array entries use `[]` paths such as `agents.list[].skills`.
    - `config.apply` validates + replaces the full config payload.
    - `config.schema` returns the live config schema payload used by Control UI and CLI tooling: schema, `uiHints`, version, and generation metadata, including plugin + channel schema metadata when the runtime can load it. The schema includes field `title` / `description` metadata derived from the same labels and help text used by the UI, including nested object, wildcard, array-item, and `anyOf` / `oneOf` / `allOf` composition branches when matching field documentation exists.
    - `config.schema.lookup` returns a path-scoped lookup payload for one config path: normalized path, a shallow schema node, matched hint + `hintPath`, optional `reloadKind`, and immediate child summaries for UI/CLI drill-down. `reloadKind` is one of `restart`, `hot`, or `none` and mirrors the Gateway config reload planner for the requested path. Lookup schema nodes keep the user-facing docs and common validation fields (`title`, `description`, `type`, `enum`, `const`, `format`, `pattern`, numeric/string/array/object bounds, and flags like `additionalProperties`, `deprecated`, `readOnly`, `writeOnly`). Child summaries expose `key`, normalized `path`, `type`, `required`, `hasChildren`, optional `reloadKind`, plus the matched `hint` / `hintPath`.
--- a/docs/maturity-scorecard/LTS.md
+++ b/docs/maturity-scorecard/LTS.md
@@ -1,376 +0,0 @@
----
-title: LTS category proposal
-version: 1
----
-
-# LTS category proposal
-
-This proposal identifies a minimal set of maturity-scorecard categories that
-should be eligible for the first enterprise-oriented LTS support promise.
-
-Scores are shown as `Coverage/Quality` from the current
-`inventory/<surface>/scores.yaml` files. They are useful context, but LTS
-eligibility here is a human product-support decision and does not require the
-current mechanical threshold of `coverage > 90` and `quality > 80`.
-Coverage and Quality numbers are Codex-generated and still need human
-verification before they are treated as authoritative.
-Completeness is intentionally omitted until that score is ready for use.
-Category names link to the corresponding per-category evidence note.
-
-Legend:
-
-- `Surface`: a top-level product or operating area in the taxonomy, such as `Gateway runtime`, `CLI`, `Slack`, or `Linux Gateway host`.
-- `Category`: a scored capability area within one surface, used as the unit for maturity and LTS inclusion decisions.
-- `✅`: category is included in the proposed initial LTS slice.
-- `➡️`: category is deferred from the proposed initial LTS slice.
-
-## Proposed initial LTS Surfaces
-
-### Gateway runtime (12/13)
-
-| Status | Category                                                                                              | Score (Coverage/Quality) |
-| ------ | ----------------------------------------------------------------------------------------------------- | ------------------------ |
-| ✅     | [Gateway Lifecycle](inventory/gateway-runtime/runtime-lifecycle-and-supervision.md)                   | `86/82`                  |
-| ✅     | [WebSocket Connection](inventory/gateway-runtime/websocket-handshake-and-session-establishment.md)    | `84/76`                  |
-| ✅     | [Device Auth and Pairing](inventory/gateway-runtime/device-identity-auth-and-pairing.md)              | `88/72`                  |
-| ✅     | [Security Controls](inventory/gateway-runtime/security-and-hardening-posture.md)                      | `84/74`                  |
-| ✅     | [Approvals and Remote Execution](inventory/gateway-runtime/approval-and-execution-safety.md)          | `88/72`                  |
-| ✅     | [Roles and Permissions](inventory/gateway-runtime/roles-scopes-and-operator-policy.md)                | `85/62`                  |
-| ✅     | [Health, Diagnostics, and Repair](inventory/gateway-runtime/observability-health-and-repair.md)       | `68/62`                  |
-| ✅     | [HTTP APIs](inventory/gateway-runtime/http-apis.md)                                                   | `88/74`                  |
-| ✅     | [Hosted Web Surface](inventory/gateway-runtime/hosted-web-surface.md)                                 | `88/74`                  |
-| ✅     | [Gateway RPC APIs and Events](inventory/gateway-runtime/core-rpc-coverage.md)                         | `68/57`                  |
-| ✅     | [Network Access and Discovery](inventory/gateway-runtime/network-exposure-and-transport-selection.md) | `68/62`                  |
-| ➡️     | [Nodes and Remote Capabilities](inventory/gateway-runtime/node-transport-and-capability-relay.md)     | `84/63`                  |
-| ✅     | [Protocol Compatibility](inventory/gateway-runtime/protocol-typing-and-compatibility.md)              | `72/70`                  |
-
-### Security, auth, pairing, and secrets (5/6)
-
-| Status | Category                                                                                                                            | Score (Coverage/Quality) |
-| ------ | ----------------------------------------------------------------------------------------------------------------------------------- | ------------------------ |
-| ✅     | [Approval Policy and Tool Safeguards](inventory/security-auth-pairing-and-secrets/approval-policy-and-dangerous-tool-safeguards.md) | `86/72`                  |
-| ✅     | [Gateway Auth and Remote Access](inventory/security-auth-pairing-and-secrets/gateway-auth-and-network-exposure.md)                  | `82/68`                  |
-| ✅     | [Device and Node Pairing](inventory/security-auth-pairing-and-secrets/device-identity-and-operator-pairing.md)                      | `83/66`                  |
-| ✅     | [Credential and Secret Hygiene](inventory/security-auth-pairing-and-secrets/secrets-storage-redaction-and-configuration-hygiene.md) | `78/62`                  |
-| ✅     | [Channel Access Control](inventory/security-auth-pairing-and-secrets/channel-identity-allowlists-and-sender-pairing.md)             | `78/66`                  |
-| ➡️     | [Plugin Trust](inventory/security-auth-pairing-and-secrets/plugin-installation-trust-and-security-boundaries.md)                    | `76/70`                  |
-
-### Agent Runtime (6/9)
-
-| Status | Category                                                                                                                             | Score (Coverage/Quality) |
-| ------ | ------------------------------------------------------------------------------------------------------------------------------------ | ------------------------ |
-| ✅     | [Agent Turn Execution](inventory/agent-runtime-and-provider-execution/agent-turn-orchestration-and-runtime-lifecycle.md)             | `82/74`                  |
-| ✅     | [Model and Runtime Selection](inventory/agent-runtime-and-provider-execution/model-selection-provider-routing-and-runtime-policy.md) | `84/72`                  |
-| ✅     | [Hosted Provider Execution](inventory/agent-runtime-and-provider-execution/hosted-provider-adapters-and-payload-compatibility.md)    | `76/70`                  |
-| ✅     | [Tool Execution Controls](inventory/agent-runtime-and-provider-execution/tool-execution-approvals-and-sandbox-policy.md)             | `86/74`                  |
-| ✅     | [Provider Auth](inventory/agent-runtime-and-provider-execution/provider-auth-profiles-and-credential-health.md)                      | `80/66`                  |
-| ➡️     | [External Runtimes and Subagents](inventory/agent-runtime-and-provider-execution/cli-harnesses-external-runtimes-and-subagents.md)   | `78/66`                  |
-| ➡️     | [Local and Self-hosted Providers](inventory/agent-runtime-and-provider-execution/local-and-self-hosted-provider-execution.md)        | `70/60`                  |
-| ➡️     | [Streaming and Progress](inventory/agent-runtime-and-provider-execution/streaming-progress-and-preview-visibility.md)                | `84/70`                  |
-| ✅     | [Tool Calls and Response Handling](inventory/agent-runtime-and-provider-execution/streaming-tool-call-and-response-normalization.md) | `80/66`                  |
-
-### Session, memory, and context engine (6/9)
-
-| Status | Category                                                                                                                          | Score (Coverage/Quality) |
-| ------ | --------------------------------------------------------------------------------------------------------------------------------- | ------------------------ |
-| ✅     | [Session Routing](inventory/session-memory-and-context-engine/session-routing-and-conversation-binding.md)                        | `82/74`                  |
-| ✅     | [CLI Session and Transcript Management](inventory/session-memory-and-context-engine/cli-session-and-transcript-management.md)     | `74/68`                  |
-| ✅     | [Context Engine](inventory/session-memory-and-context-engine/context-engine-and-runtime-assembly.md)                              | `72/80`                  |
-| ✅     | [Transcript Persistence](inventory/session-memory-and-context-engine/transcript-persistence-and-durability.md)                    | `78/58`                  |
-| ✅     | [Token Management](inventory/session-memory-and-context-engine/compaction-pruning-and-token-pressure.md)                          | `78/60`                  |
-| ➡️     | [Cross-client History and Session Parity](inventory/session-memory-and-context-engine/cross-client-history-and-session-parity.md) | `76/62`                  |
-| ➡️     | [Diagnostics, Maintenance, and Recovery](inventory/session-memory-and-context-engine/diagnostics-maintenance-and-recovery.md)     | `72/68`                  |
-| ✅     | [Core Prompts and Context](inventory/session-memory-and-context-engine/instruction-profile-and-context-visibility.md)             | `68/70`                  |
-| ➡️     | [Memory](inventory/session-memory-and-context-engine/memory-files-tools-and-active-memory.md)                                     | `66/58`                  |
-
-### CLI (6/7)
-
-| Status | Category                                                                                                              | Score (Coverage/Quality) |
-| ------ | --------------------------------------------------------------------------------------------------------------------- | ------------------------ |
-| ✅     | [CLI Setup](inventory/cli-install-update-onboard-doctor/package-install-and-cli-entrypoints.md)                       | `78/75`                  |
-| ✅     | [Onboarding and Auth Setup](inventory/cli-install-update-onboard-doctor/first-run-onboarding-and-auth-selection.md)   | `86/78`                  |
-| ✅     | [Gateway Service Management](inventory/cli-install-update-onboard-doctor/gateway-service-install-and-lifecycle.md)    | `88/66`                  |
-| ✅     | [CLI Observability](inventory/cli-install-update-onboard-doctor/status-health-logs-and-diagnostics-support-path.md)   | `84/74`                  |
-| ✅     | [Doctor](inventory/cli-install-update-onboard-doctor/doctor-config-auth-plugin-and-lint.md)                           | `80/68`                  |
-| ✅     | [Updates and Upgrades](inventory/cli-install-update-onboard-doctor/update-channel-and-core-upgrade-flow.md)           | `82/68`                  |
-| ➡️     | [Plugin and Channel Setup](inventory/cli-install-update-onboard-doctor/plugin-and-channel-setup-during-onboarding.md) | `82/72`                  |
-
-### Linux Gateway host (4/5)
-
-| Status | Category                                                                                                              | Score (Coverage/Quality) |
-| ------ | --------------------------------------------------------------------------------------------------------------------- | ------------------------ |
-| ✅     | [Host Setup and Updates](inventory/linux-gateway-host/linux-cli-install-and-update-path.md)                           | `82/78`                  |
-| ✅     | [Gateway Runtime and Service Control](inventory/linux-gateway-host/foreground-gateway-runtime-and-process-control.md) | `83/78`                  |
-| ✅     | [Remote Access and Security](inventory/linux-gateway-host/remote-network-exposure-tls-and-tailscale.md)               | `78/74`                  |
-| ✅     | [Diagnostics and Repair](inventory/linux-gateway-host/diagnostics-logs-doctor-and-repair.md)                          | `82/78`                  |
-| ➡️     | [Deployment Targets](inventory/linux-gateway-host/vps-container-and-cloud-deployment-guidance.md)                     | `76/72`                  |
-
-### Windows via WSL2 (5/6)
-
-| Status | Category                                                                                          | Score (Coverage/Quality) |
-| ------ | ------------------------------------------------------------------------------------------------- | ------------------------ |
-| ✅     | [WSL Setup](inventory/windows-via-wsl2/wsl2-install-and-runtime-prerequisites.md)                 | `76/70`                  |
-| ✅     | [CLI](inventory/windows-via-wsl2/wsl2-cli.md)                                                     | `76/70`                  |
-| ✅     | [Gateway Service Lifecycle](inventory/windows-via-wsl2/systemd-gateway-service-lifecycle.md)      | `64/66`                  |
-| ✅     | [Gateway Access and Exposure](inventory/windows-via-wsl2/auth-secrets-and-exposure-posture.md)    | `70/65`                  |
-| ✅     | [Diagnostics and Repair](inventory/windows-via-wsl2/diagnostics-doctor-logs-and-repair.md)        | `74/72`                  |
-| ➡️     | [Browser and Control UI](inventory/windows-via-wsl2/split-host-browser-and-control-ui-interop.md) | `72/70`                  |
-
-### Native Windows (1/4)
-
-| Status | Category                                                                                                                | Score (Coverage/Quality) |
-| ------ | ----------------------------------------------------------------------------------------------------------------------- | ------------------------ |
-| ✅     | [CLI](inventory/native-windows-cli-and-gateway/native-powershell-install-and-cli-entrypoints.md)                        | `72/66`                  |
-| ➡️     | [Gateway Management](inventory/native-windows-cli-and-gateway/native-gateway-foreground-runtime-and-process-control.md) | `68/62`                  |
-| ➡️     | [Networking](inventory/native-windows-cli-and-gateway/windows-host-networking-portproxy-and-remote-access.md)           | `58/56`                  |
-| ➡️     | [Updates](inventory/native-windows-cli-and-gateway/windows-update-restart-handoff-and-package-locks.md)                 | `74/68`                  |
-
-### Observability (3/5)
-
-| Status | Category                                                                                                           | Score (Coverage/Quality) |
-| ------ | ------------------------------------------------------------------------------------------------------------------ | ------------------------ |
-| ✅     | [Health and Repair](inventory/telemetry-diagnostics-and-observability/health-status-probes.md)                     | `80/76`                  |
-| ✅     | [Logging](inventory/telemetry-diagnostics-and-observability/logging-log-tail-and-redaction.md)                     | `82/84`                  |
-| ✅     | [Session Diagnostics](inventory/telemetry-diagnostics-and-observability/session-run-and-usage-diagnostics.md)      | `82/78`                  |
-| ➡️     | [Diagnostic Collection](inventory/telemetry-diagnostics-and-observability/diagnostics-export-support-bundles.md)   | `76/74`                  |
-| ➡️     | [Telemetry Export](inventory/telemetry-diagnostics-and-observability/diagnostic-events-hooks-and-trace-context.md) | `78/78`                  |
-
-### Channel framework (5/8)
-
-| Status | Category                                                                                                          | Score (Coverage/Quality) |
-| ------ | ----------------------------------------------------------------------------------------------------------------- | ------------------------ |
-| ✅     | [Channel Setup](inventory/channel-framework/channel-setup.md)                                                     | `84/78`                  |
-| ✅     | [Inbound Access and Identity Gates](inventory/channel-framework/inbound-access-and-identity-gates.md)             | `80/76`                  |
-| ✅     | [Conversation Routing and Delivery](inventory/channel-framework/conversation-routing-and-delivery.md)             | `77/71`                  |
-| ✅     | [Outbound Delivery and Reply Pipeline](inventory/channel-framework/outbound-delivery-and-reply-pipeline.md)       | `82/75`                  |
-| ✅     | [Status Health and Operator Controls](inventory/channel-framework/status-health-and-operator-controls.md)         | `82/78`                  |
-| ➡️     | [Channel Actions Commands and Approvals](inventory/channel-framework/channel-actions-commands-and-approvals.md)   | `68/72`                  |
-| ➡️     | [Group Thread and Ambient Room Behavior](inventory/channel-framework/group-thread-and-ambient-room-behavior.md)   | `76/68`                  |
-| ➡️     | [Media Attachments and Rich Channel Data](inventory/channel-framework/media-attachments-and-rich-channel-data.md) | `68/70`                  |
-
-### Slack (5/5)
-
-| Status | Category                                                                                             | Score (Coverage/Quality) |
-| ------ | ---------------------------------------------------------------------------------------------------- | ------------------------ |
-| ✅     | [Channel Setup and Operations](inventory/slack/app-install-auth-manifest-and-scopes.md)              | `74/68`                  |
-| ✅     | [Access and Identity](inventory/slack/dm-pairing-and-sender-authorization.md)                        | `74/70`                  |
-| ✅     | [Conversation Routing and Delivery](inventory/slack/channel-thread-routing-and-session-isolation.md) | `64/66`                  |
-| ✅     | [Media and Rich Content](inventory/slack/media-attachments-files-and-vision.md)                      | `64/66`                  |
-| ✅     | [Native Controls and Approvals](inventory/slack/slash-commands-and-native-command-routing.md)        | `72/70`                  |
-
-### Discord (4/6)
-
-| Status | Category                                                                                                         | Score (Coverage/Quality) |
-| ------ | ---------------------------------------------------------------------------------------------------------------- | ------------------------ |
-| ✅     | [Channel Setup and Operations](inventory/discord/bot-setup-and-account-configuration.md)                         | `74/71`                  |
-| ✅     | [Access and Identity](inventory/discord/dm-pairing-and-sender-authorization.md)                                  | `74/72`                  |
-| ✅     | [Conversation Routing and Delivery](inventory/discord/guild-channel-routing-and-session-isolation.md)            | `74/72`                  |
-| ✅     | [Media and Rich Content](inventory/discord/media-attachments-and-voice-message-handling.md)                      | `74/72`                  |
-| ➡️     | [Native Controls and Approvals](inventory/discord/native-slash-commands-components-and-interactive-callbacks.md) | `58/72`                  |
-| ➡️     | [Realtime Voice and Calls](inventory/discord/realtime-discord-voice-channels.md)                                 | `74/66`                  |
-
-### Telegram (5/5)
-
-| Status | Category                                                                                         | Score (Coverage/Quality) |
-| ------ | ------------------------------------------------------------------------------------------------ | ------------------------ |
-| ✅     | [Channel Setup and Operations](inventory/telegram/bot-setup-and-account-configuration.md)        | `76/70`                  |
-| ✅     | [Access and Identity](inventory/telegram/dm-pairing-and-sender-authorization.md)                 | `76/68`                  |
-| ✅     | [Conversation Routing and Delivery](inventory/telegram/group-forum-topic-and-session-routing.md) | `74/68`                  |
-| ✅     | [Media and Rich Content](inventory/telegram/media-location-polls-and-rich-inputs.md)             | `74/72`                  |
-| ✅     | [Native Controls and Approvals](inventory/telegram/inline-buttons-approvals-and-actions.md)      | `74/72`                  |
-
-### OpenAI / Codex provider path (3/5)
-
-| Status | Category                                                                                                                        | Score (Coverage/Quality) |
-| ------ | ------------------------------------------------------------------------------------------------------------------------------- | ------------------------ |
-| ✅     | [Model and Auth](inventory/openai-codex-provider-path/canonical-openai-model-routing-and-catalog.md)                            | `78/66`                  |
-| ✅     | [Responses and Tool Compatibility](inventory/openai-codex-provider-path/codex-responses-transport-and-payload-compatibility.md) | `76/70`                  |
-| ✅     | [Native Codex Harness](inventory/openai-codex-provider-path/native-codex-app-server-harness-and-thread-lifecycle.md)            | `82/72`                  |
-| ➡️     | [Image and Multimodal Input](inventory/openai-codex-provider-path/image-generation-editing-and-multimodal-input.md)             | `80/72`                  |
-| ➡️     | [Voice and Realtime Audio](inventory/openai-codex-provider-path/realtime-voice-transcription-and-speech.md)                     | `72/68`                  |
-
-### Browser automation and exec/sandbox tools (2/3)
-
-| Status | Category                                                                                                                   | Score (Coverage/Quality) |
-| ------ | -------------------------------------------------------------------------------------------------------------------------- | ------------------------ |
-| ✅     | [Tool Invocation and Execution](inventory/browser-automation-and-exec-sandbox-tools/exec-routing-and-process-lifecycle.md) | `82/79`                  |
-| ✅     | [Sandbox and Tool Policy](inventory/browser-automation-and-exec-sandbox-tools/sandbox-backends-and-workspace-isolation.md) | `76/72`                  |
-| ➡️     | [Browser Automation](inventory/browser-automation-and-exec-sandbox-tools/browser-actions-snapshots-and-artifacts.md)       | `78/74`                  |
-
-### Plugins (7/9)
-
-| Status | Category                                                                                                                | Score (Coverage/Quality) |
-| ------ | ----------------------------------------------------------------------------------------------------------------------- | ------------------------ |
-| ✅     | [Installing and running plugins](inventory/plugin-sdk-and-bundled-plugin-architecture/runtime-loading-and-lifecycle.md) | `86/84`                  |
-| ✅     | [Bundled plugins](inventory/plugin-sdk-and-bundled-plugin-architecture/bundled-plugin-discovery-and-inventory.md)       | `86/84`                  |
-| ➡️     | [Canvas plugin](inventory/plugin-sdk-and-bundled-plugin-architecture/canvas-plugin.md)                                  | `76/66`                  |
-| ✅     | [Plugin approvals](inventory/plugin-sdk-and-bundled-plugin-architecture/approval-and-security-boundaries.md)            | `84/86`                  |
-| ✅     | [Provider and tool plugins](inventory/plugin-sdk-and-bundled-plugin-architecture/provider-tool-plugin-architecture.md)  | `84/82`                  |
-| ✅     | [Channel plugins](inventory/plugin-sdk-and-bundled-plugin-architecture/channel-plugin-architecture.md)                  | `82/78`                  |
-| ✅     | [Authoring and Packaging plugins](inventory/plugin-sdk-and-bundled-plugin-architecture/public-sdk-api-and-subpaths.md)  | `77/74`                  |
-| ✅     | [Publishing plugins](inventory/plugin-sdk-and-bundled-plugin-architecture/distribution-release-and-compatibility.md)    | `79/82`                  |
-| ➡️     | [Testing plugins](inventory/plugin-sdk-and-bundled-plugin-architecture/developer-testing-and-fixtures.md)               | `84/81`                  |
-
-## Prioritized non-LTS candidates
-
-This section ranks the currently non-LTS surface/category pairs that should be
-prioritized for future LTS eligibility. It is based on the current taxonomy,
-`inventory/**/scores.yaml`, and sentiment from local `discrawl` and `gitcrawl`
-archives.
-
-Current scan basis:
-
-- Initial LTS slice: `68` categories.
-- Total taxonomy: `279` categories.
-- Non-LTS scan scope: `211` categories.
-- `gitcrawl` freshness: synced through 2026-05-28.
-- `discrawl` freshness: synced through 2026-05-29.
-
-### First wave
-
-#### Docker / Podman hosting
-
- [Container Setup](inventory/docker-podman-hosting/docker-install-compose-and-first-run-setup.md): `74/76`
- [Container Operations](inventory/docker-podman-hosting/runtime-configuration-state-volumes-and-secrets.md): `76/70`
- [Image Release and Validation](inventory/docker-podman-hosting/image-build-release-packaging-and-attestations.md): `84/78`
- [Agent Sandbox and Tooling](inventory/docker-podman-hosting/containerized-agents-sandbox-and-tooling-support.md): `75/68`
-
-Why: this is the strongest enterprise deployment gap outside the initial LTS
-slice. Discord support sentiment repeatedly clusters around VPS, Docker, WSL,
-volume persistence, secrets, update, and rollback confusion. GitHub also has a
-current Docker gateway restart-loop issue, `#86612`.
-
-#### Microsoft Teams
-
- [Channel Setup and Operations](inventory/microsoft-teams/setup-app-registration-credentials-admin-install.md): `58/64`
- [Access and Identity](inventory/microsoft-teams/dm-pairing-sender-authorization-config-writes.md): `60/62`
- [Conversation Routing and Delivery](inventory/microsoft-teams/team-channel-routing-mention-gates-sessions-thread-context.md): `68/66`
- [Media and Rich Content](inventory/microsoft-teams/media-attachments-file-consent-graph-file-flows.md): `62/58`
- [Native Controls and Approvals](inventory/microsoft-teams/actions-reactions-polls-approvals-group-management.md): `64/66`
-
-Why: Teams has low current scores, but it is the obvious second enterprise
-workplace channel after Slack. GitHub has strong concrete signal for channel
-session behavior, multiple-bot support, attachment handling, managed identity,
-and setup/admin complexity: `#81084`, `#71058`, `#65329`, `#67177`, and
-`#85149`.
-
-#### Cross-provider auth
-
- Anthropic provider path / [Provider Auth and Recovery](inventory/anthropic-provider-path/auth-onboarding-and-credential-profile-health.md): `78/70`
- Google provider path / [Provider Setup and Credentials](inventory/google-provider-path/provider-auth-credentials-and-operator-setup.md): `72/60`
-
-Why: provider auth is one of the highest recurring Discord support themes.
-Users get stuck on missing auth, fallback routing, cooldowns, stale profiles,
-plaintext secrets, provider mismatch, and unclear recovery commands. These
-categories are prerequisites for making any multi-provider enterprise harness
-reliable.
-
-#### Gateway Web App
-
- [Browser Access and Trust](inventory/browser-control-ui-and-webchat/gateway-connection-auth-device-pairing-and-origins.md): `84/68`
- [Configuration](inventory/browser-control-ui-and-webchat/config-schema-editing-and-safe-writes.md): `82/78`
- [Browser UI](inventory/browser-control-ui-and-webchat/control-ui-static-shell-routing-and-pwa.md): `74/72`
- [WebChat Conversations](inventory/browser-control-ui-and-webchat/chat-composer-session-model-controls-and-rendering.md): `78/66`
- [Operator Console](inventory/browser-control-ui-and-webchat/diagnostics-logs-update-and-activity.md): `78/74`
-
-Why: this is the operator and admin surface for an enterprise deployment.
-GitHub has open UX and runtime issues around auth gates, transcript loss,
-uploads, CJK input and streaming, and partial reloads: `#85750`, `#72500`,
-`#83344`, `#81606`, `#86035`, `#60247`, and `#86435`.
-
-#### Automation: cron, hooks, tasks, polling
-
- [Cron Jobs](inventory/automation-cron-hooks-tasks-polling/cron-job-lifecycle.md): `82/73`
- [Background Tasks and Flows](inventory/automation-cron-hooks-tasks-polling/background-task-ledger.md): `73/68`
- [Event Ingress](inventory/automation-cron-hooks-tasks-polling/channel-polling-webhooks.md): `65/58`
- [Automation Hooks](inventory/automation-cron-hooks-tasks-polling/internal-hooks.md): `78/72`
- [Heartbeat](inventory/automation-cron-hooks-tasks-polling/heartbeat-commitments.md): `82/72`
-
-Why: enterprise agents need durable scheduled work, alerting, and recovery.
-GitHub has current signal for startup races, duplicate names, silent data loss,
-status visibility, elevated scoping, and owner-tool stripping: `#75889`,
-`#76160`, `#83538`, `#51184`, `#41484`, and `#72954`.
-
-#### TUI
-
- [Runtime Modes](inventory/tui-and-terminal-ux/launch-modes-and-cli-entrypoints.md): `78/72`
- [Input and Commands](inventory/tui-and-terminal-ux/composer-keybindings-and-input-editing.md): `76/70`
- [Session Management](inventory/tui-and-terminal-ux/session-lifecycle-history-and-resume.md): `80/68`
- [Local Shell Execution](inventory/tui-and-terminal-ux/local-shell-execution-and-approval-boundary.md): `70/76`
- [Rendering and Output Safety](inventory/tui-and-terminal-ux/streaming-message-rendering-and-tool-cards.md): `76/70`
-
-Why: TUI is a real operator-facing surface with broad docs and decent baseline
-coverage, but it is still less proven as a primary supported workflow than the
-CLI and Gateway host paths in the initial slice. Promote it when launch modes,
-command/input behavior, session resume, local shell boundaries, and streaming
-rendering are treated as one terminal-native support promise.
-
-### Second wave
-
-#### macOS Gateway host
-
- [Gateway Service Lifecycle](inventory/macos-gateway-host/launchagent-service-lifecycle.md): `82/76`
- [Local Gateway Integration](inventory/macos-gateway-host/local-gateway-mode-host-configuration.md): `76/82`
- [Diagnostics and Observability](inventory/macos-gateway-host/diagnostics-logs-operator-observability.md): `80/83`
- [CLI Setup](inventory/macos-gateway-host/cli-install-runtime-prerequisites.md): `82/76`
- [Remote Gateway Mode](inventory/macos-gateway-host/remote-gateway-mode-transport.md): `72/82`
-
-Why: Linux is the cleaner first LTS host, but macOS has heavy real-world support
-volume and strong desktop-gateway relevance. Current issues include LaunchAgent
-reporting, bind behavior, cert and update drift, external-volume failures,
-Homebrew/runtime drift, unrecoverable upgrades, restart loops, and install
-failures: `#81751`, `#65619`, `#86579`, `#87199`, `#75250`, `#85027`,
-`#73673`, and `#60398`.
-
-#### Browser automation and exec/sandbox tools
-
- [Browser Automation](inventory/browser-automation-and-exec-sandbox-tools/browser-actions-snapshots-and-artifacts.md): `78/74`
-
-Why: the initial LTS slice already includes core tool invocation and sandbox
-policy, but browser execution is part of a practical enterprise agent harness.
-Open issues include sandbox/runtime mismatch, non-Docker backend support,
-noVNC/CJK behavior, upload access, timeouts, and Control UI responsiveness.
-
-#### Web search tools
-
- [Network Safety](inventory/web-search-tools/network-safety-ssrf-redirects-and-untrusted-content.md): `84/84`
- [Tool Availability and Fetch](inventory/web-search-tools/tool-exposure-policy-and-runtime-tool-wiring.md): `82/80`
- [Search Providers](inventory/web-search-tools/bundled-structured-search-providers.md): `76/72`
- [Setup and Diagnostics](inventory/web-search-tools/operator-setup-provider-selection-and-credential-repair.md): `74/70`
-
-Why: web fetch and structured search are useful for enterprise research
-workflows, but they are outside the minimal first support promise. Promote this
-surface when network safety, runtime tool wiring, provider selection, timeout
-behavior, and operator repair are accepted together. GitHub has search timeout,
-provider-native tool, tool-drop, and provider option signal:
-`#87505`, `#23353`, `#77826`, and `#84872`.
-
-#### Gateway runtime
-
- [Nodes and Remote Capabilities](inventory/gateway-runtime/node-transport-and-capability-relay.md): `84/63`
-
-Why: node pairing and remote node capability relay still harden the perimeter
-around the existing Gateway LTS promise and need separate operational proof.
-
-### Lower priority for LTS
-
-Observability should add Diagnostic Collection and
-Telemetry Export hardening after the runtime and channel priorities above.
-Plugin SDK should add Testing plugins, Packaging plugins, then Publishing
-plugins; this matters for ecosystem durability, but has weaker direct
-enterprise sentiment than Docker, Teams, Slack, and provider auth.
-
-Continue to defer mobile apps, voice, media generation, regional channels,
-iMessage, Matrix, WhatsApp, and long-tail providers unless a specific customer
-commitment changes the support boundary.
-
-## Interpretation
-
-This LTS slice is intentionally conservative. It promises enough for an
-enterprise to run a usable agent harness with Gateway, auth and policy,
-session/runtime execution, operational diagnostics, Linux hosting, Slack,
-Discord, Telegram, the OpenAI/Codex provider path, and tool execution controls.
-
-Categories outside this slice can keep shipping, but should not be part of the
-initial LTS guarantee until their owner, support boundary, upgrade behavior,
-and enterprise failure modes are explicitly accepted.
--- a/docs/maturity-scorecard/README.md
+++ b/docs/maturity-scorecard/README.md
@@ -1,214 +0,0 @@
---
-title: Maturity scorecard process
-version: 3
---
-
-# Maturity scorecard process
-
-This directory is an artifact root maintained by the local `claw-score` skill
-defined in the external `claw-score` `SKILL.md`.
-
-The skill owns scoring policy, scoring workflow, validation, artifact shape,
-and renderer expectations. This README is the human-facing directory contract
-and process overview.
-
-The top-level scorecard layout is owned by the skill template
-`.agents/skills/claw-score/references/maturity-scorecard-template.md`, then
-rendered into [maturity-scorecard.md](maturity-scorecard.md).
-
-Operationally, the skill separates three workflows: taxonomy maintenance, score
-computation, and skill self-maintenance. The detailed agent instructions for
-those live in the skill reference files, not in this README.
-
-## Source files
-
- `taxonomy.yaml` is the source of truth for surfaces, maturity levels,
-  surface ids, category definitions, category `human_lts_override` values,
-  category `docs` reading lists, surface `completeness_instructions`, and
-  `last_score_run` provenance for the active in-repo surfaces.
- `/Users/kevinlin/tmp/maturity/taxonomy.yaml` stores the archived taxonomy for
-  the other surfaces that are temporarily out of the active in-repo scope.
- `<artifact-root>/<surface>/scores.yaml` is the per-surface score source for
-  Coverage, Quality, Completeness, and row identity (`name` and
-  `category_note`). The renderer joins taxonomy-owned category metadata from
-  `taxonomy.yaml`. Active artifact paths are derived by naming convention from
-  the taxonomy surface id: `inventory/<surface-id>/report.md`,
-  `inventory/<surface-id>/scores.yaml`, and `inventory/<surface-id>/<category-note>`.
-  Historical archived surfaces live at `/Users/kevinlin/tmp/maturity` and are
-  intentionally skipped by the normal `claw-score` render and sync workflows.
- [maturity-scorecard.md](maturity-scorecard.md), [taxonomy.md](taxonomy.md),
-  [taxonomy-outline.md](taxonomy-outline.md), and
-  `<artifact-root>/<surface>/report.md` are rendered Markdown artifacts. Do not
-  hand-edit their generated tables.
-
-## Directory layout
-
-```text
-docs/kevinslin/maturity-scorecard/
-├── README.md
-├── taxonomy.md
-├── taxonomy-outline.md
-├── maturity-scorecard.md
-└── inventory/
-    ├── gateway-runtime/
-    │   ├── report.md
-    │   ├── <category>.md
-    │   └── scores.yaml
-    └── plugin-sdk-and-bundled-plugin-architecture/
-        ├── report.md
-        ├── <category>.md
-        └── scores.yaml
-```
-
-Interpret these files as follows:
-
- `README.md`: human-facing process overview and artifact contract.
- `taxonomy.md`: rendered taxonomy reference generated from the skill-owned
-  taxonomy YAML.
- `taxonomy-outline.md`: rendered surface outline grouped by family, generated
-  from the skill-owned taxonomy YAML.
- [maturity-scorecard.md](maturity-scorecard.md): rendered top-level scorecard generated from the
-  skill-owned taxonomy.
- `inventory/`: canonical artifact root for active maturity-scorecard work.
- `/Users/kevinlin/tmp/maturity`: archive location for historical artifact
-  trees and the archived taxonomy file. Treat it as out of scope unless
-  explicitly restoring archived work.
- `<artifact-root>/<surface>/scores.yaml`: per-surface score source generated or
-  refreshed by the skill.
- `<artifact-root>/<surface>/report.md`: rendered surface report.
- `<artifact-root>/<surface>/<category>.md`: per-category evidence note.
-
-## Concepts
-
- `taxonomy`: the skill-owned YAML file that defines the top-level maturity
-  model, surface inventory, per-surface category metadata, and `last_score_run`
-  state.
- `scorecard`: the rendered top-level Markdown overview generated from the
-  taxonomy. Its generated table includes per-surface Coverage, Quality,
-  Completeness, and LTS status columns derived from `scores.yaml` plus
-  taxonomy `human_lts_override` metadata.
- `taxonomy doc`: the rendered Markdown reference view of the taxonomy,
-  including the surface inventory and per-surface categories.
- `taxonomy outline`: the rendered Markdown outline of active surfaces grouped
-  by family.
- `surface`: one scored product or platform area from the taxonomy.
- `surface slug`: the stable filesystem-friendly identifier used for a
-  surface's inventory directory and filenames.
 `artifact root`: the per-surface parent directory derived by naming convention
-  from the taxonomy surface id. Active work currently uses `inventory/<surface-id>/`;
-  archived surfaces are marked in taxonomy with `archived: true`.
- `category`: a significant user-facing or operator-facing part of a surface
-  that gets its own evidence note and row in the per-surface score YAML. A
-  category should represent a capability area a user can actually utilize, not
-  an internal implementation bucket.
- `category note`: the per-category Markdown evidence artifact
-  `<artifact-root>/<surface>/<category>.md`. Notes include a taxonomy-derived
-  `## Features` section that mirrors the category feature list from
-  `taxonomy.yaml`.
- `scores.yaml`: the canonical per-surface score source
-  `<artifact-root>/<surface>/scores.yaml`; it stores Coverage, Quality,
-  Completeness, and row identity, while taxonomy owns features, docs, search
-  anchors, `human_lts_override`, and surface-level
-  `completeness_instructions`.
 `LTS.md`: hand-curated initial LTS slice. Its status rows must stay
-  synchronized with taxonomy `human_lts_override` values and rendered
-  per-surface report matrix LTS cells; verify this by running
-  `.agents/skills/claw-score/scripts/validate_lts_sync.py`.
- `completeness_instructions`: taxonomy-owned surface metadata pointing to a
-  skill-relative rubric file under `.agents/skills/claw-score/` that explains
-  how to score Completeness for that surface.
- `features`: taxonomy-owned category metadata stored as objects with `name`
-  and `description`. Keep `name` short and scannable; put the fuller
-  explanation in `description`. A feature should be a user-invokable
-  capability for that surface/category, not a handshake step or other
-  implementation-only detail.
- `docs`: taxonomy-owned category metadata listing repo-relative doc URLs that
-  best cover the category. Keep this as a short primary-reading list, not a
-  full evidence dump. During taxonomy maintenance, this list should be chosen
-  by scanning the OpenClaw docs corpus for the category and selecting the
-  canonical pages a reviewer should open first.
- `surface report`: the rendered per-surface Markdown report
-  `<artifact-root>/<surface>/report.md`.
-
-Category display names should be short, operator-facing capability names.
-Prefer fewer coarser categories, merge related concepts that share docs and
-operator workflows, and keep old or implementation-heavy terminology in
-`search_anchors`, feature descriptions, or evidence rather than in the display
-name.
-
-## Versioning
-
-Markdown scorecard artifacts use frontmatter `version` to record the version
-of the scoring process that produced the document.
-
-During a real rescore, the surface report and category notes should have
-frontmatter `version` equal to the active `scores.yaml process_version`.
-
-YAML sources use:
-
- `version`: schema version for the file shape. This starts at `1`.
- `process_version`: scoring process version. Current scoring runs use `3`.
-
-Do not bulk-update existing per-surface `last_score_run.process_version` or
-`scores.yaml process_version` for render-only, taxonomy-only, or mechanical doc
-changes. Update a surface's scoring provenance when that surface is actually
-rescored with refreshed evidence.
-
-## LTS
-
-LTS is generated, not scored by category agents.
-
-The renderer marks a category as LTS when either condition is true:
-
-- `quality > 80 and coverage > 90`
-- the matching taxonomy category sets `human_lts_override: true`
-
-Keep `human_lts_override` in `taxonomy.yaml`. Do not write it into
-`scores.yaml`.
-
-## Regeneration
-
-Use the skill scripts from the repository root:
-
-```bash
-python3 .agents/skills/claw-score/scripts/sync_taxonomy_categories.py \
-  --taxonomy .agents/skills/claw-score/taxonomy.yaml \
-  --scorecard-root docs/kevinslin/maturity-scorecard
-
-python3 .agents/skills/claw-score/scripts/sync_scores_yaml.py \
-  --taxonomy .agents/skills/claw-score/taxonomy.yaml \
-  --scorecard-root docs/kevinslin/maturity-scorecard
-
-python3 .agents/skills/claw-score/scripts/render_taxonomy_from_taxonomy.py \
-  --taxonomy .agents/skills/claw-score/taxonomy.yaml \
-  --taxonomy-doc docs/kevinslin/maturity-scorecard/taxonomy.md \
-  --taxonomy-outline-doc docs/kevinslin/maturity-scorecard/taxonomy-outline.md
-
-python3 .agents/skills/claw-score/scripts/render_scorecard_from_taxonomy.py \
-  --taxonomy .agents/skills/claw-score/taxonomy.yaml \
-  --scorecard docs/kevinslin/maturity-scorecard/maturity-scorecard.md
-```
-
-Use each command's `--check` mode before handoff when verifying artifacts.
-
-If the skill's renderers, sync scripts, or templates change, rerun the relevant
-commands above and update this README in the same change when the artifact
-contract or regeneration guidance changes.
-
-## Editing rules
-
- For scoring, rescoring, audits, taxonomy changes, report regeneration, or
-  output-shape changes, use `claw-score`.
- When updating the `claw-score` skill itself, update the relevant source
-  files under `.agents/skills/claw-score/` and keep this README aligned with
-  any artifact-contract, terminology, or regeneration changes.
- Do not hand-edit generated tables or inventories in `taxonomy.md` or
-  `taxonomy-outline.md`; rerender them through the skill scripts.
- Do not hand-edit generated score tables in `maturity-scorecard.md` or
-  `<artifact-root>/<surface>/report.md`; rerender them through the skill
-  scripts. That includes the report's feature lists, which are rendered from
-  taxonomy.
- Do not hand-edit taxonomy-derived `## Features` sections in category notes;
-  update `taxonomy.yaml` and rerender the owning surface report instead.
- Keep agent instructions in the external `claw-score` `SKILL.md`, not in this
-  directory.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/agent-turn-orchestration-and-runtime-lifecycle.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/agent-turn-orchestration-and-runtime-lifecycle.md
@@ -1,89 +0,0 @@
---
-title: "Agent Runtime - Agent Turn Execution Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Agent Runtime - Agent Turn Execution Maturity Note
-
-## Summary
-
-Agent turns have a first-class runtime lifecycle: docs explain gateway/embedded starts, queueing, session locks, event streams, timeouts, and early termination; source centralizes turn execution in `runAgentTurnWithFallback`; tests exercise fallback orchestration, aborts, lifecycle backstops, event delivery, and runtime telemetry. Quality is Beta because archive evidence still shows recent empty/failed replies and timeout edge cases around long-running or restarted embedded turns.
-
-## Category Scope
-
-This category covers user/operator-visible turn execution: starting an agent turn, choosing gateway versus embedded runtime, establishing session/run ids, applying queue locks, bridging events, honoring aborts, timing provider/model work, and emitting terminal outcomes.
-
-## Features
-
- Turn startup and runtime choice: Starting an agent turn and choosing gateway versus embedded runtime execution.
- Session and run coordination: Establishing session and run ids, queue locks, and related execution coordination.
- Abort and terminal outcomes: Honoring aborts, timing provider/model work, and emitting terminal outcomes.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (82%)`
-
-Coverage is broad across concepts, CLI docs, source, and tests. The remaining coverage gap is direct scenario proof for every runtime restart/timeout path per provider release.
-
-## Quality Score
-
- Score: `Beta (74%)`
-
-The lifecycle has strong guardrails and diagnostics, but recent operational reports still show terminal-empty replies, restart recovery cases, and timeout-sensitive local/embedded runs that need clearer operator recovery behavior.
-
-## Completeness Score
-
- Score: `Stable (82%)`
- Surface instructions: evaluated against `references/completeness/agent-runtime-and-provider-execution.md`.
 Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Turn startup and runtime choice, Session and run coordination, and Abort and terminal outcomes.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Some runtime failure modes are documented through tests and archived issues rather than a single operator-facing troubleshooting guide.
- Long-running local or external runtime turns still appear sensitive to timeout configuration.
- Archive searches found little direct GitHub issue coverage for the narrow `agent.wait` and embedded fallback terms, so Discord evidence carries more of the field-signal burden.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/concepts/agent-loop.md` documents the agent RPC shape, `agentCommand`, `runEmbeddedAgent`, event bridge, `agent.wait`, queueing/session locks, streaming/tool/final payload behavior, event streams, timeouts, and early termination reasons.
- `/Users/kevinlin/code/openclaw/docs/cli/agent.md` documents running an agent turn via Gateway, model/thinking/local/deliver/timeout options, local preload behavior, gateway timeout fallback session/run ids, and SIGTERM/SIGINT `chat.abort`.
- `/Users/kevinlin/code/openclaw/docs/concepts/agent-runtimes.md` explains runtimes versus providers/model/channel, embedded harnesses, CLI backends, Codex surfaces, runtime ownership, runtime selection, and fail-closed explicit runtimes.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.ts` imports `runEmbeddedAgent`, `runWithModelFallback`, runtime provider resolution, and outcome planning; it implements turn timing, context window resolution, `runAgentTurnWithFallback`, fallback candidate auth/profile setup, live model switches, run diagnostics, reply media, and compaction notices.
- `/Users/kevinlin/code/openclaw/src/agents/cli-runner.ts` finalizes CLI context engine turns, persists approved CLI transcripts, and runs CLI agent turns through the same hook path.
- `/Users/kevinlin/code/openclaw/packages/agent-core/src/agent-loop.test.ts` anchors EventStream failure handling in the lower-level agent loop package.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` covers `runAgentTurnWithFallback`, abort signal propagation, queued fallback rechecks, fallback auth availability, CLI assistant event previews, lifecycle terminal backstops, gateway restart copy, external error formatting, live model switch restart/retry caps, and auth profile state on retries.
- `/Users/kevinlin/code/openclaw/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts` covers spawned-session lifecycle cleanup, enough gateway request time, MCP cleanup, delete via `agent.wait`, timeout handling, account routing, and announce behavior.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/packages/agent-core/src/agent-loop.test.ts` covers EventStream error paths.
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` includes focused unit coverage for fallback orchestration, terminal result classification, empty result handling, and turn-level diagnostics.
-
-### Gitcrawl queries
-
- `gitcrawl --json search issues -R openclaw/openclaw "agent loop gateway_timeout chat.abort embedded fallback"` returned no matching issues, suggesting the exact lifecycle query is not where field reports are clustered.
- `gitcrawl --json search issues -R openclaw/openclaw "runAgentTurnWithFallback agent runner timeout"` returned no matching issues.
- `gitcrawl --json search issues -R openclaw/openclaw "local model provider context timeout Ollama"` returned issues including #87642 on exposing `waitForRun` timeout for slow local LLMs, #86599 on local provider calls blocking the gateway event loop on Windows, and #74204 on memory embed timeout for local GGUF.
-
-### Discrawl queries
-
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "runEmbeddedAgent agent.wait"` returned no matches.
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "agent.wait gateway_timeout embedded fallback"` returned no matches.
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "model fallback decision"` returned recent discussions around openai-codex timeouts, fallback decisions, No API key fallback decisions, OpenRouter timeout decisions, missing bearer logs, and repeated fallback errors in session repair loops.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/cli-harnesses-external-runtimes-and-subagents.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/cli-harnesses-external-runtimes-and-subagents.md
@@ -1,94 +0,0 @@
---
-title: "Agent Runtime - External Runtimes and Subagents Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Agent Runtime - External Runtimes and Subagents Maturity Note
-
-## Summary
-
-OpenClaw treats external runtimes as a first-class execution mode: docs separate providers from runtimes, explain Codex/OpenClaw/ACP/external harness ownership, document Claude CLI and Gemini CLI aliases, and describe subagent auth, delivery, cleanup, and recovery. Source bridges CLI transcripts and events through `runCliAgent`, and tests cover CLI previews, subagent sessions, lifecycle cleanup, and runtime override boundaries. Quality is Alpha because archives show recurring issues around `claude-cli`, ACP/subagent delivery, unsupported backend settings, trajectory artifacts, and auth propagation from main sessions.
-
-## Category Scope
-
-This category covers operator-visible execution outside the default embedded
-provider path: choosing external harnesses, using CLI runtime aliases, running
-subagent turns, and recovering from cleanup, timeout, or liveness issues in
-those external runtimes.
-
-## Features
-
- External harness selection: Choosing Codex app-server, ACP, and other external runtime harnesses.
- CLI runtime aliases: Runtime aliases and CLI-based execution paths such as Claude CLI and Gemini CLI.
- Subagent turns: Spawning, delivering, and announcing subagent work outside the default embedded path.
- Runtime recovery: Cleanup, timeout, and liveness behavior for external runtimes and subagents.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Beta (78%)`
-
-Coverage is good for Codex/CLI/subagent workflows, but external harnesses and ACP have less uniform proof than the embedded runtime.
-
-## Quality Score
-
- Score: `Alpha (66%)`
-
-CLI/subagent execution is functional but operationally fragile where backend-specific auth, tool permission boundaries, unsupported settings, and result delivery vary by runtime.
-
-## Completeness Score
-
- Score: `Beta (78%)`
- Surface instructions: evaluated against `references/completeness/agent-runtime-and-provider-execution.md`.
 - Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for External harness selection, CLI runtime aliases, Subagent turns, and Runtime recovery.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- External runtime behavior shifts with upstream CLI tools and needs more release-by-release proof.
- Subagent UX and lifecycle semantics are well tested but still produce field reports around delivery, account routing, and parity.
- Some CLI artifacts and diagnostics, such as pure `claude-cli` trajectories, have active gap reports.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/concepts/agent-runtimes.md` documents runtimes versus providers/model/channel, embedded harnesses versus CLI backends, Codex surfaces, runtime decision tree, ownership split, runtime selection, fail-closed explicit runtimes, CLI backend aliases, Claude CLI, OpenAI default to Codex harness, and compatibility contract.
- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents Claude CLI setup, same-host requirement, canonical Anthropic refs with `agentRuntime.id: "claude-cli"`, legacy refs, and thinking defaults.
- `/Users/kevinlin/code/openclaw/docs/providers/google.md` documents Gemini CLI OAuth setup, plugin capabilities, legacy aliases, and capability expectations.
- `/Users/kevinlin/code/openclaw/docs/tools/subagents.md` documents subagent auth, announce behavior, delivery routing, sessions_history sanitation, subagent tool policy, concurrency, liveness, and recovery.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/src/agents/cli-runner.ts` persists approved CLI user turn transcripts, finalizes CLI context engine turns, invokes `before_agent_reply`, and runs CLI agent turns.
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.ts` resolves CLI runtime execution providers, handles runtime overrides, bridges CLI assistant events into previews, forwards runtime plan/approval/command/patch events, and enforces live switch retry caps.
- `/Users/kevinlin/code/openclaw/src/agents/agent-tools.ts` applies subagent/inherited tool policy and configures tool execution boundaries used by spawned runtime sessions.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts` covers `sessions_spawn` lifecycle behavior, cleanup, enough gateway request time, MCP cleanup, delete via `agent.wait`, timeout handling, account routing, and announce behavior.
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` covers forwarding static extra prompts to CLI backends, prepared CLI user turns at the persistence boundary, no CLI session reuse for room-event turns, CLI assistant event previews, reasoning previews, CLI runtime override boundaries, and Codex app-server telemetry.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` includes focused tests for runtime override resolution, CLI preview bridging, external error formatting, gateway restart recovery copy, and live model switch retry caps.
- `/Users/kevinlin/code/openclaw/src/agents/agent-tools.message-provider-policy.test.ts` covers provider-policy behavior that affects external runtime tool surfaces.
-
-### Gitcrawl queries
-
- `gitcrawl --json search issues -R openclaw/openclaw "claude-cli codex cli harness subagent sessions_spawn"` returned #73097 on PI harness ignoring `cliBackends` configuration and splitting subagent execution from chat path.
- `gitcrawl --json search issues -R openclaw/openclaw "openai-codex Anthropic Google provider tool call"` returned #80667 on `trajectory.jsonl` never being written for pure `claude-cli` sessions and #78196 on extension plugin loader behavior.
- `gitcrawl --json search issues -R openclaw/openclaw "local model provider context timeout Ollama"` returned #81214 on an OpenClaw subagent regression and #87642 on exposing subagent-control `waitForRun` timeout for slow local LLMs.
-
-### Discrawl queries
-
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "sessions_spawn claude-cli"` returned Apr-May 2026 discussions about ACP runtime failures with Claude Opus settings, Claude CLI tool availability, tool permission boundaries/sandboxing, ACP/sub-agent relay UX, and subagent/ACP result delivery regressions.
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "tool call streaming"` returned Claude CLI/WebChat tool visibility concerns and app-server watchdog discussions that affect external runtime delivery.
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "No API key found provider openai-codex"` returned related Codex OAuth profile propagation and rebuild-recognition reports that affect external runtime/subagent sessions.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/hosted-provider-adapters-and-payload-compatibility.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/hosted-provider-adapters-and-payload-compatibility.md
@@ -1,98 +0,0 @@
---
-title: "Agent Runtime - Hosted Provider Execution Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Agent Runtime - Hosted Provider Execution Maturity Note
-
-## Summary
-
-Hosted provider adapter coverage is solid for OpenAI/Codex, Anthropic, Google, and OpenAI-compatible routes. Docs explain provider-specific setup, thinking controls, OAuth/API-key distinctions, CLI runtime alternatives, and capability expectations. Source includes provider-specific message/tool/thinking conversion, timeout handling, websocket/SSE behavior, prompt-cache affinity, and tool-call normalization. Quality is Beta because hosted provider payload semantics still change quickly, especially for Codex OAuth routing, Anthropic streaming JSON, Google tool-call ids, and OpenAI-compatible tool behavior.
-
-## Category Scope
-
-This category covers operator-visible hosted provider execution: running turns
-against hosted providers, using provider-specific model options, exercising
-hosted tool use, applying reasoning or cache controls, and receiving streamed
-or final replies despite provider payload differences.
-
-## Features
-
- Hosted provider turns: Running agent turns against hosted providers such as OpenAI, Anthropic, and Google.
- Provider-specific model options: Provider-specific model parameters and runtime request settings exposed to users or operators.
- Hosted tool use: Tool use behavior when the active runtime is a hosted provider.
- Reasoning and cache controls: Provider-specific reasoning, thinking, and cache-related controls during hosted execution.
- Hosted streaming and replies: Operator-visible streaming and reply behavior while hosted adapters normalize payload differences.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Beta (76%)`
-
-Coverage is good for the major providers, but OpenAI-compatible and fast-moving hosted provider variants still rely on scattered tests, docs, and archive evidence rather than a uniform compatibility table.
-
-## Quality Score
-
- Score: `Beta (70%)`
-
-Adapters include many compatibility guards, but provider payload drift and streaming/tool-call quirks remain visible in archived issues and Discord reports.
-
-## Completeness Score
-
- Score: `Beta (76%)`
- Surface instructions: evaluated against `references/completeness/agent-runtime-and-provider-execution.md`.
 - Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Hosted provider turns, Provider-specific model options, Hosted tool use, Reasoning and cache controls, and Hosted streaming and replies.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Provider-specific tool-call and thinking semantics still need recurring live proof.
- OpenAI-compatible hosted providers and route aliases have less systematic evidence than first-party routes.
- Some adapter failures surface as generic fallback or missing-key errors, making operator diagnosis harder.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/providers/openai.md` documents OpenAI/Codex route distinctions, naming maps, capability tables, GPT-5.5/Codex app-server notes, Codex OAuth setup, and default agent routes.
- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents Anthropic API key versus Claude CLI, canonical refs, legacy refs, thinking defaults, and prompt caching.
- `/Users/kevinlin/code/openclaw/docs/providers/google.md` documents Google plugin capabilities, Gemini CLI OAuth, model refs, capabilities, and thinking/reasoning controls.
- `/Users/kevinlin/code/openclaw/docs/concepts/models.md` documents model ref/runtime separation and fallback selection that provider adapters consume.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/src/llm/providers/openai-codex-responses.ts` implements Codex Responses transport setup, account id handling, body/header construction, timeout signals, websocket path, retryable error classification, and prompt-cache affinity.
- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.ts` implements Anthropic stream setup, request parameter construction, event handling, OAuth system prompt handling, thinking modes, tool id normalization, message transforms, fine-grained tool streaming beta, and tool conversion.
- `/Users/kevinlin/code/openclaw/src/llm/providers/google-shared.ts` implements Google thinking-part semantics, thought signature retention, tool-call id requirements, assistant text/thinking/tool-call conversion, and tool-result conversion.
- `/Users/kevinlin/code/openclaw/src/agents/transport-stream-shared.ts` provides cross-provider transport stream sanitization, tool-call argument coercion, metadata merge, finalization, and error stream handling.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` covers OpenAI session runtime overrides, Codex app-server telemetry, external error formatting, missing custom tool output guidance, and Bedrock tool mismatch reset hints.
- `/Users/kevinlin/code/openclaw/src/commands/models.list.e2e.test.ts` covers provider catalog rows, auth/local/provider behavior, and catalog responsiveness.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/src/llm/providers/openai-codex-responses.test.ts` covers account id decoding, transport timeouts, websocket/SSE behavior, timeout behavior, and prompt-cache affinity.
- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.test.ts` covers Anthropic provider auth and signed thinking replay.
- `/Users/kevinlin/code/openclaw/src/llm/providers/google-shared.test.ts` covers projecting text, thinking, tool calls, response ids, and usage.
- `/Users/kevinlin/code/openclaw/src/agents/transport-stream-shared.test.ts` covers sanitization, non-empty tool payload text, headers, success streams, and failure cleanup.
-
-### Gitcrawl queries
-
- `gitcrawl --json search issues -R openclaw/openclaw "openai-codex Anthropic Google provider tool call"` returned #80667 on pure `claude-cli` sessions missing `trajectory.jsonl` and #78196 on extension plugin loader behavior.
- `gitcrawl --json search issues -R openclaw/openclaw "tool call streaming truncated tool_call provider"` returned #60593 on recurring Anthropic streaming JSON parse errors, #70033 on tool calls emitting empty `{}` arguments for large content, and #87711 on empty assistant delivery.
- `gitcrawl --json search prs -R openclaw/openclaw "provider error descriptors fallback rate limit"` returned PR #86642 adding structured provider error descriptors.
-
-### Discrawl queries
-
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "tool call streaming"` returned discussions about native progress callbacks, provider streaming/tool-call wrapping, visible tool-call blocks, app-server idle watchdog behavior, Claude CLI/WebChat tool visibility, and Telegram progress modes.
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "openai-codex provider routing"` returned reports about OpenAI OAuth/Codex routing, direct OpenAI Responses path drift, stale persisted route state, and old config/runtime pins.
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "model fallback decision"` returned hosted-provider timeout and fallback discussions, including OpenRouter timeouts and missing bearer failures.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/local-and-self-hosted-provider-execution.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/local-and-self-hosted-provider-execution.md
@@ -1,91 +0,0 @@
---
-title: "Agent Runtime - Local and Self-hosted Providers Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Agent Runtime - Local and Self-hosted Providers Maturity Note
-
-## Summary
-
-Local and self-hosted provider execution is documented and implemented, with especially detailed Ollama guidance for native `/api/chat`, OpenAI-compatible `/v1`, local markers, auth profile format, tool-support flags, lean profiles, context windows, timeouts, and live smoke commands. Coverage is Beta because it is concentrated in Ollama/local-model docs and command behavior. Quality is Alpha because archive evidence shows local models still struggle with tool calling, cold-start timeouts, raw JSON/tool text, and event-loop blocking.
-
-## Category Scope
-
-This category covers local and self-hosted execution paths visible to users/operators: Ollama, OpenAI-compatible local servers, local model profile configuration, tool-capability flags, timeouts, context windows, local image/model smoke checks, and local provider failure handling.
-
-## Features
-
- Local provider profiles: Local model profile configuration for Ollama and OpenAI-compatible local servers.
- Tool-capability flags: Local provider capability flags and behavior for tool use.
- Timeouts and context windows: Local provider timeout and context-window configuration.
- Local smoke checks: Local image and model smoke checks visible to operators.
- Local failure handling: Operator-facing failure handling for local and self-hosted providers.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Beta (70%)`
-
-Coverage is useful and operator-facing, but it is uneven across local backends and less uniformly tested than hosted providers.
-
-## Quality Score
-
- Score: `Alpha (60%)`
-
-Local execution is workable but still fragile in practice: model tool-calling quality, cold starts, context limits, local server blocking, and OpenAI-compatible mode quirks remain recurring issues.
-
-## Completeness Score
-
- Score: `Beta (70%)`
- Surface instructions: evaluated against `references/completeness/agent-runtime-and-provider-execution.md`.
 - Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Local provider profiles, Tool-capability flags, Timeouts and context windows, Local smoke checks, and Local failure handling.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Local provider evidence is strongly Ollama-centered; other local/self-hosted runtimes need the same level of scenario proof.
- Tool-calling behavior depends heavily on model capability and provider mode.
- Timeout guidance exists, but operator defaults still produce reports for slow local LLMs.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/providers/ollama.md` documents native `/api/chat`, `/v1` warnings, raw tool JSON behavior, local auth rules, provider IDs, `models list`, exact `/model ollama` failure behavior, endpoint preflight, live test command, custom base URLs, `compat.supportsTools: false`, `localModelLean`, `timeoutSeconds`, OpenAI-compatible mode tool/streaming reliability warnings, context windows, streaming/tool-calling/thinking support, garbled output handling, and cold local model timeouts.
- `/Users/kevinlin/code/openclaw/docs/concepts/models.md` documents local/GGUF refs, model allowlists, and runtime-independent model refs.
- `/Users/kevinlin/code/openclaw/docs/cli/agent.md` documents `--local`, timeout options, and embedded fallback behavior for local agent runs.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/src/agents/agent-tools.ts` applies model-provider tool policy and suppresses tools such as web search for local lean profiles.
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/model-selection.ts` resolves model allowlists, local refs, thinking/reasoning settings, and context token limits used by local providers.
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.ts` handles context-window hints, local/embedded runtime fallback behavior, provider timeout copy, and local model retry/fallback interactions.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/commands/models.list.e2e.test.ts` covers local/provider behavior in model catalog/status output.
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` covers context-aware reserve token floors, overflow recovery text, local/runtime fallback interactions, model capacity copy, and timeout/fallback diagnostics relevant to local providers.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/src/agents/agent-tools.message-provider-policy.test.ts` covers provider-based tool policy behavior.
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` includes unit-style coverage for context windows, provider fallback state, and timeout/failure copy.
-
-### Gitcrawl queries
-
- `gitcrawl --json search issues -R openclaw/openclaw "Ollama vLLM SGLang LM Studio tool calling"` returned no matches for the exact backend set.
- `gitcrawl --json search issues -R openclaw/openclaw "local model provider context timeout Ollama"` returned #87642 on exposing `waitForRun` timeout for slow local LLMs, #86599 on local model provider calls blocking the gateway event loop on Windows, #74204 on memory embed timeout for local GGUF, #81214 on subagent regression, and #65502 on resilient model fallback with retry and safe mode.
- `gitcrawl --json search prs -R openclaw/openclaw "Ollama native tool calling streaming"` returned no matching PRs.
-
-### Discrawl queries
-
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "Ollama tool calling OpenClaw"` returned guidance that some local models are poor at tool calling, user questions about local model limitations and tool use, maintainer guidance that raw tools printed as text indicate model/tool-calling compatibility problems, and comments closing issues around local backend support and Ollama `/v1` misconfiguration.
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "models list provider routing fallback"` returned user-helping-user guidance on Ollama provider versus session/tool pressure and local/custom provider handling.
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "usage limit fallback openai-codex"` included adjacent operator discussions about fallback configuration, useful as contrast but not primary local-provider evidence.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/model-selection-provider-routing-and-runtime-policy.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/model-selection-provider-routing-and-runtime-policy.md
@@ -1,92 +0,0 @@
---
-title: "Agent Runtime - Model and Runtime Selection Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Agent Runtime - Model and Runtime Selection Maturity Note
-
-## Summary
-
-Model selection and provider routing are among the most mature parts of this surface. Docs explain model refs, configured defaults, user-selected strict refs, provider fallbacks, auth-profile fallbacks, `/model`, runtime overrides, and thinking/context policy. Source centralizes most state in `createModelSelectionState`, and tests cover model list/set behavior plus fallback/retry routing. Quality is Beta because archive evidence shows recent drift around Codex OAuth routes, stale `openai-codex` refs, and per-session/provider fallback state.
-
-## Category Scope
-
-This category covers selecting a model/provider/runtime for an agent turn, honoring user and config choices, resolving thinking/context settings, handling runtime provider overrides, and preserving or clearing invalid route state.
-
-## Features
-
- Model reference selection: Selecting the model reference for an agent turn from user or configured defaults.
- Provider and runtime overrides: Handling provider selection and runtime overrides for a turn.
- Thinking and context settings: Resolving thinking and context settings as part of model selection.
- Invalid route recovery: Preserving or clearing invalid route state when selections drift or fail.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (84%)`
-
-Coverage is strong across model docs, CLI command docs, provider docs, source, and e2e tests for model list/set and fallback normalization.
-
-## Quality Score
-
- Score: `Beta (72%)`
-
-Routing behavior is explicit and defensive, but quality is pulled down by recent operator-visible route repair, stale auth/provider refs, and fallback stickiness reports.
-
-## Completeness Score
-
- Score: `Stable (84%)`
- Surface instructions: evaluated against `references/completeness/agent-runtime-and-provider-execution.md`.
 - Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Model reference selection, Provider and runtime overrides, Thinking and context settings, and Invalid route recovery.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Operator recovery for stale provider/runtime refs is spread across doctor behavior, provider docs, and error copy.
- Runtime policy is strong for common providers but field reports show route drift when Codex OAuth, custom provider IDs, and fallback profiles interact.
- The system needs recurring release-level scenario proof for route repair and fallback reset behavior.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/concepts/models.md` documents model refs versus runtime, selection order, provider/auth fallbacks, configured defaults, auto fallback selections, strict user session selections, model allowlists, local/GGUF refs, `/model` switching, live switching, and strict selected refs.
- `/Users/kevinlin/code/openclaw/docs/cli/models.md` documents `models list`, `models set`, status/probe options, catalog/auth columns, provider catalog responsiveness, ref parsing/fallback, auth profiles, login, paste-api-key, and OpenAI API versus ChatGPT/OAuth routing.
- `/Users/kevinlin/code/openclaw/docs/providers/openai.md` documents OpenAI/Codex route distinctions, naming map, GPT-5.5/Codex app-server repair notes, capability tables, and the default OpenAI agent route summary.
- `/Users/kevinlin/code/openclaw/docs/concepts/agent-runtimes.md` documents runtime selection, fail-closed explicit runtimes, CLI backend aliases, and OpenAI defaulting to the Codex harness.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/model-selection.ts` implements provider/model initialization, allowlists, catalog visibility policy, direct stored override handling, stale legacy `openai-codex` override clearing, auth profile override validation, thinking/reasoning resolution, and context token resolution.
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.ts` applies fallback candidate auth profiles, live model switches, runtime config, runtime provider resolution, and retry state.
- `/Users/kevinlin/code/openclaw/src/agents/configured-provider-fallback.ts` defines configured fallback behavior for provider selection.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/commands/models.set.e2e.test.ts` covers model setting and fallback normalization.
- `/Users/kevinlin/code/openclaw/src/commands/models.list.e2e.test.ts` covers model list/status, catalog/auth/local/provider behavior, and provider visibility.
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` covers fallback rechecks, stale queued probe dropping after user model switches, preserving and re-persisting fallback origins, CLI runtime override boundaries, model capacity errors, live model switch restarts, and retry-loop caps.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` includes focused tests for provider/model fallback retry state, auth profile preservation, dropping `authProfileId` when fallback switches providers, and same-provider auth profile fallback.
- `/Users/kevinlin/code/openclaw/src/commands/models.auth.provider-resolution.test.ts` covers auth-provider resolution behavior for model commands.
-
-### Gitcrawl queries
-
- `gitcrawl --json search issues -R openclaw/openclaw "models list model selection fallback auth profile provider"` returned #59168 on using `provider/name` as the internal model key, #83954 on Pro-plan paths for `gpt-5.5-pro` and retired Spark via Codex CLI/app-server, and #70055 on disabling external CLI sync for auth profiles via config.
- `gitcrawl --json search issues -R openclaw/openclaw "No API key found provider openai-codex auth profile"` returned stale route and Codex OAuth issues including #86470 on doctor rewriting `openai-codex/*` to `openai/*`, #83223 on migrated routes still looking up `openai-codex` auth before fallback, and #86820 on compaction falling back to direct OpenAI API.
- `gitcrawl --json search issues -R openclaw/openclaw "rate limit fallback usage limit openai-codex"` returned #85103 on provider-wide quota fallback not triggering, #87467 on auto rate-limit fallback staying pinned after primary recovery, #79604 on rotating auth profiles before provider fallback, and #79611 on active-memory plugin failover.
-
-### Discrawl queries
-
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "models list provider routing fallback"` returned a May 16 beta announcement emphasizing Codex app-server reliability, progress timeouts, compaction handling, tool policy enforcement, OAuth fallback, local/custom providers, and guidance on Ollama provider pressure, vision routing, CLI crashes, and per-turn model routing with `before_model_resolve`.
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "openai-codex provider routing"` returned maintainer notes around OpenAI OAuth/Codex routing, `openai-codex` being load-bearing in auth profile resolution, compaction routing, context config, auth order, and stale persisted route state.
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "model fallback decision"` returned recent discussions about openai-codex timeouts, fallback decisions, No API key fallback decisions, OpenRouter timeouts, and session repair loops.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/provider-auth-profiles-and-credential-health.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/provider-auth-profiles-and-credential-health.md
@@ -1,113 +0,0 @@
---
-title: "Agent Runtime - Provider Auth Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Agent Runtime - Provider Auth Maturity Note
-
-## Summary
-
-Provider auth is broad enough to cover setup, selection, health checks, fallback, and operator-facing diagnostics in one category. Docs explain API keys, OAuth, provider/auth-profile fallbacks, status and probe output, stale-route repair, and restart guidance. Source validates provider/profile compatibility, carries fallback candidate state, classifies structured provider failures, and formats missing-key, OAuth-refresh, capacity, and restart recovery guidance. Quality remains Alpha because archive evidence still shows repeated operator failures around Codex OAuth route repair, profile propagation, quota fallback semantics, sticky fallback state, and provider key discovery.
-
-## Category Scope
-
-This category covers provider credentials, auth profile health, and operator-visible provider recovery behavior: login and paste-key flows, provider auth profile selection, doctor and status repair, auth failover, provider fallback chains, quota and capacity recovery, missing-key and OAuth guidance, restart and stale-route hints, structured diagnostics, subagent credential propagation, and credential-related runtime errors.
-
-## Features
-
- Login and API-key setup: Login, OAuth, and paste-key flows for provider access.
- Auth profile selection: Selecting and validating provider auth profiles.
- Credential health checks: Doctor, status, and related credential health checks and repair signals.
- Auth failover: Same-provider and cross-profile auth fallback behavior.
- Provider fallback recovery: Provider and auth-profile fallback behavior when execution fails.
- Rate-limit and capacity recovery: Recovery paths for quota, capacity, and rate-limit failures.
- Missing-key and OAuth guidance: Operator guidance for missing keys, expired OAuth state, and related auth failures.
- Restart and stale-route recovery: Recovery from stale route state, restart requirements, and related provider drift.
- Structured provider diagnostics: Structured provider errors and diagnostics delivered into logs or agent replies.
- Subagent credential propagation: Propagating provider credentials into subagent and delegated runtime flows.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (80%)`
-
-Coverage is strong for OpenAI/Codex, Anthropic, Google, model commands, fallback state, and operator-facing recovery copy, but provider auth and diagnostics still span many flows and are not yet represented by a single end-to-end operator proof matrix.
-
-## Quality Score
-
- Score: `Alpha (66%)`
-
-Auth/profile behavior remains a frequent operational pain point, especially where Codex OAuth, direct OpenAI API routes, compaction, subagents, doctor repair, and quota fallback behavior overlap.
-
-## Completeness Score
-
- Score: `Stable (80%)`
- Surface instructions: evaluated against `references/completeness/agent-runtime-and-provider-execution.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Login and API-key setup, Auth profile selection, Credential health checks, Auth failover, Provider fallback recovery, Rate-limit and capacity recovery, Missing-key and OAuth guidance, Restart and stale-route recovery, Structured provider diagnostics, and Subagent credential propagation.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Codex OAuth route repair still produces recent open GitHub and Discord reports.
- Subagent and compaction flows can lose or reinterpret auth profile state.
- Quota-wide and account-specific provider failures need clearer fallback semantics.
- Recovery from stale `openai-codex` route state still depends on doctor repair and explicit guidance.
- Some missing-key and fallback diagnostics are strong in tests but still too hard for operators to map to root cause.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/cli/models.md` documents model status/auth overview, Codex OAuth troubleshooting, auth profile listing, login, paste-api-key, OpenAI API versus ChatGPT/OAuth, and Claude CLI notes.
- `/Users/kevinlin/code/openclaw/docs/concepts/models.md` documents primary model selection, fallbacks, provider auth failover, auto fallback selections, strict user selections, and live model switching.
- `/Users/kevinlin/code/openclaw/docs/cli/agent.md` documents gateway fallback behavior, embedded fallback metadata, gateway timeout fallback session/run id, and SIGTERM/SIGINT `chat.abort`.
- `/Users/kevinlin/code/openclaw/docs/providers/openai.md` documents `openai`, `openai-codex`, Codex plugin and `agentRuntime` naming, OpenAI/Codex route selection, Codex OAuth setup, and doctor repair behavior.
- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents API key versus Claude CLI authentication and canonical Anthropic refs with `agentRuntime.id: "claude-cli"`.
- `/Users/kevinlin/code/openclaw/docs/providers/google.md` documents Google plugin auth, Gemini CLI OAuth setup, and warning/alias behavior.
- `/Users/kevinlin/code/openclaw/docs/tools/subagents.md` documents subagent auth resolution by agent id and fallback to main profiles.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/model-selection.ts` validates auth profile overrides against accepted auth providers, clears invalid overrides, and handles stale legacy `openai-codex` state.
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.ts` carries fallback candidate auth profile state, applies live model switch auth changes, preserves same-provider auth fallback, and drops auth profile ids when switching providers.
- `/Users/kevinlin/code/openclaw/src/llm/providers/openai-codex-responses.ts` classifies retryable errors and configures timeout/retry behavior for Codex Responses transport.
- `/Users/kevinlin/code/openclaw/src/agents/transport-stream-shared.ts` builds structured failure streams with error details.
- `/Users/kevinlin/code/openclaw/src/commands/auth-choice.apply.api-providers.test.ts` maps API key/token provider choices for auth flows.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/commands/models.list.e2e.test.ts` covers catalog auth/status presentation for providers.
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` covers OAuth refresh failure guidance, missing API key guidance, stale `openai-codex` missing-key failures pointing at doctor repair, auth profile state on retries, provider-switch auth profile dropping, and same-provider auth fallback.
- `/Users/kevinlin/code/openclaw/src/commands/models.set.e2e.test.ts` covers fallback normalization in model command behavior.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/src/commands/auth-choice.apply.api-providers.test.ts` covers auth choice mapping for API key/token providers.
- `/Users/kevinlin/code/openclaw/src/commands/models.auth.provider-resolution.test.ts` covers provider auth resolution for model commands.
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` includes focused auth-profile regression coverage.
- `/Users/kevinlin/code/openclaw/src/llm/providers/openai-codex-responses.test.ts` covers transport timeouts and websocket/SSE behavior feeding retry decisions.
- `/Users/kevinlin/code/openclaw/src/agents/transport-stream-shared.test.ts` covers failure cleanup and non-empty failure streams.
-
-### Gitcrawl queries
-
- `gitcrawl --json search issues -R openclaw/openclaw "No API key found provider openai-codex auth profile"` returned many active issues, including #84252 on doctor/status leaving `openai-codex` OAuth sidecar auth partially repaired, #87677 on memory embeddings through Codex OAuth runtime, #86470 on doctor rewriting `openai-codex/*` to `openai/*`, #85797 on image generation requiring an API key despite OAuth, #86820 on compaction falling back to direct OpenAI API, #87051 on OAuth profile not propagating to subagent sessions, #83223 on migrated routes still looking up `openai-codex` auth before fallback, and #80171 on runtime parity QA.
- `gitcrawl --json search issues -R openclaw/openclaw "openai-codex Anthropic Google provider tool call"` returned #80667 on missing `trajectory.jsonl` for pure `claude-cli` sessions and #78196 on extension plugin loader behavior.
- `gitcrawl --json search issues -R openclaw/openclaw "provider error guidance reauth fallback"` returned no direct matches.
- `gitcrawl --json search issues -R openclaw/openclaw "rate limit fallback usage limit openai-codex"` returned #85103 on model fallback chain not triggering for provider-wide quota exhaustion, #87467 on auto rate-limit fallback staying pinned to fallback after primary recovery, #79604 on rotating auth profiles within a candidate before next provider, and #79611 on active-memory plugin provider failover and timeout.
- `gitcrawl --json search prs -R openclaw/openclaw "provider error descriptors fallback rate limit"` returned #86642 adding structured provider error descriptors.
- `gitcrawl --json search prs -R openclaw/openclaw "agent runner fallback model switch"` returned PRs including #85235 on message-tool-only diagnostics, #80482 on cooldown inline API key billing failures, #62682 on terminal abort versus retryable failures, and #86089 on restart recovery replies.
-
-### Discrawl queries
-
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "No API key found provider openai-codex"` returned May 2026 reports around OpenAI OAuth/Codex routing, plugin errors with `No API key found for provider "openai-codex"`, existing Codex auth no longer recognized after rebuild, direct API routing failures, and users seeing missing OpenAI keys despite Codex OAuth.
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "reauth provider auth profile"` returned Codex auth refresh/persistence reports, scope issues, stale auth order, token rotation failures, and older reauth command confusion.
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "openai-codex provider routing"` returned maintainer/user notes about auth profile resolution, compaction routing, context config, auth order, stale route state, and doctor repair guidance.
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "usage limit fallback openai-codex"` returned discussions about Claude CLI usage/billing fallback losing context, multi-account Codex OAuth failover, OpenAI rate limit auth/provider guidance, Codex backend challenge/limit paths, rate-limit/account-id errors, fallback configs, and model failover being blocked or sticky.
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "model fallback decision"` returned recent fallback decision logs for openai-codex timeouts, No API key cases, OpenRouter timeouts, missing bearer errors, Anthropic empty responses, and session repair loops.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/report.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/report.md
@@ -1,328 +0,0 @@
---
-title: "Agent Runtime Maturity Report"
-version: 3
-last_refreshed: 2026-05-31
-last_refreshed_by: codex
---
-
-# Agent Runtime Maturity Report
-
-## Top-level scores
-
-These rollups are simple arithmetic means over the category-note numeric
-scores in
-`scores.yaml`. Percentages are rounded to the nearest whole number.
-
- Coverage: `Stable (80%)`
- Quality: `Alpha (69%)`
- Completeness: `Stable (80%)`
- LTS Features: `6/9`
-
-## Summary
-
-This report promotes the archived `agent-runtime-and-provider-execution` maturity evidence from `/Users/kevinlin/tmp/maturity/agent-runtime-and-provider-execution` into the current process-version-3 inventory contract.
-
-The category Coverage and Quality scores come from the archived evidence-backed score rows. Completeness is initialized from the same archived evidence breadth and known-gap record, then joined with the surface-specific completeness rubric referenced by taxonomy.
-
-## Matrix
-
-| Category                                                                              | LTS | Coverage       | Quality       | Completeness   | Features to evaluate                                                                                                                                                                                                                                                                       |
-| ------------------------------------------------------------------------------------- | --- | -------------- | ------------- | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| [Agent Turn Execution](agent-turn-orchestration-and-runtime-lifecycle.md)             | ✅  | `Stable (82%)` | `Beta (74%)`  | `Stable (82%)` | Turn startup and runtime choice, Session and run coordination, Abort and terminal outcomes                                                                                                                                                                                                 |
-| [External Runtimes and Subagents](cli-harnesses-external-runtimes-and-subagents.md)   | ❌  | `Beta (78%)`   | `Alpha (66%)` | `Beta (78%)`   | External harness selection, CLI runtime aliases, Subagent turns, Runtime recovery                                                                                                                                                                                                          |
-| [Hosted Provider Execution](hosted-provider-adapters-and-payload-compatibility.md)    | ✅  | `Beta (76%)`   | `Beta (70%)`  | `Beta (76%)`   | Hosted provider turns, Provider-specific model options, Hosted tool use, Reasoning and cache controls, Hosted streaming and replies                                                                                                                                                        |
-| [Local and Self-hosted Providers](local-and-self-hosted-provider-execution.md)        | ❌  | `Beta (70%)`   | `Alpha (60%)` | `Beta (70%)`   | Local provider profiles, Tool-capability flags, Timeouts and context windows, Local smoke checks, Local failure handling                                                                                                                                                                   |
-| [Model and Runtime Selection](model-selection-provider-routing-and-runtime-policy.md) | ✅  | `Stable (84%)` | `Beta (72%)`  | `Stable (84%)` | Model reference selection, Provider and runtime overrides, Thinking and context settings, Invalid route recovery                                                                                                                                                                           |
-| [Provider Auth](provider-auth-profiles-and-credential-health.md)                      | ✅  | `Stable (80%)` | `Alpha (66%)` | `Stable (80%)` | Login and API-key setup, Auth profile selection, Credential health checks, Auth failover, Provider fallback recovery, Rate-limit and capacity recovery, Missing-key and OAuth guidance, Restart and stale-route recovery, Structured provider diagnostics, Subagent credential propagation |
-| [Streaming and Progress](streaming-progress-and-preview-visibility.md)                | ❌  | `Stable (84%)` | `Beta (70%)`  | `Stable (84%)` | Streaming replies, Progress visibility                                                                                                                                                                                                                                                     |
-| [Tool Calls and Response Handling](streaming-tool-call-and-response-normalization.md) | ✅  | `Stable (80%)` | `Alpha (66%)` | `Stable (80%)` | Tool-call handling, Usage and response reporting, Failure recovery                                                                                                                                                                                                                         |
-| [Tool Execution Controls](tool-execution-approvals-and-sandbox-policy.md)             | ✅  | `Stable (86%)` | `Beta (74%)`  | `Stable (86%)` | Tool availability rules, Sandboxed exec behavior, Approval flow, Elevated execution, Tool safety controls, Delegated tool access                                                                                                                                                           |
-
-## Scoring rubric
-
- Coverage:
-  maturity-label rating for integration, e2e, live, or server/runtime flow
-  evidence across the category. Unit tests can provide supporting context, but
-  on their own they never make a feature count as covered.
- Quality:
-  maturity-label rating for implementation and operational robustness. Unit,
-  integration, e2e, live, and real runtime-flow test coverage are Coverage
-  inputs only; they do not raise or lower Quality.
- Completeness:
-  maturity-label rating for how fully the category delivers the intended
-  surface-specific capability set. Use the taxonomy-linked completeness
-  instructions for this surface.
- LTS:
-  calculated as `quality > 80 and coverage > 90`, or when the matching
-  taxonomy category sets `human_lts_override`.
- Shared score bands:
-  `Lovable = 95-100`, `Stable = 80-95`, `Beta = 70-80`,
-  `Alpha = 50-70`, and `Experimental = 0-50`. At shared boundaries, choose the
-  higher maturity label.
- Major quality/completeness gaps:
-  evidence text only, tracked in the detailed feature inventory rather than as a
-  separate scored dimension.
-
-## Detailed feature inventory
-
-### 1. Agent Turn Execution
-
-Search anchors: agent RPC shape and event stream, runAgentTurnWithFallback, agent.wait timeout and terminal outcomes.
-
-Category note: [Agent Turn Execution](agent-turn-orchestration-and-runtime-lifecycle.md)
-
-Score decisions:
-
- Coverage: `Stable (82%)`
- Quality: `Beta (74%)`
- Completeness: `Stable (82%)`
- LTS: ✅
-
-Features:
-
- Turn startup and runtime choice: Starting an agent turn and choosing gateway versus embedded runtime execution.
- Session and run coordination: Establishing session and run ids, queue locks, and related execution coordination.
- Abort and terminal outcomes: Honoring aborts, timing provider/model work, and emitting terminal outcomes.
-
-Primary docs:
-
- `docs/concepts/agent-loop.md`
- `docs/cli/agent.md`
- `docs/concepts/agent-runtimes.md`
-
-### 2. External Runtimes and Subagents
-
-Search anchors: agent runtimes, subagent turns, CLI runtime aliases.
-
-Category note: [External Runtimes and Subagents](cli-harnesses-external-runtimes-and-subagents.md)
-
-Score decisions:
-
- Coverage: `Beta (78%)`
- Quality: `Alpha (66%)`
- Completeness: `Beta (78%)`
- LTS: ❌
-
-Features:
-
- External harness selection: Choosing Codex app-server, ACP, and other external runtime harnesses.
- CLI runtime aliases: Runtime aliases and CLI-based execution paths such as Claude CLI and Gemini CLI.
- Subagent turns: Spawning, delivering, and announcing subagent work outside the default embedded path.
- Runtime recovery: Cleanup, timeout, and liveness behavior for external runtimes and subagents.
-
-Primary docs:
-
- `docs/concepts/agent-runtimes.md`
- `docs/providers/anthropic.md`
- `docs/providers/google.md`
- `docs/tools/subagents.md`
-
-### 3. Hosted Provider Execution
-
-Search anchors: hosted provider turns, provider-specific model options, streaming reply normalization.
-
-Category note: [Hosted Provider Execution](hosted-provider-adapters-and-payload-compatibility.md)
-
-Score decisions:
-
- Coverage: `Beta (76%)`
- Quality: `Beta (70%)`
- Completeness: `Beta (76%)`
- LTS: ✅
-
-Features:
-
- Hosted provider turns: Running agent turns against hosted providers such as OpenAI, Anthropic, and Google.
- Provider-specific model options: Provider-specific model parameters and runtime request settings exposed to users or operators.
- Hosted tool use: Tool use behavior when the active runtime is a hosted provider.
- Reasoning and cache controls: Provider-specific reasoning, thinking, and cache-related controls during hosted execution.
- Hosted streaming and replies: Operator-visible streaming and reply behavior while hosted adapters normalize payload differences.
-
-Primary docs:
-
- `docs/providers/openai.md`
- `docs/providers/anthropic.md`
- `docs/providers/google.md`
- `docs/concepts/models.md`
-
-### 4. Local and Self-hosted Providers
-
-Search anchors: Ollama local provider profiles, OpenAI-compatible local servers, local smoke checks.
-
-Category note: [Local and Self-hosted Providers](local-and-self-hosted-provider-execution.md)
-
-Score decisions:
-
- Coverage: `Beta (70%)`
- Quality: `Alpha (60%)`
- Completeness: `Beta (70%)`
- LTS: ❌
-
-Features:
-
- Local provider profiles: Local model profile configuration for Ollama and OpenAI-compatible local servers.
- Tool-capability flags: Local provider capability flags and behavior for tool use.
- Timeouts and context windows: Local provider timeout and context-window configuration.
- Local smoke checks: Local image and model smoke checks visible to operators.
- Local failure handling: Operator-facing failure handling for local and self-hosted providers.
-
-Primary docs:
-
- `docs/providers/ollama.md`
- `docs/concepts/models.md`
- `docs/cli/agent.md`
-
-### 5. Model and Runtime Selection
-
-Search anchors: model reference selection, runtime overrides, thinking and context settings.
-
-Category note: [Model and Runtime Selection](model-selection-provider-routing-and-runtime-policy.md)
-
-Score decisions:
-
- Coverage: `Stable (84%)`
- Quality: `Beta (72%)`
- Completeness: `Stable (84%)`
- LTS: ✅
-
-Features:
-
- Model reference selection: Selecting the model reference for an agent turn from user or configured defaults.
- Provider and runtime overrides: Handling provider selection and runtime overrides for a turn.
- Thinking and context settings: Resolving thinking and context settings as part of model selection.
- Invalid route recovery: Preserving or clearing invalid route state when selections drift or fail.
-
-Primary docs:
-
- `docs/concepts/models.md`
- `docs/cli/models.md`
- `docs/providers/openai.md`
- `docs/concepts/agent-runtimes.md`
-
-### 6. Provider Auth
-
-Search anchors: login and API-key setup, auth profile selection, provider fallback recovery.
-
-Category note: [Provider Auth](provider-auth-profiles-and-credential-health.md)
-
-Score decisions:
-
- Coverage: `Stable (80%)`
- Quality: `Alpha (66%)`
- Completeness: `Stable (80%)`
- LTS: ✅
-
-Features:
-
- Login and API-key setup: Login, OAuth, and paste-key flows for provider access.
- Auth profile selection: Selecting and validating provider auth profiles.
- Credential health checks: Doctor, status, and related credential health checks and repair signals.
- Auth failover: Same-provider and cross-profile auth fallback behavior.
- Provider fallback recovery: Provider and auth-profile fallback behavior when execution fails.
- Rate-limit and capacity recovery: Recovery paths for quota, capacity, and rate-limit failures.
- Missing-key and OAuth guidance: Operator guidance for missing keys, expired OAuth state, and related auth failures.
- Restart and stale-route recovery: Recovery from stale route state, restart requirements, and related provider drift.
- Structured provider diagnostics: Structured provider errors and diagnostics delivered into logs or agent replies.
- Subagent credential propagation: Propagating provider credentials into subagent and delegated runtime flows.
-
-Primary docs:
-
- `docs/concepts/models.md`
- `docs/cli/agent.md`
- `docs/cli/models.md`
- `docs/providers/openai.md`
- `docs/providers/anthropic.md`
- `docs/providers/google.md`
- `docs/tools/subagents.md`
-
-### 7. Streaming and Progress
-
-Search anchors: streaming replies, progress visibility, event delivery.
-
-Category note: [Streaming and Progress](streaming-progress-and-preview-visibility.md)
-
-Score decisions:
-
- Coverage: `Stable (84%)`
- Quality: `Beta (70%)`
- Completeness: `Stable (84%)`
- LTS: ❌
-
-Features:
-
- Streaming replies: Streaming block updates and partial assistant output before final delivery.
- Progress visibility: Progress preview events and item lifecycle updates surfaced during execution.
-
-Primary docs:
-
- `docs/concepts/streaming.md`
- `docs/concepts/agent-loop.md`
-
-### 8. Tool Calls and Response Handling
-
-Search anchors: tool-call handling, usage reporting, failure recovery.
-
-Category note: [Tool Calls and Response Handling](streaming-tool-call-and-response-normalization.md)
-
-Score decisions:
-
- Coverage: `Stable (80%)`
- Quality: `Alpha (66%)`
- Completeness: `Stable (80%)`
- LTS: ✅
-
-Features:
-
- Tool-call handling: Reliable tool-call behavior across providers, including malformed or provider-specific payload differences.
- Usage and response reporting: Response ids and usage accounting normalized into operator-visible runtime behavior.
- Failure recovery: Failure-stream finalization and cleanup when provider output is malformed or incomplete.
-
-Primary docs:
-
- `docs/concepts/agent-loop.md`
- `docs/providers/ollama.md`
-
-### 9. Tool Execution Controls
-
-Search anchors: tool availability rules, sandboxed exec behavior, approval flow.
-
-Category note: [Tool Execution Controls](tool-execution-approvals-and-sandbox-policy.md)
-
-Score decisions:
-
- Coverage: `Stable (86%)`
- Quality: `Beta (74%)`
- Completeness: `Stable (86%)`
- LTS: ✅
-
-Features:
-
- Tool availability rules: Which tools are available during a turn after policy resolution and provider-based suppression.
- Sandboxed exec behavior: Exec behavior, sandbox roots, and workspace constraints visible to operators.
- Approval flow: Operator approval gates for tool execution.
- Elevated execution: Elevated host execution rules and related controls.
- Tool safety controls: Before-tool-call hooks and related guardrails that shape operator-visible tool behavior.
- Delegated tool access: Inherited or narrowed tool policy for subagents and delegated execution.
-
-Primary docs:
-
- `docs/gateway/sandbox-vs-tool-policy-vs-elevated.md`
- `docs/concepts/agent-loop.md`
- `docs/tools/subagents.md`
-
-## Recommended scorecard interpretation
-
-Use this migrated score as the current inventory baseline. Refresh individual categories with live category-agent research before treating a high score as an LTS promotion gate.
-
-## Out of scope for this surface
-
- Redefining taxonomy category boundaries; taxonomy remains the source of truth for category identity, features, docs, and search anchors.
-
-## Audit provenance
-
- Score source:
-  `docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/scores.yaml`.
- Taxonomy metadata source:
-  `.agents/skills/claw-score/taxonomy.yaml`.
- Archived evidence source:
-  `/Users/kevinlin/tmp/maturity/agent-runtime-and-provider-execution`.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/scores.yaml
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/scores.yaml
@@ -1,48 +0,0 @@
-version: 1
-process_version: 3
-data:
-  - name: Agent Turn Execution
-    category_note: agent-turn-orchestration-and-runtime-lifecycle.md
-    coverage: 82
-    quality: 74
-    completeness: 82
-  - name: External Runtimes and Subagents
-    category_note: cli-harnesses-external-runtimes-and-subagents.md
-    coverage: 78
-    quality: 66
-    completeness: 78
-  - name: Hosted Provider Execution
-    category_note: hosted-provider-adapters-and-payload-compatibility.md
-    coverage: 76
-    quality: 70
-    completeness: 76
-  - name: Local and Self-hosted Providers
-    category_note: local-and-self-hosted-provider-execution.md
-    coverage: 70
-    quality: 60
-    completeness: 70
-  - name: Model and Runtime Selection
-    category_note: model-selection-provider-routing-and-runtime-policy.md
-    coverage: 84
-    quality: 72
-    completeness: 84
-  - name: Provider Auth
-    category_note: provider-auth-profiles-and-credential-health.md
-    coverage: 80
-    quality: 66
-    completeness: 80
-  - name: Streaming and Progress
-    category_note: streaming-progress-and-preview-visibility.md
-    coverage: 84
-    quality: 70
-    completeness: 84
-  - name: Tool Calls and Response Handling
-    category_note: streaming-tool-call-and-response-normalization.md
-    coverage: 80
-    quality: 66
-    completeness: 80
-  - name: Tool Execution Controls
-    category_note: tool-execution-approvals-and-sandbox-policy.md
-    coverage: 86
-    quality: 74
-    completeness: 86
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/streaming-progress-and-preview-visibility.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/streaming-progress-and-preview-visibility.md
@@ -1,84 +0,0 @@
---
-title: "Agent Runtime - Streaming and Progress Maturity Note"
-version: 3
-last_refreshed: 2026-05-31
-last_refreshed_by: codex
---
-
-# Agent Runtime - Streaming and Progress Maturity Note
-
-## Summary
-
-Streaming and progress visibility are well covered in docs and tests: OpenClaw separates provider/runtime streaming from channel delivery, documents block and preview streaming modes, and surfaces tool-progress and item lifecycle updates before final delivery. Coverage is Stable. Quality is Beta because runtime and channel differences still produce missing progress updates, suppressed previews, and occasional terminal-update confusion.
-
-## Category Scope
-
-This category covers operator-visible streaming and progress behavior before
-final delivery: streaming replies, preview and block streaming modes, and
-progress visibility through tool-progress or item lifecycle updates.
-
-## Features
-
- Streaming replies: Streaming block updates and partial assistant output before final delivery.
- Progress visibility: Progress preview events and item lifecycle updates surfaced during execution.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (84%)`
-
-Coverage is strong across streaming docs, agent-loop docs, event plumbing, and focused tests for preview updates, item lifecycle events, duplicate progress suppression, and terminal delivery behavior.
-
-## Quality Score
-
- Score: `Beta (70%)`
-
-Streaming and progress behavior is broadly solid, but field reports still show runtime- and channel-specific differences around progress callbacks, terminal updates, and preview behavior.
-
-## Completeness Score
-
- Score: `Stable (84%)`
- Surface instructions: evaluated against `references/completeness/agent-runtime-and-provider-execution.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Streaming replies and Progress visibility.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived combined category.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Some progress visibility behavior still differs by runtime and channel.
- Native progress callbacks can still be suppressed or delayed in some flows.
- Terminal updates after long-running tool activity still need more consistent operator-facing behavior.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/concepts/streaming.md` documents the two streaming layers, block streaming, preview modes, channel mapping, runtime behavior, and tool-progress preview updates.
- `/Users/kevinlin/code/openclaw/docs/concepts/agent-loop.md` documents assistant and tool event streams, block streaming behavior, event-stream shapes, and session timeout semantics.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.ts` bridges assistant and tool events into previews, tracks item lifecycle events, suppresses duplicate progress, and handles terminal streaming text before final delivery.
- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.ts` emits streaming assistant and tool events, including fine-grained tool streaming behavior.
- `/Users/kevinlin/code/openclaw/src/llm/providers/google-shared.ts` emits assistant, thinking, and tool-call stream events that feed operator-visible progress and streaming behavior.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` covers CLI assistant event previews, item lifecycle events, duplicate progress skipping, raw tool progress details, tool-start progress before slow typing, and Codex app-server telemetry.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/src/llm/providers/google-shared.test.ts` covers streamed text, thinking, and tool event projection.
- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.test.ts` covers streaming behavior that affects signed thinking replay and provider event handling.
-
-### Gitcrawl queries
-
- `gitcrawl --json search issues -R openclaw/openclaw "tool call streaming truncated tool_call provider"` returned #60593 on recurring Anthropic streaming JSON parse errors, #70033 on tool calls emitting empty `{}` arguments for large content, and #87711 on empty assistant delivery.
-
-### Discrawl queries
-
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "tool call streaming"` returned May 2026 discussions about `/verbose off` suppressing native progress callbacks, missing terminal updates, provider streaming/tool-call wrapping, and tool-call visibility modes.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/streaming-tool-call-and-response-normalization.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/streaming-tool-call-and-response-normalization.md
@@ -1,93 +0,0 @@
---
-title: "Agent Runtime - Tool Calls and Response Handling Maturity Note"
-version: 3
-last_refreshed: 2026-05-31
-last_refreshed_by: codex
---
-
-# Agent Runtime - Tool Calls and Response Handling Maturity Note
-
-## Summary
-
-Tool-call handling and response normalization are well covered in docs and tests: OpenClaw normalizes malformed tool-call arguments, provider-specific payload differences, usage accounting, and terminal failure streams across adapters and shared transport code. Coverage is Stable. Quality is Alpha because recent archive evidence still shows surprising states leaking to users: empty tool arguments, raw tool JSON or visible tool-call blocks, and terminal-empty assistant delivery.
-
-## Category Scope
-
-This category covers operator-visible tool-call and response-handling behavior:
-reliable tool-call payload handling across providers, usage and response
-reporting, and recovery when provider output is malformed, empty, or
-incomplete.
-
-## Features
-
- Tool-call handling: Reliable tool-call behavior across providers, including malformed or provider-specific payload differences.
- Usage and response reporting: Response ids and usage accounting normalized into operator-visible runtime behavior.
- Failure recovery: Failure-stream finalization and cleanup when provider output is malformed or incomplete.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (80%)`
-
-Coverage is strong across provider adapters, shared transport code, and focused tests for tool payload coercion, response ids and usage, malformed provider output, and failure finalization.
-
-## Quality Score
-
- Score: `Alpha (66%)`
-
-Normalization is robust in source, but field reports show provider tool-call output and malformed responses still leak surprising states to users.
-
-## Completeness Score
-
- Score: `Stable (80%)`
- Surface instructions: evaluated against `references/completeness/agent-runtime-and-provider-execution.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Tool-call handling, Usage and response reporting, and Failure recovery.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Empty or malformed tool-call arguments still emerge in provider-specific edge cases.
- Raw tool JSON or visible tool-call blocks still appear in some local or compatibility modes.
- Terminal-empty and tool-only responses still need more consistent operator-facing explanation.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/concepts/agent-loop.md` documents tool-event streams, final payload behavior, and timeout behavior around the agent loop.
- `/Users/kevinlin/code/openclaw/docs/providers/ollama.md` documents native tool-calling, OpenAI-compatible mode reliability warnings, raw tool JSON as text, garbled output handling, and tool-calling compatibility caveats.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/src/agents/transport-stream-shared.ts` coerces transport tool-call arguments, merges headers and metadata, finalizes failure streams, and normalizes transport error details.
- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.ts` normalizes streamed tool ids and partial JSON tool arguments for Anthropic responses.
- `/Users/kevinlin/code/openclaw/src/llm/providers/google-shared.ts` normalizes tool call ids, tool arguments, response ids, and usage accounting for Google-family providers.
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.ts` formats terminal empty/tool-only outcomes and bridges normalized tool results into operator-visible replies.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` covers media-only tool results, plan-only terminal result fallback, terminal-empty result classification, stripping glued leading `NO_REPLY` tokens, streamed tool results delivery, and tool-only outcome handling.
- `/Users/kevinlin/code/openclaw/src/agents/agent-tools.before-tool-call.integration.e2e.test.ts` covers hook-driven tool parameter modification, blocking, deduplication, and context around tool-call execution.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/src/agents/transport-stream-shared.test.ts` covers surrogate sanitization, non-empty tool payload text, header propagation, successful stream finalization, and failure cleanup.
- `/Users/kevinlin/code/openclaw/src/llm/providers/google-shared.test.ts` covers tool call projection, response ids, and usage.
- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.test.ts` covers signed thinking replay and provider behavior that affects tool payload normalization.
-
-### Gitcrawl queries
-
- `gitcrawl --json search issues -R openclaw/openclaw "tool call streaming truncated tool_call provider"` returned #60593 on recurring Anthropic streaming JSON parse errors, #70033 on tool calls emitting empty `{}` arguments for large content, and #87711 on empty assistant delivery.
- `gitcrawl --json search issues -R openclaw/openclaw "openai-codex Anthropic Google provider tool call"` returned adapter-adjacent issues for `claude-cli` session artifacts and extension plugin loading.
- `gitcrawl --json search prs -R openclaw/openclaw "provider error descriptors fallback rate limit"` returned #86642, which improves structured provider error descriptors feeding normalized runtime errors.
-
-### Discrawl queries
-
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "tool call streaming"` returned May 2026 discussions about provider streaming/tool-call wrapping, visible tool-call blocks, Claude CLI/WebChat tool visibility, and missing terminal updates.
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "Ollama tool calling OpenClaw"` returned reports and guidance on raw tools printed as text and model/tool-calling compatibility.
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "model fallback decision"` returned fallback logs where missing or empty provider output contributed to operator-visible failure paths.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/tool-execution-approvals-and-sandbox-policy.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/tool-execution-approvals-and-sandbox-policy.md
@@ -1,94 +0,0 @@
---
-title: "Agent Runtime - Tool Execution Controls Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Agent Runtime - Tool Execution Controls Maturity Note
-
-## Summary
-
-Tool execution policy is the strongest component in this surface. Docs distinguish sandboxing, tool policy, and elevated approvals; source centralizes tool registration, workspace/sandbox roots, inherited/subagent policy, exec config, schema normalization, and before-tool-call hooks; tests cover approval gates, policy hooks, subagent tool restrictions, and progress behavior. Quality is Beta because archive evidence still shows edge cases around exec approval forwarding, per-agent deny rules, sandbox backend expectations, and plugin/service boundary assumptions.
-
-## Category Scope
-
-This category covers operator-visible control over tools during agent turns:
-tool availability rules, sandboxed exec behavior, approval flow, elevated
-execution, tool safety controls, and delegated tool access for subagents.
-
-## Features
-
- Tool availability rules: Which tools are available during a turn after policy resolution and provider-based suppression.
- Sandboxed exec behavior: Exec behavior, sandbox roots, and workspace constraints visible to operators.
- Approval flow: Operator approval gates for tool execution.
- Elevated execution: Elevated host execution rules and related controls.
- Tool safety controls: Before-tool-call hooks and related guardrails that shape operator-visible tool behavior.
- Delegated tool access: Inherited or narrowed tool policy for subagents and delegated execution.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (86%)`
-
-Coverage is strong across docs, source, e2e tests, unit tests, and archive evidence for policy and approval behavior.
-
-## Quality Score
-
- Score: `Beta (74%)`
-
-The design is mature, but policy semantics remain subtle for users and operators when CLI backends, subagents, plugin services, and elevated execution overlap.
-
-## Completeness Score
-
- Score: `Stable (86%)`
- Surface instructions: evaluated against `references/completeness/agent-runtime-and-provider-execution.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Tool availability rules, Sandboxed exec behavior, Approval flow, Elevated execution, Tool safety controls, and Delegated tool access.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Operator docs explain the policy layers, but field reports still show confusion about what sandboxing does and does not constrain.
- CLI backend approval forwarding is not as settled as the main embedded runtime path.
- Per-agent and inherited policy behavior needs continued regression proof as subagents expand.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/gateway/sandbox-vs-tool-policy-vs-elevated.md` distinguishes sandbox, tool policy, and elevated exec; documents tool policy layers/rules, tool groups, sandboxed MCP server allow gates, elevated exec-only gates, and sandbox jail fixes.
- `/Users/kevinlin/code/openclaw/docs/concepts/agent-loop.md` documents plugin hooks including `before_tool_call`, tool-call handling, and runtime event streams.
- `/Users/kevinlin/code/openclaw/docs/tools/subagents.md` documents subagent tool policy, tool restriction, auth resolution, announce behavior, delivery routing, concurrency, liveness, and recovery.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/src/agents/agent-tools.ts` implements model-provider tool policy, local model tool suppression, exec config merging, tool policy setup, group/sender/sandbox/subagent/inherited policy, workspace/sandbox roots, `apply_patch` restrictions, exec tool setup, the tool policy pipeline, schema normalization, and `before_tool_call` hook wrapping.
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.ts` forwards plan, approval, command output, and patch events through runtime delivery.
- `/Users/kevinlin/code/openclaw/src/agents/cli-runner.ts` persists approved CLI user turn transcripts and runs CLI hooks around backend execution.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-gateway-approval.e2e.test.ts` covers gateway-hosted exec approvals on separate connections.
- `/Users/kevinlin/code/openclaw/src/agents/agent-tools.before-tool-call.integration.e2e.test.ts` covers normal `before_tool_call` behavior, parameter modification, blocking, deduplication, and context.
- `/Users/kevinlin/code/openclaw/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts` covers subagent lifecycle, cleanup, timeout handling, account routing, announce behavior, and policy-adjacent session behavior.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/src/agents/agent-tools.message-provider-policy.test.ts` covers provider-based message/tool policy behavior.
- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` covers approval, command output, patch event forwarding, tool progress details, and streamed tool result delivery.
- `/Users/kevinlin/code/openclaw/src/agents/transport-stream-shared.test.ts` covers safe transport behavior for tool payloads.
-
-### Gitcrawl queries
-
- `gitcrawl --json search issues -R openclaw/openclaw "exec approvals tool policy sandbox agent tool"` returned #44253 on per-agent `tools.selfDeny`, #69512 on forwarding `exec-approvals.json` allowlists to `claude-cli` backend sessions, #78965 on local user sandbox backend, #48532 on security by intent, #67440 on optional TOTP for exec approvals, #48503 on enriching `before_tool_call` events with action classification/input provenance, and #82548 on safety/quality observability events.
- `gitcrawl --json search issues -R openclaw/openclaw "claude-cli codex cli harness subagent sessions_spawn"` returned #73097 on PI harness ignoring `cliBackends` configuration and splitting subagent execution from chat path.
-
-### Discrawl queries
-
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "exec approvals tool policy"` returned May 2026 release testing notes covering auth/profile, sandbox policy, and exec approvals; discussions of node file fetch policy; explanations that denying `exec` at agent tool policy level does not sandbox plugins/services; comments that sandbox/tool-policy/exec-approval controls are useful but not solved defaults; and issue-closing comments for related controls.
- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "sessions_spawn claude-cli"` returned Claude CLI and ACP runtime discussions that affect tool permissions, sandbox boundaries, and subagent UX.
--- a/docs/maturity-scorecard/inventory/android-app/background-service-reconnect-presence.md
+++ b/docs/maturity-scorecard/inventory/android-app/background-service-reconnect-presence.md
@@ -1,109 +0,0 @@
---
-title: "Android app - Background Service, Reconnect, and Presence Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Android app - Background Service, Reconnect, and Presence Maturity Note
-
-## Summary
-
-Android background operation is implemented around a foreground service, persistent notification, reconnecting Gateway sessions, presence-alive beacons, notification-listener state, and foreground-service microphone type switching. Coverage is Alpha because docs and source cover the intended behavior but no live backgrounding scorecard was found. Quality is the weakest Android component: archive evidence includes a foreground-service crash issue and an active PR to avoid persistent `dataSync` foreground service use.
-
-## Category Scope
-
- `NodeForegroundService`, persistent notification, background reconnect, node presence beacons, notification listener state, Gateway session reconnect, and reconnect after app backgrounding.
- Out of scope: individual node command handlers except where foreground/background state changes command availability.
-
-## Features
-
- Background reconnect and presence: Foreground-service presence, reconnect, and node presence behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Alpha (62%)`
- Positive signals: Docs explicitly say Android keeps the Gateway connection alive through a foreground service, auto-reconnects after first pairing, and sends `node.presence.alive` when backgrounded while connected. Source implements foreground notification state, reconnect loop, presence beacon payload/skip logic, and reconnect tests.
- Negative signals: No live Android backgrounding scenario was found that proves app background, foreground-service notification, presence beacon handling, Gateway restart, network loss, and app relaunch together.
- Integration gaps: Need a real-device background/reconnect scorecard across Android 14/15 service restrictions, battery saver, network changes, Gateway restart, and Talk Mode microphone service promotion/demotion.
-
-## Quality Score
-
- Score: `Alpha (55%)`
- Gitcrawl reports: `ForegroundServiceStartNotAllowedException Android` found issue #64903 for Android app crashes on `NodeForegroundService startForeground` with `ForegroundServiceStartNotAllowedException` and PR #80082 `fix(android): avoid dataSync FGS for persistent node`.
- Discrawl reports: `Android foreground service reconnect presence` returned no direct hits. Broader support context from node capability searches notes that mobile node Canvas/camera/screen commands fail when the app is backgrounded.
- Good qualities: The service uses a low-importance persistent notification, updates title/text from runtime state, adds a Disconnect action, switches service type for Talk Mode, and presence beacon responses require `handled: true` before counting durable last-seen updates.
- Bad qualities: Android foreground-service policy is a live crash risk, persistent node operation touches OS service quotas, and docs do not provide a current operator recipe for battery optimization, service denial, or reconnect triage.
- Excluded from quality: Test coverage and runtime-flow proof were not used to raise or lower Quality.
-
-## Completeness Score
-
- Score: `Alpha (62%)`
- Surface instructions: evaluated against `references/completeness/android-app.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Background reconnect and presence.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Prove background reconnect and presence on current Android OS versions with real devices.
- Add operator guidance for foreground-service denial, battery saver, OEM background restrictions, and notification permission states.
- Clarify which commands intentionally fail while the app is backgrounded and how to recover.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/platforms/android.md` documents foreground service connection keepalive, auto-reconnect on launch, and presence alive beacons after authenticated node session connect and backgrounding.
- `/Users/kevinlin/code/openclaw/apps/android/README.md` lists authenticated background presence beacons and push notifications in the rebuild checklist.
- `/Users/kevinlin/code/openclaw/docs/nodes/troubleshooting.md` is linked as related Android node troubleshooting.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/NodeForegroundService.kt` starts a foreground service, maintains the persistent notification, exposes Disconnect, and promotes service types for Talk Mode.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/ConnectionManager.kt` builds node connect options, user agent, advertised capabilities, and TLS policy used on reconnect.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/NodePresenceAliveBeacon.kt` builds and decodes `node.presence.alive`, throttles recent successes, and sanitizes failure reasons.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/gateway/GatewaySession.kt` owns connect, disconnect, reconnect, pause-after-auth-failure, and current connection closure.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/DeviceNotificationListenerService.kt` tracks notification-listener connection state and emits `notifications.changed` events.
-
-### Integration tests
-
- No live background/reconnect Android scenario was found.
- `/Users/kevinlin/code/openclaw/src/gateway/android-node.capabilities.live.test.ts` requires the app to stay unlocked and foregrounded for capability execution, which highlights the missing background scenario.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/NodeForegroundServiceTest.kt` covers foreground-service notification/type behavior.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/gateway/GatewaySessionReconnectTest.kt` covers replacing active connections and reconnect pause behavior.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/node/NodePresenceAliveBeaconTest.kt` covers presence beacon payload/response helper behavior.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/node/ConnectionManagerTest.kt` covers connection manager behavior.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "ForegroundServiceStartNotAllowedException Android" --json`
-
-Results:
-
- Issue #64903 `Android app crashes on NodeForegroundService startForeground with ForegroundServiceStartNotAllowedException`.
- PR #80082 `fix(android): avoid dataSync FGS for persistent node`.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode fts --limit 5 "Android foreground service reconnect presence"`
-
-Results:
-
- No direct hits.
-
-Additional query context:
-
- `/Users/kevinlin/.local/bin/discrawl search --mode fts --limit 5 "Android node capabilities gateway commands"` found support guidance that mobile node Canvas/camera/screen commands fail when the app is backgrounded.
--- a/docs/maturity-scorecard/inventory/android-app/camera-media-capture.md
+++ b/docs/maturity-scorecard/inventory/android-app/camera-media-capture.md
@@ -1,111 +0,0 @@
---
-title: "Android app - Media Capture Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Android app - Media Capture Maturity Note
-
-## Summary
-
-Android media capture includes CameraX photo and clip capture, image payload resizing, camera HUD feedback, WebView Canvas/A2UI, photo-library access in the third-party flavor, and live capability checks for camera/canvas commands. Coverage is Alpha because the implementation is real but foreground-only and the Android camera docs are not fully aligned with the source command set. Quality is Alpha because permissions, payload limits, WebView readiness, Play flavor restrictions, and foreground state make the operator path fragile.
-
-## Category Scope
-
-Included in this category:
-
- Camera and media capture: Camera listing, capture, photo, screen, and media capture behavior.
-
-## Features
-
- Camera and media capture: Camera listing, capture, photo, screen, and media capture behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Alpha (66%)`
- Positive signals: Android docs cover Canvas and camera foreground commands; source implements CameraX photo/clip capture, permission prompting, payload guards, A2UI/WebView actions, and photo-library access where the flavor permits it. The live capability suite profiles `camera.list`, `camera.snap`, `camera.clip`, `canvas.*`, and `canvas.a2ui.*`.
- Negative signals: Live capability tests are preconditioned on a paired, foregrounded, unlocked app and require the Screen tab for Canvas/A2UI. Docs for shared camera commands only list Android `camera.list` in the Android section even though source and platform docs expose snap/clip.
- Integration gaps: Need a real-device Android media scorecard that keeps the app foregrounded, grants camera/mic/photo permissions, invokes front/back photo, short clip with/without audio, Canvas navigate/eval/snapshot, and records background failure behavior.
-
-## Quality Score
-
- Score: `Alpha (62%)`
- Gitcrawl reports: `camera.snap Android` found issue #87058 where Android node connected but advertised zero commands; the snippet notes `camera.snap`, `camera.clip`, and `canvas.*` are high-risk gated commands. `photos.latest Android` returned no direct hits.
- Discrawl reports: Search found January support messages describing Android node camera snap/clip, Canvas, voice wake, and screen recording as supported node abilities, and warning that nodes are often offline or foreground dependent.
- Good qualities: Camera commands request runtime permissions, clamp clip duration, cap payload size, recompress JPEGs under API limits, show camera HUD state, and separate Play flavor from third-party photo access.
- Bad qualities: Media commands are foreground-only, WebView/A2UI depends on Screen tab readiness and Gateway canvas host reachability, and Play flavor removes photo-library permissions. Source/docs alignment is imperfect for Android camera command details.
- Excluded from quality: Test coverage and runtime-flow proof were not used to raise or lower Quality.
-
-## Completeness Score
-
- Score: `Alpha (66%)`
- Surface instructions: evaluated against `references/completeness/android-app.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Camera and media capture.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Align `/nodes/camera` Android command docs with source and platform docs for `camera.snap` and `camera.clip`.
- Add a foreground/background media failure-mode runbook with exact operator messages.
- Decide whether `photos.latest` is supported only for third-party builds or should have a Play-safe replacement.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/platforms/android.md` documents Canvas host, `canvas.eval`, `canvas.snapshot`, `canvas.navigate`, A2UI commands, and foreground-only camera commands `camera.snap` and `camera.clip`.
- `/Users/kevinlin/code/openclaw/docs/nodes/camera.md` documents Android camera settings, permissions, foreground requirement, and `camera.list`; its Android command list is narrower than the source and platform page.
- `/Users/kevinlin/code/openclaw/apps/android/README.md` documents Screen tab requirements for A2UI integration tests and says Play builds remove photo-library access.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/CameraCaptureManager.kt` implements CameraX device listing, photo capture, clip recording, permission requests, EXIF rotation, JPEG scaling, and payload limits.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/CameraHandler.kt` handles `camera.list`, `camera.snap`, `camera.clip`, HUD state, debug logging, clip size limits, and base64 payloads.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/PhotosHandler.kt` implements `photos.latest` with permission checks, latest image query, resizing, and base64 budget caps.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/CanvasScreen.kt` implements the WebView Canvas, safe browsing settings, WebMessage A2UI bridge, visibility lifecycle, and render-process handling.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/InvokeDispatcher.kt` enforces foreground requirement for camera and canvas commands.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/gateway/android-node.capabilities.live.test.ts` includes profiles for `camera.list`, `camera.snap`, `camera.clip`, `canvas.present`, `canvas.navigate`, `canvas.eval`, `canvas.snapshot`, and A2UI push/reset commands.
- `/Users/kevinlin/code/openclaw/apps/android/scripts/perf-online-benchmark.sh` checks Screen tab WebView availability before running the screen benchmark.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/node/CameraHandlerTest.kt`, `JpegSizeLimiterTest.kt`, `PhotosHandlerTest.kt`, `CanvasControllerSnapshotParamsTest.kt`, `CanvasActionTrustTest.kt`, and `CanvasA2UIActionBridgeTest.kt` cover media and canvas helpers.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/node/InvokeDispatcherTest.kt` covers command dispatch behavior.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "camera.snap Android" --json`
-
-Results:
-
- Issue #87058 `Android node connects but advertises zero commands ...`; snippet notes `camera.snap`, `camera.clip`, and `canvas.*` as correctly gated high-risk commands.
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "photos.latest Android" --json`
-
-Results:
-
- No direct hits.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode fts --limit 5 "Android camera snap screen canvas"`
-
-Results:
-
- 2026-01-03 support messages describe Android nodes as paired companion devices that can expose camera snap/clip, Canvas, screen recording, and audio/TTS surfaces, while noting real availability depends on node connectivity.
--- a/docs/maturity-scorecard/inventory/android-app/chat-sessions-ui.md
+++ b/docs/maturity-scorecard/inventory/android-app/chat-sessions-ui.md
@@ -1,103 +0,0 @@
---
-title: "Android app - Mobile Chat Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Android app - Mobile Chat Maturity Note
-
-## Summary
-
-The Android app has a substantial mobile chat surface: session selection, history, optimistic sends, streaming assistant text, pending tool-call display, image attachments, thinking controls, markdown rendering, and online chat benchmark support. Coverage reaches Beta because the implementation spans Gateway chat RPCs and online UI proof, though no full Play-installed chat scenario was found. Quality stays Alpha because active archive evidence includes chat copy/reply review issues and the current source still depends on fast-moving mobile UI paths.
-
-## Category Scope
-
-Included in this category:
-
- Chat tab: Chat tab, session list/filtering, composer, image attachments, message parsing/rendering, model/provider status adjacent to chat, and Gateway chat RPC integration
-
-## Features
-
- Chat tab: Chat tab, session list/filtering, composer, image attachments, message parsing/rendering, model/provider status adjacent to chat, and Gateway chat RPC integration
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Beta (70%)`
- Positive signals: Docs describe `chat.history`, `chat.send`, `chat.subscribe`, session selection, display normalization, and best-effort push updates. Source implements history bootstrap, session switching, optimistic messages, streaming assistant text, pending tool calls, abort/refresh, attachments, and markdown rendering. The online benchmark explicitly checks connected state and live chat composer.
- Negative signals: Evidence is stronger for source/unit/UI benchmark slices than for an end-to-end Play-installed chat path through install, pair, send, stream, background, reconnect, and resume.
- Integration gaps: Need recurring mobile chat QA that sends text and images, changes sessions, streams a tool-using answer, backgrounds/reopens the app, and verifies history parity with another client.
-
-## Quality Score
-
- Score: `Alpha (66%)`
- Gitcrawl reports: `Android message copy text selection chat screen` found issue #57754 and PR #59603 for chat copy/text selection. The PR review record flagged reply quoting, attachment-only reply context, empty text actions, and reply send semantics.
- Discrawl reports: Search found GitHub mirror review comments on PR #59603 that identify user-visible reply/copy problems around multiline quoting, image-only messages, blank copy/share payloads, and local-only reply UI state.
- Good qualities: Chat logic tracks pending run IDs, normalizes sessions, strips noisy model-control/tool-call text from history, supports image attachments with size handling, and separates visible health/errors from composer send enablement.
- Bad qualities: Reply/copy behavior has had several subtle user-facing regressions, mobile chat UI is still being actively rebuilt, and session continuity across network changes/backgrounding lacks a published Android-specific runbook result.
- Excluded from quality: Test coverage and runtime-flow proof were not used to raise or lower Quality.
-
-## Completeness Score
-
- Score: `Beta (70%)`
- Surface instructions: evaluated against `references/completeness/android-app.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Chat tab.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Add a mobile chat parity smoke against WebChat/TUI for history, streaming, sessions, attachments, and abort.
- Confirm reply/copy/text-selection behavior after the PR #59603 review findings.
- Make reconnect and session-resume state explicit in Android chat diagnostics.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/platforms/android.md` documents Chat tab history via `chat.history`, send via `chat.send`, best-effort `chat.subscribe`, session selection, and display normalization behavior.
- `/Users/kevinlin/code/openclaw/apps/android/README.md` says the rebuild includes restyled Chat UI, streaming support, and push notifications for gateway/chat status updates.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/chat/ChatController.kt` implements session loading/switching, health, optimistic sends, pending runs, streaming assistant text, tool-call state, history, and Gateway `chat.send`.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/chat/ChatScreen.kt` composes chat header, notices, message list, attachments, voice shortcut, thinking level, refresh/abort, and send path.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/chat/ChatComposer.kt` implements composer controls, thinking selector, attachment strip, refresh, abort, and send enablement.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/chat/ChatMarkdown.kt` renders markdown blocks, code, tables, task lists, links, images, and selection containers.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/SessionsScreen.kt` renders recent/live session filters, sorting, and active-session rows.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/apps/android/scripts/perf-online-benchmark.sh` verifies the app reaches a visible connected state and that the live chat composer is present, then runs chat session-switch or scroll benchmarks.
- No full Android chat e2e through a real Gateway answer and another client history parity check was found.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/chat/ChatControllerMessageIdentityTest.kt`, `ChatControllerSessionPolicyTest.kt`, and `ChatMessageContentParsingTest.kt` cover chat model/controller behavior.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/ui/chat/ChatComposerDraftTest.kt`, `ChatImageCodecTest.kt`, `ChatMarkdownTest.kt`, `ChatSheetContentTest.kt`, and `SessionFiltersTest.kt` cover chat UI helpers.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "Android message copy text selection chat screen" --json`
-
-Results:
-
- Issue #57754 `Android: Add message copy and text selection to chat screen`.
- PR #59603 `feat(android): Add message copy and text selection to chat screen`.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode fts --limit 5 "Android chat screen message copy"`
-
-Results:
-
- 2026-04-03 GitHub mirror review comments on PR #59603 flagged multiline quote formatting, attachment-only reply context, empty copy/share actions for image-only messages, and missing reply target in outgoing send path.
--- a/docs/maturity-scorecard/inventory/android-app/gateway-pairing-security.md
+++ b/docs/maturity-scorecard/inventory/android-app/gateway-pairing-security.md
@@ -1,108 +0,0 @@
---
-title: "Android app - Connection Setup Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Android app - Connection Setup Maturity Note
-
-## Summary
-
-Android pairing and Gateway security have substantial implementation depth: setup-code/manual flows, mDNS and wide-area DNS-SD discovery, secure endpoint validation, device-token persistence, TLS fingerprint handling, node and operator roles, and reconnect policy. Coverage is Alpha near Beta because source and unit tests are strong but live Android pairing proof is preconditioned rather than turnkey. Quality remains Alpha because archive evidence shows repeated operator confusion around auth, LAN addressing, protocol/version skew, and manual `ws://` parsing.
-
-## Category Scope
-
-Included in this category:
-
- Gateway discovery: Gateway discovery, setup-code and manual endpoint parsing, WS/WSS connection setup, TLS trust decisions, device identity, stored device tokens, node/operator auth, and connection error handling
-
-## Features
-
- Gateway discovery: Gateway discovery, setup-code and manual endpoint parsing, WS/WSS connection setup, TLS trust decisions, device identity, stored device tokens, node/operator auth, and connection error handling
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Alpha (68%)`
- Positive signals: Docs describe mDNS, setup-code/manual connection, secure remote endpoint rules, Tailscale Serve guidance, pairing approval commands, auto-reconnect, and node status checks. Unit tests cover endpoint parsing, auth payloads, token storage, TLS probe cleanup, reconnect, and setup flow logic.
- Negative signals: The main live Android capability suite assumes the app is already installed, reachable, paired, approved, and foregrounded. No clean first-run Android connect-to-approval live scenario was found.
- Integration gaps: Need a single live scenario that starts a fresh Gateway, connects Android by setup code and manual URL, exercises TLS trust/cleartext policy, approves pairing, verifies node/operator sessions, and records reconnection after auth failure.
-
-## Quality Score
-
- Score: `Alpha (64%)`
- Gitcrawl reports: `Android pairing websocket TLS manual LAN setup protocol mismatch` found issue #87216 for manual LAN setup parsing `ws://` as host `ws`. Broader `Android app` search also surfaced #85966 for silent WebSocket close after node pairing and #78807 for private LAN pairing auth.
- Discrawl reports: Search found a March 7 GitHub mirror comment on #16638 where Android pairing with `gateway.auth.token` still hit `device signature invalid`, plus a February support thread walking a user through LAN IP, reachability, and auth/pairing diagnosis.
- Good qualities: The endpoint parser blocks insecure remote `ws://` while permitting loopback, emulator bridge, and private LAN hosts; stored device tokens are scoped by device and role; discovery TXT hints are not treated as authoritative TLS pins; node and operator roles are separated.
- Bad qualities: The user-facing failure surface is still easy to hit: wrong LAN host, token auth, stale protocol, TLS trust, and pairing retry states can all look like generic connection failure.
- Excluded from quality: Test coverage and runtime-flow proof were not used to raise or lower Quality.
-
-## Completeness Score
-
- Score: `Alpha (68%)`
- Surface instructions: evaluated against `references/completeness/android-app.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Gateway discovery.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Add a scripted first-run pairing smoke for setup code, manual LAN, and remote WSS.
- Improve operator copy around pairing/auth versus wrong address versus TLS trust.
- Keep Play Store version skew visible in pairing errors so old clients fail with an actionable upgrade message.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/platforms/android.md` documents the Android to Gateway WebSocket path, device pairing role, secure endpoint rules, setup-code/manual modes, pairing approval, auto-reconnect, and status verification.
- `/Users/kevinlin/code/openclaw/apps/android/README.md` documents USB-only gateway testing with `adb reverse` and Connect/Pair steps.
- `/Users/kevinlin/code/openclaw/docs/gateway/bonjour.md` is linked from the Android runbook for discovery debugging.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/gateway/GatewayDiscovery.kt` implements local NSD/mDNS plus optional wide-area DNS-SD discovery.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/GatewayConfigResolver.kt` decodes setup codes, parses manual endpoints, and enforces secure remote URL rules.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/gateway/GatewaySession.kt` manages WebSocket connect, node/operator auth sources, RPCs, reconnect, and invoke handling.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/gateway/DeviceAuthStore.kt` persists device tokens and scopes by normalized device and role.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/ConnectionManager.kt` builds node/operator connect options and resolves TLS parameters.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/gateway/android-node.capabilities.live.test.ts` connects a Gateway client, selects an Android node, verifies paired/connected state, and uses remote config for remote runs, but requires manual setup first.
- No clean Android first-run pairing e2e was found.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/gateway/GatewaySessionReconnectTest.kt` covers reconnect and pairing-required pause/retry behavior.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/gateway/GatewaySessionInvokeTest.kt`, `GatewaySessionInvokeTimeoutTest.kt`, `DeviceAuthPayloadTest.kt`, `DeviceAuthStoreTest.kt`, `GatewayBootstrapAuthTest.kt`, `GatewayConfigResolverTest.kt`, and `OnboardingFlowLogicTest.kt` cover core auth, invoke, parsing, and onboarding logic.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "Android pairing websocket TLS manual LAN setup protocol mismatch" --json`
-
-Results:
-
- Issue #87216 `Android manual LAN setup parses ws:// as host ws and resolves http://ws:<port>`.
-
-Additional query context:
-
- `gitcrawl search openclaw/openclaw --query "Android app" --json` found #85966 `Android UI/operator WebSocket closes silently ... after successful node pair` and #78807 `fix(mobile): allow private LAN pairing auth`.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode fts --limit 5 "Android manual LAN ws host pairing"`
-
-Results:
-
- 2026-03-07 GitHub mirror comment on #16638 reports Android node cannot pair when `gateway.auth.token` is configured and still hits `device signature invalid`.
- 2026-02-06 support thread explains LAN IP, reachability, firewall/client isolation, and auth/pairing as likely Android connection blockers.
--- a/docs/maturity-scorecard/inventory/android-app/install-release-distribution.md
+++ b/docs/maturity-scorecard/inventory/android-app/install-release-distribution.md
@@ -1,117 +0,0 @@
---
-title: "Android app - Distribution Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Android app - Distribution Maturity Note
-
-## Summary
-
-The Android app has a public Google Play install path, source build/run docs, Play and third-party product flavors, signed AAB release automation, version-code auto-bumping, and startup/performance scripts. Coverage remains Alpha because the app README still marks the rebuild as extremely alpha and leaves full end-to-end QA and release hardening unchecked. Quality is also Alpha: the Play policy split is a strong design choice, but archive evidence includes an outdated Play Store protocol mismatch and an open request for prebuilt APK release artifacts.
-
-## Category Scope
-
-Included in this category:
-
- Public Google Play install path: Public Google Play install path and source build/run entrypoints
- Manual install path: Manual install path and Google Play distribution behavior.
- Release smoke and startup performance: Release smoke and startup performance checks for Android app distribution.
-
-## Features
-
- Public Google Play install path: Public Google Play install path and source build/run entrypoints
- Manual install path: Manual install path and Google Play distribution behavior.
- Release smoke and startup performance: Release smoke and startup performance checks for Android app distribution.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Alpha (60%)`
- Positive signals: Public install is documented, source build/run commands exist, package scripts expose assemble/install/test/release tasks, release automation builds signed Play and third-party AABs, and Android benchmark scripts cover startup and online UI paths.
- Negative signals: The app README still labels the rebuild extremely alpha and leaves full end-to-end QA and release hardening incomplete. The release path is mostly documented and scripted, but no recurring public release smoke record was found.
- Integration gaps: Need a repeatable release checklist that installs the Play artifact, pairs it to a current Gateway, runs chat/voice/camera/background scenarios, verifies version compatibility, and records Play Console policy status.
-
-## Quality Score
-
- Score: `Alpha (62%)`
- Gitcrawl reports: `Play Store Android app protocol mismatch` found issue #85971 for Play Store Android app v2026.4.5 protocol mismatch against Gateway >= v2026.5.12 and issue #87216 as a related manual LAN setup protocol-mismatch report. `Android APK releases` found issue #9443 requesting prebuilt Android APK releases.
- Discrawl reports: Search found a May 19 support message saying the Play Store app was outdated and had a protocol mismatch; the user built a newer app locally and then hit a connected/operator-offline state.
- Good qualities: Play and third-party flavors separate Google Play restricted permissions from sideload-only SMS, call-log, and photo surfaces. Release signing properties are kept local-only, release bundles are copied to a predictable output directory, and release AAB signatures are verified.
- Bad qualities: Distribution is still fragile for ordinary users because Play can lag Gateway protocol changes, APK artifacts are not fully productized, and docs explicitly say release hardening is unfinished.
- Excluded from quality: Test coverage and runtime-flow proof were not used to raise or lower Quality.
-
-## Completeness Score
-
- Score: `Alpha (60%)`
- Surface instructions: evaluated against `references/completeness/android-app.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Public Google Play install path, Manual install path, Release smoke and startup performance.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Publish and record a current Play smoke path against the current Gateway protocol.
- Decide whether GitHub release APKs or third-party AAB/APK artifacts are part of the supported distribution promise.
- Add release hardening evidence for Play policy declarations, app signing, version skew, rollback, and fresh-install pairing.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/platforms/android.md` links the official Google Play app, describes Android as a companion node, and points to source under `apps/android`.
- `/Users/kevinlin/code/openclaw/apps/android/README.md` marks the rebuild as extremely alpha, lists rebuild checklist items, documents Play and third-party builds, and calls out full end-to-end QA and release hardening as unchecked.
- `/Users/kevinlin/code/openclaw/README.md` lists Android as an optional node with Connect, Chat, Voice, Canvas, Camera, Screen, and device command families.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/package.json` defines `android:assemble`, `android:install`, `android:bundle:release`, `android:test`, `android:test:integration`, lint, and third-party variants.
- `/Users/kevinlin/code/openclaw/apps/android/app/build.gradle.kts` sets `applicationId = "ai.openclaw.app"`, `minSdk = 31`, `targetSdk = 36`, Play and third-party flavors, release signing checks, R8/resource shrinking, lint warnings as errors, and version `2026.5.28`.
- `/Users/kevinlin/code/openclaw/apps/android/scripts/build-release-aab.ts` auto-bumps version name/code, builds Play and third-party release bundles, verifies signatures with `jarsigner`, and prints SHA-256 hashes.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/play/AndroidManifest.xml` removes restricted media permissions from the Play flavor; `/Users/kevinlin/code/openclaw/apps/android/app/src/thirdParty/AndroidManifest.xml` adds SMS and call-log permissions for the third-party flavor.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/apps/android/benchmark/src/main/java/ai/openclaw/app/benchmark/StartupMacrobenchmark.kt` and `apps/android/scripts/perf-startup-benchmark.sh` cover startup measurement.
- `/Users/kevinlin/code/openclaw/apps/android/scripts/perf-online-benchmark.sh` measures launch-to-connected, Screen tab, and Chat tab paths on a connected device.
- No current Play Store install to paired Gateway release-smoke artifact was found.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/ui/OnboardingFlowLogicTest.kt` covers onboarding flow logic adjacent to install.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/SecurePrefsTest.kt` and `SecurePrefsNotificationForwardingTest.kt` cover stored app state used after install.
- Release AAB automation itself does not appear to have a dedicated unit test.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "Play Store Android app protocol mismatch" --json`
-
-Results:
-
- Issue #85971 `[Bug] Play Store Android app v2026.4.5 protocol mismatch against Gateway >= v2026.5.12 - clawx user report`.
- Issue #87216 `Android manual LAN setup parses ws:// as host ws and resolves http://ws:<port>`.
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "Android APK releases" --json`
-
-Results:
-
- Issue #9443 `Request: Prebuilt Android APK releases`.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode fts --limit 5 "Android Play Store protocol mismatch"`
-
-Results:
-
- 2026-05-19 support message: a user built the Android app locally because the Play Store app was outdated and had a protocol mismatch; the newer local build connected by Tailscale but reported operator offline.
--- a/docs/maturity-scorecard/inventory/android-app/node-device-capabilities.md
+++ b/docs/maturity-scorecard/inventory/android-app/node-device-capabilities.md
@@ -1,124 +0,0 @@
---
-title: "Android app - Device Runtime Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Android app - Device Runtime Maturity Note
-
-## Summary
-
-Android node capabilities are broad: device status/info/permissions/health, notifications, system notify, contacts, calendar, location, motion, camera, Canvas/A2UI, Talk PTT, and flavor-gated SMS/call-log/photos. Coverage reaches Beta because the Gateway live capability test executes the advertised non-interactive command surface against a paired Android node. Quality remains Alpha because archive evidence includes zero-command advertisement failures, notification forwarding cross-session risk, and multiple open requests for additional native Android capability families.
-
-## Category Scope
-
-Included in this category:
-
- Background reconnect and presence: Foreground-service presence, reconnect, and node presence behavior.
- Device command availability: Android device command availability and capability advertisement.
-
-## Features
-
- Background reconnect and presence: Foreground-service presence, reconnect, and node presence behavior.
- Device command availability: Android device command availability and capability advertisement.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Beta (70%)`
- Positive signals: Docs enumerate Android command families and flavor-dependent availability. Source has a central registry for advertised capabilities and commands, dispatcher gates for foreground and permission-sensitive commands, and handlers across the major device surfaces. The live capability test reads `node.describe`, applies the effective Gateway allowlist, invokes advertised non-interactive commands, and fails on unmapped command profiles.
- Negative signals: The live suite is preconditioned and excludes interactive screen-recording consent. It validates command contracts but does not prove every user-facing permission grant or long-lived device-state workflow.
- Integration gaps: Need a full Android node command scorecard that records command availability across Play and third-party builds, denied/granted permissions, foreground/background state, and Gateway allowlist/denylist policy.
-
-## Quality Score
-
- Score: `Alpha (63%)`
- Gitcrawl reports: `notifications.list Android node` found issue #48516 for notification forwarding causing cross-session replies and issue #87058 for Android node connecting but advertising zero commands. `Android Health Connect read-only node commands` found #78611, and `Google Home API bridge Android app native smart-home` found #78476 as future capability requests.
- Discrawl reports: Search found a GitHub mirror review note that the live Android suite now filters declared commands by effective policy allowlist, a review note asking to add `callLog.search` to live capability profiles, and support guidance that Canvas/camera/screen commands fail when a mobile node is backgrounded or does not advertise the capability.
- Good qualities: Capability advertisement is data-driven, sensitive surfaces are gated by build flavor and runtime availability, command dispatch returns structured errors, and Gateway policy is applied before live command execution.
- Bad qualities: Capability shape is large and permission-dependent; notification events can affect chat/session routing; Play flavor removes several high-value device commands; and future native Android capability asks are already accumulating.
- Excluded from quality: Test coverage and runtime-flow proof were not used to raise or lower Quality.
-
-## Completeness Score
-
- Score: `Beta (70%)`
- Surface instructions: evaluated against `references/completeness/android-app.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for both features: Background reconnect and presence, and Device command availability.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Record command availability matrices for Play versus third-party flavor.
- Add release-smoke evidence for notification forwarding session routing and policy filters.
- Keep live capability profiles in lockstep with every newly advertised Android command.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/platforms/android.md` lists command families: device status/info/permissions/health, notifications, photos, contacts, calendar, call log, SMS, motion, camera, Canvas, and Talk.
- `/Users/kevinlin/code/openclaw/apps/android/README.md` documents Google Play restricted permissions and the Play versus third-party flavor split.
- `/Users/kevinlin/code/openclaw/docs/plugins/sdk-runtime.md` documents paired node invocation from Gateway-loaded plugins and CLI commands.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt` wires handlers, capability flags, node/operator sessions, sensitive feature config, and command dispatch.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/InvokeCommandRegistry.kt` defines advertised capabilities and commands plus availability gates.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/InvokeDispatcher.kt` routes commands and enforces foreground, debug, permission, flavor, and availability errors.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/DeviceHandler.kt`, `NotificationsHandler.kt`, `ContactsHandler.kt`, `CalendarHandler.kt`, `LocationHandler.kt`, `MotionHandler.kt`, `SystemHandler.kt`, `SmsHandler.kt`, and `CallLogHandler.kt` implement the command families.
- `/Users/kevinlin/code/openclaw/src/gateway/android-node.capabilities.live.test.ts` is the Gateway-side live capability harness.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/gateway/android-node.capabilities.live.test.ts` connects to a Gateway, selects a paired Android node, reads `node.describe`, resolves allowlist policy, invokes every mapped advertised non-interactive command, and verifies payload contracts or expected deterministic errors.
- The suite explicitly skips interactive screen-recording consent.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/node/InvokeCommandRegistryTest.kt`, `InvokeDispatcherTest.kt`, `DeviceHandlerTest.kt`, `NotificationsHandlerTest.kt`, `DeviceNotificationListenerServiceTest.kt`, `ContactsHandlerTest.kt`, `CalendarHandlerTest.kt`, `LocationHandlerTest.kt`, `MotionHandlerTest.kt`, and `SystemHandlerTest.kt` cover core command behavior.
- `/Users/kevinlin/code/openclaw/src/gateway/android-node.capabilities.policy-config.test.ts` and `android-node.capabilities.policy-source.test.ts` cover live-suite policy config behavior.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "notifications.list Android node" --json`
-
-Results:
-
- Issue #48516 `Android node notification forwarding causes cross-session replies (WhatsApp duplicate sends to wrong group)`.
- Issue #87058 `Android node connects but advertises zero commands ...`.
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "Android Health Connect read-only node commands" --json`
-
-Results:
-
- Issue #78611 `[Feature]: Android Health Connect read-only node commands`.
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "Google Home API bridge Android app native smart-home" --json`
-
-Results:
-
- Issue #78476 `Feature: Google Home API bridge in Android app for native smart-home control`.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode fts --limit 5 "Android node capabilities gateway commands"`
-
-Results:
-
- 2026-03-19 GitHub mirror review note says the live suite now treats policy allowlist as part of runnable preconditions.
- 2026-03-13 GitHub mirror review note asks to add a `callLog.search` profile to Android live capability checks.
- 2026-03-13 support thread explains Canvas/camera/screen capability failures when no paired node is connected, the mobile app is backgrounded, or capabilities are not advertised.
--- a/docs/maturity-scorecard/inventory/android-app/report.md
+++ b/docs/maturity-scorecard/inventory/android-app/report.md
@@ -1,237 +0,0 @@
---
-title: "Android app Maturity Report"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Android app Maturity Report
-
-## Top-level scores
-
-These rollups are simple arithmetic means over the category-note numeric
-scores in
-`scores.yaml`. Percentages are rounded to the nearest whole number.
-
- Coverage: `Alpha (65%)`
- Quality: `Alpha (62%)`
- Completeness: `Alpha (65%)`
- LTS Features: `0/7`
-
-## Summary
-
-This report promotes the archived `android-app` maturity evidence from `/Users/kevinlin/tmp/maturity/android-app` into the current process-version-3 inventory contract.
-
-The category Coverage and Quality scores come from the archived evidence-backed score rows. Completeness is initialized from the same archived evidence breadth and known-gap record, then joined with the surface-specific completeness rubric referenced by taxonomy.
-
-## Matrix
-
-| Category                                        | LTS | Coverage      | Quality       | Completeness  | Features to evaluate                                                                        |
-| ----------------------------------------------- | --- | ------------- | ------------- | ------------- | ------------------------------------------------------------------------------------------- |
-| [Media Capture](camera-media-capture.md)        | ❌  | `Alpha (66%)` | `Alpha (62%)` | `Alpha (66%)` | Camera and media capture                                                                    |
-| [Mobile Chat](chat-sessions-ui.md)              | ❌  | `Beta (70%)`  | `Alpha (66%)` | `Beta (70%)`  | Chat tab                                                                                    |
-| [Connection Setup](gateway-pairing-security.md) | ❌  | `Alpha (68%)` | `Alpha (64%)` | `Alpha (68%)` | Gateway discovery                                                                           |
-| [Distribution](install-release-distribution.md) | ❌  | `Alpha (60%)` | `Alpha (62%)` | `Alpha (60%)` | Public Google Play install path, Manual install path, Release smoke and startup performance |
-| [Settings](settings-permissions-diagnostics.md) | ❌  | `Alpha (64%)` | `Alpha (66%)` | `Alpha (64%)` | Settings sheet                                                                              |
-| [Voice](voice-talk-wake.md)                     | ❌  | `Alpha (66%)` | `Alpha (60%)` | `Alpha (66%)` | Voice tab                                                                                   |
-| [Device Runtime](node-device-capabilities.md)   | ❌  | `Alpha (62%)` | `Alpha (55%)` | `Alpha (62%)` | Background reconnect and presence, Device command availability                              |
-
-## Scoring rubric
-
- Coverage:
-  maturity-label rating for integration, e2e, live, or server/runtime flow
-  evidence across the category. Unit tests can provide supporting context but never make a
-  feature covered by themselves.
- Quality:
-  maturity-label rating for implementation and operational robustness. Unit,
-  integration, e2e, live, and real runtime-flow test coverage are Coverage
-  inputs only; they do not raise or lower Quality.
- Completeness:
-  maturity-label rating for how fully the category delivers the intended
-  surface-specific capability set. Use the taxonomy-linked completeness
-  instructions for this surface.
- LTS:
-  calculated as `quality > 80 and coverage > 90`, or when the matching
-  taxonomy category sets `human_lts_override`.
- Shared score bands:
-  `Lovable = 95-100`, `Stable = 80-95`, `Beta = 70-80`,
-  `Alpha = 50-70`, and `Experimental = 0-50`. At shared boundaries, choose the
-  higher maturity label.
- Major quality/completeness gaps:
-  evidence text only, tracked in the detailed feature inventory rather than as a
-  separate scored dimension.
-
-## Detailed feature inventory
-
-### 1. Media Capture
-
-Search anchors: camera.list, camera.capture, screen capture.
-
-Category note: [Media Capture](camera-media-capture.md)
-
-Score decisions:
-
- Coverage: `Alpha (66%)`
- Quality: `Alpha (62%)`
- Completeness: `Alpha (66%)`
- LTS: ❌
-
-Features:
-
- Camera and media capture: Camera listing, capture, photo, screen, and media capture behavior.
-
-Primary docs:
-
- `docs/platforms/android.md`
- `docs/nodes/camera.md`
-
-### 2. Mobile Chat
-
-Search anchors: Chat tab, chat.history, mobile UI.
-
-Category note: [Mobile Chat](chat-sessions-ui.md)
-
-Score decisions:
-
- Coverage: `Beta (70%)`
- Quality: `Alpha (66%)`
- Completeness: `Beta (70%)`
- LTS: ❌
-
-Features:
-
- Chat tab: Chat tab, session list/filtering, composer, image attachments, message parsing/rendering, model/provider status adjacent to chat, and Gateway chat RPC integration
-
-Primary docs:
-
- `docs/platforms/android.md`
-
-### 3. Connection Setup
-
-Search anchors: Setup Code, Manual, Bonjour.
-
-Category note: [Connection Setup](gateway-pairing-security.md)
-
-Score decisions:
-
- Coverage: `Alpha (68%)`
- Quality: `Alpha (64%)`
- Completeness: `Alpha (68%)`
- LTS: ❌
-
-Features:
-
- Gateway discovery: Gateway discovery, setup-code and manual endpoint parsing, WS/WSS connection setup, TLS trust decisions, device identity, stored device tokens, node/operator auth, and connection error handling
-
-Primary docs:
-
- `docs/platforms/android.md`
- `docs/gateway/bonjour.md`
- `docs/gateway/pairing.md`
-
-### 4. Distribution
-
-Search anchors: Google Play, Manual, Startup macrobenchmark.
-
-Category note: [Distribution](install-release-distribution.md)
-
-Score decisions:
-
- Coverage: `Alpha (60%)`
- Quality: `Alpha (62%)`
- Completeness: `Alpha (60%)`
- LTS: ❌
-
-Features:
-
- Public Google Play install path: Public Google Play install path and source build/run entrypoints.
- Manual install path: Manual install path and Google Play distribution behavior.
- Release smoke and startup performance: Release smoke and startup performance checks for Android app distribution.
-
-Primary docs:
-
- `docs/platforms/android.md`
-
-### 5. Settings
-
-Search anchors: Settings sheet, Notification forwarding, diagnostics.
-
-Category note: [Settings](settings-permissions-diagnostics.md)
-
-Score decisions:
-
- Coverage: `Alpha (64%)`
- Quality: `Alpha (66%)`
- Completeness: `Alpha (64%)`
- LTS: ❌
-
-Features:
-
- Settings sheet: Settings sheet and settings detail screens, permission request UX, notification forwarding controls, Nodes & Devices status, provider/model diagnostics, secure preferences, and copyable Gateway diagnostic report
-
-Primary docs:
-
- `docs/platforms/android.md`
-
-### 6. Voice
-
-Search anchors: Talk Mode, Voice tab, wake.
-
-Category note: [Voice](voice-talk-wake.md)
-
-Score decisions:
-
- Coverage: `Alpha (66%)`
- Quality: `Alpha (60%)`
- Completeness: `Alpha (66%)`
- LTS: ❌
-
-Features:
-
- Voice tab: Voice tab, manual mic capture, Talk Mode listen/think/speak loop, Gateway Talk config, talk.speak, realtime relay mode, voice capture service type, and voice e2e receiver/script
-
-Primary docs:
-
- `docs/platforms/android.md`
- `docs/nodes/talk.md`
-
-### 7. Device Runtime
-
-Search anchors: foreground service, node.presence.alive, background reconnect, Additional Android command families, node capabilities, command handling.
-
-Category note: [Device Runtime](node-device-capabilities.md)
-
-Score decisions:
-
- Coverage: `Alpha (62%)`
- Quality: `Alpha (55%)`
- Completeness: `Alpha (62%)`
- LTS: ❌
-
-Features:
-
- Background reconnect and presence: Foreground-service presence, reconnect, and node presence behavior.
- Device command availability: Android device command availability and capability advertisement.
-
-Primary docs:
-
- `docs/platforms/android.md`
- `docs/nodes/troubleshooting.md`
- `docs/gateway/protocol.md`
-
-## Recommended scorecard interpretation
-
-Use this migrated score as the current inventory baseline. Refresh individual categories with live category-agent research before treating a high score as an LTS promotion gate.
-
-## Out of scope for this surface
-
- Redefining taxonomy category boundaries; taxonomy remains the source of truth for category identity, features, docs, and search anchors.
-
-## Audit provenance
-
- Score source:
-  `docs/maturity-scorecard/inventory/android-app/scores.yaml`.
- Taxonomy metadata source:
-  `.agents/skills/claw-score/taxonomy.yaml`.
- Archived evidence source:
-  `/Users/kevinlin/tmp/maturity/android-app`.
--- a/docs/maturity-scorecard/inventory/android-app/scores.yaml
+++ b/docs/maturity-scorecard/inventory/android-app/scores.yaml
@@ -1,38 +0,0 @@
-version: 1
-process_version: 3
-data:
-  - name: Media Capture
-    category_note: camera-media-capture.md
-    coverage: 66
-    quality: 62
-    completeness: 66
-  - name: Mobile Chat
-    category_note: chat-sessions-ui.md
-    coverage: 70
-    quality: 66
-    completeness: 70
-  - name: Connection Setup
-    category_note: gateway-pairing-security.md
-    coverage: 68
-    quality: 64
-    completeness: 68
-  - name: Distribution
-    category_note: install-release-distribution.md
-    coverage: 60
-    quality: 62
-    completeness: 60
-  - name: Settings
-    category_note: settings-permissions-diagnostics.md
-    coverage: 64
-    quality: 66
-    completeness: 64
-  - name: Voice
-    category_note: voice-talk-wake.md
-    coverage: 66
-    quality: 60
-    completeness: 66
-  - name: Device Runtime
-    category_note: node-device-capabilities.md
-    coverage: 62
-    quality: 55
-    completeness: 62
--- a/docs/maturity-scorecard/inventory/android-app/settings-permissions-diagnostics.md
+++ b/docs/maturity-scorecard/inventory/android-app/settings-permissions-diagnostics.md
@@ -1,112 +0,0 @@
---
-title: "Android app - Settings Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Android app - Settings Maturity Note
-
-## Summary
-
-Android settings and diagnostics cover a large operator surface: profile, Gateway settings, camera/location/mic/photos/motion/SMS/call-log/notification permissions, notification forwarding policy, nodes/devices view, provider/model state, and copyable Gateway diagnostics. Coverage is Alpha because the source and unit coverage are broad but no integrated operator recovery scenario was found. Quality is Alpha but stronger than the background service because the app has clear safety controls, policy filters, and copyable diagnostic text.
-
-## Category Scope
-
-Included in this category:
-
- Settings sheet: Settings sheet and settings detail screens, permission request UX, notification forwarding controls, Nodes & Devices status, provider/model diagnostics, secure preferences, and copyable Gateway diagnostic report
-
-## Features
-
- Settings sheet: Settings sheet and settings detail screens, permission request UX, notification forwarding controls, Nodes & Devices status, provider/model diagnostics, secure preferences, and copyable Gateway diagnostic report
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Alpha (64%)`
- Positive signals: Docs describe permission prerequisites and notification forwarding controls. Source implements permission launchers, rationale/settings dialogs, notification allowlist/blocklist/quiet-hours/rate-limit/session controls, node/device status panels, and diagnostic report copy. Unit tests cover several settings and policy helpers.
- Negative signals: No integrated Android operator recovery flow was found for "Gateway offline", "pairing/auth failure", "missing permission", "notification listener disabled", and "node capability unavailable" from one UI path.
- Integration gaps: Need a settings/diagnostics scenario that starts from common failures, copies diagnostics, changes permissions/policy, reconnects Gateway, and verifies the corresponding command/capability state changes.
-
-## Quality Score
-
- Score: `Alpha (66%)`
- Gitcrawl reports: `Android light mode theme toggle` found issue #87688 requesting a light mode/theme toggle. More capability-specific searches found future Health Connect and Google Home requests, which imply settings will need more capability management as Android expands.
- Discrawl reports: `Android settings permissions diagnostics notification forwarding` returned no direct hits.
- Good qualities: Permission prompts are centralized and can show rationale/settings dialogs; notification forwarding has allowlist/blocklist, quiet hours, rate limiting, session key, safer self-package handling, and app-picker UI; Gateway diagnostics text tells users what commands and facts to provide.
- Bad qualities: Operator recovery is spread across several screens, theme/accessibility customization is incomplete, and there is no recorded live flow tying settings changes to Gateway/node capability changes.
- Excluded from quality: Test coverage and runtime-flow proof were not used to raise or lower Quality.
-
-## Completeness Score
-
- Score: `Alpha (64%)`
- Surface instructions: evaluated against `references/completeness/android-app.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Settings sheet.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Add a compact Android diagnostics runbook for connection, permissions, notification forwarding, and node command availability.
- Add live proof that settings toggles update advertised capabilities without stale Gateway state.
- Decide whether theme/accessibility options are part of the Android app support promise before promotion.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/platforms/android.md` documents Android permissions, notification forwarding controls, connection diagnostics, and related troubleshooting links.
- `/Users/kevinlin/code/openclaw/apps/android/README.md` documents rebuild items for settings restyle, permission requests in onboarding/settings, push notifications, security hardening, and Play restricted permissions.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/SettingsSheet.kt` implements broad settings and permission controls, notification forwarding UI, assistant role state, camera/location/mic/photos/motion/SMS/call-log availability, and installed-app picker state.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/NodesDevicesSettingsScreen.kt` shows live nodes, paired devices, pending device requests, status badges, and refresh/error states.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/PermissionRequester.kt` centralizes missing-permission requests, rationale dialogs, timeouts, and settings redirects.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/NotificationForwardingPolicy.kt` implements package allow/block filtering, quiet-hours evaluation, and burst limiting.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/GatewayDiagnostics.kt` builds a copyable diagnostic prompt with screen, app version, device, Android SDK, gateway address, and status/error.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/SecurePrefs.kt` persists app, Gateway, notification, and device settings.
-
-### Integration tests
-
- No integrated Android settings/operator recovery scenario was found.
- `/Users/kevinlin/code/openclaw/src/gateway/android-node.capabilities.live.test.ts` indirectly depends on settings-controlled command availability and policy allowlists.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/PermissionRequesterTest.kt`, `NotificationForwardingPolicyTest.kt`, `SecurePrefsTest.kt`, and `SecurePrefsNotificationForwardingTest.kt` cover permission and settings helpers.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/ui/SettingsSheetNotificationAppsTest.kt`, `ProviderModelStatusTest.kt`, and `GatewayConfigResolverTest.kt` cover settings UI helpers.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "Android light mode theme toggle" --json`
-
-Results:
-
- Issue #87688 `Android app: Add light mode / theme toggle`.
- Issue #28300 `Theme Customization System - Preset Themes + Custom Theme Studio` as adjacent theme work.
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "Android app settings permissions diagnostics" --json`
-
-Results:
-
- No direct hits.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode fts --limit 5 "Android settings permissions diagnostics notification forwarding"`
-
-Results:
-
- No direct hits.
--- a/docs/maturity-scorecard/inventory/android-app/voice-talk-wake.md
+++ b/docs/maturity-scorecard/inventory/android-app/voice-talk-wake.md
@@ -1,107 +0,0 @@
---
-title: "Android app - Voice Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Android app - Voice Maturity Note
-
-## Summary
-
-Android voice has moved beyond a placeholder: the app exposes manual mic dictation and Talk Mode UI, Gateway `talk.speak` synthesis with local fallback, realtime relay plumbing, foreground-service microphone type switching, voice e2e scripts, and focused unit coverage. Coverage is Alpha because the strongest e2e artifact is a debug-script path rather than a repeated user install flow. Quality is Alpha because the archive records voice churn, a prior mic-thrashing loop, and unresolved requests around agent/session switching and per-agent TTS voice.
-
-## Category Scope
-
-Included in this category:
-
- Voice tab: Voice tab, manual mic capture, Talk Mode listen/think/speak loop, Gateway Talk config, talk.speak, realtime relay mode, voice capture service type, and voice e2e receiver/script
-
-## Features
-
- Voice tab: Voice tab, manual mic capture, Talk Mode listen/think/speak loop, Gateway Talk config, talk.speak, realtime relay mode, voice capture service type, and voice e2e receiver/script
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Alpha (66%)`
- Positive signals: Docs describe manual mic and Talk capture modes, Android 14+ foreground-service microphone requirements, Gateway `talk.speak`, local TTS fallback, realtime Gateway relay conditions, and Voice Wake being disabled in UX/runtime. The debug voice e2e script can run normal and realtime voice paths through an installed debug app.
- Negative signals: Voice proof depends on debug receiver/script setup and synthetic transcripts; no recurring real-device audio latency, microphone permission, speech-recognizer failure, provider fallback, and background/foreground scenario was found.
- Integration gaps: Need a signed-app voice scorecard that grants microphone permission, runs manual mic and Talk Mode, exercises `talk.speak` fallback, verifies realtime relay when configured, backgrounds/reopens the app, and records failure classifications.
-
-## Quality Score
-
- Score: `Alpha (60%)`
- Gitcrawl reports: `Android Talk Mode` found issue #56613 requesting Voice/Talk tab agent switching and per-agent TTS voice, plus PR #80082 adjusting Android foreground-service use for Talk Mode. `Android app` search also surfaces the broader app rebuild/release context.
- Discrawl reports: Search found a landed PR comment for #66179 exposing Talk Mode in UI and foreground microphone permission; a comment closing #47883 after replacing a mic thrashing loop with the new manual mic path; and a support message noting older Android partial voice behavior and voice-wake/talk-mode churn.
- Good qualities: The current source separates manual mic from Talk Mode, checks microphone permission, handles speech recognizer availability, tracks listening/speaking state, pauses capture during TTS, and falls back from Gateway `talk.speak` when eligible.
- Bad qualities: Voice behavior is constrained by Android speech recognizer availability, audio focus, foreground-service policy, provider configuration, and session routing. User-facing requests for Talk tab agent/session switching remain open.
- Excluded from quality: Test coverage and runtime-flow proof were not used to raise or lower Quality.
-
-## Completeness Score
-
- Score: `Alpha (66%)`
- Surface instructions: evaluated against `references/completeness/android-app.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Voice tab.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Add real-device audio and provider failure scorecards for manual mic and Talk Mode.
- Add Voice tab session/agent selection if Android is expected to match Chat session controls.
- Keep docs aligned with actual Voice Wake state; current docs correctly say Android Voice Wake remains disabled.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/platforms/android.md` documents manual Mic, continuous Talk Mode, foreground-service microphone behavior, Gateway `talk.speak`, local TTS fallback, realtime relay conditions, and disabled Voice Wake.
- `/Users/kevinlin/code/openclaw/apps/android/README.md` lists Voice tab full functionality in the rebuild checklist and documents the `voice-e2e.sh` script.
- `/Users/kevinlin/code/openclaw/docs/nodes/talk.md` is the shared Talk behavior reference.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/VoiceScreen.kt` exposes manual dictation and Talk UI, permission prompts, speaker toggle, status, and transcript rendering.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/voice/MicCaptureManager.kt` implements manual mic transcription, queueing, Gateway send, TTS pause/resume, and pending run timeout.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt` implements Talk Mode listening, speech recognizer lifecycle, chat finalization, realtime relay, audio playback, and interruption controls.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkSpeakClient.kt` calls Gateway `talk.speak` and classifies local fallback.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/NodeForegroundService.kt` promotes Talk Mode to `dataSync|microphone`.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/debug/java/ai/openclaw/app/VoiceE2eReceiver.kt` supports debug voice e2e orchestration.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/apps/android/scripts/voice-e2e.sh` installs the debug app, grants `RECORD_AUDIO`, uses `adb reverse`, drives normal and realtime voice modes through `VoiceE2eReceiver`, captures screenshots, and saves filtered logcat.
- No repeated signed Play build voice scenario was found.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/voice/MicCaptureManagerTest.kt`, `TalkModeManagerTest.kt`, `TalkSpeakClientTest.kt`, `TalkAudioPlayerTest.kt`, `TalkDirectiveParserTest.kt`, `TalkModeConfigParsingTest.kt`, `VoiceWakeCommandExtractorTest.kt`, `VoiceWakeManagerTest.kt`, and `ChatEventTextTest.kt` cover the main voice helpers.
- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/NodeForegroundServiceTest.kt` covers foreground-service type behavior.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "Android Talk Mode" --json`
-
-Results:
-
- Issue #56613 `[Feature]: Talk/Voice tab - agent/session switching + per-agent TTS voice`.
- PR #80082 `fix(android): avoid dataSync FGS for persistent node`.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode fts --limit 5 "Android Talk Mode Voice tab"`
-
-Results:
-
- 2026-04-25 GitHub mirror comment on #66179 says Android Talk Mode UI and foreground microphone permission landed.
- 2026-04-25 GitHub mirror comment on #47883 says the prior mic thrashing loop was replaced by manual Voice tab backed by `MicCaptureManager`.
- 2026-03-28 GitHub mirror issue #56613 requests Voice/Talk tab agent switching and per-agent TTS voice.
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/auth-onboarding-and-credential-profile-health.md
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/auth-onboarding-and-credential-profile-health.md
@@ -1,147 +0,0 @@
---
-title: "Anthropic provider path - Provider Auth and Recovery Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Anthropic provider path - Provider Auth and Recovery Maturity Note
-
-## Summary
-
-Anthropic auth has first-class docs and source paths for API keys, Claude CLI
-credential reuse, setup-token profiles, auth profile ordering, and doctor hints.
-Coverage is Stable because the direct API-key and Claude CLI paths are present
-in docs, plugin registration, provider auth choices, config defaults, and
-focused tests. Quality is Beta because GitHub and Discord archive evidence still
-shows users hitting orphaned profiles, gateway-host credential mismatch, stale
-setup-token or OAuth behavior, and "No API key found" confusion.
-
-## Category Scope
-
-Included in this category:
-
- API-key onboarding: Covers API-key onboarding across Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
- Claude CLI credential reuse: Covers Claude CLI credential reuse across Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
- Setup-token auth: Covers Setup-token auth across Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
- Auth profile health: Covers Auth profile health across Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
- Model status: Covers Model status across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
- Usage windows: Covers Usage windows across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
- Cooldown/profile reporting: Covers Cooldown/profile reporting across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
- Long-context recovery: Covers Long-context recovery across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
- Fallback guidance: Covers Fallback guidance across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
-
-## Features
-
- API-key onboarding: Covers API-key onboarding across Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
- Claude CLI credential reuse: Covers Claude CLI credential reuse across Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
- Setup-token auth: Covers Setup-token auth across Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
- Auth profile health: Covers Auth profile health across Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
- Model status: Covers Model status across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
- Usage windows: Covers Usage windows across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
- Cooldown/profile reporting: Covers Cooldown/profile reporting across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
- Long-context recovery: Covers Long-context recovery across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
- Fallback guidance: Covers Fallback guidance across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (84%)`
- Positive signals: Anthropic docs describe API-key, Claude CLI, and setup-token style paths; `extensions/anthropic/openclaw.plugin.json` publishes auth choices and setup env vars; `extensions/anthropic/register.runtime.ts` implements API-key auth, setup-token auth, Claude CLI migration, synthetic auth, and doctor hints.
- Negative signals: Setup-token live proof is env-gated, and profile health depends on per-agent auth stores and gateway-host runtime state.
- Integration gaps: The audit found strong focused tests and one live setup-token lane, but not a repeated release artifact proving API-key, setup-token, and Claude CLI migration across fresh hosts every release.
-
-## Quality Score
-
- Score: `Beta (74%)`
- Gitcrawl reports: #83268 reports Anthropic API keys being registered under an orphan `claude` provider and silently falling back to OAuth; #72255 reports orphaned per-agent credentials after config declarations are removed; #80514 reports a Claude Pro Max cap warning being classified as billing failure.
- Discrawl reports: Discord archive results include "No API key found for provider anthropic" cases tied to shell versus daemon runtime mismatch, stale/broken auth stores, setup-token policy failures, and profile order confusion.
- Good qualities: The provider owns explicit auth methods, validates setup-token shape, writes auth profiles with locks, resolves Claude CLI native auth only for the synthetic `claude-cli` provider, and emits doctor guidance for legacy profile repair.
- Bad qualities: Users still need to understand several credential planes: gateway token, Anthropic API key, Anthropic setup-token, Claude CLI native auth, per-agent auth store, profile order, cooldown, and daemon environment.
- Excluded from quality: Unit, integration, e2e, live, and real runtime-flow test presence or absence; those are Coverage inputs only.
-
-## Completeness Score
-
- Score: `Stable (84%)`
- Surface instructions: evaluated against `references/completeness/anthropic-provider-path.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for API-key onboarding, Claude CLI credential reuse, Setup-token auth, Auth profile health, Model status, Usage windows, Cooldown/profile reporting, Long-context recovery, Fallback guidance.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Setup-token behavior can be upstream-policy dependent and less predictable
-  than API-key auth.
- Per-agent auth store routing and daemon environment mismatch remain frequent
-  support themes.
- Anthropic API-key and Claude CLI routes share the provider label but have
-  materially different billing and operational behavior.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents API-key setup, Claude CLI setup, setup-token-style troubleshooting, `openclaw models list --provider anthropic`, and states that API keys are the clearest production path for long-lived gateways.
- `/Users/kevinlin/code/openclaw/docs/gateway/doctor.md` documents OAuth expiry and stale Anthropic profile repair guidance.
- `/Users/kevinlin/code/openclaw/docs/gateway/configuration-examples.md` includes Anthropic API-key profile examples and model/fallback config.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/extensions/anthropic/openclaw.plugin.json` declares `providers: ["anthropic"]`, setup env vars `ANTHROPIC_OAUTH_TOKEN` and `ANTHROPIC_API_KEY`, and provider auth choices for Claude CLI, setup-token, and API key.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/register.runtime.ts` implements `runAnthropicSetupTokenAuth`, `runAnthropicSetupTokenNonInteractive`, `runAnthropicCliMigration`, `runAnthropicCliMigrationNonInteractive`, `resolveClaudeCliSyntheticAuth`, `createProviderApiKeyAuthMethod`, and `buildAnthropicAuthDoctorHint`.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/config-defaults.ts` resolves Anthropic default auth mode from profile order, API-key profiles, OAuth/token profiles, and env vars before seeding cache/heartbeat/default model behavior.
- `/Users/kevinlin/code/openclaw/src/commands/doctor-claude-cli.ts` inspects Claude CLI command, credential readability, workspace/project directory health, and selected `claude-cli` runtime agents.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/agents/anthropic.setup-token.live.test.ts` env-gates a live setup-token profile smoke that resolves Anthropic models, extracts the profile API key, and completes a simple prompt.
- `/Users/kevinlin/code/openclaw/test/scripts/package-acceptance-workflow.test.ts` verifies package-acceptance workflow wiring for Anthropic credentials and live Anthropic profiles.
- `/Users/kevinlin/code/openclaw/src/commands/models.list.e2e.test.ts` covers provider catalog/auth rows that include configured provider behavior.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/extensions/anthropic/index.test.ts` covers Claude CLI auth profile migration, synthetic OAuth/token auth, API defaulting, Anthropic config defaults, and doctor-profile hooks.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/provider-policy-api.test.ts` covers provider policy normalization, API-key defaults, Claude CLI config normalization, and thinking profile exposure.
- `/Users/kevinlin/code/openclaw/src/commands/doctor-claude-cli.test.ts` covers Claude CLI doctor behavior.
- `/Users/kevinlin/code/openclaw/src/llm/utils/oauth/anthropic.test.ts` covers Anthropic OAuth login and refresh utilities.
-
-### Gitcrawl queries
-
-Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic API key auth profile setup-token claude-cli"`
-
-Results:
-
- #83268 `[Bug]: Anthropic API key pasted via wizard is registered under provider claude (orphan), silently falling back to OAuth`.
- #72255 `[Bug]: Pi runtime silently uses orphaned credentials from per-agent auth-profiles.json after their declarations are removed from openclaw.json`.
-
-Query: `gitcrawl --json search issues -R openclaw/openclaw "anthropic claude-cli auth login setup token"`
-
-Results:
-
- #70279 `claude-cli backend silently skipped on systemd-managed root gateway, never spawns subprocess`.
- #72255 also appeared as an auth-store leak issue.
-
-Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic usage status API key Claude"`
-
-Results:
-
- #83268 repeated the orphaned provider registration issue.
- #80514 reported Claude Pro Max cap warning classification causing a false billing cooldown.
-
-### Discrawl queries
-
-Query: `discrawl search --limit 10 "Anthropic API key no credentials profile"`
-
-Results:
-
- Returned support threads for "No API key found for provider anthropic" where shell status and daemon runtime disagreed, setup-token paths were confused with API keys, auth profiles won over env vars, and profile order/cooldown affected fallback behavior.
-
-Query: `discrawl search --limit 10 "Anthropic usage status Claude API key"`
-
-Results:
-
- Returned discussions where users confused Claude account usage, Anthropic API-key billing, extra-usage errors, and active profile source in `openclaw models status`.
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/claude-cli-runtime-and-session-bridge.md
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/claude-cli-runtime-and-session-bridge.md
@@ -1,136 +0,0 @@
---
-title: "Anthropic provider path - Claude CLI Backend Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Anthropic provider path - Claude CLI Backend Maturity Note
-
-## Summary
-
-The Claude CLI runtime is a supported bundled backend with docs, plugin
-registration, MCP bridge config, live stdio session defaults, permission-mode
-normalization, session resume, and `/think` effort mapping. Coverage is Stable
-because the main runtime contract is documented and implemented. Quality is
-Alpha because archive evidence shows active user-visible failures around
-backend registration, systemd/root gateway execution, permissions, stream
-buffering, and session resume.
-
-## Category Scope
-
-This category covers OpenClaw's host-local Claude CLI path after auth is
-available: the `claude-cli` backend, its command/args/env defaults, MCP tool
-bridge, native tool mode, live stdio JSONL sessions, permission-mode mapping,
-thinking effort args, session id persistence, transcript validation, and
-fallback prelude behavior.
-
-## Features
-
- Runtime selection: Covers Runtime selection across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related claude cli backend behavior.
- Session continuity: Covers Session continuity across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related claude cli backend behavior.
- MCP/tool bridge: Covers MCP/tool bridge across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related claude cli backend behavior.
- Permission-mode mapping: Covers Permission-mode mapping across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related claude cli backend behavior.
- Fallback prelude: Covers Fallback prelude across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related claude cli backend behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (82%)`
- Positive signals: Docs cover Claude CLI setup, config, sessions, permissions, thinking effort, and fallback prelude; source registers a full backend with live stdio defaults and MCP bridge; tests cover backend registration and config normalization.
- Negative signals: Live Claude CLI coverage is largely indirect through package-acceptance workflow definitions and plugin/unit tests rather than a single direct live runtime test in this audit.
- Integration gaps: Channel-session and daemon/root gateway paths have archived failures that are not obviously covered by the focused backend tests.
-
-## Quality Score
-
- Score: `Alpha (68%)`
- Gitcrawl reports: #70279 reports the backend being skipped on a systemd-managed root gateway; #85408 reports hardcoded MCP flags blocking user-scope MCPs; #85601 reports a bundled MCP config tempDir race; #86050 reports Gateway buffering Claude CLI stream events; #78828 reports root gateway permission-mode stalls.
- Discrawl reports: Discord archive results include `MissingAgentHarnessError: claude-cli is not registered` in Discord group chats while DMs worked, plus guidance showing config path divergence across session routing.
- Good qualities: The backend has conservative default args, clears inherited Claude/Anthropic env that could steer child processes, serializes runs, validates project transcript resume, and maps OpenClaw exec policy into Claude permission mode.
- Bad qualities: The path depends on external CLI installation, local login, host PATH, local project transcript files, channel/session runtime lookup, and provider-owned CLI behavior.
- Excluded from quality: Unit, integration, e2e, live, and real runtime-flow test presence or absence; those are Coverage inputs only.
-
-## Completeness Score
-
- Score: `Stable (82%)`
- Surface instructions: evaluated against `references/completeness/anthropic-provider-path.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Runtime selection, Session continuity, MCP/tool bridge, Permission-mode mapping, Fallback prelude.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- The Claude CLI path is operationally sensitive to host setup and session
-  routing.
- Group/channel session paths have shown runtime lookup divergence from DM/main
-  session paths.
- Some fixes appear as active or recent PRs/issues, so the lived support record
-  is still noisy.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/gateway/cli-backends.md` documents the `claude-cli` backend, MCP bridge behavior, session support, native permission mapping, thinking effort mapping, login prerequisites, session resume, and fallback prelude.
- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents Claude CLI as the host-local credential reuse path and warns about same-host expectations.
- `/Users/kevinlin/code/openclaw/docs/gateway/config-agents.md` recommends canonical `anthropic/*` model refs plus model-scoped `agentRuntime.id: "claude-cli"`.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/extensions/anthropic/cli-backend.ts` registers `claude-cli` with `bundleMcp`, Claude config-file bridge, native tool mode, stream-json args, live stdio sessions, workspace-scoped image args, session ids, raw transcript reseed, watchdog defaults, and serialization.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/cli-shared.ts` clears inherited Anthropic/Claude env vars, normalizes `--setting-sources`, maps OpenClaw exec policy to Claude permission mode, and maps OpenClaw thinking levels to `--effort`.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/config-defaults.ts` backfills `agentRuntime.id: "claude-cli"` for selected canonical Anthropic refs when Claude CLI auth is selected.
- `/Users/kevinlin/code/openclaw/src/commands/doctor-claude-cli.ts` checks command resolution, credentials, workspace/project directory health, and active Claude CLI runtime agents.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/test/scripts/package-acceptance-workflow.test.ts` verifies live Anthropic and Claude CLI workflow wiring, including `OPENCLAW_LIVE_CLI_BACKEND_MODEL=claude-cli/claude-sonnet-4-6` and package install of `@anthropic-ai/claude-code`.
- `/Users/kevinlin/code/openclaw/scripts/e2e/mcp-channels-docker.sh` and `/Users/kevinlin/code/openclaw/scripts/e2e/mcp-channels-docker-client.ts` cover MCP channel notification/permission framing adjacent to Claude channel mode.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/extensions/anthropic/index.test.ts` verifies `claude-cli` backend registration, config defaults, auth migration, and synthetic auth.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/cli-shared.test.ts` verifies permission args, safe setting sources, effort mapping, config normalization, and transcript reseed config.
- `/Users/kevinlin/code/openclaw/src/commands/doctor-claude-cli.test.ts` covers doctor diagnostics for the Claude CLI path.
- `/Users/kevinlin/code/openclaw/src/plugins/bundle-claude-inspect.test.ts` covers bundled Claude inspection behavior.
-
-### Gitcrawl queries
-
-Query: `gitcrawl --json search issues -R openclaw/openclaw "claude-cli live session resume transcript missing permission mode"`
-
-Results:
-
- Returned no direct results for that exact combined query.
-
-Query: `gitcrawl --json search issues -R openclaw/openclaw "Claude CLI OpenClaw MCP allowedTools permission-mode"`
-
-Results:
-
- #85408 `openclaw agent CLI spawn hardcodes --strict-mcp-config + --allowedTools mcp__openclaw__*, blocking user-scope MCPs`.
- #85601 `[regression] Bundled MCP config tempDir race still present`.
- #86050 `[Bug]: Gateway buffers claude-cli stream events; surfaces only see the final assembled message`.
- #78828 `Claude CLI on root gateway: inferred bypassPermissions breaks, acceptEdits partly works, blocked turns can stall until timeout`.
-
-Query: `gitcrawl --json search prs -R openclaw/openclaw "claude-cli"`
-
-Results:
-
- Returned active/recent PRs including #73122 backend registration guardrails, #74990 subscription path in onboard wizard, #85505 host-only CLI auth epoch mode, #87702 env-var scrubbing when spawning Claude, #77148 session fork-on-resume, #86649 partial-message streaming deltas, and #86568 auth cooldown skip for CLI providers.
-
-### Discrawl queries
-
-Query: `discrawl search --limit 10 "Claude CLI OpenClaw auth login claude-cli"`
-
-Results:
-
- Returned a May 26, 2026 support thread where Discord DMs worked but group chats failed with `MissingAgentHarnessError: Requested agent harness "claude-cli" is not registered`, plus older archive entries closing Claude CLI persistence issues and noting implemented CLI delegation.
-
-Query: `discrawl search --limit 10 "Anthropic usage status Claude API key"`
-
-Results:
-
- Returned April 2026 guidance recommending the CLI subprocess path for Claude subscription usage and warning about direct API-key/API billing configuration.
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/diagnostics-usage-status-and-fallback-recovery.md
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/diagnostics-usage-status-and-fallback-recovery.md
@@ -1,142 +0,0 @@
---
-title: "Anthropic provider path - Diagnostics and Recovery Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Anthropic provider path - Diagnostics and Recovery Maturity Note
-
-## Summary
-
-Anthropic diagnostics cover `models status`, auth profile health, OAuth/token
-usage windows, doctor hints, long-context 429 troubleshooting, cooldowns, and
-fallback guidance. Coverage is Beta because important diagnostics exist across
-docs, source, and tests, but the diagnostic surface is split across model
-status, doctor, provider usage, and user-runbook flows. Quality is Beta because
-archive evidence shows users still struggle to distinguish Anthropic API-key
-billing, Claude account usage, cooldowns, extra-usage errors, and profile-store
-state.
-
-## Category Scope
-
-This category covers operator diagnostics and recovery for Anthropic provider
-failures: status output, usage windows, auth profile source reporting, cooldown
-and disabled profile reporting, doctor hints, long-context 429 remediation,
-missing credentials guidance, fallback setup, and provider error/billing
-classification.
-
-## Features
-
- Model status: Covers Model status across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
- Usage windows: Covers Usage windows across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
- Cooldown/profile reporting: Covers Cooldown/profile reporting across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
- Long-context recovery: Covers Long-context recovery across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
- Fallback guidance: Covers Fallback guidance across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Beta (78%)`
- Positive signals: Docs cover common Anthropic credential errors and long-context 429s; source fetches Claude usage windows, reports doctor hints, and handles provider auth profile ordering/cooldowns; tests cover usage fetch and doctor behavior.
- Negative signals: Recovery is distributed across `models status`, `doctor`, troubleshooting docs, model fallback config, and auth-profile commands rather than one cohesive Anthropic diagnostics workflow.
- Integration gaps: The audit did not find a single live failure-to-repair scenario test for Anthropic auth, usage, cooldown, and fallback recovery.
-
-## Quality Score
-
- Score: `Beta (70%)`
- Gitcrawl reports: #80514 reports Claude Pro Max cap warning being classified as billing failure; #83268 reports API-key profile orphaning; #63145 requests per-model health probing across configured models; PR #85666 skips Anthropic API keys for usage status; PR #87697 clears stale provider cooldowns after reauth.
- Discrawl reports: Discord archive results include users seeing "out of extra usage", invalid bearer token, Anthropic API-key versus Claude account billing confusion, profile cooldown confusion, and daemon/auth-store mismatches.
- Good qualities: Docs name concrete commands, usage fetch handles OAuth/web fallback, doctor reports stale OAuth profiles and refresh guidance, and troubleshooting distinguishes credential eligibility from config shape.
- Bad qualities: Users still need to map upstream Anthropic billing and auth semantics to OpenClaw's auth-store, cooldown, fallback, and model-status vocabulary.
- Excluded from quality: Unit, integration, e2e, live, and real runtime-flow test presence or absence; those are Coverage inputs only.
-
-## Completeness Score
-
- Score: `Beta (78%)`
- Surface instructions: evaluated against `references/completeness/anthropic-provider-path.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Model status, Usage windows, Cooldown/profile reporting, Long-context recovery, Fallback guidance.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- `models status` helps, but users still often need maintainer interpretation
-  to connect auth-store source, billing status, and model fallback behavior.
- Usage status behavior differs by auth mode and scope.
- The long-context 429 path is documented, but upstream eligibility remains
-  outside OpenClaw control.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` includes troubleshooting accordions for token invalidity, no API key, no profile, and all profiles in cooldown.
- `/Users/kevinlin/code/openclaw/docs/gateway/troubleshooting.md` documents the exact long-context 429 symptom, commands to inspect logs/status/config, causes, and fix options.
- `/Users/kevinlin/code/openclaw/docs/gateway/doctor.md` documents OAuth expiry/refresh behavior, Anthropic API-key or setup-token suggestions, and profile cooldown/disabled reporting.
- `/Users/kevinlin/code/openclaw/docs/reference/prompt-caching.md` documents usage cache counters and Anthropic provider behavior.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/src/infra/provider-usage.fetch.claude.ts` fetches Anthropic OAuth usage windows, supports claude.ai web-session fallback for missing `user:profile` scope, and returns structured provider usage snapshots.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/register.runtime.ts` wires `fetchUsageSnapshot`, `resolveUsageAuth`, `buildAuthDoctorHint`, and `isCacheTtlEligible`.
- `/Users/kevinlin/code/openclaw/src/commands/doctor-claude-cli.ts` checks Claude CLI command, credentials, workspace/project dirs, and profile store health.
- `/Users/kevinlin/code/openclaw/src/agents/embedded-agent-helpers/provider-error-patterns.ts` and adjacent provider fallback helpers classify provider errors used in recovery/fallback decisions.
- `/Users/kevinlin/code/openclaw/src/commands/models/list.status-command.ts` and related model list/status modules render provider/auth health.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/commands/models.list.e2e.test.ts` covers provider/auth catalog responsiveness in the models command surface.
- `/Users/kevinlin/code/openclaw/test/scripts/package-acceptance-workflow.test.ts` verifies Anthropic credential requirements and live profile wiring in package acceptance.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/src/infra/provider-usage.fetch.claude.test.ts` covers Claude usage fetch behavior.
- `/Users/kevinlin/code/openclaw/src/commands/doctor-claude-cli.test.ts` covers Claude CLI doctor diagnostics.
- `/Users/kevinlin/code/openclaw/src/commands/models/list.status.test.ts` and related model status/list tests cover provider status rendering.
- `/Users/kevinlin/code/openclaw/src/agents/embedded-agent-helpers/provider-error-patterns.test.ts` covers provider error classification.
-
-### Gitcrawl queries
-
-Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic usage status API key Claude"`
-
-Results:
-
- #83268 reports API-key wizard provider orphaning.
- #80514 reports Claude Pro Max cap warning being classified as a billing failure and creating a false cooldown.
-
-Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic 429 long context extra usage required fallback"`
-
-Results:
-
- Returned no direct results for that exact issue query.
-
-Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic No API key found provider anthropic models status"`
-
-Results:
-
- Returned related auth/provider-status issues including #63145 for per-model health checks and auth-profile issues from other providers.
-
-### Discrawl queries
-
-Query: `discrawl search --limit 10 "Anthropic usage status Claude API key"`
-
-Results:
-
- Returned support threads about API-key billing versus Claude account usage, extra-usage errors, invalid bearer token, provider profile source, and what to inspect in `openclaw models status`.
-
-Query: `discrawl search --limit 10 "Claude 4.6 1M context Anthropic 429"`
-
-Results:
-
- Returned long-context 429 troubleshooting guidance and extra-usage eligibility notes.
-
-Query: `discrawl search --limit 10 "Anthropic API key no credentials profile"`
-
-Results:
-
- Returned profile/store mismatch, no-key/no-profile, and setup-token confusion threads.
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/direct-anthropic-messages-transport-and-streaming.md
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/direct-anthropic-messages-transport-and-streaming.md
@@ -1,142 +0,0 @@
---
-title: "Anthropic provider path - Request Transport and Turn Semantics Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Anthropic provider path - Request Transport and Turn Semantics Maturity Note
-
-## Summary
-
-The direct Anthropic Messages transport is deeply implemented: it builds
-Anthropic request payloads, handles API-key and OAuth/token headers, decodes
-SSE events, tracks usage, maps stop reasons, handles aborts, and supports
-Anthropic-compatible endpoints. Coverage is Stable because source and tests
-exercise the main payload and stream behavior. Quality is Beta because archive
-evidence shows recurring malformed/truncated stream and tool-call failures that
-have required repeated fixes.
-
-## Category Scope
-
-Included in this category:
-
- API-key/OAuth transport: Covers API-key/OAuth transport across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct anthropic api behavior.
- Messages payloads: Covers Messages payloads across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct anthropic api behavior.
- Streaming decode: Covers Streaming decode across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct anthropic api behavior.
- Usage and stop reasons: Covers Usage and stop reasons across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct anthropic api behavior.
- Abort/error handling: Covers Abort/error handling across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct anthropic api behavior.
- Tool-use blocks: Covers Tool-use blocks across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
- Tool-result replay: Covers Tool-result replay across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
- Partial JSON recovery: Covers Partial JSON recovery across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
- Native thinking: Covers Native thinking across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
- Signed/redacted thinking replay: Covers Signed/redacted thinking replay across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
-
-## Features
-
- API-key/OAuth transport: Covers API-key/OAuth transport across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct anthropic api behavior.
- Messages payloads: Covers Messages payloads across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct anthropic api behavior.
- Streaming decode: Covers Streaming decode across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct anthropic api behavior.
- Usage and stop reasons: Covers Usage and stop reasons across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct anthropic api behavior.
- Abort/error handling: Covers Abort/error handling across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct anthropic api behavior.
- Tool-use blocks: Covers Tool-use blocks across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
- Tool-result replay: Covers Tool-result replay across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
- Partial JSON recovery: Covers Partial JSON recovery across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
- Native thinking: Covers Native thinking across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
- Signed/redacted thinking replay: Covers Signed/redacted thinking replay across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (84%)`
- Positive signals: Transport source covers client construction, headers, payloads, SSE event iteration, usage accounting, abort handling, and compatible endpoint behavior; unit tests cover direct Anthropic, OAuth, custom endpoints, malformed SSE, unsafe integer tool-use input, and abort behavior; a live transport test covers real HTTP stream abort.
- Negative signals: Some live provider behaviors are env-gated and provider-specific stream drift cannot be fully proven from local tests.
- Integration gaps: The audit found live abort proof and extensive unit coverage, but not repeated live proof for every Anthropic model/auth combination.
-
-## Quality Score
-
- Score: `Beta (72%)`
- Gitcrawl reports: #60593 reports recurring Anthropic streaming JSON parse errors where failover often failed; PR #62429 sanitized control characters in Anthropic streaming JSON; PR #61349 suppressed raw JSON parse errors from truncated tool-call streams; PR #86959 finalized abandoned managed-response streams to release sockets.
- Discrawl reports: Discord archive results include session corruption from truncated streaming tool calls, raw parse errors sent to users, and Anthropic stream parse fixes.
- Good qualities: The transport classifies malformed SSE as a stable transport error, preserves provider usage fields, avoids direct-Anthropic beta headers on custom hosts, cancels stalled reads on abort, and separates API-key from OAuth header behavior.
- Bad qualities: Anthropic and Anthropic-compatible stream shapes have produced recurring operational incidents around malformed JSON, partial tool deltas, control characters, and aborted streams.
- Excluded from quality: Unit, integration, e2e, live, and real runtime-flow test presence or absence; those are Coverage inputs only.
-
-## Completeness Score
-
- Score: `Stable (84%)`
- Surface instructions: evaluated against `references/completeness/anthropic-provider-path.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for API-key/OAuth transport, Messages payloads, Streaming decode, Usage and stop reasons, Abort/error handling, Tool-use blocks, Tool-result replay, Partial JSON recovery, Native thinking, Signed/redacted thinking replay.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Provider stream drift remains a recurring source of defects.
- Some compatible providers need custom handling for endpoint classification,
-  cache markers, reasoning content, and stream sanitization.
- Direct Anthropic model/auth combinations need recurring live proof beyond
-  local mock transport tests.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents API-key and Claude CLI routes, thinking defaults, prompt caching, fast mode, media, and 1M context behavior.
- `/Users/kevinlin/code/openclaw/docs/reference/prompt-caching.md` documents Anthropic usage counters and cache behavior that the transport reports.
- `/Users/kevinlin/code/openclaw/docs/gateway/troubleshooting.md` documents Anthropic long-context 429 errors and fallback guidance.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.ts` implements Anthropic SDK client construction, OAuth/API-key header handling, cache retention, SSE decoding, content/tool/thinking events, usage accounting, stop reason mapping, message conversion, image conversion, and tool conversion.
- `/Users/kevinlin/code/openclaw/src/agents/anthropic-transport-stream.ts` implements guarded fetch transport, direct Anthropic model-id stripping, endpoint classification, beta headers, OAuth identity headers, usage/cost accounting, malformed stream classification, and abort-safe streaming.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/stream-wrappers.ts` composes beta-header, fast-mode, service-tier, and thinking-prefill wrappers around Anthropic streams.
- `/Users/kevinlin/code/openclaw/src/agents/provider-transport-fetch.ts` provides guarded model fetch plumbing used by the transport.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/agents/anthropic-transport-stream.live.test.ts` starts a loopback HTTP SSE server and proves Anthropic transport aborts a real in-flight stream.
- `/Users/kevinlin/code/openclaw/test/scripts/package-acceptance-workflow.test.ts` verifies live Anthropic gateway smoke profile wiring.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/src/agents/anthropic-transport-stream.test.ts` covers guarded fetch usage, model-id stripping, custom endpoint header behavior, malformed stream classification, unsafe integer preservation, OAuth identity/tool remapping, text/thinking blocks, aborts, and adaptive thinking request shape.
- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.test.ts` covers SDK client construction behavior and signed thinking replay payloads.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/stream-wrappers.test.ts` covers beta stripping, OAuth/default beta headers, service-tier injection/skips, and thinking prefill stripping.
- `/Users/kevinlin/code/openclaw/src/agents/anthropic-payload-policy.test.ts` covers Anthropic cache and service-tier policy shaping.
-
-### Gitcrawl queries
-
-Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic tool call streaming JSON parse error"`
-
-Results:
-
- #60593 `Recurring Anthropic streaming JSON parse errors (Sonnet 4.5 / Opus) - failover often fails to recover`.
-
-Query: `gitcrawl --json search prs -R openclaw/openclaw "anthropic streaming"`
-
-Results:
-
- #62112 preserves Anthropic refusal handling.
- #74432 honors `ANTHROPIC_BASE_URL`.
- #86649 relays Claude CLI assistant partial messages as streaming deltas.
- #75136 preserves Anthropic stream usage.
- #62429 and #61349 appeared in archive results as stream/tool-call parse fixes.
-
-### Discrawl queries
-
-Query: `discrawl search --limit 10 "Anthropic tool call streaming parse JSON"`
-
-Results:
-
- Returned April 2026 reports for session corruption from truncated Anthropic streaming tool calls, issue #69846, PR #62429 for control-character sanitization, PR #61349 for raw parse error suppression, and PR #44237 for recovering tool-call args from `partialJson`.
-
-Query: `discrawl search --limit 10 "Anthropic thinking signature cache control"`
-
-Results:
-
- Returned no direct results for that exact query.
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/media-understanding-and-document-inputs.md
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/media-understanding-and-document-inputs.md
@@ -1,112 +0,0 @@
---
-title: "Anthropic provider path - Media Inputs Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Anthropic provider path - Media Inputs Maturity Note
-
-## Summary
-
-Anthropic media support is a smaller, clearly bounded part of the provider
-path. Docs state that the bundled Anthropic plugin registers image and PDF
-understanding, source registers image capability with native PDF document input
-metadata, and model metadata normalizes image-capable Claude rows. Coverage is
-Beta because source and docs are clear but live Anthropic media scenario proof
-is thinner than text/tool transport proof. Quality is Stable because the surface
-is small, directly mapped to provider capabilities, and the archive search did
-not find feature-specific user reports after freshness checks.
-
-## Category Scope
-
-Included in this category:
-
- Image input: Covers Image input across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
- PDF document input: Covers PDF document input across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
- Media model fallback: Covers Media model fallback across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
- Image tool results: Covers Image tool results across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
-
-## Features
-
- Image input: Covers Image input across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
- PDF document input: Covers PDF document input across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
- Media model fallback: Covers Media model fallback across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
- Image tool results: Covers Image tool results across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Beta (74%)`
- Positive signals: Docs describe image and PDF understanding; manifest and provider source register media metadata; direct transport converts image blocks; tests cover image media metadata and image tool-result payload conversion.
- Negative signals: The audit did not find a dedicated live Anthropic image/PDF scenario artifact or per-release media smoke result.
- Integration gaps: Media support is covered more by provider registration and payload tests than by end-to-end media runs.
-
-## Quality Score
-
- Score: `Stable (82%)`
- Gitcrawl reports: The feature-specific GitHub issue query returned no direct Anthropic media reports after freshness checks.
- Discrawl reports: The feature-specific Discord query returned no direct Anthropic media reports after freshness checks.
- Good qualities: The capability is small, declarative, and aligned with modern Claude model metadata; source keeps media model defaults and native document input metadata in one plugin-owned surface.
- Bad qualities: Docs say image and PDF understanding, while the provider capability list is `["image"]` plus separate `nativeDocumentInputs: ["pdf"]`; that split can require careful wording as docs evolve.
- Excluded from quality: Unit, integration, e2e, live, and real runtime-flow test presence or absence; those are Coverage inputs only.
-
-## Completeness Score
-
- Score: `Beta (74%)`
- Surface instructions: evaluated against `references/completeness/anthropic-provider-path.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Image input, PDF document input, Media model fallback, Image tool results.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- No dedicated live Anthropic image/PDF proof was found in this audit.
- PDF support is represented as native document input metadata rather than a
-  separate capability id.
- Media generation is out of scope; this component is media understanding only.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents "Media understanding (image and PDF)", default model `claude-opus-4-7`, supported input images/PDF documents, and automatic routing through the Anthropic media understanding provider.
- `/Users/kevinlin/code/openclaw/docs/gateway/config-agents.md` documents image resize behavior for Claude Opus 4.7 and other vision models.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/extensions/anthropic/openclaw.plugin.json` declares media understanding provider metadata for Anthropic with capability `image`, default image model `claude-opus-4-7`, auto priority `20`, and native document input `pdf`.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/media-understanding-provider.ts` registers `anthropicMediaUnderstandingProvider` with image capabilities, default models, auto priority, native document inputs, and `describeImage`/`describeImages` helpers.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/register.runtime.ts` normalizes modern Claude models to include image input and model-specific media input sizing.
- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.ts` converts user image blocks and image tool-result blocks into Anthropic image content.
- `/Users/kevinlin/code/openclaw/src/agents/anthropic-transport-stream.ts` performs transport-side image and tool-result conversion for Anthropic Messages payloads.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/test/scripts/package-acceptance-workflow.test.ts` includes live Anthropic profile wiring but does not by itself prove image/PDF scenarios.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/extensions/anthropic/index.test.ts` covers stale text-only modern Claude vision row normalization and media metadata merge for `claude-opus-4-7`.
- `/Users/kevinlin/code/openclaw/src/agents/anthropic-transport-stream.test.ts` covers image tool-result conversion and image payload shape.
- `/Users/kevinlin/code/openclaw/src/agents/embedded-agent-runner/model.provider-runtime.test-support.ts` defines Anthropic vision model prefixes used in provider runtime test support.
-
-### Gitcrawl queries
-
-Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic media image PDF Claude"`
-
-Results:
-
- Returned no direct results for Anthropic media/image/PDF reports.
-
-### Discrawl queries
-
-Query: `discrawl search --limit 10 "Anthropic media understanding image PDF Claude"`
-
-Results:
-
- Returned no direct results for Anthropic media/image/PDF reports.
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/model-catalog-aliases-and-runtime-policy.md
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/model-catalog-aliases-and-runtime-policy.md
@@ -1,147 +0,0 @@
---
-title: "Anthropic provider path - Model and Runtime Selection Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Anthropic provider path - Model and Runtime Selection Maturity Note
-
-## Summary
-
-Anthropic model catalog coverage is Stable. The bundled manifest publishes
-direct Anthropic and Claude CLI model rows, source backfills current Claude 4.x
-variants, normalizes image and 1M context metadata, and maps selected Claude CLI
-auth to model-scoped runtime policy. Quality is Beta because users still hit
-model allowlist/catalog confusion, and current Claude model naming/metadata
-requires frequent forward-compatibility maintenance.
-
-## Category Scope
-
-Included in this category:
-
- Bundled Claude catalog: Covers Bundled Claude catalog across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
- Canonical anthropic refs: Covers Canonical anthropic refs across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
- Claude CLI compatibility: Covers Claude CLI compatibility across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
- Model picker availability: Covers Model picker availability across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
- Capability metadata: Covers Capability metadata across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
- Runtime selection: Covers Runtime selection across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
- Session continuity: Covers Session continuity across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
- MCP/tool bridge: Covers MCP/tool bridge across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
- Permission-mode mapping: Covers Permission-mode mapping across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
- Fallback prelude: Covers Fallback prelude across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
-
-## Features
-
- Bundled Claude catalog: Covers Bundled Claude catalog across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
- Canonical anthropic refs: Covers Canonical anthropic refs across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
- Claude CLI compatibility: Covers Claude CLI compatibility across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
- Model picker availability: Covers Model picker availability across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
- Capability metadata: Covers Capability metadata across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
- Runtime selection: Covers Runtime selection across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
- Session continuity: Covers Session continuity across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
- MCP/tool bridge: Covers MCP/tool bridge across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
- Permission-mode mapping: Covers Permission-mode mapping across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
- Fallback prelude: Covers Fallback prelude across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (84%)`
- Positive signals: The bundled manifest includes direct Anthropic and Claude CLI models; docs describe canonical refs and runtime policy; source has forward-compatible model resolution, 1M context normalization, image-capability normalization, and alias migration; tests pin key model metadata behavior.
- Negative signals: The catalog remains static/discovery-light for direct Anthropic and relies on source-maintained forward-compatibility for new Claude ids.
- Integration gaps: Release proof for fresh upstream catalog drift is weaker than the source/unit-test proof.
-
-## Quality Score
-
- Score: `Beta (76%)`
- Gitcrawl reports: PR #75157 addresses catalog display names for agent models; PR #72404 defaults explicit-only vision-capable models to image-capable; PR #80394 adds per-agent model allowlists; PR #67731 pins Opus 4.7 variant resolution and thinking-default regression coverage.
- Discrawl reports: Discord archive includes `claude-cli models not in catalog` and "only Sonnet available" support threads tied to allowlist/catalog configuration and cooldown confusion.
- Good qualities: Canonical refs, alias handling, dynamic model fallback, 1M context metadata, image input normalization, and selected Claude CLI runtime backfill are centralized in the bundled provider.
- Bad qualities: Operators can still confuse auth/profile state, configured model allowlists, provider catalog rows, and runtime selection when a desired Claude model does not appear or is unavailable.
- Excluded from quality: Unit, integration, e2e, live, and real runtime-flow test presence or absence; those are Coverage inputs only.
-
-## Completeness Score
-
- Score: `Stable (84%)`
- Surface instructions: evaluated against `references/completeness/anthropic-provider-path.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Bundled Claude catalog, Canonical anthropic refs, Claude CLI compatibility, Model picker availability, Capability metadata, Runtime selection, Session continuity, MCP/tool bridge, Permission-mode mapping, Fallback prelude.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- New Claude model ids and dated variants require ongoing forward-compatibility
-  upkeep.
- Docs and config guidance have moved from `claude-cli/*` refs toward canonical
-  `anthropic/*` refs plus runtime policy, while legacy configs still exist.
- Model availability UX can still make catalog, allowlist, cooldown, and
-  credential problems look similar.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents `anthropic/*` refs, Claude CLI runtime override, Claude 4.6 thinking defaults, prompt caching, media support, and 1M context behavior.
- `/Users/kevinlin/code/openclaw/docs/gateway/config-agents.md` documents runtime-policy precedence and recommends canonical `anthropic/claude-opus-4-7` plus `agentRuntime.id: "claude-cli"`.
- `/Users/kevinlin/code/openclaw/docs/concepts/models.md` documents provider/model ref selection and fallback behavior used by Anthropic rows.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/extensions/anthropic/openclaw.plugin.json` publishes static model catalog rows for `claude-cli` and `anthropic`, including Opus 4.7, Sonnet 4.6, Opus 4.6, reasoning flags, image input metadata, context windows, max tokens, provider endpoints, alias normalization, and provider request family.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/register.runtime.ts` resolves modern Claude model ids, applies GA 1M context windows, normalizes image media input, publishes Claude CLI catalog entries, and exposes thinking profiles.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/claude-model-refs.ts` canonicalizes Claude family aliases, upgrades old Claude 3/4 refs, and maps legacy `claude-cli/*` refs back to canonical Anthropic refs.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/config-defaults.ts` collects Claude CLI runtime refs and backfills `agentRuntime.id: "claude-cli"` when Claude CLI auth or model selection is active.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/commands/models.list.e2e.test.ts` covers provider catalog rows and model list behavior.
- `/Users/kevinlin/code/openclaw/test/scripts/package-acceptance-workflow.test.ts` verifies live Anthropic model profile inputs such as `OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-7` and Sonnet/Haiku model lists.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/extensions/anthropic/index.test.ts` covers model API defaulting, Claude CLI allowlist backfill, shorthand refs, future Anthropic refs, Opus 4.7 resolution from templates, image media metadata, 1M context normalization, and synthetic auth.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/provider-policy-api.test.ts` covers public provider policy normalization and thinking profile exposure.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/cli-migration.test.ts` covers migration behavior from Claude CLI auth.
- `/Users/kevinlin/code/openclaw/src/agents/model-catalog-lookup.ts` and adjacent tests cover model catalog lookup used by agent runtime selection.
-
-### Gitcrawl queries
-
-Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic model catalog claude opus sonnet haiku models list"`
-
-Results:
-
- Returned no direct results for that exact issue query.
-
-Query: `gitcrawl --json search prs -R openclaw/openclaw "Anthropic model catalog Claude Opus 4.7 Sonnet 4.6"`
-
-Results:
-
- #75157 `fix(ui): use catalog display names for agent models`.
- #72404 `fix(models): default input=[text,image] for vision-capable explicit-only models`.
- #80394 `feat(agents): per-agent model allowlist (with fallback to global)`.
-
-Query: `gitcrawl --json search prs -R openclaw/openclaw "Anthropic thinking"`
-
-Results:
-
- #67731 `test(anthropic): pin Opus 4.7 variant resolution + thinking-default regression coverage`.
- #70584 `fix: clamp effort=low/minimal to medium for claude-opus-4.7`.
-
-### Discrawl queries
-
-Query: `discrawl search --limit 10 "Anthropic model catalog claude opus sonnet OpenClaw"`
-
-Results:
-
- Returned support threads for `claude-cli models not in catalog`, configuring Opus/Sonnet model allowlists, and users mistaking rate-limit cooldown for catalog changes.
-
-Query: `discrawl search --limit 10 "Claude CLI OpenClaw auth login claude-cli"`
-
-Results:
-
- Returned implemented Claude CLI delegation notes and user support threads where model/runtime policy differed between session paths.
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/prompt-caching-context-windows-and-request-knobs.md
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/prompt-caching-context-windows-and-request-knobs.md
@@ -1,141 +0,0 @@
---
-title: "Anthropic provider path - Prompt Cache and Context Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Anthropic provider path - Prompt Cache and Context Maturity Note
-
-## Summary
-
-Anthropic prompt caching and request shaping are first-class: docs explain
-`cacheRetention`, 1M context windows, fast mode, and long-context troubleshooting;
-source injects cache markers, strips retired betas, applies service-tier knobs,
-and normalizes GA 1M context metadata. Coverage is Stable because docs, source,
-and tests cover the main knobs. Quality is Beta because Discord/GitHub archives
-show users still need help with cache TTL expectations, custom-provider long
-TTL limits, long-context 429s, and setup-token/API-key eligibility.
-
-## Category Scope
-
-Included in this category:
-
- Cache retention: Covers Cache retention across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
- System-prompt cache boundary: Covers System-prompt cache boundary across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
- 1M context: Covers 1M context across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
- Fast mode/service tier: Covers Fast mode/service tier across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
- Cache diagnostics: Covers Cache diagnostics across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
-
-## Features
-
- Cache retention: Covers Cache retention across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
- System-prompt cache boundary: Covers System-prompt cache boundary across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
- 1M context: Covers 1M context across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
- Fast mode/service tier: Covers Fast mode/service tier across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
- Cache diagnostics: Covers Cache diagnostics across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (82%)`
- Positive signals: Docs cover prompt caching, cacheRetention merge order, direct Anthropic 5-minute and 1-hour TTL behavior, 1M context, fast mode, and 429 remediation; source and tests cover cache markers, context window overrides, beta stripping, service-tier behavior, and cache defaults.
- Negative signals: Live proof for cache-hit behavior and 1M context eligibility depends on upstream account state and is not fully covered by deterministic local tests.
- Integration gaps: The audit found docs/source/test proof but not a repeated live Anthropic cache-hit and 1M-context release smoke artifact.
-
-## Quality Score
-
- Score: `Beta (76%)`
- Gitcrawl reports: #37966 reports `cacheRetention` ignored for LiteLLM-proxied Anthropic models; #62475 requests prompt-cache keep-warm pings; #63030 reports system prompt assembly drift causing Anthropic cache invalidation; PR #79370 fixes explicit cacheRetention for OpenRouter to Anthropic models.
- Discrawl reports: Discord archive results include custom provider 1-hour cache questions, Haiku cache-hit confusion, system-prompt cache-boundary guidance, and 1M context 429 extra-usage troubleshooting.
- Good qualities: Cache policy is centralized, long TTL is endpoint-gated, retired 1M beta headers are stripped, API-key auth gets conservative defaults, and docs distinguish direct Anthropic from custom/proxy behavior.
- Bad qualities: Users still need to reason about TTLs, heartbeat, context pruning, long-context account eligibility, API-key versus setup-token behavior, and custom endpoint limitations.
- Excluded from quality: Unit, integration, e2e, live, and real runtime-flow test presence or absence; those are Coverage inputs only.
-
-## Completeness Score
-
- Score: `Stable (82%)`
- Surface instructions: evaluated against `references/completeness/anthropic-provider-path.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Cache retention, System-prompt cache boundary, 1M context, Fast mode/service tier, Cache diagnostics.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- 1M context can be correctly sized locally while still failing upstream for
-  account eligibility.
- Long cache TTL does not apply uniformly to arbitrary Anthropic-compatible
-  custom hosts, which is easy to misconfigure.
- Cache-hit proof is mostly usage-derived and needs repeated live scenario
-  captures for release readiness.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents Claude 4.6 thinking defaults, `cacheRetention`, fast mode, media, 1M context, and troubleshooting for invalid/no credentials.
- `/Users/kevinlin/code/openclaw/docs/reference/prompt-caching.md` documents Anthropic direct API caching, `cacheRetention` merge order, cache-ttl pruning, heartbeat keep-warm, direct Anthropic 1-hour TTL, OpenRouter Anthropic cache handling, and system-prompt cache boundaries.
- `/Users/kevinlin/code/openclaw/docs/gateway/troubleshooting.md` documents `HTTP 429: rate_limit_error: Extra usage is required for long context requests` and fixes.
- `/Users/kevinlin/code/openclaw/docs/gateway/heartbeat.md` documents heartbeat intervals, including a longer interval for Anthropic OAuth/token auth.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/extensions/anthropic/config-defaults.ts` seeds `contextPruning.mode: "cache-ttl"`, heartbeat intervals, API-key `cacheRetention: "short"`, and Claude CLI runtime defaults.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/stream-wrappers.ts` strips retired context-1m betas, adds Anthropic beta headers, injects service-tier/fast-mode params for API keys, and strips unsafe thinking prefill.
- `/Users/kevinlin/code/openclaw/src/agents/anthropic-payload-policy.ts` applies Anthropic cache-control markers to system and trailing user turns, respects system-prompt cache boundaries, gates long TTL by endpoint, and injects service tier.
- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.ts` resolves cache retention, applies cache control to system/tool/message payloads, and records cache usage counters.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/register.runtime.ts` applies GA 1M context metadata to modern Claude 4.x models.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/test/scripts/package-acceptance-workflow.test.ts` includes live Anthropic model profiles that exercise modern Claude model refs.
- `/Users/kevinlin/code/openclaw/src/agents/anthropic.setup-token.live.test.ts` env-gates live setup-token completion, indirectly proving request knobs can coexist with resolved token auth.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/extensions/anthropic/stream-wrappers.test.ts` covers beta stripping, OAuth beta preservation, service-tier injection/skips, fast mode behavior, and thinking prefill stripping.
- `/Users/kevinlin/code/openclaw/src/agents/anthropic-payload-policy.test.ts` covers cache marker application, endpoint gating, system prompt boundary handling, and service-tier policy.
- `/Users/kevinlin/code/openclaw/src/llm/providers/stream-wrappers/anthropic-cache-control-payload.test.ts` covers cache-control payload marker behavior.
- `/Users/kevinlin/code/openclaw/src/agents/embedded-agent-runner/extra-params.cache-retention-default.test.ts` covers Anthropic-family cache semantics and explicit retention.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/index.test.ts` covers 1M context normalization and API-key cacheRetention defaults.
-
-### Gitcrawl queries
-
-Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic 429 long context extra usage required fallback"`
-
-Results:
-
- Returned no direct results for that exact GitHub issue query.
-
-Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic prompt caching cacheRetention"`
-
-Results:
-
- #37966 `[Bug]: cacheRetention ignored for LiteLLM-proxied Anthropic models`.
- #62475 requests prompt cache keep-warm pings.
- #63030 reports system prompt assembly differences causing continuous Anthropic cache invalidation.
-
-Query: `gitcrawl --json search prs -R openclaw/openclaw "anthropic cacheRetention"`
-
-Results:
-
- #79370 `fix(cache): honour explicit cacheRetention for OpenRouter to Anthropic models`.
- #76741 `fix(kimi): strip anthropic cache markers`.
-
-### Discrawl queries
-
-Query: `discrawl search --limit 10 "Claude 4.6 1M context Anthropic 429"`
-
-Results:
-
- Returned March 2026 support threads explaining `HTTP 429: rate_limit_error: Extra usage is required for long context requests`, extra-usage requirements, API-key eligibility, and config changes to remove `context1m`.
-
-Query: `discrawl search --limit 10 "Anthropic prompt caching cacheRetention OpenClaw"`
-
-Results:
-
- Returned prompt caching support threads about Haiku cache-hit rates, configurable `cacheRetention`, static/dynamic system-prompt split, custom provider 1-hour cache limitations, and cache write spikes.
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/report.md
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/report.md
@@ -1,232 +0,0 @@
---
-title: "Anthropic provider path Maturity Report"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Anthropic provider path Maturity Report
-
-## Top-level scores
-
-These rollups are simple arithmetic means over the category-note numeric
-scores in
-`scores.yaml`. Percentages are rounded to the nearest whole number.
-
- Coverage: `Stable (80%)`
- Quality: `Beta (74%)`
- Completeness: `Stable (80%)`
- LTS Features: `0/5`
-
-## Summary
-
-This report promotes the archived `anthropic-provider-path` maturity evidence from `/Users/kevinlin/tmp/maturity/anthropic-provider-path` into the current process-version-3 inventory contract.
-
-The category Coverage and Quality scores come from the archived evidence-backed score rows. Completeness is initialized from the same archived evidence breadth and known-gap record, then joined with the surface-specific completeness rubric referenced by taxonomy.
-
-## Matrix
-
-| Category                                                                                     | LTS | Coverage       | Quality        | Completeness   | Features to evaluate                                                                                                                                                                                                          |
-| -------------------------------------------------------------------------------------------- | --- | -------------- | -------------- | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| [Provider Auth and Recovery](auth-onboarding-and-credential-profile-health.md)               | ❌  | `Beta (78%)`   | `Beta (70%)`   | `Beta (78%)`   | API-key onboarding, Claude CLI credential reuse, Setup-token auth, Auth profile health, Model status, Usage windows, Cooldown/profile reporting, Long-context recovery, Fallback guidance                                     |
-| [Model and Runtime Selection](model-catalog-aliases-and-runtime-policy.md)                   | ❌  | `Stable (82%)` | `Alpha (68%)`  | `Stable (82%)` | Bundled Claude catalog, Canonical anthropic refs, Claude CLI compatibility, Model picker availability, Capability metadata, Runtime selection, Session continuity, MCP/tool bridge, Permission-mode mapping, Fallback prelude |
-| [Request Transport and Turn Semantics](direct-anthropic-messages-transport-and-streaming.md) | ❌  | `Stable (82%)` | `Beta (72%)`   | `Stable (82%)` | API-key/OAuth transport, Messages payloads, Streaming decode, Usage and stop reasons, Abort/error handling, Tool-use blocks, Tool-result replay, Partial JSON recovery, Native thinking, Signed/redacted thinking replay      |
-| [Prompt Cache and Context](prompt-caching-context-windows-and-request-knobs.md)              | ❌  | `Stable (82%)` | `Beta (76%)`   | `Stable (82%)` | Cache retention, System-prompt cache boundary, 1M context, Fast mode/service tier, Cache diagnostics                                                                                                                          |
-| [Media Inputs](media-understanding-and-document-inputs.md)                                   | ❌  | `Beta (74%)`   | `Stable (82%)` | `Beta (74%)`   | Image input, PDF document input, Media model fallback, Image tool results                                                                                                                                                     |
-
-## Scoring rubric
-
- Coverage:
-  maturity-label rating for integration, e2e, live, or server/runtime flow
-  evidence across the category. Unit tests can provide supporting context but never make a
-  feature covered by themselves.
- Quality:
-  maturity-label rating for implementation and operational robustness. Unit,
-  integration, e2e, live, and real runtime-flow test coverage are Coverage
-  inputs only; they do not raise or lower Quality.
- Completeness:
-  maturity-label rating for how fully the category delivers the intended
-  surface-specific capability set. Use the taxonomy-linked completeness
-  instructions for this surface.
- LTS:
-  calculated as `quality > 80 and coverage > 90`, or when the matching
-  taxonomy category sets `human_lts_override`.
- Shared score bands:
-  `Lovable = 95-100`, `Stable = 80-95`, `Beta = 70-80`,
-  `Alpha = 50-70`, and `Experimental = 0-50`. At shared boundaries, choose the
-  higher maturity label.
- Major quality/completeness gaps:
-  evidence text only, tracked in the detailed feature inventory rather than as a
-  separate scored dimension.
-
-## Detailed feature inventory
-
-### 1. Provider Auth and Recovery
-
-Search anchors: API-key onboarding, Claude CLI credential reuse, Setup-token auth, Auth profile health, Model status, Usage windows, Cooldown/profile reporting, Long-context recovery, Fallback guidance.
-
-Category note: [Provider Auth and Recovery](auth-onboarding-and-credential-profile-health.md)
-
-Score decisions:
-
- Coverage: `Beta (78%)`
- Quality: `Beta (70%)`
- Completeness: `Beta (78%)`
- LTS: ❌
-
-Features:
-
- API-key onboarding: Covers API-key onboarding across the Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
- Claude CLI credential reuse: Covers Claude CLI credential reuse across the Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
- Setup-token auth: Covers Setup-token auth across the Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
- Auth profile health: Covers Auth profile health across the Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
- Model status: Covers Model status across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
- Usage windows: Covers Usage windows across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
- Cooldown/profile reporting: Covers Cooldown/profile reporting across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
- Long-context recovery: Covers Long-context recovery across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
- Fallback guidance: Covers Fallback guidance across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
-
-Primary docs:
-
- `docs/providers/anthropic.md`
- `docs/gateway/doctor.md`
- `docs/gateway/configuration-examples.md`
- `docs/gateway/troubleshooting.md`
- `docs/reference/prompt-caching.md`
-
-### 2. Model and Runtime Selection
-
-Search anchors: Bundled Claude catalog, Canonical anthropic refs, Claude CLI compatibility, Model picker availability, Capability metadata, Runtime selection, Session continuity, MCP/tool bridge, Permission-mode mapping, Fallback prelude.
-
-Category note: [Model and Runtime Selection](model-catalog-aliases-and-runtime-policy.md)
-
-Score decisions:
-
- Coverage: `Stable (82%)`
- Quality: `Alpha (68%)`
- Completeness: `Stable (82%)`
- LTS: ❌
-
-Features:
-
- Bundled Claude catalog: Covers Bundled Claude catalog across the Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
- Canonical anthropic refs: Covers Canonical anthropic refs across the Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
- Claude CLI compatibility: Covers Claude CLI compatibility across the Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
- Model picker availability: Covers Model picker availability across the Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
- Capability metadata: Covers Capability metadata across the Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
- Runtime selection: Covers Runtime selection across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
- Session continuity: Covers Session continuity across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
- MCP/tool bridge: Covers MCP/tool bridge across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
- Permission-mode mapping: Covers Permission-mode mapping across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
- Fallback prelude: Covers Fallback prelude across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
-
-Primary docs:
-
- `docs/providers/anthropic.md`
- `docs/gateway/config-agents.md`
- `docs/concepts/models.md`
- `docs/gateway/cli-backends.md`
-
-### 3. Request Transport and Turn Semantics
-
-Search anchors: API-key/OAuth transport, Messages payloads, Streaming decode, Usage and stop reasons, Abort/error handling, Tool-use blocks, Tool-result replay, Partial JSON recovery, Native thinking, Signed/redacted thinking replay.
-
-Category note: [Request Transport and Turn Semantics](direct-anthropic-messages-transport-and-streaming.md)
-
-Score decisions:
-
- Coverage: `Stable (82%)`
- Quality: `Beta (72%)`
- Completeness: `Stable (82%)`
- LTS: ❌
-
-Features:
-
- API-key/OAuth transport: Covers API-key/OAuth transport across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
- Messages payloads: Covers Messages payloads across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
- Streaming decode: Covers Streaming decode across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
- Usage and stop reasons: Covers Usage and stop reasons across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
- Abort/error handling: Covers Abort/error handling across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
- Tool-use blocks: Covers Tool-use blocks across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
- Tool-result replay: Covers Tool-result replay across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
- Partial JSON recovery: Covers Partial JSON recovery across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
- Native thinking: Covers Native thinking across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
- Signed/redacted thinking replay: Covers Signed/redacted thinking replay across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
-
-Primary docs:
-
- `docs/providers/anthropic.md`
- `docs/reference/prompt-caching.md`
- `docs/gateway/troubleshooting.md`
- `docs/gateway/cli-backends.md`
- `docs/concepts/model-providers.md`
-
-### 4. Prompt Cache and Context
-
-Search anchors: Cache retention, System-prompt cache boundary, 1M context, Fast mode/service tier, Cache diagnostics.
-
-Category note: [Prompt Cache and Context](prompt-caching-context-windows-and-request-knobs.md)
-
-Score decisions:
-
- Coverage: `Stable (82%)`
- Quality: `Beta (76%)`
- Completeness: `Stable (82%)`
- LTS: ❌
-
-Features:
-
- Cache retention: Covers Cache retention across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
- System-prompt cache boundary: Covers System-prompt cache boundary across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
- 1M context: Covers 1M context across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
- Fast mode/service tier: Covers Fast mode/service tier across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
- Cache diagnostics: Covers Cache diagnostics across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
-
-Primary docs:
-
- `docs/providers/anthropic.md`
- `docs/reference/prompt-caching.md`
- `docs/gateway/troubleshooting.md`
- `docs/gateway/heartbeat.md`
-
-### 5. Media Inputs
-
-Search anchors: Image input, PDF document input, Media model fallback, Image tool results.
-
-Category note: [Media Inputs](media-understanding-and-document-inputs.md)
-
-Score decisions:
-
- Coverage: `Beta (74%)`
- Quality: `Stable (82%)`
- Completeness: `Beta (74%)`
- LTS: ❌
-
-Features:
-
- Image input: Covers Image input across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
- PDF document input: Covers PDF document input across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
- Media model fallback: Covers Media model fallback across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
- Image tool results: Covers Image tool results across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
-
-Primary docs:
-
- `docs/providers/anthropic.md`
- `docs/gateway/config-agents.md`
-
-## Recommended scorecard interpretation
-
-Use this migrated score as the current inventory baseline. Refresh individual categories with live category-agent research before treating a high score as an LTS promotion gate.
-
-## Out of scope for this surface
-
- Redefining taxonomy category boundaries; taxonomy remains the source of truth for category identity, features, docs, and search anchors.
-
-## Audit provenance
-
- Score source:
-  `docs/kevinslin/maturity-scorecard/inventory/anthropic-provider-path/scores.yaml`.
- Taxonomy metadata source:
-  `.agents/skills/claw-score/taxonomy.yaml`.
- Archived evidence source:
-  `/Users/kevinlin/tmp/maturity/anthropic-provider-path`.
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/scores.yaml
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/scores.yaml
@@ -1,28 +0,0 @@
-version: 1
-process_version: 3
-data:
-  - name: Provider Auth and Recovery
-    category_note: auth-onboarding-and-credential-profile-health.md
-    coverage: 78
-    quality: 70
-    completeness: 78
-  - name: Model and Runtime Selection
-    category_note: model-catalog-aliases-and-runtime-policy.md
-    coverage: 82
-    quality: 68
-    completeness: 82
-  - name: Request Transport and Turn Semantics
-    category_note: direct-anthropic-messages-transport-and-streaming.md
-    coverage: 82
-    quality: 72
-    completeness: 82
-  - name: Prompt Cache and Context
-    category_note: prompt-caching-context-windows-and-request-knobs.md
-    coverage: 82
-    quality: 76
-    completeness: 82
-  - name: Media Inputs
-    category_note: media-understanding-and-document-inputs.md
-    coverage: 74
-    quality: 82
-    completeness: 74
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/tool-calls-replay-and-native-thinking.md
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/tool-calls-replay-and-native-thinking.md
@@ -1,136 +0,0 @@
---
-title: "Anthropic provider path - Tools and Thinking Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Anthropic provider path - Tools and Thinking Maturity Note
-
-## Summary
-
-Anthropic tool-call and native-thinking handling is broad: OpenClaw converts
-tools to Anthropic schemas, maps Claude Code tool names under OAuth, preserves
-signed/redacted thinking, sanitizes malformed replay, handles tool result media,
-and maps thinking levels to provider effort. Coverage is Stable because source
-and tests cover the key transformations and live replay. Quality is Beta
-because archived incidents show tool-call streaming and thinking replay have
-been frequent regression points.
-
-## Category Scope
-
-This category covers Anthropic-specific turn semantics inside agent runs:
-tool declarations, tool-use block conversion, tool-result conversion,
-tool-call id normalization, partial JSON handling, Claude Code tool-name
-mapping, native thinking blocks, redacted thinking, signed thinking replay,
-thinking effort/defaults, and turn validation for replay.
-
-## Features
-
- Tool-use blocks: Covers Tool-use blocks across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
- Tool-result replay: Covers Tool-result replay across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
- Partial JSON recovery: Covers Partial JSON recovery across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
- Native thinking: Covers Native thinking across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
- Signed/redacted thinking replay: Covers Signed/redacted thinking replay across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (82%)`
- Positive signals: Source handles native Anthropic thinking, signed/redacted thinking replay, tool schema conversion, tool-result grouping, image tool results, Claude Code tool aliases, partial-json scratch cleanup, and replay validation; tests cover signed thinking, tool-use replay, malformed tool args, and live tool replay.
- Negative signals: Several cases are covered by focused tests and env-gated live tests rather than always-on end-to-end Anthropic tool scenarios.
- Integration gaps: Full live tool-call scenario proof is limited by `ANTHROPIC_LIVE_TEST` and provider credentials.
-
-## Quality Score
-
- Score: `Beta (74%)`
- Gitcrawl reports: #60593 tracks recurring Anthropic streaming JSON parse errors; PR #68565 preserves signed/redacted thinking blocks; PR #70372 suppresses thinking narration leakage; PR #87346 merges consecutive assistant turns in validation; PR #61151 drops `partialJson` streaming artifacts from session history repair.
- Discrawl reports: Discord archive results include session corruption from truncated streaming tool calls, raw parse errors from Anthropic tool-call deltas, and downstream fixes for partial JSON recovery.
- Good qualities: The implementation preserves provider-signed thinking, strips synthetic reasoning from native Anthropic replay, coalesces consecutive tool results, coerces malformed tool-call args, and avoids persisting streaming scratch buffers.
- Bad qualities: Tool-call streaming is one of the highest-churn Anthropic edges because partial JSON, thinking signatures, provider-compatible endpoints, and replay validation interact.
- Excluded from quality: Unit, integration, e2e, live, and real runtime-flow test presence or absence; those are Coverage inputs only.
-
-## Completeness Score
-
- Score: `Stable (82%)`
- Surface instructions: evaluated against `references/completeness/anthropic-provider-path.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Tool-use blocks, Tool-result replay, Partial JSON recovery, Native thinking, Signed/redacted thinking replay.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Live proof is strongest for synthetic replay acceptance, not every real tool
-  invocation type.
- Tool-call and thinking behavior differs across direct Anthropic and
-  Anthropic-compatible providers, increasing maintenance pressure.
- Historical incidents show partial JSON and thinking display can corrupt
-  session history or leak confusing text when not carefully normalized.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents Claude 4.6 thinking defaults, `/think` overrides, cache behavior, and media/document handling.
- `/Users/kevinlin/code/openclaw/docs/gateway/cli-backends.md` documents Claude CLI permission mode, `/think` effort mapping, MCP bridge tools, and session behavior.
- `/Users/kevinlin/code/openclaw/docs/concepts/model-providers.md` covers model/provider behavior that feeds tool and thinking transport choices.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.ts` implements `convertTools`, `convertMessages`, `normalizeToolCallId`, signed/redacted thinking replay, `input_json_delta` accumulation, tool-use blocks, tool-result grouping, and thinking effort request construction.
- `/Users/kevinlin/code/openclaw/src/agents/anthropic-transport-stream.ts` implements transport-side tool argument coercion, unsafe integer preservation, reasoning content handling for compatible streams, and native Anthropic thinking replay behavior.
- `/Users/kevinlin/code/openclaw/src/agents/embedded-agent-runner/thinking.ts` wraps Anthropic streams with thinking recovery and blocks duplicate streaming retries after output begins.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/replay-policy.ts` defines Anthropic replay policy including strict tool ids, signature preservation, turn validation, and synthetic tool result allowance.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/agents/embedded-agent-runner.anthropic-tool-replay.live.test.ts` env-gates live Anthropic replay acceptance for regular text, omitted reasoning placeholder, and tool-call replay history.
- `/Users/kevinlin/code/openclaw/src/agents/anthropic-transport-stream.live.test.ts` covers live stream abort behavior adjacent to tool/thinking streaming.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/src/agents/anthropic-transport-stream.test.ts` covers unsafe integer tool-use deltas, OAuth tool-name remapping, signed thinking ingest, multiple signature deltas, reasoning_content compatible replay, malformed tool schemas, malformed tool-call args, empty tool results, image tool results, and thinking effort mapping.
- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.test.ts` covers signed thinking replay payload preservation.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/index.test.ts` covers replay policy and native reasoning output mode.
- `/Users/kevinlin/code/openclaw/extensions/anthropic/cli-shared.test.ts` covers Claude CLI thinking effort mapping.
-
-### Gitcrawl queries
-
-Query: `gitcrawl --json search issues -R openclaw/openclaw "anthropic thinking signature replay cache_control"`
-
-Results:
-
- Returned no direct results for that exact issue query.
-
-Query: `gitcrawl --json search prs -R openclaw/openclaw "Anthropic thinking"`
-
-Results:
-
- #70372 suppresses thinking narration leaking into channel messages for Anthropic/Bedrock.
- #68565 preserves signed/redacted thinking blocks.
- #87346 merges consecutive assistant turns in turn validation.
- #85381 emits thinking_delta events and handles redacted single-block shape.
-
-Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic tool call streaming JSON parse error"`
-
-Results:
-
- #60593 reports recurring Anthropic streaming JSON parse errors.
-
-### Discrawl queries
-
-Query: `discrawl search --limit 10 "Anthropic tool call streaming parse JSON"`
-
-Results:
-
- Returned archived user reports and PR notifications for truncated streaming tool-call corruption, raw parse errors, control-character sanitization, and tool-call argument recovery.
-
-Query: `discrawl search --limit 10 "Anthropic thinking signature cache control"`
-
-Results:
-
- Returned no direct results for that exact query.
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/background-task-ledger.md
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/background-task-ledger.md
@@ -1,134 +0,0 @@
---
-title: "Automation: cron, hooks, tasks, polling - Background Tasks and Flows Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Automation: cron, hooks, tasks, polling - Background Tasks and Flows Maturity Note
-
-## Summary
-
-The background task ledger is well specified and implemented: it tracks detached ACP, subagent, cron, CLI, and media jobs; persists SQLite state; reconciles runtime backing; exposes CLI and Gateway methods; handles terminal notifications; and includes audit/maintenance. Quality is limited by restart/lost-task edge cases and operator confusion about the difference between task records and durable execution.
-
-## Category Scope
-
-Included in this category:
-
- Task list/show/cancel: Covers Task list/show/cancel across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
- Task notifications: Covers Task notifications across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
- Task audit and maintenance: Covers Task audit and maintenance across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
- Chat task board: Covers Chat task board across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
- Task pressure status: Covers Task pressure status across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
- Managed flows: Covers Managed flows across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
- Mirrored flows: Covers Mirrored flows across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
- openclaw tasks flow: Covers openclaw tasks flow across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
- Flow audit and maintenance: Covers Flow audit and maintenance across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
- Plugin managedFlows: Covers Plugin managedFlows across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
-
-## Features
-
- Task list/show/cancel: Covers Task list/show/cancel across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
- Task notifications: Covers Task notifications across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
- Task audit and maintenance: Covers Task audit and maintenance across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
- Chat task board: Covers Chat task board across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
- Task pressure status: Covers Task pressure status across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
- Managed flows: Covers Managed flows across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
- Mirrored flows: Covers Mirrored flows across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
- openclaw tasks flow: Covers openclaw tasks flow across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
- Flow audit and maintenance: Covers Flow audit and maintenance across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
- Plugin managedFlows: Covers Plugin managedFlows across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (84%)`
- Positive signals: The registry, store, maintenance, audit, reconcile, owner access, delivery, executor policy, status formatting, Gateway methods, and CLI commands all have focused tests.
- Negative signals: Coverage is weaker for full restart scenarios where the Gateway is killed during active tasks and then reconciles mixed ACP/subagent/cron/CLI backing state in a real process.
- Integration gaps: A restart-kill harness should create one task per runtime, force Gateway shutdown before drain, restart, and prove audit/maintenance outcomes, delivery notifications, and cleanup retention.
-
-## Quality Score
-
- Score: `Beta (77%)`
- Gitcrawl reports: PR #59719 tracks background exec liveness with CLI tasks; issue #42767 was closed after stale active tasks gained lost-state reconciliation; issue #66909 asked whether tasks resume after Gateway restart; issue #42246 requests batching/aggregation of outbound notifications for background tasks.
- Discrawl reports: Maintainer reports mention PR #78575 for stale task audit entries after forced/timed-out restarts, and user discussions advise treating OpenClaw tasks/transcripts as an operator audit trail rather than the sole durable work queue.
- Good qualities: The docs clearly state tasks are records, not schedulers; the registry persists to SQLite; reconciliation is runtime-aware; terminal rows retain for seven days; and task status output sanitizes internal runtime text.
- Bad qualities: The lived record shows task `lost` behavior, restart semantics, and notification volume remain hard for users to reason about.
- Excluded from quality: Test inventory and runtime proof depth; they are coverage inputs only.
-
-## Completeness Score
-
- Score: `Stable (84%)`
- Surface instructions: evaluated against `references/completeness/automation-cron-hooks-tasks-polling.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Task list/show/cancel, Task notifications, Task audit and maintenance, Chat task board, Task pressure status, Managed flows, Mirrored flows, openclaw tasks flow, Flow audit and maintenance, Plugin managedFlows.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Restart behavior should be summarized in CLI status and docs with one explicit table: what can resume, what becomes lost, and what TaskFlow adds.
- Notification aggregation or batching remains a user-facing need for busy background task deployments.
- Maintenance should remain strongly observable because `lost` is a normal recovery signal, not just an error.
-
-## Evidence
-
-### Docs
-
- `docs/automation/tasks.md` explains task sources, lifecycle, statuses, delivery, notify policies, audit, maintenance, chat `/tasks`, status integration, storage, and relation to cron/heartbeat/Task Flow.
- `docs/automation/index.md` positions tasks as the detached-work ledger rather than a scheduler.
- `docs/cli/tasks.md` documents CLI commands for listing, showing, cancelling, notifying, auditing, maintaining, and inspecting flows.
-
-### Source
-
- `src/tasks/task-registry.ts`, `src/tasks/task-registry.store.ts`, `src/tasks/task-registry.store.sqlite.ts`, `src/tasks/task-registry.reconcile.ts`, `src/tasks/task-registry.audit.ts`, `src/tasks/task-registry.maintenance.ts`, and `src/tasks/task-registry.types.ts` implement task persistence, reconciliation, audit, and maintenance.
- `src/tasks/task-executor.ts`, `src/tasks/task-executor-policy.ts`, `src/tasks/task-registry-delivery-runtime.ts`, and `src/tasks/task-status.ts` implement cancellation, notifications, delivery, and status formatting.
- `src/gateway/server-methods/tasks.ts` and `src/commands/tasks.ts` expose Gateway and CLI task operations.
-
-### Integration tests
-
- `src/gateway/server-methods/tasks.test.ts` covers Gateway methods for tasks.
- `test/scripts/openclaw-test-state.test.ts` exercises broader OpenClaw test state that includes runtime state management.
- No full process restart-kill e2e across all task runtime types was found.
-
-### Unit tests
-
- `src/tasks/task-registry.test.ts`, `src/tasks/task-registry.store.test.ts`, `src/tasks/task-registry.audit.test.ts`, `src/tasks/task-registry.maintenance.issue-60299.test.ts`, and `src/tasks/task-registry.process-state.test.ts` cover registry behavior.
- `src/tasks/task-executor.test.ts`, `src/tasks/task-executor-policy.test.ts`, `src/tasks/detached-task-runtime.test.ts`, and `src/tasks/task-status.test.ts` cover executor, notification, runtime, and status behavior.
- `src/commands/tasks.test.ts`, `src/commands/tasks-json.test.ts`, and `src/commands/tasks-audit-system.ts` cover CLI formatting and system audit behavior.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "tasks ledger stale lost maintenance cron subagent" --json --limit 5`
-
-Results:
-
- No hits for the exact query.
-
-Fallback query:
-
-`gitcrawl search openclaw/openclaw --query "background tasks lost" --json --limit 5`
-
-Results:
-
- PR #59719 fixes background exec liveness through CLI tasks.
- Issue #42767 discusses long-running tasks stuck as running; current main reconciles orphaned active tasks to `lost`.
- Issue #66909 asks whether tasks automatically resume after Gateway restart.
- Issue #42246 requests configurable batching/aggregation for outbound background task notifications.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "background tasks lost"`
-
-Results:
-
- Maintainers requested review of PR #78575, described as fixing stale task audit entries by marking running background tasks lost when forced/timed-out Gateway restart proceeds before drain completes.
- Maintainer/user discussions explain that background tasks can become `lost` and recommend treating OpenClaw tasks/transcripts as an operator audit trail, with Postgres/Redis for a durable external work ledger when needed.
- Issue #66909 was closed after docs clarified that tasks persist tracking records but not execution state.
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/channel-polling-webhooks.md
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/channel-polling-webhooks.md
@@ -1,158 +0,0 @@
---
-title: "Automation: cron, hooks, tasks, polling - Event Ingress Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Automation: cron, hooks, tasks, polling - Event Ingress Maturity Note
-
-## Summary
-
-Channel ingress polling and webhook monitors are mature for high-traffic channels such as Telegram and Zalo, with detailed docs, startup behavior, lease/session handling, watchdogs, and tests. The quality ceiling is limited by the lived record: polling stall detection, webhook/polling mutual exclusion, schema drift, startup blocking, and network failures are frequent operational hazards.
-
-## Category Scope
-
-Included in this category:
-
- Telegram long polling: Covers Telegram long polling across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
- Telegram webhook mode: Covers Telegram webhook mode across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
- Zalo polling/webhook mode: Covers Zalo polling/webhook mode across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
- Polling stall diagnostics: Covers Polling stall diagnostics across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
- iMessage watch fallback: Covers iMessage watch fallback across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
- Gmail setup wizard: Covers Gmail setup wizard across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watchers behavior.
- Watcher start/serve: Covers Watcher start/serve across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watchers behavior.
- Tailscale/public routing: Covers Tailscale/public routing across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watchers behavior.
- Push token validation: Covers Push token validation across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watchers behavior.
- Gmail event routing: Covers Gmail event routing across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watchers behavior.
- POST /hooks/wake: Covers POST /hooks/wake across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhooks behavior.
- POST /hooks/agent: Covers POST /hooks/agent across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhooks behavior.
- Mapped hooks: Covers Mapped hooks across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhooks behavior.
- Hook auth policy: Covers Hook auth policy across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhooks behavior.
- Async dispatch: Covers Async dispatch across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhooks behavior.
-
-## Features
-
- Telegram long polling: Covers Telegram long polling across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
- Telegram webhook mode: Covers Telegram webhook mode across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
- Zalo polling/webhook mode: Covers Zalo polling/webhook mode across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
- Polling stall diagnostics: Covers Polling stall diagnostics across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
- iMessage watch fallback: Covers iMessage watch fallback across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
- Gmail setup wizard: Covers Gmail setup wizard across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watchers behavior.
- Watcher start/serve: Covers Watcher start/serve across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watchers behavior.
- Tailscale/public routing: Covers Tailscale/public routing across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watchers behavior.
- Push token validation: Covers Push token validation across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watchers behavior.
- Gmail event routing: Covers Gmail event routing across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watchers behavior.
- POST /hooks/wake: Covers POST /hooks/wake across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhooks behavior.
- POST /hooks/agent: Covers POST /hooks/agent across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhooks behavior.
- Mapped hooks: Covers Mapped hooks across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhooks behavior.
- Hook auth policy: Covers Hook auth policy across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhooks behavior.
- Async dispatch: Covers Async dispatch across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhooks behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Beta (76%)`
- Positive signals: Telegram and Zalo have focused coverage for polling status, transport state, liveness, leases, sessions, webhook status, webhook handlers, lifecycle, and polling media replies.
- Negative signals: Long polling depends on real network behavior, host sleep, proxy/DNS/TLS state, Telegram/Zalo API behavior, and gateway event-loop load. Local tests cannot cover every failure mode.
- Integration gaps: Add a network-fault harness for Telegram/Zalo monitors that simulates long-poll timeout, host sleep, active webhook conflict, schema config drift, and restart after event-loop stalls.
-
-## Quality Score
-
- Score: `Beta (70%)`
- Gitcrawl reports: PR #73884 fixes false Telegram polling-stall restarts; a fallback query found issue #86535, where the Telegram polling stall detector treats host sleep as an active `getUpdates` stall.
- Discrawl reports: Review comments on PRs #41153, #70579, and #57737 focus on polling stall watchdog thresholds and schema metadata drift; user reports mention startup sequentially blocking on first Telegram poll under event-loop load.
- Good qualities: Telegram has a configurable `pollingStallThresholdMs`, lease protection for one active poller per token, transport-dirty restart behavior, and docs for webhook vs long-polling. Zalo documents and tests polling/webhook mutual exclusion and media reply behavior.
- Bad qualities: Polling health is sensitive to event-loop stalls, sleep, schema drift, and network failures. The real-world bug record is active and channel-specific.
- Excluded from quality: Test inventory and runtime proof depth; they are coverage inputs only.
-
-## Completeness Score
-
- Score: `Beta (76%)`
- Surface instructions: evaluated against `references/completeness/automation-cron-hooks-tasks-polling.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Telegram long polling, Telegram webhook mode, Zalo polling/webhook mode, Polling stall diagnostics, iMessage watch fallback, Gmail setup wizard, Watcher start/serve, Tailscale/public routing, Push token validation, Gmail event routing, POST /hooks/wake, POST /hooks/agent, Mapped hooks, Hook auth policy, Async dispatch.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Polling watchdogs should distinguish host sleep/event-loop pause from active API request stalls.
- Channel config schema generation must stay tied to runtime fields like `pollingStallThresholdMs`.
- Startup should avoid serial channel polling dependencies that can block the whole gateway under event-loop load.
-
-## Evidence
-
-### Docs
-
- `docs/channels/telegram.md` documents long polling as default, webhook mode, `pollingStallThresholdMs`, getUpdates conflicts, liveness troubleshooting, and doctor/status probes.
- `docs/channels/zalo.md` documents long polling as the default, webhook mode, and mutual exclusion between polling and webhook modes.
- `docs/channels/troubleshooting.md` includes polling stall diagnostics.
- `docs/channels/imessage-from-bluebubbles.md` notes iMessage watch behavior with a polling fallback.
-
-### Source
-
- `extensions/telegram/src/monitor.ts`, `extensions/telegram/src/monitor-polling.runtime.ts`, `extensions/telegram/src/polling-liveness.ts`, `extensions/telegram/src/polling-lease.ts`, `extensions/telegram/src/polling-session.ts`, `extensions/telegram/src/polling-status.ts`, and `extensions/telegram/src/webhook-status.ts` implement Telegram polling/webhook monitoring.
- `extensions/zalo/src/monitor.ts`, `extensions/zalo/src/monitor.webhook.ts`, `extensions/zalo/src/monitor-durable.ts`, and `extensions/zalo/src/outbound-media.ts` implement Zalo polling/webhook behavior.
- `extensions/imessage/src/approval-reaction-poller.ts` implements an iMessage approval polling path.
-
-### Integration tests
-
- `extensions/zalo/src/monitor.webhook-e2e.test.ts` covers Zalo webhook behavior.
- `extensions/zalo/src/monitor.polling.media-reply.test.ts` covers Zalo polling with media replies.
- Telegram monitor tests are mostly focused runtime tests rather than live API e2e.
-
-### Unit tests
-
- `extensions/telegram/src/polling-status.test.ts`, `extensions/telegram/src/polling-transport-state.test.ts`, `extensions/telegram/src/polling-session.test.ts`, `extensions/telegram/src/polling-liveness.test.ts`, `extensions/telegram/src/polling-lease.test.ts`, and `extensions/telegram/src/webhook-status.test.ts` cover Telegram monitor pieces.
- `extensions/zalo/src/monitor.lifecycle.test.ts`, `extensions/zalo/src/monitor.webhook.test.ts`, `extensions/zalo/src/monitor.polling.media-reply.test.ts`, and `extensions/zalo/src/monitor.image.polling.test.ts` cover Zalo behavior.
- `extensions/imessage/src/approval-reaction-poller.test.ts` covers iMessage reaction polling.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "channel polling webhook getUpdates polling stall Zalo Telegram" --json --limit 5`
-
-Results:
-
- No hits for the exact query.
-
-Fallback query:
-
-`gitcrawl search openclaw/openclaw --query "pollingStallThresholdMs" --json --limit 5`
-
-Results:
-
- PR #73884 fixes Telegram false polling-stall restarts.
-
-Fallback query:
-
-`gitcrawl search openclaw/openclaw --query "poll loop" --json --limit 5`
-
-Results:
-
- Issue #86535 reports Telegram polling stall detector treating host sleep/event-loop pause as an active `getUpdates` stall.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "pollingStallThresholdMs"`
-
-Results:
-
- PR #41153 closure says current main hardens Telegram polling-stall detection with a 120s default and configurable per-account override.
- PR #70579 review warns that schema validation drift could reject tuned `pollingStallThresholdMs` configs.
- PR #57737 review warns bundled schema metadata must be regenerated when adding `pollingStallThresholdMs`.
-
-Fallback query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "poll loop"`
-
-Results:
-
- Discord user report says Telegram account startup can block sequentially on first poll when event-loop load is high, causing timeouts and multi-minute stalls.
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/cron-delivery-alerts.md
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/cron-delivery-alerts.md
@@ -1,105 +0,0 @@
---
-title: "Automation: cron, hooks, tasks, polling - Cron Delivery and Failure Alerts Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Automation: cron, hooks, tasks, polling - Cron Delivery and Failure Alerts Maturity Note
-
-## Summary
-
-Cron delivery is feature-rich: jobs can announce to channels, post webhook payloads, suppress runner fallback delivery, preserve last/current chat routing, mirror direct delivery into transcripts, suppress stale interim text, prefer descendant subagent output, and notify failure destinations. The implementation is powerful but complex enough that archive evidence still shows privacy and routing hazards around failure alerts and webhook mode.
-
-## Category Scope
-
-This category covers cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, failure destinations, skipped-run alerts, message-tool delivery awareness, descendant subagent delivery preference, and cleanup after isolated runs.
-
-## Features
-
- Chat announce delivery: Covers Chat announce delivery across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
- Webhook delivery: Covers Webhook delivery across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
- Failure destinations: Covers Failure destinations across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
- Skipped-run alerts: Covers Skipped-run alerts across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
- Delivery previews: Covers Delivery previews across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (82%)`
- Positive signals: Delivery has focused coverage for direct delivery, delivery target resolution, failure alerts, delivery plans/previews, double-announce prevention, named-agent delivery, outbound runtime dispatch, stale interim suppression, and delivery status persistence.
- Negative signals: Coverage is mostly simulated and component-level. Cross-channel live delivery, webhook delivery failure privacy, and descendant subagent delivery behavior are not proven by one broad e2e scenario.
- Integration gaps: A live matrix should cover announce, webhook, no-deliver, explicit failure destinations, stale target rejection, skipped provider-preflight alerts, and descendant subagent final-output delivery across at least one chat channel and one webhook receiver.
-
-## Quality Score
-
- Score: `Beta (74%)`
- Gitcrawl reports: PR #85394 references failure-alert schema and cron-tool decomposition. The query found ongoing work near skipped-run alert behavior.
- Discrawl reports: A review comment on PR #31059 warns that webhook-mode failure alerts without `to` could fall through to announce delivery and leak error details to chat targets.
- Good qualities: Delivery planning is explicit, provider selector prefixes are validated, direct delivery uses idempotency keys and transient retry loops, stale deliveries can be skipped, and isolated cleanup closes tracked browser/MCP resources best-effort without masking the run result.
- Bad qualities: Delivery has a large state space across channel routes, direct sends, fallback announcement, transcript mirroring, webhook mode, failure alerts, and subagent follow-up. The archive privacy warning shows mode boundaries can be easy to get wrong.
- Excluded from quality: Test inventory and runtime proof depth; they are coverage inputs only.
-
-## Completeness Score
-
- Score: `Stable (82%)`
- Surface instructions: evaluated against `references/completeness/automation-cron-hooks-tasks-polling.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Chat announce delivery, Webhook delivery, Failure destinations, Skipped-run alerts, Delivery previews.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Failure-alert mode should fail closed when destination fields are incomplete, with no fallback to a broader recipient than the operator selected.
- Delivery documentation should include a compact routing table for `last`, explicit channel targets, provider-prefixed targets, webhook mode, and failure destinations.
- A small local webhook fixture would make delivery regressions easier to prove without real channel credentials.
-
-## Evidence
-
-### Docs
-
- `docs/automation/cron-jobs.md` documents delivery modes `announce`, `webhook`, and `none`; explicit channel targets; channel prefix validation; message-tool interaction; output language; failure destinations; skipped alerts; and troubleshooting for no delivery.
- `docs/automation/tasks.md` documents cleanup and completion behavior for cron tasks and descendant subagent output preference.
- `docs/channels/discord.md` includes channel-specific cron delivery behavior for Discord text announcements.
-
-### Source
-
- `src/cron/delivery.ts`, `src/cron/delivery-plan.ts`, `src/cron/delivery-preview.ts`, `src/cron/delivery-context.ts`, `src/cron/delivery-field-schemas.ts`, and `src/cron/webhook-url.ts` implement delivery validation, planning, preview, and URL handling.
- `src/cron/isolated-agent/delivery-dispatch.ts`, `src/cron/isolated-agent/delivery-target.ts`, `src/cron/isolated-agent/delivery-outbound.runtime.ts`, and `src/cron/isolated-agent/subagent-followup.ts` implement isolated direct delivery, idempotency, retries, transcript mirroring, and descendant-output preference.
- `src/cron/service/initial-delivery.ts`, `src/cron/service/task-ledger.ts`, and `src/cron/service/timer.ts` feed initial delivery context, task state, and failure alerts from the scheduler.
-
-### Integration tests
-
- `src/cron/isolated-agent.direct-delivery-core-channels.test.ts` covers direct delivery across core channel abstractions.
- `src/cron/isolated-agent/delivery-dispatch.named-agent.test.ts` and `src/cron/isolated-agent/delivery-dispatch.double-announce.test.ts` exercise integrated isolated delivery dispatch cases.
- `src/cron/isolated-agent.delivery-awareness.test.ts` covers awareness of agent-sent messages versus fallback delivery.
-
-### Unit tests
-
- `src/cron/delivery.test.ts`, `src/cron/delivery-plan.test.ts`, `src/cron/delivery-preview.test.ts`, `src/cron/delivery.failure-notify.test.ts`, and `src/cron/delivery-context.test.ts` cover planning and alert logic.
- `src/cron/isolated-agent/delivery-target.test.ts`, `src/cron/isolated-agent/channel-output-policy.test.ts`, and `src/cron/isolated-agent/subagent-followup.test.ts` cover target resolution, channel output rules, and descendant follow-up.
- `src/cron/service.delivery-plan.test.ts`, `src/cron/service.failure-alert.test.ts`, and `src/cron/service.persists-delivered-status.test.ts` cover service-level delivery behavior.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "cron delivery failure alerts announce webhook skipped" --json --limit 5`
-
-Results:
-
- PR #85394, `refactor(cron-tool): decompose into per-action tools (WOR-317)`, includes failure-alert schema and skipped-run alert fields, showing this surface is actively evolving.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "cron delivery failure alerts announce webhook skipped"`
-
-Results:
-
- Review comment on PR #31059 warns that `sendCronFailureAlert` in webhook mode without `to` could fall through to announce delivery and leak failure text to chat targets.
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/cron-execution-diagnostics.md
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/cron-execution-diagnostics.md
@@ -1,113 +0,0 @@
---
-title: "Automation: cron, hooks, tasks, polling - Cron Runs and Diagnostics Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Automation: cron, hooks, tasks, polling - Cron Runs and Diagnostics Maturity Note
-
-## Summary
-
-Cron execution has a mature service and isolated-agent implementation: it records run history, maps run outcomes, supports manual and due-only runs, enforces model selection, checks local provider reachability, records diagnostics, and creates task-ledger entries. Coverage is broad but mostly focused and simulated. Quality is limited by live reports around model preflight/fallback semantics and long-running deterministic jobs.
-
-## Category Scope
-
-This category covers scheduler dispatch, timer arming, manual/due runs, isolated agent execution, session identity, model selection, fallback policy, provider preflight, run timeouts, run diagnostics, run history, and task-ledger creation. It excludes delivery/alerts, which are scored separately.
-
-## Features
-
- Manual cron runs: Covers Manual cron runs across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Isolated cron execution: Covers Isolated cron execution across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Model/provider preflight: Covers Model/provider preflight across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Run history: Covers Run history across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Timeout and denial diagnostics: Covers Timeout and denial diagnostics across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (83%)`
- Positive signals: The `src/cron/service.*.test.ts` and `src/cron/isolated-agent/*.test.ts` suites cover dispatch, timer rearming, restart catchup, model overrides, fallback policy, local model preflight, run diagnostics, timeout policy, meta-error status propagation, session-key isolation, and task-ledger creation.
- Negative signals: The component has less live/e2e proof for real provider stalls, real Gateway restarts during active isolated runs, and long-running deterministic process supervision through the embedded tool path.
- Integration gaps: A live scenario should prove a due isolated run through a real Gateway, a model-preflight skip, a timeout, a manual `cron.run --wait`, and durable `cron.runs` recovery after restart.
-
-## Quality Score
-
- Score: `Beta (73%)`
- Gitcrawl reports: Issue #79329 reports cron model preflight skipping an entire run when a local primary is unreachable instead of trying configured cloud fallbacks.
- Discrawl reports: A maintainer/user thread on May 17 describes long-running deterministic cron work where Codex-native shell ownership can end before OpenClaw receives command output; the recommended pattern is embedded OpenClaw tools with `exec` plus `process` polling and an adequate `timeoutSeconds`.
- Good qualities: Execution paths separate main-session system events from isolated/current/custom `agentTurn` jobs, enforce model allowlists before runner start, persist run logs and diagnostics, and classify skipped/error/timeout states instead of treating every assistant reply as success.
- Bad qualities: Provider preflight and long-running process ownership remain operator-sensitive. The runtime has many guardrails, but the operational model still requires users to choose the right agent/tool execution path for deterministic shell work.
- Excluded from quality: Test inventory and runtime proof depth; they are coverage inputs only.
-
-## Completeness Score
-
- Score: `Stable (83%)`
- Surface instructions: evaluated against `references/completeness/automation-cron-hooks-tasks-polling.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Manual cron runs, Isolated cron execution, Model/provider preflight, Run history, Timeout and denial diagnostics.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Model preflight should align with fallback expectations or make strict-primary skip behavior unmistakable to operators.
- Long-running deterministic work needs a clearer first-class pattern or stronger docs that distinguish agent-turn orchestration from deterministic execution.
- A restart-resume proof for active isolated cron runs would reduce uncertainty around timeout and task reconciliation behavior.
-
-## Evidence
-
-### Docs
-
- `docs/automation/cron-jobs.md` documents execution styles, isolated session behavior, model/thinking/fallback precedence, local-provider preflight, timeout handling, run history, and manual `cron run --wait`.
- `.mem/main/ref/cron-run-diagnostics.md` and `.mem/main/pkg/claw/flow/cron-run-diagnostics.md` describe diagnostic expectations and recent repair context for cron run failures.
- `docs/automation/tasks.md` states that every cron execution creates a background task and explains cron-specific lost-state reconciliation.
-
-### Source
-
- `src/cron/service/timer.ts`, `src/cron/service/ops.ts`, `src/cron/service/timeout-policy.ts`, and `src/cron/service/task-ledger.ts` own dispatch, manual runs, timeout policy, and task creation.
- `src/cron/run-log.ts`, `src/cron/run-diagnostics.ts`, and `src/cron/retry-hint.ts` implement durable run history and diagnostic summaries.
- `src/cron/isolated-agent/run.ts`, `src/cron/isolated-agent/model-selection.ts`, `src/cron/isolated-agent/model-preflight.runtime.ts`, `src/cron/isolated-agent/run-fallback-policy.ts`, and `src/cron/isolated-agent/session-key.ts` implement isolated agent execution, model choice, preflight, fallbacks, and session identity.
-
-### Integration tests
-
- `src/cron/cron-protocol-conformance.test.ts` covers protocol-level cron behavior.
- `src/cron/isolated-agent/model-preflight.runtime.test.ts` exercises runtime provider-preflight behavior rather than only pure functions.
- `src/cron/isolated-agent/run.runtime-plugins.test.ts` exercises runtime plugin integration during cron runs.
-
-### Unit tests
-
- `src/cron/service.restart-catchup.test.ts`, `src/cron/service.rearm-timer-when-running.test.ts`, `src/cron/service.prevents-duplicate-timers.test.ts`, `src/cron/service.every-jobs-fire.test.ts`, and `src/cron/service/timeout-policy.test.ts` cover scheduler execution mechanics.
- `src/cron/isolated-agent/run.cron-model-override.test.ts`, `src/cron/isolated-agent/run.payload-fallbacks.test.ts`, `src/cron/isolated-agent/run.meta-error-status.test.ts`, `src/cron/isolated-agent/run.interim-retry.test.ts`, `src/cron/isolated-agent/run.live-session-model-switch.test.ts`, and `src/cron/isolated-agent/run.tools-allow.test.ts` cover isolated run behavior.
- `src/cron/run-log.test.ts`, `src/cron/run-log.error-reason.test.ts`, and `src/cron/run-diagnostics.test.ts` cover run history and diagnostics.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "cron timeout diagnostics model preflight run history" --json --limit 5`
-
-Results:
-
- Issue #79329, `Cron model preflight skips entire run when local primary is unreachable, ignoring configured cloud fallbacks [AI]`, is the only hit and directly lowers quality for model-preflight semantics.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "cron timeout diagnostics model preflight run history"`
-
-Results:
-
- No matching Discord messages returned for this exact query.
-
-Fallback query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "poll loop"`
-
-Results:
-
- May 17 maintainer/user discussion recommends embedded OpenClaw execution with `exec` plus `process` for long-running deterministic cron work, and warns that Codex-native shell ownership can end before the final result is observed.
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/cron-job-lifecycle.md
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/cron-job-lifecycle.md
@@ -1,139 +0,0 @@
---
-title: "Automation: cron, hooks, tasks, polling - Cron Jobs Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Automation: cron, hooks, tasks, polling - Cron Jobs Maturity Note
-
-## Summary
-
-Cron job authoring and schedule management are documented and implemented across CLI, Gateway RPC, and agent-tool entrypoints. The scheduler supports one-shot, interval, and cron-expression jobs with timezone and stagger controls, plus durable job and runtime state files. The main risk is not basic feature absence; it is schedule-state edge behavior, where the archives show recurring reports around unresolved or stale `nextRunAtMs`, manual-run deletion, and long-uptime scheduler behavior.
-
-## Category Scope
-
-Included in this category:
-
- Create/edit/remove jobs: Covers Create/edit/remove jobs across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
- Schedule types: Covers Schedule types across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
- Timezone and stagger: Covers Timezone and stagger across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
- Cron RPCs: Covers Cron RPCs across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
- Agent cron tool: Covers Agent cron tool across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
- Manual cron runs: Covers Manual cron runs across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Isolated cron execution: Covers Isolated cron execution across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Model/provider preflight: Covers Model/provider preflight across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Run history: Covers Run history across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Timeout and denial diagnostics: Covers Timeout and denial diagnostics across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Chat announce delivery: Covers Chat announce delivery across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
- Webhook delivery: Covers Webhook delivery across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
- Failure destinations: Covers Failure destinations across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
- Skipped-run alerts: Covers Skipped-run alerts across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
- Delivery previews: Covers Delivery previews across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
-
-## Features
-
- Create/edit/remove jobs: Covers Create/edit/remove jobs across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
- Schedule types: Covers Schedule types across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
- Timezone and stagger: Covers Timezone and stagger across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
- Cron RPCs: Covers Cron RPCs across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
- Agent cron tool: Covers Agent cron tool across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
- Manual cron runs: Covers Manual cron runs across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Isolated cron execution: Covers Isolated cron execution across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Model/provider preflight: Covers Model/provider preflight across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Run history: Covers Run history across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Timeout and denial diagnostics: Covers Timeout and denial diagnostics across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Chat announce delivery: Covers Chat announce delivery across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
- Webhook delivery: Covers Webhook delivery across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
- Failure destinations: Covers Failure destinations across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
- Skipped-run alerts: Covers Skipped-run alerts across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
- Delivery previews: Covers Delivery previews across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (85%)`
- Positive signals: CLI, Gateway RPC, and agent-tool paths share normalization through `src/cron/normalize.ts` and `src/gateway/server-methods/cron.ts`; targeted unit coverage exists for schedule parsing, schedule options, store migration, `nextRunAtMs` repair, top-of-hour staggering, list pagination, and manual runs.
- Negative signals: Coverage is strongest at unit and focused service level; the evidence found fewer end-to-end proofs that create/edit/run flows survive long-running gateway uptime, hand-edited stores, and UI/CLI/agent-tool parity in one scenario.
- Integration gaps: No single live/e2e scenario was found that starts a real Gateway, creates all three schedule types through different user surfaces, restarts the Gateway, and proves the same persisted jobs still compute the correct next run.
-
-## Quality Score
-
- Score: `Beta (78%)`
- Gitcrawl reports: Open threads include PR #52109 for high-frequency `every` schedule refire gaps, issue #81691 for exact-second future-slot repair, PR #75970 for malformed persisted jobs, issue #83538 for manual-run `deleteAfterRun` data loss, and issue #73166 for long-uptime scheduler stoppage.
- Discrawl reports: The Discord archive shows operator confusion around main-session cron rows that appeared enabled but had `lastError: "disabled"`, plus review discussion on unresolved next-run refire loops and schedule errors leaving timers idle.
- Good qualities: The source has a clear service boundary, schema validation before service mutation, a job/state file split, timestamp validation, schedule identity tracking, and explicit docs for timezone, day-of-month/day-of-week OR behavior, stagger, and state-file handling.
- Bad qualities: The lived bug record shows schedule-state repair remains subtle, and manual-run/delete semantics can surprise operators. Quality is limited by these operational edge cases, not by the test inventory.
- Excluded from quality: Test inventory and runtime proof depth; they are coverage inputs only.
-
-## Completeness Score
-
- Score: `Stable (85%)`
- Surface instructions: evaluated against `references/completeness/automation-cron-hooks-tasks-polling.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Create/edit/remove jobs, Schedule types, Timezone and stagger, Cron RPCs, Agent cron tool, Manual cron runs, Isolated cron execution, Model/provider preflight, Run history, Timeout and denial diagnostics, Chat announce delivery, Webhook delivery, Failure destinations, Skipped-run alerts, Delivery previews.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- A single operator-facing scenario proof should cover creating `at`, `every`, and `cron` jobs through CLI/Gateway/agent-tool paths, editing non-schedule metadata, preserving/repairing state, restarting the Gateway, and manually running a due and non-due job.
- Manual run semantics need sharper operator visibility around `deleteAfterRun` and no-op due checks.
- Schedule-state repair should remain a regression focus because multiple archive entries cluster around `nextRunAtMs` corruption, zero values, unresolved schedules, and exact-second slot identity.
-
-## Evidence
-
-### Docs
-
- `docs/automation/cron-jobs.md` documents job persistence at `~/.openclaw/cron/jobs.json`, runtime state in `jobs-state.json`, schedule kinds, timezone, stagger, manual run, run history, and management commands.
- `docs/cli/cron.md` provides the CLI reference for `openclaw cron add`, `list`, `get`, `show`, `edit`, `run`, `runs`, and `remove`.
- `docs/gateway/protocol.md` lists automation RPCs including `cron.get`, `cron.list`, `cron.status`, `cron.add`, `cron.update`, `cron.remove`, `cron.run`, and `cron.runs`.
-
-### Source
-
- `src/gateway/server-methods/cron.ts` implements validated Gateway methods for `cron.list`, `cron.add`, `cron.update`, `cron.remove`, `cron.run`, and `cron.runs`.
- `src/cli/cron-cli/register.cron-add.ts`, `src/cli/cron-cli/register.cron-edit.ts`, `src/cli/cron-cli/schedule-options.ts`, and `src/cli/cron-cli/shared.ts` implement CLI flag parsing, schedule construction, and display.
- `src/agents/tools/cron-tool.ts` exposes the agent cron tool, recovers flat params into job objects, handles self-scope introspection, and calls Gateway cron methods.
- `src/cron/normalize.ts`, `src/cron/schedule.ts`, `src/cron/stagger.ts`, `src/cron/service/jobs.ts`, `src/cron/service/ops.ts`, and `src/cron/service/store.ts` normalize, compute, persist, and mutate jobs.
-
-### Integration tests
-
- `test/gateway.multi.e2e.test.ts` is broad gateway e2e coverage but not specific to all cron schedule-management paths.
- `src/gateway/tools-invoke-http.cron-regression.test.ts` exercises cron through Gateway tool invocation.
- `src/cron/cron-protocol-conformance.test.ts` and `src/cron/cron-protocol-schema.test.ts` exercise protocol shape and compatibility.
-
-### Unit tests
-
- `src/cron/schedule.test.ts`, `src/cron/parse.test.ts`, `src/cron/normalize.test.ts`, `src/cron/stagger.test.ts`, and `src/cron/validate-timestamp.ts` cover schedule parsing and normalization.
- `src/cron/service.jobs.test.ts`, `src/cron/service.jobs.top-of-hour-stagger.test.ts`, `src/cron/service.issue-regressions.test.ts`, `src/cron/service.store-load-invalid-main-job.test.ts`, and `src/cron/service/ops.test.ts` cover job creation, mutation, repair, and store behavior.
- `src/cli/cron-cli/register.cron-simple.test.ts`, `src/cli/cron-cli/register.cron-edit.test.ts`, `src/cli/cron-cli/shared.test.ts`, and `src/agents/tools/cron-tool.schema.test.ts` cover CLI and agent-tool surfaces.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "cron add schedule nextRunAtMs" --json --limit 5`
-
-Results:
-
- PR #52109, `fix(cron): apply MIN_REFIRE_GAP_MS to every-schedule jobs`, reports high-frequency `every` schedule refire risk.
- Issue #81691, `Cron future-slot repair misclassifies exact second cron slots`, reports exact-second state repair trouble.
- PR #75970, `fix(cron): ignore malformed persisted jobs`, points to malformed persisted job handling.
- Issue #83538, `cron: deleteAfterRun fires on manual run even when no run executes`, reports manual-run data-loss risk.
- Issue #73166, `Cron scheduler silently stops firing after ~2.5 days of gateway uptime`, reports long-uptime scheduling failure.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "cron add schedule nextRunAtMs"`
-
-Results:
-
- PR #66083 discussion: unresolved `computeJobNextRunAtMs` results previously caused refire loops; current fix adds maintenance wake behavior for enabled jobs without a next run.
- PR #63507 discussion: `nextRunAtMs=0` on non-schedule edits required repair.
- User thread `Triggering main to do something in a cron.` includes a concrete main-session cron job that did not fire and showed `lastError: "disabled"`.
- Review comment on PR #52619 warns that schedule computation errors could leave an enabled job with no armed timer.
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/gmail-pubsub-watchers.md
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/gmail-pubsub-watchers.md
@@ -1,123 +0,0 @@
---
-title: "Automation: cron, hooks, tasks, polling - Gmail Pub/Sub Watchers Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Automation: cron, hooks, tasks, polling - Gmail Pub/Sub Watchers Maturity Note
-
-## Summary
-
-Gmail Pub/Sub integration is documented and has focused setup/runtime code, but it is one of the least mature automation components in this surface. It depends on Google Pub/Sub, `gog`, OpenClaw hooks, public HTTPS routing, and often Tailscale Funnel path mapping. Archive evidence shows real operator confusion and an open issue where Pub/Sub pushes reach the topic but OpenClaw does not process them in a Docker plus Tailscale Funnel setup.
-
-## Category Scope
-
-This category covers `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, Tailscale/Funnel routing, Gmail model/thinking overrides, push token handling, body inclusion limits, safe external-content handling, and routing Gmail events into mapped hook isolated runs.
-
-## Features
-
- Gmail setup wizard: Covers Gmail setup wizard across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watcher behavior.
- Watcher start/serve: Covers Watcher start/serve across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watcher behavior.
- Tailscale/public routing: Covers Tailscale/public routing across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watcher behavior.
- Push token validation: Covers Push token validation across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watcher behavior.
- Gmail event routing: Covers Gmail event routing across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watcher behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Alpha (65%)`
- Positive signals: Focused tests cover Gmail config resolution, setup helpers, watcher lifecycle, stale cancellation, process replacement, and CLI setup behavior. Docs explain both wizard and manual setup.
- Negative signals: The most important behavior is live external ingress through Google Pub/Sub and a public HTTPS endpoint, and the local evidence does not prove the full external path under Docker/Funnel/reverse-proxy variants.
- Integration gaps: Missing repeatable live or fixture-backed proof for Pub/Sub push payload -> `gog watch serve` -> OpenClaw hook -> isolated agent run, including token/path failures and renewal.
-
-## Quality Score
-
- Score: `Alpha (58%)`
- Gitcrawl reports: Issue #77093 reports that Gmail Pub/Sub pushes reach the topic but OpenClaw's webhooks Gmail endpoint does not process real pushes in Docker plus Tailscale Funnel setup.
- Discrawl reports: A Discord Gmail integration thread repeatedly drills into Tailscale Funnel path stripping, `serve.path`, `tailscale.path`, `tailscale.target`, and push token config, indicating setup is easy to misalign.
- Good qualities: Runtime config builder validates required token/account/topic/push-token fields, command builders keep sensitive flags known, watcher lifecycle guards stale cancellation and re-entry, and docs recommend wizard setup.
- Bad qualities: The component has many moving parts outside the Gateway process. Path/token/routing mistakes produce hard-to-debug failures, and the archive shows that the documented happy path is not enough for common Docker/Funnel deployments.
- Excluded from quality: Test inventory and runtime proof depth; they are coverage inputs only.
-
-## Completeness Score
-
- Score: `Alpha (65%)`
- Surface instructions: evaluated against `references/completeness/automation-cron-hooks-tasks-polling.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Gmail setup wizard, Watcher start/serve, Tailscale/public routing, Push token validation, Gmail event routing.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Add a local integration harness that simulates Pub/Sub push envelopes and proves the full watcher-to-hook path.
- Docs should include side-by-side path examples for Tailscale Funnel prefix stripping versus direct reverse proxy.
- Setup diagnostics should check `hooks.gmail.serve.path`, `hooks.gmail.tailscale.path`, `hooks.gmail.tailscale.target`, public endpoint reachability, and hook token/push token alignment.
-
-## Evidence
-
-### Docs
-
- `docs/automation/cron-jobs.md#gmail-pubsub-integration` documents wizard setup, manual Google project/topic setup, watcher auto-start, and Gmail model/thinking overrides.
- `docs/automation/gmail-pubsub.md` redirects to the scheduled-tasks Gmail PubSub section.
- `docs/cli/webhooks.md` documents the `openclaw webhooks gmail setup` command surface.
-
-### Source
-
- `src/hooks/gmail.ts` builds Gmail hook runtime config, token generation, hook URLs, `gog` watch start/serve args, Tailscale config, and topic parsing.
- `src/hooks/gmail-watcher.ts`, `src/hooks/gmail-watcher-lifecycle.ts`, and `src/hooks/gmail-watcher-errors.ts` manage watcher process lifecycle and errors.
- `src/hooks/gmail-setup-utils.ts`, `src/hooks/gmail-ops.ts`, and `src/cli/webhooks-cli.ts` implement setup and CLI behavior.
- `src/gateway/hooks-mapping.ts` defines the Gmail preset mapping, and `src/agents/model-selection-shared.ts` resolves Gmail hook model overrides.
-
-### Integration tests
-
- No full live Google Pub/Sub integration test was found in the audited tree.
- `src/hooks/gmail-watcher-lifecycle.test.ts` and `src/hooks/gmail-watcher.test.ts` are closest to integration-style process lifecycle tests for the watcher.
-
-### Unit tests
-
- `src/hooks/gmail.test.ts`, `src/hooks/gmail-setup-utils.test.ts`, `src/hooks/gmail-watcher.test.ts`, and `src/hooks/gmail-watcher-lifecycle.test.ts` cover config, setup helpers, watcher cancellation, and process replacement.
- `src/cli/webhooks-cli.test.ts` covers CLI setup behavior.
- `src/agents/openclaw-gateway-tool.test.ts` covers protected config paths such as `hooks.gmail.allowUnsafeExternalContent`.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "gmail pubsub watcher hooks gmail model" --json --limit 5`
-
-Results:
-
- Issue #77093 reports Gmail Pub/Sub push reaching the topic but OpenClaw not processing real pushes in Docker plus Tailscale Funnel setup.
-
-Fallback query:
-
-`gitcrawl search openclaw/openclaw --query "Gmail PubSub Funnel" --json --limit 5`
-
-Results:
-
- Issue #77093 is again the matching result, specifically mentioning Docker plus Tailscale Funnel.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "gmail pubsub watcher hooks gmail model"`
-
-Results:
-
- No matching Discord messages returned for this exact query.
-
-Fallback query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "Gmail PubSub Funnel"`
-
-Results:
-
- A Discord Gmail integration thread explains that Tailscale Serve is not enough for Google Pub/Sub callbacks; Funnel or another public HTTPS URL is required.
- The same thread gives concrete `gog gmail watch serve` args and warns that Funnel can strip `/gmail-pubsub`, requiring `serve.path="/"` unless the target explicitly preserves the path.
- The same thread recommends checking `hooks.gmail.serve.path`, `hooks.gmail.tailscale.path`, `hooks.gmail.tailscale.target`, and `hooks.gmail.pushToken`.
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/heartbeat-commitments.md
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/heartbeat-commitments.md
@@ -1,125 +0,0 @@
---
-title: "Automation: cron, hooks, tasks, polling - Heartbeat Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Automation: cron, hooks, tasks, polling - Heartbeat Maturity Note
-
-## Summary
-
-Heartbeat and inferred commitments form the approximate polling/follow-up side of automation. The implementation includes scheduling, active-hours gates, wake cooldowns, event filtering, busy-session deferral, delivery routing, response-tool handling, and commitment extraction/runtime. It is featureful, but archive evidence shows behavior around active hours, group wakes, first post-heartbeat messages, and activity-based idle expectations still confuses users.
-
-## Category Scope
-
-Included in this category:
-
- Heartbeat scheduling: Covers Heartbeat scheduling across periodic heartbeat runs, active-hours and variable schedule behavior, wake/cooldown handling, heartbeat prompts and due-only task mode, and related heartbeat and commitments behavior.
- Active hours: Covers Active hours across periodic heartbeat runs, active-hours and variable schedule behavior, wake/cooldown handling, heartbeat prompts and due-only task mode, and related heartbeat and commitments behavior.
- Wake and cooldown handling: Covers Wake and cooldown handling across periodic heartbeat runs, active-hours and variable schedule behavior, wake/cooldown handling, heartbeat prompts and due-only task mode, and related heartbeat and commitments behavior.
- Due-only heartbeat tasks: Covers Due-only heartbeat tasks across periodic heartbeat runs, active-hours and variable schedule behavior, wake/cooldown handling, heartbeat prompts and due-only task mode, and related heartbeat and commitments behavior.
- Commitment check-ins: Covers Commitment check-ins across periodic heartbeat runs, active-hours and variable schedule behavior, wake/cooldown handling, heartbeat prompts and due-only task mode, and related heartbeat and commitments behavior.
-
-## Features
-
- Heartbeat scheduling: Covers Heartbeat scheduling across periodic heartbeat runs, active-hours and variable schedule behavior, wake/cooldown handling, heartbeat prompts and due-only task mode, and related heartbeat and commitments behavior.
- Active hours: Covers Active hours across periodic heartbeat runs, active-hours and variable schedule behavior, wake/cooldown handling, heartbeat prompts and due-only task mode, and related heartbeat and commitments behavior.
- Wake and cooldown handling: Covers Wake and cooldown handling across periodic heartbeat runs, active-hours and variable schedule behavior, wake/cooldown handling, heartbeat prompts and due-only task mode, and related heartbeat and commitments behavior.
- Due-only heartbeat tasks: Covers Due-only heartbeat tasks across periodic heartbeat runs, active-hours and variable schedule behavior, wake/cooldown handling, heartbeat prompts and due-only task mode, and related heartbeat and commitments behavior.
- Commitment check-ins: Covers Commitment check-ins across periodic heartbeat runs, active-hours and variable schedule behavior, wake/cooldown handling, heartbeat prompts and due-only task mode, and related heartbeat and commitments behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (82%)`
- Positive signals: Heartbeat scheduling, active hours, cooldowns, event filtering, busy-session guards, delivery routing, model overrides, commitments, ghost reminders, and active-hours e2e scheduling have focused tests.
- Negative signals: Full live behavior depends on active channels, long-running sessions, and user message timing after heartbeat; those are hard to prove with unit-only fixtures.
- Integration gaps: A live scenario should cover heartbeat active hours, due-only task skip, cron wake event, exec completion wake, subagent busy deferral, commitment check-in, and group-channel delivery.
-
-## Quality Score
-
- Score: `Beta (72%)`
- Gitcrawl reports: Issue #14051 requests activity-based heartbeat idle timeout; PR #58683 adds time-of-day variable intervals; issue #40611 reports heartbeat drift retries blocking Telegram; issue #85614 reports the first user message after heartbeat poll misidentified as heartbeat continuation; PR #78718 fixes agent-level fallback defaults.
- Discrawl reports: Dreaming cron thread shows operators can be surprised that main-target managed jobs run through heartbeat and can be skipped by `activeHours`; group wake issue #47578 was closed after current main fixed exec/ACP completion wakes.
- Good qualities: Heartbeat has explicit skip reasons, active-hours logic, cooldown/flood guards, delivery target preservation, response-tool support, and commitment-specific runtime policy.
- Bad qualities: Heartbeat participates in many adjacent flows - cron, exec completions, commitments, dreaming, groups, and task completions - and users still misread why runs skip or route through heartbeat.
- Excluded from quality: Test inventory and runtime proof depth; they are coverage inputs only.
-
-## Completeness Score
-
- Score: `Stable (82%)`
- Surface instructions: evaluated against `references/completeness/automation-cron-hooks-tasks-polling.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Heartbeat scheduling, Active hours, Wake and cooldown handling, Due-only heartbeat tasks, and Commitment check-ins.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Docs should make the "main-session cron uses heartbeat lane" relationship more prominent.
- Activity-based idle timeout remains an open product request.
- The first-message-after-heartbeat boundary should remain a regression focus because it can corrupt normal user interaction.
-
-## Evidence
-
-### Docs
-
- `docs/automation/index.md` compares cron and heartbeat, explains heartbeat as approximate periodic awareness, and notes task records are not created for heartbeat turns.
- `docs/gateway/heartbeat.md` documents heartbeat configuration, active hours, wake behavior, skip reasons, and troubleshooting.
- `docs/concepts/commitments.md` documents inferred commitments and heartbeat delivery of due check-ins.
-
-### Source
-
- `src/infra/heartbeat-runner.ts`, `src/infra/heartbeat-schedule.ts`, `src/infra/heartbeat-active-hours.ts`, `src/infra/heartbeat-cooldown.ts`, `src/infra/heartbeat-events-filter.ts`, `src/infra/heartbeat-wake.ts`, and `src/infra/heartbeat-visibility.ts` implement heartbeat scheduling and event handling.
- `src/commitments/runtime.ts`, `src/commitments/extraction.ts`, `src/commitments/store.ts`, and `src/commitments/model-selection.runtime.ts` implement inferred commitments.
- `src/auto-reply/heartbeat.ts`, `src/auto-reply/heartbeat-filter.ts`, and `src/agents/heartbeat-system-prompt.ts` connect heartbeat to agent prompting and response behavior.
-
-### Integration tests
-
- `src/infra/heartbeat-runner.active-hours-schedule.e2e.test.ts` tests active-hours-aware scheduling.
- `src/commitments/commitments-full-chain.integration.test.ts` and `src/commitments/commitments-heartbeat-policy.e2e.test.ts` cover commitment-to-heartbeat flows.
- `src/infra/heartbeat-runner.ghost-reminder.test.ts` covers cron/exec event routing through heartbeat.
-
-### Unit tests
-
- `src/infra/heartbeat-schedule.test.ts`, `src/infra/heartbeat-active-hours.test.ts`, `src/infra/heartbeat-cooldown.test.ts`, `src/infra/heartbeat-events-filter.test.ts`, `src/infra/heartbeat-runner.skips-busy-session-lane.test.ts`, `src/infra/heartbeat-runner.subagent-session-guard.test.ts`, and `src/infra/heartbeat-runner.model-override.test.ts` cover heartbeat mechanics.
- `src/commitments/extraction.test.ts`, `src/commitments/store.test.ts`, and `src/commitments/runtime.test.ts` cover commitments.
- `src/auto-reply/heartbeat.test.ts` and `src/auto-reply/heartbeat-filter.test.ts` cover auto-reply heartbeat behavior.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "heartbeat commitments skipWhenBusy activeHours no-tasks-due" --json --limit 5`
-
-Results:
-
- No hits for the exact query.
-
-Fallback query:
-
-`gitcrawl search openclaw/openclaw --query "heartbeat activeHours" --json --limit 5`
-
-Results:
-
- Issue #14051 requests activity-based heartbeat with idle timeout.
- PR #58683 adds time-of-day scheduling for variable intervals.
- Issue #40611 reports heartbeat drift retry blocking Telegram during active conversations.
- Issue #85614 reports first user message after heartbeat poll misidentified as heartbeat continuation.
- PR #78718 fixes agent-level heartbeat fallback defaults.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "heartbeat activeHours"`
-
-Results:
-
- Issue #14051 automated review keeps activity-based heartbeat idle timeout open.
- Issue #47578 closure says current main implements group-session exec/ACP wake path with targeted heartbeat wakes preserving session keys.
- Dreaming issue thread explains a managed dreaming cron targeted at `main` runs through heartbeat and can be skipped by active-hours quiet windows.
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/http-hooks-webhooks.md
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/http-hooks-webhooks.md
@@ -1,128 +0,0 @@
---
-title: "Automation: cron, hooks, tasks, polling - HTTP Webhooks Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Automation: cron, hooks, tasks, polling - HTTP Webhooks Maturity Note
-
-## Summary
-
-HTTP hooks expose the external automation ingress for wake and isolated agent runs. The contract has strong security defaults: dedicated path, bearer or `x-openclaw-token` auth, query-token rejection, agent allowlists, session-key prefix gates, mapped hook transforms, and external-content boundaries. Coverage and quality are held back by user-facing integration friction and open requests for related webhook behavior.
-
-## Category Scope
-
-This category covers `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, request body limits, path/client-IP policy, allowed agent/session controls, idempotency keys, payload wrapping, asynchronous dispatch, and webhook plugin ingress helpers.
-
-## Features
-
- POST /hooks/wake: Covers POST /hooks/wake across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhooks behavior.
- POST /hooks/agent: Covers POST /hooks/agent across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhooks behavior.
- Mapped hooks: Covers Mapped hooks across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhooks behavior.
- Hook auth policy: Covers Hook auth policy across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhooks behavior.
- Async dispatch: Covers Async dispatch across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhooks behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Beta (76%)`
- Positive signals: Unit and integration-style coverage exists for hook request handling, trust/session policy, mapping resolution, request timeout, plugin webhook guards, and the bundled webhooks extension.
- Negative signals: Real external integrations are harder to prove locally; Gmail/Tailscale and channel webhook setups show that end-to-end ingress depends on reverse-proxy path/token details outside the core handler.
- Integration gaps: A single e2e fixture should stand up a Gateway hook handler, POST `wake`, `agent`, and mapped hook requests, validate token/session/agent policy failures, and prove the resulting run/event appears in task or session state.
-
-## Quality Score
-
- Score: `Beta (70%)`
- Gitcrawl reports: PR #62528 requests `/hooks/message` with auth parity, PR #83118 requests tokenFile auth secrets, issue #77093 reports Gmail Pub/Sub push not processing in Docker plus Tailscale Funnel, and issue #64556 reports `hooks.mappings[].agentId`/`sessionKey` ignored for `action="wake"`.
- Discrawl reports: PR #69267 adds logging for 4xx hook gateway errors because invalid webhook POSTs previously left no trace; Discord user guidance emphasizes external workflow engines for deterministic human-in-the-loop Telegram flows and lists `/hooks/agent` and `/hooks/wake` as worker-runtime ingress.
- Good qualities: Auth is centralized, query-string tokens are rejected, session-key selection is opt-in and prefix-bound, templated mappings require prefix gates, and mapped external content can be wrapped as untrusted.
- Bad qualities: Debuggability and integration setup remain weak spots. Several reports cluster around silent validation failures, missing auth-secret ergonomics, and path/token mismatches with real reverse proxies.
- Excluded from quality: Test inventory and runtime proof depth; they are coverage inputs only.
-
-## Completeness Score
-
- Score: `Beta (76%)`
- Surface instructions: evaluated against `references/completeness/automation-cron-hooks-tasks-polling.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for POST /hooks/wake, POST /hooks/agent, Mapped hooks, Hook auth policy, and Async dispatch.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Hook 4xx responses need consistently actionable logs and operator-facing troubleshooting.
- Token-file support and safer secret management would reduce config-risk pressure.
- Mapped wake action semantics need to be explicit for agent/session fields so operators know which fields apply.
-
-## Evidence
-
-### Docs
-
- `docs/automation/cron-jobs.md#webhooks` documents `/hooks/wake`, `/hooks/agent`, mapped hooks, auth headers, query-token rejection, allowed agents, session-key controls, and safety boundaries.
- `docs/automation/webhook.md` redirects to the scheduled-tasks webhook docs.
- `docs/cli/webhooks.md` documents webhook CLI setup, including Gmail setup.
-
-### Source
-
- `src/gateway/hooks.ts` resolves hook config, token extraction, body parsing, allowed agents, session-key policy, delivery fields, and payload normalization.
- `src/gateway/server/hooks.ts` and `src/gateway/server/hooks-request-handler.ts` implement request dispatch and HTTP handling.
- `src/gateway/hooks-mapping.ts` implements preset and custom mapped hook transforms, path matching, templates, and transform path containment.
- `src/gateway/hooks-policy.ts` and `src/gateway/server/hook-client-ip-config.ts` implement policy helpers.
- `src/plugin-sdk/webhook-ingress.ts`, `src/plugin-sdk/webhook-request-guards.ts`, `src/plugin-sdk/webhook-targets.ts`, and `extensions/webhooks/` implement plugin-facing webhook helpers and the bundled webhooks plugin.
-
-### Integration tests
-
- `src/gateway/server/hooks.agent-trust.test.ts` exercises hook dispatch trust boundaries.
- `src/gateway/server-http.hooks-request-timeout.test.ts` covers request timeout behavior.
- `extensions/webhooks/index.test.ts` and `extensions/webhooks/src/http.test.ts` exercise the bundled webhooks plugin path.
-
-### Unit tests
-
- `src/gateway/hooks.test.ts`, `src/gateway/hooks-mapping.test.ts`, `src/gateway/hooks-test-helpers.ts`, and `src/gateway/server.hooks.test.ts` cover core hook parsing and mapping behavior.
- `src/plugin-sdk/webhook-request-guards.test.ts`, `src/plugin-sdk/webhook-memory-guards.test.ts`, and `src/plugin-sdk/webhook-targets.test.ts` cover SDK guard helpers.
- `src/gateway/server/hooks.agent-trust.test.ts` covers agent/session trust policy details.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "hooks agent wake token allowedSessionKey" --json --limit 5`
-
-Results:
-
- No hits for the exact query.
-
-Fallback query:
-
-`gitcrawl search openclaw/openclaw --query "webhook token hook" --json --limit 5`
-
-Results:
-
- PR #62528 requests `/hooks/message` ingress with webhook auth parity.
- PR #83118 requests token-file auth secrets shared across gateway hooks and Gmail runtime/setup.
- Issue #77093 reports real Gmail Pub/Sub pushes reaching the topic but not processing through the webhook/watcher path.
- PR #64126 references shared secret comparison for hook token validation.
- Issue #64556 reports mapped hook `agentId` and `sessionKey` ignored for wake actions.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "hooks agent wake token allowedSessionKey"`
-
-Results:
-
- No matching Discord messages returned for this exact query.
-
-Fallback query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "webhook token hook"`
-
-Results:
-
- PR #69267 discussion adds logging for 4xx hook gateway errors, including token-in-query, invalid payload, disallowed agent, session-key errors, unauthorized requests, and missing endpoints.
- Discord workflow guidance recommends external workflow ownership for hard deterministic Telegram orchestration and treats `/hooks/agent` and `/hooks/wake` as OpenClaw worker-runtime ingress.
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/internal-hooks.md
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/internal-hooks.md
@@ -1,144 +0,0 @@
---
-title: "Automation: cron, hooks, tasks, polling - Automation Hooks Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Automation: cron, hooks, tasks, polling - Automation Hooks Maturity Note
-
-## Summary
-
-Internal hooks are a usable operator automation surface with docs, CLI management, metadata-based discovery, bundled hooks, hook packs, workspace/managed precedence, and lifecycle events. The main maturity issues are scope clarity and operational overhead: users still ask for plugin-style pre-tool hooks in the internal hook system, and archive reports show bootstrap-extra-files behavior and hook overhead remain sources of confusion.
-
-## Category Scope
-
-Included in this category:
-
- HOOK.md authoring: Covers HOOK.md authoring across `HOOK.md` metadata, handler loading, bundled/managed/workspace/plugin hook discovery, eligibility policy, and related internal hooks behavior.
- Hook discovery: Covers Hook discovery across `HOOK.md` metadata, handler loading, bundled/managed/workspace/plugin hook discovery, eligibility policy, and related internal hooks behavior.
- Hook CLI management: Covers Hook CLI management across `HOOK.md` metadata, handler loading, bundled/managed/workspace/plugin hook discovery, eligibility policy, and related internal hooks behavior.
- Hook packs: Covers Hook packs across `HOOK.md` metadata, handler loading, bundled/managed/workspace/plugin hook discovery, eligibility policy, and related internal hooks behavior.
- Lifecycle event dispatch: Covers Lifecycle event dispatch across `HOOK.md` metadata, handler loading, bundled/managed/workspace/plugin hook discovery, eligibility policy, and related internal hooks behavior.
- api.on registration: Covers api.on registration across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- Tool-call policy hooks: Covers Tool-call policy hooks across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- Message hooks: Covers Message hooks across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- Session/lifecycle hooks: Covers Session/lifecycle hooks across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- Plugin approval requests: Covers Plugin approval requests across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- cron_changed: Covers cron_changed across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
-
-## Features
-
- HOOK.md authoring: Covers HOOK.md authoring across `HOOK.md` metadata, handler loading, bundled/managed/workspace/plugin hook discovery, eligibility policy, and related internal hooks behavior.
- Hook discovery: Covers Hook discovery across `HOOK.md` metadata, handler loading, bundled/managed/workspace/plugin hook discovery, eligibility policy, and related internal hooks behavior.
- Hook CLI management: Covers Hook CLI management across `HOOK.md` metadata, handler loading, bundled/managed/workspace/plugin hook discovery, eligibility policy, and related internal hooks behavior.
- Hook packs: Covers Hook packs across `HOOK.md` metadata, handler loading, bundled/managed/workspace/plugin hook discovery, eligibility policy, and related internal hooks behavior.
- Lifecycle event dispatch: Covers Lifecycle event dispatch across `HOOK.md` metadata, handler loading, bundled/managed/workspace/plugin hook discovery, eligibility policy, and related internal hooks behavior.
- api.on registration: Covers api.on registration across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- Tool-call policy hooks: Covers Tool-call policy hooks across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- Message hooks: Covers Message hooks across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- Session/lifecycle hooks: Covers Session/lifecycle hooks across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- Plugin approval requests: Covers Plugin approval requests across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- cron_changed: Covers cron_changed across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Beta (78%)`
- Positive signals: Source and tests cover frontmatter parsing, workspace loading, import URLs, module loading, config eligibility, hook installation/update, fire-and-forget behavior, bundled hook handlers, message mappers, and plugin-managed hook listing.
- Negative signals: Coverage is broad at module level but limited for real Gateway lifecycle order across startup, shutdown, message flow, compaction, and command events under multiple configured hook directories.
- Integration gaps: A single Gateway scenario should load bundled, managed, workspace, and plugin-managed hooks, verify precedence, exercise a replyable event and a non-replyable lifecycle event, and prove CLI status reflects execution eligibility.
-
-## Quality Score
-
- Score: `Beta (72%)`
- Gitcrawl reports: Issue #84744 reports `bootstrap-extra-files.paths` silently dropped by the recognized bootstrap basename whitelist; PR #74735 adds session-scoped extra files; issue #43454 requests broader Gateway lifecycle hooks; issue #53600 calls out hook overhead on constrained VPS setups.
- Discrawl reports: Discord logs show bundled hook loading in real gateway startup, user confusion around repeated `BOOT.md` reads, and an issue opened for a `before_tool` internal hook even though that belongs to plugin hooks today.
- Good qualities: Discovery has clear precedence, workspace hooks cannot override managed hooks with the same name, handler path boundary checks exist, mutable hooks get cache-busted import URLs, and bundled hooks are documented.
- Bad qualities: Internal hooks and typed plugin hooks remain easy to conflate. Some hook behavior silently filters user intent, and hook overhead can matter on small hosts.
- Excluded from quality: Test inventory and runtime proof depth; they are coverage inputs only.
-
-## Completeness Score
-
- Score: `Beta (78%)`
- Surface instructions: evaluated against `references/completeness/automation-cron-hooks-tasks-polling.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for HOOK.md authoring, Hook discovery, Hook CLI management, Hook packs, Lifecycle event dispatch, api.on registration, Tool-call policy hooks, Message hooks, Session/lifecycle hooks, Plugin approval requests, and cron_changed.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Docs and CLI output should more aggressively distinguish internal hooks from typed plugin hooks.
- `bootstrap-extra-files` should report filtered paths clearly instead of making whitelist behavior feel silent.
- Startup/status output should make per-hook cost and loaded event scope more visible for constrained deployments.
-
-## Evidence
-
-### Docs
-
- `docs/automation/hooks.md` documents internal hook purpose, event types, `HOOK.md` structure, discovery precedence, bundled hooks, config, CLI reference, and best practices.
- `docs/cli/hooks.md` documents CLI operations for hook management.
- `docs/plugins/hooks.md` distinguishes typed plugin hooks from internal hooks.
-
-### Source
-
- `src/hooks/frontmatter.ts`, `src/hooks/workspace.ts`, `src/hooks/loader.ts`, `src/hooks/config.ts`, `src/hooks/policy.ts`, `src/hooks/internal-hooks.ts`, `src/hooks/install.ts`, and `src/hooks/update.ts` implement the core internal hook system.
- `src/hooks/bundled/session-memory/`, `src/hooks/bundled/bootstrap-extra-files/`, `src/hooks/bundled/command-logger/`, `src/hooks/bundled/compaction-notifier/`, and `src/hooks/bundled/boot-md/` implement bundled hooks.
- `src/cli/hooks-cli.ts`, `src/gateway/session-patch-hooks.ts`, `src/agents/bootstrap-hooks.ts`, and `src/auto-reply/reply/message-preprocess-hooks.ts` connect hooks to CLI and runtime events.
-
-### Integration tests
-
- `src/hooks/bundled/boot-md/handler.gateway-startup.integration.test.ts` exercises boot-md at gateway startup.
- `src/gateway/server.sessions.reset-hooks.test.ts` and `src/gateway/server.sessions.permissions-hooks.test.ts` exercise session hook integration.
- `src/auto-reply/reply/get-reply.message-hooks.test.ts` and `src/auto-reply/reply/message-preprocess-hooks.test.ts` exercise message-flow hook integration.
-
-### Unit tests
-
- `src/hooks/frontmatter.test.ts`, `src/hooks/workspace.test.ts`, `src/hooks/loader.test.ts`, `src/hooks/module-loader.test.ts`, `src/hooks/configured.ts`, `src/hooks/policy.test.ts`, `src/hooks/fire-and-forget.test.ts`, and `src/hooks/internal-hooks.test.ts` cover core behavior.
- `src/hooks/bundled/session-memory/handler.test.ts`, `src/hooks/bundled/bootstrap-extra-files/handler.test.ts`, and `src/hooks/bundled/boot-md/handler.test.ts` cover bundled hooks.
- `src/cli/hooks-cli.test.ts`, `src/hooks/hooks-install.test.ts`, and `src/hooks/update.test.ts` cover CLI/install/update behavior.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "internal hooks HOOK.md session-memory bootstrap-extra-files" --json --limit 5`
-
-Results:
-
- Issue #84744 reports `bootstrap-extra-files` user-configured paths silently dropped by the whitelist.
- PR #74735 adds session-scoped extra files.
- Issue #43454 requests broader Gateway lifecycle hooks.
- Issue #53600 mentions hook overhead per turn on constrained VPS setups.
-
-Fallback query:
-
-`gitcrawl search openclaw/openclaw --query "session-memory hook bootstrap-extra-files" --json --limit 5`
-
-Results:
-
- Same cluster plus issue #22438 on tiered bootstrap file loading, reinforcing bootstrap context-size pressure.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "internal hooks HOOK.md session-memory bootstrap-extra-files"`
-
-Results:
-
- No matching Discord messages returned for this exact query.
-
-Fallback query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "session-memory hook bootstrap-extra-files"`
-
-Results:
-
- Real gateway logs show bundled hooks loaded at startup: `boot-md`, `bootstrap-extra-files`, `command-logger`, and `session-memory`.
- User thread asks why agents repeatedly read `BOOT.md` and shows `openclaw hooks list` with bundled hooks and a plugin-managed memory-core hook.
- Issue #60065 discussion requests pre-tool hook capability in the internal hook surface, showing confusion between internal hooks and plugin hooks.
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/message-polls-process-polling.md
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/message-polls-process-polling.md
@@ -1,135 +0,0 @@
---
-title: "Automation: cron, hooks, tasks, polling - Polling Controls Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Automation: cron, hooks, tasks, polling - Polling Controls Maturity Note
-
-## Summary
-
-This component covers two user-visible meanings of polling: channel poll creation through the message tool/CLI, and process polling for long-running commands. Message poll actions are supported for several channels, while process polling has guardrails for long waits and repeated no-progress loops. Quality is limited by archive reports around infinite/repeated polling loops and operators needing to pick the right `exec`/`process` pattern for long work.
-
-## Category Scope
-
-Included in this category:
-
- openclaw message poll: Covers openclaw message poll across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- Telegram polls: Covers Telegram polls across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- Teams polls: Covers Teams polls across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- Poll flags: Covers Poll flags across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- Channel capability gates: Covers Channel capability gates across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- process poll: Covers process poll across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- process log: Covers process log across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- Background process status: Covers Background process status across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- No-progress loop detection: Covers No-progress loop detection across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- Process input controls: Covers Process input controls across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
-
-## Features
-
- openclaw message poll: Covers openclaw message poll across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- Telegram polls: Covers Telegram polls across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- Teams polls: Covers Teams polls across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- Poll flags: Covers Poll flags across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- Channel capability gates: Covers Channel capability gates across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- process poll: Covers process poll across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- process log: Covers process log across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- Background process status: Covers Background process status across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- No-progress loop detection: Covers No-progress loop detection across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- Process input controls: Covers Process input controls across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Beta (74%)`
- Positive signals: Poll parameter parsing, message poll command registration, outbound poll actions, channel-specific poll adapters, process polling, command-poll backoff, and polling-loop detection have targeted tests.
- Negative signals: Channel poll support is fragmented by channel capabilities, and process polling behavior depends on real child-process timing, terminal/PTY state, and agent prompting.
- Integration gaps: Add an e2e that starts a long-running process, polls it with progress and no-progress cases, then sends a channel poll through one live-capable adapter and verifies the returned message/poll id.
-
-## Quality Score
-
- Score: `Beta (70%)`
- Gitcrawl reports: Issue #62432 reports agents repeatedly relaunching exec instead of switching to process poll; PR #81157 fixes invalid `process.action` causing infinite retry loops; issue #65223 discusses aborting process poll loops while keeping underlying exec alive; issue #69582 reports parameter injection causing an infinite tool loop.
- Discrawl reports: May 17 cron discussion says long deterministic cron work should use OpenClaw `exec` plus `process` polling rather than Codex-native shell babysitting. A Clawsweeper report mentions a poll hang that was closed quickly as a notable issue.
- Good qualities: The process tool has explicit `poll` actions, timeout clamping, backoff state, no-progress loop detection, and system-prompt guidance against busy polling. Message poll actions go through channel capability gates.
- Bad qualities: The surface remains easy for agents to misuse: repeated exec relaunch, malformed process action loops, and no-progress polls all appear in archive reports.
- Excluded from quality: Test inventory and runtime proof depth; they are coverage inputs only.
-
-## Completeness Score
-
- Score: `Beta (74%)`
- Surface instructions: evaluated against `references/completeness/automation-cron-hooks-tasks-polling.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for openclaw message poll, Telegram polls, Teams polls, Poll flags, Channel capability gates, process poll, process log, Background process status, No-progress loop detection, Process input controls.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Agent guidance should more directly say when to switch from `exec` to `process poll` and when to stop polling.
- Channel poll docs should make support/capability differences more discoverable from the automation index.
- Loop detection should continue to block malformed or no-progress polling patterns before they burn runtime.
-
-## Evidence
-
-### Docs
-
- `docs/automation/poll.md` redirects to `docs/cli/message.md` for poll documentation.
- `docs/cli/message.md` documents message poll usage.
- `docs/channels/telegram.md` documents Telegram `openclaw message poll` usage and Telegram-specific poll flags.
- `docs/channels/msteams.md` documents Teams polls as Adaptive Cards.
- `docs/gateway/background-process.md` documents process polling/logging for background processes.
-
-### Source
-
- `src/polls.ts`, `src/poll-params.ts`, `src/cli/program/message/register.poll.ts`, and `src/infra/outbound/message-action-runner.poll.test.ts` cover message poll data and action execution.
- `src/agents/bash-tools.process.ts`, `src/agents/command-poll-backoff.ts`, and `src/agents/tool-loop-detection.ts` implement process polling and anti-loop behavior.
- `extensions/msteams/src/polls.ts`, `extensions/matrix/src/matrix/actions/polls.ts`, and Telegram channel action support implement channel-specific poll behavior.
-
-### Integration tests
-
- `src/agents/agent-tools.before-tool-call.e2e.test.ts` includes poll-loop behavior through agent tool execution.
- `src/infra/outbound/message-action-runner.poll.test.ts` exercises outbound poll action execution.
- Channel poll tests are mostly adapter-level rather than live channel e2e.
-
-### Unit tests
-
- `src/polls.test.ts` and `src/poll-params.test.ts` cover poll primitives.
- `src/agents/bash-tools.process.poll-timeout.test.ts`, `src/agents/command-poll-backoff.test.ts`, and `src/agents/tool-loop-detection.test.ts` cover process polling and loop detection.
- `extensions/msteams/src/polls.test.ts`, `extensions/matrix/src/matrix/actions/polls.test.ts`, and `extensions/matrix/src/matrix/poll-types.test.ts` cover channel poll adapters.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "message poll process poll polling loop no progress" --json --limit 5`
-
-Results:
-
- No hits for the exact query.
-
-Fallback query:
-
-`gitcrawl search openclaw/openclaw --query "poll loop" --json --limit 5`
-
-Results:
-
- Issue #65223 discusses process poll abort signal handling.
- PR #81157 fixes invalid `process.action` at the tool invocation boundary to prevent infinite loops.
- Issue #62432 reports repeated exec relaunch instead of switching to process poll.
- Issue #69582 reports parameter injection causing an infinite tool invocation loop.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "poll loop"`
-
-Results:
-
- Clawsweeper report calls out issue #86477 as a poll hang that was closed quickly.
- May 17 cron thread recommends `exec` plus `process` polling for long deterministic work and warns against Codex owning the shell loop.
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/plugin-hooks.md
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/plugin-hooks.md
@@ -1,120 +0,0 @@
---
-title: "Automation: cron, hooks, tasks, polling - Plugin Hooks Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Automation: cron, hooks, tasks, polling - Plugin Hooks Maturity Note
-
-## Summary
-
-Typed plugin hooks are one of the more capable automation surfaces: they cover model resolution, prompt construction, tool policy, message dispatch, sessions, compaction, subagents, lifecycle, installation, and cron-change observation. Coverage is broad, but quality is limited by uneven path coverage and live reports where expected hook events do not fire on specific execution paths.
-
-## Category Scope
-
-This category covers `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, session and lifecycle hooks, subagent hooks, `cron_changed`, plugin approval requests, trusted tool policies, hook contexts, and SDK/runtime wiring.
-
-## Features
-
- api.on registration: Covers api.on registration across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- Tool-call policy hooks: Covers Tool-call policy hooks across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- Message hooks: Covers Message hooks across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- Session/lifecycle hooks: Covers Session/lifecycle hooks across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- Plugin approval requests: Covers Plugin approval requests across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- cron_changed: Covers cron_changed across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (80%)`
- Positive signals: There is focused coverage for before/after tool hooks, before-agent hooks, reply/finalize hooks, lifecycle gates, security, correlation, wired gateway/session/subagent/reply-dispatch paths, and host-hook contracts.
- Negative signals: Archive evidence and source shape indicate not every tool execution path consistently goes through both pre and post hook wrappers, especially direct gateway tool invocation and MCP loopback.
- Integration gaps: A cross-runtime hook conformance suite should prove the same hook lifecycle for embedded OpenClaw tools, native Codex hooks, direct Gateway `tools.invoke`, MCP loopback, and channel-triggered runs.
-
-## Quality Score
-
- Score: `Beta (75%)`
- Gitcrawl reports: PR #62701 adds context to `before_tool_call`; issue #76201 reports plugin `before_tool_call` not firing for native exec on the Anthropic harness; issue #86777 asks to document Codex app-server report-mode handling of plugin `requireApproval`; issue #23451 on a built-in tool confirmation gate remains open even though plugin approvals exist.
- Discrawl reports: Maintainer discussion says `before_tool_call` and `after_tool_call` exist, and Codex native relay maps `PreToolUse`/`PostToolUse`, but direct gateway `tools.invoke` and MCP loopback appear to run `before_tool_call` without consistently running `after_tool_call`.
- Good qualities: The hook catalog is explicit, decision semantics are typed, priorities and timeouts are configurable, plugin config is injected per handler, and approval requests have a documented resolution contract.
- Bad qualities: Runtime coverage is uneven enough that plugin authors cannot assume every execution path fires the same hook sequence. This is a quality issue because policy and observability plugins depend on uniform hook boundaries.
- Excluded from quality: Test inventory and runtime proof depth; they are coverage inputs only.
-
-## Completeness Score
-
- Score: `Stable (80%)`
- Surface instructions: evaluated against `references/completeness/automation-cron-hooks-tasks-polling.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for api.on registration, Tool-call policy hooks, Message hooks, Session/lifecycle hooks, Plugin approval requests, cron_changed.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Centralize tool execution through a lifecycle wrapper so pre/post hooks and diagnostics cannot drift by execution path.
- Document native Codex and report-mode behavior for plugin approvals.
- Make hook-path coverage visible in SDK docs so plugin authors know which hooks are guaranteed for each runtime.
-
-## Evidence
-
-### Docs
-
- `docs/plugins/hooks.md` documents the hook catalog, priority and timeout behavior, decision result semantics, contexts, `cron_changed`, and tool-call policy.
- `docs/plugins/plugin-permission-requests.md` documents plugin approvals and how `before_tool_call.requireApproval` interacts with `/approve`.
- `docs/plugins/sdk-subpaths.md` lists `plugin-sdk/hook-runtime` and related runtime subpaths.
-
-### Source
-
- `src/plugins/hooks.ts`, `src/plugins/host-hooks.ts`, `src/plugins/host-hook-runtime.ts`, `src/plugins/hook-runner-global.ts`, `src/plugins/hook-decision-types.ts`, and `src/plugins/hook-agent-context.ts` implement typed hook registration and execution.
- `src/gateway/server-methods/plugin-host-hooks.ts` wires plugin hooks into Gateway methods.
- `src/plugin-sdk/hook-runtime.ts` exposes hook helpers through the SDK.
- `extensions/codex/src/app-server/native-hook-relay.ts` maps Codex native hooks into OpenClaw hook behavior.
-
-### Integration tests
-
- `src/plugins/wired-hooks-after-tool-call.e2e.test.ts`, `src/plugins/wired-hooks-gateway.test.ts`, `src/plugins/wired-hooks-session.test.ts`, `src/plugins/wired-hooks-subagent.test.ts`, and `src/plugins/wired-hooks-reply-dispatch.test.ts` exercise integrated hook wiring.
- `src/plugins/contracts/host-hooks.contract.test.ts` covers host-hook contract behavior.
- `extensions/codex/src/app-server/run-attempt.hooks.test.ts` and `extensions/codex/src/app-server/native-hook-relay.test.ts` cover Codex app-server hook relay paths.
-
-### Unit tests
-
- `src/plugins/hooks.before-tool-call.test.ts`, `src/plugins/hooks.before-agent-start.test.ts`, `src/plugins/hooks.before-agent-reply.test.ts`, `src/plugins/hooks.before-agent-finalize.test.ts`, `src/plugins/hooks.before-install.test.ts`, `src/plugins/hooks.security.test.ts`, and `src/plugins/hook-runner-global.test.ts` cover hook semantics.
- `src/plugins/hook-decision-types.test.ts`, `src/plugins/hook-agent-context.test.ts`, and `src/plugins/host-hook-cleanup-timeout.test.ts` cover decisions, context, and cleanup.
- `src/agents/agent-tools.before-tool-call.integration.e2e.test.ts` exercises before-tool behavior from the agent tools side.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "plugin hooks before_tool_call cron_changed before_agent_finalize" --json --limit 5`
-
-Results:
-
- No hits for the exact query.
-
-Fallback query:
-
-`gitcrawl search openclaw/openclaw --query "before_tool_call hook" --json --limit 5`
-
-Results:
-
- PR #62701 adds optional precedingText and messageId to `before_tool_call` context.
- Issue #76201 reports `before_tool_call` not firing for native exec on a specific harness.
- Issue #79168 references content-based prompt injection scanning on tool output.
- Issue #48509 requests a durable-state `before_persistence_write` hook.
- Issue #86777 asks to document plugin `requireApproval` in Codex app-server report mode.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "before_tool_call hook"`
-
-Results:
-
- Maintainer discussion says pre/post tool hooks exist and Codex native relay maps them, but post-hook coverage is uneven across direct Gateway `tools.invoke` and MCP loopback paths.
- GitHub issue comments kept #23451, #13364, and #13225 open, clarifying that plugin `before_tool_call` exists but does not satisfy every requested internal-hook or model-delegation use case.
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/report.md
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/report.md
@@ -1,292 +0,0 @@
---
-title: "Automation: cron, hooks, tasks, polling Maturity Report"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Automation: cron, hooks, tasks, polling Maturity Report
-
-## Top-level scores
-
-These rollups are simple arithmetic means over the category-note numeric
-scores in
-`scores.yaml`. Percentages are rounded to the nearest whole number.
-
- Coverage: `Beta (76%)`
- Quality: `Alpha (69%)`
- Completeness: `Beta (76%)`
- LTS Features: `0/6`
-
-## Summary
-
-This report promotes the archived `automation-cron-hooks-tasks-polling` maturity evidence from `/Users/kevinlin/tmp/maturity/automation-cron-hooks-tasks-polling` into the current process-version-3 inventory contract.
-
-The category Coverage and Quality scores come from the archived evidence-backed score rows. Completeness is initialized from the same archived evidence breadth and known-gap record, then joined with the surface-specific completeness rubric referenced by taxonomy.
-
-## Matrix
-
-| Category                                                | LTS | Coverage       | Quality       | Completeness   | Features to evaluate                                                                                                                                                                                                                                                                                                            |
-| ------------------------------------------------------- | --- | -------------- | ------------- | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| [Cron Jobs](cron-job-lifecycle.md)                      | ❌  | `Stable (82%)` | `Beta (73%)`  | `Stable (82%)` | Create/edit/remove jobs, Schedule types, Timezone and stagger, Cron RPCs, Agent cron tool, Manual cron runs, Isolated cron execution, Model/provider preflight, Run history, Timeout and denial diagnostics, Chat announce delivery, Webhook delivery, Failure destinations, Skipped-run alerts, Delivery previews              |
-| [Event Ingress](channel-polling-webhooks.md)            | ❌  | `Alpha (65%)`  | `Alpha (58%)` | `Alpha (65%)`  | Telegram long polling, Telegram webhook mode, Zalo polling/webhook mode, Polling stall diagnostics, iMessage watch fallback, Gmail setup wizard, Watcher start/serve, Tailscale/public routing, Push token validation, Gmail event routing, POST /hooks/wake, POST /hooks/agent, Mapped hooks, Hook auth policy, Async dispatch |
-| [Automation Hooks](internal-hooks.md)                   | ❌  | `Beta (78%)`   | `Beta (72%)`  | `Beta (78%)`   | HOOK.md authoring, Hook discovery, Hook CLI management, Hook packs, Lifecycle event dispatch, api.on registration, Tool-call policy hooks, Message hooks, Session/lifecycle hooks, Plugin approval requests, cron_changed                                                                                                       |
-| [Background Tasks and Flows](background-task-ledger.md) | ❌  | `Beta (73%)`   | `Alpha (68%)` | `Beta (73%)`   | Task list/show/cancel, Task notifications, Task audit and maintenance, Chat task board, Task pressure status, Managed flows, Mirrored flows, openclaw tasks flow, Flow audit and maintenance, Plugin managedFlows                                                                                                               |
-| [Heartbeat](heartbeat-commitments.md)                   | ❌  | `Stable (82%)` | `Beta (72%)`  | `Stable (82%)` | Heartbeat scheduling, Active hours, Wake and cooldown handling, Due-only heartbeat tasks, Commitment check-ins                                                                                                                                                                                                                  |
-| [Polling Controls](message-polls-process-polling.md)    | ❌  | `Beta (74%)`   | `Beta (70%)`  | `Beta (74%)`   | openclaw message poll, Telegram polls, Teams polls, Poll flags, Channel capability gates, process poll, process log, Background process status, No-progress loop detection, Process input controls                                                                                                                              |
-
-## Scoring rubric
-
- Coverage:
-  maturity-label rating for integration, e2e, live, or server/runtime flow
-  evidence across the category. Unit tests can provide supporting context but never make a
-  feature covered by themselves.
- Quality:
-  maturity-label rating for implementation and operational robustness. Unit,
-  integration, e2e, live, and real runtime-flow test coverage are Coverage
-  inputs only; they do not raise or lower Quality.
- Completeness:
-  maturity-label rating for how fully the category delivers the intended
-  surface-specific capability set. Use the taxonomy-linked completeness
-  instructions for this surface.
- LTS:
-  calculated as `quality > 80 and coverage > 90`, or when the matching
-  taxonomy category sets `human_lts_override`.
- Shared score bands:
-  `Lovable = 95-100`, `Stable = 80-95`, `Beta = 70-80`,
-  `Alpha = 50-70`, and `Experimental = 0-50`. At shared boundaries, choose the
-  higher maturity label.
- Major quality/completeness gaps:
-  evidence text only, tracked in the detailed feature inventory rather than as a
-  separate scored dimension.
-
-## Detailed feature inventory
-
-### 1. Cron Jobs
-
-Search anchors: Create/edit/remove jobs, Schedule types, Timezone and stagger, Cron RPCs, Agent cron tool, openclaw cron, Manual cron runs, Isolated cron execution, Model/provider preflight, Run history, Timeout and denial diagnostics, Chat announce delivery, Webhook delivery, Failure destinations, Skipped-run alerts, Delivery previews, failure destination, announce.
-
-Category note: [Cron Jobs](cron-job-lifecycle.md)
-
-Score decisions:
-
- Coverage: `Stable (82%)`
- Quality: `Beta (73%)`
- Completeness: `Stable (82%)`
- LTS: ❌
-
-Features:
-
- Create/edit/remove jobs: Covers Create/edit/remove jobs across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
- Schedule types: Covers Schedule types across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
- Timezone and stagger: Covers Timezone and stagger across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
- Cron RPCs: Covers Cron RPCs across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
- Agent cron tool: Covers Agent cron tool across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
- Manual cron runs: Covers Manual cron runs across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Isolated cron execution: Covers Isolated cron execution across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Model/provider preflight: Covers Model/provider preflight across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Run history: Covers Run history across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Timeout and denial diagnostics: Covers Timeout and denial diagnostics across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
- Chat announce delivery: Covers Chat announce delivery across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
- Webhook delivery: Covers Webhook delivery across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
- Failure destinations: Covers Failure destinations across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
- Skipped-run alerts: Covers Skipped-run alerts across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
- Delivery previews: Covers Delivery previews across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
-
-Primary docs:
-
- `docs/automation/cron-jobs.md`
- `docs/cli/cron.md`
- `docs/gateway/protocol.md`
- `docs/automation/tasks.md`
- `docs/channels/discord.md`
-
-### 2. Event Ingress
-
-Search anchors: Telegram long polling, Telegram webhook mode, Zalo polling/webhook mode, Polling stall diagnostics, iMessage watch fallback, Gmail setup wizard, Watcher start/serve, Tailscale/public routing, Push token validation, Gmail event routing, POST /hooks/wake, POST /hooks/agent, Mapped hooks, Hook auth policy, Async dispatch.
-
-Category note: [Event Ingress](channel-polling-webhooks.md)
-
-Score decisions:
-
- Coverage: `Alpha (65%)`
- Quality: `Alpha (58%)`
- Completeness: `Alpha (65%)`
- LTS: ❌
-
-Features:
-
- Telegram long polling: Covers Telegram long polling across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
- Telegram webhook mode: Covers Telegram webhook mode across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
- Zalo polling/webhook mode: Covers Zalo polling/webhook mode across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
- Polling stall diagnostics: Covers Polling stall diagnostics across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
- iMessage watch fallback: Covers iMessage watch fallback across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
- Gmail setup wizard: Covers Gmail setup wizard across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related gmail pub/sub watchers behavior.
- Watcher start/serve: Covers Watcher start/serve across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related gmail pub/sub watchers behavior.
- Tailscale/public routing: Covers Tailscale/public routing across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related gmail pub/sub watchers behavior.
- Push token validation: Covers Push token validation across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related gmail pub/sub watchers behavior.
- Gmail event routing: Covers Gmail event routing across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related gmail pub/sub watchers behavior.
- POST /hooks/wake: Covers POST /hooks/wake across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related http webhooks behavior.
- POST /hooks/agent: Covers POST /hooks/agent across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related http webhooks behavior.
- Mapped hooks: Covers Mapped hooks across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related http webhooks behavior.
- Hook auth policy: Covers Hook auth policy across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related http webhooks behavior.
- Async dispatch: Covers Async dispatch across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related http webhooks behavior.
-
-Primary docs:
-
- `docs/channels/telegram.md`
- `docs/channels/zalo.md`
- `docs/channels/troubleshooting.md`
- `docs/channels/imessage-from-bluebubbles.md`
- `docs/automation/cron-jobs.md#gmail-pubsub-integration`
- `docs/automation/gmail-pubsub.md`
- `docs/cli/webhooks.md`
- `docs/automation/cron-jobs.md#webhooks`
- `docs/automation/webhook.md`
-
-### 3. Automation Hooks
-
-Search anchors: HOOK.md authoring, Hook discovery, Hook CLI management, Hook packs, Lifecycle event dispatch, api.on registration, Tool-call policy hooks, Message hooks, Session/lifecycle hooks, Plugin approval requests, cron_changed.
-
-Category note: [Automation Hooks](internal-hooks.md)
-
-Score decisions:
-
- Coverage: `Beta (78%)`
- Quality: `Beta (72%)`
- Completeness: `Beta (78%)`
- LTS: ❌
-
-Features:
-
- HOOK.md authoring: Covers HOOK.md authoring across `HOOK.md` metadata, handler loading, bundled/managed/workspace/plugin hook discovery, eligibility policy, and related internal hooks behavior.
- Hook discovery: Covers Hook discovery across `HOOK.md` metadata, handler loading, bundled/managed/workspace/plugin hook discovery, eligibility policy, and related internal hooks behavior.
- Hook CLI management: Covers Hook CLI management across `HOOK.md` metadata, handler loading, bundled/managed/workspace/plugin hook discovery, eligibility policy, and related internal hooks behavior.
- Hook packs: Covers Hook packs across `HOOK.md` metadata, handler loading, bundled/managed/workspace/plugin hook discovery, eligibility policy, and related internal hooks behavior.
- Lifecycle event dispatch: Covers Lifecycle event dispatch across `HOOK.md` metadata, handler loading, bundled/managed/workspace/plugin hook discovery, eligibility policy, and related internal hooks behavior.
- api.on registration: Covers api.on registration across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- Tool-call policy hooks: Covers Tool-call policy hooks across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- Message hooks: Covers Message hooks across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- Session/lifecycle hooks: Covers Session/lifecycle hooks across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- Plugin approval requests: Covers Plugin approval requests across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
- cron_changed: Covers cron_changed across `api.on(...)` typed hooks, priority/timeout behavior, decision hooks such as `before_tool_call`, message and dispatch hooks, and related plugin hooks behavior.
-
-Primary docs:
-
- `docs/automation/hooks.md`
- `docs/cli/hooks.md`
- `docs/plugins/hooks.md`
- `docs/plugins/plugin-permission-requests.md`
- `docs/plugins/sdk-subpaths.md`
-
-### 4. Background Tasks and Flows
-
-Search anchors: Task list/show/cancel, Task notifications, Task audit and maintenance, Chat task board, Task pressure status, Managed flows, Mirrored flows, openclaw tasks flow, Flow audit and maintenance, Plugin managedFlows.
-
-Category note: [Background Tasks and Flows](background-task-ledger.md)
-
-Score decisions:
-
- Coverage: `Beta (73%)`
- Quality: `Alpha (68%)`
- Completeness: `Beta (73%)`
- LTS: ❌
-
-Features:
-
- Task list/show/cancel: Covers Task list/show/cancel across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
- Task notifications: Covers Task notifications across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
- Task audit and maintenance: Covers Task audit and maintenance across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
- Chat task board: Covers Chat task board across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
- Task pressure status: Covers Task pressure status across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
- Managed flows: Covers Managed flows across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
- Mirrored flows: Covers Mirrored flows across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
- openclaw tasks flow: Covers openclaw tasks flow across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
- Flow audit and maintenance: Covers Flow audit and maintenance across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
- Plugin managedFlows: Covers Plugin managedFlows across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
-
-Primary docs:
-
- `docs/automation/tasks.md`
- `docs/automation/index.md`
- `docs/cli/tasks.md`
- `docs/automation/taskflow.md`
- `docs/plugins/sdk-runtime.md`
-
-### 5. Heartbeat
-
-Search anchors: Heartbeat scheduling, Active hours, Wake and cooldown handling, Due-only heartbeat tasks, Commitment check-ins, openclaw cron.
-
-Category note: [Heartbeat](heartbeat-commitments.md)
-
-Score decisions:
-
- Coverage: `Stable (82%)`
- Quality: `Beta (72%)`
- Completeness: `Stable (82%)`
- LTS: ❌
-
-Features:
-
- Heartbeat scheduling: Covers Heartbeat scheduling across periodic heartbeat runs, active-hours and variable schedule behavior, wake/cooldown handling, heartbeat prompts and due-only task mode, and related heartbeat and commitments behavior.
- Active hours: Covers Active hours across periodic heartbeat runs, active-hours and variable schedule behavior, wake/cooldown handling, heartbeat prompts and due-only task mode, and related heartbeat and commitments behavior.
- Wake and cooldown handling: Covers Wake and cooldown handling across periodic heartbeat runs, active-hours and variable schedule behavior, wake/cooldown handling, heartbeat prompts and due-only task mode, and related heartbeat and commitments behavior.
- Due-only heartbeat tasks: Covers Due-only heartbeat tasks across periodic heartbeat runs, active-hours and variable schedule behavior, wake/cooldown handling, heartbeat prompts and due-only task mode, and related heartbeat and commitments behavior.
- Commitment check-ins: Covers Commitment check-ins across periodic heartbeat runs, active-hours and variable schedule behavior, wake/cooldown handling, heartbeat prompts and due-only task mode, and related heartbeat and commitments behavior.
-
-Primary docs:
-
- `docs/automation/index.md`
- `docs/gateway/heartbeat.md`
- `docs/concepts/commitments.md`
-
-### 6. Polling Controls
-
-Search anchors: openclaw message poll, Telegram polls, Teams polls, Poll flags, Channel capability gates, process poll, process log, Background process status.
-
-Category note: [Polling Controls](message-polls-process-polling.md)
-
-Score decisions:
-
- Coverage: `Beta (74%)`
- Quality: `Beta (70%)`
- Completeness: `Beta (74%)`
- LTS: ❌
-
-Features:
-
- openclaw message poll: Covers openclaw message poll across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- Telegram polls: Covers Telegram polls across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- Teams polls: Covers Teams polls across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- Poll flags: Covers Poll flags across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- Channel capability gates: Covers Channel capability gates across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- process poll: Covers process poll across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- process log: Covers process log across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- Background process status: Covers Background process status across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- No-progress loop detection: Covers No-progress loop detection across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
- Process input controls: Covers Process input controls across `openclaw message poll`, channel poll adapters, poll parameter normalization, Teams/Matrix/Telegram poll support, and related message polls and process polling behavior.
-
-Primary docs:
-
- `docs/automation/poll.md`
- `docs/cli/message.md`
- `docs/channels/telegram.md`
- `docs/channels/msteams.md`
- `docs/gateway/background-process.md`
-
-## Recommended scorecard interpretation
-
-Use this migrated score as the current inventory baseline. Refresh individual categories with live category-agent research before treating a high score as an LTS promotion gate.
-
-## Out of scope for this surface
-
- Redefining taxonomy category boundaries; taxonomy remains the source of truth for category identity, features, docs, and search anchors.
-
-## Audit provenance
-
- Score source:
-  `docs/kevinslin/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/scores.yaml`.
- Taxonomy metadata source:
-  `.agents/skills/claw-score/taxonomy.yaml`.
- Archived evidence source:
-  `/Users/kevinlin/tmp/maturity/automation-cron-hooks-tasks-polling`.
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/scores.yaml
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/scores.yaml
@@ -1,33 +0,0 @@
-version: 1
-process_version: 3
-data:
-  - name: Cron Jobs
-    category_note: cron-job-lifecycle.md
-    coverage: 82
-    quality: 73
-    completeness: 82
-  - name: Event Ingress
-    category_note: channel-polling-webhooks.md
-    coverage: 65
-    quality: 58
-    completeness: 65
-  - name: Automation Hooks
-    category_note: internal-hooks.md
-    coverage: 78
-    quality: 72
-    completeness: 78
-  - name: Background Tasks and Flows
-    category_note: background-task-ledger.md
-    coverage: 73
-    quality: 68
-    completeness: 73
-  - name: Heartbeat
-    category_note: heartbeat-commitments.md
-    coverage: 82
-    quality: 72
-    completeness: 82
-  - name: Polling Controls
-    category_note: message-polls-process-polling.md
-    coverage: 74
-    quality: 70
-    completeness: 74
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/task-flow-orchestration.md
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/task-flow-orchestration.md
@@ -1,120 +0,0 @@
---
-title: "Automation: cron, hooks, tasks, polling - Task Flow Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Automation: cron, hooks, tasks, polling - Task Flow Maturity Note
-
-## Summary
-
-Task Flow provides durable orchestration above individual background tasks. The source includes a registry, owner access, audit, maintenance, plugin runtime APIs, and Lobster integration. The component is promising but less mature than the plain task ledger: docs explain concepts, but archive evidence shows users still ask how flows are actually triggered and when to choose TaskFlow, background tasks, cron, or an external durable queue.
-
-## Category Scope
-
-This category covers managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, flow audit/maintenance, `openclaw tasks flow` CLI commands, plugin runtime `managedFlows`, Lobster workflow integration, cancellation, and relation to cron/background tasks.
-
-## Features
-
- Managed flows: Covers Managed flows across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
- Mirrored flows: Covers Mirrored flows across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
- openclaw tasks flow: Covers openclaw tasks flow across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
- Flow audit and maintenance: Covers Flow audit and maintenance across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
- Plugin managedFlows: Covers Plugin managedFlows across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Beta (73%)`
- Positive signals: The registry, SQLite store, owner access, audit, maintenance, plugin runtime API, and Lobster flow integration have focused tests.
- Negative signals: Coverage is thinner for real multi-step workflow execution across Gateway restart, external task mirroring, user CLI inspection, and approval/resume paths in one scenario.
- Integration gaps: Add a worked e2e flow: cron triggers a persistent session, plugin creates a managed flow with child tasks, one step waits for approval, Gateway restarts, and `openclaw tasks flow show` proves state and revision continuity.
-
-## Quality Score
-
- Score: `Alpha (68%)`
- Gitcrawl reports: PR #68687 routes durable agent work through TaskFlow; issue #78019 reports `inconsistent_timestamps`; PR #60183 improves TaskFlow audit freshness; PR #61242 improves managed child-task UX; issue #79038 reports webhook `run_task` route-session authority concerns.
- Discrawl reports: Maintainer reports say users ask how TaskFlow is triggered or built, and recommend adding a concrete "hello world" with trigger, state, resume, failure, and audit trail.
- Good qualities: The architecture separates orchestration from individual task execution, tracks revisions, has cancellation intent, and exposes owner-scoped access and maintenance.
- Bad qualities: The operational workflow is still under-documented, and archive reports show both UX gaps and consistency bugs in flow audit/timestamps/authority.
- Excluded from quality: Test inventory and runtime proof depth; they are coverage inputs only.
-
-## Completeness Score
-
- Score: `Beta (73%)`
- Surface instructions: evaluated against `references/completeness/automation-cron-hooks-tasks-polling.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Managed flows, Mirrored flows, openclaw tasks flow, Flow audit and maintenance, Plugin managedFlows.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Publish a concrete TaskFlow tutorial with trigger, state transitions, resume, failure, audit, and relation to background tasks.
- Strengthen restart/resume examples for managed flows and mirrored external tasks.
- Keep authority boundaries explicit for webhook-created tasks and route session trees.
-
-## Evidence
-
-### Docs
-
- `docs/automation/taskflow.md` documents Task Flow as an orchestration layer, managed and mirrored modes, durable state, revision tracking, cancellation, and CLI commands.
- `docs/automation/tasks.md` explains how tasks relate to Task Flow and links `openclaw tasks flow list|show|cancel`.
- `docs/plugins/sdk-runtime.md` references `api.runtime.tasks.managedFlows` and says Task Flow is not itself a scheduler.
-
-### Source
-
- `src/tasks/task-flow-registry.ts`, `src/tasks/task-flow-registry.store.ts`, `src/tasks/task-flow-registry.store.sqlite.ts`, `src/tasks/task-flow-registry.audit.ts`, `src/tasks/task-flow-registry.maintenance.ts`, and `src/tasks/task-flow-registry.types.ts` implement the flow registry.
- `src/tasks/task-flow-owner-access.ts` enforces owner-scoped flow access.
- `src/plugins/runtime/runtime-taskflow.ts` exposes plugin runtime APIs for managed flows.
- `extensions/lobster/src/lobster-taskflow.ts` binds Lobster workflow execution to Task Flow.
- `src/commands/tasks.ts` implements `openclaw tasks flow` CLI operations.
-
-### Integration tests
-
- `extensions/lobster/src/lobster-taskflow.test.ts` exercises a real plugin integration with the Task Flow API.
- `src/plugins/runtime/runtime-taskflow.test.ts` exercises plugin runtime Task Flow behavior.
- No full Gateway restart/resume e2e for Task Flow was found.
-
-### Unit tests
-
- `src/tasks/task-flow-registry.test.ts`, `src/tasks/task-flow-registry.store.test.ts`, `src/tasks/task-flow-registry.audit.test.ts`, `src/tasks/task-flow-registry.maintenance.test.ts`, and `src/tasks/task-flow-owner-access.test.ts` cover core Task Flow behavior.
- `src/tasks/task-registry.maintenance.ts` and `src/commands/tasks.ts` also include flow maintenance paths.
- `src/plugins/runtime/runtime-taskflow.test.ts` covers plugin API shape and behavior.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "task flow registry managed mirrored tasks flow" --json --limit 5`
-
-Results:
-
- No hits for the exact query.
-
-Fallback query:
-
-`gitcrawl search openclaw/openclaw --query "TaskFlow" --json --limit 5`
-
-Results:
-
- PR #68687 routes durable agent work through TaskFlow.
- Issue #78019 reports `TaskFlow inconsistent_timestamps`.
- PR #60183 improves TaskFlow audit freshness.
- PR #61242 improves managed child-task flow UX.
- Issue #79038 reports webhook `run_task` authority problems around route session trees.
-
-### Discrawl queries
-
-Query:
-
-`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "TaskFlow"`
-
-Results:
-
- Maintainer/user discussion says users ask how TaskFlow is actually triggered or built, and recommends a concrete TaskFlow hello-world with trigger, state, resume, failure, and audit trail.
- The same report frames TaskFlow as durable flow visibility above background tasks, with external Postgres/Redis still preferred for a hard durable work ledger in complex multi-agent deployments.
--- a/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/browser-actions-snapshots-and-artifacts.md
+++ b/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/browser-actions-snapshots-and-artifacts.md
@@ -1,191 +0,0 @@
---
-title: "Browser automation and exec/sandbox tools - Browser Automation Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Browser automation and exec/sandbox tools - Browser Automation Maturity Note
-
-## Summary
-
-Browser actions, snapshots, and artifacts is Stable on Coverage and sits exactly
-on the 80% Stable threshold for Quality. The implementation covers rich Playwright
-actions, AI/role/ARIA snapshots, downloads, uploads, screenshots, PDFs, network
-artifacts, dialogs, and output-path safety. Quality is held at the lower Stable
-edge because file upload, stale refs, and existing-session artifact limits remain
-visible in current archive signals.
-
-## Category Scope
-
-Included in this category:
-
- Browser Actions: Covers Browser Actions across browser tool action schemas, navigate/act/snapshot/screenshot operations, AI/role/ARIA snapshot formats, action ref storage, and related browser actions, snapshots, and artifacts behavior.
- Snapshots: Covers Snapshots across browser tool action schemas, navigate/act/snapshot/screenshot operations, AI/role/ARIA snapshot formats, action ref storage, and related browser actions, snapshots, and artifacts behavior.
- Artifacts: Covers Artifacts across browser tool action schemas, navigate/act/snapshot/screenshot operations, AI/role/ARIA snapshot formats, action ref storage, and related browser actions, snapshots, and artifacts behavior.
- Browser Plugin Service: Covers Browser Plugin Service across bundled browser plugin activation, browser CLI registration, `browser.request` Gateway routing, control-service startup, and related browser plugin service and profiles behavior.
- Profiles: Covers Profiles across bundled browser plugin activation, browser CLI registration, `browser.request` Gateway routing, control-service startup, and related browser plugin service and profiles behavior.
- Browser Security: Covers Browser Security across browser-control auth, navigation URL validation, delayed navigation guards, strict private-network SSRF policy, and related browser security, ssrf, and remote control behavior.
- SSRF: Covers SSRF across browser-control auth, navigation URL validation, delayed navigation guards, strict private-network SSRF policy, and related browser security, ssrf, and remote control behavior.
- Remote Control: Covers Remote Control across browser-control auth, navigation URL validation, delayed navigation guards, strict private-network SSRF policy, and related browser security, ssrf, and remote control behavior.
-
-## Features
-
- Browser Actions: Covers Browser Actions across browser tool action schemas, navigate/act/snapshot/screenshot operations, AI/role/ARIA snapshot formats, action ref storage, and related browser actions, snapshots, and artifacts behavior.
- Snapshots: Covers Snapshots across browser tool action schemas, navigate/act/snapshot/screenshot operations, AI/role/ARIA snapshot formats, action ref storage, and related browser actions, snapshots, and artifacts behavior.
- Artifacts: Covers Artifacts across browser tool action schemas, navigate/act/snapshot/screenshot operations, AI/role/ARIA snapshot formats, action ref storage, and related browser actions, snapshots, and artifacts behavior.
- Browser Plugin Service: Covers Browser Plugin Service across bundled browser plugin activation, browser CLI registration, `browser.request` Gateway routing, control-service startup, and related browser plugin service and profiles behavior.
- Profiles: Covers Profiles across bundled browser plugin activation, browser CLI registration, `browser.request` Gateway routing, control-service startup, and related browser plugin service and profiles behavior.
- Browser Security: Covers Browser Security across browser-control auth, navigation URL validation, delayed navigation guards, strict private-network SSRF policy, and related browser security, ssrf, and remote control behavior.
- SSRF: Covers SSRF across browser-control auth, navigation URL validation, delayed navigation guards, strict private-network SSRF policy, and related browser security, ssrf, and remote control behavior.
- Remote Control: Covers Remote Control across browser-control auth, navigation URL validation, delayed navigation guards, strict private-network SSRF policy, and related browser security, ssrf, and remote control behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (84%)`
- Positive signals:
-  - Docs enumerate the browser control API and CLI actions, including snapshots,
-    screenshots, console, errors, requests, PDF, response body, downloads,
-    dialogs, uploads, and trace.
-  - Source has dedicated Playwright action, snapshot, download, route, and output
-    directory modules with navigation guards and output-root constraints.
-  - Tests cover snapshot storage, timeout forwarding, delayed navigation guards,
-    upload path revalidation, download finalization, response body, and CLI
-    action inputs.
-  - Docker browser CDP snapshot smoke proves live CDP/browser interaction, not
-    only schema-level behavior.
- Negative signals:
-  - Archive issues and PRs still mention upload hooks, stale-click failures,
-    snapshot scroll behavior, and CDP timeout/slow attach behavior.
-  - Existing-session profiles still lack some advanced artifact capabilities.
- Integration gaps:
-  - Add a live action matrix covering uploads, downloads, PDFs, response bodies,
-    dialogs, screenshots, AI snapshots, and role snapshots against the same
-    fixture.
-  - Add a browser upload regression lane that validates inbound media directory
-    upload and large/permission-sensitive file handling.
-
-## Quality Score
-
- Score: `Stable (80%)`
- Gitcrawl reports:
-  - `browser request upload` returned open PR #74352 for upload-hook timeout,
-    open PR #83660 for inbound media upload, issue #38844 for flaky file chooser
-    and stale-click misreporting, and issue #51395 for a non-standard upload
-    fallback.
-  - `browser cdp snapshot` returned issue #72653 for browser tool timeout despite
-    CDP working, issue #64929 for slow Brave mode, issue #53390 for snapshot
-    content before scroll, and docs contradiction #80587.
- Discrawl reports:
-  - `browser snapshot upload` returned user-facing automation guidance about
-    taking fresh snapshots, avoiding stale refs, and arming browser upload before
-    clicking file inputs.
- Good qualities:
-  - Action implementation separates interaction, snapshot, download, route, and
-    output concerns.
-  - Upload paths are revalidated at use time and resolved through constrained
-    upload/output directories.
-  - Download completion uses atomic finalization and sanitizes suggested names to
-    prevent traversal escapes.
-  - Navigation checks run after actions that can alter current page or open tabs.
- Bad qualities:
-  - Artifact support depends heavily on profile type; existing-session profiles
-    cannot do every raw CDP/managed-browser artifact path.
-  - Snapshot refs are inherently volatile, and stale-ref misuse remains a common
-    operator and agent-workflow failure mode.
-  - Upload flows remain sensitive to file chooser timing, non-standard inputs,
-    and inbound media path routing.
- Excluded from quality:
-  - Unit, integration, e2e, live, and runtime-flow test evidence affected
-    Coverage only.
-
-## Completeness Score
-
- Score: `Stable (84%)`
- Surface instructions: evaluated against `references/completeness/browser-automation-and-exec-sandbox-tools.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Browser Actions, Snapshots, Artifacts, Browser Plugin Service, Profiles, Browser Security, SSRF, Remote Control.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Browser upload needs stronger user-facing diagnostics when stale refs or
-  unsupported file inputs are the real failure.
- Artifact support matrix should be clearer for managed, remote CDP, attach-only,
-  and existing-session profiles.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/tools/browser-control.md:154`: CLI exposes screenshot, snapshot, console, errors, requests, PDF, and response body.
- `/Users/kevinlin/code/openclaw/docs/tools/browser-control.md:178`: CLI actions include navigate, click, type, drag, upload, wait, evaluate, and trace.
- `/Users/kevinlin/code/openclaw/docs/tools/browser-control.md:231`: upload/dialog arming, action refs, temp path constraints, stable tab ids, and snapshot flags are documented.
- `/Users/kevinlin/code/openclaw/docs/tools/browser-control.md:252`: AI, role, and ARIA refs plus Docker proof are documented.
- `/Users/kevinlin/code/openclaw/docs/help/testing.md:783`: docs identify the browser CDP snapshot Docker smoke as a browser doctor and snapshot verification lane.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser-tool.schema.ts:4`: browser act kinds are defined in schema.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser-tool.schema.ts:19`: browser tool actions and sandbox/host/node targets are defined.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser-tool.actions.ts:44`: action timeouts and existing-session behavior are configured.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/pw-tools-core.interactions.ts:84`: interaction code detects cross-document and hash-only navigation.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/pw-tools-core.interactions.ts:169`: delayed interaction navigation guard is applied.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/pw-tools-core.snapshot.ts:37`: snapshot code collects and appends snapshot URLs.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/pw-tools-core.snapshot.ts:109`: ARIA refs are stored via Playwright.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/pw-tools-core.downloads.ts:92`: downloads are saved inside output root.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/pw-tools-core.downloads.ts:130`: file upload arming validates existing paths under the upload dir.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/routes/agent.snapshot.ts:195`: screenshot response is normalized and saved.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/scripts/e2e/browser-cdp-snapshot-docker.sh:84`: Docker E2E runs browser doctor, opens a fixture, snapshots, and asserts output.
- `/Users/kevinlin/code/openclaw/scripts/e2e/lib/browser-cdp-snapshot/assert-snapshot.mjs:6`: snapshot assertion checks page text, URL, link refs, and iframe evidence.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/pw-tools-core.snapshot.test.ts:54`: verifies resolved pages are reused when storing ARIA refs.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/pw-tools-core.snapshot.test.ts:95`: verifies snapshot timeout behavior.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/pw-tools-core.interactions.set-input-files.test.ts:71`: verifies upload paths are revalidated and canonicalized.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/pw-tools-core.waits-next-download-saves-it.test.ts:170`: verifies explicit download paths are finalized.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/pw-tools-core.waits-next-download-saves-it.test.ts:402`: verifies suggested download filename sanitization.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/cli/browser-cli-actions-input/register.element.test.ts:1`: CLI action input coverage exists for browser element actions.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "browser request upload" --json`
-
-Results:
-
- Open PR #74352: `fix(browser): give upload hooks enough client timeout`.
- Open PR #83660: `fix(browser): allow upload from inbound media directory`.
- Open issue #38844: browser upload/file chooser flow can be flaky and misreport stale-click failures.
- Open issue #51395: browser upload fallback for non-standard file inputs.
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "browser cdp snapshot" --json`
-
-Results:
-
- Open issue #72653: browser tool times out despite CDP connection working.
- Open issue #64929: local managed Brave mode is slow due to CDP attach/discovery overhead.
- Open issue #53390: snapshot returns page content before scroll.
- Open issue #80587: docs contradiction on `browser wait --load networkidle`.
-
-### Discrawl queries
-
-Query:
-
-`discrawl search --mode fts --limit 5 "browser snapshot upload"`
-
-Results:
-
- User-support archive entries from 2026-03-20 describe recurring snapshot and upload rules: refresh snapshots before actions, avoid old refs, arm upload before clicking file inputs, and use compact interactive snapshots.
--- a/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/browser-plugin-service-and-profiles.md
+++ b/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/browser-plugin-service-and-profiles.md
@@ -1,184 +0,0 @@
---
-title: "Browser automation and exec/sandbox tools - Browser Plugin Service and Profiles Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Browser automation and exec/sandbox tools - Browser Plugin Service and Profiles Maturity Note
-
-## Summary
-
-Browser plugin service and profiles is a Stable component. The bundled plugin has
-explicit manifest contracts, CLI and Gateway request registration, a lazy control
-service, profile CRUD, default/openclaw/user/remote profile resolution, and hot
-reload coverage. The remaining risk is operational profile brittleness around
-existing Chrome sessions, WSL/macOS profile behavior, and remote CDP reachability.
-
-## Category Scope
-
-This note covers bundled browser plugin activation, browser CLI registration,
-`browser.request` Gateway routing, control-service startup, known profile
-enumeration, default profile resolution, profile create/delete, local managed
-profiles, `user`/existing-session profiles, attach-only and remote CDP profiles,
-and profile hot reload.
-
-## Features
-
- Browser Plugin Service: Covers Browser Plugin Service across bundled browser plugin activation, browser CLI registration, `browser.request` Gateway routing, control-service startup, and related browser plugin service and profiles behavior.
- Profiles: Covers Profiles across bundled browser plugin activation, browser CLI registration, `browser.request` Gateway routing, control-service startup, and related browser plugin service and profiles behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (86%)`
- Positive signals:
-  - Browser setup and profile docs cover plugin enablement, tool allowlists,
-    default profile selection, openclaw/user profile behavior, remote CDP, and
-    lifecycle commands.
-  - Source registration is manifest-first and connects tool, CLI, Gateway,
-    node-host proxy, plugin service, and security audit surfaces.
-  - Runtime tests cover lazy plugin startup, profile request routing, profile
-    CRUD, lifecycle cleanup, and hot reload after config changes.
-  - Docker browser CDP snapshot smoke reaches a live Gateway/browser fixture and
-    verifies browser doctor, tab opening, and snapshot assertions.
- Negative signals:
-  - Existing-session and user-profile flows have current archive bugs around
-    macOS, WSL, and timeout behavior.
-  - Remote browser Chrome DevTools MCP requests mostly resolve outside
-    first-party core, leaving a sharper boundary between core CDP profiles and
-    plugin ecosystem work.
- Integration gaps:
-  - Add a profile lifecycle E2E matrix across managed, user/existing-session,
-    remote CDP, attach-only, macOS, WSL, and headless Linux profiles.
-  - Add a release-gate browser.request profile-routing scenario that exercises
-    profile selection from both query string and body.
-
-## Quality Score
-
- Score: `Stable (82%)`
- Gitcrawl reports:
-  - `browser plugin profiles browser.request openclaw browser command missing`
-    returned open PR #81076 for top-level act field backfill, open PR #85993
-    expanding Chrome MCP web capabilities, and open PR #74411 for download
-    actions.
-  - `browser profile` returned open PR #80143 for honoring `cdpUrl` on the user
-    default profile, issue #80036 for Chrome MCP existing-session timeout on
-    macOS, issue #62288 for brittle existing-session attach, and issue #43803
-    for browser profile hot-reload routing.
- Discrawl reports:
-  - `browser profiles openclaw browser` returned release/archive messages
-    around browser existing-session status probes, remote browser MCP being
-    plugin-path work, proxy configuration, and managed browser timeout fixes.
- Good qualities:
-  - The plugin is self-contained and defaults to enabled through a clear manifest
-    contract.
-  - Profile resolution is centralized, supports known runtime profiles, and
-    refreshes config when selecting/listing profiles.
-  - Profile CRUD validates remote/private-network CDP settings and avoids
-    deleting remote or existing-session browser data.
-  - Lazy startup avoids starting the control server during Gateway boot while
-    still supporting on-demand runtime cleanup.
- Bad qualities:
-  - The product surface combines several profile models with different behavior:
-    managed, existing-session, attach-only, remote CDP, and node-host proxy.
-  - Users can observe a profile as "ready" while later page tools time out if
-    the external browser/Chrome MCP/CDP layer is unhealthy.
-  - Hot reload and profile reconciliation are strong but subtle enough that
-    stale runtime state remains a current operational theme.
- Excluded from quality:
-  - Unit, integration, e2e, live, and runtime-flow test evidence affected
-    Coverage only.
-
-## Completeness Score
-
- Score: `Stable (86%)`
- Surface instructions: evaluated against `references/completeness/browser-automation-and-exec-sandbox-tools.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Browser Plugin Service, Profiles.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Existing-session profile behavior needs more cross-platform proof and clearer
-  failure taxonomy.
- Remote CDP and Chrome MCP profile lanes need stronger operator diagnostics
-  before this component should be considered Lovable.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/tools/browser.md:10`: browser tool uses a dedicated isolated profile by default and can control an existing Chrome profile via Chrome MCP.
- `/Users/kevinlin/code/openclaw/docs/tools/browser.md:21`: documented features include separate profile, deterministic tab control, actions, snapshots, screenshots, PDF, and multi-profile support.
- `/Users/kevinlin/code/openclaw/docs/tools/browser.md:67`: disabling the plugin removes CLI commands, Gateway method, agent tool, and control service.
- `/Users/kevinlin/code/openclaw/docs/tools/browser.md:119`: docs distinguish openclaw managed profile from user existing-session profile.
- `/Users/kevinlin/code/openclaw/docs/tools/browser.md:138`: docs describe browser config fields and profile config.
- `/Users/kevinlin/code/openclaw/docs/tools/browser-control.md:14`: browser control API exposes status, tabs, open, focus, close, screenshot, snapshot, console, errors, requests, PDF, response body, and act.
- `/Users/kevinlin/code/openclaw/docs/tools/browser-control.md:32`: profile query param selects profile and loopback auth follows gateway auth.
- `/Users/kevinlin/code/openclaw/docs/tools/browser-control.md:125`: control service is an internal loopback server backed by CDP/Playwright.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/extensions/browser/openclaw.plugin.json:1`: browser plugin manifest declares id, default enablement, startup/config hooks, tool contract, CLI aliases, and skills.
- `/Users/kevinlin/code/openclaw/extensions/browser/register.runtime.ts:1`: runtime exports browser tool, Gateway request handler, node-host proxy, plugin service, and security audit.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/server-context.ts:40`: known profile names are merged from config and runtime state.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/server-context.ts:51`: profile context wires profile lifecycle, tab operations, and availability.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/server-context.ts:143`: profile selection refreshes config and resolves default/current profile.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/server-context.ts:161`: profile listing includes Chrome MCP and CDP reachability.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/profiles-service.ts:48`: profile service validates and manages profile list/create/delete operations.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser-tool.ts:213`: browser node target resolution uses capabilities, commands, and connected browser-capable nodes.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/scripts/e2e/browser-cdp-snapshot-docker.sh:84`: Docker E2E runs browser doctor, opens the fixture, snapshots, and asserts the result.
- `/Users/kevinlin/code/openclaw/scripts/e2e/lib/browser-cdp-snapshot/assert-snapshot.mjs:6`: snapshot assertion checks page text, docs link, URL, and iframe refs.
- `/Users/kevinlin/code/openclaw/extensions/qa-lab/src/browser-runtime.ts:111`: QA browser runtime helper exercises `browser.request`, open, snapshot, and act flows.
- `/Users/kevinlin/code/openclaw/extensions/qa-lab/src/browser-runtime.ts:185`: QA helper waits for enabled/running/CDP-ready status.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/extensions/browser/src/plugin-service.test.ts:54`: verifies browser control service does not start during gateway startup by default.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/plugin-service.test.ts:101`: verifies on-demand browser runtime stops even when startup was lazy.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/server-context.hot-reload-profiles.test.ts:86`: verifies new profiles are hot-reloaded from config.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/server-context.hot-reload-profiles.test.ts:177`: verifies profile listing refreshes config before enumerating profiles.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/profiles-service.test.ts:204`: verifies remote Chrome profiles accept `cdpUrl`.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/gateway/browser-request.profile-from-body.test.ts:95`: verifies `browser.request` can use profile from the request body.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "browser plugin profiles browser.request openclaw browser command missing" --json`
-
-Results:
-
- Open PR #81076: `fix(browser): backfill top-level act fields into nested request`.
- Open PR #85993: `feat(browser): expand Chrome MCP web capabilities`.
- Open PR #74411: `feat(browser): add agent download actions`.
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "browser profile" --json`
-
-Results:
-
- Open PR #80143: `fix(browser): honor cdpUrl for user default profile`.
- Open issue #80036: Chrome MCP existing-session `profile=user` reports ready but page tools time out on macOS.
- Open issue #62288: existing-session attach is brittle and needs improved fallback/diagnostics.
- Open issue #43803: browser profile hot-reload path still has reload-mode risk.
-
-### Discrawl queries
-
-Query:
-
-`discrawl search --mode fts --limit 5 "browser profiles openclaw browser"`
-
-Results:
-
- Maintainers/release archive entry on 2026-05-10 includes browser existing-session status probe extension.
- OpenClaw archive comments on 2026-04-26 discuss browser proxy configuration, remote browser MCP as plugin work, and managed browser timeout fixes.
--- a/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/browser-security-ssrf-and-remote-control.md
+++ b/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/browser-security-ssrf-and-remote-control.md
@@ -1,180 +0,0 @@
---
-title: "Browser automation and exec/sandbox tools - Browser Security, SSRF, and Remote Control Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Browser automation and exec/sandbox tools - Browser Security, SSRF, and Remote Control Maturity Note
-
-## Summary
-
-Browser security, SSRF, and remote control is Beta. The controls are real:
-browser-control auth, strict navigation guards, SSRF policy, remote CDP audit
-findings, loopback/CDP reachability policy, and post-action navigation checks.
-The score remains Beta because localhost/file/private-network behavior is still
-surprising to users, remote CDP is inherently sensitive, and policy exceptions
-must be handled with precision.
-
-## Category Scope
-
-This note covers browser-control auth, navigation URL validation, delayed
-navigation guards, strict private-network SSRF policy, unsupported protocols,
-remote CDP reachability and audit warnings, CDP loopback bypass for OpenClaw's
-own control plane, and browser security docs.
-
-## Features
-
- Browser Security: Covers Browser Security across browser-control auth, navigation URL validation, delayed navigation guards, strict private-network SSRF policy, and related browser security, SSRF, and remote control behavior.
- SSRF: Covers SSRF across browser-control auth, navigation URL validation, delayed navigation guards, strict private-network SSRF policy, and related browser security, SSRF, and remote control behavior.
- Remote Control: Covers Remote Control across browser-control auth, navigation URL validation, delayed navigation guards, strict private-network SSRF policy, and related browser security, SSRF, and remote control behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Beta (78%)`
- Positive signals:
-  - Docs describe browser SSRF policy, loopback/private network handling, remote
-    CDP risk, and browser-control exposure.
-  - Source enforces navigation checks before and after actions and snapshots.
-  - Source audits missing browser-control auth, HTTP remote CDP, and private
-    remote CDP hosts.
-  - Tests cover navigation guard behavior, existing-session post-action guards,
-    loopback auth, remote profile validation, and security audit findings.
- Negative signals:
-  - Security behavior is not uniform across managed browser, existing-session,
-    remote CDP, and sandbox browser paths.
-  - Live archive evidence shows users still hit
-    `browser navigation blocked by policy` for localhost and file URLs and
-    need clarification.
- Integration gaps:
-  - Add a live security matrix for managed, existing-session, remote CDP, and
-    sandbox browser that proves localhost, file, private network, and explicit
-    allowlist behavior.
-  - Add docs examples that pair common local-dashboard workflows with the exact
-    SSRF allowlist or safer profile target.
-
-## Quality Score
-
- Score: `Beta (74%)`
- Gitcrawl reports:
-  - `browser SSRF remote CDP navigation blocked` returned open issue #67966 for
-    Playwright navigation interception in local-managed browser mode.
-  - Broader `browser sandbox` search returned issue #84942 about sandbox/browser
-    target mismatch, issue #52662 for non-Docker browser sandbox backends, issue
-    #64383 about simplifying sandbox browser CDP path, and issue #43803 on
-    browser profile hot reload.
- Discrawl reports:
-  - `browser navigation blocked policy` returned a 2026-05-11 report where
-    public HTTPS passed but `127.0.0.1`, `localhost`, and `file://` failed with
-    browser policy/unsupported protocol messages.
-  - The same query returned archive discussion of loopback CDP SSRF fixes and
-    URL redaction/security review requirements.
- Good qualities:
-  - Browser control auth is generated/persisted through gateway auth and can
-    fail closed.
-  - Navigation checks block unsupported protocols, strict private network access,
-    and blocked redirect chains.
-  - Remote CDP endpoints are audited for plain HTTP and private/internal hosts.
-  - Existing-session interactions re-check current and newly opened tab URLs
-    after delayed navigation.
- Bad qualities:
-  - The policy is precise but hard to explain: loopback CDP control may be
-    allowed while browser navigation to loopback remains blocked.
-  - `file://` and localhost/dashboard workflows are common local-dev cases but
-    can be rejected by default.
-  - Remote CDP is a trusted-control endpoint, and docs must keep reminding users
-    not to expose it casually.
- Excluded from quality:
-  - Unit, integration, e2e, live, and runtime-flow test evidence affected
-    Coverage only.
-
-## Completeness Score
-
- Score: `Beta (78%)`
- Surface instructions: evaluated against `references/completeness/browser-automation-and-exec-sandbox-tools.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Browser Security, SSRF, Remote Control.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- The security model needs clearer "why blocked" output for localhost, file,
-  remote CDP, and private-network browser destinations.
- Remote CDP security posture should stay under active audit because it is a
-  browser-control plane, not normal web browsing.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/tools/browser.md:216`: browser docs point to SSRF policy handling.
- `/Users/kevinlin/code/openclaw/docs/tools/browser-control.md:360`: security docs warn about browser evaluate, private Gateway/node access, remote CDP protection, and strict SSRF examples.
- `/Users/kevinlin/code/openclaw/docs/gateway/security/index.md:240`: security docs call out exec approval drift and browser control exposure as review areas.
- `/Users/kevinlin/code/openclaw/docs/gateway/security/index.md:1174`: private/internal/special-use browser destinations remain blocked unless explicitly allowed.
- `/Users/kevinlin/code/openclaw/docs/gateway/security/audit-checks.md:80`: audit table includes remote CDP over HTTP and private-host findings.
- `/Users/kevinlin/code/openclaw/docs/gateway/security/audit-checks.md:89`: audit table includes sandbox browser container non-loopback publish critical finding.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/navigation-guard.ts:10`: only http/https and about:blank URLs are valid browser navigation targets.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/navigation-guard.ts:90`: navigation URL checks block unsupported protocol, proxy-routed strict SSRF, and disallowed hostnames.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/navigation-guard.ts:151`: post-navigation redirect chains are checked.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/control-auth.ts:17`: browser control auth resolves from Gateway auth.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/control-auth.ts:117`: browser control auth can be generated and persisted.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/cdp-reachability-policy.ts:19`: CDP reachability bypasses local loopback only for OpenClaw's own control plane.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/security-audit.ts:68`: browser security audit emits a critical finding when control HTTP routes have no auth.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/security-audit.ts:93`: audit warns when remote CDP uses HTTP.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/security-audit.ts:102`: audit warns when remote CDP targets private/internal hosts under private-network opt-in.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/scripts/e2e/browser-cdp-snapshot-docker.sh:84`: Docker browser E2E verifies live CDP-based browser operation.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/chrome.loopback-ssrf.integration.test.ts:1`: integration coverage exists for loopback SSRF behavior.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/pw-tools-core.interactions.navigation-guard.test.ts:73`: verifies post-click navigation guard runs when navigation starts after click.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/pw-tools-core.interactions.navigation-guard.test.ts:267`: verifies subframe-only private navigation is blocked.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/routes/agent.act.existing-session-navigation-guard.test.ts:131`: verifies existing-session interaction checks navigation after click and key submit.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/routes/agent.act.existing-session-navigation-guard.test.ts:219`: verifies newly opened blocked-tab URLs fail closed.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/profiles-service.test.ts:225`: verifies strict SSRF mode rejects private-network remote CDP.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/browser/server.auth-token-gates-http.test.ts:1`: verifies browser HTTP auth token gates.
- `/Users/kevinlin/code/openclaw/extensions/browser/src/security-audit.test.ts:1`: verifies browser security audit findings.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "browser SSRF remote CDP navigation blocked" --json`
-
-Results:
-
- Open issue #67966: Playwright navigation interception for local-managed browser mode.
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "browser sandbox" --json`
-
-Results:
-
- Open issue #84942: sandbox policy reports sandboxed while target=sandbox browser is unavailable.
- Open issue #52662: browser sandbox should support non-Docker backends.
- Open issue #64383: evaluate simplifying sandbox browser CDP path.
-
-### Discrawl queries
-
-Query:
-
-`discrawl search --mode fts --limit 5 "browser navigation blocked policy"`
-
-Results:
-
- 2026-05-11 clawtributors report: public HTTPS passed, while `127.0.0.1`,
-  `localhost`, and `file://` dashboard URLs failed with browser policy or
-  unsupported-protocol messages.
- 2026-04-25 OpenClaw archive comments describe loopback CDP SSRF fixes and URL
-  redaction/security review requirements for tab URL exposure.
--- a/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/direct-tool-invoke-api-and-node-system-run.md
+++ b/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/direct-tool-invoke-api-and-node-system-run.md
@@ -1,185 +0,0 @@
---
-title: "Browser automation and exec/sandbox tools - Direct Tool Invoke API and Node System.run Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Browser automation and exec/sandbox tools - Direct Tool Invoke API and Node System.run Maturity Note
-
-## Summary
-
-Direct tool invoke API and node `system.run` is Stable on Coverage and Beta on
-Quality. The HTTP and RPC direct-invoke path is documented, auth-scoped, policy
-filtered, hook-aware, and covered by tests. Node `system.run` has explicit
-pairing/admin requirements, node-local approval policy, approval plan binding,
-and drift rejection. Quality remains Beta because the endpoint is intentionally
-full operator access, the hard-deny list is security-critical, and node
-`system.run` is remote command execution on a paired machine.
-
-## Category Scope
-
-This note covers HTTP `POST /tools/invoke`, Gateway RPC `tools.invoke`, request
-body and auth semantics, shared-secret operator scope restoration, policy
-filtering, before-tool-call hooks, HTTP deny list, response shapes, node pairing
-scopes, node command relay, `system.run`, `system.run.prepare`, `system.which`,
-approval plan binding, and node-host exec policy.
-
-## Features
-
- Direct Tool Invoke API: Covers Direct Tool Invoke API across HTTP `POST /tools/invoke`, Gateway RPC `tools.invoke`, request body and auth semantics, shared-secret operator scope restoration, and related direct tool invoke api and node system.run behavior.
- Node System.run: Covers Node System.run across HTTP `POST /tools/invoke`, Gateway RPC `tools.invoke`, request body and auth semantics, shared-secret operator scope restoration, and related direct tool invoke api and node system.run behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (82%)`
- Positive signals:
-  - Docs cover `/tools/invoke` auth, request shape, policy/routing behavior,
-    security boundary, hard deny list, response shape, and customization.
-  - Protocol docs cover `tools.invoke`, node scopes, node pairing approvals, and
-    exec approval binding for `system.run`.
-  - Source shares direct HTTP/RPC invocation through `invokeGatewayTool`, applies
-    Gateway-scoped tool resolution, runs before-tool-call hooks, and maps errors.
-  - Tests cover HTTP auth, policy denial, hard deny entries, plugin tool fallback,
-    RPC envelope, approval-needed payloads, node approval binding, and node-host
-    system.run policy.
- Negative signals:
-  - `/tools/invoke` is intentionally not a narrow per-user delegated auth model.
-  - Node `system.run` approval and allowlist drift have current issue/PR history.
- Integration gaps:
-  - Add a direct-invoke smoke that proves each default hard-deny tool stays
-    denied while a low-risk plugin/core tool remains callable.
-  - Add a node-host integration lane that proves approval plan binding rejects
-    command, cwd, agent, and session drift after approval.
-
-## Quality Score
-
- Score: `Beta (79%)`
- Gitcrawl reports:
-  - `tools invoke system.run approval node invoke` returned issue #77096 on
-    symlink cwd trust, PR #80532 for `allowSymlinkPath`, PR #81827 for
-    `tools.exec.denyPathPatterns`, PR #78226 for node allowlist writeback
-    restoring revoked exec approvals, PR #85543 for node shell fallback, PR
-    #70543 for normalized auto mode, and PR #81488 for node exec approval env
-    hardening.
- Discrawl reports:
-  - `tools invoke system run` returned 2026-04-27 guidance recommending
-    `/tools/invoke` for n8n fanout to multiple Feishu threads, plus archive
-    comments saying direct `nodes invoke system.run` is superseded by
-    `exec host=node` and that `system.run`/`exec` output/approval behavior is
-    security-sensitive.
- Good qualities:
-  - HTTP direct invoke uses Gateway auth and rate-limit path, and shared-secret
-    auth restores full operator defaults intentionally.
-  - HTTP hard deny defaults block exec, shell, file mutation, session spawning,
-    session send, cron, gateway, and node relay.
-  - RPC `tools.invoke` returns a typed envelope rather than throwing through
-    policy/approval refusals.
-  - Node pairing requires admin for system.run/system.which requests.
-  - Approved node `system.run` forwards only allowed fields and revalidates
-    approval plan details.
- Bad qualities:
-  - A valid Gateway bearer credential is owner/operator access for this endpoint.
-  - The hard-deny list is a critical control; custom `gateway.tools.allow` can
-    intentionally remove entries.
-  - Node execution depends on paired-device trust plus node-local approval
-    policy, making drift and writeback bugs high impact.
- Excluded from quality:
-  - Unit, integration, e2e, live, and runtime-flow test evidence affected
-    Coverage only.
-
-## Completeness Score
-
- Score: `Stable (82%)`
- Surface instructions: evaluated against `references/completeness/browser-automation-and-exec-sandbox-tools.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Direct Tool Invoke API, Node System.run.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Direct tool invocation should remain documented as operator-only unless
-  OpenClaw adds a narrower delegated auth model.
- Node `system.run` approval binding and node-local allowlist state should stay
-  under active security audit.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/gateway/tools-invoke-http-api.md:9`: docs state `/tools/invoke` is always enabled and uses Gateway auth plus tool policy.
- `/Users/kevinlin/code/openclaw/docs/gateway/tools-invoke-http-api.md:43`: docs identify the endpoint as full operator-access surface.
- `/Users/kevinlin/code/openclaw/docs/gateway/tools-invoke-http-api.md:89`: docs describe policy and routing behavior.
- `/Users/kevinlin/code/openclaw/docs/gateway/tools-invoke-http-api.md:101`: docs state exec approvals are not a separate authorization boundary for direct HTTP invoke.
- `/Users/kevinlin/code/openclaw/docs/gateway/tools-invoke-http-api.md:107`: docs list default hard-deny tools, including exec, shell, file mutation, sessions, cron, gateway, and nodes.
- `/Users/kevinlin/code/openclaw/docs/gateway/operator-scopes.md:99`: node pairing approval derives extra required scopes from command list.
- `/Users/kevinlin/code/openclaw/docs/gateway/operator-scopes.md:104`: `system.run`, `system.run.prepare`, and `system.which` require pairing plus admin.
- `/Users/kevinlin/code/openclaw/docs/gateway/protocol.md:573`: `tools.invoke` invokes one available tool through the same policy path as `/tools/invoke`.
- `/Users/kevinlin/code/openclaw/docs/gateway/protocol.md:627`: exec approvals for node use canonical `systemRunPlan` and reject mutation after approval.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/src/gateway/tools-invoke-http.ts:17`: HTTP handler routes `/tools/invoke`.
- `/Users/kevinlin/code/openclaw/src/gateway/tools-invoke-http.ts:45`: comments document shared-secret full operator trust model.
- `/Users/kevinlin/code/openclaw/src/gateway/tools-invoke-http.ts:63`: body parsing enforces max body size.
- `/Users/kevinlin/code/openclaw/src/gateway/tools-invoke-http.ts:77`: HTTP direct invoke calls shared `invokeGatewayTool`.
- `/Users/kevinlin/code/openclaw/src/gateway/server-methods/tools-invoke.ts:32`: RPC `tools.invoke` handler validates params and calls the shared invocation path.
- `/Users/kevinlin/code/openclaw/src/gateway/tools-invoke-shared.ts:146`: shared invocation resolves tool name, args, session, policy-scoped tools, hooks, and error mapping.
- `/Users/kevinlin/code/openclaw/src/gateway/tool-resolution.ts:105`: HTTP direct invoke applies default Gateway HTTP hard deny list.
- `/Users/kevinlin/code/openclaw/src/security/dangerous-tools.ts:9`: default HTTP deny list includes exec, shell, file mutation, session orchestration, cron, gateway, and nodes.
- `/Users/kevinlin/code/openclaw/src/gateway/node-invoke-system-run-approval.ts:190`: node `system.run` forwarding uses an allowlist of supported fields.
- `/Users/kevinlin/code/openclaw/src/gateway/node-invoke-system-run-approval.ts:214`: approval override fields are accepted only with a real approval record.
- `/Users/kevinlin/code/openclaw/src/node-host/invoke-system-run.ts:212`: node-host sends denied exec events and results on policy failure.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/gateway/tools-invoke-http.test.ts:440`: HTTP direct invoke test suite exercises real HTTP behavior.
- `/Users/kevinlin/code/openclaw/src/gateway/operator-approvals-client.e2e.test.ts:1`: operator approval e2e coverage exists.
- `/Users/kevinlin/code/openclaw/src/gateway/server.node-invoke-approval-bypass.test.ts:1`: node invoke approval bypass regression coverage exists.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/src/gateway/tools-invoke-http.test.ts:610`: verifies denied/profile-blocked tools return 404.
- `/Users/kevinlin/code/openclaw/src/gateway/tools-invoke-http.test.ts:638`: verifies HTTP denies `sessions_spawn` even when agent policy allows it.
- `/Users/kevinlin/code/openclaw/src/gateway/tools-invoke-http.test.ts:853`: verifies shared-secret bearer auth is full operator access on `/tools/invoke`.
- `/Users/kevinlin/code/openclaw/src/gateway/tools-invoke-http.test.ts:924`: verifies HTTP deny list extends to high-risk execution and file tools.
- `/Users/kevinlin/code/openclaw/src/gateway/tools-invoke-http.test.ts:966`: verifies RPC `tools.invoke` envelope.
- `/Users/kevinlin/code/openclaw/src/gateway/tools-invoke-http.test.ts:996`: verifies typed approval-needed refusal when the policy hook blocks.
- `/Users/kevinlin/code/openclaw/src/gateway/system-run-approval-binding.test.ts:1`: system.run approval binding tests exist.
- `/Users/kevinlin/code/openclaw/src/gateway/node-invoke-system-run-approval.test.ts:1`: node invoke system.run approval tests exist.
- `/Users/kevinlin/code/openclaw/src/node-host/invoke-system-run.test.ts:1`: node-host system.run tests exist.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "tools invoke system.run approval node invoke" --json`
-
-Results:
-
- Open issue #77096: opt-in symlink cwd for approval-bound `system.run`.
- Open PR #80532: add `allowSymlinkPath` config.
- Open PR #81827: add `tools.exec.denyPathPatterns`.
- Open PR #78226: node allowlist writeback can restore revoked exec approvals.
- Open PR #85543: retry node shell fallback on ENOENT.
- Open PR #70543: add normalized auto mode.
- Open PR #81488: harden node exec approval precheck env.
-
-### Discrawl queries
-
-Query:
-
-`discrawl search --mode fts --limit 5 "tools invoke system run"`
-
-Results:
-
- 2026-04-27 support archive recommends `/tools/invoke` for n8n fanout after
-  analysis-only webhook runs.
- 2026-04-25 OpenClaw archive comments state direct `nodes invoke system.run`
-  paths were superseded by `exec host=node` and that node shell execution routes
-  through approval-aware exec.
--- a/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/exec-routing-and-process-lifecycle.md
+++ b/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/exec-routing-and-process-lifecycle.md
@@ -1,179 +0,0 @@
---
-title: "Browser automation and exec/sandbox tools - Tool Invocation and Execution Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Browser automation and exec/sandbox tools - Tool Invocation and Execution Maturity Note
-
-## Summary
-
-Exec routing and process lifecycle is Stable. The surface has broad docs, source
-centralization, detailed timeout/output behavior, PTY and stdin handling,
-background process tracking, process follow-up actions, and host routing across
-auto, sandbox, gateway, and node. Remaining risk comes from long-running command
-survivability, background process state after restart/compaction, and the
-inherent complexity of routing shell execution across multiple hosts.
-
-## Category Scope
-
-Included in this category:
-
- Exec Routing: Covers Exec Routing across `exec` foreground and background execution, `yieldMs`, timeouts, PTY, and related exec routing and process lifecycle behavior.
- Process Lifecycle: Covers Process Lifecycle across `exec` foreground and background execution, `yieldMs`, timeouts, PTY, and related exec routing and process lifecycle behavior.
- Direct Tool Invoke API: Covers Direct Tool Invoke API across HTTP `POST /tools/invoke`, Gateway RPC `tools.invoke`, request body and auth semantics, shared-secret operator scope restoration, and related direct tool invoke api and node system.run behavior.
- Node System.run: Covers Node System.run across HTTP `POST /tools/invoke`, Gateway RPC `tools.invoke`, request body and auth semantics, shared-secret operator scope restoration, and related direct tool invoke api and node system.run behavior.
- Host Exec Approvals: Covers Host Exec Approvals across exec approval policy, local approvals state, approval request registration and waiting, allow-once consumption, and related host exec approvals and elevated mode behavior.
- Elevated Mode: Covers Elevated Mode across exec approval policy, local approvals state, approval request registration and waiting, allow-once consumption, and related host exec approvals and elevated mode behavior.
-
-## Features
-
- Exec Routing: Covers Exec Routing across `exec` foreground and background execution, `yieldMs`, timeouts, PTY, and related exec routing and process lifecycle behavior.
- Process Lifecycle: Covers Process Lifecycle across `exec` foreground and background execution, `yieldMs`, timeouts, PTY, and related exec routing and process lifecycle behavior.
- Direct Tool Invoke API: Covers Direct Tool Invoke API across HTTP `POST /tools/invoke`, Gateway RPC `tools.invoke`, request body and auth semantics, shared-secret operator scope restoration, and related direct tool invoke api and node system.run behavior.
- Node System.run: Covers Node System.run across HTTP `POST /tools/invoke`, Gateway RPC `tools.invoke`, request body and auth semantics, shared-secret operator scope restoration, and related direct tool invoke api and node system.run behavior.
- Host Exec Approvals: Covers Host Exec Approvals across exec approval policy, local approvals state, approval request registration and waiting, allow-once consumption, and related host exec approvals and elevated mode behavior.
- Elevated Mode: Covers Elevated Mode across exec approval policy, local approvals state, approval request registration and waiting, allow-once consumption, and related host exec approvals and elevated mode behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (88%)`
- Positive signals:
-  - Exec docs cover host routing, sandbox fallback, node routing, PATH behavior,
-    approvals, session overrides, and background process follow-up.
-  - Source centralizes target resolution, env/path sanitization, process
-    lifecycle, output aggregation, background registration, and failure
-    classification.
-  - Tests cover target resolution, PTY, background aborts, timeout guidance,
-    node/gateway/sandbox host routing, path behavior, script preflight, and
-    process event routing.
-  - Runtime evidence includes Gateway background process documentation and
-    source diagnostics emitted on exec completion.
- Negative signals:
-  - Archive issues remain around finite backgrounding, orphaned process trees,
-    payload leakage, and repeated relaunch instead of `process` polling.
-  - The same command can behave differently depending on target host,
-    sandbox availability, PTY mode, and approvals state.
- Integration gaps:
-  - Add a restart/compaction process-survivability matrix for background exec.
-  - Add a cross-host exec lane that runs the same command on sandbox, gateway,
-    and node with explicit routing and verifies process follow-up state.
-
-## Quality Score
-
- Score: `Stable (84%)`
- Gitcrawl reports:
-  - `exec process background` returned issue #82178 on finite backgrounding,
-    issue #65983 on orphan process trees after restart/session loss, PR #59719
-    tracking background exec liveness with CLI tasks, issue #70797 on payload
-    leakage, and issue #62432 on relaunching exec instead of process polling.
-  - `exec process background pty timeout host auto` returned issue #75811 about
-    model-controllable `security`/`elevated`/`ask` schema fields.
- Discrawl reports:
-  - `exec process background` returned 2026-05-17 guidance that `exec` starts
-    work and `process` tracks/polls it; cron timeout is the outer guardrail, not
-    process supervision.
- Good qualities:
-  - `exec` target resolution is explicit and fails closed when a requested host
-    override is not allowed.
-  - Host env and PATH handling are centralized, with dangerous inherited env
-    variables blocked for host execution.
-  - Process follow-up exposes list/poll/log/write/send-keys/submit/paste/kill/
-    clear/remove and reports waiting-for-input state.
-  - Failure messages direct long-running work toward registered background exec
-    instead of shell-backgrounding with `&`.
- Bad qualities:
-  - Background process tracking still has real-world reliability and UX edge
-    cases after restart, compaction, and provider retries.
-  - `exec` remains a shell surface; even with strong process tooling, user intent
-    and command side effects are difficult to model.
-  - Routing across sandbox/gateway/node is powerful but increases the cognitive
-    load for operators and agents.
- Excluded from quality:
-  - Unit, integration, e2e, live, and runtime-flow test evidence affected
-    Coverage only.
-
-## Completeness Score
-
- Score: `Stable (88%)`
- Surface instructions: evaluated against `references/completeness/browser-automation-and-exec-sandbox-tools.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Exec Routing, Process Lifecycle, Direct Tool Invoke API, Node System.run, Host Exec Approvals, Elevated Mode.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Background process state should become more durable across Gateway restarts
-  and session loss.
- Process follow-up guidance should be consistently visible to all provider
-  harnesses to avoid relaunch loops.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/tools/exec.md:9`: exec is documented as a mutating shell surface with process support.
- `/Users/kevinlin/code/openclaw/docs/tools/exec.md:44`: docs cover `host=auto`, sandbox, gateway, node, ask, and elevated params.
- `/Users/kevinlin/code/openclaw/docs/tools/exec.md:68`: docs describe host routing behavior and fail-closed sandbox/node behavior.
- `/Users/kevinlin/code/openclaw/docs/tools/exec.md:130`: docs describe PATH handling across host, sandbox, and node.
- `/Users/kevinlin/code/openclaw/docs/gateway/background-process.md:13`: background process docs define exec params and behavior.
- `/Users/kevinlin/code/openclaw/docs/gateway/background-process.md:59`: background process docs enumerate process actions.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec.ts:71`: foreground results carry status, exit code, duration, output, timeout, and cwd details.
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-runtime.ts:78`: host base env sanitization removes dangerous inherited variables.
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-runtime.ts:117`: default output, pending output, and approval timeout constants are centralized.
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-runtime.ts:186`: exec completion emits diagnostic events with target, mode, duration, outcome, and failure metadata.
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-runtime.ts:241`: host target resolution gates requested target overrides and maps auto to sandbox or gateway.
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-runtime.ts:504`: timeout/failure guidance points long-running work at registered background exec and process polling.
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.process.ts:176`: process tool exposes list/poll/log/write/send-keys/submit/paste/kill/clear/remove.
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.process.ts:193`: process runtime reports stdin writability, waiting-for-input state, idle time, and last output time.
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.process.ts:223`: process kill falls back to process-tree termination.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-gateway-approval.e2e.test.ts:1`: e2e coverage exists for Gateway exec approval execution.
- `/Users/kevinlin/code/openclaw/src/agents/sessions/exec.test.ts:1`: session-level exec coverage exists.
- `/Users/kevinlin/code/openclaw/src/agents/sessions/bash-executor.test.ts:1`: bash executor session coverage exists.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-runtime.test.ts:110`: verifies exec target resolution.
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-runtime.test.ts:177`: verifies gateway/node override rejection while sandbox is active.
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-runtime.test.ts:383`: verifies notify-on-exit suppression and timeout behavior for background exec.
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-runtime.test.ts:611`: verifies timeout guidance and failure classification.
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec.pty.test.ts:87`: PTY behavior is covered.
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec.background-abort.test.ts:1`: background abort behavior is covered.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "exec process background" --json`
-
-Results:
-
- Open issue #82178: finite exec backgrounding when process is hidden.
- Open issue #65983: background PTY exec can survive restart/session loss and become untracked.
- Open PR #59719: track background exec liveness with CLI tasks.
- Open issue #70797: tool-call payload leakage during background exec/process flows.
- Open issue #62432: sessions can relaunch exec after "Command still running" instead of switching to process poll.
-
-### Discrawl queries
-
-Query:
-
-`discrawl search --mode fts --limit 5 "exec process background"`
-
-Results:
-
- 2026-05-17 archive guidance distinguishes `exec` as the command starter and
-  `process` as the tracking/polling handle; it recommends background/trackable
-  exec with process polling for long-running work.
--- a/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/host-exec-approvals-and-elevated-mode.md
+++ b/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/host-exec-approvals-and-elevated-mode.md
@@ -1,190 +0,0 @@
---
-title: "Browser automation and exec/sandbox tools - Host Exec Approvals and Elevated Mode Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Browser automation and exec/sandbox tools - Host Exec Approvals and Elevated Mode Maturity Note
-
-## Summary
-
-Host exec approvals and elevated mode is Stable. The implementation has a
-layered approval system, two-phase approval registration, command highlighting,
-safe-bin and allowlist planning, strict inline-eval handling, node approval
-binding, and clear elevated-mode docs. It remains below Lovable because the
-policy is intentionally complex and user/operator configuration mistakes still
-create real risk.
-
-## Category Scope
-
-This note covers exec approval policy, local approvals state, approval request
-registration and waiting, allow-once consumption, safe bins, safe builtins,
-strict inline eval, interpreter planning, command spans, node `system.run`
-approval plan binding, follow-up delivery, and elevated mode.
-
-## Features
-
- Host Exec Approvals: Covers Host Exec Approvals across exec approval policy, local approvals state, approval request registration and waiting, allow-once consumption, and related host exec approvals and elevated mode behavior.
- Elevated Mode: Covers Elevated Mode across exec approval policy, local approvals state, approval request registration and waiting, allow-once consumption, and related host exec approvals and elevated mode behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (84%)`
- Positive signals:
-  - Docs cover basic and advanced exec approvals, safe bins, allowlists, strict
-    inline eval, approval forwarding, same-chat approvals, and elevated mode.
-  - Source implements two-phase registration before returning pending approval,
-    pending lookup, expiry, allow-once atomic consumption, and follow-up resume.
-  - Tests cover approval request parsing, command spans, manager timers,
-    allowlist matching, safe-bin policy, strict inline eval, approval parity,
-    native approval routing, and node binding.
-  - Node `system.run` path strips user control fields and revalidates approvals
-    against canonical command/cwd/session plans.
- Negative signals:
-  - Archive reports still show operator confusion around `security=full`,
-    `ask=off`, safe bins, inline eval, and node approval routing.
-  - Elevated mode intentionally bypasses sandboxing for exec and must be reasoned
-    about with tool policy and approvals together.
- Integration gaps:
-  - Add an operator UX smoke that walks a user from blocked command to approval,
-    allow-once, allow-always, and elevated mode across gateway and node hosts.
-  - Add a scorecard-specific matrix for safe bins, safe builtins, interpreters,
-    shell wrappers, and strict inline eval combinations.
-
-## Quality Score
-
- Score: `Stable (82%)`
- Gitcrawl reports:
-  - `exec approval safe bins` returned PR #79363 for opt-in safe builtins, issue
-    #46056 about shell builtins and approval gates, PR #71154 around allowlisted
-    command parsing, PR #80922 routing allow-always through the command planner,
-    and PR #84172 revamping command authorization candidates.
-  - `tools invoke system.run approval node invoke` returned issue #77096 on
-    symlink cwd trust, PR #81827 adding denyPathPatterns, PR #78226 on node
-    allowlist writeback restoring revoked approvals, and PR #81488 hardening
-    node exec approval precheck env.
- Discrawl reports:
-  - `exec approvals safe bins elevated` returned a 2026-03-06 support answer
-    explaining that `security="full"` plus `ask="off"` is raw shell access on
-    the selected host, subject only to tool policy and stricter approvals state.
- Good qualities:
-  - Approval registration is two-phase to avoid orphaned `/approve` races.
-  - Approval manager keeps resolved entries briefly for waiters and consumes
-    allow-once decisions atomically.
-  - Host approval params include command, argv, system run plan, cwd, env,
-    host, node id, security, ask, command spans, requester, and turn source.
-  - Node forwarding strips approval control fields from untrusted input and only
-    restores trusted approval state from Gateway approval records.
-  - Elevated docs explicitly say it does not override tool policy.
- Bad qualities:
-  - Safe-bin and allowlist semantics are difficult to explain because shell
-    wrappers, builtins, interpreters, inline eval, stdin trust, and path trust
-    all interact.
-  - `security`, `ask`, elevated state, and local approval defaults can produce a
-    stricter-than-expected or looser-than-expected result if operators configure
-    only one layer.
-  - Native approval delivery is spread across channels and can fail in ways that
-    look like exec policy failure.
- Excluded from quality:
-  - Unit, integration, e2e, live, and runtime-flow test evidence affected
-    Coverage only.
-
-## Completeness Score
-
- Score: `Stable (84%)`
- Surface instructions: evaluated against `references/completeness/browser-automation-and-exec-sandbox-tools.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Host Exec Approvals, Elevated Mode.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Approval policy needs clearer diagnostics when local approvals state and
-  `tools.exec.*` disagree.
- Safe-bin, strict-inline-eval, and interpreter policy should remain under
-  security review.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/tools/exec-approvals.md:11`: approvals are documented as part of a guardrail stack with tool policy and elevated mode.
- `/Users/kevinlin/code/openclaw/docs/tools/exec-approvals.md:18`: docs state the stricter of `tools.exec.*` and local approvals state wins.
- `/Users/kevinlin/code/openclaw/docs/tools/exec-approvals.md:48`: docs describe gateway/node hosts, trust model, file binding, and drift.
- `/Users/kevinlin/code/openclaw/docs/tools/exec-approvals.md:115`: docs cover security, ask, fallback, strict inline eval, command highlighting, and safe bins.
- `/Users/kevinlin/code/openclaw/docs/tools/exec-approvals-advanced.md:14`: docs explain safe bins as stdin-only and not generic trust.
- `/Users/kevinlin/code/openclaw/docs/tools/exec-approvals-advanced.md:66`: docs cover trusted dirs, shell chaining, wrappers, and strict inline eval.
- `/Users/kevinlin/code/openclaw/docs/tools/elevated.md:9`: elevated mode is documented as sandbox-to-host exec escape.
- `/Users/kevinlin/code/openclaw/docs/tools/elevated.md:103`: elevated does not override tool policy or host selection.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-approval-request.ts:116`: approval request registration happens before returning `approval-pending`.
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-approval-request.ts:137`: waitDecision handles timeout/missing approval as null decision.
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-approval-request.ts:269`: host approval params include system run plan, env, cwd, host, security, ask, command spans, requester, and turn source.
- `/Users/kevinlin/code/openclaw/src/gateway/exec-approval-manager.ts:54`: approval manager tracks pending approval records.
- `/Users/kevinlin/code/openclaw/src/gateway/exec-approval-manager.ts:118`: approval resolution records decision and schedules cleanup.
- `/Users/kevinlin/code/openclaw/src/gateway/exec-approval-manager.ts:175`: allow-once decisions are consumed atomically.
- `/Users/kevinlin/code/openclaw/src/gateway/exec-approval-manager.ts:200`: approval lookup supports exact, prefix, ambiguous, and none results.
- `/Users/kevinlin/code/openclaw/src/gateway/node-invoke-system-run-approval.ts:190`: system.run forwarding allowlists fields understood by node host.
- `/Users/kevinlin/code/openclaw/src/gateway/node-invoke-system-run-approval.ts:214`: approval control fields are gated behind a real exec approval record.
- `/Users/kevinlin/code/openclaw/src/node-host/invoke-system-run.ts:106`: node system.run policy phase carries security, allowlist, safe bins, strict inline eval, and approval decisions.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-gateway-approval.e2e.test.ts:1`: e2e coverage exists for Gateway exec approvals.
- `/Users/kevinlin/code/openclaw/src/gateway/operator-approvals-client.e2e.test.ts:1`: operator approval client e2e coverage exists.
- `/Users/kevinlin/code/openclaw/src/infra/approval-native-delivery.test.ts:1`: native approval delivery coverage exists.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-approval-request.test.ts:94`: verifies string approval decisions are returned.
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-approval-request.test.ts:183`: verifies registration response id is used when waiting for a decision.
- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-approval-request.test.ts:276`: verifies command spans are added to host approval registration payloads.
- `/Users/kevinlin/code/openclaw/src/gateway/exec-approval-manager.test.ts:9`: verifies approval manager behavior.
- `/Users/kevinlin/code/openclaw/src/infra/exec-approvals-safe-bins.test.ts:1`: safe-bin policy tests exist.
- `/Users/kevinlin/code/openclaw/src/infra/system-run-approval-binding.test.ts:1`: system.run approval binding tests exist.
- `/Users/kevinlin/code/openclaw/src/gateway/node-invoke-system-run-approval.test.ts:1`: Gateway node system.run approval tests exist.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "exec approval safe bins" --json`
-
-Results:
-
- Open PR #79363: opt-in `tools.exec.safeBuiltins`.
- Open issue #46056: shell builtins always trigger approval gate with allowlist.
- Open PR #71154: accept POSIX backslash-newline in allowlisted commands.
- Open PR #80922: route allow-always through command authorization planner.
- Open PR #84172: revamp command authorization candidates.
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "tools invoke system.run approval node invoke" --json`
-
-Results:
-
- Open issue #77096: opt-in symlink cwd for approval-bound `system.run`.
- Open PR #81827: add `tools.exec.denyPathPatterns` hard-deny gate.
- Open PR #78226: node allowlist writeback can restore revoked exec approvals.
- Open PR #81488: harden node exec approval precheck env.
-
-### Discrawl queries
-
-Query:
-
-`discrawl search --mode fts --limit 5 "exec approvals safe bins elevated"`
-
-Results:
-
- 2026-03-06 support archive explains that `security="full"` plus `ask="off"`
-  means raw shell access on gateway/node host when tool policy and local
-  approvals state allow it.
--- a/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/report.md
+++ b/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/report.md
@@ -1,172 +0,0 @@
---
-title: "Browser automation and exec/sandbox tools Maturity Report"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Browser automation and exec/sandbox tools Maturity Report
-
-## Top-level scores
-
-These rollups are simple arithmetic means over the category-note numeric
-scores in `scores.yaml`. Percentages are rounded to the nearest whole
-number.
-
- Coverage: `Beta (79%)`
- Quality: `Beta (75%)`
- Completeness: `Beta (79%)`
- LTS Features: `2/3`
-
-## Summary
-
-This report promotes the archived `browser-automation-and-exec-sandbox-tools` maturity evidence from `/Users/kevinlin/tmp/maturity/browser-automation-and-exec-sandbox-tools` into the current process-version-3 inventory contract.
-
-The category Coverage and Quality scores come from the archived evidence-backed score rows. Completeness is initialized from the same archived evidence breadth and known-gap record, then joined with the surface-specific completeness rubric referenced by taxonomy.
-
-## Matrix
-
-| Category                                                               | LTS | Coverage       | Quality      | Completeness   | Features to evaluate                                                                                            |
-| ---------------------------------------------------------------------- | --- | -------------- | ------------ | -------------- | --------------------------------------------------------------------------------------------------------------- |
-| [Browser Automation](browser-actions-snapshots-and-artifacts.md)       | ❌  | `Beta (78%)`   | `Beta (74%)` | `Beta (78%)`   | Browser Actions, Snapshots, Artifacts, Browser Plugin Service, Profiles, Browser Security, SSRF, Remote Control |
-| [Tool Invocation and Execution](exec-routing-and-process-lifecycle.md) | ✅  | `Stable (82%)` | `Beta (79%)` | `Stable (82%)` | Exec Routing, Process Lifecycle, Direct Tool Invoke API, Node System.run, Host Exec Approvals, Elevated Mode    |
-| [Sandbox and Tool Policy](sandbox-backends-and-workspace-isolation.md) | ✅  | `Beta (76%)`   | `Beta (72%)` | `Beta (76%)`   | Sandbox Backends, Workspace Isolation, Sandboxed Browser, Codex Dynamic Tools, Tool Policy, Sandbox Tool Gates  |
-
-## Scoring rubric
-
- Coverage:
-  maturity-label rating for integration, e2e, live, or server/runtime flow
-  evidence across the category. Unit tests can provide supporting context but never make a
-  feature covered by themselves.
- Quality:
-  maturity-label rating for implementation and operational robustness. Unit,
-  integration, e2e, live, and real runtime-flow test coverage are Coverage
-  inputs only; they do not raise or lower Quality.
- Completeness:
-  maturity-label rating for how fully the category delivers the intended
-  surface-specific capability set. Use the taxonomy-linked completeness
-  instructions for this surface.
- LTS:
-  calculated as `quality > 80 and coverage > 90`, or when the matching
-  taxonomy category sets `human_lts_override`.
- Shared score bands:
-  `Lovable = 95-100`, `Stable = 80-95`, `Beta = 70-80`,
-  `Alpha = 50-70`, and `Experimental = 0-50`. At shared boundaries, choose the
-  higher maturity label.
- Major quality/completeness gaps:
-  evidence text only, tracked in the detailed feature inventory rather than as a
-  separate scored dimension.
-
-## Detailed feature inventory
-
-### 1. Browser Automation
-
-Search anchors: Browser Actions, Snapshots, Artifacts, browser automation and exec/sandbox tools browser actions, snapshots, and artifacts, browser actions, snapshots, and artifacts, Browser Plugin Service, Profiles, browser automation and exec/sandbox tools browser plugin service and profiles, browser plugin service and profiles, Browser Security, SSRF, Remote Control, browser automation and exec/sandbox tools browser security, ssrf, and remote control, browser security, ssrf, and remote control.
-
-Category note: [Browser Automation](browser-actions-snapshots-and-artifacts.md)
-
-Score decisions:
-
- Coverage: `Beta (78%)`
- Quality: `Beta (74%)`
- Completeness: `Beta (78%)`
- LTS: ❌
-
-Features:
-
- Browser Actions: Covers Browser Actions across browser tool action schemas, navigate/act/snapshot/screenshot operations, AI/role/ARIA snapshot formats, action ref storage, and related browser actions, snapshots, and artifacts behavior.
- Snapshots: Covers Snapshots across browser tool action schemas, navigate/act/snapshot/screenshot operations, AI/role/ARIA snapshot formats, action ref storage, and related browser actions, snapshots, and artifacts behavior.
- Artifacts: Covers Artifacts across browser tool action schemas, navigate/act/snapshot/screenshot operations, AI/role/ARIA snapshot formats, action ref storage, and related browser actions, snapshots, and artifacts behavior.
- Browser Plugin Service: Covers Browser Plugin Service across bundled browser plugin activation, browser CLI registration, `browser.request` Gateway routing, control-service startup, and related browser plugin service and profiles behavior.
- Profiles: Covers Profiles across bundled browser plugin activation, browser CLI registration, `browser.request` Gateway routing, control-service startup, and related browser plugin service and profiles behavior.
- Browser Security: Covers Browser Security across browser-control auth, navigation URL validation, delayed navigation guards, strict private-network SSRF policy, and related browser security, ssrf, and remote control behavior.
- SSRF: Covers SSRF across browser-control auth, navigation URL validation, delayed navigation guards, strict private-network SSRF policy, and related browser security, ssrf, and remote control behavior.
- Remote Control: Covers Remote Control across browser-control auth, navigation URL validation, delayed navigation guards, strict private-network SSRF policy, and related browser security, ssrf, and remote control behavior.
-
-Primary docs:
-
- `docs/tools/browser-control.md`
- `docs/help/testing.md`
- `docs/tools/browser.md`
- `docs/gateway/security/index.md`
- `docs/gateway/security/audit-checks.md`
-
-### 2. Tool Invocation and Execution
-
-Search anchors: Exec Routing, Process Lifecycle, browser automation and exec/sandbox tools exec routing and process lifecycle, exec routing and process lifecycle, Direct Tool Invoke API, Node System.run, browser automation and exec/sandbox tools direct tool invoke api and node system.run, direct tool invoke api and node system.run, Host Exec Approvals, Elevated Mode, browser automation and exec/sandbox tools host exec approvals and elevated mode, host exec approvals and elevated mode.
-
-Category note: [Tool Invocation and Execution](exec-routing-and-process-lifecycle.md)
-
-Score decisions:
-
- Coverage: `Stable (82%)`
- Quality: `Beta (79%)`
- Completeness: `Stable (82%)`
- LTS: ✅
-
-Features:
-
- Exec Routing: Covers Exec Routing across `exec` foreground and background execution, `yieldMs`, timeouts, PTY, and related exec routing and process lifecycle behavior.
- Process Lifecycle: Covers Process Lifecycle across `exec` foreground and background execution, `yieldMs`, timeouts, PTY, and related exec routing and process lifecycle behavior.
- Direct Tool Invoke API: Covers Direct Tool Invoke API across HTTP `POST /tools/invoke`, Gateway RPC `tools.invoke`, request body and auth semantics, shared-secret operator scope restoration, and related direct tool invoke api and node system.run behavior.
- Node System.run: Covers Node System.run across HTTP `POST /tools/invoke`, Gateway RPC `tools.invoke`, request body and auth semantics, shared-secret operator scope restoration, and related direct tool invoke api and node system.run behavior.
- Host Exec Approvals: Covers Host Exec Approvals across exec approval policy, local approvals state, approval request registration and waiting, allow-once consumption, and related host exec approvals and elevated mode behavior.
- Elevated Mode: Covers Elevated Mode across exec approval policy, local approvals state, approval request registration and waiting, allow-once consumption, and related host exec approvals and elevated mode behavior.
-
-Primary docs:
-
- `docs/tools/exec.md`
- `docs/gateway/background-process.md`
- `docs/gateway/tools-invoke-http-api.md`
- `docs/gateway/operator-scopes.md`
- `docs/gateway/protocol.md`
- `docs/tools/exec-approvals.md`
- `docs/tools/exec-approvals-advanced.md`
- `docs/tools/elevated.md`
-
-### 3. Sandbox and Tool Policy
-
-Search anchors: Sandbox Backends, Workspace Isolation, browser automation and exec/sandbox tools sandbox backends and workspace isolation, sandbox backends and workspace isolation, Sandboxed Browser, Codex Dynamic Tools, browser automation and exec/sandbox tools sandboxed browser and codex dynamic tools, sandboxed browser and codex dynamic tools, Tool Policy, Sandbox Tool Gates, browser automation and exec/sandbox tools tool policy and sandbox tool gates, tool policy and sandbox tool gates.
-
-Category note: [Sandbox and Tool Policy](sandbox-backends-and-workspace-isolation.md)
-
-Score decisions:
-
- Coverage: `Beta (76%)`
- Quality: `Beta (72%)`
- Completeness: `Beta (76%)`
- LTS: ✅
-
-Features:
-
- Sandbox Backends: Covers Sandbox Backends across sandbox modes, scopes, workspace roots, workspaceAccess, and related sandbox backends and workspace isolation behavior.
- Workspace Isolation: Covers Workspace Isolation across sandbox modes, scopes, workspace roots, workspaceAccess, and related sandbox backends and workspace isolation behavior.
- Sandboxed Browser: Covers Sandboxed Browser across sandbox browser config, Docker browser container creation, CDP relay authentication, noVNC password/token flow, and related sandboxed browser and codex dynamic tools behavior.
- Codex Dynamic Tools: Covers Codex Dynamic Tools across sandbox browser config, Docker browser container creation, CDP relay authentication, noVNC password/token flow, and related sandboxed browser and codex dynamic tools behavior.
- Tool Policy: Covers Tool Policy across tool profiles, tool groups, allow/deny policy, provider policy, and related tool policy and sandbox tool gates behavior.
- Sandbox Tool Gates: Covers Sandbox Tool Gates across tool profiles, tool groups, allow/deny policy, provider policy, and related tool policy and sandbox tool gates behavior.
-
-Primary docs:
-
- `docs/gateway/sandboxing.md`
- `docs/gateway/sandbox-vs-tool-policy-vs-elevated.md`
- `docs/tools/multi-agent-sandbox-tools.md`
- `docs/plugins/codex-harness-reference.md`
- `docs/gateway/config-tools.md`
-
-## Recommended scorecard interpretation
-
-Use this migrated score as the current inventory baseline. Refresh individual categories with live category-agent research before treating a high score as an LTS promotion gate.
-
-## Out of scope for this surface
-
- Redefining taxonomy category boundaries; taxonomy remains the source of truth for category identity, features, docs, and search anchors.
-
-## Audit provenance
-
- Score source:
-  `docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/scores.yaml`.
- Taxonomy metadata source:
-  `.agents/skills/claw-score/taxonomy.yaml`.
- Archived evidence source:
-  `/Users/kevinlin/tmp/maturity/browser-automation-and-exec-sandbox-tools`.
--- a/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/sandbox-backends-and-workspace-isolation.md
+++ b/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/sandbox-backends-and-workspace-isolation.md
@@ -1,200 +0,0 @@
---
-title: "Browser automation and exec/sandbox tools - Sandbox and Tool Policy Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Browser automation and exec/sandbox tools - Sandbox and Tool Policy Maturity Note
-
-## Summary
-
-Sandbox backends and workspace isolation is Stable on Coverage and Beta on
-Quality. Docker, SSH, and OpenShell backends are documented and represented in
-source/tests; workspace access, bind mounts, filesystem bridge, registry, and
-path guards are substantial. Quality remains Beta because Docker-in-Docker path
-parity, remote backend bridge behavior, read/write path translation, and browser
-backend limits remain operationally fragile.
-
-## Category Scope
-
-Included in this category:
-
- Sandbox Backends: Covers Sandbox Backends across sandbox modes, scopes, workspace roots, workspaceAccess, and related sandbox backends and workspace isolation behavior.
- Workspace Isolation: Covers Workspace Isolation across sandbox modes, scopes, workspace roots, workspaceAccess, and related sandbox backends and workspace isolation behavior.
- Sandboxed Browser: Covers Sandboxed Browser across sandbox browser config, Docker browser container creation, CDP relay authentication, noVNC password/token flow, and related sandboxed browser and codex dynamic tools behavior.
- Codex Dynamic Tools: Covers Codex Dynamic Tools across sandbox browser config, Docker browser container creation, CDP relay authentication, noVNC password/token flow, and related sandboxed browser and codex dynamic tools behavior.
- Tool Policy: Covers Tool Policy across tool profiles, tool groups, allow/deny policy, provider policy, and related tool policy and sandbox tool gates behavior.
- Sandbox Tool Gates: Covers Sandbox Tool Gates across tool profiles, tool groups, allow/deny policy, provider policy, and related tool policy and sandbox tool gates behavior.
-
-## Features
-
- Sandbox Backends: Covers Sandbox Backends across sandbox modes, scopes, workspace roots, workspaceAccess, and related sandbox backends and workspace isolation behavior.
- Workspace Isolation: Covers Workspace Isolation across sandbox modes, scopes, workspace roots, workspaceAccess, and related sandbox backends and workspace isolation behavior.
- Sandboxed Browser: Covers Sandboxed Browser across sandbox browser config, Docker browser container creation, CDP relay authentication, noVNC password/token flow, and related sandboxed browser and codex dynamic tools behavior.
- Codex Dynamic Tools: Covers Codex Dynamic Tools across sandbox browser config, Docker browser container creation, CDP relay authentication, noVNC password/token flow, and related sandboxed browser and codex dynamic tools behavior.
- Tool Policy: Covers Tool Policy across tool profiles, tool groups, allow/deny policy, provider policy, and related tool policy and sandbox tool gates behavior.
- Sandbox Tool Gates: Covers Sandbox Tool Gates across tool profiles, tool groups, allow/deny policy, provider policy, and related tool policy and sandbox tool gates behavior.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Stable (85%)`
- Positive signals:
-  - Docs cover sandboxed tools, modes, scopes, Docker/SSH/OpenShell backend
-    matrix, Docker-in-Docker path parity, workspace access, binds, and browser
-    backend support.
-  - Source has a backend registry, Docker and SSH backends, context resolution,
-    workspace layout, filesystem bridge, path guard, and registry updates.
-  - Tests cover backend registry, Docker backend manager, SSH backend, sandbox
-    config merge, sandbox explain, workspace mounts, bind specs, fs bridge
-    boundary checks, fs bridge backend e2e, remote fs bridge, and sandbox media
-    paths.
-  - Docs and source explicitly fail when a backend does not support browser
-    sandboxes.
- Negative signals:
-  - Archive reports include Docker gateway restart loops, missing python in
-    sandbox FS bridge paths, and sandbox write/read path confusion.
-  - SSH/OpenShell are more remote-canonical and do not support sandbox browser
-    containers.
- Integration gaps:
-  - Add one backend matrix that runs the same exec/read/write/edit/apply_patch
-    flow across Docker, SSH, and OpenShell.
-  - Add Docker-in-Docker deployment smoke for host-path parity and FS bridge
-    heartbeat writes.
-
-## Quality Score
-
- Score: `Beta (78%)`
- Gitcrawl reports:
-  - `sandbox docker fs bridge` returned PR #56785 for python3 missing guidance,
-    issue #86612 for Docker gateway restart loop with sandbox enabled, issue
-    #7575 for Sysbox runtime, and PR #69824 for ACP runtime consolidation.
-  - `sandbox backend workspaceAccess bind fs bridge openshell ssh docker`
-    returned no focused hits; broader sandbox queries were needed.
- Discrawl reports:
-  - `sandbox backend fs bridge` returned 2026-04-16 support threads explaining
-    Docker image Python requirements, SSH/OpenShell remote bridge risk, and that
-    write/edit use a Python helper inside the active sandbox runtime rather than
-    host Python.
-  - The same archive also included a hook/sandbox workspace report where
-    sandboxed writes were not visible on the expected host path, showing why
-    path translation diagnostics matter.
- Good qualities:
-  - Backend registration is explicit and fails when an unregistered backend is
-    requested.
-  - Sandbox context resolves effective runtime status, workspace layout,
-    backend, browser support, fs bridge, and registry entry in one path.
-  - Filesystem bridge uses path guards, pinned entries, access checks, and
-    backend shell commands instead of direct host writes.
-  - Docker backend reports config-label match and runtime removal errors.
- Bad qualities:
-  - Docker socket, host path parity, bind mounts, and read-only overlays are
-    powerful but easy to misconfigure.
-  - Remote backends have weaker browser support and rely on remote shell
-    environment assumptions.
-  - `workspaceAccess` behavior can be surprising because agent workspace,
-    sandbox workspace, and remote-canonical state may diverge.
- Excluded from quality:
-  - Unit, integration, e2e, live, and runtime-flow test evidence affected
-    Coverage only.
-
-## Completeness Score
-
- Score: `Stable (85%)`
- Surface instructions: evaluated against `references/completeness/browser-automation-and-exec-sandbox-tools.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Sandbox Backends, Workspace Isolation, Sandboxed Browser, Codex Dynamic Tools, Tool Policy, Sandbox Tool Gates.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- SSH and OpenShell backends need stronger parity proof against Docker for file
-  mutation and process execution.
- Sandbox diagnostics should make path ownership and host-vs-container path
-  parity errors obvious.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/gateway/sandboxing.md:9`: docs state OpenClaw can run tools inside sandbox backends while Gateway stays on host.
- `/Users/kevinlin/code/openclaw/docs/gateway/sandboxing.md:15`: tool execution and optional sandboxed browser are covered by sandboxing.
- `/Users/kevinlin/code/openclaw/docs/gateway/sandboxing.md:39`: sandbox modes include off, non-main, and all.
- `/Users/kevinlin/code/openclaw/docs/gateway/sandboxing.md:58`: sandbox scope controls agent/session/shared container reuse.
- `/Users/kevinlin/code/openclaw/docs/gateway/sandboxing.md:66`: backend docs list Docker, SSH, and OpenShell.
- `/Users/kevinlin/code/openclaw/docs/gateway/sandboxing.md:78`: backend matrix shows Docker supports browser sandbox while SSH/OpenShell do not.
- `/Users/kevinlin/code/openclaw/docs/gateway/sandboxing.md:94`: Docker-in-Docker warning documents host-path and FS bridge parity requirements.
- `/Users/kevinlin/code/openclaw/docs/gateway/sandbox-vs-tool-policy-vs-elevated.md:42`: bind mount security quick check warns about sandbox filesystem piercing.
- `/Users/kevinlin/code/openclaw/docs/tools/multi-agent-sandbox-tools.md:181`: per-agent sandbox settings override global defaults.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/backend.ts:43`: backend registry registers sandbox backends.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/backend.ts:70`: missing backend factory throws actionable configuration guidance.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/backend.ts:83`: Docker and SSH backends are registered.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/docker-backend.ts:32`: Docker backend ensures a container and returns an exec-capable handle.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/docker-backend.ts:63`: Docker backend advertises browser capability.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/context.ts:130`: sandbox context resolution starts from effective runtime status.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/context.ts:145`: workspace layout is ensured before backend creation.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/context.ts:159`: context requires the configured backend factory.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/context.ts:201`: backend without browser capability fails when browser sandbox is enabled.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/fs-bridge.ts:34`: sandbox filesystem bridge is created for a sandbox context.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/fs-bridge.ts:83`: writes require write access and path safety checks.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/fs-bridge.ts:251`: planned commands recheck path guards before execution.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/fs-bridge.backend.e2e.test.ts:72`: local backend e2e coverage exists for sandbox fs bridge behavior.
- `/Users/kevinlin/code/openclaw/test/scripts/sandbox-common-smoke-workflow.test.ts:1`: script smoke coverage exists for common sandbox workflow.
- `/Users/kevinlin/code/openclaw/scripts/test-live-cli-backend-docker.sh:346`: live Docker backend script exists for CLI backend validation.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/backend.test.ts:8`: verifies sandbox backend registry behavior.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/docker-backend.test.ts:46`: verifies Docker sandbox backend manager behavior.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/ssh-backend.test.ts:139`: verifies SSH sandbox backend behavior.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/fs-bridge.boundary.test.ts:18`: verifies writes into read-only bind mounts are blocked.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/fs-bridge.boundary.test.ts:62`: verifies pre-existing symlink escapes are rejected.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/workspace-mounts.test.ts:1`: workspace mount tests exist.
- `/Users/kevinlin/code/openclaw/src/commands/sandbox-explain.test.ts:1`: sandbox explain tests exist.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "sandbox docker fs bridge" --json`
-
-Results:
-
- Open PR #56785: sandbox guidance when python3 is missing.
- Open issue #86612: Docker gateway container restart loop when sandbox is enabled.
- Open issue #7575: Sysbox Docker runtime for secure container isolation.
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "sandbox backend workspaceAccess bind fs bridge openshell ssh docker" --json`
-
-Results:
-
- No focused hits returned; broader `sandbox docker fs bridge` and `sandbox browser`
-  queries supplied current archive evidence.
-
-### Discrawl queries
-
-Query:
-
-`discrawl search --mode fts --limit 5 "sandbox backend fs bridge"`
-
-Results:
-
- 2026-04-16 support archive explains Docker sandbox image Python requirements,
-  SSH/OpenShell remote FS bridge risk, and that write/edit run helper code
-  inside the active sandbox runtime.
- 2026-04-08 hook/sandbox workspace report shows a sandboxed write path where
-  visible host filesystem effects were unclear, reinforcing the path-diagnostic
-  gap.
--- a/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/sandboxed-browser-and-codex-dynamic-tools.md
+++ b/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/sandboxed-browser-and-codex-dynamic-tools.md
@@ -1,204 +0,0 @@
---
-title: "Browser automation and exec/sandbox tools - Sandboxed Browser and Codex Dynamic Tools Maturity Note"
-version: 3
-last_refreshed: 2026-05-30
-last_refreshed_by: codex
---
-
-# Browser automation and exec/sandbox tools - Sandboxed Browser and Codex Dynamic Tools Maturity Note
-
-## Summary
-
-Sandboxed browser and Codex dynamic tools is Beta. The Docker sandbox browser
-path has real implementation and tests: CDP relay auth, noVNC tokens, browser
-config hashes, loopback publish, bridge reuse, and auto-start. Codex dynamic
-tools also have a clear fail-closed design that exposes `sandbox_exec` and
-`sandbox_process` when OpenClaw sandboxing disables host-native Code Mode. The
-score remains Beta because non-Docker browser sandboxing is unsupported, Codex
-sandbox exec-server is preview/local-only, and several active reports mention
-browser sandbox availability and CDP/noVNC friction.
-
-## Category Scope
-
-This note covers sandbox browser config, Docker browser container creation,
-CDP relay authentication, noVNC password/token flow, browser bridge server,
-CDP source ranges, config-hash recreation, `allowHostControl`, unsupported
-backends, Codex native execution disablement under active OpenClaw sandboxing,
-`sandbox_exec`, `sandbox_process`, and the preview Codex sandbox exec-server.
-
-## Features
-
- Sandboxed Browser: Covers sandbox browser config, Docker browser container creation, CDP relay authentication, the noVNC password/token flow, the browser bridge server, CDP source ranges, config-hash recreation, `allowHostControl`, and unsupported backends.
- Codex Dynamic Tools: Covers Codex native execution disablement under active OpenClaw sandboxing, the `sandbox_exec` and `sandbox_process` dynamic tools, and the preview Codex sandbox exec-server.
-
-## Archive Freshness
-
- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
-
-## Coverage Score
-
- Score: `Beta (76%)`
- Positive signals:
-  - Docs explicitly describe sandboxed browser auto-start, dedicated Docker
-    network, CDP source range, noVNC observer token URLs, allowHostControl, and
-    custom control allowlists.
-  - Source implements Docker browser image contract checks, CDP auth, noVNC
-    password/token handling, config-hash recreation, loopback port publishing,
-    and bridge reuse.
-  - Codex docs and source implement fail-closed native execution and distinct
-    sandbox-backed dynamic tools.
-  - Tests cover sandbox browser create args, noVNC auth, CDP relay auth, bridge
-    policy changes, dynamic tool exposure, and Codex sandbox exec-server.
- Negative signals:
-  - Docs state sandbox browser support is Docker-only; SSH/OpenShell do not
-    support it.
-  - Codex sandbox exec-server is a preview path requiring newer app-server
-    support and a local loopback app-server.
-  - Archive reports include target=sandbox browser unavailable and requests for
-    non-Docker browser sandbox support.
- Integration gaps:
-  - Add a release-gate sandbox browser E2E that opens noVNC token flow, proves
-    CDP auth, and runs browser snapshot/action through the sandbox target.
-  - Add an app-server sandbox exec-server compatibility smoke for supported
-    Codex app-server versions and a fail-closed smoke for unsupported versions.
-
-## Quality Score
-
- Score: `Beta (72%)`
- Gitcrawl reports:
-  - `sandbox browser` returned issue #84942 about sandbox policy reporting
-    sandboxed while target=sandbox browser is unavailable, issue #52662 for
-    non-Docker browser sandbox backends, issue #49609 for noVNC clipboard
-    encoding, PR #85572 adding sandbox posture checks, and issue #64383 about
-    simplifying the sandbox browser CDP path.
-  - `sandbox browser sandbox_exec sandbox_process Codex app-server` returned no
-    focused hits, so broader sandbox/browser archive evidence was used.
- Discrawl reports:
-  - `browser sandbox` hybrid search returned a 2026-05-21 maintainer message
-    about using browser automation from a US-hosted devbox/session and avoiding
-    VPN-required browser workflows.
-  - The same search returned 2026-05-14 release notes saying browser/control UI
-    pairing got stricter and transcript/tool-result redaction became more
-    consistent.
-  - `sandbox_exec sandbox_process browser sandbox` returned no high-signal FTS
-    hits.
- Good qualities:
-  - Sandbox browser image and container config have explicit contract/hash
-    checks and stale-container recreation paths.
-  - CDP and noVNC are published on loopback and protected with auth/token
-    mechanisms.
-  - Active OpenClaw sandboxing disables Codex native host-side execution
-    surfaces instead of silently treating Codex's host sandbox as equivalent.
-  - Dynamic tool exposure uses distinct `sandbox_exec`/`sandbox_process` names
-    and follow-up guidance.
- Bad qualities:
-  - Browser sandbox is coupled to Docker today.
-  - The CDP/noVNC/container path is security-sensitive and operationally
-    complicated.
-  - Codex sandbox exec-server remains preview and local-only, which keeps the
-    stable path intentionally fail-closed.
- Excluded from quality:
-  - Unit, integration, e2e, live, and runtime-flow test evidence affected
-    Coverage only.
-
-## Completeness Score
-
- Score: `Beta (76%)`
- Surface instructions: evaluated against `references/completeness/browser-automation-and-exec-sandbox-tools.md`.
- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Sandboxed Browser and Codex Dynamic Tools.
- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
-
-## Known Gaps
-
- Non-Docker browser sandbox support needs a first-party story or a clearly
-  documented extension point.
- Codex sandbox exec-server should stay Beta until the environment contract is
-  stable and covered by release-gate integration.
-
-## Evidence
-
-### Docs
-
- `/Users/kevinlin/code/openclaw/docs/gateway/sandboxing.md:21`: sandboxed browser details document auto-start, network, CDP source range, noVNC token URL, allowHostControl, and custom target allowlists.
- `/Users/kevinlin/code/openclaw/docs/gateway/sandboxing.md:78`: backend matrix states browser sandbox is supported on Docker and not supported on SSH/OpenShell.
- `/Users/kevinlin/code/openclaw/docs/gateway/sandboxing.md:101`: active OpenClaw sandboxing disables Codex native Code Mode, user MCP, and app-backed plugins while exposing sandbox-backed tools.
- `/Users/kevinlin/code/openclaw/docs/plugins/codex-harness-reference.md:151`: Codex docs explain active OpenClaw sandboxing disables host-side native execution surfaces.
- `/Users/kevinlin/code/openclaw/docs/plugins/codex-harness-reference.md:170`: sandboxed native execution is preview and fail-closed by default.
- `/Users/kevinlin/code/openclaw/docs/plugins/codex-harness-reference.md:197`: preview path starts a loopback exec-server backed by the active sandbox and registers it with Codex app-server.
-
-### Source
-
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/browser.ts:77`: sandbox browser waits for CDP readiness with auth.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/browser.ts:162`: sandbox browser image contract is checked before use.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/browser.ts:210`: `ensureSandboxBrowser` creates or reuses a sandbox browser context.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/browser.ts:222`: browser sandbox is skipped when the sandbox tool policy does not allow browser.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/browser.ts:325`: new containers generate noVNC password and CDP auth token.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/browser.ts:364`: CDP and noVNC ports are published on loopback.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/browser.ts:478`: browser bridge server starts with resolved config, auth, auto-start, and noVNC token resolver.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/novnc-auth.ts:58`: noVNC observer tokens are one-time short-lived tokens.
- `/Users/kevinlin/code/openclaw/extensions/codex/src/app-server/dynamic-tool-build.ts:508`: sandbox shell dynamic tools are added when OpenClaw sandboxing disables native execution.
- `/Users/kevinlin/code/openclaw/extensions/codex/src/app-server/dynamic-tool-build.ts:526`: `sandbox_exec` wraps exec and rewrites follow-up guidance to `sandbox_process`.
- `/Users/kevinlin/code/openclaw/extensions/codex/src/app-server/native-execution-policy.ts:63`: native execution policy maps auto to sandbox/gateway and blocks node-targeted native surfaces.
- `/Users/kevinlin/code/openclaw/extensions/codex/src/app-server/sandbox-exec-server.ts:60`: Codex sandbox exec-server environment is registered only when an active sandbox backend exists.
-
-### Integration tests
-
- `/Users/kevinlin/code/openclaw/scripts/e2e/browser-cdp-snapshot-docker.sh:84`: Docker browser E2E validates CDP-backed browser interaction.
- `/Users/kevinlin/code/openclaw/extensions/codex/src/app-server/sandbox-exec-server.test.ts:116`: Codex sandbox exec-server routes process execution through a sandbox-backed environment.
- `/Users/kevinlin/code/openclaw/extensions/codex/src/app-server/sandbox-exec-server.http.test.ts:29`: Codex sandbox exec-server routes HTTP requests through the sandbox backend.
-
-### Unit tests
-
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/browser.create.test.ts:258`: verifies stale sandbox browser images are rejected.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/browser.create.test.ts:292`: verifies noVNC loopback publish and password env.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/browser.create.test.ts:431`: verifies browser SSRF policy is passed to sandbox bridge.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/browser.create.test.ts:647`: verifies sandbox CDP relay requires auth.
- `/Users/kevinlin/code/openclaw/src/agents/sandbox/browser.novnc-url.test.ts:26`: verifies one-time noVNC observer tokens.
- `/Users/kevinlin/code/openclaw/extensions/codex/src/app-server/dynamic-tool-build.test.ts:219`: verifies sandbox shell tools are exposed for non-Docker sandbox backends.
- `/Users/kevinlin/code/openclaw/extensions/codex/src/app-server/dynamic-tool-build.test.ts:689`: verifies Codex native surfaces are disabled when OpenClaw sandbox is active.
- `/Users/kevinlin/code/openclaw/extensions/codex/src/app-server/dynamic-tool-build.test.ts:739`: verifies sandbox exec-server native surfaces stay behind sandbox tool policy.
-
-### Gitcrawl queries
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "sandbox browser" --json`
-
-Results:
-
- Open issue #84942: sandboxed runtime reported while target=sandbox browser is unavailable.
- Open issue #52662: browser sandbox should support non-Docker backends.
- Open PR #85572: add sandbox posture conformance checks.
- Open issue #49609: sandbox-browser noVNC clipboard garbles non-Latin-1 characters.
- Open issue #64383: simplify sandbox browser CDP path.
-
-Query:
-
-`gitcrawl search openclaw/openclaw --query "sandbox browser sandbox_exec sandbox_process Codex app-server" --json`
-
-Results:
-
- No focused hits returned; broader `sandbox browser` results supplied current archive evidence.
-
-### Discrawl queries
-
-Query:
-
-`discrawl search --mode hybrid --limit 5 "browser sandbox"`
-
-Results:
-
- 2026-05-21 maintainers archive discusses browser automation from a hosted
-  devbox/session and local Playwright/Chrome alternatives.
- 2026-05-14 release archive notes stricter setup/browser/control UI pairing and
-  more consistent transcript/tool-result redaction.
-
-Query:
-
-`discrawl search --mode fts --limit 5 "sandbox_exec sandbox_process browser sandbox"`
-
-Results:
-
- No high-signal FTS hits.
--- a/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/scores.yaml
+++ b/docs/maturity-scorecard/inventory/browser-automation-and-exec-sandbox-tools/scores.yaml
@@ -1,18 +0,0 @@
-version: 1
-process_version: 3
-data:
-  - name: Browser Automation
-    category_note: browser-actions-snapshots-and-artifacts.md
-    coverage: 78
-    quality: 74
-    completeness: 78
-  - name: Tool Invocation and Execution
-    category_note: exec-routing-and-process-lifecycle.md
-    coverage: 82
-    quality: 79
-    completeness: 82
-  - name: Sandbox and Tool Policy
-    category_note: sandbox-backends-and-workspace-isolation.md
-    coverage: 76
-    quality: 72
-    completeness: 76
--- a/Show More
+++ b/Show More